rpg-extract

Extractor for RPG Maker VX Ace
git clone https://git.ce9e.org/rpg-extract.git

commit
003e5cc2d9b7d2191966cf689712526a95b6f624
parent
20a163b86366d50397dd32155e6580d59d9537aa
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2026-01-25 17:53
init

Diffstat

A README.md 29 +++++++++++++++++++++++++++++
A rpg_extract.py 311 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 340 insertions, 0 deletions


diff --git a/README.md b/README.md

@@ -0,0 +1,29 @@
   -1     1 # Extractor for RPG Maker VX Ace
   -1     2 
   -1     3 This is a script to extract files from RPG Maker VX Ace to a more
   -1     4 human-readable format.
   -1     5 
   -1     6 Specifically:
   -1     7 
   -1     8 -   Extract files from `Game.rgss3a`
   -1     9 -   Convert rvdata2 files to JSON
   -1    10 -   Extract ruby code from `Data/Scripts.rvdata2`
   -1    11 
   -1    12 ## Usage
   -1    13 
   -1    14 ```
   -1    15 python3 rpg_extract.py /path/to/game/
   -1    16 ```
   -1    17 
   -1    18 ## Warning
   -1    19 
   -1    20 This script can and will overwrite files. It is recommended to run it on a
   -1    21 copy of the game folder.
   -1    22 
   -1    23 ## Prior Art
   -1    24 
   -1    25 -   <https://github.com/d9pouces/RubyMarshal.git>
   -1    26 -   <https://petschko.org/tools/mv_decrypter/index.html>
   -1    27 -   <https://github.com/uuksu/RPGMakerDecrypter>
   -1    28 -   <https://gitlab.com/rgss/rgsstool>
   -1    29 -   <https://saveeditor.online/>

diff --git a/rpg_extract.py b/rpg_extract.py

@@ -0,0 +1,311 @@
   -1     1 import base64
   -1     2 import json
   -1     3 import math
   -1     4 import struct
   -1     5 import sys
   -1     6 import zlib
   -1     7 from pathlib import Path
   -1     8 
   -1     9 
   -1    10 class IterFile:
   -1    11     def __init__(self, iterator):
   -1    12         self.iterator = iterator
   -1    13         self.buf = b''
   -1    14 
   -1    15     def read(self, size):
   -1    16         try:
   -1    17             while size < 0 or len(self.buf) < size:
   -1    18                 self.buf += next(self.iterator)
   -1    19         except StopIteration:
   -1    20             pass
   -1    21         result = self.buf[:size]
   -1    22         self.buf = self.buf[size:]
   -1    23         return result
   -1    24 
   -1    25 
   -1    26 class Rgss3aReader:
   -1    27     def __init__(self, fh):
   -1    28         self.fh = fh
   -1    29 
   -1    30     def read_int32(self):
   -1    31         return struct.unpack('I', self.fh.read(4))[0]
   -1    32 
   -1    33     def decrypt_bytes(self, size, key):
   -1    34         raw = self.fh.read(size)
   -1    35         k = struct.pack('I', key)
   -1    36         return bytes([b ^ k[i % 4] for i, b in enumerate(raw)])
   -1    37 
   -1    38     def iter_chunks(self, offset, size, initial_key):
   -1    39         pos = 0
   -1    40         key = initial_key
   -1    41 
   -1    42         while True:
   -1    43             n = min(4, size - pos)
   -1    44             if n == 0:
   -1    45                 break
   -1    46             pos += n
   -1    47 
   -1    48             yield self.decrypt_bytes(n, key)
   -1    49             key = (key * 7 + 3) % (1 << 32)
   -1    50 
   -1    51     def read_toc(self):
   -1    52         assert self.fh.read(7) == b'RGSSAD\0'
   -1    53         assert self.fh.read(1) == b'\x03'
   -1    54         key = self.read_int32() * 9 + 3
   -1    55 
   -1    56         toc = {}
   -1    57         while True:
   -1    58             offset = self.read_int32() ^ key
   -1    59             size = self.read_int32() ^ key
   -1    60             file_key = self.read_int32() ^ key
   -1    61             name_size = self.read_int32() ^ key
   -1    62             if offset == 0:
   -1    63                 break
   -1    64             name = self.decrypt_bytes(name_size, key).decode('utf-8')
   -1    65             toc[name] = IterFile(self.iter_chunks(offset, size, file_key))
   -1    66         return toc
   -1    67 
   -1    68 
   -1    69 class RubyMarshalReader:
   -1    70     # https://docs.ruby-lang.org/en/2.2.0/marshal_rdoc.html
   -1    71 
   -1    72     def __init__(self, fh):
   -1    73         self.fh = fh
   -1    74         self.symbols = []
   -1    75         self.objects = []
   -1    76 
   -1    77     def check_version(self):
   -1    78         version = self.fh.read(2)
   -1    79         if version:
   -1    80             assert version[0] == 4
   -1    81             assert version[1] <= 8
   -1    82             return True
   -1    83         else:
   -1    84             return False
   -1    85 
   -1    86     def read_bytes(self, size):
   -1    87         b = self.fh.read(size)
   -1    88         return {
   -1    89             'type': 'Bytes',
   -1    90             'value': base64.b64encode(b).decode('ascii'),
   -1    91         }
   -1    92 
   -1    93     def read_long(self):
   -1    94         first = struct.unpack('b', self.fh.read(1))[0]
   -1    95 
   -1    96         if first == 0:
   -1    97             return 0
   -1    98         elif -4 <= first <= 4:
   -1    99             k = 0
   -1   100             for b in reversed(self.fh.read(abs(first))):
   -1   101                 k = k << 8 | b
   -1   102             if first < 0:
   -1   103                 k -= 1 << (8 * abs(first))
   -1   104             return k
   -1   105         elif first > 0:
   -1   106             return first - 5
   -1   107         else:
   -1   108             return first + 5
   -1   109 
   -1   110     def read_symbol(self):
   -1   111         typ, value = self._read_obj()
   -1   112         assert typ == b':', typ
   -1   113         return value
   -1   114 
   -1   115     def read_obj(self):
   -1   116         _, obj = self._read_obj()
   -1   117         return obj
   -1   118 
   -1   119     def read_multidict(self):
   -1   120         # can have non-string keys
   -1   121         result = []
   -1   122         size = self.read_long()
   -1   123         for _ in range(size):
   -1   124             key = self.read_obj()
   -1   125             result.append((key, self.read_obj()))
   -1   126         return result
   -1   127 
   -1   128     def read_dict(self):
   -1   129         result = {}
   -1   130         size = self.read_long()
   -1   131         for _ in range(size):
   -1   132             key = self.read_symbol()
   -1   133             result[key] = self.read_obj()
   -1   134         return result
   -1   135 
   -1   136     def _read_complex_obj(self, typ):
   -1   137         if typ == b'I':
   -1   138             t, obj = self._read_obj()
   -1   139             data = self.read_dict()
   -1   140             if t == b'"' and data == {'E': True}:
   -1   141                 assert obj['type'] == 'Bytes'
   -1   142                 b = base64.b64decode(obj['value'])
   -1   143                 try:
   -1   144                     return b.decode()
   -1   145                 except UnicodeDecodeError:
   -1   146                     return zlib.decompress(b).decode()
   -1   147             return {
   -1   148                 'type': 'Instance',
   -1   149                 'instance': obj,
   -1   150                 'vars': data,
   -1   151             }
   -1   152         elif typ == b'e':
   -1   153             return {
   -1   154                 'type': 'Extended',
   -1   155                 'obj': self.read_obj(),
   -1   156                 'mod': self.read_obj(),  # should be symbol!?
   -1   157             }
   -1   158         elif typ == b'[':
   -1   159             size = self.read_long()
   -1   160             return [self.read_obj() for _ in range(size)]
   -1   161         elif typ == b'l':
   -1   162             sign = self.fh.read(1)
   -1   163             assert sign in [b'+', b'-']
   -1   164             size = self.read_long()
   -1   165             value = 0
   -1   166             for b in reversed(self.fh.read(size * 2)):
   -1   167                 value = value << 8 | b
   -1   168             if sign == b'-':
   -1   169                 value = -value
   -1   170             return value
   -1   171         elif typ == b'"':
   -1   172             size = self.read_long()
   -1   173             return self.read_bytes(size)
   -1   174         elif typ == b'f':
   -1   175             size = self.read_long()
   -1   176             s = self.fh.read(size)
   -1   177             if s == b'inf':
   -1   178                 return math.inf
   -1   179             elif s == b'-inf':
   -1   180                 return -math.inf
   -1   181             elif s == b'nan':
   -1   182                 return math.nan
   -1   183             else:
   -1   184                 return float(s.split(b'\0', 1)[0].decode())
   -1   185         elif typ in [b'c', b'm', b'M']:
   -1   186             size = self.read_long()
   -1   187             return {
   -1   188                 'type': {
   -1   189                     b'c': 'ClassRef',
   -1   190                     b'm': 'ModuleRef',
   -1   191                     b'M': 'ClassRef/ModuleRef',
   -1   192                 }[typ],
   -1   193                 'data': self.read_bytes(size),
   -1   194             }
   -1   195         elif typ in [b'{', b'}']:
   -1   196             return {
   -1   197                 'type': 'Dict',
   -1   198                 'items': self.read_multidict(),
   -1   199                 'default': self.read_obj() if typ == b'}' else None,
   -1   200             }
   -1   201         elif typ in [b'd', b'C', b'U']:
   -1   202             return {
   -1   203                 'type': {
   -1   204                     b'd': 'Data',
   -1   205                     b'C': 'Class',
   -1   206                     b'U': 'User Marshal'
   -1   207                 }[typ],
   -1   208                 'name': self.read_symbol(),
   -1   209                 'data': self.read_obj(),
   -1   210             }
   -1   211         elif typ in [b'o', b'S']:
   -1   212             return {
   -1   213                 'type': {b'o': 'Object', b'S': 'Struct'}[typ],
   -1   214                 'class': self.read_symbol(),
   -1   215                 'data': self.read_dict(),
   -1   216             }
   -1   217         elif typ == b'/':
   -1   218             size = self.read_long()
   -1   219             return {
   -1   220                 'type': 'regex',
   -1   221                 'value': self.fh.read(size).decode(),
   -1   222                 'opts': self.fh.read(1)[0],
   -1   223             }
   -1   224         elif typ == b'u':
   -1   225             name = self.read_symbol()
   -1   226             size = self.read_long()
   -1   227             return {
   -1   228                 'type': 'User Defined',
   -1   229                 'name': name,
   -1   230                 'data': self.read_bytes(size),
   -1   231             }
   -1   232         else:
   -1   233             raise ValueError(typ)
   -1   234 
   -1   235     def _read_obj(self):
   -1   236         typ = self.fh.read(1)
   -1   237         if typ == b'T':
   -1   238             value = True
   -1   239         elif typ == b'F':
   -1   240             value = False
   -1   241         elif typ == b'0':
   -1   242             value = None
   -1   243         elif typ == b'i':
   -1   244             value = self.read_long()
   -1   245         elif typ == b':':
   -1   246             size = self.read_long()
   -1   247             s = self.fh.read(size).decode()
   -1   248             self.symbols.append(s)
   -1   249             value = s
   -1   250         elif typ == b';':
   -1   251             i = self.read_long()
   -1   252             return b':', self.symbols[i]
   -1   253         elif typ == b'@':
   -1   254             i = self.read_long()
   -1   255             value = self.objects[i]
   -1   256         else:
   -1   257             i = len(self.objects)
   -1   258             self.objects.append(None)
   -1   259             value = self._read_complex_obj(typ)
   -1   260             self.objects[i] = value
   -1   261         return typ, value
   -1   262 
   -1   263 
   -1   264 def log(path, i, total):
   -1   265     progress = i * 100 // total
   -1   266     print(f'[{progress: 3}%] {path}')
   -1   267 
   -1   268 
   -1   269 def extract_scripts(root, scripts, i, total):
   -1   270     root.mkdir(parents=True, exist_ok=True)
   -1   271     for j, (id, title, content) in enumerate(scripts):
   -1   272         if not content:
   -1   273             continue
   -1   274         path = root / f'{j:06}_{id}_{title.replace("/", "_")}.rb'
   -1   275         log(path, i, total)
   -1   276         with path.open('w') as fh:
   -1   277             fh.write(content)
   -1   278 
   -1   279 
   -1   280 if __name__ == '__main__':
   -1   281     root = Path(sys.argv[1])
   -1   282 
   -1   283     with open(root / 'Game.rgss3a', 'rb') as fh:
   -1   284         reader = Rgss3aReader(fh)
   -1   285         toc = reader.read_toc()
   -1   286         total = len(toc)
   -1   287 
   -1   288         for i, (name, fi) in enumerate(toc.items()):
   -1   289             path = root / name.replace('\\', '/')
   -1   290             path.parent.mkdir(parents=True, exist_ok=True)
   -1   291 
   -1   292             if path.suffix == '.rvdata2':
   -1   293                 objs = []
   -1   294                 while True:
   -1   295                     r = RubyMarshalReader(fi)
   -1   296                     if not r.check_version():
   -1   297                         break
   -1   298                     objs.append(r.read_obj())
   -1   299 
   -1   300                 if name == 'Data\\Scripts.rvdata2':
   -1   301                     assert len(objs) == 1
   -1   302                     extract_scripts(root / 'Scripts', objs[0], i, total)
   -1   303                 else:
   -1   304                     log(path.with_suffix(".json"), i, total)
   -1   305                     with path.with_suffix('.json').open('w') as fo:
   -1   306                         json.dump(objs, fo, indent=2)
   -1   307             else:
   -1   308                 log(path, i, total)
   -1   309                 with path.open('wb') as fo:
   -1   310                     while chunk := fi.read(4):
   -1   311                         fo.write(chunk)