- commit
- 003e5cc2d9b7d2191966cf689712526a95b6f624
- parent
- 20a163b86366d50397dd32155e6580d59d9537aa
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2026-01-25 17:53
init
Diffstat
| A | README.md | 29 | +++++++++++++++++++++++++++++ |
| A | rpg_extract.py | 311 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 340 insertions, 0 deletions
diff --git a/README.md b/README.md
@@ -0,0 +1,29 @@ -1 1 # Extractor for RPG Maker VX Ace -1 2 -1 3 This is a script to extract files from RPG Maker VX Ace to a more -1 4 human-readable format. -1 5 -1 6 Specifically: -1 7 -1 8 - Extract files from `Game.rgss3a` -1 9 - Convert rvdata2 files to JSON -1 10 - Extract ruby code from `Data/Scripts.rvdata2` -1 11 -1 12 ## Usage -1 13 -1 14 ``` -1 15 python3 rpg_extract.py /path/to/game/ -1 16 ``` -1 17 -1 18 ## Warning -1 19 -1 20 This script can and will overwrite files. It is recommended to run it on a -1 21 copy of the game folder. -1 22 -1 23 ## Prior Art -1 24 -1 25 - <https://github.com/d9pouces/RubyMarshal.git> -1 26 - <https://petschko.org/tools/mv_decrypter/index.html> -1 27 - <https://github.com/uuksu/RPGMakerDecrypter> -1 28 - <https://gitlab.com/rgss/rgsstool> -1 29 - <https://saveeditor.online/>
diff --git a/rpg_extract.py b/rpg_extract.py
@@ -0,0 +1,311 @@
-1 1 import base64
-1 2 import json
-1 3 import math
-1 4 import struct
-1 5 import sys
-1 6 import zlib
-1 7 from pathlib import Path
-1 8
-1 9
class IterFile:
    """Minimal read-only, file-like wrapper around an iterator of byte chunks.

    Only ``read`` is provided. Chunks are pulled from the iterator on demand
    and any surplus bytes are kept in ``buf`` for the next call.
    """

    def __init__(self, iterator):
        # Iterator yielding ``bytes`` chunks; it is advanced with ``next()``.
        self.iterator = iterator
        # Bytes already pulled from the iterator but not yet handed out.
        self.buf = b''

    def read(self, size=-1):
        """Return up to ``size`` bytes from the stream.

        A negative ``size`` (or ``None``) returns everything that is left.
        Fewer bytes than requested, possibly ``b''``, are returned once the
        iterator is exhausted.
        """
        read_all = size is None or size < 0

        # Collect chunks in a list and join once instead of growing an
        # immutable bytes object on every iteration.
        parts = [self.buf]
        available = len(self.buf)
        while read_all or available < size:
            try:
                chunk = next(self.iterator)
            except StopIteration:
                break
            parts.append(chunk)
            available += len(chunk)
        data = b''.join(parts)

        if read_all:
            # Slicing with a negative size would cut bytes off the end, so
            # the "read everything" case is handled explicitly.
            self.buf = b''
            return data

        self.buf = data[size:]
        return data[:size]
-1 24
-1 25
class Rgss3aReader:
    """Reader for RPG Maker VX Ace ``Game.rgss3a`` archives (RGSSAD v3).

    ``fh`` must be a seekable binary file object. File contents are decrypted
    lazily, so ``fh`` has to stay open for as long as the objects returned by
    ``read_toc`` are being read.
    """

    def __init__(self, fh):
        self.fh = fh

    def read_int32(self):
        """Read one unsigned 32-bit little-endian integer from ``fh``."""
        # Explicit '<' so the result does not depend on the host byte order.
        return struct.unpack('<I', self.fh.read(4))[0]

    def decrypt_bytes(self, size, key):
        """Read ``size`` bytes from ``fh`` and XOR them with ``key``.

        The key is applied as four little-endian bytes, repeated as often as
        needed. Only the low 32 bits of ``key`` are used.
        """
        raw = self.fh.read(size)
        k = struct.pack('<I', key & 0xFFFFFFFF)
        return bytes(b ^ k[i % 4] for i, b in enumerate(raw))

    def iter_chunks(self, offset, size, initial_key):
        """Yield the decrypted content of one archived file.

        The file occupies ``size`` bytes starting at absolute position
        ``offset``. It is yielded in chunks of up to 4 bytes because the key
        changes after every 4-byte word.
        """
        pos = 0
        key = initial_key

        while pos < size:
            n = min(4, size - pos)
            # Several generators share one file handle and may be consumed
            # in any order, so reposition before every read.
            self.fh.seek(offset + pos)
            yield self.decrypt_bytes(n, key)
            pos += n
            key = (key * 7 + 3) & 0xFFFFFFFF

    def read_toc(self):
        """Parse the archive header and table of contents.

        Returns a dict that maps each archived file name to an ``IterFile``
        yielding the decrypted content.

        Raises ``ValueError`` if the stream does not start with an RGSSAD
        version 3 header.
        """
        # Explicit checks instead of ``assert``: running with -O would strip
        # the asserts and, with them, the reads that consume the header.
        magic = self.fh.read(7)
        if magic != b'RGSSAD\0':
            raise ValueError(f'not an RGSSAD archive: {magic!r}')
        version = self.fh.read(1)
        if version != b'\x03':
            raise ValueError(f'unsupported RGSSAD version: {version!r}')

        # The key is a 32-bit value; without the mask the XORs below would
        # leave stray high bits in every decoded field.
        key = (self.read_int32() * 9 + 3) & 0xFFFFFFFF

        toc = {}
        while True:
            offset = self.read_int32() ^ key
            if offset == 0:
                # An entry with offset 0 terminates the table.
                break
            size = self.read_int32() ^ key
            file_key = self.read_int32() ^ key
            name_size = self.read_int32() ^ key
            name = self.decrypt_bytes(name_size, key).decode('utf-8')
            toc[name] = IterFile(self.iter_chunks(offset, size, file_key))
        return toc
-1 67
-1 68
class RubyMarshalReader:
    """Decoder for one Ruby Marshal stream, producing JSON-friendly values.

    Format reference: https://docs.ruby-lang.org/en/2.2.0/marshal_rdoc.html

    ``nil``, booleans, integers, floats and arrays map to the matching
    Python values. Strings whose only instance variable is ``E: true``
    become ``str``. Everything else becomes a dict with a ``'type'`` key.
    Raw byte strings are base64-encoded so the result can be dumped as JSON.
    """

    # Type bytes that only decorate the object that follows them. Ruby does
    # not give them an entry of their own in the table used by '@' links.
    WRAPPER_TYPES = (b'I', b'e', b'C')

    def __init__(self, fh):
        self.fh = fh
        # Symbols in order of first appearance; ';' refers to them by index.
        self.symbols = []
        # Objects in order of first appearance; '@' refers to them by index.
        self.objects = []

    def check_version(self):
        """Consume the two-byte version header.

        Returns ``False`` if the stream is exhausted and ``True`` if a
        supported header (major 4, minor at most 8) was read. Raises
        ``ValueError`` for anything else.
        """
        version = self.fh.read(2)
        if not version:
            return False
        if len(version) != 2 or version[0] != 4 or version[1] > 8:
            raise ValueError(f'unsupported marshal version: {version!r}')
        return True

    def read_bytes(self, size):
        """Read ``size`` raw bytes and wrap them as a base64 ``Bytes`` dict."""
        b = self.fh.read(size)
        return {
            'type': 'Bytes',
            'value': base64.b64encode(b).decode('ascii'),
        }

    def read_long(self):
        """Read one integer in Marshal's variable-length "long" encoding."""
        first = struct.unpack('b', self.fh.read(1))[0]

        if first == 0:
            return 0
        if first > 4:
            # Small positive values are stored in the first byte, offset by 5.
            return first - 5
        if first < -4:
            # Small negative values likewise.
            return first + 5

        # Otherwise |first| is the number of little-endian payload bytes.
        width = abs(first)
        value = int.from_bytes(self.fh.read(width), 'little')
        if first < 0:
            value -= 1 << (8 * width)
        return value

    def read_symbol(self):
        """Read the next value and require it to be a symbol (':' or ';')."""
        typ, value = self._read_obj()
        if typ != b':':
            raise ValueError(f'expected a symbol, got type {typ!r}')
        return value

    def read_obj(self):
        """Read and return the next value of any type."""
        _, obj = self._read_obj()
        return obj

    def read_multidict(self):
        """Read a counted list of ``(key, value)`` pairs.

        Keys can be arbitrary (non-string, unhashable) values, so the pairs
        are returned as a list and not as a dict.
        """
        size = self.read_long()
        result = []
        for _ in range(size):
            key = self.read_obj()
            result.append((key, self.read_obj()))
        return result

    def read_dict(self):
        """Read a counted list of symbol/value pairs into a dict."""
        size = self.read_long()
        result = {}
        for _ in range(size):
            key = self.read_symbol()
            result[key] = self.read_obj()
        return result

    def _read_complex_obj(self, typ):
        """Decode the payload that follows the type byte ``typ``.

        Raises ``ValueError`` for an unknown type byte or a malformed bignum
        sign.
        """
        if typ == b'I':
            # An object followed by its instance variables.
            t, obj = self._read_obj()
            data = self.read_dict()
            if t == b'"' and data == {'E': True}:
                raw = base64.b64decode(obj['value'])
                try:
                    return raw.decode()
                except UnicodeDecodeError:
                    # Not valid UTF-8: try it as zlib-compressed text.
                    return zlib.decompress(raw).decode()
            return {
                'type': 'Instance',
                'instance': obj,
                'vars': data,
            }
        elif typ == b'e':
            # The stream stores the module name first, then the object.
            mod = self.read_symbol()
            obj = self.read_obj()
            return {
                'type': 'Extended',
                'obj': obj,
                'mod': mod,
            }
        elif typ == b'[':
            size = self.read_long()
            return [self.read_obj() for _ in range(size)]
        elif typ == b'l':
            sign = self.fh.read(1)
            if sign not in (b'+', b'-'):
                raise ValueError(f'invalid bignum sign: {sign!r}')
            # The length counts 16-bit words, hence twice as many bytes.
            size = self.read_long()
            value = int.from_bytes(self.fh.read(size * 2), 'little')
            if sign == b'-':
                value = -value
            return value
        elif typ == b'"':
            size = self.read_long()
            return self.read_bytes(size)
        elif typ == b'f':
            size = self.read_long()
            s = self.fh.read(size)
            if s == b'inf':
                return math.inf
            elif s == b'-inf':
                return -math.inf
            elif s == b'nan':
                return math.nan
            else:
                # Anything after a NUL byte is not part of the decimal text.
                return float(s.split(b'\0', 1)[0].decode())
        elif typ in [b'c', b'm', b'M']:
            size = self.read_long()
            return {
                'type': {
                    b'c': 'ClassRef',
                    b'm': 'ModuleRef',
                    b'M': 'ClassRef/ModuleRef',
                }[typ],
                'data': self.read_bytes(size),
            }
        elif typ in [b'{', b'}']:
            # '}' is a hash with a default value stored after the items.
            items = self.read_multidict()
            default = self.read_obj() if typ == b'}' else None
            return {
                'type': 'Dict',
                'items': items,
                'default': default,
            }
        elif typ in [b'd', b'C', b'U']:
            name = self.read_symbol()
            data = self.read_obj()
            return {
                'type': {
                    b'd': 'Data',
                    b'C': 'Class',
                    b'U': 'User Marshal',
                }[typ],
                'name': name,
                'data': data,
            }
        elif typ in [b'o', b'S']:
            cls = self.read_symbol()
            data = self.read_dict()
            return {
                'type': {b'o': 'Object', b'S': 'Struct'}[typ],
                'class': cls,
                'data': data,
            }
        elif typ == b'/':
            size = self.read_long()
            source = self.fh.read(size).decode()
            opts = self.fh.read(1)[0]
            return {
                'type': 'regex',
                'value': source,
                'opts': opts,
            }
        elif typ == b'u':
            name = self.read_symbol()
            size = self.read_long()
            return {
                'type': 'User Defined',
                'name': name,
                'data': self.read_bytes(size),
            }
        else:
            raise ValueError(typ)

    def _read_obj(self):
        """Read one value and return ``(type_byte, value)``.

        Symbol links (';') are reported with type ``b':'`` so callers can
        treat them like freshly read symbols.
        """
        typ = self.fh.read(1)
        if typ == b'T':
            value = True
        elif typ == b'F':
            value = False
        elif typ == b'0':
            value = None
        elif typ == b'i':
            value = self.read_long()
        elif typ == b':':
            size = self.read_long()
            value = self.fh.read(size).decode()
            self.symbols.append(value)
        elif typ == b';':
            return b':', self.symbols[self.read_long()]
        elif typ == b'@':
            value = self.objects[self.read_long()]
        elif typ in self.WRAPPER_TYPES:
            # Wrappers take no slot of their own: the wrapped object is the
            # one that gets registered. Point its slot at the decorated
            # result so later links resolve to the full value.
            slot = len(self.objects)
            value = self._read_complex_obj(typ)
            if len(self.objects) > slot:
                self.objects[slot] = value
        else:
            # Reserve the slot before descending so that nested objects get
            # higher indices than their container.
            slot = len(self.objects)
            self.objects.append(None)
            value = self._read_complex_obj(typ)
            self.objects[slot] = value
        return typ, value
-1 262
-1 263
def log(path, i, total):
    """Print ``path`` prefixed with how far ``i`` is through ``total``.

    The prefix is the whole-number percentage ``i * 100 // total``, padded to
    a width of three.
    """
    percent = (i * 100) // total
    line = f'[{percent: 3}%] {path}'
    print(line)
-1 267
-1 268
def extract_scripts(root, scripts, i, total):
    """Write every non-empty script to its own ``.rb`` file below ``root``.

    ``scripts`` is an iterable of ``(id, title, content)`` triples. Entries
    with empty content are skipped. File names are built from the entry's
    position, its id and its title, with ``/`` in the title replaced by ``_``.
    ``i`` and ``total`` are only passed on to ``log`` for progress output.
    """
    root.mkdir(parents=True, exist_ok=True)
    for j, (script_id, title, content) in enumerate(scripts):
        if not content:
            continue
        path = root / f'{j:06}_{script_id}_{title.replace("/", "_")}.rb'
        log(path, i, total)
        # Explicit UTF-8 keeps the output independent of the platform locale.
        # newline='' writes the script's own line endings untranslated.
        with path.open('w', encoding='utf-8', newline='') as fh:
            fh.write(content)
-1 278
-1 279
def main():
    """Extract ``Game.rgss3a`` from the game folder given on the command line.

    ``.rvdata2`` entries are decoded and written as ``.json`` next to where
    the original file would be. ``Data\\Scripts.rvdata2`` is instead split
    into individual Ruby files in a ``Scripts`` folder. All other entries are
    written out unchanged. Existing files are overwritten.
    """
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} /path/to/game/')
    root = Path(sys.argv[1])
    root_resolved = root.resolve()

    with open(root / 'Game.rgss3a', 'rb') as fh:
        reader = Rgss3aReader(fh)
        toc = reader.read_toc()
        total = len(toc)

        # Entries are decoded lazily from ``fh``, so the loop has to run
        # while the archive is still open.
        for i, (name, fi) in enumerate(toc.items()):
            path = root / name.replace('\\', '/')

            # Names come from the archive: refuse anything ('..' or an
            # absolute path) that would land outside the game folder.
            try:
                path.resolve().relative_to(root_resolved)
            except ValueError:
                sys.exit(f'refusing to write outside {root}: {name!r}')

            path.parent.mkdir(parents=True, exist_ok=True)

            if path.suffix == '.rvdata2':
                # A file may hold several marshal streams back to back;
                # read until no further version header is found.
                objs = []
                while True:
                    r = RubyMarshalReader(fi)
                    if not r.check_version():
                        break
                    objs.append(r.read_obj())

                if name == 'Data\\Scripts.rvdata2':
                    if len(objs) != 1:
                        raise ValueError(
                            f'expected one object in {name}, got {len(objs)}'
                        )
                    extract_scripts(root / 'Scripts', objs[0], i, total)
                else:
                    json_path = path.with_suffix('.json')
                    log(json_path, i, total)
                    with json_path.open('w', encoding='utf-8') as fo:
                        json.dump(objs, fo, indent=2)
            else:
                log(path, i, total)
                with path.open('wb') as fo:
                    # Larger reads avoid one write call per 4-byte word.
                    while chunk := fi.read(65536):
                        fo.write(chunk)


if __name__ == '__main__':
    main()