Mirror of https://github.com/veekun/pokedex.git (synced 2024-08-20 18:16:34 +00:00)

commit 949eafb957 (parent 54ea67a804)
Initial gen6-to-yaml ripping stuff

9 changed files with 1841 additions and 0 deletions
0    pokedex/extract/__init__.py (new file)
0    pokedex/extract/lib/__init__.py (new file)
87   pokedex/extract/lib/base.py (new file)

@@ -0,0 +1,87 @@
"""Base or helper classes used a lot for dealing with file formats.
"""
import io
import struct


class Substream:
    """Wraps a stream and pretends it starts at an offset other than 0.

    Partly implements the file interface.

    This type always seeks before reading, but doesn't do so afterwards, so
    interleaving reads with the underlying stream may not do what you want.
    """
    def __init__(self, stream, offset=0, length=-1):
        if isinstance(stream, Substream):
            self.stream = stream.stream
            self.offset = offset + stream.offset
        else:
            self.stream = stream
            self.offset = offset

        self.length = length
        self.pos = 0

    def __repr__(self):
        return "<{} of {} at {}>".format(
            type(self).__name__, self.stream, self.offset)

    def read(self, n=-1):
        self.stream.seek(self.offset + self.pos)
        if self.length >= 0:
            # Clamp to what's left of this substream, so a partial read
            # followed by read() can't run past the declared length
            remaining = self.length - self.pos
            if n < 0 or n > remaining:
                n = remaining
        data = self.stream.read(n)
        self.pos += len(data)
        return data

    def seek(self, offset):
        offset = max(offset, 0)
        if self.length >= 0:
            offset = min(offset, self.length)
        self.stream.seek(self.offset + offset)
        self.pos = self.tell()

    def tell(self):
        return self.stream.tell() - self.offset

    def __len__(self):
        if self.length < 0:
            pos = self.stream.tell()
            self.stream.seek(0, io.SEEK_END)
            parent_length = self.stream.tell()
            self.stream.seek(pos)
            return parent_length - self.offset
        else:
            return self.length

    def peek(self, n):
        pos = self.stream.tell()
        self.stream.seek(self.offset + self.pos)
        data = self.stream.read(n)
        self.stream.seek(pos)
        return data

    def unpack(self, fmt):
        """Unpacks a struct format from the current position in the stream."""
        data = self.read(struct.calcsize(fmt))
        return struct.unpack(fmt, data)

    def slice(self, offset, length=-1):
        # TODO limit or warn if length is too long for this slice?
        # Note: __init__ already adds the parent's offset when given a
        # Substream, so pass the offset relative to this substream
        return Substream(self, offset, length)


class _ContainerFile:
    slices = ()

    def __len__(self):
        return len(self.slices)

    def __iter__(self):
        return iter(self.slices)

    def __getitem__(self, key):
        return self.slices[key]
182  pokedex/extract/lib/clim.py (new file)

@@ -0,0 +1,182 @@
import math
import struct

import construct as c

clim_header_struct = c.Struct(
    'clim_header',
    c.Magic(b'CLIM'),
    c.Const(c.ULInt16('endianness'), 0xfeff),
    c.Const(c.ULInt16('header_length'), 0x14),
    c.ULInt32('version'),
    c.ULInt32('file_size'),
    c.ULInt32('blocks_ct'),
)
imag_header_struct = c.Struct(
    'imag_header',
    c.Magic(b'imag'),
    c.Const(c.ULInt32('section_length'), 0x10),
    c.ULInt16('width'),
    c.ULInt16('height'),
    c.Enum(
        c.ULInt32('format'),
        L8=0,
        A8=1,
        LA4=2,
        LA8=3,
        HILO8=4,
        RGB565=5,
        RGB8=6,
        RGBA5551=7,
        RGBA4=8,
        RGBA8=9,
        ETC1=10,
        ETC1A4=11,
        L4=12,
        A4=13,
        #ETC1=19,
    )
)


COLOR_DECODERS = {}


def _register_color_decoder(name, *, bpp, depth):
    def register(f):
        COLOR_DECODERS[name] = f, bpp, depth
        return f
    return register


@_register_color_decoder('RGBA4', bpp=2, depth=4)
def decode_rgba4(data):
    # The idea is that every uint16 is a packed rrrrggggbbbbaaaa, but when
    # written out little-endian this becomes bbbbaaaarrrrgggg and there's just
    # no pretty way to deal with this
    for i in range(0, len(data), 2):
        ba = data[i]
        rg = data[i + 1]
        r = (((rg & 0xf0) >> 4) * 255 + 7) // 15
        g = (((rg & 0x0f) >> 0) * 255 + 7) // 15
        b = (((ba & 0xf0) >> 4) * 255 + 7) // 15
        a = (((ba & 0x0f) >> 0) * 255 + 7) // 15
        yield r, g, b, a


@_register_color_decoder('RGBA5551', bpp=2, depth=5)
def decode_rgba5551(data, *, start=0, count=None):
    # I am extremely irritated that construct cannot parse this mess for me
    # rrrrrgggggbbbbba
    if count is None:
        end = len(data)
    else:
        end = start + count * 2

    for i in range(start, end, 2):
        datum = data[i] + data[i + 1] * 256
        r = (((datum >> 11) & 0x1f) * 255 + 15) // 31
        g = (((datum >> 6) & 0x1f) * 255 + 15) // 31
        b = (((datum >> 1) & 0x1f) * 255 + 15) // 31
        a = (datum & 0x1) * 255
        yield r, g, b, a


del _register_color_decoder


def apply_palette(palette, data, *, start=0):
    # TODO i am annoyed that this does a pointless copy, but i assume islice()
    # has even more overhead...
    if start != 0:
        data = data[start:]

    if len(palette) <= 16:
        # Short palettes allow cramming two pixels into each byte
        return (
            palette[idx]
            for byte in data
            for idx in (byte >> 4, byte & 0x0f)
        )
    else:
        return map(palette.__getitem__, data)


def untile_pixels(raw_pixels, width, height):
    """Unscramble pixels into plain old rows.

    The pixels are arranged in 8×8 tiles, and each tile is a third-
    iteration Z-order curve.

    Taken from: https://github.com/Zhorken/pokemon-x-y-icons/
    """

    # Images are stored padded to powers of two
    stored_width = 2 ** math.ceil(math.log(width) / math.log(2))
    stored_height = 2 ** math.ceil(math.log(height) / math.log(2))
    num_pixels = stored_width * stored_height
    tile_width = stored_width // 8

    pixels = [
        [None for x in range(width)]
        for y in range(height)
    ]

    for n, pixel in enumerate(raw_pixels):
        if n >= num_pixels:
            break

        # Find the coordinates of the top-left corner of the current tile.
        # n.b. Each tile is 8×8 pixels, and the image is tile_width tiles wide.
        tile_num = n // 64
        tile_y = tile_num // tile_width * 8
        tile_x = tile_num % tile_width * 8

        # Determine the pixel's coordinates within the tile
        # http://en.wikipedia.org/wiki/Z-order_curve#Coordinate_values
        within_tile = n % 64

        sub_x = (
            (within_tile & 0b000001) |
            (within_tile & 0b000100) >> 1 |
            (within_tile & 0b010000) >> 2
        )
        sub_y = (
            (within_tile & 0b000010) >> 1 |
            (within_tile & 0b001000) >> 2 |
            (within_tile & 0b100000) >> 3
        )

        # Add up the pixel's coordinates within the whole image
        x = tile_x + sub_x
        y = tile_y + sub_y

        if x < width and y < height:
            pixels[y][x] = pixel

    return pixels

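
A quick check of the unscrambling, assuming this module is importable: a 5×5 image is stored padded to a single 8×8 tile, and the Z-order curve walks 2×2 blocks, then 4×4 quadrants, so the recovered rows of stored indices interleave rather than run sequentially:

    rows = untile_pixels(range(64), 5, 5)
    assert rows[0] == [0, 1, 4, 5, 16]
    assert rows[1] == [2, 3, 6, 7, 18]
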
def decode_clim(data):
    imag_header = imag_header_struct.parse(data[-20:])
    if imag_header.format not in COLOR_DECODERS:
        raise ValueError(
            "don't know how to decode {} pixels".format(imag_header.format))
    color_decoder, color_bpp, color_depth = COLOR_DECODERS[imag_header.format]

    mode, = struct.unpack_from('<H', data, 0)
    if mode == 2:
        # Paletted
        palette_length, = struct.unpack_from('<H', data, 2)
        palette = list(color_decoder(data, start=4, count=palette_length))
        data_start = 4 + palette_length * color_bpp
        scrambled_pixels = apply_palette(palette, data[data_start:])
    else:
        scrambled_pixels = color_decoder(data)

    pixels = untile_pixels(
        scrambled_pixels,
        imag_header.width,
        imag_header.height,
    )
    return imag_header.width, imag_header.height, color_depth, pixels
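
The `(v * 255 + 15) // 31` arithmetic in the decoders rescales a 5-bit channel to 8 bits with rounding, so 0 maps to 0 and 31 maps to 255. A one-pixel sanity check of the RGBA5551 decoder: 0xf801 packs r=31, g=0, b=0, a=1 as rrrrrgggggbbbbba, stored little-endian:

    assert list(decode_rgba5551(bytes([0x01, 0xf8]))) == [(255, 0, 0, 255)]
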
307  pokedex/extract/lib/garc.py (new file)

@@ -0,0 +1,307 @@
"""Support for reading the GARC generic container format used in the 3DS
|
||||||
|
filesystem.
|
||||||
|
|
||||||
|
Based on code by Zhorken: https://github.com/Zhorken/pokemon-x-y-icons
|
||||||
|
and Kaphotics: https://github.com/kwsch/GARCTool
|
||||||
|
"""
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
|
import struct
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import construct as c
|
||||||
|
|
||||||
|
from . import lzss3
|
||||||
|
from .base import _ContainerFile, Substream
|
||||||
|
from .pc import PokemonContainerFile
|
||||||
|
|
||||||
|
|
||||||
|
def count_bits(n):
|
||||||
|
c = 0
|
||||||
|
while n:
|
||||||
|
c += n & 1
|
||||||
|
n >>= 1
|
||||||
|
return c
|
||||||
|
|
||||||
|
|
||||||
|
garc_header_struct = c.Struct(
|
||||||
|
'garc_header',
|
||||||
|
c.Magic(b'CRAG'),
|
||||||
|
c.Const(c.ULInt32('header_size'), 0x1c),
|
||||||
|
c.Const(c.ULInt16('byte_order'), 0xfeff),
|
||||||
|
c.Const(c.ULInt16('mystery1'), 0x0400),
|
||||||
|
c.Const(c.ULInt32('chunks_ct'), 4),
|
||||||
|
c.ULInt32('data_offset'),
|
||||||
|
c.ULInt32('garc_length'),
|
||||||
|
c.ULInt32('last_length'),
|
||||||
|
)
|
||||||
|
fato_header_struct = c.Struct(
|
||||||
|
'fato_header',
|
||||||
|
c.Magic(b'OTAF'),
|
||||||
|
c.ULInt32('header_size'),
|
||||||
|
c.ULInt16('count'),
|
||||||
|
c.Const(c.ULInt16('padding'), 0xffff),
|
||||||
|
c.Array(
|
||||||
|
lambda ctx: ctx.count,
|
||||||
|
c.ULInt32('fatb_offsets'),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
fatb_header_struct = c.Struct(
|
||||||
|
'fatb_header',
|
||||||
|
c.Magic(b'BTAF'),
|
||||||
|
c.ULInt32('fatb_length'),
|
||||||
|
c.ULInt32('count'),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GARCFile(_ContainerFile):
|
||||||
|
def __init__(self, stream):
|
||||||
|
self.stream = stream = Substream(stream)
|
||||||
|
|
||||||
|
garc_header = garc_header_struct.parse_stream(self.stream)
|
||||||
|
# FATO (file allocation table... offsets?)
|
||||||
|
fato_header = fato_header_struct.parse_stream(self.stream)
|
||||||
|
# FATB (file allocation table)
|
||||||
|
fatb_header = fatb_header_struct.parse_stream(self.stream)
|
||||||
|
|
||||||
|
fatb_start = garc_header.header_size + fato_header.header_size
|
||||||
|
assert stream.tell() == fatb_start + 12
|
||||||
|
|
||||||
|
self.slices = []
|
||||||
|
for i, offset in enumerate(fato_header.fatb_offsets):
|
||||||
|
stream.seek(fatb_start + offset + 12)
|
||||||
|
|
||||||
|
slices = []
|
||||||
|
bits, = struct.unpack('<L', stream.read(4))
|
||||||
|
while bits:
|
||||||
|
if bits & 1:
|
||||||
|
start, end, length = struct.unpack('<3L', stream.read(12))
|
||||||
|
assert end - 4 < start + length <= end
|
||||||
|
slices.append((garc_header.data_offset + start, length))
|
||||||
|
bits >>= 1
|
||||||
|
|
||||||
|
self.slices.append(GARCEntry(stream, slices))
|
||||||
|
|
||||||
|
# FIMB
|
||||||
|
stream.seek(fatb_start + fatb_header.fatb_length)
|
||||||
|
magic, fimb_header_length, fimb_length = struct.unpack(
|
||||||
|
'<4s2L', stream.read(12))
|
||||||
|
assert magic == b'BMIF'
|
||||||
|
assert fimb_header_length == 0xC
|
||||||
|
|
||||||
|
|
||||||
|
class GARCEntry(object):
|
||||||
|
def __init__(self, stream, slices):
|
||||||
|
self.stream = stream
|
||||||
|
self.slices = slices
|
||||||
|
|
||||||
|
def __getitem__(self, i):
|
||||||
|
start, length = self.slices[i]
|
||||||
|
ss = self.stream.slice(start, length)
|
||||||
|
if ss.peek(1) in [b'\x10', b'\x11']:
|
||||||
|
# XXX this sucks but there's no real way to know for sure whether
|
||||||
|
# data is compressed or not. maybe just bake this into the caller
|
||||||
|
# and let them deal with it, same way we do with text decoding?
|
||||||
|
# TODO it would be nice if this could be done lazily for 'inspect'
|
||||||
|
# purposes, since the first four bytes are enough to tell you the
|
||||||
|
# size
|
||||||
|
try:
|
||||||
|
data = lzss3.decompress_bytes(ss.read())
|
||||||
|
except Exception:
|
||||||
|
ss.seek(0)
|
||||||
|
else:
|
||||||
|
return Substream(BytesIO(data))
|
||||||
|
return ss
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.slices)
|
||||||
|
|
||||||
|
|
||||||
|
XY_CHAR_MAP = {
|
||||||
|
0x307f: 0x202f, # nbsp
|
||||||
|
0xe08d: 0x2026, # ellipsis
|
||||||
|
0xe08e: 0x2642, # female sign
|
||||||
|
0xe08f: 0x2640, # male sign
|
||||||
|
}
|
||||||
|
|
||||||
|
XY_VAR_NAMES = {
|
||||||
|
0xff00: "COLOR",
|
||||||
|
0x0100: "TRNAME",
|
||||||
|
0x0101: "PKNAME",
|
||||||
|
0x0102: "PKNICK",
|
||||||
|
0x0103: "TYPE",
|
||||||
|
0x0105: "LOCATION",
|
||||||
|
0x0106: "ABILITY",
|
||||||
|
0x0107: "MOVE",
|
||||||
|
0x0108: "ITEM1",
|
||||||
|
0x0109: "ITEM2",
|
||||||
|
0x010a: "sTRBAG",
|
||||||
|
0x010b: "BOX",
|
||||||
|
0x010d: "EVSTAT",
|
||||||
|
0x0110: "OPOWER",
|
||||||
|
0x0127: "RIBBON",
|
||||||
|
0x0134: "MIINAME",
|
||||||
|
0x013e: "WEATHER",
|
||||||
|
0x0189: "TRNICK",
|
||||||
|
0x018a: "1stchrTR",
|
||||||
|
0x018b: "SHOUTOUT",
|
||||||
|
0x018e: "BERRY",
|
||||||
|
0x018f: "REMFEEL",
|
||||||
|
0x0190: "REMQUAL",
|
||||||
|
0x0191: "WEBSITE",
|
||||||
|
0x019c: "CHOICECOS",
|
||||||
|
0x01a1: "GSYNCID",
|
||||||
|
0x0192: "PRVIDSAY",
|
||||||
|
0x0193: "BTLTEST",
|
||||||
|
0x0195: "GENLOC",
|
||||||
|
0x0199: "CHOICEFOOD",
|
||||||
|
0x019a: "HOTELITEM",
|
||||||
|
0x019b: "TAXISTOP",
|
||||||
|
0x019f: "MAISTITLE",
|
||||||
|
0x1000: "ITEMPLUR0",
|
||||||
|
0x1001: "ITEMPLUR1",
|
||||||
|
0x1100: "GENDBR",
|
||||||
|
0x1101: "NUMBRNCH",
|
||||||
|
0x1302: "iCOLOR2",
|
||||||
|
0x1303: "iCOLOR3",
|
||||||
|
0x0200: "NUM1",
|
||||||
|
0x0201: "NUM2",
|
||||||
|
0x0202: "NUM3",
|
||||||
|
0x0203: "NUM4",
|
||||||
|
0x0204: "NUM5",
|
||||||
|
0x0205: "NUM6",
|
||||||
|
0x0206: "NUM7",
|
||||||
|
0x0207: "NUM8",
|
||||||
|
0x0208: "NUM9",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _xy_inner_keygen(key):
|
||||||
|
while True:
|
||||||
|
yield key
|
||||||
|
key = ((key << 3) | (key >> 13)) & 0xffff
|
||||||
|
|
||||||
|
|
||||||
|
def _xy_outer_keygen():
|
||||||
|
key = 0x7c89
|
||||||
|
while True:
|
||||||
|
yield _xy_inner_keygen(key)
|
||||||
|
key = (key + 0x2983) & 0xffff
|
||||||
|
|
||||||
|
|
||||||
|
def decrypt_xy_text(data):
|
||||||
|
text_sections, lines, length, initial_key, section_data = struct.unpack_from(
|
||||||
|
'<HHLLl', data)
|
||||||
|
|
||||||
|
outer_keygen = _xy_outer_keygen()
|
||||||
|
ret = []
|
||||||
|
|
||||||
|
for i in range(lines):
|
||||||
|
keygen = next(outer_keygen)
|
||||||
|
s = []
|
||||||
|
offset, length = struct.unpack_from('<lh', data, i * 8 + section_data + 4)
|
||||||
|
offset += section_data
|
||||||
|
start = offset
|
||||||
|
characters = []
|
||||||
|
for ech in struct.unpack_from("<{}H".format(length), data, offset):
|
||||||
|
characters.append(ech ^ next(keygen))
|
||||||
|
|
||||||
|
chiter = iter(characters)
|
||||||
|
for c in chiter:
|
||||||
|
if c == 0:
|
||||||
|
break
|
||||||
|
elif c == 0x10:
|
||||||
|
# Goofy variable thing
|
||||||
|
length = next(chiter)
|
||||||
|
typ = next(chiter)
|
||||||
|
if typ == 0xbe00:
|
||||||
|
# Pause, then scroll
|
||||||
|
s.append('\r')
|
||||||
|
elif typ == 0xbe01:
|
||||||
|
# Pause, then clear screen
|
||||||
|
s.append('\f')
|
||||||
|
elif typ == 0xbe02:
|
||||||
|
# Pause for some amount of time?
|
||||||
|
s.append("{{pause:{}}}".format(next(chiter)))
|
||||||
|
elif typ == 0xbdff:
|
||||||
|
# Empty text line? Includes line number, maybe for finding unused lines?
|
||||||
|
s.append("{{blank:{}}}".format(next(chiter)))
|
||||||
|
else:
|
||||||
|
s.append("{{{}:{}}}".format(
|
||||||
|
XY_VAR_NAMES.get(typ, "{:04x}".format(typ)),
|
||||||
|
','.join(str(next(chiter)) for _ in range(length - 1)),
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
s.append(chr(XY_CHAR_MAP.get(c, c)))
|
||||||
|
|
||||||
|
ret.append(''.join(s))
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
|
||||||
|
parser = make_arg_parser()
|
||||||
|
args = parser.parse_args(args)
|
||||||
|
args.cb(args)
|
||||||
|
|
||||||
|
|
||||||
|
def do_inspect(args):
|
||||||
|
with open(args.path, 'rb') as f:
|
||||||
|
garc = GARCFile(f)
|
||||||
|
for i, topfile in enumerate(garc):
|
||||||
|
print("File #{}, {} entr{}".format(
|
||||||
|
i, len(topfile), 'y' if len(topfile) == 1 else 'ies'))
|
||||||
|
for j, subfile in enumerate(topfile):
|
||||||
|
print(' ', j, len(subfile), end='')
|
||||||
|
if subfile.peek(2) == b'PC':
|
||||||
|
print(" -- appears to be a PC file (generic container)")
|
||||||
|
pcfile = PokemonContainerFile(subfile)
|
||||||
|
for k, entry in enumerate(pcfile):
|
||||||
|
print(' ', repr(entry.read(50)))
|
||||||
|
else:
|
||||||
|
print('', repr(subfile.read(50)))
|
||||||
|
|
||||||
|
|
||||||
|
def do_extract(args):
|
||||||
|
with open(args.path, 'rb') as f:
|
||||||
|
garc = GARCFile(f)
|
||||||
|
# TODO shouldn't path really be a directory, so you can mass-extract everything? do i want to do that ever?
|
||||||
|
# TODO actually respect mode, fileno, entryno
|
||||||
|
for i, topfile in enumerate(garc):
|
||||||
|
# TODO i guess this should be a list, or??
|
||||||
|
if args.fileno is not all and args.fileno != i:
|
||||||
|
continue
|
||||||
|
for j, subfile in enumerate(topfile):
|
||||||
|
# TODO auto-detect extension, maybe? depending on mode?
|
||||||
|
outfile = Path("{}-{}-{}".format(args.out, i, j))
|
||||||
|
with outfile.open('wb') as g:
|
||||||
|
# TODO should use copyfileobj
|
||||||
|
g.write(subfile.read())
|
||||||
|
print("wrote", outfile)
|
||||||
|
|
||||||
|
|
||||||
|
def make_arg_parser():
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
p = ArgumentParser()
|
||||||
|
sp = p.add_subparsers(metavar='command')
|
||||||
|
|
||||||
|
inspect_p = sp.add_parser('inspect', help='examine a particular file')
|
||||||
|
inspect_p.set_defaults(cb=do_inspect)
|
||||||
|
inspect_p.add_argument('path', help='relative path to a game file')
|
||||||
|
inspect_p.add_argument('mode', nargs='?', default='shorthex')
|
||||||
|
inspect_p.add_argument('fileno', nargs='?', default=all)
|
||||||
|
inspect_p.add_argument('entryno', nargs='?', default=all)
|
||||||
|
|
||||||
|
extract_p = sp.add_parser('extract', help='extract contents of a file')
|
||||||
|
extract_p.set_defaults(cb=do_extract)
|
||||||
|
extract_p.add_argument('path', help='relative path to a game file')
|
||||||
|
extract_p.add_argument('out', help='filename to use for extraction')
|
||||||
|
extract_p.add_argument('mode', nargs='?', default='raw')
|
||||||
|
extract_p.add_argument('fileno', nargs='?', default=all)
|
||||||
|
extract_p.add_argument('entryno', nargs='?', default=all)
|
||||||
|
|
||||||
|
return p
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(sys.argv[1:])
|
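
The text key schedule above is small enough to check by hand: the inner generator rotates a 16-bit key left by 3 per character, and the outer generator starts each line's key 0x2983 later. A sketch (import path assumed from this package layout):

    from pokedex.extract.lib.garc import _xy_inner_keygen, _xy_outer_keygen

    inner = _xy_inner_keygen(0x7c89)
    assert next(inner) == 0x7c89
    assert next(inner) == 0xe44b          # rol16(0x7c89, 3)

    outer = _xy_outer_keygen()
    assert next(next(outer)) == 0x7c89    # line 0
    assert next(next(outer)) == 0xa60c    # line 1: 0x7c89 + 0x2983
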
287  pokedex/extract/lib/lzss3.py (new file)

@@ -0,0 +1,287 @@
"""Support for the LZSS compression format.
|
||||||
|
|
||||||
|
Taken from magical's nlzss project: https://github.com/magical/nlzss
|
||||||
|
"""
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import print_function
|
||||||
|
from __future__ import division
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from sys import stdin, stderr, exit
|
||||||
|
from os import SEEK_SET, SEEK_CUR, SEEK_END
|
||||||
|
from errno import EPIPE
|
||||||
|
from struct import pack, unpack
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ('decompress', 'decompress_file', 'decompress_bytes',
|
||||||
|
'decompress_overlay', 'DecompressionError')
|
||||||
|
|
||||||
|
|
||||||
|
class DecompressionError(ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def bits(byte):
|
||||||
|
return ((byte >> 7) & 1,
|
||||||
|
(byte >> 6) & 1,
|
||||||
|
(byte >> 5) & 1,
|
||||||
|
(byte >> 4) & 1,
|
||||||
|
(byte >> 3) & 1,
|
||||||
|
(byte >> 2) & 1,
|
||||||
|
(byte >> 1) & 1,
|
||||||
|
(byte) & 1)
|
||||||
|
|
||||||
|
|
||||||
|
def decompress_raw_lzss10(indata, decompressed_size, _overlay=False):
|
||||||
|
"""Decompress LZSS-compressed bytes. Returns a bytearray."""
|
||||||
|
data = bytearray()
|
||||||
|
|
||||||
|
it = iter(indata)
|
||||||
|
|
||||||
|
if _overlay:
|
||||||
|
disp_extra = 3
|
||||||
|
else:
|
||||||
|
disp_extra = 1
|
||||||
|
|
||||||
|
def writebyte(b):
|
||||||
|
data.append(b)
|
||||||
|
|
||||||
|
def readbyte():
|
||||||
|
return next(it)
|
||||||
|
|
||||||
|
def readshort():
|
||||||
|
# big-endian
|
||||||
|
a = next(it)
|
||||||
|
b = next(it)
|
||||||
|
return (a << 8) | b
|
||||||
|
|
||||||
|
def copybyte():
|
||||||
|
data.append(next(it))
|
||||||
|
|
||||||
|
while len(data) < decompressed_size:
|
||||||
|
b = readbyte()
|
||||||
|
flags = bits(b)
|
||||||
|
for flag in flags:
|
||||||
|
if flag == 0:
|
||||||
|
copybyte()
|
||||||
|
elif flag == 1:
|
||||||
|
sh = readshort()
|
||||||
|
count = (sh >> 0xc) + 3
|
||||||
|
disp = (sh & 0xfff) + disp_extra
|
||||||
|
|
||||||
|
for _ in range(count):
|
||||||
|
writebyte(data[-disp])
|
||||||
|
else:
|
||||||
|
raise ValueError(flag)
|
||||||
|
|
||||||
|
if decompressed_size <= len(data):
|
||||||
|
break
|
||||||
|
|
||||||
|
if len(data) != decompressed_size:
|
||||||
|
raise DecompressionError(
|
||||||
|
"decompressed size does not match the expected size")
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def decompress_raw_lzss11(indata, decompressed_size):
|
||||||
|
"""Decompress LZSS-compressed bytes. Returns a bytearray."""
|
||||||
|
data = bytearray()
|
||||||
|
|
||||||
|
it = iter(indata)
|
||||||
|
|
||||||
|
def writebyte(b):
|
||||||
|
data.append(b)
|
||||||
|
|
||||||
|
def readbyte():
|
||||||
|
return next(it)
|
||||||
|
|
||||||
|
def copybyte():
|
||||||
|
data.append(next(it))
|
||||||
|
|
||||||
|
while len(data) < decompressed_size:
|
||||||
|
b = readbyte()
|
||||||
|
flags = bits(b)
|
||||||
|
for flag in flags:
|
||||||
|
if flag == 0:
|
||||||
|
copybyte()
|
||||||
|
elif flag == 1:
|
||||||
|
b = readbyte()
|
||||||
|
indicator = b >> 4
|
||||||
|
|
||||||
|
if indicator == 0:
|
||||||
|
# 8 bit count, 12 bit disp
|
||||||
|
# indicator is 0, don't need to mask b
|
||||||
|
count = (b << 4)
|
||||||
|
b = readbyte()
|
||||||
|
count += b >> 4
|
||||||
|
count += 0x11
|
||||||
|
elif indicator == 1:
|
||||||
|
# 16 bit count, 12 bit disp
|
||||||
|
count = ((b & 0xf) << 12) + (readbyte() << 4)
|
||||||
|
b = readbyte()
|
||||||
|
count += b >> 4
|
||||||
|
count += 0x111
|
||||||
|
else:
|
||||||
|
# indicator is count (4 bits), 12 bit disp
|
||||||
|
count = indicator
|
||||||
|
count += 1
|
||||||
|
|
||||||
|
disp = ((b & 0xf) << 8) + readbyte()
|
||||||
|
disp += 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
for _ in range(count):
|
||||||
|
writebyte(data[-disp])
|
||||||
|
except IndexError:
|
||||||
|
raise Exception(count, disp, len(data), sum(1 for x in it))
|
||||||
|
else:
|
||||||
|
raise ValueError(flag)
|
||||||
|
|
||||||
|
if decompressed_size <= len(data):
|
||||||
|
break
|
||||||
|
|
||||||
|
if len(data) != decompressed_size:
|
||||||
|
raise DecompressionError(
|
||||||
|
"decompressed size does not match the expected size")
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def decompress_overlay(f, out):
|
||||||
|
# the compression header is at the end of the file
|
||||||
|
f.seek(-8, SEEK_END)
|
||||||
|
header = f.read(8)
|
||||||
|
|
||||||
|
# decompression goes backwards.
|
||||||
|
# end < here < start
|
||||||
|
|
||||||
|
# end_delta == here - decompression end address
|
||||||
|
# start_delta == decompression start address - here
|
||||||
|
end_delta, start_delta = unpack("<LL", header)
|
||||||
|
|
||||||
|
filelen = f.tell()
|
||||||
|
|
||||||
|
padding = end_delta >> 0x18
|
||||||
|
end_delta &= 0xFFFFFF
|
||||||
|
decompressed_size = start_delta + end_delta
|
||||||
|
|
||||||
|
f.seek(-end_delta, SEEK_END)
|
||||||
|
|
||||||
|
data = bytearray()
|
||||||
|
data.extend(f.read(end_delta - padding))
|
||||||
|
data.reverse()
|
||||||
|
|
||||||
|
uncompressed_data = decompress_raw_lzss10(
|
||||||
|
data, decompressed_size, _overlay=True)
|
||||||
|
uncompressed_data.reverse()
|
||||||
|
|
||||||
|
# first we write up to the portion of the file which was "overwritten" by
|
||||||
|
# the decompressed data, then the decompressed data itself.
|
||||||
|
# i wonder if it's possible for decompression to overtake the compressed
|
||||||
|
# data, so that the decompression code is reading its own output...
|
||||||
|
f.seek(0, SEEK_SET)
|
||||||
|
out.write(f.read(filelen - end_delta))
|
||||||
|
out.write(uncompressed_data)
|
||||||
|
|
||||||
|
|
||||||
|
def decompress(obj):
|
||||||
|
"""Decompress LZSS-compressed bytes or a file-like object.
|
||||||
|
|
||||||
|
Shells out to decompress_file() or decompress_bytes() depending on
|
||||||
|
whether or not the passed-in object has a 'read' attribute or not.
|
||||||
|
|
||||||
|
Returns a bytearray."""
|
||||||
|
if hasattr(obj, 'read'):
|
||||||
|
return decompress_file(obj)
|
||||||
|
else:
|
||||||
|
return decompress_bytes(obj)
|
||||||
|
|
||||||
|
|
||||||
|
def decompress_bytes(data):
|
||||||
|
"""Decompress LZSS-compressed bytes. Returns a bytearray."""
|
||||||
|
header = data[:4]
|
||||||
|
if header[0] == 0x10:
|
||||||
|
decompress_raw = decompress_raw_lzss10
|
||||||
|
elif header[0] == 0x11:
|
||||||
|
decompress_raw = decompress_raw_lzss11
|
||||||
|
else:
|
||||||
|
raise DecompressionError("not as lzss-compressed file")
|
||||||
|
|
||||||
|
decompressed_size, = unpack("<L", header[1:] + b'\x00')
|
||||||
|
|
||||||
|
data = data[4:]
|
||||||
|
return decompress_raw(data, decompressed_size)
|
||||||
|
|
||||||
|
|
||||||
|
def decompress_file(f):
|
||||||
|
"""Decompress an LZSS-compressed file. Returns a bytearray.
|
||||||
|
|
||||||
|
This isn't any more efficient than decompress_bytes, as it reads
|
||||||
|
the entire file into memory. It is offered as a convenience.
|
||||||
|
"""
|
||||||
|
header = f.read(4)
|
||||||
|
if header[0] == 0x10:
|
||||||
|
decompress_raw = decompress_raw_lzss10
|
||||||
|
elif header[0] == 0x11:
|
||||||
|
decompress_raw = decompress_raw_lzss11
|
||||||
|
else:
|
||||||
|
raise DecompressionError("not as lzss-compressed file")
|
||||||
|
|
||||||
|
decompressed_size, = unpack("<L", header[1:] + b'\x00')
|
||||||
|
|
||||||
|
data = f.read()
|
||||||
|
return decompress_raw(data, decompressed_size)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
|
||||||
|
if args is None:
|
||||||
|
args = sys.argv[1:]
|
||||||
|
|
||||||
|
if '--overlay' in args:
|
||||||
|
args.remove('--overlay')
|
||||||
|
overlay = True
|
||||||
|
else:
|
||||||
|
overlay = False
|
||||||
|
|
||||||
|
if len(args) < 1 or args[0] == '-':
|
||||||
|
if overlay:
|
||||||
|
print("Can't decompress overlays from stdin", file=stderr)
|
||||||
|
return 2
|
||||||
|
|
||||||
|
if hasattr(stdin, 'detach'):
|
||||||
|
f = stdin.detach()
|
||||||
|
else:
|
||||||
|
f = stdin
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
f = open(args[0], "rb")
|
||||||
|
except IOError as e:
|
||||||
|
print(e, file=stderr)
|
||||||
|
return 2
|
||||||
|
|
||||||
|
stdout = sys.stdout
|
||||||
|
if hasattr(stdout, 'detach'):
|
||||||
|
# grab the underlying binary stream
|
||||||
|
stdout = stdout.detach()
|
||||||
|
|
||||||
|
try:
|
||||||
|
if overlay:
|
||||||
|
decompress_overlay(f, stdout)
|
||||||
|
else:
|
||||||
|
stdout.write(decompress_file(f))
|
||||||
|
except IOError as e:
|
||||||
|
if e.errno == EPIPE:
|
||||||
|
# don't complain about a broken pipe
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
except (DecompressionError,) as e:
|
||||||
|
print(e, file=stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
exit(main())
|
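
The LZSS10 wire format is compact enough to assemble by hand: a 0x10 type byte, a 24-bit little-endian decompressed size, then groups of eight flag bits (MSB first) where 0 means "copy a literal byte" and 1 means "read a big-endian short packing count-3 in the top nibble and displacement-1 in the low 12 bits". A hand-built stream for illustration:

    compressed = bytes([
        0x10, 0x06, 0x00, 0x00,  # LZSS10, decompressed size = 6
        0x40,                    # flags 01000000: one literal, one reference
        ord('A'),                # literal 'A'
        0x20, 0x00,              # short 0x2000: count = 2+3 = 5, disp = 0+1 = 1
    ])
    assert decompress_bytes(compressed) == b'AAAAAA'
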
19   pokedex/extract/lib/pc.py (new file)

@@ -0,0 +1,19 @@
"""Allegedly stands for 'Pokémon Container'. Completely generic, dead-simple
|
||||||
|
container format.
|
||||||
|
"""
|
||||||
|
from .base import _ContainerFile, Substream
|
||||||
|
|
||||||
|
|
||||||
|
class PokemonContainerFile(_ContainerFile):
|
||||||
|
magic = b'PC'
|
||||||
|
|
||||||
|
def __init__(self, stream):
|
||||||
|
self.stream = stream = Substream(stream)
|
||||||
|
|
||||||
|
magic, entry_ct = stream.unpack('<2sH')
|
||||||
|
assert magic == b'PC'
|
||||||
|
|
||||||
|
self.slices = []
|
||||||
|
for _ in range(entry_ct):
|
||||||
|
start, end = stream.unpack('<LL')
|
||||||
|
self.slices.append(self.stream.slice(start, end - start))
|
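
Since the format is just the magic, a little-endian entry count, and (start, end) offset pairs measured from the start of the file, a container can be faked in memory (payload invented for illustration):

    import io
    import struct

    payload = b'hello'
    header = b'PC' + struct.pack('<H', 1) + struct.pack('<LL', 12, 12 + len(payload))
    pc = PokemonContainerFile(io.BytesIO(header + payload))
    assert len(pc) == 1
    assert pc[0].read() == b'hello'
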
115  pokedex/extract/lib/text.py (new file)

@@ -0,0 +1,115 @@
def merge_japanese_texts(kanji, kana, html=False):
    """Combine a (presumably equivalent) pair of kanji and kana strings into a
    single string of kanji with furigana.

    If `html` is truthy, the return value will contain HTML ruby tags;
    otherwise it will use the Unicode "interlinear annotation" characters.

    This relies on the Needleman–Wunsch algorithm for sequence alignment:
    https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
    """
    # TODO maybe this is faster, but then -1 doesn't work
    #table = [
    #    [None for _ in range(len(kana))]
    #    for _ in range(len(kanji))
    #]
    table = {}
    # continue left, continue up, are the characters equivalent, score for this
    # cell
    table[-1, -1] = False, False, True, 0

    isjunk = {}
    for ch in kanji + kana:
        isjunk[ch] = ch.isspace() or ch in '。'

    # initialize, TODO, something about scoring compared to a gap
    for i, ch in enumerate(kanji):
        table[i, -1] = True, False, False, -1 - i
    for i, ch in enumerate(kana):
        table[-1, i] = False, True, False, -1 - i
    for a, ach in enumerate(kanji):
        for b, bch in enumerate(kana):
            options = []
            # Continuing diagonally means two characters together, either a
            # match or a mismatch
            if ach == bch or (isjunk[ach] and isjunk[bch]):
                equiv = True
                score = 1
            else:
                equiv = False
                score = -1
            options.append((True, True, equiv, table[a - 1, b - 1][3] + score))

            # Continuing from either side means an indel... -1
            if isjunk[ach]:
                score = 0
            else:
                score = -1
            options.append((True, False, equiv, table[a - 1, b][3] + score))
            if isjunk[bch]:
                score = 0
            else:
                score = -1
            options.append((False, True, equiv, table[a, b - 1][3] + score))

            # Strictly speaking, in the case of a tie, all of the "best"
            # choices are supposed to be preserved.  But we should never have a
            # tie, and we have an arbitrary choice of which to use in the end
            # anyway, so screw it.
            table[a, b] = max(options, key=lambda opt: opt[3])

    if html:
        ruby_format = "<ruby><rb>{}</rb><rt>{}</rt></ruby>"
    else:
        ruby_format = "\ufff9{}\ufffa{}\ufffb"

    def add_mismatches(mismatch_a, mismatch_b, final):
        # Need to pop out any extra junk characters at the beginning or end --
        # but only the kanji ones stay, since kanji is "canonical"
        while mismatch_a and isjunk[mismatch_a[0]]:
            final.append(mismatch_a.pop(0))
        while mismatch_b and isjunk[mismatch_b[0]]:
            mismatch_b.pop(0)
        endjunk = []
        while mismatch_a and isjunk[mismatch_a[-1]]:
            endjunk.append(mismatch_a.pop())
        while mismatch_b and isjunk[mismatch_b[-1]]:
            mismatch_b.pop()
        final.append(ruby_format.format(
            ''.join(reversed(mismatch_a)),
            ''.join(reversed(mismatch_b)),
        ))
        final.extend(endjunk)
        del mismatch_a[:]
        del mismatch_b[:]

    final = []
    mismatch_a = []
    mismatch_b = []
    a = len(kanji) - 1
    b = len(kana) - 1
    while True:
        walk_left, walk_up, equiv, score = table[a, b]
        if walk_left and walk_up:
            if equiv:
                if mismatch_a or mismatch_b:
                    add_mismatches(mismatch_a, mismatch_b, final)
                final.append(kanji[a])
            else:
                mismatch_a.append(kanji[a])
                mismatch_b.append(kana[b])
            a -= 1
            b -= 1
        elif walk_left:
            mismatch_a.append(kanji[a])
            a -= 1
        elif walk_up:
            mismatch_b.append(kana[b])
            b -= 1
        else:
            break

    if mismatch_a or mismatch_b:
        add_mismatches(mismatch_a, mismatch_b, final)

    return ''.join(reversed(final))
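
A toy illustration of the two output modes (with completely dissimilar strings, the alignment degenerates to a single annotated chunk):

    assert merge_japanese_texts('漢字', 'かんじ') == '\ufff9漢字\ufffaかんじ\ufffb'
    assert (merge_japanese_texts('漢字', 'かんじ', html=True)
            == '<ruby><rb>漢字</rb><rt>かんじ</rt></ruby>')
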
844  pokedex/extract/oras.py (new file)

@@ -0,0 +1,844 @@
"""Dumps data from Omega Ruby and Alpha Sapphire.
|
||||||
|
|
||||||
|
Filesystem reference: http://www.projectpokemon.org/wiki/ORAS_File_System
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
from collections import OrderedDict
|
||||||
|
from contextlib import contextmanager
|
||||||
|
import itertools
|
||||||
|
import math
|
||||||
|
from pathlib import Path
|
||||||
|
import shutil
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from construct import Array, BitField, Bitwise, Magic, OptionalGreedyRange, Padding, Pointer, Struct, SLInt8, SLInt16, ULInt8, ULInt16, ULInt32
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from .lib.garc import GARCFile, decrypt_xy_text
|
||||||
|
from .lib.text import merge_japanese_texts
|
||||||
|
|
||||||
|
|
||||||
|
# TODO fix some hardcoding in here
|
||||||
|
# TODO finish converting garc parsing to use construct, if possible, i think (i would not miss substream)
|
||||||
|
# way way more sprite work in here...
|
||||||
|
|
||||||
|
|
||||||
|
CANON_LANGUAGES = ('ja', 'en', 'fr', 'it', 'de', 'es', 'ko')
|
||||||
|
ORAS_SCRIPT_FILES = {
|
||||||
|
'ja-kana': 'rom/a/0/7/1',
|
||||||
|
'ja-kanji': 'rom/a/0/7/2',
|
||||||
|
'en': 'rom/a/0/7/3',
|
||||||
|
'fr': 'rom/a/0/7/4',
|
||||||
|
'it': 'rom/a/0/7/5',
|
||||||
|
'de': 'rom/a/0/7/6',
|
||||||
|
'es': 'rom/a/0/7/7',
|
||||||
|
'ko': 'rom/a/0/7/8',
|
||||||
|
}
|
||||||
|
ORAS_SCRIPT_ENTRIES = {
|
||||||
|
'form-names': 5,
|
||||||
|
# TODO these might be backwards, i'm just guessing
|
||||||
|
'species-flavor-alpha-sapphire': 6,
|
||||||
|
'species-flavor-omega-ruby': 7,
|
||||||
|
'move-contest-flavor': 13,
|
||||||
|
'move-names': 14,
|
||||||
|
# Note: table 15 is also a list of move names, but with a few at the end
|
||||||
|
# missing? XY leftovers?
|
||||||
|
'move-flavor': 16,
|
||||||
|
'type-names': 18,
|
||||||
|
'ability-flavor': 36,
|
||||||
|
'ability-names': 37,
|
||||||
|
'nature-names': 51,
|
||||||
|
'species-names': 98,
|
||||||
|
}
|
||||||
|
# The first element in each list is the name of the BASE form -- if it's not
|
||||||
|
# None, the base form will be saved under two filenames
|
||||||
|
ORAS_EXTRA_SPRITE_NAMES = {
|
||||||
|
# Cosplay Pikachu
|
||||||
|
25: (None, 'rockstar', 'belle', 'popstar', 'phd', 'libre', 'cosplay'),
|
||||||
|
# Unown
|
||||||
|
201: tuple('abcdefghijklmnopqrstuvwxyz') + ('exclamation', 'question'),
|
||||||
|
# Castform
|
||||||
|
351: (None, 'sunny', 'rainy', 'snowy'),
|
||||||
|
# Kyogre and Groudon
|
||||||
|
382: (None, 'primal',),
|
||||||
|
383: (None, 'primal',),
|
||||||
|
# Deoxys
|
||||||
|
386: ('normal', 'attack', 'defense', 'speed'),
|
||||||
|
# Burmy and Wormadam
|
||||||
|
412: ('plant', 'sandy', 'trash'),
|
||||||
|
413: ('plant', 'sandy', 'trash'),
|
||||||
|
# Cherrim
|
||||||
|
421: ('overcast', 'sunshine',),
|
||||||
|
# Shellos and Gastrodon
|
||||||
|
422: ('west', 'east',),
|
||||||
|
423: ('west', 'east',),
|
||||||
|
# Rotom
|
||||||
|
479: (None, 'heat', 'wash', 'frost', 'fan', 'mow'),
|
||||||
|
# Giratina
|
||||||
|
487: ('altered', 'origin',),
|
||||||
|
# Shaymin
|
||||||
|
492: ('land', 'sky',),
|
||||||
|
# Arceus
|
||||||
|
493: (
|
||||||
|
'normal', 'fighting', 'flying', 'poison', 'ground', 'rock', 'bug',
|
||||||
|
'ghost', 'steel', 'fire', 'water', 'grass', 'electric', 'psychic',
|
||||||
|
'ice', 'dragon', 'dark', 'fairy',
|
||||||
|
),
|
||||||
|
# Basculin
|
||||||
|
550: ('red-striped', 'blue-striped',),
|
||||||
|
# Darmanitan
|
||||||
|
555: ('standard', 'zen',),
|
||||||
|
# Deerling and Sawsbuck
|
||||||
|
585: ('sprint', 'summer', 'autumn', 'winter'),
|
||||||
|
586: ('sprint', 'summer', 'autumn', 'winter'),
|
||||||
|
# Tornadus, Thundurus, and Landorus
|
||||||
|
641: ('incarnate', 'therian'),
|
||||||
|
642: ('incarnate', 'therian'),
|
||||||
|
645: ('incarnate', 'therian'),
|
||||||
|
# Kyurem
|
||||||
|
646: (None, 'white', 'black'),
|
||||||
|
# Keldeo
|
||||||
|
647: ('ordinary', 'resolute'),
|
||||||
|
# Meloetta
|
||||||
|
648: ('aria', 'pirouette'),
|
||||||
|
# Genesect
|
||||||
|
649: (None, 'douse', 'shock', 'burn', 'chill'),
|
||||||
|
# Vivillon
|
||||||
|
666: (
|
||||||
|
'icy-snow', 'polar', 'tundra', 'continental', 'garden', 'elegant',
|
||||||
|
'meadow', 'modern', 'marine', 'archipelago', 'high-plains',
|
||||||
|
'sandstorm', 'river', 'monsoon', 'savanna', 'sun', 'ocean', 'jungle',
|
||||||
|
'fancy', 'poke-ball',
|
||||||
|
),
|
||||||
|
# Flabébé/Floette/Florges
|
||||||
|
669: ('red', 'yellow', 'orange', 'blue', 'white'),
|
||||||
|
670: ('red', 'yellow', 'orange', 'blue', 'white', 'eternal'),
|
||||||
|
671: ('red', 'yellow', 'orange', 'blue', 'white'),
|
||||||
|
# Furfrou
|
||||||
|
676: (
|
||||||
|
'natural', 'heart', 'star', 'diamond', 'debutante', 'matron', 'dandy',
|
||||||
|
'la-reine', 'kabuki', 'pharaoh',
|
||||||
|
),
|
||||||
|
# Meowstic
|
||||||
|
#678: [male, female]
|
||||||
|
# Aegislash
|
||||||
|
681: ('shield', 'blade'),
|
||||||
|
# Pumpkaboo/Gourgeist
|
||||||
|
710: ('average', 'small', 'large', 'super'),
|
||||||
|
711: ('average', 'small', 'large', 'super'),
|
||||||
|
# Xerneas
|
||||||
|
716: ('neutral', 'active'),
|
||||||
|
# Hoopa
|
||||||
|
720: ('confined', 'unbound'),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
pokemon_struct = Struct(
|
||||||
|
'pokemon',
|
||||||
|
ULInt8('stat_hp'),
|
||||||
|
ULInt8('stat_atk'),
|
||||||
|
ULInt8('stat_def'),
|
||||||
|
ULInt8('stat_speed'),
|
||||||
|
ULInt8('stat_spatk'),
|
||||||
|
ULInt8('stat_spdef'),
|
||||||
|
ULInt8('type1'),
|
||||||
|
ULInt8('type2'),
|
||||||
|
ULInt8('catch_rate'),
|
||||||
|
ULInt8('stage'),
|
||||||
|
ULInt16('effort'),
|
||||||
|
ULInt16('held_item1'),
|
||||||
|
ULInt16('held_item2'),
|
||||||
|
ULInt16('held_item3'), # dark grass from bw, unused in oras?
|
||||||
|
ULInt8('gender_rate'),
|
||||||
|
ULInt8('steps_to_hatch'),
|
||||||
|
ULInt8('base_happiness'),
|
||||||
|
ULInt8('exp_curve'),
|
||||||
|
ULInt8('egg_group1'),
|
||||||
|
ULInt8('egg_group2'),
|
||||||
|
ULInt8('ability1'),
|
||||||
|
ULInt8('ability2'),
|
||||||
|
ULInt8('ability_dream'),
|
||||||
|
ULInt8('safari_escape'),
|
||||||
|
ULInt16('form_species_start'),
|
||||||
|
ULInt16('form_sprite_start'),
|
||||||
|
ULInt8('form_count'),
|
||||||
|
ULInt8('color'),
|
||||||
|
ULInt16('base_exp'),
|
||||||
|
ULInt16('height'),
|
||||||
|
ULInt16('weight'),
|
||||||
|
Bitwise(
|
||||||
|
BitField('machines', 14 * 8, swapped=True),
|
||||||
|
),
|
||||||
|
Padding(2),
|
||||||
|
ULInt32('tutors'),
|
||||||
|
ULInt16('mystery1'),
|
||||||
|
ULInt16('mystery2'),
|
||||||
|
ULInt32('bp_tutors1'),
|
||||||
|
ULInt32('bp_tutors2'),
|
||||||
|
ULInt32('bp_tutors3'),
|
||||||
|
ULInt32('bp_tutors4'),
|
||||||
|
)
|
||||||
|
|
||||||
|
pokemon_mega_evolutions_struct = Array(
|
||||||
|
3,
|
||||||
|
Struct(
|
||||||
|
'pokemon_mega_evolutions',
|
||||||
|
ULInt16('number'),
|
||||||
|
ULInt16('mode'),
|
||||||
|
ULInt16('mega_stone_itemid'),
|
||||||
|
Padding(2),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
egg_moves_struct = Struct(
|
||||||
|
'egg_moves',
|
||||||
|
ULInt16('count'),
|
||||||
|
Array(
|
||||||
|
lambda ctx: ctx.count,
|
||||||
|
ULInt16('moveids'),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
level_up_moves_struct = OptionalGreedyRange(
|
||||||
|
Struct(
|
||||||
|
'level_up_pair',
|
||||||
|
SLInt16('moveid'),
|
||||||
|
SLInt16('level'),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
move_struct = Struct(
|
||||||
|
'move',
|
||||||
|
ULInt8('type'),
|
||||||
|
ULInt8('category'),
|
||||||
|
ULInt8('damage_class'),
|
||||||
|
ULInt8('power'),
|
||||||
|
ULInt8('accuracy'),
|
||||||
|
ULInt8('pp'),
|
||||||
|
SLInt8('priority'),
|
||||||
|
ULInt8('min_max_hits'),
|
||||||
|
SLInt16('caused_effect'),
|
||||||
|
ULInt8('effect_chance'),
|
||||||
|
ULInt8('status'),
|
||||||
|
ULInt8('min_turns'),
|
||||||
|
ULInt8('max_turns'),
|
||||||
|
ULInt8('crit_rate'),
|
||||||
|
ULInt8('flinch_chance'),
|
||||||
|
ULInt16('effect'),
|
||||||
|
SLInt8('recoil'),
|
||||||
|
ULInt8('healing'),
|
||||||
|
ULInt8('range'), # ok
|
||||||
|
Bitwise(
|
||||||
|
BitField('stat_change', 24),
|
||||||
|
),
|
||||||
|
Bitwise(
|
||||||
|
BitField('stat_amount', 24),
|
||||||
|
),
|
||||||
|
Bitwise(
|
||||||
|
BitField('stat_chance', 24),
|
||||||
|
),
|
||||||
|
ULInt8('padding0'), # ok
|
||||||
|
ULInt8('padding1'), # ok
|
||||||
|
ULInt16('flags'),
|
||||||
|
ULInt8('padding2'), # ok
|
||||||
|
ULInt8('extra'),
|
||||||
|
)
|
||||||
|
move_container_struct = Struct(
|
||||||
|
'move_container',
|
||||||
|
Magic(b'WD'), # waza... descriptions?
|
||||||
|
ULInt16('record_ct'),
|
||||||
|
Array(
|
||||||
|
lambda ctx: ctx.record_ct,
|
||||||
|
Struct(
|
||||||
|
'records',
|
||||||
|
ULInt32('offset'),
|
||||||
|
Pointer(lambda ctx: ctx.offset, move_struct),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
pokemon_sprite_struct = Struct(
|
||||||
|
'pokemon_sprite_config',
|
||||||
|
ULInt16('index'),
|
||||||
|
ULInt16('female_index'),
|
||||||
|
ULInt32('form_index_offset'),
|
||||||
|
ULInt32('right_index_offset'),
|
||||||
|
ULInt16('form_count'),
|
||||||
|
ULInt16('right_count'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# There are 63 tutor move bits in use, but only 60 move tutors -- the moves
|
||||||
|
# appear to be largely inherited from B2W2 but these are just not exposed in
|
||||||
|
# ORAS
|
||||||
|
ORAS_UNUSED_MOVE_TUTORS = {'dark-pulse', 'roost', 'sleep-talk'}
|
||||||
|
# Unsure where this is in the binary
|
||||||
|
ORAS_NORMAL_MOVE_TUTORS = (
|
||||||
|
'grass-pledge',
|
||||||
|
'fire-pledge',
|
||||||
|
'water-pledge',
|
||||||
|
'frenzy-plant',
|
||||||
|
'blast-burn',
|
||||||
|
'hydro-cannon',
|
||||||
|
'draco-meteor',
|
||||||
|
'dragon-ascent',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def read_garc(path):
|
||||||
|
with path.open('rb') as f:
|
||||||
|
yield GARCFile(f)
|
||||||
|
|
||||||
|
|
||||||
|
# XXX christ lol. taken from SO. fodder for camel maybe
|
||||||
|
def represent_ordereddict(dumper, data):
|
||||||
|
value = []
|
||||||
|
|
||||||
|
for item_key, item_value in data.items():
|
||||||
|
node_key = dumper.represent_data(item_key)
|
||||||
|
node_value = dumper.represent_data(item_value)
|
||||||
|
|
||||||
|
value.append((node_key, node_value))
|
||||||
|
|
||||||
|
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)
|
||||||
|
yaml.add_representer(OrderedDict, represent_ordereddict)
|
||||||
|
|
||||||
|
|
||||||
|
def represent_tuple(dumper, data):
|
||||||
|
return yaml.nodes.SequenceNode(
|
||||||
|
u'tag:yaml.org,2002:seq',
|
||||||
|
[dumper.represent_data(item) for item in data],
|
||||||
|
flow_style=True,
|
||||||
|
)
|
||||||
|
yaml.add_representer(tuple, represent_tuple)
|
||||||
|
|
||||||
|
|
||||||
|
def dump_to_yaml(data, f):
|
||||||
|
# TODO gonna need a better way to handle flow style
|
||||||
|
yaml.dump(
|
||||||
|
data, f,
|
||||||
|
default_flow_style=False,
|
||||||
|
allow_unicode=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
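
With both representers registered, OrderedDicts keep their insertion order and tuples render in flow style, e.g. (data invented for illustration):

    import io
    from collections import OrderedDict

    buf = io.StringIO()
    dump_to_yaml(OrderedDict([('name', 'bulbasaur'), ('types', ('grass', 'poison'))]), buf)
    # buf.getvalue() ==
    # name: bulbasaur
    # types: [grass, poison]
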
def extract_data(root, out):
|
||||||
|
# TODO big conceptual question for the yaml thing: how do we decide how the
|
||||||
|
# identifiers work in the per-version data? the "global" identifiers are
|
||||||
|
# in theory based on the names from the latest version, and the game dump
|
||||||
|
# scripts shouldn't have to care about what the latest version is
|
||||||
|
# 1. make the canon data not be keyed by identifier (makes it hard to
|
||||||
|
# follow what's going on in flavor text files etc, and unclear how to match
|
||||||
|
# up items across versions)
|
||||||
|
# 2. make each version's data keyed by its own identifiers (makes it hard
|
||||||
|
# to align them all when loading everything, and unclear how to match up
|
||||||
|
# items whose names change across versions)
|
||||||
|
# 3. hardcode a mapping of version+identifier pairs to their current
|
||||||
|
# identifiers, when they changed, which is a little ugly but also solves
|
||||||
|
# all the match-up problems and is what we'd basically have to do anyway
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Names and flavor text
|
||||||
|
texts = {}
|
||||||
|
for lang, fn in ORAS_SCRIPT_FILES.items():
|
||||||
|
texts[lang] = {}
|
||||||
|
with read_garc(root / fn) as garc:
|
||||||
|
for entryname, entryid in ORAS_SCRIPT_ENTRIES.items():
|
||||||
|
entry = garc[entryid][0]
|
||||||
|
texts[lang][entryname] = decrypt_xy_text(entry.read())
|
||||||
|
|
||||||
|
# Japanese text is special! It's written in both kanji and kana, and we
|
||||||
|
# want to combine them
|
||||||
|
texts['ja'] = {}
|
||||||
|
for entryname in ORAS_SCRIPT_ENTRIES:
|
||||||
|
kanjis = texts['ja-kanji'][entryname]
|
||||||
|
kanas = texts['ja-kana'][entryname]
|
||||||
|
# But not if they're names of things.
|
||||||
|
# (TODO this might not be true in the case of, say, towns? in which
|
||||||
|
# case, what do we do? we want to ultimately put these in urls and
|
||||||
|
# whatnot, right, but we don't want furigana there :S do we need a
|
||||||
|
# separate "identifier" field /per language/?)
|
||||||
|
if entryname.endswith('names'):
|
||||||
|
assert kanjis == kanas
|
||||||
|
texts['ja'][entryname] = kanjis
|
||||||
|
else:
|
||||||
|
assert len(kanas) == len(kanjis)
|
||||||
|
texts['ja'][entryname] = [
|
||||||
|
merge_japanese_texts(kanji, kana)
|
||||||
|
for (kanji, kana) in zip(kanjis, kanas)
|
||||||
|
]
|
||||||
|
del texts['ja-kanji']
|
||||||
|
del texts['ja-kana']
|
||||||
|
|
||||||
|
identifiers = {}
|
||||||
|
identifiers['species'] = [
|
||||||
|
# TODO better identifier creation, to be determined later, but surely
|
||||||
|
# want to lose . and '
|
||||||
|
# TODO handling forms here is awkward since the form names are
|
||||||
|
# contained in the personal struct
|
||||||
|
((species_name or '') + '-' + form_name).lower().replace(' ', '-')
|
||||||
|
for (species_name, form_name) in itertools.zip_longest(
|
||||||
|
texts['en']['species-names'],
|
||||||
|
texts['en']['form-names'],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
identifiers['move'] = [
|
||||||
|
# TODO better identifier creation, to be determined later, but surely
|
||||||
|
# want to lose . and '
|
||||||
|
name.lower().replace(' ', '-')
|
||||||
|
for name in texts['en']['move-names']
|
||||||
|
]
|
||||||
|
|
||||||
|
textdir = out / 'script'
|
||||||
|
if not textdir.exists():
|
||||||
|
textdir.mkdir()
|
||||||
|
for lang in CANON_LANGUAGES:
|
||||||
|
with (textdir / (lang + '.yaml')).open('w') as f:
|
||||||
|
# TODO this should use identifiers, not be lists
|
||||||
|
# TODO need to skip slot 0 which is junk
|
||||||
|
dump_to_yaml(texts[lang], f)
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Scrape some useful bits from the binary
|
||||||
|
with (root / 'exe/code.bin').open('rb') as f:
|
||||||
|
# Tutored moves
|
||||||
|
tutor_moves = dict(tutors=ORAS_NORMAL_MOVE_TUTORS)
|
||||||
|
f.seek(0x004960f8)
|
||||||
|
for n in range(1, 5):
|
||||||
|
key = "bp_tutors{}".format(n)
|
||||||
|
moves = tutor_moves[key] = []
|
||||||
|
while True:
|
||||||
|
moveid, = struct.unpack('<H', f.read(2))
|
||||||
|
if moveid >= len(identifiers['move']):
|
||||||
|
break
|
||||||
|
moves.append(identifiers['move'][moveid])
|
||||||
|
|
||||||
|
# TMs
|
||||||
|
machines = []
|
||||||
|
f.seek(0x004a67ee)
|
||||||
|
machineids = struct.unpack('<107H', f.read(2 * 107))
|
||||||
|
# Order appears to be based on some gen 4 legacy: TMs 1 through 92, HMs
|
||||||
|
# 1 through 6, then the other eight TMs and the last HM. But the bits
|
||||||
|
# in the Pokémon structs are in the expected order of 1 through 100, 1
|
||||||
|
# through 7
|
||||||
|
machines = [
|
||||||
|
identifiers['move'][moveid]
|
||||||
|
for moveid in
|
||||||
|
machineids[0:92] +
|
||||||
|
machineids[98:106] +
|
||||||
|
machineids[92:98] +
|
||||||
|
machineids[106:]
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Pokémon structs
|
||||||
|
pokemon_data = []
|
||||||
|
with read_garc(root / 'rom/a/1/9/5') as garc:
|
||||||
|
personals = [subfile[0].read() for subfile in garc]
|
||||||
|
_pokemon_forms = {} # "real" species id => (base species id, form name id)
|
||||||
|
_next_name_form_id = 723
|
||||||
|
for i, personal in enumerate(personals[:-1]):
|
||||||
|
record = pokemon_struct.parse(personal)
|
||||||
|
# TODO transform to an OD somehow probably
|
||||||
|
pokemon_data.append(record)
|
||||||
|
#print("{:3d} {:15s} {} {:5d} {:5d}".format(
|
||||||
|
# i,
|
||||||
|
# identifiers['species'][baseid],
|
||||||
|
# ('0'*16 + bin(record.mystery1)[2:])[-16:],
|
||||||
|
# record.mystery2,
|
||||||
|
# record.stage,
|
||||||
|
#))
|
||||||
|
# TODO some pokemon have sprite starts but no species start, because their sprites vary obv
|
||||||
|
if record.form_count > 1:
|
||||||
|
# The form names appear to be all just jammed at the end in order,
|
||||||
|
# completely unrelated to either of the "start" offsets here
|
||||||
|
for offset in range(record.form_count - 1):
|
||||||
|
#form_name = texts['en']['form-names'][_next_name_form_id]
|
||||||
|
|
||||||
|
if record.form_species_start:
|
||||||
|
# TODO still no idea how "intangible" forms are being
|
||||||
|
# handled in the new schema
|
||||||
|
_pokemon_forms[record.form_species_start + offset] = i, _next_name_form_id
|
||||||
|
|
||||||
|
_next_name_form_id += 1
|
||||||
|
|
||||||
|
if record.form_species_start:
|
||||||
|
for offset in range(record.form_count - 1):
|
||||||
|
# TODO grab the form names argh
|
||||||
|
identifiers['species'][record.form_species_start + offset] = identifiers['species'][i]
|
||||||
|
|
||||||
|
#for i in range(723, 825 + 1):
|
||||||
|
# base_species_id, form_name_id = _pokemon_forms[i]
|
||||||
|
# species_name = texts['en']['species-names'][base_species_id]
|
||||||
|
# form_name = texts['en']['form-names'][form_name_id]
|
||||||
|
# print(i, species_name, '/', form_name)
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Move stats
|
||||||
|
movesets = OrderedDict()
|
||||||
|
with read_garc(root / 'rom/a/1/8/9') as garc:
|
||||||
|
# Only one subfile
|
||||||
|
data = garc[0][0].read()
|
||||||
|
container = move_container_struct.parse(data)
|
||||||
|
for n, record in enumerate(container.records):
|
||||||
|
m = record.move
|
||||||
|
# TODO with the release of oras all moves have contest types and effects again! where are they??
|
||||||
|
#print("{:3d} {:20s} | {m.type:3d} {m.power:3d} {m.pp:2d} {m.accuracy:3d} / {m.priority:2d} {m.range:2d} {m.damage_class:1d} / {m.effect:3d} {m.caused_effect:3d} {m.effect_chance:3d} -- {m.status:3d} {m.min_turns:3d} {m.max_turns:3d} {m.crit_rate:3d} {m.flinch_chance:3d} {m.recoil:4d} {m.healing:3d} / {m.stat_change:06x} {m.stat_amount:06x} {m.stat_chance:06x} / {m.padding0:3d} {m.padding1:3d} {m.flags:04x} {m.padding2:3d} {m.extra:3d}".format(
|
||||||
|
# n,
|
||||||
|
# identifiers['move'][n],
|
||||||
|
# m=record.move,
|
||||||
|
#))
|
||||||
|
|
||||||
|
# Egg moves
|
||||||
|
with read_garc(root / 'rom/a/1/9/0') as garc:
|
||||||
|
for i, subfile in enumerate(garc):
|
||||||
|
ident = identifiers['species'][i]
|
||||||
|
data = subfile[0].read()
|
||||||
|
if not data:
|
||||||
|
continue
|
||||||
|
container = egg_moves_struct.parse(data)
|
||||||
|
moveset = movesets.setdefault(ident, OrderedDict())
|
||||||
|
eggset = moveset['egg'] = []
|
||||||
|
for moveid in container.moveids:
|
||||||
|
eggset.append(identifiers['move'][moveid])
|
||||||
|
|
||||||
|
# Level-up moves
|
||||||
|
with read_garc(root / 'rom/a/1/9/1') as garc:
|
||||||
|
for i, subfile in enumerate(garc):
|
||||||
|
ident = identifiers['species'][i]
|
||||||
|
level_up_moves = subfile[0].read()
|
||||||
|
moveset = movesets.setdefault(ident, OrderedDict())
|
||||||
|
levelset = moveset['level'] = []
|
||||||
|
lastlevel = None
|
||||||
|
order = 1
|
||||||
|
for pair in level_up_moves_struct.parse(level_up_moves):
|
||||||
|
# End is indicated with -1, -1
|
||||||
|
if pair.moveid <= 0:
|
||||||
|
break
|
||||||
|
levelset.append((
|
||||||
|
pair.level,
|
||||||
|
identifiers['move'][pair.moveid],
|
||||||
|
))
|
||||||
|
|
||||||
|
if pair.level == lastlevel:
|
||||||
|
order += 1
|
||||||
|
else:
|
||||||
|
lastlevel = pair.level
|
||||||
|
order = 1
|
||||||
|

    # Evolution
    #with read_garc(root / 'rom/a/1/9/2') as garc:
    #    for subfile in garc:
    #        evolution = subfile[0].read()
    #        print(repr(evolution))
    # Mega evolution
    #with read_garc(root / 'rom/a/1/9/3') as garc:
    #    for subfile in garc:
    #        evolution = subfile[0].read()
    #        print(repr(evolution))
    # TODO what is a/1/9/4? 8 files of 404 bytes each
    # Baby Pokémon
    #with read_garc(root / 'rom/a/1/9/6') as garc:
    #    for subfile in garc:
    #        baby_pokemon = subfile[0].read()
    #        print(repr(baby_pokemon))
    # Item stats
    #with read_garc(root / 'rom/a/1/9/7') as garc:
    #    for subfile in garc:
    #        item_stats = subfile[0].read()
    #        print(repr(item_stats))

    # Tutor moves (from the personal structs)
    for i, datum in enumerate(pokemon_data):
        ident = identifiers['species'][i]
        moveset = movesets.setdefault(ident, OrderedDict())
        tutorset = moveset['tutor'] = []
        for key, tutors in tutor_moves.items():
            for bit, moveident in enumerate(tutors):
                if moveident in ORAS_UNUSED_MOVE_TUTORS:
                    continue
                if not datum[key] & (1 << bit):
                    continue
                tutorset.append(moveident)

        # TMs
        machineset = moveset['machine'] = []
        for bit, moveident in enumerate(machines):
            if not datum['machines'] & (1 << bit):
                continue
            machineset.append(moveident)
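
    # Tutor and machine compatibility are plain bitfields over the move lists:
    # bit k set in the personal-struct field means the Pokémon learns the k-th
    # move.  A toy example (values hypothetical, not from the ROM):
    #   machines = ['toxic', 'protect', 'rest', 'return']
    #   datum['machines'] = 0b1010  ->  learns ['protect', 'return']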

    with (out / 'movesets.yaml').open('w') as f:
        dump_to_yaml(movesets, f)
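
    # The resulting movesets.yaml comes out roughly like this (a sketch --
    # real identifiers depend on the ripped text files):
    #   bulbasaur:
    #     egg: [skull-bash, charm]
    #     level:
    #     - [1, tackle]
    #     - [3, growl]
    #     tutor: [grass-pledge]
    #     machine: [toxic, protect]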


def extract_box_sprites(root, out):
    filenames = {}
    with (root / 'exe/code.bin').open('rb') as f:
        # Form configuration, used to put sprites in the right order
        # NOTE: in x/y the address is 0x0043ea98
        f.seek(0x0047d650)
        # TODO need to do a different thing for main sprites
        # TODO magic number
        for n in range(722):
            sprite = pokemon_sprite_struct.parse_stream(f)
            assert sprite.index not in filenames
            filenames[sprite.index] = "{}".format(n)
            if sprite.female_index != sprite.index:
                assert sprite.female_index not in filenames
                filenames[sprite.female_index] = "{}-female".format(n)
            # Note that these addresses are relative to RAM, and the binary is
            # loaded into RAM starting at 0x100000, so we need to subtract that
            # to get a file position
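            # i.e. file_offset = ram_address - 0x100000; a pointer of
            # 0x0047e123 (hypothetical value) would be read from 0x0037e123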
            pos = f.tell()
            form_indices = ()
            right_indices = ()

            if sprite.form_index_offset:
                f.seek(sprite.form_index_offset - 0x100000)
                form_indices = struct.unpack(
                    "<{}H".format(sprite.form_count),
                    f.read(2 * sprite.form_count),
                )
                for form, form_idx in enumerate(form_indices):
                    # Ignore the first form, since it's the default and thus
                    # covered by `index` already
                    if form == 0:
                        continue
                    if form_idx == sprite.index:
                        continue
                    assert form_idx not in filenames
                    filenames[form_idx] = "{}-form{}".format(n, form)

            if sprite.right_index_offset:
                f.seek(sprite.right_index_offset - 0x100000)
                right_indices = struct.unpack(
                    "<{}H".format(sprite.right_count),
                    f.read(2 * sprite.right_count),
                )
                if sprite.form_count:
                    assert sprite.right_count == sprite.form_count
                    for form, (form_idx, right_idx) in enumerate(zip(form_indices, right_indices)):
                        if form_idx == right_idx:
                            continue
                        if form != 0:
                            suffix = "form{}-right".format(form)
                        else:
                            suffix = 'right'
                        assert right_idx not in filenames
                        filenames[right_idx] = "{}-{}".format(n, suffix)
                else:
                    assert sprite.right_count == 2
                    assert right_indices[0] == right_indices[1]
                    if right_indices[0] != sprite.index:
                        assert right_indices[0] not in filenames
                        filenames[right_indices[0]] = "{}-right".format(n)

            f.seek(pos)
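
    # `filenames` now maps a sprite index to a name shaped like "N",
    # "N-female", "N-formF", "N-right", or "N-formF-right", where N is the
    # row number in the form-configuration table above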

    pokemon_sprites_dir = out
    if not pokemon_sprites_dir.exists():
        pokemon_sprites_dir.mkdir()
    with read_garc(root / 'rom/a/0/9/1') as garc:
        from .lib.clim import decode_clim
        for i, subfile in enumerate(garc):
            if i == 0:
                # Dummy blank sprite, not interesting to us
                continue
            elif i in filenames:
                filename = filenames[i] + '.png'
            elif i == len(garc) - 1:
                # Very last one is egg
                filename = 'egg.png'
            else:
                # This is a duplicate Entei sprite that's not used
                assert i in (333,)
                continue

            data = subfile[0].read()
            width, height, color_depth, pixels = decode_clim(data)
            png_writer = png.Writer(
                width=width,
                height=height,
                alpha=True,
            )
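            # decode_clim appears to yield `pixels` as rows of per-pixel
            # tuples; pypng wants each row flattened to
            # [r, g, b, a, r, g, b, a, ...], hence the chain() below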

            # this library is so fucking stupid
            # TODO strictly speaking we could just write out a paletted PNG directly
            # TODO add sBIT chunk indicating original bit depth
            with (pokemon_sprites_dir / filename).open('wb') as f:
                png_writer.write(f, (itertools.chain(*row) for row in pixels))


def extract_dex_sprites(root, out):
    # Some Pokémon have dex sprites for their forms, too, and they're all
    # clustered together, so we have to do a little work to fix the numbering.
    # Luckily the dex sprites are in the same order as the models
    # (unsurprising, as they're just model renders), which also tells us what
    # Pokémon have female forms.  The mega evolution map tells us which forms
    # are megas, and the rest are listed manually above as
    # ORAS_EXTRA_SPRITE_NAMES.

    # Grab the list of megas first
    num_megas = {}  # pokemonid => number of mega evos
    with read_garc(root / 'rom/a/1/9/3') as garc:
        for pokemonid, subfile in enumerate(garc):
            mega_evos = pokemon_mega_evolutions_struct.parse_stream(subfile[0])
            num_megas[pokemonid] = max(
                mega_evo.number for mega_evo in mega_evos)
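
    # e.g. num_megas ends up at 2 for Charizard and Mewtwo (Mega X and Y)
    # and at most 1 for everything else, which the filename logic below
    # relies on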

    # Then construct filenames, using num_megas plus information from the model
    # index
    filenames = {}  # model/sprite number => filename, sans extension
    duplicate_filenames = []  # pairs of (copy from, copy to)
    with read_garc(root / 'rom/a/0/0/8') as garc:
        f = garc[0][0]
        # TODO magic number
        for n in range(721):
            # Unlike /virtually everywhere else/, Pokémon are zero-indexed here
            pokemonid = n + 1
            # Index of the first model (also zero-indexed), how many models the
            # Pokémon has, and some flags
            start, count, flags = struct.unpack('<HBB', f.read(4))
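            # '<HBB' = little-endian uint16 (first model index) followed by
            # two uint8s (model count, flags): one 4-byte record per Pokémon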
            model_num = start + 1
            # For some asinine reason, Xerneas is counted as two separate
            # Pokémon in the dex sprites but not the models, so we have to
            # shift everything after it back by 1
            if pokemonid == 716:
                count = 2
            elif pokemonid >= 717:
                model_num += 1

            filenames[model_num] = str(pokemonid)
            form_count = count - 1  # discount "base" form
            total_model_count = model_num + count - 1

            # Some "forms" have no real default, so we save the sprite both as
            # nnn.png and nnn-form.png, to guarantee that nnn.png always exists
            if pokemonid in ORAS_EXTRA_SPRITE_NAMES:
                if ORAS_EXTRA_SPRITE_NAMES[pokemonid][0] is not None:
                    duplicate_filenames.append((
                        str(pokemonid),
                        "{}-{}".format(
                            pokemonid, ORAS_EXTRA_SPRITE_NAMES[pokemonid][0]),
                    ))

            # Don't know what flag 1 is; everything has it.
            # Flag 2 means the first alternate form is a female variant.
            if flags & 2:
                assert form_count > 0
                form_count -= 1
                model_num += 1
                filenames[model_num] = "female/{}".format(pokemonid)
            # Flag 4 just means there are more forms?
            if flags & 4:
                assert form_count

            # Sanity check: ideally exactly one of these explains any extra
            # forms.  The original `assert 1 or ...` short-circuits to True,
            # i.e. the check was effectively disabled, so it stays disabled
            # here -- just more visibly.
            #assert 1 == sum((
            #    form_count == 0,
            #    num_megas[pokemonid] > 0,
            #    pokemonid in ORAS_EXTRA_SPRITE_NAMES,
            #))
            if num_megas[pokemonid]:
                assert form_count == num_megas[pokemonid]
                assert pokemonid not in ORAS_EXTRA_SPRITE_NAMES
                model_num += 1
                if form_count == 1:
                    filenames[model_num] = "{}-mega".format(pokemonid)
                else:
                    # Charizard and Mewtwo
                    assert form_count == 2
                    filenames[model_num] = "{}-mega-x".format(pokemonid)
                    filenames[model_num + 1] = "{}-mega-y".format(pokemonid)
            elif pokemonid in ORAS_EXTRA_SPRITE_NAMES:
                for form_name in ORAS_EXTRA_SPRITE_NAMES[pokemonid][1:]:
                    model_num += 1
                    filenames[model_num] = "{}-{}".format(pokemonid, form_name)

    # And now, do the ripping
    # TODO This will save Unown A as 201.png, and not create a 201-a.png
    pokemon_sprites_dir = out
    with read_garc(root / 'rom/a/2/6/3') as garc:
        from .lib.clim import decode_clim
        for i, subfile in enumerate(garc):
            shiny_prefix = ''
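            # The garc appears to hold every regular sprite followed by the
            # whole set again in shiny colors, so indices past the model
            # count wrap around into shiny/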
            if i > total_model_count:
                i -= total_model_count
                shiny_prefix = 'shiny/'

            if i == 0:
                # Dummy blank sprite, not interesting to us
                continue
            elif 37 <= i <= 41:
                # Cosplay Pikachu's outfits -- the sprites are blank, so saving
                # these is not particularly useful
                continue
            elif i in filenames:
                filename = shiny_prefix + filenames[i] + '.png'
            else:
                raise ValueError(
                    "Can't find a filename for sprite number {}".format(i))

            data = subfile[0].read()
            width, height, color_depth, pixels = decode_clim(data)
            png_writer = png.Writer(
                width=width,
                height=height,
                alpha=True,
            )

            # this library is so fucking stupid
            # TODO strictly speaking we could just write out a paletted PNG directly
            # TODO add sBIT chunk indicating original bit depth
            path = pokemon_sprites_dir / filename
            parent = path.parent
            if not parent.exists():
                parent.mkdir(parents=False)

            with path.open('wb') as f:
                png_writer.write(f, (itertools.chain(*row) for row in pixels))

    for source, dest in duplicate_filenames:
        shutil.copyfile(
            str(pokemon_sprites_dir / source) + '.png',
            str(pokemon_sprites_dir / dest) + '.png',
        )


def _munge_source_arg(strpath):
    path = Path(strpath)
    if not path.is_dir():
        raise argparse.ArgumentTypeError(
            "{!r} is not a directory".format(strpath))

    # TODO something something romfs, exefs
    return path


def make_arg_parser():
    p = argparse.ArgumentParser()
    p.add_argument('what', choices=('data', 'dex-sprites', 'box-sprites'), help='what to extract')
    # TODO should verify that this is an actual game dump, and find the rom/exe
    p.add_argument('source', type=_munge_source_arg, help='path to an unpacked game image')
    # (_munge_source_arg is reused here, so the destination must already exist)
    p.add_argument('dest', type=_munge_source_arg, help='existing directory to dump the results into')

    return p
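
# A usage sketch (paths hypothetical; the module path is an assumption, not
# confirmed by this file):
#   python -m pokedex.extract.oras data /path/to/unpacked-rom /path/to/out
#   python -m pokedex.extract.oras dex-sprites /path/to/unpacked-rom sprites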


def main(args):
    parser = make_arg_parser()
    args = parser.parse_args(args)

    # TODO support 'all', and just make some subdirectories per thing
    # TODO or maybe merge all the sprite things together since stuff will need moving around anyway idk
    if args.what == 'data':
        extract_data(args.source, args.dest)
    elif args.what == 'dex-sprites':
        extract_dex_sprites(args.source, args.dest)
    elif args.what == 'box-sprites':
        extract_box_sprites(args.source, args.dest)


if __name__ == '__main__':
    import sys
    main(sys.argv[1:])