From 949eafb957359dee2628a1878ec144bc6a693a84 Mon Sep 17 00:00:00 2001 From: "Eevee (Lexy Munroe)" Date: Fri, 26 Feb 2016 10:05:51 -0800 Subject: [PATCH] Initial gen6-to-yaml ripping stuff --- pokedex/extract/__init__.py | 0 pokedex/extract/lib/__init__.py | 0 pokedex/extract/lib/base.py | 87 ++++ pokedex/extract/lib/clim.py | 182 +++++++ pokedex/extract/lib/garc.py | 307 ++++++++++++ pokedex/extract/lib/lzss3.py | 287 +++++++++++ pokedex/extract/lib/pc.py | 19 + pokedex/extract/lib/text.py | 115 +++++ pokedex/extract/oras.py | 844 ++++++++++++++++++++++++++++++++ 9 files changed, 1841 insertions(+) create mode 100644 pokedex/extract/__init__.py create mode 100644 pokedex/extract/lib/__init__.py create mode 100644 pokedex/extract/lib/base.py create mode 100644 pokedex/extract/lib/clim.py create mode 100644 pokedex/extract/lib/garc.py create mode 100644 pokedex/extract/lib/lzss3.py create mode 100644 pokedex/extract/lib/pc.py create mode 100644 pokedex/extract/lib/text.py create mode 100644 pokedex/extract/oras.py diff --git a/pokedex/extract/__init__.py b/pokedex/extract/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pokedex/extract/lib/__init__.py b/pokedex/extract/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pokedex/extract/lib/base.py b/pokedex/extract/lib/base.py new file mode 100644 index 0000000..d67f1fd --- /dev/null +++ b/pokedex/extract/lib/base.py @@ -0,0 +1,87 @@ +"""Base or helper classes used a lot for dealing with file formats. +""" +import io +import struct + + +class Substream: + """Wraps a stream and pretends it starts at an offset other than 0. + + Partly implements the file interface. + + This type always seeks before reading, but doesn't do so afterwards, so + interleaving reads with the underlying stream may not do what you want. + """ + def __init__(self, stream, offset=0, length=-1): + if isinstance(stream, Substream): + self.stream = stream.stream + self.offset = offset + stream.offset + else: + self.stream = stream + self.offset = offset + + self.length = length + self.pos = 0 + + def __repr__(self): + return "<{} of {} at {}>".format( + type(self).__name__, self.stream, self.offset) + + def read(self, n=-1): + self.stream.seek(self.offset + self.pos) + if n < 0: + n = self.length + elif self.length >= 0 and n > self.length: + n = self.length + data = self.stream.read(n) + self.pos += len(data) + return data + + def seek(self, offset): + offset = max(offset, 0) + if self.length >= 0: + offset = min(offset, self.length) + self.stream.seek(self.offset + offset) + self.pos = self.tell() + + def tell(self): + return self.stream.tell() - self.offset + + def __len__(self): + if self.length < 0: + pos = self.stream.tell() + self.stream.seek(0, io.SEEK_END) + parent_length = self.stream.tell() + self.stream.seek(pos) + return parent_length - self.offset + else: + return self.length + + def peek(self, n): + pos = self.stream.tell() + self.stream.seek(self.offset + self.pos) + data = self.stream.read(n) + self.stream.seek(pos) + return data + + def unpack(self, fmt): + """Unpacks a struct format from the current position in the stream.""" + data = self.read(struct.calcsize(fmt)) + return struct.unpack(fmt, data) + + def slice(self, offset, length=-1): + # TODO limit or warn if length is too long for this slice? + return Substream(self, self.offset + offset, length) + + +class _ContainerFile: + slices = () + + def __len__(self): + return len(self.slices) + + def __iter__(self): + return iter(self.slices) + + def __getitem__(self, key): + return self.slices[key] diff --git a/pokedex/extract/lib/clim.py b/pokedex/extract/lib/clim.py new file mode 100644 index 0000000..6d5b540 --- /dev/null +++ b/pokedex/extract/lib/clim.py @@ -0,0 +1,182 @@ +import math +import struct + +import construct as c + +clim_header_struct = c.Struct( + 'clim_header', + c.Magic(b'CLIM'), + c.Const(c.ULInt16('endianness'), 0xfeff), + c.Const(c.ULInt16('header_length'), 0x14), + c.ULInt32('version'), + c.ULInt32('file_size'), + c.ULInt32('blocks_ct'), +) +imag_header_struct = c.Struct( + 'imag_header', + c.Magic(b'imag'), + c.Const(c.ULInt32('section_length'), 0x10), + c.ULInt16('width'), + c.ULInt16('height'), + c.Enum( + c.ULInt32('format'), + L8=0, + A8=1, + LA4=2, + LA8=3, + HILO8=4, + RGB565=5, + RGB8=6, + RGBA5551=7, + RGBA4=8, + RGBA8=9, + ETC1=10, + ETC1A4=11, + L4=12, + A4=13, + #ETC1=19, + ) +) + + +COLOR_DECODERS = {} + + +def _register_color_decoder(name, *, bpp, depth): + def register(f): + COLOR_DECODERS[name] = f, bpp, depth + return f + return register + + +@_register_color_decoder('RGBA4', bpp=2, depth=4) +def decode_rgba4(data): + # The idea is that every uint16 is a packed rrrrggggbbbbaaaa, but when + # written out little-endian this becomes bbbbaaaarrrrgggg and there's just + # no pretty way to deal with this + for i in range(0, len(data), 2): + ba = data[i] + rg = data[i + 1] + r = (((rg & 0xf0) >> 4) * 255 + 7) // 15 + g = (((rg & 0x0f) >> 0) * 255 + 7) // 15 + b = (((ba & 0xf0) >> 4) * 255 + 7) // 15 + a = (((ba & 0x0f) >> 0) * 255 + 7) // 15 + yield r, g, b, a + + +@_register_color_decoder('RGBA5551', bpp=2, depth=5) +def decode_rgba5551(data, *, start=0, count=None): + # I am extremely irritated that construct cannot parse this mess for me + # rrrrrgggggbbbbba + if count is None: + end = len(data) + else: + end = start + count * 2 + + for i in range(start, end, 2): + datum = data[i] + data[i + 1] * 256 + r = (((datum >> 11) & 0x1f) * 255 + 15) // 31 + g = (((datum >> 6) & 0x1f) * 255 + 15) // 31 + b = (((datum >> 1) & 0x1f) * 255 + 15) // 31 + a = (datum & 0x1) * 255 + yield r, g, b, a + + +del _register_color_decoder + + +def apply_palette(palette, data, *, start=0): + # TODO i am annoyed that this does a pointless copy, but i assume islice() + # has even more overhead... + if start != 0: + data = data[start:] + + if len(palette) <= 16: + # Short palettes allow cramming two pixels into each byte + return ( + palette[idx] + for byte in data + for idx in (byte >> 4, byte & 0x0f) + ) + else: + return map(palette.__getitem__, data) + + +def untile_pixels(raw_pixels, width, height): + """Unscramble pixels into plain old rows. + + The pixels are arranged in 8×8 tiles, and each tile is a third- + iteration Z-order curve. + + Taken from: https://github.com/Zhorken/pokemon-x-y-icons/ + """ + + # Images are stored padded to powers of two + stored_width = 2 ** math.ceil(math.log(width) / math.log(2)) + stored_height = 2 ** math.ceil(math.log(height) / math.log(2)) + num_pixels = stored_width * stored_height + tile_width = stored_width // 8 + + pixels = [ + [None for x in range(width)] + for y in range(height) + ] + + for n, pixel in enumerate(raw_pixels): + if n >= num_pixels: + break + + # Find the coordinates of the top-left corner of the current tile. + # n.b. The image is eight tiles wide, and each tile is 8×8 pixels. + tile_num = n // 64 + tile_y = tile_num // tile_width * 8 + tile_x = tile_num % tile_width * 8 + + # Determine the pixel's coordinates within the tile + # http://en.wikipedia.org/wiki/Z-order_curve#Coordinate_values + within_tile = n % 64 + + sub_x = ( + (within_tile & 0b000001) | + (within_tile & 0b000100) >> 1 | + (within_tile & 0b010000) >> 2 + ) + sub_y = ( + (within_tile & 0b000010) >> 1 | + (within_tile & 0b001000) >> 2 | + (within_tile & 0b100000) >> 3 + ) + + # Add up the pixel's coordinates within the whole image + x = tile_x + sub_x + y = tile_y + sub_y + + if x < width and y < height: + pixels[y][x] = pixel + + return pixels + + +def decode_clim(data): + imag_header = imag_header_struct.parse(data[-20:]) + if imag_header.format not in COLOR_DECODERS: + raise ValueError( + "don't know how to decode {} pixels".format(imag_header.format)) + color_decoder, color_bpp, color_depth = COLOR_DECODERS[imag_header.format] + + mode, = struct.unpack_from('>= 1 + return c + + +garc_header_struct = c.Struct( + 'garc_header', + c.Magic(b'CRAG'), + c.Const(c.ULInt32('header_size'), 0x1c), + c.Const(c.ULInt16('byte_order'), 0xfeff), + c.Const(c.ULInt16('mystery1'), 0x0400), + c.Const(c.ULInt32('chunks_ct'), 4), + c.ULInt32('data_offset'), + c.ULInt32('garc_length'), + c.ULInt32('last_length'), +) +fato_header_struct = c.Struct( + 'fato_header', + c.Magic(b'OTAF'), + c.ULInt32('header_size'), + c.ULInt16('count'), + c.Const(c.ULInt16('padding'), 0xffff), + c.Array( + lambda ctx: ctx.count, + c.ULInt32('fatb_offsets'), + ), +) +fatb_header_struct = c.Struct( + 'fatb_header', + c.Magic(b'BTAF'), + c.ULInt32('fatb_length'), + c.ULInt32('count'), +) + + +class GARCFile(_ContainerFile): + def __init__(self, stream): + self.stream = stream = Substream(stream) + + garc_header = garc_header_struct.parse_stream(self.stream) + # FATO (file allocation table... offsets?) + fato_header = fato_header_struct.parse_stream(self.stream) + # FATB (file allocation table) + fatb_header = fatb_header_struct.parse_stream(self.stream) + + fatb_start = garc_header.header_size + fato_header.header_size + assert stream.tell() == fatb_start + 12 + + self.slices = [] + for i, offset in enumerate(fato_header.fatb_offsets): + stream.seek(fatb_start + offset + 12) + + slices = [] + bits, = struct.unpack('>= 1 + + self.slices.append(GARCEntry(stream, slices)) + + # FIMB + stream.seek(fatb_start + fatb_header.fatb_length) + magic, fimb_header_length, fimb_length = struct.unpack( + '<4s2L', stream.read(12)) + assert magic == b'BMIF' + assert fimb_header_length == 0xC + + +class GARCEntry(object): + def __init__(self, stream, slices): + self.stream = stream + self.slices = slices + + def __getitem__(self, i): + start, length = self.slices[i] + ss = self.stream.slice(start, length) + if ss.peek(1) in [b'\x10', b'\x11']: + # XXX this sucks but there's no real way to know for sure whether + # data is compressed or not. maybe just bake this into the caller + # and let them deal with it, same way we do with text decoding? + # TODO it would be nice if this could be done lazily for 'inspect' + # purposes, since the first four bytes are enough to tell you the + # size + try: + data = lzss3.decompress_bytes(ss.read()) + except Exception: + ss.seek(0) + else: + return Substream(BytesIO(data)) + return ss + + def __len__(self): + return len(self.slices) + + +XY_CHAR_MAP = { + 0x307f: 0x202f, # nbsp + 0xe08d: 0x2026, # ellipsis + 0xe08e: 0x2642, # female sign + 0xe08f: 0x2640, # male sign +} + +XY_VAR_NAMES = { + 0xff00: "COLOR", + 0x0100: "TRNAME", + 0x0101: "PKNAME", + 0x0102: "PKNICK", + 0x0103: "TYPE", + 0x0105: "LOCATION", + 0x0106: "ABILITY", + 0x0107: "MOVE", + 0x0108: "ITEM1", + 0x0109: "ITEM2", + 0x010a: "sTRBAG", + 0x010b: "BOX", + 0x010d: "EVSTAT", + 0x0110: "OPOWER", + 0x0127: "RIBBON", + 0x0134: "MIINAME", + 0x013e: "WEATHER", + 0x0189: "TRNICK", + 0x018a: "1stchrTR", + 0x018b: "SHOUTOUT", + 0x018e: "BERRY", + 0x018f: "REMFEEL", + 0x0190: "REMQUAL", + 0x0191: "WEBSITE", + 0x019c: "CHOICECOS", + 0x01a1: "GSYNCID", + 0x0192: "PRVIDSAY", + 0x0193: "BTLTEST", + 0x0195: "GENLOC", + 0x0199: "CHOICEFOOD", + 0x019a: "HOTELITEM", + 0x019b: "TAXISTOP", + 0x019f: "MAISTITLE", + 0x1000: "ITEMPLUR0", + 0x1001: "ITEMPLUR1", + 0x1100: "GENDBR", + 0x1101: "NUMBRNCH", + 0x1302: "iCOLOR2", + 0x1303: "iCOLOR3", + 0x0200: "NUM1", + 0x0201: "NUM2", + 0x0202: "NUM3", + 0x0203: "NUM4", + 0x0204: "NUM5", + 0x0205: "NUM6", + 0x0206: "NUM7", + 0x0207: "NUM8", + 0x0208: "NUM9", +} + + +def _xy_inner_keygen(key): + while True: + yield key + key = ((key << 3) | (key >> 13)) & 0xffff + + +def _xy_outer_keygen(): + key = 0x7c89 + while True: + yield _xy_inner_keygen(key) + key = (key + 0x2983) & 0xffff + + +def decrypt_xy_text(data): + text_sections, lines, length, initial_key, section_data = struct.unpack_from( + '> 7) & 1, + (byte >> 6) & 1, + (byte >> 5) & 1, + (byte >> 4) & 1, + (byte >> 3) & 1, + (byte >> 2) & 1, + (byte >> 1) & 1, + (byte) & 1) + + +def decompress_raw_lzss10(indata, decompressed_size, _overlay=False): + """Decompress LZSS-compressed bytes. Returns a bytearray.""" + data = bytearray() + + it = iter(indata) + + if _overlay: + disp_extra = 3 + else: + disp_extra = 1 + + def writebyte(b): + data.append(b) + + def readbyte(): + return next(it) + + def readshort(): + # big-endian + a = next(it) + b = next(it) + return (a << 8) | b + + def copybyte(): + data.append(next(it)) + + while len(data) < decompressed_size: + b = readbyte() + flags = bits(b) + for flag in flags: + if flag == 0: + copybyte() + elif flag == 1: + sh = readshort() + count = (sh >> 0xc) + 3 + disp = (sh & 0xfff) + disp_extra + + for _ in range(count): + writebyte(data[-disp]) + else: + raise ValueError(flag) + + if decompressed_size <= len(data): + break + + if len(data) != decompressed_size: + raise DecompressionError( + "decompressed size does not match the expected size") + + return data + + +def decompress_raw_lzss11(indata, decompressed_size): + """Decompress LZSS-compressed bytes. Returns a bytearray.""" + data = bytearray() + + it = iter(indata) + + def writebyte(b): + data.append(b) + + def readbyte(): + return next(it) + + def copybyte(): + data.append(next(it)) + + while len(data) < decompressed_size: + b = readbyte() + flags = bits(b) + for flag in flags: + if flag == 0: + copybyte() + elif flag == 1: + b = readbyte() + indicator = b >> 4 + + if indicator == 0: + # 8 bit count, 12 bit disp + # indicator is 0, don't need to mask b + count = (b << 4) + b = readbyte() + count += b >> 4 + count += 0x11 + elif indicator == 1: + # 16 bit count, 12 bit disp + count = ((b & 0xf) << 12) + (readbyte() << 4) + b = readbyte() + count += b >> 4 + count += 0x111 + else: + # indicator is count (4 bits), 12 bit disp + count = indicator + count += 1 + + disp = ((b & 0xf) << 8) + readbyte() + disp += 1 + + try: + for _ in range(count): + writebyte(data[-disp]) + except IndexError: + raise Exception(count, disp, len(data), sum(1 for x in it)) + else: + raise ValueError(flag) + + if decompressed_size <= len(data): + break + + if len(data) != decompressed_size: + raise DecompressionError( + "decompressed size does not match the expected size") + + return data + + +def decompress_overlay(f, out): + # the compression header is at the end of the file + f.seek(-8, SEEK_END) + header = f.read(8) + + # decompression goes backwards. + # end < here < start + + # end_delta == here - decompression end address + # start_delta == decompression start address - here + end_delta, start_delta = unpack("> 0x18 + end_delta &= 0xFFFFFF + decompressed_size = start_delta + end_delta + + f.seek(-end_delta, SEEK_END) + + data = bytearray() + data.extend(f.read(end_delta - padding)) + data.reverse() + + uncompressed_data = decompress_raw_lzss10( + data, decompressed_size, _overlay=True) + uncompressed_data.reverse() + + # first we write up to the portion of the file which was "overwritten" by + # the decompressed data, then the decompressed data itself. + # i wonder if it's possible for decompression to overtake the compressed + # data, so that the decompression code is reading its own output... + f.seek(0, SEEK_SET) + out.write(f.read(filelen - end_delta)) + out.write(uncompressed_data) + + +def decompress(obj): + """Decompress LZSS-compressed bytes or a file-like object. + + Shells out to decompress_file() or decompress_bytes() depending on + whether or not the passed-in object has a 'read' attribute or not. + + Returns a bytearray.""" + if hasattr(obj, 'read'): + return decompress_file(obj) + else: + return decompress_bytes(obj) + + +def decompress_bytes(data): + """Decompress LZSS-compressed bytes. Returns a bytearray.""" + header = data[:4] + if header[0] == 0x10: + decompress_raw = decompress_raw_lzss10 + elif header[0] == 0x11: + decompress_raw = decompress_raw_lzss11 + else: + raise DecompressionError("not as lzss-compressed file") + + decompressed_size, = unpack("= len(identifiers['move']): + break + moves.append(identifiers['move'][moveid]) + + # TMs + machines = [] + f.seek(0x004a67ee) + machineids = struct.unpack('<107H', f.read(2 * 107)) + # Order appears to be based on some gen 4 legacy: TMs 1 through 92, HMs + # 1 through 6, then the other eight TMs and the last HM. But the bits + # in the Pokémon structs are in the expected order of 1 through 100, 1 + # through 7 + machines = [ + identifiers['move'][moveid] + for moveid in + machineids[0:92] + + machineids[98:106] + + machineids[92:98] + + machineids[106:] + ] + + + # ------------------------------------------------------------------------- + # Pokémon structs + pokemon_data = [] + with read_garc(root / 'rom/a/1/9/5') as garc: + personals = [subfile[0].read() for subfile in garc] + _pokemon_forms = {} # "real" species id => (base species id, form name id) + _next_name_form_id = 723 + for i, personal in enumerate(personals[:-1]): + record = pokemon_struct.parse(personal) + # TODO transform to an OD somehow probably + pokemon_data.append(record) + #print("{:3d} {:15s} {} {:5d} {:5d}".format( + # i, + # identifiers['species'][baseid], + # ('0'*16 + bin(record.mystery1)[2:])[-16:], + # record.mystery2, + # record.stage, + #)) + # TODO some pokemon have sprite starts but no species start, because their sprites vary obv + if record.form_count > 1: + # The form names appear to be all just jammed at the end in order, + # completely unrelated to either of the "start" offsets here + for offset in range(record.form_count - 1): + #form_name = texts['en']['form-names'][_next_name_form_id] + + if record.form_species_start: + # TODO still no idea how "intangible" forms are being + # handled in the new schema + _pokemon_forms[record.form_species_start + offset] = i, _next_name_form_id + + _next_name_form_id += 1 + + if record.form_species_start: + for offset in range(record.form_count - 1): + # TODO grab the form names argh + identifiers['species'][record.form_species_start + offset] = identifiers['species'][i] + + #for i in range(723, 825 + 1): + # base_species_id, form_name_id = _pokemon_forms[i] + # species_name = texts['en']['species-names'][base_species_id] + # form_name = texts['en']['form-names'][form_name_id] + # print(i, species_name, '/', form_name) + + # ------------------------------------------------------------------------- + # Move stats + movesets = OrderedDict() + with read_garc(root / 'rom/a/1/8/9') as garc: + # Only one subfile + data = garc[0][0].read() + container = move_container_struct.parse(data) + for n, record in enumerate(container.records): + m = record.move + # TODO with the release of oras all moves have contest types and effects again! where are they?? + #print("{:3d} {:20s} | {m.type:3d} {m.power:3d} {m.pp:2d} {m.accuracy:3d} / {m.priority:2d} {m.range:2d} {m.damage_class:1d} / {m.effect:3d} {m.caused_effect:3d} {m.effect_chance:3d} -- {m.status:3d} {m.min_turns:3d} {m.max_turns:3d} {m.crit_rate:3d} {m.flinch_chance:3d} {m.recoil:4d} {m.healing:3d} / {m.stat_change:06x} {m.stat_amount:06x} {m.stat_chance:06x} / {m.padding0:3d} {m.padding1:3d} {m.flags:04x} {m.padding2:3d} {m.extra:3d}".format( + # n, + # identifiers['move'][n], + # m=record.move, + #)) + + # Egg moves + with read_garc(root / 'rom/a/1/9/0') as garc: + for i, subfile in enumerate(garc): + ident = identifiers['species'][i] + data = subfile[0].read() + if not data: + continue + container = egg_moves_struct.parse(data) + moveset = movesets.setdefault(ident, OrderedDict()) + eggset = moveset['egg'] = [] + for moveid in container.moveids: + eggset.append(identifiers['move'][moveid]) + + # Level-up moves + with read_garc(root / 'rom/a/1/9/1') as garc: + for i, subfile in enumerate(garc): + ident = identifiers['species'][i] + level_up_moves = subfile[0].read() + moveset = movesets.setdefault(ident, OrderedDict()) + levelset = moveset['level'] = [] + lastlevel = None + order = 1 + for pair in level_up_moves_struct.parse(level_up_moves): + # End is indicated with -1, -1 + if pair.moveid <= 0: + break + levelset.append(( + pair.level, + identifiers['move'][pair.moveid], + )) + + if pair.level == lastlevel: + order += 1 + else: + lastlevel = pair.level + order = 1 + + # Evolution + #with read_garc(root / 'rom/a/1/9/2') as garc: + # for subfile in garc: + # evolution = subfile[0].read() + # print(repr(evolution)) + # Mega evolution + #with read_garc(root / 'rom/a/1/9/3') as garc: + # for subfile in garc: + # evolution = subfile[0].read() + # print(repr(evolution)) + # TODO what is a/1/9/4? 8 files of 404 bytes each + # Baby Pokémon + #with read_garc(root / 'rom/a/1/9/6') as garc: + # for subfile in garc: + # baby_pokemon = subfile[0].read() + # print(repr(baby_pokemon)) + # Item stats + #with read_garc(root / 'rom/a/1/9/7') as garc: + # for subfile in garc: + # item_stats = subfile[0].read() + # print(repr(item_stats)) + + # Tutor moves (from the personal structs) + for i, datum in enumerate(pokemon_data): + ident = identifiers['species'][i] + moveset = movesets.setdefault(ident, OrderedDict()) + tutorset = moveset['tutor'] = [] + for key, tutors in tutor_moves.items(): + for bit, moveident in enumerate(tutors): + if moveident in ORAS_UNUSED_MOVE_TUTORS: + continue + if not datum[key] & (1 << bit): + continue + tutorset.append(moveident) + + # TMs + machineset = moveset['machine'] = [] + for bit, moveident in enumerate(machines): + if not datum['machines'] & (1 << bit): + continue + machineset.append(moveident) + + with (out / 'movesets.yaml').open('w') as f: + dump_to_yaml(movesets, f) + + +def extract_box_sprites(root, out): + filenames = {} + with (root / 'exe/code.bin').open('rb') as f: + # Form configuration, used to put sprites in the right order + # NOTE: in x/y the address is 0x0043ea98 + f.seek(0x0047d650) + # TODO need to do a different thing for main sprites + # TODO magic number + for n in range(722): + sprite = pokemon_sprite_struct.parse_stream(f) + assert sprite.index not in filenames + filenames[sprite.index] = "{}".format(n) + if sprite.female_index != sprite.index: + assert sprite.female_index not in filenames + filenames[sprite.female_index] = "{}-female".format(n) + # Note that these addresses are relative to RAM, and the binary is + # loaded into RAM starting at 0x100000, so we need to subtract that + # to get a file position + pos = f.tell() + form_indices = () + right_indices = () + + if sprite.form_index_offset: + f.seek(sprite.form_index_offset - 0x100000) + form_indices = struct.unpack( + "<{}H".format(sprite.form_count), + f.read(2 * sprite.form_count), + ) + for form, form_idx in enumerate(form_indices): + # Ignore the first form, since it's the default and thus + # covered by `index` already + if form == 0: + continue + if form_idx == sprite.index: + continue + assert form_idx not in filenames + filenames[form_idx] = "{}-form{}".format(n, form) + + if sprite.right_index_offset: + f.seek(sprite.right_index_offset - 0x100000) + right_indices = struct.unpack( + "<{}H".format(sprite.right_count), + f.read(2 * sprite.right_count), + ) + if sprite.form_count: + assert sprite.right_count == sprite.form_count + for form, (form_idx, right_idx) in enumerate(zip(form_indices, right_indices)): + if form_idx == right_idx: + continue + if form != 0: + suffix = "form{}-right".format(form) + else: + suffix = 'right' + assert right_idx not in filenames + filenames[right_idx] = "{}-{}".format(n, suffix) + else: + assert sprite.right_count == 2 + assert right_indices[0] == right_indices[1] + if right_indices[0] != sprite.index: + assert right_indices[0] not in filenames + filenames[right_indices[0]] = "{}-right".format(n) + + f.seek(pos) + + pokemon_sprites_dir = out + if not pokemon_sprites_dir.exists(): + pokemon_sprites_dir.mkdir() + with read_garc(root / 'rom/a/0/9/1') as garc: + from .lib.clim import decode_clim + for i, subfile in enumerate(garc): + if i == 0: + # Dummy blank sprite, not interesting to us + continue + elif i in filenames: + filename = filenames[i] + '.png' + elif i == len(garc) - 1: + # Very last one is egg + filename = 'egg.png' + else: + # This is a duplicate Entei sprite that's not used + assert i in (333,) + continue + + data = subfile[0].read() + width, height, color_depth, pixels = decode_clim(data) + png_writer = png.Writer( + width=width, + height=height, + alpha=True, + ) + + # this library is so fucking stupid + # TODO strictly speaking we could just write out a paletted PNG directly + # TODO add sBIT chunk indicating original bit depth + with (pokemon_sprites_dir / filename).open('wb') as f: + png_writer.write(f, (itertools.chain(*row) for row in pixels)) + + +def extract_dex_sprites(root, out): + # Some Pokémon have dex sprites for their forms, too, and they're all + # clustered together, so we have to do a little work to fix the numbering. + # Luckily the dex sprites are in the same order as the models + # (unsurprising, as they're just model renders), which also tells us what + # Pokémon have female forms. The mega evolution map tells us which forms + # are megas, and the rest are listed manually above as + # ORAS_EXTRA_SPRITE_NAMES. + + # Grab the list of megas first + num_megas = {} # pokemonid => number of mega evos + with read_garc(root / 'rom/a/1/9/3') as garc: + for pokemonid, subfile in enumerate(garc): + mega_evos = pokemon_mega_evolutions_struct.parse_stream(subfile[0]) + num_megas[pokemonid] = max( + mega_evo.number for mega_evo in mega_evos) + + # Then construct filenames, using num_megas plus information from the model + # index + filenames = {} # model/sprite number => filename, sans extension + duplicate_filenames = [] # pairs of (copy from, copy to) + with read_garc(root / 'rom/a/0/0/8') as garc: + f = garc[0][0] + # TODO magic number + for n in range(721): + # Unlike /virtually everywhere else/, Pokémon are zero-indexed here + pokemonid = n + 1 + # Index of the first model (also zero-indexed), how many models the + # Pokémon has, and some flags + start, count, flags = struct.unpack('= 717: + model_num += 1 + + filenames[model_num] = str(pokemonid) + form_count = count - 1 # discount "base" form + total_model_count = model_num + count - 1 + + # Some "forms" have no real default, so we save the sprite both as + # nnn.png and nnn-form.png, to guarantee that nnn.png always exists + if pokemonid in ORAS_EXTRA_SPRITE_NAMES: + if ORAS_EXTRA_SPRITE_NAMES[pokemonid][0] is not None: + duplicate_filenames.append(( + str(pokemonid), + "{}-{}".format( + pokemonid, ORAS_EXTRA_SPRITE_NAMES[pokemonid][0]), + )) + + # Don't know what flag 1 is; everything has it. + # Flag 2 means the first alternate form is a female variant. + if flags & 2: + assert form_count > 0 + form_count -= 1 + model_num += 1 + filenames[model_num] = "female/{}".format(pokemonid) + # Flag 4 just means there are more forms? + if flags & 4: + assert form_count + + assert 1 or 1 == sum(( + form_count == 0, + num_megas[pokemonid] > 0, + pokemonid in ORAS_EXTRA_SPRITE_NAMES, + )) + if num_megas[pokemonid]: + assert form_count == num_megas[pokemonid] + assert pokemonid not in ORAS_EXTRA_SPRITE_NAMES + model_num += 1 + if form_count == 1: + filenames[model_num] = "{}-mega".format(pokemonid) + else: + # Charizard and Mewtwo + assert form_count == 2 + filenames[model_num] = "{}-mega-x".format(pokemonid) + filenames[model_num + 1] = "{}-mega-y".format(pokemonid) + elif pokemonid in ORAS_EXTRA_SPRITE_NAMES: + for form_name in ORAS_EXTRA_SPRITE_NAMES[pokemonid][1:]: + model_num += 1 + filenames[model_num] = "{}-{}".format(pokemonid, form_name) + + # And now, do the ripping + # TODO This will save Unown A as 201.png, and not create a 201-a.png + pokemon_sprites_dir = out + with read_garc(root / 'rom/a/2/6/3') as garc: + from .lib.clim import decode_clim + for i, subfile in enumerate(garc): + shiny_prefix = '' + if i > total_model_count: + i -= total_model_count + shiny_prefix = 'shiny/' + + if i == 0: + # Dummy blank sprite, not interesting to us + continue + elif 37 <= i <= 41: + # Cosplay Pikachu's outfits -- the sprites are blank, so saving + # these is not particularly useful + continue + elif i in filenames: + filename = shiny_prefix + filenames[i] + '.png' + else: + raise ValueError( + "Can't find a filename for sprite number {}".format(i)) + + data = subfile[0].read() + width, height, color_depth, pixels = decode_clim(data) + png_writer = png.Writer( + width=width, + height=height, + alpha=True, + ) + + # this library is so fucking stupid + # TODO strictly speaking we could just write out a paletted PNG directly + # TODO add sBIT chunk indicating original bit depth + path = pokemon_sprites_dir / filename + parent = path.parent + if not parent.exists(): + parent.mkdir(parents=False) + + with path.open('wb') as f: + png_writer.write(f, (itertools.chain(*row) for row in pixels)) + + for source, dest in duplicate_filenames: + shutil.copyfile( + str(pokemon_sprites_dir / source) + '.png', + str(pokemon_sprites_dir / dest) + '.png', + ) + + +def _munge_source_arg(strpath): + path = Path(strpath) + if not path.is_dir(): + raise argparse.ArgumentTypeError( + "{!r} is not a directory".format(strpath)) + + # TODO something something romfs, exefs + return path + +def make_arg_parser(): + p = argparse.ArgumentParser() + p.add_argument('what', choices=('data', 'dex-sprites', 'box-sprites'), help='what to extract') + # TODO should verify that this is an actual game dump, and find the rom/exe + p.add_argument('source', type=_munge_source_arg, help='path to an unpacked game image') + p.add_argument('dest', type=_munge_source_arg, help='directory to dump the results into') + + return p + + +def main(args): + parser = make_arg_parser() + args = parser.parse_args(args) + + # TODO support 'all', and just make some subdirectories per thing + # TODO or maybe merge all the sprite things together since stuff will need moving around anyway idk + if args.what == 'data': + extract_data(args.source, args.dest) + elif args.what == 'dex-sprites': + extract_dex_sprites(args.source, args.dest) + elif args.what == 'box-sprites': + extract_box_sprites(args.source, args.dest) + + +if __name__ == '__main__': + import sys + main(sys.argv[1:])