From 053f2a8d22124e2931ddfcaedd48bc4ec6e5deb3 Mon Sep 17 00:00:00 2001 From: "Eevee (Lexy Munroe)" Date: Thu, 5 Jan 2017 04:57:05 -0800 Subject: [PATCH] Use YAML schema for gen 6/7; add gen7 form names; improved image support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specifically: - Add support for detecting FLIM format - Add support for more color formats - Add a small decoded image type that knows how to write itself out as a PNG - Improve ETC1 decoder to work with images whose dimensions are not powers of two, images with no alpha channel, and images with the strange FLIM pixel order - Port the gen 6/7 extractor to Construct 2.8 - Switch to using script tags in language names, to distinguish Japanese kana from kanji and Simplified from Traditional Chinese - Drop the load-time merging of kanji and kana - Add paths to various text files in SUMO - Add form names for SUMO Pokémon - Clean up identifiers a bit, especially the distinction between species and Pokémon - Use the Pokémon schema type to dump what we have so far, and give it a couple more fields that didn't exist in gen 1 - Get movesets dumping correctly - Special-case a bunch of weirdness, where the number of dex sprites doesn't match the number of models in SUMO --- pokedex/extract/lib/clim.py | 222 ++++++++- pokedex/extract/lib/etc1.py | 58 ++- pokedex/extract/lib/garc.py | 48 +- pokedex/extract/lib/pc.py | 2 +- pokedex/extract/oras.py | 870 ++++++++++++++++++++++-------------- pokedex/schema.py | 21 + 6 files changed, 846 insertions(+), 375 deletions(-) diff --git a/pokedex/extract/lib/clim.py b/pokedex/extract/lib/clim.py index 3723b50..3329855 100644 --- a/pokedex/extract/lib/clim.py +++ b/pokedex/extract/lib/clim.py @@ -1,3 +1,5 @@ +import io +import itertools import math import struct @@ -17,7 +19,7 @@ imag_header_struct = c.Struct( 'section_length' / c.Const(c.Int32ul, 0x10), 'width' / c.Int16ul, 'height' / c.Int16ul, - 'format' / c.Int32ul, + #'format' / c.Int32ul, # TODO this seems to have been expanded into several things in SUMO #c.Enum( # c.ULInt32('format'), @@ -37,13 +39,35 @@ imag_header_struct = c.Struct( # A4=13, # #ETC1=19, #) + 'unknown' / c.Int16ul, + 'format' / c.Enum( + c.Int8ul, + L8=0, + A8=1, + LA4=2, + LA8=3, + HILO8=4, + RGB565=5, + RGB8=6, + RGBA5551=7, + RGBA4=8, + RGBA8=9, + ETC1=10, + ETC1A4=11, + L4=12, + A4=13, + #ETC1=19, + ), + # RGB565=5, + # ETC1A4=11, + 'unknown2' / c.Int8ul, ) # TODO probably move these to their own module, since they aren't just for # CLIM. pixel deshuffler, too. (which should probably spit out pypng's native # format) -COLOR_DECODERS = {} +COLOR_FORMATS = {} @attr.s @@ -59,22 +83,68 @@ class ColorFormat: def __iter__(self): # TODO back compat until i fix the below code - return iter((self.decoder, self.bits_per_pixel, self.bit_depth)) + return iter((self, self.bits_per_pixel, self.bit_depth)) def _register_color_decoder(name, *, bpp, depth, alpha): def register(f): - COLOR_DECODERS[name] = ColorFormat(name, f, bpp, depth, alpha) + COLOR_FORMATS[name] = ColorFormat(name, f, bpp, depth, alpha) return f return register +@_register_color_decoder('A4', bpp=0.5, depth=4, alpha=True) +def decode_A4(data): + for a in data: + a0 = a & 0xf + a0 = (a0 << 4) | (a0 << 0) + a1 = a >> 4 + a1 = (a1 << 4) | (a1 << 0) + yield 0, 0, 0, a0 + yield 0, 0, 0, a1 + + +@_register_color_decoder('A8', bpp=1, depth=8, alpha=True) +def decode_a8(data): + for a in data: + yield 0, 0, 0, a + + +@_register_color_decoder('L4', bpp=0.5, depth=4, alpha=False) +def decode_l4(data): + for l in data: + l0 = l & 0xf + l0 = (l0 << 4) | (l0 << 0) + l1 = l >> 4 + l1 = (l1 << 4) | (l1 << 0) + yield l0, l0, l0 + yield l1, l1, l1 + + @_register_color_decoder('L8', bpp=1, depth=8, alpha=False) def decode_l8(data): for l in data: yield l, l, l +@_register_color_decoder('LA4', bpp=1, depth=4, alpha=True) +def decode_la4(data): + for la in data: + l = la >> 4 + l = (l << 4) | (l << 0) + a = (la >> 0) & 0xf + a = (a << 4) | (a << 4) + yield l, l, l, a + + +@_register_color_decoder('LA8', bpp=2, depth=8, alpha=True) +def decode_la8(data): + for i in range(0, len(data), 2): + a = data[i] + l = data[i + 1] + yield l, l, l, a + + @_register_color_decoder('RGBA4', bpp=2, depth=4, alpha=True) def decode_rgba4(data): # The idea is that every uint16 is a packed rrrrggggbbbbaaaa, but when @@ -93,15 +163,16 @@ def decode_rgba4(data): @_register_color_decoder('RGB8', bpp=3, depth=8, alpha=False) def decode_rgb8(data): for i in range(0, len(data), 3): - yield data[i:i + 3] + yield data[i:i + 3][::-1] @_register_color_decoder('RGBA8', bpp=4, depth=8, alpha=True) def decode_rgba8(data): for i in range(0, len(data), 4): - yield data[i:i + 4] + yield data[i:i + 4][::-1] +# FIXME turns out the above just are these, so, ditch these @_register_color_decoder('BGR8', bpp=3, depth=8, alpha=False) def decode_bgr8(data): for i in range(0, len(data), 3): @@ -125,6 +196,7 @@ def decode_rgba5551(data, *, start=0, count=None): for i in range(start, end, 2): datum = data[i] + data[i + 1] * 256 + # FIXME repeat rather than doing division r = (((datum >> 11) & 0x1f) * 255 + 15) // 31 g = (((datum >> 6) & 0x1f) * 255 + 15) // 31 b = (((datum >> 1) & 0x1f) * 255 + 15) // 31 @@ -132,6 +204,45 @@ def decode_rgba5551(data, *, start=0, count=None): yield r, g, b, a +@_register_color_decoder('RGB565', bpp=2, depth=5, alpha=False) +def decode_rgb565(data, *, start=0, count=None): + # FIXME i bet construct totally /can/ parse this mess for me + if count is None: + end = len(data) + else: + end = start + count * 2 + + for i in range(start, end, 2): + datum = data[i] + data[i + 1] * 256 + # FIXME repeat rather than doing division + r = (((datum >> 11) & 0x1f) * 255 + 15) // 31 + g = (((datum >> 5) & 0x3f) * 255 + 31) // 63 + b = (((datum >> 0) & 0x1f) * 255 + 15) // 31 + yield r, g, b + + +@_register_color_decoder('RGB332', bpp=1, depth=2, alpha=False) +def decode_rgb332(data, *, start=0, count=None): + if count is None: + end = len(data) + else: + end = start + count + + for i in range(start, end): + datum = data[i] + r = (datum >> 5) & 0x7 + r = (r << 5) | (r << 2) | (r >> 1) + g = (datum >> 2) & 0x7 + g = (g << 5) | (g << 2) | (g >> 1) + b = (datum >> 0) & 0x7 + b = (b << 5) | (b << 2) | (b >> 1) + yield r, g, b + + +_register_color_decoder('ETC1', bpp=0.5, depth=4, alpha=False)(None) +_register_color_decoder('ETC1A4', bpp=1, depth=4, alpha=True)(None) + + del _register_color_decoder @@ -156,12 +267,24 @@ def untile_pixels(raw_pixels, width, height, *, is_flim): Taken from: https://github.com/Zhorken/pokemon-x-y-icons/ """ + # FIXME this is a wild guess, because i've seen a 4x4 image that this just + # doesn't handle correctly, but the image is all white so i have no idea + # what the right fix is -- there's a 4 x 0x78 in 0/7/9 though... + if width < 8 or height < 8: + pixels = [] + it = iter(raw_pixels) + for r in range(height): + pixels.append([]) + for c in range(width): + pixels[-1].append(next(it)) + return pixels + # Images are stored padded to powers of two stored_width = 2 ** math.ceil(math.log(width) / math.log(2)) stored_height = 2 ** math.ceil(math.log(height) / math.log(2)) num_pixels = stored_width * stored_height - tile_width = stored_width // 8 - tile_height = stored_height // 8 + tile_width = (stored_width + 7) // 8 + tile_height = (stored_height + 7) // 8 pixels = [ [None for x in range(width)] @@ -175,6 +298,7 @@ def untile_pixels(raw_pixels, width, height, *, is_flim): # Find the coordinates of the top-left corner of the current tile. # n.b. The image is eight tiles wide, and each tile is 8×8 pixels. tile_num = n // 64 + # FIXME i found a 4x4 FLIM that this fails for??? if is_flim: # The FLIM format seems to pseudo-rotate the entire image to the # right, so tiles start in the bottom left and go up @@ -224,25 +348,38 @@ def decode_clim(data): raise ValueError("Unknown image format {}".format(file_format)) imag_header = imag_header_struct.parse(data[-20:]) - if is_flim: - # TODO SUMO hack; not sure how to get format out of this header - imag_header.format = 'RGBA5551' + #if is_flim: + # # TODO SUMO hack; not sure how to get format out of this header + # imag_header.format = 'RGBA5551' - if imag_header.format not in COLOR_DECODERS: + if imag_header.format not in COLOR_FORMATS: raise ValueError( "don't know how to decode {} pixels".format(imag_header.format)) - color_decoder, color_bpp, color_depth = COLOR_DECODERS[imag_header.format] + color_format = COLOR_FORMATS[imag_header.format] mode, = struct.unpack_from('> pixelbit) & 0x1 mod = table[hibit * 2 + lobit] color = tuple(clamp_to_byte(b + mod) for b in base) - color += (next(it),) + if use_alpha: + color += (next(it),) outpixels[y][x] = color # 4 is the bit depth; None is the palette - return width, height, 4, None, outpixels + from .clim import DecodedImageData, COLOR_FORMATS + # FIXME stupid import, wrong color format + return DecodedImageData(width, height, COLOR_FORMATS['ETC1A4'], None, outpixels) diff --git a/pokedex/extract/lib/garc.py b/pokedex/extract/lib/garc.py index 88a3d0c..065d738 100644 --- a/pokedex/extract/lib/garc.py +++ b/pokedex/extract/lib/garc.py @@ -94,13 +94,16 @@ class GARCEntry(object): def __getitem__(self, i): start, length = self.slices[i] ss = self.stream.slice(start, length) - if ss.peek(1) in [b'\x10', b'\x11']: + if ss.peek(1) in b'\x10\x11': # XXX this sucks but there's no real way to know for sure whether # data is compressed or not. maybe just bake this into the caller # and let them deal with it, same way we do with text decoding? # TODO it would be nice if this could be done lazily for 'inspect' # purposes, since the first four bytes are enough to tell you the # size + # FIXME make this work even for red herrings, maybe by finishing it + # up and doing a trial decompression of the first x bytes + #return CompressedStream(ss) try: data = lzss3.decompress_bytes(ss.read()) except Exception: @@ -113,6 +116,47 @@ class GARCEntry(object): return len(self.slices) +class CompressedStream: + def __init__(self, stream): + self.stream = stream + header = stream.read(4) + stream.seek(0) + assert header[0] in b'\x10\x11' + self.length, = struct.unpack(' note to self: X/Y ambush encounters are found in DllField.cro, starting at 0xf40d0 + 23:02 < magical> friend safari pokemon at 0x13d34a # TODO move mee elsewheeere -- actually all of these should be in their own pieces places = OrderedDict() name_index_to_place = {} @@ -853,7 +909,7 @@ def extract_data(root, out): levels = str(enc.min_level) else: levels = "{} - {}".format(enc.min_level, enc.max_level) - pokemon_ident = identifiers['species'][enc.pokemon_id & 0x1ff] + pokemon_ident = identifiers['pokémon'][enc.pokemon_id & 0x1ff] pokemon_form_bits = enc.pokemon_id >> 9 # TODO maybe turn this into, i have no idea, a # custom type? something forcibly short?? @@ -892,7 +948,9 @@ def extract_data(root, out): machines = [] #f.seek(0x004a67ee) # ORAS f.seek(0x0049795a) # SUMO + # TODO magic number (107) machineids = struct.unpack('<107H', f.read(2 * 107)) + # TODO dunno if this is still true # Order appears to be based on some gen 4 legacy: TMs 1 through 92, HMs # 1 through 6, then the other eight TMs and the last HM. But the bits # in the Pokémon structs are in the expected order of 1 through 100, 1 @@ -909,43 +967,141 @@ def extract_data(root, out): # ------------------------------------------------------------------------- # Pokémon structs - # TODO SUMO 0/1/8 seems to contain the index for the "base" species + mega_evolutions = get_mega_evolutions(root) + all_pokémon = OrderedDict() pokemon_data = [] with read_garc(root / 'rom/a/0/1/7') as garc: # SUMO #with read_garc(root / 'rom/a/1/9/5') as garc: # ORAS personals = [subfile[0].read() for subfile in garc] _pokemon_forms = {} # "real" species id => (base species id, form name id) - _next_name_form_id = 723 # TODO magic number + _next_name_form_id = 803#723 # TODO magic numbers + print("number of flavor texts", len(texts['en']['species-flavor-moon'])) for i, personal in enumerate(personals[:-1]): record = pokemon_struct.parse(personal) - # TODO transform to an OD somehow probably - pokemon_data.append(record) - print(i, hex(record.bp_tutors4)) - #print("{:3d} {:15s} {} {:5d} {:5d}".format( - # i, - # identifiers['species'][baseid], - # ('0'*16 + bin(record.mystery1)[2:])[-16:], - # record.mystery2, - # record.stage, - #)) - # TODO some pokemon have sprite starts but no species start, because their sprites vary obv - if record.form_count > 1: - # The form names appear to be all just jammed at the end in order, - # completely unrelated to either of the "start" offsets here + + # FIRST THINGS FIRST: let's deal with forms. + # TODO some pokemon, like unown, /only/ have sprite variations, so they + # don't have a form_species_start here. what do i do about them? + if (record.form_count > 1) != bool(record.form_species_start): + print("!!! sprite-only forms, argh") + # The > i check makes sure we don't run this bit a second time when we + # hit the forms themselves + if record.form_count > 1 and record.form_species_start > i: + megas = mega_evolutions[i] + if len(megas) == 1: + assert i not in FORM_NAMES + form_names = ['mega'] + elif len(megas) == 2: + assert i not in FORM_NAMES + form_names = ['mega-x', 'mega-y'] + else: + assert not megas + form_names = FORM_NAMES[i][1:] + # Fix our own name if necessary + if FORM_NAMES[i][0]: + identifiers['pokémon'][i] += '-' + FORM_NAMES[i][0] + + if record.form_count - 1 != len(form_names): + print("!!!!! MISMATCH", record.form_count - 1, len(form_names)) for offset in range(record.form_count - 1): + # Form names appear to be all just jammed at the end in order, + # completely unrelated to either of the "start" offsets here #form_name = texts['en']['form-names'][_next_name_form_id] - if record.form_species_start: - # TODO still no idea how "intangible" forms are being - # handled in the new schema - _pokemon_forms[record.form_species_start + offset] = i, _next_name_form_id - + # TODO still no idea how "intangible" forms are being + # handled in the new schema + _pokemon_forms[record.form_species_start + offset] = i, _next_name_form_id _next_name_form_id += 1 - if record.form_species_start: - for offset in range(record.form_count - 1): - # TODO grab the form names argh - identifiers['species'][record.form_species_start + offset] = identifiers['species'][i] + identifiers['pokémon'][record.form_species_start + offset] = identifiers['species'][i] + '-' + form_names[offset] + + pokémon = Pokémon() + all_pokémon[identifiers['pokémon'][i]] = pokémon + pokémon.game_index = i + + if i in _pokemon_forms: + base_species_id, form_name_id = _pokemon_forms[i] + else: + base_species_id = i + form_name_id = i + # TODO i observe this is explicitly a species name, the one thing that + # really is shared between forms + pokémon.name = OrderedDict( + (language, texts[language]['species-names'][base_species_id]) + for language in CANON_LANGUAGES) + pokémon.genus = OrderedDict( + (language, texts[language]['genus-names'][base_species_id]) + for language in CANON_LANGUAGES) + # FIXME ho ho, hang on a second, forms have their own flavor text too!! + pokémon.flavor_text = OrderedDict( + # TODO well this depends on which game you're dumping + (language, texts[language]['species-flavor-moon'][base_species_id]) + for language in CANON_LANGUAGES) + # FIXME include form names? only when they exist? can that be + # inconsistent between languages? + + pokémon.base_stats = { + 'hp': record.stat_hp, + 'attack': record.stat_atk, + 'defense': record.stat_def, + 'special-attack': record.stat_spatk, + 'special-defense': record.stat_spdef, + 'speed': record.stat_speed, + } + # FIXME pokémon.types = [record.type1] + pokémon.capture_rate = record.capture_rate + # TODO stage? + # FIXME effort + # Held items are a bit goofy; if the same item is in all three slots, it always appears! + pokémon.held_items = {} + if 0 != record.held_item1 == record.held_item2 == record.held_item3: + pokémon.held_items[identifiers['item'][record.held_item1]] = 100 + else: + if record.held_item1: + pokémon.held_items[identifiers['item'][record.held_item1]] = 50 + if record.held_item2: + pokémon.held_items[identifiers['item'][record.held_item2]] = 5 + if record.held_item3: + pokémon.held_items[identifiers['item'][record.held_item3]] = 1 + + # TODO i think this needs some normalizing? maybe renaming because + # this doesn't at all imply what it means + pokémon.gender_rate = record.gender_rate + + pokémon.hatch_counter = record.steps_to_hatch + pokémon.base_happiness = record.base_happiness + pokémon.growth_rate = record.growth_rate + # FIXME egg groups + pokémon.abilities = [ + identifiers['ability'][ability] + for ability in (record.ability1, record.ability2, record.ability_hidden) + ] + # FIXME safari escape?? + # FIXME form stuff + # FIXME color + pokémon.base_experience = record.base_exp + # FIXME what units are these! + pokémon.height = record.height + pokémon.weight = record.weight + + pokémon.moves = {} + + + + + # TODO transform to an OD somehow probably + pokemon_data.append(record) + print("{:4d} {:25s} {} {:5d} {:5d} {:20s} {:4d} {:4d} {:2d}".format( + i, + identifiers['pokémon'][i], + ('0'*16 + bin(record.mystery1)[2:])[-16:], + record.mystery2, + record.stage, + texts['en']['form-names'][i], + record.form_species_start, + record.form_sprite_start, + record.form_count, + )) #for i in range(723, 825 + 1): # base_species_id, form_name_id = _pokemon_forms[i] @@ -955,31 +1111,42 @@ def extract_data(root, out): # ------------------------------------------------------------------------- # Move stats - movesets = OrderedDict() - with read_garc(root / 'rom/a/0/1/1') as garc: # SUMO #with read_garc(root / 'rom/a/1/8/9') as garc: # ORAS + with read_garc(root / 'rom/a/0/1/1') as garc: # SUMO # Only one subfile + # TODO assert this wherever i do it data = garc[0][0].read() - container = move_container_struct.parse(data) - for n, record in enumerate(container.records): - m = record.move + print(Struct('magic' / Bytes(2), 'count' / Int16ul, 'pointers' / Array(16, Int32ul)).parse(data)) + print(move_struct.sizeof()) + records = move_container_struct.parse(data) + for i, record in enumerate(records): + #print(texts['en']['move-names'][i]) + #print(record) # TODO with the release of oras all moves have contest types and effects again! where are they?? - #print("{:3d} {:20s} | {m.type:3d} {m.power:3d} {m.pp:2d} {m.accuracy:3d} / {m.priority:2d} {m.range:2d} {m.damage_class:1d} / {m.effect:3d} {m.caused_effect:3d} {m.effect_chance:3d} -- {m.status:3d} {m.min_turns:3d} {m.max_turns:3d} {m.crit_rate:3d} {m.flinch_chance:3d} {m.recoil:4d} {m.healing:3d} / {m.stat_change:06x} {m.stat_amount:06x} {m.stat_chance:06x} / {m.padding0:3d} {m.padding1:3d} {m.flags:04x} {m.padding2:3d} {m.extra:3d}".format( - # n, - # identifiers['move'][n], - # m=record.move, - #)) + print("{:3d} {:30s} | {m.type:10s} {m.category:3d} / {m.power:3d} {m.pp:2d} {m.accuracy:3d} / {m.priority:2d} {m.range:2d} {m.damage_class:1d} / {m.effect:3d} {m.caused_effect:3d} {m.effect_chance:3d} -- {m.status:3d} {m.min_turns:3d} {m.max_turns:3d} {m.crit_rate:3d} {m.flinch_chance:3d} {m.recoil:4d} {m.healing:3d} / {m.stat_change!r} {m.stat_amount!r} {m.stat_chance!r} ~ {m.padding0:3d} {m.padding1:3d} {m.flags:04x} {m.padding2:3d} {m.extra:3d} {m.extra2:10d}".format( + i, + texts['en']['move-names'][i], + m=record, + )) + return # Egg moves with read_garc(root / 'rom/a/0/1/2') as garc: # SUMO #with read_garc(root / 'rom/a/1/9/0') as garc: # ORAS + print("number of egg moves:", len(garc)) for i, subfile in enumerate(garc): - ident = identifiers['species'][i] + ident = identifiers['pokémon'][i] data = subfile[0].read() if not data: continue container = egg_moves_struct.parse(data) - moveset = movesets.setdefault(ident, OrderedDict()) + print(i, ident, container.first_form_id, container.moveids) + # FIXME: 961 pokémon, 1063 named forms, but 1048 egg movesets. + # what? they get completely out of order after 802 and i don't + # know how to fix this. didn't magical write some code...? + if i > len(identifiers['species']): + continue + moveset = all_pokémon[ident].moves eggset = moveset['egg'] = [] for moveid in container.moveids: eggset.append(identifiers['move'][moveid]) @@ -987,10 +1154,11 @@ def extract_data(root, out): # Level-up moves with read_garc(root / 'rom/a/0/1/3') as garc: # SUMO #with read_garc(root / 'rom/a/1/9/1') as garc: # ORAS + print("number of level-up moves", len(garc)) for i, subfile in enumerate(garc): - ident = identifiers['species'][i] + ident = identifiers['pokémon'][i] level_up_moves = subfile[0].read() - moveset = movesets.setdefault(ident, OrderedDict()) + moveset = all_pokémon[ident].moves levelset = moveset['level'] = [] lastlevel = None order = 1 @@ -998,10 +1166,11 @@ def extract_data(root, out): # End is indicated with -1, -1 if pair.moveid <= 0: break - levelset.append(( - pair.level, - identifiers['move'][pair.moveid], - )) + # FIXME this is a goofy-looking structure, but it makes the + # yaml come out nicely? + levelset.append({ + pair.level: identifiers['move'][pair.moveid], + }) if pair.level == lastlevel: order += 1 @@ -1038,8 +1207,8 @@ def extract_data(root, out): # Tutor moves (from the personal structs) for i, datum in enumerate(pokemon_data): - ident = identifiers['species'][i] - moveset = movesets.setdefault(ident, OrderedDict()) + ident = identifiers['pokémon'][i] + moveset = all_pokémon[ident].moves tutorset = moveset['tutor'] = [] for key, tutors in tutor_moves.items(): for bit, moveident in enumerate(tutors): @@ -1052,27 +1221,27 @@ def extract_data(root, out): # TMs machineset = moveset['machine'] = [] for bit, moveident in enumerate(machines): - if not datum['machines'] & (1 << bit): + if not datum['machines'][bit]: continue machineset.append(moveident) - with (out / 'movesets.yaml').open('w') as f: - dump_to_yaml(movesets, f) + with (out / 'pokemon.yaml').open('w') as f: + #dump_to_yaml(all_pokémon, f) + import pokedex.schema as schema + from camel import Camel + f.write(Camel([schema.POKEDEX_TYPES]).dump(all_pokémon)) -def get_mega_counts(root): - """Return a dict mapping Pokémon ids to how many mega evolutions each one - has. +def get_mega_evolutions(root): + """Return a dict mapping Pokémon ids to a list of mega evolution records. """ - mega_counts = {} # pokemonid => number of mega evos + megas = {} #with read_garc(root / 'rom/a/1/9/3') as garc: # oras with read_garc(root / 'rom/a/0/1/5') as garc: # SUMO for pokemonid, subfile in enumerate(garc): - mega_evos = pokemon_mega_evolutions_struct.parse_stream(subfile[0]) - mega_counts[pokemonid] = max( - mega_evo.number for mega_evo in mega_evos) + megas[pokemonid] = pokemon_mega_evolutions_struct.parse_stream(subfile[0]) - return mega_counts + return megas class SpriteFileNamer: @@ -1117,7 +1286,7 @@ class SpriteFileNamer: .format(self.mega_counts[pokemonid], pokemonid)) else: # TODO should use warnings for this so it works for new games - #raise ValueError("Pokemon {} doesn't have forms".format(pokemonid)) + warnings.warn("Don't know any forms for Pokemon {}".format(pokemonid)) form = "form-{}".format(formid) # Construct the directory @@ -1192,50 +1361,30 @@ class SpriteFileNamer: shutil.copyfile(str(fn), str(fn2)) -def write_clim_to_png(f, width, height, color_depth, palette, pixels): - """Write the results of ``decode_clim`` to a file object.""" - writer_kwargs = dict(width=width, height=height) - if palette: - writer_kwargs['palette'] = palette - else: - # TODO do i really only need alpha=True if there's no palette? - writer_kwargs['alpha'] = True - writer = png.Writer(**writer_kwargs) - - # For a paletted image, I want to preserve Zhorken's good idea of - # indicating the original bit depth with an sBIT chunk. But PyPNG can't do - # that directly, so instead I have to do some nonsense. - if palette: - buf = io.BytesIO() - writer.write(buf, pixels) - - # Read the PNG as chunks, and manually add an sBIT chunk - buf.seek(0) - png_reader = png.Reader(buf) - chunks = list(png_reader.chunks()) - sbit = bytes([color_depth] * 3) - chunks.insert(1, ('sBIT', sbit)) - - # Now write the chunks to the file - png.write_chunks(f, chunks) - - else: - # Otherwise, it's... almost straightforward. - writer.write(f, (itertools.chain(*row) for row in pixels)) - - def extract_box_sprites(root, out): - namer = SpriteFileNamer( - out, get_mega_counts(root), ORAS_EXTRA_SPRITE_NAMES) + mega_counts = { + id: len(megas) + for (id, megas) in get_mega_evolutions(root).items() + } + namer = SpriteFileNamer(out, mega_counts, FORM_NAMES) with (root / 'exe/code.bin').open('rb') as f: # Form configuration, used to put sprites in the right order # NOTE: in x/y the address is 0x0043ea98 #f.seek(0x0047d650) # ORAS f.seek(0x004999d0) # SUMO - # TODO magic number - for n in range(722): + # Discard dummy zero sprite + pokemon_sprite_struct.parse_stream(f) + n = 0 + while True: sprite = pokemon_sprite_struct.parse_stream(f) + # This is not particularly reliable, but the data immediately + # following this list is some small 32-bit values, so the female + # index will be (illegally) zero + if not sprite.female_index: + break + + n += 1 namer.add(sprite.index, n) if sprite.female_index != sprite.index: namer.add(sprite.female_index, n, female=True) @@ -1289,26 +1438,28 @@ def extract_box_sprites(root, out): with read_garc(root / 'rom/a/0/6/2') as garc: # SUMO from .lib.clim import decode_clim for i, subfile in enumerate(garc): - if i == 0: - # Dummy blank sprite, not interesting to us - continue - elif i == 333: - # Duplicate Entei sprite that's not used - continue - elif i == len(garc) - 1: + # TODO ORAS ONLY + #elif i == 333: + # # Duplicate Entei sprite that's not used + # continue + if i == len(garc) - 1: # Very last one is egg namer.inject(i, 'egg.png') + # TODO this is bad. + if not namer.index_to_filenames[i]: + # Unused sprite -- e.g. index 0, or one of the dummies in SUMO + continue + data = subfile[0].read() - width, height, color_depth, palette, pixels = decode_clim(data) + image_data = decode_clim(data) # TODO this is bad. if 'right/' in namer.index_to_filenames[i][0]: - for row in pixels: - row.reverse() + image_data.mirror() with namer.open(i) as f: - write_clim_to_png(f, width, height, color_depth, palette, pixels) + image_data.write_to_png(f) def extract_dex_sprites(root, out): @@ -1317,58 +1468,103 @@ def extract_dex_sprites(root, out): # Luckily the dex sprites are in the same order as the models # (unsurprising, as they're just model renders), which also tells us what # Pokémon have female forms. The mega evolution map tells us which forms - # are megas, and the rest are listed manually above as - # ORAS_EXTRA_SPRITE_NAMES. + # are megas, and the rest are listed manually above as FORM_NAMES. - namer = SpriteFileNamer( - out, get_mega_counts(root), ORAS_EXTRA_SPRITE_NAMES) + mega_counts = { + id: len(megas) + for (id, megas) in get_mega_evolutions(root).items() + } + namer = SpriteFileNamer(out, mega_counts, FORM_NAMES) # TODO Meowstic is counted as simply female in here, but should probably be # saved with a form filename as well + # TODO should skip the extra komala and the totem forms #with read_garc(root / 'rom/a/0/0/8') as garc: # ORAS with read_garc(root / 'rom/a/0/9/4') as garc: # SUMO f = garc[0][0] - # TODO magic number - for n in range(721): - # Unlike /virtually everywhere else/, Pokémon are zero-indexed here - pokemonid = n + 1 + pokemonid = 0 + while True: + pokemonid += 1 + data = f.read(4) + # All zeroes means we're done. Maybe. More data follows after + # this, but it doesn't seem to be the same format, and I don't know + # what exactly it's for. + if data == b'\x00\x00\x00\x00': + break + # Index of the first model (also zero-indexed), how many models the # Pokémon has, and some flags - start, count, flags = struct.unpack('= 717: - model_num += 1 + # Lurantis has two models, but one dex sprite + if pokemonid == 754: + count = 1 + flags &= ~4 + # Salazzle has two models, but one dex sprite + if pokemonid == 758: + count = 1 + flags &= ~4 + # Komala has one model, but two dex sprites + # FIXME probably skip extracting it at all + if pokemonid == 775: + count = 2 + # The above all naturally throw later numbering off; compensate + if 716 < pokemonid <= 754: + model_num += 2 + elif 758 < pokemonid <= 775: + model_num -= 2 namer.add(model_num, pokemonid) + # TODO SUMO ONLY (should be += 1 for ORAS) + namer.add(model_num + 1, pokemonid, shiny=True) + model_num += 2 + form_count = count - 1 # discount "base" form + # TODO this is only used for ORAS, and should be done another way anyway total_model_count = model_num + count - 1 # Don't know what flag 1 is; everything has it. - # Flag 2 means the first alternate form is a female variant. + # Flag 2 means the first alternate form is female. if flags & 2: assert form_count > 0 form_count -= 1 - model_num += 1 namer.add(model_num, pokemonid, female=True) + namer.add(model_num + 1, pokemonid, female=True, shiny=True) + model_num += 2 # Flag 4 just means there are more forms? if flags & 4: assert form_count for formid in range(1, form_count + 1): - model_num += 1 namer.add(model_num, pokemonid, formid) + namer.add(model_num + 1, pokemonid, formid, shiny=True) + model_num += 2 # And now, do the ripping #with read_garc(root / 'rom/a/2/6/3') as garc: # ORAS - with read_garc(root / 'rom/a/2/4/0') as garc: # sun/moon demo + with read_garc(root / 'rom/a/2/4/0') as garc: # SUMO from .lib.clim import decode_clim from .lib.etc1 import decode_etc1 for i, subfile in enumerate(garc): + if i == 0: + # Dummy sprite, not interesting to us + continue + + data = subfile[0].read() + """ + with open("{}/{}.png".format(str(out), i), 'wb') as f: + write_clim_to_png(f, *decode_etc1(data)) + continue + # TODO THIS IS ALL ORAS ONLY shiny_prefix = None if i > total_model_count: i -= total_model_count @@ -1376,18 +1572,18 @@ def extract_dex_sprites(root, out): # hack in the other code shiny_prefix = 'shiny/' - if i == 0: - # Dummy blank sprite, not interesting to us - continue elif 37 <= i <= 41: # Cosplay Pikachu's outfits -- the sprites are blank, so saving # these is not particularly useful continue + """ data = subfile[0].read() - with namer.open(i, prefix=shiny_prefix) as f: - write_clim_to_png(f, *decode_etc1(data)) - #write_clim_to_png(f, *decode_clim(data)) + with namer.open(i) as f: + decode_etc1(data).write_to_png(f) + # TODO ORAS + #with namer.open(i, prefix=shiny_prefix) as f: + # decode_clim(data).write_to_png(f) def _munge_source_arg(strpath): diff --git a/pokedex/schema.py b/pokedex/schema.py index e22b47a..ab69216 100644 --- a/pokedex/schema.py +++ b/pokedex/schema.py @@ -164,6 +164,8 @@ Evolution = _ForwardDeclaration() EncounterMap = _ForwardDeclaration() MoveSet = _ForwardDeclaration() Pokedex = _ForwardDeclaration() +Item = _ForwardDeclaration() +Ability = _ForwardDeclaration() class Pokémon(VersionedLocus): @@ -173,6 +175,9 @@ class Pokémon(VersionedLocus): base_stats = _Map(Stat, int) growth_rate = _Value(GrowthRate) base_experience = _Value(int, min=0, max=255) + capture_rate = _Value(int, min=0, max=255) + held_items = _Map(Item, int) + gender_rate = _Value(int) pokedex_numbers = _Map(Pokedex, int) @@ -202,9 +207,25 @@ class Pokémon(VersionedLocus): # TODO should this be written in hex, maybe? game_index = _Value(int) + # FIXME how do i distinguish hidden ability? + abilities = _List(Ability) + Pokemon = Pokémon +MoveEffect = _ForwardDeclaration() + +class Move(VersionedLocus): + name = _Localized(str) + type = _Value(Type) + power = _Value(int) + pp = _Value(int) + accuracy = _Value(int) + effect = _Value(MoveEffect) + + + + # ------------------------------------------------------------------------------ # The repository class, primary interface to the data