veekun_pokedex/pokedex/extract/oras.py

1625 lines
58 KiB
Python
Raw Normal View History

2016-02-26 18:05:51 +00:00
"""Dumps data from Omega Ruby and Alpha Sapphire.
Filesystem reference: http://www.projectpokemon.org/wiki/ORAS_File_System
"""
import argparse
from collections import OrderedDict
from collections import defaultdict
2016-02-26 18:05:51 +00:00
from contextlib import contextmanager
from pathlib import Path
import re
2016-02-26 18:05:51 +00:00
import shutil
import struct
import warnings
from construct import (
# Simple fields
Const, Flag, Int16sl, Int16ul, Int8sl, Int8ul, Int32ul, Padding,
# Structures and meta stuff
Array, BitsInteger, BitsSwapped, Bitwise, Enum, Filter, FocusedSeq,
GreedyRange, Pointer, PrefixedArray, Range, Struct, this,
# temp
Peek, Bytes,
)
2016-02-26 18:05:51 +00:00
import yaml
from pokedex.schema import Pokémon
2016-02-26 18:05:51 +00:00
from .lib.garc import GARCFile, decrypt_xy_text
# TODO: ribbons! 080 in sumo
2016-02-26 18:05:51 +00:00
# TODO auto-detect rom vs romfs vs... whatever
2016-02-26 18:05:51 +00:00
# TODO fix some hardcoding in here
# TODO finish converting garc parsing to use construct, if possible, i think (i would not miss substream)
# way way more sprite work in here...
# TODO would be nice to have meaningful names for the file structure instead of sprinkling hardcoded ones throughout
# SUMO file list:
# a/2/8/1 "photos" from the credits
2016-02-26 18:05:51 +00:00
# Growth-rate identifiers keyed by numeric index.  The position of each entry
# in the sequence below is its index, so the order must not be changed.
GROWTH_RATES = dict(enumerate((
    'gr.medium',
    'gr.slow-then-very-fast',
    'gr.fast-then-very-slow',
    'gr.medium-slow',
    'gr.fast',
    'gr.slow',
)))
# Type identifiers keyed by numeric index.  The position of each entry in the
# sequence below is its index, so the order must not be changed.
TYPES = dict(enumerate((
    't.normal',
    't.fighting',
    't.flying',
    't.poison',
    't.ground',
    't.rock',
    't.bug',
    't.ghost',
    't.steel',
    't.fire',
    't.water',
    't.grass',
    't.electric',
    't.psychic',
    't.ice',
    't.dragon',
    't.dark',
    't.fairy',
)))
# Language codes that text is dumped for; one YAML script file is written per
# entry.
# ja-Hrkt: hiragana/katakana
# ja: presumably the kanji variant of the Japanese text -- TODO confirm
# zh-Hans: simplified
# zh-Hant: traditional
CANON_LANGUAGES = ('ja-Hrkt', 'ja', 'en', 'fr', 'it', 'de', 'es', 'ko', 'zh-Hans', 'zh-Hant')
2016-02-26 18:05:51 +00:00
# Path (relative to the dump root) of the ORAS script GARC for each language.
# Note that ORAS has no Chinese text, so the zh-* entries of CANON_LANGUAGES
# are absent here.
ORAS_SCRIPT_FILES = {
    'ja-Hrkt': 'rom/a/0/7/1',
    'ja': 'rom/a/0/7/2',
    'en': 'rom/a/0/7/3',
    'fr': 'rom/a/0/7/4',
    'it': 'rom/a/0/7/5',
    'de': 'rom/a/0/7/6',
    'es': 'rom/a/0/7/7',
    'ko': 'rom/a/0/7/8',
}
# Path (relative to the dump root) of the SUMO script GARC for each language.
# The files are numbered consecutively from 0 in the language order below.
SUMO_SCRIPT_FILES = {
    language: 'rom/a/0/3/{}'.format(index)
    for index, language in enumerate((
        'ja-Hrkt', 'ja', 'en', 'fr', 'it', 'de', 'es', 'ko',
        'zh-Hans', 'zh-Hant',
    ))
}
2016-02-26 18:05:51 +00:00
# Index of each interesting text table inside an ORAS script GARC (see
# ORAS_SCRIPT_FILES), keyed by the name the decrypted text is stored under.
ORAS_SCRIPT_ENTRIES = {
    'form-names': 5,
    # TODO these might be backwards, i'm just guessing
    'species-flavor-alpha-sapphire': 6,
    'species-flavor-omega-ruby': 7,
    'move-contest-flavor': 13,
    'move-names': 14,
    # Note: table 15 is also a list of move names, but with a few at the end
    # missing?  XY leftovers?
    'move-flavor': 16,
    'type-names': 18,
    'ability-flavor': 36,
    'ability-names': 37,
    'nature-names': 51,
    # Note that these place names come in pairs, in order to support X/Y's
    # routes, which had both numbers and traditional street names
    # TODO oughta rip those too!
    'zone-names': 90,
    'species-names': 98,
    # 113: item names, with macros to branch for pluralization
    # 114: copy of item names, but with "PP" in latin in korean (?!)
    # 115: item names in plural (maybe interesting?)
    'item-names': 116,  # singular
    'item-flavor': 117,
}
# Index of each interesting text table inside a SUMO script GARC (see
# SUMO_SCRIPT_FILES), keyed by the name the decrypted text is stored under.
# NOTE(review): several comments below were carried over verbatim from the
# ORAS table and quote ORAS table numbers; they are marked where they look
# stale, but have not been re-verified against SUMO.
SUMO_SCRIPT_ENTRIES = {
    # 2: bag pockets
    # 81: ribbons
    'form-names': 114,
    # TODO a lot of these are missing
    'species-flavor-sun': 119,
    'species-flavor-moon': 120,
    'move-contest-flavor': 109,
    'move-names': 113,
    # TODO 19 is z-move names
    # Note: table 15 is also a list of move names, but with a few at the end
    # missing?  XY leftovers?
    # (the "15" above is the ORAS table number -- TODO confirm for SUMO)
    'move-flavor': 112,
    'type-names': 107,
    'ability-flavor': 97,
    'ability-names': 96,
    'nature-names': 87,
    # Note that these place names come in pairs, in order to support X/Y's
    # routes, which had both numbers and traditional street names
    # TODO oughta rip those too!
    'zone-names': 67,
    # NOTE: 67 through 70 could be zone names, but could also be "where caught"
    # names for Pokémon
    'species-names': 55,
    'pokemon-height-flavor': 115,
    'genus-names': 116,
    'pokemon-weight-flavor': 117,
    'trainer-class-names': 106,
    'berry-names': 65,
    # 49 might be pokédex colors?  or maybe clothing colors
    # 38: item names, with macros to branch for pluralization
    # 114: copy of item names, but with "PP" in latin in korean (?!)
    # (114 is used for form names above, so this is presumably a leftover
    # ORAS number -- TODO confirm)
    # 37: item names in plural (maybe interesting?)
    'item-names': 36,  # singular
    'item-flavor': 35,
}
# Form name suffixes, keyed by national species number.
# The first element in each list is the name of the BASE form.
# If it's None, then the base form is a true default in some sense, and it'll
# have the same name as the species.  Mega Evolutions are a good example.
# Otherwise, there is no default; the form name will differ from the species
# name, and the first sprite will be saved under both names, e.g., Shellos.
# Note that this does NOT include megas -- those are pulled from game data.
FORM_NAMES = {
    # TODO alolan are of course new in SUMO
    # Rattata and Raticate
    19: (None, 'alola'),
    20: (None, 'alola', 'totem-alola'),
    # Cosplay Pikachu
    # TODO not in SUMO
    #25: (None, 'rock-star', 'belle', 'pop-star', 'phd', 'libre', 'cosplay'),
    25: (None, 'original-cap', 'hoenn-cap', 'sinnoh-cap', 'unova-cap', 'kalos-cap', 'alola-cap'),
    # Raichu
    26: (None, 'alola'),
    # Sandshrew and Sandslash
    27: (None, 'alola'),
    28: (None, 'alola'),
    # Vulpix and Ninetales
    37: (None, 'alola'),
    38: (None, 'alola'),
    # Diglett and Dugtrio
    50: (None, 'alola'),
    51: (None, 'alola'),
    # Meowth and Persian
    52: (None, 'alola'),
    53: (None, 'alola'),
    # Geodude, Graveler, and Golem
    74: (None, 'alola'),
    75: (None, 'alola'),
    76: (None, 'alola'),
    # Grimer and Muk
    88: (None, 'alola'),
    89: (None, 'alola'),
    # Exeggutor
    103: (None, 'alola'),
    # Marowak
    105: (None, 'alola'),
    # Unown
    201: tuple('abcdefghijklmnopqrstuvwxyz') + ('exclamation', 'question'),
    # Castform
    351: (None, 'sunny', 'rainy', 'snowy'),
    # Kyogre and Groudon
    382: (None, 'primal'),
    383: (None, 'primal'),
    # Deoxys
    386: ('normal', 'attack', 'defense', 'speed'),
    # Burmy and Wormadam
    412: ('plant', 'sandy', 'trash'),
    413: ('plant', 'sandy', 'trash'),
    # Cherrim
    421: ('overcast', 'sunshine'),
    # Shellos and Gastrodon
    422: ('west', 'east'),
    423: ('west', 'east'),
    # Rotom
    479: (None, 'heat', 'wash', 'frost', 'fan', 'mow'),
    # Giratina
    487: ('altered', 'origin'),
    # Shaymin
    492: ('land', 'sky'),
    # Arceus
    493: (
        'normal', 'fighting', 'flying', 'poison', 'ground', 'rock', 'bug',
        'ghost', 'steel', 'fire', 'water', 'grass', 'electric', 'psychic',
        'ice', 'dragon', 'dark', 'fairy',
    ),
    # Basculin
    550: ('red-striped', 'blue-striped'),
    # Darmanitan
    555: ('standard', 'zen'),
    # Deerling and Sawsbuck
    585: ('spring', 'summer', 'autumn', 'winter'),
    586: ('spring', 'summer', 'autumn', 'winter'),
    # Tornadus, Thundurus, and Landorus
    641: ('incarnate', 'therian'),
    642: ('incarnate', 'therian'),
    645: ('incarnate', 'therian'),
    # Kyurem
    646: (None, 'white', 'black'),
    # Keldeo
    647: ('ordinary', 'resolute'),
    # Meloetta
    648: ('aria', 'pirouette'),
    # Genesect
    649: (None, 'douse', 'shock', 'burn', 'chill'),
    # Greninja
    # TODO SUMO only
    # FIXME why is the second one here at all?
    658: (None, 'dupe', 'ash'),
    # Vivillon
    666: (
        'icy-snow', 'polar', 'tundra', 'continental', 'garden', 'elegant',
        'meadow', 'modern', 'marine', 'archipelago', 'high-plains',
        'sandstorm', 'river', 'monsoon', 'savanna', 'sun', 'ocean', 'jungle',
        'fancy', 'poke-ball',
    ),
    # Flabébé/Floette/Florges
    669: ('red', 'yellow', 'orange', 'blue', 'white'),
    670: ('red', 'yellow', 'orange', 'blue', 'white', 'eternal'),
    671: ('red', 'yellow', 'orange', 'blue', 'white'),
    # Furfrou
    676: (
        'natural', 'heart', 'star', 'diamond', 'debutante', 'matron', 'dandy',
        'la-reine', 'kabuki', 'pharaoh',
    ),
    # Meowstic
    # TODO uh oh, this is handled as forms in boxes but as gender in sprites, maybe?
    678: ('male', 'female'),
    # Aegislash
    681: ('shield', 'blade'),
    # Pumpkaboo/Gourgeist
    710: ('average', 'small', 'large', 'super'),
    711: ('average', 'small', 'large', 'super'),
    # Xerneas
    716: ('neutral', 'active'),
    # Zygarde
    # TODO SUMO only
    # TODO why are 10 and 50 duplicated?
    718: (None, '10', '10', '50', 'complete'),
    # Hoopa
    720: ('confined', 'unbound'),
    # Gumshoos
    735: (None, 'totem'),
    # Vikavolt
    738: (None, 'totem'),
    # Oricorio
    741: ('baile', 'pom-pom', 'pau', 'sensu'),
    # Lycanroc
    745: ('midday', 'midnight'),
    # Wishiwashi
    746: ('solo', 'school'),
    # Lurantis
    754: (None, 'totem'),
    # Salazzle
    758: (None, 'totem'),
    # Silvally
    773: (
        'normal', 'fighting', 'flying', 'poison', 'ground', 'rock', 'bug',
        'ghost', 'steel', 'fire', 'water', 'grass', 'electric', 'psychic',
        'ice', 'dragon', 'dark', 'fairy',
    ),
    # Minior
    774: (
        'red-meteor', 'orange-meteor', 'yellow-meteor', 'green-meteor',
        'blue-meteor', 'indigo-meteor', 'violet-meteor',
        'red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet',
    ),
    # Mimikyu
    778: ('disguised', 'busted', 'totem-disguised', 'totem-busted'),
    # Kommo-o
    784: (None, 'totem'),
    # Magearna
    801: (None, 'original'),
}
# Layout of one "personal" record (per-Pokémon base data).  Each record is
# parsed on its own, so the trailing GreedyRange just swallows whatever zero
# bytes are left at the end of the record.
pokemon_struct = Struct(
    'stat_hp' / Int8ul,
    'stat_atk' / Int8ul,
    'stat_def' / Int8ul,
    'stat_speed' / Int8ul,
    'stat_spatk' / Int8ul,
    'stat_spdef' / Int8ul,
    'type1' / Int8ul,
    'type2' / Int8ul,
    'capture_rate' / Int8ul,
    'stage' / Int8ul,
    'effort' / Int16ul,
    'held_item1' / Int16ul,
    'held_item2' / Int16ul,
    'held_item3' / Int16ul,  # dark grass from bw, unused in oras?
    'gender_rate' / Int8ul,
    'steps_to_hatch' / Int8ul,
    'base_happiness' / Int8ul,
    # Parsed straight to the identifier string via the inverted table
    'growth_rate' / Enum(Int8ul, **{v: k for (k, v) in GROWTH_RATES.items()}),
    'egg_group1' / Int8ul,
    'egg_group2' / Int8ul,
    'ability1' / Int8ul,
    'ability2' / Int8ul,
    'ability_hidden' / Int8ul,
    'safari_escape' / Int8ul,
    'form_species_start' / Int16ul,
    'form_sprite_start' / Int16ul,
    'form_count' / Int8ul,
    'color' / Int8ul,
    'base_exp' / Int16ul,
    'height' / Int16ul,
    'weight' / Int16ul,
    # 14 bytes of TM/HM compatibility bits, one flag per machine
    'machines' / BitsSwapped(Bitwise(Array(14 * 8, Flag))),
    Padding(2),
    'tutors' / Int32ul,
    'mystery1' / Int16ul,
    'mystery2' / Int16ul,
    'bp_tutors1' / Int32ul,  # unused in sumo
    'bp_tutors2' / Int32ul,  # unused in sumo
    'bp_tutors3' / Int32ul,  # unused in sumo
    'bp_tutors4' / Int32ul,  # sumo: big numbers for pikachu, eevee, snorlax, mew, starter evos, couple others??  maybe special z-move item?
    # TODO sumo is four bytes longer, not sure why, find out if those bytes are anything and a better way to express them
    GreedyRange(Const(b'\x00')),
)
# Mega evolution slots for one Pokémon.  Slots whose `number` is zero are
# filtered out of the parsed result.
pokemon_mega_evolutions_struct = Filter(this.number != 0, Range(
    # XY and ORAS have 3 of these, but the third never seems to be populated.
    # SUMO just has 2.
    2, 3,
    Struct(
        'number' / Int16ul,
        'mode' / Int16ul,
        'mega_stone_itemid' / Int16ul,
        Padding(2),
    )
))
2016-02-26 18:05:51 +00:00
# Egg move lists: a 16-bit count followed by that many 16-bit move ids.
#
# The original code assigned `egg_moves_struct` twice in a row, so the first
# layout was silently discarded.  Both layouts are kept under their own names
# here; `egg_moves_struct` is still bound to the second (SUMO) layout, exactly
# as before.
# Layout without the leading form id -- presumably XY/ORAS, TODO confirm
oras_egg_moves_struct = Struct(
    'moveids' / PrefixedArray(Int16ul, Int16ul),
)
sumo_egg_moves_struct = Struct(
    'first_form_id' / Int16ul,  # TODO SUMO ONLY
    'moveids' / PrefixedArray(Int16ul, Int16ul),
)
# TODO pick the layout based on the game being dumped
egg_moves_struct = sumo_egg_moves_struct
# Level-up learnset: (move id, level) pairs of signed 16-bit integers,
# repeated for as long as the data keeps parsing.
level_up_moves_struct = GreedyRange(
    Struct(
        'moveid' / Int16sl,
        'level' / Int16sl,
    ),
)
# Layout of a single move record.
move_struct = Struct(
    # Parsed straight to the identifier string via the inverted table
    'type' / Enum(Int8ul, **{v: k for (k, v) in TYPES.items()}),
    'category' / Int8ul,
    'damage_class' / Int8ul,
    'power' / Int8ul,
    'accuracy' / Int8ul,
    'pp' / Int8ul,
    'priority' / Int8sl,
    'min_max_hits' / Int8ul,
    'caused_effect' / Int16sl,
    'effect_chance' / Int8ul,
    'status' / Int8ul,
    'min_turns' / Int8ul,
    'max_turns' / Int8ul,
    'crit_rate' / Int8ul,
    'flinch_chance' / Int8ul,
    'effect' / Int16ul,
    'recoil' / Int8sl,
    'healing' / Int8ul,
    'range' / Int8ul,  # ok
    # Three bytes each, split into six 4-bit values
    'stat_change' / Bitwise(Array(6, BitsInteger(4))),
    'stat_amount' / Bitwise(Array(6, BitsInteger(4))),
    'stat_chance' / Bitwise(Array(6, BitsInteger(4))),
    'padding0' / Int8ul,  # ok
    'padding1' / Int8ul,  # ok
    'flags' / Int16ul,
    'padding2' / Int8ul,  # ok
    'extra' / Int8ul,
    # FIXME unsure whether this exists in ORAS; should use a length limiter in the parent
    'extra2' / Int32ul,
)
# Container holding every move record: a 'WD' magic, a 16-bit record count,
# then one 32-bit offset per record.  Each move is read through its offset,
# and parsing yields just the list of moves.
move_container_struct = FocusedSeq('records',
    Const(b'WD'),  # waza... descriptions?
    'records' / PrefixedArray(Int16ul, FocusedSeq('move',
        'offset' / Int32ul,
        'move' / Pointer(this.offset, move_struct),
    )),
)
# Layout of one Pokémon sprite index record.
pokemon_sprite_struct = Struct(
    'index' / Int16ul,
    'female_index' / Int16ul,
    'form_index_offset' / Int32ul,
    'right_index_offset' / Int32ul,
    'form_count' / Int16ul,
    'right_count' / Int16ul,
)
# One wild encounter slot: a species plus the level range it appears at.
encounter_struct = Struct(
    # TODO top 5 bits are form stuff
    'pokemon_id' / Int16ul,
    'min_level' / Int8ul,
    'max_level' / Int8ul,
)
# Encounter table for one zone: a rate byte per encounter method, followed by
# 61 encounter slots (the sum of every slot list in ORAS_ENCOUNTER_SLOTS).
encounter_table_struct = Struct(
    'walk_rate' / Int8ul,
    'long_grass_rate' / Int8ul,
    'hidden_rate' / Int8ul,
    'surf_rate' / Int8ul,
    'rock_smash_rate' / Int8ul,
    'old_rod_rate' / Int8ul,
    'good_rod_rate' / Int8ul,
    'super_rod_rate' / Int8ul,
    'horde_rate' / Int8ul,
    Const(b'\x00' * 5),
    # This array was previously unnamed, which made construct parse the slots
    # and then throw them away; the encounter dumping code reads them as
    # `encounter_table.encounter[n]`.
    'encounter' / Array(61, encounter_struct),
    Const(b'\x00' * 2),
)
# Encounter methods, in the order their slots are stored in an encounter
# table, each paired with the percent chance of every slot.  Replacing '-'
# with '_' in the method name and appending '_rate' gives the matching rate
# field of encounter_table_struct.  Horde slots are (chance, group size)
# pairs instead of bare chances.
ORAS_ENCOUNTER_SLOTS = [
    ('walk', (10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 4, 1)),
    ('long-grass', (10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 4, 1)),
    ('hidden', (60, 35, 5)),  # TODO guessing here!
    ('surf', (50, 30, 15, 4, 1)),
    ('rock-smash', (50, 30, 15, 4, 1)),
    ('old-rod', (60, 35, 5)),
    ('good-rod', (60, 35, 5)),
    ('super-rod', (60, 35, 5)),
    ('horde', ((60, 5), (35, 5), (5, 5))),
]
# The only thing really linking ORAS zones together is that they share the same
# overall location/place name, so use the index of that name as a key to match
# to an extant location
# Keys are indexes into the 'zone-names' text table; they go up in steps of
# two because place names are stored in pairs.  Commented-out entries are
# names with no identifier assigned here -- presumably places that don't exist
# in veekun yet.
ORAS_ZONE_NAME_INDEX_TO_VEEKUN_LOCATION = {
    #170: Littleroot Town
    #172: Oldale Town
    174: 'dewford-town',
    #176: Lavaridge Town
    #178: Fallarbor Town
    #180: Verdanturf Town
    #182: Pacifidlog Town
    184: 'petalburg-city',
    186: 'slateport-city',
    #188: Mauville City
    #190: Rustboro City
    #192: Fortree City
    194: 'lilycove-city',
    196: 'mossdeep-city',
    198: 'sootopolis-city',
    200: 'ever-grande-city',
    #202: Pokémon League
    204: 'hoenn-route-101',
    206: 'hoenn-route-102',
    208: 'hoenn-route-103',
    210: 'hoenn-route-104',
    212: 'hoenn-route-105',
    214: 'hoenn-route-106',
    216: 'hoenn-route-107',
    218: 'hoenn-route-108',
    220: 'hoenn-route-109',
    222: 'hoenn-route-110',
    224: 'hoenn-route-111',
    226: 'hoenn-route-112',
    228: 'hoenn-route-113',
    230: 'hoenn-route-114',
    232: 'hoenn-route-115',
    234: 'hoenn-route-116',
    236: 'hoenn-route-117',
    238: 'hoenn-route-118',
    240: 'hoenn-route-119',
    242: 'hoenn-route-120',
    244: 'hoenn-route-121',
    246: 'hoenn-route-122',
    248: 'hoenn-route-123',
    250: 'hoenn-route-124',
    252: 'hoenn-route-125',
    254: 'hoenn-route-126',
    256: 'hoenn-route-127',
    258: 'hoenn-route-128',
    260: 'hoenn-route-129',
    262: 'hoenn-route-130',
    264: 'hoenn-route-131',
    266: 'hoenn-route-132',
    268: 'hoenn-route-133',
    270: 'hoenn-route-134',
    272: 'meteor-falls',
    274: 'rusturf-tunnel',
    #276: ???
    #278: Desert Ruins
    280: 'granite-cave',
    282: 'petalburg-woods',
    #284: Mt. Chimney
    286: 'jagged-pass',
    288: 'fiery-path',
    290: 'mt-pyre',
    #292: Team Aqua Hideout
    294: 'seafloor-cavern',
    296: 'cave-of-origin',
    298: 'hoenn-victory-road',
    300: 'shoal-cave',
    302: 'new-mauville',
    #304: Sea Mauville
    #306: Island Cave
    #308: Ancient Tomb
    #310: Sealed Chamber
    #312: Scorched Slab
    #314: Team Magma Hideout
    316: 'sky-pillar',
    #318: Battle Resort
    #320: Southern Island
    # TODO is this "abandoned-ship" from rse?
    #322: S.S. Tidal
    324: 'hoenn-safari-zone',
    #326: Mirage Forest
    #328: Mirage Cave
    #330: Mirage Island
    #332: Mirage Mountain
    #334: Trackless Forest
    #336: Pathless Plain
    #338: Nameless Cavern
    #340: Fabled Cave
    #342: Gnarled Den
    #344: Crescent Isle
    #346: Secret Islet
    #348: Soaring in the sky
    #350: Secret Shore
    #352: Secret Meadow
    #354: Secret Base
}
# Area identifiers for individual ORAS zones, keyed by zone index.  These are
# only consulted for places made up of more than one zone, where the parent
# place's identifier alone would be ambiguous.
# TODO wait, in the yaml thing, where do the fanon names for these go?
ORAS_ZONE_INDEX_TO_VEEKUN_AREA = {
    # TODO oops i should be actually mapping these to areas in rse.  many of
    # them aren't split the same way, though.  uh oh.  if we make areas a more
    # first-class thing, then...  how do we deal with this?  e.g. route 104 is
    # two zones in oras but only one zone in rse.  it's easy enough to fudge
    # that with encounters, but what do you do about events etc?
    26: 'hoenn-route-104--north',
    27: 'hoenn-route-104--south',
    # TODO should i, maybe, indicate the type of terrain an area has...?
    30: 'hoenn-route-107',
    64: 'hoenn-route-107--underwater',
    # NOTE: split from rse
    38: 'hoenn-route-112--north',  # route 111 side
    39: 'hoenn-route-112--south',  # lavaridge town side
    35: 'hoenn-route-111',
    # NOTE: split from rse
    37: 'hoenn-route-111--desert',
    48: 'hoenn-route-120',
    # NOTE: new
    49: 'hoenn-route-120--tomb-area',
    53: 'hoenn-route-124',
    65: 'hoenn-route-124--underwater',
    55: 'hoenn-route-126',
    66: 'hoenn-route-126--underwater',
    57: 'hoenn-route-128',
    # NOTE: new
    68: 'hoenn-route-128--underwater',
    58: 'hoenn-route-129',
    # NOTE: new
    69: 'hoenn-route-129--underwater',
    59: 'hoenn-route-130',
    # NOTE: new
    70: 'hoenn-route-130--underwater',
    71: 'meteor-falls',
    74: 'meteor-falls--backsmall-room',  # TODO this name is dumb
    # NOTE: indistinguishable
    72: 'meteor-falls--back',
    73: 'meteor-falls--b1f',
    78: 'granite-cave--1f',
    79: 'granite-cave--b1f',
    80: 'granite-cave--b2f',
    # NOTE: indistinguishable
    86: 'mt-pyre--1f',
    87: 'mt-pyre--2f',
    88: 'mt-pyre--3f',
    89: 'mt-pyre--4f',
    90: 'mt-pyre--outside',
    # NOTE: indistinguishable; split from rse
    91: 'mt-pyre--summit-south',
    533: 'mt-pyre--summit-north',
    # NOTE: many sets of these are indistinguishable; ALL split from rse
    99: 'seafloor-cavern--entrance',
    100: 'seafloor-cavern--room-1',
    101: 'seafloor-cavern--room-2',
    102: 'seafloor-cavern--room-5',
    103: 'seafloor-cavern--room-6',
    104: 'seafloor-cavern--room-3',
    105: 'seafloor-cavern--room-7',
    106: 'seafloor-cavern--room-4',
    107: 'seafloor-cavern--room-8',
    108: 'seafloor-cavern--room-9',
    109: 'seafloor-cavern--room-10',
    # NOTE: indistinguishable
    112: 'cave-of-origin--entrance',
    113: 'cave-of-origin--1f',
    114: 'cave-of-origin--b1f',
    115: 'cave-of-origin--b2f',
    116: 'cave-of-origin--b3f',
    # NOTE: new?  rse had this room but had no encounters in it
    452: 'cave-of-origin--b4f',
    # NOTE: indistinguishable
    123: 'hoenn-victory-road--entrance',  # NOTE: new
    124: 'hoenn-victory-road--1f',
    125: 'hoenn-victory-road--b1f',
    # NOTE: new; rse had b2f instead
    126: 'hoenn-victory-road--2f',
}
2016-02-26 18:05:51 +00:00
# There are 63 tutor move bits in use, but only 60 move tutors -- the moves
# appear to be largely inherited from B2W2 but these are just not exposed in
# ORAS
# (move identifiers, in the same form make_identifier produces)
ORAS_UNUSED_MOVE_TUTORS = {'dark-pulse', 'roost', 'sleep-talk'}
# Moves taught by the regular (non-BP) tutors; stored under the 'tutors' key
# alongside the BP tutor lists that are read out of the binary.
# Unsure where this is in the binary
ORAS_NORMAL_MOVE_TUTORS = (
    'grass-pledge',
    'fire-pledge',
    'water-pledge',
    'frenzy-plant',
    'blast-burn',
    'hydro-cannon',
    'draco-meteor',
    'dragon-ascent',
)
# TODO ripe for being put in the pokedex codebase itself
def make_identifier(english_name):
    """Turn an English display name into a lowercase, hyphenated identifier.

    Runs of spaces, periods, and colons become a single hyphen, apostrophes
    are dropped, and the gender symbols used by the two Nidoran species become
    ``-f`` / ``-m`` suffixes.  Leading and trailing separators are trimmed
    first so that a name ending in a period (e.g. "Mime Jr.") doesn't end up
    with a dangling hyphen.

    :param english_name: the name as it appears in the English game text
    :returns: the identifier, e.g. ``'mr-mime'`` for ``'Mr. Mime'``
    """
    name = english_name.lower().strip('. ')
    name = name.replace('♀', '-f').replace('♂', '-m')
    # Both the ASCII and the typographic apostrophe show up in names
    name = re.sub("['’]", '', name)
    return re.sub(r'[.: ]+', '-', name)
2016-02-26 18:05:51 +00:00
@contextmanager
def read_garc(path):
    """Open the GARC archive at ``path`` for the duration of a ``with`` block.

    Yields a GARCFile wrapped around the binary file handle; the handle is
    closed again when the block exits.
    """
    with path.open('rb') as garc_stream:
        garc = GARCFile(garc_stream)
        yield garc
# XXX christ lol.  taken from SO.  fodder for camel maybe
def represent_ordereddict(dumper, data):
    """Represent an OrderedDict as a plain YAML mapping.

    Keys and values are represented in the dict's own iteration order, so the
    dumped mapping keeps that order.
    """
    pairs = [
        (dumper.represent_data(key), dumper.represent_data(val))
        for key, val in data.items()
    ]
    return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', pairs)


yaml.add_representer(OrderedDict, represent_ordereddict)
def represent_tuple(dumper, data):
    """Represent a tuple as a flow-style (inline) YAML sequence."""
    item_nodes = list(map(dumper.represent_data, data))
    return yaml.nodes.SequenceNode(
        u'tag:yaml.org,2002:seq', item_nodes, flow_style=True)


yaml.add_representer(tuple, represent_tuple)
def dump_to_yaml(data, f):
    """Write ``data`` to the open file ``f`` as block-style, unicode YAML."""
    # TODO gonna need a better way to handle flow style
    dump_options = dict(default_flow_style=False, allow_unicode=True)
    yaml.dump(data, f, **dump_options)
def extract_data(root, out):
# TODO big conceptual question for the yaml thing: how do we decide how the
# identifiers work in the per-version data? the "global" identifiers are
# in theory based on the names from the latest version, and the game dump
# scripts shouldn't have to care about what the latest version is
# 1. make the canon data not be keyed by identifier (makes it hard to
# follow what's going on in flavor text files etc, and unclear how to match
# up items across versions)
# 2. make each version's data keyed by its own identifiers (makes it hard
# to align them all when loading everything, and unclear how to match up
# items whose names change across versions)
# 3. hardcode a mapping of version+identifier pairs to their current
# identifiers, when they changed, which is a little ugly but also solves
# all the match-up problems and is what we'd basically have to do anyway
# -------------------------------------------------------------------------
# Names and flavor text
2016-02-26 18:05:51 +00:00
texts = {}
#for lang, fn in ORAS_SCRIPT_FILES.items():
for lang, fn in SUMO_SCRIPT_FILES.items():
2016-02-26 18:05:51 +00:00
texts[lang] = {}
with read_garc(root / fn) as garc:
#for entryname, entryid in ORAS_SCRIPT_ENTRIES.items():
for entryname, entryid in SUMO_SCRIPT_ENTRIES.items():
2016-02-26 18:05:51 +00:00
entry = garc[entryid][0]
texts[lang][entryname] = decrypt_xy_text(entry.read())
identifiers = {}
# FIXME should use a known list, mayyybe compare against this
identifiers['species'] = list(map(make_identifier, texts['en']['species-names']))
# This is totally wrong, but the Pokémon loop below fixes it as it goes
# FIXME okay that bit at the end is dumb
identifiers['pokémon'] = identifiers['species'][:] + [None] * 1000
identifiers['move'] = list(map(make_identifier, texts['en']['move-names']))
identifiers['item'] = list(map(make_identifier, texts['en']['item-names']))
identifiers['ability'] = list(map(make_identifier, texts['en']['ability-names']))
2016-02-26 18:05:51 +00:00
textdir = out / 'script'
if not textdir.exists():
textdir.mkdir()
for lang in CANON_LANGUAGES:
with (textdir / (lang + '.yaml')).open('w') as f:
# TODO this should use identifiers, not be lists
# TODO need to skip slot 0 which is junk
dump_to_yaml(texts[lang], f)
"""
# Encounters
22:42 < magical> note to self: X/Y ambush encounters are found in DllField.cro, starting at 0xf40d0
23:02 < magical> friend safari pokemon at 0x13d34a
# TODO move mee elsewheeere -- actually all of these should be in their own pieces
places = OrderedDict()
name_index_to_place = {}
name_index_counts = Counter()
zones = {}
zone_to_name_index = {}
with read_garc(root / 'rom/a/0/1/3') as garc:
# Fetch the pointer table from the encounter file first, mostly so we
# can figure out which zones have no encounters at all. For whatever
# reason, a zone with no encounters still has data -- but it uses the
# same pointer as the following zone. I don't know if the pointers
# were intended to be used as ranges or what, but it's a handy signal.
f = garc[-1][0]
# TODO SIGH, translate this to construct, i guess
magic = f.read(2)
assert magic == b'EN'
num_records = int.from_bytes(f.read(2), 'little')
encounter_pointers = []
for n in range(num_records):
encounter_pointers.append(int.from_bytes(f.read(4), 'little'))
empty_zones = set()
for n in range(num_records - 1):
if encounter_pointers[n] == encounter_pointers[n + 1]:
empty_zones.add(n)
# Every file in this GARC is ZO (zonedata) except the last one, which
# is a table of encounters for each zone.
num_zones = len(garc) - 1
for z in range(num_zones):
if z in empty_zones:
# TODO later we may want these, to hang events off of etc
continue
zone = OrderedDict()
zone['game-index'] = z
zones[z] = zone
# TODO probably worth trying to parse this stuff for real later
data = garc[z][0].read()
name_index = int.from_bytes(data[56:58], 'little')
name_bits = name_index >> 9
name_index &= 0x1ff
zone_to_name_index[z] = name_index
name_index_counts[name_index] += 1
# Create places as we go, but DO NOT assign zones to places yet,
# since the logic for figuring out zone identifiers is different
# for places with only one zone
if name_index not in name_index_to_place:
place = OrderedDict()
place['unknown--gen6-name-bits'] = name_bits
place['name'] = OrderedDict()
place['alternate-name'] = OrderedDict()
for language in CANON_LANGUAGES:
name, altname = (
texts[language]['zone-names'][name_index:name_index + 2])
place['name'][language] = name
if altname:
place['alternate-name'][language] = altname
# Drop this dict entirely if there are no alt names
if not place['alternate-name']:
del place['alternate-name']
name_index_to_place[name_index] = place
ident = ORAS_ZONE_NAME_INDEX_TO_VEEKUN_LOCATION.get(name_index)
if not ident:
# Not in veekun yet...
place['veekun--new'] = True
ident = make_identifier(place['name']['en'])
places[ident] = place
# TODO ugh
place['_identifier'] = ident
place['zones'] = OrderedDict()
# Some encounters are used more than once
seen_encounters = {}
for z, ptr in enumerate(encounter_pointers):
if z in empty_zones:
continue
zone = zones[z]
name_index = zone_to_name_index[z]
place = name_index_to_place[name_index]
# Now we have all the zones, so we can figure out identifiers and
# assign the zone to its parent place
identifier = place['_identifier']
if name_index_counts[name_index] > 1:
# TODO are these names /sometimes/ official? e.g. doesn't
# "B1F" appear sometimes?
subidentifier = ORAS_ZONE_INDEX_TO_VEEKUN_AREA.get(z)
if not subidentifier:
subidentifier = "oras-unknown-{}".format(z)
identifier = "{}--{}".format(identifier, subidentifier)
place['zones'][identifier] = zone
# Snag the actual encounters, if any.
zone['encounters'] = OrderedDict()
# TODO dumb hack for soaring through the sky, which is... nothing
if not f.read(1):
continue
f.seek(ptr)
encounter_table = encounter_table_struct.parse_stream(f)
n = 0
for method, chances in ORAS_ENCOUNTER_SLOTS:
rate_attr = method.replace('-', '_') + '_rate'
rate = getattr(encounter_table, rate_attr)
# TODO where does rate fit in here?
if rate == 0:
# TODO wrong for hordes
n += len(chances)
continue
encounters = zone['encounters'][method] = []
for chance in chances:
if isinstance(chance, tuple):
chance, groupsize = chance
else:
groupsize = 1
encounter = []
for _ in range(groupsize):
enc = encounter_table.encounter[n]
# TODO assert always zero when rate is zero, never zero when rate isn't
if enc.pokemon_id != 0:
if enc.min_level == enc.max_level:
levels = str(enc.min_level)
else:
levels = "{} - {}".format(enc.min_level, enc.max_level)
pokemon_ident = identifiers['pokémon'][enc.pokemon_id & 0x1ff]
pokemon_form_bits = enc.pokemon_id >> 9
# TODO maybe turn this into, i have no idea, a
# custom type? something forcibly short??
# TODO what do i do with the form bits?
encounter.append("{} {}".format(pokemon_ident, levels))
n += 1
if groupsize == 1:
encounters.extend(encounter)
else:
encounters.append(encounter)
with (out / 'places.yaml').open('w') as f:
dump_to_yaml(places, f)
return
"""
2016-02-26 18:05:51 +00:00
# -------------------------------------------------------------------------
# Scrape some useful bits from the binary
with (root / 'exe/code.bin').open('rb') as f:
# Tutored moves
# TODO i think these are oras only? do they exist in sumo? xy?
2016-02-26 18:05:51 +00:00
tutor_moves = dict(tutors=ORAS_NORMAL_MOVE_TUTORS)
f.seek(0x004960f8)
for n in range(1, 5):
key = "bp_tutors{}".format(n)
moves = tutor_moves[key] = []
while True:
moveid, = struct.unpack('<H', f.read(2))
if moveid >= len(identifiers['move']):
break
moves.append(identifiers['move'][moveid])
# TMs
machines = []
#f.seek(0x004a67ee) # ORAS
f.seek(0x0049795a) # SUMO
# TODO magic number (107)
2016-02-26 18:05:51 +00:00
machineids = struct.unpack('<107H', f.read(2 * 107))
# TODO dunno if this is still true
2016-02-26 18:05:51 +00:00
# Order appears to be based on some gen 4 legacy: TMs 1 through 92, HMs
# 1 through 6, then the other eight TMs and the last HM. But the bits
# in the Pokémon structs are in the expected order of 1 through 100, 1
# through 7
machines = [
identifiers['move'][moveid]
for moveid in
machineids[0:92] +
machineids[98:106] +
machineids[92:98] +
machineids[106:]
]
# -------------------------------------------------------------------------
# Pokémon structs
mega_evolutions = get_mega_evolutions(root)
all_pokémon = OrderedDict()
2016-02-26 18:05:51 +00:00
pokemon_data = []
with read_garc(root / 'rom/a/0/1/7') as garc: # SUMO
#with read_garc(root / 'rom/a/1/9/5') as garc: # ORAS
2016-02-26 18:05:51 +00:00
personals = [subfile[0].read() for subfile in garc]
_pokemon_forms = {} # "real" species id => (base species id, form name id)
_next_name_form_id = 803#723 # TODO magic numbers
print("number of flavor texts", len(texts['en']['species-flavor-moon']))
2016-02-26 18:05:51 +00:00
for i, personal in enumerate(personals[:-1]):
record = pokemon_struct.parse(personal)
# FIRST THINGS FIRST: let's deal with forms.
# TODO some pokemon, like unown, /only/ have sprite variations, so they
# don't have a form_species_start here. what do i do about them?
if (record.form_count > 1) != bool(record.form_species_start):
print("!!! sprite-only forms, argh")
# The > i check makes sure we don't run this bit a second time when we
# hit the forms themselves
if record.form_count > 1 and record.form_species_start > i:
megas = mega_evolutions[i]
if len(megas) == 1:
assert i not in FORM_NAMES
form_names = ['mega']
elif len(megas) == 2:
assert i not in FORM_NAMES
form_names = ['mega-x', 'mega-y']
else:
assert not megas
form_names = FORM_NAMES[i][1:]
# Fix our own name if necessary
if FORM_NAMES[i][0]:
identifiers['pokémon'][i] += '-' + FORM_NAMES[i][0]
if record.form_count - 1 != len(form_names):
print("!!!!! MISMATCH", record.form_count - 1, len(form_names))
2016-02-26 18:05:51 +00:00
for offset in range(record.form_count - 1):
# Form names appear to be all just jammed at the end in order,
# completely unrelated to either of the "start" offsets here
2016-02-26 18:05:51 +00:00
#form_name = texts['en']['form-names'][_next_name_form_id]
# TODO still no idea how "intangible" forms are being
# handled in the new schema
_pokemon_forms[record.form_species_start + offset] = i, _next_name_form_id
2016-02-26 18:05:51 +00:00
_next_name_form_id += 1
identifiers['pokémon'][record.form_species_start + offset] = identifiers['species'][i] + '-' + form_names[offset]
pokémon = Pokémon()
all_pokémon[identifiers['pokémon'][i]] = pokémon
pokémon.game_index = i
if i in _pokemon_forms:
base_species_id, form_name_id = _pokemon_forms[i]
else:
base_species_id = i
form_name_id = i
# TODO i observe this is explicitly a species name, the one thing that
# really is shared between forms
pokémon.name = OrderedDict(
(language, texts[language]['species-names'][base_species_id])
for language in CANON_LANGUAGES)
pokémon.genus = OrderedDict(
(language, texts[language]['genus-names'][base_species_id])
for language in CANON_LANGUAGES)
# FIXME ho ho, hang on a second, forms have their own flavor text too!!
pokémon.flavor_text = OrderedDict(
# TODO well this depends on which game you're dumping
(language, texts[language]['species-flavor-moon'][base_species_id])
for language in CANON_LANGUAGES)
# FIXME include form names? only when they exist? can that be
# inconsistent between languages?
pokémon.base_stats = {
'hp': record.stat_hp,
'attack': record.stat_atk,
'defense': record.stat_def,
'special-attack': record.stat_spatk,
'special-defense': record.stat_spdef,
'speed': record.stat_speed,
}
# FIXME pokémon.types = [record.type1]
pokémon.capture_rate = record.capture_rate
# TODO stage?
# FIXME effort
# Held items are a bit goofy; if the same item is in all three slots, it always appears!
pokémon.held_items = {}
if 0 != record.held_item1 == record.held_item2 == record.held_item3:
pokémon.held_items[identifiers['item'][record.held_item1]] = 100
else:
if record.held_item1:
pokémon.held_items[identifiers['item'][record.held_item1]] = 50
if record.held_item2:
pokémon.held_items[identifiers['item'][record.held_item2]] = 5
if record.held_item3:
pokémon.held_items[identifiers['item'][record.held_item3]] = 1
# TODO i think this needs some normalizing? maybe renaming because
# this doesn't at all imply what it means
pokémon.gender_rate = record.gender_rate
pokémon.hatch_counter = record.steps_to_hatch
pokémon.base_happiness = record.base_happiness
pokémon.growth_rate = record.growth_rate
# FIXME egg groups
pokémon.abilities = [
identifiers['ability'][ability]
for ability in (record.ability1, record.ability2, record.ability_hidden)
]
# FIXME safari escape??
# FIXME form stuff
# FIXME color
pokémon.base_experience = record.base_exp
# FIXME what units are these!
pokémon.height = record.height
pokémon.weight = record.weight
pokémon.moves = {}
# TODO transform to an OD somehow probably
pokemon_data.append(record)
print("{:4d} {:25s} {} {:5d} {:5d} {:20s} {:4d} {:4d} {:2d}".format(
i,
identifiers['pokémon'][i],
('0'*16 + bin(record.mystery1)[2:])[-16:],
record.mystery2,
record.stage,
texts['en']['form-names'][i],
record.form_species_start,
record.form_sprite_start,
record.form_count,
))
2016-02-26 18:05:51 +00:00
#for i in range(723, 825 + 1):
# base_species_id, form_name_id = _pokemon_forms[i]
# species_name = texts['en']['species-names'][base_species_id]
# form_name = texts['en']['form-names'][form_name_id]
# print(i, species_name, '/', form_name)
# -------------------------------------------------------------------------
# Move stats
#with read_garc(root / 'rom/a/1/8/9') as garc: # ORAS
with read_garc(root / 'rom/a/0/1/1') as garc: # SUMO
2016-02-26 18:05:51 +00:00
# Only one subfile
# TODO assert this wherever i do it
2016-02-26 18:05:51 +00:00
data = garc[0][0].read()
print(Struct('magic' / Bytes(2), 'count' / Int16ul, 'pointers' / Array(16, Int32ul)).parse(data))
print(move_struct.sizeof())
records = move_container_struct.parse(data)
for i, record in enumerate(records):
#print(texts['en']['move-names'][i])
#print(record)
2016-02-26 18:05:51 +00:00
# TODO with the release of oras all moves have contest types and effects again! where are they??
print("{:3d} {:30s} | {m.type:10s} {m.category:3d} / {m.power:3d} {m.pp:2d} {m.accuracy:3d} / {m.priority:2d} {m.range:2d} {m.damage_class:1d} / {m.effect:3d} {m.caused_effect:3d} {m.effect_chance:3d} -- {m.status:3d} {m.min_turns:3d} {m.max_turns:3d} {m.crit_rate:3d} {m.flinch_chance:3d} {m.recoil:4d} {m.healing:3d} / {m.stat_change!r} {m.stat_amount!r} {m.stat_chance!r} ~ {m.padding0:3d} {m.padding1:3d} {m.flags:04x} {m.padding2:3d} {m.extra:3d} {m.extra2:10d}".format(
i,
texts['en']['move-names'][i],
m=record,
))
return
2016-02-26 18:05:51 +00:00
# Egg moves
with read_garc(root / 'rom/a/0/1/2') as garc: # SUMO
#with read_garc(root / 'rom/a/1/9/0') as garc: # ORAS
print("number of egg moves:", len(garc))
2016-02-26 18:05:51 +00:00
for i, subfile in enumerate(garc):
ident = identifiers['pokémon'][i]
2016-02-26 18:05:51 +00:00
data = subfile[0].read()
if not data:
continue
container = egg_moves_struct.parse(data)
print(i, ident, container.first_form_id, container.moveids)
# FIXME: 961 pokémon, 1063 named forms, but 1048 egg movesets.
# what? they get completely out of order after 802 and i don't
# know how to fix this. didn't magical write some code...?
if i > len(identifiers['species']):
continue
moveset = all_pokémon[ident].moves
2016-02-26 18:05:51 +00:00
eggset = moveset['egg'] = []
for moveid in container.moveids:
eggset.append(identifiers['move'][moveid])
# Level-up moves
with read_garc(root / 'rom/a/0/1/3') as garc: # SUMO
#with read_garc(root / 'rom/a/1/9/1') as garc: # ORAS
print("number of level-up moves", len(garc))
2016-02-26 18:05:51 +00:00
for i, subfile in enumerate(garc):
ident = identifiers['pokémon'][i]
2016-02-26 18:05:51 +00:00
level_up_moves = subfile[0].read()
moveset = all_pokémon[ident].moves
2016-02-26 18:05:51 +00:00
levelset = moveset['level'] = []
lastlevel = None
order = 1
for pair in level_up_moves_struct.parse(level_up_moves):
# End is indicated with -1, -1
if pair.moveid <= 0:
break
# FIXME this is a goofy-looking structure, but it makes the
# yaml come out nicely?
levelset.append({
pair.level: identifiers['move'][pair.moveid],
})
2016-02-26 18:05:51 +00:00
if pair.level == lastlevel:
order += 1
else:
lastlevel = pair.level
order = 1
# Evolution
#with read_garc(root / 'rom/a/1/9/2') as garc: # ORAS
#with read_garc(root / 'rom/a/0/1/4') as garc: # SUMO?
2016-02-26 18:05:51 +00:00
# for subfile in garc:
# evolution = subfile[0].read()
# print(repr(evolution))
# Mega evolution
#with read_garc(root / 'rom/a/1/9/3') as garc: # ORAS
#with read_garc(root / 'rom/a/0/1/5') as garc: # SUMO?
2016-02-26 18:05:51 +00:00
# for subfile in garc:
# evolution = subfile[0].read()
# print(repr(evolution))
# TODO what is a/1/9/4 (ORAS) or a/0/1/6 (SUMO)? 8 files of 404 bytes each
2016-02-26 18:05:51 +00:00
# Baby Pokémon
#with read_garc(root / 'rom/a/1/9/6') as garc: # ORAS
#with read_garc(root / 'rom/a/0/1/8') as garc: # SUMO?
2016-02-26 18:05:51 +00:00
# for subfile in garc:
# baby_pokemon = subfile[0].read()
# print(repr(baby_pokemon))
2016-02-26 18:05:51 +00:00
# Item stats
# TODO
#with read_garc(root / 'rom/a/1/9/7') as garc: # ORAS
with read_garc(root / 'rom/a/0/1/9') as garc: # ORAS
for subfile in garc:
item_stats = subfile[0].read()
2016-02-26 18:05:51 +00:00
# Tutor moves (from the personal structs)
for i, datum in enumerate(pokemon_data):
ident = identifiers['pokémon'][i]
moveset = all_pokémon[ident].moves
2016-02-26 18:05:51 +00:00
tutorset = moveset['tutor'] = []
for key, tutors in tutor_moves.items():
for bit, moveident in enumerate(tutors):
if moveident in ORAS_UNUSED_MOVE_TUTORS:
continue
if not datum[key] & (1 << bit):
continue
tutorset.append(moveident)
# TMs
machineset = moveset['machine'] = []
for bit, moveident in enumerate(machines):
if not datum['machines'][bit]:
2016-02-26 18:05:51 +00:00
continue
machineset.append(moveident)
with (out / 'pokemon.yaml').open('w') as f:
#dump_to_yaml(all_pokémon, f)
import pokedex.schema as schema
from camel import Camel
f.write(Camel([schema.POKEDEX_TYPES]).dump(all_pokémon))
2016-02-26 18:05:51 +00:00
def get_mega_evolutions(root):
    """Return a dict mapping Pokémon ids to a list of mega evolution records.

    The archive holds one subfile per Pokémon; a subfile's position in the
    archive is used as the Pokémon id, and its first entry is parsed with
    ``pokemon_mega_evolutions_struct``.

    :param root: path-like root of the unpacked game image.
    """
    #with read_garc(root / 'rom/a/1/9/3') as garc:  # oras
    with read_garc(root / 'rom/a/0/1/5') as garc:  # SUMO
        return {
            pokemonid: pokemon_mega_evolutions_struct.parse_stream(subfile[0])
            for pokemonid, subfile in enumerate(garc)
        }
class SpriteFileNamer:
    """Do you have a big set of sprites, and a separate list of stuff
    identifying them, as happens in XY and ORAS?  I will sort that all out for
    you.

    Sprites are registered under their index in the sprite archive, either
    with `add` (described by Pokémon id, form and variant flags) or with
    `inject` (an explicit filename).  `open` then hands back a writable file
    for an index and afterwards copies the result to every alias filename
    registered for that index.

    Attributes:
        out: base output directory; must support ``/`` path joining and the
            `pathlib.Path` methods used in `open`.
        mega_counts: mapping of Pokémon id to its number of mega evolutions.
        form_names: mapping of Pokémon id to a sequence of form names,
            indexed by form id.
        index_to_filenames: ``defaultdict(list)`` mapping a sprite index to
            the relative filenames it is saved under; the first filename is
            the one actually written, the rest are copies.  Callers may probe
            it directly, so an empty list means "not registered".
        seen: set of ``(pokemonid, formid, right, back, shiny, female)`` keys
            registered so far, used to reject duplicates.
    """
    def __init__(self, out, mega_counts, form_names):
        self.out = out
        self.mega_counts = mega_counts
        self.form_names = form_names

        self.index_to_filenames = defaultdict(list)
        self.seen = set()

    def _check_unclaimed(self, index):
        """Raise ValueError if `index` already has filenames registered.

        A plain ``index in self.index_to_filenames`` is not good enough: the
        mapping is a defaultdict, so merely looking an index up leaves an
        empty list behind, which must not count as "already registered".
        """
        existing = self.index_to_filenames.get(index)
        if existing:
            raise ValueError("Index {} is already {}".format(index, existing))

    def add(self, index, pokemonid, formid=0, right=False, back=False, shiny=False, female=False):
        """Register the sprite at `index` as a particular Pokémon variant.

        :param index: index of the sprite within the sprite archive.
        :param pokemonid: id of the Pokémon the sprite depicts.
        :param formid: form id; 0 is the default form.
        :param right: whether the sprite goes under ``right/``.
        :param back: whether the sprite goes under ``back/``.
        :param shiny: whether the sprite goes under ``shiny/``.
        :param female: whether the sprite goes under ``female/``.
        :raises ValueError: if `index` or the exact variant was already
            registered, or if the Pokémon has an unsupported number of mega
            evolutions.
        """
        # Check that we don't try to do the same one twice
        self._check_unclaimed(index)

        key = (pokemonid, formid, right, back, shiny, female)
        if key in self.seen:
            raise ValueError("Duplicate sprite: {!r}".format(key))
        self.seen.add(key)

        # Figure out the form name
        # TODO this assumes a Pokémon cannot have both forms and mega
        # evolutions, which is true...  for now
        if pokemonid in self.form_names:
            form = self.form_names[pokemonid][formid]
        elif formid == 0:
            form = None
        else:
            # .get() so that a Pokémon missing from the mega listing falls
            # through to the generic fallback instead of dying on a KeyError
            mega_count = self.mega_counts.get(pokemonid)
            if not mega_count:
                # Only warn, so this keeps working for new games
                warnings.warn("Don't know any forms for Pokemon {}".format(pokemonid))
                form = "form-{}".format(formid)
            elif mega_count == 1:
                form = ['mega'][formid - 1]
            elif mega_count == 2:
                form = ['mega-x', 'mega-y'][formid - 1]
            else:
                raise ValueError(
                    "Don't know how to name {} mega evolutions for Pokémon {}"
                    .format(mega_count, pokemonid))

        # Construct the directory
        parts = []
        if right:
            parts.append('right')
        if back:
            parts.append('back')
        if shiny:
            parts.append('shiny')
        if female:
            parts.append('female')

        # Build the final filename
        bare_filename = "{}.png".format(pokemonid)
        if form:
            parts.append("{}-{}.png".format(pokemonid, form))
        else:
            parts.append(bare_filename)

        filenames = self.index_to_filenames[index]
        filenames.append('/'.join(parts))

        # For named "default" forms, create two output files
        if form and formid == 0:
            parts[-1] = bare_filename
            filenames.append('/'.join(parts))

        # Special case for Meowstic: duplicate its female form as a formless
        # female sprite
        if form == 'female' and not female:
            parts.insert(-1, 'female')
            parts[-1] = bare_filename
            filenames.append('/'.join(parts))

    def inject(self, index, filename):
        """Manually specify the filename for an index.  Helpful for edge cases
        like egg sprites.

        :raises ValueError: if `index` already has filenames registered.
        """
        self._check_unclaimed(index)
        self.index_to_filenames[index].append(filename)

    # TODO we oughta create aliases for any that are missing?
    # pumpkaboo/gourgeist and arceus don't have separate box icons, for
    # example.
    @contextmanager
    def open(self, index, prefix=None):
        """Context manager yielding a binary file to write sprite `index` to.

        The file is the first filename registered for `index`, created under
        ``self.out`` (or ``self.out / prefix`` when `prefix` is given); parent
        directories are created as needed.  Once the ``with`` body has
        finished and the file is closed, it is copied to every other filename
        registered for the same index.

        :raises RuntimeError: if no filename is registered for `index`.
        """
        out = self.out
        if prefix:
            out = out / prefix

        # .get() rather than indexing, so that a failed lookup doesn't plant
        # an empty entry in the defaultdict
        filenames = self.index_to_filenames.get(index)
        if not filenames:
            raise RuntimeError("Don't have filenames for index {}".format(index))

        fn = out / filenames[0]
        fn.parent.mkdir(parents=True, exist_ok=True)
        with fn.open('wb') as f:
            yield f

        # Copy only after the primary file is closed, so it's fully flushed
        for path in filenames[1:]:
            fn2 = out / path
            # TODO this duplication is annoying and we can probably do it in
            # one fell swoop instead of constantly rechecking, maybe during the
            # same timeframe that we fill in missing forms
            fn2.parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(str(fn), str(fn2))
2016-02-26 18:05:51 +00:00
def extract_box_sprites(root, out):
    """Dump the box sprites from the game image at `root` into `out`.

    The sprite order is recovered from a table inside ``exe/code.bin``; each
    table record gives the sprite index of a Pokémon plus optional pointers to
    per-form and right-facing index lists.  The sprites themselves are then
    read from a GARC archive, decoded with ``decode_clim`` and written out
    under the names worked out by `SpriteFileNamer`.

    :param root: path-like root of the unpacked game image.
    :param out: path-like output directory; created if it does not exist.
    """
    mega_counts = {}
    for pokemon_id, megas in get_mega_evolutions(root).items():
        mega_counts[pokemon_id] = len(megas)
    namer = SpriteFileNamer(out, mega_counts, FORM_NAMES)

    with (root / 'exe/code.bin').open('rb') as code:
        def read_index_list(ram_address, count):
            # Note that these addresses are relative to RAM, and the binary is
            # loaded into RAM starting at 0x100000, so we need to subtract
            # that to get a file position
            code.seek(ram_address - 0x100000)
            return struct.unpack("<{}H".format(count), code.read(2 * count))

        # Form configuration, used to put sprites in the right order
        # NOTE: in x/y the address is 0x0043ea98
        #code.seek(0x0047d650)  # ORAS
        code.seek(0x004999d0)  # SUMO
        # Discard dummy zero sprite
        pokemon_sprite_struct.parse_stream(code)

        n = 0
        while True:
            sprite = pokemon_sprite_struct.parse_stream(code)
            # This is not particularly reliable, but the data immediately
            # following this list is some small 32-bit values, so the female
            # index will be (illegally) zero
            if not sprite.female_index:
                break

            n += 1
            namer.add(sprite.index, n)
            if sprite.female_index != sprite.index:
                namer.add(sprite.female_index, n, female=True)

            # Remember where the next table record starts, since following
            # the pointers below moves the file position
            next_record = code.tell()
            form_indices = ()

            if sprite.form_index_offset:
                form_indices = read_index_list(
                    sprite.form_index_offset, sprite.form_count)
                for form, form_idx in enumerate(form_indices):
                    # Ignore the first form, since it's the default and thus
                    # covered by `index` already; likewise any form that just
                    # reuses the default sprite
                    if form == 0 or form_idx == sprite.index:
                        continue
                    namer.add(form_idx, n, form)

            if sprite.right_index_offset:
                right_indices = read_index_list(
                    sprite.right_index_offset, sprite.right_count)
                if sprite.form_count:
                    assert sprite.right_count == sprite.form_count
                    pairs = zip(form_indices, right_indices)
                    for form, (form_idx, right_idx) in enumerate(pairs):
                        # Only forms with a distinct right-facing sprite
                        if form_idx != right_idx:
                            namer.add(right_idx, n, form, right=True)
                else:
                    assert sprite.right_count == 2
                    assert right_indices[0] == right_indices[1]
                    if right_indices[0] != sprite.index:
                        namer.add(right_indices[0], n, right=True)

            code.seek(next_record)

    pokemon_sprites_dir = out
    if not pokemon_sprites_dir.exists():
        pokemon_sprites_dir.mkdir()

    # with read_garc(root / 'rom/a/0/9/1') as garc:  # ORAS
    # TODO what's in 2/5/3?
    with read_garc(root / 'rom/a/0/6/2') as garc:  # SUMO
        from .lib.clim import decode_clim

        last_index = len(garc) - 1
        for i, subfile in enumerate(garc):
            # TODO ORAS ONLY
            #elif i == 333:
            #    # Duplicate Entei sprite that's not used
            #    continue
            if i == last_index:
                # Very last one is egg
                namer.inject(i, 'egg.png')

            # TODO this is bad.
            filenames = namer.index_to_filenames[i]
            if not filenames:
                # Unused sprite -- e.g. index 0, or one of the dummies in SUMO
                continue

            image_data = decode_clim(subfile[0].read())

            # TODO this is bad.
            if 'right/' in filenames[0]:
                image_data.mirror()

            with namer.open(i) as f:
                image_data.write_to_png(f)
2016-02-26 18:05:51 +00:00
def extract_dex_sprites(root, out):
    """Dump the Pokédex sprites from the game image at `root` into `out`.

    A model listing (four bytes per Pokémon: first model index, model count
    and flags) is read first to work out which sprite index belongs to which
    Pokémon, form, gender and shininess.  The sprites are then read from a
    second archive, decoded with ``decode_etc1`` and written out under the
    names worked out by `SpriteFileNamer`.

    :param root: path-like root of the unpacked game image.
    :param out: path-like output directory.
    """
    # Some Pokémon have dex sprites for their forms, too, and they're all
    # clustered together, so we have to do a little work to fix the numbering.
    # Luckily the dex sprites are in the same order as the models
    # (unsurprising, as they're just model renders), which also tells us what
    # Pokémon have female forms.  The mega evolution map tells us which forms
    # are megas, and the rest are listed manually above as FORM_NAMES.
    mega_counts = {
        pokemon_id: len(megas)
        for (pokemon_id, megas) in get_mega_evolutions(root).items()
    }
    namer = SpriteFileNamer(out, mega_counts, FORM_NAMES)

    # TODO Meowstic is counted as simply female in here, but should probably be
    # saved with a form filename as well
    # TODO should skip the extra komala and the totem forms
    #with read_garc(root / 'rom/a/0/0/8') as garc:  # ORAS
    with read_garc(root / 'rom/a/0/9/4') as garc:  # SUMO
        f = garc[0][0]
        pokemonid = 0
        while True:
            pokemonid += 1
            data = f.read(4)
            # All zeroes means we're done.  Maybe.  More data follows after
            # this, but it doesn't seem to be the same format, and I don't know
            # what exactly it's for.
            if data == b'\x00\x00\x00\x00':
                break

            # Index of the first model (also zero-indexed), how many models the
            # Pokémon has, and some flags
            start, count, flags = struct.unpack('<HBB', data)
            # TODO this was CHANGED for SUMO -- for ORAS all the shiny sprites are a second block at the end!
            #model_num = start + 1
            model_num = start * 2 + 1
            #print("pokemon {:3d} -- start {:4d} ({:4d}) -- count {:2d} -- flags {:08b}".format(pokemonid, start, model_num, count, flags))

            # Fix a few odd disconnects between the model listing and the
            # actual dex sprites.
            # TODO there must be a dex sprite index somewhere, this is silly
            # Xerneas has two models, but three dex sprites
            if pokemonid == 716:
                count = 2
            # Lurantis has two models, but one dex sprite
            if pokemonid == 754:
                count = 1
                flags &= ~4
            # Salazzle has two models, but one dex sprite
            if pokemonid == 758:
                count = 1
                flags &= ~4
            # Komala has one model, but two dex sprites
            # FIXME probably skip extracting it at all
            if pokemonid == 775:
                count = 2
            # The above all naturally throw later numbering off; compensate
            if 716 < pokemonid <= 754:
                model_num += 2
            elif 758 < pokemonid <= 775:
                model_num -= 2

            # Sprites come in (normal, shiny) pairs
            namer.add(model_num, pokemonid)
            # TODO SUMO ONLY (should be += 1 for ORAS)
            namer.add(model_num + 1, pokemonid, shiny=True)
            model_num += 2

            form_count = count - 1  # discount "base" form
            # TODO ORAS additionally needs the running total of models
            # (`model_num + count - 1` at this point) to locate its trailing
            # block of shiny sprites; that should be done another way anyway

            # Don't know what flag 1 is; everything has it.
            # Flag 2 means the first alternate form is female.
            if flags & 2:
                assert form_count > 0
                form_count -= 1
                namer.add(model_num, pokemonid, female=True)
                namer.add(model_num + 1, pokemonid, female=True, shiny=True)
                model_num += 2
            # Flag 4 just means there are more forms?
            if flags & 4:
                assert form_count

            for formid in range(1, form_count + 1):
                namer.add(model_num, pokemonid, formid)
                namer.add(model_num + 1, pokemonid, formid, shiny=True)
                model_num += 2

    # And now, do the ripping
    #with read_garc(root / 'rom/a/2/6/3') as garc:  # ORAS
    with read_garc(root / 'rom/a/2/4/0') as garc:  # SUMO
        from .lib.etc1 import decode_etc1

        for i, subfile in enumerate(garc):
            if i == 0:
                # Dummy sprite, not interesting to us
                continue

            # TODO THIS IS ALL ORAS ONLY -- kept as a note, not executed:
            #shiny_prefix = None
            #if i > total_model_count:
            #    i -= total_model_count
            #    # TODO this should be a real feature, as should the 'right'
            #    # hack in the other code
            #    shiny_prefix = 'shiny/'
            #elif 37 <= i <= 41:
            #    # Cosplay Pikachu's outfits -- the sprites are blank, so saving
            #    # these is not particularly useful
            #    continue

            # Read the entry exactly once; a second read of the same stream
            # could come back empty
            data = subfile[0].read()
            with namer.open(i) as f:
                decode_etc1(data).write_to_png(f)
            # TODO ORAS (needs `from .lib.clim import decode_clim`)
            #with namer.open(i, prefix=shiny_prefix) as f:
            #    decode_clim(data).write_to_png(f)
2016-02-26 18:05:51 +00:00
def _munge_source_arg(strpath):
    """argparse ``type=`` callback: turn `strpath` into a `Path`.

    :param strpath: path string taken from the command line.
    :returns: ``Path(strpath)`` when it names an existing directory.
    :raises argparse.ArgumentTypeError: when it does not.
    """
    candidate = Path(strpath)
    if candidate.is_dir():
        # TODO something something romfs, exefs
        return candidate

    message = "{!r} is not a directory".format(strpath)
    raise argparse.ArgumentTypeError(message)
def make_arg_parser():
    """Build the command-line parser.

    It takes three positionals: ``what`` (one of ``data``, ``dex-sprites``
    or ``box-sprites``), ``source`` and ``dest``.  The two paths are
    converted by `_munge_source_arg`, so both must be existing directories.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'what',
        choices=('data', 'dex-sprites', 'box-sprites'),
        help='what to extract',
    )
    # TODO should verify that this is an actual game dump, and find the rom/exe
    parser.add_argument(
        'source',
        type=_munge_source_arg,
        help='path to an unpacked game image',
    )
    parser.add_argument(
        'dest',
        type=_munge_source_arg,
        help='directory to dump the results into',
    )
    return parser
def main(args):
    """Parse the command-line arguments in `args` and run the chosen extractor.

    :param args: list of argument strings, without the program name.
    """
    options = make_arg_parser().parse_args(args)

    # TODO support 'all', and just make some subdirectories per thing
    # TODO or maybe merge all the sprite things together since stuff will need moving around anyway idk
    extractors = {
        'data': extract_data,
        'dex-sprites': extract_dex_sprites,
        'box-sprites': extract_box_sprites,
    }
    extract = extractors.get(options.what)
    if extract is not None:
        extract(options.source, options.dest)
if __name__ == '__main__':
    # Script entry point: pass everything after the program name to main()
    import sys

    cli_args = sys.argv[1:]
    main(cli_args)