From 060dd42c7ad69cd20110b27d7a0d20a4000cecec Mon Sep 17 00:00:00 2001 From: "Eevee (Lexy Munroe)" Date: Tue, 22 Aug 2017 14:29:07 -0700 Subject: [PATCH] Get SUMO to YAML to database basically working, finally! MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Pokémon shapes are now dumped - Machines were being dumped slightly out of order; they are now correct - Item price is now dumped correctly - Identifiers are now computed to match veekun for all Pokémon - Several bugs with evolutions were fixed - Shedinja is now special-cased, alas - Items are now loaded into the db - Pokémon are now loaded into the db, mostly correctly --- pokedex/db/multilang.py | 11 +- pokedex/extract/oras.py | 126 +++++-- pokedex/schema.py | 14 +- scripts/sumo-yaml-to-db.py | 716 ++++++++++++++++++++++++++++++++++++- 4 files changed, 816 insertions(+), 51 deletions(-) diff --git a/pokedex/db/multilang.py b/pokedex/db/multilang.py index b217032..40b1cc6 100644 --- a/pokedex/db/multilang.py +++ b/pokedex/db/multilang.py @@ -46,9 +46,14 @@ def _getset_factory_factory(column_name, string_getter): session = object_session(translations) language = translations.local_language return string_getter(text, session, language) - def setter(translations, value): - # The string must be set on the Translation directly. - raise AttributeError("Cannot set %s" % column_name) + + if underlying_type is dict: + def setter(translations, key, value): + setattr(translations, instance.value_attr, value) + else: + def setter(translations, value): + # The string must be set on the Translation directly. 
+ raise AttributeError("Cannot set %s" % column_name) return getter, setter return getset_factory diff --git a/pokedex/extract/oras.py b/pokedex/extract/oras.py index 0a4b8e6..50f7dbd 100644 --- a/pokedex/extract/oras.py +++ b/pokedex/extract/oras.py @@ -28,6 +28,7 @@ import yaml import pokedex.schema as schema from .lib.garc import GARCFile, decrypt_xy_text +from .lib.pc import PokemonContainerFile # TODO: ribbons! 080 in sumo @@ -104,6 +105,25 @@ COLORS = { 2: 'pc.yellow', } +# NOTE: these are listed in veekun order, which doesn't match the games, just +# as with colors +SHAPES = { + 0: 'ps.ball', + 12: 'ps.squiggle', + 2: 'ps.fish', + 13: 'ps.arms', + 8: 'ps.blob', + 9: 'ps.upright', + 1: 'ps.legs', + 4: 'ps.quadruped', + 11: 'ps.wings', + 7: 'ps.tentacles', + 6: 'ps.heads', # NOTE: this is really multi-body + 10: 'ps.humanoid', + 5: 'ps.bug-wings', + 3: 'ps.armor', # NOTE/TODO?: this is really just bug-shaped +} + DAMAGE_CLASSES = { 0: 'dc.status', 1: 'dc.physical', @@ -297,7 +317,7 @@ SUMO_SCRIPT_ENTRIES = { 'pokemon-weight-flavor': 117, 'trainer-class-names': 106, 'berry-names': 65, - # 49 might be pokédex colors? or maybe clothing colors + # 49 appears to be clothing dye colors + a set of clothes patterns? # 38: item names, with macros to branch for pluralization # 114: copy of item names, but with "PP" in latin in korean (?!) @@ -428,7 +448,8 @@ FORM_NAMES = { # TODO why are 10 and 50 duplicated? 718: (None, '10', '10', '50', 'complete'), # Hoopa - 720: ('confined', 'unbound'), + # TODO should the default form be 'confined'? + 720: (None, 'unbound'), # Gumshoos 735: (None, 'totem'), # Vikavolt @@ -512,7 +533,9 @@ pokemon_struct = Struct( 'height' / Int16ul, 'weight' / Int16ul, 'machines' / BitsSwapped(Bitwise(Array(16 * 8, Flag))), + # TODO this is clearly not tutors (in sumo anyway)? bulb is 1/1/9, char 2/2/18, squirtle 4/4/36, then all zeroes until dratini 64/64/64, then same with next starters... 
'tutors' / Int32ul, + # TODO appear to be all zeroes, at least in sumo 'mystery1' / Int16ul, 'mystery2' / Int16ul, # TODO these are unused in sumo @@ -664,7 +687,7 @@ item_struct = Struct( # 6 - tms # 7 - berries # 8 - key items - 'mystery0d' / Int8ul, + 'pocketish' / Int8ul, # 1 - can be used (+ consumed) by pokémon (maybe for recycle purposes) # 16 - black/white/yellow/red/blue flutes?? (not in xy?) 'mystery0e' / Int8ul, @@ -991,8 +1014,8 @@ def make_identifier(english_name): return re.sub( '[^a-zA-Z0-9-]+', '-', - english_name.lower().replace('’', ''), - ) + english_name.lower().replace('é', 'e').replace('’', '').replace('♂', '-m').replace('♀', '-f'), + ).rstrip('-') @contextmanager def read_garc(path): @@ -1372,18 +1395,18 @@ def extract_data(root, out): f.seek(0x0049795a) # SUMO # TODO magic number (107) machineids = struct.unpack('<107H', f.read(2 * 107)) - # TODO dunno if this is still true + # FIXME this is no longer true in sun/moon, of course # Order appears to be based on some gen 4 legacy: TMs 1 through 92, HMs # 1 through 6, then the other eight TMs and the last HM. But the bits # in the Pokémon structs are in the expected order of 1 through 100, 1 # through 7 machines = [ identifiers['move'][moveid] - for moveid in - machineids[0:92] + - machineids[98:106] + - machineids[92:98] + - machineids[106:] + for moveid in machineids + #machineids[0:92] + + #machineids[98:106] + + #machineids[92:98] + + #machineids[106:] ] # TODO Pokémon box sprite map @@ -1418,19 +1441,27 @@ def extract_data(root, out): item_ct = len(garc) for i, subfile in enumerate(garc): identifier = identifiers['item'][i] - if identifier == '???': + if identifier == '': # Junk non-item - # TODO striiictly speaking, maybe we should dump these anyway + # TODO striiictly speaking, maybe we should dump these + # anyway... 
but we have no way of knowing what they'd do, so, + # eh continue item = all_items[identifier] = schema.Item() + item.game_index = i item.name = collect_text(texts, 'item-names', i) item.flavor_text = collect_text(texts, 'item-flavor', i) raw_item = item_struct.parse_stream(subfile[0]) - item.price = raw_item.price + item.price = raw_item.price * 10 item.fling_power = raw_item.fling_power + + subfile[0].seek(0) + data = subfile[0].read() + print(f"{identifiers['item'][i]:24s} {raw_item.price:5d} {raw_item.fling_effect:3d} {raw_item.natural_gift_effect:3d} {raw_item.natural_gift_power:3d} {raw_item.natural_gift_type & 31:2d} | {raw_item.pocketish:1d} {data.hex()[:32]}") + with (out / 'items.yaml').open('w') as f: f.write(Camel([schema.POKEDEX_TYPES]).dump(all_items)) @@ -1469,7 +1500,18 @@ def extract_data(root, out): assert not unused_image_datae # ------------------------------------------------------------------------- - # Pokémon structs + # Pokémon + + # Shapes are stored separately alongside some Pokédex sorting data, since + # they're only used in the Pokédex search. In SUMO, at least, every flavor + # form has its own shape. + # TODO where is this in other games? + # TODO what are the other records in these two files? file 0 has 11 records, file 1 has 7 + with read_garc(root / 'rom/a/1/5/2') as garc: # SUMO + subfile = PokemonContainerFile(garc[1][0]) + shape_data = subfile[1].read() + # One byte per form, no parsing required! 
+ pokémon_form_shapes = [SHAPES[shape_id] for shape_id in shape_data] # TODO document this properly sometime, somewhere, but the gist is: # - a species may have multiple forms @@ -1564,18 +1606,20 @@ def extract_data(root, out): pokémon.game_index = i base_species_id, form_name_id = concrete_form_order[i] + flavor_id = species_forms[base_species_id]['flavor_ids'][form_name_id] # TODO i observe this is explicitly a species name, the one thing that # really is shared between forms pokémon.name = collect_text(texts, 'species-names', base_species_id) pokémon.genus = collect_text(texts, 'genus-names', base_species_id) # FIXME ho ho, hang on a second, forms have their own flavor text too!! # TODO well this depends on which game you're dumping - pokémon.flavor_text = collect_text(texts, 'species-flavor-moon', base_species_id) + pokémon.flavor_text = collect_text(texts, 'species-flavor-moon', flavor_id) # FIXME this is pretty temporary hackery; ideally the file would be # arranged around species, not concrete forms pokémon.form_base_species = identifiers['species'][base_species_id] pokémon.form_number = form_name_id + pokémon.form_identifier = species_forms[base_species_id]['forms'][form_name_id] if i < len(species_forms) and not species_forms[i]['is_concrete']: pokémon.form_appearances = species_forms[i]['forms'] else: @@ -1637,6 +1681,7 @@ def extract_data(root, out): # FIXME safari escape?? pokémon.base_experience = record.base_exp pokémon.color = record.color + pokémon.shape = pokémon_form_shapes[flavor_id] # FIXME what units are these! 
pokémon.height = record.height pokémon.weight = record.weight @@ -1646,7 +1691,7 @@ def extract_data(root, out): # TODO transform to an OD somehow probably pokemon_data.append(record) - print("{:4d} {:25s} {} {:5d} {:5d} {:4d} {:4d} {:2d} / {p.z_crystal:3d} {p.z_base_move:3d} {p.z_move:3d} | {:10s} - {p.effort_padding:2d} - {p.effort:04x} {p.effort_hp:1d} {p.effort_attack:1d} {p.effort_defense:1d} {p.effort_speed:1d} {p.effort_special_attack:1d} {p.effort_special_defense:1d}".format( + print("{:4d} {:25s} {} {:5d} {:5d} {:4d} {:4d} {:2d} / {p.z_crystal:3d} {p.z_base_move:3d} {p.z_move:3d} | {:10s} - {p.effort_padding:2d} - {p.safari_escape:3d} {p.mystery1:5d} {p.mystery2:5d} {p.held_item3:3d} {p.tutors:10} {shape}".format( i, identifiers['pokémon'][i], ('0'*16 + bin(record.mystery1)[2:])[-16:], @@ -1657,6 +1702,7 @@ def extract_data(root, out): record.form_count, record.color, p=record, + shape=pokémon_form_shapes[flavor_id], )) # ------------------------------------------------------------------------- @@ -1762,9 +1808,13 @@ def extract_data(root, out): if i > len(identifiers['species']): continue moveset = all_pokémon[ident].moves + eggseen = set() eggset = moveset['egg'] = [] for moveid in container.moveids: - eggset.append(identifiers['move'][moveid]) + # Swinub has Mud Shot listed twice, for some reason + if moveid not in eggseen: + eggset.append(identifiers['move'][moveid]) + eggseen.add(moveid) # Level-up moves with read_garc(root / 'rom/a/0/1/3') as garc: # SUMO @@ -1811,16 +1861,18 @@ def extract_data(root, out): evo['into'] = identifiers['pokémon'][raw_evo.into_species] if raw_evo.method == 1: - evo['trade'] = 'ev.level-up' + evo['trigger'] = 'ev.level-up' evo['minimum-friendship'] = 220 elif raw_evo.method == 2: - evo['trade'] = 'ev.level-up' + evo['trigger'] = 'ev.level-up' # FIXME is this an enum? 
also really it's morning OR day evo['time-of-day'] = 'day' + evo['minimum-friendship'] = 220 elif raw_evo.method == 3: - evo['trade'] = 'ev.level-up' + evo['trigger'] = 'ev.level-up' # FIXME is this an enum? evo['time-of-day'] = 'night' + evo['minimum-friendship'] = 220 elif raw_evo.method == 4: evo['trigger'] = 'ev.level-up' evo['minimum-level'] = raw_evo.level @@ -1831,6 +1883,9 @@ def extract_data(root, out): evo['held-item'] = identifiers['item'][raw_evo.param] elif raw_evo.method == 7: evo['trigger'] = 'ev.trade' + # FIXME uhh this is always zero. karrablast and shelmet do + # not actually mention each other here. guess it's + # hardcoded?? awesome evo['traded-with'] = identifiers['pokémon'][raw_evo.param] elif raw_evo.method == 8: evo['trigger'] = 'ev.use-item' @@ -1880,6 +1935,7 @@ def extract_data(root, out): evo['held-item'] = identifiers['item'][raw_evo.param] elif raw_evo.method == 20: evo['trigger'] = 'ev.level-up' + evo['time-of-day'] = 'night' evo['held-item'] = identifiers['item'][raw_evo.param] elif raw_evo.method == 21: evo['trigger'] = 'ev.level-up' @@ -1920,7 +1976,9 @@ def extract_data(root, out): elif raw_evo.method == 30: evo['trigger'] = 'ev.level-up' evo['minimum-level'] = raw_evo.level - evo['party-member-type'] = TYPES[raw_evo.param] + # This is Pancham (needs Dark-type party member) but the + # type seems to be hardcoded, ugh + evo['party-member-type'] = 't.dark' elif raw_evo.method == 31: evo['trigger'] = 'ev.level-up' evo['minimum-level'] = raw_evo.level @@ -1966,18 +2024,34 @@ def extract_data(root, out): all_pokémon[identifier].evolutions.append(evo) + # Shedinja is an exceptionally special case that isn't listed in the data + # TODO having to lay this out explicitly bugs me + all_pokémon['nincada'].evolutions.append(dict( + into='shedinja', + trigger='shed', + )) + # Mega evolution # TODO # already parsed a/0/1/5 as mega_evolutions... but... that only lists # items? what lists the actual megas? OH is the number a form number?? 
# wow i really need a list of species -> forms eh # TODO what is a/1/9/4 (ORAS) or a/0/1/6 (SUMO)? 8 files of 404 bytes each + # in both versions, so not dependent on the number of loci # Baby Pokémon #with read_garc(root / 'rom/a/1/9/6') as garc: # ORAS - #with read_garc(root / 'rom/a/0/1/8') as garc: # SUMO? - # for subfile in garc: - # baby_pokemon = subfile[0].read() - # print(repr(baby_pokemon)) + with read_garc(root / 'rom/a/0/1/8') as garc: # SUMO? + # Last record shows something else + last_data = garc[-1][0].read() + for i, subfile in enumerate(garc[:-1]): + data = subfile[0].read() + baby_id = int.from_bytes(data[:2], 'little') + print(identifiers['species'][i], '->', identifiers['species'][baby_id]) + if data[2:] != b'\xff\xff': + print("!!!", repr(data)) + other_id = int.from_bytes(last_data[i*2:i*2+2], 'little') + if other_id != baby_id: + print("!!!", i, baby_id, other_id) # Tutor moves (from the personal structs) # FIXME why is this down here and not just in the personal loop? diff --git a/pokedex/schema.py b/pokedex/schema.py index d14d8ad..26041cd 100644 --- a/pokedex/schema.py +++ b/pokedex/schema.py @@ -165,6 +165,8 @@ Evolution = _ForwardDeclaration() EncounterMap = _ForwardDeclaration() MoveSet = _ForwardDeclaration() Pokedex = _ForwardDeclaration() +PokédexColor = _ForwardDeclaration() +PokédexShape = _ForwardDeclaration() class Ability(VersionedLocus): @@ -201,25 +203,32 @@ class Pokémon(VersionedLocus): # FIXME hackery to get forms working well enough to import back into veekun # later; this will need some cleaning up later, somehow form_base_species = _Value(str) + form_identifier = _Value(str) form_number = _Value(int) - form_appearances = _List(str) + form_appearances = _List(str) # flavor only! form_name = _Localized(str) types = _List(Type, min=1, max=2) + # FIXME how do i distinguish hidden ability? 
+ abilities = _List(Ability, min=1, max=3) base_stats = _Map(Stat, int) growth_rate = _Value(GrowthRate) base_experience = _Value(int, min=0, max=255) effort = _Map(Stat, int) capture_rate = _Value(int, min=0, max=255) + base_happiness = _Value(int, min=0, max=255) held_items = _Map(Item, int) gender_rate = _Value(int) egg_groups = _List(EggGroup, min=1, max=2) + hatch_counter = _Value(int, min=0, max=255) pokedex_numbers = _Map(Pokedex, int) # TODO family? evolutions = _List(Evolution) + color = _Value(PokédexColor) + shape = _Value(PokédexShape) genus = _Localized(str) flavor_text = _Localized(str) # TODO maybe want little wrapper types that can display as either imperial @@ -243,9 +252,6 @@ class Pokémon(VersionedLocus): # TODO should this be written in hex, maybe? game_index = _Value(int) - # FIXME how do i distinguish hidden ability? - abilities = _List(Ability) - Pokemon = Pokémon diff --git a/scripts/sumo-yaml-to-db.py b/scripts/sumo-yaml-to-db.py index ca2311b..db7e971 100644 --- a/scripts/sumo-yaml-to-db.py +++ b/scripts/sumo-yaml-to-db.py @@ -1,7 +1,9 @@ +from collections import OrderedDict import itertools from pathlib import Path from camel import Camel +from sqlalchemy import func from sqlalchemy import inspect from sqlalchemy.orm import Load @@ -10,6 +12,25 @@ import pokedex.db.tables as t import pokedex.main as main import pokedex.schema as schema +# FIXME machine to move mapping isn't listed anywhere, oops. where does that go? + +# TODO still obviously missing: +# - pokedex order +# TODO needs manual fixing: +# - item categories +# - fling effects? 
+# - item effects +# - ability effects +# - has_gender_differences +# - forms_switchable +# - is_battle_only +# - form explanations +# - pokemon and form order +# - evolutions requiring particular locations +# TODO needs fixing codewise: +# - decide if i'm using these new pixel version icons or what +# - remove extraneous "Pokémon" after genus +# - out = Path('moon-out') session = pokedex.db.connect('postgresql:///veekun_pokedex') @@ -37,14 +58,24 @@ db_damage_classes = {row.identifier: row for row in session.query(t.MoveDamageCl db_move_categories = {row.identifier: row for row in session.query(t.MoveMetaCategory)} db_move_ailments = {row.identifier: row for row in session.query(t.MoveMetaAilment)} db_move_flags = {row.identifier: row for row in session.query(t.MoveFlag)} +db_move_methods = {row.identifier: row for row in session.query(t.PokemonMoveMethod)} # These are by id since move effects don't have identifiers atm db_move_effects = {row.id: row for row in session.query(t.MoveEffect)} +db_colors = {row.identifier: row for row in session.query(t.PokemonColor)} +db_shapes = {row.identifier: row for row in session.query(t.PokemonShape)} +db_growth_rates = {row.identifier: row for row in session.query(t.GrowthRate)} +db_genders = {row.identifier: row for row in session.query(t.Gender)} +db_evo_triggers = {row.identifier: row for row in session.query(t.EvolutionTrigger)} +db_egg_groups = {row.identifier: row for row in session.query(t.EggGroup)} +db_stats = OrderedDict((row.identifier, row) for row in session.query(t.Stat).order_by(t.Stat.id.asc())) + # Insert some requisite new stuff if it doesn't already exist db_sumo_generation = session.query(t.Generation).get(7) if db_sumo_generation: db_sumo_version_group = session.query(t.VersionGroup).filter_by(identifier='sun-moon').one() + db_moon = session.query(t.Version).filter_by(identifier='moon').one() else: # Distinguish simplified and traditional Chinese db_languages['zh'].identifier = 'zh-Hant' @@ -118,14 
+149,98 @@ def cheap_upsert(db_obj, db_class, new_only, **data): return db_obj -def update_names(sumo_obj, db_obj): +def update_names(sumo_name_map, db_name_map): """Update the database's names as necessary, and add any missing ones""" - for lang, name in sumo_obj.name.items(): - old_name = db_obj.name_map.get(db_languages[lang]) + for lang, name in sumo_name_map.items(): + old_name = db_name_map.get(db_languages[lang]) if old_name != name: if old_name: print(f"- NOTE: changing {old_name!r} to {name!r} in {lang}") - db_obj.name_map[db_languages[lang]] = name + db_name_map[db_languages[lang]] = name + + +# Items +print() +print("--- ITEMS ---") +with (out / 'items.yaml').open(encoding='utf8') as f: + sumo_items = camel.load(f.read()) + +db_items = { + row.identifier: row for row in session.query(t.Item) + .options(Load(t.Item).joinedload('names')) +} + +for sumo_identifier, sumo_item in sumo_items.items(): + if sumo_identifier == 'none': + # FIXME just don't dump these yo + continue + print(sumo_identifier) + db_item = db_items.get(sumo_identifier) + if not db_item: + print("- new") + db_item = cheap_upsert( + db_item, + t.Item, + dict( + identifier=sumo_identifier, + # This needs to be done manually, since the categories are 100% + # fanon invention. Default to the "x/y unknown" dummy category. 
+ # NOTE: the categories are linked to pockets but the pockets are + # different in nearly every game, so, uh + category_id=10001, + # FIXME veekun has an "effect" called "berry effect" that just means + # "do whatever the berry does", and that's terrible, and also doesn't + # match the games, SIGH + fling_effect=None, + ), + cost=sumo_item.price, + fling_power=sumo_item.fling_power or None, + ) + + # Names + update_names(sumo_item.name, db_item.name_map) + + # Populate with dummy effects + if db_item in session.new: + db_items[sumo_identifier] = db_item + db_item.short_effect_map[db_languages['en']] = f"XXX new effect for {sumo_identifier}" + db_item.effect_map[db_languages['en']] = f"XXX new effect for {sumo_identifier}" + + # Flavor text is per-version (group) and thus always new + # FIXME not idempotent + """ + for lang, flavor_text in sumo_item.flavor_text.items(): + session.add(t.ItemFlavorText( + item=db_item, + version_group=db_sumo_version_group, + language=db_languages[lang], + flavor_text=flavor_text, + )) + """ + + # Game index + # FIXME not idempotent + """ + session.add(t.ItemGameIndex( + item=db_item, + generation=db_sumo_generation, + game_index=sumo_item.game_index, + )) + """ + + # FIXME can flags be done automatically? some of them, at least? 
they are: + # - countable + # - consumable + # - usable-overworld + # - usable-in-battle + # - holdable + # - holdable-passive + # - holdable-active + # - underground + + # TODO aside from natural gift bits, i have no idea where berry data is, + # and i suspect our existing natural gift effects are way off :S + # Abilities @@ -134,29 +249,31 @@ print("--- ABILITIES ---") with (out / 'abilities.yaml').open(encoding='utf8') as f: abilities = camel.load(f.read()) -for (sumo_identifier, sumo_ability), db_ability in itertools.zip_longest( - abilities.items(), - session.query(t.Ability) +db_abilities = { + row.identifier: row + for row in session.query(t.Ability) .filter_by(is_main_series=True) - .order_by(t.Ability.id) .options(Load(t.Ability).joinedload('names')) -): +} + +for sumo_identifier, sumo_ability in abilities.items(): print(sumo_identifier) + db_ability = db_abilities.get(sumo_identifier) if db_ability: assert sumo_identifier == db_ability.identifier - update_names(sumo_ability, db_ability) else: - db_ability = t.Ability( + db_abilities[sumo_identifier] = db_ability = t.Ability( identifier=sumo_identifier, generation_id=7, is_main_series=True, + names=[], ) - for lang, name in sumo_ability.name.items(): - db_ability.name_map[db_languages[lang]] = name session.add(db_ability) + update_names(sumo_ability.name, db_ability.name_map) + # Flavor text is per-version (group) and thus always new - # FIXME uhh no it isn't, not if i've alreayd run this script once lol + # TODO not idempotent """ for lang, flavor_text in sumo_ability.flavor_text.items(): session.add(t.AbilityFlavorText( @@ -174,6 +291,7 @@ print("--- MOVES ---") with (out / 'moves.yaml').open(encoding='utf8') as f: moves = camel.load(f.read()) +db_moves = {} for (sumo_identifier, sumo_move), db_move in itertools.zip_longest( moves.items(), session.query(t.Move) @@ -196,10 +314,11 @@ for (sumo_identifier, sumo_move), db_move in itertools.zip_longest( session.add(effect) db_move_effects[effect_id] = effect 
- db_move = cheap_upsert( + db_move = db_moves[sumo_identifier] = cheap_upsert( db_move, t.Move, - dict(identifier=sumo_identifier, generation_id=7), + dict(generation_id=7, names=[]), + identifier=sumo_identifier, type=db_types[sumo_move.type.rpartition('.')[2]], power=None if sumo_move.power in (0, 1) else sumo_move.power, pp=sumo_move.pp, @@ -228,7 +347,7 @@ for (sumo_identifier, sumo_move), db_move in itertools.zip_longest( **loggable_changes)) # Names - update_names(sumo_move, db_move) + update_names(sumo_move.name, db_move.name_map) # Move flags old_flag_set = frozenset(db_move.flags) @@ -267,7 +386,7 @@ for (sumo_identifier, sumo_move), db_move in itertools.zip_longest( ) # Flavor text is per-version (group) and thus always new - # FIXME uhh no it isn't, not if i've already run this script once lol + # FIXME not idempotent """ for lang, flavor_text in sumo_move.flavor_text.items(): session.add(t.MoveFlavorText( @@ -280,6 +399,567 @@ for (sumo_identifier, sumo_move), db_move in itertools.zip_longest( session.flush() +# Pokémon! Auugh! 
+print() +print("--- POKéMON ---") +db_pokemons = {} +db_pokemon_forms = {} +db_pokemon_specieses = {} +for species in ( + session.query(t.PokemonSpecies) + .options( + Load(t.PokemonSpecies).joinedload('evolution_chain'), + Load(t.PokemonSpecies).joinedload('pokemon').joinedload('forms'), + Load(t.PokemonSpecies).joinedload('pokemon').subqueryload('stats'), + Load(t.PokemonSpecies).joinedload('pokemon').subqueryload('types'), + Load(t.PokemonSpecies).joinedload('pokemon').subqueryload('pokemon_abilities'), + Load(t.PokemonSpecies).subqueryload('forms'), + Load(t.PokemonSpecies).subqueryload('evolutions'), + Load(t.PokemonSpecies).subqueryload('egg_groups'), + Load(t.PokemonSpecies).subqueryload('names'), + Load(t.PokemonSpecies).joinedload('pokemon').joinedload('forms').subqueryload('names'), + ) + .all() + ): + for form in species.forms: + db_pokemon_forms[form.identifier] = form + db_pokemon_forms[species.identifier, form.form_identifier] = form + for pokemon in species.pokemon: + db_pokemons[pokemon.identifier] = pokemon + db_pokemon_specieses[species.identifier] = species + +max_pokemon_id = session.query(func.max(t.Pokemon.id)).scalar() +max_pokemon_form_id = session.query(func.max(t.PokemonForm.id)).scalar() + +with (out / 'pokemon.yaml').open(encoding='utf8') as f: + pokemon = camel.load(f.read()) + +sumo_pokemon_by_species = OrderedDict() +# This maps (Pokémon!) identifiers to { base_pokemon, members }, where +# Pokémon in the same family will (in theory) share the same value +sumo_families = dict() +sumo_evolves_from = dict() # species! +for sumo_identifier, sumo_pokemon in pokemon.items(): + if sumo_identifier == 'egg': + continue + + sumo_pokemon.identifier = sumo_identifier + sumo_species_identifier = sumo_pokemon.form_base_species + sumo_pokemon_by_species.setdefault(sumo_species_identifier, []).append(sumo_pokemon) + + # Construct the family. 
Basic idea is to pretend we're a new family, then + # look through the evolutions for any existing families and merge them + family = dict( + base_pokemon=sumo_identifier, + members={sumo_identifier}, + db_chain=None, + ) + try: + family['db_chain'] = db_pokemon_specieses[sumo_species_identifier].evolution_chain + except KeyError: + pass + for evolution in sumo_pokemon.evolutions: + into = evolution['into'] + sumo_evolves_from[pokemon[into].form_base_species] = sumo_species_identifier + if into in sumo_families: + # If this happens, then the current Pokémon evolves into a Pokémon + # that's already been seen, therefore this is an earlier evolution + family['members'].update(sumo_families[into]['members']) + if not family['db_chain']: + family['db_chain'] = sumo_families[into]['db_chain'] + else: + family['members'].add(into) + # Once we're done, ensure every member is using this same newly-updated dict + for member in family['members']: + sumo_families[member] = family + +for species_identifier, sumo_pokemons in sumo_pokemon_by_species.items(): + db_species = db_pokemon_specieses.get(species_identifier) + sumo_form_identifiers = sumo_pokemons[0].form_appearances + is_concrete = not sumo_form_identifiers + + if is_concrete: + sumo_form_identifiers = [sumo_pokemon.form_identifier for sumo_pokemon in sumo_pokemons] + if species_identifier in {'cherrim', 'shellos', 'gastrodon', 'floette', 'furfrou'}: + # These changed to be concrete at some point, but changing form kind is + # a pain in the ass and I don't want to do it, so let's not + is_concrete = False + + # Let's check some stuff first I guess + print(f"{species_identifier:24s}") + if db_species: + if is_concrete: + # Concrete means every form is a Pokemon, and every Pokemon has one PokemonForm + if len(db_species.pokemon) != len(db_species.forms): + print(f"- WARNING: expected the same number of Pokémon and forms but got {len(db_species.pokemon)} vs {len(db_species.forms)}") + + for form in db_species.forms: + if 
not form.is_default: + print(f"- WARNING: expected every form to be a default but {form.form_identifier} is not") + + sumo_pokemon_identifiers = {pokemon.identifier for pokemon in sumo_pokemons} + db_pokemon_identifiers = {pokemon.identifier for pokemon in db_species.pokemon} + added_pokemon = sumo_pokemon_identifiers - db_pokemon_identifiers + removed_pokemon = db_pokemon_identifiers - sumo_pokemon_identifiers + if added_pokemon: + print(f"- NOTE: new forms {added_pokemon}") + if removed_pokemon: + print(f"- NOTE: removed forms?? {removed_pokemon}") + else: + # Flavor means there's only one Pokemon, and it has one PokemonForm per form + if len(db_species.pokemon) > 1: + print(f"- WARNING: expected only one Pokémon but got {db_species.pokemon}") + + default_count = 0 + form_identifiers = set() + for form in db_species.forms: + form_identifiers.add(form.form_identifier) + if form.is_default: + default_count += 1 + if default_count != 1: + print(f"- WARNING: expected exactly one default but found {default_count}") + + for sumo_form_identifier in sumo_form_identifiers: + if sumo_form_identifier in form_identifiers: + form_identifiers.discard(sumo_form_identifier) + else: + print(f"- NOTE: new form {sumo_form_identifier}") + + if form_identifiers: + print(f"- NOTE: SUMO is missing forms {', '.join(sorted(ident or 'None' for ident in form_identifiers))} ({sumo_form_identifiers})") + + else: + print(f"- NOTE: new {'concrete' if is_concrete else 'flavor'} form") + print(" ", is_concrete, "|", sumo_pokemons[0].form_appearances) + print(" ", [sp.identifier for sp in sumo_pokemons]) + + # NOTE: this is a terrible way to store it in the yaml, and also it's + # inaccurate for gen 7 i think? 
and why do i use -1 for genderless instead + # of null lol + if sumo_pokemons[0].gender_rate == 255: + gender_rate = -1 + else: + # 31 -> 1, etc, up to 254 -> 8 + gender_rate = (sumo_pokemons[0].gender_rate + 2) // 32 + + # A Pokémon is a baby if it's the earliest evolution, it cannot breed, and + # it evolves into something that can breed + is_baby = False + sumo_identifier = sumo_pokemons[0].identifier + sumo_family = sumo_families[sumo_identifier] + is_baby = ( + sumo_family['base_pokemon'] == sumo_identifier and + sumo_pokemons[0].egg_groups == ['eg.no-eggs'] and + any(pokemon[identifier].egg_groups != ['eg.no-eggs'] + for identifier in sumo_family['members']) + ) + + # If there's no evolution chain yet, make one + # NOTE: i don't have the baby trigger items, because they don't seem to be + # data; they're in code and i've yet to find them + db_chain = sumo_family['db_chain'] + if not db_chain: + db_chain = t.EvolutionChain() + session.add(db_chain) + sumo_family['db_chain'] = db_chain + + db_species = db_pokemon_specieses[species_identifier] = cheap_upsert( + db_species, + t.PokemonSpecies, + dict( + generation_id=7, + # Avoids database fetches on new rows + evolutions=[], + egg_groups=[], + names=[], + # Doesn't apply to Pokémon not in FRLG + habitat_id=None, + # Doesn't apply to Pokémon not in Conquest + conquest_order=None, + # Needs to be populated manually + # FIXME should i get this by checking for different sprites...? 
i + # don't think that would quite catch everything + has_gender_differences=False, + # Needs to be populated manually + forms_switchable=False, + # Easier to populate with a separate script after the fact + order=0, + ), + id=sumo_pokemons[0].game_index, + identifier=species_identifier, + parent_species=db_pokemon_specieses[sumo_evolves_from[species_identifier]] if species_identifier in sumo_evolves_from else None, + evolution_chain=db_chain, + # NOTE: color is actually per-concrete + color=db_colors[sumo_pokemons[0].color.rpartition('.')[2]], + # NOTE: shape is actually per-flavor + shape=db_shapes[sumo_pokemons[0].shape.rpartition('.')[2]], + gender_rate=gender_rate, + # NOTE: capture rate is actually per-concrete + capture_rate=sumo_pokemons[0].capture_rate, + base_happiness=sumo_pokemons[0].base_happiness, + is_baby=is_baby, + # NOTE: this is nonsense for pokémon that can't be in eggs (which is + # not a thing i'm sure i have tracked atm, since i don't directly dump + # the egg data) + hatch_counter=sumo_pokemons[0].hatch_counter, + # NOTE: actually per concrete even though that doesn't entirely make sense haha + growth_rate=db_growth_rates[sumo_pokemons[0].growth_rate.rpartition('.')[2]], + ) + + # NOTE names are given per concrete form but are really truly a species thing + # FIXME i am not sure doing both of these at the same time actually works + update_names(sumo_pokemons[0].name, db_species.name_map) + update_names(sumo_pokemons[0].genus, db_species.genus_map) + + # Flavor text is per-version (group) and thus always new + # FIXME this is wrong; flavor text is per form! + # FIXME not idempotent + # FIXME get for sun as well + """ + for lang, flavor_text in sumo_pokemons[0].flavor_text.items(): + if flavor_text: + session.add(t.PokemonSpeciesFlavorText( + species_id=db_species.id, + version=db_moon, + language=db_languages[lang], + flavor_text=flavor_text, + )) + """ + + # FIXME i fucked something up! 
new pokemon's forms ended up in the + # stratosphere and also not marked as defaults. had to do: + # update pokemon_forms set id = pokemon_id, is_default = true where form_order = 1 and id > 10000 and pokemon_id between 720 and 9999; + sumo_db_pokemon_pairs = [] + sumo_db_pokemon_form_pairs = [] + if species_identifier == 'floette': + # This is a fucking mess; there are two concrete Pokémon, and one of + # them has multiple flavor forms, so, goddamn. Let's just assume + # Sun/Moon didn't change anything, I guess. + # TODO fix this? requires making a tree of concrete -> flavor and + # consolidating the below branches + for sumo_pokemon in sumo_pokemons: + if sumo_pokemon.identifier == 'floette-red': + sumo_db_pokemon_pairs.append((sumo_pokemon, db_pokemons['floette'])) + elif sumo_pokemon.identifier == 'floette-eternal': + sumo_db_pokemon_pairs.append((sumo_pokemon, db_pokemons['floette-eternal'])) + elif is_concrete: + # Concrete: multiple yaml records, each is a Pokemon row with one PokemonForm + for form_order, (sumo_pokemon, sumo_form_identifier) in enumerate(zip(sumo_pokemons, sumo_form_identifiers), start=1): + if sumo_pokemon.identifier in db_pokemons: + id = db_pokemons[sumo_pokemon.identifier].id + else: + max_pokemon_id += 1 + id = max_pokemon_id + db_pokemon = cheap_upsert( + db_pokemons.get(sumo_pokemon.identifier), + t.Pokemon, + dict( + # Avoids database fetches on new rows + types=[], + pokemon_abilities=[], + items=[], + names=[], + stats=[], + # Easier to populate manually + order=0, + ), + id=id, + identifier=sumo_pokemon.identifier, + species=db_species, + # TODO the units in the yaml don't match my goofy plan from rby + # (which i'm not 100% on anyway) + height=sumo_pokemon.height // 10, + weight=sumo_pokemon.weight, + base_experience=sumo_pokemon.base_experience, + # NOTE: this is less about a real sense of default-ness and + # more about "what form should veekun default to when looking + # at this species" (which doesn't belong in the data tbh) 
+ is_default=form_order == 1, + ) + + db_pokemons[sumo_pokemon.identifier] = db_pokemon + sumo_db_pokemon_pairs.append((sumo_pokemon, db_pokemon)) + + db_form = next(iter(db_pokemons[sumo_pokemon.identifier].forms), None) + if db_form: + id = db_form.id + else: + max_pokemon_form_id += 1 + id = max_pokemon_form_id + db_form = cheap_upsert( + db_form, + t.PokemonForm, + dict( + version_group=db_sumo_version_group, + # Easier to do separately + order=0, + # Needs doing manually + is_battle_only=False, + ), + id=id, + identifier=sumo_pokemon.identifier, + form_identifier=sumo_form_identifier, + pokemon=db_pokemons[sumo_pokemon.identifier], + is_default=True, + is_mega=bool(sumo_form_identifier and sumo_form_identifier.startswith('mega')), + form_order=form_order, + ) + + # NOTE the db also has a "pokemon_name" field, e.g. "Sky Shaymin", + # but i don't think that's official? ok well it's marked as + # official but show me where the games say that + update_names(sumo_pokemon.form_name, db_form.form_name_map) + else: + # Flavor: one yaml record, one Pokemon, multiple PokemonForms + # TODO i think there are names for flavor form but the yaml has nowhere to store them at the moment + sumo_pokemon = sumo_pokemons[0] + db_pokemon = cheap_upsert( + next(iter(db_species.pokemon), None), + t.Pokemon, + dict( + types=[], + pokemon_abilities=[], + items=[], + names=[], + stats=[], + order=0, + ), + id=sumo_pokemons[0].game_index, + identifier=species_identifier, + species=db_species, + # TODO the units in the yaml don't match my goofy plan from rby + # (which i'm not 100% on anyway) + height=sumo_pokemon.height // 10, + weight=sumo_pokemon.weight, + base_experience=sumo_pokemon.base_experience, + is_default=True, + ) + sumo_db_pokemon_pairs.append((sumo_pokemon, db_pokemon)) + + for form_order, form_identifier in enumerate(sumo_form_identifiers, start=1): + full_form_identifier = species_identifier + ('-' + form_identifier if form_identifier else '') + if full_form_identifier in 
db_pokemon_forms: + id = db_pokemon_forms[full_form_identifier].id + else: + max_pokemon_form_id += 1 + id = max_pokemon_form_id + cheap_upsert( + db_pokemon_forms.get(full_form_identifier), + t.PokemonForm, + dict( + version_group=db_sumo_version_group, + order=0, + # Needs doing manually + is_battle_only=False, + ), + id=id, + identifier=full_form_identifier, + form_identifier=form_identifier, + pokemon=db_pokemon, + is_default=id < 10000, + is_mega=bool(form_identifier and form_identifier.startswith('mega')), + # FIXME this is wrong if there are existing forms that disappeared in sumo + form_order=form_order, + ) + + # FIXME: lack of 'unknown' kinda throws things off for arceus + + session.flush() + + # Egg groups + old_egg_groups = frozenset(db_species.egg_groups) + new_egg_groups = frozenset(db_egg_groups[ident.rpartition('.')[2]] for ident in sumo_pokemons[0].egg_groups) + for new_egg_group in new_egg_groups - old_egg_groups: + print(f"- adding egg group {new_egg_group}") + db_species.egg_groups.append(new_egg_group) + for old_egg_group in old_egg_groups - new_egg_groups: + print(f"- removing egg group {old_egg_group}") + db_species.egg_groups.remove(old_egg_group) + + # Do stuff that's per concrete Pokémon in the db + for sumo_pokemon, db_pokemon in sumo_db_pokemon_pairs: + # Types + for i, (type_ident, db_type) in enumerate(itertools.zip_longest(sumo_pokemon.types, db_pokemon.types)): + slot = i + 1 + _, _, veekun_ident = type_ident.rpartition('.') + if not db_type: + db_type = db_types[veekun_ident] + print(f"- adding type {db_type}") + session.add(t.PokemonType( + pokemon_id=db_pokemon.id, + type_id=db_type.id, + slot=i + 1, + )) + elif not type_ident: + print(f"- WARNING: seem to have LOST type {db_type}, this is not supported") + elif db_type.identifier == veekun_ident: + pass + else: + print(f"- WARNING: type {db_type} has CHANGED TO {type_ident}, this is not supported") + + # Stats + seen_stats = set() + for existing_stat in db_pokemon.stats: + 
stat_identifier = existing_stat.stat.identifier + seen_stats.add(stat_identifier) + cheap_upsert( + existing_stat, + t.Stat, + dict(), + base_stat=sumo_pokemon.base_stats[stat_identifier], + effort=sumo_pokemon.effort[stat_identifier], + ) + for stat_identifier, stat in db_stats.items(): + if stat.is_battle_only: + continue + if stat_identifier in seen_stats: + continue + db_pokemon.stats.append(t.PokemonStat( + stat=stat, + base_stat=sumo_pokemon.base_stats[stat_identifier], + effort=sumo_pokemon.effort[stat_identifier], + )) + + # Abilities + old_ability_slots = {row.slot: row for row in db_pokemon.pokemon_abilities} + new_ability_slots = {i + 1: ability_ident for (i, ability_ident) in enumerate(sumo_pokemon.abilities)} + if new_ability_slots.get(2) == new_ability_slots[1]: + del new_ability_slots[2] + if new_ability_slots.get(3) == new_ability_slots[1]: + del new_ability_slots[3] + for slot in old_ability_slots.keys() | new_ability_slots.keys(): + old_ability_row = old_ability_slots.get(slot) + new_ability_ident = new_ability_slots.get(slot) + if not old_ability_row: + _, _, veekun_ident = new_ability_ident.rpartition('.') + db_ability = db_abilities[veekun_ident] + print(f"- adding ability {db_ability}") + session.add(t.PokemonAbility( + pokemon_id=db_pokemon.id, + ability_id=db_ability.id, + slot=slot, + is_hidden=(slot == 3), + )) + elif not new_ability_ident: + print(f"- WARNING: seem to have LOST ability {old_ability_row.ability}, this is not supported") + elif old_ability_row.ability.identifier == new_ability_ident.rpartition('.')[2]: + pass + else: + _, _, veekun_ident = new_ability_ident.rpartition('.') + db_ability = db_abilities[veekun_ident] + print(f"- changing ability in slot {slot} from {old_ability_row.ability} to {db_ability}") + old_ability_row.ability = db_ability + + """ + # Items + # FIXME need items from the other game argh, they're per-version + # TODO not idempotent + for item_identifier, rarity in sumo_pokemon.held_items.items(): + 
session.add(t.PokemonItem( + pokemon=db_pokemon, + version=db_moon, + item=db_items[item_identifier.rpartition('.')[2]], + rarity=rarity, + )) + + # Moves + # TODO not idempotent + for method_identifier, moves in sumo_pokemon.moves.items(): + last_row = None + order = None + seen = set() + for move_identifier in moves: + if method_identifier == 'level-up': + # FIXME THIS SUX + ((level, move_identifier),) = move_identifier.items() + else: + level = 0 + if level and last_row and level == last_row.level: + if order is None: + last_row.order = 1 + order = 2 + else: + order += 1 + else: + order = None + + # TODO this is stupid but braviary learns superpower at level + # 1, twice, and I'm not really sure what to do about that; is + # it correct to remove from the data? + key = (move_identifier, level) + if key in seen: + continue + seen.add(key) + + last_row = t.PokemonMove( + pokemon=db_pokemon, + version_group=db_sumo_version_group, + move=db_moves[move_identifier.rpartition('.')[2]], + method=db_move_methods[method_identifier], + level=level, + order=order, + ) + session.add(last_row) + """ + + +# Do evolution after adding all the Pokémon, since Pokémon tend to evolve into +# later Pokémon that wouldn't have been inserted yet. 
# It's also tricky, since there might be an existing matching record among
# several, so every existing evolution row is compared column by column
# against the expected values before a new row is inserted.

# NOTE: trade partners do not seem to be in the data itself, so they have to
# be hardcoded here, argh
_TRADE_EVOLUTION_PARTNERS = {
    'karrablast': 'shelmet',
    'shelmet': 'karrablast',
}


def _optional_row(table, sumo_evolution, key):
    """Return the row of ``table`` named by ``sumo_evolution[key]``, or None.

    The yaml value is reduced to the part after its last ``.`` before being
    used as the lookup key.  ``None`` is returned when ``key`` is absent from
    the evolution record; a present-but-unknown identifier still raises
    ``KeyError`` from ``table``.
    """
    if key not in sumo_evolution:
        return None
    return table[sumo_evolution[key].rpartition('.')[2]]


for species_identifier, sumo_pokemons in sumo_pokemon_by_species.items():
    for sumo_evolution in sumo_pokemons[0].evolutions:
        # Evolutions are on the evolver in the yaml, but evolvee in the db
        db_species = db_pokemon_specieses[pokemon[sumo_evolution['into']].form_base_species]

        if 'traded-with' in sumo_evolution:
            partner_identifier = _TRADE_EVOLUTION_PARTNERS.get(species_identifier)
            if partner_identifier is None:
                # Report the species currently being processed (the loop
                # variable), not a name this loop never binds
                raise ValueError(f"Don't know who trade-evolves with {species_identifier}")
            traded_with = db_pokemon_specieses[partner_identifier]
        else:
            traded_with = None

        expected = dict(
            evolved_species=db_species,
            trigger=db_evo_triggers[sumo_evolution['trigger'].rpartition('.')[2]],
            trigger_item=_optional_row(db_items, sumo_evolution, 'trigger-item'),
            minimum_level=sumo_evolution.get('minimum-level'),
            # Unlike the other lookups, gender is keyed by the raw yaml value
            gender=db_genders[sumo_evolution['gender']] if 'gender' in sumo_evolution else None,
            # NOTE: this needs populating manually; it's not in the yaml either
            location=None,
            held_item=_optional_row(db_items, sumo_evolution, 'held-item'),
            time_of_day=sumo_evolution.get('time-of-day'),
            known_move=_optional_row(db_moves, sumo_evolution, 'known-move'),
            known_move_type=_optional_row(db_types, sumo_evolution, 'known-move-type'),
            minimum_happiness=sumo_evolution.get('minimum-friendship'),
            minimum_beauty=sumo_evolution.get('minimum-beauty'),
            minimum_affection=sumo_evolution.get('minimum-affection'),
            relative_physical_stats={'attack': -1, 'defense': 1, 'equal': 0, None: None}[sumo_evolution.get('higher-physical-stat')],
            party_species=_optional_row(db_pokemon_specieses, sumo_evolution, 'party-member'),
            party_type=_optional_row(db_types, sumo_evolution, 'party-member-type'),
            trade_species=traded_with,
            needs_overworld_rain=sumo_evolution.get('overworld-weather') == 'rain',
            turn_upside_down=sumo_evolution.get('upside-down', False),
        )

        # FIXME need to finish... filling this out
        # Only insert when no existing evolution row of the evolved species
        # already matches every expected column
        already_present = any(
            all(v == getattr(db_evolution, k) for (k, v) in expected.items())
            for db_evolution in db_species.evolutions
        )
        if not already_present:
            print(f"- adding new evolution for {species_identifier} -> {sumo_evolution['into']}")
            session.add(t.PokemonEvolution(**expected))

session.flush()



# Debugging aid, left disabled: rolls the session back just before the commit
#print("ROLLING BACK")
#session.rollback()
session.commit()
print()
print("done")