def collect_text(texts, text_type, id, languages=None):
    """Gather a single text entry in every requested language.

    For each language this reads ``texts[language][text_type][id]`` and
    returns the results keyed by language, in the order the languages are
    iterated.

    :param texts: nested lookup of language -> text type -> entries.
    :param text_type: key of the entry collection to read, e.g.
        ``'ability-names'``.
    :param id: index (or key) of the entry inside that collection.  The name
        shadows the builtin within this function; it is kept so the existing
        signature does not change.
    :param languages: iterable of language keys to collect.  When omitted,
        ``CANON_LANGUAGES`` is used, which matches the previous behaviour.
    :returns: an ``OrderedDict`` mapping each language to its text.

    Lookup errors for a missing language, text type or id are not caught
    here and propagate to the caller.
    """
    if languages is None:
        languages = CANON_LANGUAGES
    return OrderedDict(
        (language, texts[language][text_type][id])
        for language in languages)
form_name_id = i # TODO i observe this is explicitly a species name, the one thing that # really is shared between forms - pokémon.name = OrderedDict( - (language, texts[language]['species-names'][base_species_id]) - for language in CANON_LANGUAGES) - pokémon.genus = OrderedDict( - (language, texts[language]['genus-names'][base_species_id]) - for language in CANON_LANGUAGES) + pokémon.name = collect_text(texts, 'species-names', base_species_id) + pokémon.genus = collect_text(texts, 'genus-names', base_species_id) # FIXME ho ho, hang on a second, forms have their own flavor text too!! - pokémon.flavor_text = OrderedDict( - # TODO well this depends on which game you're dumping - (language, texts[language]['species-flavor-moon'][base_species_id]) - for language in CANON_LANGUAGES) + # TODO well this depends on which game you're dumping + pokémon.flavor_text = collect_text(texts, 'species-flavor-moon', base_species_id) # FIXME include form names? only when they exist? can that be # inconsistent between languages? @@ -1227,8 +1245,6 @@ def extract_data(root, out): with (out / 'pokemon.yaml').open('w') as f: #dump_to_yaml(all_pokémon, f) - import pokedex.schema as schema - from camel import Camel f.write(Camel([schema.POKEDEX_TYPES]).dump(all_pokémon)) diff --git a/pokedex/schema.py b/pokedex/schema.py index ab69216..042d719 100644 --- a/pokedex/schema.py +++ b/pokedex/schema.py @@ -165,7 +165,11 @@ EncounterMap = _ForwardDeclaration() MoveSet = _ForwardDeclaration() Pokedex = _ForwardDeclaration() Item = _ForwardDeclaration() -Ability = _ForwardDeclaration() + + +class Ability(VersionedLocus): + name = _Localized(str) + flavor_text = _Localized(str) class Pokémon(VersionedLocus): @@ -291,10 +295,13 @@ class Repository: return QuantumLocusReader(identifier, cls, self.objects[cls][identifier]) -# TODO clean this garbage up -- better way of iterating the type, actually work for something other than pokemon... 
# Abilities are dumped by the same ``_dump_locus`` function, registered a
# second time under the 'ability' tag.
POKEDEX_TYPES.dumper(Ability, 'ability', version=None, inherit=True)(_dump_locus)


@POKEDEX_TYPES.loader('ability', version=None)
def _load_locus(data, version):
    """Build an ``Ability`` from the mapping loaded for an 'ability' tag.

    Every key in ``data`` has its dashes replaced by underscores and is then
    set as an attribute on a fresh ``Ability`` instance.

    :param data: mapping of (dash-separated) attribute names to values.
    :param version: accepted for the loader signature; not used here.
    :returns: the populated ``Ability``.
    :raises ValueError: if a key does not correspond to an attribute declared
        on ``Ability``.

    NOTE(review): this reuses the name ``_load_locus``, which the module also
    uses for another loader earlier on, so the module-level name is rebound
    here -- presumably harmless because loaders are registered through the
    decorator; confirm nothing calls ``_load_locus`` directly.
    """
    cls = Ability
    # TODO wrap with a writer thing?
    obj = cls()
    for key, value in data.items():
        attr = key.replace('-', '_')
        # Checked explicitly rather than with ``assert`` so that unknown keys
        # are still rejected when Python runs with -O.
        if not hasattr(cls, attr):
            raise ValueError(
                "unknown attribute {!r} for {}".format(key, cls.__name__))
        setattr(obj, attr, value)

    return obj
# Move every autoincrementing ``id`` sequence to one past the largest existing
# id (only ids below 10000 are considered by the query).
for table_name, table in pokedex.db.metadata.tables.items():
    if hasattr(table.c, 'id') and table.c.id.autoincrement:
        # table_name comes from the ORM metadata rather than from user input,
        # which is why it is formatted straight into the statement.
        session.execute("""
            SELECT setval(pg_get_serial_sequence('{table_name}', 'id'),
                          coalesce(max(id), 0) + 1, false)
            FROM {table_name} WHERE id < 10000;
        """.format(table_name=table_name))

# Index the existing languages by identifier
db_languages = {
    language.identifier: language
    for language in session.query(t.Language).all()
}
session.local_language_id = db_languages['en'].id

# Insert some requisite new stuff if it doesn't already exist
db_sumo_generation = session.query(t.Generation).get(7)
if db_sumo_generation:
    # Generation 7 is already there, so reuse the version group that the
    # branch below inserts under the 'sun-moon' identifier.  (This used to be
    # a bare ``session.query()``, which yields a Query object, not a row.)
    db_sumo_version_group = (
        session.query(t.VersionGroup)
        .filter_by(identifier='sun-moon')
        .one()
    )
else:
    # Distinguish simplified and traditional Chinese
    db_languages['zh'].identifier = 'zh-Hant'
    # Make room in the ordering directly after the existing Chinese entry
    for db_language in db_languages.values():
        if db_language.order > db_languages['zh'].order:
            db_language.order += 1
    session.add(t.Language(
        id=12,
        iso639='zh', iso3166='cn', identifier='zh-Hans', official=True,
        order=db_languages['zh'].order + 1,
    ))

    # Use standard names for Japanese
    db_languages['ja'].identifier = 'ja-Hrkt'
    db_languages['ja-kanji'].identifier = 'ja'
    session.flush()

    # Refresh language list, since identifiers were just renamed
    db_languages = {
        language.identifier: language
        for language in session.query(t.Language).all()
    }
    db_en = db_languages['en']

    # Versions
    # TODO these all need names in other languages too
    db_alola = t.Region(identifier='alola')
    db_alola.name_map[db_en] = 'Alola'
    session.add(db_alola)
    db_sumo_generation = t.Generation(
        id=7, identifier='sun-moon',
        main_region=db_alola,
    )
    db_sumo_version_group = t.VersionGroup(
        identifier='sun-moon',
        generation=db_sumo_generation,
        order=17,
    )
    db_sun = t.Version(
        identifier='sun',
        version_group=db_sumo_version_group,
    )
    db_moon = t.Version(
        identifier='moon',
        version_group=db_sumo_version_group,
    )
    # TODO find names in other languages
    db_sun.name_map[db_en] = 'Sun'
    db_moon.name_map[db_en] = 'Moon'
    session.add_all([
        db_alola, db_sumo_generation,
        db_sumo_version_group, db_sun, db_moon,
    ])
    session.flush()


# Abilities
print()
print("--- ABILITIES ---")
with (out / 'abilities.yaml').open(encoding='utf8') as f:
    abilities = camel.load(f.read())

# Pair dumped abilities with the main-series database rows in id order; the
# longer side is padded with None.
for sumo_item, db_ability in itertools.zip_longest(
    abilities.items(),
    session.query(t.Ability)
        .filter_by(is_main_series=True)
        .order_by(t.Ability.id)
        .options(Load(t.Ability).joinedload('names'))
        .all()
):
    if sumo_item is None:
        # The database has a row with no counterpart in the dump; fail with a
        # clear message instead of an unpacking TypeError.
        raise RuntimeError(
            "database ability {!r} has no counterpart in abilities.yaml"
            .format(db_ability.identifier))
    sumo_identifier, sumo_ability = sumo_item

    print(sumo_identifier)
    if db_ability:
        assert sumo_identifier == db_ability.identifier
        # Update names and insert new ones
        for lang, name in sumo_ability.name.items():
            old_name = db_ability.name_map.get(db_languages[lang])
            if old_name != name:
                if old_name:
                    print("- hmm!  changing", old_name, "to", name, "in", lang)
                db_ability.name_map[db_languages[lang]] = name
    else:
        # No database row at this position: create the ability
        db_ability = t.Ability(
            identifier=sumo_identifier,
            generation_id=7,
            is_main_series=True,
        )
        for lang, name in sumo_ability.name.items():
            db_ability.name_map[db_languages[lang]] = name
        session.add(db_ability)

    # Flavor text is per-version (group) and thus always new
    for lang, flavor_text in sumo_ability.flavor_text.items():
        session.add(t.AbilityFlavorText(
            ability=db_ability,
            version_group=db_sumo_version_group,
            language=db_languages[lang],
            flavor_text=flavor_text,
        ))

session.commit()
print()
print("done")