From feae105e8800d02fc1651d613157c633b131872e Mon Sep 17 00:00:00 2001
From: "Eevee (Lexy Munroe)" <eevee.git@veekun.com>
Date: Thu, 26 Jan 2017 16:29:44 -0800
Subject: [PATCH] Dump abilities from gen 6/7, and load them into the db!

---
 pokedex/extract/oras.py    |  44 ++++++++----
 pokedex/schema.py          |  29 +++++++-
 scripts/sumo-yaml-to-db.py | 137 +++++++++++++++++++++++++++++++++++++
 3 files changed, 193 insertions(+), 17 deletions(-)
 create mode 100644 scripts/sumo-yaml-to-db.py

diff --git a/pokedex/extract/oras.py b/pokedex/extract/oras.py
index 2b58102..7c7c21d 100644
--- a/pokedex/extract/oras.py
+++ b/pokedex/extract/oras.py
@@ -12,6 +12,7 @@ import shutil
 import struct
 import warnings
 
+from camel import Camel
 from construct import (
     # Simple fields
     Const, Flag, Int16sl, Int16ul, Int8sl, Int8ul, Int32ul, Padding,
@@ -23,7 +24,7 @@ from construct import (
 )
 import yaml
 
-from pokedex.schema import Pokémon
+import pokedex.schema as schema
 from .lib.garc import GARCFile, decrypt_xy_text
 
 # TODO: ribbons!  080 in sumo
@@ -726,6 +727,12 @@ def dump_to_yaml(data, f):
     )
 
 
+def collect_text(texts, text_type, id):
+    """Map each canon language to ``texts[language][text_type][id]``, in order."""
+    per_language = ((lang, texts[lang][text_type][id]) for lang in CANON_LANGUAGES)
+    return OrderedDict(per_language)
+
+
 def extract_data(root, out):
     # TODO big conceptual question for the yaml thing: how do we decide how the
     # identifiers work in the per-version data?  the "global" identifiers are
@@ -964,6 +971,23 @@ def extract_data(root, out):
                 machineids[106:]
         ]
 
+    # -------------------------------------------------------------------------
+    # Abilities
+    all_abilities = OrderedDict()
+    for i, identifier in enumerate(identifiers['ability']):
+        if i == 0:
+            # Dummy non-ability
+            continue
+        ability = all_abilities[identifier] = schema.Ability()
+        ability.name = collect_text(texts, 'ability-names', i)
+        ability.flavor_text = collect_text(texts, 'ability-flavor', i)
+        print(repr(ability.flavor_text['en']))
+
+    with (out / 'abilities.yaml').open('w') as f:
+        f.write(Camel([schema.POKEDEX_TYPES]).dump(all_abilities))
+
+
+
 
     # -------------------------------------------------------------------------
     # Pokémon structs
@@ -1015,7 +1039,7 @@ def extract_data(root, out):
 
                 identifiers['pokémon'][record.form_species_start + offset] = identifiers['species'][i] + '-' + form_names[offset]
 
-        pokémon = Pokémon()
+        pokémon = schema.Pokémon()
         all_pokémon[identifiers['pokémon'][i]] = pokémon
         pokémon.game_index = i
 
@@ -1026,17 +1050,11 @@ def extract_data(root, out):
             form_name_id = i
         # TODO i observe this is explicitly a species name, the one thing that
         # really is shared between forms
-        pokémon.name = OrderedDict(
-            (language, texts[language]['species-names'][base_species_id])
-            for language in CANON_LANGUAGES)
-        pokémon.genus = OrderedDict(
-            (language, texts[language]['genus-names'][base_species_id])
-            for language in CANON_LANGUAGES)
+        pokémon.name = collect_text(texts, 'species-names', base_species_id)
+        pokémon.genus = collect_text(texts, 'genus-names', base_species_id)
         # FIXME ho ho, hang on a second, forms have their own flavor text too!!
-        pokémon.flavor_text = OrderedDict(
-            # TODO well this depends on which game you're dumping
-            (language, texts[language]['species-flavor-moon'][base_species_id])
-            for language in CANON_LANGUAGES)
+        # TODO well this depends on which game you're dumping
+        pokémon.flavor_text = collect_text(texts, 'species-flavor-moon', base_species_id)
         # FIXME include form names?  only when they exist?  can that be
         # inconsistent between languages?
 
@@ -1227,8 +1245,6 @@ def extract_data(root, out):
 
     with (out / 'pokemon.yaml').open('w') as f:
         #dump_to_yaml(all_pokémon, f)
-        import pokedex.schema as schema
-        from camel import Camel
         f.write(Camel([schema.POKEDEX_TYPES]).dump(all_pokémon))
 
 
diff --git a/pokedex/schema.py b/pokedex/schema.py
index ab69216..042d719 100644
--- a/pokedex/schema.py
+++ b/pokedex/schema.py
@@ -165,7 +165,11 @@ EncounterMap = _ForwardDeclaration()
 MoveSet = _ForwardDeclaration()
 Pokedex = _ForwardDeclaration()
 Item = _ForwardDeclaration()
-Ability = _ForwardDeclaration()
+
+
+class Ability(VersionedLocus):
+    name = _Localized(str)  # name text, one str per language
+    flavor_text = _Localized(str)  # flavor text, one str per language
 
 
 class Pokémon(VersionedLocus):
@@ -291,10 +295,13 @@ class Repository:
         return QuantumLocusReader(identifier, cls, self.objects[cls][identifier])
 
 
-# TODO clean this garbage up -- better way of iterating the type, actually work for something other than pokemon...
+# TODO clean this garbage up -- better way of iterating the type, actually work
+# for something other than pokemon...  the only part that varies in the dumper
+# is the tag, and the only part that varies in the loader is the class (which
+# is determined from the tag)
 POKEDEX_TYPES = camel.CamelRegistry(tag_prefix='tag:veekun.com,2005:pokedex/', tag_shorthand='!dex!')
 
-@POKEDEX_TYPES.dumper(Locus, 'pokemon', version=None, inherit=True)
+@POKEDEX_TYPES.dumper(Pokémon, 'pokemon', version=None, inherit=True)
 def _dump_locus(locus):
     data = OrderedDict()
     attrs = [(key, attr) for (key, attr) in type(locus).__dict__.items() if isinstance(attr, _Attribute)]
@@ -319,6 +326,22 @@ def _load_locus(data, version):
     return obj
 
 
+POKEDEX_TYPES.dumper(Ability, 'ability', version=None, inherit=True)(_dump_locus)
+
+
+@POKEDEX_TYPES.loader('ability', version=None)
+def _load_locus(data, version):
+    """Build an Ability from a loaded mapping of attribute names to values."""
+    # NOTE(review): shares its name with the loader defined just above; rename?
+    # TODO wrap with a writer thing?
+    ability = Ability()
+    for raw_key, value in data.items():
+        attr_name = raw_key.replace('-', '_')
+        assert hasattr(Ability, attr_name)
+        setattr(ability, attr_name, value)
+    return ability
+
+
 def load_repository():
     repository = Repository()
 
diff --git a/scripts/sumo-yaml-to-db.py b/scripts/sumo-yaml-to-db.py
new file mode 100644
index 0000000..c18be80
--- /dev/null
+++ b/scripts/sumo-yaml-to-db.py
@@ -0,0 +1,137 @@
+import itertools
+from pathlib import Path
+
+from camel import Camel
+from sqlalchemy.orm import Load
+
+import pokedex.db
+import pokedex.db.tables as t
+import pokedex.main as main
+import pokedex.schema as schema
+
+
+out = Path('moon-out')
+session = pokedex.db.connect('postgresql:///veekun_pokedex')
+camel = Camel([schema.POKEDEX_TYPES])
+
+# Tables with a sequence-backed id were loaded manually instead of through
+# nextval(), so their sequences still start at 1; reset each to max(id) + 1.
+SEQUENCE_FIXUP_SQL = """
+            SELECT setval(pg_get_serial_sequence('{table_name}', 'id'),
+                coalesce(max(id), 0) + 1, false)
+            FROM {table_name} WHERE id < 10000;
+            """
+for table_name, table in pokedex.db.metadata.tables.items():
+    if hasattr(table.c, 'id') and table.c.id.autoincrement:
+        session.execute(SEQUENCE_FIXUP_SQL.format(table_name=table_name))
+
+db_languages = {
+    language.identifier: language for language in session.query(t.Language).all()
+}
+session.local_language_id = db_languages['en'].id
+
+# Insert some requisite new stuff, unless an earlier run already did (gen 7 exists)
+db_sumo_generation = session.query(t.Generation).get(7)
+if db_sumo_generation:
+    db_sumo_version_group = session.query(t.VersionGroup).filter_by(identifier='sun-moon').one()
+else:
+    # Distinguish simplified and traditional Chinese
+    db_languages['zh'].identifier = 'zh-Hant'
+    for db_language in db_languages.values():
+        if db_language.order > db_languages['zh'].order:
+            db_language.order += 1
+    session.add(t.Language(
+        id=12,
+        iso639='zh', iso3166='cn', identifier='zh-Hans', official=True,
+        order=db_languages['zh'].order + 1,
+    ))
+
+    # Use standard names for Japanese
+    db_languages['ja'].identifier = 'ja-Hrkt'
+    db_languages['ja-kanji'].identifier = 'ja'
+    session.flush()
+
+    # Refresh language list, since the identifiers it is keyed on just changed
+    db_languages = {}
+    for language in session.query(t.Language).all():
+        db_languages[language.identifier] = language
+    db_en = db_languages['en']
+
+    # Versions
+    # TODO these all need names in other languages too
+    db_alola = t.Region(identifier='alola')
+    db_alola.name_map[db_en] = 'Alola'
+    session.add(db_alola)
+    db_sumo_generation = t.Generation(
+        id=7, identifier='sun-moon',
+        main_region=db_alola,
+    )
+    db_sumo_version_group = t.VersionGroup(
+        identifier='sun-moon',
+        generation=db_sumo_generation,
+        order=17,
+    )
+    db_sun = t.Version(
+        identifier='sun',
+        version_group=db_sumo_version_group,
+    )
+    db_moon = t.Version(
+        identifier='moon',
+        version_group=db_sumo_version_group,
+    )
+    # TODO find names in other languages
+    db_sun.name_map[db_en] = 'Sun'
+    db_moon.name_map[db_en] = 'Moon'
+    session.add_all([
+        db_alola, db_sumo_generation,
+        db_sumo_version_group, db_sun, db_moon,
+    ])
+    session.flush()
+
+
+# Abilities
+print()
+print("--- ABILITIES ---")
+abilities = camel.load((out / 'abilities.yaml').read_text(encoding='utf8'))
+
+# Main-series abilities already in the db, in id order; they are paired
+# positionally with the dump, so extra dump entries are treated as new
+db_abilities = (
+    session.query(t.Ability)
+    .filter_by(is_main_series=True)
+    .order_by(t.Ability.id)
+    .options(Load(t.Ability).joinedload('names'))
+    .all()
+)
+for (sumo_identifier, sumo_ability), db_ability in itertools.zip_longest(
+        abilities.items(), db_abilities):
+    print(sumo_identifier)
+    if not db_ability:
+        # Not in the db yet: create it with every name from the dump
+        db_ability = t.Ability(
+            identifier=sumo_identifier, generation_id=7, is_main_series=True)
+        for lang, name in sumo_ability.name.items():
+            db_ability.name_map[db_languages[lang]] = name
+        session.add(db_ability)
+    else:
+        assert sumo_identifier == db_ability.identifier
+        # Update names and insert new ones
+        for lang, name in sumo_ability.name.items():
+            db_language = db_languages[lang]
+            old_name = db_ability.name_map.get(db_language)
+            if old_name == name:
+                continue
+            if old_name:
+                print("- hmm! changing", old_name, "to", name, "in", lang)
+            db_ability.name_map[db_language] = name
+
+    # Flavor text is per-version (group) and thus always new
+    for lang, flavor_text in sumo_ability.flavor_text.items():
+        session.add(t.AbilityFlavorText(
+            ability=db_ability, version_group=db_sumo_version_group,
+            language=db_languages[lang], flavor_text=flavor_text))
+
+session.commit()
+print()
+print("done")