Dump abilities from gen 6/7, and load them into the db!

This commit is contained in:
Eevee (Lexy Munroe) 2017-01-26 16:29:44 -08:00
parent 2631d36963
commit feae105e88
3 changed files with 193 additions and 17 deletions

View file

@ -12,6 +12,7 @@ import shutil
import struct
import warnings
from camel import Camel
from construct import (
# Simple fields
Const, Flag, Int16sl, Int16ul, Int8sl, Int8ul, Int32ul, Padding,
@ -23,7 +24,7 @@ from construct import (
)
import yaml
from pokedex.schema import Pokémon
import pokedex.schema as schema
from .lib.garc import GARCFile, decrypt_xy_text
# TODO: ribbons! 080 in sumo
@ -726,6 +727,12 @@ def dump_to_yaml(data, f):
)
def collect_text(texts, text_type, id):
    """Gather one text entry across every canonical language.

    Looks up ``texts[language][text_type][id]`` for each language in
    ``CANON_LANGUAGES`` and returns the results as an ``OrderedDict`` keyed
    by language, in ``CANON_LANGUAGES`` order.
    """
    translations = OrderedDict()
    for language in CANON_LANGUAGES:
        translations[language] = texts[language][text_type][id]
    return translations
def extract_data(root, out):
# TODO big conceptual question for the yaml thing: how do we decide how the
# identifiers work in the per-version data? the "global" identifiers are
@ -964,6 +971,23 @@ def extract_data(root, out):
machineids[106:]
]
# -------------------------------------------------------------------------
# Abilities
all_abilities = OrderedDict()
for i, identifier in enumerate(identifiers['ability']):
if i == 0:
# Dummy non-ability
continue
ability = all_abilities[identifier] = schema.Ability()
ability.name = collect_text(texts, 'ability-names', i)
ability.flavor_text = collect_text(texts, 'ability-flavor', i)
print(repr(ability.flavor_text['en']))
with (out / 'abilities.yaml').open('w') as f:
f.write(Camel([schema.POKEDEX_TYPES]).dump(all_abilities))
# -------------------------------------------------------------------------
# Pokémon structs
@ -1015,7 +1039,7 @@ def extract_data(root, out):
identifiers['pokémon'][record.form_species_start + offset] = identifiers['species'][i] + '-' + form_names[offset]
pokémon = Pokémon()
pokémon = schema.Pokémon()
all_pokémon[identifiers['pokémon'][i]] = pokémon
pokémon.game_index = i
@ -1026,17 +1050,11 @@ def extract_data(root, out):
form_name_id = i
# TODO i observe this is explicitly a species name, the one thing that
# really is shared between forms
pokémon.name = OrderedDict(
(language, texts[language]['species-names'][base_species_id])
for language in CANON_LANGUAGES)
pokémon.genus = OrderedDict(
(language, texts[language]['genus-names'][base_species_id])
for language in CANON_LANGUAGES)
pokémon.name = collect_text(texts, 'species-names', base_species_id)
pokémon.genus = collect_text(texts, 'genus-names', base_species_id)
# FIXME ho ho, hang on a second, forms have their own flavor text too!!
pokémon.flavor_text = OrderedDict(
# TODO well this depends on which game you're dumping
(language, texts[language]['species-flavor-moon'][base_species_id])
for language in CANON_LANGUAGES)
# TODO well this depends on which game you're dumping
pokémon.flavor_text = collect_text(texts, 'species-flavor-moon', base_species_id)
# FIXME include form names? only when they exist? can that be
# inconsistent between languages?
@ -1227,8 +1245,6 @@ def extract_data(root, out):
with (out / 'pokemon.yaml').open('w') as f:
#dump_to_yaml(all_pokémon, f)
import pokedex.schema as schema
from camel import Camel
f.write(Camel([schema.POKEDEX_TYPES]).dump(all_pokémon))

View file

@ -165,7 +165,11 @@ EncounterMap = _ForwardDeclaration()
MoveSet = _ForwardDeclaration()
Pokedex = _ForwardDeclaration()
Item = _ForwardDeclaration()
Ability = _ForwardDeclaration()
class Ability(VersionedLocus):
    # Schema record for an ability.  Instances are serialized through the
    # POKEDEX_TYPES registry under the 'ability' tag.

    # Name of the ability; localized, so one string per language.
    name = _Localized(str)
    # Flavor text of the ability; localized, so one string per language.
    flavor_text = _Localized(str)
class Pokémon(VersionedLocus):
@ -291,10 +295,13 @@ class Repository:
return QuantumLocusReader(identifier, cls, self.objects[cls][identifier])
# TODO clean this garbage up -- better way of iterating the type, actually work for something other than pokemon...
# TODO clean this garbage up -- better way of iterating the type, actually work
# for something other than pokemon... the only part that varies in the dumper
# is the tag, and the only part that varies in the loader is the class (which
# is determined from the tag)
POKEDEX_TYPES = camel.CamelRegistry(tag_prefix='tag:veekun.com,2005:pokedex/', tag_shorthand='!dex!')
@POKEDEX_TYPES.dumper(Locus, 'pokemon', version=None, inherit=True)
@POKEDEX_TYPES.dumper(Pokémon, 'pokemon', version=None, inherit=True)
def _dump_locus(locus):
data = OrderedDict()
attrs = [(key, attr) for (key, attr) in type(locus).__dict__.items() if isinstance(attr, _Attribute)]
@ -319,6 +326,22 @@ def _load_locus(data, version):
return obj
POKEDEX_TYPES.dumper(Ability, 'ability', version=None, inherit=True)(_dump_locus)
@POKEDEX_TYPES.loader('ability', version=None)
def _load_locus(data, version):
    """Build an ``Ability`` from the mapping loaded for an ``ability`` tag.

    Each key of ``data`` has its dashes replaced with underscores and the
    value is then set as the attribute of that name on a fresh ``Ability``.
    A key with no matching attribute on ``Ability`` trips the assertion.
    ``version`` is accepted but not used.
    """
    # TODO wrap with a writer thing?
    ability = Ability()
    for dashed_key, value in data.items():
        attr_name = dashed_key.replace('-', '_')
        assert hasattr(Ability, attr_name)
        setattr(ability, attr_name, value)
    return ability
def load_repository():
repository = Repository()

137
scripts/sumo-yaml-to-db.py Normal file
View file

@ -0,0 +1,137 @@
import itertools
from pathlib import Path
from camel import Camel
from sqlalchemy.orm import Load
import pokedex.db
import pokedex.db.tables as t
import pokedex.main as main
import pokedex.schema as schema
# Load the Sun/Moon YAML dump (written by the extractor into `moon-out`) into
# the veekun database: fix up id sequences, make sure the gen 7 language /
# region / generation / version rows exist, then merge in the abilities.
out = Path('moon-out')
session = pokedex.db.connect('postgresql:///veekun_pokedex')
camel = Camel([schema.POKEDEX_TYPES])

# While many tables do have a primary key with a sequence, those sequences are
# all initialized to 1 because the data was loaded manually instead of using
# nextval(). That's a pain in the ass for us, so this fixes them up.
# (The table names interpolated below come from the project's own table
# metadata, not from external input.)
for table_name, table in pokedex.db.metadata.tables.items():
    if hasattr(table.c, 'id') and table.c.id.autoincrement:
        session.execute("""
            SELECT setval(pg_get_serial_sequence('{table_name}', 'id'),
                          coalesce(max(id), 0) + 1, false)
            FROM {table_name} WHERE id < 10000;
        """.format(table_name=table_name))

# Index the existing language rows by identifier
db_languages = {}
for language in session.query(t.Language).all():
    db_languages[language.identifier] = language
session.local_language_id = db_languages['en'].id

# Insert some requisite new stuff if it doesn't already exist
db_sumo_generation = session.query(t.Generation).get(7)
if db_sumo_generation:
    # Generation 7 is already present, so the version group that gets created
    # together with it below must be fetched rather than re-created.  (This
    # used to be a bare `session.query()`, which yields a Query object, not a
    # VersionGroup row, and broke the flavor text inserts further down.)
    db_sumo_version_group = (
        session.query(t.VersionGroup)
        .filter_by(identifier='sun-moon')
        .one()
    )
else:
    # Distinguish simplified and traditional Chinese
    db_languages['zh'].identifier = 'zh-Hant'
    # Make room in the ordering for the new language right after 'zh'
    for db_language in db_languages.values():
        if db_language.order > db_languages['zh'].order:
            db_language.order += 1
    session.add(t.Language(
        id=12,
        iso639='zh', iso3166='cn', identifier='zh-Hans', official=True,
        order=db_languages['zh'].order + 1,
    ))
    # Use standard names for Japanese
    db_languages['ja'].identifier = 'ja-Hrkt'
    db_languages['ja-kanji'].identifier = 'ja'
    session.flush()

    # Refresh language list
    db_languages = {}
    for language in session.query(t.Language).all():
        db_languages[language.identifier] = language
    db_en = db_languages['en']

    # Versions
    # TODO these all need names in other languages too
    db_alola = t.Region(identifier='alola')
    db_alola.name_map[db_en] = 'Alola'
    session.add(db_alola)
    db_sumo_generation = t.Generation(
        id=7, identifier='sun-moon',
        main_region=db_alola,
    )
    db_sumo_version_group = t.VersionGroup(
        identifier='sun-moon',
        generation=db_sumo_generation,
        order=17,
    )
    db_sun = t.Version(
        identifier='sun',
        version_group=db_sumo_version_group,
    )
    db_moon = t.Version(
        identifier='moon',
        version_group=db_sumo_version_group,
    )
    # TODO find names in other languages
    db_sun.name_map[db_en] = 'Sun'
    db_moon.name_map[db_en] = 'Moon'
    session.add_all([
        db_alola, db_sumo_generation,
        db_sumo_version_group, db_sun, db_moon,
    ])
    session.flush()


# Abilities
print()
print("--- ABILITIES ---")
with (out / 'abilities.yaml').open(encoding='utf8') as f:
    abilities = camel.load(f.read())

# Pair the dumped abilities with the existing main-series rows, in order.
# zip_longest pads the shorter side with None: a None db_ability means the
# dumped ability is new to the database.
# NOTE: if the database has more main-series abilities than the dump, the
# padding lands on the left-hand side and the tuple unpacking raises TypeError.
for (sumo_identifier, sumo_ability), db_ability in itertools.zip_longest(
    abilities.items(),
    session.query(t.Ability)
        .filter_by(is_main_series=True)
        .order_by(t.Ability.id)
        .options(Load(t.Ability).joinedload('names'))
        .all()
):
    print(sumo_identifier)
    if db_ability:
        assert sumo_identifier == db_ability.identifier
        # Update names and insert new ones
        for lang, name in sumo_ability.name.items():
            old_name = db_ability.name_map.get(db_languages[lang])
            if old_name != name:
                if old_name:
                    print("- hmm! changing", old_name, "to", name, "in", lang)
                db_ability.name_map[db_languages[lang]] = name
    else:
        db_ability = t.Ability(
            identifier=sumo_identifier,
            generation_id=7,
            is_main_series=True,
        )
        for lang, name in sumo_ability.name.items():
            db_ability.name_map[db_languages[lang]] = name
        session.add(db_ability)

    # Flavor text is per-version (group) and thus always new
    for lang, flavor_text in sumo_ability.flavor_text.items():
        session.add(t.AbilityFlavorText(
            ability=db_ability,
            version_group=db_sumo_version_group,
            language=db_languages[lang],
            flavor_text=flavor_text,
        ))

session.commit()

print()
print("done")