Get SUMO to YAML to database basically working, finally!

- Pokémon shapes are now dumped
- Machines were being dumped slightly out of order; they are now correct
- Item price is now dumped correctly
- Identifiers are now computed to match veekun for all Pokémon
- Several bugs with evolutions were fixed
- Shedinja is now special-cased, alas
- Items are now loaded into the db
- Pokémon are now loaded into the db, mostly correctly
This commit is contained in:
Eevee (Lexy Munroe) 2017-08-22 14:29:07 -07:00
parent 698a211539
commit 060dd42c7a
4 changed files with 816 additions and 51 deletions

View file

@ -46,9 +46,14 @@ def _getset_factory_factory(column_name, string_getter):
session = object_session(translations)
language = translations.local_language
return string_getter(text, session, language)
def setter(translations, value):
# The string must be set on the Translation directly.
raise AttributeError("Cannot set %s" % column_name)
if underlying_type is dict:
def setter(translations, key, value):
setattr(translations, instance.value_attr, value)
else:
def setter(translations, value):
# The string must be set on the Translation directly.
raise AttributeError("Cannot set %s" % column_name)
return getter, setter
return getset_factory

View file

@ -28,6 +28,7 @@ import yaml
import pokedex.schema as schema
from .lib.garc import GARCFile, decrypt_xy_text
from .lib.pc import PokemonContainerFile
# TODO: ribbons! 080 in sumo
@ -104,6 +105,25 @@ COLORS = {
2: 'pc.yellow',
}
# Maps a raw shape id to its shape identifier.  extract_data indexes this
# table with each byte of the per-form shape data it reads, so every id that
# can appear in that data needs an entry here.
# NOTE: these are listed in veekun order, which doesn't match the games, just
# as with colors
SHAPES = {
0: 'ps.ball',
12: 'ps.squiggle',
2: 'ps.fish',
13: 'ps.arms',
8: 'ps.blob',
9: 'ps.upright',
1: 'ps.legs',
4: 'ps.quadruped',
11: 'ps.wings',
7: 'ps.tentacles',
6: 'ps.heads', # NOTE: this is really multi-body
10: 'ps.humanoid',
5: 'ps.bug-wings',
3: 'ps.armor', # NOTE/TODO?: this is really just bug-shaped
}
DAMAGE_CLASSES = {
0: 'dc.status',
1: 'dc.physical',
@ -297,7 +317,7 @@ SUMO_SCRIPT_ENTRIES = {
'pokemon-weight-flavor': 117,
'trainer-class-names': 106,
'berry-names': 65,
# 49 might be pokédex colors? or maybe clothing colors
# 49 appears to be clothing dye colors + a set of clothes patterns?
# 38: item names, with macros to branch for pluralization
# 114: copy of item names, but with "PP" in latin in korean (?!)
@ -428,7 +448,8 @@ FORM_NAMES = {
# TODO why are 10 and 50 duplicated?
718: (None, '10', '10', '50', 'complete'),
# Hoopa
720: ('confined', 'unbound'),
# TODO should the default form be 'confined'?
720: (None, 'unbound'),
# Gumshoos
735: (None, 'totem'),
# Vikavolt
@ -512,7 +533,9 @@ pokemon_struct = Struct(
'height' / Int16ul,
'weight' / Int16ul,
'machines' / BitsSwapped(Bitwise(Array(16 * 8, Flag))),
# TODO this is clearly not tutors (in sumo anyway)? bulb is 1/1/9, char 2/2/18, squirtle 4/4/36, then all zeroes until dratini 64/64/64, then same with next starters...
'tutors' / Int32ul,
# TODO appear to be all zeroes, at least in sumo
'mystery1' / Int16ul,
'mystery2' / Int16ul,
# TODO these are unused in sumo
@ -664,7 +687,7 @@ item_struct = Struct(
# 6 - tms
# 7 - berries
# 8 - key items
'mystery0d' / Int8ul,
'pocketish' / Int8ul,
# 1 - can be used (+ consumed) by pokémon (maybe for recycle purposes)
# 16 - black/white/yellow/red/blue flutes?? (not in xy?)
'mystery0e' / Int8ul,
@ -991,8 +1014,8 @@ def make_identifier(english_name):
return re.sub(
'[^a-zA-Z0-9-]+',
'-',
english_name.lower().replace('', ''),
)
english_name.lower().replace('é', 'e').replace('', '').replace('', '-m').replace('', '-f'),
).rstrip('-')
@contextmanager
def read_garc(path):
@ -1372,18 +1395,18 @@ def extract_data(root, out):
f.seek(0x0049795a) # SUMO
# TODO magic number (107)
machineids = struct.unpack('<107H', f.read(2 * 107))
# TODO dunno if this is still true
# FIXME this is no longer true in sun/moon, of course
# Order appears to be based on some gen 4 legacy: TMs 1 through 92, HMs
# 1 through 6, then the other eight TMs and the last HM. But the bits
# in the Pokémon structs are in the expected order of 1 through 100, 1
# through 7
machines = [
identifiers['move'][moveid]
for moveid in
machineids[0:92] +
machineids[98:106] +
machineids[92:98] +
machineids[106:]
for moveid in machineids
#machineids[0:92] +
#machineids[98:106] +
#machineids[92:98] +
#machineids[106:]
]
# TODO Pokémon box sprite map
@ -1418,19 +1441,27 @@ def extract_data(root, out):
item_ct = len(garc)
for i, subfile in enumerate(garc):
identifier = identifiers['item'][i]
if identifier == '???':
if identifier == '':
# Junk non-item
# TODO striiictly speaking, maybe we should dump these anyway
# TODO striiictly speaking, maybe we should dump these
# anyway... but we have no way of knowing what they'd do, so,
# eh
continue
item = all_items[identifier] = schema.Item()
item.game_index = i
item.name = collect_text(texts, 'item-names', i)
item.flavor_text = collect_text(texts, 'item-flavor', i)
raw_item = item_struct.parse_stream(subfile[0])
item.price = raw_item.price
item.price = raw_item.price * 10
item.fling_power = raw_item.fling_power
subfile[0].seek(0)
data = subfile[0].read()
print(f"{identifiers['item'][i]:24s} {raw_item.price:5d} {raw_item.fling_effect:3d} {raw_item.natural_gift_effect:3d} {raw_item.natural_gift_power:3d} {raw_item.natural_gift_type & 31:2d} | {raw_item.pocketish:1d} {data.hex()[:32]}")
with (out / 'items.yaml').open('w') as f:
f.write(Camel([schema.POKEDEX_TYPES]).dump(all_items))
@ -1469,7 +1500,18 @@ def extract_data(root, out):
assert not unused_image_datae
# -------------------------------------------------------------------------
# Pokémon structs
# Pokémon
# Shapes are stored separately alongside some Pokédex sorting data, since
# they're only used in the Pokédex search. In SUMO, at least, every flavor
# form has its own shape.
# TODO where is this in other games?
# TODO what are the other records in these two files? file 0 has 11 records, file 1 has 7
with read_garc(root / 'rom/a/1/5/2') as garc: # SUMO
subfile = PokemonContainerFile(garc[1][0])
shape_data = subfile[1].read()
# One byte per form, no parsing required!
pokémon_form_shapes = [SHAPES[shape_id] for shape_id in shape_data]
# TODO document this properly sometime, somewhere, but the gist is:
# - a species may have multiple forms
@ -1564,18 +1606,20 @@ def extract_data(root, out):
pokémon.game_index = i
base_species_id, form_name_id = concrete_form_order[i]
flavor_id = species_forms[base_species_id]['flavor_ids'][form_name_id]
# TODO i observe this is explicitly a species name, the one thing that
# really is shared between forms
pokémon.name = collect_text(texts, 'species-names', base_species_id)
pokémon.genus = collect_text(texts, 'genus-names', base_species_id)
# FIXME ho ho, hang on a second, forms have their own flavor text too!!
# TODO well this depends on which game you're dumping
pokémon.flavor_text = collect_text(texts, 'species-flavor-moon', base_species_id)
pokémon.flavor_text = collect_text(texts, 'species-flavor-moon', flavor_id)
# FIXME this is pretty temporary hackery; ideally the file would be
# arranged around species, not concrete forms
pokémon.form_base_species = identifiers['species'][base_species_id]
pokémon.form_number = form_name_id
pokémon.form_identifier = species_forms[base_species_id]['forms'][form_name_id]
if i < len(species_forms) and not species_forms[i]['is_concrete']:
pokémon.form_appearances = species_forms[i]['forms']
else:
@ -1637,6 +1681,7 @@ def extract_data(root, out):
# FIXME safari escape??
pokémon.base_experience = record.base_exp
pokémon.color = record.color
pokémon.shape = pokémon_form_shapes[flavor_id]
# FIXME what units are these!
pokémon.height = record.height
pokémon.weight = record.weight
@ -1646,7 +1691,7 @@ def extract_data(root, out):
# TODO transform to an OD somehow probably
pokemon_data.append(record)
print("{:4d} {:25s} {} {:5d} {:5d} {:4d} {:4d} {:2d} / {p.z_crystal:3d} {p.z_base_move:3d} {p.z_move:3d} | {:10s} - {p.effort_padding:2d} - {p.effort:04x} {p.effort_hp:1d} {p.effort_attack:1d} {p.effort_defense:1d} {p.effort_speed:1d} {p.effort_special_attack:1d} {p.effort_special_defense:1d}".format(
print("{:4d} {:25s} {} {:5d} {:5d} {:4d} {:4d} {:2d} / {p.z_crystal:3d} {p.z_base_move:3d} {p.z_move:3d} | {:10s} - {p.effort_padding:2d} - {p.safari_escape:3d} {p.mystery1:5d} {p.mystery2:5d} {p.held_item3:3d} {p.tutors:10} {shape}".format(
i,
identifiers['pokémon'][i],
('0'*16 + bin(record.mystery1)[2:])[-16:],
@ -1657,6 +1702,7 @@ def extract_data(root, out):
record.form_count,
record.color,
p=record,
shape=pokémon_form_shapes[flavor_id],
))
# -------------------------------------------------------------------------
@ -1762,9 +1808,13 @@ def extract_data(root, out):
if i > len(identifiers['species']):
continue
moveset = all_pokémon[ident].moves
eggseen = set()
eggset = moveset['egg'] = []
for moveid in container.moveids:
eggset.append(identifiers['move'][moveid])
# Swinub has Mud Shot listed twice, for some reason
if moveid not in eggseen:
eggset.append(identifiers['move'][moveid])
eggseen.add(moveid)
# Level-up moves
with read_garc(root / 'rom/a/0/1/3') as garc: # SUMO
@ -1811,16 +1861,18 @@ def extract_data(root, out):
evo['into'] = identifiers['pokémon'][raw_evo.into_species]
if raw_evo.method == 1:
evo['trade'] = 'ev.level-up'
evo['trigger'] = 'ev.level-up'
evo['minimum-friendship'] = 220
elif raw_evo.method == 2:
evo['trade'] = 'ev.level-up'
evo['trigger'] = 'ev.level-up'
# FIXME is this an enum? also really it's morning OR day
evo['time-of-day'] = 'day'
evo['minimum-friendship'] = 220
elif raw_evo.method == 3:
evo['trade'] = 'ev.level-up'
evo['trigger'] = 'ev.level-up'
# FIXME is this an enum?
evo['time-of-day'] = 'night'
evo['minimum-friendship'] = 220
elif raw_evo.method == 4:
evo['trigger'] = 'ev.level-up'
evo['minimum-level'] = raw_evo.level
@ -1831,6 +1883,9 @@ def extract_data(root, out):
evo['held-item'] = identifiers['item'][raw_evo.param]
elif raw_evo.method == 7:
evo['trigger'] = 'ev.trade'
# FIXME uhh this is always zero. karrablast and shelmet do
# not actually mention each other here. guess it's
# hardcoded?? awesome
evo['traded-with'] = identifiers['pokémon'][raw_evo.param]
elif raw_evo.method == 8:
evo['trigger'] = 'ev.use-item'
@ -1880,6 +1935,7 @@ def extract_data(root, out):
evo['held-item'] = identifiers['item'][raw_evo.param]
elif raw_evo.method == 20:
evo['trigger'] = 'ev.level-up'
evo['time-of-day'] = 'night'
evo['held-item'] = identifiers['item'][raw_evo.param]
elif raw_evo.method == 21:
evo['trigger'] = 'ev.level-up'
@ -1920,7 +1976,9 @@ def extract_data(root, out):
elif raw_evo.method == 30:
evo['trigger'] = 'ev.level-up'
evo['minimum-level'] = raw_evo.level
evo['party-member-type'] = TYPES[raw_evo.param]
# This is Pancham (needs Dark-type party member) but the
# type seems to be hardcoded, ugh
evo['party-member-type'] = 't.dark'
elif raw_evo.method == 31:
evo['trigger'] = 'ev.level-up'
evo['minimum-level'] = raw_evo.level
@ -1966,18 +2024,34 @@ def extract_data(root, out):
all_pokémon[identifier].evolutions.append(evo)
# Shedinja is an exceptionally special case that isn't listed in the data
# TODO having to lay this out explicitly bugs me
all_pokémon['nincada'].evolutions.append(dict(
into='shedinja',
trigger='shed',
))
# Mega evolution
# TODO
# already parsed a/0/1/5 as mega_evolutions... but... that only lists
# items? what lists the actual megas? OH is the number a form number??
# wow i really need a list of species -> forms eh
# TODO what is a/1/9/4 (ORAS) or a/0/1/6 (SUMO)? 8 files of 404 bytes each
# in both versions, so not dependent on the number of loci
# Baby Pokémon
#with read_garc(root / 'rom/a/1/9/6') as garc: # ORAS
#with read_garc(root / 'rom/a/0/1/8') as garc: # SUMO?
# for subfile in garc:
# baby_pokemon = subfile[0].read()
# print(repr(baby_pokemon))
with read_garc(root / 'rom/a/0/1/8') as garc: # SUMO?
# Last record shows something else
last_data = garc[-1][0].read()
for i, subfile in enumerate(garc[:-1]):
data = subfile[0].read()
baby_id = int.from_bytes(data[:2], 'little')
print(identifiers['species'][i], '->', identifiers['species'][baby_id])
if data[2:] != b'\xff\xff':
print("!!!", repr(data))
other_id = int.from_bytes(last_data[i*2:i*2+2], 'little')
if other_id != baby_id:
print("!!!", i, baby_id, other_id)
# Tutor moves (from the personal structs)
# FIXME why is this down here and not just in the personal loop?

View file

@ -165,6 +165,8 @@ Evolution = _ForwardDeclaration()
EncounterMap = _ForwardDeclaration()
MoveSet = _ForwardDeclaration()
Pokedex = _ForwardDeclaration()
PokédexColor = _ForwardDeclaration()
PokédexShape = _ForwardDeclaration()
class Ability(VersionedLocus):
@ -201,25 +203,32 @@ class Pokémon(VersionedLocus):
# FIXME hackery to get forms working well enough to import back into veekun
# later; this will need some cleaning up later, somehow
form_base_species = _Value(str)
form_identifier = _Value(str)
form_number = _Value(int)
form_appearances = _List(str)
form_appearances = _List(str) # flavor only!
form_name = _Localized(str)
types = _List(Type, min=1, max=2)
# FIXME how do i distinguish hidden ability?
abilities = _List(Ability, min=1, max=3)
base_stats = _Map(Stat, int)
growth_rate = _Value(GrowthRate)
base_experience = _Value(int, min=0, max=255)
effort = _Map(Stat, int)
capture_rate = _Value(int, min=0, max=255)
base_happiness = _Value(int, min=0, max=255)
held_items = _Map(Item, int)
gender_rate = _Value(int)
egg_groups = _List(EggGroup, min=1, max=2)
hatch_counter = _Value(int, min=0, max=255)
pokedex_numbers = _Map(Pokedex, int)
# TODO family?
evolutions = _List(Evolution)
color = _Value(PokédexColor)
shape = _Value(PokédexShape)
genus = _Localized(str)
flavor_text = _Localized(str)
# TODO maybe want little wrapper types that can display as either imperial
@ -243,9 +252,6 @@ class Pokémon(VersionedLocus):
# TODO should this be written in hex, maybe?
game_index = _Value(int)
# FIXME how do i distinguish hidden ability?
abilities = _List(Ability)
Pokemon = Pokémon

View file

@ -1,7 +1,9 @@
from collections import OrderedDict
import itertools
from pathlib import Path
from camel import Camel
from sqlalchemy import func
from sqlalchemy import inspect
from sqlalchemy.orm import Load
@ -10,6 +12,25 @@ import pokedex.db.tables as t
import pokedex.main as main
import pokedex.schema as schema
# FIXME machine to move mapping isn't listed anywhere, oops. where does that go?
# TODO still obviously missing:
# - pokedex order
# TODO needs manual fixing:
# - item categories
# - fling effects?
# - item effects
# - ability effects
# - has_gender_differences
# - forms_switchable
# - is_battle_only
# - form explanations
# - pokemon and form order
# - evolutions requiring particular locations
# TODO needs fixing codewise:
# - decide if i'm using these new pixel version icons or what
# - remove extraneous "Pokémon" after genus
# -
out = Path('moon-out')
session = pokedex.db.connect('postgresql:///veekun_pokedex')
@ -37,14 +58,24 @@ db_damage_classes = {row.identifier: row for row in session.query(t.MoveDamageCl
db_move_categories = {row.identifier: row for row in session.query(t.MoveMetaCategory)}
db_move_ailments = {row.identifier: row for row in session.query(t.MoveMetaAilment)}
db_move_flags = {row.identifier: row for row in session.query(t.MoveFlag)}
db_move_methods = {row.identifier: row for row in session.query(t.PokemonMoveMethod)}
# These are by id since move effects don't have identifiers atm
db_move_effects = {row.id: row for row in session.query(t.MoveEffect)}
db_colors = {row.identifier: row for row in session.query(t.PokemonColor)}
db_shapes = {row.identifier: row for row in session.query(t.PokemonShape)}
db_growth_rates = {row.identifier: row for row in session.query(t.GrowthRate)}
db_genders = {row.identifier: row for row in session.query(t.Gender)}
db_evo_triggers = {row.identifier: row for row in session.query(t.EvolutionTrigger)}
db_egg_groups = {row.identifier: row for row in session.query(t.EggGroup)}
db_stats = OrderedDict((row.identifier, row) for row in session.query(t.Stat).order_by(t.Stat.id.asc()))
# Insert some requisite new stuff if it doesn't already exist
db_sumo_generation = session.query(t.Generation).get(7)
if db_sumo_generation:
db_sumo_version_group = session.query(t.VersionGroup).filter_by(identifier='sun-moon').one()
db_moon = session.query(t.Version).filter_by(identifier='moon').one()
else:
# Distinguish simplified and traditional Chinese
db_languages['zh'].identifier = 'zh-Hant'
@ -118,14 +149,98 @@ def cheap_upsert(db_obj, db_class, new_only, **data):
return db_obj
def update_names(sumo_obj, db_obj):
def update_names(sumo_name_map, db_name_map):
"""Update the database's names as necessary, and add any missing ones"""
for lang, name in sumo_obj.name.items():
old_name = db_obj.name_map.get(db_languages[lang])
for lang, name in sumo_name_map.items():
old_name = db_name_map.get(db_languages[lang])
if old_name != name:
if old_name:
print(f"- NOTE: changing {old_name!r} to {name!r} in {lang}")
db_obj.name_map[db_languages[lang]] = name
db_name_map[db_languages[lang]] = name
# Items
print()
print("--- ITEMS ---")
with (out / 'items.yaml').open(encoding='utf8') as f:
sumo_items = camel.load(f.read())
db_items = {
row.identifier: row for row in session.query(t.Item)
.options(Load(t.Item).joinedload('names'))
}
for sumo_identifier, sumo_item in sumo_items.items():
if sumo_identifier == 'none':
# FIXME just don't dump these yo
continue
print(sumo_identifier)
db_item = db_items.get(sumo_identifier)
if not db_item:
print("- new")
db_item = cheap_upsert(
db_item,
t.Item,
dict(
identifier=sumo_identifier,
# This needs to be done manually, since the categories are 100%
# fanon invention. Default to the "x/y unknown" dummy category.
# NOTE: the categories are linked to pockets but the pockets are
# different in nearly every game, so, uh
category_id=10001,
# FIXME veekun has an "effect" called "berry effect" that just means
# "do whatever the berry does", and that's terrible, and also doesn't
# match the games, SIGH
fling_effect=None,
),
cost=sumo_item.price,
fling_power=sumo_item.fling_power or None,
)
# Names
update_names(sumo_item.name, db_item.name_map)
# Populate with dummy effects
if db_item in session.new:
db_items[sumo_identifier] = db_item
db_item.short_effect_map[db_languages['en']] = f"XXX new effect for {sumo_identifier}"
db_item.effect_map[db_languages['en']] = f"XXX new effect for {sumo_identifier}"
# Flavor text is per-version (group) and thus always new
# FIXME not idempotent
"""
for lang, flavor_text in sumo_item.flavor_text.items():
session.add(t.ItemFlavorText(
item=db_item,
version_group=db_sumo_version_group,
language=db_languages[lang],
flavor_text=flavor_text,
))
"""
# Game index
# FIXME not idempotent
"""
session.add(t.ItemGameIndex(
item=db_item,
generation=db_sumo_generation,
game_index=sumo_item.game_index,
))
"""
# FIXME can flags be done automatically? some of them, at least? they are:
# - countable
# - consumable
# - usable-overworld
# - usable-in-battle
# - holdable
# - holdable-passive
# - holdable-active
# - underground
# TODO aside from natural gift bits, i have no idea where berry data is,
# and i suspect our existing natural gift effects are way off :S
# Abilities
@ -134,29 +249,31 @@ print("--- ABILITIES ---")
with (out / 'abilities.yaml').open(encoding='utf8') as f:
abilities = camel.load(f.read())
for (sumo_identifier, sumo_ability), db_ability in itertools.zip_longest(
abilities.items(),
session.query(t.Ability)
db_abilities = {
row.identifier: row
for row in session.query(t.Ability)
.filter_by(is_main_series=True)
.order_by(t.Ability.id)
.options(Load(t.Ability).joinedload('names'))
):
}
for sumo_identifier, sumo_ability in abilities.items():
print(sumo_identifier)
db_ability = db_abilities.get(sumo_identifier)
if db_ability:
assert sumo_identifier == db_ability.identifier
update_names(sumo_ability, db_ability)
else:
db_ability = t.Ability(
db_abilities[sumo_identifier] = db_ability = t.Ability(
identifier=sumo_identifier,
generation_id=7,
is_main_series=True,
names=[],
)
for lang, name in sumo_ability.name.items():
db_ability.name_map[db_languages[lang]] = name
session.add(db_ability)
update_names(sumo_ability.name, db_ability.name_map)
# Flavor text is per-version (group) and thus always new
# FIXME uhh no it isn't, not if i've already run this script once lol
# TODO not idempotent
"""
for lang, flavor_text in sumo_ability.flavor_text.items():
session.add(t.AbilityFlavorText(
@ -174,6 +291,7 @@ print("--- MOVES ---")
with (out / 'moves.yaml').open(encoding='utf8') as f:
moves = camel.load(f.read())
db_moves = {}
for (sumo_identifier, sumo_move), db_move in itertools.zip_longest(
moves.items(),
session.query(t.Move)
@ -196,10 +314,11 @@ for (sumo_identifier, sumo_move), db_move in itertools.zip_longest(
session.add(effect)
db_move_effects[effect_id] = effect
db_move = cheap_upsert(
db_move = db_moves[sumo_identifier] = cheap_upsert(
db_move,
t.Move,
dict(identifier=sumo_identifier, generation_id=7),
dict(generation_id=7, names=[]),
identifier=sumo_identifier,
type=db_types[sumo_move.type.rpartition('.')[2]],
power=None if sumo_move.power in (0, 1) else sumo_move.power,
pp=sumo_move.pp,
@ -228,7 +347,7 @@ for (sumo_identifier, sumo_move), db_move in itertools.zip_longest(
**loggable_changes))
# Names
update_names(sumo_move, db_move)
update_names(sumo_move.name, db_move.name_map)
# Move flags
old_flag_set = frozenset(db_move.flags)
@ -267,7 +386,7 @@ for (sumo_identifier, sumo_move), db_move in itertools.zip_longest(
)
# Flavor text is per-version (group) and thus always new
# FIXME uhh no it isn't, not if i've already run this script once lol
# FIXME not idempotent
"""
for lang, flavor_text in sumo_move.flavor_text.items():
session.add(t.MoveFlavorText(
@ -280,6 +399,567 @@ for (sumo_identifier, sumo_move), db_move in itertools.zip_longest(
session.flush()
# Pokémon! Auugh!
print()
print("--- POKéMON ---")
db_pokemons = {}
db_pokemon_forms = {}
db_pokemon_specieses = {}
for species in (
session.query(t.PokemonSpecies)
.options(
Load(t.PokemonSpecies).joinedload('evolution_chain'),
Load(t.PokemonSpecies).joinedload('pokemon').joinedload('forms'),
Load(t.PokemonSpecies).joinedload('pokemon').subqueryload('stats'),
Load(t.PokemonSpecies).joinedload('pokemon').subqueryload('types'),
Load(t.PokemonSpecies).joinedload('pokemon').subqueryload('pokemon_abilities'),
Load(t.PokemonSpecies).subqueryload('forms'),
Load(t.PokemonSpecies).subqueryload('evolutions'),
Load(t.PokemonSpecies).subqueryload('egg_groups'),
Load(t.PokemonSpecies).subqueryload('names'),
Load(t.PokemonSpecies).joinedload('pokemon').joinedload('forms').subqueryload('names'),
)
.all()
):
for form in species.forms:
db_pokemon_forms[form.identifier] = form
db_pokemon_forms[species.identifier, form.form_identifier] = form
for pokemon in species.pokemon:
db_pokemons[pokemon.identifier] = pokemon
db_pokemon_specieses[species.identifier] = species
max_pokemon_id = session.query(func.max(t.Pokemon.id)).scalar()
max_pokemon_form_id = session.query(func.max(t.PokemonForm.id)).scalar()
with (out / 'pokemon.yaml').open(encoding='utf8') as f:
pokemon = camel.load(f.read())
sumo_pokemon_by_species = OrderedDict()
# This maps (Pokémon!) identifiers to { base_pokemon, members }, where
# Pokémon in the same family will (in theory) share the same value
sumo_families = dict()
sumo_evolves_from = dict() # species!
for sumo_identifier, sumo_pokemon in pokemon.items():
if sumo_identifier == 'egg':
continue
sumo_pokemon.identifier = sumo_identifier
sumo_species_identifier = sumo_pokemon.form_base_species
sumo_pokemon_by_species.setdefault(sumo_species_identifier, []).append(sumo_pokemon)
# Construct the family. Basic idea is to pretend we're a new family, then
# look through the evolutions for any existing families and merge them
family = dict(
base_pokemon=sumo_identifier,
members={sumo_identifier},
db_chain=None,
)
try:
family['db_chain'] = db_pokemon_specieses[sumo_species_identifier].evolution_chain
except KeyError:
pass
for evolution in sumo_pokemon.evolutions:
into = evolution['into']
sumo_evolves_from[pokemon[into].form_base_species] = sumo_species_identifier
if into in sumo_families:
# If this happens, then the current Pokémon evolves into a Pokémon
# that's already been seen, therefore this is an earlier evolution
family['members'].update(sumo_families[into]['members'])
if not family['db_chain']:
family['db_chain'] = sumo_families[into]['db_chain']
else:
family['members'].add(into)
# Once we're done, ensure every member is using this same newly-updated dict
for member in family['members']:
sumo_families[member] = family
for species_identifier, sumo_pokemons in sumo_pokemon_by_species.items():
db_species = db_pokemon_specieses.get(species_identifier)
sumo_form_identifiers = sumo_pokemons[0].form_appearances
is_concrete = not sumo_form_identifiers
if is_concrete:
sumo_form_identifiers = [sumo_pokemon.form_identifier for sumo_pokemon in sumo_pokemons]
if species_identifier in {'cherrim', 'shellos', 'gastrodon', 'floette', 'furfrou'}:
# These changed to be concrete at some point, but changing form kind is
# a pain in the ass and I don't want to do it, so let's not
is_concrete = False
# Let's check some stuff first I guess
print(f"{species_identifier:24s}")
if db_species:
if is_concrete:
# Concrete means every form is a Pokemon, and every Pokemon has one PokemonForm
if len(db_species.pokemon) != len(db_species.forms):
print(f"- WARNING: expected the same number of Pokémon and forms but got {len(db_species.pokemon)} vs {len(db_species.forms)}")
for form in db_species.forms:
if not form.is_default:
print(f"- WARNING: expected every form to be a default but {form.form_identifier} is not")
sumo_pokemon_identifiers = {pokemon.identifier for pokemon in sumo_pokemons}
db_pokemon_identifiers = {pokemon.identifier for pokemon in db_species.pokemon}
added_pokemon = sumo_pokemon_identifiers - db_pokemon_identifiers
removed_pokemon = db_pokemon_identifiers - sumo_pokemon_identifiers
if added_pokemon:
print(f"- NOTE: new forms {added_pokemon}")
if removed_pokemon:
print(f"- NOTE: removed forms?? {removed_pokemon}")
else:
# Flavor means there's only one Pokemon, and it has one PokemonForm per form
if len(db_species.pokemon) > 1:
print(f"- WARNING: expected only one Pokémon but got {db_species.pokemon}")
default_count = 0
form_identifiers = set()
for form in db_species.forms:
form_identifiers.add(form.form_identifier)
if form.is_default:
default_count += 1
if default_count != 1:
print(f"- WARNING: expected exactly one default but found {default_count}")
for sumo_form_identifier in sumo_form_identifiers:
if sumo_form_identifier in form_identifiers:
form_identifiers.discard(sumo_form_identifier)
else:
print(f"- NOTE: new form {sumo_form_identifier}")
if form_identifiers:
print(f"- NOTE: SUMO is missing forms {', '.join(sorted(ident or 'None' for ident in form_identifiers))} ({sumo_form_identifiers})")
else:
print(f"- NOTE: new {'concrete' if is_concrete else 'flavor'} form")
print(" ", is_concrete, "|", sumo_pokemons[0].form_appearances)
print(" ", [sp.identifier for sp in sumo_pokemons])
# NOTE: this is a terrible way to store it in the yaml, and also it's
# inaccurate for gen 7 i think? and why do i use -1 for genderless instead
# of null lol
if sumo_pokemons[0].gender_rate == 255:
gender_rate = -1
else:
# 31 -> 1, etc, up to 254 -> 8
gender_rate = (sumo_pokemons[0].gender_rate + 2) // 32
# A Pokémon is a baby if it's the earliest evolution, it cannot breed, and
# it evolves into something that can breed
is_baby = False
sumo_identifier = sumo_pokemons[0].identifier
sumo_family = sumo_families[sumo_identifier]
is_baby = (
sumo_family['base_pokemon'] == sumo_identifier and
sumo_pokemons[0].egg_groups == ['eg.no-eggs'] and
any(pokemon[identifier].egg_groups != ['eg.no-eggs']
for identifier in sumo_family['members'])
)
# If there's no evolution chain yet, make one
# NOTE: i don't have the baby trigger items, because they don't seem to be
# data; they're in code and i've yet to find them
db_chain = sumo_family['db_chain']
if not db_chain:
db_chain = t.EvolutionChain()
session.add(db_chain)
sumo_family['db_chain'] = db_chain
db_species = db_pokemon_specieses[species_identifier] = cheap_upsert(
db_species,
t.PokemonSpecies,
dict(
generation_id=7,
# Avoids database fetches on new rows
evolutions=[],
egg_groups=[],
names=[],
# Doesn't apply to Pokémon not in FRLG
habitat_id=None,
# Doesn't apply to Pokémon not in Conquest
conquest_order=None,
# Needs to be populated manually
# FIXME should i get this by checking for different sprites...? i
# don't think that would quite catch everything
has_gender_differences=False,
# Needs to be populated manually
forms_switchable=False,
# Easier to populate with a separate script after the fact
order=0,
),
id=sumo_pokemons[0].game_index,
identifier=species_identifier,
parent_species=db_pokemon_specieses[sumo_evolves_from[species_identifier]] if species_identifier in sumo_evolves_from else None,
evolution_chain=db_chain,
# NOTE: color is actually per-concrete
color=db_colors[sumo_pokemons[0].color.rpartition('.')[2]],
# NOTE: shape is actually per-flavor
shape=db_shapes[sumo_pokemons[0].shape.rpartition('.')[2]],
gender_rate=gender_rate,
# NOTE: capture rate is actually per-concrete
capture_rate=sumo_pokemons[0].capture_rate,
base_happiness=sumo_pokemons[0].base_happiness,
is_baby=is_baby,
# NOTE: this is nonsense for pokémon that can't be in eggs (which is
# not a thing i'm sure i have tracked atm, since i don't directly dump
# the egg data)
hatch_counter=sumo_pokemons[0].hatch_counter,
# NOTE: actually per concrete even though that doesn't entirely make sense haha
growth_rate=db_growth_rates[sumo_pokemons[0].growth_rate.rpartition('.')[2]],
)
# NOTE names are given per concrete form but are really truly a species thing
# FIXME i am not sure doing both of these at the same time actually works
update_names(sumo_pokemons[0].name, db_species.name_map)
update_names(sumo_pokemons[0].genus, db_species.genus_map)
# Flavor text is per-version (group) and thus always new
# FIXME this is wrong; flavor text is per form!
# FIXME not idempotent
# FIXME get for sun as well
"""
for lang, flavor_text in sumo_pokemons[0].flavor_text.items():
if flavor_text:
session.add(t.PokemonSpeciesFlavorText(
species_id=db_species.id,
version=db_moon,
language=db_languages[lang],
flavor_text=flavor_text,
))
"""
# FIXME i fucked something up! new pokemon's forms ended up in the
# stratosphere and also not marked as defaults. had to do:
# update pokemon_forms set id = pokemon_id, is_default = true where form_order = 1 and id > 10000 and pokemon_id between 720 and 9999;
sumo_db_pokemon_pairs = []
sumo_db_pokemon_form_pairs = []
# Create or update the Pokemon / PokemonForm rows for this species.  Three
# cases are handled: floette (special-cased), "concrete" forms (one Pokemon
# row per yaml record) and "flavor" forms (one Pokemon row, many PokemonForms).
# NOTE(review): cheap_upsert is defined outside this section; judging by the
# comments below, its dict argument holds values that only matter for
# brand-new rows -- confirm against its definition.
if species_identifier == 'floette':
# This is a fucking mess; there are two concrete Pokémon, and one of
# them has multiple flavor forms, so, goddamn. Let's just assume
# Sun/Moon didn't change anything, I guess.
# TODO fix this? requires making a tree of concrete -> flavor and
# consolidating the below branches
# Only pairs the two existing db rows with their yaml records; no Pokemon
# or PokemonForm row is created or updated in this branch.
for sumo_pokemon in sumo_pokemons:
if sumo_pokemon.identifier == 'floette-red':
sumo_db_pokemon_pairs.append((sumo_pokemon, db_pokemons['floette']))
elif sumo_pokemon.identifier == 'floette-eternal':
sumo_db_pokemon_pairs.append((sumo_pokemon, db_pokemons['floette-eternal']))
elif is_concrete:
# Concrete: multiple yaml records, each is a Pokemon row with one PokemonForm
for form_order, (sumo_pokemon, sumo_form_identifier) in enumerate(zip(sumo_pokemons, sumo_form_identifiers), start=1):
# Keep the id of a row that already exists; otherwise take the next id
# from the running max_pokemon_id counter.  (NOTE: `id` shadows the
# builtin of the same name for the rest of this scope.)
if sumo_pokemon.identifier in db_pokemons:
id = db_pokemons[sumo_pokemon.identifier].id
else:
max_pokemon_id += 1
id = max_pokemon_id
db_pokemon = cheap_upsert(
db_pokemons.get(sumo_pokemon.identifier),
t.Pokemon,
dict(
# Avoids database fetches on new rows
types=[],
pokemon_abilities=[],
items=[],
names=[],
stats=[],
# Easier to populate manually
order=0,
),
id=id,
identifier=sumo_pokemon.identifier,
species=db_species,
# TODO the units in the yaml don't match my goofy plan from rby
# (which i'm not 100% on anyway)
height=sumo_pokemon.height // 10,
weight=sumo_pokemon.weight,
base_experience=sumo_pokemon.base_experience,
# NOTE: this is less about a real sense of default-ness and
# more about "what form should veekun default to when looking
# at this species" (which doesn't belong in the data tbh)
is_default=form_order == 1,
)
# Remember the row under its identifier so the form upsert below can
# find it, and queue it for the per-Pokémon pass.
db_pokemons[sumo_pokemon.identifier] = db_pokemon
sumo_db_pokemon_pairs.append((sumo_pokemon, db_pokemon))
# Each concrete Pokemon gets a single form: reuse the first form already
# attached to the row if there is one, otherwise allocate a new form id.
db_form = next(iter(db_pokemons[sumo_pokemon.identifier].forms), None)
if db_form:
id = db_form.id
else:
max_pokemon_form_id += 1
id = max_pokemon_form_id
db_form = cheap_upsert(
db_form,
t.PokemonForm,
dict(
version_group=db_sumo_version_group,
# Easier to do separately
order=0,
# Needs doing manually
is_battle_only=False,
),
id=id,
identifier=sumo_pokemon.identifier,
form_identifier=sumo_form_identifier,
pokemon=db_pokemons[sumo_pokemon.identifier],
is_default=True,
is_mega=bool(sumo_form_identifier and sumo_form_identifier.startswith('mega')),
form_order=form_order,
)
# NOTE the db also has a "pokemon_name" field, e.g. "Sky Shaymin",
# but i don't think that's official? ok well it's marked as
# official but show me where the games say that
update_names(sumo_pokemon.form_name, db_form.form_name_map)
else:
# Flavor: one yaml record, one Pokemon, multiple PokemonForms
# TODO i think there are names for flavor form but the yaml has nowhere to store them at the moment
sumo_pokemon = sumo_pokemons[0]
# Reuse the first Pokemon already attached to the species, if any; the row
# id is the first yaml record's game_index.
db_pokemon = cheap_upsert(
next(iter(db_species.pokemon), None),
t.Pokemon,
dict(
types=[],
pokemon_abilities=[],
items=[],
names=[],
stats=[],
order=0,
),
id=sumo_pokemons[0].game_index,
identifier=species_identifier,
species=db_species,
# TODO the units in the yaml don't match my goofy plan from rby
# (which i'm not 100% on anyway)
height=sumo_pokemon.height // 10,
weight=sumo_pokemon.weight,
base_experience=sumo_pokemon.base_experience,
is_default=True,
)
sumo_db_pokemon_pairs.append((sumo_pokemon, db_pokemon))
# One PokemonForm per form identifier; a falsy form identifier yields the
# bare species identifier as the full form identifier.
for form_order, form_identifier in enumerate(sumo_form_identifiers, start=1):
full_form_identifier = species_identifier + ('-' + form_identifier if form_identifier else '')
if full_form_identifier in db_pokemon_forms:
id = db_pokemon_forms[full_form_identifier].id
else:
max_pokemon_form_id += 1
id = max_pokemon_form_id
cheap_upsert(
db_pokemon_forms.get(full_form_identifier),
t.PokemonForm,
dict(
version_group=db_sumo_version_group,
order=0,
# Needs doing manually
is_battle_only=False,
),
id=id,
identifier=full_form_identifier,
form_identifier=form_identifier,
pokemon=db_pokemon,
# NOTE(review): default-ness is inferred from the id being below
# 10000, so a form given a fresh id at or above that is never marked
# default -- possibly related to the FIXME about new forms; confirm.
is_default=id < 10000,
is_mega=bool(form_identifier and form_identifier.startswith('mega')),
# FIXME this is wrong if there are existing forms that disappeared in sumo
form_order=form_order,
)
# FIXME: lack of 'unknown' kinda throws things off for arceus
# Send the pending Pokemon/form changes to the database before continuing.
session.flush()
# Egg groups: bring the species' egg groups in the database in line with the
# first yaml record.  The yaml identifiers are dotted; only the part after the
# last dot is used to look the group up.
current_egg_groups = frozenset(db_species.egg_groups)
wanted_egg_groups = frozenset(
    db_egg_groups[ident.rpartition('.')[2]]
    for ident in sumo_pokemons[0].egg_groups
)
egg_groups_to_add = wanted_egg_groups - current_egg_groups
egg_groups_to_remove = current_egg_groups - wanted_egg_groups

for new_egg_group in egg_groups_to_add:
    print(f"- adding egg group {new_egg_group}")
    db_species.egg_groups.append(new_egg_group)

for old_egg_group in egg_groups_to_remove:
    print(f"- removing egg group {old_egg_group}")
    db_species.egg_groups.remove(old_egg_group)
# Do stuff that's per concrete Pokémon in the db: reconcile types, base
# stats/effort and abilities for every (yaml record, db row) pair gathered
# above.
for sumo_pokemon, db_pokemon in sumo_db_pokemon_pairs:
    # Types.  Walk the yaml and db types in lockstep by slot; zip_longest pads
    # the shorter side with None, so either value may be missing.
    type_pairs = itertools.zip_longest(sumo_pokemon.types, db_pokemon.types)
    for slot, (type_ident, db_type) in enumerate(type_pairs, start=1):
        # Only split the identifier when there is one.  The yaml side is None
        # when the db has more types than the yaml; splitting it
        # unconditionally used to raise AttributeError before the "LOST"
        # warning below could ever be printed.
        veekun_ident = None
        if type_ident is not None:
            veekun_ident = type_ident.rpartition('.')[2]

        if not db_type:
            db_type = db_types[veekun_ident]
            print(f"- adding type {db_type}")
            session.add(t.PokemonType(
                pokemon_id=db_pokemon.id,
                type_id=db_type.id,
                slot=slot,
            ))
        elif not type_ident:
            print(f"- WARNING: seem to have LOST type {db_type}, this is not supported")
        elif db_type.identifier != veekun_ident:
            print(f"- WARNING: type {db_type} has CHANGED TO {type_ident}, this is not supported")

    # Stats.  Update the rows that already exist, then add whichever
    # non-battle-only stats are still missing.
    seen_stats = set()
    for existing_stat in db_pokemon.stats:
        stat_identifier = existing_stat.stat.identifier
        seen_stats.add(stat_identifier)
        cheap_upsert(
            existing_stat,
            # The rows in db_pokemon.stats are PokemonStat (that is what gets
            # appended below), not Stat.
            t.PokemonStat,
            dict(),
            base_stat=sumo_pokemon.base_stats[stat_identifier],
            effort=sumo_pokemon.effort[stat_identifier],
        )
    for stat_identifier, stat in db_stats.items():
        if stat.is_battle_only or stat_identifier in seen_stats:
            continue
        db_pokemon.stats.append(t.PokemonStat(
            stat=stat,
            base_stat=sumo_pokemon.base_stats[stat_identifier],
            effort=sumo_pokemon.effort[stat_identifier],
        ))

    # Abilities, keyed by 1-based slot; slot 3 is stored as the hidden ability.
    old_ability_slots = {row.slot: row for row in db_pokemon.pokemon_abilities}
    new_ability_slots = dict(enumerate(sumo_pokemon.abilities, start=1))
    # A second or third slot that merely repeats the first ability is dropped
    # (presumably the game data pads unused slots that way -- confirm).
    for duplicate_slot in (2, 3):
        if new_ability_slots.get(duplicate_slot) == new_ability_slots[1]:
            del new_ability_slots[duplicate_slot]

    # Every slot in the union exists on at least one side.  Sorting keeps the
    # log output and insertion order deterministic.
    for slot in sorted(old_ability_slots.keys() | new_ability_slots.keys()):
        old_ability_row = old_ability_slots.get(slot)
        new_ability_ident = new_ability_slots.get(slot)
        if not old_ability_row:
            db_ability = db_abilities[new_ability_ident.rpartition('.')[2]]
            print(f"- adding ability {db_ability}")
            session.add(t.PokemonAbility(
                pokemon_id=db_pokemon.id,
                ability_id=db_ability.id,
                slot=slot,
                is_hidden=(slot == 3),
            ))
        elif not new_ability_ident:
            print(f"- WARNING: seem to have LOST ability {old_ability_row.ability}, this is not supported")
        else:
            veekun_ident = new_ability_ident.rpartition('.')[2]
            if old_ability_row.ability.identifier != veekun_ident:
                db_ability = db_abilities[veekun_ident]
                print(f"- changing ability in slot {slot} from {old_ability_row.ability} to {db_ability}")
                old_ability_row.ability = db_ability

    # NOTE: the triple-quoted block below is disabled code (held items and
    # learnable moves) parked in a bare string literal; it is evaluated as a
    # no-op expression and nothing inside it runs.
    """
# Items
# FIXME need items from the other game argh, they're per-version
# TODO not idempotent
for item_identifier, rarity in sumo_pokemon.held_items.items():
session.add(t.PokemonItem(
pokemon=db_pokemon,
version=db_moon,
item=db_items[item_identifier.rpartition('.')[2]],
rarity=rarity,
))
# Moves
# TODO not idempotent
for method_identifier, moves in sumo_pokemon.moves.items():
last_row = None
order = None
seen = set()
for move_identifier in moves:
if method_identifier == 'level-up':
# FIXME THIS SUX
((level, move_identifier),) = move_identifier.items()
else:
level = 0
if level and last_row and level == last_row.level:
if order is None:
last_row.order = 1
order = 2
else:
order += 1
else:
order = None
# TODO this is stupid but braviary learns superpower at level
# 1, twice, and I'm not really sure what to do about that; is
# it correct to remove from the data?
key = (move_identifier, level)
if key in seen:
continue
seen.add(key)
last_row = t.PokemonMove(
pokemon=db_pokemon,
version_group=db_sumo_version_group,
move=db_moves[move_identifier.rpartition('.')[2]],
method=db_move_methods[method_identifier],
level=level,
order=order,
)
session.add(last_row)
"""
# Do evolution after adding all the Pokémon, since Pokémon tend to evolve into
# later Pokémon that wouldn't have been inserted yet.  It's also tricky, since
# there might be an existing matching record among several

# NOTE: the trade partner does not seem to be in the data itself so it has to
# be hardcoded here, argh
trade_partners = {
    'karrablast': 'shelmet',
    'shelmet': 'karrablast',
}


def _evolution_lookup(table, evolution, key):
    """Return the row of `table` named by `evolution[key]`, or None.

    The yaml value is a dotted identifier; only the part after the last dot
    is used as the lookup key.  A missing `key` yields None; an identifier
    that is not in `table` raises KeyError.
    """
    if key not in evolution:
        return None
    return table[evolution[key].rpartition('.')[2]]


for species_identifier, sumo_pokemons in sumo_pokemon_by_species.items():
    for sumo_evolution in sumo_pokemons[0].evolutions:
        # Evolutions are on the evolver in the yaml, but evolvee in the db
        db_species = db_pokemon_specieses[pokemon[sumo_evolution['into']].form_base_species]

        if 'traded-with' in sumo_evolution:
            if species_identifier not in trade_partners:
                # This message used to reference an undefined name, which
                # turned the intended ValueError into a NameError.
                raise ValueError(f"Don't know who trade-evolves with {species_identifier}")
            traded_with = db_pokemon_specieses[trade_partners[species_identifier]]
        else:
            traded_with = None

        expected = dict(
            evolved_species=db_species,
            trigger=db_evo_triggers[sumo_evolution['trigger'].rpartition('.')[2]],
            trigger_item=_evolution_lookup(db_items, sumo_evolution, 'trigger-item'),
            minimum_level=sumo_evolution.get('minimum-level'),
            gender=db_genders[sumo_evolution['gender']] if 'gender' in sumo_evolution else None,
            # NOTE: this needs populating manually; it's not in the yaml either
            location=None,
            held_item=_evolution_lookup(db_items, sumo_evolution, 'held-item'),
            time_of_day=sumo_evolution.get('time-of-day'),
            known_move=_evolution_lookup(db_moves, sumo_evolution, 'known-move'),
            known_move_type=_evolution_lookup(db_types, sumo_evolution, 'known-move-type'),
            minimum_happiness=sumo_evolution.get('minimum-friendship'),
            minimum_beauty=sumo_evolution.get('minimum-beauty'),
            minimum_affection=sumo_evolution.get('minimum-affection'),
            # NOTE(review): confirm the signs here against both the yaml's
            # definition of 'higher-physical-stat' and the meaning of the
            # database column; the mapping is kept exactly as it was.
            relative_physical_stats={'attack': -1, 'defense': 1, 'equal': 0, None: None}[sumo_evolution.get('higher-physical-stat')],
            party_species=_evolution_lookup(db_pokemon_specieses, sumo_evolution, 'party-member'),
            party_type=_evolution_lookup(db_types, sumo_evolution, 'party-member-type'),
            trade_species=traded_with,
            needs_overworld_rain=sumo_evolution.get('overworld-weather') == 'rain',
            turn_upside_down=sumo_evolution.get('upside-down', False),
        )

        # FIXME need to finish... filling this out
        # Only add a row when no existing evolution of the evolved species
        # matches every expected attribute.
        already_present = any(
            all(v == getattr(db_evolution, k) for (k, v) in expected.items())
            for db_evolution in db_species.evolutions
        )
        if not already_present:
            print(f"- adding new evolution for {species_identifier} -> {sumo_evolution['into']}")
            session.add(t.PokemonEvolution(**expected))
# Send any still-pending changes to the database.
session.flush()
# Debugging aid: uncomment these two lines (and drop the commit) to throw the
# whole run away instead of persisting it.
#print("ROLLING BACK")
#session.rollback()
# Make everything done through this session permanent.
session.commit()
# Blank line, then a completion marker for whoever is watching the log.
print()
print("done")