Split pokemon_moves into three tables.

The idea here is to split each pokemon's moveset into sub-movesets which can be
re-used across versions.

Thanks to some mapper magic, this should be more or less transparent to users
of pokedex. Although i'm sure inserting new moves will be a pain.

Before:

pokemon_moves: 312,305 rows

After:

pokemon_movesets: 11,268 rows
pokemon_moveset_moves: 123,645 rows
pokemon_moveset_version_groups: 23,597 rows
total: 158,510 rows
This commit is contained in:
Andrew Ekstedt 2015-05-25 18:08:42 -07:00
parent 68b83c0ec1
commit 3ace3d8aa3
5 changed files with 158688 additions and 2 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -33,7 +33,7 @@ from sqlalchemy import Column, ForeignKey, MetaData, PrimaryKeyConstraint, Table
from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta
from sqlalchemy.ext.associationproxy import association_proxy from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import backref, relationship from sqlalchemy.orm import backref, relationship, column_property
from sqlalchemy.orm.session import Session from sqlalchemy.orm.session import Session
from sqlalchemy.orm.interfaces import AttributeExtension from sqlalchemy.orm.interfaces import AttributeExtension
from sqlalchemy.sql import and_, or_ from sqlalchemy.sql import and_, or_
@ -1898,7 +1898,7 @@ class PokemonItem(TableBase):
rarity = Column(Integer, nullable=False, rarity = Column(Integer, nullable=False,
doc=u"Chance of the Pokémon holding the item, in percent") doc=u"Chance of the Pokémon holding the item, in percent")
class PokemonMove(TableBase): class OldPokemonMove(TableBase):
u"""Record of a move a Pokémon can learn.""" u"""Record of a move a Pokémon can learn."""
__tablename__ = 'pokemon_moves' __tablename__ = 'pokemon_moves'
pokemon_id = Column(Integer, ForeignKey('pokemon.id'), nullable=False, index=True, pokemon_id = Column(Integer, ForeignKey('pokemon.id'), nullable=False, index=True,
@ -1919,6 +1919,27 @@ class PokemonMove(TableBase):
{}, {},
) )
pokemon_moveset_table = Table('pokemon_movesets', TableBase.metadata,
Column('id', Integer, primary_key=True, autoincrement=True),
#Column('hint', Unicode(79), unique=True, index=True),
Column('pokemon_id', Integer, ForeignKey('pokemon.id'), nullable=False),
Column('method_id', Integer, ForeignKey('pokemon_move_methods.id'), nullable=False, index=True),
)
pokemon_moveset_moves_table = Table('pokemon_moveset_moves', TableBase.metadata,
Column('moveset_id', Integer, ForeignKey('pokemon_movesets.id'), nullable=False),
Column('move_id', Integer, ForeignKey('moves.id'), nullable=False),
Column('level', Integer, nullable=True, autoincrement=False),
Column('order', Integer, nullable=True),
PrimaryKeyConstraint('moveset_id', 'move_id', 'level'),
)
pokemon_moveset_version_groups_table = Table('pokemon_moveset_version_groups', TableBase.metadata,
Column('moveset_id', Integer, ForeignKey('pokemon_movesets.id'), nullable=False),
Column('version_group_id', Integer, ForeignKey('version_groups.id'), nullable=False),
PrimaryKeyConstraint('moveset_id', 'version_group_id'),
)
class PokemonMoveMethod(TableBase): class PokemonMoveMethod(TableBase):
u"""A method a move can be learned by, such as "Level up" or "Tutor". """ u"""A method a move can be learned by, such as "Level up" or "Tutor". """
__tablename__ = 'pokemon_move_methods' __tablename__ = 'pokemon_move_methods'
@ -2217,6 +2238,12 @@ class VersionGroupRegion(TableBase):
region_id = Column(Integer, ForeignKey('regions.id'), primary_key=True, nullable=False, region_id = Column(Integer, ForeignKey('regions.id'), primary_key=True, nullable=False,
doc=u"The ID of the region.") doc=u"The ID of the region.")
class PokemonMove(TableBase):
__table__ = pokemon_moveset_table.join(pokemon_moveset_moves_table).join(pokemon_moveset_version_groups_table)
moveset_id = column_property(pokemon_moveset_table.c.id, pokemon_moveset_moves_table.c.moveset_id, pokemon_moveset_version_groups_table.c.moveset_id)
pokemon_move_method_id = column_property(pokemon_moveset_table.c.method_id)
### Relationships down here, to avoid dependency ordering problems ### Relationships down here, to avoid dependency ordering problems

146
scripts/split-movesets.py Normal file
View file

@ -0,0 +1,146 @@
# coding: utf-8
import itertools
from operator import itemgetter
from collections import OrderedDict
import pokedex.db
import sqlalchemy
from sqlalchemy import (Column, Integer, ForeignKey, Table, Unicode)
from sqlalchemy import ForeignKeyConstraint, PrimaryKeyConstraint, UniqueConstraint
meta = sqlalchemy.MetaData()
Base = sqlalchemy.ext.declarative.declarative_base(metadata=meta)
class OldPokemonMove(Base):
u"""Record of a move a Pokémon can learn."""
__tablename__ = 'pokemon_moves'
pokemon_id = Column(Integer, ForeignKey('pokemon.id'), nullable=False, index=True,
doc=u"ID of the Pokémon")
version_group_id = Column(Integer, ForeignKey('version_groups.id'), nullable=False, index=True,
doc=u"ID of the version group this applies to")
move_id = Column(Integer, ForeignKey('moves.id'), nullable=False, index=True,
doc=u"ID of the move")
pokemon_move_method_id = Column(Integer, ForeignKey('pokemon_move_methods.id'), nullable=False, index=True,
doc=u"ID of the method this move is learned by")
level = Column(Integer, nullable=True, index=True, autoincrement=False,
doc=u"Level the move is learned at, if applicable")
order = Column(Integer, nullable=True,
doc=u"The order which moves learned at the same level are learned in")
__table_args__ = (
PrimaryKeyConstraint('pokemon_id', 'version_group_id', 'move_id', 'pokemon_move_method_id', 'level'),
)
# The pokemon move table has much redundancy across versions, but also many small variations.
# The general strategy here is to break movesets into chunks, such that a moveset is the union
# of a number of an arbitrary number of chunks, and those chunks can be shared across versions.
pokemon_moveset_table = Table('pokemon_movesets', Base.metadata,
Column('id', Integer, primary_key=True, autoincrement=True),
#Column('hint', Unicode(79), unique=True, index=True),
Column('pokemon_id', Integer, ForeignKey('pokemon.id')),
Column('method_id', Integer, ForeignKey('pokemon_move_methods.id'), nullable=False, index=True),
)
pokemon_moveset_moves_table = Table('pokemon_moveset_moves', Base.metadata,
Column('moveset_id', Integer),
Column('move_id', Integer, ForeignKey('moves.id'), nullable=False, index=True),
Column('level', Integer, nullable=True, index=True, autoincrement=False),
Column('order', Integer, nullable=True, index=False),
ForeignKeyConstraint(['moveset_id'], ['pokemon_movesets.id']),
ForeignKeyConstraint(['move_id'], ['moves.id']),
PrimaryKeyConstraint('moveset_id', 'move_id', 'level'),
)
pokemon_moveset_version_groups_table = Table('pokemon_moveset_version_groups', Base.metadata,
Column('moveset_id', Integer),
Column('version_group_id', Integer, ForeignKey('version_groups.id'), nullable=False, index=True),
ForeignKeyConstraint(['moveset_id'], ['pokemon_movesets.id']),
PrimaryKeyConstraint('moveset_id', 'version_group_id'),
)
def upgrade():
engine = sqlalchemy.create_engine('postgresql:///pokedex')
meta.reflect(bind=engine)
pokemon_moveset_version_groups_table.drop(engine, checkfirst=True)
pokemon_moveset_moves_table.drop(engine, checkfirst=True)
pokemon_moveset_table.drop(engine, checkfirst=True)
pokemon_moveset_table.create(engine, checkfirst=True)
pokemon_moveset_moves_table.create(engine, checkfirst=True)
pokemon_moveset_version_groups_table.create(engine, checkfirst=True)
old_table = OldPokemonMove.__table__
pokemon_table = meta.tables['pokemon']
method_table = meta.tables['pokemon_move_methods']
version_group_table = meta.tables['version_groups']
generation_table = meta.tables['generations']
print "all systems go"
conn = engine.connect()
for pokemon_id, in conn.execute(sqlalchemy.select([pokemon_table.c.id])):
moves = conn.execute(
sqlalchemy.select([
old_table.c.pokemon_move_method_id,
version_group_table.c.generation_id,
old_table.c.version_group_id,
old_table.c.move_id,
old_table.c.level,
old_table.c.order,
])
.where(old_table.c.version_group_id == version_group_table.c.id)
.where(old_table.c.pokemon_id == pokemon_id)
.order_by("pokemon_move_method_id", "generation_id", version_group_table.c.order)
).fetchall()
print pokemon_id
for method_id, group in itertools.groupby(moves, itemgetter(0)):
if method_id == 4:
# TMs stayed more or less same across Gens 3&4 and 5&6
def grouper(x):
return [0, 1, 2, 3, 3, 5, 5][x[1]]
#elif method_id == 2:
# Egg moves stick around forever
#grouper = lambda _: None
else:
grouper = itemgetter(1)
for _, group in itertools.groupby(group, grouper):
movesets = OrderedDict()
for version_group_id, group in itertools.groupby(group, itemgetter(2)):
movesets[version_group_id] = set(map(itemgetter(3, 4, 5), group))
common = reduce(set.intersection, movesets.values())
if common:
create_moveset(conn, pokemon_id, method_id, common, movesets.keys())
deltas = []
for version_group_id in movesets:
moveset = movesets[version_group_id] - common
delta_ids = []
for moveset_id, delta in reversed(deltas):
if delta <= moveset:
delta_ids.append(moveset_id)
moveset -= delta
if delta_ids:
add_to_movesets(conn, pokemon_id, method_id, delta_ids, version_group_id)
if moveset:
moveset_id = create_moveset(conn, pokemon_id, method_id, moveset, [version_group_id])
deltas.append((moveset_id, moveset))
def create_moveset(conn, pokemon_id, method_id, moves, versions):
result = conn.execute(pokemon_moveset_table.insert(), pokemon_id=pokemon_id, method_id=method_id)
moveset_id, = result.inserted_primary_key
conn.execute(pokemon_moveset_moves_table.insert()
.values(moveset_id=moveset_id),
[dict(move_id=move_id, level=level, order=order) for move_id, level, order in moves])
conn.execute(pokemon_moveset_version_groups_table.insert()
.values(moveset_id=moveset_id),
[dict(version_group_id=version_group_id) for version_group_id in versions])
return moveset_id
def add_to_movesets(conn, pokemon_id, method_id, moveset_ids, version_group_id):
conn.execute(pokemon_moveset_version_groups_table.insert()
.values(version_group_id=version_group_id),
[dict(moveset_id=moveset_id) for moveset_id in moveset_ids])
if __name__ == '__main__':
upgrade()