mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
load: Add --recursive option.
Helps somewhat with #526 (`pokedex load` is slow) by making it easier to load only the tables you're interested in.
This commit is contained in:
parent
ff1c4b530d
commit
9340f24ae4
3 changed files with 71 additions and 3 deletions
54
pokedex/db/dependencies.py
Normal file
54
pokedex/db/dependencies.py
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
import sqlalchemy.sql.visitors as visitors
|
||||||
|
|
||||||
|
from pokedex.db.tables import metadata
|
||||||
|
|
||||||
|
# stolen from sqlalchemy.sql.util.sort_tables
|
||||||
|
def compute_dependencies(tables):
|
||||||
|
"""Construct a reverse dependency graph for the given tables.
|
||||||
|
|
||||||
|
Returns a dict which maps a table to the list of tables which depend on it.
|
||||||
|
"""
|
||||||
|
tables = list(tables)
|
||||||
|
graph = {}
|
||||||
|
def visit_foreign_key(fkey):
|
||||||
|
if fkey.use_alter:
|
||||||
|
return
|
||||||
|
parent_table = fkey.column.table
|
||||||
|
if parent_table in tables:
|
||||||
|
child_table = fkey.parent.table
|
||||||
|
if parent_table is not child_table:
|
||||||
|
graph.setdefault(parent_table, []).append(child_table)
|
||||||
|
|
||||||
|
for table in tables:
|
||||||
|
visitors.traverse(table,
|
||||||
|
{'schema_visitor': True},
|
||||||
|
{'foreign_key': visit_foreign_key})
|
||||||
|
|
||||||
|
graph.setdefault(table, []).extend(table._extra_dependencies)
|
||||||
|
|
||||||
|
return graph
|
||||||
|
|
||||||
|
#: The dependency graph for pokedex.db.tables
|
||||||
|
_pokedex_graph = compute_dependencies(metadata.tables.values())
|
||||||
|
|
||||||
|
def find_dependent_tables(tables, graph=None):
|
||||||
|
"""Recursively find all tables which depend on the given tables.
|
||||||
|
|
||||||
|
The returned set does not include the original tables.
|
||||||
|
"""
|
||||||
|
if graph is None:
|
||||||
|
graph = _pokedex_graph
|
||||||
|
tables = list(tables)
|
||||||
|
dependents = set()
|
||||||
|
def add_dependents_of(table):
|
||||||
|
for dependent_table in graph.get(table, []):
|
||||||
|
if dependent_table not in dependents:
|
||||||
|
dependents.add(dependent_table)
|
||||||
|
add_dependents_of(dependent_table)
|
||||||
|
|
||||||
|
for table in tables:
|
||||||
|
add_dependents_of(table)
|
||||||
|
|
||||||
|
dependents -= set(tables)
|
||||||
|
|
||||||
|
return dependents
|
|
@ -11,6 +11,7 @@ import sqlalchemy.types
|
||||||
from pokedex.db import metadata
|
from pokedex.db import metadata
|
||||||
import pokedex.db.tables as tables
|
import pokedex.db.tables as tables
|
||||||
from pokedex.defaults import get_default_csv_dir
|
from pokedex.defaults import get_default_csv_dir
|
||||||
|
from pokedex.db.dependencies import find_dependent_tables
|
||||||
|
|
||||||
|
|
||||||
def _get_table_names(metadata, patterns):
|
def _get_table_names(metadata, patterns):
|
||||||
|
@ -95,7 +96,7 @@ def _get_verbose_prints(verbose):
|
||||||
return print_start, print_status, print_done
|
return print_start, print_status, print_done
|
||||||
|
|
||||||
|
|
||||||
def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True):
|
def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True, recursive=False):
|
||||||
"""Load data from CSV files into the given database session.
|
"""Load data from CSV files into the given database session.
|
||||||
|
|
||||||
Tables are created automatically.
|
Tables are created automatically.
|
||||||
|
@ -119,6 +120,9 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
|
||||||
`safe`
|
`safe`
|
||||||
If set to False, load can be faster, but can corrupt the database if
|
If set to False, load can be faster, but can corrupt the database if
|
||||||
it crashes or is interrupted.
|
it crashes or is interrupted.
|
||||||
|
|
||||||
|
`recursive`
|
||||||
|
If set to True, load all dependent tables too.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# First take care of verbosity
|
# First take care of verbosity
|
||||||
|
@ -128,8 +132,13 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
|
||||||
if directory is None:
|
if directory is None:
|
||||||
directory = get_default_csv_dir()
|
directory = get_default_csv_dir()
|
||||||
|
|
||||||
|
# XXX why isn't this done in command_load
|
||||||
table_names = _get_table_names(metadata, tables)
|
table_names = _get_table_names(metadata, tables)
|
||||||
table_objs = [metadata.tables[name] for name in table_names]
|
table_objs = [metadata.tables[name] for name in table_names]
|
||||||
|
|
||||||
|
if recursive:
|
||||||
|
table_objs.extend(find_dependent_tables(table_objs))
|
||||||
|
|
||||||
table_objs = sqlalchemy.sql.util.sort_tables(table_objs)
|
table_objs = sqlalchemy.sql.util.sort_tables(table_objs)
|
||||||
|
|
||||||
# SQLite speed tweaks
|
# SQLite speed tweaks
|
||||||
|
|
|
@ -121,6 +121,7 @@ def command_load(*args):
|
||||||
parser = get_parser(verbose=True)
|
parser = get_parser(verbose=True)
|
||||||
parser.add_option('-d', '--directory', dest='directory', default=None)
|
parser.add_option('-d', '--directory', dest='directory', default=None)
|
||||||
parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true')
|
parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true')
|
||||||
|
parser.add_option('-r', '--recursive', dest='recursive', default=False, action='store_true')
|
||||||
parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true',
|
parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true',
|
||||||
help="Do not use backend-specific optimalizations.")
|
help="Do not use backend-specific optimalizations.")
|
||||||
options, tables = parser.parse_args(list(args))
|
options, tables = parser.parse_args(list(args))
|
||||||
|
@ -139,7 +140,8 @@ def command_load(*args):
|
||||||
drop_tables=options.drop_tables,
|
drop_tables=options.drop_tables,
|
||||||
tables=tables,
|
tables=tables,
|
||||||
verbose=options.verbose,
|
verbose=options.verbose,
|
||||||
safe=options.safe)
|
safe=options.safe,
|
||||||
|
recursive=options.recursive)
|
||||||
|
|
||||||
def command_reindex(*args):
|
def command_reindex(*args):
|
||||||
parser = get_parser(verbose=True)
|
parser = get_parser(verbose=True)
|
||||||
|
@ -277,7 +279,10 @@ System options:
|
||||||
-d|--directory=DIR By default, load and dump will use the CSV files in the
|
-d|--directory=DIR By default, load and dump will use the CSV files in the
|
||||||
pokedex install directory. Use this option to specify
|
pokedex install directory. Use this option to specify
|
||||||
a different directory.
|
a different directory.
|
||||||
-D|--drop-tables With load, drop all tables before loading data.
|
|
||||||
|
Load options:
|
||||||
|
-D|--drop-tables Drop all tables before loading data.
|
||||||
|
-r|--recursive Load (and drop) all dependent tables.
|
||||||
|
|
||||||
Additionally, load and dump accept a list of table names (possibly with
|
Additionally, load and dump accept a list of table names (possibly with
|
||||||
wildcards) and/or csv fileames as an argument list.
|
wildcards) and/or csv fileames as an argument list.
|
||||||
|
|
Loading…
Reference in a new issue