Faster pokedex load for PostgreSQL #526
Also added the -S (--safe) option, which disables the backend-specific optimizations. This gives a more than 3× speedup on my machine :)
This commit is contained in:
parent 4daa6ab0c3
commit bb4861b8c6

2 changed files with 33 additions and 1 deletion
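The change below wires a new -S/--safe flag into the load command and adds a PostgreSQL-only fast path that streams each CSV dump straight into the database with COPY. As a rough illustration of the gate that decides between the two paths, here is a minimal standalone sketch (not the committed code); it assumes SQLAlchemy is installed and uses an in-memory SQLite engine purely so the snippet runs anywhere:

# Sketch of the backend check that gates the fast path -- placeholder engine,
# not the project's real session setup.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite://')              # stand-in engine for the demo
session = sessionmaker(bind=engine)()

safe = False                                     # what the new -S/--safe flag controls
if not safe and session.connection().dialect.name == 'postgresql':
    print('fast path: bulk-load the CSV dump with COPY')
else:
    print('safe path: portable row-by-row loading')

Passing --safe simply forces the second branch, which is also the path every non-PostgreSQL backend takes.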
@@ -122,6 +122,8 @@ def command_load(*args):
     parser = get_parser(verbose=True)
     parser.add_option('-d', '--directory', dest='directory', default=None)
     parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true')
+    parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true',
+        help="Do not use backend-specific optimizations.")
     options, tables = parser.parse_args(list(args))
 
     if not options.engine_uri:
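The new flag can be exercised on its own with plain optparse; a small standalone sketch mirroring the add_option call above (not the project's full CLI wiring):

from optparse import OptionParser

# Same option definition as the diff adds to command_load's parser.
parser = OptionParser()
parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true',
                  help="Do not use backend-specific optimizations.")

options, args = parser.parse_args(['-S'])
print(options.safe)    # True:  skip the PostgreSQL COPY fast path
options, args = parser.parse_args([])
print(options.safe)    # False: keep the backend-specific optimization enabled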
@@ -138,7 +140,7 @@ def command_load(*args):
         drop_tables=options.drop_tables,
         tables=tables,
         verbose=options.verbose,
-        safe=False)
+        safe=options.safe)
 
 def command_reindex(*args):
     parser = get_parser(verbose=True)
@@ -168,6 +168,36 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
         reader = csv.reader(csvfile, lineterminator='\n')
         column_names = [unicode(column) for column in reader.next()]
 
+        if not safe and session.connection().dialect.name == 'postgresql':
+            """
+            Postgres' CSV dialect is nearly the same as ours, except that it
+            treats completely empty values as NULL, and empty quoted
+            strings ("") as empty strings.
+            The pokedex dump does not quote empty strings. So, both empty
+            strings and NULLs are read in as NULL.
+            For an empty string in a NOT NULL column, the load will fail, and
+            load will fall back to the cross-backend row-by-row loading. And
+            in nullable columns, we already load empty strings as NULL.
+            """
+            session.commit()
+            not_null_cols = [c for c in column_names if not table_obj.c[c].nullable]
+            if not_null_cols:
+                force_not_null = 'FORCE NOT NULL ' + ','.join('"%s"' % c for c in not_null_cols)
+            else:
+                force_not_null = ''
+            command = "COPY {table_name} ({columns}) FROM '{csvpath}' CSV HEADER {force_not_null}"
+            session.connection().execute(
+                command.format(
+                    table_name=table_name,
+                    csvpath=csvpath,
+                    columns=','.join('"%s"' % c for c in column_names),
+                    force_not_null=force_not_null,
+                )
+            )
+            session.commit()
+            print_done()
+            continue
+
         # Self-referential tables may contain rows with foreign keys of other
         # rows in the same table that do not yet exist. Pull these out and add
         # them to the session last
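To make the generated SQL concrete, here is a self-contained sketch of how the COPY statement above is assembled. The table name, columns, nullability, and CSV path are hypothetical stand-ins, not values taken from the real pokedex schema:

# Hypothetical inputs -- stand-ins for the table metadata the real loader reads
# from table_obj and the dump directory.
table_name = 'example_items'
csvpath = '/tmp/pokedex-dump/example_items.csv'
column_names = ['id', 'identifier', 'flavor_text']
not_null_cols = ['id', 'identifier']    # columns assumed NOT NULL for this demo

# FORCE NOT NULL makes Postgres read empty, unquoted fields in these columns as
# empty strings instead of NULL, matching how the dump represents them.
if not_null_cols:
    force_not_null = 'FORCE NOT NULL ' + ','.join('"%s"' % c for c in not_null_cols)
else:
    force_not_null = ''

command = "COPY {table_name} ({columns}) FROM '{csvpath}' CSV HEADER {force_not_null}"
print(command.format(
    table_name=table_name,
    columns=','.join('"%s"' % c for c in column_names),
    csvpath=csvpath,
    force_not_null=force_not_null,
))
# Prints:
# COPY example_items ("id","identifier","flavor_text") FROM '/tmp/pokedex-dump/example_items.csv' CSV HEADER FORCE NOT NULL "id","identifier"

One caveat worth noting: COPY ... FROM '<path>' is executed by the PostgreSQL server process, so the CSV files must be readable from the server's filesystem. The portable --safe path has no such requirement.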