mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Faster pokedex load
for PostgreSQL #526
Also added the -S (--safe) option, which disables the backend-specific optimizations. The optimized path gives over a 3× speedup on my machine :)
This commit is contained in:
parent
4daa6ab0c3
commit
bb4861b8c6
2 changed files with 33 additions and 1 deletions
|
@ -122,6 +122,8 @@ def command_load(*args):
|
|||
parser = get_parser(verbose=True)
|
||||
parser.add_option('-d', '--directory', dest='directory', default=None)
|
||||
parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true')
|
||||
parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true',
|
||||
help="Do not use backend-specific optimalizations.")
|
||||
options, tables = parser.parse_args(list(args))
|
||||
|
||||
if not options.engine_uri:
|
||||
|
@ -138,7 +140,7 @@ def command_load(*args):
|
|||
drop_tables=options.drop_tables,
|
||||
tables=tables,
|
||||
verbose=options.verbose,
|
||||
safe=False)
|
||||
safe=options.safe)
|
||||
|
||||
def command_reindex(*args):
|
||||
parser = get_parser(verbose=True)
|
||||
|
|
|
@ -168,6 +168,36 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
|
|||
reader = csv.reader(csvfile, lineterminator='\n')
|
||||
column_names = [unicode(column) for column in reader.next()]
|
||||
|
||||
if not safe and session.connection().dialect.name == 'postgresql':
|
||||
"""
|
||||
Postgres' CSV dialect is nearly the same as ours, except that it
|
||||
treats completely empty values as NULL, and empty quoted
|
||||
strings ("") as empty strings.
|
||||
Pokedex dump does not quote empty strings. So, both empty strings
|
||||
and NULLs are read in as NULL.
|
||||
For an empty string in a NOT NULL column, the load will fail, and
|
||||
load will fall back to the cross-backend row-by-row loading. And in
|
||||
nullable columns, we already load empty strings as NULL.
|
||||
"""
|
||||
session.commit()
|
||||
not_null_cols = [c for c in column_names if not table_obj.c[c].nullable]
|
||||
if not_null_cols:
|
||||
force_not_null = 'FORCE NOT NULL ' + ','.join('"%s"' % c for c in not_null_cols)
|
||||
else:
|
||||
force_not_null = ''
|
||||
command = "COPY {table_name} ({columns}) FROM '{csvpath}' CSV HEADER {force_not_null}"
|
||||
session.connection().execute(
|
||||
command.format(
|
||||
table_name=table_name,
|
||||
csvpath=csvpath,
|
||||
columns=','.join('"%s"' % c for c in column_names),
|
||||
force_not_null=force_not_null,
|
||||
)
|
||||
)
|
||||
session.commit()
|
||||
print_done()
|
||||
continue
|
||||
|
||||
# Self-referential tables may contain rows with foreign keys of other
|
||||
# rows in the same table that do not yet exist. Pull these out and add
|
||||
# them to the session last
|
||||
|
|
Loading…
Reference in a new issue