Overhauled CLI. #180

- Everything now accepts -i, -e, -q, and -v.

- Plumbing commands now announce what database/index they're using and
  where they got them from.

- New command status, which does nothing but still does the announcing.

- New command reindex, which recreates only the whoosh index.
This commit is contained in:
Eevee 2010-04-24 14:04:15 -07:00
parent 6de60fd30f
commit 2204b95585
2 changed files with 249 additions and 32 deletions

View file

@ -1,8 +1,10 @@
# encoding: utf8
from optparse import OptionParser
import os
import pkg_resources
import sys
from .db import connect, metadata
import pokedex.db
import pokedex.db.load
import pokedex.lookup
@ -26,39 +28,178 @@ def main():
command_help()
def command_dump(*args):
def get_parser(verbose=True):
    """Return an OptionParser prepopulated with the global options.

    `verbose` is the default value of ``options.verbose``; -q and -v
    override it from the command line.

    Only the options shared by every command (-e, -i, -q, -v) are registered
    here.  Commands that need more (such as -d) add them to the returned
    parser themselves; registering the same option string twice makes
    optparse raise OptionConflictError.
    """
    parser = OptionParser()

    # When an option is not given explicitly, its value falls back on the
    # matching environment variable, and on None if that is unset too.
    parser.add_option('-e', '--engine', dest='engine_uri',
                      default=os.environ.get('POKEDEX_DB_ENGINE', None))
    parser.add_option('-i', '--index', dest='index_dir',
                      default=os.environ.get('POKEDEX_INDEX_DIR', None))

    # -q and -v write to the same destination, so the last one given wins
    parser.add_option('-q', '--quiet', dest='verbose', default=verbose,
                      action='store_false')
    parser.add_option('-v', '--verbose', dest='verbose', default=verbose,
                      action='store_true')
    return parser
def get_session(options):
    """Given a parsed options object, connect to the database and return a
    session.

    If `options.verbose` is true, also announce which database was connected
    to and where the setting came from: the command line, the
    POKEDEX_DB_ENGINE environment variable, or the default.
    """
    env_uri = os.environ.get('POKEDEX_DB_ENGINE', None)
    engine_uri = options.engine_uri

    # The parser may already have used the environment variable as the
    # option's default, so a value identical to the environment's cannot be
    # attributed to the command line.
    if engine_uri and engine_uri != env_uri:
        got_from = 'command line'
    elif env_uri:
        engine_uri = env_uri
        got_from = 'environment'
    else:
        # Nothing was specified anywhere.  pokedex.db.connect() receives a
        # false value and presumably falls back on its own default.
        engine_uri = env_uri
        got_from = 'default setting'

    session = pokedex.db.connect(engine_uri)

    if options.verbose:
        print("Connected to database {engine} (from {got_from})"
              .format(engine=session.bind.url, got_from=got_from))

    return session
def get_lookup(options, session=None, recreate=False):
    """Given a parsed options object, open the whoosh index and return a
    PokedexLookup object.

    If `options.verbose` is true, also announce which index directory is
    used and where the setting came from: the command line, the
    POKEDEX_INDEX_DIR environment variable, or the default.

    Raises ValueError if `recreate` is true but no `session` was given.

    Unlike `get_session`, this function can actually do population as a side
    effect!  This is fallout from how PokedexLookup works.
    """
    # TODO fix the above

    if recreate and not session:
        raise ValueError("get_lookup() needs an explicit session to regen the index")

    env_dir = os.environ.get('POKEDEX_INDEX_DIR', None)
    index_dir = options.index_dir

    # The parser may already have used the environment variable as the
    # option's default, so a value identical to the environment's cannot be
    # attributed to the command line.
    if index_dir and index_dir != env_dir:
        got_from = 'command line'
    elif env_dir:
        index_dir = env_dir
        got_from = 'environment'
    else:
        index_dir = pkg_resources.resource_filename('pokedex',
                                                    'data/whoosh-index')
        got_from = 'default setting'

    if options.verbose:
        print("Opened lookup index {index_dir} (from {got_from})"
              .format(index_dir=index_dir, got_from=got_from))

    lookup = pokedex.lookup.PokedexLookup(index_dir, session=session,
                                          recreate=recreate)

    return lookup
def print_csv_directory(options):
    """Announce the CSV directory about to be used, and where it came from.

    Prints nothing unless `options.verbose` is true.  `options.directory`
    must exist on the options object; a false value means the default.
    """
    if options.verbose:
        if options.directory:
            location, source = options.directory, 'command line'
        else:
            # This is the same as the db.load default
            location = pkg_resources.resource_filename('pokedex', 'data/csv')
            source = 'default setting'

        print("Using CSV directory {csvdir} (from {got_from})"
              .format(csvdir=location, got_from=source))
### Plumbing commands
def command_dump(*args):
    """Dump database tables into CSV files.

    Accepts the global options plus -d/--directory.  Any remaining
    positional arguments are passed on as the list of tables to dump.
    """
    parser = get_parser(verbose=True)
    parser.add_option('-d', '--directory', dest='directory', default=None)
    options, tables = parser.parse_args(list(args))

    # get_session() both connects and announces the database in use.  The
    # session it returns is the one to dump from; connecting a second time
    # would bypass that announcement.
    session = get_session(options)
    print_csv_directory(options)

    pokedex.db.load.dump(session, directory=options.directory,
                         tables=tables,
                         verbose=options.verbose)
def command_load(*args):
    """Load CSV files into the database.

    Accepts the global options plus -d/--directory and -D/--drop-tables.
    Any remaining positional arguments are passed on as the list of tables
    to load.
    """
    # get_parser() already registers -e, -q and -v; only the load-specific
    # options are added here.  Adding one of the global options again would
    # make optparse raise OptionConflictError.
    parser = get_parser(verbose=True)
    parser.add_option('-d', '--directory', dest='directory', default=None)
    parser.add_option('-D', '--drop-tables', dest='drop_tables',
                      default=False, action='store_true')
    options, tables = parser.parse_args(list(args))

    # No engine given means the default database is about to be reloaded
    # while its lookup index stays untouched.
    if not options.engine_uri:
        print("WARNING: You're reloading the default database, but not the lookup index.  They")
        print("         might get out of sync, and pokedex commands may not work correctly!")
        print("To fix this, run `pokedex reindex` when this command finishes.  Or, just use")
        print("`pokedex setup` to do both at once.")
        print("")

    session = get_session(options)
    print_csv_directory(options)

    pokedex.db.load.load(session, directory=options.directory,
                         drop_tables=options.drop_tables,
                         tables=tables,
                         verbose=options.verbose)
def command_reindex(*args):
    """Recreate the lookup index from the database.

    Accepts only the global options; positional arguments are ignored.
    """
    options, _ = get_parser(verbose=True).parse_args(list(args))

    db_session = get_session(options)
    # Called only for its side effect: recreate=True regenerates the index
    get_lookup(options, session=db_session, recreate=True)

    print("Recreated lookup index.")
def command_setup(*args):
    """Load the database from the default CSV directory, dropping existing
    tables first, then recreate the lookup index.

    Accepts only the global options and is quiet by default.  The database
    and index are touched exactly once, using whatever -e/-i (or their
    environment variables) select.
    """
    parser = get_parser(verbose=False)
    options, _ = parser.parse_args(list(args))

    # setup registers no -d option, but print_csv_directory() reads
    # options.directory, so give it the "use the default" value.
    options.directory = None

    session = get_session(options)
    print_csv_directory(options)
    pokedex.db.load.load(session, directory=None, drop_tables=True,
                         verbose=options.verbose)

    # Called only for its side effect: recreate=True regenerates the index
    get_lookup(options, session=session, recreate=True)

    print("Recreated lookup index.")
def command_lookup(name):
results = pokedex.lookup.PokedexLookup().lookup(name)
def command_status(*args):
    """Announce which database, CSV directory and lookup index would be
    used, without loading or dumping anything.

    Accepts only the global options; positional arguments are ignored.
    """
    options, _ = get_parser(verbose=True).parse_args(list(args))

    # Announcing is the whole point of this command, so verbosity is forced
    # on regardless of -q.
    options.verbose = True
    # No -d option is registered here, but print_csv_directory() reads this
    options.directory = None

    # Each helper is called for the announcement it prints
    get_session(options)
    print_csv_directory(options)
    get_lookup(options, recreate=False)
### User-facing commands
def command_lookup(*args):
parser = get_parser(verbose=False)
options, words = parser.parse_args(list(args))
name = u' '.join(words)
session = get_session(options)
lookup = get_lookup(options, session=session, recreate=False)
results = lookup.lookup(name)
if not results:
print "No matches."
elif results[0].exact:
@ -83,6 +224,7 @@ def command_help():
print u"""pokedex -- a command-line Pokédex interface
usage: pokedex {command} [options...]
Run `pokedex setup` first, or nothing will work!
See http://bugs.veekun.com/projects/pokedex/wiki/CLI for more documentation.
Commands:
help Displays this message.
@ -91,19 +233,33 @@ Commands:
System commands:
load Load Pokédex data into a database from CSV files.
dump Dump Pokédex data from a database into CSV files.
setup Loads Pokédex data into the right place and creates a
lookup index in the right place. No options or output.
This will blow away the default database and index!
reindex Rebuilds the lookup index from the database.
setup Combines load and reindex.
status No effect, but prints which engine, index, and csv
directory would be used for other commands.
Options:
Global options:
-e|--engine=URI By default, all commands try to use a SQLite database
in the pokedex install directory. Use this option (or
a POKEDEX_DB_ENGINE environment variable) to specify an
alternate database.
-i|--index=DIR By default, all commands try to put the lookup index in
the pokedex install directory. Use this option (or a
POKEDEX_INDEX_DIR environment variable) to specify an
alternate location.
System options:
-d|--directory By default, load and dump will use the CSV files in the
pokedex install directory. Use this option to specify
a different directory.
-D|--drop-tables With load, drop all tables before loading data.
-e|--engine=URI By default, all commands try to use a SQLite database
in the pokedex install directory. Use this option to
specify an alternate database.
-q|--quiet Turn off any unnecessary status output from dump/load.
-q|--quiet Don't print system output. This is the default for
non-system commands and setup.
-v|--verbose Print system output. This is the default for system
commands, except setup.
Additionally, load and dump accept a list of table names (possibly with
wildcards) and/or csv filenames as an argument list.
""".encode(sys.getdefaultencoding(), 'replace')
sys.exit(0)

View file

@ -1,15 +1,49 @@
"""CSV to database or vice versa."""
import csv
import os.path
import pkg_resources
import re
import sys
from sqlalchemy.orm.attributes import instrumentation_registry
import sqlalchemy.sql.util
import sqlalchemy.types
from pokedex.db import metadata
import pokedex.db.tables as tables
def _wildcard_char_to_regex(char):
"""Converts a single wildcard character to the regex equivalent."""
if char == '?':
return '.?'
elif char == '*':
return '.*'
else:
return re.escape(char)
def _wildcard_glob_to_regex(glob):
    """Translate one wildcard glob into a regex STRING (not a compiled
    pattern).

    A glob containing ``.`` or ``/`` is taken to be a filename and is first
    reduced to its base name without extension.
    """
    looks_like_filename = '.' in glob or '/' in glob
    if looks_like_filename:
        # e.g. "some/dir/name.csv" -> "name.csv" -> "name"
        basename = os.path.split(glob)[1]
        glob = os.path.splitext(basename)[0]

    return u''.join(_wildcard_char_to_regex(char) for char in glob)
def _wildcards_to_regex(strings):
    """Compile a list of wildcard globs into one regex object.

    The pattern is anchored at both ends and matches when any one of the
    globs matches.
    """
    alternatives = '|'.join(_wildcard_glob_to_regex(glob) for glob in strings)
    # Plain concatenation, so a unicode `alternatives` yields a unicode pattern
    pattern = '^(?:' + alternatives + ')$'
    return re.compile(pattern)
def _get_verbose_prints(verbose):
"""If `verbose` is true, returns two functions: one for printing a starting
message, and the other for printing a success or failure message when
@ -44,7 +78,7 @@ def _get_verbose_prints(verbose):
return dummy, dummy
def load(session, directory=None, drop_tables=False, verbose=False):
def load(session, tables=[], directory=None, drop_tables=False, verbose=False):
"""Load data from CSV files into the given database session.
Tables are created automatically.
@ -52,6 +86,9 @@ def load(session, directory=None, drop_tables=False, verbose=False):
`session`
SQLAlchemy session to use.
`tables`
List of tables to load. If omitted, all tables are loaded.
`directory`
Directory the CSV files reside in. Defaults to the `pokedex` data
directory.
@ -70,17 +107,29 @@ def load(session, directory=None, drop_tables=False, verbose=False):
if not directory:
directory = pkg_resources.resource_filename('pokedex', 'data/csv')
if tables:
regex = _wildcards_to_regex(tables)
table_names = filter(regex.match, metadata.tables.keys())
else:
table_names = metadata.tables.keys()
table_objs = [metadata.tables[name] for name in table_names]
table_objs = sqlalchemy.sql.util.sort_tables(table_objs)
# Drop all tables if requested
if drop_tables:
print_start('Dropping tables')
metadata.drop_all()
for table in reversed(table_objs):
table.drop(checkfirst=True)
print_done()
metadata.create_all()
for table in table_objs:
table.create()
connection = session.connection()
# Okay, run through the tables and actually load the data now
for table_obj in metadata.sorted_tables:
for table_obj in table_objs:
table_name = table_obj.name
insert_stmt = table_obj.insert()
@ -163,7 +212,7 @@ def load(session, directory=None, drop_tables=False, verbose=False):
# Remembering some zillion rows in the session consumes a lot of
# RAM. Let's not do that. Commit every 1000 rows
if len(new_rows) > 1000:
if len(new_rows) >= 1000:
insert_and_commit()
insert_and_commit()
@ -186,13 +235,16 @@ def load(session, directory=None, drop_tables=False, verbose=False):
def dump(session, directory=None, verbose=False):
def dump(session, tables=[], directory=None, verbose=False):
"""Dumps the contents of a database to a set of CSV files. Probably not
useful to anyone besides a developer.
`session`
SQLAlchemy session to use.
`tables`
List of tables to dump. If omitted, all tables are dumped.
`directory`
Directory the CSV files should be put in. Defaults to the `pokedex`
data directory.
@ -208,7 +260,16 @@ def dump(session, directory=None, verbose=False):
if not directory:
directory = pkg_resources.resource_filename('pokedex', 'data/csv')
for table_name in sorted(metadata.tables.keys()):
if tables:
regex = _wildcards_to_regex(tables)
table_names = filter(regex.match, metadata.tables.keys())
else:
table_names = metadata.tables.keys()
table_names.sort()
for table_name in table_names:
print_start(table_name)
table = metadata.tables[table_name]