Split PokedexLookup(recreate=True) into its own method. #216

This commit is contained in:
Eevee 2010-05-12 22:36:12 -07:00
parent cb2bfa3032
commit 79df4768bf
3 changed files with 57 additions and 34 deletions

View file

@ -93,8 +93,10 @@ def get_lookup(options, session=None, recreate=False):
print "Opened lookup index {index_dir} (from {got_from})" \ print "Opened lookup index {index_dir} (from {got_from})" \
.format(index_dir=index_dir, got_from=got_from) .format(index_dir=index_dir, got_from=got_from)
lookup = pokedex.lookup.PokedexLookup(index_dir, session=session, lookup = pokedex.lookup.PokedexLookup(index_dir, session=session)
recreate=recreate)
if recreate:
lookup.rebuild_index()
return lookup return lookup

View file

@ -28,6 +28,24 @@ rx_is_number = re.compile('^\d+$')
LookupResult = namedtuple('LookupResult', LookupResult = namedtuple('LookupResult',
['object', 'indexed_name', 'name', 'language', 'iso3166', 'exact']) ['object', 'indexed_name', 'name', 'language', 'iso3166', 'exact'])
class UninitializedIndex(object):
    """Falsy placeholder used where a real lookup index object is expected.

    The instance evaluates as False and raises `UninitializedIndexError`
    as soon as any ordinary attribute is read from it, so code that tries to
    use an index that has not been created yet fails with an actionable
    message rather than an obscure error.
    """

    class UninitializedIndexError(Exception):
        """Raised when the placeholder is used as though it were an index."""

    def __nonzero__(self):
        """Dummy object should identify itself as False."""
        return False

    def __bool__(self):
        """Python 3000 version of the above.  Future-proofing rules!"""
        return False

    def __getattr__(self, name):
        """Reject any attribute access that normal lookup could not satisfy.

        Only called for names not found on the instance or class, so the
        nested exception class and the truthiness hooks are unaffected.

        Raises `AttributeError` for double-underscore protocol names and
        `UninitializedIndexError` for everything else.
        """
        # Protocol probes such as getattr(obj, '__deepcopy__', None) expect an
        # AttributeError for a missing hook; any other exception type escapes
        # the probe and breaks generic machinery like copy.deepcopy().
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)

        raise self.UninitializedIndexError(
            "The lookup index does not exist.  Please use `pokedex setup` "
            "or lookup.rebuild_index() to create it."
        )
class LanguageWeighting(whoosh.scoring.Weighting): class LanguageWeighting(whoosh.scoring.Weighting):
"""A scoring class that forces otherwise-equal English results to come """A scoring class that forces otherwise-equal English results to come
before foreign results. before foreign results.
@ -67,7 +85,7 @@ class PokedexLookup(object):
) )
def __init__(self, directory=None, session=None, recreate=False): def __init__(self, directory=None, session=None):
"""Opens the whoosh index stored in the named directory. If the index """Opens the whoosh index stored in the named directory. If the index
doesn't already exist, it will be created. doesn't already exist, it will be created.
@ -76,13 +94,9 @@ class PokedexLookup(object):
`pokedex` egg directory. `pokedex` egg directory.
`session` `session`
If the index needs to be created, this database session will be Used for creating the index and retrieving objects. Defaults to an
used. Defaults to an attempt to connect to the default SQLite attempt to connect to the default SQLite database installed by
database installed by `pokedex setup`. `pokedex setup`.
`recreate`
If set to True, the whoosh index will be created even if it already
exists.
""" """
# By the time this returns, self.index, self.speller, and self.session # By the time this returns, self.index, self.speller, and self.session
@ -92,6 +106,7 @@ class PokedexLookup(object):
if not directory: if not directory:
directory = pkg_resources.resource_filename('pokedex', directory = pkg_resources.resource_filename('pokedex',
'data/whoosh-index') 'data/whoosh-index')
self.directory = directory
if session: if session:
self.session = session self.session = session
@ -99,31 +114,33 @@ class PokedexLookup(object):
self.session = connect() self.session = connect()
# Attempt to open or create the index # Attempt to open or create the index
directory_exists = os.path.exists(directory) if not os.path.exists(directory) or not os.listdir(directory):
if directory_exists and not recreate: # Directory doesn't exist OR is empty; caller needs to use
# Already exists; should be an index! Bam, done. # rebuild_index before doing anything. Provide a dummy object that
try: # complains when used
self.index = whoosh.index.open_dir(directory, indexname='MAIN') self.index = UninitializedIndex()
spell_store = whoosh.filedb.filestore.FileStorage(directory) self.speller = UninitializedIndex()
self.speller = whoosh.spelling.SpellChecker(spell_store) return
return
except whoosh.index.EmptyIndexError as e:
# Apparently not a real index. Fall out and create it
pass
# Delete and start over if we're going to bail anyway. # Otherwise, already exists; should be an index! Bam, done.
if directory_exists and recreate: # Note that this will explode if the directory exists but doesn't
# Be safe and only delete if it looks like a whoosh index, i.e., # contain an index; that's a feature
# everything starts with _ try:
if all(f[0] == '_' for f in os.listdir(directory)): self.index = whoosh.index.open_dir(directory, indexname='MAIN')
shutil.rmtree(directory) except whoosh.index.EmptyIndexError:
directory_exists = False raise IOError(
"The index directory already contains files. "
"Please use a dedicated directory for the lookup index."
)
if not directory_exists: # Create speller, and done
os.mkdir(directory) spell_store = whoosh.filedb.filestore.FileStorage(directory)
self.speller = whoosh.spelling.SpellChecker(spell_store)
### Create index def rebuild_index(self):
"""Creates the index from scratch."""
schema = whoosh.fields.Schema( schema = whoosh.fields.Schema(
name=whoosh.fields.ID(stored=True), name=whoosh.fields.ID(stored=True),
table=whoosh.fields.ID(stored=True), table=whoosh.fields.ID(stored=True),
@ -133,8 +150,11 @@ class PokedexLookup(object):
display_name=whoosh.fields.STORED, # non-lowercased name display_name=whoosh.fields.STORED, # non-lowercased name
) )
self.index = whoosh.index.create_in(directory, schema=schema, if not os.path.exists(self.directory):
indexname='MAIN') os.mkdir(self.directory)
self.index = whoosh.index.create_in(self.directory, schema=schema,
indexname='MAIN')
writer = self.index.writer() writer = self.index.writer()
# Index every name in all our tables of interest # Index every name in all our tables of interest

View file

@ -9,7 +9,8 @@ lookup = None
def setup(): def setup():
# Recreate data # Recreate data
global lookup global lookup
lookup = PokedexLookup(recreate=True) lookup = PokedexLookup()
lookup.rebuild_index()
def test_exact_lookup(): def test_exact_lookup():
tests = [ tests = [