mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Split PokedexLookup(recreate=True) into its own method. #216
This commit is contained in:
parent
cb2bfa3032
commit
79df4768bf
3 changed files with 57 additions and 34 deletions
|
@ -93,8 +93,10 @@ def get_lookup(options, session=None, recreate=False):
|
||||||
print "Opened lookup index {index_dir} (from {got_from})" \
|
print "Opened lookup index {index_dir} (from {got_from})" \
|
||||||
.format(index_dir=index_dir, got_from=got_from)
|
.format(index_dir=index_dir, got_from=got_from)
|
||||||
|
|
||||||
lookup = pokedex.lookup.PokedexLookup(index_dir, session=session,
|
lookup = pokedex.lookup.PokedexLookup(index_dir, session=session)
|
||||||
recreate=recreate)
|
|
||||||
|
if recreate:
|
||||||
|
lookup.rebuild_index()
|
||||||
|
|
||||||
return lookup
|
return lookup
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,24 @@ rx_is_number = re.compile('^\d+$')
|
||||||
LookupResult = namedtuple('LookupResult',
|
LookupResult = namedtuple('LookupResult',
|
||||||
['object', 'indexed_name', 'name', 'language', 'iso3166', 'exact'])
|
['object', 'indexed_name', 'name', 'language', 'iso3166', 'exact'])
|
||||||
|
|
||||||
|
class UninitializedIndex(object):
    """Falsy placeholder used where a real lookup index is not available.

    The object evaluates to False in a boolean context, and any attempt to
    use it like a real index (i.e. to read an ordinary attribute from it)
    raises `UninitializedIndexError` with instructions for building the
    index.
    """

    class UninitializedIndexError(Exception):
        """Raised when the placeholder is used as if it were a real index."""

    def __nonzero__(self):
        """Dummy object should identify itself as False (Python 2 hook)."""
        return False

    # Python 3 looks up __bool__ instead of __nonzero__; share one
    # implementation so the two can never disagree.
    __bool__ = __nonzero__

    def __getattr__(self, name):
        """Complain about any use of the missing index.

        `name`
            The attribute that normal lookup failed to find.

        Raises `AttributeError` for special (double-underscore) names, and
        `UninitializedIndexError` for everything else.
        """
        # Protocol probes such as __deepcopy__ or __getstate__ are issued by
        # the interpreter and standard library (copy, pickle, hasattr), which
        # expect AttributeError for "not supported".  Raising anything else
        # makes e.g. copy.deepcopy() of this object blow up.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)

        raise self.UninitializedIndexError(
            "The lookup index does not exist. Please use `pokedex setup` "
            "or lookup.rebuild_index() to create it."
        )
|
||||||
|
|
||||||
class LanguageWeighting(whoosh.scoring.Weighting):
|
class LanguageWeighting(whoosh.scoring.Weighting):
|
||||||
"""A scoring class that forces otherwise-equal English results to come
|
"""A scoring class that forces otherwise-equal English results to come
|
||||||
before foreign results.
|
before foreign results.
|
||||||
|
@ -67,7 +85,7 @@ class PokedexLookup(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, directory=None, session=None, recreate=False):
|
def __init__(self, directory=None, session=None):
|
||||||
"""Opens the whoosh index stored in the named directory. If the index
|
"""Opens the whoosh index stored in the named directory. If the index
|
||||||
doesn't already exist, it will be created.
|
doesn't already exist, it will be created.
|
||||||
|
|
||||||
|
@ -76,13 +94,9 @@ class PokedexLookup(object):
|
||||||
`pokedex` egg directory.
|
`pokedex` egg directory.
|
||||||
|
|
||||||
`session`
|
`session`
|
||||||
If the index needs to be created, this database session will be
|
Used for creating the index and retrieving objects. Defaults to an
|
||||||
used. Defaults to an attempt to connect to the default SQLite
|
attempt to connect to the default SQLite database installed by
|
||||||
database installed by `pokedex setup`.
|
`pokedex setup`.
|
||||||
|
|
||||||
`recreate`
|
|
||||||
If set to True, the whoosh index will be created even if it already
|
|
||||||
exists.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# By the time this returns, self.index, self.speller, and self.session
|
# By the time this returns, self.index, self.speller, and self.session
|
||||||
|
@ -92,6 +106,7 @@ class PokedexLookup(object):
|
||||||
if not directory:
|
if not directory:
|
||||||
directory = pkg_resources.resource_filename('pokedex',
|
directory = pkg_resources.resource_filename('pokedex',
|
||||||
'data/whoosh-index')
|
'data/whoosh-index')
|
||||||
|
self.directory = directory
|
||||||
|
|
||||||
if session:
|
if session:
|
||||||
self.session = session
|
self.session = session
|
||||||
|
@ -99,31 +114,33 @@ class PokedexLookup(object):
|
||||||
self.session = connect()
|
self.session = connect()
|
||||||
|
|
||||||
# Attempt to open or create the index
|
# Attempt to open or create the index
|
||||||
directory_exists = os.path.exists(directory)
|
if not os.path.exists(directory) or not os.listdir(directory):
|
||||||
if directory_exists and not recreate:
|
# Directory doesn't exist OR is empty; caller needs to use
|
||||||
# Already exists; should be an index! Bam, done.
|
# rebuild_index before doing anything. Provide a dummy object that
|
||||||
try:
|
# complains when used
|
||||||
self.index = whoosh.index.open_dir(directory, indexname='MAIN')
|
self.index = UninitializedIndex()
|
||||||
spell_store = whoosh.filedb.filestore.FileStorage(directory)
|
self.speller = UninitializedIndex()
|
||||||
self.speller = whoosh.spelling.SpellChecker(spell_store)
|
return
|
||||||
return
|
|
||||||
except whoosh.index.EmptyIndexError as e:
|
|
||||||
# Apparently not a real index. Fall out and create it
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Delete and start over if we're going to bail anyway.
|
# Otherwise, already exists; should be an index! Bam, done.
|
||||||
if directory_exists and recreate:
|
# Note that this will explode if the directory exists but doesn't
|
||||||
# Be safe and only delete if it looks like a whoosh index, i.e.,
|
# contain an index; that's a feature
|
||||||
# everything starts with _
|
try:
|
||||||
if all(f[0] == '_' for f in os.listdir(directory)):
|
self.index = whoosh.index.open_dir(directory, indexname='MAIN')
|
||||||
shutil.rmtree(directory)
|
except whoosh.index.EmptyIndexError:
|
||||||
directory_exists = False
|
raise IOError(
|
||||||
|
"The index directory already contains files. "
|
||||||
|
"Please use a dedicated directory for the lookup index."
|
||||||
|
)
|
||||||
|
|
||||||
if not directory_exists:
|
# Create speller, and done
|
||||||
os.mkdir(directory)
|
spell_store = whoosh.filedb.filestore.FileStorage(directory)
|
||||||
|
self.speller = whoosh.spelling.SpellChecker(spell_store)
|
||||||
|
|
||||||
|
|
||||||
### Create index
|
def rebuild_index(self):
|
||||||
|
"""Creates the index from scratch."""
|
||||||
|
|
||||||
schema = whoosh.fields.Schema(
|
schema = whoosh.fields.Schema(
|
||||||
name=whoosh.fields.ID(stored=True),
|
name=whoosh.fields.ID(stored=True),
|
||||||
table=whoosh.fields.ID(stored=True),
|
table=whoosh.fields.ID(stored=True),
|
||||||
|
@ -133,8 +150,11 @@ class PokedexLookup(object):
|
||||||
display_name=whoosh.fields.STORED, # non-lowercased name
|
display_name=whoosh.fields.STORED, # non-lowercased name
|
||||||
)
|
)
|
||||||
|
|
||||||
self.index = whoosh.index.create_in(directory, schema=schema,
|
if not os.path.exists(self.directory):
|
||||||
indexname='MAIN')
|
os.mkdir(self.directory)
|
||||||
|
|
||||||
|
self.index = whoosh.index.create_in(self.directory, schema=schema,
|
||||||
|
indexname='MAIN')
|
||||||
writer = self.index.writer()
|
writer = self.index.writer()
|
||||||
|
|
||||||
# Index every name in all our tables of interest
|
# Index every name in all our tables of interest
|
||||||
|
|
|
@ -9,7 +9,8 @@ lookup = None
|
||||||
def setup():
|
def setup():
|
||||||
# Recreate data
|
# Recreate data
|
||||||
global lookup
|
global lookup
|
||||||
lookup = PokedexLookup(recreate=True)
|
lookup = PokedexLookup()
|
||||||
|
lookup.rebuild_index()
|
||||||
|
|
||||||
def test_exact_lookup():
|
def test_exact_lookup():
|
||||||
tests = [
|
tests = [
|
||||||
|
|
Loading…
Reference in a new issue