Wildcard lookup! #89

The test suite no longer reloads the entire database, because doing so takes too long.

Factored out some magic numbers in lookup().
This commit is contained in:
Eevee 2009-08-24 22:04:55 -07:00
parent 11989d0db2
commit 9e3d8b317d
3 changed files with 27 additions and 9 deletions

View file

@ -20,6 +20,9 @@ from pokedex.roomaji import romanize
__all__ = ['open_index', 'lookup'] __all__ = ['open_index', 'lookup']
INTERMEDIATE_LOOKUP_RESULTS = 25
MAX_LOOKUP_RESULTS = 10
# Dictionary of table name => table class. # Dictionary of table name => table class.
# Need the table name so we can get the class from the table name after we # Need the table name so we can get the class from the table name after we
# retrieve something from the index # retrieve something from the index
@ -242,10 +245,13 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False):
# provided # provided
valid_types = prefixes valid_types = prefixes
# If the input provided is a number, match it as an id. Otherwise, name. # Do different things depending what the query looks like
# Term objects do an exact match, so we don't have to worry about a query # Note: Term objects do an exact match, so we don't have to worry about a
# parser tripping on weird characters in the input # query parser tripping on weird characters in the input
if rx_is_number.match(name): if '*' in name or '?' in name:
exact_only = True
query = whoosh.query.Wildcard(u'name', name)
elif rx_is_number.match(name):
# Don't spell-check numbers! # Don't spell-check numbers!
exact_only = True exact_only = True
query = whoosh.query.Term(u'row_id', name) query = whoosh.query.Term(u'row_id', name)
@ -285,14 +291,14 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False):
searcher.weighting = LanguageWeighting() # XXX kosher? docs say search() searcher.weighting = LanguageWeighting() # XXX kosher? docs say search()
# takes a weighting kw but it # takes a weighting kw but it
# certainly does not # certainly does not
results = searcher.search(query) results = searcher.search(query, limit=INTERMEDIATE_LOOKUP_RESULTS)
# Look for some fuzzy matches if necessary # Look for some fuzzy matches if necessary
if not exact_only and not results: if not exact_only and not results:
exact = False exact = False
results = [] results = []
for suggestion in speller.suggest(name, 25): for suggestion in speller.suggest(name, INTERMEDIATE_LOOKUP_RESULTS):
query = whoosh.query.Term('name', suggestion) query = whoosh.query.Term('name', suggestion)
results.extend(searcher.search(query)) results.extend(searcher.search(query))
@ -319,4 +325,4 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False):
# should have more than 10 here and lost a few. The speller returns 25 to # should have more than 10 here and lost a few. The speller returns 25 to
# give us some padding, and should avoid that problem. Not a big deal if # give us some padding, and should avoid that problem. Not a big deal if
# we lose the 25th-most-likely match anyway. # we lose the 25th-most-likely match anyway.
return objects[:10] return objects[:MAX_LOOKUP_RESULTS]

View file

@ -7,9 +7,8 @@ from pokedex.db.load import load
def setup(): def setup():
# Reload data just in case # Reload data just in case
session = connect() session = connect()
load(session, verbose=False, drop_tables=True)
open_index(session=session, recreate=True) open_index(session=session, recreate=True)
def teardown(): def teardown():
print "teardown" pass

View file

@ -89,3 +89,16 @@ def test_fuzzy_lookup():
top_names = [_.object.name for _ in results[0:2]] top_names = [_.object.name for _ in results[0:2]]
assert_true(u'Nidoran♂' in top_names, u'Nidoran♂ is a top result for "Nidoran"') assert_true(u'Nidoran♂' in top_names, u'Nidoran♂ is a top result for "Nidoran"')
assert_true(u'Nidoran♀' in top_names, u'Nidoran♀ is a top result for "Nidoran"') assert_true(u'Nidoran♀' in top_names, u'Nidoran♀ is a top result for "Nidoran"')
def test_wildcard_lookup():
    """Check that wildcard queries put the expected name on the first result.

    Each case pairs a query containing `*` or `?` (some of them carrying a
    prefix such as ``pokemon:`` or ``item:``) with the name that the first
    lookup result's object is expected to have.
    """
    tests = [
        (u'pokemon:*meleon', u'Charmeleon'),
        (u'item:master*', u'Master Ball'),
        (u'ee?ee', u'Eevee'),
    ]

    for wildcard, name in tests:
        results = pokedex.lookup.lookup(wildcard)

        # Fail with a readable assertion rather than an IndexError when the
        # wildcard matches nothing at all.
        assert_true(results,
                    u'Wildcard "%s" returns at least one result' % wildcard)

        first_result = results[0]
        # Name the failing case in the message so a failure identifies which
        # wildcard pattern misbehaved.
        assert_equal(first_result.object.name, name,
                     u'Wildcard "%s" finds "%s" first' % (wildcard, name))