Wildcard lookup! #89

Test suite no longer reloads the entire database.  Takes too long.

Factored out some magic numbers in lookup().
This commit is contained in:
Eevee 2009-08-24 22:04:55 -07:00
parent 11989d0db2
commit 9e3d8b317d
3 changed files with 27 additions and 9 deletions

View file

@ -20,6 +20,9 @@ from pokedex.roomaji import romanize
__all__ = ['open_index', 'lookup']
INTERMEDIATE_LOOKUP_RESULTS = 25
MAX_LOOKUP_RESULTS = 10
# Dictionary of table name => table class.
# Need the table name so we can get the class from the table name after we
# retrieve something from the index
@ -242,10 +245,13 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False):
# provided
valid_types = prefixes
# If the input provided is a number, match it as an id. Otherwise, name.
# Term objects do an exact match, so we don't have to worry about a query
# parser tripping on weird characters in the input
if rx_is_number.match(name):
# Do different things depending what the query looks like
# Note: Term objects do an exact match, so we don't have to worry about a
# query parser tripping on weird characters in the input
if '*' in name or '?' in name:
exact_only = True
query = whoosh.query.Wildcard(u'name', name)
elif rx_is_number.match(name):
# Don't spell-check numbers!
exact_only = True
query = whoosh.query.Term(u'row_id', name)
@ -285,14 +291,14 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False):
searcher.weighting = LanguageWeighting() # XXX kosher? docs say search()
# takes a weighting kw but it
# certainly does not
results = searcher.search(query)
results = searcher.search(query, limit=INTERMEDIATE_LOOKUP_RESULTS)
# Look for some fuzzy matches if necessary
if not exact_only and not results:
exact = False
results = []
for suggestion in speller.suggest(name, 25):
for suggestion in speller.suggest(name, INTERMEDIATE_LOOKUP_RESULTS):
query = whoosh.query.Term('name', suggestion)
results.extend(searcher.search(query))
@ -319,4 +325,4 @@ def lookup(input, valid_types=[], session=None, indices=None, exact_only=False):
# should have more than 10 here and lost a few. The speller returns 25 to
# give us some padding, and should avoid that problem. Not a big deal if
# we lose the 25th-most-likely match anyway.
return objects[:10]
return objects[:MAX_LOOKUP_RESULTS]

View file

@ -7,9 +7,8 @@ from pokedex.db.load import load
def setup():
# Reload data just in case
session = connect()
load(session, verbose=False, drop_tables=True)
open_index(session=session, recreate=True)
def teardown():
print "teardown"
pass

View file

@ -89,3 +89,16 @@ def test_fuzzy_lookup():
top_names = [_.object.name for _ in results[0:2]]
assert_true(u'Nidoran♂' in top_names, u'Nidoran♂ is a top result for "Nidoran"')
assert_true(u'Nidoran♀' in top_names, u'Nidoran♀ is a top result for "Nidoran"')
def test_wildcard_lookup():
    """Check that wildcard queries put the expected entry first.

    Each case pairs a wildcard query with the name expected on the first
    result.  The queries cover a leading `*`, a trailing `*`, and a
    single-character `?`.
    """
    tests = [
        (u'pokemon:*meleon', u'Charmeleon'),
        (u'item:master*', u'Master Ball'),
        (u'ee?ee', u'Eevee'),
    ]

    for wildcard, name in tests:
        results = pokedex.lookup.lookup(wildcard)

        # Fail with a readable message rather than an IndexError when the
        # wildcard matches nothing at all.
        assert_true(results,
                    u'Wildcard "%s" returns at least one result' % wildcard)

        first_result = results[0]
        # Name the query in the message so a failure identifies its case.
        assert_equal(first_result.object.name, name,
                     u'Wildcard "%s" finds %s first' % (wildcard, name))