mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Fix whoosh result sorting.
Now results are sorted by is-this-your-language (times Levenshtein distance, if appropriate), then by rough class of result (Pokémon, then moves, then abilities, etc.), and finally by name.

This fixes a couple of issues:
- If both a foreign name and a local name matched a wildcard lookup, you'll see the local name. Before, you'd see whichever happened to be first alphabetically.
- Wildcard results are more likely to have useful stuff at the top, rather than being dominated by foreign junk and names of obscure locations.

This also updates our usage of the whoosh API, which was old and busted as of 2.0 or so.
This commit is contained in:
parent
514ac79216
commit
eb6cae4bf3
1 changed files with 48 additions and 37 deletions
|
@ -12,6 +12,7 @@ import whoosh.filedb.fileindex
|
||||||
import whoosh.index
|
import whoosh.index
|
||||||
from whoosh.qparser import QueryParser
|
from whoosh.qparser import QueryParser
|
||||||
import whoosh.scoring
|
import whoosh.scoring
|
||||||
|
import whoosh.sorting
|
||||||
import whoosh.spelling
|
import whoosh.spelling
|
||||||
from whoosh.support import levenshtein
|
from whoosh.support import levenshtein
|
||||||
|
|
||||||
|
@ -49,40 +50,51 @@ class UninitializedIndex(object):
|
||||||
"or lookup.rebuild_index() to create it."
|
"or lookup.rebuild_index() to create it."
|
||||||
)
|
)
|
||||||
|
|
||||||
class LanguageWeighting(whoosh.scoring.Weighting):
|
def LanguageFacet(locale_ident, extra_weights={}):
|
||||||
"""A scoring class that forces otherwise-equal English results to come
|
"""Constructs a sorting function that bubbles results from the current
|
||||||
before foreign results.
|
locale (given by `locale_ident`) to the top of the list.
|
||||||
|
|
||||||
|
`extra_weights` may be a dictionary of weights which will be factored in.
|
||||||
|
Intended for use with spelling corrections, which come along with their own
|
||||||
|
weightings.
|
||||||
"""
|
"""
|
||||||
|
def score(searcher, docnum):
|
||||||
def __init__(self, locale_ident, extra_weights={}, *args, **kwargs):
|
|
||||||
"""`extra_weights` may be a dictionary of weights which will be
|
|
||||||
factored in.
|
|
||||||
|
|
||||||
Intended for use with spelling corrections, which come along with their
|
|
||||||
own weightings.
|
|
||||||
"""
|
|
||||||
self.locale_ident = locale_ident
|
|
||||||
self.extra_weights = extra_weights
|
|
||||||
super(LanguageWeighting, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def score(self, searcher, fieldnum, text, docnum, weight, QTF=1):
|
|
||||||
doc = searcher.stored_fields(docnum)
|
doc = searcher.stored_fields(docnum)
|
||||||
|
weight = extra_weights.get(doc['name'], 1.0)
|
||||||
|
|
||||||
# Apply extra weight
|
doc_language = doc['language']
|
||||||
weight = weight * self.extra_weights.get(text, 1.0)
|
if doc_language == locale_ident:
|
||||||
|
|
||||||
doc_language = doc.get('language')
|
|
||||||
|
|
||||||
if doc_language == self.locale_ident:
|
|
||||||
# Bump up names in the current locale
|
# Bump up names in the current locale
|
||||||
return weight * 2.0
|
weight *= 2.0
|
||||||
elif doc_language == u'roomaji':
|
elif doc_language == u'roomaji':
|
||||||
# Given that the Japanese names are the originals, it seems likely
|
# Given that the Japanese names are the originals, it seems likely
|
||||||
# that basically anyone might want to look them up. Boost them a
|
# that basically anyone might want to look them up. Boost them a
|
||||||
# little bit.
|
# little bit.
|
||||||
return weight * 1.4
|
weight *= 1.4
|
||||||
|
|
||||||
return weight
|
# Higher weights should come FIRST, but sorts are ascending. Negate
|
||||||
|
# the weight to fix this
|
||||||
|
return -weight
|
||||||
|
|
||||||
|
return whoosh.sorting.FunctionFacet(score)
|
||||||
|
|
||||||
|
# Relative rank of each kind of result; lower ranks sort first.  Species and
# forms share a rank so that both are treated as "a Pokemon".
_table_order = dict(
    pokemon_species=1,
    pokemon_forms=1,
    moves=2,
    abilities=3,
    items=4,
    types=5,
    locations=6,
    natures=7,
)

# Rank used for any table that has no entry in `_table_order`, so that such
# results sort after every known kind instead of aborting the search.
_unknown_table_order = max(_table_order.values()) + 1


def _table_facet_impl(searcher, docnum):
    u"""Implements a sort that puts different "types" of results in a
    relatively natural order: Pokémon first, then moves, etc.

    `searcher` must provide `stored_fields(docnum)`, returning a mapping
    with a 'table' key.  Returns the integer rank of that table from
    `_table_order`; lower ranks come first.  Tables with no assigned rank
    get `_unknown_table_order` and so sort last, rather than raising
    KeyError in the middle of a sorted search.
    """
    doc = searcher.stored_fields(docnum)
    return _table_order.get(doc['table'], _unknown_table_order)
|
||||||
|
# Facet wrapping `_table_facet_impl`; it is one of the facets combined into
# the `sortedby` argument of the lookup search.
table_facet = whoosh.sorting.FunctionFacet(_table_facet_impl)
|
||||||
|
|
||||||
|
|
||||||
class PokedexLookup(object):
|
class PokedexLookup(object):
|
||||||
|
@ -468,21 +480,22 @@ class PokedexLookup(object):
|
||||||
# Fuzzy are capped at 10, beyond which something is probably very
|
# Fuzzy are capped at 10, beyond which something is probably very
|
||||||
# wrong. Exact matches -- that is, wildcards and ids -- are far less
|
# wrong. Exact matches -- that is, wildcards and ids -- are far less
|
||||||
# constrained.
|
# constrained.
|
||||||
# Also, exact matches are sorted by name, since weight doesn't matter.
|
|
||||||
sort_by = dict()
|
|
||||||
if exact_only:
|
if exact_only:
|
||||||
max_results = self.MAX_EXACT_RESULTS
|
max_results = self.MAX_EXACT_RESULTS
|
||||||
sort_by['sortedby'] = (u'table', u'name')
|
|
||||||
else:
|
else:
|
||||||
max_results = self.MAX_FUZZY_RESULTS
|
max_results = self.MAX_FUZZY_RESULTS
|
||||||
|
|
||||||
locale = self._get_current_locale()
|
locale = self._get_current_locale()
|
||||||
searcher = self.index.searcher(
|
facet = whoosh.sorting.MultiFacet([
|
||||||
weighting=LanguageWeighting(locale.identifier))
|
LanguageFacet(locale.identifier),
|
||||||
|
table_facet,
|
||||||
|
"name",
|
||||||
|
])
|
||||||
|
searcher = self.index.searcher()
|
||||||
results = searcher.search(
|
results = searcher.search(
|
||||||
query,
|
query,
|
||||||
limit=int(max_results * self.INTERMEDIATE_FACTOR),
|
limit=int(max_results * self.INTERMEDIATE_FACTOR),
|
||||||
**sort_by
|
sortedby=facet,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Look for some fuzzy matches if necessary
|
# Look for some fuzzy matches if necessary
|
||||||
|
@ -492,10 +505,8 @@ class PokedexLookup(object):
|
||||||
|
|
||||||
fuzzy_query_parts = []
|
fuzzy_query_parts = []
|
||||||
fuzzy_weights = {}
|
fuzzy_weights = {}
|
||||||
min_weight = [None]
|
|
||||||
corrector = searcher.corrector('name')
|
corrector = searcher.corrector('name')
|
||||||
for suggestion in corrector.suggest(name, limit=max_results):
|
for suggestion in corrector.suggest(name, limit=max_results):
|
||||||
|
|
||||||
fuzzy_query_parts.append(whoosh.query.Term('name', suggestion))
|
fuzzy_query_parts.append(whoosh.query.Term('name', suggestion))
|
||||||
distance = levenshtein.relative(name, suggestion)
|
distance = levenshtein.relative(name, suggestion)
|
||||||
fuzzy_weights[suggestion] = distance
|
fuzzy_weights[suggestion] = distance
|
||||||
|
@ -508,9 +519,9 @@ class PokedexLookup(object):
|
||||||
if type_term:
|
if type_term:
|
||||||
fuzzy_query = fuzzy_query & type_term
|
fuzzy_query = fuzzy_query & type_term
|
||||||
|
|
||||||
searcher.weighting = LanguageWeighting(
|
sorter = LanguageFacet(
|
||||||
locale.identifier, extra_weights=fuzzy_weights)
|
locale.identifier, extra_weights=fuzzy_weights)
|
||||||
results = searcher.search(fuzzy_query)
|
results = searcher.search(fuzzy_query, sortedby=sorter)
|
||||||
|
|
||||||
### Convert results to db objects
|
### Convert results to db objects
|
||||||
objects = self._whoosh_records_to_results(results, exact=exact)
|
objects = self._whoosh_records_to_results(results, exact=exact)
|
||||||
|
@ -570,7 +581,7 @@ class PokedexLookup(object):
|
||||||
|
|
||||||
locale = self._get_current_locale()
|
locale = self._get_current_locale()
|
||||||
searcher = self.index.searcher()
|
searcher = self.index.searcher()
|
||||||
searcher.weighting = LanguageWeighting(locale.identifier)
|
facet = LanguageFacet(locale.identifier)
|
||||||
results = searcher.search(query) # XXX , limit=self.MAX_LOOKUP_RESULTS)
|
results = searcher.search(query, sortedby=facet) # XXX , limit=self.MAX_LOOKUP_RESULTS)
|
||||||
|
|
||||||
return self._whoosh_records_to_results(results)
|
return self._whoosh_records_to_results(results)
|
||||||
|
|
Loading…
Reference in a new issue