From eb6cae4bf39a9bf1a40b6e7acc78e0a618de45df Mon Sep 17 00:00:00 2001 From: Eevee Date: Fri, 9 Sep 2011 22:31:38 -0700 Subject: [PATCH] Fix whoosh result sorting. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now results are sorted by is-this-your-language (times levenshtein distance, if appropriate), then by rough class of result (Pokémon, then moves, then abilities, etc.) and finally by name. This fixes a couple issues: - If both a foreign name and a local name matched a wildcard lookup, you'll see the local name. Before, you'd see whichever happened to be first alphabetically. - Wildcard results are more likely to have useful stuff at the top, rather than being dominated by foreign junk and names of obscure locations. This also updates our usage of the whoosh API, which was old and busted as of 2.0 or so. --- pokedex/lookup.py | 85 ++++++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/pokedex/lookup.py b/pokedex/lookup.py index d6883fc..7439c2c 100644 --- a/pokedex/lookup.py +++ b/pokedex/lookup.py @@ -12,6 +12,7 @@ import whoosh.filedb.fileindex import whoosh.index from whoosh.qparser import QueryParser import whoosh.scoring +import whoosh.sorting import whoosh.spelling from whoosh.support import levenshtein @@ -49,40 +50,51 @@ class UninitializedIndex(object): "or lookup.rebuild_index() to create it." ) -class LanguageWeighting(whoosh.scoring.Weighting): - """A scoring class that forces otherwise-equal English results to come - before foreign results. +def LanguageFacet(locale_ident, extra_weights={}): + """Constructs a sorting function that bubbles results from the current + locale (given by `locale_ident`) to the top of the list. + + `extra_weights` may be a dictionary of weights which will be factored in. + Intended for use with spelling corrections, which come along with their own + weightings. """ - - def __init__(self, locale_ident, extra_weights={}, *args, **kwargs): - """`extra_weights` may be a dictionary of weights which will be - factored in. - - Intended for use with spelling corrections, which come along with their - own weightings. - """ - self.locale_ident = locale_ident - self.extra_weights = extra_weights - super(LanguageWeighting, self).__init__(*args, **kwargs) - - def score(self, searcher, fieldnum, text, docnum, weight, QTF=1): + def score(searcher, docnum): doc = searcher.stored_fields(docnum) + weight = extra_weights.get(doc['name'], 1.0) - # Apply extra weight - weight = weight * self.extra_weights.get(text, 1.0) - - doc_language = doc.get('language') - - if doc_language == self.locale_ident: + doc_language = doc['language'] + if doc_language == locale_ident: # Bump up names in the current locale - return weight * 2.0 + weight *= 2.0 elif doc_language == u'roomaji': # Given that the Japanese names are the originals, it seems likely # that basically anyone might want to look them up. Boost them a # little bit. - return weight * 1.4 + weight *= 1.4 - return weight + # Higher weights should come FIRST, but sorts are ascending. Negate + # the weight to fix this + return -weight + + return whoosh.sorting.FunctionFacet(score) + +_table_order = dict( + pokemon_species=1, + pokemon_forms=1, + moves=2, + abilities=3, + items=4, + types=5, + locations=6, + natures=7, +) +def _table_facet_impl(searcher, docnum): + u"""Implements a sort that puts different "types" of results in a + relatively natural order: Pokémon first, then moves, etc. + """ + doc = searcher.stored_fields(docnum) + return _table_order[doc['table']] +table_facet = whoosh.sorting.FunctionFacet(_table_facet_impl) class PokedexLookup(object): @@ -468,21 +480,22 @@ class PokedexLookup(object): # Fuzzy are capped at 10, beyond which something is probably very # wrong. Exact matches -- that is, wildcards and ids -- are far less # constrained. - # Also, exact matches are sorted by name, since weight doesn't matter. - sort_by = dict() if exact_only: max_results = self.MAX_EXACT_RESULTS - sort_by['sortedby'] = (u'table', u'name') else: max_results = self.MAX_FUZZY_RESULTS locale = self._get_current_locale() - searcher = self.index.searcher( - weighting=LanguageWeighting(locale.identifier)) + facet = whoosh.sorting.MultiFacet([ + LanguageFacet(locale.identifier), + table_facet, + "name", + ]) + searcher = self.index.searcher() results = searcher.search( query, limit=int(max_results * self.INTERMEDIATE_FACTOR), - **sort_by + sortedby=facet, ) # Look for some fuzzy matches if necessary @@ -492,10 +505,8 @@ class PokedexLookup(object): fuzzy_query_parts = [] fuzzy_weights = {} - min_weight = [None] corrector = searcher.corrector('name') for suggestion in corrector.suggest(name, limit=max_results): - fuzzy_query_parts.append(whoosh.query.Term('name', suggestion)) distance = levenshtein.relative(name, suggestion) fuzzy_weights[suggestion] = distance @@ -508,9 +519,9 @@ class PokedexLookup(object): if type_term: fuzzy_query = fuzzy_query & type_term - searcher.weighting = LanguageWeighting( + sorter = LanguageFacet( locale.identifier, extra_weights=fuzzy_weights) - results = searcher.search(fuzzy_query) + results = searcher.search(fuzzy_query, sortedby=sorter) ### Convert results to db objects objects = self._whoosh_records_to_results(results, exact=exact) @@ -570,7 +581,7 @@ class PokedexLookup(object): locale = self._get_current_locale() searcher = self.index.searcher() - searcher.weighting = LanguageWeighting(locale.identifier) - results = searcher.search(query) # XXX , limit=self.MAX_LOOKUP_RESULTS) + facet = LanguageFacet(locale.identifier) + results = searcher.search(query, sortedby=facet) # XXX , limit=self.MAX_LOOKUP_RESULTS) return self._whoosh_records_to_results(results)