Allow restricting lookup by language, with a @ja: prefix. #90

This commit is contained in:
Eevee 2010-08-24 20:06:40 -07:00
parent 6ad7ba5098
commit 2431fd6754
2 changed files with 75 additions and 34 deletions

View file

@ -26,8 +26,9 @@ __all__ = ['PokedexLookup']
rx_is_number = re.compile('^\d+$') rx_is_number = re.compile('^\d+$')
LookupResult = namedtuple('LookupResult', LookupResult = namedtuple('LookupResult', [
['object', 'indexed_name', 'name', 'language', 'iso3166', 'exact']) 'object', 'indexed_name', 'name', 'language', 'iso639', 'iso3166', 'exact',
])
class UninitializedIndex(object): class UninitializedIndex(object):
class UninitializedIndexError(Exception): class UninitializedIndexError(Exception):
@ -168,7 +169,8 @@ class PokedexLookup(object):
table=whoosh.fields.ID(stored=True), table=whoosh.fields.ID(stored=True),
row_id=whoosh.fields.ID(stored=True), row_id=whoosh.fields.ID(stored=True),
language=whoosh.fields.STORED, language=whoosh.fields.STORED,
iso3166=whoosh.fields.STORED, iso639=whoosh.fields.ID(stored=True),
iso3166=whoosh.fields.ID(stored=True),
display_name=whoosh.fields.STORED, # non-lowercased name display_name=whoosh.fields.STORED, # non-lowercased name
) )
@ -188,12 +190,12 @@ class PokedexLookup(object):
row_key = dict(table=unicode(cls.__tablename__), row_key = dict(table=unicode(cls.__tablename__),
row_id=unicode(row.id)) row_id=unicode(row.id))
def add(name, language, iso3166): def add(name, language, iso639, iso3166):
normalized_name = self.normalize_name(name) normalized_name = self.normalize_name(name)
writer.add_document( writer.add_document(
name=normalized_name, display_name=name, name=normalized_name, display_name=name,
language=language, iso3166=iso3166, language=language, iso639=iso639, iso3166=iso3166,
**row_key **row_key
) )
@ -204,14 +206,14 @@ class PokedexLookup(object):
if cls == tables.Pokemon: if cls == tables.Pokemon:
# Pokémon need their form name added # Pokémon need their form name added
# XXX kinda kludgy # XXX kinda kludgy
add(row.full_name, None, u'us') add(row.full_name, None, u'en', u'us')
# If this is a default form, ALSO add the unadorned name, # If this is a default form, ALSO add the unadorned name,
# so 'Deoxys' alone will still do the right thing # so 'Deoxys' alone will still do the right thing
if row.forme_name and not row.forme_base_pokemon_id: if row.forme_name and not row.forme_base_pokemon_id:
add(row.name, None, u'us') add(row.name, None, u'en', u'us')
else: else:
add(row.name, None, u'us') add(row.name, None, u'en', u'us')
# Some things also have other languages' names # Some things also have other languages' names
# XXX other language form names..? # XXX other language form names..?
@ -224,12 +226,13 @@ class PokedexLookup(object):
continue continue
add(moonspeak, foreign_name.language.name, add(moonspeak, foreign_name.language.name,
foreign_name.language.iso639,
foreign_name.language.iso3166) foreign_name.language.iso3166)
# Add Roomaji too # Add Roomaji too
if foreign_name.language.name == 'Japanese': if foreign_name.language.name == 'Japanese':
roomaji = romanize(foreign_name.name) roomaji = romanize(foreign_name.name)
add(roomaji, u'Roomaji', u'jp') add(roomaji, u'Roomaji', u'ja', u'jp')
writer.commit() writer.commit()
@ -310,16 +313,31 @@ class PokedexLookup(object):
# Construct the term # Construct the term
type_terms = [] type_terms = []
lang_terms = []
final_valid_types = [] final_valid_types = []
for valid_type in combined_valid_types: for valid_type in combined_valid_types:
if valid_type.startswith(u'@'):
# @foo means: language must be foo.
# Allow for either country or language codes
lang_code = valid_type[1:]
lang_terms.append(whoosh.query.Term(u'iso639', lang_code))
lang_terms.append(whoosh.query.Term(u'iso3166', lang_code))
else:
# otherwise, this is a type/table name
table_name = self._parse_table_name(valid_type) table_name = self._parse_table_name(valid_type)
# Quietly ignore bogus valid_types; more likely to DTRT # Quietly ignore bogus valid_types; more likely to DTRT
if table_name: if table_name:
final_valid_types.append(valid_type)
type_terms.append(whoosh.query.Term(u'table', table_name)) type_terms.append(whoosh.query.Term(u'table', table_name))
return name, final_valid_types, whoosh.query.Or(type_terms) # Combine both kinds of restriction
all_terms = []
if type_terms:
all_terms.append(whoosh.query.Or(type_terms))
if lang_terms:
all_terms.append(whoosh.query.Or(lang_terms))
return name, combined_valid_types, whoosh.query.And(all_terms)
def _parse_table_name(self, name): def _parse_table_name(self, name):
@ -362,6 +380,7 @@ class PokedexLookup(object):
indexed_name=record['name'], indexed_name=record['name'],
name=record['display_name'], name=record['display_name'],
language=record['language'], language=record['language'],
iso639=record['iso639'],
iso3166=record['iso3166'], iso3166=record['iso3166'],
exact=exact)) exact=exact))
@ -371,12 +390,11 @@ class PokedexLookup(object):
def lookup(self, input, valid_types=[], exact_only=False): def lookup(self, input, valid_types=[], exact_only=False):
"""Attempts to find some sort of object, given a name. """Attempts to find some sort of object, given a name.
Returns a list of named (object, name, language, iso3166, exact) Returns a list of named (object, name, language, iso639, iso3166,
tuples. `object` is a database object, `name` is the name under which exact) tuples. `object` is a database object, `name` is the name under
the object was found, `language` and `iso3166` are the name and country which the object was found, `language` and the two isos are the name
code of the language in which the name was found, and `exact` is True and country codes of the language in which the name was found, and
iff this was an `exact` is True iff this was an exact match.
exact match.
This function currently ONLY does fuzzy matching if there are no exact This function currently ONLY does fuzzy matching if there are no exact
matches. matches.
@ -394,17 +412,19 @@ class PokedexLookup(object):
Also: Also:
- Type restrictions. "type:psychic" will only return the type. This - Type restrictions. "type:psychic" will only return the type. This
is how to make ID lookup useful. Multiple type specs can be entered is how to make ID lookup useful. Multiple type specs can be entered
with commas, as "move,item:1". If `valid_types` are provided, any with commas, as "move,item:1".
type prefix will be ignored. - Language restrictions. "@fr:charge" will only return Tackle, which
is called "Charge" in French. These can be combined with type
restrictions, e.g., "@fr,move:charge".
- Alternate formes can be specified merely like "wash rotom". - Alternate formes can be specified merely like "wash rotom".
`input` `input`
Name of the thing to look for. Name of the thing to look for.
`valid_types` `valid_types`
A list of table objects or names, e.g., `['pokemon', 'moves']`. If A list of type or language restrictions, e.g., `['pokemon',
this is provided, only results in one of the given tables will be '@ja']`. If this is provided, only results in one of the given
returned. tables will be returned.
`exact_only` `exact_only`
If True, only exact matches are returned. If set to False (the If True, only exact matches are returned. If set to False (the
@ -503,17 +523,18 @@ class PokedexLookup(object):
`valid_types`. `valid_types`.
""" """
tables = [] table_names = []
for valid_type in valid_types: for valid_type in valid_types:
table_name = self._parse_table_name(valid_type) table_name = self._parse_table_name(valid_type)
# Skip anything not recognized. Could be, say, a language code
if table_name: if table_name:
tables.append(self.indexed_tables[table_name]) table_names.append(table_name)
if not tables: if not table_names:
# n.b.: It's possible we got a list of valid_types and none of them # n.b.: It's possible we got a list of valid_types and none of them
# were valid, but this function is guaranteed to return # were valid, but this function is guaranteed to return
# *something*, so it politely selects from the entire index isntead # *something*, so it politely selects from the entire index instead
tables = self.indexed_tables.values() table_names = self.indexed_tables.keys()
# Rather than create an array of many hundred items and pick randomly # Rather than create an array of many hundred items and pick randomly
# from it, just pick a number up to the total number of potential # from it, just pick a number up to the total number of potential
@ -523,10 +544,10 @@ class PokedexLookup(object):
# XXX ought to cache this (in the index?) if possible # XXX ought to cache this (in the index?) if possible
total = 0 total = 0
partitions = [] partitions = []
for table in tables: for table_name in table_names:
count = self.session.query(table).count() count = self.session.query(self.indexed_tables[table_name]).count()
total += count total += count
partitions.append((table, count)) partitions.append((table_name, count))
n = random.randint(1, total) n = random.randint(1, total)
while n > partitions[0][1]: while n > partitions[0][1]:

View file

@ -71,6 +71,26 @@ def test_type_lookup():
results = lookup.lookup(u'1', valid_types=['pokemon']) results = lookup.lookup(u'1', valid_types=['pokemon'])
assert_equal(results[0].name, u'Bulbasaur', u'valid_types works as well as type: prefix') assert_equal(results[0].name, u'Bulbasaur', u'valid_types works as well as type: prefix')
def test_language_lookup():
    # "charge" is ambiguous on purpose: it is the English name of the move
    # Charge, and also the French name of the move Tackle.  That makes it a
    # good probe for whether a language restriction really narrows results.

    # Baseline: with no restriction, more than one object answers to the name.
    unrestricted = lookup.lookup(u'charge')
    assert_true(len(unrestricted) > 1, u'There are multiple "charge"s')

    # An "@fr:" prefix in the query itself should leave only the French hit.
    french_by_prefix = lookup.lookup(u'@fr:charge')
    assert_equal(french_by_prefix[0].iso639, u'fr',
                 u'Language restriction works correctly')
    assert_equal(len(french_by_prefix), 1,
                 u'Only one "charge" result when language is specified')
    assert_equal(french_by_prefix[0].object.name, u'Tackle',
                 u'Language + vague name returns the right result')

    # Passing the same restriction through valid_types must behave the same.
    french_by_argument = lookup.lookup(u'charge', valid_types=['@fr'])
    assert_equal(french_by_argument[0].object.name, u'Tackle',
                 u'valid_types works as well as @lang: prefix')

    # A language restriction and a type restriction may share one prefix.
    french_moves = lookup.lookup(u'@fr,move:charge')
    assert_equal(french_moves[0].object.name, u'Tackle',
                 u'Languages and types both work together')
def test_fuzzy_lookup(): def test_fuzzy_lookup():
tests = [ tests = [
# Regular English names # Regular English names