mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Fixed whoosh index creation to work with 0.2.x. #15
This commit is contained in:
parent
ba08a41f2f
commit
238487c908
2 changed files with 24 additions and 6 deletions
|
@ -1,8 +1,12 @@
|
||||||
# encoding: utf8
|
# encoding: utf8
|
||||||
|
import os, os.path
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from sqlalchemy.sql import func
|
from sqlalchemy.sql import func
|
||||||
import whoosh
|
import whoosh
|
||||||
|
import whoosh.filedb.filestore
|
||||||
|
import whoosh.filedb.fileindex
|
||||||
|
import whoosh.index
|
||||||
from whoosh.qparser import QueryParser
|
from whoosh.qparser import QueryParser
|
||||||
import whoosh.spelling
|
import whoosh.spelling
|
||||||
|
|
||||||
|
@ -42,16 +46,22 @@ def get_index(session):
|
||||||
if index_bits:
|
if index_bits:
|
||||||
return index_bits['index'], index_bits['speller']
|
return index_bits['index'], index_bits['speller']
|
||||||
|
|
||||||
store = whoosh.store.RamStorage()
|
store = whoosh.filedb.filestore.RamStorage()
|
||||||
schema = whoosh.fields.Schema(
|
schema = whoosh.fields.Schema(
|
||||||
name=whoosh.fields.ID(stored=True),
|
name=whoosh.fields.ID(stored=True),
|
||||||
table=whoosh.fields.STORED,
|
table=whoosh.fields.STORED,
|
||||||
row_id=whoosh.fields.STORED,
|
row_id=whoosh.fields.STORED,
|
||||||
language_id=whoosh.fields.STORED,
|
language=whoosh.fields.STORED,
|
||||||
|
|
||||||
|
# Whoosh 0.2 explodes when using a file-stored schema with no TEXT
|
||||||
|
# columns.  Appease it with a dummy TEXT field.
|
||||||
|
dummy=whoosh.fields.TEXT,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Construct a straight lookup index
|
index_directory = '/var/tmp/pokedex'
|
||||||
index = whoosh.index.Index(store, schema=schema, create=True)
|
if not os.path.exists(index_directory):
|
||||||
|
os.mkdir(index_directory)
|
||||||
|
index = whoosh.index.create_in(index_directory, schema=schema)
|
||||||
writer = index.writer()
|
writer = index.writer()
|
||||||
|
|
||||||
# Index every name in all our tables of interest
|
# Index every name in all our tables of interest
|
||||||
|
@ -82,6 +92,14 @@ def get_index(session):
|
||||||
|
|
||||||
writer.commit()
|
writer.commit()
|
||||||
|
|
||||||
|
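# XXX Hack: monkeypatch SpellChecker._schema so the spell-checker's schema also gets a dummy TEXT column (presumably the same Whoosh 0.2 workaround as the main schema)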
|
||||||
|
old__schema = whoosh.spelling.SpellChecker._schema
|
||||||
|
def new__schema(self):
|
||||||
|
schema = old__schema(self)
|
||||||
|
schema.add('dummy', whoosh.fields.TEXT)
|
||||||
|
return schema
|
||||||
|
whoosh.spelling.SpellChecker._schema = new__schema
|
||||||
|
|
||||||
# Construct and populate a spell-checker index. Quicker to do it all
|
# Construct and populate a spell-checker index. Quicker to do it all
|
||||||
# at once, as every call to add_* does a commit(), and those seem to be
|
# at once, as every call to add_* does a commit(), and those seem to be
|
||||||
# expensive
|
# expensive
|
||||||
|
@ -93,7 +111,7 @@ def get_index(session):
|
||||||
# complications.
|
# complications.
|
||||||
# The below is copied from SpellChecker.add_scored_words without the check
|
# The below is copied from SpellChecker.add_scored_words without the check
|
||||||
# for isalpha(). XXX get whoosh patched to make this unnecessary!
|
# for isalpha(). XXX get whoosh patched to make this unnecessary!
|
||||||
writer = whoosh.writing.IndexWriter(speller.index())
|
writer = speller.index(create=True).writer()
|
||||||
for word in speller_entries:
|
for word in speller_entries:
|
||||||
fields = {"word": word, "score": 1}
|
fields = {"word": word, "score": 1}
|
||||||
for size in xrange(speller.mingram, speller.maxgram + 1):
|
for size in xrange(speller.mingram, speller.maxgram + 1):
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -4,7 +4,7 @@ setup(
|
||||||
version = '0.1',
|
version = '0.1',
|
||||||
packages = find_packages(),
|
packages = find_packages(),
|
||||||
package_data = { '': 'data' },
|
package_data = { '': 'data' },
|
||||||
install_requires=['SQLAlchemy>=0.5.1', 'whoosh>=0.1.24'],
|
install_requires=['SQLAlchemy>=0.5.1', 'whoosh>=0.2.0'],
|
||||||
|
|
||||||
entry_points = {
|
entry_points = {
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
|
|
Loading…
Reference in a new issue