Fixed whoosh index creation to work with 0.2.x. #15

This commit is contained in:
Eevee 2009-08-16 21:03:49 -07:00
parent ba08a41f2f
commit 238487c908
2 changed files with 24 additions and 6 deletions

View file

@ -1,8 +1,12 @@
# encoding: utf8 # encoding: utf8
import os, os.path
import re import re
from sqlalchemy.sql import func from sqlalchemy.sql import func
import whoosh import whoosh
import whoosh.filedb.filestore
import whoosh.filedb.fileindex
import whoosh.index
from whoosh.qparser import QueryParser from whoosh.qparser import QueryParser
import whoosh.spelling import whoosh.spelling
@ -42,16 +46,22 @@ def get_index(session):
if index_bits: if index_bits:
return index_bits['index'], index_bits['speller'] return index_bits['index'], index_bits['speller']
store = whoosh.store.RamStorage() store = whoosh.filedb.filestore.RamStorage()
schema = whoosh.fields.Schema( schema = whoosh.fields.Schema(
name=whoosh.fields.ID(stored=True), name=whoosh.fields.ID(stored=True),
table=whoosh.fields.STORED, table=whoosh.fields.STORED,
row_id=whoosh.fields.STORED, row_id=whoosh.fields.STORED,
language_id=whoosh.fields.STORED, language=whoosh.fields.STORED,
# Whoosh 0.2 explodes when using a file-stored schema with no TEXT
# columns. Appease it
dummy=whoosh.fields.TEXT,
) )
# Construct a straight lookup index index_directory = '/var/tmp/pokedex'
index = whoosh.index.Index(store, schema=schema, create=True) if not os.path.exists(index_directory):
os.mkdir(index_directory)
index = whoosh.index.create_in(index_directory, schema=schema)
writer = index.writer() writer = index.writer()
# Index every name in all our tables of interest # Index every name in all our tables of interest
@ -82,6 +92,14 @@ def get_index(session):
writer.commit() writer.commit()
# XXX GIHWEGREHKG
old__schema = whoosh.spelling.SpellChecker._schema
def new__schema(self):
schema = old__schema(self)
schema.add('dummy', whoosh.fields.TEXT)
return schema
whoosh.spelling.SpellChecker._schema = new__schema
# Construct and populate a spell-checker index. Quicker to do it all # Construct and populate a spell-checker index. Quicker to do it all
# at once, as every call to add_* does a commit(), and those seem to be # at once, as every call to add_* does a commit(), and those seem to be
# expensive # expensive
@ -93,7 +111,7 @@ def get_index(session):
# complications. # complications.
# The below is copied from SpellChecker.add_scored_words without the check # The below is copied from SpellChecker.add_scored_words without the check
# for isalpha(). XXX get whoosh patched to make this unnecessary! # for isalpha(). XXX get whoosh patched to make this unnecessary!
writer = whoosh.writing.IndexWriter(speller.index()) writer = speller.index(create=True).writer()
for word in speller_entries: for word in speller_entries:
fields = {"word": word, "score": 1} fields = {"word": word, "score": 1}
for size in xrange(speller.mingram, speller.maxgram + 1): for size in xrange(speller.mingram, speller.maxgram + 1):

View file

@ -4,7 +4,7 @@ setup(
version = '0.1', version = '0.1',
packages = find_packages(), packages = find_packages(),
package_data = { '': 'data' }, package_data = { '': 'data' },
install_requires=['SQLAlchemy>=0.5.1', 'whoosh>=0.1.24'], install_requires=['SQLAlchemy>=0.5.1', 'whoosh>=0.2.0'],
entry_points = { entry_points = {
'console_scripts': [ 'console_scripts': [