Tuesday, April 26, 2011

Whoosh Python Search Notes

Baisc Whoosh Search Walkthrough 


# Import required 
import os
from whoosh.index import create_in
from whoosh.fields import *

# Create a "Schema" object to store text to search
# You can create custom search fields such as "title", "domain", "path"
# and "content". 
schema = Schema(title=TEXT(stored=True), domain=ID(stored=True),
                              path=ID(stored=True), content=TEXT)

# Check to see if a search index directory exists; if not, create one
if not os.path.exists("search_test"):
    os.mkdir("search_test")

# Create a search index in the search index directory
ix = create_in("search_test", schema, indexname="testindex")
writer = ix.writer()

# Index some documents
writer.add_document(title=u"Document 1", domain=u"www.mysearch.com",
                         path=u"/mywebsite", content=u'Article')
writer.add_document(title=u"Document 2", domain=u"www.mysearch.com",
                         path=u"/mywebsite/2/article/1", content=u'Document a')
writer.commit()

# Running search queries
from whoosh.qparser import QueryParser
import whoosh.index as index
# Check to see if the given search index exists
if index.exists_in("search_test", indexname="testindex"):
    # open the search index
    ix = index.open_dir("search_test", indexname="testindex")
    # generate a search query based on some string.
    # Here is creates a query looking for "Document" in the "content"
    # search index field. 
    query = QueryParser("content", ix.schema).parse(u'*Document*')
    with ix.searcher() as searcher:  
        # do the actual search
        results = searcher.search(query)
        results

# Running search queries
from whoosh.qparser import QueryParser
import whoosh.index as index
if index.exists_in("search_test", indexname="testindex"):
    ix = index.open_dir("search_test", indexname="testindex"))
    query = QueryParser("domain", ix.schema).parse(u'www.mydomain.com')
    with ix.searcher() as searcher:
        results = searcher.search(query)
        results

# Running search queries
from whoosh.qparser import QueryParser, MultifieldParser
import whoosh.index as index
from whoosh.query import *
if index.exists_in("search_test", indexname="testindex"):
    ix = index.open_dir("search_test")
    query = MultifieldParser(["domain","title","content"], schema=ix.schema).parse(u"Document OR Article")
    with ix.searcher() as searcher:
        results = searcher.search(query)
        results

No comments: