Mercurial > wikked
changeset 417:e0127ea76516
index: Better handling of search previews in whoosh.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 22 Mar 2017 17:51:41 -0700 |
parents | a11a7afb53a7 |
children | f8410cd802ad |
files | wikked/indexer/whooshidx.py |
diffstat | 1 files changed, 10 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/wikked/indexer/whooshidx.py Mon Mar 20 23:22:00 2017 -0700
+++ b/wikked/indexer/whooshidx.py Wed Mar 22 17:51:41 2017 -0700
@@ -2,8 +2,8 @@
 import os.path
 import logging
 from .base import WikiIndex, HitResult
-from whoosh.analysis import (StandardAnalyzer, StemmingAnalyzer,
-        CharsetFilter, NgramFilter)
+from whoosh.analysis import (
+        StemmingAnalyzer, CharsetFilter, NgramWordAnalyzer)
 from whoosh.fields import Schema, ID, TEXT, STORED
 from whoosh.highlight import WholeFragmenter, UppercaseFormatter
 from whoosh.index import create_in, open_dir
@@ -74,15 +74,16 @@
 
     def previewSearch(self, query):
         with self.ix.searcher() as searcher:
-            title_qp = QueryParser("title_preview", self.ix.schema).parse(query)
+            title_qp = QueryParser(
+                "title_preview", self.ix.schema).parse(query)
             results = searcher.search(title_qp)
             results.fragmenter = WholeFragmenter()
 
             hits = []
             for result in results:
                 hit = HitResult(
-                        result['url'],
-                        result.highlights('title_preview', text=result['title']))
+                    result['url'],
+                    result.highlights('title_preview', text=result['title']))
                 hits.append(hit)
             return hits
 
@@ -105,12 +106,12 @@
             return hits
 
     def _getSchema(self):
-        preview_analyzer = (StandardAnalyzer() | CharsetFilter(accent_map) |
-                NgramFilter(minsize=1))
+        preview_analyzer = NgramWordAnalyzer(minsize=2)
         text_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
         schema = Schema(
             url=ID(stored=True),
-            title_preview=TEXT(analyzer=preview_analyzer, stored=False),
+            title_preview=TEXT(analyzer=preview_analyzer,
+                               stored=False, phrase=False),
             title=TEXT(analyzer=text_analyzer, stored=True),
             text=TEXT(analyzer=text_analyzer, stored=True),
             path=STORED,
@@ -124,7 +125,7 @@
             url=page.url,
             title_preview=page.title,
             title=page.title,
-            text=page.text,
+            text=page.raw_text,
             path=page.path,
             time=os.path.getmtime(page.path)
         )