wikked: changeset 152:8e75c12b1cc9
Search preview changes:
- Typing a query in the search box now runs an n-gram search through the
  `whoosh` indexer.
- The search preview matches page titles and shows a drop-down list of pages.
Fixed a naming collision with the `whoosh` package: the indexer module moved
from `wikked/indexer/whoosh.py` to `wikked/indexer/whooshidx.py`.
Fixed an inheritance problem in the `elastic` indexer: its `__init__` never
called `WikiIndex.__init__`.
author       Ludovic Chabant <ludovic@chabant.com>
date         Mon, 23 Dec 2013 13:29:00 -0800
parents      f32af0888382
children     b4a69ee1a608
files        static/css/wikked/page.less static/js/wikked/models.js static/js/wikked/views.js static/tpl/nav.html wikked/indexer/elastic.py wikked/indexer/whoosh.py wikked/indexer/whooshidx.py wikked/views/special.py wikked/wiki.py
diffstat     9 files changed, 200 insertions(+), 139 deletions(-)
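How the n-gram preview matching works: `whoosh` analyzers compose with `|`, and the new `title_preview` field (see `whooshidx.py` below) pipes a stemming, accent-folding analyzer through `NgramFilter(minsize=3)`, so every indexed title is broken into overlapping three-character fragments. A minimal sketch, assuming whoosh 2.x (the sample title is invented):

```python
# Analyzer chain matching the `title_preview` field added in whooshidx.py
# below; requires whoosh 2.x. The sample title is invented.
from whoosh.analysis import StemmingAnalyzer, CharsetFilter, NgramFilter
from whoosh.support.charset import accent_map

preview_analyzer = (StemmingAnalyzer() | CharsetFilter(accent_map) |
                    NgramFilter(minsize=3))

# Each stemmed, accent-folded token becomes its overlapping 3-grams.
print([t.text for t in preview_analyzer(u"Sandbox")])
# -> ['san', 'and', 'ndb', 'dbo', 'box']
```

Queries against that field are broken into the same fragments, which is why the UI below only starts previewing once the query reaches three characters.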
--- a/static/css/wikked/page.less Mon Dec 16 20:59:42 2013 -0800
+++ b/static/css/wikked/page.less Mon Dec 23 13:29:00 2013 -0800
@@ -71,20 +71,29 @@
 }
 
 // Search
-form.form-search {
-    display: inline-block;
+ul#search-preview {
+    position: absolute;
+    list-style: none;
     margin: 0;
+    padding: 0;
 
-    .search-query {
-        font-size: @font-size-small;
-        height: @font-size-small * 1.5;
+    li {
+        background: #fff;
+        border: 1px solid #eee;
+        margin: 0;
+        padding: 0;
+    }
+    a {
+        margin: 0;
+        padding: 0.5em;
+        display: inline-block;
+        width: 100%;
     }
 }
 
 ul.search-results {
     list-style: none;
 }
 b.match {
-    padding: 0 0.2em;
     background: #ffeb84;
 }
--- a/static/js/wikked/models.js Mon Dec 16 20:59:42 2013 -0800
+++ b/static/js/wikked/models.js Mon Dec 23 13:29:00 2013 -0800
@@ -34,17 +34,21 @@
     },
     doPreviewSearch: function(query, callback) {
         if (this._isSearching) {
+            this._pendingQuery = query;
+            this._pendingCallback = callback;
             return;
         }
         this._isSearching = true;
         var $model = this;
-        $.getJSON('/api/search', { q: query })
+        $.getJSON('/api/searchpreview', { q: query })
            .done(function (data) {
                $model._isSearching = false;
                callback(data);
+               $model._flushPendingQuery();
            })
            .fail(function() {
                $model._isSearching = false;
+               $model._flushPendingQuery();
            });
     },
     doSearch: function(form) {
@@ -60,6 +64,17 @@
         });
     },
     _isSearching: false,
+    _pendingQuery: null,
+    _pendingCallback: null,
+    _flushPendingQuery: function() {
+        if (this._pendingQuery && this._pendingCallback) {
+            var q = this._pendingQuery;
+            var c = this._pendingCallback;
+            this._pendingQuery = null;
+            this._pendingCallback = null;
+            this.doPreviewSearch(q, c);
+        }
+    },
     _onChangeAuth: function(auth) {
         if (auth) {
             this.set({
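The `_pendingQuery`/`_flushPendingQuery` additions keep at most one preview request in flight: while a request is outstanding, only the newest query is remembered, and it is replayed once the response (or failure) comes back, so fast typing never queues a backlog of searches. The same idea transposed to Python for illustration; every name below is invented, not wikked's:

```python
# Coalescing pattern from models.js, transposed to Python. Invented names;
# `fetch` stands in for the async $.getJSON call.
class PreviewSearcher(object):
    def __init__(self, fetch):
        self._fetch = fetch       # fetch(query) -> results
        self._searching = False
        self._pending = None      # only the newest waiting query is kept

    def search(self, query, callback):
        if self._searching:
            self._pending = (query, callback)  # overwrite older pending query
            return
        self._searching = True
        try:
            callback(self._fetch(query))
        finally:
            self._searching = False
            self._flush_pending()

    def _flush_pending(self):
        # Replay the most recent query that arrived while we were busy.
        if self._pending is not None:
            query, callback = self._pending
            self._pending = None
            self.search(query, callback)
```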
--- a/static/js/wikked/views.js Mon Dec 16 20:59:42 2013 -0800
+++ b/static/js/wikked/views.js Mon Dec 23 13:29:00 2013 -0800
@@ -148,7 +148,8 @@
     },
     render: function() {
         NavigationView.__super__.render.apply(this, arguments);
-        this.origPageEl = $('.wrapper>article');
+        this.searchPreviewList = this.$('#search-preview');
+        this.searchPreviewList.hide();
         return this;
     },
     events: {
@@ -162,34 +163,33 @@
         return false;
     },
     _previewSearch: function(e) {
-        // Restore the original content if the query is now
-        // empty. Otherwise, run a search and render only the
-        // `article` portion of the results page.
-        var origPageEl = this.origPageEl;
-        var curPreviewEl = $('.wrapper>article[class~="preview-search-results"]');
         var query = $(e.currentTarget).val();
-        if (query && query.length > 0) {
-            var template = Handlebars.compile(tplSearchResults);
+        if (query && query.length >= 3) {
+            var $view = this;
             this.model.doPreviewSearch(query, function(data) {
-                data.is_instant = true;
-                var resultList = $(template(data));
-                var inner = $(resultList)
-                    .addClass('preview-search-results');
-                if (origPageEl.is(':visible')) {
-                    inner.insertAfter(origPageEl);
-                    origPageEl.hide();
-                } else {
-                    curPreviewEl.replaceWith(inner);
+                var resultStr = '';
+                for (var i = 0; i < data.hits.length; ++i) {
+                    var hitUrl = data.hits[i].url.replace(/^\//, '');
+                    console.log(hitUrl, data.hits[i].title);
+                    resultStr += '<li>' +
+                        '<a href="/#read/' + hitUrl + '">' +
+                        data.hits[i].title +
+                        '</a>' +
+                        '</li>';
                 }
+                console.log("Adding hits to the preview list.");
+                $view.searchPreviewList.html(resultStr);
+                if (!$view.searchPreviewList.is(':visible'))
+                    $view.searchPreviewList.slideDown(200);
            });
-        } else {
-            curPreviewEl.remove();
-            origPageEl.show();
+        } else if (!query || query.length === 0) {
+            this.searchPreviewList.slideUp(200);
         }
     },
     _searchQueryChanged: function(e) {
         if (e.keyCode == 27) {
             // Clear search on `Esc`.
+            console.log("Clearing search results.");
             $(e.currentTarget).val('').trigger('input');
         }
     }
--- a/static/tpl/nav.html Mon Dec 16 20:59:42 2013 -0800
+++ b/static/tpl/nav.html Mon Dec 23 13:29:00 2013 -0800
@@ -18,7 +18,8 @@
 <form role="search" id="search" class="navbar-form navbar-left">
   <div class="form-group">
     <label class="sr-only" for="search-query">Search query</label>
-    <input type="text" name="q" id="search-query" class="form-control input-sm" placeholder="Search...">
+    <input type="text" name="q" id="search-query" class="form-control input-sm" placeholder="Search..."></input>
+    <ul id="search-preview"></ul>
   </div>
   <button type="submit" class="btn btn-sm btn-default">Search</button>
 </form>
--- a/wikked/indexer/elastic.py Mon Dec 16 20:59:42 2013 -0800
+++ b/wikked/indexer/elastic.py Mon Dec 23 13:29:00 2013 -0800
@@ -13,7 +13,7 @@
 
 class ElasticWikiIndex(WikiIndex):
     def __init__(self):
-        pass
+        WikiIndex.__init__(self)
 
     def initIndex(self, wiki):
         self.es = Elasticsearch()
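This one-line change is the "inheritance problem" from the commit message: an `__init__` consisting of `pass` overrides, and therefore skips, whatever setup `WikiIndex.__init__` performs, leaving the instance without its base-class state. A generic sketch of the failure mode; the changeset doesn't show `WikiIndex`'s internals, so the `store_dir` attribute below is hypothetical:

```python
# Sketch of the bug fixed above. `store_dir` is a hypothetical stand-in
# for whatever state WikiIndex.__init__ actually sets up.
class WikiIndex(object):
    def __init__(self):
        self.store_dir = None  # base-class state

class BrokenIndex(WikiIndex):
    def __init__(self):
        pass  # base __init__ never runs; no store_dir attribute exists

class FixedIndex(WikiIndex):
    def __init__(self):
        WikiIndex.__init__(self)  # the fix: run the base setup

print(hasattr(FixedIndex(), 'store_dir'))   # True
print(hasattr(BrokenIndex(), 'store_dir'))  # False -> AttributeError later
```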
--- a/wikked/indexer/whoosh.py Mon Dec 16 20:59:42 2013 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-import os
-import os.path
-import codecs
-import logging
-from base import WikiIndex
-from whoosh.index import create_in, open_dir
-from whoosh.fields import Schema, ID, TEXT, STORED
-from whoosh.qparser import QueryParser
-
-
-logger = logging.getLogger(__name__)
-
-
-class WhooshWikiIndex(WikiIndex):
-    def __init__(self):
-        WikiIndex.__init__(self)
-
-    def initIndex(self, wiki):
-        self.store_dir = os.path.join(wiki.root, '.wiki', 'index')
-        if not os.path.isdir(self.store_dir):
-            logger.debug("Creating new index in: " + self.store_dir)
-            os.makedirs(self.store_dir)
-            self.ix = create_in(self.store_dir, self._getSchema())
-        else:
-            self.ix = open_dir(self.store_dir)
-
-    def reset(self, pages):
-        logger.debug("Re-creating new index in: " + self.store_dir)
-        self.ix = create_in(self.store_dir, schema=self._getSchema())
-        writer = self.ix.writer()
-        for page in pages:
-            self._indexPage(writer, page)
-        writer.commit()
-
-    def update(self, pages):
-        logger.debug("Updating index...")
-        to_reindex = set()
-        already_indexed = set()
-
-        with self.ix.searcher() as searcher:
-            writer = self.ix.writer()
-
-            for fields in searcher.all_stored_fields():
-                indexed_url = fields['url']
-                indexed_path = fields['path']
-                indexed_time = fields['time']
-
-                if not os.path.isfile(indexed_path):
-                    # File was deleted.
-                    self._unindexPage(writer, indexed_url)
-                else:
-                    already_indexed.add(indexed_path)
-                    if os.path.getmtime(indexed_path) > indexed_time:
-                        # File has changed since last index.
-                        self._unindexPage(writer, indexed_url)
-                        to_reindex.add(indexed_path)
-
-            for page in pages:
-                if page.path in to_reindex or page.path not in already_indexed:
-                    self._indexPage(writer, page)
-
-            writer.commit()
-        logger.debug("...done updating index.")
-
-    def search(self, query):
-        with self.ix.searcher() as searcher:
-            title_qp = QueryParser("title", self.ix.schema).parse(query)
-            content_qp = QueryParser("content", self.ix.schema).parse(query)
-            comp_query = title_qp | content_qp
-            results = searcher.search(comp_query)
-
-            page_infos = []
-            for hit in results:
-                page_info = {
-                    'title': hit['title'],
-                    'url': hit['url']
-                }
-                page_info['title_highlights'] = hit.highlights('title')
-                with codecs.open(hit['path'], 'r', encoding='utf-8') as f:
-                    content = f.read()
-                page_info['content_highlights'] = hit.highlights('content', text=content)
-                page_infos.append(page_info)
-            return page_infos
-
-    def _getSchema(self):
-        schema = Schema(
-            url=ID(stored=True),
-            title=TEXT(stored=True),
-            content=TEXT,
-            path=STORED,
-            time=STORED
-        )
-        return schema
-
-    def _indexPage(self, writer, page):
-        logger.debug("Indexing '%s'." % page.url)
-        writer.add_document(
-            url=unicode(page.url),
-            title=unicode(page.title),
-            content=unicode(page.raw_text),
-            path=page.path,
-            time=os.path.getmtime(page.path)
-        )
-
-    def _unindexPage(self, writer, url):
-        logger.debug("Removing '%s' from index." % url)
-        writer.delete_by_term('url', url)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/wikked/indexer/whooshidx.py Mon Dec 23 13:29:00 2013 -0800
@@ -0,0 +1,125 @@
+import os
+import os.path
+import logging
+from base import WikiIndex, HitResult
+from whoosh.analysis import StemmingAnalyzer, CharsetFilter, NgramFilter
+from whoosh.fields import Schema, ID, TEXT, STORED
+from whoosh.highlight import WholeFragmenter
+from whoosh.index import create_in, open_dir
+from whoosh.qparser import QueryParser
+from whoosh.support.charset import accent_map
+
+
+logger = logging.getLogger(__name__)
+
+
+class WhooshWikiIndex(WikiIndex):
+    def __init__(self):
+        WikiIndex.__init__(self)
+
+    def initIndex(self, wiki):
+        self.store_dir = os.path.join(wiki.root, '.wiki', 'index')
+        if not os.path.isdir(self.store_dir):
+            logger.debug("Creating new index in: " + self.store_dir)
+            os.makedirs(self.store_dir)
+            self.ix = create_in(self.store_dir, self._getSchema())
+        else:
+            self.ix = open_dir(self.store_dir)
+
+    def reset(self, pages):
+        logger.debug("Re-creating new index in: " + self.store_dir)
+        self.ix = create_in(self.store_dir, schema=self._getSchema())
+        writer = self.ix.writer()
+        for page in pages:
+            self._indexPage(writer, page)
+        writer.commit()
+
+    def update(self, pages):
+        logger.debug("Updating index...")
+        to_reindex = set()
+        already_indexed = set()
+
+        with self.ix.searcher() as searcher:
+            writer = self.ix.writer()
+
+            for fields in searcher.all_stored_fields():
+                indexed_url = fields['url']
+                indexed_path = fields['path']
+                indexed_time = fields['time']
+
+                if not os.path.isfile(indexed_path):
+                    # File was deleted.
+                    self._unindexPage(writer, indexed_url)
+                else:
+                    already_indexed.add(indexed_path)
+                    if os.path.getmtime(indexed_path) > indexed_time:
+                        # File has changed since last index.
+                        self._unindexPage(writer, indexed_url)
+                        to_reindex.add(indexed_path)
+
+            for page in pages:
+                if page.path in to_reindex or page.path not in already_indexed:
+                    self._indexPage(writer, page)
+
+            writer.commit()
+        logger.debug("...done updating index.")
+
+    def previewSearch(self, query):
+        with self.ix.searcher() as searcher:
+            title_qp = QueryParser("title_preview", self.ix.schema).parse(query)
+            results = searcher.search(title_qp)
+            results.fragmenter = WholeFragmenter()
+
+            hits = []
+            for result in results:
+                hit = HitResult(
+                    result['url'],
+                    result.highlights('title_preview', text=result['title']))
+                hits.append(hit)
+            return hits
+
+    def search(self, query):
+        with self.ix.searcher() as searcher:
+            title_qp = QueryParser("title", self.ix.schema).parse(query)
+            text_qp = QueryParser("text", self.ix.schema).parse(query)
+            comp_query = title_qp | text_qp
+            results = searcher.search(comp_query)
+
+            hits = []
+            for result in results:
+                hit = HitResult(
+                    result['url'],
+                    result.highlights('title'),
+                    result.highlights('text'))
+                hits.append(hit)
+            return hits
+
+    def _getSchema(self):
+        preview_analyzer = (StemmingAnalyzer() | CharsetFilter(accent_map) |
+                            NgramFilter(minsize=3))
+        text_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
+        schema = Schema(
+            url=ID(stored=True),
+            title_preview=TEXT(analyzer=preview_analyzer, stored=False),
+            title=TEXT(analyzer=text_analyzer, stored=True),
+            text=TEXT(analyzer=text_analyzer, stored=True),
+            path=STORED,
+            time=STORED
+        )
+        return schema
+
+    def _indexPage(self, writer, page):
+        logger.debug("Indexing '%s'." % page.url)
+        writer.add_document(
+            url=unicode(page.url),
+            title_preview=unicode(page.title),
+            title=unicode(page.title),
+            text=unicode(page.text),
+            path=page.path,
+            time=os.path.getmtime(page.path)
+        )
+
+    def _unindexPage(self, writer, url):
+        logger.debug("Removing '%s' from index." % url)
+        writer.delete_by_term('url', url)
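A quick way to see the new `title_preview` field behave is a throwaway in-memory index built with the same analyzer chain; a sketch assuming whoosh 2.x, with invented sample pages:

```python
# In-memory demo of the n-gram preview field; assumes whoosh 2.x.
# Sample pages are invented.
from whoosh.analysis import StemmingAnalyzer, CharsetFilter, NgramFilter
from whoosh.fields import Schema, ID, TEXT
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import QueryParser
from whoosh.support.charset import accent_map

preview_analyzer = (StemmingAnalyzer() | CharsetFilter(accent_map) |
                    NgramFilter(minsize=3))
schema = Schema(url=ID(stored=True),
                title_preview=TEXT(analyzer=preview_analyzer, stored=False),
                title=TEXT(stored=True))

ix = RamStorage().create_index(schema)
writer = ix.writer()
for url, title in [(u'/sandbox', u'Sandbox'), (u'/main-page', u'Main Page')]:
    writer.add_document(url=url, title_preview=title, title=title)
writer.commit()

with ix.searcher() as searcher:
    # The query text is n-grammed by the same analyzer, so a partial
    # word is enough to match.
    q = QueryParser("title_preview", ix.schema).parse(u"sand")
    for hit in searcher.search(q):
        print(hit['url'] + ' ' + hit['title'])  # -> /sandbox Sandbox
```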
--- a/wikked/views/special.py Mon Dec 16 20:59:42 2013 -0800
+++ b/wikked/views/special.py Mon Dec 23 13:29:00 2013 -0800
@@ -1,4 +1,4 @@
-from flask import g, request
+from flask import g, request, abort
 from wikked.views import (is_page_readable, get_page_meta, get_page_or_none,
         make_auth_response)
 from wikked.web import app
@@ -29,6 +29,8 @@
 @app.route('/api/search')
 def api_search():
     query = request.args.get('q')
+    if query is None or query == '':
+        abort(400)
 
     readable_hits = []
     hits = list(g.wiki.index.search(query))
@@ -40,3 +42,20 @@
     result = {'query': query, 'hit_count': len(readable_hits),
             'hits': readable_hits}
     return make_auth_response(result)
+
+@app.route('/api/searchpreview')
+def api_searchpreview():
+    query = request.args.get('q')
+    if query is None or query == '':
+        abort(400)
+
+    readable_hits = []
+    hits = list(g.wiki.index.previewSearch(query))
+    for h in hits:
+        page = get_page_or_none(h.url, convert_url=False)
+        if page is not None and is_page_readable(page):
+            readable_hits.append({'url': h.url, 'title': h.title})
+
+    result = {'query': query, 'hit_count': len(readable_hits),
+            'hits': readable_hits}
+    return make_auth_response(result)
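The new endpoint returns the same `{query, hit_count, hits}` envelope as `/api/search`, with each hit reduced to `url` and `title`; the title carries whoosh's `<b class="match">` highlight markup that `page.less` styles above, and an empty or missing `q` is now a 400. A hypothetical client call, assuming a local dev server on Flask's default port (`make_auth_response` may add fields beyond those shown):

```python
# Hypothetical call against a local dev server; host and port are
# assumptions, and the exact response may include extra auth fields.
import json
import urllib2  # Python 2, matching the era of this changeset

resp = urllib2.urlopen('http://localhost:5000/api/searchpreview?q=sand')
data = json.loads(resp.read())
# Expected shape, per api_searchpreview above (titles include highlight
# markup), roughly:
# {"query": "sand", "hit_count": 1,
#  "hits": [{"url": "/sandbox", "title": "<b class=\"match\">Sand</b>box"}]}
for hit in data['hits']:
    print(hit['url'] + ': ' + hit['title'])
```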
--- a/wikked/wiki.py Mon Dec 16 20:59:42 2013 -0800
+++ b/wikked/wiki.py Mon Dec 23 13:29:00 2013 -0800
@@ -40,7 +40,7 @@
     def index_factory(self, config):
         index_type = config.get('wiki', 'indexer')
         if index_type == 'whoosh':
-            from wikked.indexer.whoosh import WhooshWikiIndex
+            from wikked.indexer.whooshidx import WhooshWikiIndex
             return WhooshWikiIndex()
         elif index_type == 'elastic':
             from wikked.indexer.elastic import ElasticWikiIndex
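The factory dispatches on the `indexer` key of the `[wiki]` config section, so switching search backends is a one-line configuration change. A minimal sketch of that lookup; the `ConfigParser` plumbing here is illustrative, not wikked's actual config loader:

```python
# Illustrative reproduction of the config lookup in index_factory();
# not wikked's actual loader. Python 2 ConfigParser, matching the codebase.
from ConfigParser import SafeConfigParser
from StringIO import StringIO

config = SafeConfigParser()
config.readfp(StringIO("[wiki]\nindexer = whoosh\n"))

# 'whoosh' -> WhooshWikiIndex (now imported from wikked.indexer.whooshidx);
# 'elastic' -> ElasticWikiIndex
print(config.get('wiki', 'indexer'))  # -> whoosh
```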