changeset 152:8e75c12b1cc9

Search preview changes:
- Typing queries in the search box now runs n-gram searches with the `whoosh` indexer.
- Search preview matches page titles and shows a drop-down list of pages.
Also fixed the naming collision between the `whoosh` indexer module and the `whoosh` package (the module is now `whooshidx.py`), and fixed the `elastic` indexer not calling its base class constructor.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 23 Dec 2013 13:29:00 -0800
parents f32af0888382
children b4a69ee1a608
files static/css/wikked/page.less static/js/wikked/models.js static/js/wikked/views.js static/tpl/nav.html wikked/indexer/elastic.py wikked/indexer/whoosh.py wikked/indexer/whooshidx.py wikked/views/special.py wikked/wiki.py
diffstat 9 files changed, 200 insertions(+), 139 deletions(-)
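
For context, a minimal sketch (not part of the changeset) of how the new n-gram
`title_preview` analyzer chain from wikked/indexer/whooshidx.py (below) breaks a
page title into searchable fragments:

    # Same chain as `preview_analyzer` in WhooshWikiIndex._getSchema() below.
    from whoosh.analysis import StemmingAnalyzer, CharsetFilter, NgramFilter
    from whoosh.support.charset import accent_map

    preview_analyzer = (StemmingAnalyzer() | CharsetFilter(accent_map) |
                        NgramFilter(minsize=3))

    # A title like u"Sandbox" is indexed as overlapping 3-grams
    # ("san", "and", "ndb", "dbo", "box"), so a partial query such as
    # "box" matches the page title while the user is still typing.
    print [t.text for t in preview_analyzer(u"Sandbox")]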
--- a/static/css/wikked/page.less	Mon Dec 16 20:59:42 2013 -0800
+++ b/static/css/wikked/page.less	Mon Dec 23 13:29:00 2013 -0800
@@ -71,20 +71,29 @@
 }
 
 // Search
-form.form-search {
-    display: inline-block;
+ul#search-preview {
+    position: absolute;
+    list-style: none;
     margin: 0;
+    padding: 0;
 
-    .search-query {
-        font-size: @font-size-small;
-        height: @font-size-small * 1.5;
+    li {
+        background: #fff;
+        border: 1px solid #eee;
+        margin: 0;
+        padding: 0;
+    }
+    a {
+        margin: 0;
+        padding: 0.5em;
+        display: inline-block;
+        width: 100%;
     }
 }
 ul.search-results {
     list-style: none;
 }
 b.match {
-    padding: 0 0.2em;
     background: #ffeb84;
 }
 
--- a/static/js/wikked/models.js	Mon Dec 16 20:59:42 2013 -0800
+++ b/static/js/wikked/models.js	Mon Dec 23 13:29:00 2013 -0800
@@ -34,17 +34,21 @@
         },
         doPreviewSearch: function(query, callback) {
             if (this._isSearching) {
+                this._pendingQuery = query;
+                this._pendingCallback = callback;
                 return;
             }
             this._isSearching = true;
             var $model = this;
-            $.getJSON('/api/search', { q: query })
+            $.getJSON('/api/searchpreview', { q: query })
                 .done(function (data) {
                     $model._isSearching = false;
                     callback(data);
+                    $model._flushPendingQuery();
                 })
                 .fail(function() {
                     $model._isSearching = false;
+                    $model._flushPendingQuery();
                 });
         },
         doSearch: function(form) {
@@ -60,6 +64,17 @@
             });
         },
         _isSearching: false,
+        _pendingQuery: null,
+        _pendingCallback: null,
+        _flushPendingQuery: function() {
+            if (this._pendingQuery && this._pendingCallback) {
+                var q = this._pendingQuery;
+                var c = this._pendingCallback;
+                this._pendingQuery = null;
+                this._pendingCallback = null;
+                this.doPreviewSearch(q, c);
+            }
+        },
         _onChangeAuth: function(auth) {
             if (auth) {
                 this.set({
--- a/static/js/wikked/views.js	Mon Dec 16 20:59:42 2013 -0800
+++ b/static/js/wikked/views.js	Mon Dec 23 13:29:00 2013 -0800
@@ -148,7 +148,8 @@
         },
         render: function() {
             NavigationView.__super__.render.apply(this, arguments);
-            this.origPageEl = $('.wrapper>article');
+            this.searchPreviewList = this.$('#search-preview');
+            this.searchPreviewList.hide();
             return this;
         },
         events: {
@@ -162,34 +163,33 @@
             return false;
         },
         _previewSearch: function(e) {
-            // Restore the original content if the query is now
-            // empty. Otherwise, run a search and render only the
-            // `article` portion of the results page.
-            var origPageEl = this.origPageEl;
-            var curPreviewEl = $('.wrapper>article[class~="preview-search-results"]');
             var query = $(e.currentTarget).val();
-            if (query && query.length > 0) {
-                var template = Handlebars.compile(tplSearchResults);
+            if (query && query.length >= 3) {
+                var $view = this;
                 this.model.doPreviewSearch(query, function(data) {
-                    data.is_instant = true;
-                    var resultList = $(template(data));
-                    var inner = $(resultList)
-                        .addClass('preview-search-results');
-                    if (origPageEl.is(':visible')) {
-                        inner.insertAfter(origPageEl);
-                        origPageEl.hide();
-                    } else {
-                        curPreviewEl.replaceWith(inner);
+                    var resultStr = '';
+                    for (var i = 0; i < data.hits.length; ++i) {
+                        var hitUrl = data.hits[i].url.replace(/^\//, '');
+                        console.log(hitUrl, data.hits[i].title);
+                        resultStr += '<li>' +
+                            '<a href="/#read/' + hitUrl + '">' +
+                            data.hits[i].title +
+                            '</a>' +
+                            '</li>';
                     }
+                    console.log("Adding hits to the preview list.");
+                    $view.searchPreviewList.html(resultStr);
+                    if (!$view.searchPreviewList.is(':visible'))
+                        $view.searchPreviewList.slideDown(200);
                 });
-            } else {
-                curPreviewEl.remove();
-                origPageEl.show();
+            } else if (!query || query.length === 0) {
+                this.searchPreviewList.slideUp(200);
             }
         },
         _searchQueryChanged: function(e) {
             if (e.keyCode == 27) {
                 // Clear search on `Esc`.
+                console.log("Clearing search results.");
                 $(e.currentTarget).val('').trigger('input');
             }
         }
--- a/static/tpl/nav.html	Mon Dec 16 20:59:42 2013 -0800
+++ b/static/tpl/nav.html	Mon Dec 23 13:29:00 2013 -0800
@@ -18,7 +18,8 @@
         <form role="search" id="search" class="navbar-form navbar-left">
             <div class="form-group">
                 <label class="sr-only" for="search-query">Search query</label>
                 <input type="text" name="q" id="search-query" class="form-control input-sm" placeholder="Search...">
+                <ul id="search-preview"></ul>
             </div>
             <button type="submit" class="btn btn-sm btn-default">Search</button>
         </form>
--- a/wikked/indexer/elastic.py	Mon Dec 16 20:59:42 2013 -0800
+++ b/wikked/indexer/elastic.py	Mon Dec 23 13:29:00 2013 -0800
@@ -13,7 +13,7 @@
 
 class ElasticWikiIndex(WikiIndex):
     def __init__(self):
-        pass
+        WikiIndex.__init__(self)
 
     def initIndex(self, wiki):
         self.es = Elasticsearch()
--- a/wikked/indexer/whoosh.py	Mon Dec 16 20:59:42 2013 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-import os
-import os.path
-import codecs
-import logging
-from base import WikiIndex
-from whoosh.index import create_in, open_dir
-from whoosh.fields import Schema, ID, TEXT, STORED
-from whoosh.qparser import QueryParser
-
-
-logger = logging.getLogger(__name__)
-
-
-class WhooshWikiIndex(WikiIndex):
-    def __init__(self):
-        WikiIndex.__init__(self)
-
-    def initIndex(self, wiki):
-        self.store_dir = os.path.join(wiki.root, '.wiki', 'index')
-        if not os.path.isdir(self.store_dir):
-            logger.debug("Creating new index in: " + self.store_dir)
-            os.makedirs(self.store_dir)
-            self.ix = create_in(self.store_dir, self._getSchema())
-        else:
-            self.ix = open_dir(self.store_dir)
-
-    def reset(self, pages):
-        logger.debug("Re-creating new index in: " + self.store_dir)
-        self.ix = create_in(self.store_dir, schema=self._getSchema())
-        writer = self.ix.writer()
-        for page in pages:
-            self._indexPage(writer, page)
-        writer.commit()
-
-    def update(self, pages):
-        logger.debug("Updating index...")
-        to_reindex = set()
-        already_indexed = set()
-
-        with self.ix.searcher() as searcher:
-            writer = self.ix.writer()
-
-            for fields in searcher.all_stored_fields():
-                indexed_url = fields['url']
-                indexed_path = fields['path']
-                indexed_time = fields['time']
-
-                if not os.path.isfile(indexed_path):
-                    # File was deleted.
-                    self._unindexPage(writer, indexed_url)
-                else:
-                    already_indexed.add(indexed_path)
-                    if os.path.getmtime(indexed_path) > indexed_time:
-                        # File has changed since last index.
-                        self._unindexPage(writer, indexed_url)
-                        to_reindex.add(indexed_path)
-
-            for page in pages:
-                if page.path in to_reindex or page.path not in already_indexed:
-                    self._indexPage(writer, page)
-
-            writer.commit()
-        logger.debug("...done updating index.")
-
-    def search(self, query):
-        with self.ix.searcher() as searcher:
-            title_qp = QueryParser("title", self.ix.schema).parse(query)
-            content_qp = QueryParser("content", self.ix.schema).parse(query)
-            comp_query = title_qp | content_qp
-            results = searcher.search(comp_query)
-
-            page_infos = []
-            for hit in results:
-                page_info = {
-                        'title': hit['title'],
-                        'url': hit['url']
-                        }
-                page_info['title_highlights'] = hit.highlights('title')
-                with codecs.open(hit['path'], 'r', encoding='utf-8') as f:
-                    content = f.read()
-                page_info['content_highlights'] = hit.highlights('content', text=content)
-                page_infos.append(page_info)
-            return page_infos
-
-    def _getSchema(self):
-        schema = Schema(
-                url=ID(stored=True),
-                title=TEXT(stored=True),
-                content=TEXT,
-                path=STORED,
-                time=STORED
-                )
-        return schema
-
-    def _indexPage(self, writer, page):
-        logger.debug("Indexing '%s'." % page.url)
-        writer.add_document(
-            url=unicode(page.url),
-            title=unicode(page.title),
-            content=unicode(page.raw_text),
-            path=page.path,
-            time=os.path.getmtime(page.path)
-            )
-
-    def _unindexPage(self, writer, url):
-        logger.debug("Removing '%s' from index." % url)
-        writer.delete_by_term('url', url)
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wikked/indexer/whooshidx.py	Mon Dec 23 13:29:00 2013 -0800
@@ -0,0 +1,125 @@
+import os
+import os.path
+import logging
+from base import WikiIndex, HitResult
+from whoosh.analysis import StemmingAnalyzer, CharsetFilter, NgramFilter
+from whoosh.fields import Schema, ID, TEXT, STORED
+from whoosh.highlight import WholeFragmenter
+from whoosh.index import create_in, open_dir
+from whoosh.qparser import QueryParser
+from whoosh.support.charset import accent_map
+
+
+logger = logging.getLogger(__name__)
+
+
+class WhooshWikiIndex(WikiIndex):
+    def __init__(self):
+        WikiIndex.__init__(self)
+
+    def initIndex(self, wiki):
+        self.store_dir = os.path.join(wiki.root, '.wiki', 'index')
+        if not os.path.isdir(self.store_dir):
+            logger.debug("Creating new index in: " + self.store_dir)
+            os.makedirs(self.store_dir)
+            self.ix = create_in(self.store_dir, self._getSchema())
+        else:
+            self.ix = open_dir(self.store_dir)
+
+    def reset(self, pages):
+        logger.debug("Re-creating new index in: " + self.store_dir)
+        self.ix = create_in(self.store_dir, schema=self._getSchema())
+        writer = self.ix.writer()
+        for page in pages:
+            self._indexPage(writer, page)
+        writer.commit()
+
+    def update(self, pages):
+        logger.debug("Updating index...")
+        to_reindex = set()
+        already_indexed = set()
+
+        with self.ix.searcher() as searcher:
+            writer = self.ix.writer()
+
+            for fields in searcher.all_stored_fields():
+                indexed_url = fields['url']
+                indexed_path = fields['path']
+                indexed_time = fields['time']
+
+                if not os.path.isfile(indexed_path):
+                    # File was deleted.
+                    self._unindexPage(writer, indexed_url)
+                else:
+                    already_indexed.add(indexed_path)
+                    if os.path.getmtime(indexed_path) > indexed_time:
+                        # File has changed since last index.
+                        self._unindexPage(writer, indexed_url)
+                        to_reindex.add(indexed_path)
+
+            for page in pages:
+                if page.path in to_reindex or page.path not in already_indexed:
+                    self._indexPage(writer, page)
+
+            writer.commit()
+        logger.debug("...done updating index.")
+
+    def previewSearch(self, query):
+        with self.ix.searcher() as searcher:
+            title_qp = QueryParser("title_preview", self.ix.schema).parse(query)
+            results = searcher.search(title_qp)
+            results.fragmenter = WholeFragmenter()
+
+            hits = []
+            for result in results:
+                hit = HitResult(
+                        result['url'],
+                        result.highlights('title_preview', text=result['title']))
+                hits.append(hit)
+            return hits
+
+    def search(self, query):
+        with self.ix.searcher() as searcher:
+            title_qp = QueryParser("title", self.ix.schema).parse(query)
+            text_qp = QueryParser("text", self.ix.schema).parse(query)
+            comp_query = title_qp | text_qp
+            results = searcher.search(comp_query)
+
+            hits = []
+            for result in results:
+                hit = HitResult(
+                        result['url'],
+                        result.highlights('title'),
+                        result.highlights('text'))
+                hits.append(hit)
+            return hits
+
+    def _getSchema(self):
+        preview_analyzer = (StemmingAnalyzer() | CharsetFilter(accent_map) |
+                NgramFilter(minsize=3))
+        text_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
+        schema = Schema(
+                url=ID(stored=True),
+                title_preview=TEXT(analyzer=preview_analyzer, stored=False),
+                title=TEXT(analyzer=text_analyzer, stored=True),
+                text=TEXT(analyzer=text_analyzer, stored=True),
+                path=STORED,
+                time=STORED
+                )
+        return schema
+
+    def _indexPage(self, writer, page):
+        logger.debug("Indexing '%s'." % page.url)
+        writer.add_document(
+            url=unicode(page.url),
+            title_preview=unicode(page.title),
+            title=unicode(page.title),
+            text=unicode(page.text),
+            path=page.path,
+            time=os.path.getmtime(page.path)
+            )
+
+    def _unindexPage(self, writer, url):
+        logger.debug("Removing '%s' from index." % url)
+        writer.delete_by_term('url', url)
+
--- a/wikked/views/special.py	Mon Dec 16 20:59:42 2013 -0800
+++ b/wikked/views/special.py	Mon Dec 23 13:29:00 2013 -0800
@@ -1,4 +1,4 @@
-from flask import g, request
+from flask import g, request, abort
 from wikked.views import (is_page_readable, get_page_meta, get_page_or_none,
         make_auth_response)
 from wikked.web import app
@@ -29,6 +29,8 @@
 @app.route('/api/search')
 def api_search():
     query = request.args.get('q')
+    if query is None or query == '':
+        abort(400)
 
     readable_hits = []
     hits = list(g.wiki.index.search(query))
@@ -40,3 +42,20 @@
     result = {'query': query, 'hit_count': len(readable_hits), 'hits': readable_hits}
     return make_auth_response(result)
 
+
+@app.route('/api/searchpreview')
+def api_searchpreview():
+    query = request.args.get('q')
+    if query is None or query == '':
+        abort(400)
+
+    readable_hits = []
+    hits = list(g.wiki.index.previewSearch(query))
+    for h in hits:
+        page = get_page_or_none(h.url, convert_url=False)
+        if page is not None and is_page_readable(page):
+            readable_hits.append({'url': h.url, 'title': h.title})
+
+    result = {'query': query, 'hit_count': len(readable_hits), 'hits': readable_hits}
+    return make_auth_response(result)
+
--- a/wikked/wiki.py	Mon Dec 16 20:59:42 2013 -0800
+++ b/wikked/wiki.py	Mon Dec 23 13:29:00 2013 -0800
@@ -40,7 +40,7 @@
     def index_factory(self, config):
         index_type = config.get('wiki', 'indexer')
         if index_type == 'whoosh':
-            from wikked.indexer.whoosh import WhooshWikiIndex
+            from wikked.indexer.whooshidx import WhooshWikiIndex
             return WhooshWikiIndex()
         elif index_type == 'elastic':
             from wikked.indexer.elastic import ElasticWikiIndex
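
For reference, a hypothetical smoke test for the new preview endpoint (host, port
and query are assumed; requires the `requests` package and a running wikked dev
server):

    import requests

    # GET /api/searchpreview aborts with a 400 when `q` is missing or empty,
    # per api_searchpreview() above.
    resp = requests.get('http://localhost:5000/api/searchpreview',
                        params={'q': 'san'})
    data = resp.json()

    # `query`, `hit_count` and `hits` are built in api_searchpreview();
    # each hit carries the page `url` and its highlighted `title`.
    for hit in data['hits']:
        print hit['url'], hit['title']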