changeset 111:e5dea315583b

First phase of fixes: use unicode strings in more places; fix updating/resetting of the wiki DB; fix the incoming-links query; fix the incorrect URL trail and outgoing links while resolving includes; validate URLs correctly in the public API.
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 16 Nov 2013 08:32:57 -0800
parents 827e236aa7c6
children a65cedc183d6
files wikked/db.py wikked/formatter.py wikked/resolver.py wikked/views.py wikked/wiki.py
diffstat 5 files changed, 119 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/wikked/db.py	Thu Nov 14 15:21:48 2013 -0800
+++ b/wikked/db.py	Sat Nov 16 08:32:57 2013 -0800
@@ -31,7 +31,7 @@
     def reset(self, pages):
         raise NotImplementedError()
 
-    def update(self, pages):
+    def update(self, pages, force=False):
         raise NotImplementedError()
 
     def getPageUrls(self, subdir=None):
@@ -184,7 +184,7 @@
             self._addPage(page)
         db.session.commit()
 
-    def update(self, pages):
+    def update(self, pages, force=False):
         self.logger.debug("Updating SQL database...")
         to_update = set()
         already_added = set()
@@ -199,7 +199,7 @@
                 already_added.add(p.path)
                 path_time = datetime.datetime.fromtimestamp(
                     os.path.getmtime(p.path))
-                if path_time > p.time:
+                if path_time > p.time or force:
                     # File has changed since last index.
                     to_remove.append(p)
                     to_update.add(p.path)
@@ -208,6 +208,14 @@
 
         db.session.commit()
 
+        db_pages = db.session.query(SQLPage).\
+                add_columns('id', 'is_ready').\
+                all()
+        for p in db_pages:
+            p.is_ready = False
+        
+        db.session.commit()
+
         added_db_objs = []
         for p in pages:
             if (p.path in to_update or
@@ -253,11 +261,12 @@
         return self.getPage(url) is not None
 
     def getLinksTo(self, url):
-        q = db.session.query(SQLLink, SQLPage).\
-            filter(SQLLink.target_url == SQLPage.url).\
+        q = db.session.query(SQLReadyLink).\
+            filter(SQLReadyLink.target_url == url).\
+            join(SQLReadyLink.source).\
             all()
         for l in q:
-            yield l.source
+            yield l.source.url
 
     def _createSchema(self):
         db.drop_all()
--- a/wikked/formatter.py	Thu Nov 14 15:21:48 2013 -0800
+++ b/wikked/formatter.py	Sat Nov 16 08:32:57 2013 -0800
@@ -48,8 +48,8 @@
 
     def _processWikiMeta(self, ctx, text):
         def repl(m):
-            meta_name = str(m.group('name')).lower()
-            meta_value = str(m.group('value'))
+            meta_name = unicode(m.group('name')).lower()
+            meta_value = unicode(m.group('value'))
 
             if meta_value is None or meta_value == '':
                 # No value provided: this is a "flag" meta.
@@ -83,7 +83,7 @@
                 flags=re.MULTILINE)
         # Multi-line meta.
         text = re.sub(
-                r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*)^\}\}\s*$',
+                r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*)^\s*\}\}\s*$',
                 repl,
                 text,
                 flags=re.MULTILINE | re.DOTALL)
@@ -149,8 +149,8 @@
         arg_pattern = r"(^|\|)\s*(?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*="\
             r"(?P<value>[^\|]+)"
         for m in re.finditer(arg_pattern, query):
-            name = str(m.group('name')).strip()
-            value = str(m.group('value')).strip()
+            name = unicode(m.group('name')).strip()
+            value = unicode(m.group('value')).strip()
             processed_args += '%s=%s' % (name, value)
 
         mod_attr = ''
@@ -167,6 +167,6 @@
         urls = []
         pattern = r"<a class=\"[^\"]*\" data-wiki-url=\"(?P<url>[^\"]+)\">"
         for m in re.finditer(pattern, text):
-            urls.append(str(m.group('url')))
+            urls.append(unicode(m.group('url')))
         return urls
 
--- a/wikked/resolver.py	Thu Nov 14 15:21:48 2013 -0800
+++ b/wikked/resolver.py	Sat Nov 16 08:32:57 2013 -0800
@@ -16,18 +16,26 @@
     """ An exception raised when a circular include is found
         while rendering a page.
     """
-    def __init__(self, message, url_trail):
-        Exception.__init__(self, message)
-        self.url_trail = url_trail
+    def __init__(self, current_url, url_trail, message=None):
+        Exception.__init__(self, current_url, url_trail, message)
+
+    def __str__(self):
+        current_url = self.args[0]
+        url_trail = self.args[1]
+        message = self.args[2]
+        res = "Circular include detected at '%s' (after %s)" % (current_url, url_trail)
+        if message:
+            res += ": %s" % message
+        return res
 
 
 class ResolveContext(object):
     """ The context for resolving page queries. """
     def __init__(self, root_page=None):
         self.root_page = root_page
-        self.url_trail = set()
+        self.url_trail = []
         if root_page:
-            self.url_trail.add(root_page.url)
+            self.url_trail.append(root_page.url)
 
     def shouldRunMeta(self, modifier):
         if modifier is None:
@@ -52,10 +60,8 @@
         self.out_links = []
         if page:
             self.meta = dict(page.getLocalMeta())
-            self.out_links = list(page.getLocalLinks())
 
     def add(self, other):
-        self.out_links = list(set(self.out_links + other.out_links))
         for original_key, val in other.meta.iteritems():
             # Ignore internal properties. Strip include-only properties
             # from their prefix.
@@ -134,15 +140,15 @@
 
         # Resolve queries, includes, etc.
         def repl2(m):
-            meta_name = str(m.group('name'))
-            meta_value = str(m.group('value'))
+            meta_name = unicode(m.group('name'))
+            meta_value = unicode(m.group('value'))
             meta_opts = {}
             if m.group('opts'):
                 for c in re.finditer(
                         r'data-wiki-(?P<name>[a-z]+)="(?P<value>[^"]+)"',
-                        str(m.group('opts'))):
-                    opt_name = str(c.group('name'))
-                    opt_value = str(c.group('value'))
+                        unicode(m.group('opts'))):
+                    opt_name = unicode(c.group('name'))
+                    opt_value = unicode(c.group('value'))
                     meta_opts[opt_name] = opt_value
 
             resolver = self.resolvers.get(meta_name)
@@ -167,9 +173,10 @@
 
             # Resolve link states.
             def repl1(m):
-                raw_url = str(m.group('url'))
-                abs_raw_url = self.ctx.getAbsoluteUrl(raw_url)
-                url = namespace_title_to_url(abs_raw_url)
+                raw_url = unicode(m.group('url'))
+                raw_url = self.ctx.getAbsoluteUrl(raw_url)
+                url = namespace_title_to_url(raw_url)
+                self.output.out_links.append(url)
                 if self.wiki.pageExists(url):
                     return '<a class="wiki-link" data-wiki-url="%s">' % url
                 return '<a class="wiki-link missing" data-wiki-url="%s">' % url
@@ -196,7 +203,7 @@
         # Check for circular includes.
         include_url = self.ctx.getAbsoluteUrl(opts['url'], self.page.url)
         if include_url in self.ctx.url_trail:
-            raise CircularIncludeError("Circular include detected at: %s" % include_url, self.ctx.url_trail)
+            raise CircularIncludeError(include_url, self.ctx.url_trail)
 
         # Parse the templating parameters.
         parameters = dict(self.parameters)
@@ -208,19 +215,21 @@
             # root page.
             arg_pattern = r"(^|\|)\s*((?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*=)?(?P<value>[^\|]+)"
             for i, m in enumerate(re.finditer(arg_pattern, args)):
-                key = str(m.group('name')).lower()
-                value = str(m.group('value')).strip()
+                key = unicode(m.group('name')).lower()
+                value = unicode(m.group('value')).strip()
                 value = self._renderTemplate(value, parameters, error_url=self.page.url)
                 parameters[key] = value
                 parameters['__args'].append(value)
 
         # Re-run the resolver on the included page to get its final
         # formatted text.
+        current_url_trail = list(self.ctx.url_trail)
         page = self.wiki.getPage(include_url)
-        self.ctx.url_trail.add(page.url)
+        self.ctx.url_trail.append(page.url)
         child = PageResolver(page, self.ctx, parameters)
         child_output = child.run()
         self.output.add(child_output)
+        self.ctx.url_trail = current_url_trail
 
         # Run the templating.
         text = child_output.text
@@ -242,9 +251,9 @@
         for m in re.finditer(arg_pattern, query):
             key = m.group('name').lower()
             if key in parameters:
-                parameters[key] = str(m.group('value'))
+                parameters[key] = unicode(m.group('value'))
             else:
-                meta_query[key] = str(m.group('value'))
+                meta_query[key] = unicode(m.group('value'))
 
         # Find pages that match the query, excluding any page
         # that is in the URL trail.
@@ -319,11 +328,11 @@
         for v in include_meta_values:
             pipe_idx = v.find('|')
             if pipe_idx > 0:
-                incl_url = v[:pipe_idx]
+                abs_url = self.ctx.getAbsoluteUrl(v[:pipe_idx], page.url)
+                included_urls.append(abs_url)
             else:
-                incl_url = v
-            abs_incl_url = get_absolute_url(page.url, incl_url)
-            included_urls.append(abs_incl_url)
+                abs_url = self.ctx.getAbsoluteUrl(v, page.url)
+                included_urls.append(abs_url)
 
         # Recurse into included pages.
         for url in included_urls:
@@ -368,4 +377,3 @@
     if title is None:
         title = value
     return '<a class="wiki-link" data-wiki-url="%s" data-action="edit">%s</a>' % (value, title)
-
--- a/wikked/views.py	Thu Nov 14 15:21:48 2013 -0800
+++ b/wikked/views.py	Sat Nov 16 08:32:57 2013 -0800
@@ -1,5 +1,6 @@
 import time
 import urllib
+import string
 import os.path
 from flask import render_template, abort, request, g, jsonify
 from flask.ext.login import login_user, logout_user, current_user
@@ -10,7 +11,7 @@
 from page import Page, PageData
 from fs import PageNotFoundError
 from formatter import PageFormatter, FormattingContext
-from utils import namespace_title_to_url, get_absolute_url
+from utils import title_to_url
 import scm
 
 
@@ -19,12 +20,7 @@
 CHECK_FOR_WRITE = 2
 
 
-def coerce_redirect(page, redirect):
-    target_url = get_absolute_url(page.url, redirect[0])
-    return namespace_title_to_url(target_url)
-
-
-def coerce_category(page, category):
+def get_category_meta(category):
     result = []
     for item in category:
         result.append({
@@ -34,8 +30,8 @@
     return result
 
 COERCE_META = {
-    'redirect': coerce_redirect,
-    'category': coerce_category
+    'redirect': title_to_url,
+    'category': get_category_meta
     }
 
 
@@ -64,8 +60,6 @@
 
 
 def get_page_or_none(url, force_resolve=False):
-    if url[0] != '/':
-        url = '/' + url
     try:
         page = g.wiki.getPage(url)
         if force_resolve:
@@ -87,6 +81,10 @@
     abort(404)
 
 
+def make_absolute(url):
+    return '/' + string.lstrip(url, '/')
+
+
 def is_page_readable(page, user=current_user):
     return page.wiki.auth.isPageReadable(page, user.get_id())
 
@@ -104,7 +102,7 @@
     meta['url'] = page.url
     for name in COERCE_META:
         if name in meta:
-            meta[name] = COERCE_META[name](page, meta[name])
+            meta[name] = COERCE_META[name](meta[name])
     return meta
 
 
@@ -131,6 +129,13 @@
                         continue
                 except PageNotFoundError:
                     pass
+                    continue
+                page = g.wiki.getPage(f_info.url)
+                try:
+                    if not is_page_readable(page):
+                        continue
+                except PageNotFoundError:
+                    pass
                 rev_data['pages'].append({
                     'url': f_info.url,
                     'action': scm.ACTION_NAMES[f['action']]
@@ -182,7 +187,7 @@
 
 @app.route('/api/list/<path:url>')
 def api_list_pages(url):
-    pages = filter(is_page_readable, g.wiki.getPages(url))
+    pages = filter(is_page_readable, g.wiki.getPages(make_absolute(url)))
     page_metas = [get_page_meta(page) for page in pages]
     result = {'path': url, 'pages': list(page_metas)}
     return make_auth_response(result)
@@ -191,7 +196,7 @@
 @app.route('/api/read/<path:url>')
 def api_read_page(url):
     page = get_page_or_404(
-            url, 
+            make_absolute(url), 
             check_perms=CHECK_FOR_READ,
             force_resolve=('force_resolve' in request.args))
     result = {'meta': get_page_meta(page), 'text': page.text}
@@ -200,7 +205,7 @@
 
 @app.route('/api/raw/<path:url>')
 def api_read_page_raw(url):
-    page = get_page_or_404(url, CHECK_FOR_READ)
+    page = get_page_or_404(make_absolute(url), CHECK_FOR_READ)
     result = {'meta': get_page_meta(page), 'text': page.raw_text}
     return make_auth_response(result)
 
@@ -210,7 +215,7 @@
     rev = request.args.get('rev')
     if rev is None:
         abort(400)
-    page = get_page_or_404(url, CHECK_FOR_READ)
+    page = get_page_or_404(make_absolute(url), CHECK_FOR_READ)
     page_rev = page.getRevision(rev)
     meta = dict(get_page_meta(page, True), rev=rev)
     result = {'meta': meta, 'text': page_rev}
@@ -234,7 +239,7 @@
     rev2 = request.args.get('rev2')
     if rev1 is None:
         abort(400)
-    page = get_page_or_404(url, CHECK_FOR_READ)
+    page = get_page_or_404(make_absolute(url), CHECK_FOR_READ)
     diff = page.getDiff(rev1, rev2)
     if 'raw' not in request.args:
         lexer = get_lexer_by_name('diff')
@@ -250,7 +255,7 @@
 
 @app.route('/api/state/<path:url>')
 def api_get_state(url):
-    page = get_page_or_404(url, CHECK_FOR_READ)
+    page = get_page_or_404(make_absolute(url), CHECK_FOR_READ)
     state = page.getState()
     return make_auth_response({
         'meta': get_page_meta(page, True),
@@ -260,13 +265,13 @@
 
 @app.route('/api/outlinks/<path:url>')
 def api_get_outgoing_links(url):
-    page = get_page_or_404(url, CHECK_FOR_READ)
+    page = get_page_or_404(make_absolute(url), CHECK_FOR_READ)
     links = []
     for link in page.links:
         other = get_page_or_none(link)
         if other is not None:
             links.append({
-                'url': link,
+                'url': other.url,
                 'title': other.title
                 })
         else:
@@ -278,7 +283,7 @@
 
 @app.route('/api/inlinks/<path:url>')
 def api_get_incoming_links(url):
-    page = get_page_or_404(url, CHECK_FOR_READ)
+    page = get_page_or_404(make_absolute(url), CHECK_FOR_READ)
     links = []
     for link in page.getIncomingLinks():
         other = get_page_or_none(link)
@@ -296,6 +301,7 @@
 
 @app.route('/api/edit/<path:url>', methods=['GET', 'POST'])
 def api_edit_page(url):
+    url = make_absolute(url)
     if request.method == 'GET':
         page = get_page_or_none(url)
         if page is None:
@@ -354,7 +360,7 @@
     if 'message' in request.form and len(request.form['message']) > 0:
         message = request.form['message']
 
-    url = '/' + url
+    url = make_absolute(url)
     page_fields = {
             'rev': rev,
             'author': author,
@@ -419,6 +425,7 @@
 
 @app.route('/api/history/<path:url>')
 def api_page_history(url):
+    url = make_absolute(url)
     page = get_page_or_404(url, CHECK_FOR_READ)
     history = page.getHistory()
     hist_data = get_history_data(history)
@@ -494,3 +501,4 @@
         result = {'username': user.username, 'groups': user.groups}
         return make_auth_response(result)
     abort(404)
+
--- a/wikked/wiki.py	Thu Nov 14 15:21:48 2013 -0800
+++ b/wikked/wiki.py	Sat Nov 16 08:32:57 2013 -0800
@@ -122,13 +122,39 @@
         self.db.initDb()
 
         if update:
+            self.update()
+
+    def stop(self):
+        self.db.close()
+
+    def reset(self, cache_ext_data=True):
+        self.logger.debug("Resetting wiki data...")
+        page_infos = self.fs.getPageInfos()
+        fs_pages = FileSystemPage.fromPageInfos(self, page_infos)
+        self.db.reset(fs_pages)
+        self.index.reset(self.getPages())
+
+        if cache_ext_data:
+            self._cacheAllPages()
+
+    def update(self, url=None, cache_ext_data=True):
+        updated_urls = []
+        self.logger.debug("Updating pages...")
+        if url:
+            page_info = self.fs.getPage(url)
+            fs_page = FileSystemPage(self, page_info=page_info)
+            self.db.update([fs_page])
+            updated_urls.append(url)
+            self.index.update([self.getPage(url)])
+        else:
             page_infos = self.fs.getPageInfos()
             fs_pages = FileSystemPage.fromPageInfos(self, page_infos)
             self.db.update(fs_pages)
+            updated_urls += [p.url for p in fs_pages]
             self.index.update(self.getPages())
 
-    def stop(self):
-        self.db.close()
+        if cache_ext_data:
+            self._cacheAllPages()
 
     def getPageUrls(self, subdir=None):
         """ Returns all the page URLs in the wiki, or in the given
@@ -220,6 +246,12 @@
         """
         return self.scm.getHistory()
 
+    def _cacheAllPages(self):
+        self.logger.debug("Caching extended page data...")
+        for url in self.getPageUrls():
+            page = self.getPage(url)
+            page._ensureExtendedData()
+
     def _loadConfig(self, parameters):
         # Merge the default settings with any settings provided by
         # the parameters.