changeset 116:b7950fa699f7

More fixes for page includes:
- Try in `/templates` first if the include URL is not absolute.
- Fix problems with Jinja character conflicts by using their lexer.
- Better pre-conditioning of include arguments in the formatter.
- Optimize SQL queries a bit by deferring some properties.

`manage update` can now update/re-cache a single page.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 18 Nov 2013 17:16:24 -0800
parents 1aee72ba7028
children b07cdd68de70
files wikked/db.py wikked/formatter.py wikked/page.py wikked/resolver.py wikked/utils.py wikked/wiki.py
diffstat 6 files changed, 118 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/wikked/db.py	Mon Nov 18 12:35:41 2013 -0800
+++ b/wikked/db.py	Mon Nov 18 17:16:24 2013 -0800
@@ -7,7 +7,7 @@
 from sqlalchemy import (
         and_,
         Column, Boolean, Integer, String, Text, DateTime, ForeignKey)
-from sqlalchemy.orm import relationship, backref
+from sqlalchemy.orm import relationship, backref, defer
 from wikked.web import db
 
 
@@ -185,12 +185,15 @@
         db.session.commit()
 
     def update(self, pages, force=False):
-        self.logger.debug("Updating SQL database...")
         to_update = set()
         already_added = set()
         to_remove = []
+        pages = list(pages)
 
-        db_pages = SQLPage.query.all()
+        self.logger.debug("Updating SQL database...")
+        page_urls = [p.url for p in pages]
+        db_pages = db.session.query(SQLPage).\
+                all()
         for p in db_pages:
             if not os.path.isfile(p.path):
                 # File was deleted.
@@ -199,7 +202,7 @@
                 already_added.add(p.path)
                 path_time = datetime.datetime.fromtimestamp(
                     os.path.getmtime(p.path))
-                if path_time > p.time or force:
+                if path_time > p.time or (force and p.url in page_urls):
                     # File has changed since last index.
                     to_remove.append(p)
                     to_update.add(p.path)
@@ -208,14 +211,6 @@
 
         db.session.commit()
 
-        db_pages = db.session.query(SQLPage).\
-                add_columns('id', 'is_ready').\
-                all()
-        for p in db_pages:
-            p.is_ready = False
-        
-        db.session.commit()
-
         added_db_objs = []
         for p in pages:
             if (p.path in to_update or
@@ -223,12 +218,25 @@
                 added_db_objs.append(self._addPage(p))
 
         db.session.commit()
+
+        if to_remove or added_db_objs:
+            db_pages = db.session.query(SQLPage).\
+                    options(
+                            defer(SQLPage.title),
+                            defer(SQLPage.raw_text),
+                            defer(SQLPage.formatted_text),
+                            defer(SQLPage.ready_text)).\
+                    all()
+            for p in db_pages:
+                p.is_ready = False
+            
+            db.session.commit()
+
         self.logger.debug("...done updating SQL database.")
-
         return [o.id for o in added_db_objs]
 
     def getPageUrls(self, subdir=None):
-        q = db.session.query(SQLPage)
+        q = db.session.query(SQLPage.url)
         if subdir:
             subdir = string.rstrip(subdir, '/') + '/%'
             q = q.filter(SQLPage.url.like(subdir))
--- a/wikked/formatter.py	Mon Nov 18 12:35:41 2013 -0800
+++ b/wikked/formatter.py	Mon Nov 18 17:16:24 2013 -0800
@@ -1,7 +1,8 @@
 import os
 import os.path
 import re
-from utils import get_meta_name_and_modifiers
+import jinja2
+from utils import get_meta_name_and_modifiers, html_escape
 
 
 class BaseContext(object):
@@ -126,14 +127,19 @@
             return url + '|' + parameters
 
     def _processInclude(self, ctx, modifier, value):
-        # Includes are run on the fly.
-        pipe_idx = value.find('|')
-        if pipe_idx < 0:
-            included_url = value
-            parameters = ''
-        else:
-            included_url = value[:pipe_idx]
-            parameters = value[pipe_idx + 1:]
+        # Includes are run on the fly, but we preprocess parameters.
+        bits = PageFormatter.pipeSplit(value)
+        parameters = ''
+        included_url = bits[0]
+        for p in bits[1:]:
+            name = ''
+            value = p
+            m = re.match('\s*(?P<name>\w[\w\d]*)\s*=(?P<value>.*)', value)
+            if m:
+                name = unicode(m.group('name'))
+                value = unicode(m.group('value'))
+            value = html_escape(value.strip())
+            parameters += '<div class="wiki-param" data-name="%s">%s</div>' % (name, value)
 
         url_attr = ' data-wiki-url="%s"' % included_url
         mod_attr = ''
@@ -170,3 +176,28 @@
             urls.append(unicode(m.group('url')))
         return urls
 
+    @staticmethod
+    def pipeSplit(text):
+        """ Splits `text` on `|`, but not on pipes that Jinja's lexer
+            reports as part of its own syntax (e.g. `{{foo|bar}}`).
+            Returns at least one piece (`['']` for empty input); an
+            empty last piece is otherwise dropped.
+        """
+        res = []
+        current = ''
+        env = jinja2.Environment()
+        for _, token_type, value in env.lex(text):
+            if token_type != 'data':
+                # Jinja syntax: keep it verbatim, pipes included.
+                current += value
+                continue
+            # Each pipe closes the pending piece and opens a new one.
+            bits = value.split('|')
+            current += bits[0]
+            for bit in bits[1:]:
+                res.append(current)
+                current = bit
+        if current or not res:
+            res.append(current)
+        return res
+
--- a/wikked/page.py	Mon Nov 18 12:35:41 2013 -0800
+++ b/wikked/page.py	Mon Nov 18 17:16:24 2013 -0800
@@ -32,9 +32,6 @@
         to load things from. Use `FileSystemPage` or `DatabasePage` instead.
     """
     def __init__(self, wiki, url):
-        if url[0] != '/':
-            raise ValueError("Page URLs need to be absolute: " + url)
-
         self.wiki = wiki
         self.url = url
         self._data = None
--- a/wikked/resolver.py	Mon Nov 18 12:35:41 2013 -0800
+++ b/wikked/resolver.py	Mon Nov 18 17:16:24 2013 -0800
@@ -2,7 +2,7 @@
 import os.path
 import jinja2
 from utils import (get_meta_name_and_modifiers, namespace_title_to_url,
-        get_absolute_url)
+        get_absolute_url, html_unescape)
 
 
 class FormatterNotFound(Exception):
@@ -112,7 +112,7 @@
             return self._unsafeRun()
         except Exception as e:
             self.wiki.logger.error("Error resolving page '%s':" % self.page.url)
-            self.wiki.logger.exception(e)
+            self.wiki.logger.exception(unicode(e.message))
             self.output = ResolveOutput(self.page)
             self.output.text = u'<div class="error">%s</div>' % e
             return self.output
@@ -200,8 +200,17 @@
             if not self.ctx.shouldRunMeta(opts['mod']):
                 return ''
 
+        # Get the included page. First, try with a page in the special
+        # `Templates` folder.
+        include_url = opts['url']
+        if include_url[0] != '/':
+            include_url = self.ctx.getAbsoluteUrl('/templates/' + include_url, self.page.url)
+            if not self.wiki.pageExists(include_url):
+                include_url = self.ctx.getAbsoluteUrl(opts['url'], self.page.url)
+        else:
+            include_url = self.ctx.getAbsoluteUrl(include_url, self.page.url)
+
         # Check for circular includes.
-        include_url = self.ctx.getAbsoluteUrl(opts['url'], self.page.url)
         if include_url in self.ctx.url_trail:
             raise CircularIncludeError(include_url, self.ctx.url_trail)
 
@@ -213,18 +222,21 @@
             # We do not, however, run them through the formatting -- this
             # will be done in one pass when everything is gathered on the
             # root page.
-            arg_pattern = r"(^|\|)\s*((?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*=)?(?P<value>[^\|]+)"
+            arg_pattern = r'<div class="wiki-param" data-name="(?P<name>\w[\w\d]*)?">(?P<value>.*?)</div>'
             for i, m in enumerate(re.finditer(arg_pattern, args)):
-                key = unicode(m.group('name')).lower()
                 value = unicode(m.group('value')).strip()
+                value = html_unescape(value)
                 value = self._renderTemplate(value, parameters, error_url=self.page.url)
-                parameters[key] = value
-                parameters['__args'].append(value)
+                if m.group('name'):
+                    key = unicode(m.group('name')).lower()
+                    parameters[key] = value
+                else:
+                    parameters['__args'].append(value)
 
         # Re-run the resolver on the included page to get its final
         # formatted text.
+        page = self.wiki.getPage(include_url)
         current_url_trail = list(self.ctx.url_trail)
-        page = self.wiki.getPage(include_url)
         self.ctx.url_trail.append(page.url)
         child = PageResolver(page, self.ctx, parameters)
         child_output = child.run()
@@ -262,8 +274,12 @@
             if p.url in self.ctx.url_trail:
                 continue
             for key, value in meta_query.iteritems():
-                if self._isPageMatch(p, key, value):
-                    matched_pages.append(p)
+                try:
+                    if self._isPageMatch(p, key, value):
+                        matched_pages.append(p)
+                except Exception as e:
+                    self.wiki.logger.error("Can't query page '%s' for '%s':" % (p.url, self.page.url))
+                    self.wiki.logger.exception(unicode(e.message))
 
         # No match: return the 'empty' template.
         if len(matched_pages) == 0:
@@ -328,11 +344,15 @@
         for v in include_meta_values:
             pipe_idx = v.find('|')
             if pipe_idx > 0:
-                abs_url = self.ctx.getAbsoluteUrl(v[:pipe_idx], page.url)
-                included_urls.append(abs_url)
+                v = v[:pipe_idx]
+
+            if v[0] != '/':
+                include_url = self.ctx.getAbsoluteUrl('/templates/' + v, page.url)
+                if not self.wiki.pageExists(include_url):
+                    include_url = self.ctx.getAbsoluteUrl(v, page.url)
             else:
-                abs_url = self.ctx.getAbsoluteUrl(v, page.url)
-                included_urls.append(abs_url)
+                include_url = self.ctx.getAbsoluteUrl(v, page.url)
+            included_urls.append(include_url)
 
         # Recurse into included pages.
         for url in included_urls:
@@ -377,3 +397,4 @@
     if title is None:
         title = value
     return '<a class="wiki-link" data-wiki-url="%s" data-action="edit">%s</a>' % (value, title)
+
--- a/wikked/utils.py	Mon Nov 18 12:35:41 2013 -0800
+++ b/wikked/utils.py	Mon Nov 18 17:16:24 2013 -0800
@@ -1,6 +1,7 @@
 import re
 import os.path
 import unicodedata
+from xml.sax.saxutils import escape, unescape
 
 
 def get_absolute_url(base_url, url, do_slugify=True):
@@ -66,3 +67,14 @@
         clean_name = name[1:]
     return (clean_name, modifiers)
 
+
+html_escape_table = {'"': "&quot;", "'": "&apos;"}
+html_unescape_table = dict((v, k) for k, v in html_escape_table.items())
+
+def html_escape(text):
+    """ Escapes `&`, `<`, `>` and both kinds of quotes in `text`. """
+    return escape(text, html_escape_table)
+
+def html_unescape(text):
+    """ Reverts the replacements made by `html_escape`. """
+    return unescape(text, html_unescape_table)
--- a/wikked/wiki.py	Mon Nov 18 12:35:41 2013 -0800
+++ b/wikked/wiki.py	Mon Nov 18 17:16:24 2013 -0800
@@ -132,7 +132,7 @@
         self.index.reset(self.getPages())
 
         if cache_ext_data:
-            self._cacheAllPages()
+            self._cachePages()
 
     def update(self, url=None, cache_ext_data=True):
         updated_urls = []
@@ -140,7 +140,7 @@
         if url:
             page_info = self.fs.getPage(url)
             fs_page = FileSystemPage(self, page_info=page_info)
-            self.db.update([fs_page])
+            self.db.update([fs_page], force=True)
             updated_urls.append(url)
             self.index.update([self.getPage(url)])
         else:
@@ -151,7 +151,7 @@
             self.index.update(self.getPages())
 
         if cache_ext_data:
-            self._cacheAllPages()
+            self._cachePages([url] if url else None)
 
     def getPageUrls(self, subdir=None):
         """ Returns all the page URLs in the wiki, or in the given
@@ -196,9 +196,7 @@
         self.scm.commit([page_info.path], commit_meta)
 
         # Update the DB and index with the new/modified page.
-        fs_page = FileSystemPage(self, page_info=page_info)
-        self.db.update([fs_page])
-        self.index.update([self.getPage(url)])
+        self.update(url, cache_ext_data=False)
 
     def revertPage(self, url, page_fields):
         """ Reverts the page with the given URL to an older revision.
@@ -219,7 +217,7 @@
         rev_text = self.scm.getRevision(path, page_fields['rev'])
 
         # Write to the file and commit.
-        page_info = self.fs.setPage(url, rev_text)
+        self.fs.setPage(url, rev_text)
 
         # Commit to source-control.
         commit_meta = {
@@ -229,9 +227,7 @@
         self.scm.commit([path], commit_meta)
 
         # Update the DB and index with the modified page.
-        fs_page = FileSystemPage(self, page_info=page_info)
-        self.db.update([fs_page])
-        self.index.update([self.getPage(url)])
+        self.update(url, cache_ext_data=False)
 
     def pageExists(self, url):
         """ Returns whether a page exists at the given URL.
@@ -243,9 +239,10 @@
         """
         return self.scm.getHistory()
 
-    def _cacheAllPages(self):
+    def _cachePages(self, only_urls=None):
         self.logger.debug("Caching extended page data...")
-        for url in self.getPageUrls():
+        urls = only_urls or self.getPageUrls()
+        for url in urls:
             page = self.getPage(url)
             page._ensureExtendedData()