Mercurial > wikked
changeset 116:b7950fa699f7
More fixes for page includes:
- Try in `/templates` first if the include URL is not absolute.
- Fix problems with Jinja character conflicts by using their lexer.
- Better pre-conditioning of include arguments in the formatter.
- Optimize SQL queries a bit by deferring some properties.
`manage update` can now update/re-cache a single page.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Mon, 18 Nov 2013 17:16:24 -0800 |
parents | 1aee72ba7028 |
children | b07cdd68de70 |
files | wikked/db.py wikked/formatter.py wikked/page.py wikked/resolver.py wikked/utils.py wikked/wiki.py |
diffstat | 6 files changed, 118 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/wikked/db.py Mon Nov 18 12:35:41 2013 -0800 +++ b/wikked/db.py Mon Nov 18 17:16:24 2013 -0800 @@ -7,7 +7,7 @@ from sqlalchemy import ( and_, Column, Boolean, Integer, String, Text, DateTime, ForeignKey) -from sqlalchemy.orm import relationship, backref +from sqlalchemy.orm import relationship, backref, defer from wikked.web import db @@ -185,12 +185,15 @@ db.session.commit() def update(self, pages, force=False): - self.logger.debug("Updating SQL database...") to_update = set() already_added = set() to_remove = [] + pages = list(pages) - db_pages = SQLPage.query.all() + self.logger.debug("Updating SQL database...") + page_urls = [p.url for p in pages] + db_pages = db.session.query(SQLPage).\ + all() for p in db_pages: if not os.path.isfile(p.path): # File was deleted. @@ -199,7 +202,7 @@ already_added.add(p.path) path_time = datetime.datetime.fromtimestamp( os.path.getmtime(p.path)) - if path_time > p.time or force: + if path_time > p.time or (force and p.url in page_urls): # File has changed since last index. to_remove.append(p) to_update.add(p.path) @@ -208,14 +211,6 @@ db.session.commit() - db_pages = db.session.query(SQLPage).\ - add_columns('id', 'is_ready').\ - all() - for p in db_pages: - p.is_ready = False - - db.session.commit() - added_db_objs = [] for p in pages: if (p.path in to_update or @@ -223,12 +218,25 @@ added_db_objs.append(self._addPage(p)) db.session.commit() + + if to_remove or added_db_objs: + db_pages = db.session.query(SQLPage).\ + options( + defer(SQLPage.title), + defer(SQLPage.raw_text), + defer(SQLPage.formatted_text), + defer(SQLPage.ready_text)).\ + all() + for p in db_pages: + p.is_ready = False + + db.session.commit() + self.logger.debug("...done updating SQL database.") - return [o.id for o in added_db_objs] def getPageUrls(self, subdir=None): - q = db.session.query(SQLPage) + q = db.session.query(SQLPage.url) if subdir: subdir = string.rstrip(subdir, '/') + '/%' q = q.filter(SQLPage.url.like(subdir))
--- a/wikked/formatter.py Mon Nov 18 12:35:41 2013 -0800 +++ b/wikked/formatter.py Mon Nov 18 17:16:24 2013 -0800 @@ -1,7 +1,8 @@ import os import os.path import re -from utils import get_meta_name_and_modifiers +import jinja2 +from utils import get_meta_name_and_modifiers, html_escape class BaseContext(object): @@ -126,14 +127,19 @@ return url + '|' + parameters def _processInclude(self, ctx, modifier, value): - # Includes are run on the fly. - pipe_idx = value.find('|') - if pipe_idx < 0: - included_url = value - parameters = '' - else: - included_url = value[:pipe_idx] - parameters = value[pipe_idx + 1:] + # Includes are run on the fly, but we preprocess parameters. + bits = PageFormatter.pipeSplit(value) + parameters = '' + included_url = bits[0] + for p in bits[1:]: + name = '' + value = p + m = re.match('\s*(?P<name>\w[\w\d]*)\s*=(?P<value>.*)', value) + if m: + name = unicode(m.group('name')) + value = unicode(m.group('value')) + value = html_escape(value.strip()) + parameters += '<div class="wiki-param" data-name="%s">%s</div>' % (name, value) url_attr = ' data-wiki-url="%s"' % included_url mod_attr = '' @@ -170,3 +176,28 @@ urls.append(unicode(m.group('url'))) return urls + @staticmethod + def pipeSplit(text): + res = [] + current = '' + env = jinja2.Environment() + for token in env.lex(text): + lineno = token[0] + token_type = token[1] + value = token[2] + if token_type == 'data': + bits = value.split('|') + if len(bits) > 1: + current += bits[0] + res.append(current) + for bit in bits[1:-1]: + res.append(bit) + current = bits[-1] + else: + current += value + else: + current += value + if current: + res.append(current) + return res +
--- a/wikked/page.py Mon Nov 18 12:35:41 2013 -0800 +++ b/wikked/page.py Mon Nov 18 17:16:24 2013 -0800 @@ -32,9 +32,6 @@ to load things from. Use `FileSystemPage` or `DatabasePage` instead. """ def __init__(self, wiki, url): - if url[0] != '/': - raise ValueError("Page URLs need to be absolute: " + url) - self.wiki = wiki self.url = url self._data = None
--- a/wikked/resolver.py Mon Nov 18 12:35:41 2013 -0800 +++ b/wikked/resolver.py Mon Nov 18 17:16:24 2013 -0800 @@ -2,7 +2,7 @@ import os.path import jinja2 from utils import (get_meta_name_and_modifiers, namespace_title_to_url, - get_absolute_url) + get_absolute_url, html_unescape) class FormatterNotFound(Exception): @@ -112,7 +112,7 @@ return self._unsafeRun() except Exception as e: self.wiki.logger.error("Error resolving page '%s':" % self.page.url) - self.wiki.logger.exception(e) + self.wiki.logger.exception(unicode(e.message)) self.output = ResolveOutput(self.page) self.output.text = u'<div class="error">%s</div>' % e return self.output @@ -200,8 +200,17 @@ if not self.ctx.shouldRunMeta(opts['mod']): return '' + # Get the included page. First, try with a page in the special + # `Templates` folder. + include_url = opts['url'] + if include_url[0] != '/': + include_url = self.ctx.getAbsoluteUrl('/templates/' + include_url, self.page.url) + if not self.wiki.pageExists(include_url): + include_url = self.ctx.getAbsoluteUrl(opts['url'], self.page.url) + else: + include_url = self.ctx.getAbsoluteUrl(include_url, self.page.url) + # Check for circular includes. - include_url = self.ctx.getAbsoluteUrl(opts['url'], self.page.url) if include_url in self.ctx.url_trail: raise CircularIncludeError(include_url, self.ctx.url_trail) @@ -213,18 +222,21 @@ # We do not, however, run them through the formatting -- this # will be done in one pass when everything is gathered on the # root page. 
- arg_pattern = r"(^|\|)\s*((?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*=)?(?P<value>[^\|]+)" + arg_pattern = r'<div class="wiki-param" data-name="(?P<name>\w[\w\d]*)?">(?P<value>.*?)</div>' for i, m in enumerate(re.finditer(arg_pattern, args)): - key = unicode(m.group('name')).lower() value = unicode(m.group('value')).strip() + value = html_unescape(value) value = self._renderTemplate(value, parameters, error_url=self.page.url) - parameters[key] = value - parameters['__args'].append(value) + if m.group('name'): + key = unicode(m.group('name')).lower() + parameters[key] = value + else: + parameters['__args'].append(value) # Re-run the resolver on the included page to get its final # formatted text. + page = self.wiki.getPage(include_url) current_url_trail = list(self.ctx.url_trail) - page = self.wiki.getPage(include_url) self.ctx.url_trail.append(page.url) child = PageResolver(page, self.ctx, parameters) child_output = child.run() @@ -262,8 +274,12 @@ if p.url in self.ctx.url_trail: continue for key, value in meta_query.iteritems(): - if self._isPageMatch(p, key, value): - matched_pages.append(p) + try: + if self._isPageMatch(p, key, value): + matched_pages.append(p) + except Exception as e: + self.wiki.logger.error("Can't query page '%s' for '%s':" % (p.url, self.page.url)) + self.wiki.logger.exception(unicode(e.message)) # No match: return the 'empty' template. 
if len(matched_pages) == 0: @@ -328,11 +344,15 @@ for v in include_meta_values: pipe_idx = v.find('|') if pipe_idx > 0: - abs_url = self.ctx.getAbsoluteUrl(v[:pipe_idx], page.url) - included_urls.append(abs_url) + v = v[:pipe_idx] + + if v[0] != '/': + include_url = self.ctx.getAbsoluteUrl('/templates/' + v, page.url) + if not self.wiki.pageExists(include_url): + include_url = self.ctx.getAbsoluteUrl(v, page.url) else: - abs_url = self.ctx.getAbsoluteUrl(v, page.url) - included_urls.append(abs_url) + include_url = self.ctx.getAbsoluteUrl(v, page.url) + included_urls.append(include_url) # Recurse into included pages. for url in included_urls: @@ -377,3 +397,4 @@ if title is None: title = value return '<a class="wiki-link" data-wiki-url="%s" data-action="edit">%s</a>' % (value, title) +
--- a/wikked/utils.py Mon Nov 18 12:35:41 2013 -0800 +++ b/wikked/utils.py Mon Nov 18 17:16:24 2013 -0800 @@ -1,6 +1,7 @@ import re import os.path import unicodedata +from xml.sax.saxutils import escape, unescape def get_absolute_url(base_url, url, do_slugify=True): @@ -66,3 +67,14 @@ clean_name = name[1:] return (clean_name, modifiers) + +html_escape_table = {'"': "&quot;", "'": "&apos;"} +html_unescape_table = {v: k for k, v in html_escape_table.items()} + +def html_escape(text): + return escape(text, html_escape_table) + + +def html_unescape(text): + return unescape(text, html_unescape_table) +
--- a/wikked/wiki.py Mon Nov 18 12:35:41 2013 -0800 +++ b/wikked/wiki.py Mon Nov 18 17:16:24 2013 -0800 @@ -132,7 +132,7 @@ self.index.reset(self.getPages()) if cache_ext_data: - self._cacheAllPages() + self._cachePages() def update(self, url=None, cache_ext_data=True): updated_urls = [] @@ -140,7 +140,7 @@ if url: page_info = self.fs.getPage(url) fs_page = FileSystemPage(self, page_info=page_info) - self.db.update([fs_page]) + self.db.update([fs_page], force=True) updated_urls.append(url) self.index.update([self.getPage(url)]) else: @@ -151,7 +151,7 @@ self.index.update(self.getPages()) if cache_ext_data: - self._cacheAllPages() + self._cachePages([url] if url else None) def getPageUrls(self, subdir=None): """ Returns all the page URLs in the wiki, or in the given @@ -196,9 +196,7 @@ self.scm.commit([page_info.path], commit_meta) # Update the DB and index with the new/modified page. - fs_page = FileSystemPage(self, page_info=page_info) - self.db.update([fs_page]) - self.index.update([self.getPage(url)]) + self.update(url, cache_ext_data=False) def revertPage(self, url, page_fields): """ Reverts the page with the given URL to an older revision. @@ -219,7 +217,7 @@ rev_text = self.scm.getRevision(path, page_fields['rev']) # Write to the file and commit. - page_info = self.fs.setPage(url, rev_text) + self.fs.setPage(url, rev_text) # Commit to source-control. commit_meta = { @@ -229,9 +227,7 @@ self.scm.commit([path], commit_meta) # Update the DB and index with the modified page. - fs_page = FileSystemPage(self, page_info=page_info) - self.db.update([fs_page]) - self.index.update([self.getPage(url)]) + self.update(url, cache_ext_data=False) def pageExists(self, url): """ Returns whether a page exists at the given URL. 
@@ -243,9 +239,10 @@ """ return self.scm.getHistory() - def _cacheAllPages(self): + def _cachePages(self, only_urls=None): self.logger.debug("Caching extended page data...") - for url in self.getPageUrls(): + urls = only_urls or self.getPageUrls() + for url in urls: page = self.getPage(url) page._ensureExtendedData()