changeset 224:d45450a0256a
Various changes/improvements:
* Nicer init/start loops in the `Wiki` class.
* Renamed `wk cache` to `wk resolve` and made resolving pages a first-class
  API on the `Wiki` class.
* The `wk update` command now takes a path instead of a URL.
* Re-implemented auto-updating of pages when their file has changed. This adds
  a new `isCacheValid` function to the DB layer.
* Got rid of the special cases where some meta-properties were arrays and
  others were not. Now they are all arrays, with helper functions to make them
  easier to work with (see the sketch after this list).
* Removed trailing empty lines from multi-line meta-properties.
* Fixed some issues parsing multi-line meta-properties and queries.
* Fixed some issues resolving queries, especially with custom item templates.
* Better handling of page/wiki permissions.
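
As a quick illustration of the new all-arrays convention, here is how the two
helpers introduced in this changeset behave (a sketch; the sample dictionary
is made up):

    from wikked.page import get_meta_value
    from wikked.utils import flatten_single_metas

    # Every meta-property is now stored as a list.
    meta = {'title': ['My Page'],
            'category': ['recipes', 'desserts'],
            'draft': []}

    # get_meta_value unwraps single-element lists, or returns the list.
    get_meta_value(meta, 'title')                 # 'My Page'
    get_meta_value(meta, 'category')              # ['recipes', 'desserts']
    get_meta_value(meta, 'category', first=True)  # 'recipes'
    get_meta_value(meta, 'draft')                 # None (empty list)

    # flatten_single_metas does the same in place for a whole dictionary;
    # empty lists are dropped entirely.
    flatten_single_metas(meta)
    # meta is now {'title': 'My Page', 'category': ['recipes', 'desserts']}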
author    Ludovic Chabant <ludovic@chabant.com>
date      Mon, 10 Mar 2014 16:47:21 -0700
parents   a978ecf99408
children  ebb12ff21cb2
files     wikked/auth.py wikked/commands/manage.py wikked/db/base.py
          wikked/db/sql.py wikked/formatter.py wikked/page.py
          wikked/resolver.py wikked/utils.py wikked/views/__init__.py
          wikked/views/edit.py wikked/web.py wikked/wiki.py
diffstat  12 files changed, 223 insertions(+), 116 deletions(-)
--- a/wikked/auth.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/auth.py	Mon Mar 10 16:47:21 2014 -0700
@@ -62,17 +62,29 @@
         return self._isAllowedForMeta(page, 'writers', username)

     def _isAllowedForMeta(self, page, meta_name, username):
-        perm = self._permissions.get(meta_name)
-        if perm is not None and (
-                username is None or username not in perm):
-            return False
-        perm = page.meta.get(meta_name)
+        perm = page.getMeta(meta_name)
         if perm is not None:
-            allowed = [r.strip() for r in re.split(r'[ ,;]', perm[0])]
-            if username is None:
-                return 'anonymous' in allowed
-            else:
-                return '*' in allowed or username in allowed
+            # Permissions are declared at the page level.
+            for p in perm:
+                allowed = [r.strip() for r in re.split(r'[ ,;]', p)]
+                if username is None and 'anonymous' in allowed:
+                    return True
+                if username is not None and (
+                        '*' in allowed or username in allowed):
+                    return True
+            return False
+
+        perm = self._permissions.get(meta_name)
+        if perm is not None:
+            # Permissions are declared at the wiki level.
+            if username is None and 'anonymous' in perm:
+                return True
+            if username is not None and (
+                    '*' in perm or username in perm):
+                return True
+            return False
+
+        # No permissions declared anywhere.
         return True

     def _updatePermissions(self, config):
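
Concretely, the new lookup order lets a page grant access itself before the
wiki-wide settings are consulted. A page-level property such as the following
(illustrative; `writers` comes from the `_isAllowedForMeta` call site above,
and values may be separated by spaces, commas, or semicolons):

    {{writers: dorothy, toto}}

allows `dorothy` and `toto` to edit the page; `*` would allow any
authenticated user, and `anonymous` would allow logged-out visitors. If the
page declares nothing, the wiki-level permissions apply; if neither declares
anything, access is granted.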
--- a/wikked/commands/manage.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/commands/manage.py	Mon Mar 10 16:47:21 2014 -0700
@@ -66,7 +66,7 @@
     def setupParser(self, parser):
         parser.add_argument('--indexonly',
-                help="Only update the full-text search index",
+                help="Only reset the full-text search index",
                 action='store_true')

     def run(self, ctx):
@@ -85,34 +85,47 @@
                 "index with any changed/new files.")

     def setupParser(self, parser):
-        parser.add_argument('url',
-                help="The URL of a page to update specifically",
+        parser.add_argument('path',
+                help="The path to a page to update specifically",
                 nargs='?')
         parser.add_argument('--cache',
                 help="Re-cache all pages",
                 action='store_true')

     def run(self, ctx):
-        ctx.wiki.update(ctx.args.url, cache_ext_data=ctx.args.cache)
+        ctx.wiki.update(path=ctx.args.path, cache_ext_data=ctx.args.cache)
+
+        if ctx.args.debug and ctx.args.path:
+            page_info = ctx.wiki.fs.getPageInfo(ctx.args.path)
+            if page_info is None:
+                logger.debug("No page for path: %s" % ctx.args.path)
+                logger.debug("Path doesn't exist, or is ignored.")
+                return
+            page = ctx.wiki.getPage(page_info.url)
+            logger.debug("Page [%s]:" % page.url)
+            logger.debug("--- formatted text ---")
+            logger.debug(page.getFormattedText())
+            logger.debug("--- resolved text ---")
+            logger.debug(page.text)


 @register_command
-class CacheCommand(WikkedCommand):
+class ResolveCommand(WikkedCommand):
     def __init__(self):
-        super(CacheCommand, self).__init__()
-        self.name = 'cache'
-        self.description = ("Makes sure the extended cache is valid for the "
-                "whole wiki.")
+        super(ResolveCommand, self).__init__()
+        self.name = 'resolve'
+        self.description = ("Makes sure that the final page text is resolved "
+                "for all pages.")

     def setupParser(self, parser):
         parser.add_argument('-f', '--force',
-                help="Force cache all pages",
+                help="Force resolve all pages",
                 action='store_true')
         parser.add_argument('--parallel',
                 help="Run the operation with multiple workers in parallel",
                 action='store_true')

     def run(self, ctx):
-        ctx.wiki._cachePages(
-                force_resolve=ctx.args.force,
+        ctx.wiki.resolve(
+                force=ctx.args.force,
                 parallel=ctx.args.parallel)
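
With the rename, typical invocations would look like this (a sketch assuming
the `wk` entry point that these command classes plug into; the page path is a
placeholder):

    wk update path/to/some-page.md   # update a single page by file path
    wk update --cache                # update everything and re-cache
    wk resolve --force --parallel    # force-resolve all pages in parallel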
--- a/wikked/db/base.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/db/base.py	Mon Mar 10 16:47:21 2014 -0700
@@ -50,6 +50,9 @@
     def cachePage(self, page):
         raise NotImplementedError()

+    def isCacheValid(self, page):
+        raise NotImplementedError()
+
     def pageExists(self, url=None, path=None):
         raise NotImplementedError()
--- a/wikked/db/sql.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/db/sql.py	Mon Mar 10 16:47:21 2014 -0700
@@ -16,7 +16,6 @@
 from sqlalchemy.orm.exc import NoResultFound
 from wikked.db.base import Database
 from wikked.page import Page, PageData
-from wikked.formatter import SINGLE_METAS


 logger = logging.getLogger(__name__)
@@ -305,6 +304,15 @@
         for p in q.all():
             yield SQLDatabasePage(self, p, fields)

+    def isCacheValid(self, page):
+        db_obj = self.session.query(SQLPage).\
+                options(load_only('id', 'path', 'time')).\
+                filter(SQLPage.id == page._id).\
+                one()
+        path_time = datetime.datetime.fromtimestamp(
+                os.path.getmtime(db_obj.path))
+        return path_time < db_obj.time
+
     def cachePage(self, page):
         if not hasattr(page, '_id') or not page._id:
             raise Exception("Given page '%s' has no `_id` attribute set." % page.url)
@@ -458,14 +466,14 @@
         if fields is None or 'local_meta' in fields:
             data.local_meta = {}
             for m in db_obj.meta:
-                value = data.local_meta.get(m.name)
-                if m.name in SINGLE_METAS:
-                    data.local_meta[m.name] = m.value
+                existing = data.local_meta.get(m.name)
+                value = m.value
+                if value == '':
+                    value = True
+                if existing is None:
+                    data.local_meta[m.name] = [value]
                 else:
-                    if value is None:
-                        data.local_meta[m.name] = [m.value]
-                    else:
-                        data.local_meta[m.name].append(m.value)
+                    existing.append(value)

         if fields is None or 'local_links' in fields:
             data.local_links = [l.target_url for l in db_obj.links]
@@ -475,7 +483,8 @@
             if not db_obj.is_ready:
                 raise Exception(
                         "Requested extended data for page '%s' "
-                        "but data is not cached." % (data.url or data._db_id))
+                        "but data is not cached in the SQL database." % (
+                            data.url or data._db_id))

         if fields is None or 'text' in fields:
             data.text = db_obj.ready_text
@@ -483,11 +492,14 @@
         if fields is None or 'meta' in fields:
             data.ext_meta = {}
             for m in db_obj.ready_meta:
-                value = data.ext_meta.get(m.name)
-                if value is None:
-                    data.ext_meta[m.name] = [m.value]
+                existing = data.ext_meta.get(m.name)
+                value = m.value
+                if value == '':
+                    value = True
+                if existing is None:
+                    data.ext_meta[m.name] = [value]
                 else:
-                    data.ext_meta[m.name].append(m.value)
+                    existing.append(value)

         if fields is None or 'links' in fields:
             data.ext_links = [l.target_url for l in db_obj.ready_links]
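
Conceptually, the new `isCacheValid` check reduces to a timestamp comparison
between the file on disk and the time recorded in the SQL cache; a minimal
standalone sketch (illustrative names, not the real SQLAlchemy query):

    import os
    import datetime

    def is_cache_valid(page_path, cached_time):
        # The cache is valid only if the file was last modified strictly
        # before the page was cached.
        path_time = datetime.datetime.fromtimestamp(
                os.path.getmtime(page_path))
        return path_time < cached_time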
--- a/wikked/formatter.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/formatter.py	Mon Mar 10 16:47:21 2014 -0700
@@ -7,8 +7,6 @@
 from utils import get_meta_name_and_modifiers, html_escape


-SINGLE_METAS = ['redirect', 'title']
-
 FILE_FORMAT_REGEX = re.compile(r'\r\n?', re.MULTILINE)


@@ -71,6 +69,12 @@
                 ctx.meta[meta_name] = True
                 return ''

+            # If this is a multi-line meta, strip the trailing new line,
+            # since it's there because you must put the ending '}}' on
+            # its own line.
+            if meta_value[-1] == "\n":
+                meta_value = meta_value[:-1]
+
             # If we actually have a value, coerce it, if applicable,
             # and get the name without the modifier prefix.
             clean_meta_name, meta_modifier = get_meta_name_and_modifiers(meta_name)
@@ -80,15 +84,10 @@

             # Then, set the value on the meta dictionary, or add it to
             # other existing meta values with the same key.
-            # TODO: right now we have a hard-coded list of meta names we know
-            # shouldn't be made into an array... make it configurable.
-            if meta_name in SINGLE_METAS:
-                ctx.meta[meta_name] = coerced_meta_value
+            if meta_name not in ctx.meta:
+                ctx.meta[meta_name] = [coerced_meta_value]
             else:
-                if meta_name not in ctx.meta:
-                    ctx.meta[meta_name] = [coerced_meta_value]
-                else:
-                    ctx.meta[meta_name].append(coerced_meta_value)
+                ctx.meta[meta_name].append(coerced_meta_value)

             # Process it, or remove it from the output text.
             if clean_meta_name in self.processors:
@@ -103,7 +102,7 @@
                 flags=re.MULTILINE)
         # Multi-line meta.
         text = re.sub(
-                r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*?)^\s*\}\}\s*$',
+                r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*?)^[ \t]*\}\}\s*$',
                 repl,
                 text,
                 flags=re.MULTILINE | re.DOTALL)
@@ -179,18 +178,19 @@
         # Queries are run on the fly.
         # But we pre-process arguments that reference other pages,
         # so that we get the absolute URLs right away.
-        processed_args = ''
-        arg_pattern = r"(^|\|)\s*(?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*="\
+        processed_args = []
+        arg_pattern = r"(\A|\|)\s*(?P<name>(__)?[a-zA-Z][a-zA-Z0-9_\-]+)\s*="\
             r"(?P<value>[^\|]+)"
-        for m in re.finditer(arg_pattern, query):
+        for m in re.finditer(arg_pattern, query, re.MULTILINE):
             name = unicode(m.group('name')).strip()
             value = unicode(m.group('value')).strip()
-            processed_args += '%s=%s' % (name, value)
+            processed_args.append('%s=%s' % (name, value))

         mod_attr = ''
         if modifier:
             mod_attr = ' data-wiki-mod="%s"' % modifier
-        return '<div class="wiki-query"%s>%s</div>\n' % (mod_attr, processed_args)
+        return '<div class="wiki-query"%s>%s</div>\n' % (
+                mod_attr, '|'.join(processed_args))

     def _formatUrlLink(self, ctx, endpoint, value, display):
         if value.startswith('/'):
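
To see what the tweaked multi-line meta regex and the trailing-newline strip
do together, here is a small self-contained reproduction (the regex is copied
from the diff above; the sample text and the `summary` name are made up):

    import re

    MULTILINE_META = re.compile(
            r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):'
            r'\s*(?P<value>.*?)^[ \t]*\}\}\s*$',
            re.MULTILINE | re.DOTALL)

    text = "{{summary: A page about ponies.\nSpans two lines.\n}}\n"
    value = MULTILINE_META.search(text).group('value')
    # The trailing newline only exists because the closing '}}' must sit
    # on its own line, so strip it (as the formatter now does).
    if value[-1] == "\n":
        value = value[:-1]
    print(repr(value))  # 'A page about ponies.\nSpans two lines.'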
--- a/wikked/page.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/page.py	Mon Mar 10 16:47:21 2014 -0700
@@ -8,6 +8,18 @@
 logger = logging.getLogger(__name__)


+def get_meta_value(meta, key, first=False):
+    value = meta.get(key)
+    if value is not None and isinstance(value, list):
+        l = len(value)
+        if l == 0:
+            return None
+        if l == 1 or first:
+            return value[0]
+        return value
+    return value
+
+
 class PageLoadingError(Exception):
     """ An exception that can get raised if a page can't be loaded.
     """
@@ -70,10 +82,6 @@
         return self._data.text

     @property
-    def meta(self):
-        return self._data.ext_meta
-
-    @property
     def links(self):
         return self._data.ext_links

@@ -95,8 +103,15 @@
     def getFormattedText(self):
         return self._data.formatted_text

-    def getLocalMeta(self):
-        return self._data.local_meta
+    def getMeta(self, name=None, first=False):
+        if name is None:
+            return self._data.ext_meta
+        return get_meta_value(self._data.ext_meta, name, first)
+
+    def getLocalMeta(self, name=None, first=False):
+        if name is None:
+            return self._data.local_meta
+        return get_meta_value(self._data.local_meta, name, first)

     def getLocalLinks(self):
         return self._data.local_links
@@ -133,5 +148,7 @@
             filename = os.path.basename(data.path)
             filename_split = os.path.splitext(filename)
             data.title = re.sub(r'\-', ' ', filename_split[0])
+        elif isinstance(data.title, list):
+            data.title = data.title[0]

         return data
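
The effect of the new accessors, assuming a hypothetical page whose extended
meta is `{'category': ['recipes', 'desserts'], 'icon': ['pie']}`:

    page.getMeta()                        # the whole ext_meta dictionary
    page.getMeta('icon')                  # 'pie' (single value unwrapped)
    page.getMeta('category')              # ['recipes', 'desserts']
    page.getMeta('category', first=True)  # 'recipes'
    page.getMeta('nope')                  # None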
--- a/wikked/resolver.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/resolver.py	Mon Mar 10 16:47:21 2014 -0700
@@ -3,10 +3,11 @@
 import os.path
 import logging
 import jinja2
-from utils import (
+from wikked.formatter import PageFormatter, FormattingContext
+from wikked.utils import (
         PageNotFoundError, get_meta_name_and_modifiers, get_absolute_url,
-        html_unescape)
+        flatten_single_metas, html_unescape)


 logger = logging.getLogger(__name__)
@@ -100,11 +101,10 @@
        `include` or `query`.
     """
     default_parameters = {
-        '__header': "<ul>\n",
-        '__footer': "</ul>\n",
-        '__item': "<li><a class=\"wiki-link\" data-wiki-url=\"{{url}}\">" +
-            "{{title}}</a></li>\n",
-        '__empty': "<p>No page matches the query.</p>\n"
+        '__header': "\n",
+        '__footer': "\n",
+        '__item': "* [[{{title}}|{{url}}]]\n",
+        '__empty': "No page matches the query.\n"
     }

     def __init__(self, page, ctx=None, parameters=None, page_getter=None,
@@ -160,7 +160,8 @@
                     'title': self.page.title,
                     'full_title': full_title
                     },
-                '__args': []
+                '__args': [],
+                '__xargs': []
                 }

         # Create the output object, so it can be referenced and merged
@@ -201,6 +202,7 @@
         if self.is_root:
             # Resolve any `{{foo}}` variable references.
             parameters = dict(self.parameters)
+            parameters.update(flatten_single_metas(dict(self.page.getLocalMeta())))
             final_text = self._renderTemplate(final_text, parameters, error_url=self.page.url)

             # Resolve link states.
@@ -260,12 +262,14 @@
         for i, m in enumerate(re.finditer(arg_pattern, args)):
             value = unicode(m.group('value')).strip()
             value = html_unescape(value)
-            value = self._renderTemplate(value, parameters, error_url=self.page.url)
+            value = self._renderTemplate(value, self.parameters,
+                    error_url=self.page.url)
             if m.group('name'):
                 key = unicode(m.group('name')).lower()
                 parameters[key] = value
             else:
-                parameters['__args'].append(value)
+                parameters['__xargs'].append(value)
+            parameters['__args'].append(value)

         # Re-run the resolver on the included page to get its final
         # formatted text.
@@ -296,7 +300,7 @@
         # Parse the query.
         parameters = dict(self.default_parameters)
         meta_query = {}
-        arg_pattern = r"(^|\|)\s*(?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*="\
+        arg_pattern = r"(^|\|)\s*(?P<name>(__)?[a-zA-Z][a-zA-Z0-9_\-]+)\s*="\
             r"(?P<value>[^\|]+)"
         for m in re.finditer(arg_pattern, query):
             key = m.group('name').lower()
@@ -308,6 +312,7 @@
         # Find pages that match the query, excluding any page
         # that is in the URL trail.
         matched_pages = []
+        logger.debug("Running page query: %s" % meta_query)
         for p in self.pages_meta_getter():
             if p.url in self.ctx.url_trail:
                 continue
@@ -319,26 +324,46 @@
                 logger.error("Can't query page '%s' for '%s':" % (p.url, self.page.url))
                 logger.exception(unicode(e.message))

+        # We'll have to format things...
+        fmt_ctx = FormattingContext(self.page.url)
+        fmt = PageFormatter(self.wiki)
+
         # No match: return the 'empty' template.
         if len(matched_pages) == 0:
-            return self._valueOrPageText(parameters['__empty'])
+            logger.debug("No pages matched query.")
+            tpl_empty = fmt.formatText(
+                    fmt_ctx, self._valueOrPageText(parameters['__empty']))
+            return tpl_empty

         # Combine normal templates to build the output.
-        text = self._valueOrPageText(parameters['__header'])
+        tpl_header = fmt.formatText(
+                fmt_ctx, self._valueOrPageText(parameters['__header']))
+        tpl_footer = fmt.formatText(
+                fmt_ctx, self._valueOrPageText(parameters['__footer']))
+        item_url, tpl_item = self._valueOrPageText(parameters['__item'], with_url=True)
+        tpl_item = fmt.formatText(fmt_ctx, tpl_item)
+
+        text = tpl_header
+        add_trailing_line = tpl_item[-1] == "\n"
         for p in matched_pages:
             tokens = {
                     'url': p.url,
                     'title': p.title}
-            tokens.update(p.getLocalMeta())
-            item_url, item_text = self._valueOrPageText(parameters['__item'], with_url=True)
-            text += self._renderTemplate(item_text, tokens, error_url=item_url or self.page.url)
-        text += self._valueOrPageText(parameters['__footer'])
+            page_local_meta = flatten_single_metas(dict(p.getLocalMeta()))
+            tokens.update(page_local_meta)
+            text += self._renderTemplate(
+                    tpl_item, tokens, error_url=item_url or self.page.url)
+            if add_trailing_line:
+                # Jinja2 eats trailing new lines... :(
+                text += "\n"
+        text += tpl_footer

         return text

     def _valueOrPageText(self, value, with_url=False):
-        if re.match(r'^\[\[.*\]\]$', value):
-            include_url = value[2:-2]
+        stripped_value = value.strip()
+        if re.match(r'^\[\[.*\]\]$', stripped_value):
+            include_url = stripped_value[2:-2]
             try:
                 page = self.page_getter(include_url)
             except PageNotFoundError:
@@ -347,6 +372,12 @@
                 return (page.url, page.text)
             return page.text

+        if re.match(r'^__[a-zA-Z][a-zA-Z0-9_\-]+$', stripped_value):
+            meta = self.page.getLocalMeta(stripped_value)
+            if with_url:
+                return (None, meta)
+            return meta
+
         if with_url:
             return (None, value)
         return value
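
Since query argument values still cannot contain a pipe character (see
`arg_pattern`), a custom item template is best supplied indirectly. With the
new `__`-reference support in `_valueOrPageText`, a page could do something
like this (untested sketch; `__itemtpl` and `category` are made-up names):

    {{__itemtpl: * [[{{title}}|{{url}}]]
    }}
    {{query: category=recipes|__item=__itemtpl}}

Here the query resolves `__item=__itemtpl` by looking up the page's own
`__itemtpl` meta-property and using its value as the per-page item template.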
--- a/wikked/utils.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/utils.py	Mon Mar 10 16:47:21 2014 -0700
@@ -95,6 +95,18 @@
     return (clean_name, modifiers)


+def flatten_single_metas(meta):
+    items = list(meta.iteritems())
+    for k, v in items:
+        if isinstance(v, list):
+            l = len(v)
+            if l == 0:
+                del meta[k]
+            elif l == 1:
+                meta[k] = v[0]
+    return meta
+
+
 html_escape_table = {'"': "&quot;", "'": "&#039;"}
 html_unescape_table = {v: k for k, v in html_escape_table.items()}
--- a/wikked/views/__init__.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/views/__init__.py	Mon Mar 10 16:47:21 2014 -0700
@@ -3,7 +3,7 @@
 from flask import g, abort, jsonify
 from flask.ext.login import current_user
 from wikked.fs import PageNotFoundError
-from wikked.utils import split_page_url
+from wikked.utils import split_page_url, flatten_single_metas
 from wikked.web import app


@@ -39,8 +39,16 @@
     except PageNotFoundError:
         return None

-    if force_resolve:
-        page._force_resolve = True
+    if app.config.get('WIKI_AUTO_RELOAD'):
+        if not g.wiki.db.isCacheValid(page):
+            app.logger.info("Page '%s' has changed, reloading." % url)
+            g.wiki.update(url)
+        else:
+            app.logger.debug("Page '%s' is up to date." % url)
+    elif force_resolve:
+        g.wiki._cachePages([url], force_resolve=True)
+    page = g.wiki.getPage(url)
+
     if check_perms == CHECK_FOR_READ and not is_page_readable(page):
         abort(401)
     elif check_perms == CHECK_FOR_WRITE and not is_page_writable(page):
@@ -69,7 +77,8 @@
     if local_only:
         meta = dict(page.getLocalMeta())
     else:
-        meta = dict(page.meta)
+        meta = dict(page.getMeta())
+        flatten_single_metas(meta)
     meta['title'] = page.title
     meta['url'] = urllib.quote(page.url.encode('utf-8'))
     for name in COERCE_META:
--- a/wikked/views/edit.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/views/edit.py	Mon Mar 10 16:47:21 2014 -0700
@@ -32,6 +32,8 @@
         data.title = (data.local_meta.get('title') or
                 make_page_title(self.url))
+        if isinstance(data.title, list):
+            data.title = data.title[0]
         return data
--- a/wikked/web.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/web.py	Mon Mar 10 16:47:21 2014 -0700
@@ -21,6 +21,7 @@
 app.config.setdefault('SQL_COMMIT_ON_TEARDOWN', False)
 app.config.setdefault('WIKI_ROOT', None)
 app.config.setdefault('UPDATE_WIKI_ON_START', True)
+app.config.setdefault('WIKI_AUTO_RELOAD', False)
 app.config.setdefault('SYNCHRONOUS_UPDATE', True)
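
The new flag defaults to off; enabling it in a wiki's Flask settings would
look like this (a sketch; how the settings file is loaded depends on the
deployment):

    # Re-check file modification times on page views and reload
    # out-of-date pages automatically.
    WIKI_AUTO_RELOAD = True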
--- a/wikked/wiki.py	Sun Mar 09 20:31:27 2014 -0700
+++ b/wikked/wiki.py	Mon Mar 10 16:47:21 2014 -0700
@@ -208,11 +208,9 @@
     def start(self, update=False):
         """ Properly initializes the wiki and all its sub-systems.
         """
-        self.fs.start(self)
-        self.scm.start(self)
-        self.index.start(self)
-        self.db.start(self)
-        self.auth.start(self)
+        order = [self.fs, self.scm, self.index, self.db, self.auth]
+        for o in order:
+            o.start(self)

         if update:
             self.update()
@@ -220,19 +218,12 @@
     def init(self):
         """ Creates a new wiki at the specified root directory.
         """
-        self.fs.init(self)
-        self.scm.init(self)
-        self.index.init(self)
-        self.db.init(self)
-        self.auth.init(self)
-
+        order = [self.fs, self.scm, self.index, self.db, self.auth]
+        for o in order:
+            o.init(self)
         self.start()
-
-        self.fs.postInit()
-        self.scm.postInit()
-        self.index.postInit()
-        self.db.postInit()
-        self.auth.postInit()
+        for o in order:
+            o.postInit()

     def stop(self):
         self.db.close()
@@ -242,21 +233,37 @@
         page_infos = self.fs.getPageInfos()
         factory = lambda pi: FileSystemPage(self, pi)
         self.db.reset(page_infos, factory)
-        self._cachePages(force_resolve=True)
+        self.resolve(force=True)
         self.index.reset(self.getPages())

-    def update(self, url=None, cache_ext_data=True):
+    def resolve(self, only_urls=None, force=False, parallel=False):
+        logger.debug("Resolving pages...")
+        if only_urls:
+            page_urls = only_urls
+        else:
+            page_urls = self.db.getPageUrls(uncached_only=(not force))
+
+        num_workers = 4 if parallel else 1
+        s = ResolveScheduler(self, page_urls)
+        s.run(num_workers)
+
+    def update(self, url=None, path=None, cache_ext_data=True):
         logger.info("Updating pages...")
         factory = lambda pi: FileSystemPage(self, pi)
-        if url:
-            page_info = self.fs.findPage(url)
+        if url or path:
+            if url and path:
+                raise Exception("Can't specify both an URL and a path.")
+            if path:
+                page_info = self.fs.getPageInfo(path)
+            else:
+                page_info = self.fs.findPageInfo(url)
             self.db.update([page_info], factory, force=True)
-            self._cachePages([url])
-            self.index.update([self.getPage(url)])
+            self.resolve(only_urls=[page_info.url])
+            self.index.update([self.getPage(page_info.url)])
         else:
             page_infos = self.fs.getPageInfos()
             self.db.update(page_infos, factory)
-            self._cachePages()
+            self.resolve()
             self.index.update(self.getPages())

     def getPageUrls(self, subdir=None):
@@ -347,18 +354,6 @@
     def getSpecialFilenames(self):
         return self.special_filenames

-    def _cachePages(self, only_urls=None, force_resolve=False,
-            parallel=False):
-        logger.debug("Caching extended page data...")
-        if only_urls:
-            page_urls = only_urls
-        else:
-            page_urls = self.db.getPageUrls(uncached_only=(not force_resolve))
-
-        num_workers = 4 if parallel else 1
-        s = ResolveScheduler(self, page_urls)
-        s.run(num_workers)
-
     def _createEndpointInfos(self, config):
         endpoints = {}
         sections = [s for s in config.sections() if s.startswith('endpoint:')]
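
Put together, the reworked public API on the `Wiki` class can be driven like
this (a sketch; constructing the `wiki` object itself is elided since it
depends on the wiki root and parameters, and the page path is a placeholder):

    wiki.start(update=False)
    wiki.update(path='pages/some-page.md')   # update one page, by file path
    wiki.update()                            # or rescan everything
    wiki.resolve(force=True, parallel=True)  # re-resolve all pages (4 workers)
    wiki.stop()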