view wikked/formatter.py @ 130:72e5f588f989

Proper support for meta-listing pages: - Added support for URL endpoints. - Moved all category-related stuff to generic meta handling. - Generic meta-listing pages in the front end. Miscellaneous fixes: - Remove previous "ready" items before caching new ones. - Only un-"ready" pages that have includes and queries. - Better error handling in `DatabasePage` auto-update. - Make sure pages have extended data cached before running queries.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 27 Nov 2013 23:32:45 -0800
parents 6d5c339af405
children 9d22cf4d2412
line wrap: on
line source

import os
import os.path
import re
import jinja2
from StringIO import StringIO
from utils import get_meta_name_and_modifiers, html_escape


SINGLE_METAS = ['redirect', 'title']

FILE_FORMAT_REGEX = re.compile(r'\r\n?', re.MULTILINE)


class BaseContext(object):
    """ Base context for formatting pages. """
    def __init__(self, url):
        self.url = url

    @property
    def urldir(self):
        return os.path.dirname(self.url)


class FormattingContext(BaseContext):
    """ Context for formatting pages. """
    def __init__(self, url):
        BaseContext.__init__(self, url)
        self.out_links = []
        self.meta = {}


class PageFormatter(object):
    """ An object responsible for formatting a page, i.e. rendering
        "stable" content (everything except queries run on the fly,
        like `include` or `query`).
    """
    def __init__(self, wiki):
        self.wiki = wiki
        self.coercers = {
                'include': self._coerceInclude
                }
        self.processors = {
                'include': self._processInclude,
                'query': self._processQuery
                }
        self.endpoints = {
                'url': self._formatUrlLink
                }

    def formatText(self, ctx, text):
        text = FILE_FORMAT_REGEX.sub("\n", text)
        text = self._processWikiSyntax(ctx, text)
        return text

    def _processWikiSyntax(self, ctx, text):
        text = self._processWikiMeta(ctx, text)
        text = self._processWikiLinks(ctx, text)
        return text

    def _processWikiMeta(self, ctx, text):
        def repl(m):
            meta_name = unicode(m.group('name')).lower()
            meta_value = unicode(m.group('value'))

            if meta_value is None or meta_value == '':
                # No value provided: this is a "flag" meta.
                ctx.meta[meta_name] = True
                return ''

            # If we actually have a value, coerce it, if applicable,
            # and get the name without the modifier prefix.
            clean_meta_name, meta_modifier = get_meta_name_and_modifiers(meta_name)
            coerced_meta_value = meta_value
            if clean_meta_name in self.coercers:
                coerced_meta_value = self.coercers[clean_meta_name](ctx, meta_value)

            # Then, set the value on the meta dictionary, or add it to
            # other existing meta values with the same key.
            # TODO: right now we have a hard-coded list of meta names we know
            #       shouldn't be made into an array... make it configurable.
            if meta_name in SINGLE_METAS:
                ctx.meta[meta_name] = coerced_meta_value
            else:
                if meta_name not in ctx.meta:
                    ctx.meta[meta_name] = [coerced_meta_value]
                else:
                    ctx.meta[meta_name].append(coerced_meta_value)

            # Process it, or remove it from the output text.
            if clean_meta_name in self.processors:
                return self.processors[clean_meta_name](ctx, meta_modifier, coerced_meta_value)
            return ''

        # Single line meta.
        text = re.sub(
                r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*)\}\}\s*$',
                repl,
                text,
                flags=re.MULTILINE)
        # Multi-line meta.
        text = re.sub(
                r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*?)^\s*\}\}\s*$',
                repl,
                text,
                flags=re.MULTILINE | re.DOTALL)
        return text

    def _processWikiLinks(self, ctx, text):
        s = self

        # [[endpoint:Something/Blah.ext]]
        def repl1(m):
            endpoint = m.group(1)
            value = m.group(2).strip()
            if endpoint in self.endpoints:
                return self.endpoints[endpoint](ctx, endpoint, value, value)
            return self._formatMetaLink(ctx, endpoint, value, value)
        text = re.sub(r'\[\[(\w[\w\d]+)\:([^\]]+)\]\]', repl1, text)

        # [[display name|endpoint:Something/Whatever]]
        def repl2(m):
            display = m.group(1).strip()
            endpoint = m.group(2)
            value = m.group(3).strip()
            if endpoint in self.endpoints:
                return self.endpoints[endpoint](ctx, endpoint, value, display)
            return self._formatMetaLink(ctx, endpoint, value, display)
        text = re.sub(r'\[\[([^\|\]]+)\|\s*(\w[\w\d]+)\:([^\]]+)\]\]', repl2, text)

        # [[display name|Whatever/PageName]]
        def repl3(m):
            return s._formatWikiLink(ctx, m.group(1).strip(), m.group(2).strip())
        text = re.sub(r'\[\[([^\|\]]+)\|([^\]]+)\]\]', repl3, text)

        # [[Namespace/PageName]]
        def repl4(m):
            a, b = m.group(1, 2)
            url = b if a is None else (a + b)
            return s._formatWikiLink(ctx, b, url)
        text = re.sub(r'\[\[([^\]]+/)?([^\]]+)\]\]', repl4, text)

        return text

    def _coerceInclude(self, ctx, value):
        pipe_idx = value.find('|')
        if pipe_idx < 0:
            return value.strip()
        else:
            url = value[:pipe_idx].strip()
            parameters = value[pipe_idx + 1:].replace('\n', '')
            return url + '|' + parameters

    def _processInclude(self, ctx, modifier, value):
        # Includes are run on the fly, but we preprocess parameters.
        bits = PageFormatter.pipeSplit(value)
        parameters = ''
        included_url = bits[0]
        for p in bits[1:]:
            name = ''
            value = p
            m = re.match('\s*(?P<name>\w[\w\d]*)\s*=(?P<value>.*)', value)
            if m:
                name = unicode(m.group('name'))
                value = unicode(m.group('value'))
            value = html_escape(value.strip())
            parameters += '<div class="wiki-param" data-name="%s">%s</div>' % (name, value)

        url_attr = ' data-wiki-url="%s"' % included_url
        mod_attr = ''
        if modifier:
            mod_attr = ' data-wiki-mod="%s"' % modifier
        return '<div class="wiki-include"%s%s>%s</div>\n' % (url_attr, mod_attr, parameters)

    def _processQuery(self, ctx, modifier, query):
        # Queries are run on the fly.
        # But we pre-process arguments that reference other pages,
        # so that we get the absolute URLs right away.
        processed_args = ''
        arg_pattern = r"(^|\|)\s*(?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*="\
            r"(?P<value>[^\|]+)"
        for m in re.finditer(arg_pattern, query):
            name = unicode(m.group('name')).strip()
            value = unicode(m.group('value')).strip()
            processed_args += '%s=%s' % (name, value)

        mod_attr = ''
        if modifier:
            mod_attr = ' data-wiki-mod="%s"' % modifier
        return '<div class="wiki-query"%s>%s</div>\n' % (mod_attr, processed_args)

    def _formatUrlLink(self, ctx, endpoint, value, display):
        if value.startswith('/'):
            abs_url = '/files' + value
        else:
            abs_url = os.path.join('/files', ctx.urldir, value)
            abs_url = os.path.normpath(abs_url).replace('\\', '/')
        return '<a class="wiki-asset" href="%s">%s</a>' % (abs_url, display)

    def _formatMetaLink(self, ctx, endpoint, value, display):
        ctx.out_links.append("%s:%s" % (endpoint, value))
        return '<a class="wiki-meta-link" data-wiki-meta="%s" data-wiki-value="%s">%s</a>' % (endpoint, value, display)

    def _formatWikiLink(self, ctx, display, url):
        ctx.out_links.append(url)
        return '<a class="wiki-link" data-wiki-url="%s">%s</a>' % (url, display)

    @staticmethod
    def parseWikiLinks(text):
        urls = []
        pattern = r"<a class=\"[^\"]*\" data-wiki-url=\"(?P<url>[^\"]+)\">"
        for m in re.finditer(pattern, text):
            urls.append(unicode(m.group('url')))
        return urls

    LEXER_STATE_NORMAL = 0
    LEXER_STATE_LINK = 1

    @staticmethod
    def pipeSplit(text):
        res = []
        current = StringIO()
        state = PageFormatter.LEXER_STATE_NORMAL
        env = jinja2.Environment()
        for token in env.lex(text):
            token_type = token[1]
            value = token[2]
            if token_type == 'data':
                for i, c in enumerate(value):
                    if i > 0:
                        if c == '[' and value[i - 1] == '[':
                            state = PageFormatter.LEXER_STATE_LINK
                        elif c == ']' and value[i - 1] == ']':
                            state = PageFormatter.LEXER_STATE_NORMAL
                    if state == PageFormatter.LEXER_STATE_NORMAL and c == '|':
                        res.append(current.getvalue())
                        current.close()
                        current = StringIO()
                    else:
                        current.write(c)
            else:
                current.write(value)
        last_value = current.getvalue()
        if last_value:
            res.append(last_value)
        return res