changeset 127:6d5c339af405

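Various page formatting fixes:
- Convert Windows (CRLF) and classic Mac (CR) line endings to LF before formatting the page.
- Add a rudimentary lexer to properly split include parameters, so that a `|` inside a `[[...]]` link is not treated as a parameter separator.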
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 24 Nov 2013 19:34:32 -0800
parents 9079fb01abb8
children cfa92a67a31f
files wikked/formatter.py
diffstat 1 files changed, 26 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/wikked/formatter.py	Sun Nov 24 14:14:32 2013 -0800
+++ b/wikked/formatter.py	Sun Nov 24 19:34:32 2013 -0800
@@ -2,11 +2,14 @@
 import os.path
 import re
 import jinja2
+from StringIO import StringIO
 from utils import get_meta_name_and_modifiers, html_escape
 
 
 SINGLE_METAS = ['redirect', 'title']
 
+FILE_FORMAT_REGEX = re.compile(r'\r\n?', re.MULTILINE)
+
 
 class BaseContext(object):
     """ Base context for formatting pages. """
@@ -42,6 +45,7 @@
                 }
 
     def formatText(self, ctx, text):
+        text = FILE_FORMAT_REGEX.sub("\n", text)
         text = self._processWikiSyntax(ctx, text)
         return text
 
@@ -92,7 +96,7 @@
                 flags=re.MULTILINE)
         # Multi-line meta.
         text = re.sub(
-                r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*)^\s*\}\}\s*$',
+                r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*?)^\s*\}\}\s*$',
                 repl,
                 text,
                 flags=re.MULTILINE | re.DOTALL)
@@ -184,27 +188,35 @@
             urls.append(unicode(m.group('url')))
         return urls
 
+    LEXER_STATE_NORMAL = 0
+    LEXER_STATE_LINK = 1
+
     @staticmethod
     def pipeSplit(text):
         res = []
-        current = ''
+        current = StringIO()
+        state = PageFormatter.LEXER_STATE_NORMAL
         env = jinja2.Environment()
         for token in env.lex(text):
             token_type = token[1]
             value = token[2]
             if token_type == 'data':
-                bits = value.split('|')
-                if len(bits) > 1:
-                    current += bits[0]
-                    res.append(current)
-                    for bit in bits[1:-1]:
-                        res.append(bit)
-                    current = bits[-1]
-                else:
-                    current += value
+                for i, c in enumerate(value):
+                    if i > 0:
+                        if c == '[' and value[i - 1] == '[':
+                            state = PageFormatter.LEXER_STATE_LINK
+                        elif c == ']' and value[i - 1] == ']':
+                            state = PageFormatter.LEXER_STATE_NORMAL
+                    if state == PageFormatter.LEXER_STATE_NORMAL and c == '|':
+                        res.append(current.getvalue())
+                        current.close()
+                        current = StringIO()
+                    else:
+                        current.write(c)
             else:
-                current += value
-        if current:
-            res.append(current)
+                current.write(value)
+        last_value = current.getvalue()
+        if last_value:
+            res.append(last_value)
         return res