changeset 520:bab91fcef741

bake/serve: Improve support for unicode, add slugification options. * Add slugification options for taxonomies. * Sort out some unicode support problems on OSX. * Add tests.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 28 Jul 2015 18:34:21 -0700
parents 9d1a89cd8146
children 39175c48a4ae
files piecrust/rendering.py piecrust/routing.py piecrust/serving/server.py piecrust/sources/default.py piecrust/sources/posts.py tests/bakes/test_unicode.yaml tests/bakes/test_unicode_tags.yaml tests/conftest.py tests/procs/test_dotfiles.yaml tests/servings/test_unicode.yaml tests/servings/test_unicode_tags.yaml tests/tmpfs.py
diffstat 12 files changed, 238 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/piecrust/rendering.py	Tue Jul 28 18:29:41 2015 -0700
+++ b/piecrust/rendering.py	Tue Jul 28 18:34:21 2015 -0700
@@ -166,15 +166,19 @@
             pass_info = self.current_pass_info
             pass_info.used_source_names.add(source.name)
 
-    def setTaxonomyFilter(self, term_value):
+    def setTaxonomyFilter(self, term_value, *, needs_slugifier=False):
         if not self.page.route.is_taxonomy_route:
             raise Exception("The page for this context is not tied to a "
                             "taxonomy route: %s" % self.uri)
 
+        slugifier = None
+        if needs_slugifier:
+            slugifier = self.page.route.slugifyTaxonomyTerm
         taxonomy = self.app.getTaxonomy(self.page.route.taxonomy_name)
+
         flt = PaginationFilter(value_accessor=page_value_accessor)
         flt.addClause(HasTaxonomyTermsFilterClause(
-                taxonomy, term_value, self.page.route.slugifyTaxonomyTerm))
+                taxonomy, term_value, slugifier))
         self.pagination_filter = flt
 
         is_combination = isinstance(term_value, tuple)
--- a/piecrust/routing.py	Tue Jul 28 18:29:41 2015 -0700
+++ b/piecrust/routing.py	Tue Jul 28 18:34:21 2015 -0700
@@ -2,6 +2,8 @@
 import os.path
 import copy
 import logging
+import urllib.parse
+import unidecode
 
 
 logger = logging.getLogger(__name__)
@@ -32,6 +34,34 @@
         raise NotImplementedError()
 
 
+SLUGIFY_ENCODE = 1
+SLUGIFY_TRANSLITERATE = 2
+SLUGIFY_LOWERCASE = 4
+SLUGIFY_DOT_TO_DASH = 8
+
+
+re_first_dot_to_dash = re.compile(r'^\.+')
+re_dot_to_dash = re.compile(r'\.+')
+
+
+def _parse_slugify_mode(value):
+    # Turn a comma-separated list of flag names into a SLUGIFY_* bitmask.
+    mapping = {'encode': SLUGIFY_ENCODE,
+               'transliterate': SLUGIFY_TRANSLITERATE,
+               'lowercase': SLUGIFY_LOWERCASE,
+               'dot_to_dash': SLUGIFY_DOT_TO_DASH}
+    mode = 0
+    for v in (s.strip() for s in value.split(',')):
+        f = mapping.get(v)
+        if f is None:
+            if v == 'iconv':  # Stripped, so ' iconv' gets the hint too.
+                raise Exception("'iconv' is not supported as a slugify mode "
+                                "in PieCrust2. Use 'transliterate'.")
+            raise Exception("Unknown slugify flag: %s" % v)
+        mode |= f
+    return mode
+
+
 class Route(object):
     """ Information about a route for a PieCrust application.
         Each route defines the "shape" of an URL and how it maps to
@@ -43,6 +73,8 @@
         self.source_name = cfg['source']
         self.taxonomy_name = cfg.get('taxonomy')
         self.taxonomy_term_sep = cfg.get('term_separator', '/')
+        self.slugify_mode = _parse_slugify_mode(
+                cfg.get('slugify_mode', 'encode,lowercase'))
 
         self.pretty_urls = app.config.get('site/pretty_urls')
         self.trailing_slash = app.config.get('site/trailing_slash')
@@ -185,7 +217,7 @@
                 else:
                     uri = base_uri + ext
 
-        uri = self.uri_root + uri
+        uri = urllib.parse.quote(self.uri_root + uri)
 
         if self.show_debug_info:
             uri += '?!debug'
@@ -207,9 +239,19 @@
         return all_values
 
     def slugifyTaxonomyTerm(self, term):
-        #TODO: add options for transliterating and combining terms.
         if isinstance(term, tuple):
-            return '/'.join(term)
+            return self.taxonomy_term_sep.join(
+                    map(self._slugifyOne, term))
+        return self._slugifyOne(term)
+
+    def _slugifyOne(self, term):
+        if self.slugify_mode & SLUGIFY_TRANSLITERATE:
+            term = unidecode.unidecode(term)
+        if self.slugify_mode & SLUGIFY_LOWERCASE:
+            term = term.lower()
+        if self.slugify_mode & SLUGIFY_DOT_TO_DASH:
+            term = re_first_dot_to_dash.sub('', term)
+            term = re_dot_to_dash.sub('-', term)
         return term
 
     def _uriFormatRepl(self, m):
--- a/piecrust/serving/server.py	Tue Jul 28 18:29:41 2015 -0700
+++ b/piecrust/serving/server.py	Tue Jul 28 18:34:21 2015 -0700
@@ -345,7 +345,7 @@
                                           force_render=True)
         if taxonomy_info is not None:
             _, tax_terms = taxonomy_info
-            render_ctx.setTaxonomyFilter(tax_terms)
+            render_ctx.setTaxonomyFilter(tax_terms, needs_slugifier=True)
 
         # See if this page is known to use sources. If that's the case,
         # just don't use cached rendered segments for that page (but still
--- a/piecrust/sources/default.py	Tue Jul 28 18:29:41 2015 -0700
+++ b/piecrust/sources/default.py	Tue Jul 28 18:34:21 2015 -0700
@@ -1,6 +1,6 @@
-import os
 import os.path
 import logging
+from piecrust import osutil
 from piecrust.sources.base import (
         PageFactory, PageSource, InvalidFileSystemEndpointError,
         MODE_CREATING)
@@ -41,13 +41,14 @@
             raise InvalidFileSystemEndpointError(self.name,
                                                  self.fs_endpoint_path)
 
-        for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
+        for dirpath, dirnames, filenames in osutil.walk(self.fs_endpoint_path):
             rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path)
             dirnames[:] = list(filter(filter_page_dirname, dirnames))
             for f in sorted(filter(filter_page_filename, filenames)):
                 fac_path = f
                 if rel_dirpath != '.':
                     fac_path = os.path.join(rel_dirpath, f)
+
                 slug = self._makeSlug(fac_path)
                 metadata = {'slug': slug}
                 fac_path = fac_path.replace('\\', '/')
@@ -95,7 +96,7 @@
     def listPath(self, rel_path):
         rel_path = rel_path.lstrip('\\/')
         path = os.path.join(self.fs_endpoint_path, rel_path)
-        names = sorted(os.listdir(path))
+        names = sorted(osutil.listdir(path))
         items = []
         for name in names:
             if os.path.isdir(os.path.join(path, name)):
--- a/piecrust/sources/posts.py	Tue Jul 28 18:29:41 2015 -0700
+++ b/piecrust/sources/posts.py	Tue Jul 28 18:34:21 2015 -0700
@@ -1,9 +1,9 @@
 import os
 import os.path
 import re
-import glob
 import logging
 import datetime
+from piecrust import osutil
 from piecrust.sources.base import (
         PageSource, InvalidFileSystemEndpointError, PageFactory,
         MODE_CREATING, MODE_PARSING)
@@ -85,7 +85,7 @@
         if needs_recapture:
             if mode == MODE_CREATING:
                 raise ValueError("Not enough information to find a post path.")
-            possible_paths = glob.glob(path)
+            possible_paths = osutil.glob(path)
             if len(possible_paths) != 1:
                 return None
             path = possible_paths[0]
@@ -183,7 +183,7 @@
             return
         logger.debug("Scanning for posts (flat) in: %s" % self.fs_endpoint_path)
         pattern = re.compile(r'(\d{4})-(\d{2})-(\d{2})_(.*)\.(\w+)$')
-        _, __, filenames = next(os.walk(self.fs_endpoint_path))
+        _, __, filenames = next(osutil.walk(self.fs_endpoint_path))
         for f in filenames:
             match = pattern.match(f)
             if match is None:
@@ -212,7 +212,7 @@
         logger.debug("Scanning for posts (shallow) in: %s" % self.fs_endpoint_path)
         year_pattern = re.compile(r'(\d{4})$')
         file_pattern = re.compile(r'(\d{2})-(\d{2})_(.*)\.(\w+)$')
-        _, year_dirs, __ = next(os.walk(self.fs_endpoint_path))
+        _, year_dirs, __ = next(osutil.walk(self.fs_endpoint_path))
         year_dirs = [d for d in year_dirs if year_pattern.match(d)]
         for yd in year_dirs:
             if year_pattern.match(yd) is None:
@@ -222,7 +222,7 @@
             year = int(yd)
             year_dir = os.path.join(self.fs_endpoint_path, yd)
 
-            _, __, filenames = next(os.walk(year_dir))
+            _, __, filenames = next(osutil.walk(year_dir))
             for f in filenames:
                 match = file_pattern.match(f)
                 if match is None:
@@ -252,19 +252,19 @@
         year_pattern = re.compile(r'(\d{4})$')
         month_pattern = re.compile(r'(\d{2})$')
         file_pattern = re.compile(r'(\d{2})_(.*)\.(\w+)$')
-        _, year_dirs, __ = next(os.walk(self.fs_endpoint_path))
+        _, year_dirs, __ = next(osutil.walk(self.fs_endpoint_path))
         year_dirs = [d for d in year_dirs if year_pattern.match(d)]
         for yd in year_dirs:
             year = int(yd)
             year_dir = os.path.join(self.fs_endpoint_path, yd)
 
-            _, month_dirs, __ = next(os.walk(year_dir))
+            _, month_dirs, __ = next(osutil.walk(year_dir))
             month_dirs = [d for d in month_dirs if month_pattern.match(d)]
             for md in month_dirs:
                 month = int(md)
                 month_dir = os.path.join(year_dir, md)
 
-                _, __, filenames = next(os.walk(month_dir))
+                _, __, filenames = next(osutil.walk(month_dir))
                 for f in filenames:
                     match = file_pattern.match(f)
                     if match is None:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/bakes/test_unicode.yaml	Tue Jul 28 18:34:21 2015 -0700
@@ -0,0 +1,21 @@
+---
+in:
+    posts/2010-01-01_déjà-des-accents.md: 'POST URL: {{page.url}}'
+    pages/présentation.md: 'PAGE URL: {{page.url}}'
+    pages/_index.md: ''
+out:
+    '2010':
+        '01':
+            '01':
+                déjà-des-accents.html: 'POST URL: /2010/01/01/d%C3%A9j%C3%A0-des-accents.html'
+    présentation.html: 'PAGE URL: /pr%C3%A9sentation.html'
+    index.html: ''
+---
+in:
+    pages/special/Это тэг.md: 'PAGE URL: {{page.url}}'
+    pages/_index.md: ''
+out:
+    special:
+        Это тэг.html: 'PAGE URL: /special/%D0%AD%D1%82%D0%BE%20%D1%82%D1%8D%D0%B3.html'
+    index.html: ''
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/bakes/test_unicode_tags.yaml	Tue Jul 28 18:34:21 2015 -0700
@@ -0,0 +1,44 @@
+---
+in:
+    posts/2015-03-01_post01.md: |
+      ---
+      title: Post 01
+      tags: [étrange]
+      ---
+    posts/2015-03-02_post02.md: |
+      ---
+      title: Post 02
+      tags: [étrange, sévère]
+      ---
+    pages/_tag.md: |
+      Pages in {{pctagurl(tag)}}
+      {% for p in pagination.posts -%}
+      {{p.title}}
+      {% endfor %}
+    pages/_index.md: ''
+outfiles:
+    tag/étrange.html: |
+      Pages in /tag/%C3%A9trange.html
+      Post 02
+      Post 01
+    tag/sévère.html: |
+      Pages in /tag/s%C3%A9v%C3%A8re.html
+      Post 02
+---
+in:
+    posts/2015-03-01_post01.md: |
+      ---
+      title: Post 01
+      tags: [Это тэг]
+      ---
+    pages/_tag.md: |
+      Pages in {{pctagurl(tag)}}
+      {% for p in pagination.posts -%}
+      {{p.title}}
+      {% endfor %}
+    pages/_index.md: ''
+outfiles:
+    tag/это тэг.html: |
+      Pages in /tag/%D1%8D%D1%82%D0%BE%20%D1%82%D1%8D%D0%B3.html
+      Post 01
+
--- a/tests/conftest.py	Tue Jul 28 18:29:41 2015 -0700
+++ b/tests/conftest.py	Tue Jul 28 18:34:21 2015 -0700
@@ -7,6 +7,7 @@
 import pytest
 import yaml
 import colorama
+from werkzeug.exceptions import HTTPException
 from piecrust.app import apply_variant_and_values
 from piecrust.configuration import merge_dicts
 from .mockutil import mock_fs, mock_fs_scope
@@ -323,6 +324,19 @@
     def reportinfo(self):
         return self.fspath, 0, "serve: %s" % self.name
 
+    def repr_failure(self, excinfo):  # Short text report for HTTP errors.
+        from piecrust.serving.server import MultipleNotFound
+        if isinstance(excinfo.value, MultipleNotFound):
+            return '\n'.join(  # Header, own description, then each `_nfes`.
+                    ["HTTP error 404 returned:",
+                     excinfo.value.description] +
+                    [e.description for e in excinfo.value._nfes])
+        elif isinstance(excinfo.value, HTTPException):
+            return '\n'.join(  # Status code header plus the description.
+                    ["HTTP error %s returned:" % excinfo.value.code,
+                     excinfo.value.description])
+        return super(ServeTestItem, self).repr_failure(excinfo)  # Default.
+
 
 class ServeTestFile(YamlTestFileBase):
     __item_class__ = ServeTestItem
@@ -372,11 +386,15 @@
         extra_left = set(left.keys()) - set(right.keys())
         if extra_left:
             return (["Left contains more items: "] +
-                    ['- %s/%s' % (ctx.path, k) for k in extra_left])
+                    ['- %s/%s' % (ctx.path, k) for k in extra_left] +
+                    ['Left:', ', '.join(left.keys())] +
+                    ['Right:', ', '.join(right.keys())])
         extra_right = set(right.keys()) - set(left.keys())
         if extra_right:
             return (["Right contains more items: "] +
-                    ['- %s/%s' % (ctx.path, k) for k in extra_right])
+                    ['- %s/%s' % (ctx.path, k) for k in extra_right] +
+                    ['Left:', ', '.join(left.keys())] +
+                    ['Right:', ', '.join(right.keys())])
         return ["Unknown difference"]
 
     for key in left.keys():
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/procs/test_dotfiles.yaml	Tue Jul 28 18:34:21 2015 -0700
@@ -0,0 +1,8 @@
+---
+in:
+    assets/something.txt: Foo bar
+    assets/.htaccess: "# Apache config"
+outfiles:
+    something.txt: Foo bar
+    .htaccess: "# Apache config"
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/servings/test_unicode.yaml	Tue Jul 28 18:34:21 2015 -0700
@@ -0,0 +1,16 @@
+---
+url: /pr%C3%A9sentation.html
+in:
+    pages/présentation.md: 'PAGE URL: {{page.url}}'
+out: 'PAGE URL: /pr%C3%A9sentation.html'
+---
+url: /2010/01/01/d%C3%A9j%C3%A0-des-accents.html
+in:
+    posts/2010-01-01_déjà-des-accents.md: 'POST URL: {{page.url}}'
+out: 'POST URL: /2010/01/01/d%C3%A9j%C3%A0-des-accents.html'
+---
+url: /special/%D0%AD%D1%82%D0%BE%20%D1%82%D1%8D%D0%B3.html
+in:
+    pages/special/Это тэг.md: 'PAGE URL: {{page.url}}'
+out: 'PAGE URL: /special/%D0%AD%D1%82%D0%BE%20%D1%82%D1%8D%D0%B3.html'
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/servings/test_unicode_tags.yaml	Tue Jul 28 18:34:21 2015 -0700
@@ -0,0 +1,63 @@
+---
+url: /tag/%C3%A9trange.html
+in:
+    posts/2015-03-01_post01.md: |
+      ---
+      title: Post 01
+      tags: [étrange]
+      ---
+    posts/2015-03-02_post02.md: |
+      ---
+      title: Post 02
+      tags: [étrange, sévère]
+      ---
+    pages/_tag.md: |
+      Pages in {{pctagurl(tag)}}
+      {% for p in pagination.posts -%}
+      {{p.title}}
+      {% endfor %}
+    pages/_index.md: ''
+out: |
+    Pages in /tag/%C3%A9trange.html
+    Post 02
+    Post 01
+---
+url: /tag/s%C3%A9v%C3%A8re.html
+in:
+    posts/2015-03-01_post01.md: |
+      ---
+      title: Post 01
+      tags: [étrange]
+      ---
+    posts/2015-03-02_post02.md: |
+      ---
+      title: Post 02
+      tags: [étrange, sévère]
+      ---
+    pages/_tag.md: |
+      Pages in {{pctagurl(tag)}}
+      {% for p in pagination.posts -%}
+      {{p.title}}
+      {% endfor %}
+    pages/_index.md: ''
+out: |
+    Pages in /tag/s%C3%A9v%C3%A8re.html
+    Post 02
+---
+url: /tag/%D1%8D%D1%82%D0%BE%20%D1%82%D1%8D%D0%B3.html
+in:
+    posts/2015-03-01_post01.md: |
+      ---
+      title: Post 01
+      tags: [Это тэг]
+      ---
+    pages/_tag.md: |
+      Pages in {{pctagurl(tag)}}
+      {% for p in pagination.posts -%}
+      {{p.title}}
+      {% endfor %}
+    pages/_index.md: ''
+out: |
+    Pages in /tag/%D1%8D%D1%82%D0%BE%20%D1%82%D1%8D%D0%B3.html
+    Post 01
+
--- a/tests/tmpfs.py	Tue Jul 28 18:29:41 2015 -0700
+++ b/tests/tmpfs.py	Tue Jul 28 18:34:21 2015 -0700
@@ -2,6 +2,7 @@
 import os.path
 import shutil
 import random
+import unicodedata
 from .basefs import TestFileSystemBase
 
 
@@ -37,6 +38,7 @@
             return fp.read()
 
     def _getStructureRecursive(self, target, parent, cur):
+        cur = unicodedata.normalize('NFC', cur)
         full_cur = os.path.join(parent, cur)
         if os.path.isdir(full_cur):
             e = {}