diff piecrust/sources/autoconfig.py @ 239:f43f19975671

sources: Refactor `autoconfig` source, add `OrderedPageSource`. Also add unit tests.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 15 Feb 2015 22:48:42 -0800
parents 683baa977d97
children f130365568ff
line wrap: on
line diff
--- a/piecrust/sources/autoconfig.py	Sun Feb 15 22:47:46 2015 -0800
+++ b/piecrust/sources/autoconfig.py	Sun Feb 15 22:48:42 2015 -0800
@@ -1,6 +1,10 @@
+import re
 import os
 import os.path
+import glob
 import logging
+from piecrust.configuration import ConfigurationError
+from piecrust.data.iterators import SettingSortIterator
 from piecrust.sources.base import (
         SimplePageSource, IPreparingSource, SimplePaginationSourceMixin,
         PageNotFoundError, InvalidFileSystemEndpointError,
@@ -10,53 +14,197 @@
 logger = logging.getLogger(__name__)
 
 
-class AutoConfigSource(SimplePageSource,
-                       SimplePaginationSourceMixin):
-    SOURCE_NAME = 'autoconfig'
-
+class AutoConfigSourceBase(SimplePageSource,
+                           SimplePaginationSourceMixin):
+    """ Base class for page sources that automatically apply configuration
+        settings to their generated pages based on those pages' paths.
+    """
     def __init__(self, app, name, config):
-        super(AutoConfigSource, self).__init__(app, name, config)
-        self.setting_name = config.get('setting_name', name)
-        self.collapse_single_values = config.get('collapse_single_values', False)
-        self.only_single_values = config.get('only_single_values', False)
+        super(AutoConfigSourceBase, self).__init__(app, name, config)
+        self.capture_mode = config.get('capture_mode', 'path')
+        if self.capture_mode not in ['path', 'dirname', 'filename']:
+            raise ConfigurationError("Capture mode in source '%s' must be "
+                                     "one of: path, dirname, filename" %
+                                     name)
 
     def buildPageFactories(self):
         if not os.path.isdir(self.fs_endpoint_path):
-            raise InvalidFileSystemEndpointError(self.name, self.fs_endpoint_path)
+            raise InvalidFileSystemEndpointError(self.name,
+                                                 self.fs_endpoint_path)
 
         for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
             if not filenames:
                 continue
-            config = self._extractConfigFragment(dirpath)
+
+            rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path)
+
+            # If `capture_mode` is `dirname`, we don't need to recompute it
+            # for each filename, so we do it here.
+            if self.capture_mode == 'dirname':
+                config = self.extractConfigFragment(rel_dirpath)
+
             for f in filenames:
-                slug, ext = os.path.splitext(f)
-                path = os.path.join(dirpath, f)
+                if self.capture_mode == 'path':
+                    path = os.path.join(rel_dirpath, f)
+                    config = self.extractConfigFragment(path)
+                elif self.capture_mode == 'filename':
+                    config = self.extractConfigFragment(f)
+
+                fac_path = f
+                if rel_dirpath != '.':
+                    fac_path = os.path.join(rel_dirpath, f)
+
+                slug = self.makeSlug(rel_dirpath, f)
+
                 metadata = {
                         'slug': slug,
                         'config': config}
-                yield PageFactory(self, path, metadata)
+                yield PageFactory(self, fac_path, metadata)
+
+    def makeSlug(self, rel_dirpath, filename):
+        raise NotImplementedError()
+
+    def extractConfigFragment(self, rel_path):
+        raise NotImplementedError()
+
+    def findPagePath(self, metadata, mode):
+        raise NotImplementedError()
+
 
-    def _extractConfigFragment(self, path):
-        rel_path = os.path.relpath(path, self.fs_endpoint_path)
+class AutoConfigSource(AutoConfigSourceBase):
+    """ Page source that extracts configuration settings from the sub-folders
+        each page resides in. This is ideal for setting tags or categories
+        on pages based on the folders they're in.
+    """
+    SOURCE_NAME = 'autoconfig'
+
+    def __init__(self, app, name, config):
+        config['capture_mode'] = 'dirname'
+        super(AutoConfigSource, self).__init__(app, name, config)
+        self.setting_name = config.get('setting_name', name)
+        self.only_single_values = config.get('only_single_values', False)
+        self.collapse_single_values = config.get('collapse_single_values',
+                                                 False)
+        self.supported_extensions = list(
+                app.config.get('site/auto_formats').keys())
+
+    def makeSlug(self, rel_dirpath, filename):
+        slug, ext = os.path.splitext(filename)
+        if ext.lstrip('.') not in self.supported_extensions:
+            slug += ext
+        return slug
+
+    def extractConfigFragment(self, rel_path):
         if rel_path == '.':
             values = []
         else:
             values = rel_path.split(os.sep)
-        if self.only_single_values and len(values) > 1:
-            raise Exception("Only one folder level is allowed for pages "
-                            "in source '%s'." % self.name)
-        if self.collapse_single_values and len(values) == 1:
-            values = values[0]
+
+        if self.only_single_values:
+            if len(values) > 1:
+                raise Exception("Only one folder level is allowed for pages "
+                                "in source '%s'." % self.name)
+            elif len(values) == 1:
+                values = values[0]
+            else:
+                values = None
+
+        if self.collapse_single_values:
+            if len(values) == 1:
+                values = values[0]
+            elif len(values) == 0:
+                values = None
+
         return {self.setting_name: values}
 
     def findPagePath(self, metadata, mode):
+        # Pages from this source are effectively flattened, so we need to
+        # find pages using a brute-force kinda way.
         for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
             for f in filenames:
                 slug, _ = os.path.splitext(f)
                 if slug == metadata['slug']:
                     path = os.path.join(dirpath, f)
                     rel_path = os.path.relpath(path, self.fs_endpoint_path)
-                    config = self._extractConfigFragment(dirpath)
+                    config = self.extractConfigFragment(dirpath)
                     metadata = {'slug': slug, 'config': config}
                     return rel_path, metadata
 
+
+class OrderedPageSource(AutoConfigSourceBase):
+    """ A page source that assigns an "order" to its pages based on a
+        numerical prefix in their filename. Page iterators will automatically
+        sort pages using that order.
+    """
+    SOURCE_NAME = 'ordered'
+
+    re_pattern = re.compile(r'(^|/)(?P<num>\d+)_')
+
+    def __init__(self, app, name, config):
+        config['capture_mode'] = 'filename'
+        super(OrderedPageSource, self).__init__(app, name, config)
+        self.setting_name = config.get('setting_name', 'order')
+        self.default_value = config.get('default_value', 0)
+        self.supported_extensions = list(
+                app.config.get('site/auto_formats').keys())
+
+    def makeSlug(self, rel_dirpath, filename):
+        slug, ext = os.path.splitext(filename)
+        if ext.lstrip('.') not in self.supported_extensions:
+            slug += ext
+        slug = self.re_pattern.sub(r'\1', slug)
+        slug = os.path.join(rel_dirpath, slug).replace('\\', '/')
+        if slug.startswith('./'):
+            slug = slug[2:]
+        return slug
+
+    def extractConfigFragment(self, rel_path):
+        m = self.re_pattern.match(rel_path)
+        if m is not None:
+            val = int(m.group('num'))
+        else:
+            val = self.default_value
+        return {self.setting_name: val}
+
+    def findPagePath(self, metadata, mode):
+        uri_path = metadata.get('slug', '')
+        if uri_path != '':
+            uri_parts = ['*_%s' % p for p in uri_path.split('/')]
+        else:
+            uri_parts = ['*__index']
+        uri_parts.insert(0, self.fs_endpoint_path)
+        path = os.path.join(*uri_parts)
+
+        _, ext = os.path.splitext(uri_path)
+        if ext == '':
+            path += '.*'
+
+        possibles = glob.glob(path)
+
+        if len(possibles) == 0:
+            return None, None
+
+        if len(possibles) > 1:
+            raise Exception("More than one path matching: %s" % uri_path)
+
+        path = possibles[0]
+        fac_path = os.path.relpath(path, self.fs_endpoint_path)
+
+        _, filename = os.path.split(path)
+        config = self.extractConfigFragment(filename)
+        metadata = {'slug': uri_path, 'config': config}
+
+        return fac_path, metadata
+
+    def getSorterIterator(self, it):
+        accessor = self.getSettingAccessor()
+        return SettingSortIterator(it, self.setting_name,
+                                   value_accessor=accessor)
+
+    def _populateMetadata(self, rel_path, metadata, mode=None):
+        _, filename = os.path.split(rel_path)
+        config = self.extractConfigFragment(filename)
+        metadata['config'] = config
+        slug = metadata['slug']
+        metadata['slug'] = self.re_pattern.sub(r'\1', slug)
+