diff piecrust/sources/autoconfig.py @ 242:f130365568ff

internal: Code reorganization to put less stuff in `sources.base`. Interfaces that sources can implement are in `sources.interfaces`. The default page source is in `sources.default`. The `SimplePageSource` is gone since most subclasses only wanted to do *some* stuff the same, but *lots* of stuff slightly different. I may have to revisit the code to extract exactly the code that's in common.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 18 Feb 2015 18:35:03 -0800
parents f43f19975671
children 3f740928043a
line wrap: on
line diff
--- a/piecrust/sources/autoconfig.py	Mon Feb 16 08:25:08 2015 -0800
+++ b/piecrust/sources/autoconfig.py	Wed Feb 18 18:35:03 2015 -0800
@@ -1,26 +1,32 @@
 import re
 import os
 import os.path
-import glob
 import logging
 from piecrust.configuration import ConfigurationError
-from piecrust.data.iterators import SettingSortIterator
 from piecrust.sources.base import (
-        SimplePageSource, IPreparingSource, SimplePaginationSourceMixin,
-        PageNotFoundError, InvalidFileSystemEndpointError,
-        PageFactory, MODE_CREATING, MODE_PARSING)
+        PageSource, PageFactory, InvalidFileSystemEndpointError)
+from piecrust.sources.default import (
+        filter_page_dirname, filter_page_filename)
+from piecrust.sources.interfaces import IListableSource
+from piecrust.sources.mixins import SimplePaginationSourceMixin
 
 
 logger = logging.getLogger(__name__)
 
 
-class AutoConfigSourceBase(SimplePageSource,
-                           SimplePaginationSourceMixin):
+class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin,
+                           IListableSource):
     """ Base class for page sources that automatically apply configuration
         settings to their generated pages based on those pages' paths.
     """
     def __init__(self, app, name, config):
         super(AutoConfigSourceBase, self).__init__(app, name, config)
+        self.fs_endpoint = config.get('fs_endpoint', name)
+        self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
+        self.supported_extensions = list(
+                app.config.get('site/auto_formats').keys())
+        self.default_auto_format = app.config.get('site/default_auto_format')
+
         self.capture_mode = config.get('capture_mode', 'path')
         if self.capture_mode not in ['path', 'dirname', 'filename']:
             raise ConfigurationError("Capture mode in source '%s' must be "
@@ -28,46 +34,57 @@
                                      name)
 
     def buildPageFactories(self):
+        logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path)
         if not os.path.isdir(self.fs_endpoint_path):
             raise InvalidFileSystemEndpointError(self.name,
                                                  self.fs_endpoint_path)
 
         for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
-            if not filenames:
-                continue
-
             rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path)
+            dirnames[:] = list(filter(filter_page_dirname, dirnames))
 
             # If `capture_mode` is `dirname`, we don't need to recompute it
             # for each filename, so we do it here.
             if self.capture_mode == 'dirname':
-                config = self.extractConfigFragment(rel_dirpath)
+                config = self._extractConfigFragment(rel_dirpath)
 
-            for f in filenames:
+            for f in filter(filter_page_filename, filenames):
                 if self.capture_mode == 'path':
                     path = os.path.join(rel_dirpath, f)
-                    config = self.extractConfigFragment(path)
+                    config = self._extractConfigFragment(path)
                 elif self.capture_mode == 'filename':
-                    config = self.extractConfigFragment(f)
+                    config = self._extractConfigFragment(f)
 
                 fac_path = f
                 if rel_dirpath != '.':
                     fac_path = os.path.join(rel_dirpath, f)
 
-                slug = self.makeSlug(rel_dirpath, f)
+                slug = self._makeSlug(fac_path)
 
                 metadata = {
                         'slug': slug,
                         'config': config}
                 yield PageFactory(self, fac_path, metadata)
 
-    def makeSlug(self, rel_dirpath, filename):
+    def resolveRef(self, ref_path):
+        """ Resolves a reference path into a normalized file-system path
+            located under this source's endpoint. Any leading slashes or
+            backslashes are stripped from `ref_path` first.
+        """
+        rel_ref = ref_path.lstrip("\\/")
+        full_path = os.path.join(self.fs_endpoint_path, rel_ref)
+        return os.path.normpath(full_path)
+
+    def listPath(self, rel_path):
         raise NotImplementedError()
 
-    def extractConfigFragment(self, rel_path):
+    def getDirpath(self, rel_path):
+        """ Returns the directory part of the given relative path. """
+        dirpath, _ = os.path.split(rel_path)
+        return dirpath
+
+    def getBasename(self, rel_path):
+        """ Returns the file name of the given relative path, without
+            its directory and without its extension.
+        """
+        return os.path.splitext(os.path.basename(rel_path))[0]
+
+    def _makeSlug(self, rel_path):
         raise NotImplementedError()
 
-    def findPagePath(self, metadata, mode):
+    def _extractConfigFragment(self, rel_path):
         raise NotImplementedError()
 
 
@@ -88,13 +105,13 @@
         self.supported_extensions = list(
                 app.config.get('site/auto_formats').keys())
 
-    def makeSlug(self, rel_dirpath, filename):
-        slug, ext = os.path.splitext(filename)
+    def _makeSlug(self, rel_path):
+        slug, ext = os.path.splitext(os.path.basename(rel_path))
         if ext.lstrip('.') not in self.supported_extensions:
             slug += ext
         return slug
 
-    def extractConfigFragment(self, rel_path):
+    def _extractConfigFragment(self, rel_path):
         if rel_path == '.':
             values = []
         else:
@@ -126,10 +143,32 @@
                 if slug == metadata['slug']:
                     path = os.path.join(dirpath, f)
                     rel_path = os.path.relpath(path, self.fs_endpoint_path)
-                    config = self.extractConfigFragment(dirpath)
+                    config = self._extractConfigFragment(rel_path)
                     metadata = {'slug': slug, 'config': config}
                     return rel_path, metadata
 
+    def listPath(self, rel_path):
+        """ Lists what is directly under `rel_path` (relative to the
+            source's file-system endpoint), sorted by name.
+
+            Returns a list of 3-tuples: `(True, name, rel_subdir)` for each
+            accepted sub-directory, and `(False, name_without_extension,
+            page_factory)` for each accepted page file.
+        """
+        rel_path = rel_path.lstrip('\\/')
+        path = os.path.join(self.fs_endpoint_path, rel_path)
+        items = []
+        for entry in sorted(os.listdir(path)):
+            if os.path.isdir(os.path.join(path, entry)):
+                if filter_page_dirname(entry):
+                    items.append(
+                            (True, entry, os.path.join(rel_path, entry)))
+                continue
+
+            if not filter_page_filename(entry):
+                continue
+
+            page_rel_path = os.path.join(rel_path, entry)
+            metadata = {
+                    'slug': self._makeSlug(page_rel_path),
+                    'config': self._extractConfigFragment(page_rel_path)}
+            fac = PageFactory(self, page_rel_path, metadata)
+
+            # Pages are listed under their name without the extension.
+            display_name, _ = os.path.splitext(entry)
+            items.append((False, display_name, fac))
+        return items
+
 
 class OrderedPageSource(AutoConfigSourceBase):
     """ A page source that assigns an "order" to its pages based on a
@@ -141,70 +180,140 @@
     re_pattern = re.compile(r'(^|/)(?P<num>\d+)_')
 
     def __init__(self, app, name, config):
-        config['capture_mode'] = 'filename'
+        config['capture_mode'] = 'path'
         super(OrderedPageSource, self).__init__(app, name, config)
         self.setting_name = config.get('setting_name', 'order')
         self.default_value = config.get('default_value', 0)
         self.supported_extensions = list(
                 app.config.get('site/auto_formats').keys())
 
-    def makeSlug(self, rel_dirpath, filename):
-        slug, ext = os.path.splitext(filename)
-        if ext.lstrip('.') not in self.supported_extensions:
-            slug += ext
-        slug = self.re_pattern.sub(r'\1', slug)
-        slug = os.path.join(rel_dirpath, slug).replace('\\', '/')
-        if slug.startswith('./'):
-            slug = slug[2:]
-        return slug
-
-    def extractConfigFragment(self, rel_path):
-        m = self.re_pattern.match(rel_path)
-        if m is not None:
-            val = int(m.group('num'))
-        else:
-            val = self.default_value
-        return {self.setting_name: val}
-
     def findPagePath(self, metadata, mode):
+        """ Finds the page file matching the slug found in `metadata`.
+
+            The slug is split on `/` and each part is looked up in turn
+            among the directory entries, starting at the file-system
+            endpoint. An entry matches if it is the part itself, optionally
+            preceded by a `<number>_` ordering prefix. An empty slug is
+            looked up as `_index`.
+
+            Returns a `(fac_path, metadata)` tuple, where `fac_path` is
+            relative to the file-system endpoint, or `(None, None)` if some
+            part of the slug can't be found.
+        """
         uri_path = metadata.get('slug', '')
-        if uri_path != '':
-            uri_parts = ['*_%s' % p for p in uri_path.split('/')]
-        else:
-            uri_parts = ['*__index']
-        uri_parts.insert(0, self.fs_endpoint_path)
-        path = os.path.join(*uri_parts)
+        if uri_path == '':
+            uri_path = '_index'
 
-        _, ext = os.path.splitext(uri_path)
-        if ext == '':
-            path += '.*'
+        path = self.fs_endpoint_path
+        uri_parts = uri_path.split('/')
+        for i, p in enumerate(uri_parts):
+            if i == len(uri_parts) - 1:
+                # Last part, this is the filename. We need to check for either
+                # the name, or the name with the prefix, but also handle a
+                # possible extension.
+                p_pat = r'(\d+_)?' + re.escape(p)
+
+                _, ext = os.path.splitext(uri_path)
+                if ext == '':
+                    p_pat += r'\.[\w\d]+'
+                # `re.match` only anchors the start of the string: anchor
+                # the end too, so that `foo` doesn't match `foo.md.bak`.
+                p_pat += '$'
 
-        possibles = glob.glob(path)
-
-        if len(possibles) == 0:
-            return None, None
+                found = False
+                for name in os.listdir(path):
+                    if re.match(p_pat, name):
+                        path = os.path.join(path, name)
+                        found = True
+                        break
+                if not found:
+                    return None, None
+            else:
+                # Find each sub-directory. It can either be a directory with
+                # the name itself, or the name with a number prefix. The
+                # pattern is anchored at the end so that `foo` doesn't match
+                # a sibling directory like `foobar`.
+                p_pat = r'(\d+_)?' + re.escape(p) + '$'
+                found = False
+                for name in os.listdir(path):
+                    if re.match(p_pat, name):
+                        path = os.path.join(path, name)
+                        found = True
+                        break
+                if not found:
+                    return None, None
 
-        if len(possibles) > 1:
-            raise Exception("More than one path matching: %s" % uri_path)
-
-        path = possibles[0]
         fac_path = os.path.relpath(path, self.fs_endpoint_path)
-
-        _, filename = os.path.split(path)
-        config = self.extractConfigFragment(filename)
+        config = self._extractConfigFragment(fac_path)
         metadata = {'slug': uri_path, 'config': config}
 
         return fac_path, metadata
 
     def getSorterIterator(self, it):
         accessor = self.getSettingAccessor()
-        return SettingSortIterator(it, self.setting_name,
-                                   value_accessor=accessor)
+        return OrderTrailSortIterator(it, self.setting_name + '_trail',
+                                      value_accessor=accessor)
+
+    def listPath(self, rel_path):
+        """ Lists what is directly under `rel_path`, sorted by name.
+
+            `rel_path` is a `/`-separated path relative to the source's
+            file-system endpoint. Each of its parts may be given with or
+            without the `<number>_` ordering prefix of the actual
+            directory.
+
+            Returns a list of 3-tuples: `(True, name, rel_subdir)` for each
+            accepted sub-directory, and `(False, name_without_extension,
+            page_factory)` for each accepted page file.
+
+            Raises an `Exception` if some part of `rel_path` can't be
+            found.
+        """
+        rel_path = rel_path.lstrip('/')
+        path = self.fs_endpoint_path
+        if rel_path != '':
+            parts = rel_path.split('/')
+            for p in parts:
+                # `re.match` only anchors the start of the string: anchor
+                # the end too, so that `foo` doesn't match a sibling
+                # directory like `foobar`.
+                p_pat = r'(\d+_)?' + re.escape(p) + '$'
+                for name in os.listdir(path):
+                    if re.match(p_pat, name):
+                        path = os.path.join(path, name)
+                        break
+                else:
+                    raise Exception("No such path: %s" % rel_path)
+
+        items = []
+        names = sorted(os.listdir(path))
+        for name in names:
+            if os.path.isdir(os.path.join(path, name)):
+                if filter_page_dirname(name):
+                    rel_subdir = os.path.join(rel_path, name)
+                    items.append((True, name, rel_subdir))
+            else:
+                if filter_page_filename(name):
+                    slug = self._makeSlug(os.path.join(rel_path, name))
+
+                    fac_path = name
+                    if rel_path != '.':
+                        fac_path = os.path.join(rel_path, name)
+                    fac_path = fac_path.replace('\\', '/')
+
+                    config = self._extractConfigFragment(fac_path)
+                    metadata = {'slug': slug, 'config': config}
+                    fac = PageFactory(self, fac_path, metadata)
+
+                    # Pages are listed under their name without extension.
+                    name, _ = os.path.splitext(name)
+                    items.append((False, name, fac))
+        return items
+
+    def _makeSlug(self, rel_path):
+        """ Builds a page slug from a relative path: the extension is
+            dropped if it is one of the supported ones, and the
+            `<number>_` ordering prefixes are removed from each path part.
+        """
+        base, ext = os.path.splitext(rel_path)
+        if ext.lstrip('.') in self.supported_extensions:
+            slug = base
+        else:
+            # Not a known page extension: keep it as part of the slug.
+            slug = base + ext
+        return self.re_pattern.sub(r'\1', slug)
+
+    def _extractConfigFragment(self, rel_path):
+        """ Extracts the ordering numbers found in the `<number>_`
+            prefixes of the given relative path.
+
+            Returns a configuration fragment with the last number under
+            the setting name, and the list of all numbers under the
+            setting name suffixed with `_trail`. If the path has no
+            number at all, the default value is used.
+        """
+        values = [int(m.group('num'))
+                  for m in self.re_pattern.finditer(rel_path)]
+        if not values:
+            values.append(self.default_value)
+
+        trail_name = self.setting_name + '_trail'
+        return {self.setting_name: values[-1], trail_name: values}
 
     def _populateMetadata(self, rel_path, metadata, mode=None):
         _, filename = os.path.split(rel_path)
-        config = self.extractConfigFragment(filename)
+        config = self._extractConfigFragment(filename)
         metadata['config'] = config
         slug = metadata['slug']
         metadata['slug'] = self.re_pattern.sub(r'\1', slug)
 
+
+class OrderTrailSortIterator(object):
+    """ Iterator wrapper that yields the items of `it` sorted by their
+        "order trail", i.e. the sequence of values obtained by calling
+        `value_accessor(item, trail_name)`.
+    """
+    def __init__(self, it, trail_name, value_accessor):
+        self.it = it
+        self.trail_name = trail_name
+        self.value_accessor = value_accessor
+
+    def __iter__(self):
+        return iter(sorted(self.it, key=self._key_getter))
+
+    def _key_getter(self, item):
+        # Compare trails element by element. Joining the values into a
+        # string fails on integer values, and would sort `10` before `2`
+        # anyway.
+        values = self.value_accessor(item, self.trail_name)
+        return tuple(values)
+