diff piecrust/sources/autoconfig.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0. * Everything is a `ContentSource`, including assets directories. * Most content sources are subclasses of the base file-system source. * A source is processed by a "pipeline", and there are 2 built-in pipelines, one for assets and one for pages. The asset pipeline is vaguely functional, but the page pipeline is completely broken right now. * Rewrite the baking process as just running appropriate pipelines on each content item. This should allow for better parallelization.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents 58ebf50235a5
children f070a4fc033c
line wrap: on
line diff
--- a/piecrust/sources/autoconfig.py	Sat Apr 29 21:42:22 2017 -0700
+++ b/piecrust/sources/autoconfig.py	Wed May 17 00:11:48 2017 -0700
@@ -3,30 +3,19 @@
 import os.path
 import logging
 from piecrust.configuration import ConfigurationError
-from piecrust.routing import RouteParameter
-from piecrust.sources.base import (
-        PageSource, PageFactory, InvalidFileSystemEndpointError)
-from piecrust.sources.default import (
-        filter_page_dirname, filter_page_filename)
-from piecrust.sources.interfaces import IListableSource
-from piecrust.sources.mixins import SimplePaginationSourceMixin
+from piecrust.sources.base import ContentItem
+from piecrust.sources.default import DefaultContentSource
 
 
 logger = logging.getLogger(__name__)
 
 
-class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin,
-                           IListableSource):
-    """ Base class for page sources that automatically apply configuration
+class AutoConfigContentSourceBase(DefaultContentSource):
+    """ Base class for content sources that automatically apply configuration
         settings to their generated pages based on those pages' paths.
     """
     def __init__(self, app, name, config):
-        super(AutoConfigSourceBase, self).__init__(app, name, config)
-        self.fs_endpoint = config.get('fs_endpoint', name)
-        self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
-        self.supported_extensions = list(
-                app.config.get('site/auto_formats').keys())
-        self.default_auto_format = app.config.get('site/default_auto_format')
+        super().__init__(app, name, config)
 
         self.capture_mode = config.get('capture_mode', 'path')
         if self.capture_mode not in ['path', 'dirname', 'filename']:
@@ -34,91 +23,34 @@
                                      "one of: path, dirname, filename" %
                                      name)
 
-    def getSupportedRouteParameters(self):
-        return [
-            RouteParameter('slug', RouteParameter.TYPE_PATH)]
-
-    def buildPageFactories(self):
-        logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path)
-        if not os.path.isdir(self.fs_endpoint_path):
-            raise InvalidFileSystemEndpointError(self.name,
-                                                 self.fs_endpoint_path)
-
-        for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
-            rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path)
-            dirnames[:] = list(filter(filter_page_dirname, dirnames))
+    def _finalizeContent(self, parent_group, items, groups):
+        super()._finalizeContent(parent_group, items, groups)
 
-            # If `capture_mode` is `dirname`, we don't need to recompute it
-            # for each filename, so we do it here.
-            if self.capture_mode == 'dirname':
-                config = self._extractConfigFragment(rel_dirpath)
-
-            for f in filter(filter_page_filename, filenames):
-                if self.capture_mode == 'path':
-                    path = os.path.join(rel_dirpath, f)
-                    config = self._extractConfigFragment(path)
-                elif self.capture_mode == 'filename':
-                    config = self._extractConfigFragment(f)
-
-                fac_path = f
-                if rel_dirpath != '.':
-                    fac_path = os.path.join(rel_dirpath, f)
-
-                slug = self._makeSlug(fac_path)
-
-                metadata = {
-                        'slug': slug,
-                        'config': config}
-                yield PageFactory(self, fac_path, metadata)
+        # If `capture_mode` is `dirname`, we don't need to recompute it
+        # for each filename, so we do it here.
+        if self.capture_mode == 'dirname':
+            rel_dirpath = os.path.relpath(parent_group.spec,
+                                          self.fs_endpoint_path)
+            config = self._extractConfigFragment(rel_dirpath)
 
-    def resolveRef(self, ref_path):
-        path = os.path.normpath(
-                os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/")))
-
-        config = None
-        if self.capture_mode == 'dirname':
-            config = self._extractConfigFragment(os.path.dirname(ref_path))
-        elif self.capture_mode == 'path':
-            config = self._extractConfigFragment(ref_path)
-        elif self.capture_mode == 'filename':
-            config = self._extractConfigFragment(os.path.basename(ref_path))
-
-        slug = self._makeSlug(ref_path)
-        metadata = {'slug': slug, 'config': config}
-        return path, metadata
-
-    def listPath(self, rel_path):
-        raise NotImplementedError()
+        for i in items:
+            # Compute the config for the other capture modes.
+            if self.capture_mode == 'path':
+                rel_path = os.path.relpath(i.spec, self.fs_endpoint_path)
+                config = self._extractConfigFragment(rel_path)
+            elif self.capture_mode == 'filename':
+                fname = os.path.basename(i.spec)
+                config = self._extractConfigFragment(fname)
 
-    def getDirpath(self, rel_path):
-        return os.path.dirname(rel_path)
-
-    def getBasename(self, rel_path):
-        filename = os.path.basename(rel_path)
-        name, _ = os.path.splitext(filename)
-        return name
-
-    def _makeSlug(self, rel_path):
-        slug = rel_path.replace('\\', '/')
-        slug = self._cleanSlug(slug)
-        slug, ext = os.path.splitext(slug)
-        if ext.lstrip('.') not in self.supported_extensions:
-            slug += ext
-        if slug.startswith('./'):
-            slug = slug[2:]
-        if slug == '_index':
-            slug = ''
-        return slug
-
-    def _cleanSlug(self, slug):
-        return slug
+            # Set the config on the content item's metadata.
+            i.metadata.setdefault('config', {}).update(config)
 
     def _extractConfigFragment(self, rel_path):
         raise NotImplementedError()
 
 
-class AutoConfigSource(AutoConfigSourceBase):
-    """ Page source that extracts configuration settings from the sub-folders
+class AutoConfigContentSource(AutoConfigContentSourceBase):
+    """ Content source that extracts configuration settings from the sub-folders
         each page resides in. This is ideal for setting tags or categories
         on pages based on the folders they're in.
     """
@@ -126,13 +58,12 @@
 
     def __init__(self, app, name, config):
         config['capture_mode'] = 'dirname'
-        super(AutoConfigSource, self).__init__(app, name, config)
+        super().__init__(app, name, config)
+
         self.setting_name = config.get('setting_name', name)
         self.only_single_values = config.get('only_single_values', False)
         self.collapse_single_values = config.get('collapse_single_values',
                                                  False)
-        self.supported_extensions = list(
-                app.config.get('site/auto_formats').keys())
 
     def _extractConfigFragment(self, rel_path):
         if rel_path == '.':
@@ -157,48 +88,27 @@
 
         return {self.setting_name: values}
 
-    def findPageFactory(self, metadata, mode):
+    def findContent(self, route_params):
         # Pages from this source are effectively flattened, so we need to
         # find pages using a brute-force kinda way.
+        route_slug = route_params.get('slug', '')
+        if not route_slug:
+            route_slug = '_index'
+
         for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
             for f in filenames:
                 slug, _ = os.path.splitext(f)
-                if slug == metadata['slug']:
+                if slug == route_slug:
                     path = os.path.join(dirpath, f)
                     rel_path = os.path.relpath(path, self.fs_endpoint_path)
                     config = self._extractConfigFragment(rel_path)
                     metadata = {'slug': slug, 'config': config}
-                    return PageFactory(self, rel_path, metadata)
+                    return ContentItem(path, metadata)
         return None
 
-    def listPath(self, rel_path):
-        rel_path = rel_path.lstrip('\\/')
-        path = os.path.join(self.fs_endpoint_path, rel_path)
-        names = sorted(os.listdir(path))
-        items = []
-        for name in names:
-            if os.path.isdir(os.path.join(path, name)):
-                if filter_page_dirname(name):
-                    rel_subdir = os.path.join(rel_path, name)
-                    items.append((True, name, rel_subdir))
-            else:
-                if filter_page_filename(name):
-                    cur_rel_path = os.path.join(rel_path, name)
-                    slug = self._makeSlug(cur_rel_path)
-                    config = self._extractConfigFragment(cur_rel_path)
-                    metadata = {'slug': slug, 'config': config}
-                    fac = PageFactory(self, cur_rel_path, metadata)
 
-                    name, _ = os.path.splitext(name)
-                    items.append((False, name, fac))
-        return items
-
-    def _cleanSlug(self, slug):
-        return os.path.basename(slug)
-
-
-class OrderedPageSource(AutoConfigSourceBase):
-    """ A page source that assigns an "order" to its pages based on a
+class OrderedContentSource(AutoConfigContentSourceBase):
+    """ A content source that assigns an "order" to its pages based on a
         numerical prefix in their filename. Page iterators will automatically
         sort pages using that order.
     """
@@ -208,14 +118,13 @@
 
     def __init__(self, app, name, config):
         config['capture_mode'] = 'path'
-        super(OrderedPageSource, self).__init__(app, name, config)
+        super().__init__(app, name, config)
+
         self.setting_name = config.get('setting_name', 'order')
         self.default_value = config.get('default_value', 0)
-        self.supported_extensions = list(
-                app.config.get('site/auto_formats').keys())
 
-    def findPageFactory(self, metadata, mode):
-        uri_path = metadata.get('slug', '')
+    def findContent(self, route_params):
+        uri_path = route_params.get('slug', '')
         if uri_path == '':
             uri_path = '_index'
 
@@ -253,60 +162,16 @@
                 if not found:
                     return None
 
-        fac_path = os.path.relpath(path, self.fs_endpoint_path)
-        config = self._extractConfigFragment(fac_path)
+        rel_path = os.path.relpath(path, self.fs_endpoint_path)
+        config = self._extractConfigFragment(rel_path)
         metadata = {'slug': uri_path, 'config': config}
-
-        return PageFactory(self, fac_path, metadata)
+        return ContentItem(path, metadata)
 
     def getSorterIterator(self, it):
         accessor = self.getSettingAccessor()
         return OrderTrailSortIterator(it, self.setting_name + '_trail',
                                       value_accessor=accessor)
 
-    def listPath(self, rel_path):
-        rel_path = rel_path.lstrip('/')
-        path = self.fs_endpoint_path
-        if rel_path != '':
-            parts = rel_path.split('/')
-            for p in parts:
-                p_pat = r'(\d+_)?' + re.escape(p) + '$'
-                for name in os.listdir(path):
-                    if re.match(p_pat, name):
-                        path = os.path.join(path, name)
-                        break
-                else:
-                    raise Exception("No such path: %s" % rel_path)
-
-        items = []
-        names = sorted(os.listdir(path))
-        for name in names:
-            clean_name = self.re_pattern.sub('', name)
-            clean_name, _ = os.path.splitext(clean_name)
-            if os.path.isdir(os.path.join(path, name)):
-                if filter_page_dirname(name):
-                    rel_subdir = os.path.join(rel_path, name)
-                    items.append((True, clean_name, rel_subdir))
-            else:
-                if filter_page_filename(name):
-                    slug = self._makeSlug(os.path.join(rel_path, name))
-
-                    fac_path = name
-                    if rel_path != '.':
-                        fac_path = os.path.join(rel_path, name)
-                    fac_path = fac_path.replace('\\', '/')
-
-                    config = self._extractConfigFragment(fac_path)
-                    metadata = {'slug': slug, 'config': config}
-                    fac = PageFactory(self, fac_path, metadata)
-
-                    name, _ = os.path.splitext(name)
-                    items.append((False, clean_name, fac))
-        return items
-
-    def _cleanSlug(self, slug):
-        return self.re_pattern.sub(r'\1', slug)
-
     def _extractConfigFragment(self, rel_path):
         values = []
         for m in self.re_pattern.finditer(rel_path):
@@ -317,15 +182,12 @@
             values.append(self.default_value)
 
         return {
-                self.setting_name: values[-1],
-                self.setting_name + '_trail': values}
+            self.setting_name: values[-1],
+            self.setting_name + '_trail': values}
 
-    def _populateMetadata(self, rel_path, metadata, mode=None):
-        _, filename = os.path.split(rel_path)
-        config = self._extractConfigFragment(filename)
-        metadata['config'] = config
-        slug = metadata['slug']
-        metadata['slug'] = self.re_pattern.sub(r'\1', slug)
+    def _makeSlug(self, path):
+        slug = super()._makeSlug(path)
+        return self.re_pattern.sub(r'\1', slug)
 
 
 class OrderTrailSortIterator(object):