diff piecrust/sources/posts.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0. * Everything is a `ContentSource`, including assets directories. * Most content sources are subclasses of the base file-system source. * A source is processed by a "pipeline", and there are 2 built-in pipelines, one for assets and one for pages. The asset pipeline is vaguely functional, but the page pipeline is completely broken right now. * Rewrite the baking process as just running appropriate pipelines on each content item. This should allow for better parallelization.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents f0930178fd01
children f070a4fc033c
line wrap: on
line diff
--- a/piecrust/sources/posts.py	Sat Apr 29 21:42:22 2017 -0700
+++ b/piecrust/sources/posts.py	Wed May 17 00:11:48 2017 -0700
@@ -5,80 +5,48 @@
 import datetime
 from piecrust import osutil
 from piecrust.routing import RouteParameter
-from piecrust.sources.base import (
-        PageSource, InvalidFileSystemEndpointError, PageFactory,
-        MODE_CREATING, MODE_PARSING)
+from piecrust.sources.base import REL_ASSETS, ContentItem
+from piecrust.sources.fs import (
+    FSContentSource, InvalidFileSystemEndpointError)
 from piecrust.sources.interfaces import (
-        IPreparingSource, IInteractiveSource, InteractiveField)
-from piecrust.sources.mixins import SimplePaginationSourceMixin
-from piecrust.uriutil import multi_replace
+    IPreparingSource, IInteractiveSource, InteractiveField)
+from piecrust.sources.mixins import (
+    SimplePaginationSourceMixin, SimpleAssetsSubDirMixin)
+from piecrust.uriutil import uri_to_title
 
 
 logger = logging.getLogger(__name__)
 
 
-class PostsSource(PageSource, IPreparingSource, IInteractiveSource,
-                  SimplePaginationSourceMixin):
+class PostsSource(FSContentSource,
+                  SimpleAssetsSubDirMixin,
+                  IPreparingSource, IInteractiveSource):
     PATH_FORMAT = None
 
     def __init__(self, app, name, config):
-        PageSource.__init__(self, app, name, config)
-        self.fs_endpoint = config.get('fs_endpoint', name)
-        self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
-        self.supported_extensions = list(app.config.get('site/auto_formats').keys())
+        FSContentSource.__init__(self, app, name, config)
+        self.auto_formats = app.config.get('site/auto_formats')
         self.default_auto_format = app.config.get('site/default_auto_format')
-        self._source_it_cache = None
+        self.supported_extensions = list(self.auto_formats)
 
     @property
     def path_format(self):
         return self.__class__.PATH_FORMAT
 
-    def resolveRef(self, ref_path):
-        path = os.path.normpath(os.path.join(self.fs_endpoint_path, ref_path))
-        metadata = self._parseMetadataFromPath(ref_path)
-        return path, metadata
-
-    def getSupportedRouteParameters(self):
-        return [
-            RouteParameter('slug', RouteParameter.TYPE_STRING),
-            RouteParameter('day', RouteParameter.TYPE_INT2),
-            RouteParameter('month', RouteParameter.TYPE_INT2),
-            RouteParameter('year', RouteParameter.TYPE_INT4)]
+    def _finalizeContent(self, parent_group, items, groups):
+        SimpleAssetsSubDirMixin._onFinalizeContent(
+            parent_group, items, groups)
 
-    def buildPageFactory(self, path):
-        if not path.startswith(self.fs_endpoint_path):
-            raise Exception("Page path '%s' isn't inside '%s'." % (
-                    path, self.fs_endpoint_path))
-        rel_path = path[len(self.fs_endpoint_path):].lstrip('\\/')
-        pat = self.PATH_FORMAT % {
-                'year': 'YEAR',
-                'month': 'MONTH',
-                'day': 'DAY',
-                'slug': 'SLUG',
-                'ext': 'EXT'}
-        pat = re.escape(pat)
-        pat = multi_replace(pat, {
-                'YEAR': '(\d{4})',
-                'MONTH': '(\d{2})',
-                'DAY': '(\d{2})',
-                'SLUG': '(.*)',
-                'EXT': '(.*)'})
-        m = re.match(pat, rel_path)
-        if m is None:
-            raise Exception("'%s' isn't a proper %s page path." % (
-                    rel_path, self.SOURCE_NAME))
-        return self._makeFactory(
-                rel_path,
-                m.group(4),
-                int(m.group(1)),
-                int(m.group(2)),
-                int(m.group(3)))
+    def getRelatedContents(self, item, relationship):
+        if relationship != REL_ASSETS:  # only asset lookups are supported
+            raise NotImplementedError()
+        return SimpleAssetsSubDirMixin._getRelatedAssetsContents(item)
 
-    def findPageFactory(self, metadata, mode):
-        year = metadata.get('year')
-        month = metadata.get('month')
-        day = metadata.get('day')
-        slug = metadata.get('slug')
+    def findContent(self, route_params):
+        year = route_params.get('year')
+        month = route_params.get('month')
+        day = route_params.get('day')
+        slug = route_params.get('slug')
 
         try:
             if year is not None:
@@ -90,20 +58,18 @@
         except ValueError:
             return None
 
-        ext = metadata.get('ext')
+        ext = route_params.get('ext')
         if ext is None:
             if len(self.supported_extensions) == 1:
                 ext = self.supported_extensions[0]
-            elif mode == MODE_CREATING and self.default_auto_format:
-                ext = self.default_auto_format
 
         replacements = {
-                'year': '%04d' % year if year is not None else None,
-                'month': '%02d' % month if month is not None else None,
-                'day': '%02d' % day if day is not None else None,
-                'slug': slug,
-                'ext': ext
-                }
+            'year': '%04d' % year if year is not None else None,
+            'month': '%02d' % month if month is not None else None,
+            'day': '%02d' % day if day is not None else None,
+            'slug': slug,
+            'ext': ext
+        }
         needs_recapture = False
         if year is None:
             needs_recapture = True
@@ -121,22 +87,53 @@
             needs_recapture = True
             replacements['ext'] = '*'
         path = os.path.normpath(os.path.join(
-                self.fs_endpoint_path, self.path_format % replacements))
+            self.fs_endpoint_path, self.path_format % replacements))
 
         if needs_recapture:
-            if mode == MODE_CREATING:
-                raise ValueError("Not enough information to find a post path.")
             possible_paths = osutil.glob(path)
             if len(possible_paths) != 1:
                 return None
             path = possible_paths[0]
-        elif mode == MODE_PARSING and not os.path.isfile(path):
+        elif not os.path.isfile(path):
             return None
 
-        rel_path = os.path.relpath(path, self.fs_endpoint_path)
-        rel_path = rel_path.replace('\\', '/')
-        fac_metadata = self._parseMetadataFromPath(rel_path)
-        return PageFactory(self, rel_path, fac_metadata)
+        metadata = self._parseMetadataFromPath(path)
+        return ContentItem(path, metadata)
+
+    def _parseMetadataFromPath(self, path):
+        """Extract the date and slug encoded in a post's file path."""
+        regex_repl = {
+            'year': r'(?P<year>\d{4})',
+            'month': r'(?P<month>\d{2})',
+            'day': r'(?P<day>\d{2})',
+            'slug': r'(?P<slug>.*)',
+            'ext': r'(?P<ext>.*)'
+        }
+        # Escape the literal `-` and `.` of the format before expanding it.
+        path_format_re = re.sub(r'([\-\.])', r'\\\1', self.path_format)
+        pattern = path_format_re % regex_repl + '$'
+        m = re.search(pattern, path.replace('\\', '/'))
+        if not m:
+            raise Exception("Expected to be able to match path with path "
+                            "format: %s" % path)
+
+        year = int(m.group('year'))
+        month = int(m.group('month'))
+        day = int(m.group('day'))
+        return {
+            'year': year,
+            'month': month,
+            'day': day,
+            'slug': m.group('slug'),
+            'date': datetime.date(year, month, day)
+        }
+
+    def getSupportedRouteParameters(self):
+        return [
+            RouteParameter('slug', RouteParameter.TYPE_STRING),
+            RouteParameter('day', RouteParameter.TYPE_INT2),
+            RouteParameter('month', RouteParameter.TYPE_INT2),
+            RouteParameter('year', RouteParameter.TYPE_INT4)]
 
     def getSourceIterator(self):
         if self._source_it_cache is None:
@@ -146,11 +143,11 @@
 
     def setupPrepareParser(self, parser, app):
         parser.add_argument(
-                '-d', '--date', help="The date of the post, "
-                "in `year/month/day` format (defaults to today).")
+            '-d', '--date', help="The date of the post, "
+            "in `year/month/day` format (defaults to today).")
         parser.add_argument('slug', help="The URL slug for the new post.")
 
-    def buildMetadata(self, args):
+    def createContent(self, args):
         dt = datetime.date.today()
         if args.date:
             if args.date == 'today':
@@ -170,8 +167,23 @@
                                     "YEAR/MONTH/DAY.")
                 dt = datetime.date(year, month, day)
 
+        slug, ext = os.path.splitext(args.slug)
+        # `splitext` keeps the leading dot, but the path format adds its own.
+        ext = ext.lstrip('.') or self.default_auto_format
         year, month, day = dt.year, dt.month, dt.day
-        return {'year': year, 'month': month, 'day': day, 'slug': args.slug}
+        tokens = {
+            'slug': slug,
+            'ext': ext,
+            'year': '%04d' % year,
+            'month': '%02d' % month,
+            'day': '%02d' % day
+        }
+        rel_path = self.path_format % tokens
+        path = os.path.join(self.fs_endpoint_path, rel_path)
+        metadata = {
+            'config': {'title': uri_to_title(slug)}
+        }
+        return ContentItem(path, metadata)
 
     def getInteractiveFields(self):
         dt = datetime.date.today()
@@ -185,96 +197,78 @@
         if not os.path.isdir(self.fs_endpoint_path):
             if self.ignore_missing_dir:
                 return False
-            raise InvalidFileSystemEndpointError(self.name, self.fs_endpoint_path)
+            raise InvalidFileSystemEndpointError(self.name,
+                                                 self.fs_endpoint_path)
         return True
 
-    def _parseMetadataFromPath(self, path):
-        regex_repl = {
-                'year': '(?P<year>\d{4})',
-                'month': '(?P<month>\d{2})',
-                'day': '(?P<day>\d{2})',
-                'slug': '(?P<slug>.*)',
-                'ext': '(?P<ext>.*)'
-                }
-        path_format_re = re.sub(r'([\-\.])', r'\\\1', self.path_format)
-        pattern = path_format_re % regex_repl + '$'
-        m = re.search(pattern, path.replace('\\', '/'))
-        if not m:
-            raise Exception("Expected to be able to match path with path "
-                            "format: %s" % path)
-
-        year = int(m.group('year'))
-        month = int(m.group('month'))
-        day = int(m.group('day'))
-        timestamp = datetime.date(year, month, day)
-        metadata = {
-                'year': year,
-                'month': month,
-                'day': day,
-                'slug': m.group('slug'),
-                'date': timestamp
-                }
-        return metadata
-
-    def _makeFactory(self, path, slug, year, month, day):
+    def _makeContentItem(self, path, slug, year, month, day):
         path = path.replace('\\', '/')
         timestamp = datetime.date(year, month, day)
         metadata = {
-                'slug': slug,
-                'year': year,
-                'month': month,
-                'day': day,
-                'date': timestamp}
-        return PageFactory(self, path, metadata)
+            'slug': slug,
+            'year': year,
+            'month': month,
+            'day': day,
+            'date': timestamp}
+        return ContentItem(path, metadata)
 
 
 class FlatPostsSource(PostsSource):
     SOURCE_NAME = 'posts/flat'
     PATH_FORMAT = '%(year)s-%(month)s-%(day)s_%(slug)s.%(ext)s'
+    PATTERN = re.compile(r'(\d{4})-(\d{2})-(\d{2})_(.*)\.(\w+)$')
 
     def __init__(self, app, name, config):
-        super(FlatPostsSource, self).__init__(app, name, config)
+        super().__init__(app, name, config)
 
-    def buildPageFactories(self):
-        if not self._checkFsEndpointPath():
-            return
-        logger.debug("Scanning for posts (flat) in: %s" % self.fs_endpoint_path)
-        pattern = re.compile(r'(\d{4})-(\d{2})-(\d{2})_(.*)\.(\w+)$')
+    def getContents(self, group):
+        if not self._checkFSEndpoint():
+            return None
+
+        logger.debug("Scanning for posts (flat) in: %s" %
+                     self.fs_endpoint_path)
+        pattern = FlatPostsSource.PATTERN
         _, __, filenames = next(osutil.walk(self.fs_endpoint_path))
         for f in filenames:
             match = pattern.match(f)
             if match is None:
                 name, ext = os.path.splitext(f)
-                logger.warning("'%s' is not formatted as 'YYYY-MM-DD_slug-title.%s' "
-                        "and will be ignored. Is that a typo?" % (f, ext))
+                logger.warning(
+                    "'%s' is not formatted as 'YYYY-MM-DD_slug-title.%s' "
+                    "and will be ignored. Is that a typo?" % (f, ext))
                 continue
-            yield self._makeFactory(
-                    f,
-                    match.group(4),
-                    int(match.group(1)),
-                    int(match.group(2)),
-                    int(match.group(3)))
+            yield self._makeContentItem(
+                f,
+                match.group(4),
+                int(match.group(1)),
+                int(match.group(2)),
+                int(match.group(3)))
 
 
 class ShallowPostsSource(PostsSource):
     SOURCE_NAME = 'posts/shallow'
     PATH_FORMAT = '%(year)s/%(month)s-%(day)s_%(slug)s.%(ext)s'
+    YEAR_PATTERN = re.compile(r'(\d{4})$')
+    FILE_PATTERN = re.compile(r'(\d{2})-(\d{2})_(.*)\.(\w+)$')
 
     def __init__(self, app, name, config):
         super(ShallowPostsSource, self).__init__(app, name, config)
 
-    def buildPageFactories(self):
+    def getContents(self, group):
         if not self._checkFsEndpointPath():
             return
-        logger.debug("Scanning for posts (shallow) in: %s" % self.fs_endpoint_path)
-        year_pattern = re.compile(r'(\d{4})$')
-        file_pattern = re.compile(r'(\d{2})-(\d{2})_(.*)\.(\w+)$')
+
+        logger.debug("Scanning for posts (shallow) in: %s" %
+                     self.fs_endpoint_path)
+        year_pattern = ShallowPostsSource.YEAR_PATTERN
+        file_pattern = ShallowPostsSource.FILE_PATTERN
         _, year_dirs, __ = next(osutil.walk(self.fs_endpoint_path))
         year_dirs = [d for d in year_dirs if year_pattern.match(d)]
         for yd in year_dirs:
             if year_pattern.match(yd) is None:
-                logger.warning("'%s' is not formatted as 'YYYY' and will be ignored. "
-                        "Is that a typo?")
+                logger.warning(
+                    "'%s' is not formatted as 'YYYY' and will be ignored. "
+                    "Is that a typo?" % yd)
                 continue
             year = int(yd)
             year_dir = os.path.join(self.fs_endpoint_path, yd)
@@ -284,31 +278,37 @@
                 match = file_pattern.match(f)
                 if match is None:
                     name, ext = os.path.splitext(f)
-                    logger.warning("'%s' is not formatted as 'MM-DD_slug-title.%s' "
-                            "and will be ignored. Is that a typo?" % (f, ext))
+                    logger.warning(
+                        "'%s' is not formatted as 'MM-DD_slug-title.%s' "
+                        "and will be ignored. Is that a typo?" % (f, ext))
                     continue
-                yield self._makeFactory(
-                        os.path.join(yd, f),
-                        match.group(3),
-                        year,
-                        int(match.group(1)),
-                        int(match.group(2)))
+                yield self._makeContentItem(
+                    os.path.join(yd, f),
+                    match.group(3),
+                    year,
+                    int(match.group(1)),
+                    int(match.group(2)))
 
 
 class HierarchyPostsSource(PostsSource):
     SOURCE_NAME = 'posts/hierarchy'
     PATH_FORMAT = '%(year)s/%(month)s/%(day)s_%(slug)s.%(ext)s'
+    YEAR_PATTERN = re.compile(r'(\d{4})$')
+    MONTH_PATTERN = re.compile(r'(\d{2})$')
+    FILE_PATTERN = re.compile(r'(\d{2})_(.*)\.(\w+)$')
 
     def __init__(self, app, name, config):
         super(HierarchyPostsSource, self).__init__(app, name, config)
 
-    def buildPageFactories(self):
+    def getContents(self, group):
         if not self._checkFsEndpointPath():
             return
-        logger.debug("Scanning for posts (hierarchy) in: %s" % self.fs_endpoint_path)
-        year_pattern = re.compile(r'(\d{4})$')
-        month_pattern = re.compile(r'(\d{2})$')
-        file_pattern = re.compile(r'(\d{2})_(.*)\.(\w+)$')
+
+        logger.debug("Scanning for posts (hierarchy) in: %s" %
+                     self.fs_endpoint_path)
+        year_pattern = HierarchyPostsSource.YEAR_PATTERN
+        month_pattern = HierarchyPostsSource.MONTH_PATTERN
+        file_pattern = HierarchyPostsSource.FILE_PATTERN
         _, year_dirs, __ = next(osutil.walk(self.fs_endpoint_path))
         year_dirs = [d for d in year_dirs if year_pattern.match(d)]
         for yd in year_dirs:
@@ -326,14 +326,15 @@
                     match = file_pattern.match(f)
                     if match is None:
                         name, ext = os.path.splitext(f)
-                        logger.warning("'%s' is not formatted as 'DD_slug-title.%s' "
-                                "and will be ignored. Is that a typo?" % (f, ext))
+                        logger.warning(
+                            "'%s' is not formatted as 'DD_slug-title.%s' "
+                            "and will be ignored. Is that a typo?" % (f, ext))
                         continue
                     rel_name = os.path.join(yd, md, f)
-                    yield self._makeFactory(
-                            rel_name,
-                            match.group(2),
-                            year,
-                            month,
-                            int(match.group(1)))
+                    yield self._makeContentItem(
+                        rel_name,
+                        match.group(2),
+                        year,
+                        month,
+                        int(match.group(1)))