Mercurial > piecrust2
diff piecrust/sources/posts.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including asset directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline". There are two built-in pipelines:
one for assets and one for pages. The asset pipeline is vaguely functional,
but the page pipeline is completely broken right now.
* Rewrite the baking process so that it simply runs the appropriate pipeline
on each content item. This should allow for better parallelization.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 17 May 2017 00:11:48 -0700 |
parents | f0930178fd01 |
children | f070a4fc033c |
line wrap: on
line diff
--- a/piecrust/sources/posts.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/sources/posts.py Wed May 17 00:11:48 2017 -0700 @@ -5,80 +5,48 @@ import datetime from piecrust import osutil from piecrust.routing import RouteParameter -from piecrust.sources.base import ( - PageSource, InvalidFileSystemEndpointError, PageFactory, - MODE_CREATING, MODE_PARSING) +from piecrust.sources.base import REL_ASSETS, ContentItem +from piecrust.sources.fs import ( + FSContentSource, InvalidFileSystemEndpointError) from piecrust.sources.interfaces import ( - IPreparingSource, IInteractiveSource, InteractiveField) -from piecrust.sources.mixins import SimplePaginationSourceMixin -from piecrust.uriutil import multi_replace + IPreparingSource, IInteractiveSource, InteractiveField) +from piecrust.sources.mixins import ( + SimplePaginationSourceMixin, SimpleAssetsSubDirMixin) +from piecrust.uriutil import uri_to_title logger = logging.getLogger(__name__) -class PostsSource(PageSource, IPreparingSource, IInteractiveSource, - SimplePaginationSourceMixin): +class PostsSource(FSContentSource, + SimpleAssetsSubDirMixin, + IPreparingSource, IInteractiveSource): PATH_FORMAT = None def __init__(self, app, name, config): - PageSource.__init__(self, app, name, config) - self.fs_endpoint = config.get('fs_endpoint', name) - self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) - self.supported_extensions = list(app.config.get('site/auto_formats').keys()) + FSContentSource.__init__(self, app, name, config) + self.auto_formats = app.config.get('site/auto_formats') self.default_auto_format = app.config.get('site/default_auto_format') - self._source_it_cache = None + self.supported_extensions = list(self.auto_formats) @property def path_format(self): return self.__class__.PATH_FORMAT - def resolveRef(self, ref_path): - path = os.path.normpath(os.path.join(self.fs_endpoint_path, ref_path)) - metadata = self._parseMetadataFromPath(ref_path) - return path, metadata - - def 
getSupportedRouteParameters(self): - return [ - RouteParameter('slug', RouteParameter.TYPE_STRING), - RouteParameter('day', RouteParameter.TYPE_INT2), - RouteParameter('month', RouteParameter.TYPE_INT2), - RouteParameter('year', RouteParameter.TYPE_INT4)] + def _finalizeContent(self, parent_group, items, groups): + SimpleAssetsSubDirMixin._onFinalizeContent( + parent_group, items, groups) - def buildPageFactory(self, path): - if not path.startswith(self.fs_endpoint_path): - raise Exception("Page path '%s' isn't inside '%s'." % ( - path, self.fs_endpoint_path)) - rel_path = path[len(self.fs_endpoint_path):].lstrip('\\/') - pat = self.PATH_FORMAT % { - 'year': 'YEAR', - 'month': 'MONTH', - 'day': 'DAY', - 'slug': 'SLUG', - 'ext': 'EXT'} - pat = re.escape(pat) - pat = multi_replace(pat, { - 'YEAR': '(\d{4})', - 'MONTH': '(\d{2})', - 'DAY': '(\d{2})', - 'SLUG': '(.*)', - 'EXT': '(.*)'}) - m = re.match(pat, rel_path) - if m is None: - raise Exception("'%s' isn't a proper %s page path." % ( - rel_path, self.SOURCE_NAME)) - return self._makeFactory( - rel_path, - m.group(4), - int(m.group(1)), - int(m.group(2)), - int(m.group(3))) + def getRelatedContents(self, item, relationship): + if relationship == REL_ASSETS: + SimpleAssetsSubDirMixin._getRelatedAssetsContents(item) + raise NotImplementedError() - def findPageFactory(self, metadata, mode): - year = metadata.get('year') - month = metadata.get('month') - day = metadata.get('day') - slug = metadata.get('slug') + def findContent(self, route_params): + year = route_params.get('year') + month = route_params.get('month') + day = route_params.get('day') + slug = route_params.get('slug') try: if year is not None: @@ -90,20 +58,18 @@ except ValueError: return None - ext = metadata.get('ext') + ext = route_params.get('ext') if ext is None: if len(self.supported_extensions) == 1: ext = self.supported_extensions[0] - elif mode == MODE_CREATING and self.default_auto_format: - ext = self.default_auto_format replacements = { - 
'year': '%04d' % year if year is not None else None, - 'month': '%02d' % month if month is not None else None, - 'day': '%02d' % day if day is not None else None, - 'slug': slug, - 'ext': ext - } + 'year': '%04d' % year if year is not None else None, + 'month': '%02d' % month if month is not None else None, + 'day': '%02d' % day if day is not None else None, + 'slug': slug, + 'ext': ext + } needs_recapture = False if year is None: needs_recapture = True @@ -121,22 +87,53 @@ needs_recapture = True replacements['ext'] = '*' path = os.path.normpath(os.path.join( - self.fs_endpoint_path, self.path_format % replacements)) + self.fs_endpoint_path, self.path_format % replacements)) if needs_recapture: - if mode == MODE_CREATING: - raise ValueError("Not enough information to find a post path.") possible_paths = osutil.glob(path) if len(possible_paths) != 1: return None path = possible_paths[0] - elif mode == MODE_PARSING and not os.path.isfile(path): + elif not os.path.isfile(path): return None - rel_path = os.path.relpath(path, self.fs_endpoint_path) - rel_path = rel_path.replace('\\', '/') - fac_metadata = self._parseMetadataFromPath(rel_path) - return PageFactory(self, rel_path, fac_metadata) + metadata = self._parseMetadataFromPath(path) + return ContentItem(path, metadata) + + def _parseMetadataFromPath(self, path): + regex_repl = { + 'year': '(?P<year>\d{4})', + 'month': '(?P<month>\d{2})', + 'day': '(?P<day>\d{2})', + 'slug': '(?P<slug>.*)', + 'ext': '(?P<ext>.*)' + } + path_format_re = re.sub(r'([\-\.])', r'\\\1', self.path_format) + pattern = path_format_re % regex_repl + '$' + m = re.search(pattern, path.replace('\\', '/')) + if not m: + raise Exception("Expected to be able to match path with path " + "format: %s" % path) + + year = int(m.group('year')) + month = int(m.group('month')) + day = int(m.group('day')) + timestamp = datetime.date(year, month, day) + metadata = { + 'year': year, + 'month': month, + 'day': day, + 'slug': m.group('slug'), + 'date': 
timestamp + } + return metadata + + def getSupportedRouteParameters(self): + return [ + RouteParameter('slug', RouteParameter.TYPE_STRING), + RouteParameter('day', RouteParameter.TYPE_INT2), + RouteParameter('month', RouteParameter.TYPE_INT2), + RouteParameter('year', RouteParameter.TYPE_INT4)] def getSourceIterator(self): if self._source_it_cache is None: @@ -146,11 +143,11 @@ def setupPrepareParser(self, parser, app): parser.add_argument( - '-d', '--date', help="The date of the post, " - "in `year/month/day` format (defaults to today).") + '-d', '--date', help="The date of the post, " + "in `year/month/day` format (defaults to today).") parser.add_argument('slug', help="The URL slug for the new post.") - def buildMetadata(self, args): + def createContent(self, args): dt = datetime.date.today() if args.date: if args.date == 'today': @@ -170,8 +167,23 @@ "YEAR/MONTH/DAY.") dt = datetime.date(year, month, day) + slug, ext = os.path.splitext(args.slug) + if not ext: + ext = self.default_auto_format year, month, day = dt.year, dt.month, dt.day - return {'year': year, 'month': month, 'day': day, 'slug': args.slug} + tokens = { + 'slug': args.slug, + 'ext': ext, + 'year': '%04d' % year, + 'month': '%02d' % month, + 'day': '%02d' % day + } + rel_path = self.path_format % tokens + path = os.path.join(self.fs_endpoint_path, rel_path) + metadata = { + 'config': {'title': uri_to_title(slug)} + } + return ContentItem(path, metadata) def getInteractiveFields(self): dt = datetime.date.today() @@ -185,96 +197,78 @@ if not os.path.isdir(self.fs_endpoint_path): if self.ignore_missing_dir: return False - raise InvalidFileSystemEndpointError(self.name, self.fs_endpoint_path) + raise InvalidFileSystemEndpointError(self.name, + self.fs_endpoint_path) return True - def _parseMetadataFromPath(self, path): - regex_repl = { - 'year': '(?P<year>\d{4})', - 'month': '(?P<month>\d{2})', - 'day': '(?P<day>\d{2})', - 'slug': '(?P<slug>.*)', - 'ext': '(?P<ext>.*)' - } - path_format_re = 
re.sub(r'([\-\.])', r'\\\1', self.path_format) - pattern = path_format_re % regex_repl + '$' - m = re.search(pattern, path.replace('\\', '/')) - if not m: - raise Exception("Expected to be able to match path with path " - "format: %s" % path) - - year = int(m.group('year')) - month = int(m.group('month')) - day = int(m.group('day')) - timestamp = datetime.date(year, month, day) - metadata = { - 'year': year, - 'month': month, - 'day': day, - 'slug': m.group('slug'), - 'date': timestamp - } - return metadata - - def _makeFactory(self, path, slug, year, month, day): + def _makeContentItem(self, path, slug, year, month, day): path = path.replace('\\', '/') timestamp = datetime.date(year, month, day) metadata = { - 'slug': slug, - 'year': year, - 'month': month, - 'day': day, - 'date': timestamp} - return PageFactory(self, path, metadata) + 'slug': slug, + 'year': year, + 'month': month, + 'day': day, + 'date': timestamp} + return ContentItem(path, metadata) class FlatPostsSource(PostsSource): SOURCE_NAME = 'posts/flat' PATH_FORMAT = '%(year)s-%(month)s-%(day)s_%(slug)s.%(ext)s' + PATTERN = re.compile(r'(\d{4})-(\d{2})-(\d{2})_(.*)\.(\w+)$') def __init__(self, app, name, config): - super(FlatPostsSource, self).__init__(app, name, config) + super().__init__(app, name, config) - def buildPageFactories(self): - if not self._checkFsEndpointPath(): - return - logger.debug("Scanning for posts (flat) in: %s" % self.fs_endpoint_path) - pattern = re.compile(r'(\d{4})-(\d{2})-(\d{2})_(.*)\.(\w+)$') + def getContents(self, group): + if not self._checkFSEndpoint(): + return None + + logger.debug("Scanning for posts (flat) in: %s" % + self.fs_endpoint_path) + pattern = FlatPostsSource.PATTERN _, __, filenames = next(osutil.walk(self.fs_endpoint_path)) for f in filenames: match = pattern.match(f) if match is None: name, ext = os.path.splitext(f) - logger.warning("'%s' is not formatted as 'YYYY-MM-DD_slug-title.%s' " - "and will be ignored. Is that a typo?" 
% (f, ext)) + logger.warning( + "'%s' is not formatted as 'YYYY-MM-DD_slug-title.%s' " + "and will be ignored. Is that a typo?" % (f, ext)) continue - yield self._makeFactory( - f, - match.group(4), - int(match.group(1)), - int(match.group(2)), - int(match.group(3))) + yield self._makeContentItem( + f, + match.group(4), + int(match.group(1)), + int(match.group(2)), + int(match.group(3))) class ShallowPostsSource(PostsSource): SOURCE_NAME = 'posts/shallow' PATH_FORMAT = '%(year)s/%(month)s-%(day)s_%(slug)s.%(ext)s' + YEAR_PATTERN = re.compile(r'(\d{4})$') + FILE_PATTERN = re.compile(r'(\d{2})-(\d{2})_(.*)\.(\w+)$') def __init__(self, app, name, config): super(ShallowPostsSource, self).__init__(app, name, config) - def buildPageFactories(self): + def getContents(self, group): if not self._checkFsEndpointPath(): return - logger.debug("Scanning for posts (shallow) in: %s" % self.fs_endpoint_path) - year_pattern = re.compile(r'(\d{4})$') - file_pattern = re.compile(r'(\d{2})-(\d{2})_(.*)\.(\w+)$') + + logger.debug("Scanning for posts (shallow) in: %s" % + self.fs_endpoint_path) + year_pattern = ShallowPostsSource.YEAR_PATTERN + file_pattern = ShallowPostsSource.FILE_PATTERN _, year_dirs, __ = next(osutil.walk(self.fs_endpoint_path)) year_dirs = [d for d in year_dirs if year_pattern.match(d)] for yd in year_dirs: if year_pattern.match(yd) is None: - logger.warning("'%s' is not formatted as 'YYYY' and will be ignored. " - "Is that a typo?") + logger.warning( + "'%s' is not formatted as 'YYYY' and will be ignored. " + "Is that a typo?") continue year = int(yd) year_dir = os.path.join(self.fs_endpoint_path, yd) @@ -284,31 +278,37 @@ match = file_pattern.match(f) if match is None: name, ext = os.path.splitext(f) - logger.warning("'%s' is not formatted as 'MM-DD_slug-title.%s' " - "and will be ignored. Is that a typo?" % (f, ext)) + logger.warning( + "'%s' is not formatted as 'MM-DD_slug-title.%s' " + "and will be ignored. Is that a typo?" 
% (f, ext)) continue - yield self._makeFactory( - os.path.join(yd, f), - match.group(3), - year, - int(match.group(1)), - int(match.group(2))) + yield self._makeContentItem( + os.path.join(yd, f), + match.group(3), + year, + int(match.group(1)), + int(match.group(2))) class HierarchyPostsSource(PostsSource): SOURCE_NAME = 'posts/hierarchy' PATH_FORMAT = '%(year)s/%(month)s/%(day)s_%(slug)s.%(ext)s' + YEAR_PATTERN = re.compile(r'(\d{4})$') + MONTH_PATTERN = re.compile(r'(\d{2})$') + FILE_PATTERN = re.compile(r'(\d{2})_(.*)\.(\w+)$') def __init__(self, app, name, config): super(HierarchyPostsSource, self).__init__(app, name, config) - def buildPageFactories(self): + def getContents(self, group): if not self._checkFsEndpointPath(): return - logger.debug("Scanning for posts (hierarchy) in: %s" % self.fs_endpoint_path) - year_pattern = re.compile(r'(\d{4})$') - month_pattern = re.compile(r'(\d{2})$') - file_pattern = re.compile(r'(\d{2})_(.*)\.(\w+)$') + + logger.debug("Scanning for posts (hierarchy) in: %s" % + self.fs_endpoint_path) + year_pattern = HierarchyPostsSource.YEAR_PATTERN + month_pattern = HierarchyPostsSource.MONTH_PATTERN + file_pattern = HierarchyPostsSource.FILE_PATTERN _, year_dirs, __ = next(osutil.walk(self.fs_endpoint_path)) year_dirs = [d for d in year_dirs if year_pattern.match(d)] for yd in year_dirs: @@ -326,14 +326,15 @@ match = file_pattern.match(f) if match is None: name, ext = os.path.splitext(f) - logger.warning("'%s' is not formatted as 'DD_slug-title.%s' " - "and will be ignored. Is that a typo?" % (f, ext)) + logger.warning( + "'%s' is not formatted as 'DD_slug-title.%s' " + "and will be ignored. Is that a typo?" % (f, ext)) continue rel_name = os.path.join(yd, md, f) - yield self._makeFactory( - rel_name, - match.group(2), - year, - month, - int(match.group(1))) + yield self._makeContentItem( + rel_name, + match.group(2), + year, + month, + int(match.group(1)))