changeset 856:9bb22bbe093c

refactor: Make the blog archives functional again. The blog archives now use the same pattern as the taxonomy support.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 06 Jun 2017 01:23:25 -0700
parents 448710d84121
children d231a10d18f9
files piecrust/dataproviders/pageiterator.py piecrust/page.py piecrust/resources/theme/templates/_year.html piecrust/sources/blogarchives.py piecrust/sources/generator.py piecrust/sources/taxonomy.py
diffstat 6 files changed, 219 insertions(+), 127 deletions(-)
--- a/piecrust/dataproviders/pageiterator.py	Tue Jun 06 00:26:21 2017 -0700
+++ b/piecrust/dataproviders/pageiterator.py	Tue Jun 06 01:23:25 2017 -0700
@@ -190,6 +190,13 @@
         self._it = it_class(self._it, *args, **kwargs)
         return self
 
+    def _wrapAsSort(self, sort_it_class, *args, **kwargs):
+        self._ensureUnlocked()
+        self._ensureUnloaded()
+        self._it = sort_it_class(self._it, *args, **kwargs)
+        self._has_sorter = True
+        return self
+
     def _lockIterator(self):
         self._ensureUnlocked()
         self._locked = True
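
The new _wrapAsSort() mirrors the existing _simpleNonSortedWrap(), but also
flips _has_sorter, which presumably lets the page iterator skip installing
its default sort once an explicit one is in place. A minimal sketch of the
wrapper-chain pattern, with illustrative stand-ins for PieCrust's iterator
classes:

    class AscendingDateSort:
        def __init__(self, it, reverse=False):
            self._it = it
            self._reverse = reverse

        def __iter__(self):
            return iter(sorted(self._it, key=lambda p: p.datetime,
                               reverse=self._reverse))

    class IteratorSketch:
        def __init__(self, pages):
            self._it = pages
            self._has_sorter = False

        def _wrapAsSort(self, sort_it_class, *args, **kwargs):
            # Same shape as PageIterator._wrapAsSort() above, minus the
            # locking/loading guards.
            self._it = sort_it_class(self._it, *args, **kwargs)
            self._has_sorter = True
            return self

        def __iter__(self):
            # A real iterator would apply its default sorter here when
            # _has_sorter is still False.
            return iter(self._it)

blogarchives.py below chains exactly this way: a hard-coded filter first,
then DateSortIterator via _wrapAsSort(reverse=False).
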
--- a/piecrust/page.py	Tue Jun 06 00:26:21 2017 -0700
+++ b/piecrust/page.py	Tue Jun 06 01:23:25 2017 -0700
@@ -89,7 +89,8 @@
     def datetime(self):
         if self._datetime is None:
             try:
-                self._datetime = self._computeDateTime()
+                self._datetime = _compute_datetime(self.source_metadata,
+                                                   self.config)
             except Exception as ex:
                 logger.exception(ex)
                 raise Exception(
@@ -113,38 +114,6 @@
     def getSegment(self, name='content'):
         return self.segments[name]
 
-    def _computeDateTime(self):
-        if 'datetime' in self.source_metadata:
-            # Get the date/time from the source.
-            self._datetime = self.source_metadata['datetime']
-        elif 'date' in self.source_metadata:
-            # Get the date from the source. Potentially get the
-            # time from the page config.
-            page_date = self.source_metadata['date']
-            page_time = _parse_config_time(self.config.get('time'))
-            if page_time is not None:
-                self._datetime = datetime.datetime(
-                    page_date.year,
-                    page_date.month,
-                    page_date.day) + page_time
-            else:
-                self._datetime = datetime.datetime(
-                    page_date.year, page_date.month, page_date.day)
-        elif 'date' in self.config:
-            # Get the date from the page config, and maybe the
-            # time too.
-            page_date = _parse_config_date(self.config.get('date'))
-            self._datetime = datetime.datetime(
-                page_date.year,
-                page_date.month,
-                page_date.day)
-            page_time = _parse_config_time(self.config.get('time'))
-            if page_time is not None:
-                self._datetime += page_time
-            else:
-                # No idea what the date/time for this page is.
-                self._datetime = datetime.datetime.fromtimestamp(0)
-
     def _load(self):
         if self._config is not None:
             return
@@ -166,6 +135,42 @@
             self._flags |= FLAG_RAW_CACHE_VALID
 
 
+def _compute_datetime(source_metadata, config):
+    # Get the date/time from the source.
+    dt = source_metadata.get('datetime')
+    if dt is not None:
+        return dt
+
+    # Get the date from the source. Potentially get the
+    # time from the page config.
+    page_date = source_metadata.get('date')
+    if page_date is not None:
+        dt = datetime.datetime(
+            page_date.year, page_date.month, page_date.day)
+
+        page_time = _parse_config_time(config.get('time'))
+        if page_time is not None:
+            dt += page_time
+
+        return dt
+
+    # Get the date from the page config, and maybe the
+    # time too.
+    page_date = _parse_config_date(config.get('date'))
+    if page_date is not None:
+        dt = datetime.datetime(
+            page_date.year, page_date.month, page_date.day)
+
+        page_time = _parse_config_time(config.get('time'))
+        if page_time is not None:
+            dt += page_time
+
+        return dt
+
+    # No idea what the date/time for this page is.
+    return datetime.datetime.fromtimestamp(0)
+
+
 def _parse_config_date(page_date):
     if page_date is None:
         return None
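
Two details of this refactor are worth noting. The removed _computeDateTime()
assigned self._datetime internally but had no return statement, so the call
site's `self._datetime = self._computeDateTime()` overwrote the result with
None; and its epoch fallback sat inside the page-config branch, resetting a
valid date to epoch whenever `time` was missing while leaving the value unset
when no date was found at all. The new module-level _compute_datetime() is a
pure function that always returns a value. A runnable illustration of its
fallback chain (source datetime, then source date, then page-config date,
then epoch), with the _parse_config_* helpers simplified away and `time`
assumed to already be a timedelta:

    import datetime

    def compute_datetime(source_metadata, config):
        dt = source_metadata.get('datetime')
        if dt is not None:
            return dt
        page_date = source_metadata.get('date') or config.get('date')
        if page_date is not None:
            dt = datetime.datetime(page_date.year, page_date.month,
                                   page_date.day)
            page_time = config.get('time')
            if page_time is not None:
                dt += page_time
            return dt
        return datetime.datetime.fromtimestamp(0)

    print(compute_datetime({'date': datetime.date(2017, 6, 6)},
                           {'time': datetime.timedelta(hours=1)}))
    # 2017-06-06 01:00:00
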
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/resources/theme/templates/_year.html	Tue Jun 06 01:23:25 2017 -0700
@@ -0,0 +1,15 @@
+{% extends "default.html" %}
+
+{% block main %}
+<h2>Posts in {{ year }}</h2>
+
+<section>
+    {% for post in pagination.posts %}
+    <p><em>{{post.timestamp|date('%d %B')}}</em> &ndash; <a href="{{ post.url }}">{{ post.title }}</a></p>
+    {% endfor %}
+</section>
+<section>
+    {% if pagination.prev_page %}<div class="prev"><a href="{{ pagination.prev_page }}">Next Posts</a></div>{% endif %}
+    {% if pagination.next_page %}<div class="next"><a href="{{ pagination.next_page }}">Previous Posts</a></div>{% endif %}
+</section>
+{% endblock %}
--- a/piecrust/sources/blogarchives.py	Tue Jun 06 00:26:21 2017 -0700
+++ b/piecrust/sources/blogarchives.py	Tue Jun 06 01:23:25 2017 -0700
@@ -1,30 +1,51 @@
 import logging
 import datetime
-from piecrust.chefutil import format_timed_scope
 from piecrust.data.filters import PaginationFilter, IFilterClause
-from piecrust.dataproviders.pageiterator import PageIterator
-from piecrust.pipelines.base import ContentPipeline
+from piecrust.dataproviders.pageiterator import (
+    PageIterator, HardCodedFilterIterator, DateSortIterator)
+from piecrust.page import Page
+from piecrust.pipelines._pagebaker import PageBaker
+from piecrust.pipelines._pagerecords import PagePipelineRecordEntry
+from piecrust.pipelines.base import (
+    ContentPipeline, get_record_name_for_source)
 from piecrust.routing import RouteParameter
-from piecrust.sources.base import ContentSource, GeneratedContentException
+from piecrust.sources.base import ContentItem
+from piecrust.sources.generator import GeneratorSourceBase
 
 
 logger = logging.getLogger(__name__)
 
 
-class BlogArchivesSource(ContentSource):
+_year_index = """---
+layout: %(template)s
+---
+"""
+
+
+class BlogArchivesSource(GeneratorSourceBase):
     SOURCE_NAME = 'blog_archives'
     DEFAULT_PIPELINE_NAME = 'blog_archives'
 
     def __init__(self, app, name, config):
         super().__init__(app, name, config)
 
-    def getContents(self, group):
-        raise GeneratedContentException()
+        tpl_name = config.get('template', '_year.html')
+        self._raw_item = _year_index % {'template': tpl_name}
+
+    def getSupportedRouteParameters(self):
+        return [RouteParameter('year', RouteParameter.TYPE_INT4)]
+
+    def findContent(self, route_params):
+        year = route_params['year']
+        spec = '_index[%04d]' % year
+        metadata = {'route_params': {'year': year}}
+        return ContentItem(spec, metadata)
 
     def prepareRenderContext(self, ctx):
-        ctx.pagination_source = self.source
+        ctx.pagination_source = self.inner_source
 
-        year = ctx.page.route_metadata.get('year')
+        route_params = ctx.page.source_metadata['route_params']
+        year = route_params.get('year')
         if year is None:
             raise Exception(
                 "Can't find the archive year in the route metadata")
@@ -41,41 +62,11 @@
 
         flt2 = PaginationFilter()
         flt2.addClause(IsFromYearFilterClause(year))
-        it = PageIterator(self.source, pagination_filter=flt2,
-                          sorter=_date_sorter)
+        it = PageIterator(self.inner_source)
+        it._simpleNonSortedWrap(HardCodedFilterIterator, flt2)
+        it._wrapAsSort(DateSortIterator, reverse=False)
         ctx.custom_data['archives'] = it
 
-    def bake(self, ctx):
-        if not self.page_ref.exists:
-            logger.debug(
-                "No page found at '%s', skipping %s archives." %
-                (self.page_ref, self.source_name))
-            return
-
-        logger.debug("Baking %s archives...", self.source_name)
-        with format_timed_scope(logger, 'gathered archive years',
-                                level=logging.DEBUG, colored=False):
-            all_years, dirty_years = self._buildDirtyYears(ctx)
-
-        with format_timed_scope(logger, "baked %d %s archives." %
-                                (len(dirty_years), self.source_name)):
-            self._bakeDirtyYears(ctx, all_years, dirty_years)
-
-    def _getSource(self):
-        return self.app.getSource(self.config['source'])
-
-    def _buildDirtyYears(self, ctx):
-        logger.debug("Gathering dirty post years.")
-        all_years = set()
-        dirty_years = set()
-        for _, cur_entry in ctx.getAllPageRecords():
-            if cur_entry and cur_entry.source_name == self.source_name:
-                dt = datetime.datetime.fromtimestamp(cur_entry.timestamp)
-                all_years.add(dt.year)
-                if cur_entry.was_any_sub_baked:
-                    dirty_years.add(dt.year)
-        return all_years, dirty_years
-
     def _bakeDirtyYears(self, ctx, all_years, dirty_years):
         route = self.app.getGeneratorRoute(self.name)
         if route is None:
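
prepareRenderContext() points pagination at the inner source (which is what
_year.html's `pagination.posts` walks above) and additionally exposes an
`archives` iterator restricted to one year and sorted oldest-first
(reverse=False). A self-contained sketch of that filter-then-sort
composition, with plain Python standing in for PieCrust's filter and
iterator classes:

    import datetime

    class FakePage:
        def __init__(self, y, m):
            self.datetime = datetime.datetime(y, m, 1)

    pages = [FakePage(2017, 6), FakePage(2016, 3), FakePage(2017, 1)]
    year = 2017
    archives = sorted((p for p in pages if p.datetime.year == year),
                      key=lambda p: p.datetime)
    print([p.datetime.date() for p in archives])
    # [datetime.date(2017, 1, 1), datetime.date(2017, 6, 1)]
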
@@ -93,27 +84,6 @@
             ctx.queueBakeJob(fac, route, extra_route_metadata, str(y))
         ctx.runJobQueue()
 
-        # Create bake entries for the years that were *not* dirty.
-        # Otherwise, when checking for deleted pages, we would not find any
-        # outputs and would delete those files.
-        all_str_years = [str(y) for y in all_years]
-        for prev_entry, cur_entry in ctx.getAllPageRecords():
-            if prev_entry and not cur_entry:
-                try:
-                    y = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
-                except InvalidRecordExtraKey:
-                    continue
-                if y in all_str_years:
-                    logger.debug(
-                        "Creating unbaked entry for year %s archive." % y)
-                    ctx.collapseRecord(prev_entry)
-                else:
-                    logger.debug(
-                        "No page references year %s anymore." % y)
-
-    def getSupportedRouteParameters(self):
-        return [RouteParameter('year', RouteParameter.TYPE_INT4)]
-
 
 class IsFromYearFilterClause(IFilterClause):
     def __init__(self, year):
@@ -127,6 +97,93 @@
     return sorted(it, key=lambda x: x.datetime)
 
 
+class BlogArchivesPipelineRecordEntry(PagePipelineRecordEntry):
+    def __init__(self):
+        super().__init__()
+        self.year = None
+
+
 class BlogArchivesPipeline(ContentPipeline):
     PIPELINE_NAME = 'blog_archives'
     PASS_NUM = 1
+    RECORD_ENTRY_CLASS = BlogArchivesPipelineRecordEntry
+
+    def __init__(self, source, ctx):
+        if not isinstance(source, BlogArchivesSource):
+            raise Exception("The blog archives pipeline only supports blog "
+                            "archives content sources.")
+
+        super().__init__(source, ctx)
+        self.inner_source = source.inner_source
+        self._tpl_name = source.config['template']
+        self._all_years = None
+        self._dirty_years = None
+        self._pagebaker = None
+
+    def initialize(self):
+        self._pagebaker = PageBaker(self.app,
+                                    self.ctx.out_dir,
+                                    force=self.ctx.force)
+        self._pagebaker.startWriterQueue()
+
+    def shutdown(self):
+        self._pagebaker.stopWriterQueue()
+
+    def createJobs(self, ctx):
+        logger.debug("Building blog archives for: %s" %
+                     self.inner_source.name)
+        self._buildDirtyYears(ctx)
+        logger.debug("Got %d dirty years out of %d." %
+                     (len(self._dirty_years), len(self._all_years)))
+
+        jobs = []
+        for y in self._dirty_years:
+            item = ContentItem(
+                '_index[%04d]' % y,
+                {'route_params': {'year': y}})
+            jobs.append(self.createJob(item))
+        if len(jobs) > 0:
+            return jobs
+        return None
+
+    def run(self, job, ctx, result):
+        page = Page(self.source, job.content_item)
+        prev_entry = ctx.previous_entry
+        cur_entry = result.record_entry
+        cur_entry.year = job.content_item.metadata['route_params']['year']
+        self._pagebaker.bake(page, prev_entry, cur_entry, [])
+
+    def postJobRun(self, ctx):
+        # Create bake entries for the years that were *not* dirty.
+        # Otherwise, when checking for deleted pages, we would not find any
+        # outputs and would delete those files.
+        all_years = self._all_years
+        for prev, cur in ctx.record_history.diffs:
+            if prev and not cur:
+                y = prev.year
+                if y in all_years:
+                    logger.debug(
+                        "Creating unbaked entry for year %s archive." % y)
+                    cur.year = y
+                    cur.out_paths = list(prev.out_paths)
+                    cur.errors = list(prev.errors)
+                else:
+                    logger.debug(
+                        "No page references year %s anymore." % y)
+
+    def _buildDirtyYears(self, ctx):
+        all_years = set()
+        dirty_years = set()
+
+        record_name = get_record_name_for_source(self.inner_source)
+        current_records = ctx.record_histories.current
+        cur_rec = current_records.getRecord(record_name)
+        for cur_entry in cur_rec.getEntries():
+            dt = datetime.datetime.fromtimestamp(cur_entry.timestamp)
+            all_years.add(dt.year)
+            if cur_entry.was_any_sub_baked:
+                dirty_years.add(dt.year)
+
+        self._all_years = all_years
+        self._dirty_years = dirty_years
+
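
The baking logic that used to live on the source now lives in
BlogArchivesPipeline: createJobs() synthesizes one _index[YYYY] content item
per dirty year, run() bakes it and records the year on the record entry, and
postJobRun() re-creates entries for clean years so their outputs aren't
mistaken for deleted pages. A sketch of the dirty-year computation, with
plain dicts standing in for the inner source's record entries:

    import datetime

    def build_dirty_years(entries):
        all_years, dirty_years = set(), set()
        for e in entries:
            year = datetime.datetime.fromtimestamp(e['timestamp']).year
            all_years.add(year)
            if e['was_any_sub_baked']:
                dirty_years.add(year)
        return all_years, dirty_years

    entries = [
        {'timestamp': 1496733805, 'was_any_sub_baked': True},   # mid-2017
        {'timestamp': 1467379200, 'was_any_sub_baked': False},  # mid-2016
    ]
    print(build_dirty_years(entries))
    # ({2016, 2017}, {2017})
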
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/sources/generator.py	Tue Jun 06 01:23:25 2017 -0700
@@ -0,0 +1,35 @@
+import io
+import time
+from werkzeug.utils import cached_property
+from piecrust.configuration import ConfigurationError
+from piecrust.sources.base import ContentSource, GeneratedContentException
+
+
+class GeneratorSourceBase(ContentSource):
+    def __init__(self, app, name, config):
+        super().__init__(app, name, config)
+
+        source_name = config.get('source')
+        if source_name is None:
+            raise ConfigurationError(
+                "Taxonomy source '%s' requires an inner source." % name)
+        self._inner_source_name = source_name
+
+        self._raw_item = ''
+
+    @cached_property
+    def inner_source(self):
+        return self.app.getSource(self._inner_source_name)
+
+    def getContents(self, group):
+        # Our content is procedurally generated from other content sources,
+        # so we really don't support listing anything here -- it would
+        # typically be quite costly.
+        raise GeneratedContentException()
+
+    def openItem(self, item, mode='r', **kwargs):
+        return io.StringIO(self._raw_item)
+
+    def getItemMtime(self, item):
+        return time.time()
+
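
GeneratorSourceBase factors out the plumbing that taxonomy.py below no
longer carries itself: the required `source` setting, lazy inner-source
resolution, the GeneratedContentException for listings, an in-memory item
body, and an always-fresh mtime. A self-contained sketch of that contract
(the class is illustrative, not PieCrust's API; functools.cached_property
stands in for werkzeug's):

    import io
    import time
    from functools import cached_property

    class GeneratorSketch:
        def __init__(self, app, inner_source_name):
            self.app = app
            self._inner_source_name = inner_source_name
            self._raw_item = '---\nlayout: _year.html\n---\n'

        @cached_property
        def inner_source(self):
            # Resolved on first access, so declaration order between
            # sources doesn't matter.
            return self.app.getSource(self._inner_source_name)

        def openItem(self, item, mode='r', **kwargs):
            # Every generated item "opens" as the same in-memory text.
            return io.StringIO(self._raw_item)

        def getItemMtime(self, item):
            # Generated items always look freshly modified.
            return time.time()
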
--- a/piecrust/sources/taxonomy.py	Tue Jun 06 00:26:21 2017 -0700
+++ b/piecrust/sources/taxonomy.py	Tue Jun 06 01:23:25 2017 -0700
@@ -1,9 +1,6 @@
-import io
 import re
-import time
 import logging
 import unidecode
-from werkzeug.utils import cached_property
 from piecrust.configuration import ConfigurationError
 from piecrust.data.filters import (
     PaginationFilter, SettingFilterClause)
@@ -14,8 +11,8 @@
     ContentPipeline, get_record_name_for_source)
 from piecrust.pipelines.records import RecordHistory
 from piecrust.routing import RouteParameter
-from piecrust.sources.base import (
-    ContentItem, ContentSource, GeneratedContentException)
+from piecrust.sources.base import ContentItem
+from piecrust.sources.generator import GeneratorSourceBase
 
 
 logger = logging.getLogger(__name__)
@@ -57,7 +54,7 @@
 """
 
 
-class TaxonomySource(ContentSource):
+class TaxonomySource(GeneratorSourceBase):
     """ A content source that generates taxonomy listing pages.
     """
     SOURCE_NAME = 'taxonomy'
@@ -66,12 +63,6 @@
     def __init__(self, app, name, config):
         super().__init__(app, name, config)
 
-        source_name = config.get('source')
-        if source_name is None:
-            raise ConfigurationError(
-                "Taxonomy source '%s' requires an inner source." % name)
-        self._inner_source_name = source_name
-
         tax_name = config.get('taxonomy')
         if tax_name is None:
             raise ConfigurationError(
@@ -84,25 +75,6 @@
         tpl_name = config.get('template', '_%s.html' % tax_name)
         self._raw_item = _taxonomy_index % {'template': tpl_name}
 
-    @cached_property
-    def inner_source(self):
-        return self.app.getSource(self._inner_source_name)
-
-    def openItem(self, item, mode='r', **kwargs):
-        return io.StringIO(self._raw_item)
-
-    def getItemMtime(self, item):
-        return time.time()
-
-    def getContents(self, group):
-        # Our content is procedurally generated from other content sources,
-        # so we really don't support listing anything here -- it would be
-        # quite costly.
-        #
-        # Instead, our pipeline (the `TaxonomyPipeline`) will generate
-        # content items for us when it is asked to produce bake jobs.
-        raise GeneratedContentException()
-
     def getSupportedRouteParameters(self):
         name = self.taxonomy.term_name
         param_type = (RouteParameter.TYPE_PATH if self.taxonomy.is_multiple
@@ -321,6 +293,7 @@
         page = Page(self.source, job.content_item)
         prev_entry = ctx.previous_entry
         cur_entry = result.record_entry
+        cur_entry.term = job.content_item.metadata['term']
         self._pagebaker.bake(page, prev_entry, cur_entry, [])
 
     def postJobRun(self, ctx):
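
With the shared machinery hoisted into GeneratorSourceBase, TaxonomySource
keeps only its taxonomy-specific configuration and its synthesized
_raw_item. Both sources build that item the same way: a page that is nothing
but a config header pointing at a theme template. For instance, with the
_year_index string from blogarchives.py above:

    _year_index = """---
    layout: %(template)s
    ---
    """

    print(_year_index % {'template': '_year.html'}, end='')
    # ---
    # layout: _year.html
    # ---
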