changeset 877:d6d35b2efd04

bake: Rename "pass" to "step" and make the page pipeline use separate steps. The pipeline now first loads all pages, and then renders each page fully unless it triggers a sub-render, in which case it is postponed to a later step.
author Ludovic Chabant <ludovic@chabant.com>
date Thu, 15 Jun 2017 22:16:23 -0700
parents d1095774bfcf
children 313db67cfc35
files piecrust/baking/baker.py piecrust/data/pagedata.py piecrust/data/paginationdata.py piecrust/dataproviders/pageiterator.py piecrust/pipelines/_pagebaker.py piecrust/pipelines/base.py piecrust/pipelines/page.py piecrust/rendering.py piecrust/sources/blogarchives.py piecrust/sources/taxonomy.py
diffstat 10 files changed, 91 insertions(+), 78 deletions(-)
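
The commit message describes a three-step page pipeline: step 0 loads every page, step 1 renders each page unless it has to pull in another source (in which case the job is postponed), and step 2 renders the postponed pages with aborting disabled. Below is a minimal, hedged sketch of that flow; Job, run_step and the pipeline methods are illustrative names, not the actual PieCrust API (the real dispatch is PagePipeline.run in piecrust/pipelines/page.py).

    # Minimal sketch of the step-based dispatch; names here are
    # illustrative stand-ins, not the real PieCrust classes.
    class Job:
        def __init__(self, content_item, step_num=0):
            self.content_item = content_item
            self.step_num = step_num

    def run_step(pipeline, job):
        """Return a follow-up Job for the next step, or None if done."""
        if job.step_num == 0:
            # Step 0: just load the page (config, timestamp) and always
            # ask for a render on the next step.
            pipeline.load(job.content_item)
            return Job(job.content_item, step_num=1)
        elif job.step_num == 1:
            # Step 1: render fully, unless the page uses another source
            # mid-render, in which case postpone it to step 2.
            if pipeline.render_or_postpone(job.content_item):
                return None
            return Job(job.content_item, step_num=2)
        elif job.step_num == 2:
            # Step 2: render unconditionally; aborting is disabled here.
            pipeline.render_always(job.content_item)
            return None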
--- a/piecrust/baking/baker.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/baking/baker.py	Thu Jun 15 22:16:23 2017 -0700
@@ -214,11 +214,11 @@
             return True
 
     def _bakeRealm(self, pool, record_histories, pp_pass_num, realm, pplist):
-        # Start with the first pass, where we iterate on the content sources'
+        # Start with the first step, where we iterate on the content sources'
         # items and run jobs on those.
-        pool.userdata.cur_pass = 0
-        next_pass_jobs = {}
-        pool.userdata.next_pass_jobs = next_pass_jobs
+        pool.userdata.cur_step = 0
+        next_step_jobs = {}
+        pool.userdata.next_step_jobs = next_step_jobs
 
         start_time = time.perf_counter()
         job_count = 0
@@ -231,10 +231,10 @@
 
             logger.debug(
                 "Queuing jobs for source '%s' using pipeline '%s' "
-                "(%s, pass 0)." %
+                "(%s, step 0)." %
                 (src.name, pp.PIPELINE_NAME, realm_name))
 
-            next_pass_jobs[src.name] = []
+            next_step_jobs[src.name] = []
             jcctx = PipelineJobCreateContext(pp_pass_num, record_histories)
             jobs = pp.createJobs(jcctx)
             if jobs is not None:
@@ -250,29 +250,29 @@
         pool.wait()
 
         logger.info(format_timed(
-            start_time, "%d pipeline jobs completed (%s, pass 0)." %
+            start_time, "%d pipeline jobs completed (%s, step 0)." %
             (job_count, realm_name)))
 
         # Now let's see if any job created a follow-up job. Let's keep
         # processing those jobs as long as they create new ones.
-        pool.userdata.cur_pass = 1
+        pool.userdata.cur_step = 1
         while True:
-            # Make a copy of out next pass jobs and reset the list, so
+            # Make a copy of our next step jobs and reset the list, so
             # the first jobs to be processed don't mess it up as we're
             # still iterating on it.
-            next_pass_jobs = pool.userdata.next_pass_jobs
-            pool.userdata.next_pass_jobs = {}
+            next_step_jobs = pool.userdata.next_step_jobs
+            pool.userdata.next_step_jobs = {}
 
             start_time = time.perf_counter()
             job_count = 0
 
-            for sn, jobs in next_pass_jobs.items():
+            for sn, jobs in next_step_jobs.items():
                 if jobs:
                     logger.debug(
-                        "Queuing jobs for source '%s' (%s, pass %d)." %
-                        (sn, realm_name, pool.userdata.cur_pass))
+                        "Queuing jobs for source '%s' (%s, step %d)." %
+                        (sn, realm_name, pool.userdata.cur_step))
                     job_count += len(jobs)
-                    pool.userdata.next_pass_jobs[sn] = []
+                    pool.userdata.next_step_jobs[sn] = []
                     pool.queueJobs(jobs)
 
             stats.stepTimer('WorkerTastPut', time.perf_counter() - start_time)
@@ -284,10 +284,10 @@
 
             logger.info(format_timed(
                 start_time,
-                "%d pipeline jobs completed (%s, pass %d)." %
-                (job_count, realm_name, pool.userdata.cur_pass)))
+                "%d pipeline jobs completed (%s, step %d)." %
+                (job_count, realm_name, pool.userdata.cur_step)))
 
-            pool.userdata.cur_pass += 1
+            pool.userdata.cur_step += 1
 
     def _logErrors(self, item_spec, errors):
         logger.error("Errors found in %s:" % item_spec)
@@ -319,21 +319,20 @@
         return pool
 
     def _handleWorkerResult(self, job, res, userdata):
-        cur_pass = userdata.cur_pass
+        cur_step = userdata.cur_step
         record = userdata.records.getRecord(job.record_name)
 
-        if cur_pass == 0:
+        if cur_step == 0:
             record.addEntry(res.record_entry)
         else:
             ppinfo = userdata.ppmngr.getPipeline(job.source_name)
-            ppmrctx = PipelineMergeRecordContext(
-                record, job, cur_pass)
+            ppmrctx = PipelineMergeRecordContext(record, job, cur_step)
             ppinfo.pipeline.mergeRecordEntry(res.record_entry, ppmrctx)
 
-        npj = res.next_pass_job
+        npj = res.next_step_job
         if npj is not None:
-            npj.data['pass'] = cur_pass + 1
-            userdata.next_pass_jobs[job.source_name].append(npj)
+            npj.step_num = cur_step + 1
+            userdata.next_step_jobs[job.source_name].append(npj)
 
         if not res.record_entry.success:
             record.success = False
@@ -341,18 +340,21 @@
             self._logErrors(job.content_item.spec, res.record_entry.errors)
 
     def _handleWorkerError(self, job, exc_data, userdata):
-        cur_pass = userdata.cur_pass
+        cur_step = userdata.cur_step
         record = userdata.records.getRecord(job.record_name)
 
-        if cur_pass == 0:
+        record_entry_spec = job.content_item.metadata.get(
+            'record_entry_spec', job.content_item.spec)
+
+        if cur_step == 0:
             ppinfo = userdata.ppmngr.getPipeline(job.source_name)
             entry_class = ppinfo.pipeline.RECORD_ENTRY_CLASS or RecordEntry
             e = entry_class()
-            e.item_spec = job.content_item.spec
+            e.item_spec = record_entry_spec
             e.errors.append(str(exc_data))
             record.addEntry(e)
         else:
-            e = record.getEntry(job.record_entry_spec)
+            e = record.getEntry(record_entry_spec)
             e.errors.append(str(exc_data))
 
         record.success = False
@@ -368,8 +370,8 @@
         self.baker = baker
         self.ppmngr = ppmngr
         self.records = ppmngr.record_histories.current
-        self.cur_pass = 0
-        self.next_pass_jobs = {}
+        self.cur_step = 0
+        self.next_step_jobs = {}
 
 
 def _get_pipeline_infos_by_pass_and_realm(pp_infos):
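
The rewritten _bakeRealm above drives those steps: it queues step-0 jobs straight from the content sources, then keeps looping as long as any job handed back a follow-up via next_step_jobs. A rough sketch of that loop, with make_jobs and run_jobs as hypothetical stand-ins for pipeline.createJobs() and the worker-pool plumbing in baker.py:

    def bake_realm(sources, make_jobs, run_jobs):
        # Step 0: jobs come straight from the content sources.
        next_step_jobs = {src: [] for src in sources}
        jobs = [j for src in sources for j in make_jobs(src)]
        run_jobs(jobs, step=0, next_step_jobs=next_step_jobs)

        # Steps 1..n: keep going while some job asked for a follow-up.
        step = 1
        while any(next_step_jobs.values()):
            # Swap in a fresh dict first, so handlers running during this
            # step can register follow-ups without touching the lists we
            # are currently draining.
            current, next_step_jobs = next_step_jobs, {src: [] for src in sources}
            jobs = [j for js in current.values() for j in js]
            run_jobs(jobs, step=step, next_step_jobs=next_step_jobs)
            step += 1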
--- a/piecrust/data/pagedata.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/data/pagedata.py	Thu Jun 15 22:16:23 2017 -0700
@@ -1,6 +1,7 @@
 import time
 import logging
 import collections.abc
+from piecrust.sources.base import AbortedSourceUseError
 
 
 logger = logging.getLogger(__name__)
@@ -70,7 +71,7 @@
         if loader is not None:
             try:
                 self._values[name] = loader(self, name)
-            except LazyPageConfigLoaderHasNoValue:
+            except (LazyPageConfigLoaderHasNoValue, AbortedSourceUseError):
                 raise
             except Exception as ex:
                 logger.exception(ex)
@@ -90,7 +91,7 @@
         if loader is not None:
             try:
                 self._values[name] = loader(self, name)
-            except LazyPageConfigLoaderHasNoValue:
+            except (LazyPageConfigLoaderHasNoValue, AbortedSourceUseError):
                 raise
             except Exception as ex:
                 logger.exception(ex)
--- a/piecrust/data/paginationdata.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/data/paginationdata.py	Thu Jun 15 22:16:23 2017 -0700
@@ -1,6 +1,7 @@
 import time
 import logging
 from piecrust.data.pagedata import LazyPageConfigData
+from piecrust.sources.base import AbortedSourceUseError
 
 
 logger = logging.getLogger(__name__)
@@ -67,6 +68,8 @@
             ctx = RenderingContext(page)
             render_result = render_page_segments(ctx)
             segs = render_result.segments
+        except AbortedSourceUseError:
+            raise
         except Exception as ex:
             logger.exception(ex)
             raise Exception(
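
pagedata.py and paginationdata.py now let AbortedSourceUseError pass through their catch-all handlers instead of swallowing it, because the render-or-postpone step relies on that exception reaching the pipeline. A hedged illustration of the pattern (load_value is a made-up stand-in for the lazy loader call):

    from piecrust.sources.base import AbortedSourceUseError

    def safe_load(load_value, name):
        try:
            return load_value(name)
        except AbortedSourceUseError:
            # Not a real error: this is the "postpone this page" signal
            # and must reach the page pipeline untouched.
            raise
        except Exception as ex:
            raise Exception("Error loading value '%s'." % name) from ex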
--- a/piecrust/dataproviders/pageiterator.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/dataproviders/pageiterator.py	Thu Jun 15 22:16:23 2017 -0700
@@ -3,7 +3,7 @@
 from piecrust.data.paginationdata import PaginationData
 from piecrust.events import Event
 from piecrust.dataproviders.base import DataProvider
-from piecrust.sources.base import ContentSource, AbortedSourceUseError
+from piecrust.sources.base import ContentSource
 
 
 logger = logging.getLogger(__name__)
@@ -240,17 +240,6 @@
         if self._cache is not None:
             return
 
-        if self._is_content_source:
-            if self._source.app.env.abort_source_use:
-                if self._current_page is not None:
-                    logger.debug("Aborting iteration of '%s' from: %s." %
-                                 (self._source.name,
-                                  self._current_page.content_spec))
-                else:
-                    logger.debug("Aborting iteration of '%s'." %
-                                 self._source.name)
-                raise AbortedSourceUseError()
-
         self._ensureSorter()
 
         if self._is_content_source:
--- a/piecrust/pipelines/_pagebaker.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/pipelines/_pagebaker.py	Thu Jun 15 22:16:23 2017 -0700
@@ -6,6 +6,7 @@
 import urllib.parse
 from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry
 from piecrust.rendering import RenderingContext, render_page
+from piecrust.sources.base import AbortedSourceUseError
 from piecrust.uriutil import split_uri
 
 
@@ -78,7 +79,6 @@
 
             # Create the sub-entry for the bake record.
             cur_sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
-            cur_entry.subs.append(cur_sub_entry)
 
             # Find a corresponding sub-entry in the previous bake record.
             prev_sub_entry = None
@@ -97,6 +97,7 @@
             if bake_status == STATUS_CLEAN:
                 cur_sub_entry.render_info = prev_sub_entry.copyRenderInfo()
                 cur_sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE
+                cur_entry.subs.append(cur_sub_entry)
 
                 if prev_entry.num_subs >= cur_sub + 1:
                     cur_sub += 1
@@ -118,6 +119,8 @@
 
                 logger.debug("  p%d -> %s" % (cur_sub, out_path))
                 rp = self._bakeSingle(page, cur_sub, out_path)
+            except AbortedSourceUseError:
+                raise
             except Exception as ex:
                 logger.exception(ex)
                 raise BakingError("%s: error baking '%s'." %
@@ -126,6 +129,7 @@
             # Record what we did.
             cur_sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
             cur_sub_entry.render_info = rp.copyRenderInfo()
+            cur_entry.subs.append(cur_sub_entry)
 
             # Copy page assets.
             if (cur_sub == 1 and
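
The reshuffling in _pagebaker.py means a sub-page entry is only appended to the record once its outcome is known, either reused as clean or actually baked, so a render aborted by AbortedSourceUseError no longer leaves a half-filled sub-entry behind. A sketch of the pattern with hypothetical names:

    FLAG_BAKED = 1  # placeholder for SubPagePipelineRecordEntry.FLAG_BAKED

    def bake_sub(cur_entry, sub_entry, is_clean, bake):
        """Append the sub-entry only once we know how it turned out."""
        if is_clean:
            cur_entry.subs.append(sub_entry)   # unchanged since last bake
            return
        bake()                                  # may raise AbortedSourceUseError
        sub_entry.flags |= FLAG_BAKED
        cur_entry.subs.append(sub_entry)        # recorded only after success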
--- a/piecrust/pipelines/base.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/pipelines/base.py	Thu Jun 15 22:16:23 2017 -0700
@@ -39,14 +39,15 @@
         self.source_name = pipeline.source.name
         self.record_name = pipeline.record_name
         self.content_item = content_item
+        self.step_num = 0
         self.data = {}
 
 
 class PipelineJobCreateContext:
     """ Context for create pipeline baking jobs.
     """
-    def __init__(self, pass_num, record_histories):
-        self.pass_num = pass_num
+    def __init__(self, step_num, record_histories):
+        self.step_num = step_num
         self.record_histories = record_histories
 
 
@@ -82,17 +83,17 @@
     """
     def __init__(self):
         self.record_entry = None
-        self.next_pass_job = None
+        self.next_step_job = None
 
 
 class PipelineMergeRecordContext:
     """ The context for merging a record entry for a second or higher pass
         into the bake record.
     """
-    def __init__(self, record, job, pass_num):
+    def __init__(self, record, job, step_num):
         self.record = record
         self.job = job
-        self.pass_num = pass_num
+        self.step_num = step_num
 
 
 class PipelinePostJobRunContext:
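
With step_num stored on the job itself (instead of job.data['pass']) and next_step_job on the result, a pipeline hands work to the next step roughly as below; ExamplePipeline and its helpers are hypothetical, and the PipelineJob constructor arguments are assumed from the attributes shown above.

    class ExamplePipeline:
        def createJob(self, content_item):
            return PipelineJob(self, content_item)   # step_num starts at 0

        def run(self, job, ctx, result):
            if job.step_num == 0:
                self.prepare(job.content_item)
                # Ask the baker to re-queue this item; _handleWorkerResult
                # sets step_num = cur_step + 1 on the follow-up job.
                result.next_step_job = self.createJob(job.content_item)
            else:
                self.finish(job.content_item)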
--- a/piecrust/pipelines/page.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/pipelines/page.py	Thu Jun 15 22:16:23 2017 -0700
@@ -2,8 +2,6 @@
 from piecrust.pipelines.base import ContentPipeline
 from piecrust.pipelines._pagebaker import PageBaker
 from piecrust.pipelines._pagerecords import PagePipelineRecordEntry
-from piecrust.rendering import (
-    RenderingContext, render_page_segments)
 from piecrust.sources.base import AbortedSourceUseError
 
 
@@ -35,11 +33,13 @@
         existing.subs = record_entry.subs
 
     def run(self, job, ctx, result):
-        pass_name = job.data.get('pass', 0)
-        if pass_name == 0:
-            self._renderSegmentsOrPostpone(job.content_item, ctx, result)
-        elif pass_name == 1:
-            self._fullRender(job.content_item, ctx, result)
+        step_num = job.step_num
+        if step_num == 0:
+            self._loadPage(job.content_item, ctx, result)
+        elif step_num == 1:
+            self._renderOrPostpone(job.content_item, ctx, result)
+        elif step_num == 2:
+            self._renderAlways(job.content_item, ctx, result)
 
     def getDeletions(self, ctx):
         for prev, cur in ctx.record_history.diffs:
@@ -59,37 +59,35 @@
     def shutdown(self):
         self._pagebaker.stopWriterQueue()
 
-    def _renderSegmentsOrPostpone(self, content_item, ctx, result):
+    def _loadPage(self, content_item, ctx, result):
+        logger.debug("Loading page: %s" % content_item.spec)
+        page = self.app.getPage(self.source, content_item)
+        record_entry = result.record_entry
+        record_entry.config = page.config.getAll()
+        record_entry.timestamp = page.datetime.timestamp()
+        result.next_step_job = self.createJob(content_item)
+
+    def _renderOrPostpone(self, content_item, ctx, result):
         # Here our job is to render the page's segments so that they're
         # cached in memory and on disk... unless we detect that the page
         # is using some other sources, in which case we abort and we'll try
         # again on the second pass.
-        logger.debug("Rendering segments for: %s" % content_item.spec)
-        record_entry = result.record_entry
-        stats = self.app.env.stats
-
+        logger.debug("Conditional render for: %s" % content_item.spec)
         page = self.app.getPage(self.source, content_item)
-        record_entry.config = page.config.getAll()
-        record_entry.timestamp = page.datetime.timestamp()
-
-        rdrctx = RenderingContext(page)
+        prev_entry = ctx.previous_entry
+        cur_entry = result.record_entry
         self.app.env.abort_source_use = True
         try:
-            render_page_segments(rdrctx)
+            self._pagebaker.bake(page, prev_entry, cur_entry)
         except AbortedSourceUseError:
             logger.debug("Page was aborted for using source: %s" %
                          content_item.spec)
-            stats.stepCounter("SourceUseAbortions")
+            self.app.env.stats.stepCounter("SourceUseAbortions")
+            result.next_step_job = self.createJob(content_item)
         finally:
             self.app.env.abort_source_use = False
 
-        result.next_pass_job = self.createJob(content_item)
-        result.next_pass_job.data.update({
-            'pass': 1,
-            'record_entry': record_entry
-        })
-
-    def _fullRender(self, content_item, ctx, result):
+    def _renderAlways(self, content_item, ctx, result):
         logger.debug("Full render for: %s" % content_item.spec)
         page = self.app.getPage(self.source, content_item)
         prev_entry = ctx.previous_entry
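
The heart of the change is the new step 1: it sets app.env.abort_source_use before baking, so that any nested render of a page from another source raises and the job is simply postponed to step 2. A trimmed, hedged sketch of that logic (error handling and record bookkeeping omitted):

    def render_or_postpone(app, pagebaker, page, prev_entry, cur_entry,
                           create_follow_up_job):
        app.env.abort_source_use = True
        try:
            pagebaker.bake(page, prev_entry, cur_entry)
        except AbortedSourceUseError:
            # The page pulled in another source mid-render; hand it to
            # step 2, where rendering runs with aborting disabled.
            app.env.stats.stepCounter("SourceUseAbortions")
            return create_follow_up_job()
        finally:
            app.env.abort_source_use = False
        return None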
--- a/piecrust/rendering.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/rendering.py	Thu Jun 15 22:16:23 2017 -0700
@@ -6,6 +6,7 @@
     DataBuildingContext, build_page_data, add_layout_data)
 from piecrust.fastpickle import _pickle_object, _unpickle_object
 from piecrust.templating.base import TemplateNotFoundError, TemplatingError
+from piecrust.sources.base import AbortedSourceUseError
 
 
 logger = logging.getLogger(__name__)
@@ -127,6 +128,10 @@
         self._ctx_stack = []
 
     @property
+    def is_empty(self):
+        return len(self._ctx_stack) == 0
+
+    @property
     def current_ctx(self):
         if len(self._ctx_stack) == 0:
             return None
@@ -217,6 +222,8 @@
                 layout_result['pass_info'])
         return rp
 
+    except AbortedSourceUseError:
+        raise
     except Exception as ex:
         if ctx.app.debug:
             raise
@@ -234,6 +241,14 @@
     stats = env.stats
 
     stack = env.render_ctx_stack
+
+    if env.abort_source_use and not stack.is_empty:
+        cur_spec = ctx.page.content_spec
+        from_spec = stack.current_ctx.page.content_spec
+        logger.debug("Aborting rendering of '%s' from: %s." %
+                     (cur_spec, from_spec))
+        raise AbortedSourceUseError()
+
     stack.pushCtx(ctx)
 
     page = ctx.page
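
The abort check that used to live in the page iterator (see pageiterator.py above) now sits at the top of render_page_segments: only a nested render, i.e. one started while another page's rendering context is already on the stack, is aborted, so the page the pipeline is actually working on still renders normally. A condensed sketch:

    def maybe_abort_nested_render(env, ctx):
        stack = env.render_ctx_stack
        if env.abort_source_use and not stack.is_empty:
            # A page is already being rendered; rendering this second
            # page means the first one is using another source.
            raise AbortedSourceUseError()
        stack.pushCtx(ctx)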
--- a/piecrust/sources/blogarchives.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/sources/blogarchives.py	Thu Jun 15 22:16:23 2017 -0700
@@ -145,7 +145,7 @@
 
 class BlogArchivesPipeline(ContentPipeline):
     PIPELINE_NAME = 'blog_archives'
-    PASS_NUM = 1
+    PASS_NUM = 10
     RECORD_ENTRY_CLASS = BlogArchivesPipelineRecordEntry
 
     def __init__(self, source, ctx):
--- a/piecrust/sources/taxonomy.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/sources/taxonomy.py	Thu Jun 15 22:16:23 2017 -0700
@@ -239,7 +239,7 @@
 
 class TaxonomyPipeline(ContentPipeline):
     PIPELINE_NAME = 'taxonomy'
-    PASS_NUM = 1
+    PASS_NUM = 10
     RECORD_ENTRY_CLASS = TaxonomyPipelineRecordEntry
 
     def __init__(self, source, ctx):
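
Bumping PASS_NUM from 1 to 10 for the taxonomy and blog-archive pipelines keeps them running after the page pipeline while leaving room for intermediate passes later; the baker groups pipelines by PASS_NUM (see _get_pipeline_infos_by_pass_and_realm in baker.py). An illustrative grouping only, assuming the page pipeline stays at pass 0:

    from collections import defaultdict

    def group_by_pass(pipelines):
        by_pass = defaultdict(list)
        for pp in pipelines:
            by_pass[pp.PASS_NUM].append(pp)
        # e.g. [[PagePipeline], [TaxonomyPipeline, BlogArchivesPipeline]]
        return [by_pass[n] for n in sorted(by_pass)]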