changeset 877:d6d35b2efd04
bake: Rename "pass" to "step" and make the page pipeline use different steps.

That pipeline now first loads all pages, and then renders each page in
full unless it triggers a sub-render (i.e. pulls content from another
source), in which case that render is postponed to a later step.
author    Ludovic Chabant <ludovic@chabant.com>
date      Thu, 15 Jun 2017 22:16:23 -0700
parents   d1095774bfcf
children  313db67cfc35
files     piecrust/baking/baker.py piecrust/data/pagedata.py piecrust/data/paginationdata.py piecrust/dataproviders/pageiterator.py piecrust/pipelines/_pagebaker.py piecrust/pipelines/base.py piecrust/pipelines/page.py piecrust/rendering.py piecrust/sources/blogarchives.py piecrust/sources/taxonomy.py
diffstat  10 files changed, 91 insertions(+), 78 deletions(-)
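
As the description says, jobs can now chain across steps: a worker
result may carry a next_step_job, and the baker keeps re-queuing those
follow-ups until a step produces none. A minimal sketch of that loop,
with hypothetical stand-ins (Job, run_job) for the real worker-pool
machinery; only the chaining logic mirrors the diff below:

# Sketch of the step-driven job loop (hypothetical stand-ins for the
# real Baker/worker-pool types; only the chaining logic mirrors the
# change in _bakeRealm and _handleWorkerResult).
class Job:
    def __init__(self, payload, step_num=0):
        self.payload = payload
        self.step_num = step_num


def run_job(job):
    # Pretend worker: pages load on step 0, then render on step 1 or 2.
    if job.step_num < 2:
        return Job(job.payload, job.step_num)  # follow-up job
    return None


def bake(initial_jobs):
    cur_step = 0
    jobs = list(initial_jobs)
    while jobs:
        next_step_jobs = []
        for job in jobs:
            npj = run_job(job)
            if npj is not None:
                npj.step_num = cur_step + 1  # as in _handleWorkerResult
                next_step_jobs.append(npj)
        cur_step += 1
        jobs = next_step_jobs


bake([Job("pages/foo.md"), Job("pages/bar.md")])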
--- a/piecrust/baking/baker.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/baking/baker.py	Thu Jun 15 22:16:23 2017 -0700
@@ -214,11 +214,11 @@
         return True
 
     def _bakeRealm(self, pool, record_histories, pp_pass_num, realm, pplist):
-        # Start with the first pass, where we iterate on the content sources'
+        # Start with the first step, where we iterate on the content sources'
         # items and run jobs on those.
-        pool.userdata.cur_pass = 0
-        next_pass_jobs = {}
-        pool.userdata.next_pass_jobs = next_pass_jobs
+        pool.userdata.cur_step = 0
+        next_step_jobs = {}
+        pool.userdata.next_step_jobs = next_step_jobs
 
         start_time = time.perf_counter()
         job_count = 0
@@ -231,10 +231,10 @@
 
             logger.debug(
                 "Queuing jobs for source '%s' using pipeline '%s' "
-                "(%s, pass 0)." %
+                "(%s, step 0)." %
                 (src.name, pp.PIPELINE_NAME, realm_name))
 
-            next_pass_jobs[src.name] = []
+            next_step_jobs[src.name] = []
             jcctx = PipelineJobCreateContext(pp_pass_num, record_histories)
             jobs = pp.createJobs(jcctx)
             if jobs is not None:
@@ -250,29 +250,29 @@
         pool.wait()
 
         logger.info(format_timed(
-            start_time, "%d pipeline jobs completed (%s, pass 0)." %
+            start_time, "%d pipeline jobs completed (%s, step 0)." %
             (job_count, realm_name)))
 
         # Now let's see if any job created a follow-up job. Let's keep
         # processing those jobs as long as they create new ones.
-        pool.userdata.cur_pass = 1
+        pool.userdata.cur_step = 1
         while True:
-            # Make a copy of out next pass jobs and reset the list, so
+            # Make a copy of out next step jobs and reset the list, so
            # the first jobs to be processed don't mess it up as we're
            # still iterating on it.
-            next_pass_jobs = pool.userdata.next_pass_jobs
-            pool.userdata.next_pass_jobs = {}
+            next_step_jobs = pool.userdata.next_step_jobs
+            pool.userdata.next_step_jobs = {}
 
             start_time = time.perf_counter()
             job_count = 0
 
-            for sn, jobs in next_pass_jobs.items():
+            for sn, jobs in next_step_jobs.items():
                 if jobs:
                     logger.debug(
-                        "Queuing jobs for source '%s' (%s, pass %d)." %
-                        (sn, realm_name, pool.userdata.cur_pass))
+                        "Queuing jobs for source '%s' (%s, step %d)." %
+                        (sn, realm_name, pool.userdata.cur_step))
                     job_count += len(jobs)
-                    pool.userdata.next_pass_jobs[sn] = []
+                    pool.userdata.next_step_jobs[sn] = []
                     pool.queueJobs(jobs)
 
             stats.stepTimer('WorkerTastPut', time.perf_counter() - start_time)
@@ -284,10 +284,10 @@
 
             logger.info(format_timed(
                 start_time,
-                "%d pipeline jobs completed (%s, pass %d)." %
-                (job_count, realm_name, pool.userdata.cur_pass)))
+                "%d pipeline jobs completed (%s, step %d)." %
+                (job_count, realm_name, pool.userdata.cur_step)))
 
-            pool.userdata.cur_pass += 1
+            pool.userdata.cur_step += 1
 
     def _logErrors(self, item_spec, errors):
         logger.error("Errors found in %s:" % item_spec)
@@ -319,21 +319,20 @@
         return pool
 
     def _handleWorkerResult(self, job, res, userdata):
-        cur_pass = userdata.cur_pass
+        cur_step = userdata.cur_step
         record = userdata.records.getRecord(job.record_name)
 
-        if cur_pass == 0:
+        if cur_step == 0:
             record.addEntry(res.record_entry)
         else:
             ppinfo = userdata.ppmngr.getPipeline(job.source_name)
-            ppmrctx = PipelineMergeRecordContext(
-                record, job, cur_pass)
+            ppmrctx = PipelineMergeRecordContext(record, job, cur_step)
             ppinfo.pipeline.mergeRecordEntry(res.record_entry, ppmrctx)
 
-        npj = res.next_pass_job
+        npj = res.next_step_job
         if npj is not None:
-            npj.data['pass'] = cur_pass + 1
-            userdata.next_pass_jobs[job.source_name].append(npj)
+            npj.step_num = cur_step + 1
+            userdata.next_step_jobs[job.source_name].append(npj)
 
         if not res.record_entry.success:
             record.success = False
@@ -341,18 +340,21 @@
             self._logErrors(job.content_item.spec, res.record_entry.errors)
 
     def _handleWorkerError(self, job, exc_data, userdata):
-        cur_pass = userdata.cur_pass
+        cur_step = userdata.cur_step
         record = userdata.records.getRecord(job.record_name)
 
-        if cur_pass == 0:
+        record_entry_spec = job.content_item.metadata.get(
+            'record_entry_spec', job.content_item.spec)
+
+        if cur_step == 0:
             ppinfo = userdata.ppmngr.getPipeline(job.source_name)
             entry_class = ppinfo.pipeline.RECORD_ENTRY_CLASS or RecordEntry
             e = entry_class()
-            e.item_spec = job.content_item.spec
+            e.item_spec = record_entry_spec
             e.errors.append(str(exc_data))
             record.addEntry(e)
         else:
-            e = record.getEntry(job.record_entry_spec)
+            e = record.getEntry(record_entry_spec)
             e.errors.append(str(exc_data))
 
         record.success = False
@@ -368,8 +370,8 @@
         self.baker = baker
         self.ppmngr = ppmngr
         self.records = ppmngr.record_histories.current
-        self.cur_pass = 0
-        self.next_pass_jobs = {}
+        self.cur_step = 0
+        self.next_step_jobs = {}
 
 
 def _get_pipeline_infos_by_pass_and_realm(pp_infos):
--- a/piecrust/data/pagedata.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/data/pagedata.py	Thu Jun 15 22:16:23 2017 -0700
@@ -1,6 +1,7 @@
 import time
 import logging
 import collections.abc
+from piecrust.sources.base import AbortedSourceUseError
 
 
 logger = logging.getLogger(__name__)
@@ -70,7 +71,7 @@
         if loader is not None:
             try:
                 self._values[name] = loader(self, name)
-            except LazyPageConfigLoaderHasNoValue:
+            except (LazyPageConfigLoaderHasNoValue, AbortedSourceUseError):
                 raise
             except Exception as ex:
                 logger.exception(ex)
@@ -90,7 +91,7 @@
         if loader is not None:
             try:
                 self._values[name] = loader(self, name)
-            except LazyPageConfigLoaderHasNoValue:
+            except (LazyPageConfigLoaderHasNoValue, AbortedSourceUseError):
                 raise
             except Exception as ex:
                 logger.exception(ex)
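
The pagedata.py change above and the paginationdata.py change below
apply the same pattern: AbortedSourceUseError is a control-flow signal
rather than a failure, so it must escape the catch-all handlers that
log and wrap loader errors. A self-contained sketch of the pattern
(everything except AbortedSourceUseError is a hypothetical stand-in):

# Pass-through pattern from pagedata.py/paginationdata.py: re-raise
# control-flow exceptions untouched, log and wrap everything else.
import logging

logger = logging.getLogger(__name__)


class AbortedSourceUseError(Exception):
    pass


def call_loader(loader, name):
    try:
        return loader(name)
    except AbortedSourceUseError:
        # Not an error: bubble up so the pipeline can postpone the job.
        raise
    except Exception as ex:
        logger.exception(ex)
        raise Exception("Error while loading '%s'." % name) from ex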
--- a/piecrust/data/paginationdata.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/data/paginationdata.py	Thu Jun 15 22:16:23 2017 -0700
@@ -1,6 +1,7 @@
 import time
 import logging
 from piecrust.data.pagedata import LazyPageConfigData
+from piecrust.sources.base import AbortedSourceUseError
 
 
 logger = logging.getLogger(__name__)
@@ -67,6 +68,8 @@
             ctx = RenderingContext(page)
             render_result = render_page_segments(ctx)
             segs = render_result.segments
+        except AbortedSourceUseError:
+            raise
         except Exception as ex:
             logger.exception(ex)
             raise Exception(
--- a/piecrust/dataproviders/pageiterator.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/dataproviders/pageiterator.py	Thu Jun 15 22:16:23 2017 -0700
@@ -3,7 +3,7 @@
 from piecrust.data.paginationdata import PaginationData
 from piecrust.events import Event
 from piecrust.dataproviders.base import DataProvider
-from piecrust.sources.base import ContentSource, AbortedSourceUseError
+from piecrust.sources.base import ContentSource
 
 
 logger = logging.getLogger(__name__)
@@ -240,17 +240,6 @@
         if self._cache is not None:
             return
 
-        if self._is_content_source:
-            if self._source.app.env.abort_source_use:
-                if self._current_page is not None:
-                    logger.debug("Aborting iteration of '%s' from: %s." %
-                                 (self._source.name,
-                                  self._current_page.content_spec))
-                else:
-                    logger.debug("Aborting iteration of '%s'." %
-                                 self._source.name)
-                raise AbortedSourceUseError()
-
         self._ensureSorter()
 
         if self._is_content_source:
--- a/piecrust/pipelines/_pagebaker.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/pipelines/_pagebaker.py	Thu Jun 15 22:16:23 2017 -0700
@@ -6,6 +6,7 @@
 import urllib.parse
 from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry
 from piecrust.rendering import RenderingContext, render_page
+from piecrust.sources.base import AbortedSourceUseError
 from piecrust.uriutil import split_uri
 
 
@@ -78,7 +79,6 @@
 
             # Create the sub-entry for the bake record.
             cur_sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
-            cur_entry.subs.append(cur_sub_entry)
 
             # Find a corresponding sub-entry in the previous bake record.
             prev_sub_entry = None
@@ -97,6 +97,7 @@
             if bake_status == STATUS_CLEAN:
                 cur_sub_entry.render_info = prev_sub_entry.copyRenderInfo()
                 cur_sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE
+                cur_entry.subs.append(cur_sub_entry)
 
                 if prev_entry.num_subs >= cur_sub + 1:
                     cur_sub += 1
@@ -118,6 +119,8 @@
                 logger.debug("  p%d -> %s" % (cur_sub, out_path))
                 rp = self._bakeSingle(page, cur_sub, out_path)
+            except AbortedSourceUseError:
+                raise
             except Exception as ex:
                 logger.exception(ex)
                 raise BakingError("%s: error baking '%s'." %
@@ -126,6 +129,7 @@
             # Record what we did.
             cur_sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
             cur_sub_entry.render_info = rp.copyRenderInfo()
+            cur_entry.subs.append(cur_sub_entry)
 
             # Copy page assets.
             if (cur_sub == 1 and
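
The reordering in _pagebaker.py matters for the new postponing
behavior: the sub-entry used to be appended to cur_entry.subs as soon
as it was created, so a bake interrupted by AbortedSourceUseError
would leave a half-filled entry in the record. It is now appended only
on the two success paths. A reduced, self-contained sketch of that
control flow (SubEntry and bake_single are hypothetical stand-ins for
the real record types):

# Reduced sketch of the reordered bookkeeping in PageBaker.bake():
# the sub-entry lands in cur_entry.subs only once the bake was either
# skipped as clean or actually completed.
class AbortedSourceUseError(Exception):
    pass


class SubEntry:
    FLAG_NONE = 0
    FLAG_BAKED = 1

    def __init__(self, uri, out_path):
        self.uri = uri
        self.out_path = out_path
        self.flags = SubEntry.FLAG_NONE
        self.render_info = None


def bake_sub(cur_entry, sub_uri, out_path, is_clean, bake_single):
    sub_entry = SubEntry(sub_uri, out_path)
    if is_clean:
        cur_entry.subs.append(sub_entry)   # success path 1: clean skip
        return
    rp = bake_single(sub_uri, out_path)    # may raise AbortedSourceUseError
    sub_entry.flags |= SubEntry.FLAG_BAKED
    sub_entry.render_info = rp
    cur_entry.subs.append(sub_entry)       # success path 2: actually baked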
--- a/piecrust/pipelines/base.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/pipelines/base.py	Thu Jun 15 22:16:23 2017 -0700
@@ -39,14 +39,15 @@
         self.source_name = pipeline.source.name
         self.record_name = pipeline.record_name
         self.content_item = content_item
+        self.step_num = 0
         self.data = {}
 
 
 class PipelineJobCreateContext:
     """ Context for create pipeline baking jobs.
     """
-    def __init__(self, pass_num, record_histories):
-        self.pass_num = pass_num
+    def __init__(self, step_num, record_histories):
+        self.step_num = step_num
         self.record_histories = record_histories
 
 
@@ -82,17 +83,17 @@
     """
     def __init__(self):
         self.record_entry = None
-        self.next_pass_job = None
+        self.next_step_job = None
 
 
 class PipelineMergeRecordContext:
     """ The context for merging a record entry for a second or higher
         pass into the bake record.
     """
-    def __init__(self, record, job, pass_num):
+    def __init__(self, record, job, step_num):
         self.record = record
         self.job = job
-        self.pass_num = pass_num
+        self.step_num = step_num
 
 
 class PipelinePostJobRunContext:
--- a/piecrust/pipelines/page.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/pipelines/page.py	Thu Jun 15 22:16:23 2017 -0700
@@ -2,8 +2,6 @@
 from piecrust.pipelines.base import ContentPipeline
 from piecrust.pipelines._pagebaker import PageBaker
 from piecrust.pipelines._pagerecords import PagePipelineRecordEntry
-from piecrust.rendering import (
-    RenderingContext, render_page_segments)
 from piecrust.sources.base import AbortedSourceUseError
 
 
@@ -35,11 +33,13 @@
             existing.subs = record_entry.subs
 
     def run(self, job, ctx, result):
-        pass_name = job.data.get('pass', 0)
-        if pass_name == 0:
-            self._renderSegmentsOrPostpone(job.content_item, ctx, result)
-        elif pass_name == 1:
-            self._fullRender(job.content_item, ctx, result)
+        step_num = job.step_num
+        if step_num == 0:
+            self._loadPage(job.content_item, ctx, result)
+        elif step_num == 1:
+            self._renderOrPostpone(job.content_item, ctx, result)
+        elif step_num == 2:
+            self._renderAlways(job.content_item, ctx, result)
 
     def getDeletions(self, ctx):
         for prev, cur in ctx.record_history.diffs:
@@ -59,37 +59,35 @@
     def shutdown(self):
         self._pagebaker.stopWriterQueue()
 
-    def _renderSegmentsOrPostpone(self, content_item, ctx, result):
+    def _loadPage(self, content_item, ctx, result):
+        logger.debug("Loading page: %s" % content_item.spec)
+        page = self.app.getPage(self.source, content_item)
+        record_entry = result.record_entry
+        record_entry.config = page.config.getAll()
+        record_entry.timestamp = page.datetime.timestamp()
+        result.next_step_job = self.createJob(content_item)
+
+    def _renderOrPostpone(self, content_item, ctx, result):
         # Here our job is to render the page's segments so that they're
         # cached in memory and on disk... unless we detect that the page
         # is using some other sources, in which case we abort and we'll try
         # again on the second pass.
-        logger.debug("Rendering segments for: %s" % content_item.spec)
-        record_entry = result.record_entry
-        stats = self.app.env.stats
-
+        logger.debug("Conditional render for: %s" % content_item.spec)
         page = self.app.getPage(self.source, content_item)
-        record_entry.config = page.config.getAll()
-        record_entry.timestamp = page.datetime.timestamp()
-
-        rdrctx = RenderingContext(page)
+        prev_entry = ctx.previous_entry
+        cur_entry = result.record_entry
         self.app.env.abort_source_use = True
         try:
-            render_page_segments(rdrctx)
+            self._pagebaker.bake(page, prev_entry, cur_entry)
         except AbortedSourceUseError:
             logger.debug("Page was aborted for using source: %s" %
                          content_item.spec)
-            stats.stepCounter("SourceUseAbortions")
+            self.app.env.stats.stepCounter("SourceUseAbortions")
+            result.next_step_job = self.createJob(content_item)
         finally:
             self.app.env.abort_source_use = False
 
-        result.next_pass_job = self.createJob(content_item)
-        result.next_pass_job.data.update({
-            'pass': 1,
-            'record_entry': record_entry
-        })
-
-    def _fullRender(self, content_item, ctx, result):
+    def _renderAlways(self, content_item, ctx, result):
         logger.debug("Full render for: %s" % content_item.spec)
         page = self.app.getPage(self.source, content_item)
         prev_entry = ctx.previous_entry
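
With this change the page pipeline's run() dispatches on job.step_num:
step 0 loads the page and fills in the record entry, step 1 attempts a
bake with abort_source_use set and postpones to step 2 if the page
pulls data from another source, and step 2 bakes unconditionally. A
condensed sketch of that dispatch (Result, render, and make_job are
simplified stand-ins; only the step numbering and the
postpone-on-abort logic mirror the real code):

# Condensed sketch of PagePipeline.run()'s three-step dispatch.
class AbortedSourceUseError(Exception):
    pass


class Result:
    def __init__(self):
        self.record_entry = None
        self.next_step_job = None


def run(job, result, render, make_job):
    if job.step_num == 0:
        # Step 0: load only; always schedule a render attempt.
        result.next_step_job = make_job(job)
    elif job.step_num == 1:
        # Step 1: try to render; postpone if another source is used.
        try:
            render(job, abort_on_source_use=True)
        except AbortedSourceUseError:
            result.next_step_job = make_job(job)
    elif job.step_num == 2:
        # Step 2: render unconditionally; no further steps.
        render(job, abort_on_source_use=False)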
--- a/piecrust/rendering.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/rendering.py	Thu Jun 15 22:16:23 2017 -0700
@@ -6,6 +6,7 @@
     DataBuildingContext, build_page_data, add_layout_data)
 from piecrust.fastpickle import _pickle_object, _unpickle_object
 from piecrust.templating.base import TemplateNotFoundError, TemplatingError
+from piecrust.sources.base import AbortedSourceUseError
 
 
 logger = logging.getLogger(__name__)
@@ -127,6 +128,10 @@
         self._ctx_stack = []
 
     @property
+    def is_empty(self):
+        return len(self._ctx_stack) == 0
+
+    @property
     def current_ctx(self):
         if len(self._ctx_stack) == 0:
             return None
@@ -217,6 +222,8 @@
                 layout_result['pass_info'])
 
         return rp
+    except AbortedSourceUseError:
+        raise
     except Exception as ex:
         if ctx.app.debug:
             raise
@@ -234,6 +241,14 @@
     stats = env.stats
 
     stack = env.render_ctx_stack
+
+    if env.abort_source_use and not stack.is_empty:
+        cur_spec = ctx.page.content_spec
+        from_spec = stack.current_ctx.page.content_spec
+        logger.debug("Aborting rendering of '%s' from: %s." %
+                     (cur_spec, from_spec))
+        raise AbortedSourceUseError()
+
     stack.pushCtx(ctx)
 
     page = ctx.page
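
The check that used to live in the page iterator now sits at the top
of render_page_segments: while abort_source_use is set, a segment
render requested when another render is already on the context stack
can only mean one page is pulling content from another source, which
is exactly the case step 1 postpones. A minimal sketch of that
stack-based guard, assuming simplified stand-in types:

# Minimal sketch of the stack-based guard added to
# render_page_segments(): a non-empty render-context stack means we
# are inside a sub-render triggered by some other page's render.
class AbortedSourceUseError(Exception):
    pass


class RenderCtxStack:
    def __init__(self):
        self._ctx_stack = []

    @property
    def is_empty(self):
        return len(self._ctx_stack) == 0

    def pushCtx(self, ctx):
        self._ctx_stack.append(ctx)

    def popCtx(self):
        self._ctx_stack.pop()


def render_page_segments(ctx, stack, abort_source_use):
    if abort_source_use and not stack.is_empty:
        raise AbortedSourceUseError()
    stack.pushCtx(ctx)
    try:
        return "segments for %s" % ctx     # stand-in for the real work
    finally:
        stack.popCtx()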
--- a/piecrust/sources/blogarchives.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/sources/blogarchives.py	Thu Jun 15 22:16:23 2017 -0700
@@ -145,7 +145,7 @@
 
 class BlogArchivesPipeline(ContentPipeline):
     PIPELINE_NAME = 'blog_archives'
-    PASS_NUM = 1
+    PASS_NUM = 10
     RECORD_ENTRY_CLASS = BlogArchivesPipelineRecordEntry
 
     def __init__(self, source, ctx):
--- a/piecrust/sources/taxonomy.py	Thu Jun 15 07:33:40 2017 -0700
+++ b/piecrust/sources/taxonomy.py	Thu Jun 15 22:16:23 2017 -0700
@@ -239,7 +239,7 @@
 
 class TaxonomyPipeline(ContentPipeline):
     PIPELINE_NAME = 'taxonomy'
-    PASS_NUM = 1
+    PASS_NUM = 10
     RECORD_ENTRY_CLASS = TaxonomyPipelineRecordEntry
 
     def __init__(self, source, ctx):
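
Note that PASS_NUM is distinct from the renamed steps: steps are the
rounds within one pipeline, while PASS_NUM orders whole pipelines
relative to each other. Bumping taxonomy and blog archives from 1 to
10 presumably leaves room for other pipelines to slot in between the
regular page pass and these listing pipelines. A hypothetical
illustration of how such pass-number grouping behaves (not the actual
_get_pipeline_infos_by_pass_and_realm, and the PASS_NUM value of 0 for
regular pages is an assumption):

# Hypothetical illustration of PASS_NUM grouping: pipelines bucket by
# pass number and run in ascending order, so pages (assumed pass 0)
# bake before taxonomies and blog archives (now pass 10).
from collections import defaultdict


def group_by_pass(pipeline_classes):
    by_pass = defaultdict(list)
    for pp in pipeline_classes:
        by_pass[pp.PASS_NUM].append(pp)
    return [by_pass[n] for n in sorted(by_pass)]


class PagePipeline:
    PASS_NUM = 0


class TaxonomyPipeline:
    PASS_NUM = 10


class BlogArchivesPipeline:
    PASS_NUM = 10


for group in group_by_pass([TaxonomyPipeline, PagePipeline,
                            BlogArchivesPipeline]):
    print([pp.__name__ for pp in group])
# -> ['PagePipeline'], then ['TaxonomyPipeline', 'BlogArchivesPipeline']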