comparison piecrust/sources/taxonomy.py @ 1136:5f97b5b59dfe

bake: Optimize cache handling for the baking process.

- Get rid of the 2-level pipeline runs... handle a single set of passes.
- Go back to load/render segments/layout passes for pages.
- Add descriptions of what each job batch does.
- Improve the taxonomy pipeline so it doesn't re-bake terms that don't need to be re-baked.
- Simplify some of the code.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 23 Apr 2018 21:47:49 -0700
parents 1857dbd4580f
children 9f3e702a8a69
comparing 1135:6350ee084273 with 1136:5f97b5b59dfe
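The taxonomy change is easiest to follow in _TaxonomyTermsAnalyzer.analyze() below: instead of diffing the previous and current record histories, the analyzer now makes a single pass over the current bake records and only marks as dirty the terms of pages whose segments were actually re-rendered. A rough sketch of that idea, with invented names and a simplified record-entry shape (this is not PieCrust's actual API):

# Sketch only: entries are plain dicts here; the real code walks
# PagePipelineRecordEntry objects and checks FLAG_OVERRIDEN /
# FLAG_SEGMENTS_RENDERED flags instead of dictionary keys.
def find_dirty_terms(entries, setting_name, is_multiple, slugify):
    all_terms = set()
    dirty_terms = set()
    for entry in entries:
        if entry.get('overriden'):
            # Page overridden by another source; its terms don't count.
            continue
        terms = entry['config'].get(setting_name)
        if not terms:
            continue
        if not is_multiple:
            terms = [terms]
        slugified = {slugify(t) for t in terms}
        all_terms |= slugified
        if entry.get('segments_rendered'):
            # Only pages that were actually re-rendered dirty their terms.
            dirty_terms |= slugified
    return all_terms, dirty_terms

# Tiny usage example with made-up data:
entries = [
    {'config': {'tags': ['python', 'web']}, 'segments_rendered': True},
    {'config': {'tags': ['recipes']}, 'segments_rendered': False},
]
print(find_dirty_terms(entries, 'tags', True, str.lower))
# e.g. ({'python', 'recipes', 'web'}, {'python', 'web'}) -- set order may vary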
--- a/piecrust/sources/taxonomy.py
+++ b/piecrust/sources/taxonomy.py
@@ -5,17 +5,13 @@
 from piecrust.configuration import ConfigurationError
 from piecrust.data.filters import (
     PaginationFilter, SettingFilterClause)
 from piecrust.page import Page
 from piecrust.pipelines._pagebaker import PageBaker
-from piecrust.pipelines._pagerecords import (
-    PagePipelineRecordEntry,
-    add_page_job_result, merge_job_result_into_record_entry)
-from piecrust.pipelines.base import (
-    ContentPipeline, get_record_name_for_source,
-    create_job, content_item_from_job)
-from piecrust.pipelines.records import RecordHistory
+from piecrust.pipelines._pagerecords import PagePipelineRecordEntry
+from piecrust.pipelines.base import (
+    ContentPipeline, get_record_name_for_source, create_job)
 from piecrust.routing import RouteParameter
 from piecrust.sources.base import ContentItem
 from piecrust.sources.generator import GeneratorSourceBase
 
 
@@ -305,12 +301,12 @@
 
         entry = rec_fac(record_entry_spec)
         current_record.addEntry(entry)
 
         if len(jobs) > 0:
-            return jobs
-        return None
+            return jobs, "taxonomize"
+        return None, None
 
     def run(self, job, ctx, result):
         term = job['term']
         content_item = ContentItem('_index',
                                    {'term': term,
@@ -322,17 +318,16 @@
         logger.debug("Rendering '%s' page: %s" %
                      (self.taxonomy.name, page.source_metadata['term']))
         prev_entry = ctx.previous_entry
         rdr_subs = self._pagebaker.bake(page, prev_entry)
 
-        add_page_job_result(result)
         result['subs'] = rdr_subs
         result['term'] = page.source_metadata['term']
 
     def handleJobResult(self, result, ctx):
         existing = ctx.record_entry
-        merge_job_result_into_record_entry(existing, result)
+        existing.subs = result['subs']
         existing.term = result['term']
 
     def postJobRun(self, ctx):
         # We create bake entries for all the terms that were *not* dirty.
         # This is because otherwise, on the next incremental bake, we wouldn't
@@ -360,11 +355,10 @@
 class _TaxonomyTermsAnalyzer(object):
     def __init__(self, pipeline, record_histories):
         self.pipeline = pipeline
         self.record_histories = record_histories
         self._all_terms = {}
-        self._single_dirty_slugified_terms = set()
         self._all_dirty_slugified_terms = None
 
     @property
     def dirty_slugified_terms(self):
         """ Returns the slugified terms that have been 'dirtied' during
@@ -379,53 +373,52 @@
         return term in self._all_terms
 
     def analyze(self):
         # Build the list of terms for our taxonomy, and figure out which ones
         # are 'dirty' for the current bake.
-        #
-        # Remember all terms used.
         source = self.pipeline.inner_source
         taxonomy = self.pipeline.taxonomy
         slugifier = self.pipeline.slugifier
 
+        tax_is_mult = taxonomy.is_multiple
+        tax_setting_name = taxonomy.setting_name
+
+        # First, go over all of our source's pages seen during this bake.
+        # Gather all the taxonomy terms they have, and also keep track of
+        # the ones used by the pages that were actually rendered (instead of
+        # those that were up-to-date and skipped).
+        single_dirty_slugified_terms = set()
+        current_records = self.record_histories.current
         record_name = get_record_name_for_source(source)
-        current_records = self.record_histories.current
         cur_rec = current_records.getRecord(record_name)
         for cur_entry in cur_rec.getEntries():
-            if not cur_entry.was_overriden:
-                cur_terms = cur_entry.config.get(taxonomy.setting_name)
-                if cur_terms:
-                    if not taxonomy.is_multiple:
-                        self._addTerm(
-                            slugifier, cur_entry.item_spec, cur_terms)
-                    else:
-                        self._addTerms(
-                            slugifier, cur_entry.item_spec, cur_terms)
-
-        # Re-bake all taxonomy terms that include new or changed pages, by
-        # marking them as 'dirty'.
-        history = self.record_histories.getHistory(record_name).copy()
-        history.build()
-        for prev_entry, cur_entry in history.diffs:
-            entries = [cur_entry]
-            if prev_entry:
-                entries.append(prev_entry)
-
-            for e in entries:
-                if e and e.was_any_sub_baked:
-                    entry_terms = e.config.get(taxonomy.setting_name)
-                    if entry_terms:
-                        if not taxonomy.is_multiple:
-                            self._single_dirty_slugified_terms.add(
-                                slugifier.slugify(entry_terms))
-                        else:
-                            self._single_dirty_slugified_terms.update(
-                                (slugifier.slugify(t)
-                                 for t in entry_terms))
+            if cur_entry.hasFlag(PagePipelineRecordEntry.FLAG_OVERRIDEN):
+                continue
+
+            cur_terms = cur_entry.config.get(tax_setting_name)
+            if not cur_terms:
+                continue
+
+            if not tax_is_mult:
+                self._addTerm(
+                    slugifier, cur_entry.item_spec, cur_terms)
+            else:
+                self._addTerms(
+                    slugifier, cur_entry.item_spec, cur_terms)
+
+            if cur_entry.hasFlag(
+                    PagePipelineRecordEntry.FLAG_SEGMENTS_RENDERED):
+                if not tax_is_mult:
+                    single_dirty_slugified_terms.add(
+                        slugifier.slugify(cur_terms))
+                else:
+                    single_dirty_slugified_terms.update(
+                        (slugifier.slugify(t)
+                         for t in cur_terms))
 
         self._all_dirty_slugified_terms = list(
-            self._single_dirty_slugified_terms)
+            single_dirty_slugified_terms)
         logger.debug("Gathered %d dirty taxonomy terms",
                      len(self._all_dirty_slugified_terms))
 
         # Re-bake the combination pages for terms that are 'dirty'.
         # We make all terms into tuple, even those that are not actual
@@ -436,11 +429,11 @@
         # wherever combinations were used, so they're coming from the
         # `onRouteFunctionUsed` method. And because combinations can be used
         # by any page in the website (anywhere someone can ask for an URL
         # to the combination page), it means we check all the records, not
         # just the record for our source.
-        if taxonomy.is_multiple:
+        if tax_is_mult:
             known_combinations = set()
             for rec in current_records.records:
                 # Cheap way to test if a record contains entries that
                 # are sub-types of a page entry: test the first one.
                 first_entry = next(iter(rec.getEntries()), None)
@@ -454,11 +447,11 @@
                     if len(terms) > 1:
                         known_combinations.add(terms)
 
             dcc = 0
             for terms in known_combinations:
-                if not self._single_dirty_slugified_terms.isdisjoint(
+                if not single_dirty_slugified_terms.isdisjoint(
                         set(terms)):
                     self._all_dirty_slugified_terms.append(
                         taxonomy.separator.join(terms))
                     dcc += 1
             logger.debug("Gathered %d term combinations, with %d dirty." %