piecrust2: diff piecrust/sources/taxonomy.py @ 855:448710d84121
refactor: Get the taxonomy support back to a functional state.
There's now a taxonomy content source that wraps another, normal content
source (like a blog's posts source). It works in tandem with a taxonomy
content pipeline that does the heavy lifting of figuring out which terms
exist and need to be baked.
author    Ludovic Chabant <ludovic@chabant.com>
date      Tue, 06 Jun 2017 00:26:21 -0700
parents   08e02c2a2a1a
children  9bb22bbe093c
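
Before the diff, a note on configuration: the new TaxonomySource reads its
wiring from its source configuration. A minimal, hypothetical sketch of that
configuration, as a Python dict limited to the keys the constructor below
actually reads ('source' and 'taxonomy' are required, 'slugify_mode' and
'template' optional; the 'posts'/'tags'/'_tag.html' values are made up):

    config = {
        'source': 'posts',        # inner source to wrap (required)
        'taxonomy': 'tags',       # taxonomy whose terms get listed (required)
        'template': '_tag.html',  # optional; defaults to '_%s.html' % tax_name
        # 'slugify_mode': ...     # optional; forwarded to _get_slugifier()
    }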
line diff
--- a/piecrust/sources/taxonomy.py	Sun Jun 04 23:34:28 2017 -0700
+++ b/piecrust/sources/taxonomy.py	Tue Jun 06 00:26:21 2017 -0700
@@ -1,14 +1,21 @@
+import io
 import re
 import time
 import logging
 import unidecode
-from piecrust.chefutil import format_timed, format_timed_scope
+from werkzeug.utils import cached_property
 from piecrust.configuration import ConfigurationError
 from piecrust.data.filters import (
     PaginationFilter, SettingFilterClause)
-from piecrust.pipelines.base import ContentPipeline
+from piecrust.page import Page
+from piecrust.pipelines._pagebaker import PageBaker
+from piecrust.pipelines._pagerecords import PagePipelineRecordEntry
+from piecrust.pipelines.base import (
+    ContentPipeline, get_record_name_for_source)
+from piecrust.pipelines.records import RecordHistory
 from piecrust.routing import RouteParameter
-from piecrust.sources.base import ContentSource, GeneratedContentException
+from piecrust.sources.base import (
+    ContentItem, ContentSource, GeneratedContentException)
 
 
 logger = logging.getLogger(__name__)
@@ -44,6 +51,12 @@
         return self.term_name
 
 
+_taxonomy_index = """---
+layout: %(template)s
+---
+"""
+
+
 class TaxonomySource(ContentSource):
     """ A content source that generates taxonomy listing pages.
     """
@@ -53,6 +66,12 @@
     def __init__(self, app, name, config):
         super().__init__(app, name, config)
 
+        source_name = config.get('source')
+        if source_name is None:
+            raise ConfigurationError(
+                "Taxonomy source '%s' requires an inner source." % name)
+        self._inner_source_name = source_name
+
         tax_name = config.get('taxonomy')
         if tax_name is None:
             raise ConfigurationError(
@@ -62,6 +81,19 @@
         sm = config.get('slugify_mode')
         self.slugifier = _get_slugifier(app, self.taxonomy, sm)
 
+        tpl_name = config.get('template', '_%s.html' % tax_name)
+        self._raw_item = _taxonomy_index % {'template': tpl_name}
+
+    @cached_property
+    def inner_source(self):
+        return self.app.getSource(self._inner_source_name)
+
+    def openItem(self, item, mode='r', **kwargs):
+        return io.StringIO(self._raw_item)
+
+    def getItemMtime(self, item):
+        return time.time()
+
     def getContents(self, group):
         # Our content is procedurally generated from other content sources,
         # so we really don't support listing anything here -- it would be
@@ -78,6 +110,15 @@
         return [RouteParameter(name, param_type,
                                variadic=self.taxonomy.is_multiple)]
 
+    def findContent(self, route_params):
+        slugified_term = route_params[self.taxonomy.term_name]
+        spec = '_index[%s]' % slugified_term
+        metadata = {'term': slugified_term,
+                    'route_params': {
+                        self.taxonomy.term_name: slugified_term}
+                    }
+        return ContentItem(spec, metadata)
+
     def slugify(self, term):
         return self.slugifier.slugify(term)
 
@@ -86,7 +127,7 @@
 
     def prepareRenderContext(self, ctx):
         # Set the pagination source as the source we're generating for.
-        ctx.pagination_source = self.source
+        ctx.pagination_source = self.inner_source
 
         # Get the taxonomy terms from the route metadata... this can come from
         # the browser's URL (while serving) or from the baking (see `bake`
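
A note on the hunks above: the source fabricates its listing pages entirely
in memory. openItem() returns a StringIO over a tiny page body whose layout
line is interpolated from the 'template' setting, and findContent() keys each
virtual item by its slugified term. A standalone sketch of that interpolation,
assuming a hypothetical '_tag.html' template:

    import io

    tpl_name = '_tag.html'  # hypothetical value for the 'template' setting
    raw_item = "---\nlayout: %(template)s\n---\n" % {'template': tpl_name}
    assert raw_item == "---\nlayout: _tag.html\n---\n"

    # openItem() can then serve this string as if it were a file on disk:
    assert io.StringIO(raw_item).read() == raw_item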
@@ -141,7 +182,7 @@
         # Set up the filter that will check the pages' terms.
         flt = PaginationFilter()
         flt.addClause(HasTaxonomyTermsFilterClause(
-            self.taxonomy, self.slugify.mode, term_value, is_combination))
+            self.taxonomy, self.slugifier.mode, term_value, is_combination))
         ctx.pagination_filter = flt
 
     def onRouteFunctionUsed(self, route_params):
@@ -173,30 +214,34 @@
         self._taxonomy = taxonomy
         self._is_combination = is_combination
         self._slugifier = _Slugifier(taxonomy, slugify_mode)
+        if taxonomy.is_multiple:
+            self.pageMatches = self._pageMatchesAny
+        else:
+            self.pageMatches = self._pageMatchesSingle
 
-    def pageMatches(self, fil, page):
-        if self._taxonomy.is_multiple:
-            # Multiple taxonomy, i.e. it supports multiple terms, like tags.
-            page_values = fil.value_accessor(page, self.name)
-            if page_values is None or not isinstance(page_values, list):
-                return False
+    def _pageMatchesAny(self, fil, page):
+        # Multiple taxonomy, i.e. it supports multiple terms, like tags.
+        page_values = page.config.get(self.name)
+        if page_values is None or not isinstance(page_values, list):
+            return False
 
-            page_set = set(map(self._slugifier.slugify, page_values))
-            if self._is_combination:
-                # Multiple taxonomy, and multiple terms to match. Check that
-                # the ones to match are all in the page's terms.
-                value_set = set(self.value)
-                return value_set.issubset(page_set)
-            else:
-                # Multiple taxonomy, one term to match.
-                return self.value in page_set
+        page_set = set(map(self._slugifier.slugify, page_values))
+        if self._is_combination:
+            # Multiple taxonomy, and multiple terms to match. Check that
+            # the ones to match are all in the page's terms.
+            value_set = set(self.value)
+            return value_set.issubset(page_set)
         else:
-            # Single taxonomy. Just compare the values.
-            page_value = fil.value_accessor(page, self.name)
-            if page_value is None:
-                return False
-            page_value = self._slugifier.slugify(page_value)
-            return page_value == self.value
+            # Multiple taxonomy, one term to match.
+            return self.value in page_set
+
+    def _pageMatchesSingle(self, fil, page):
+        # Single taxonomy. Just compare the values.
+        page_value = page.config.get(self.name)
+        if page_value is None:
+            return False
+        page_value = self._slugifier.slugify(page_value)
+        return page_value == self.value
 
 
 def _get_taxonomy(app, tax_name):
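
The rewrite above also replaces the old branchy pageMatches() with two
methods, bound once in the constructor depending on whether the taxonomy
holds multiple terms. The combination check itself is plain set algebra: a
page matches a multi-term query only if every queried term appears among the
page's slugified terms. A standalone illustration with made-up terms:

    page_terms = {'python', 'web', 'static-sites'}  # slugified page terms

    # One term to match: simple membership.
    assert 'python' in page_terms

    # A combination like 'python+web': every queried term must be present.
    assert {'python', 'web'}.issubset(page_terms)
    assert not {'python', 'cooking'}.issubset(page_terms)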
@@ -213,9 +258,16 @@
     return _Slugifier(taxonomy, sm)
 
 
+class TaxonomyPipelineRecordEntry(PagePipelineRecordEntry):
+    def __init__(self):
+        super().__init__()
+        self.term = None
+
+
 class TaxonomyPipeline(ContentPipeline):
     PIPELINE_NAME = 'taxonomy'
     PASS_NUM = 1
+    RECORD_ENTRY_CLASS = TaxonomyPipelineRecordEntry
 
     def __init__(self, source, ctx):
         if not isinstance(source, TaxonomySource):
@@ -223,94 +275,80 @@
                             "content sources.")
 
         super().__init__(source, ctx)
+        self.inner_source = source.inner_source
         self.taxonomy = source.taxonomy
         self.slugifier = source.slugifier
-
-    def buildJobs(self):
-        logger.debug("Building taxonomy pages for source: %s" %
-                     self.source.name)
-        analyzer = _TaxonomyTermsAnalyzer(self)
-        with format_timed_scope(logger, 'gathered taxonomy terms',
-                                level=logging.DEBUG, colored=False):
-            analyzer.analyze(ctx)
+        self._tpl_name = source.config['template']
+        self._analyzer = None
+        self._pagebaker = None
 
-    def bake(self, ctx):
-        if not self.page_ref.exists:
-            logger.debug(
-                "No page found at '%s', skipping taxonomy '%s'." %
-                (self.page_ref, self.taxonomy.name))
-            return
+    def initialize(self):
+        self._pagebaker = PageBaker(self.app,
+                                    self.ctx.out_dir,
+                                    force=self.ctx.force)
+        self._pagebaker.startWriterQueue()
 
-        logger.debug("Baking %s pages...", self.taxonomy.name)
-        analyzer = _TaxonomyTermsAnalyzer(self.source_name, self.taxonomy,
-                                          self.slugify_mode)
-        with format_timed_scope(logger, 'gathered taxonomy terms',
-                                level=logging.DEBUG, colored=False):
-            analyzer.analyze(ctx)
+    def shutdown(self):
+        self._pagebaker.stopWriterQueue()
+
+    def createJobs(self, ctx):
+        logger.debug("Building '%s' taxonomy pages for source: %s" %
+                     (self.taxonomy.name, self.inner_source.name))
+        self._analyzer = _TaxonomyTermsAnalyzer(self, ctx.record_histories)
+        self._analyzer.analyze()
 
-        start_time = time.perf_counter()
-        page_count = self._bakeTaxonomyTerms(ctx, analyzer)
-        if page_count > 0:
-            logger.info(format_timed(
-                start_time,
-                "baked %d %s pages for %s." % (
-                    page_count, self.taxonomy.term_name, self.source_name)))
-
-    def _bakeTaxonomyTerms(self, ctx, analyzer):
-        # Start baking those terms.
-        logger.debug(
-            "Baking '%s' for source '%s': %d terms" %
-            (self.taxonomy.name, self.source_name,
-             len(analyzer.dirty_slugified_terms)))
+        logger.debug("Queuing %d '%s' jobs." %
+                     (len(self._analyzer.dirty_slugified_terms),
+                      self.taxonomy.name))
+        jobs = []
+        for slugified_term in self._analyzer.dirty_slugified_terms:
+            item = ContentItem(
+                '_index[%s]' % slugified_term,
+                {'term': slugified_term,
+                 'route_params': {
+                     self.taxonomy.term_name: slugified_term}
+                 })
+            jobs.append(self.createJob(item))
+        if len(jobs) > 0:
+            return jobs
+        return None
 
-        route = self.app.getGeneratorRoute(self.name)
-        if route is None:
-            raise Exception("No routes have been defined for generator: %s" %
-                            self.name)
-
-        logger.debug("Using taxonomy page: %s" % self.page_ref)
-        fac = self.page_ref.getFactory()
+    def run(self, job, ctx, result):
+        content_item = job.content_item
+        logger.debug("Rendering '%s' page: %s" %
+                     (self.taxonomy.name, content_item.metadata['term']))
 
-        job_count = 0
-        for slugified_term in analyzer.dirty_slugified_terms:
-            extra_route_params = {
-                self.taxonomy.term_name: slugified_term}
+        page = Page(self.source, job.content_item)
+        prev_entry = ctx.previous_entry
+        cur_entry = result.record_entry
+        self._pagebaker.bake(page, prev_entry, cur_entry, [])
 
-            # Use the slugified term as the record's extra key seed.
-            logger.debug(
-                "Queuing: %s [%s=%s]" %
-                (fac.ref_spec, self.taxonomy.name, slugified_term))
-            ctx.queueBakeJob(fac, route, extra_route_params, slugified_term)
-            job_count += 1
-        ctx.runJobQueue()
-
-        # Now we create bake entries for all the terms that were *not* dirty.
+    def postJobRun(self, ctx):
+        # We create bake entries for all the terms that were *not* dirty.
         # This is because otherwise, on the next incremental bake, we wouldn't
         # find any entry for those things, and figure that we need to delete
        # their outputs.
-        for prev_entry, cur_entry in ctx.getAllPageRecords():
-            # Only consider taxonomy-related entries that don't have any
-            # current version (i.e. they weren't baked just now).
-            if prev_entry and not cur_entry:
-                try:
-                    t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
-                except InvalidRecordExtraKey:
-                    continue
-
+        analyzer = self._analyzer
+        for prev, cur in ctx.record_history.diffs:
+            # Only consider entries that don't have any current version
+            # (i.e. they weren't baked just now).
+            if prev and not cur:
+                t = prev.term
                 if analyzer.isKnownSlugifiedTerm(t):
-                    logger.debug("Creating unbaked entry for %s term: %s" %
-                                 (self.name, t))
-                    ctx.collapseRecord(prev_entry)
+                    logger.debug("Creating unbaked entry for '%s' term: %s" %
+                                 (self.taxonomy.name, t))
+                    cur.term = t
+                    cur.out_paths = list(prev.out_paths)
+                    cur.errors = list(prev.errors)
                 else:
-                    logger.debug("Term %s in %s isn't used anymore." %
-                                 (self.name, t))
-
-        return job_count
+                    logger.debug("Term '%s' in '%s' isn't used anymore." %
+                                 (t, self.taxonomy.name))
 
 
 class _TaxonomyTermsAnalyzer(object):
-    def __init__(self, source):
-        self.source = source
+    def __init__(self, pipeline, record_histories):
+        self.pipeline = pipeline
+        self.record_histories = record_histories
         self._all_terms = {}
         self._single_dirty_slugified_terms = set()
         self._all_dirty_slugified_terms = None
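
createJobs() above fabricates one bake job per dirty term, with a spec
following the '_index[<slugified-term>]' scheme and the same metadata shape
that findContent() builds. A sketch of those specs and metadata under assumed
values (the 'tag' term name and the terms are made up):

    term_name = 'tag'  # hypothetical; the real value comes from the taxonomy
    dirty_terms = ['python', 'web']

    jobs = [('_index[%s]' % t,
             {'term': t, 'route_params': {term_name: t}})
            for t in dirty_terms]
    assert jobs[0] == ('_index[python]',
                       {'term': 'python',
                        'route_params': {'tag': 'python'}})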
@@ -328,39 +366,49 @@
         """
         return term in self._all_terms
 
-    def analyze(self, ctx):
+    def analyze(self):
         # Build the list of terms for our taxonomy, and figure out which ones
         # are 'dirty' for the current bake.
         #
         # Remember all terms used.
-        for _, cur_entry in ctx.getAllPageRecords():
-            if cur_entry and not cur_entry.was_overriden:
-                cur_terms = cur_entry.config.get(self.taxonomy.setting_name)
+        source = self.pipeline.inner_source
+        taxonomy = self.pipeline.taxonomy
+        slugifier = self.pipeline.slugifier
+
+        record_name = get_record_name_for_source(source)
+        current_records = self.record_histories.current
+        cur_rec = current_records.getRecord(record_name)
+        for cur_entry in cur_rec.getEntries():
+            if not cur_entry.was_overriden:
+                cur_terms = cur_entry.config.get(taxonomy.setting_name)
                 if cur_terms:
-                    if not self.taxonomy.is_multiple:
-                        self._addTerm(cur_entry.path, cur_terms)
+                    if not taxonomy.is_multiple:
+                        self._addTerm(
+                            slugifier, cur_entry.item_spec, cur_terms)
                     else:
-                        self._addTerms(cur_entry.path, cur_terms)
+                        self._addTerms(
+                            slugifier, cur_entry.item_spec, cur_terms)
 
         # Re-bake all taxonomy terms that include new or changed pages, by
         # marking them as 'dirty'.
-        for prev_entry, cur_entry in ctx.getBakedPageRecords():
-            if cur_entry.source_name != self.source_name:
-                continue
-
+        previous_records = self.record_histories.previous
+        prev_rec = previous_records.getRecord(record_name)
+        history = RecordHistory(prev_rec, cur_rec)
+        history.build()
+        for prev_entry, cur_entry in history.diffs:
             entries = [cur_entry]
             if prev_entry:
                 entries.append(prev_entry)
 
             for e in entries:
-                entry_terms = e.config.get(self.taxonomy.setting_name)
+                entry_terms = e.config.get(taxonomy.setting_name)
                 if entry_terms:
-                    if not self.taxonomy.is_multiple:
+                    if not taxonomy.is_multiple:
                         self._single_dirty_slugified_terms.add(
-                            self.slugifier.slugify(entry_terms))
+                            slugifier.slugify(entry_terms))
                     else:
                         self._single_dirty_slugified_terms.update(
-                            (self.slugifier.slugify(t)
+                            (slugifier.slugify(t)
                             for t in entry_terms))
 
         self._all_dirty_slugified_terms = list(
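
Reduced to its core, analyze() above makes two passes over the bake records:
remember every term used anywhere, then mark as dirty the slugified terms of
any entry that changed since the last bake. A plain-Python sketch of the
dirty pass, with a stand-in slugifier:

    def slugify(t):
        return t.lower().replace(' ', '-')  # stand-in for the real _Slugifier

    changed_entries = [{'tags': ['Python', 'Web Dev']},
                       {'tags': ['Cooking']}]

    dirty = set()
    for entry in changed_entries:
        dirty.update(slugify(t) for t in entry['tags'])
    assert dirty == {'python', 'web-dev', 'cooking'}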
@@ -376,37 +424,36 @@
         # Add the combinations to that list. We get those combinations from
         # wherever combinations were used, so they're coming from the
         # `onRouteFunctionUsed` method.
-        if self.taxonomy.is_multiple:
+        if taxonomy.is_multiple:
             known_combinations = set()
-            for _, cur_entry in ctx.getAllPageRecords():
-                if cur_entry:
-                    used_terms = _get_all_entry_taxonomy_terms(cur_entry)
-                    for terms in used_terms:
-                        if len(terms) > 1:
-                            known_combinations.add(terms)
+            for cur_entry in cur_rec.getEntries():
+                used_terms = _get_all_entry_taxonomy_terms(cur_entry)
+                for terms in used_terms:
+                    if len(terms) > 1:
+                        known_combinations.add(terms)
 
             dcc = 0
             for terms in known_combinations:
                 if not self._single_dirty_slugified_terms.isdisjoint(
                         set(terms)):
                     self._all_dirty_slugified_terms.append(
-                        self.taxonomy.separator.join(terms))
+                        taxonomy.separator.join(terms))
                     dcc += 1
 
             logger.debug("Gathered %d term combinations, with %d dirty." %
                          (len(known_combinations), dcc))
 
-    def _addTerms(self, entry_path, terms):
+    def _addTerms(self, slugifier, item_spec, terms):
         for t in terms:
-            self._addTerm(entry_path, t)
+            self._addTerm(slugifier, item_spec, t)
 
-    def _addTerm(self, entry_path, term):
-        st = self.slugifier.slugify(term)
+    def _addTerm(self, slugifier, item_spec, term):
+        st = slugifier.slugify(term)
         orig_terms = self._all_terms.setdefault(st, [])
         if orig_terms and orig_terms[0] != term:
             logger.warning(
                 "Term '%s' in '%s' is slugified to '%s' which conflicts with "
                 "previously existing '%s'. The two will be merged." %
-                (term, entry_path, st, orig_terms[0]))
+                (term, item_spec, st, orig_terms[0]))
 
         orig_terms.append(term)
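
Finally, the _addTerm() warning at the end fires when two distinct spellings
collapse to the same slug, in which case both are merged under that slug. A
standalone sketch with a naive slugifier:

    def slugify(t):
        return t.lower().replace(' ', '-')

    all_terms = {}
    for term in ('Web Dev', 'web dev'):
        st = slugify(term)
        orig_terms = all_terms.setdefault(st, [])
        if orig_terms and orig_terms[0] != term:
            print("'%s' and '%s' both slugify to '%s'; merging."
                  % (term, orig_terms[0], st))
        orig_terms.append(term)

    assert all_terms == {'web-dev': ['Web Dev', 'web dev']}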