piecrust/generation/taxonomy.py @ 789:b8e760b3413e

bake: Fix how slugified taxonomy terms are handled. This fixes a problem where multiple terms all slugifying to the same thing would lead to a fatal bake error.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 05 Sep 2016 21:03:00 -0700
parents 661f7ba15762
children 58ebf50235a5
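
For context, the collision described in the commit message comes from slugification mapping several distinct raw terms to the same slug. The snippet below is an illustrative sketch only (it does not use PieCrust's `_Slugifier` API, and `toy_slugify` is a hypothetical helper); it shows two tags that differ in the source pages but collapse to one slug once transliteration and lowercasing are applied.

# Illustrative sketch only -- not PieCrust code. It mimics the effect of
# slugifying with transliteration and lowercasing (cf. SLUGIFY_TRANSLITERATE
# and SLUGIFY_LOWERCASE in the diff below) to show how terms can collide.
import unidecode


def toy_slugify(term):
    # Hypothetical helper: transliterate accents, then lowercase.
    return unidecode.unidecode(term).lower()


tags = ['Électronique', 'electronique']
slugs = {toy_slugify(t) for t in tags}
assert slugs == {'electronique'}  # two raw terms, one slugified term

In the new code, `_TaxonomyTermsAnalyzer._addTerm` detects this case and merges the colliding terms into one listing, logging a warning instead of letting the bake fail.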
--- piecrust/generation/taxonomy.py (788:276030ea7972)
+++ piecrust/generation/taxonomy.py (789:b8e760b3413e)
@@ -31,20 +31,23 @@
         self.config = config
         self.term_name = config.get('term', name)
         self.is_multiple = bool(config.get('multiple', False))
         self.separator = config.get('separator', '/')
         self.page_ref = config.get('page')
-        self._source_page_refs = {}
 
     @property
     def setting_name(self):
         if self.is_multiple:
             return self.name
         return self.term_name
 
 
 class TaxonomyPageGenerator(PageGenerator):
+    """ A page generator that handles taxonomies, _i.e._ lists of keywords
+    that pages are labelled with, and for which we need to generate
+    listing pages.
+    """
     GENERATOR_NAME = 'taxonomy'
 
     def __init__(self, app, name, config):
         super(TaxonomyPageGenerator, self).__init__(app, name, config)
 
@@ -61,177 +64,146 @@
 
         sm = config.get('slugify_mode')
         if not sm:
             sm = app.config.get('site/slugify_mode', 'encode')
         self.slugify_mode = _parse_slugify_mode(sm)
+        self.slugifier = _Slugifier(self.taxonomy, self.slugify_mode)
+
+    def slugify(self, term):
+        return self.slugifier.slugify(term)
+
+    def slugifyMultiple(self, terms):
+        return self.slugifier.slugifyMultiple(terms)
 
     def prepareRenderContext(self, ctx):
-        self._setPaginationSource(ctx)
-
+        # Set the pagination source as the source we're generating for.
+        ctx.pagination_source = self.source
+
+        # Get the taxonomy terms from the route metadata... this can come from
+        # the browser's URL (while serving) or from the baking (see `bake`
+        # method below). In both cases, we expect to have the *slugified*
+        # version of the term, because we're going to set a filter that also
+        # slugifies the terms found on each page.
+        #
+        # This is because:
+        # * while serving, we get everything from the request URL, so we only
+        #   have the slugified version.
+        # * if 2 slightly different terms "collide" into the same slugified
+        #   term, we'll get a merge of the 2 on the listing page, which is
+        #   what the user expects.
+        #
         tax_terms, is_combination = self._getTaxonomyTerms(
             ctx.page.route_metadata)
         self._setTaxonomyFilter(ctx, tax_terms, is_combination)
 
+        # Add some custom data for rendering.
         ctx.custom_data.update({
             self.taxonomy.term_name: tax_terms,
             'is_multiple_%s' % self.taxonomy.term_name: is_combination})
+        # Add some "plural" version of the term... so for instance, if this
+        # is the "tags" taxonomy, "tag" will have one term most of the time,
+        # except when it's a combination. Here, we add "tags" as something that
+        # is always a tuple, even when it's not a combination.
         if (self.taxonomy.is_multiple and
                 self.taxonomy.name != self.taxonomy.term_name):
             mult_val = tax_terms
             if not is_combination:
                 mult_val = (mult_val,)
             ctx.custom_data[self.taxonomy.name] = mult_val
-        logger.debug("Prepared render context with: %s" % ctx.custom_data)
 
     def _getTaxonomyTerms(self, route_metadata):
+        # Get the individual slugified terms from the route metadata.
         all_values = route_metadata.get(self.taxonomy.term_name)
         if all_values is None:
            raise Exception("'%s' values couldn't be found in route metadata" %
                            self.taxonomy.term_name)
 
+        # If it's a "multiple" taxonomy, we need to potentially split the
+        # route value into the individual terms (_e.g._ when listing all pages
+        # that have 2 given tags, we need to get each of those 2 tags).
        if self.taxonomy.is_multiple:
             sep = self.taxonomy.separator
             if sep in all_values:
                 return tuple(all_values.split(sep)), True
+        # Not a "multiple" taxonomy, so there's only the one value.
         return all_values, False
 
     def _setTaxonomyFilter(self, ctx, term_value, is_combination):
+        # Set up the filter that will check the pages' terms.
         flt = PaginationFilter(value_accessor=page_value_accessor)
         flt.addClause(HasTaxonomyTermsFilterClause(
             self.taxonomy, self.slugify_mode, term_value, is_combination))
         ctx.pagination_filter = flt
 
-    def _setPaginationSource(self, ctx):
-        ctx.pagination_source = self.source
-
     def onRouteFunctionUsed(self, route, route_metadata):
-        # Get the values.
+        # Get the values, and slugify them appropriately.
         values = route_metadata[self.taxonomy.term_name]
         if self.taxonomy.is_multiple:
-            #TODO: here we assume the route has been properly configured.
-            values = tuple([str(v) for v in values])
+            # TODO: here we assume the route has been properly configured.
+            slugified_values = self.slugifyMultiple((str(v) for v in values))
+            route_val = self.taxonomy.separator.join(slugified_values)
         else:
-            values = (str(values),)
+            slugified_values = self.slugify(str(values))
+            route_val = slugified_values
 
         # We need to register this use of a taxonomy term.
         eis = self.app.env.exec_info_stack
         cpi = eis.current_page_info.render_ctx.current_pass_info
         if cpi:
             utt = cpi.getCustomInfo('used_taxonomy_terms', [], True)
-            utt.append(values)
+            utt.append(slugified_values)
 
-        # We need to slugify the terms before they get transformed
-        # into URL-bits.
-        s = _Slugifier(self.taxonomy, self.slugify_mode)
-        str_values = s.slugify(values)
-        route_metadata[self.taxonomy.term_name] = str_values
-        logger.debug("Changed route metadata to: %s" % route_metadata)
+        # Put the slugified values in the route metadata so they're used to
+        # generate the URL.
+        route_metadata[self.taxonomy.term_name] = route_val
 
     def bake(self, ctx):
         if not self.page_ref.exists:
             logger.debug(
                 "No page found at '%s', skipping taxonomy '%s'." %
                 (self.page_ref, self.taxonomy.name))
             return
 
         logger.debug("Baking %s pages...", self.taxonomy.name)
+        analyzer = _TaxonomyTermsAnalyzer(self.source_name, self.taxonomy,
+                                          self.slugify_mode)
         with format_timed_scope(logger, 'gathered taxonomy terms',
                                 level=logging.DEBUG, colored=False):
-            all_terms, dirty_terms = self._buildDirtyTaxonomyTerms(ctx)
+            analyzer.analyze(ctx)
 
         start_time = time.perf_counter()
-        page_count = self._bakeTaxonomyTerms(ctx, all_terms, dirty_terms)
+        page_count = self._bakeTaxonomyTerms(ctx, analyzer)
         if page_count > 0:
             logger.info(format_timed(
                 start_time,
                 "baked %d %s pages for %s." % (
                     page_count, self.taxonomy.term_name, self.source_name)))
 
-    def _buildDirtyTaxonomyTerms(self, ctx):
-        # Build the list of terms for our taxonomy, and figure out which ones
-        # are 'dirty' for the current bake.
-        logger.debug("Gathering dirty taxonomy terms")
-        all_terms = set()
-        single_dirty_terms = set()
-
-        # Re-bake all taxonomy terms that include new or changed pages.
-        for prev_entry, cur_entry in ctx.getBakedPageRecords():
-            if cur_entry.source_name != self.source_name:
-                continue
-
-            entries = [cur_entry]
-            if prev_entry:
-                entries.append(prev_entry)
-
-            terms = []
-            for e in entries:
-                entry_terms = e.config.get(self.taxonomy.setting_name)
-                if entry_terms:
-                    if not self.taxonomy.is_multiple:
-                        terms.append(entry_terms)
-                    else:
-                        terms += entry_terms
-            single_dirty_terms.update(terms)
-
-        # Remember all terms used.
-        for _, cur_entry in ctx.getAllPageRecords():
-            if cur_entry and not cur_entry.was_overriden:
-                cur_terms = cur_entry.config.get(self.taxonomy.setting_name)
-                if cur_terms:
-                    if not self.taxonomy.is_multiple:
-                        all_terms.add(cur_terms)
-                    else:
-                        all_terms |= set(cur_terms)
-
-        # Re-bake the combination pages for terms that are 'dirty'.
-        # We make all terms into tuple, even those that are not actual
-        # combinations, so that we have less things to test further down the
-        # line.
-        dirty_terms = [(t,) for t in single_dirty_terms]
-        # Add the combinations to that list.
-        if self.taxonomy.is_multiple:
-            known_combinations = set()
-            logger.debug("Gathering dirty term combinations")
-            for _, cur_entry in ctx.getAllPageRecords():
-                if cur_entry:
-                    used_terms = _get_all_entry_taxonomy_terms(cur_entry)
-                    for terms in used_terms:
-                        if len(terms) > 1:
-                            known_combinations.add(terms)
-
-            for terms in known_combinations:
-                if not single_dirty_terms.isdisjoint(set(terms)):
-                    dirty_terms.append(terms)
-
-        return all_terms, dirty_terms
-
-    def _bakeTaxonomyTerms(self, ctx, all_terms, dirty_terms):
+    def _bakeTaxonomyTerms(self, ctx, analyzer):
         # Start baking those terms.
         logger.debug(
-            "Baking '%s' for source '%s': %s" %
-            (self.taxonomy.name, self.source_name, dirty_terms))
+            "Baking '%s' for source '%s': %d terms" %
+            (self.taxonomy.name, self.source_name,
+             len(analyzer.dirty_slugified_terms)))
 
         route = self.app.getGeneratorRoute(self.name)
         if route is None:
            raise Exception("No routes have been defined for generator: %s" %
                            self.name)
 
         logger.debug("Using taxonomy page: %s" % self.page_ref)
         fac = self.page_ref.getFactory()
 
         job_count = 0
-        s = _Slugifier(self.taxonomy, self.slugify_mode)
-        for term in dirty_terms:
-            if not self.taxonomy.is_multiple:
-                term = term[0]
-            slugified_term = s.slugify(term)
-            extra_route_metadata = {self.taxonomy.term_name: slugified_term}
-
-            # Use the slugified term as the record extra key.
+        for slugified_term in analyzer.dirty_slugified_terms:
+            extra_route_metadata = {
+                self.taxonomy.term_name: slugified_term}
+
+            # Use the slugified term as the record's extra key seed.
             logger.debug(
                 "Queuing: %s [%s=%s]" %
                 (fac.ref_spec, self.taxonomy.name, slugified_term))
             ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term)
             job_count += 1
         ctx.runJobQueue()
 
        # Now we create bake entries for all the terms that were *not* dirty.
@@ -245,30 +217,19 @@
             try:
                 t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
             except InvalidRecordExtraKey:
                 continue
 
-            if t in all_terms:
+            if analyzer.isKnownSlugifiedTerm(t):
                 logger.debug("Creating unbaked entry for %s term: %s" %
                              (self.name, t))
                 ctx.collapseRecord(prev_entry)
             else:
                 logger.debug("Term %s in %s isn't used anymore." %
                              (self.name, t))
 
         return job_count
-
-
-def _get_all_entry_taxonomy_terms(entry):
-    res = set()
-    for o in entry.subs:
-        for pinfo in o.render_info:
-            if pinfo:
-                terms = pinfo.getCustomInfo('used_taxonomy_terms')
-                if terms:
-                    res |= set(terms)
-    return res
 
 
 class HasTaxonomyTermsFilterClause(SettingFilterClause):
     def __init__(self, taxonomy, slugify_mode, value, is_combination):
         super(HasTaxonomyTermsFilterClause, self).__init__(
@@ -300 +261 @@
             return False
         page_value = self._slugifier.slugify(page_value)
         return page_value == self.value
 
 
+class _TaxonomyTermsAnalyzer(object):
+    def __init__(self, source_name, taxonomy, slugify_mode):
+        self.source_name = source_name
+        self.taxonomy = taxonomy
+        self.slugifier = _Slugifier(taxonomy, slugify_mode)
+        self._all_terms = {}
+        self._single_dirty_slugified_terms = set()
+        self._all_dirty_slugified_terms = None
+
+    @property
+    def dirty_slugified_terms(self):
+        """ Returns the slugified terms that have been 'dirtied' during
+        this bake.
+        """
+        return self._all_dirty_slugified_terms
+
+    def isKnownSlugifiedTerm(self, term):
+        """ Returns whether the given slugified term has been seen during
+        this bake.
+        """
+        return term in self._all_terms
+
+    def analyze(self, ctx):
+        # Build the list of terms for our taxonomy, and figure out which ones
+        # are 'dirty' for the current bake.
+        #
+        # Remember all terms used.
+        for _, cur_entry in ctx.getAllPageRecords():
+            if cur_entry and not cur_entry.was_overriden:
+                cur_terms = cur_entry.config.get(self.taxonomy.setting_name)
+                if cur_terms:
+                    if not self.taxonomy.is_multiple:
+                        self._addTerm(cur_entry.path, cur_terms)
+                    else:
+                        self._addTerms(cur_entry.path, cur_terms)
+
+        # Re-bake all taxonomy terms that include new or changed pages, by
+        # marking them as 'dirty'.
+        for prev_entry, cur_entry in ctx.getBakedPageRecords():
+            if cur_entry.source_name != self.source_name:
+                continue
+
+            entries = [cur_entry]
+            if prev_entry:
+                entries.append(prev_entry)
+
+            for e in entries:
+                entry_terms = e.config.get(self.taxonomy.setting_name)
+                if entry_terms:
+                    if not self.taxonomy.is_multiple:
+                        self._single_dirty_slugified_terms.add(
+                            self.slugifier.slugify(entry_terms))
+                    else:
+                        self._single_dirty_slugified_terms.update(
+                            (self.slugifier.slugify(t)
+                             for t in entry_terms))
+
+        self._all_dirty_slugified_terms = list(
+            self._single_dirty_slugified_terms)
+        logger.debug("Gathered %d dirty taxonomy terms",
+                     len(self._all_dirty_slugified_terms))
+
+        # Re-bake the combination pages for terms that are 'dirty'.
+        # We make all terms into tuple, even those that are not actual
+        # combinations, so that we have less things to test further down the
+        # line.
+        #
+        # Add the combinations to that list. We get those combinations from
+        # wherever combinations were used, so they're coming from the
+        # `onRouteFunctionUsed` method.
+        if self.taxonomy.is_multiple:
+            known_combinations = set()
+            for _, cur_entry in ctx.getAllPageRecords():
+                if cur_entry:
+                    used_terms = _get_all_entry_taxonomy_terms(cur_entry)
+                    for terms in used_terms:
+                        if len(terms) > 1:
+                            known_combinations.add(terms)
+
+            dcc = 0
+            for terms in known_combinations:
+                if not self._single_dirty_slugified_terms.isdisjoint(
+                        set(terms)):
+                    self._all_dirty_slugified_terms.append(
+                        self.taxonomy.separator.join(terms))
+                    dcc += 1
+            logger.debug("Gathered %d term combinations, with %d dirty." %
+                         (len(known_combinations), dcc))
+
+    def _addTerms(self, entry_path, terms):
+        for t in terms:
+            self._addTerm(entry_path, t)
+
+    def _addTerm(self, entry_path, term):
+        st = self.slugifier.slugify(term)
+        orig_terms = self._all_terms.setdefault(st, [])
+        if orig_terms and orig_terms[0] != term:
+            logger.warning(
+                "Term '%s' in '%s' is slugified to '%s' which conflicts with "
+                "previously existing '%s'. The two will be merged." %
+                (term, entry_path, st, orig_terms[0]))
+        orig_terms.append(term)
+
+
+def _get_all_entry_taxonomy_terms(entry):
+    res = set()
+    for o in entry.subs:
+        for pinfo in o.render_info:
+            if pinfo:
+                terms = pinfo.getCustomInfo('used_taxonomy_terms')
+                if terms:
+                    res |= set(terms)
+    return res
+
+
 class _Slugifier(object):
     def __init__(self, taxonomy, mode):
         self.taxonomy = taxonomy
         self.mode = mode
 
+    def slugifyMultiple(self, terms):
+        return tuple(map(self.slugify, terms))
+
     def slugify(self, term):
-        if isinstance(term, tuple):
-            return self.taxonomy.separator.join(
-                map(self._slugifyOne, term))
-        return self._slugifyOne(term)
-
-    def _slugifyOne(self, term):
         if self.mode & SLUGIFY_TRANSLITERATE:
             term = unidecode.unidecode(term)
         if self.mode & SLUGIFY_LOWERCASE:
             term = term.lower()
         if self.mode & SLUGIFY_DOT_TO_DASH: