diff piecrust/sources/taxonomy.py @ 854:08e02c2a2a1a

core: Keep refactoring, this time to prepare for generator sources. - Make a few APIs simpler. - Content pipelines create their own jobs, so that generator sources can keep aborting in `getContents`, but rely on their pipeline to generate pages for baking.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 04 Jun 2017 23:34:28 -0700
parents 4850f8c21b6e
children 448710d84121
line wrap: on
line diff
--- a/piecrust/sources/taxonomy.py	Sun May 21 00:06:59 2017 -0700
+++ b/piecrust/sources/taxonomy.py	Sun Jun 04 23:34:28 2017 -0700
@@ -5,8 +5,8 @@
 from piecrust.chefutil import format_timed, format_timed_scope
 from piecrust.configuration import ConfigurationError
 from piecrust.data.filters import (
-    PaginationFilter, SettingFilterClause,
-    page_value_accessor)
+    PaginationFilter, SettingFilterClause)
+from piecrust.pipelines.base import ContentPipeline
 from piecrust.routing import RouteParameter
 from piecrust.sources.base import ContentSource, GeneratedContentException
 
@@ -27,6 +27,8 @@
 
 
 class Taxonomy(object):
+    """ Describes a taxonomy.
+    """
     def __init__(self, name, config):
         self.name = name
         self.config = config
@@ -43,11 +45,10 @@
 
 
 class TaxonomySource(ContentSource):
-    """ A page generator that handles taxonomies, _i.e._ lists of keywords
-        that pages are labelled with, and for which we need to generate
-        listing pages.
+    """ A content source that generates taxonomy listing pages.
     """
     SOURCE_NAME = 'taxonomy'
+    DEFAULT_PIPELINE_NAME = 'taxonomy'
 
     def __init__(self, app, name, config):
         super().__init__(app, name, config)
@@ -55,21 +56,19 @@
         tax_name = config.get('taxonomy')
         if tax_name is None:
             raise ConfigurationError(
-                "Generator '%s' requires a taxonomy name." % name)
-        tax_config = app.config.get('site/taxonomies/' + tax_name)
-        if tax_config is None:
-            raise ConfigurationError(
-                "Error initializing generator '%s', no such taxonomy: %s",
-                (name, tax_name))
-        self.taxonomy = Taxonomy(tax_name, tax_config)
+                "Taxonomy source '%s' requires a taxonomy name." % name)
+        self.taxonomy = _get_taxonomy(app, tax_name)
 
         sm = config.get('slugify_mode')
-        if not sm:
-            sm = app.config.get('site/slugify_mode', 'encode')
-        self.slugify_mode = _parse_slugify_mode(sm)
-        self.slugifier = _Slugifier(self.taxonomy, self.slugify_mode)
+        self.slugifier = _get_slugifier(app, self.taxonomy, sm)
 
     def getContents(self, group):
+        # Our content is procedurally generated from other content sources,
+        # so we really don't support listing anything here -- it would be
+        # quite costly.
+        #
+        # Instead, our pipeline (the `TaxonomyPipeline`) will generate
+        # content items for us when it is asked to produce bake jobs.
         raise GeneratedContentException()
 
     def getSupportedRouteParameters(self):
@@ -102,14 +101,14 @@
         #    term, we'll get a merge of the 2 on the listing page, which is
         #    what the user expects.
         #
-        tax_terms, is_combination = self._getTaxonomyTerms(
-                ctx.page.route_metadata)
+        route_params = ctx.page.source_metadata['route_params']
+        tax_terms, is_combination = self._getTaxonomyTerms(route_params)
         self._setTaxonomyFilter(ctx, tax_terms, is_combination)
 
         # Add some custom data for rendering.
         ctx.custom_data.update({
-                self.taxonomy.term_name: tax_terms,
-                'is_multiple_%s' % self.taxonomy.term_name: is_combination})
+            self.taxonomy.term_name: tax_terms,
+            'is_multiple_%s' % self.taxonomy.term_name: is_combination})
         # Add some "plural" version of the term... so for instance, if this
         # is the "tags" taxonomy, "tag" will have one term most of the time,
         # except when it's a combination. Here, we add "tags" as something that
@@ -121,12 +120,9 @@
                 mult_val = (mult_val,)
             ctx.custom_data[self.taxonomy.name] = mult_val
 
-    def _getSource(self):
-        return self.app.getSource(self.config['source'])
-
-    def _getTaxonomyTerms(self, route_metadata):
+    def _getTaxonomyTerms(self, route_params):
         # Get the individual slugified terms from the route metadata.
-        all_values = route_metadata.get(self.taxonomy.term_name)
+        all_values = route_params.get(self.taxonomy.term_name)
         if all_values is None:
             raise Exception("'%s' values couldn't be found in route metadata" %
                             self.taxonomy.term_name)
@@ -143,14 +139,14 @@
 
     def _setTaxonomyFilter(self, ctx, term_value, is_combination):
         # Set up the filter that will check the pages' terms.
-        flt = PaginationFilter(value_accessor=page_value_accessor)
+        flt = PaginationFilter()
         flt.addClause(HasTaxonomyTermsFilterClause(
-                self.taxonomy, self.slugify_mode, term_value, is_combination))
+            self.taxonomy, self.slugifier.mode, term_value, is_combination))
         ctx.pagination_filter = flt
 
-    def onRouteFunctionUsed(self, route, route_metadata):
+    def onRouteFunctionUsed(self, route_params):
         # Get the values, and slugify them appropriately.
-        values = route_metadata[self.taxonomy.term_name]
+        values = route_params[self.taxonomy.term_name]
         if self.taxonomy.is_multiple:
             # TODO: here we assume the route has been properly configured.
             slugified_values = self.slugifyMultiple((str(v) for v in values))
@@ -160,94 +156,20 @@
             route_val = slugified_values
 
         # We need to register this use of a taxonomy term.
-        eis = self.app.env.exec_info_stack
-        cpi = eis.current_page_info.render_ctx.current_pass_info
+        rcs = self.app.env.render_ctx_stack
+        cpi = rcs.current_ctx.current_pass_info
         if cpi:
             utt = cpi.getCustomInfo('used_taxonomy_terms', [], True)
             utt.append(slugified_values)
 
         # Put the slugified values in the route metadata so they're used to
         # generate the URL.
-        route_metadata[self.taxonomy.term_name] = route_val
-
-    def bake(self, ctx):
-        if not self.page_ref.exists:
-            logger.debug(
-                    "No page found at '%s', skipping taxonomy '%s'." %
-                    (self.page_ref, self.taxonomy.name))
-            return
-
-        logger.debug("Baking %s pages...", self.taxonomy.name)
-        analyzer = _TaxonomyTermsAnalyzer(self.source_name, self.taxonomy,
-                                          self.slugify_mode)
-        with format_timed_scope(logger, 'gathered taxonomy terms',
-                                level=logging.DEBUG, colored=False):
-            analyzer.analyze(ctx)
-
-        start_time = time.perf_counter()
-        page_count = self._bakeTaxonomyTerms(ctx, analyzer)
-        if page_count > 0:
-            logger.info(format_timed(
-                start_time,
-                "baked %d %s pages for %s." % (
-                    page_count, self.taxonomy.term_name, self.source_name)))
-
-    def _bakeTaxonomyTerms(self, ctx, analyzer):
-        # Start baking those terms.
-        logger.debug(
-                "Baking '%s' for source '%s': %d terms" %
-                (self.taxonomy.name, self.source_name,
-                 len(analyzer.dirty_slugified_terms)))
-
-        route = self.app.getGeneratorRoute(self.name)
-        if route is None:
-            raise Exception("No routes have been defined for generator: %s" %
-                            self.name)
-
-        logger.debug("Using taxonomy page: %s" % self.page_ref)
-        fac = self.page_ref.getFactory()
-
-        job_count = 0
-        for slugified_term in analyzer.dirty_slugified_terms:
-            extra_route_metadata = {
-                self.taxonomy.term_name: slugified_term}
-
-            # Use the slugified term as the record's extra key seed.
-            logger.debug(
-                "Queuing: %s [%s=%s]" %
-                (fac.ref_spec, self.taxonomy.name, slugified_term))
-            ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term)
-            job_count += 1
-        ctx.runJobQueue()
-
-        # Now we create bake entries for all the terms that were *not* dirty.
-        # This is because otherwise, on the next incremental bake, we wouldn't
-        # find any entry for those things, and figure that we need to delete
-        # their outputs.
-        for prev_entry, cur_entry in ctx.getAllPageRecords():
-            # Only consider taxonomy-related entries that don't have any
-            # current version (i.e. they weren't baked just now).
-            if prev_entry and not cur_entry:
-                try:
-                    t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
-                except InvalidRecordExtraKey:
-                    continue
-
-                if analyzer.isKnownSlugifiedTerm(t):
-                    logger.debug("Creating unbaked entry for %s term: %s" %
-                                 (self.name, t))
-                    ctx.collapseRecord(prev_entry)
-                else:
-                    logger.debug("Term %s in %s isn't used anymore." %
-                                 (self.name, t))
-
-        return job_count
+        route_params[self.taxonomy.term_name] = route_val
 
 
 class HasTaxonomyTermsFilterClause(SettingFilterClause):
     def __init__(self, taxonomy, slugify_mode, value, is_combination):
-        super(HasTaxonomyTermsFilterClause, self).__init__(
-                taxonomy.setting_name, value)
+        super().__init__(taxonomy.setting_name, value)
         self._taxonomy = taxonomy
         self._is_combination = is_combination
         self._slugifier = _Slugifier(taxonomy, slugify_mode)
@@ -277,11 +199,118 @@
             return page_value == self.value
 
 
+def _get_taxonomy(app, tax_name):
+    tax_config = app.config.get('site/taxonomies/' + tax_name)
+    if tax_config is None:
+        raise ConfigurationError("No such taxonomy: %s" % tax_name)
+    return Taxonomy(tax_name, tax_config)
+
+
+def _get_slugifier(app, taxonomy, slugify_mode=None):
+    if slugify_mode is None:
+        slugify_mode = app.config.get('site/slugify_mode', 'encode')
+    sm = _parse_slugify_mode(slugify_mode)
+    return _Slugifier(taxonomy, sm)
+
+
+class TaxonomyPipeline(ContentPipeline):
+    PIPELINE_NAME = 'taxonomy'
+    PASS_NUM = 1
+
+    def __init__(self, source, ctx):
+        if not isinstance(source, TaxonomySource):
+            raise Exception("The taxonomy pipeline only supports taxonomy "
+                            "content sources.")
+
+        super().__init__(source, ctx)
+        self.taxonomy = source.taxonomy
+        self.slugifier = source.slugifier
+
+    def buildJobs(self, ctx):
+        logger.debug("Building taxonomy pages for source: %s" %
+                     self.source.name)
+        analyzer = _TaxonomyTermsAnalyzer(self.source)
+        with format_timed_scope(logger, 'gathered taxonomy terms',
+                                level=logging.DEBUG, colored=False):
+            analyzer.analyze(ctx)
+
+    def bake(self, ctx):
+        if not self.page_ref.exists:
+            logger.debug(
+                "No page found at '%s', skipping taxonomy '%s'." %
+                (self.page_ref, self.taxonomy.name))
+            return
+
+        logger.debug("Baking %s pages...", self.taxonomy.name)
+        # The refactored analyzer now takes the content source directly.
+        analyzer = _TaxonomyTermsAnalyzer(self.source)
+        with format_timed_scope(logger, 'gathered taxonomy terms',
+                                level=logging.DEBUG, colored=False):
+            analyzer.analyze(ctx)
+
+        start_time = time.perf_counter()
+        page_count = self._bakeTaxonomyTerms(ctx, analyzer)
+        if page_count > 0:
+            logger.info(format_timed(
+                start_time,
+                "baked %d %s pages for %s." % (
+                    page_count, self.taxonomy.term_name, self.source_name)))
+
+    def _bakeTaxonomyTerms(self, ctx, analyzer):
+        # Start baking those terms.
+        logger.debug(
+            "Baking '%s' for source '%s': %d terms" %
+            (self.taxonomy.name, self.source_name,
+             len(analyzer.dirty_slugified_terms)))
+
+        route = self.app.getGeneratorRoute(self.name)
+        if route is None:
+            raise Exception("No routes have been defined for generator: %s" %
+                            self.name)
+
+        logger.debug("Using taxonomy page: %s" % self.page_ref)
+        fac = self.page_ref.getFactory()
+
+        job_count = 0
+        for slugified_term in analyzer.dirty_slugified_terms:
+            extra_route_params = {
+                self.taxonomy.term_name: slugified_term}
+
+            # Use the slugified term as the record's extra key seed.
+            logger.debug(
+                "Queuing: %s [%s=%s]" %
+                (fac.ref_spec, self.taxonomy.name, slugified_term))
+            ctx.queueBakeJob(fac, route, extra_route_params, slugified_term)
+            job_count += 1
+        ctx.runJobQueue()
+
+        # Now we create bake entries for all the terms that were *not* dirty.
+        # This is because otherwise, on the next incremental bake, we wouldn't
+        # find any entry for those things, and figure that we need to delete
+        # their outputs.
+        for prev_entry, cur_entry in ctx.getAllPageRecords():
+            # Only consider taxonomy-related entries that don't have any
+            # current version (i.e. they weren't baked just now).
+            if prev_entry and not cur_entry:
+                try:
+                    t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
+                except InvalidRecordExtraKey:
+                    continue
+
+                if analyzer.isKnownSlugifiedTerm(t):
+                    logger.debug("Creating unbaked entry for %s term: %s" %
+                                 (self.name, t))
+                    ctx.collapseRecord(prev_entry)
+                else:
+                    logger.debug("Term %s in %s isn't used anymore." %
+                                 (self.name, t))
+
+        return job_count
+
+
 class _TaxonomyTermsAnalyzer(object):
-    def __init__(self, source_name, taxonomy, slugify_mode):
-        self.source_name = source_name
-        self.taxonomy = taxonomy
-        self.slugifier = _Slugifier(taxonomy, slugify_mode)
+    def __init__(self, source):
+        self.source = source
         self._all_terms = {}
         self._single_dirty_slugified_terms = set()
         self._all_dirty_slugified_terms = None
@@ -415,11 +444,11 @@
 
 def _parse_slugify_mode(value):
     mapping = {
-            'encode': SLUGIFY_ENCODE,
-            'transliterate': SLUGIFY_TRANSLITERATE,
-            'lowercase': SLUGIFY_LOWERCASE,
-            'dot_to_dash': SLUGIFY_DOT_TO_DASH,
-            'space_to_dash': SLUGIFY_SPACE_TO_DASH}
+        'encode': SLUGIFY_ENCODE,
+        'transliterate': SLUGIFY_TRANSLITERATE,
+        'lowercase': SLUGIFY_LOWERCASE,
+        'dot_to_dash': SLUGIFY_DOT_TO_DASH,
+        'space_to_dash': SLUGIFY_SPACE_TO_DASH}
     mode = 0
     for v in value.split(','):
         f = mapping.get(v.strip())