comparison piecrust/sources/taxonomy.py @ 854:08e02c2a2a1a
core: Keep refactoring, this time to prepare for generator sources.
- Make a few APIs simpler.
- Content pipelines create their own jobs, so that generator sources can
keep aborting in `getContents`, but rely on their pipeline to generate
pages for baking.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 04 Jun 2017 23:34:28 -0700 |
parents | 4850f8c21b6e |
children | 448710d84121 |
853:f070a4fc033c | 854:08e02c2a2a1a |
---|---|
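The refactoring described in the commit message hinges on one contract: a generator source has nothing of its own to enumerate, so `getContents` aborts, and the paired pipeline produces the bake jobs instead. Below is a minimal sketch of that split, reusing the `GeneratedContentException` name from the diff but with simplified, assumed class shapes -- this is not PieCrust's exact API:

```python
class GeneratedContentException(Exception):
    """Signals that a source's content comes from its pipeline."""


class TagListingSource:  # stands in for a ContentSource subclass
    def getContents(self, group):
        # Listing would mean scanning every other source for terms,
        # which is costly -- so the source refuses, and the pipeline
        # below is responsible for creating bake jobs instead.
        raise GeneratedContentException()


class TagListingPipeline:  # stands in for a ContentPipeline subclass
    def __init__(self, source):
        self.source = source

    def buildJobs(self):
        # The pipeline, not the source, decides which pages to bake;
        # the terms here are invented placeholders.
        return [{'term': t} for t in ('python', 'web')]


jobs = TagListingPipeline(TagListingSource()).buildJobs()
assert jobs == [{'term': 'python'}, {'term': 'web'}]
```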
3 import logging | 3 import logging |
4 import unidecode | 4 import unidecode |
5 from piecrust.chefutil import format_timed, format_timed_scope | 5 from piecrust.chefutil import format_timed, format_timed_scope |
6 from piecrust.configuration import ConfigurationError | 6 from piecrust.configuration import ConfigurationError |
7 from piecrust.data.filters import ( | 7 from piecrust.data.filters import ( |
8 PaginationFilter, SettingFilterClause, | 8 PaginationFilter, SettingFilterClause) |
9 page_value_accessor) | 9 from piecrust.pipelines.base import ContentPipeline |
10 from piecrust.routing import RouteParameter | 10 from piecrust.routing import RouteParameter |
11 from piecrust.sources.base import ContentSource, GeneratedContentException | 11 from piecrust.sources.base import ContentSource, GeneratedContentException |
12 | 12 |
13 | 13 |
14 logger = logging.getLogger(__name__) | 14 logger = logging.getLogger(__name__) |
25 re_dot_to_dash = re.compile(r'\.+') | 25 re_dot_to_dash = re.compile(r'\.+') |
26 re_space_to_dash = re.compile(r'\s+') | 26 re_space_to_dash = re.compile(r'\s+') |
27 | 27 |
28 | 28 |
29 class Taxonomy(object): | 29 class Taxonomy(object): |
30 """ Describes a taxonomy. | |
31 """ | |
30 def __init__(self, name, config): | 32 def __init__(self, name, config): |
31 self.name = name | 33 self.name = name |
32 self.config = config | 34 self.config = config |
33 self.term_name = config.get('term', name) | 35 self.term_name = config.get('term', name) |
34 self.is_multiple = bool(config.get('multiple', False)) | 36 self.is_multiple = bool(config.get('multiple', False)) |
41 return self.name | 43 return self.name |
42 return self.term_name | 44 return self.term_name |
43 | 45 |
44 | 46 |
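For reference, `Taxonomy.__init__` above reads its settings from the site configuration's `site/taxonomies` section. A hedged example of the shape it expects, with the class body copied from the diff and the config values invented:

```python
# Illustrative config for a "multiple" taxonomy; the 'term' and
# 'multiple' keys come from the code above, the values are made up.
config = {'term': 'tag', 'multiple': True}

class Taxonomy:
    def __init__(self, name, config):
        self.name = name
        self.config = config
        self.term_name = config.get('term', name)
        self.is_multiple = bool(config.get('multiple', False))

tags = Taxonomy('tags', config)
assert (tags.term_name, tags.is_multiple) == ('tag', True)
```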
45 class TaxonomySource(ContentSource): | 47 class TaxonomySource(ContentSource): |
46 """ A page generator that handles taxonomies, _i.e._ lists of keywords | 48 """ A content source that generates taxonomy listing pages. |
47 that pages are labelled with, and for which we need to generate | |
48 listing pages. | |
49 """ | 49 """ |
50 SOURCE_NAME = 'taxonomy' | 50 SOURCE_NAME = 'taxonomy' |
51 DEFAULT_PIPELINE_NAME = 'taxonomy' | |
51 | 52 |
52 def __init__(self, app, name, config): | 53 def __init__(self, app, name, config): |
53 super().__init__(app, name, config) | 54 super().__init__(app, name, config) |
54 | 55 |
55 tax_name = config.get('taxonomy') | 56 tax_name = config.get('taxonomy') |
56 if tax_name is None: | 57 if tax_name is None: |
57 raise ConfigurationError( | 58 raise ConfigurationError( |
58 "Generator '%s' requires a taxonomy name." % name) | 59 "Taxonomy source '%s' requires a taxonomy name." % name) |
59 tax_config = app.config.get('site/taxonomies/' + tax_name) | 60 self.taxonomy = _get_taxonomy(app, tax_name) |
60 if tax_config is None: | |
61 raise ConfigurationError( | |
62 "Error initializing generator '%s', no such taxonomy: %s", | |
63 (name, tax_name)) | |
64 self.taxonomy = Taxonomy(tax_name, tax_config) | |
65 | 61 |
66 sm = config.get('slugify_mode') | 62 sm = config.get('slugify_mode') |
67 if not sm: | 63 self.slugifier = _get_slugifier(app, self.taxonomy, sm) |
68 sm = app.config.get('site/slugify_mode', 'encode') | |
69 self.slugify_mode = _parse_slugify_mode(sm) | |
70 self.slugifier = _Slugifier(self.taxonomy, self.slugify_mode) | |
71 | 64 |
72 def getContents(self, group): | 65 def getContents(self, group): |
66 # Our content is procedurally generated from other content sources, | |
67 # so we really don't support listing anything here -- it would be | |
68 # quite costly. | |
69 # | |
70 # Instead, our pipeline (the `TaxonomyPipeline`) will generate | |
71 # content items for us when it is asked to produce bake jobs. | |
73 raise GeneratedContentException() | 72 raise GeneratedContentException() |
74 | 73 |
75 def getSupportedRouteParameters(self): | 74 def getSupportedRouteParameters(self): |
76 name = self.taxonomy.term_name | 75 name = self.taxonomy.term_name |
77 param_type = (RouteParameter.TYPE_PATH if self.taxonomy.is_multiple | 76 param_type = (RouteParameter.TYPE_PATH if self.taxonomy.is_multiple |
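The truncated `getSupportedRouteParameters` above chooses the parameter type so that a "multiple" taxonomy gets a path-style parameter, letting a combination such as `tags/foo/bar` span several URL segments. A sketch, under the assumption that `RouteParameter` also offers a plain string type (only `TYPE_PATH` appears in this diff):

```python
class RouteParameter:  # reduced stand-in for piecrust.routing's class
    TYPE_STRING = 'string'  # assumed non-path default
    TYPE_PATH = 'path'

    def __init__(self, name, param_type=TYPE_STRING):
        self.name = name
        self.param_type = param_type


def route_param(term_name, is_multiple):
    # A multi-valued term must be allowed to contain slashes.
    param_type = (RouteParameter.TYPE_PATH if is_multiple
                  else RouteParameter.TYPE_STRING)
    return RouteParameter(term_name, param_type)

assert route_param('tag', True).param_type == RouteParameter.TYPE_PATH
```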
100 # have the slugified version. | 99 # have the slugified version. |
101 # * if 2 slightly different terms "collide" into the same slugified | 100 # * if 2 slightly different terms "collide" into the same slugified |
102 # term, we'll get a merge of the 2 on the listing page, which is | 101 # term, we'll get a merge of the 2 on the listing page, which is |
103 # what the user expects. | 102 # what the user expects. |
104 # | 103 # |
105 tax_terms, is_combination = self._getTaxonomyTerms( | 104 route_params = ctx.page.source_metadata['route_params'] |
106 ctx.page.route_metadata) | 105 tax_terms, is_combination = self._getTaxonomyTerms(route_params) |
107 self._setTaxonomyFilter(ctx, tax_terms, is_combination) | 106 self._setTaxonomyFilter(ctx, tax_terms, is_combination) |
108 | 107 |
109 # Add some custom data for rendering. | 108 # Add some custom data for rendering. |
110 ctx.custom_data.update({ | 109 ctx.custom_data.update({ |
111 self.taxonomy.term_name: tax_terms, | 110 self.taxonomy.term_name: tax_terms, |
112 'is_multiple_%s' % self.taxonomy.term_name: is_combination}) | 111 'is_multiple_%s' % self.taxonomy.term_name: is_combination}) |
113 # Add some "plural" version of the term... so for instance, if this | 112 # Add some "plural" version of the term... so for instance, if this |
114 # is the "tags" taxonomy, "tag" will have one term most of the time, | 113 # is the "tags" taxonomy, "tag" will have one term most of the time, |
115 # except when it's a combination. Here, we add "tags" as something that | 114 # except when it's a combination. Here, we add "tags" as something that |
116 # is always a tuple, even when it's not a combination. | 115 # is always a tuple, even when it's not a combination. |
117 if (self.taxonomy.is_multiple and | 116 if (self.taxonomy.is_multiple and |
119 mult_val = tax_terms | 118 mult_val = tax_terms |
120 if not is_combination: | 119 if not is_combination: |
121 mult_val = (mult_val,) | 120 mult_val = (mult_val,) |
122 ctx.custom_data[self.taxonomy.name] = mult_val | 121 ctx.custom_data[self.taxonomy.name] = mult_val |
123 | 122 |
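The custom data built above gives templates three views of the term: the singular name, an `is_multiple_*` flag, and an always-a-tuple plural. A toy reconstruction for a hypothetical `tags` taxonomy (simplified: it ignores the route check in the truncated `if` condition above):

```python
def build_custom_data(name, term_name, tax_terms, is_combination,
                      is_multiple):
    data = {term_name: tax_terms,
            'is_multiple_%s' % term_name: is_combination}
    if is_multiple:
        # The plural key is a tuple even for a single term.
        data[name] = tax_terms if is_combination else (tax_terms,)
    return data

# Single-term listing page:
assert build_custom_data('tags', 'tag', 'python', False, True) == {
    'tag': 'python', 'is_multiple_tag': False, 'tags': ('python',)}
# Combination page:
assert build_custom_data('tags', 'tag', ('python', 'web'), True, True) == {
    'tag': ('python', 'web'), 'is_multiple_tag': True,
    'tags': ('python', 'web')}
```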
124 def _getSource(self): | 123 def _getTaxonomyTerms(self, route_params): |
125 return self.app.getSource(self.config['source']) | |
126 | |
127 def _getTaxonomyTerms(self, route_metadata): | |
128 # Get the individual slugified terms from the route metadata. | 124 # Get the individual slugified terms from the route metadata. |
129 all_values = route_metadata.get(self.taxonomy.term_name) | 125 all_values = route_params.get(self.taxonomy.term_name) |
130 if all_values is None: | 126 if all_values is None: |
131 raise Exception("'%s' values couldn't be found in route metadata" % | 127 raise Exception("'%s' values couldn't be found in route metadata" % |
132 self.taxonomy.term_name) | 128 self.taxonomy.term_name) |
133 | 129 |
134 # If it's a "multiple" taxonomy, we need to potentially split the | 130 # If it's a "multiple" taxonomy, we need to potentially split the |
141 # Not a "multiple" taxonomy, so there's only the one value. | 137 # Not a "multiple" taxonomy, so there's only the one value. |
142 return all_values, False | 138 return all_values, False |
143 | 139 |
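`_getTaxonomyTerms` above returns either a single term or a tuple of terms plus a combination flag; the splitting logic itself falls in the elided lines of this hunk. An illustrative version, assuming a `/` separator (the real separator comes from the taxonomy's configuration):

```python
def get_terms(all_values, is_multiple, separator='/'):
    # Returns (terms, is_combination).
    if is_multiple and separator in all_values:
        return tuple(all_values.split(separator)), True
    return all_values, False

assert get_terms('python/web', True) == (('python', 'web'), True)
assert get_terms('python', True) == ('python', False)
assert get_terms('python', False) == ('python', False)
```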
144 def _setTaxonomyFilter(self, ctx, term_value, is_combination): | 140 def _setTaxonomyFilter(self, ctx, term_value, is_combination): |
145 # Set up the filter that will check the pages' terms. | 141 # Set up the filter that will check the pages' terms. |
146 flt = PaginationFilter(value_accessor=page_value_accessor) | 142 flt = PaginationFilter() |
147 flt.addClause(HasTaxonomyTermsFilterClause( | 143 flt.addClause(HasTaxonomyTermsFilterClause( |
148 self.taxonomy, self.slugify_mode, term_value, is_combination)) | 144 self.taxonomy, self.slugify_mode, term_value, is_combination)) |
149 ctx.pagination_filter = flt | 145 ctx.pagination_filter = flt |
150 | 146 |
151 def onRouteFunctionUsed(self, route, route_metadata): | 147 def onRouteFunctionUsed(self, route_params): |
152 # Get the values, and slugify them appropriately. | 148 # Get the values, and slugify them appropriately. |
153 values = route_metadata[self.taxonomy.term_name] | 149 values = route_params[self.taxonomy.term_name] |
154 if self.taxonomy.is_multiple: | 150 if self.taxonomy.is_multiple: |
155 # TODO: here we assume the route has been properly configured. | 151 # TODO: here we assume the route has been properly configured. |
156 slugified_values = self.slugifyMultiple((str(v) for v in values)) | 152 slugified_values = self.slugifyMultiple((str(v) for v in values)) |
157 route_val = self.taxonomy.separator.join(slugified_values) | 153 route_val = self.taxonomy.separator.join(slugified_values) |
158 else: | 154 else: |
159 slugified_values = self.slugify(str(values)) | 155 slugified_values = self.slugify(str(values)) |
160 route_val = slugified_values | 156 route_val = slugified_values |
161 | 157 |
162 # We need to register this use of a taxonomy term. | 158 # We need to register this use of a taxonomy term. |
163 eis = self.app.env.exec_info_stack | 159 rcs = self.app.env.render_ctx_stack |
164 cpi = eis.current_page_info.render_ctx.current_pass_info | 160 cpi = rcs.current_ctx.current_pass_info |
165 if cpi: | 161 if cpi: |
166 utt = cpi.getCustomInfo('used_taxonomy_terms', [], True) | 162 utt = cpi.getCustomInfo('used_taxonomy_terms', [], True) |
167 utt.append(slugified_values) | 163 utt.append(slugified_values) |
168 | 164 |
169 # Put the slugified values in the route metadata so they're used to | 165 # Put the slugified values in the route metadata so they're used to |
170 # generate the URL. | 166 # generate the URL. |
171 route_metadata[self.taxonomy.term_name] = route_val | 167 route_params[self.taxonomy.term_name] = route_val |
172 | |
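`onRouteFunctionUsed` above turns the raw values passed to a route function (e.g. `pctagurl('Static Sites')`) into the slugified form that ends up in the URL, and records which terms were used during rendering. A rough model with a stand-in slugifier:

```python
def slug(value):  # stand-in for _Slugifier.slugify
    return str(value).lower().replace(' ', '-')

def route_value(values, is_multiple, separator='/'):
    if is_multiple:
        # Each value is slugified, then joined into one URL segment.
        return separator.join(slug(v) for v in values)
    return slug(values)

assert route_value(('Python', 'Static Sites'), True) == \
    'python/static-sites'
assert route_value('Static Sites', False) == 'static-sites'
```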
173 def bake(self, ctx): | |
174 if not self.page_ref.exists: | |
175 logger.debug( | |
176 "No page found at '%s', skipping taxonomy '%s'." % | |
177 (self.page_ref, self.taxonomy.name)) | |
178 return | |
179 | |
180 logger.debug("Baking %s pages...", self.taxonomy.name) | |
181 analyzer = _TaxonomyTermsAnalyzer(self.source_name, self.taxonomy, | |
182 self.slugify_mode) | |
183 with format_timed_scope(logger, 'gathered taxonomy terms', | |
184 level=logging.DEBUG, colored=False): | |
185 analyzer.analyze(ctx) | |
186 | |
187 start_time = time.perf_counter() | |
188 page_count = self._bakeTaxonomyTerms(ctx, analyzer) | |
189 if page_count > 0: | |
190 logger.info(format_timed( | |
191 start_time, | |
192 "baked %d %s pages for %s." % ( | |
193 page_count, self.taxonomy.term_name, self.source_name))) | |
194 | |
195 def _bakeTaxonomyTerms(self, ctx, analyzer): | |
196 # Start baking those terms. | |
197 logger.debug( | |
198 "Baking '%s' for source '%s': %d terms" % | |
199 (self.taxonomy.name, self.source_name, | |
200 len(analyzer.dirty_slugified_terms))) | |
201 | |
202 route = self.app.getGeneratorRoute(self.name) | |
203 if route is None: | |
204 raise Exception("No routes have been defined for generator: %s" % | |
205 self.name) | |
206 | |
207 logger.debug("Using taxonomy page: %s" % self.page_ref) | |
208 fac = self.page_ref.getFactory() | |
209 | |
210 job_count = 0 | |
211 for slugified_term in analyzer.dirty_slugified_terms: | |
212 extra_route_metadata = { | |
213 self.taxonomy.term_name: slugified_term} | |
214 | |
215 # Use the slugified term as the record's extra key seed. | |
216 logger.debug( | |
217 "Queuing: %s [%s=%s]" % | |
218 (fac.ref_spec, self.taxonomy.name, slugified_term)) | |
219 ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term) | |
220 job_count += 1 | |
221 ctx.runJobQueue() | |
222 | |
223 # Now we create bake entries for all the terms that were *not* dirty. | |
224 # This is because otherwise, on the next incremental bake, we wouldn't | |
225 # find any entry for those things, and figure that we need to delete | |
226 # their outputs. | |
227 for prev_entry, cur_entry in ctx.getAllPageRecords(): | |
228 # Only consider taxonomy-related entries that don't have any | |
229 # current version (i.e. they weren't baked just now). | |
230 if prev_entry and not cur_entry: | |
231 try: | |
232 t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) | |
233 except InvalidRecordExtraKey: | |
234 continue | |
235 | |
236 if analyzer.isKnownSlugifiedTerm(t): | |
237 logger.debug("Creating unbaked entry for %s term: %s" % | |
238 (self.name, t)) | |
239 ctx.collapseRecord(prev_entry) | |
240 else: | |
241 logger.debug("Term %s in %s isn't used anymore." % | |
242 (self.name, t)) | |
243 | |
244 return job_count | |
245 | 168 |
246 | 169 |
247 class HasTaxonomyTermsFilterClause(SettingFilterClause): | 170 class HasTaxonomyTermsFilterClause(SettingFilterClause): |
248 def __init__(self, taxonomy, slugify_mode, value, is_combination): | 171 def __init__(self, taxonomy, slugify_mode, value, is_combination): |
249 super(HasTaxonomyTermsFilterClause, self).__init__( | 172 super().__init__(taxonomy.setting_name, value) |
250 taxonomy.setting_name, value) | |
251 self._taxonomy = taxonomy | 173 self._taxonomy = taxonomy |
252 self._is_combination = is_combination | 174 self._is_combination = is_combination |
253 self._slugifier = _Slugifier(taxonomy, slugify_mode) | 175 self._slugifier = _Slugifier(taxonomy, slugify_mode) |
254 | 176 |
255 def pageMatches(self, fil, page): | 177 def pageMatches(self, fil, page): |
275 return False | 197 return False |
276 page_value = self._slugifier.slugify(page_value) | 198 page_value = self._slugifier.slugify(page_value) |
277 return page_value == self.value | 199 return page_value == self.value |
278 | 200 |
279 | 201 |
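`pageMatches` above (shown only in part) slugifies the page's own term with the same slugifier before comparing, which is what makes two near-identical terms collide onto one listing page, as the earlier comment promises. A toy version:

```python
def slug(value):  # stand-in for _Slugifier.slugify
    return str(value).lower().replace(' ', '-')

def term_matches(page_value, requested_slug):
    # Compare slug-to-slug, never raw-to-slug.
    return slug(page_value) == requested_slug

# 'Static Sites' and 'static sites' merge onto one listing page:
assert term_matches('Static Sites', 'static-sites')
assert term_matches('static sites', 'static-sites')
```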
202 def _get_taxonomy(app, tax_name): | |
203 tax_config = app.config.get('site/taxonomies/' + tax_name) | |
204 if tax_config is None: | |
205 raise ConfigurationError("No such taxonomy: %s" % tax_name) | |
206 return Taxonomy(tax_name, tax_config) | |
207 | |
208 | |
209 def _get_slugifier(app, taxonomy, slugify_mode=None): | |
210 if slugify_mode is None: | |
211 slugify_mode = app.config.get('site/slugify_mode', 'encode') | |
212 sm = _parse_slugify_mode(slugify_mode) | |
213 return _Slugifier(taxonomy, sm) | |
214 | |
215 | |
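The two module-level helpers above replace the inline lookups that `TaxonomySource.__init__` used to perform, so the new pipeline can reuse them. Expected usage, sketched (the `'tags'` name and the mode string are illustrative):

```python
# taxonomy = _get_taxonomy(app, 'tags')
# slugifier = _get_slugifier(app, taxonomy)  # site-wide slugify_mode
# slugifier = _get_slugifier(app, taxonomy, 'lowercase,space_to_dash')
```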
216 class TaxonomyPipeline(ContentPipeline): | |
217 PIPELINE_NAME = 'taxonomy' | |
218 PASS_NUM = 1 | |
219 | |
220 def __init__(self, source, ctx): | |
221 if not isinstance(source, TaxonomySource): | |
222 raise Exception("The taxonomy pipeline only supports taxonomy " | |
223 "content sources.") | |
224 | |
225 super().__init__(source, ctx) | |
226 self.taxonomy = source.taxonomy | |
227 self.slugifier = source.slugifier | |
228 | |
229 def buildJobs(self, ctx): | 
230 logger.debug("Building taxonomy pages for source: %s" % | |
231 self.source.name) | |
232 analyzer = _TaxonomyTermsAnalyzer(self.source) | 
233 with format_timed_scope(logger, 'gathered taxonomy terms', | |
234 level=logging.DEBUG, colored=False): | |
235 analyzer.analyze(ctx) | |
236 | |
237 def bake(self, ctx): | |
238 if not self.page_ref.exists: | |
239 logger.debug( | |
240 "No page found at '%s', skipping taxonomy '%s'." % | |
241 (self.page_ref, self.taxonomy.name)) | |
242 return | |
243 | |
244 logger.debug("Baking %s pages...", self.taxonomy.name) | |
245 analyzer = _TaxonomyTermsAnalyzer(self.source_name, self.taxonomy, | |
246 self.slugify_mode) | |
247 with format_timed_scope(logger, 'gathered taxonomy terms', | |
248 level=logging.DEBUG, colored=False): | |
249 analyzer.analyze(ctx) | |
250 | |
251 start_time = time.perf_counter() | |
252 page_count = self._bakeTaxonomyTerms(ctx, analyzer) | |
253 if page_count > 0: | |
254 logger.info(format_timed( | |
255 start_time, | |
256 "baked %d %s pages for %s." % ( | |
257 page_count, self.taxonomy.term_name, self.source_name))) | |
258 | |
259 def _bakeTaxonomyTerms(self, ctx, analyzer): | |
260 # Start baking those terms. | |
261 logger.debug( | |
262 "Baking '%s' for source '%s': %d terms" % | |
263 (self.taxonomy.name, self.source_name, | |
264 len(analyzer.dirty_slugified_terms))) | |
265 | |
266 route = self.app.getGeneratorRoute(self.name) | |
267 if route is None: | |
268 raise Exception("No routes have been defined for generator: %s" % | |
269 self.name) | |
270 | |
271 logger.debug("Using taxonomy page: %s" % self.page_ref) | |
272 fac = self.page_ref.getFactory() | |
273 | |
274 job_count = 0 | |
275 for slugified_term in analyzer.dirty_slugified_terms: | |
276 extra_route_params = { | |
277 self.taxonomy.term_name: slugified_term} | |
278 | |
279 # Use the slugified term as the record's extra key seed. | |
280 logger.debug( | |
281 "Queuing: %s [%s=%s]" % | |
282 (fac.ref_spec, self.taxonomy.name, slugified_term)) | |
283 ctx.queueBakeJob(fac, route, extra_route_params, slugified_term) | |
284 job_count += 1 | |
285 ctx.runJobQueue() | |
286 | |
287 # Now we create bake entries for all the terms that were *not* dirty. | |
288 # This is because otherwise, on the next incremental bake, we wouldn't | |
289 # find any entry for those things, and figure that we need to delete | |
290 # their outputs. | |
291 for prev_entry, cur_entry in ctx.getAllPageRecords(): | |
292 # Only consider taxonomy-related entries that don't have any | |
293 # current version (i.e. they weren't baked just now). | |
294 if prev_entry and not cur_entry: | |
295 try: | |
296 t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) | |
297 except InvalidRecordExtraKey: | |
298 continue | |
299 | |
300 if analyzer.isKnownSlugifiedTerm(t): | |
301 logger.debug("Creating unbaked entry for %s term: %s" % | |
302 (self.name, t)) | |
303 ctx.collapseRecord(prev_entry) | |
304 else: | |
305 logger.debug("Term %s in %s isn't used anymore." % | |
306 (self.name, t)) | |
307 | |
308 return job_count | |
309 | |
310 | |
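The second half of `_bakeTaxonomyTerms` above handles incremental bakes: terms that are still in use but were not re-baked get their previous record carried over, so the next bake will not conclude that their output was deleted. A toy model of that bookkeeping:

```python
def carry_over(prev_records, cur_records, known_terms):
    # prev/cur map slugified term -> bake record (values are opaque).
    for term, entry in prev_records.items():
        if term not in cur_records:
            if term in known_terms:
                cur_records[term] = entry  # "collapse" the old record
            # else: the term is gone; its output gets cleaned up later
    return cur_records

records = carry_over({'python': 'rec1', 'zope': 'rec2'},
                     {'web': 'rec3'},
                     known_terms={'python', 'web'})
assert records == {'web': 'rec3', 'python': 'rec1'}
```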
280 class _TaxonomyTermsAnalyzer(object): | 311 class _TaxonomyTermsAnalyzer(object): |
281 def __init__(self, source_name, taxonomy, slugify_mode): | 312 def __init__(self, source): |
282 self.source_name = source_name | 313 self.source = source |
283 self.taxonomy = taxonomy | |
284 self.slugifier = _Slugifier(taxonomy, slugify_mode) | |
285 self._all_terms = {} | 314 self._all_terms = {} |
286 self._single_dirty_slugified_terms = set() | 315 self._single_dirty_slugified_terms = set() |
287 self._all_dirty_slugified_terms = None | 316 self._all_dirty_slugified_terms = None |
288 | 317 |
289 @property | 318 @property |
413 return term | 442 return term |
414 | 443 |
415 | 444 |
416 def _parse_slugify_mode(value): | 445 def _parse_slugify_mode(value): |
417 mapping = { | 446 mapping = { |
418 'encode': SLUGIFY_ENCODE, | 447 'encode': SLUGIFY_ENCODE, |
419 'transliterate': SLUGIFY_TRANSLITERATE, | 448 'transliterate': SLUGIFY_TRANSLITERATE, |
420 'lowercase': SLUGIFY_LOWERCASE, | 449 'lowercase': SLUGIFY_LOWERCASE, |
421 'dot_to_dash': SLUGIFY_DOT_TO_DASH, | 450 'dot_to_dash': SLUGIFY_DOT_TO_DASH, |
422 'space_to_dash': SLUGIFY_SPACE_TO_DASH} | 451 'space_to_dash': SLUGIFY_SPACE_TO_DASH} |
423 mode = 0 | 452 mode = 0 |
424 for v in value.split(','): | 453 for v in value.split(','): |
425 f = mapping.get(v.strip()) | 454 f = mapping.get(v.strip()) |
426 if f is None: | 455 if f is None: |
427 if v == 'iconv': | 456 if v == 'iconv': |
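`_parse_slugify_mode` (cut off at the end of this hunk) folds a comma-separated setting like `'lowercase,space_to_dash'` into a single bitmask by OR-ing flag constants. A trimmed-down version with placeholder flag values (the real `SLUGIFY_*` constants live elsewhere in piecrust):

```python
SLUGIFY_LOWERCASE = 1      # placeholder values, not the
SLUGIFY_SPACE_TO_DASH = 2  # real piecrust constants

def parse_slugify_mode(value):
    mapping = {'lowercase': SLUGIFY_LOWERCASE,
               'space_to_dash': SLUGIFY_SPACE_TO_DASH}
    mode = 0
    for v in value.split(','):
        mode |= mapping[v.strip()]
    return mode

assert parse_slugify_mode('lowercase, space_to_dash') == 3
```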