piecrust2: comparison of piecrust/generation/taxonomy.py @ 789:b8e760b3413e
bake: Fix how slugified taxonomy terms are handled.
This fixes a problem where multiple terms that all slugify to the same string
would lead to a fatal bake error.
author | Ludovic Chabant <ludovic@chabant.com>
date | Mon, 05 Sep 2016 21:03:00 -0700
parents | 661f7ba15762
children | 58ebf50235a5
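The failure mode this changeset addresses is easiest to see outside of piecrust: two distinct raw terms can slugify to the same string, so the old code could end up producing two bake jobs for the same listing URL. The snippet below is a minimal stand-alone sketch (not piecrust code); `slugify` only approximates `_Slugifier` with the transliterate and lowercase modes, and the tag values are made up.

```python
# Minimal sketch of the collision: distinct raw terms can slugify to the
# same string. unidecode is the same library _Slugifier uses for
# transliteration.
import unidecode


def slugify(term):
    # Rough stand-in for _Slugifier with SLUGIFY_TRANSLITERATE and
    # SLUGIFY_LOWERCASE enabled.
    return unidecode.unidecode(term).lower()


tags_used_across_pages = ['Étoile', 'Etoile', 'ETOILE']
slugs = {slugify(t) for t in tags_used_across_pages}

# All three tags collapse to one slug, i.e. one listing page. Before this
# change that duplication could surface as a fatal bake error; with it, the
# terms are merged and a warning is logged (see _addTerm further down).
assert slugs == {'etoile'}
```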
788:276030ea7972 (old) | 789:b8e760b3413e (new) |
---|---|
31 self.config = config | 31 self.config = config |
32 self.term_name = config.get('term', name) | 32 self.term_name = config.get('term', name) |
33 self.is_multiple = bool(config.get('multiple', False)) | 33 self.is_multiple = bool(config.get('multiple', False)) |
34 self.separator = config.get('separator', '/') | 34 self.separator = config.get('separator', '/') |
35 self.page_ref = config.get('page') | 35 self.page_ref = config.get('page') |
36 self._source_page_refs = {} | |
37 | 36 |
38 @property | 37 @property |
39 def setting_name(self): | 38 def setting_name(self): |
40 if self.is_multiple: | 39 if self.is_multiple: |
41 return self.name | 40 return self.name |
42 return self.term_name | 41 return self.term_name |
43 | 42 |
44 | 43 |
45 class TaxonomyPageGenerator(PageGenerator): | 44 class TaxonomyPageGenerator(PageGenerator): |
45 """ A page generator that handles taxonomies, _i.e._ lists of keywords | |
46 that pages are labelled with, and for which we need to generate | |
47 listing pages. | |
48 """ | |
46 GENERATOR_NAME = 'taxonomy' | 49 GENERATOR_NAME = 'taxonomy' |
47 | 50 |
48 def __init__(self, app, name, config): | 51 def __init__(self, app, name, config): |
49 super(TaxonomyPageGenerator, self).__init__(app, name, config) | 52 super(TaxonomyPageGenerator, self).__init__(app, name, config) |
50 | 53 |
61 | 64 |
62 sm = config.get('slugify_mode') | 65 sm = config.get('slugify_mode') |
63 if not sm: | 66 if not sm: |
64 sm = app.config.get('site/slugify_mode', 'encode') | 67 sm = app.config.get('site/slugify_mode', 'encode') |
65 self.slugify_mode = _parse_slugify_mode(sm) | 68 self.slugify_mode = _parse_slugify_mode(sm) |
69 self.slugifier = _Slugifier(self.taxonomy, self.slugify_mode) | |
70 | |
71 def slugify(self, term): | |
72 return self.slugifier.slugify(term) | |
73 | |
74 def slugifyMultiple(self, terms): | |
75 return self.slugifier.slugifyMultiple(terms) | |
66 | 76 |
67 def prepareRenderContext(self, ctx): | 77 def prepareRenderContext(self, ctx): |
68 self._setPaginationSource(ctx) | 78 # Set the pagination source as the source we're generating for. |
69 | 79 ctx.pagination_source = self.source |
80 | |
81 # Get the taxonomy terms from the route metadata... this can come from | |
82 # the browser's URL (while serving) or from the baking (see `bake` | |
83 # method below). In both cases, we expect to have the *slugified* | |
84 # version of the term, because we're going to set a filter that also | |
85 # slugifies the terms found on each page. | |
86 # | |
87 # This is because: | |
88 # * while serving, we get everything from the request URL, so we only | |
89 # have the slugified version. | |
90 # * if 2 slightly different terms "collide" into the same slugified | |
91 # term, we'll get a merge of the 2 on the listing page, which is | |
92 # what the user expects. | |
93 # | |
70 tax_terms, is_combination = self._getTaxonomyTerms( | 94 tax_terms, is_combination = self._getTaxonomyTerms( |
71 ctx.page.route_metadata) | 95 ctx.page.route_metadata) |
72 self._setTaxonomyFilter(ctx, tax_terms, is_combination) | 96 self._setTaxonomyFilter(ctx, tax_terms, is_combination) |
73 | 97 |
98 # Add some custom data for rendering. | |
74 ctx.custom_data.update({ | 99 ctx.custom_data.update({ |
75 self.taxonomy.term_name: tax_terms, | 100 self.taxonomy.term_name: tax_terms, |
76 'is_multiple_%s' % self.taxonomy.term_name: is_combination}) | 101 'is_multiple_%s' % self.taxonomy.term_name: is_combination}) |
102 # Add some "plural" version of the term... so for instance, if this | |
103 # is the "tags" taxonomy, "tag" will have one term most of the time, | |
104 # except when it's a combination. Here, we add "tags" as something that | |
105 # is always a tuple, even when it's not a combination. | |
77 if (self.taxonomy.is_multiple and | 106 if (self.taxonomy.is_multiple and |
78 self.taxonomy.name != self.taxonomy.term_name): | 107 self.taxonomy.name != self.taxonomy.term_name): |
79 mult_val = tax_terms | 108 mult_val = tax_terms |
80 if not is_combination: | 109 if not is_combination: |
81 mult_val = (mult_val,) | 110 mult_val = (mult_val,) |
82 ctx.custom_data[self.taxonomy.name] = mult_val | 111 ctx.custom_data[self.taxonomy.name] = mult_val |
83 logger.debug("Prepared render context with: %s" % ctx.custom_data) | |
84 | 112 |
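For reference, this is the shape of the template data that `prepareRenderContext` ends up exposing for a `tags` taxonomy whose term setting is `tag`, per the assignments in the hunk above. The dictionaries below are illustrative values, not captured output.

```python
# Illustrative ctx.custom_data contents for a "tags" taxonomy (term_name
# "tag"), mirroring the assignments in prepareRenderContext.

# Single-term listing page (e.g. /tags/dog-food):
single_term_page = {
    'tag': 'dog-food',           # taxonomy.term_name -> the slugified term
    'is_multiple_tag': False,    # is_combination flag
    'tags': ('dog-food',),       # taxonomy.name -> always a tuple
}

# Combination listing page (e.g. /tags/dog-food/cat-food):
combination_page = {
    'tag': ('dog-food', 'cat-food'),
    'is_multiple_tag': True,
    'tags': ('dog-food', 'cat-food'),
}
```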
85 def _getTaxonomyTerms(self, route_metadata): | 113 def _getTaxonomyTerms(self, route_metadata): |
114 # Get the individual slugified terms from the route metadata. | |
86 all_values = route_metadata.get(self.taxonomy.term_name) | 115 all_values = route_metadata.get(self.taxonomy.term_name) |
87 if all_values is None: | 116 if all_values is None: |
88 raise Exception("'%s' values couldn't be found in route metadata" % | 117 raise Exception("'%s' values couldn't be found in route metadata" % |
89 self.taxonomy.term_name) | 118 self.taxonomy.term_name) |
90 | 119 |
120 # If it's a "multiple" taxonomy, we need to potentially split the | |
121 # route value into the individual terms (_e.g._ when listing all pages | |
122 # that have 2 given tags, we need to get each of those 2 tags). | |
91 if self.taxonomy.is_multiple: | 123 if self.taxonomy.is_multiple: |
92 sep = self.taxonomy.separator | 124 sep = self.taxonomy.separator |
93 if sep in all_values: | 125 if sep in all_values: |
94 return tuple(all_values.split(sep)), True | 126 return tuple(all_values.split(sep)), True |
127 # Not a "multiple" taxonomy, so there's only the one value. | |
95 return all_values, False | 128 return all_values, False |
96 | 129 |
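A stand-alone sketch of the splitting logic in `_getTaxonomyTerms`, assuming a "multiple" taxonomy with the default `/` separator; `split_route_value` and the sample terms are illustrative, not piecrust API.

```python
# Simplified version of how _getTaxonomyTerms handles the route value.
def split_route_value(route_value, is_multiple=True, separator='/'):
    if is_multiple and separator in route_value:
        return tuple(route_value.split(separator)), True
    return route_value, False


assert split_route_value('dog-food/cat-food') == (('dog-food', 'cat-food'), True)
assert split_route_value('dog-food') == ('dog-food', False)
```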
97 def _setTaxonomyFilter(self, ctx, term_value, is_combination): | 130 def _setTaxonomyFilter(self, ctx, term_value, is_combination): |
131 # Set up the filter that will check the pages' terms. | |
98 flt = PaginationFilter(value_accessor=page_value_accessor) | 132 flt = PaginationFilter(value_accessor=page_value_accessor) |
99 flt.addClause(HasTaxonomyTermsFilterClause( | 133 flt.addClause(HasTaxonomyTermsFilterClause( |
100 self.taxonomy, self.slugify_mode, term_value, is_combination)) | 134 self.taxonomy, self.slugify_mode, term_value, is_combination)) |
101 ctx.pagination_filter = flt | 135 ctx.pagination_filter = flt |
102 | 136 |
103 def _setPaginationSource(self, ctx): | |
104 ctx.pagination_source = self.source | |
105 | |
106 def onRouteFunctionUsed(self, route, route_metadata): | 137 def onRouteFunctionUsed(self, route, route_metadata): |
107 # Get the values. | 138 # Get the values, and slugify them appropriately. |
108 values = route_metadata[self.taxonomy.term_name] | 139 values = route_metadata[self.taxonomy.term_name] |
109 if self.taxonomy.is_multiple: | 140 if self.taxonomy.is_multiple: |
110 #TODO: here we assume the route has been properly configured. | 141 # TODO: here we assume the route has been properly configured. |
111 values = tuple([str(v) for v in values]) | 142 slugified_values = self.slugifyMultiple((str(v) for v in values)) |
143 route_val = self.taxonomy.separator.join(slugified_values) | |
112 else: | 144 else: |
113 values = (str(values),) | 145 slugified_values = self.slugify(str(values)) |
146 route_val = slugified_values | |
114 | 147 |
115 # We need to register this use of a taxonomy term. | 148 # We need to register this use of a taxonomy term. |
116 eis = self.app.env.exec_info_stack | 149 eis = self.app.env.exec_info_stack |
117 cpi = eis.current_page_info.render_ctx.current_pass_info | 150 cpi = eis.current_page_info.render_ctx.current_pass_info |
118 if cpi: | 151 if cpi: |
119 utt = cpi.getCustomInfo('used_taxonomy_terms', [], True) | 152 utt = cpi.getCustomInfo('used_taxonomy_terms', [], True) |
120 utt.append(values) | 153 utt.append(slugified_values) |
121 | 154 |
122 # We need to slugify the terms before they get transformed | 155 # Put the slugified values in the route metadata so they're used to |
123 # into URL-bits. | 156 # generate the URL. |
124 s = _Slugifier(self.taxonomy, self.slugify_mode) | 157 route_metadata[self.taxonomy.term_name] = route_val |
125 str_values = s.slugify(values) | |
126 route_metadata[self.taxonomy.term_name] = str_values | |
127 logger.debug("Changed route metadata to: %s" % route_metadata) | |
128 | 158 |
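The important change in `onRouteFunctionUsed` is that the values a template passes to the taxonomy's route function are now slugified before being joined into the route value (and before being recorded in `used_taxonomy_terms`). A simplified sketch of that string handling, with a stand-in `slugify` and made-up terms:

```python
import unidecode


def slugify(term):
    # Stand-in for the generator's slugify()/_Slugifier, assuming the
    # transliterate + lowercase modes.
    return unidecode.unidecode(term).lower()


def route_value_for(values, is_multiple, separator='/'):
    # Mirrors the branch above: a "multiple" taxonomy joins the slugified
    # terms with the separator, a single one just slugifies the one value.
    if is_multiple:
        return separator.join(slugify(str(v)) for v in values)
    return slugify(str(values))


assert route_value_for(['Étoile', 'Chien'], is_multiple=True) == 'etoile/chien'
assert route_value_for('Étoile', is_multiple=False) == 'etoile'
```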
129 def bake(self, ctx): | 159 def bake(self, ctx): |
130 if not self.page_ref.exists: | 160 if not self.page_ref.exists: |
131 logger.debug( | 161 logger.debug( |
132 "No page found at '%s', skipping taxonomy '%s'." % | 162 "No page found at '%s', skipping taxonomy '%s'." % |
133 (self.page_ref, self.taxonomy.name)) | 163 (self.page_ref, self.taxonomy.name)) |
134 return | 164 return |
135 | 165 |
136 logger.debug("Baking %s pages...", self.taxonomy.name) | 166 logger.debug("Baking %s pages...", self.taxonomy.name) |
167 analyzer = _TaxonomyTermsAnalyzer(self.source_name, self.taxonomy, | |
168 self.slugify_mode) | |
137 with format_timed_scope(logger, 'gathered taxonomy terms', | 169 with format_timed_scope(logger, 'gathered taxonomy terms', |
138 level=logging.DEBUG, colored=False): | 170 level=logging.DEBUG, colored=False): |
139 all_terms, dirty_terms = self._buildDirtyTaxonomyTerms(ctx) | 171 analyzer.analyze(ctx) |
140 | 172 |
141 start_time = time.perf_counter() | 173 start_time = time.perf_counter() |
142 page_count = self._bakeTaxonomyTerms(ctx, all_terms, dirty_terms) | 174 page_count = self._bakeTaxonomyTerms(ctx, analyzer) |
143 if page_count > 0: | 175 if page_count > 0: |
144 logger.info(format_timed( | 176 logger.info(format_timed( |
145 start_time, | 177 start_time, |
146 "baked %d %s pages for %s." % ( | 178 "baked %d %s pages for %s." % ( |
147 page_count, self.taxonomy.term_name, self.source_name))) | 179 page_count, self.taxonomy.term_name, self.source_name))) |
148 | 180 |
149 def _buildDirtyTaxonomyTerms(self, ctx): | 181 def _bakeTaxonomyTerms(self, ctx, analyzer): |
150 # Build the list of terms for our taxonomy, and figure out which ones | |
151 # are 'dirty' for the current bake. | |
152 logger.debug("Gathering dirty taxonomy terms") | |
153 all_terms = set() | |
154 single_dirty_terms = set() | |
155 | |
156 # Re-bake all taxonomy terms that include new or changed pages. | |
157 for prev_entry, cur_entry in ctx.getBakedPageRecords(): | |
158 if cur_entry.source_name != self.source_name: | |
159 continue | |
160 | |
161 entries = [cur_entry] | |
162 if prev_entry: | |
163 entries.append(prev_entry) | |
164 | |
165 terms = [] | |
166 for e in entries: | |
167 entry_terms = e.config.get(self.taxonomy.setting_name) | |
168 if entry_terms: | |
169 if not self.taxonomy.is_multiple: | |
170 terms.append(entry_terms) | |
171 else: | |
172 terms += entry_terms | |
173 single_dirty_terms.update(terms) | |
174 | |
175 # Remember all terms used. | |
176 for _, cur_entry in ctx.getAllPageRecords(): | |
177 if cur_entry and not cur_entry.was_overriden: | |
178 cur_terms = cur_entry.config.get(self.taxonomy.setting_name) | |
179 if cur_terms: | |
180 if not self.taxonomy.is_multiple: | |
181 all_terms.add(cur_terms) | |
182 else: | |
183 all_terms |= set(cur_terms) | |
184 | |
185 # Re-bake the combination pages for terms that are 'dirty'. | |
186 # We make all terms into tuple, even those that are not actual | |
187 # combinations, so that we have less things to test further down the | |
188 # line. | |
189 dirty_terms = [(t,) for t in single_dirty_terms] | |
190 # Add the combinations to that list. | |
191 if self.taxonomy.is_multiple: | |
192 known_combinations = set() | |
193 logger.debug("Gathering dirty term combinations") | |
194 for _, cur_entry in ctx.getAllPageRecords(): | |
195 if cur_entry: | |
196 used_terms = _get_all_entry_taxonomy_terms(cur_entry) | |
197 for terms in used_terms: | |
198 if len(terms) > 1: | |
199 known_combinations.add(terms) | |
200 | |
201 for terms in known_combinations: | |
202 if not single_dirty_terms.isdisjoint(set(terms)): | |
203 dirty_terms.append(terms) | |
204 | |
205 return all_terms, dirty_terms | |
206 | |
207 def _bakeTaxonomyTerms(self, ctx, all_terms, dirty_terms): | |
208 # Start baking those terms. | 182 # Start baking those terms. |
209 logger.debug( | 183 logger.debug( |
210 "Baking '%s' for source '%s': %s" % | 184 "Baking '%s' for source '%s': %d terms" % |
211 (self.taxonomy.name, self.source_name, dirty_terms)) | 185 (self.taxonomy.name, self.source_name, |
186 len(analyzer.dirty_slugified_terms))) | |
212 | 187 |
213 route = self.app.getGeneratorRoute(self.name) | 188 route = self.app.getGeneratorRoute(self.name) |
214 if route is None: | 189 if route is None: |
215 raise Exception("No routes have been defined for generator: %s" % | 190 raise Exception("No routes have been defined for generator: %s" % |
216 self.name) | 191 self.name) |
217 | 192 |
218 logger.debug("Using taxonomy page: %s" % self.page_ref) | 193 logger.debug("Using taxonomy page: %s" % self.page_ref) |
219 fac = self.page_ref.getFactory() | 194 fac = self.page_ref.getFactory() |
220 | 195 |
221 job_count = 0 | 196 job_count = 0 |
222 s = _Slugifier(self.taxonomy, self.slugify_mode) | 197 for slugified_term in analyzer.dirty_slugified_terms: |
223 for term in dirty_terms: | 198 extra_route_metadata = { |
224 if not self.taxonomy.is_multiple: | 199 self.taxonomy.term_name: slugified_term} |
225 term = term[0] | 200 |
226 slugified_term = s.slugify(term) | 201 # Use the slugified term as the record's extra key seed. |
227 extra_route_metadata = {self.taxonomy.term_name: slugified_term} | |
228 | |
229 # Use the slugified term as the record extra key. | |
230 logger.debug( | 202 logger.debug( |
231 "Queuing: %s [%s=%s]" % | 203 "Queuing: %s [%s=%s]" % |
232 (fac.ref_spec, self.taxonomy.name, slugified_term)) | 204 (fac.ref_spec, self.taxonomy.name, slugified_term)) |
233 ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term) | 205 ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term) |
234 job_count += 1 | 206 job_count += 1 |
235 ctx.runJobQueue() | 207 ctx.runJobQueue() |
236 | 208 |
237 # Now we create bake entries for all the terms that were *not* dirty. | 209 # Now we create bake entries for all the terms that were *not* dirty. |
245 try: | 217 try: |
246 t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) | 218 t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) |
247 except InvalidRecordExtraKey: | 219 except InvalidRecordExtraKey: |
248 continue | 220 continue |
249 | 221 |
250 if t in all_terms: | 222 if analyzer.isKnownSlugifiedTerm(t): |
251 logger.debug("Creating unbaked entry for %s term: %s" % | 223 logger.debug("Creating unbaked entry for %s term: %s" % |
252 (self.name, t)) | 224 (self.name, t)) |
253 ctx.collapseRecord(prev_entry) | 225 ctx.collapseRecord(prev_entry) |
254 else: | 226 else: |
255 logger.debug("Term %s in %s isn't used anymore." % | 227 logger.debug("Term %s in %s isn't used anymore." % |
256 (self.name, t)) | 228 (self.name, t)) |
257 | 229 |
258 return job_count | 230 return job_count |
259 | |
260 | |
261 def _get_all_entry_taxonomy_terms(entry): | |
262 res = set() | |
263 for o in entry.subs: | |
264 for pinfo in o.render_info: | |
265 if pinfo: | |
266 terms = pinfo.getCustomInfo('used_taxonomy_terms') | |
267 if terms: | |
268 res |= set(terms) | |
269 return res | |
270 | 231 |
271 | 232 |
272 class HasTaxonomyTermsFilterClause(SettingFilterClause): | 233 class HasTaxonomyTermsFilterClause(SettingFilterClause): |
273 def __init__(self, taxonomy, slugify_mode, value, is_combination): | 234 def __init__(self, taxonomy, slugify_mode, value, is_combination): |
274 super(HasTaxonomyTermsFilterClause, self).__init__( | 235 super(HasTaxonomyTermsFilterClause, self).__init__( |
300 return False | 261 return False |
301 page_value = self._slugifier.slugify(page_value) | 262 page_value = self._slugifier.slugify(page_value) |
302 return page_value == self.value | 263 return page_value == self.value |
303 | 264 |
304 | 265 |
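The visible tail of `HasTaxonomyTermsFilterClause` (most of the class is elided from this excerpt) shows the other half of the behaviour: each page's own term is slugified before being compared to the already-slugified term coming from the route, which is why colliding terms get merged on the same listing page. A small illustrative sketch:

```python
import unidecode


def slugify(term):
    # Stand-in for self._slugifier.slugify (transliterate + lowercase).
    return unidecode.unidecode(term).lower()


route_term = 'etoile'                  # slugified term from the URL/route
page_terms = ['Étoile', 'Etoile']      # raw terms as written in page configs

# Both raw spellings match the same route term once slugified.
assert all(slugify(t) == route_term for t in page_terms)
```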
266 class _TaxonomyTermsAnalyzer(object): | |
267 def __init__(self, source_name, taxonomy, slugify_mode): | |
268 self.source_name = source_name | |
269 self.taxonomy = taxonomy | |
270 self.slugifier = _Slugifier(taxonomy, slugify_mode) | |
271 self._all_terms = {} | |
272 self._single_dirty_slugified_terms = set() | |
273 self._all_dirty_slugified_terms = None | |
274 | |
275 @property | |
276 def dirty_slugified_terms(self): | |
277 """ Returns the slugified terms that have been 'dirtied' during | |
278 this bake. | |
279 """ | |
280 return self._all_dirty_slugified_terms | |
281 | |
282 def isKnownSlugifiedTerm(self, term): | |
283 """ Returns whether the given slugified term has been seen during | |
284 this bake. | |
285 """ | |
286 return term in self._all_terms | |
287 | |
288 def analyze(self, ctx): | |
289 # Build the list of terms for our taxonomy, and figure out which ones | |
290 # are 'dirty' for the current bake. | |
291 # | |
292 # Remember all terms used. | |
293 for _, cur_entry in ctx.getAllPageRecords(): | |
294 if cur_entry and not cur_entry.was_overriden: | |
295 cur_terms = cur_entry.config.get(self.taxonomy.setting_name) | |
296 if cur_terms: | |
297 if not self.taxonomy.is_multiple: | |
298 self._addTerm(cur_entry.path, cur_terms) | |
299 else: | |
300 self._addTerms(cur_entry.path, cur_terms) | |
301 | |
302 # Re-bake all taxonomy terms that include new or changed pages, by | |
303 # marking them as 'dirty'. | |
304 for prev_entry, cur_entry in ctx.getBakedPageRecords(): | |
305 if cur_entry.source_name != self.source_name: | |
306 continue | |
307 | |
308 entries = [cur_entry] | |
309 if prev_entry: | |
310 entries.append(prev_entry) | |
311 | |
312 for e in entries: | |
313 entry_terms = e.config.get(self.taxonomy.setting_name) | |
314 if entry_terms: | |
315 if not self.taxonomy.is_multiple: | |
316 self._single_dirty_slugified_terms.add( | |
317 self.slugifier.slugify(entry_terms)) | |
318 else: | |
319 self._single_dirty_slugified_terms.update( | |
320 (self.slugifier.slugify(t) | |
321 for t in entry_terms)) | |
322 | |
323 self._all_dirty_slugified_terms = list( | |
324 self._single_dirty_slugified_terms) | |
325 logger.debug("Gathered %d dirty taxonomy terms", | |
326 len(self._all_dirty_slugified_terms)) | |
327 | |
328 # Re-bake the combination pages for terms that are 'dirty'. | |
329 # We make all terms into tuple, even those that are not actual | |
330 # combinations, so that we have less things to test further down the | |
331 # line. | |
332 # | |
333 # Add the combinations to that list. We get those combinations from | |
334 # wherever combinations were used, so they're coming from the | |
335 # `onRouteFunctionUsed` method. | |
336 if self.taxonomy.is_multiple: | |
337 known_combinations = set() | |
338 for _, cur_entry in ctx.getAllPageRecords(): | |
339 if cur_entry: | |
340 used_terms = _get_all_entry_taxonomy_terms(cur_entry) | |
341 for terms in used_terms: | |
342 if len(terms) > 1: | |
343 known_combinations.add(terms) | |
344 | |
345 dcc = 0 | |
346 for terms in known_combinations: | |
347 if not self._single_dirty_slugified_terms.isdisjoint( | |
348 set(terms)): | |
349 self._all_dirty_slugified_terms.append( | |
350 self.taxonomy.separator.join(terms)) | |
351 dcc += 1 | |
352 logger.debug("Gathered %d term combinations, with %d dirty." % | |
353 (len(known_combinations), dcc)) | |
354 | |
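The combination handling at the end of `analyze()` boils down to: a combination page is dirty as soon as any of its member terms is dirty, and it is queued under its separator-joined slug. A stand-alone sketch of that rule, with made-up terms:

```python
# Sketch of the dirty-combination rule in _TaxonomyTermsAnalyzer.analyze().
single_dirty = {'dog-food'}                        # slugified dirty terms
known_combinations = {('dog-food', 'cat-food'),    # gathered from the pages'
                      ('bird-seed', 'cat-food')}   # used_taxonomy_terms data

dirty_slugified_terms = list(single_dirty)
for combo in known_combinations:
    if not single_dirty.isdisjoint(combo):
        # Queue the combination under its separator-joined form.
        dirty_slugified_terms.append('/'.join(combo))

# -> ['dog-food', 'dog-food/cat-food']; the untouched combination stays out.
print(dirty_slugified_terms)
```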
355 def _addTerms(self, entry_path, terms): | |
356 for t in terms: | |
357 self._addTerm(entry_path, t) | |
358 | |
359 def _addTerm(self, entry_path, term): | |
360 st = self.slugifier.slugify(term) | |
361 orig_terms = self._all_terms.setdefault(st, []) | |
362 if orig_terms and orig_terms[0] != term: | |
363 logger.warning( | |
364 "Term '%s' in '%s' is slugified to '%s' which conflicts with " | |
365 "previously existing '%s'. The two will be merged." % | |
366 (term, entry_path, st, orig_terms[0])) | |
367 orig_terms.append(term) | |
368 | |
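`_addTerm` is where colliding terms are now detected instead of blowing up the bake: terms are indexed by their slug, and the first time two different raw terms land on the same slug a warning is logged and the two are merged. Below is a stand-alone restatement of that logic; the paths, terms and module-level `_all_terms` dict are made up for illustration.

```python
import logging

import unidecode

logger = logging.getLogger(__name__)

_all_terms = {}  # slug -> list of raw terms seen with that slug


def slugify(term):
    # Stand-in for self.slugifier.slugify (transliterate + lowercase).
    return unidecode.unidecode(term).lower()


def add_term(entry_path, term):
    st = slugify(term)
    orig_terms = _all_terms.setdefault(st, [])
    if orig_terms and orig_terms[0] != term:
        logger.warning(
            "Term '%s' in '%s' is slugified to '%s' which conflicts with "
            "previously existing '%s'. The two will be merged." %
            (term, entry_path, st, orig_terms[0]))
    orig_terms.append(term)


add_term('pages/first-post.md', 'Étoile')
add_term('pages/second-post.md', 'Etoile')   # logs the merge warning
assert _all_terms == {'etoile': ['Étoile', 'Etoile']}
```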
369 | |
370 def _get_all_entry_taxonomy_terms(entry): | |
371 res = set() | |
372 for o in entry.subs: | |
373 for pinfo in o.render_info: | |
374 if pinfo: | |
375 terms = pinfo.getCustomInfo('used_taxonomy_terms') | |
376 if terms: | |
377 res |= set(terms) | |
378 return res | |
379 | |
380 | |
305 class _Slugifier(object): | 381 class _Slugifier(object): |
306 def __init__(self, taxonomy, mode): | 382 def __init__(self, taxonomy, mode): |
307 self.taxonomy = taxonomy | 383 self.taxonomy = taxonomy |
308 self.mode = mode | 384 self.mode = mode |
309 | 385 |
386 def slugifyMultiple(self, terms): | |
387 return tuple(map(self.slugify, terms)) | |
388 | |
310 def slugify(self, term): | 389 def slugify(self, term): |
311 if isinstance(term, tuple): | |
312 return self.taxonomy.separator.join( | |
313 map(self._slugifyOne, term)) | |
314 return self._slugifyOne(term) | |
315 | |
316 def _slugifyOne(self, term): | |
317 if self.mode & SLUGIFY_TRANSLITERATE: | 390 if self.mode & SLUGIFY_TRANSLITERATE: |
318 term = unidecode.unidecode(term) | 391 term = unidecode.unidecode(term) |
319 if self.mode & SLUGIFY_LOWERCASE: | 392 if self.mode & SLUGIFY_LOWERCASE: |
320 term = term.lower() | 393 term = term.lower() |
321 if self.mode & SLUGIFY_DOT_TO_DASH: | 394 if self.mode & SLUGIFY_DOT_TO_DASH: |