Mercurial > piecrust2
comparison piecrust/baking/baker.py @ 334:b034f6f15e22
bake: Several taxonomy-related bug fixes for incorrect incremental bakes.
* Improve how the baker processes taxonomy terms and figures out what needs
to be re-baked or not.
* Create bake entries for clean taxonomy terms so they're not deleted by an
incremental bake.
* Add more information to bake records.
* Slugifying taxonomy terms is now done by the route in one place.
* Fix a bug where the cache key for invalidating rendered segments was not
computed the same way as when the caching was done.
* Fix how term combinations are passed around, rendered, printed, parsed, etc.
(TODO: more work needed in the routing functions)
* Expose to the template whether a taxonomy term is a combination or not.
* Display term combinations better in the built-in theme.
* Rename `route.taxonomy` to `route.taxonomy_name` to prevent confusion.
* Add options to show bake records for previous bakes.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Fri, 03 Apr 2015 10:59:50 -0700 |
parents | 91b07f9efdc1 |
children | 938be93215cb |
comparison
equal
deleted
inserted
replaced
333:91b07f9efdc1 | 334:b034f6f15e22 |
---|---|
173 logger.debug( | 173 logger.debug( |
174 "Skipping taxonomy page: %s:%s" % | 174 "Skipping taxonomy page: %s:%s" % |
175 (source.name, fac.ref_spec)) | 175 (source.name, fac.ref_spec)) |
176 continue | 176 continue |
177 | 177 |
178 entry = BakeRecordPageEntry(fac) | 178 entry = BakeRecordPageEntry(fac.source.name, fac.rel_path, |
179 fac.path) | |
179 record.addEntry(entry) | 180 record.addEntry(entry) |
180 | 181 |
181 route = self.app.getRoute(source.name, fac.metadata) | 182 route = self.app.getRoute(source.name, fac.metadata) |
182 if route is None: | 183 if route is None: |
183 entry.errors.append( | 184 entry.errors.append( |
191 record.current.success &= success | 192 record.current.success &= success |
192 | 193 |
193 def _bakeTaxonomies(self, record): | 194 def _bakeTaxonomies(self, record): |
194 logger.debug("Baking taxonomies") | 195 logger.debug("Baking taxonomies") |
195 | 196 |
197 class _TaxonomyTermsInfo(object): | |
198 def __init__(self): | |
199 self.dirty_terms = set() | |
200 self.all_terms = set() | |
201 | |
202 def __str__(self): | |
203 return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms) | |
204 | |
205 def __repr__(self): | |
206 return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms) | |
207 | |
196 # Let's see all the taxonomy terms for which we must bake a | 208 # Let's see all the taxonomy terms for which we must bake a |
197 # listing page... first, pre-populate our big map of used terms. | 209 # listing page... first, pre-populate our big map of used terms. |
210 # For each source name, we have a list of taxonomies, and for each | |
211 # taxonomies, a list of terms, some being 'dirty', some used last | |
212 # time, etc. | |
198 buckets = {} | 213 buckets = {} |
199 tax_names = [t.name for t in self.app.taxonomies] | 214 tax_names = [t.name for t in self.app.taxonomies] |
200 source_names = [s.name for s in self.app.sources] | 215 source_names = [s.name for s in self.app.sources] |
201 for sn in source_names: | 216 for sn in source_names: |
202 source_taxonomies = {} | 217 source_taxonomies = {} |
203 buckets[sn] = source_taxonomies | 218 buckets[sn] = source_taxonomies |
204 for tn in tax_names: | 219 for tn in tax_names: |
205 source_taxonomies[tn] = set() | 220 source_taxonomies[tn] = _TaxonomyTermsInfo() |
206 | 221 |
207 # Now see which ones are 'dirty' based on our bake record. | 222 # Now see which ones are 'dirty' based on our bake record. |
208 logger.debug("Gathering dirty taxonomy terms") | 223 logger.debug("Gathering dirty taxonomy terms") |
209 for prev_entry, cur_entry in record.transitions.values(): | 224 for prev_entry, cur_entry in record.transitions.values(): |
210 for tax in self.app.taxonomies: | 225 for tax in self.app.taxonomies: |
211 changed_terms = None | |
212 # Re-bake all taxonomy pages that include new or changed | 226 # Re-bake all taxonomy pages that include new or changed |
213 # pages. | 227 # pages. |
214 if (not prev_entry and cur_entry and | 228 if cur_entry and cur_entry.was_baked_successfully: |
215 cur_entry.was_baked_successfully): | 229 if prev_entry and prev_entry.was_baked_successfully: |
216 changed_terms = cur_entry.config.get(tax.setting_name) | 230 # Entry was re-baked this time. Mark as dirty both the |
217 elif (prev_entry and cur_entry and | 231 # old and new terms. |
218 cur_entry.was_baked_successfully): | 232 changed_terms = [] |
219 changed_terms = [] | 233 prev_terms = prev_entry.config.get(tax.setting_name) |
234 cur_terms = cur_entry.config.get(tax.setting_name) | |
235 if tax.is_multiple: | |
236 if prev_terms is not None: | |
237 changed_terms += prev_terms | |
238 if cur_terms is not None: | |
239 changed_terms += cur_terms | |
240 else: | |
241 if prev_terms is not None: | |
242 changed_terms.append(prev_terms) | |
243 if cur_terms is not None: | |
244 changed_terms.append(cur_terms) | |
245 else: | |
246 # Entry was not baked last time. Just mark as dirty | |
247 # all the new terms. | |
248 changed_terms = cur_entry.config.get(tax.setting_name) | |
249 | |
250 if changed_terms is not None: | |
251 if not isinstance(changed_terms, list): | |
252 changed_terms = [changed_terms] | |
253 tt_info = buckets[cur_entry.source_name][tax.name] | |
254 tt_info.dirty_terms |= set(changed_terms) | |
255 | |
256 # Remember all terms used. | |
257 if cur_entry and cur_entry.was_baked_successfully: | |
258 cur_terms = cur_entry.config.get(tax.setting_name) | |
259 if cur_terms is not None: | |
260 if not isinstance(cur_terms, list): | |
261 cur_terms = [cur_terms] | |
262 tt_info = buckets[cur_entry.source_name][tax.name] | |
263 tt_info.all_terms |= set(cur_terms) | |
264 elif (prev_entry and prev_entry.was_baked_successfully and | |
265 cur_entry and not cur_entry.was_baked): | |
220 prev_terms = prev_entry.config.get(tax.setting_name) | 266 prev_terms = prev_entry.config.get(tax.setting_name) |
221 cur_terms = cur_entry.config.get(tax.setting_name) | 267 if prev_terms is not None: |
222 if tax.is_multiple: | 268 if not isinstance(prev_terms, list): |
223 if prev_terms is not None: | 269 prev_terms = [prev_terms] |
224 changed_terms += prev_terms | 270 tt_info = buckets[prev_entry.source_name][tax.name] |
225 if cur_terms is not None: | 271 tt_info.all_terms |= set(prev_terms) |
226 changed_terms += cur_terms | |
227 else: | |
228 if prev_terms is not None: | |
229 changed_terms.append(prev_terms) | |
230 if cur_terms is not None: | |
231 changed_terms.append(cur_terms) | |
232 if changed_terms is not None: | |
233 if not isinstance(changed_terms, list): | |
234 changed_terms = [changed_terms] | |
235 buckets[cur_entry.source_name][tax.name] |= ( | |
236 set(changed_terms)) | |
237 | 272 |
238 # Re-bake the combination pages for terms that are 'dirty'. | 273 # Re-bake the combination pages for terms that are 'dirty'. |
239 known_combinations = set() | 274 known_combinations = set() |
240 logger.debug("Gathering dirty term combinations") | 275 logger.debug("Gathering dirty term combinations") |
241 for prev_entry, cur_entry in record.transitions.values(): | 276 for prev_entry, cur_entry in record.transitions.values(): |
242 if cur_entry: | 277 if cur_entry and cur_entry.was_baked_successfully: |
243 known_combinations |= cur_entry.used_taxonomy_terms | 278 known_combinations |= cur_entry.used_taxonomy_terms |
244 elif prev_entry: | 279 elif prev_entry: |
245 known_combinations |= prev_entry.used_taxonomy_terms | 280 known_combinations |= prev_entry.used_taxonomy_terms |
246 for sn, tn, terms in known_combinations: | 281 for sn, tn, terms in known_combinations: |
247 changed_terms = buckets[sn][tn] | 282 tt_info = buckets[sn][tn] |
248 if not changed_terms.isdisjoint(set(terms)): | 283 tt_info.all_terms.add(terms) |
249 changed_terms.add(terms) | 284 if not tt_info.dirty_terms.isdisjoint(set(terms)): |
285 tt_info.dirty_terms.add(terms) | |
250 | 286 |
251 # Start baking those terms. | 287 # Start baking those terms. |
252 pool, queue, abort = self._createWorkerPool(record, self.num_workers) | 288 pool, queue, abort = self._createWorkerPool(record, self.num_workers) |
253 for source_name, source_taxonomies in buckets.items(): | 289 for source_name, source_taxonomies in buckets.items(): |
254 for tax_name, terms in source_taxonomies.items(): | 290 for tax_name, tt_info in source_taxonomies.items(): |
291 terms = tt_info.dirty_terms | |
255 if len(terms) == 0: | 292 if len(terms) == 0: |
256 continue | 293 continue |
257 | 294 |
258 logger.debug( | 295 logger.debug( |
259 "Baking '%s' for source '%s': %s" % | 296 "Baking '%s' for source '%s': %s" % |
278 tax_page_source, tax_page_rel_path, | 315 tax_page_source, tax_page_rel_path, |
279 {tax.term_name: term}) | 316 {tax.term_name: term}) |
280 logger.debug( | 317 logger.debug( |
281 "Queuing: %s [%s, %s]" % | 318 "Queuing: %s [%s, %s]" % |
282 (fac.ref_spec, tax_name, term)) | 319 (fac.ref_spec, tax_name, term)) |
283 entry = BakeRecordPageEntry(fac, tax_name, term) | 320 entry = BakeRecordPageEntry( |
321 fac.source.name, fac.rel_path, fac.path, | |
322 (tax_name, term, source_name)) | |
284 record.addEntry(entry) | 323 record.addEntry(entry) |
285 queue.addJob( | 324 queue.addJob(BakeWorkerJob(fac, route, entry)) |
286 BakeWorkerJob(fac, route, entry, tax_name, term)) | |
287 | 325 |
288 success = self._waitOnWorkerPool(pool, abort) | 326 success = self._waitOnWorkerPool(pool, abort) |
289 record.current.success &= success | 327 record.current.success &= success |
328 | |
329 # Now we create bake entries for all the terms that were *not* dirty. | |
330 # This is because otherwise, on the next incremental bake, we wouldn't | |
331 # find any entry for those things, and figure that we need to delete | |
332 # their outputs. | |
333 for prev_entry, cur_entry in record.transitions.values(): | |
334 # Only consider taxonomy-related entries that don't have any | |
335 # current version. | |
336 if (prev_entry and prev_entry.taxonomy_info and | |
337 not cur_entry): | |
338 sn = prev_entry.source_name | |
339 tn, tt, tsn = prev_entry.taxonomy_info | |
340 tt_info = buckets[tsn][tn] | |
341 if tt in tt_info.all_terms: | |
342 logger.debug("Creating unbaked entry for taxonomy " | |
343 "term '%s:%s'." % (tn, tt)) | |
344 entry = BakeRecordPageEntry( | |
345 prev_entry.source_name, prev_entry.rel_path, | |
346 prev_entry.path, prev_entry.taxonomy_info) | |
347 record.addEntry(entry) | |
348 else: | |
349 logger.debug("Taxonomy term '%s:%s' isn't used anymore." % | |
350 (tn, tt)) | |
290 | 351 |
291 def _handleDeletetions(self, record): | 352 def _handleDeletetions(self, record): |
292 for path, reason in record.getDeletions(): | 353 for path, reason in record.getDeletions(): |
293 logger.debug("Removing '%s': %s" % (path, reason)) | 354 logger.debug("Removing '%s': %s" % (path, reason)) |
294 try: | 355 try: |
353 self.work_queue = work_queue | 414 self.work_queue = work_queue |
354 self.abort_event = abort_event | 415 self.abort_event = abort_event |
355 | 416 |
356 | 417 |
357 class BakeWorkerJob(object): | 418 class BakeWorkerJob(object): |
358 def __init__(self, factory, route, record_entry, | 419 def __init__(self, factory, route, record_entry): |
359 taxonomy_name=None, taxonomy_term=None): | |
360 self.factory = factory | 420 self.factory = factory |
361 self.route = route | 421 self.route = route |
362 self.record_entry = record_entry | 422 self.record_entry = record_entry |
363 self.taxonomy_name = taxonomy_name | |
364 self.taxonomy_term = taxonomy_term | |
365 | 423 |
366 @property | 424 @property |
367 def source(self): | 425 def source(self): |
368 return self.factory.source | 426 return self.factory.source |
369 | 427 |
404 def _unsafeRun(self, job): | 462 def _unsafeRun(self, job): |
405 start_time = time.clock() | 463 start_time = time.clock() |
406 | 464 |
407 entry = job.record_entry | 465 entry = job.record_entry |
408 try: | 466 try: |
409 self._page_baker.bake( | 467 self._page_baker.bake(job.factory, job.route, entry) |
410 job.factory, job.route, entry, | |
411 taxonomy_name=job.taxonomy_name, | |
412 taxonomy_term=job.taxonomy_term) | |
413 except BakingError as ex: | 468 except BakingError as ex: |
414 logger.debug("Got baking error. Adding it to the record.") | 469 logger.debug("Got baking error. Adding it to the record.") |
415 while ex: | 470 while ex: |
416 entry.errors.append(str(ex)) | 471 entry.errors.append(str(ex)) |
417 ex = ex.__cause__ | 472 ex = ex.__cause__ |