comparison piecrust/baking/baker.py @ 334:b034f6f15e22

bake: Several taxonomy-related bug fixes for incorrect incremental bakes. * Improve how the baker processes taxonomy terms and figures out what needs to be re-baked or not. * Create bake entries for clean taxonomy terms so they're not deleted by an incremental bake. * Add more information to bake records. * Slugifying taxonomy terms is now done by the route in one place. * Fix a bug where the cache key for invalidating rendered segments was not computed the same way as when the caching was done. * Fix how term combinations are passed around, rendered, printed, parsed, etc. (TODO: more work needed in the routing functions) * Expose to the template whether a taxonomy term is a combination or not. * Display term combinations better in the built-in theme. * Rename `route.taxonomy` to `route.taxonomy_name` to prevent confusion. * Add options to show bake records for previous bakes.
author Ludovic Chabant <ludovic@chabant.com>
date Fri, 03 Apr 2015 10:59:50 -0700
parents 91b07f9efdc1
children 938be93215cb
comparison
equal deleted inserted replaced
333:91b07f9efdc1 334:b034f6f15e22
173 logger.debug( 173 logger.debug(
174 "Skipping taxonomy page: %s:%s" % 174 "Skipping taxonomy page: %s:%s" %
175 (source.name, fac.ref_spec)) 175 (source.name, fac.ref_spec))
176 continue 176 continue
177 177
178 entry = BakeRecordPageEntry(fac) 178 entry = BakeRecordPageEntry(fac.source.name, fac.rel_path,
179 fac.path)
179 record.addEntry(entry) 180 record.addEntry(entry)
180 181
181 route = self.app.getRoute(source.name, fac.metadata) 182 route = self.app.getRoute(source.name, fac.metadata)
182 if route is None: 183 if route is None:
183 entry.errors.append( 184 entry.errors.append(
191 record.current.success &= success 192 record.current.success &= success
192 193
193 def _bakeTaxonomies(self, record): 194 def _bakeTaxonomies(self, record):
194 logger.debug("Baking taxonomies") 195 logger.debug("Baking taxonomies")
195 196
197 class _TaxonomyTermsInfo(object):
198 def __init__(self):
199 self.dirty_terms = set()
200 self.all_terms = set()
201
202 def __str__(self):
203 return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms)
204
205 def __repr__(self):
206 return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms)
207
196 # Let's see all the taxonomy terms for which we must bake a 208 # Let's see all the taxonomy terms for which we must bake a
197 # listing page... first, pre-populate our big map of used terms. 209 # listing page... first, pre-populate our big map of used terms.
210 # For each source name, we have a list of taxonomies, and for each
211 # taxonomies, a list of terms, some being 'dirty', some used last
212 # time, etc.
198 buckets = {} 213 buckets = {}
199 tax_names = [t.name for t in self.app.taxonomies] 214 tax_names = [t.name for t in self.app.taxonomies]
200 source_names = [s.name for s in self.app.sources] 215 source_names = [s.name for s in self.app.sources]
201 for sn in source_names: 216 for sn in source_names:
202 source_taxonomies = {} 217 source_taxonomies = {}
203 buckets[sn] = source_taxonomies 218 buckets[sn] = source_taxonomies
204 for tn in tax_names: 219 for tn in tax_names:
205 source_taxonomies[tn] = set() 220 source_taxonomies[tn] = _TaxonomyTermsInfo()
206 221
207 # Now see which ones are 'dirty' based on our bake record. 222 # Now see which ones are 'dirty' based on our bake record.
208 logger.debug("Gathering dirty taxonomy terms") 223 logger.debug("Gathering dirty taxonomy terms")
209 for prev_entry, cur_entry in record.transitions.values(): 224 for prev_entry, cur_entry in record.transitions.values():
210 for tax in self.app.taxonomies: 225 for tax in self.app.taxonomies:
211 changed_terms = None
212 # Re-bake all taxonomy pages that include new or changed 226 # Re-bake all taxonomy pages that include new or changed
213 # pages. 227 # pages.
214 if (not prev_entry and cur_entry and 228 if cur_entry and cur_entry.was_baked_successfully:
215 cur_entry.was_baked_successfully): 229 if prev_entry and prev_entry.was_baked_successfully:
216 changed_terms = cur_entry.config.get(tax.setting_name) 230 # Entry was re-baked this time. Mark as dirty both the
217 elif (prev_entry and cur_entry and 231 # old and new terms.
218 cur_entry.was_baked_successfully): 232 changed_terms = []
219 changed_terms = [] 233 prev_terms = prev_entry.config.get(tax.setting_name)
234 cur_terms = cur_entry.config.get(tax.setting_name)
235 if tax.is_multiple:
236 if prev_terms is not None:
237 changed_terms += prev_terms
238 if cur_terms is not None:
239 changed_terms += cur_terms
240 else:
241 if prev_terms is not None:
242 changed_terms.append(prev_terms)
243 if cur_terms is not None:
244 changed_terms.append(cur_terms)
245 else:
246 # Entry was not baked last time. Just mark as dirty
247 # all the new terms.
248 changed_terms = cur_entry.config.get(tax.setting_name)
249
250 if changed_terms is not None:
251 if not isinstance(changed_terms, list):
252 changed_terms = [changed_terms]
253 tt_info = buckets[cur_entry.source_name][tax.name]
254 tt_info.dirty_terms |= set(changed_terms)
255
256 # Remember all terms used.
257 if cur_entry and cur_entry.was_baked_successfully:
258 cur_terms = cur_entry.config.get(tax.setting_name)
259 if cur_terms is not None:
260 if not isinstance(cur_terms, list):
261 cur_terms = [cur_terms]
262 tt_info = buckets[cur_entry.source_name][tax.name]
263 tt_info.all_terms |= set(cur_terms)
264 elif (prev_entry and prev_entry.was_baked_successfully and
265 cur_entry and not cur_entry.was_baked):
220 prev_terms = prev_entry.config.get(tax.setting_name) 266 prev_terms = prev_entry.config.get(tax.setting_name)
221 cur_terms = cur_entry.config.get(tax.setting_name) 267 if prev_terms is not None:
222 if tax.is_multiple: 268 if not isinstance(prev_terms, list):
223 if prev_terms is not None: 269 prev_terms = [prev_terms]
224 changed_terms += prev_terms 270 tt_info = buckets[prev_entry.source_name][tax.name]
225 if cur_terms is not None: 271 tt_info.all_terms |= set(prev_terms)
226 changed_terms += cur_terms
227 else:
228 if prev_terms is not None:
229 changed_terms.append(prev_terms)
230 if cur_terms is not None:
231 changed_terms.append(cur_terms)
232 if changed_terms is not None:
233 if not isinstance(changed_terms, list):
234 changed_terms = [changed_terms]
235 buckets[cur_entry.source_name][tax.name] |= (
236 set(changed_terms))
237 272
238 # Re-bake the combination pages for terms that are 'dirty'. 273 # Re-bake the combination pages for terms that are 'dirty'.
239 known_combinations = set() 274 known_combinations = set()
240 logger.debug("Gathering dirty term combinations") 275 logger.debug("Gathering dirty term combinations")
241 for prev_entry, cur_entry in record.transitions.values(): 276 for prev_entry, cur_entry in record.transitions.values():
242 if cur_entry: 277 if cur_entry and cur_entry.was_baked_successfully:
243 known_combinations |= cur_entry.used_taxonomy_terms 278 known_combinations |= cur_entry.used_taxonomy_terms
244 elif prev_entry: 279 elif prev_entry:
245 known_combinations |= prev_entry.used_taxonomy_terms 280 known_combinations |= prev_entry.used_taxonomy_terms
246 for sn, tn, terms in known_combinations: 281 for sn, tn, terms in known_combinations:
247 changed_terms = buckets[sn][tn] 282 tt_info = buckets[sn][tn]
248 if not changed_terms.isdisjoint(set(terms)): 283 tt_info.all_terms.add(terms)
249 changed_terms.add(terms) 284 if not tt_info.dirty_terms.isdisjoint(set(terms)):
285 tt_info.dirty_terms.add(terms)
250 286
251 # Start baking those terms. 287 # Start baking those terms.
252 pool, queue, abort = self._createWorkerPool(record, self.num_workers) 288 pool, queue, abort = self._createWorkerPool(record, self.num_workers)
253 for source_name, source_taxonomies in buckets.items(): 289 for source_name, source_taxonomies in buckets.items():
254 for tax_name, terms in source_taxonomies.items(): 290 for tax_name, tt_info in source_taxonomies.items():
291 terms = tt_info.dirty_terms
255 if len(terms) == 0: 292 if len(terms) == 0:
256 continue 293 continue
257 294
258 logger.debug( 295 logger.debug(
259 "Baking '%s' for source '%s': %s" % 296 "Baking '%s' for source '%s': %s" %
278 tax_page_source, tax_page_rel_path, 315 tax_page_source, tax_page_rel_path,
279 {tax.term_name: term}) 316 {tax.term_name: term})
280 logger.debug( 317 logger.debug(
281 "Queuing: %s [%s, %s]" % 318 "Queuing: %s [%s, %s]" %
282 (fac.ref_spec, tax_name, term)) 319 (fac.ref_spec, tax_name, term))
283 entry = BakeRecordPageEntry(fac, tax_name, term) 320 entry = BakeRecordPageEntry(
321 fac.source.name, fac.rel_path, fac.path,
322 (tax_name, term, source_name))
284 record.addEntry(entry) 323 record.addEntry(entry)
285 queue.addJob( 324 queue.addJob(BakeWorkerJob(fac, route, entry))
286 BakeWorkerJob(fac, route, entry, tax_name, term))
287 325
288 success = self._waitOnWorkerPool(pool, abort) 326 success = self._waitOnWorkerPool(pool, abort)
289 record.current.success &= success 327 record.current.success &= success
328
329 # Now we create bake entries for all the terms that were *not* dirty.
330 # This is because otherwise, on the next incremental bake, we wouldn't
331 # find any entry for those things, and figure that we need to delete
332 # their outputs.
333 for prev_entry, cur_entry in record.transitions.values():
334 # Only consider taxonomy-related entries that don't have any
335 # current version.
336 if (prev_entry and prev_entry.taxonomy_info and
337 not cur_entry):
338 sn = prev_entry.source_name
339 tn, tt, tsn = prev_entry.taxonomy_info
340 tt_info = buckets[tsn][tn]
341 if tt in tt_info.all_terms:
342 logger.debug("Creating unbaked entry for taxonomy "
343 "term '%s:%s'." % (tn, tt))
344 entry = BakeRecordPageEntry(
345 prev_entry.source_name, prev_entry.rel_path,
346 prev_entry.path, prev_entry.taxonomy_info)
347 record.addEntry(entry)
348 else:
349 logger.debug("Taxonomy term '%s:%s' isn't used anymore." %
350 (tn, tt))
290 351
291 def _handleDeletetions(self, record): 352 def _handleDeletetions(self, record):
292 for path, reason in record.getDeletions(): 353 for path, reason in record.getDeletions():
293 logger.debug("Removing '%s': %s" % (path, reason)) 354 logger.debug("Removing '%s': %s" % (path, reason))
294 try: 355 try:
353 self.work_queue = work_queue 414 self.work_queue = work_queue
354 self.abort_event = abort_event 415 self.abort_event = abort_event
355 416
356 417
357 class BakeWorkerJob(object): 418 class BakeWorkerJob(object):
358 def __init__(self, factory, route, record_entry, 419 def __init__(self, factory, route, record_entry):
359 taxonomy_name=None, taxonomy_term=None):
360 self.factory = factory 420 self.factory = factory
361 self.route = route 421 self.route = route
362 self.record_entry = record_entry 422 self.record_entry = record_entry
363 self.taxonomy_name = taxonomy_name
364 self.taxonomy_term = taxonomy_term
365 423
366 @property 424 @property
367 def source(self): 425 def source(self):
368 return self.factory.source 426 return self.factory.source
369 427
404 def _unsafeRun(self, job): 462 def _unsafeRun(self, job):
405 start_time = time.clock() 463 start_time = time.clock()
406 464
407 entry = job.record_entry 465 entry = job.record_entry
408 try: 466 try:
409 self._page_baker.bake( 467 self._page_baker.bake(job.factory, job.route, entry)
410 job.factory, job.route, entry,
411 taxonomy_name=job.taxonomy_name,
412 taxonomy_term=job.taxonomy_term)
413 except BakingError as ex: 468 except BakingError as ex:
414 logger.debug("Got baking error. Adding it to the record.") 469 logger.debug("Got baking error. Adding it to the record.")
415 while ex: 470 while ex:
416 entry.errors.append(str(ex)) 471 entry.errors.append(str(ex))
417 ex = ex.__cause__ 472 ex = ex.__cause__