comparison piecrust/baking/baker.py @ 711:ab5c6a8ae90a

bake: Replace hard-coded taxonomy support with "generator" system.

* Taxonomies are now implemented by one or more `TaxonomyGenerator`s.
* A `BlogArchivesGenerator` stub is there but non-functional.
author Ludovic Chabant <ludovic@chabant.com>
date Thu, 26 May 2016 19:52:47 -0700
parents 5f552aedd918
children 234d0c7c02cf
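
For orientation before the diff: the new `Baker` code touches only a small surface of each generator. It reads `gen.page_ref.possible_paths` to know which pages to skip during the regular source bake, and it calls `gen.bake(ctx)` with a `PageGeneratorBakeContext`. A minimal sketch of that implied interface follows; the real base class lives in `piecrust.generation.base` and is not part of this diff, so everything here other than those two members is an assumption:

    # Hypothetical sketch only: the diff below confirms just
    # `page_ref.possible_paths` and `bake(ctx)`; the class name and
    # constructor are made up for illustration.
    class PageGeneratorSketch:
        def __init__(self, app, page_ref):
            self.app = app
            self.page_ref = page_ref  # a page ref exposing `possible_paths`

        def bake(self, ctx):
            # `ctx` is a PageGeneratorBakeContext(app, record, pool, gen).
            # A taxonomy-style generator would gather dirty terms from the
            # bake record here and queue one bake job per term.
            raise NotImplementedError()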
--- piecrust/baking/baker.py @ 710:e85f29b28b84
+++ piecrust/baking/baker.py @ 711:ab5c6a8ae90a
@@ -1,17 +1,18 @@
 import time
 import os.path
 import hashlib
 import logging
 from piecrust.baking.records import (
-        BakeRecordEntry, TransitionalBakeRecord, TaxonomyInfo)
+        BakeRecordEntry, TransitionalBakeRecord)
 from piecrust.baking.worker import (
         save_factory,
         JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE)
 from piecrust.chefutil import (
         format_timed_scope, format_timed)
 from piecrust.environment import ExecutionStats
+from piecrust.generation.base import PageGeneratorBakeContext
 from piecrust.routing import create_route_metadata
 from piecrust.sources.base import (
         REALM_NAMES, REALM_USER, REALM_THEME)
 
 
@@ -27,20 +28,17 @@
         self.out_dir = out_dir
         self.force = force
         self.applied_config_variant = applied_config_variant
         self.applied_config_values = applied_config_values
 
-        # Remember what taxonomy pages we should skip
-        # (we'll bake them repeatedly later with each taxonomy term)
-        self.taxonomy_pages = []
-        logger.debug("Gathering taxonomy page paths:")
-        for tax in self.app.taxonomies:
-            for src in self.app.sources:
-                tax_page_ref = tax.getPageRef(src)
-                for path in tax_page_ref.possible_paths:
-                    self.taxonomy_pages.append(path)
-                    logger.debug(" - %s" % path)
+        # Remember what generator pages we should skip.
+        self.generator_pages = []
+        logger.debug("Gathering generator page paths:")
+        for gen in self.app.generators:
+            for path in gen.page_ref.possible_paths:
+                self.generator_pages.append(path)
+                logger.debug(" - %s" % path)
 
         # Register some timers.
         self.app.env.registerTimer('LoadJob', raise_if_registered=False)
         self.app.env.registerTimer('RenderFirstSubJob',
                                    raise_if_registered=False)
@@ -99,12 +97,12 @@
         for realm in realm_list:
             srclist = sources_by_realm.get(realm)
             if srclist is not None:
                 self._bakeRealm(record, pool, realm, srclist)
 
-        # Bake taxonomies.
-        self._bakeTaxonomies(record, pool)
+        # Call all the page generators.
+        self._bakePageGenerators(record, pool)
 
         # All done with the workers. Close the pool and get reports.
         reports = pool.close()
         total_stats = ExecutionStats()
         record.current.stats['_Total'] = total_stats
@@ -195,11 +193,11 @@
 
             all_factories = []
             for source in srclist:
                 factories = source.getPageFactories()
                 all_factories += [f for f in factories
-                                  if f.path not in self.taxonomy_pages]
+                                  if f.path not in self.generator_pages]
 
             self._loadRealmPages(record, pool, all_factories)
             self._renderRealmPages(record, pool, all_factories)
             self._bakeRealmPages(record, pool, realm, all_factories)
         finally:
@@ -270,30 +268,34 @@
                 record_entry.errors.append(
                         "Can't get source for page: %s" % fac.ref_spec)
                 logger.error(record_entry.errors[-1])
                 continue
 
-            route = self.app.getRoute(fac.source.name, fac.metadata,
-                                      skip_taxonomies=True)
+            route = self.app.getSourceRoute(fac.source.name, fac.metadata)
             if route is None:
                 record_entry.errors.append(
                         "Can't get route for page: %s" % fac.ref_spec)
                 logger.error(record_entry.errors[-1])
                 continue
 
             # All good, queue the job.
+            route_index = self.app.routes.index(route)
             job = {
                 'type': JOB_RENDER_FIRST,
-                'job': save_factory(fac)}
+                'job': {
+                    'factory_info': save_factory(fac),
+                    'route_index': route_index
+                }
+            }
             jobs.append(job)
 
         ar = pool.queueJobs(jobs, handler=_handler)
         ar.wait()
 
     def _bakeRealmPages(self, record, pool, realm, factories):
         def _handler(res):
-            entry = record.getCurrentEntry(res['path'], res['taxonomy_info'])
+            entry = record.getCurrentEntry(res['path'])
             entry.subs = res['sub_entries']
             if res['errors']:
                 entry.errors += res['errors']
                 self._logErrors(res['path'], res['errors'])
             if entry.has_any_error:
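
Worth noting in the hunk above: render jobs no longer carry just a pickled factory. They now also carry `route_index`, an index into `self.app.routes`, so the master resolves the route once (via the new `getSourceRoute()`) and a worker can presumably rehydrate it by position from its own app instance instead of re-matching metadata. Something like the following, as an assumption (the worker-side half is not in this diff):

    # Assumed worker-side counterpart, not shown in this diff; it relies on
    # the master and the workers building their route lists in the same order.
    def resolve_job_route(app, job):
        return app.routes[job['job']['route_index']]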
@@ -315,162 +317,18 @@
             jobs.append(job)
 
         ar = pool.queueJobs(jobs, handler=_handler)
         ar.wait()
 
-    def _bakeTaxonomies(self, record, pool):
-        logger.debug("Baking taxonomy pages...")
-        with format_timed_scope(logger, 'built taxonomy buckets',
-                                level=logging.DEBUG, colored=False):
-            buckets = self._buildTaxonomyBuckets(record)
-
-        start_time = time.perf_counter()
-        page_count = self._bakeTaxonomyBuckets(record, pool, buckets)
-        logger.info(format_timed(start_time,
-                                 "baked %d taxonomy pages." % page_count))
-
-    def _buildTaxonomyBuckets(self, record):
-        # Let's see all the taxonomy terms for which we must bake a
-        # listing page... first, pre-populate our big map of used terms.
-        # For each source name, we have a list of taxonomies, and for each
-        # taxonomies, a list of terms, some being 'dirty', some used last
-        # time, etc.
-        buckets = {}
-        tax_names = [t.name for t in self.app.taxonomies]
-        source_names = [s.name for s in self.app.sources]
-        for sn in source_names:
-            source_taxonomies = {}
-            buckets[sn] = source_taxonomies
-            for tn in tax_names:
-                source_taxonomies[tn] = _TaxonomyTermsInfo()
-
-        # Now see which ones are 'dirty' based on our bake record.
-        logger.debug("Gathering dirty taxonomy terms")
-        for prev_entry, cur_entry in record.transitions.values():
-            # Re-bake all taxonomy pages that include new or changed
-            # pages.
-            if cur_entry and cur_entry.was_any_sub_baked:
-                entries = [cur_entry]
-                if prev_entry:
-                    entries.append(prev_entry)
-
-                for tax in self.app.taxonomies:
-                    changed_terms = set()
-                    for e in entries:
-                        terms = e.config.get(tax.setting_name)
-                        if terms:
-                            if not tax.is_multiple:
-                                terms = [terms]
-                            changed_terms |= set(terms)
-
-                    if len(changed_terms) > 0:
-                        tt_info = buckets[cur_entry.source_name][tax.name]
-                        tt_info.dirty_terms |= changed_terms
-
-            # Remember all terms used.
-            for tax in self.app.taxonomies:
-                if cur_entry and not cur_entry.was_overriden:
-                    cur_terms = cur_entry.config.get(tax.setting_name)
-                    if cur_terms:
-                        if not tax.is_multiple:
-                            cur_terms = [cur_terms]
-                        tt_info = buckets[cur_entry.source_name][tax.name]
-                        tt_info.all_terms |= set(cur_terms)
-
-        # Re-bake the combination pages for terms that are 'dirty'.
-        known_combinations = set()
-        logger.debug("Gathering dirty term combinations")
-        for prev_entry, cur_entry in record.transitions.values():
-            if not cur_entry:
-                continue
-            used_taxonomy_terms = cur_entry.getAllUsedTaxonomyTerms()
-            for sn, tn, terms in used_taxonomy_terms:
-                if isinstance(terms, tuple):
-                    known_combinations.add((sn, tn, terms))
-        for sn, tn, terms in known_combinations:
-            tt_info = buckets[sn][tn]
-            tt_info.all_terms.add(terms)
-            if not tt_info.dirty_terms.isdisjoint(set(terms)):
-                tt_info.dirty_terms.add(terms)
-
-        return buckets
-
-    def _bakeTaxonomyBuckets(self, record, pool, buckets):
-        def _handler(res):
-            entry = record.getCurrentEntry(res['path'], res['taxonomy_info'])
-            entry.subs = res['sub_entries']
-            if res['errors']:
-                entry.errors += res['errors']
-            if entry.has_any_error:
-                record.current.success = False
-
-        # Start baking those terms.
-        jobs = []
-        for source_name, source_taxonomies in buckets.items():
-            for tax_name, tt_info in source_taxonomies.items():
-                terms = tt_info.dirty_terms
-                if len(terms) == 0:
-                    continue
-
-                logger.debug(
-                        "Baking '%s' for source '%s': %s" %
-                        (tax_name, source_name, terms))
-                tax = self.app.getTaxonomy(tax_name)
-                source = self.app.getSource(source_name)
-                tax_page_ref = tax.getPageRef(source)
-                if not tax_page_ref.exists:
-                    logger.debug(
-                            "No taxonomy page found at '%s', skipping." %
-                            tax.page_ref)
-                    continue
-
-                logger.debug(
-                        "Using taxonomy page: %s:%s" %
-                        (tax_page_ref.source_name, tax_page_ref.rel_path))
-                fac = tax_page_ref.getFactory()
-
-                for term in terms:
-                    logger.debug(
-                            "Queuing: %s [%s=%s]" %
-                            (fac.ref_spec, tax_name, term))
-                    tax_info = TaxonomyInfo(tax_name, source_name, term)
-
-                    cur_entry = BakeRecordEntry(
-                            fac.source.name, fac.path, tax_info)
-                    record.addEntry(cur_entry)
-
-                    job = self._makeBakeJob(record, fac, tax_info)
-                    if job is not None:
-                        jobs.append(job)
-
-        ar = pool.queueJobs(jobs, handler=_handler)
-        ar.wait()
-
-        # Now we create bake entries for all the terms that were *not* dirty.
-        # This is because otherwise, on the next incremental bake, we wouldn't
-        # find any entry for those things, and figure that we need to delete
-        # their outputs.
-        for prev_entry, cur_entry in record.transitions.values():
-            # Only consider taxonomy-related entries that don't have any
-            # current version.
-            if (prev_entry and prev_entry.taxonomy_info and
-                    not cur_entry):
-                ti = prev_entry.taxonomy_info
-                tt_info = buckets[ti.source_name][ti.taxonomy_name]
-                if ti.term in tt_info.all_terms:
-                    logger.debug("Creating unbaked entry for taxonomy "
-                                 "term '%s:%s'." % (ti.taxonomy_name, ti.term))
-                    record.collapseEntry(prev_entry)
-                else:
-                    logger.debug("Taxonomy term '%s:%s' isn't used anymore." %
-                                 (ti.taxonomy_name, ti.term))
-
-        return len(jobs)
-
-    def _makeBakeJob(self, record, fac, tax_info=None):
+    def _bakePageGenerators(self, record, pool):
+        for gen in self.app.generators:
+            ctx = PageGeneratorBakeContext(self.app, record, pool, gen)
+            gen.bake(ctx)
+
+    def _makeBakeJob(self, record, fac):
         # Get the previous (if any) and current entry for this page.
-        pair = record.getPreviousAndCurrentEntries(fac.path, tax_info)
+        pair = record.getPreviousAndCurrentEntries(fac.path)
         assert pair is not None
         prev_entry, cur_entry = pair
         assert cur_entry is not None
 
         # Ignore if there were errors in the previous passes.
@@ -480,20 +338,11 @@
             return None
 
         # Build the route metadata and find the appropriate route.
         page = fac.buildPage()
         route_metadata = create_route_metadata(page)
-        if tax_info is not None:
-            tax = self.app.getTaxonomy(tax_info.taxonomy_name)
-            route = self.app.getTaxonomyRoute(tax_info.taxonomy_name,
-                                              tax_info.source_name)
-
-            slugified_term = route.slugifyTaxonomyTerm(tax_info.term)
-            route_metadata[tax.term_name] = slugified_term
-        else:
-            route = self.app.getRoute(fac.source.name, route_metadata,
-                                      skip_taxonomies=True)
+        route = self.app.getSourceRoute(fac.source.name, route_metadata)
         assert route is not None
 
         # Figure out if this page is overriden by another previously
         # baked page. This happens for example when the user has
         # made a page that has the same page/URL as a theme page.
@@ -509,15 +358,18 @@
                     (fac.ref_spec, uri, override_entry.path))
             logger.error(cur_entry.errors[-1])
             cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN
             return None
 
+        route_index = self.app.routes.index(route)
         job = {
             'type': JOB_BAKE,
             'job': {
                 'factory_info': save_factory(fac),
-                'taxonomy_info': tax_info,
+                'generator_name': None,
+                'generator_record_key': None,
+                'route_index': route_index,
                 'route_metadata': route_metadata,
                 'dirty_source_names': record.dirty_source_names
             }
         }
         return job
@@ -567,17 +419,5 @@
                 batch_size=batch_size,
                 worker_class=BakeWorker,
                 initargs=(ctx,))
         return pool
 
-
-class _TaxonomyTermsInfo(object):
-    def __init__(self):
-        self.dirty_terms = set()
-        self.all_terms = set()
-
-    def __str__(self):
-        return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms)
-
-    def __repr__(self):
-        return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms)
-
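
In summary, the worker-facing bake contract loses its taxonomy-specific fields and gains generic generator hooks. A condensed before/after of the JOB_BAKE payload built by `_makeBakeJob()` (copied from the hunks above; for regular source pages the `generator_*` fields are left as None, and a generator's own bake path would presumably fill them in):

    # Before: taxonomy-specific.
    old_payload = {
        'factory_info': save_factory(fac),
        'taxonomy_info': tax_info,           # TaxonomyInfo or None
        'route_metadata': route_metadata,
        'dirty_source_names': record.dirty_source_names,
    }

    # After: generator-agnostic.
    new_payload = {
        'factory_info': save_factory(fac),
        'generator_name': None,              # assumed set by generator bakes
        'generator_record_key': None,        # ditto
        'route_index': route_index,          # index into app.routes
        'route_metadata': route_metadata,
        'dirty_source_names': record.dirty_source_names,
    }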