piecrust2: comparison of piecrust/baking/baker.py @ 711:ab5c6a8ae90a
bake: Replace hard-coded taxonomy support with "generator" system.
* Taxonomies are now implemented by one or more `TaxonomyGenerator`s.
* A `BlogArchivesGenerator` stub is there but non-functional.
| author | Ludovic Chabant <ludovic@chabant.com> |
| --- | --- |
| date | Thu, 26 May 2016 19:52:47 -0700 |
| parents | 5f552aedd918 |
| children | 234d0c7c02cf |
| old: 710:e85f29b28b84 | new: 711:ab5c6a8ae90a |
| --- | --- |
1 import time | 1 import time |
2 import os.path | 2 import os.path |
3 import hashlib | 3 import hashlib |
4 import logging | 4 import logging |
5 from piecrust.baking.records import ( | 5 from piecrust.baking.records import ( |
6 BakeRecordEntry, TransitionalBakeRecord, TaxonomyInfo) | 6 BakeRecordEntry, TransitionalBakeRecord) |
7 from piecrust.baking.worker import ( | 7 from piecrust.baking.worker import ( |
8 save_factory, | 8 save_factory, |
9 JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE) | 9 JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE) |
10 from piecrust.chefutil import ( | 10 from piecrust.chefutil import ( |
11 format_timed_scope, format_timed) | 11 format_timed_scope, format_timed) |
12 from piecrust.environment import ExecutionStats | 12 from piecrust.environment import ExecutionStats |
| 13 from piecrust.generation.base import PageGeneratorBakeContext |
13 from piecrust.routing import create_route_metadata | 14 from piecrust.routing import create_route_metadata |
14 from piecrust.sources.base import ( | 15 from piecrust.sources.base import ( |
15 REALM_NAMES, REALM_USER, REALM_THEME) | 16 REALM_NAMES, REALM_USER, REALM_THEME) |
16 | 17 |
17 | 18 |
27 self.out_dir = out_dir | 28 self.out_dir = out_dir |
28 self.force = force | 29 self.force = force |
29 self.applied_config_variant = applied_config_variant | 30 self.applied_config_variant = applied_config_variant |
30 self.applied_config_values = applied_config_values | 31 self.applied_config_values = applied_config_values |
31 | 32 |
32 # Remember what taxonomy pages we should skip | 33 # Remember what generator pages we should skip. |
33 # (we'll bake them repeatedly later with each taxonomy term) | 34 self.generator_pages = [] |
34 self.taxonomy_pages = [] | 35 logger.debug("Gathering generator page paths:") |
35 logger.debug("Gathering taxonomy page paths:") | 36 for gen in self.app.generators: |
36 for tax in self.app.taxonomies: | 37 for path in gen.page_ref.possible_paths: |
37 for src in self.app.sources: | 38 self.generator_pages.append(path) |
38 tax_page_ref = tax.getPageRef(src) | 39 logger.debug(" - %s" % path) |
39 for path in tax_page_ref.possible_paths: | |
40 self.taxonomy_pages.append(path) | |
41 logger.debug(" - %s" % path) | |
42 | 40 |
43 # Register some timers. | 41 # Register some timers. |
44 self.app.env.registerTimer('LoadJob', raise_if_registered=False) | 42 self.app.env.registerTimer('LoadJob', raise_if_registered=False) |
45 self.app.env.registerTimer('RenderFirstSubJob', | 43 self.app.env.registerTimer('RenderFirstSubJob', |
46 raise_if_registered=False) | 44 raise_if_registered=False) |
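
The constructor now asks each generator for its page paths instead of walking taxonomies times sources. The loop relies on a small surface: `app.generators`, a `page_ref` per generator, and its `possible_paths`. A minimal sketch of that interface, where everything beyond those three names is an assumption for illustration:

```python
# Minimal sketch of the interface the gathering loop above relies on.
# Only `generators`, `page_ref`, and `possible_paths` appear in this
# changeset; the class shapes below are assumptions.

class PageRef:
    def __init__(self, possible_paths):
        # Candidate paths where the generator's listing page may live
        # (typically the user site first, then the theme).
        self.possible_paths = possible_paths


class PageGenerator:
    def __init__(self, name, page_ref):
        self.name = name
        self.page_ref = page_ref


def gather_generator_pages(generators):
    # Same shape as the constructor loop: collect every path a
    # generator claims so the realm bake can skip those pages later.
    pages = []
    for gen in generators:
        pages.extend(gen.page_ref.possible_paths)
    return pages
```
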
99 for realm in realm_list: | 97 for realm in realm_list: |
100 srclist = sources_by_realm.get(realm) | 98 srclist = sources_by_realm.get(realm) |
101 if srclist is not None: | 99 if srclist is not None: |
102 self._bakeRealm(record, pool, realm, srclist) | 100 self._bakeRealm(record, pool, realm, srclist) |
103 | 101 |
104 # Bake taxonomies. | 102 # Call all the page generators. |
105 self._bakeTaxonomies(record, pool) | 103 self._bakePageGenerators(record, pool) |
106 | 104 |
107 # All done with the workers. Close the pool and get reports. | 105 # All done with the workers. Close the pool and get reports. |
108 reports = pool.close() | 106 reports = pool.close() |
109 total_stats = ExecutionStats() | 107 total_stats = ExecutionStats() |
110 record.current.stats['_Total'] = total_stats | 108 record.current.stats['_Total'] = total_stats |
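
The ordering matters here: regular sources bake first, realm by realm, and only then do the generators run. Presumably the generators need the fresh bake records from phase one to decide which of their own pages (for example, which taxonomy terms) are dirty. Condensed into a standalone sketch, with the phases spelled out:

```python
# Condensed view of the bake flow above; context code is elided and
# the free function wrapper is only for illustration.

def bake_all(baker, record, pool, sources_by_realm, realm_list):
    # Phase 1: bake regular pages realm by realm. Pages claimed by a
    # generator were filtered out earlier via baker.generator_pages.
    for realm in realm_list:
        srclist = sources_by_realm.get(realm)
        if srclist is not None:
            baker._bakeRealm(record, pool, realm, srclist)

    # Phase 2: hand control to each generator, which can now inspect
    # the bake record produced in phase 1 to find its dirty pages.
    baker._bakePageGenerators(record, pool)
```
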
195 | 193 |
196 all_factories = [] | 194 all_factories = [] |
197 for source in srclist: | 195 for source in srclist: |
198 factories = source.getPageFactories() | 196 factories = source.getPageFactories() |
199 all_factories += [f for f in factories | 197 all_factories += [f for f in factories |
200 if f.path not in self.taxonomy_pages] | 198 if f.path not in self.generator_pages] |
201 | 199 |
202 self._loadRealmPages(record, pool, all_factories) | 200 self._loadRealmPages(record, pool, all_factories) |
203 self._renderRealmPages(record, pool, all_factories) | 201 self._renderRealmPages(record, pool, all_factories) |
204 self._bakeRealmPages(record, pool, realm, all_factories) | 202 self._bakeRealmPages(record, pool, realm, all_factories) |
205 finally: | 203 finally: |
270 record_entry.errors.append( | 268 record_entry.errors.append( |
271 "Can't get source for page: %s" % fac.ref_spec) | 269 "Can't get source for page: %s" % fac.ref_spec) |
272 logger.error(record_entry.errors[-1]) | 270 logger.error(record_entry.errors[-1]) |
273 continue | 271 continue |
274 | 272 |
275 route = self.app.getRoute(fac.source.name, fac.metadata, | 273 route = self.app.getSourceRoute(fac.source.name, fac.metadata) |
276 skip_taxonomies=True) | |
277 if route is None: | 274 if route is None: |
278 record_entry.errors.append( | 275 record_entry.errors.append( |
279 "Can't get route for page: %s" % fac.ref_spec) | 276 "Can't get route for page: %s" % fac.ref_spec) |
280 logger.error(record_entry.errors[-1]) | 277 logger.error(record_entry.errors[-1]) |
281 continue | 278 continue |
282 | 279 |
283 # All good, queue the job. | 280 # All good, queue the job. |
| 281 route_index = self.app.routes.index(route) |
284 job = { | 282 job = { |
285 'type': JOB_RENDER_FIRST, | 283 'type': JOB_RENDER_FIRST, |
286 'job': save_factory(fac)} | 284 'job': { |
| 285 'factory_info': save_factory(fac), |
| 286 'route_index': route_index |
| 287 } |
| 288 } |
287 jobs.append(job) | 289 jobs.append(job) |
288 | 290 |
289 ar = pool.queueJobs(jobs, handler=_handler) | 291 ar = pool.queueJobs(jobs, handler=_handler) |
290 ar.wait() | 292 ar.wait() |
291 | 293 |
292 def _bakeRealmPages(self, record, pool, realm, factories): | 294 def _bakeRealmPages(self, record, pool, realm, factories): |
293 def _handler(res): | 295 def _handler(res): |
294 entry = record.getCurrentEntry(res['path'], res['taxonomy_info']) | 296 entry = record.getCurrentEntry(res['path']) |
295 entry.subs = res['sub_entries'] | 297 entry.subs = res['sub_entries'] |
296 if res['errors']: | 298 if res['errors']: |
297 entry.errors += res['errors'] | 299 entry.errors += res['errors'] |
298 self._logErrors(res['path'], res['errors']) | 300 self._logErrors(res['path'], res['errors']) |
299 if entry.has_any_error: | 301 if entry.has_any_error: |
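
Render jobs now ship an integer `route_index` instead of having workers re-resolve routes (the old `skip_taxonomies=True` lookup). Since every worker process builds the same ordered `app.routes` list, an index round-trips cheaply. The worker side is not part of this diff; a hedged sketch of how such a payload might be consumed:

```python
# Hypothetical worker-side decoding of the new JOB_RENDER_FIRST
# payload. Only the payload keys are confirmed by the diff above;
# the function names here are stand-ins.

def handle_render_first(app, job):
    # Rebuild the page factory; assumed counterpart to save_factory.
    fac = load_factory(app, job['factory_info'])
    # app.routes is identical and identically ordered in every worker
    # process, so an integer index is enough to recover the route.
    route = app.routes[job['route_index']]
    return render_first_sub(fac, route)  # stand-in for the renderer
```
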
315 jobs.append(job) | 317 jobs.append(job) |
316 | 318 |
317 ar = pool.queueJobs(jobs, handler=_handler) | 319 ar = pool.queueJobs(jobs, handler=_handler) |
318 ar.wait() | 320 ar.wait() |
319 | 321 |
320 def _bakeTaxonomies(self, record, pool): | 322 def _bakePageGenerators(self, record, pool): |
321 logger.debug("Baking taxonomy pages...") | 323 for gen in self.app.generators: |
322 with format_timed_scope(logger, 'built taxonomy buckets', | 324 ctx = PageGeneratorBakeContext(self.app, record, pool, gen) |
323 level=logging.DEBUG, colored=False): | 325 gen.bake(ctx) |
324 buckets = self._buildTaxonomyBuckets(record) | 326 |
325 | 327 def _makeBakeJob(self, record, fac): |
326 start_time = time.perf_counter() | |
327 page_count = self._bakeTaxonomyBuckets(record, pool, buckets) | |
328 logger.info(format_timed(start_time, | |
329 "baked %d taxonomy pages." % page_count)) | |
330 | |
331 def _buildTaxonomyBuckets(self, record): | |
332 # Let's see all the taxonomy terms for which we must bake a | |
333 # listing page... first, pre-populate our big map of used terms. | |
334 # For each source name, we have a list of taxonomies, and for each | |
335 # taxonomies, a list of terms, some being 'dirty', some used last | |
336 # time, etc. | |
337 buckets = {} | |
338 tax_names = [t.name for t in self.app.taxonomies] | |
339 source_names = [s.name for s in self.app.sources] | |
340 for sn in source_names: | |
341 source_taxonomies = {} | |
342 buckets[sn] = source_taxonomies | |
343 for tn in tax_names: | |
344 source_taxonomies[tn] = _TaxonomyTermsInfo() | |
345 | |
346 # Now see which ones are 'dirty' based on our bake record. | |
347 logger.debug("Gathering dirty taxonomy terms") | |
348 for prev_entry, cur_entry in record.transitions.values(): | |
349 # Re-bake all taxonomy pages that include new or changed | |
350 # pages. | |
351 if cur_entry and cur_entry.was_any_sub_baked: | |
352 entries = [cur_entry] | |
353 if prev_entry: | |
354 entries.append(prev_entry) | |
355 | |
356 for tax in self.app.taxonomies: | |
357 changed_terms = set() | |
358 for e in entries: | |
359 terms = e.config.get(tax.setting_name) | |
360 if terms: | |
361 if not tax.is_multiple: | |
362 terms = [terms] | |
363 changed_terms |= set(terms) | |
364 | |
365 if len(changed_terms) > 0: | |
366 tt_info = buckets[cur_entry.source_name][tax.name] | |
367 tt_info.dirty_terms |= changed_terms | |
368 | |
369 # Remember all terms used. | |
370 for tax in self.app.taxonomies: | |
371 if cur_entry and not cur_entry.was_overriden: | |
372 cur_terms = cur_entry.config.get(tax.setting_name) | |
373 if cur_terms: | |
374 if not tax.is_multiple: | |
375 cur_terms = [cur_terms] | |
376 tt_info = buckets[cur_entry.source_name][tax.name] | |
377 tt_info.all_terms |= set(cur_terms) | |
378 | |
379 # Re-bake the combination pages for terms that are 'dirty'. | |
380 known_combinations = set() | |
381 logger.debug("Gathering dirty term combinations") | |
382 for prev_entry, cur_entry in record.transitions.values(): | |
383 if not cur_entry: | |
384 continue | |
385 used_taxonomy_terms = cur_entry.getAllUsedTaxonomyTerms() | |
386 for sn, tn, terms in used_taxonomy_terms: | |
387 if isinstance(terms, tuple): | |
388 known_combinations.add((sn, tn, terms)) | |
389 for sn, tn, terms in known_combinations: | |
390 tt_info = buckets[sn][tn] | |
391 tt_info.all_terms.add(terms) | |
392 if not tt_info.dirty_terms.isdisjoint(set(terms)): | |
393 tt_info.dirty_terms.add(terms) | |
394 | |
395 return buckets | |
396 | |
397 def _bakeTaxonomyBuckets(self, record, pool, buckets): | |
398 def _handler(res): | |
399 entry = record.getCurrentEntry(res['path'], res['taxonomy_info']) | |
400 entry.subs = res['sub_entries'] | |
401 if res['errors']: | |
402 entry.errors += res['errors'] | |
403 if entry.has_any_error: | |
404 record.current.success = False | |
405 | |
406 # Start baking those terms. | |
407 jobs = [] | |
408 for source_name, source_taxonomies in buckets.items(): | |
409 for tax_name, tt_info in source_taxonomies.items(): | |
410 terms = tt_info.dirty_terms | |
411 if len(terms) == 0: | |
412 continue | |
413 | |
414 logger.debug( | |
415 "Baking '%s' for source '%s': %s" % | |
416 (tax_name, source_name, terms)) | |
417 tax = self.app.getTaxonomy(tax_name) | |
418 source = self.app.getSource(source_name) | |
419 tax_page_ref = tax.getPageRef(source) | |
420 if not tax_page_ref.exists: | |
421 logger.debug( | |
422 "No taxonomy page found at '%s', skipping." % | |
423 tax.page_ref) | |
424 continue | |
425 | |
426 logger.debug( | |
427 "Using taxonomy page: %s:%s" % | |
428 (tax_page_ref.source_name, tax_page_ref.rel_path)) | |
429 fac = tax_page_ref.getFactory() | |
430 | |
431 for term in terms: | |
432 logger.debug( | |
433 "Queuing: %s [%s=%s]" % | |
434 (fac.ref_spec, tax_name, term)) | |
435 tax_info = TaxonomyInfo(tax_name, source_name, term) | |
436 | |
437 cur_entry = BakeRecordEntry( | |
438 fac.source.name, fac.path, tax_info) | |
439 record.addEntry(cur_entry) | |
440 | |
441 job = self._makeBakeJob(record, fac, tax_info) | |
442 if job is not None: | |
443 jobs.append(job) | |
444 | |
445 ar = pool.queueJobs(jobs, handler=_handler) | |
446 ar.wait() | |
447 | |
448 # Now we create bake entries for all the terms that were *not* dirty. | |
449 # This is because otherwise, on the next incremental bake, we wouldn't | |
450 # find any entry for those things, and figure that we need to delete | |
451 # their outputs. | |
452 for prev_entry, cur_entry in record.transitions.values(): | |
453 # Only consider taxonomy-related entries that don't have any | |
454 # current version. | |
455 if (prev_entry and prev_entry.taxonomy_info and | |
456 not cur_entry): | |
457 ti = prev_entry.taxonomy_info | |
458 tt_info = buckets[ti.source_name][ti.taxonomy_name] | |
459 if ti.term in tt_info.all_terms: | |
460 logger.debug("Creating unbaked entry for taxonomy " | |
461 "term '%s:%s'." % (ti.taxonomy_name, ti.term)) | |
462 record.collapseEntry(prev_entry) | |
463 else: | |
464 logger.debug("Taxonomy term '%s:%s' isn't used anymore." % | |
465 (ti.taxonomy_name, ti.term)) | |
466 | |
467 return len(jobs) | |
468 | |
469 def _makeBakeJob(self, record, fac, tax_info=None): | |
470 # Get the previous (if any) and current entry for this page. | 328 # Get the previous (if any) and current entry for this page. |
471 pair = record.getPreviousAndCurrentEntries(fac.path, tax_info) | 329 pair = record.getPreviousAndCurrentEntries(fac.path) |
472 assert pair is not None | 330 assert pair is not None |
473 prev_entry, cur_entry = pair | 331 prev_entry, cur_entry = pair |
474 assert cur_entry is not None | 332 assert cur_entry is not None |
475 | 333 |
476 # Ignore if there were errors in the previous passes. | 334 # Ignore if there were errors in the previous passes. |
480 return None | 338 return None |
481 | 339 |
482 # Build the route metadata and find the appropriate route. | 340 # Build the route metadata and find the appropriate route. |
483 page = fac.buildPage() | 341 page = fac.buildPage() |
484 route_metadata = create_route_metadata(page) | 342 route_metadata = create_route_metadata(page) |
485 if tax_info is not None: | 343 route = self.app.getSourceRoute(fac.source.name, route_metadata) |
486 tax = self.app.getTaxonomy(tax_info.taxonomy_name) | |
487 route = self.app.getTaxonomyRoute(tax_info.taxonomy_name, | |
488 tax_info.source_name) | |
489 | |
490 slugified_term = route.slugifyTaxonomyTerm(tax_info.term) | |
491 route_metadata[tax.term_name] = slugified_term | |
492 else: | |
493 route = self.app.getRoute(fac.source.name, route_metadata, | |
494 skip_taxonomies=True) | |
495 assert route is not None | 344 assert route is not None |
496 | 345 |
497 # Figure out if this page is overriden by another previously | 346 # Figure out if this page is overriden by another previously |
498 # baked page. This happens for example when the user has | 347 # baked page. This happens for example when the user has |
499 # made a page that has the same page/URL as a theme page. | 348 # made a page that has the same page/URL as a theme page. |
509 (fac.ref_spec, uri, override_entry.path)) | 358 (fac.ref_spec, uri, override_entry.path)) |
510 logger.error(cur_entry.errors[-1]) | 359 logger.error(cur_entry.errors[-1]) |
511 cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN | 360 cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN |
512 return None | 361 return None |
513 | 362 |
| 363 route_index = self.app.routes.index(route) |
514 job = { | 364 job = { |
515 'type': JOB_BAKE, | 365 'type': JOB_BAKE, |
516 'job': { | 366 'job': { |
517 'factory_info': save_factory(fac), | 367 'factory_info': save_factory(fac), |
518 'taxonomy_info': tax_info, | 368 'generator_name': None, |
| 369 'generator_record_key': None, |
| 370 'route_index': route_index, |
519 'route_metadata': route_metadata, | 371 'route_metadata': route_metadata, |
520 'dirty_source_names': record.dirty_source_names | 372 'dirty_source_names': record.dirty_source_names |
521 } | 373 } |
522 } | 374 } |
523 return job | 375 return job |
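
Everything deleted above (`_buildTaxonomyBuckets`, `_bakeTaxonomyBuckets`, the per-term job loop) now hides behind `gen.bake(ctx)` with a `PageGeneratorBakeContext`. The real generator classes live in `piecrust.generation` and are not shown in this changeset; what follows is a hedged reconstruction of what a taxonomy generator's `bake()` plausibly does, pieced together from the deleted code, with stand-in names for the context API:

```python
# Hedged reconstruction from the deleted bucket code; NOT the actual
# TaxonomyGenerator. ctx.queue_bake_job / ctx.collapse_record are
# stand-in names for whatever PageGeneratorBakeContext exposes.

class TaxonomyGeneratorSketch:
    def __init__(self, setting_name, is_multiple, page_ref):
        self.setting_name = setting_name  # e.g. 'tags'
        self.is_multiple = is_multiple    # multi-valued taxonomy?
        self.page_ref = page_ref          # ref to the listing page

    def bake(self, ctx):
        dirty_terms = set()
        all_terms = set()
        # Mirror of _buildTaxonomyBuckets: walk the record transitions
        # and mark the terms of any re-baked page as dirty.
        for prev_entry, cur_entry in ctx.record.transitions.values():
            for entry in (prev_entry, cur_entry):
                if entry is None:
                    continue
                terms = entry.config.get(self.setting_name)
                if not terms:
                    continue
                if not self.is_multiple:
                    terms = [terms]
                all_terms |= set(terms)
                if cur_entry is not None and cur_entry.was_any_sub_baked:
                    dirty_terms |= set(terms)

        # Mirror of _bakeTaxonomyBuckets: one bake job per dirty term,
        # plus collapsed record entries for terms that are still used
        # but clean, so the next incremental bake keeps their output.
        fac = self.page_ref.getFactory()
        for term in dirty_terms:
            ctx.queue_bake_job(fac, term)   # stand-in name
        for term in all_terms - dirty_terms:
            ctx.collapse_record(fac, term)  # stand-in name
```
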
567 batch_size=batch_size, | 419 batch_size=batch_size, |
568 worker_class=BakeWorker, | 420 worker_class=BakeWorker, |
569 initargs=(ctx,)) | 421 initargs=(ctx,)) |
570 return pool | 422 return pool |
571 | 423 |
572 | |
573 class _TaxonomyTermsInfo(object): | |
574 def __init__(self): | |
575 self.dirty_terms = set() | |
576 self.all_terms = set() | |
577 | |
578 def __str__(self): | |
579 return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms) | |
580 | |
581 def __repr__(self): | |
582 return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms) | |
583 |
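
`_TaxonomyTermsInfo` leaves with the bucket code; its dirty/all bookkeeping is presumably carried by the generator module now. An equivalent holder, kept here only to document what the deleted class tracked:

```python
# Equivalent of the deleted _TaxonomyTermsInfo, for reference only;
# where (or whether) this lives after the change is not shown here.
class TermTracker:
    def __init__(self):
        self.dirty_terms = set()  # terms whose listing pages need re-baking
        self.all_terms = set()    # every term still in use this bake

    def __repr__(self):
        return 'dirty:%s, all:%s' % (self.dirty_terms, self.all_terms)
```
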