piecrust2: comparison of piecrust/sources/taxonomy.py @ 1136:5f97b5b59dfe
bake: Optimize cache handling for the baking process.
- Get rid of the 2-level pipeline runs... handle a single set of passes.
- Go back to load/render segments/layout passes for pages.
- Add descriptions of what each job batch does.
- Improve the taxonomy pipeline so it doesn't re-bake terms that don't need
to be re-baked.
- Simplify some of the code.
author   | Ludovic Chabant <ludovic@chabant.com>
date     | Mon, 23 Apr 2018 21:47:49 -0700
parents  | 1857dbd4580f
children | 9f3e702a8a69
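The taxonomy bullet in the commit message comes down to set arithmetic over slugified terms: only terms used by pages whose output was actually re-rendered during this bake get marked 'dirty', and only the listing pages for dirty terms are re-baked. A minimal sketch of that idea, using made-up stand-in data rather than PieCrust's real records API:

# Made-up stand-in data; the real logic lives in _TaxonomyTermsAnalyzer.
pages = [
    {'tags': ['python', 'bake'], 'rendered': True},   # re-rendered this bake
    {'tags': ['python', 'web'], 'rendered': False},   # up-to-date, skipped
]

dirty_terms = set()
for page in pages:
    if page['rendered']:
        dirty_terms.update(page['tags'])

# Only the 'python' and 'bake' listings get re-baked; 'web' is left alone.
print(sorted(dirty_terms))  # ['bake', 'python']

On an incremental bake where a single post changed, this keeps the listing pages for every other term untouched.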
--- piecrust/sources/taxonomy.py (1135:6350ee084273)
+++ piecrust/sources/taxonomy.py (1136:5f97b5b59dfe)
@@ -5,17 +5,13 @@
 from piecrust.configuration import ConfigurationError
 from piecrust.data.filters import (
     PaginationFilter, SettingFilterClause)
 from piecrust.page import Page
 from piecrust.pipelines._pagebaker import PageBaker
-from piecrust.pipelines._pagerecords import (
-    PagePipelineRecordEntry,
-    add_page_job_result, merge_job_result_into_record_entry)
+from piecrust.pipelines._pagerecords import PagePipelineRecordEntry
 from piecrust.pipelines.base import (
-    ContentPipeline, get_record_name_for_source,
-    create_job, content_item_from_job)
-from piecrust.pipelines.records import RecordHistory
+    ContentPipeline, get_record_name_for_source, create_job)
 from piecrust.routing import RouteParameter
 from piecrust.sources.base import ContentItem
 from piecrust.sources.generator import GeneratorSourceBase
 
 
@@ -305,12 +301,12 @@
 
         entry = rec_fac(record_entry_spec)
         current_record.addEntry(entry)
 
         if len(jobs) > 0:
-            return jobs
-        return None
+            return jobs, "taxonomize"
+        return None, None
 
     def run(self, job, ctx, result):
         term = job['term']
         content_item = ContentItem('_index',
                                    {'term': term,
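The `return jobs, "taxonomize"` change above reflects the "Add descriptions of what each job batch does" bullet: `createJobs()` now appears to return a pair of (job list, batch description) instead of a bare job list. A hedged caller-side sketch of that contract; the `FakePipeline` class and its job payloads are invented for illustration:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Invented stand-in pipeline; only the return shape mirrors the diff.
class FakePipeline:
    def createJobs(self, ctx):
        jobs = [{'term': 'python'}, {'term': 'bake'}]
        if len(jobs) > 0:
            return jobs, "taxonomize"
        return None, None


jobs, description = FakePipeline().createJobs(ctx=None)
if jobs is not None:
    logger.info("queuing %d job(s): %s", len(jobs), description)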
@@ -322,17 +318,16 @@
         logger.debug("Rendering '%s' page: %s" %
                      (self.taxonomy.name, page.source_metadata['term']))
         prev_entry = ctx.previous_entry
         rdr_subs = self._pagebaker.bake(page, prev_entry)
 
-        add_page_job_result(result)
         result['subs'] = rdr_subs
         result['term'] = page.source_metadata['term']
 
     def handleJobResult(self, result, ctx):
         existing = ctx.record_entry
-        merge_job_result_into_record_entry(existing, result)
+        existing.subs = result['subs']
         existing.term = result['term']
 
     def postJobRun(self, ctx):
         # We create bake entries for all the terms that were *not* dirty.
         # This is because otherwise, on the next incremental bake, we wouldn't
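Dropping `add_page_job_result()` and `merge_job_result_into_record_entry()` in favour of two direct assignments fits the "Simplify some of the code" bullet: `run()` fills a plain `result` dict (which presumably must cross the worker/main process boundary as simple, picklable data), and `handleJobResult()` copies the two fields it cares about onto the record entry. A minimal mirror of that pattern, with hypothetical stand-in classes rather than PieCrust's own:

# Hypothetical stand-ins: run_in_worker() builds a plain picklable dict;
# handle_in_main() copies it onto the record entry back in the main process.
class RecordEntry:
    def __init__(self):
        self.subs = None
        self.term = None


def run_in_worker(job):
    result = {}
    result['subs'] = ['tags/%s.html' % job['term']]  # fake bake output
    result['term'] = job['term']
    return result


def handle_in_main(entry, result):
    entry.subs = result['subs']
    entry.term = result['term']


entry = RecordEntry()
handle_in_main(entry, run_in_worker({'term': 'python'}))
print(entry.term, entry.subs)  # python ['tags/python.html']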
@@ -360,11 +355,10 @@
 class _TaxonomyTermsAnalyzer(object):
     def __init__(self, pipeline, record_histories):
         self.pipeline = pipeline
         self.record_histories = record_histories
         self._all_terms = {}
-        self._single_dirty_slugified_terms = set()
         self._all_dirty_slugified_terms = None
 
     @property
     def dirty_slugified_terms(self):
         """ Returns the slugified terms that have been 'dirtied' during
@@ -379,53 +373,52 @@
         return term in self._all_terms
 
     def analyze(self):
         # Build the list of terms for our taxonomy, and figure out which ones
         # are 'dirty' for the current bake.
-        #
-        # Remember all terms used.
         source = self.pipeline.inner_source
         taxonomy = self.pipeline.taxonomy
         slugifier = self.pipeline.slugifier
 
-        record_name = get_record_name_for_source(source)
-        current_records = self.record_histories.current
-        cur_rec = current_records.getRecord(record_name)
-        for cur_entry in cur_rec.getEntries():
-            if not cur_entry.was_overriden:
-                cur_terms = cur_entry.config.get(taxonomy.setting_name)
-                if cur_terms:
-                    if not taxonomy.is_multiple:
-                        self._addTerm(
-                            slugifier, cur_entry.item_spec, cur_terms)
-                    else:
-                        self._addTerms(
-                            slugifier, cur_entry.item_spec, cur_terms)
-
-        # Re-bake all taxonomy terms that include new or changed pages, by
-        # marking them as 'dirty'.
-        history = self.record_histories.getHistory(record_name).copy()
-        history.build()
-        for prev_entry, cur_entry in history.diffs:
-            entries = [cur_entry]
-            if prev_entry:
-                entries.append(prev_entry)
-
-            for e in entries:
-                if e and e.was_any_sub_baked:
-                    entry_terms = e.config.get(taxonomy.setting_name)
-                    if entry_terms:
-                        if not taxonomy.is_multiple:
-                            self._single_dirty_slugified_terms.add(
-                                slugifier.slugify(entry_terms))
-                        else:
-                            self._single_dirty_slugified_terms.update(
-                                (slugifier.slugify(t)
-                                 for t in entry_terms))
+        tax_is_mult = taxonomy.is_multiple
+        tax_setting_name = taxonomy.setting_name
+
+        # First, go over all of our source's pages seen during this bake.
+        # Gather all the taxonomy terms they have, and also keep track of
+        # the ones used by the pages that were actually rendered (instead of
+        # those that were up-to-date and skipped).
+        single_dirty_slugified_terms = set()
+        current_records = self.record_histories.current
+        record_name = get_record_name_for_source(source)
+        cur_rec = current_records.getRecord(record_name)
+        for cur_entry in cur_rec.getEntries():
+            if cur_entry.hasFlag(PagePipelineRecordEntry.FLAG_OVERRIDEN):
+                continue
+
+            cur_terms = cur_entry.config.get(tax_setting_name)
+            if not cur_terms:
+                continue
+
+            if not tax_is_mult:
+                self._addTerm(
+                    slugifier, cur_entry.item_spec, cur_terms)
+            else:
+                self._addTerms(
+                    slugifier, cur_entry.item_spec, cur_terms)
+
+            if cur_entry.hasFlag(
+                    PagePipelineRecordEntry.FLAG_SEGMENTS_RENDERED):
+                if not tax_is_mult:
+                    single_dirty_slugified_terms.add(
+                        slugifier.slugify(cur_terms))
+                else:
+                    single_dirty_slugified_terms.update(
+                        (slugifier.slugify(t)
+                         for t in cur_terms))
 
         self._all_dirty_slugified_terms = list(
-            self._single_dirty_slugified_terms)
+            single_dirty_slugified_terms)
         logger.debug("Gathered %d dirty taxonomy terms",
                      len(self._all_dirty_slugified_terms))
 
         # Re-bake the combination pages for terms that are 'dirty'.
         # We make all terms into tuple, even those that are not actual
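The rewritten `analyze()` above replaces two passes (gather all terms, then diff record histories to find dirty ones) with a single pass over the current record, keyed off entry flags: `FLAG_OVERRIDEN` skips overridden pages, and `FLAG_SEGMENTS_RENDERED` marks a page whose content was actually re-rendered, dirtying its terms. A minimal mirror of that flag pattern; the flag names match this diff, but the bit values below are illustrative, not PieCrust's actual ones:

FLAG_NONE = 0
FLAG_OVERRIDEN = 2**0
FLAG_SEGMENTS_RENDERED = 2**1


class Entry:
    def __init__(self, flags=FLAG_NONE):
        self.flags = flags

    def hasFlag(self, flag):
        return (self.flags & flag) != 0


e = Entry(FLAG_SEGMENTS_RENDERED)
assert e.hasFlag(FLAG_SEGMENTS_RENDERED)  # page was re-rendered: dirty terms
assert not e.hasFlag(FLAG_OVERRIDEN)      # not overridden: don't skip it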
@@ -436,11 +429,11 @@
         # wherever combinations were used, so they're coming from the
         # `onRouteFunctionUsed` method. And because combinations can be used
         # by any page in the website (anywhere someone can ask for an URL
         # to the combination page), it means we check all the records, not
         # just the record for our source.
-        if taxonomy.is_multiple:
+        if tax_is_mult:
             known_combinations = set()
             for rec in current_records.records:
                 # Cheap way to test if a record contains entries that
                 # are sub-types of a page entry: test the first one.
                 first_entry = next(iter(rec.getEntries()), None)
@@ -454,11 +447,11 @@
                         if len(terms) > 1:
                             known_combinations.add(terms)
 
             dcc = 0
             for terms in known_combinations:
-                if not self._single_dirty_slugified_terms.isdisjoint(
+                if not single_dirty_slugified_terms.isdisjoint(
                         set(terms)):
                     self._all_dirty_slugified_terms.append(
                         taxonomy.separator.join(terms))
                     dcc += 1
             logger.debug("Gathered %d term combinations, with %d dirty." %
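The last hunk keeps combination pages (multi-term URLs like `tag1/tag2`) consistent: a known combination is re-baked as soon as it shares any term with the dirty set, which is exactly what the `isdisjoint()` test expresses (two sets are disjoint only when they share no elements). In miniature, with `'/'` standing in for `taxonomy.separator` and invented data:

dirty = {'python'}
known_combinations = {('python', 'web'), ('bake', 'cake')}

dirty_combinations = [
    '/'.join(terms) for terms in known_combinations
    if not dirty.isdisjoint(terms)   # shares at least one dirty term
]
print(dirty_combinations)  # ['python/web']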