comparison piecrust/baking/baker.py @ 91:e88e330eb8dc
Improvements to incremental baking and cache invalidation.
author | Ludovic Chabant <ludovic@chabant.com> |
date | Fri, 05 Sep 2014 00:42:13 -0700 |
parents | 3471ffa059b2 |
children | 0445a2232de7 |
90:e293f08d954e | 91:e88e330eb8dc |
---|---|
4 import shutil | 4 import shutil |
5 import hashlib | 5 import hashlib |
6 import logging | 6 import logging |
7 import threading | 7 import threading |
8 import urllib.request, urllib.error, urllib.parse | 8 import urllib.request, urllib.error, urllib.parse |
9 from piecrust.baking.records import TransitionalBakeRecord, BakeRecordPageEntry | 9 from piecrust.baking.records import (TransitionalBakeRecord, |
| 10 BakeRecordPageEntry, |
| 11 FLAG_OVERRIDEN, FLAG_SOURCE_MODIFIED) |
10 from piecrust.chefutil import format_timed, log_friendly_exception | 12 from piecrust.chefutil import format_timed, log_friendly_exception |
11 from piecrust.data.filters import (PaginationFilter, HasFilterClause, | 13 from piecrust.data.filters import (PaginationFilter, HasFilterClause, |
12 IsFilterClause, AndBooleanClause) | 14 IsFilterClause, AndBooleanClause) |
13 from piecrust.processing.base import ProcessorPipeline | 15 from piecrust.processing.base import ProcessorPipeline |
14 from piecrust.rendering import PageRenderingContext, render_page | 16 from piecrust.rendering import PageRenderingContext, render_page |
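The reworked import pulls two new bitmask flags, FLAG_OVERRIDEN and FLAG_SOURCE_MODIFIED, out of piecrust.baking.records. A minimal sketch of how such flags combine on a record entry; the constant values below are illustrative assumptions, since records.py is not shown in this diff:

    # Illustrative flag values; the real constants live in
    # piecrust.baking.records and may differ.
    FLAG_NONE = 0
    FLAG_OVERRIDEN = 2**0
    FLAG_SOURCE_MODIFIED = 2**1

    class Entry:
        def __init__(self):
            self.flags = FLAG_NONE

    e = Entry()
    e.flags |= FLAG_OVERRIDEN                  # set a flag
    assert e.flags & FLAG_OVERRIDEN            # test a flag
    assert not (e.flags & FLAG_SOURCE_MODIFIED)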
74 else: | 76 else: |
75 bake_path.append(decoded_uri + '.html') | 77 bake_path.append(decoded_uri + '.html') |
76 | 78 |
77 return os.path.normpath(os.path.join(*bake_path)) | 79 return os.path.normpath(os.path.join(*bake_path)) |
78 | 80 |
79 def bake(self, factory, route, taxonomy_name=None, taxonomy_term=None): | 81 def bake(self, factory, route, record_entry, |
| 82 taxonomy_name=None, taxonomy_term=None): |
80 pagination_filter = None | 83 pagination_filter = None |
81 custom_data = None | 84 custom_data = None |
82 if taxonomy_name and taxonomy_term: | 85 if taxonomy_name and taxonomy_term: |
83 # Must bake a taxonomy listing page... we'll have to add a | 86 # Must bake a taxonomy listing page... we'll have to add a |
84 # pagination filter for only get matching posts, and the output | 87 # pagination filter for only get matching posts, and the output |
115 "'%s:%s'." % (factory.ref_spec, uri, | 118 "'%s:%s'." % (factory.ref_spec, uri, |
116 override.source_name, override.rel_path)) | 119 override.source_name, override.rel_path)) |
117 logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" % | 120 logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" % |
118 (factory.ref_spec, uri, override.source_name, | 121 (factory.ref_spec, uri, override.source_name, |
119 override.rel_path)) | 122 override.rel_path)) |
120 entry = BakeRecordPageEntry() | 123 record_entry.flags |= FLAG_OVERRIDEN |
121 entry.path = factory.path | 124 return |
122 entry.rel_path = factory.rel_path | |
123 entry.source_name = factory.source.name | |
124 entry.was_overriden = True | |
125 | |
126 if self.record: | |
127 self.record.addEntry(entry) | |
128 | |
129 return entry | |
130 | 125 |
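With the record entry now created by the caller and passed in as record_entry, the override path no longer builds and registers its own BakeRecordPageEntry; it just tags the existing entry and returns. A minimal sketch of the new early-out, with a stand-in entry type:

    FLAG_OVERRIDEN = 2**0  # illustrative value

    class Entry:
        def __init__(self):
            self.flags = 0

    def bake(record_entry, override):
        if override is not None:
            # The entry is already registered with the bake record,
            # so tagging it is all that's left to do here.
            record_entry.flags |= FLAG_OVERRIDEN
            return
        # ...actual baking would follow...

    e = Entry()
    bake(e, override=object())
    assert e.flags & FLAG_OVERRIDEN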
131 cur_sub = 1 | 126 cur_sub = 1 |
132 has_more_subs = True | 127 has_more_subs = True |
| 128 force_this = self.force |
133 page = factory.buildPage() | 129 page = factory.buildPage() |
134 cur_record_entry = BakeRecordPageEntry(page) | 130 record_entry.config = page.config.get().copy() |
135 cur_record_entry.taxonomy_name = taxonomy_name | |
136 cur_record_entry.taxonomy_term = taxonomy_term | |
137 prev_record_entry = self.record.getPreviousEntry( | 131 prev_record_entry = self.record.getPreviousEntry( |
138 factory.source.name, factory.rel_path, | 132 factory.source.name, factory.rel_path, |
139 taxonomy_name, taxonomy_term) | 133 taxonomy_name, taxonomy_term) |
140 | 134 |
141 logger.debug("Baking '%s'..." % uri) | 135 logger.debug("Baking '%s'..." % uri) |
| 136 |
| 137 # If the current page is known to use pages from other sources, |
| 138 # see if any of those got baked, or are going to be baked for some |
| 139 # reason. If so, we need to bake this one too. |
| 140 # (this happens for instance with the main page of a blog). |
| 141 if prev_record_entry: |
| 142 any_used_src_baked = False |
| 143 used_src_names = list(prev_record_entry.used_source_names) |
| 144 for src_name in used_src_names: |
| 145 entries = self.record.getCurrentEntries(src_name) |
| 146 for e in entries: |
| 147 if e.was_baked or e.flags & FLAG_SOURCE_MODIFIED: |
| 148 any_used_src_baked = True |
| 149 break |
| 150 if any_used_src_baked: |
| 151 break |
| 152 if any_used_src_baked: |
| 153 logger.debug("'%s' is known to use sources %s, at least one " |
| 154 "of which got baked. Will force bake this page. " |
| 155 % (uri, used_src_names)) |
| 156 force_this = True |
| 157 |
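This new block is the heart of the incremental-baking improvement: a page that consumed other sources on the last bake (for instance a blog's main page listing posts) is force-baked whenever any of those sources was re-baked or modified. A condensed sketch of the same check, using stand-in objects for the record entries:

    from types import SimpleNamespace

    FLAG_SOURCE_MODIFIED = 2**1  # illustrative value

    def should_force_bake(prev_entry, entries_by_source):
        # Mirror of the loop above: any used source with a baked or
        # modified entry in the current record forces a re-bake.
        if prev_entry is None:
            return False
        for src_name in prev_entry.used_source_names:
            for e in entries_by_source.get(src_name, ()):
                if e.was_baked or (e.flags & FLAG_SOURCE_MODIFIED):
                    return True
        return False

    prev = SimpleNamespace(used_source_names={'posts'})
    cur = {'posts': [SimpleNamespace(was_baked=False,
                                     flags=FLAG_SOURCE_MODIFIED)]}
    assert should_force_bake(prev, cur)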
142 while has_more_subs: | 158 while has_more_subs: |
143 sub_uri = self.getOutputUri(uri, cur_sub) | 159 sub_uri = self.getOutputUri(uri, cur_sub) |
144 out_path = self.getOutputPath(sub_uri) | 160 out_path = self.getOutputPath(sub_uri) |
145 | 161 |
146 # Check for up-to-date outputs. | 162 # Check for up-to-date outputs. |
147 do_bake = True | 163 do_bake = True |
148 if not self.force and prev_record_entry: | 164 if not force_this and prev_record_entry: |
149 try: | 165 try: |
150 in_path_time = page.path_mtime | 166 in_path_time = record_entry.path_mtime |
151 out_path_time = os.path.getmtime(out_path) | 167 out_path_time = os.path.getmtime(out_path) |
152 if out_path_time > in_path_time: | 168 if out_path_time > in_path_time and not any_used_src_baked: |
153 do_bake = False | 169 do_bake = False |
154 except OSError: | 170 except OSError: |
155 # File doesn't exist, we'll need to bake. | 171 # File doesn't exist, we'll need to bake. |
156 pass | 172 pass |
157 | 173 |
200 dest_ap = os.path.join(out_assets_dir, os.path.basename(ap)) | 216 dest_ap = os.path.join(out_assets_dir, os.path.basename(ap)) |
201 logger.debug(" %s -> %s" % (ap, dest_ap)) | 217 logger.debug(" %s -> %s" % (ap, dest_ap)) |
202 shutil.copy(ap, dest_ap) | 218 shutil.copy(ap, dest_ap) |
203 | 219 |
204 # Record what we did and figure out if we have more work. | 220 # Record what we did and figure out if we have more work. |
205 cur_record_entry.out_uris.append(sub_uri) | 221 record_entry.out_uris.append(sub_uri) |
206 cur_record_entry.out_paths.append(out_path) | 222 record_entry.out_paths.append(out_path) |
207 cur_record_entry.used_source_names |= ctx.used_source_names | 223 record_entry.used_source_names |= ctx.used_source_names |
208 cur_record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms | 224 record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms |
209 | 225 |
210 has_more_subs = False | 226 has_more_subs = False |
211 if ctx.used_pagination is not None: | 227 if ctx.used_pagination is not None: |
212 cur_record_entry.addUsedSource(ctx.used_pagination._source) | 228 record_entry.addUsedSource(ctx.used_pagination._source) |
213 if ctx.used_pagination.has_more: | 229 if ctx.used_pagination.has_more: |
214 cur_sub += 1 | 230 cur_sub += 1 |
215 has_more_subs = True | 231 has_more_subs = True |
216 | |
217 if self.record: | |
218 self.record.addEntry(cur_record_entry) | |
219 | |
220 return cur_record_entry | |
221 | 232 |
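The bookkeeping that used to build, fill, and register cur_record_entry is gone: bake() now mutates the record_entry it was handed, which the orchestrator already registered, and returns nothing. A tiny sketch of the mutate-in-place contract:

    class Entry:
        def __init__(self):
            self.out_uris, self.out_paths = [], []

    def bake(entry):
        # Outputs accumulate on the caller's entry; nothing is returned.
        entry.out_uris.append('blog')
        entry.out_paths.append('blog.html')

    e = Entry()
    bake(e)
    assert e.out_uris == ['blog']   # caller reads results off the entry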
222 def _bakeSingle(self, page, sub_uri, num, out_path, | 233 def _bakeSingle(self, page, sub_uri, num, out_path, |
223 pagination_filter=None, custom_data=None): | 234 pagination_filter=None, custom_data=None): |
224 ctx = PageRenderingContext(page, sub_uri) | 235 ctx = PageRenderingContext(page, sub_uri) |
225 ctx.page_num = num | 236 ctx.page_num = num |
312 self._bakeAssets(record) | 323 self._bakeAssets(record) |
313 | 324 |
314 # Save the bake record. | 325 # Save the bake record. |
315 t = time.clock() | 326 t = time.clock() |
316 record.current.bake_time = time.time() | 327 record.current.bake_time = time.time() |
| 328 record.current.out_dir = self.out_dir |
317 record.collapseRecords() | 329 record.collapseRecords() |
318 record.saveCurrent(record_cache.getCachePath(record_name)) | 330 record.saveCurrent(record_cache.getCachePath(record_name)) |
319 logger.debug(format_timed(t, 'saved bake record', colored=False)) | 331 logger.debug(format_timed(t, 'saved bake record', colored=False)) |
320 | 332 |
321 # All done. | 333 # All done. |
331 reason = "ordered to" | 343 reason = "ordered to" |
332 elif not self.app.config.get('__cache_valid'): | 344 elif not self.app.config.get('__cache_valid'): |
333 # The configuration file was changed, or we're running a new | 345 # The configuration file was changed, or we're running a new |
334 # version of the app. | 346 # version of the app. |
335 reason = "not valid anymore" | 347 reason = "not valid anymore" |
336 elif not record.previous.bake_time: | 348 elif (not record.previous.bake_time or |
| 349 not record.previous.hasLatestVersion()): |
337 # We have no valid previous bake record. | 350 # We have no valid previous bake record. |
338 reason = "need bake record regeneration" | 351 reason = "need bake record regeneration" |
339 else: | 352 else: |
340 # Check if any template has changed since the last bake. Since | 353 # Check if any template has changed since the last bake. Since |
341 # there could be some advanced conditional logic going on, we'd | 354 # there could be some advanced conditional logic going on, we'd |
354 cache_dir = self.app.cache.getCacheDir('baker') | 367 cache_dir = self.app.cache.getCacheDir('baker') |
355 if os.path.isdir(cache_dir): | 368 if os.path.isdir(cache_dir): |
356 logger.debug("Cleaning baker cache: %s" % cache_dir) | 369 logger.debug("Cleaning baker cache: %s" % cache_dir) |
357 shutil.rmtree(cache_dir) | 370 shutil.rmtree(cache_dir) |
358 self.force = True | 371 self.force = True |
| 372 record.incremental_count = 0 |
359 logger.info(format_timed(start_time, | 373 logger.info(format_timed(start_time, |
360 "cleaned cache (reason: %s)" % reason)) | 374 "cleaned cache (reason: %s)" % reason)) |
361 else: | 375 else: |
| 376 record.incremental_count += 1 |
362 logger.debug(format_timed(start_time, "cache is assumed valid", | 377 logger.debug(format_timed(start_time, "cache is assumed valid", |
363 colored=False)) | 378 colored=False)) |
364 | 379 |
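record.incremental_count is new: it resets to zero whenever the cache is wiped and grows by one for each consecutive incremental bake, giving later code a way to tell how long the incremental chain has run. Sketch of the counter's semantics:

    class Record:
        incremental_count = 0

    def after_cache_check(record, cache_valid):
        if not cache_valid:
            record.incremental_count = 0    # full bake resets the chain
        else:
            record.incremental_count += 1   # one more incremental bake

    rec = Record()
    after_cache_check(rec, cache_valid=False)
    after_cache_check(rec, cache_valid=True)
    assert rec.incremental_count == 1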
365 def _bakeRealm(self, record, realm, srclist): | 380 def _bakeRealm(self, record, realm, srclist): |
366 # Gather all page factories from the sources and queue them | 381 # Gather all page factories from the sources and queue them |
379 route = self.app.getRoute(source.name, fac.metadata) | 394 route = self.app.getRoute(source.name, fac.metadata) |
380 if route is None: | 395 if route is None: |
381 logger.error("Can't get route for page: %s" % fac.ref_spec) | 396 logger.error("Can't get route for page: %s" % fac.ref_spec) |
382 continue | 397 continue |
383 | 398 |
384 logger.debug("Queuing: %s" % fac.ref_spec) | 399 entry = BakeRecordPageEntry(fac) |
385 queue.addJob(BakeWorkerJob(fac, route)) | 400 record.addEntry(entry) |
| 401 queue.addJob(BakeWorkerJob(fac, route, entry)) |
386 | 402 |
387 self._waitOnWorkerPool(pool, abort) | 403 self._waitOnWorkerPool(pool, abort) |
388 | 404 |
389 def _bakeTaxonomies(self, record): | 405 def _bakeTaxonomies(self, record): |
390 logger.debug("Baking taxonomies") | 406 logger.debug("Baking taxonomies") |
467 for term in terms: | 483 for term in terms: |
468 fac = PageFactory(tax_page_source, tax_page_rel_path, | 484 fac = PageFactory(tax_page_source, tax_page_rel_path, |
469 {tax.term_name: term}) | 485 {tax.term_name: term}) |
470 logger.debug("Queuing: %s [%s, %s]" % | 486 logger.debug("Queuing: %s [%s, %s]" % |
471 (fac.ref_spec, tax_name, term)) | 487 (fac.ref_spec, tax_name, term)) |
| 488 entry = BakeRecordPageEntry(fac, tax_name, term) |
| 489 record.addEntry(entry) |
472 queue.addJob( | 490 queue.addJob( |
473 BakeWorkerJob(fac, route, tax_name, term)) | 491 BakeWorkerJob(fac, route, entry, tax_name, term)) |
474 | 492 |
475 self._waitOnWorkerPool(pool, abort) | 493 self._waitOnWorkerPool(pool, abort) |
476 | 494 |
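Both queueing sites (realm pages at new lines 399-401 above, taxonomy pages at new lines 488-491) now follow the same pattern: create the BakeRecordPageEntry eagerly, register it with the record, and attach it to the job, so every page leaves an entry even if the worker later skips it. A sketch of that pattern with stand-in types and hypothetical names:

    class PageEntry:
        def __init__(self, fac, tax_name=None, term=None):
            self.fac, self.tax_name, self.term = fac, tax_name, term

    class Job:
        def __init__(self, fac, route, entry):
            self.fac, self.route, self.entry = fac, route, entry

    def queue_page(record, queue, fac, route, tax_name=None, term=None):
        entry = PageEntry(fac, tax_name, term)  # created up front...
        record.append(entry)                    # ...registered...
        queue.append(Job(fac, route, entry))    # ...then handed to the job

    record, queue = [], []
    queue_page(record, queue, fac='posts/foo.md', route=object())
    assert record[0] is queue[0].entry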
477 def _bakeAssets(self, record): | 495 def _bakeAssets(self, record): |
478 mounts = self.app.assets_dirs | 496 mounts = self.app.assets_dirs |
525 self._lock = threading.Lock() | 543 self._lock = threading.Lock() |
526 self._added_event = threading.Event() | 544 self._added_event = threading.Event() |
527 self._done_event = threading.Event() | 545 self._done_event = threading.Event() |
528 | 546 |
529 def addJob(self, job): | 547 def addJob(self, job): |
530 logger.debug("Adding job '%s:%s' to scheduler." % ( | 548 logger.debug("Queuing job '%s:%s'." % ( |
531 job.factory.source.name, job.factory.rel_path)) | 549 job.factory.source.name, job.factory.rel_path)) |
532 with self._lock: | 550 with self._lock: |
533 self.jobs.append(job) | 551 self.jobs.append(job) |
534 self._added_event.set() | 552 self._added_event.set() |
535 | 553 |
536 def onJobFinished(self, job): | 554 def onJobFinished(self, job): |
537 logger.debug("Removing job '%s:%s' from scheduler." % ( | 555 logger.debug("Removing job '%s:%s'." % ( |
538 job.factory.source.name, job.factory.rel_path)) | 556 job.factory.source.name, job.factory.rel_path)) |
539 with self._lock: | 557 with self._lock: |
540 self._active_jobs.remove(job) | 558 self._active_jobs.remove(job) |
541 self._done_event.set() | 559 self._done_event.set() |
542 | 560 |
543 def getNextJob(self, timeout=None): | 561 def getNextJob(self, wait_timeout=None, empty_timeout=None): |
544 self._added_event.clear() | 562 self._added_event.clear() |
545 self._done_event.clear() | 563 self._done_event.clear() |
546 job = self._doGetNextJob() | 564 job = self._doGetNextJob() |
547 while job in (self._EMPTY, self._WAIT): | 565 while job in (self._EMPTY, self._WAIT): |
548 if timeout is None: | |
549 return None | |
550 if job == self._EMPTY: | 566 if job == self._EMPTY: |
| 567 if empty_timeout is None: |
| 568 return None |
551 logger.debug("Waiting for a new job to be added...") | 569 logger.debug("Waiting for a new job to be added...") |
552 res = self._added_event.wait(timeout) | 570 res = self._added_event.wait(empty_timeout) |
553 elif job == self._WAIT: | 571 elif job == self._WAIT: |
| 572 if wait_timeout is None: |
| 573 return None |
554 logger.debug("Waiting for a job to be finished...") | 574 logger.debug("Waiting for a job to be finished...") |
555 res = self._done_event.wait(timeout) | 575 res = self._done_event.wait(wait_timeout) |
556 if not res: | 576 if not res: |
557 logger.debug("Timed-out. No job found.") | 577 logger.debug("Timed-out. No job found.") |
558 return None | 578 return None |
559 job = self._doGetNextJob() | 579 job = self._doGetNextJob() |
560 return job | 580 return job |
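getNextJob() previously took a single timeout covering both "queue is empty" and "jobs exist but none is ready". Splitting it lets a worker give up immediately on an empty queue (empty_timeout=None) while still waiting briefly for blocked jobs (see wait_timeout=1 at the call site below, new line 656). A condensed, runnable model of the reworked loop:

    import threading

    _EMPTY, _WAIT = object(), object()

    def get_next_job(peek, added_evt, done_evt,
                     wait_timeout=None, empty_timeout=None):
        # `peek()` returns a job, _EMPTY (no jobs) or _WAIT (jobs
        # queued, none ready); events are threading.Event instances.
        added_evt.clear()
        done_evt.clear()
        job = peek()
        while job in (_EMPTY, _WAIT):
            if job is _EMPTY:
                if empty_timeout is None:
                    return None                  # empty: give up at once
                res = added_evt.wait(empty_timeout)
            else:
                if wait_timeout is None:
                    return None                  # blocked: caller won't wait
                res = done_evt.wait(wait_timeout)
            if not res:
                return None                      # timed out, no job found
            job = peek()
        return job

    evt_a, evt_d = threading.Event(), threading.Event()
    assert get_next_job(lambda: _EMPTY, evt_a, evt_d) is None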
571 job.factory.source.name, job.factory.rel_path)) | 591 job.factory.source.name, job.factory.rel_path)) |
572 self.jobs.append(job) | 592 self.jobs.append(job) |
573 job = self.jobs.pop(0) | 593 job = self.jobs.pop(0) |
574 if job == first_job: | 594 if job == first_job: |
575 # None of the jobs are ready... we need to wait. | 595 # None of the jobs are ready... we need to wait. |
| 596 self.jobs.append(job) |
576 return self._WAIT | 597 return self._WAIT |
577 | 598 |
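The single inserted line (new 596) fixes a subtle bug: when the scheduler cycled through the whole queue without finding a ready job, the last popped job was returned as _WAIT but never put back, silently dropping it. Sketch of the corrected rotation over a plain list (helper name is mine):

    def next_ready(jobs, is_ready):
        if not jobs:
            return None                # _EMPTY in the real code
        first = job = jobs.pop(0)
        while not is_ready(job):
            jobs.append(job)           # rotate unready jobs to the back
            job = jobs.pop(0)
            if job is first:
                jobs.append(job)       # the fix: keep it in the queue
                return None            # _WAIT in the real code
        return job

    jobs = ['a', 'b']
    assert next_ready(jobs, lambda j: False) is None
    assert len(jobs) == 2              # nothing was dropped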
578 logger.debug("Job '%s:%s' is ready to go, moving to active " | 599 logger.debug("Job '%s:%s' is ready to go, moving to active " |
579 "queue." % (job.factory.source.name, job.factory.rel_path)) | 600 "queue." % (job.factory.source.name, job.factory.rel_path)) |
580 self._active_jobs.append(job) | 601 self._active_jobs.append(job) |
584 e = self.record.getPreviousEntry(job.factory.source.name, | 605 e = self.record.getPreviousEntry(job.factory.source.name, |
585 job.factory.rel_path) | 606 job.factory.rel_path) |
586 if not e: | 607 if not e: |
587 return True | 608 return True |
588 for sn in e.used_source_names: | 609 for sn in e.used_source_names: |
| 610 if sn == job.factory.source.name: |
| 611 continue |
589 if any(filter(lambda j: j.factory.source.name == sn, self.jobs)): | 612 if any(filter(lambda j: j.factory.source.name == sn, self.jobs)): |
590 return False | 613 return False |
591 if any(filter(lambda j: j.factory.source.name == sn, | 614 if any(filter(lambda j: j.factory.source.name == sn, |
592 self._active_jobs)): | 615 self._active_jobs)): |
593 return False | 616 return False |
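The other scheduler fix: when testing whether a job's dependencies are settled, the job's own source is now skipped. Without this, a page that lists its own source (common for index pages) could wait on sibling jobs indefinitely. A sketch of the test, assuming pending jobs are summarized as a set of source names:

    def is_job_ready(job_source, used_source_names, pending_sources):
        for sn in used_source_names:
            if sn == job_source:
                continue               # new: don't wait on our own source
            if sn in pending_sources:
                return False           # a real dependency is still queued
        return True

    assert is_job_ready('posts', {'posts'}, {'posts'})      # self only
    assert not is_job_ready('pages', {'posts'}, {'posts'})  # real dep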
604 self.work_queue = work_queue | 627 self.work_queue = work_queue |
605 self.abort_event = abort_event | 628 self.abort_event = abort_event |
606 | 629 |
607 | 630 |
608 class BakeWorkerJob(object): | 631 class BakeWorkerJob(object): |
609 def __init__(self, factory, route, taxonomy_name=None, taxonomy_term=None): | 632 def __init__(self, factory, route, record_entry, |
| 633 taxonomy_name=None, taxonomy_term=None): |
610 self.factory = factory | 634 self.factory = factory |
611 self.route = route | 635 self.route = route |
| 636 self.record_entry = record_entry |
612 self.taxonomy_name = taxonomy_name | 637 self.taxonomy_name = taxonomy_name |
613 self.taxonomy_term = taxonomy_term | 638 self.taxonomy_term = taxonomy_term |
614 | 639 |
615 @property | 640 @property |
616 def source(self): | 641 def source(self): |
626 self._page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force, | 651 self._page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force, |
627 ctx.record) | 652 ctx.record) |
628 | 653 |
629 def run(self): | 654 def run(self): |
630 while(not self.ctx.abort_event.is_set()): | 655 while(not self.ctx.abort_event.is_set()): |
631 job = self.ctx.work_queue.getNextJob() | 656 job = self.ctx.work_queue.getNextJob(wait_timeout=1) |
632 if job is None: | 657 if job is None: |
633 logger.debug("[%d] No more work... shutting down." % self.wid) | 658 logger.debug("[%d] No more work... shutting down." % self.wid) |
634 break | 659 break |
635 | 660 |
636 try: | 661 try: |
646 break | 671 break |
647 | 672 |
648 def _unsafeRun(self, job): | 673 def _unsafeRun(self, job): |
649 start_time = time.clock() | 674 start_time = time.clock() |
650 | 675 |
651 bake_res = self._page_baker.bake(job.factory, job.route, | 676 entry = job.record_entry |
| 677 self._page_baker.bake(job.factory, job.route, entry, |
652 taxonomy_name=job.taxonomy_name, | 678 taxonomy_name=job.taxonomy_name, |
653 taxonomy_term=job.taxonomy_term) | 679 taxonomy_term=job.taxonomy_term) |
654 | 680 |
655 if bake_res.was_baked: | 681 if entry.was_baked: |
656 uri = bake_res.out_uris[0] | 682 uri = entry.out_uris[0] |
657 friendly_uri = uri if uri != '' else '[main page]' | 683 friendly_uri = uri if uri != '' else '[main page]' |
658 friendly_count = '' | 684 friendly_count = '' |
659 if bake_res.num_subs > 1: | 685 if entry.num_subs > 1: |
660 friendly_count = ' (%d pages)' % bake_res.num_subs | 686 friendly_count = ' (%d pages)' % entry.num_subs |
661 logger.info(format_timed(start_time, '[%d] %s%s' % | 687 logger.info(format_timed(start_time, '[%d] %s%s' % |
662 (self.wid, friendly_uri, friendly_count))) | 688 (self.wid, friendly_uri, friendly_count))) |
663 | 689 |
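Since bake() no longer returns a result object, the worker reads everything it needs for logging off the entry attached to the job; with entries registered up front, even skipped or overriden pages leave a trace in the bake record. A last sketch of that read-back, with a stand-in entry:

    from types import SimpleNamespace

    entry = SimpleNamespace(was_baked=True, out_uris=['', 'blog/2'],
                            num_subs=2)
    if entry.was_baked:
        uri = entry.out_uris[0]
        friendly_uri = uri if uri != '' else '[main page]'
        friendly_count = (' (%d pages)' % entry.num_subs
                          if entry.num_subs > 1 else '')
        print('%s%s' % (friendly_uri, friendly_count))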