comparison piecrust/baking/baker.py @ 91:e88e330eb8dc

Improvements to incremental baking and cache invalidation.
author Ludovic Chabant <ludovic@chabant.com>
date Fri, 05 Sep 2014 00:42:13 -0700
parents 3471ffa059b2
children 0445a2232de7
comparison
equal deleted inserted replaced
90:e293f08d954e 91:e88e330eb8dc
4 import shutil 4 import shutil
5 import hashlib 5 import hashlib
6 import logging 6 import logging
7 import threading 7 import threading
8 import urllib.request, urllib.error, urllib.parse 8 import urllib.request, urllib.error, urllib.parse
9 from piecrust.baking.records import TransitionalBakeRecord, BakeRecordPageEntry 9 from piecrust.baking.records import (TransitionalBakeRecord,
10 BakeRecordPageEntry,
11 FLAG_OVERRIDEN, FLAG_SOURCE_MODIFIED)
10 from piecrust.chefutil import format_timed, log_friendly_exception 12 from piecrust.chefutil import format_timed, log_friendly_exception
11 from piecrust.data.filters import (PaginationFilter, HasFilterClause, 13 from piecrust.data.filters import (PaginationFilter, HasFilterClause,
12 IsFilterClause, AndBooleanClause) 14 IsFilterClause, AndBooleanClause)
13 from piecrust.processing.base import ProcessorPipeline 15 from piecrust.processing.base import ProcessorPipeline
14 from piecrust.rendering import PageRenderingContext, render_page 16 from piecrust.rendering import PageRenderingContext, render_page
74 else: 76 else:
75 bake_path.append(decoded_uri + '.html') 77 bake_path.append(decoded_uri + '.html')
76 78
77 return os.path.normpath(os.path.join(*bake_path)) 79 return os.path.normpath(os.path.join(*bake_path))
78 80
79 def bake(self, factory, route, taxonomy_name=None, taxonomy_term=None): 81 def bake(self, factory, route, record_entry,
82 taxonomy_name=None, taxonomy_term=None):
80 pagination_filter = None 83 pagination_filter = None
81 custom_data = None 84 custom_data = None
82 if taxonomy_name and taxonomy_term: 85 if taxonomy_name and taxonomy_term:
83 # Must bake a taxonomy listing page... we'll have to add a 86 # Must bake a taxonomy listing page... we'll have to add a
84 # pagination filter to only get matching posts, and the output 87 # pagination filter to only get matching posts, and the output
115 "'%s:%s'." % (factory.ref_spec, uri, 118 "'%s:%s'." % (factory.ref_spec, uri,
116 override.source_name, override.rel_path)) 119 override.source_name, override.rel_path))
117 logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" % 120 logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" %
118 (factory.ref_spec, uri, override.source_name, 121 (factory.ref_spec, uri, override.source_name,
119 override.rel_path)) 122 override.rel_path))
120 entry = BakeRecordPageEntry() 123 record_entry.flags |= FLAG_OVERRIDEN
121 entry.path = factory.path 124 return
122 entry.rel_path = factory.rel_path
123 entry.source_name = factory.source.name
124 entry.was_overriden = True
125
126 if self.record:
127 self.record.addEntry(entry)
128
129 return entry
130 125
131 cur_sub = 1 126 cur_sub = 1
132 has_more_subs = True 127 has_more_subs = True
128 force_this = self.force
133 page = factory.buildPage() 129 page = factory.buildPage()
134 cur_record_entry = BakeRecordPageEntry(page) 130 record_entry.config = page.config.get().copy()
135 cur_record_entry.taxonomy_name = taxonomy_name
136 cur_record_entry.taxonomy_term = taxonomy_term
137 prev_record_entry = self.record.getPreviousEntry( 131 prev_record_entry = self.record.getPreviousEntry(
138 factory.source.name, factory.rel_path, 132 factory.source.name, factory.rel_path,
139 taxonomy_name, taxonomy_term) 133 taxonomy_name, taxonomy_term)
140 134
141 logger.debug("Baking '%s'..." % uri) 135 logger.debug("Baking '%s'..." % uri)
136
137 # If the current page is known to use pages from other sources,
138 # see if any of those got baked, or are going to be baked for some
139 # reason. If so, we need to bake this one too.
140 # (this happens for instance with the main page of a blog).
141 if prev_record_entry:
142 any_used_src_baked = False
143 used_src_names = list(prev_record_entry.used_source_names)
144 for src_name in used_src_names:
145 entries = self.record.getCurrentEntries(src_name)
146 for e in entries:
147 if e.was_baked or e.flags & FLAG_SOURCE_MODIFIED:
148 any_used_src_baked = True
149 break
150 if any_used_src_baked:
151 break
152 if any_used_src_baked:
153 logger.debug("'%s' is known to use sources %s, at least one "
154 "of which got baked. Will force bake this page. "
155 % (uri, used_src_names))
156 force_this = True
157
142 while has_more_subs: 158 while has_more_subs:
143 sub_uri = self.getOutputUri(uri, cur_sub) 159 sub_uri = self.getOutputUri(uri, cur_sub)
144 out_path = self.getOutputPath(sub_uri) 160 out_path = self.getOutputPath(sub_uri)
145 161
146 # Check for up-to-date outputs. 162 # Check for up-to-date outputs.
147 do_bake = True 163 do_bake = True
148 if not self.force and prev_record_entry: 164 if not force_this and prev_record_entry:
149 try: 165 try:
150 in_path_time = page.path_mtime 166 in_path_time = record_entry.path_mtime
151 out_path_time = os.path.getmtime(out_path) 167 out_path_time = os.path.getmtime(out_path)
152 if out_path_time > in_path_time: 168 if out_path_time > in_path_time and not any_used_src_baked:
153 do_bake = False 169 do_bake = False
154 except OSError: 170 except OSError:
155 # File doesn't exist, we'll need to bake. 171 # File doesn't exist, we'll need to bake.
156 pass 172 pass
157 173
200 dest_ap = os.path.join(out_assets_dir, os.path.basename(ap)) 216 dest_ap = os.path.join(out_assets_dir, os.path.basename(ap))
201 logger.debug(" %s -> %s" % (ap, dest_ap)) 217 logger.debug(" %s -> %s" % (ap, dest_ap))
202 shutil.copy(ap, dest_ap) 218 shutil.copy(ap, dest_ap)
203 219
204 # Record what we did and figure out if we have more work. 220 # Record what we did and figure out if we have more work.
205 cur_record_entry.out_uris.append(sub_uri) 221 record_entry.out_uris.append(sub_uri)
206 cur_record_entry.out_paths.append(out_path) 222 record_entry.out_paths.append(out_path)
207 cur_record_entry.used_source_names |= ctx.used_source_names 223 record_entry.used_source_names |= ctx.used_source_names
208 cur_record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms 224 record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms
209 225
210 has_more_subs = False 226 has_more_subs = False
211 if ctx.used_pagination is not None: 227 if ctx.used_pagination is not None:
212 cur_record_entry.addUsedSource(ctx.used_pagination._source) 228 record_entry.addUsedSource(ctx.used_pagination._source)
213 if ctx.used_pagination.has_more: 229 if ctx.used_pagination.has_more:
214 cur_sub += 1 230 cur_sub += 1
215 has_more_subs = True 231 has_more_subs = True
216
217 if self.record:
218 self.record.addEntry(cur_record_entry)
219
220 return cur_record_entry
221 232
222 def _bakeSingle(self, page, sub_uri, num, out_path, 233 def _bakeSingle(self, page, sub_uri, num, out_path,
223 pagination_filter=None, custom_data=None): 234 pagination_filter=None, custom_data=None):
224 ctx = PageRenderingContext(page, sub_uri) 235 ctx = PageRenderingContext(page, sub_uri)
225 ctx.page_num = num 236 ctx.page_num = num
312 self._bakeAssets(record) 323 self._bakeAssets(record)
313 324
314 # Save the bake record. 325 # Save the bake record.
315 t = time.clock() 326 t = time.clock()
316 record.current.bake_time = time.time() 327 record.current.bake_time = time.time()
328 record.current.out_dir = self.out_dir
317 record.collapseRecords() 329 record.collapseRecords()
318 record.saveCurrent(record_cache.getCachePath(record_name)) 330 record.saveCurrent(record_cache.getCachePath(record_name))
319 logger.debug(format_timed(t, 'saved bake record', colored=False)) 331 logger.debug(format_timed(t, 'saved bake record', colored=False))
320 332
321 # All done. 333 # All done.
331 reason = "ordered to" 343 reason = "ordered to"
332 elif not self.app.config.get('__cache_valid'): 344 elif not self.app.config.get('__cache_valid'):
333 # The configuration file was changed, or we're running a new 345 # The configuration file was changed, or we're running a new
334 # version of the app. 346 # version of the app.
335 reason = "not valid anymore" 347 reason = "not valid anymore"
336 elif not record.previous.bake_time: 348 elif (not record.previous.bake_time or
349 not record.previous.hasLatestVersion()):
337 # We have no valid previous bake record. 350 # We have no valid previous bake record.
338 reason = "need bake record regeneration" 351 reason = "need bake record regeneration"
339 else: 352 else:
340 # Check if any template has changed since the last bake. Since 353 # Check if any template has changed since the last bake. Since
341 # there could be some advanced conditional logic going on, we'd 354 # there could be some advanced conditional logic going on, we'd
354 cache_dir = self.app.cache.getCacheDir('baker') 367 cache_dir = self.app.cache.getCacheDir('baker')
355 if os.path.isdir(cache_dir): 368 if os.path.isdir(cache_dir):
356 logger.debug("Cleaning baker cache: %s" % cache_dir) 369 logger.debug("Cleaning baker cache: %s" % cache_dir)
357 shutil.rmtree(cache_dir) 370 shutil.rmtree(cache_dir)
358 self.force = True 371 self.force = True
372 record.incremental_count = 0
359 logger.info(format_timed(start_time, 373 logger.info(format_timed(start_time,
360 "cleaned cache (reason: %s)" % reason)) 374 "cleaned cache (reason: %s)" % reason))
361 else: 375 else:
376 record.incremental_count += 1
362 logger.debug(format_timed(start_time, "cache is assumed valid", 377 logger.debug(format_timed(start_time, "cache is assumed valid",
363 colored=False)) 378 colored=False))
364 379
365 def _bakeRealm(self, record, realm, srclist): 380 def _bakeRealm(self, record, realm, srclist):
366 # Gather all page factories from the sources and queue them 381 # Gather all page factories from the sources and queue them
379 route = self.app.getRoute(source.name, fac.metadata) 394 route = self.app.getRoute(source.name, fac.metadata)
380 if route is None: 395 if route is None:
381 logger.error("Can't get route for page: %s" % fac.ref_spec) 396 logger.error("Can't get route for page: %s" % fac.ref_spec)
382 continue 397 continue
383 398
384 logger.debug("Queuing: %s" % fac.ref_spec) 399 entry = BakeRecordPageEntry(fac)
385 queue.addJob(BakeWorkerJob(fac, route)) 400 record.addEntry(entry)
401 queue.addJob(BakeWorkerJob(fac, route, entry))
386 402
387 self._waitOnWorkerPool(pool, abort) 403 self._waitOnWorkerPool(pool, abort)
388 404
389 def _bakeTaxonomies(self, record): 405 def _bakeTaxonomies(self, record):
390 logger.debug("Baking taxonomies") 406 logger.debug("Baking taxonomies")
467 for term in terms: 483 for term in terms:
468 fac = PageFactory(tax_page_source, tax_page_rel_path, 484 fac = PageFactory(tax_page_source, tax_page_rel_path,
469 {tax.term_name: term}) 485 {tax.term_name: term})
470 logger.debug("Queuing: %s [%s, %s]" % 486 logger.debug("Queuing: %s [%s, %s]" %
471 (fac.ref_spec, tax_name, term)) 487 (fac.ref_spec, tax_name, term))
488 entry = BakeRecordPageEntry(fac, tax_name, term)
489 record.addEntry(entry)
472 queue.addJob( 490 queue.addJob(
473 BakeWorkerJob(fac, route, tax_name, term)) 491 BakeWorkerJob(fac, route, entry, tax_name, term))
474 492
475 self._waitOnWorkerPool(pool, abort) 493 self._waitOnWorkerPool(pool, abort)
476 494
477 def _bakeAssets(self, record): 495 def _bakeAssets(self, record):
478 mounts = self.app.assets_dirs 496 mounts = self.app.assets_dirs
525 self._lock = threading.Lock() 543 self._lock = threading.Lock()
526 self._added_event = threading.Event() 544 self._added_event = threading.Event()
527 self._done_event = threading.Event() 545 self._done_event = threading.Event()
528 546
529 def addJob(self, job): 547 def addJob(self, job):
530 logger.debug("Adding job '%s:%s' to scheduler." % ( 548 logger.debug("Queuing job '%s:%s'." % (
531 job.factory.source.name, job.factory.rel_path)) 549 job.factory.source.name, job.factory.rel_path))
532 with self._lock: 550 with self._lock:
533 self.jobs.append(job) 551 self.jobs.append(job)
534 self._added_event.set() 552 self._added_event.set()
535 553
536 def onJobFinished(self, job): 554 def onJobFinished(self, job):
537 logger.debug("Removing job '%s:%s' from scheduler." % ( 555 logger.debug("Removing job '%s:%s'." % (
538 job.factory.source.name, job.factory.rel_path)) 556 job.factory.source.name, job.factory.rel_path))
539 with self._lock: 557 with self._lock:
540 self._active_jobs.remove(job) 558 self._active_jobs.remove(job)
541 self._done_event.set() 559 self._done_event.set()
542 560
543 def getNextJob(self, timeout=None): 561 def getNextJob(self, wait_timeout=None, empty_timeout=None):
544 self._added_event.clear() 562 self._added_event.clear()
545 self._done_event.clear() 563 self._done_event.clear()
546 job = self._doGetNextJob() 564 job = self._doGetNextJob()
547 while job in (self._EMPTY, self._WAIT): 565 while job in (self._EMPTY, self._WAIT):
548 if timeout is None:
549 return None
550 if job == self._EMPTY: 566 if job == self._EMPTY:
567 if empty_timeout is None:
568 return None
551 logger.debug("Waiting for a new job to be added...") 569 logger.debug("Waiting for a new job to be added...")
552 res = self._added_event.wait(timeout) 570 res = self._added_event.wait(empty_timeout)
553 elif job == self._WAIT: 571 elif job == self._WAIT:
572 if wait_timeout is None:
573 return None
554 logger.debug("Waiting for a job to be finished...") 574 logger.debug("Waiting for a job to be finished...")
555 res = self._done_event.wait(timeout) 575 res = self._done_event.wait(wait_timeout)
556 if not res: 576 if not res:
557 logger.debug("Timed-out. No job found.") 577 logger.debug("Timed-out. No job found.")
558 return None 578 return None
559 job = self._doGetNextJob() 579 job = self._doGetNextJob()
560 return job 580 return job
571 job.factory.source.name, job.factory.rel_path)) 591 job.factory.source.name, job.factory.rel_path))
572 self.jobs.append(job) 592 self.jobs.append(job)
573 job = self.jobs.pop(0) 593 job = self.jobs.pop(0)
574 if job == first_job: 594 if job == first_job:
575 # None of the jobs are ready... we need to wait. 595 # None of the jobs are ready... we need to wait.
596 self.jobs.append(job)
576 return self._WAIT 597 return self._WAIT
577 598
578 logger.debug("Job '%s:%s' is ready to go, moving to active " 599 logger.debug("Job '%s:%s' is ready to go, moving to active "
579 "queue." % (job.factory.source.name, job.factory.rel_path)) 600 "queue." % (job.factory.source.name, job.factory.rel_path))
580 self._active_jobs.append(job) 601 self._active_jobs.append(job)
584 e = self.record.getPreviousEntry(job.factory.source.name, 605 e = self.record.getPreviousEntry(job.factory.source.name,
585 job.factory.rel_path) 606 job.factory.rel_path)
586 if not e: 607 if not e:
587 return True 608 return True
588 for sn in e.used_source_names: 609 for sn in e.used_source_names:
610 if sn == job.factory.source.name:
611 continue
589 if any(filter(lambda j: j.factory.source.name == sn, self.jobs)): 612 if any(filter(lambda j: j.factory.source.name == sn, self.jobs)):
590 return False 613 return False
591 if any(filter(lambda j: j.factory.source.name == sn, 614 if any(filter(lambda j: j.factory.source.name == sn,
592 self._active_jobs)): 615 self._active_jobs)):
593 return False 616 return False
604 self.work_queue = work_queue 627 self.work_queue = work_queue
605 self.abort_event = abort_event 628 self.abort_event = abort_event
606 629
607 630
608 class BakeWorkerJob(object): 631 class BakeWorkerJob(object):
609 def __init__(self, factory, route, taxonomy_name=None, taxonomy_term=None): 632 def __init__(self, factory, route, record_entry,
633 taxonomy_name=None, taxonomy_term=None):
610 self.factory = factory 634 self.factory = factory
611 self.route = route 635 self.route = route
636 self.record_entry = record_entry
612 self.taxonomy_name = taxonomy_name 637 self.taxonomy_name = taxonomy_name
613 self.taxonomy_term = taxonomy_term 638 self.taxonomy_term = taxonomy_term
614 639
615 @property 640 @property
616 def source(self): 641 def source(self):
626 self._page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force, 651 self._page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force,
627 ctx.record) 652 ctx.record)
628 653
629 def run(self): 654 def run(self):
630 while(not self.ctx.abort_event.is_set()): 655 while(not self.ctx.abort_event.is_set()):
631 job = self.ctx.work_queue.getNextJob() 656 job = self.ctx.work_queue.getNextJob(wait_timeout=1)
632 if job is None: 657 if job is None:
633 logger.debug("[%d] No more work... shutting down." % self.wid) 658 logger.debug("[%d] No more work... shutting down." % self.wid)
634 break 659 break
635 660
636 try: 661 try:
646 break 671 break
647 672
648 def _unsafeRun(self, job): 673 def _unsafeRun(self, job):
649 start_time = time.clock() 674 start_time = time.clock()
650 675
651 bake_res = self._page_baker.bake(job.factory, job.route, 676 entry = job.record_entry
677 self._page_baker.bake(job.factory, job.route, entry,
652 taxonomy_name=job.taxonomy_name, 678 taxonomy_name=job.taxonomy_name,
653 taxonomy_term=job.taxonomy_term) 679 taxonomy_term=job.taxonomy_term)
654 680
655 if bake_res.was_baked: 681 if entry.was_baked:
656 uri = bake_res.out_uris[0] 682 uri = entry.out_uris[0]
657 friendly_uri = uri if uri != '' else '[main page]' 683 friendly_uri = uri if uri != '' else '[main page]'
658 friendly_count = '' 684 friendly_count = ''
659 if bake_res.num_subs > 1: 685 if entry.num_subs > 1:
660 friendly_count = ' (%d pages)' % bake_res.num_subs 686 friendly_count = ' (%d pages)' % entry.num_subs
661 logger.info(format_timed(start_time, '[%d] %s%s' % 687 logger.info(format_timed(start_time, '[%d] %s%s' %
662 (self.wid, friendly_uri, friendly_count))) 688 (self.wid, friendly_uri, friendly_count)))
663 689