comparison: piecrust/baking/baker.py @ 96:0445a2232de7

Improvements and fixes to incremental baking.

* Better handling of the render pass during page rendering.
* Used sources are paired with the pass they were used in.
* Proper use and invalidation of the rendered segments cache based on render passes.
* The `Assetor` also better tracks what was used in a page.
* Add flags on a page to get better caching information for the debug window.
* Proper invalidation of the previous bake record when needed.
* Better information about why pages are delayed.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 07 Sep 2014 23:48:57 -0700
parents e88e330eb8dc
children 7d2fdf43d7ca
comparing 95:cb6eadea0845 with 96:0445a2232de7
@@ -11,11 +11,12 @@
         FLAG_OVERRIDEN, FLAG_SOURCE_MODIFIED)
 from piecrust.chefutil import format_timed, log_friendly_exception
 from piecrust.data.filters import (PaginationFilter, HasFilterClause,
         IsFilterClause, AndBooleanClause)
 from piecrust.processing.base import ProcessorPipeline
-from piecrust.rendering import PageRenderingContext, render_page
+from piecrust.rendering import (PageRenderingContext, render_page,
+        PASS_FORMATTING, PASS_RENDERING)
 from piecrust.sources.base import (PageFactory,
         REALM_NAMES, REALM_USER, REALM_THEME)
 
 
 logger = logging.getLogger(__name__)
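The import change above is the heart of the patch: rendering is split into named passes, and everything downstream tracks used sources per pass. A minimal sketch of that pairing, with hypothetical stand-in values and a hypothetical RenderContextSketch class (the real constants and context live in piecrust.rendering):

    # Stand-ins for the constants imported above; the actual values are
    # defined in piecrust.rendering.
    PASS_FORMATTING = 0  # formatting text segments (e.g. Markdown -> HTML)
    PASS_RENDERING = 1   # rendering the page through its layout/template

    class RenderContextSketch:
        """Records which content sources a page used, paired with the
        render pass that was active when they were used."""
        def __init__(self):
            self.current_pass = PASS_FORMATTING
            self.used_source_names = set()  # {(source_name, render_pass)}

        def registerUsedSource(self, source_name):
            # Pair the source with the pass that touched it.
            self.used_source_names.add((source_name, self.current_pass))

Knowing the pass matters because the two passes have different caches: formatted segments can be cached between bakes, while the final template rendering produces the output file itself.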
@@ -124,10 +125,11 @@
             return
 
         cur_sub = 1
         has_more_subs = True
         force_this = self.force
+        invalidate_formatting = False
         page = factory.buildPage()
         record_entry.config = page.config.get().copy()
         prev_record_entry = self.record.getPreviousEntry(
                 factory.source.name, factory.rel_path,
                 taxonomy_name, taxonomy_term)
@@ -137,21 +139,24 @@
         # If the current page is known to use pages from other sources,
         # see if any of those got baked, or are going to be baked for some
         # reason. If so, we need to bake this one too.
         # (this happens for instance with the main page of a blog).
         if prev_record_entry:
-            any_used_src_baked = False
+            invalidated_render_passes = set()
             used_src_names = list(prev_record_entry.used_source_names)
-            for src_name in used_src_names:
+            for src_name, rdr_pass in used_src_names:
                 entries = self.record.getCurrentEntries(src_name)
                 for e in entries:
                     if e.was_baked or e.flags & FLAG_SOURCE_MODIFIED:
-                        any_used_src_baked = True
+                        invalidated_render_passes.add(rdr_pass)
                         break
-                if any_used_src_baked:
-                    break
-            if any_used_src_baked:
+            if len(invalidated_render_passes) > 0:
                 logger.debug("'%s' is known to use sources %s, at least one "
                              "of which got baked. Will force bake this page. "
                              % (uri, used_src_names))
                 force_this = True
+                if PASS_FORMATTING in invalidated_render_passes:
+                    logger.debug("Will invalidate cached formatting for '%s' "
+                                 "since sources were used during that pass."
+                                 % uri)
+                    invalidate_formatting = True
 
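The hunk above replaces the old boolean any_used_src_baked with a set of invalidated passes, which allows a finer decision. A sketch of that decision as a pure function (a hypothetical helper, not in the patch):

    def plan_rebake(invalidated_render_passes):
        # Any pass touched by a re-baked source forces this page to bake
        # again; only a hit during the formatting pass additionally
        # requires dropping the cached formatted segments.
        force_bake = len(invalidated_render_passes) > 0
        invalidate_formatting = PASS_FORMATTING in invalidated_render_passes
        return force_bake, invalidate_formatting

A page whose sources were only used while templating (PASS_RENDERING) is re-rendered, but its cached formatting stays valid.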
@@ -158,16 +163,16 @@
         while has_more_subs:
             sub_uri = self.getOutputUri(uri, cur_sub)
             out_path = self.getOutputPath(sub_uri)
 
             # Check for up-to-date outputs.
             do_bake = True
-            if not force_this and prev_record_entry:
+            if not force_this:
                 try:
                     in_path_time = record_entry.path_mtime
                     out_path_time = os.path.getmtime(out_path)
-                    if out_path_time > in_path_time and not any_used_src_baked:
+                    if out_path_time > in_path_time:
                         do_bake = False
                 except OSError:
                     # File doesn't exist, we'll need to bake.
                     pass
 
@@ -187,10 +192,15 @@
                 logger.debug("  %s is up to date, skipping bake." % out_path)
                 break
 
             # All good, proceed.
             try:
+                if invalidate_formatting:
+                    cache_key = '%s:%s' % (uri, cur_sub)
+                    self.app.env.rendered_segments_repository.invalidate(
+                            cache_key)
+
                 logger.debug("  p%d -> %s" % (cur_sub, out_path))
                 ctx, rp = self._bakeSingle(page, sub_uri, cur_sub, out_path,
                                            pagination_filter, custom_data)
             except Exception as ex:
                 if self.app.debug:
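The cache key combines the page URI and the sub-page number, so only the affected sub-pages lose their cached segments. The patch only shows the invalidate() call; a hypothetical sketch of the repository shape it implies:

    class RenderedSegmentsRepositorySketch:
        """Assumed shape of app.env.rendered_segments_repository; the
        real implementation lives elsewhere in PieCrust."""
        def __init__(self):
            self._cache = {}

        def get(self, key):
            return self._cache.get(key)

        def put(self, key, segments):
            self._cache[key] = segments

        def invalidate(self, key):
            # Drop one sub-page's formatted segments so the formatting
            # pass runs again on the next render.
            self._cache.pop(key, None)

    # Keys follow the '%s:%s' % (uri, cur_sub) convention used above:
    # repo.invalidate('blog/my-post:1')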
@@ -210,11 +220,11 @@
                 out_assets_dir += out_name_noext
 
                 logger.debug("Copying page assets to: %s" % out_assets_dir)
                 if not os.path.isdir(out_assets_dir):
                     os.makedirs(out_assets_dir, 0o755)
-                for ap in ctx.used_assets._getAssetPaths():
+                for ap in ctx.used_assets:
                     dest_ap = os.path.join(out_assets_dir, os.path.basename(ap))
                     logger.debug("  %s -> %s" % (ap, dest_ap))
                     shutil.copy(ap, dest_ap)
 
             # Record what we did and figure out if we have more work.
@@ -222,15 +232,14 @@
             record_entry.out_paths.append(out_path)
             record_entry.used_source_names |= ctx.used_source_names
             record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms
 
             has_more_subs = False
-            if ctx.used_pagination is not None:
-                record_entry.addUsedSource(ctx.used_pagination._source)
-                if ctx.used_pagination.has_more:
-                    cur_sub += 1
-                    has_more_subs = True
+            if (ctx.used_pagination is not None and
+                    ctx.used_pagination.has_more):
+                cur_sub += 1
+                has_more_subs = True
 
     def _bakeSingle(self, page, sub_uri, num, out_path,
                     pagination_filter=None, custom_data=None):
         ctx = PageRenderingContext(page, sub_uri)
         ctx.page_num = num
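Note that record_entry.used_source_names |= ctx.used_source_names keeps working unchanged even though the set now holds (source, pass) tuples instead of bare names; for example (values hypothetical):

    record_used = {('posts', PASS_RENDERING)}
    ctx_used = {('posts', PASS_FORMATTING), ('pages', PASS_RENDERING)}
    record_used |= ctx_used
    # record_used now holds all three tuples; the same source can appear
    # once per render pass.

The explicit addUsedSource() call for the pagination source also disappears: the rendering context is now expected to record it, with its pass, like any other used source.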
@@ -368,10 +377,11 @@
             if os.path.isdir(cache_dir):
                 logger.debug("Cleaning baker cache: %s" % cache_dir)
                 shutil.rmtree(cache_dir)
             self.force = True
             record.incremental_count = 0
+            record.clearPrevious()
             logger.info(format_timed(start_time,
                                      "cleaned cache (reason: %s)" % reason))
         else:
             record.incremental_count += 1
             logger.debug(format_timed(start_time, "cache is assumed valid",
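clearPrevious() is the "proper invalidation of the previous bake record" from the commit message: once the cache directory is wiped, the mtimes and used-source data from the last bake no longer describe anything on disk, so the previous record must not be consulted. A sketch of the intent, using a hypothetical minimal record class:

    class BakeRecordSketch:
        def __init__(self):
            self.previous_entries = []   # loaded from the last bake
            self.current_entries = []    # filled during this bake
            self.incremental_count = 0

        def clearPrevious(self):
            # The cache was just deleted; pretend there was no previous
            # bake so every page is treated as new.
            self.previous_entries = []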
@@ -584,13 +594,19 @@
         if len(self.jobs) == 0:
             return self._EMPTY
 
         job = self.jobs.pop(0)
         first_job = job
-        while not self._isJobReady(job):
-            logger.debug("Job '%s:%s' isn't ready yet." % (
-                job.factory.source.name, job.factory.rel_path))
+        while True:
+            ready, wait_on_src = self._isJobReady(job)
+            if ready:
+                break
+
+            logger.debug("Job '%s:%s' isn't ready yet: waiting on pages "
+                         "from source '%s' to finish baking." %
+                         (job.factory.source.name,
+                          job.factory.rel_path, wait_on_src))
             self.jobs.append(job)
             job = self.jobs.pop(0)
             if job == first_job:
                 # None of the jobs are ready... we need to wait.
                 self.jobs.append(job)
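The queue now rotates through jobs explicitly so it can log why a job is being delayed. A self-contained sketch of the same scheduling idea (a hypothetical helper, using a deque where the real code uses a plain list):

    from collections import deque

    def next_ready_job(jobs, is_ready):
        """Rotate through the queue until a ready job turns up, or a
        full cycle proves that none is ready yet."""
        jobs = deque(jobs)
        for _ in range(len(jobs)):
            job = jobs.popleft()
            ready, wait_on_src = is_ready(job)
            if ready:
                return job
            jobs.append(job)  # requeue; try again after the others
        return None           # nothing ready; the caller should wait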
@@ -604,19 +620,19 @@
     def _isJobReady(self, job):
         e = self.record.getPreviousEntry(job.factory.source.name,
                                          job.factory.rel_path)
         if not e:
-            return True
-        for sn in e.used_source_names:
+            return (True, None)
+        for sn, rp in e.used_source_names:
             if sn == job.factory.source.name:
                 continue
             if any(filter(lambda j: j.factory.source.name == sn, self.jobs)):
-                return False
+                return (False, sn)
             if any(filter(lambda j: j.factory.source.name == sn,
                           self._active_jobs)):
-                return False
-        return True
+                return (False, sn)
+        return (True, None)
 
 
 class BakeWorkerContext(object):
     def __init__(self, app, out_dir, force, record, work_queue,
                  abort_event):
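_isJobReady() now returns a (ready, wait_on_src) pair instead of a bare boolean, which is what feeds the "better information about why pages are delayed" log message above. A standalone sketch of the contract, with hypothetical names:

    def is_job_ready_sketch(prev_entry, job_source, pending_sources):
        if prev_entry is None:
            return (True, None)     # never baked before: no known dependencies
        for src_name, _rdr_pass in prev_entry.used_source_names:
            if src_name == job_source:
                continue            # a page never waits on its own source
            if src_name in pending_sources:
                return (False, src_name)  # report what we are waiting on
        return (True, None)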
@@ -651,16 +667,17 @@
         self._page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force,
                                      ctx.record)
 
     def run(self):
         while(not self.ctx.abort_event.is_set()):
-            job = self.ctx.work_queue.getNextJob(wait_timeout=1)
-            if job is None:
-                logger.debug("[%d] No more work... shutting down." % self.wid)
-                break
-
             try:
+                job = self.ctx.work_queue.getNextJob(wait_timeout=1)
+                if job is None:
+                    logger.debug("[%d] No more work... shutting down." %
+                                 self.wid)
+                    break
+
                 self._unsafeRun(job)
                 logger.debug("[%d] Done with page." % self.wid)
                 self.ctx.work_queue.onJobFinished(job)
             except Exception as ex:
                 self.ctx.abort_event.set()
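Moving getNextJob() inside the try block means a failure while fetching a job, not just while baking one, also sets the shared abort event, so sibling workers shut down instead of spinning or deadlocking. The loop boils down to this shape (process() is a hypothetical stand-in for _unsafeRun()):

    def worker_loop(work_queue, abort_event):
        while not abort_event.is_set():
            try:
                job = work_queue.getNextJob(wait_timeout=1)
                if job is None:
                    break                 # queue drained; clean shutdown
                process(job)
                work_queue.onJobFinished(job)
            except Exception:
                abort_event.set()         # stop every worker, not just this one
                raise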