piecrust2: changeset 96:0445a2232de7
Improvements and fixes to incremental baking.
* Better handling of the render pass during page rendering.
* Used sources are paired with the render pass they were used in (see the
  sketch after this list).
* Proper use and invalidation of the rendered segments cache based on render
passes.
* The `Assetor` also does a better job of tracking what was used in a page.
* Add flags on a page to get better caching information for the debug window.
* Proper invalidation of the previous bake record when needed.
* Better information about why pages are delayed.
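In practice this means the bake record no longer stores bare source names but (source name, render pass) pairs, and the baker only throws away cached formatted segments when a source used during the formatting pass was re-baked. A minimal sketch of that decision; the PASS_* values mirror piecrust/rendering.py, but the data structures are simplified stand-ins, not the actual piecrust classes:

```python
# Sketch only: simplified stand-ins for bake record entries.
PASS_FORMATTING = 1
PASS_RENDERING = 2

def invalidated_passes(used_source_names, rebaked_sources):
    """Collect the render passes whose source inputs changed since last bake."""
    passes = set()
    for src_name, rdr_pass in used_source_names:
        if src_name in rebaked_sources:
            passes.add(rdr_pass)
    return passes

# A previous record entry might say: 'posts' was used while formatting,
# 'tags' while rendering the layout.
used = {('posts', PASS_FORMATTING), ('tags', PASS_RENDERING)}
passes = invalidated_passes(used, rebaked_sources={'posts'})

force_bake = bool(passes)                          # any hit forces a re-bake
invalidate_formatting = PASS_FORMATTING in passes  # drop cached segments too
print(force_bake, invalidate_formatting)           # True True
```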
author    Ludovic Chabant <ludovic@chabant.com>
date      Sun, 07 Sep 2014 23:48:57 -0700
parents   cb6eadea0845
children  00a9b24ca944
files     piecrust/baking/baker.py piecrust/baking/records.py
          piecrust/data/assetor.py piecrust/data/debug.py
          piecrust/environment.py piecrust/page.py piecrust/rendering.py
diffstat  7 files changed, 113 insertions(+), 88 deletions(-)
--- a/piecrust/baking/baker.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/baking/baker.py	Sun Sep 07 23:48:57 2014 -0700
@@ -13,7 +13,8 @@
 from piecrust.data.filters import (PaginationFilter, HasFilterClause,
         IsFilterClause, AndBooleanClause)
 from piecrust.processing.base import ProcessorPipeline
-from piecrust.rendering import PageRenderingContext, render_page
+from piecrust.rendering import (PageRenderingContext, render_page,
+        PASS_FORMATTING, PASS_RENDERING)
 from piecrust.sources.base import (PageFactory,
         REALM_NAMES, REALM_USER, REALM_THEME)
@@ -126,6 +127,7 @@
         cur_sub = 1
         has_more_subs = True
         force_this = self.force
+        invalidate_formatting = False
         page = factory.buildPage()
         record_entry.config = page.config.get().copy()
         prev_record_entry = self.record.getPreviousEntry(
@@ -139,21 +141,24 @@
         # reason. If so, we need to bake this one too.
         # (this happens for instance with the main page of a blog).
         if prev_record_entry:
-            any_used_src_baked = False
+            invalidated_render_passes = set()
             used_src_names = list(prev_record_entry.used_source_names)
-            for src_name in used_src_names:
+            for src_name, rdr_pass in used_src_names:
                 entries = self.record.getCurrentEntries(src_name)
                 for e in entries:
                     if e.was_baked or e.flags & FLAG_SOURCE_MODIFIED:
-                        any_used_src_baked = True
+                        invalidated_render_passes.add(rdr_pass)
                         break
-                if any_used_src_baked:
-                    break
-            if any_used_src_baked:
+            if len(invalidated_render_passes) > 0:
                 logger.debug("'%s' is known to use sources %s, at least one "
                              "of which got baked. Will force bake this page. "
                              % (uri, used_src_names))
                 force_this = True
+                if PASS_FORMATTING in invalidated_render_passes:
+                    logger.debug("Will invalidate cached formatting for '%s' "
+                                 "since sources were using during that pass."
+                                 % uri)
+                    invalidate_formatting = True

         while has_more_subs:
             sub_uri = self.getOutputUri(uri, cur_sub)
@@ -161,11 +166,11 @@
             # Check for up-to-date outputs.
             do_bake = True
-            if not force_this and prev_record_entry:
+            if not force_this:
                 try:
                     in_path_time = record_entry.path_mtime
                     out_path_time = os.path.getmtime(out_path)
-                    if out_path_time > in_path_time and not any_used_src_baked:
+                    if out_path_time > in_path_time:
                         do_bake = False
                 except OSError:
                     # File doesn't exist, we'll need to bake.
@@ -189,6 +194,11 @@
             # All good, proceed.
             try:
+                if invalidate_formatting:
+                    cache_key = '%s:%s' % (uri, cur_sub)
+                    self.app.env.rendered_segments_repository.invalidate(
+                            cache_key)
+
                 logger.debug(" p%d -> %s" % (cur_sub, out_path))
                 ctx, rp = self._bakeSingle(page, sub_uri, cur_sub, out_path,
                                            pagination_filter, custom_data)
@@ -212,7 +222,7 @@
                 logger.debug("Copying page assets to: %s" % out_assets_dir)
                 if not os.path.isdir(out_assets_dir):
                     os.makedirs(out_assets_dir, 0o755)
-                for ap in ctx.used_assets._getAssetPaths():
+                for ap in ctx.used_assets:
                     dest_ap = os.path.join(out_assets_dir, os.path.basename(ap))
                     logger.debug(" %s -> %s" % (ap, dest_ap))
                     shutil.copy(ap, dest_ap)
@@ -224,11 +234,10 @@
             record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms

             has_more_subs = False
-            if ctx.used_pagination is not None:
-                record_entry.addUsedSource(ctx.used_pagination._source)
-                if ctx.used_pagination.has_more:
-                    cur_sub += 1
-                    has_more_subs = True
+            if (ctx.used_pagination is not None and
+                    ctx.used_pagination.has_more):
+                cur_sub += 1
+                has_more_subs = True

     def _bakeSingle(self, page, sub_uri, num, out_path,
                     pagination_filter=None, custom_data=None):
@@ -370,6 +379,7 @@
                 shutil.rmtree(cache_dir)
                 self.force = True
                 record.incremental_count = 0
+                record.clearPrevious()
                 logger.info(format_timed(start_time,
                         "cleaned cache (reason: %s)" % reason))
             else:
@@ -586,9 +596,15 @@

         job = self.jobs.pop(0)
         first_job = job
-        while not self._isJobReady(job):
-            logger.debug("Job '%s:%s' isn't ready yet." % (
-                    job.factory.source.name, job.factory.rel_path))
+        while True:
+            ready, wait_on_src = self._isJobReady(job)
+            if ready:
+                break
+
+            logger.debug("Job '%s:%s' isn't ready yet: waiting on pages "
+                         "from source '%s' to finish baking." %
+                         (job.factory.source.name,
+                          job.factory.rel_path, wait_on_src))
             self.jobs.append(job)
             job = self.jobs.pop(0)
             if job == first_job:
@@ -605,16 +621,16 @@
         e = self.record.getPreviousEntry(job.factory.source.name,
                                          job.factory.rel_path)
         if not e:
-            return True
-        for sn in e.used_source_names:
+            return (True, None)
+        for sn, rp in e.used_source_names:
             if sn == job.factory.source.name:
                 continue
             if any(filter(lambda j: j.factory.source.name == sn, self.jobs)):
-                return False
+                return (False, sn)
             if any(filter(lambda j: j.factory.source.name == sn,
                           self._active_jobs)):
-                return False
-        return True
+                return (False, sn)
+        return (True, None)


 class BakeWorkerContext(object):
@@ -653,12 +669,13 @@

     def run(self):
         while(not self.ctx.abort_event.is_set()):
-            job = self.ctx.work_queue.getNextJob(wait_timeout=1)
-            if job is None:
-                logger.debug("[%d] No more work... shutting down." % self.wid)
-                break
+            try:
+                job = self.ctx.work_queue.getNextJob(wait_timeout=1)
+                if job is None:
+                    logger.debug("[%d] No more work... shutting down." %
+                                 self.wid)
+                    break

-            try:
                 self._unsafeRun(job)
                 logger.debug("[%d] Done with page." % self.wid)
                 self.ctx.work_queue.onJobFinished(job)
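The scheduling change above makes `_isJobReady` return a `(ready, blocking_source)` pair instead of a bare boolean, which is what feeds the more informative "why is this page delayed" log message. A standalone sketch of that pattern, with illustrative names rather than the real bake worker classes:

```python
import collections

# Illustrative stand-ins; the real baker works with bake jobs, page factories
# and previous bake record entries.
Job = collections.namedtuple('Job', ['name', 'used_source_names'])

def is_job_ready(job, pending_sources):
    """Return (ready, blocking_source) so the caller can log *why* it waits."""
    for src_name in job.used_source_names:
        if src_name in pending_sources:
            return (False, src_name)
    return (True, None)

job = Job('blog/index', ['posts'])
ready, wait_on_src = is_job_ready(job, pending_sources={'posts'})
if not ready:
    print("Job '%s' isn't ready yet: waiting on pages from source '%s' "
          "to finish baking." % (job.name, wait_on_src))
```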
--- a/piecrust/baking/records.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/baking/records.py	Sun Sep 07 23:48:57 2014 -0700
@@ -8,7 +8,7 @@
 logger = logging.getLogger(__name__)


-RECORD_VERSION = 4
+RECORD_VERSION = 6


 def _get_transition_key(source_name, rel_path, taxonomy_name=None,
@@ -75,10 +75,6 @@
         return _get_transition_key(self.source_name, self.rel_path,
                                    self.taxonomy_name, self.taxonomy_term)

-    def addUsedSource(self, source):
-        if isinstance(source, PageSource):
-            self.used_source_names.add(source.name)
-
     def __getstate__(self):
         state = self.__dict__.copy()
         del state['path_mtime']
@@ -110,6 +106,9 @@
             for e in self.previous.entries:
                 self.transitions[e.transition_key] = (e, None)

+    def clearPrevious(self):
+        self.previous = BakeRecord()
+
     def saveCurrent(self, current_path):
         self.current.save(current_path)
--- a/piecrust/data/assetor.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/data/assetor.py	Sun Sep 07 23:48:57 2014 -0700
@@ -57,10 +57,6 @@
         self._cacheAssets()
         return map(lambda i: i[0], self._cache.values())

-    def _getAssetPaths(self):
-        self._cacheAssets()
-        return map(lambda i: i[1], self._cache.values())
-
     def _debugRenderAssetNames(self):
         self._cacheAssets()
         return list(self._cache.keys())
@@ -91,5 +87,6 @@
         cpi = self._page.app.env.exec_info_stack.current_page_info
         if cpi is not None:
-            cpi.render_ctx.used_assets = self
+            used_assets = list(map(lambda i: i[1], self._cache.values()))
+            cpi.render_ctx.used_assets = used_assets
--- a/piecrust/data/debug.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/data/debug.py	Sun Sep 07 23:48:57 2014 -0700
@@ -4,6 +4,7 @@
 import logging
 import collections
 from piecrust import APP_VERSION, PIECRUST_URL
+from piecrust.page import FLAG_RAW_CACHE_VALID


 logger = logging.getLogger(__name__)
@@ -60,7 +61,6 @@

 def _do_build_debug_info(page, data, output):
     app = page.app
-    exec_info = app.env.exec_info_stack.current_page_info

     print('<div id="piecrust-debug-info" style="%s">' % CSS_DEBUGINFO,
           file=output)
@@ -69,22 +69,18 @@

     # If we have some execution info in the environment,
     # add more information.
-    if exec_info:
-        if exec_info.was_cache_valid:
-            output.write('baked this morning')
-        else:
-            output.write('baked just now')
+    if page.flags & FLAG_RAW_CACHE_VALID:
+        output.write('baked this morning')
+    else:
+        output.write('baked just now')

-        if app.cache.enabled:
-            if app.env.was_cache_cleaned:
-                output.write(', from a brand new cache')
-            else:
-                output.write(', from a valid cache')
+    if app.cache.enabled:
+        if app.env.was_cache_cleaned:
+            output.write(', from a brand new cache')
         else:
-            output.write(', with no cache')
-
+            output.write(', from a valid cache')
     else:
-        output.write('no caching information available')
+        output.write(', with no cache')

     output.write(', ')
     if app.env.start_time != 0:
--- a/piecrust/environment.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/environment.py	Sun Sep 07 23:48:57 2014 -0700
@@ -25,6 +25,16 @@
         self.cache = repoze.lru.LRUCache(size)
         self.lock = threading.RLock()
         self.fs_cache = None
+        self._invalidated_fs_items = set()
+
+    def invalidate(self, key):
+        with self.lock:
+            logger.debug("Invalidating cache item '%s'." % key)
+            self.cache.invalidate(key)
+            if self.fs_cache:
+                logger.debug("Invalidating FS cache item '%s'." % key)
+                fs_key = _make_fs_cache_key(key)
+                self._invalidated_fs_items.add(fs_key)

     def get(self, key, item_maker, fs_cache_time=None):
         item = self.cache.get(key)
@@ -37,7 +47,8 @@
                 fs_cache_time is not None):
             # Try first from the file-system cache.
             fs_key = _make_fs_cache_key(key)
-            if self.fs_cache.isValid(fs_key, fs_cache_time):
+            if (fs_key not in self._invalidated_fs_items and
+                    self.fs_cache.isValid(fs_key, fs_cache_time)):
                 logger.debug("'%s' found in file-system cache." % key)
                 item_raw = self.fs_cache.read(fs_key)
@@ -59,15 +70,9 @@
             return item


-PHASE_PAGE_PARSING = 0
-PHASE_PAGE_FORMATTING = 1
-PHASE_PAGE_RENDERING = 2
-
-
 class ExecutionInfo(object):
-    def __init__(self, page, phase, render_ctx):
+    def __init__(self, page, render_ctx):
         self.page = page
-        self.phase = phase
         self.render_ctx = render_ctx
         self.was_cache_valid = False
         self.start_time = time.clock()
@@ -93,8 +98,11 @@
                 return True
         return False

-    def pushPage(self, page, phase, render_ctx):
-        self._page_stack.append(ExecutionInfo(page, phase, render_ctx))
+    def pushPage(self, page, render_ctx):
+        if len(self._page_stack) > 0:
+            top = self._page_stack[-1]
+            assert top.page is not page
+        self._page_stack.append(ExecutionInfo(page, render_ctx))

     def popPage(self):
         del self._page_stack[-1]
@@ -108,12 +116,10 @@
         self.page_repository = MemCache()
         self.rendered_segments_repository = MemCache()
         self.base_asset_url_format = '%uri%'
-        self._use_rendered_segments_fs_cache = False

     def initialize(self, app):
-        if self._use_rendered_segments_fs_cache:
-            cache = app.cache.getCache('renders')
-            self.rendered_segments_repository.fs_cache = cache
+        cache = app.cache.getCache('renders')
+        self.rendered_segments_repository.fs_cache = cache


 class StandardEnvironment(Environment):
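The new `invalidate()` on the repository removes the in-memory entry and remembers the matching file-system key, so a later `get()` re-renders instead of trusting an on-disk copy whose timestamp still looks valid. A reduced, standalone model of that behaviour; this is not piecrust's `MemCache`, and it replaces repoze.lru and the real FS cache with plain dicts:

```python
import threading

class TinySegmentsCache:
    """Reduced model of a memory cache backed by a file-system cache; it only
    illustrates why invalidated keys must also be remembered for the FS layer."""

    def __init__(self):
        self._mem = {}
        self._fs = {}                  # stands in for the on-disk 'renders' cache
        self._invalidated_fs = set()   # keys whose on-disk copy must not be reused
        self._lock = threading.RLock()

    def invalidate(self, key):
        with self._lock:
            self._mem.pop(key, None)
            # The file on disk may still look valid by timestamp, so remember
            # that it has to be ignored for the rest of this run.
            self._invalidated_fs.add(key)

    def get(self, key, item_maker):
        with self._lock:
            if key in self._mem:
                return self._mem[key]
            if key in self._fs and key not in self._invalidated_fs:
                item = self._fs[key]       # reuse the on-disk copy
            else:
                item = item_maker()        # re-render
                self._fs[key] = item
            self._mem[key] = item
            return item

cache = TinySegmentsCache()
cache.get('blog:1', lambda: 'stale segments')
cache.invalidate('blog:1')
print(cache.get('blog:1', lambda: 'fresh segments'))   # -> fresh segments
```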
--- a/piecrust/page.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/page.py	Sun Sep 07 23:48:57 2014 -0700
@@ -11,7 +11,6 @@
 from werkzeug.utils import cached_property
 from piecrust.configuration import (Configuration, ConfigurationError,
         parse_config_header)
-from piecrust.environment import PHASE_PAGE_PARSING


 logger = logging.getLogger(__name__)
@@ -33,6 +32,10 @@
         return values


+FLAG_NONE = 0
+FLAG_RAW_CACHE_VALID = 2**0
+
+
 class Page(object):
     def __init__(self, source, source_metadata, rel_path):
         self.source = source
@@ -40,6 +43,7 @@
         self.rel_path = rel_path
         self._config = None
         self._raw_content = None
+        self._flags = FLAG_NONE
         self._datetime = None

     @property
@@ -59,6 +63,10 @@
         return os.path.getmtime(self.path)

     @property
+    def flags(self):
+        return self._flags
+
+    @property
     def config(self):
         self._load()
         return self._config
@@ -123,14 +131,12 @@
         if self._config is not None:
             return

-        eis = self.app.env.exec_info_stack
-        eis.pushPage(self, PHASE_PAGE_PARSING, None)
-        try:
-            config, content = load_page(self.app, self.path, self.path_mtime)
-            self._config = config
-            self._raw_content = content
-        finally:
-            eis.popPage()
+        config, content, was_cache_valid = load_page(self.app, self.path,
+                                                     self.path_mtime)
+        self._config = config
+        self._raw_content = content
+        if was_cache_valid:
+            self._flags |= FLAG_RAW_CACHE_VALID


 class PageLoadingError(Exception):
@@ -195,26 +201,19 @@

 def _do_load_page(app, path, path_mtime):
-    exec_info = app.env.exec_info_stack.current_page_info
-    if exec_info is None:
-        raise Exception("Loading page '%s' but not execution context has "
-                        "been created for it." % path)
-
     # Check the cache first.
     cache = app.cache.getCache('pages')
     cache_path = "%s.json" % hashlib.md5(path.encode('utf8')).hexdigest()
     page_time = path_mtime or os.path.getmtime(path)
     if cache.isValid(cache_path, page_time):
-        exec_info.was_cache_valid = True
        cache_data = json.loads(cache.read(cache_path),
                                object_pairs_hook=collections.OrderedDict)
        config = PageConfiguration(values=cache_data['config'],
                                   validate=False)
        content = json_load_segments(cache_data['content'])
-        return config, content
+        return config, content, True

     # Nope, load the page from the source file.
-    exec_info.was_cache_valid = False
     logger.debug("Loading page configuration from: %s" % path)
     with codecs.open(path, 'r', 'utf-8') as fp:
         raw = fp.read()
@@ -235,7 +234,7 @@
             'content': json_save_segments(content)}
     cache.write(cache_path, json.dumps(cache_data))

-    return config, content
+    return config, content, False


 segment_pattern = re.compile(
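With this change, the "was the raw content cache valid?" information travels on the page itself as a bit flag instead of through the execution-info stack, which is what the debug window now reads. A tiny illustration of the flag check; the flag constants are the ones from the diff, the page object is a made-up stand-in:

```python
FLAG_NONE = 0
FLAG_RAW_CACHE_VALID = 2**0

class StubPage:
    """Stand-in for piecrust.page.Page, reduced to the flags attribute."""
    def __init__(self, was_cache_valid):
        self.flags = FLAG_NONE
        if was_cache_valid:
            self.flags |= FLAG_RAW_CACHE_VALID

for page in (StubPage(True), StubPage(False)):
    if page.flags & FLAG_RAW_CACHE_VALID:
        print('baked this morning')   # raw content came from the page cache
    else:
        print('baked just now')       # page was parsed from source this run
```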
--- a/piecrust/rendering.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/rendering.py	Sun Sep 07 23:48:57 2014 -0700
@@ -3,7 +3,6 @@
 import logging
 from piecrust.data.builder import (DataBuildingContext, build_page_data,
         build_layout_data)
-from piecrust.environment import PHASE_PAGE_FORMATTING, PHASE_PAGE_RENDERING
 from piecrust.sources.base import PageSource
 from piecrust.uriutil import get_slug

@@ -33,6 +32,11 @@
         return self.page.app


+PASS_NONE = 0
+PASS_FORMATTING = 1
+PASS_RENDERING = 2
+
+
 class PageRenderingContext(object):
     def __init__(self, page, uri, page_num=1):
         self.page = page
@@ -46,6 +50,7 @@
         self.used_pagination = None
         self.used_source_names = set()
         self.used_taxonomy_terms = set()
+        self.current_pass = PASS_NONE

     @property
     def app(self):
@@ -66,15 +71,16 @@
         if self.used_pagination is not None:
             raise Exception("Pagination has already been used.")
         self.used_pagination = paginator
+        self.addUsedSource(paginator._source)

     def addUsedSource(self, source):
         if isinstance(source, PageSource):
-            self.used_source_names.add(source.name)
+            self.used_source_names.add((source.name, self.current_pass))


 def render_page(ctx):
     eis = ctx.app.env.exec_info_stack
-    eis.pushPage(ctx.page, PHASE_PAGE_RENDERING, ctx)
+    eis.pushPage(ctx.page, ctx)
     try:
         page = ctx.page
@@ -87,6 +93,7 @@
             page_data.update(ctx.custom_data)

         # Render content segments.
+        ctx.current_pass = PASS_FORMATTING
         repo = ctx.app.env.rendered_segments_repository
         if repo:
             cache_key = '%s:%s' % (ctx.uri, ctx.page_num)
@@ -98,6 +105,7 @@
             contents = _do_render_page_segments(page, page_data)

         # Render layout.
+        ctx.current_pass = PASS_RENDERING
         layout_name = page.config.get('layout')
         if layout_name is None:
             layout_name = page.source.config.get('default_layout', 'default')
@@ -114,6 +122,7 @@
         rp.execution_info = eis.current_page_info
         return rp
     finally:
+        ctx.current_pass = PASS_NONE
         eis.popPage()


@@ -130,12 +139,14 @@

 def _do_render_page_segments_from_ctx(ctx):
     eis = ctx.app.env.exec_info_stack
-    eis.pushPage(ctx.page, PHASE_PAGE_FORMATTING, ctx)
+    eis.pushPage(ctx.page, ctx)
+    ctx.current_pass = PASS_FORMATTING
     try:
         data_ctx = DataBuildingContext(ctx.page, ctx.uri, ctx.page_num)
         page_data = build_page_data(data_ctx)
         return _do_render_page_segments(ctx.page, page_data)
     finally:
+        ctx.current_pass = PASS_NONE
         eis.popPage()
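The rendering context sets `current_pass` around the two stages and always resets it in a `finally` block, so sources recorded via `addUsedSource()` get tagged with whichever pass was active. A schematic of that bracketing; the render steps are placeholders, not piecrust's actual pipeline:

```python
PASS_NONE = 0
PASS_FORMATTING = 1
PASS_RENDERING = 2

class StubRenderContext:
    """Placeholder context; only the current_pass bookkeeping is shown."""
    def __init__(self):
        self.current_pass = PASS_NONE
        self.used_source_names = set()

    def add_used_source(self, source_name):
        # Pair the source with whichever pass is active right now.
        self.used_source_names.add((source_name, self.current_pass))

def render_page(ctx):
    try:
        ctx.current_pass = PASS_FORMATTING
        ctx.add_used_source('posts')      # e.g. pagination used in a segment
        ctx.current_pass = PASS_RENDERING
        ctx.add_used_source('tags')       # e.g. a source queried from the layout
    finally:
        ctx.current_pass = PASS_NONE      # never leak the pass past this render

ctx = StubRenderContext()
render_page(ctx)
print(sorted(ctx.used_source_names))      # [('posts', 1), ('tags', 2)]
```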