changeset 96:0445a2232de7

Improvements and fixes to incremental baking. * Better handling of the render pass during page rendering. * Used sources are paired with the pass they were used in. * Proper use and invalidation of the rendered segments cache based on render passes. * The `Assetor` is also better at tracking what was used in a page. * Add flags on a page to get better caching information for the debug window. * Proper invalidation of the previous bake record when needed. * Better information about why pages are delayed.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 07 Sep 2014 23:48:57 -0700
parents cb6eadea0845
children 00a9b24ca944
files piecrust/baking/baker.py piecrust/baking/records.py piecrust/data/assetor.py piecrust/data/debug.py piecrust/environment.py piecrust/page.py piecrust/rendering.py
diffstat 7 files changed, 113 insertions(+), 88 deletions(-) [+]
line wrap: on
line diff
--- a/piecrust/baking/baker.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/baking/baker.py	Sun Sep 07 23:48:57 2014 -0700
@@ -13,7 +13,8 @@
 from piecrust.data.filters import (PaginationFilter, HasFilterClause,
         IsFilterClause, AndBooleanClause)
 from piecrust.processing.base import ProcessorPipeline
-from piecrust.rendering import PageRenderingContext, render_page
+from piecrust.rendering import (PageRenderingContext, render_page,
+        PASS_FORMATTING, PASS_RENDERING)
 from piecrust.sources.base import (PageFactory,
         REALM_NAMES, REALM_USER, REALM_THEME)
 
@@ -126,6 +127,7 @@
         cur_sub = 1
         has_more_subs = True
         force_this = self.force
+        invalidate_formatting = False
         page = factory.buildPage()
         record_entry.config = page.config.get().copy()
         prev_record_entry = self.record.getPreviousEntry(
@@ -139,21 +141,24 @@
         # reason. If so, we need to bake this one too.
         # (this happens for instance with the main page of a blog).
         if prev_record_entry:
-            any_used_src_baked = False
+            invalidated_render_passes = set()
             used_src_names = list(prev_record_entry.used_source_names)
-            for src_name in used_src_names:
+            for src_name, rdr_pass in used_src_names:
                 entries = self.record.getCurrentEntries(src_name)
                 for e in entries:
                     if e.was_baked or e.flags & FLAG_SOURCE_MODIFIED:
-                        any_used_src_baked = True
+                        invalidated_render_passes.add(rdr_pass)
                         break
-                if any_used_src_baked:
-                    break
-            if any_used_src_baked:
+            if len(invalidated_render_passes) > 0:
                 logger.debug("'%s' is known to use sources %s, at least one "
                              "of which got baked. Will force bake this page. "
                              % (uri, used_src_names))
                 force_this = True
+                if PASS_FORMATTING in invalidated_render_passes:
+                    logger.debug("Will invalidate cached formatting for '%s' "
+                                 "since sources were using during that pass."
+                                 % uri)
+                    invalidate_formatting = True
 
         while has_more_subs:
             sub_uri = self.getOutputUri(uri, cur_sub)
@@ -161,11 +166,11 @@
 
             # Check for up-to-date outputs.
             do_bake = True
-            if not force_this and prev_record_entry:
+            if not force_this:
                 try:
                     in_path_time = record_entry.path_mtime
                     out_path_time = os.path.getmtime(out_path)
-                    if out_path_time > in_path_time and not any_used_src_baked:
+                    if out_path_time > in_path_time:
                         do_bake = False
                 except OSError:
                     # File doesn't exist, we'll need to bake.
@@ -189,6 +194,11 @@
 
             # All good, proceed.
             try:
+                if invalidate_formatting:
+                    cache_key = '%s:%s' % (uri, cur_sub)
+                    self.app.env.rendered_segments_repository.invalidate(
+                            cache_key)
+
                 logger.debug("  p%d -> %s" % (cur_sub, out_path))
                 ctx, rp = self._bakeSingle(page, sub_uri, cur_sub, out_path,
                         pagination_filter, custom_data)
@@ -212,7 +222,7 @@
                 logger.debug("Copying page assets to: %s" % out_assets_dir)
                 if not os.path.isdir(out_assets_dir):
                     os.makedirs(out_assets_dir, 0o755)
-                for ap in ctx.used_assets._getAssetPaths():
+                for ap in ctx.used_assets:
                     dest_ap = os.path.join(out_assets_dir, os.path.basename(ap))
                     logger.debug("  %s -> %s" % (ap, dest_ap))
                     shutil.copy(ap, dest_ap)
@@ -224,11 +234,10 @@
             record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms
 
             has_more_subs = False
-            if ctx.used_pagination is not None:
-                record_entry.addUsedSource(ctx.used_pagination._source)
-                if ctx.used_pagination.has_more:
-                    cur_sub += 1
-                    has_more_subs = True
+            if (ctx.used_pagination is not None and
+                    ctx.used_pagination.has_more):
+                cur_sub += 1
+                has_more_subs = True
 
     def _bakeSingle(self, page, sub_uri, num, out_path,
             pagination_filter=None, custom_data=None):
@@ -370,6 +379,7 @@
                 shutil.rmtree(cache_dir)
             self.force = True
             record.incremental_count = 0
+            record.clearPrevious()
             logger.info(format_timed(start_time,
                 "cleaned cache (reason: %s)" % reason))
         else:
@@ -586,9 +596,15 @@
 
             job = self.jobs.pop(0)
             first_job = job
-            while not self._isJobReady(job):
-                logger.debug("Job '%s:%s' isn't ready yet." % (
-                        job.factory.source.name, job.factory.rel_path))
+            while True:
+                ready, wait_on_src = self._isJobReady(job)
+                if ready:
+                    break
+
+                logger.debug("Job '%s:%s' isn't ready yet: waiting on pages "
+                             "from source '%s' to finish baking." %
+                             (job.factory.source.name,
+                                 job.factory.rel_path, wait_on_src))
                 self.jobs.append(job)
                 job = self.jobs.pop(0)
                 if job == first_job:
@@ -605,16 +621,16 @@
         e = self.record.getPreviousEntry(job.factory.source.name,
                 job.factory.rel_path)
         if not e:
-            return True
-        for sn in e.used_source_names:
+            return (True, None)
+        for sn, rp in e.used_source_names:
             if sn == job.factory.source.name:
                 continue
             if any(filter(lambda j: j.factory.source.name == sn, self.jobs)):
-                return False
+                return (False, sn)
             if any(filter(lambda j: j.factory.source.name == sn,
                     self._active_jobs)):
-                return False
-        return True
+                return (False, sn)
+        return (True, None)
 
 
 class BakeWorkerContext(object):
@@ -653,12 +669,13 @@
 
     def run(self):
         while(not self.ctx.abort_event.is_set()):
-            job = self.ctx.work_queue.getNextJob(wait_timeout=1)
-            if job is None:
-                logger.debug("[%d] No more work... shutting down." % self.wid)
-                break
+            try:
+                job = self.ctx.work_queue.getNextJob(wait_timeout=1)
+                if job is None:
+                    logger.debug("[%d] No more work... shutting down." %
+                            self.wid)
+                    break
 
-            try:
                 self._unsafeRun(job)
                 logger.debug("[%d] Done with page." % self.wid)
                 self.ctx.work_queue.onJobFinished(job)
--- a/piecrust/baking/records.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/baking/records.py	Sun Sep 07 23:48:57 2014 -0700
@@ -8,7 +8,7 @@
 logger = logging.getLogger(__name__)
 
 
-RECORD_VERSION = 4
+RECORD_VERSION = 6
 
 
 def _get_transition_key(source_name, rel_path, taxonomy_name=None,
@@ -75,10 +75,6 @@
         return _get_transition_key(self.source_name, self.rel_path,
                 self.taxonomy_name, self.taxonomy_term)
 
-    def addUsedSource(self, source):
-        if isinstance(source, PageSource):
-            self.used_source_names.add(source.name)
-
     def __getstate__(self):
         state = self.__dict__.copy()
         del state['path_mtime']
@@ -110,6 +106,9 @@
         for e in self.previous.entries:
             self.transitions[e.transition_key] = (e, None)
 
+    def clearPrevious(self):
+        self.previous = BakeRecord()
+
     def saveCurrent(self, current_path):
         self.current.save(current_path)
 
--- a/piecrust/data/assetor.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/data/assetor.py	Sun Sep 07 23:48:57 2014 -0700
@@ -57,10 +57,6 @@
         self._cacheAssets()
         return map(lambda i: i[0], self._cache.values())
 
-    def _getAssetPaths(self):
-        self._cacheAssets()
-        return map(lambda i: i[1], self._cache.values())
-
     def _debugRenderAssetNames(self):
         self._cacheAssets()
         return list(self._cache.keys())
@@ -91,5 +87,6 @@
 
         cpi = self._page.app.env.exec_info_stack.current_page_info
         if cpi is not None:
-            cpi.render_ctx.used_assets = self
+            used_assets = list(map(lambda i: i[1], self._cache.values()))
+            cpi.render_ctx.used_assets = used_assets
 
--- a/piecrust/data/debug.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/data/debug.py	Sun Sep 07 23:48:57 2014 -0700
@@ -4,6 +4,7 @@
 import logging
 import collections
 from piecrust import APP_VERSION, PIECRUST_URL
+from piecrust.page import FLAG_RAW_CACHE_VALID
 
 
 logger = logging.getLogger(__name__)
@@ -60,7 +61,6 @@
 
 def _do_build_debug_info(page, data, output):
     app = page.app
-    exec_info = app.env.exec_info_stack.current_page_info
 
     print('<div id="piecrust-debug-info" style="%s">' % CSS_DEBUGINFO, file=output)
 
@@ -69,22 +69,18 @@
 
     # If we have some execution info in the environment,
     # add more information.
-    if exec_info:
-        if exec_info.was_cache_valid:
-            output.write('baked this morning')
-        else:
-            output.write('baked just now')
+    if page.flags & FLAG_RAW_CACHE_VALID:
+        output.write('baked this morning')
+    else:
+        output.write('baked just now')
 
-        if app.cache.enabled:
-            if app.env.was_cache_cleaned:
-                output.write(', from a brand new cache')
-            else:
-                output.write(', from a valid cache')
+    if app.cache.enabled:
+        if app.env.was_cache_cleaned:
+            output.write(', from a brand new cache')
         else:
-            output.write(', with no cache')
-
+            output.write(', from a valid cache')
     else:
-        output.write('no caching information available')
+        output.write(', with no cache')
 
     output.write(', ')
     if app.env.start_time != 0:
--- a/piecrust/environment.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/environment.py	Sun Sep 07 23:48:57 2014 -0700
@@ -25,6 +25,16 @@
         self.cache = repoze.lru.LRUCache(size)
         self.lock = threading.RLock()
         self.fs_cache = None
+        self._invalidated_fs_items = set()
+
+    def invalidate(self, key):
+        with self.lock:
+            logger.debug("Invalidating cache item '%s'." % key)
+            self.cache.invalidate(key)
+            if self.fs_cache:
+                logger.debug("Invalidating FS cache item '%s'." % key)
+                fs_key = _make_fs_cache_key(key)
+                self._invalidated_fs_items.add(fs_key)
 
     def get(self, key, item_maker, fs_cache_time=None):
         item = self.cache.get(key)
@@ -37,7 +47,8 @@
                             fs_cache_time is not None):
                         # Try first from the file-system cache.
                         fs_key = _make_fs_cache_key(key)
-                        if self.fs_cache.isValid(fs_key, fs_cache_time):
+                        if (fs_key not in self._invalidated_fs_items and
+                                self.fs_cache.isValid(fs_key, fs_cache_time)):
                             logger.debug("'%s' found in file-system cache." %
                                          key)
                             item_raw = self.fs_cache.read(fs_key)
@@ -59,15 +70,9 @@
         return item
 
 
-PHASE_PAGE_PARSING = 0
-PHASE_PAGE_FORMATTING = 1
-PHASE_PAGE_RENDERING = 2
-
-
 class ExecutionInfo(object):
-    def __init__(self, page, phase, render_ctx):
+    def __init__(self, page, render_ctx):
         self.page = page
-        self.phase = phase
         self.render_ctx = render_ctx
         self.was_cache_valid = False
         self.start_time = time.clock()
@@ -93,8 +98,11 @@
                 return True
         return False
 
-    def pushPage(self, page, phase, render_ctx):
-        self._page_stack.append(ExecutionInfo(page, phase, render_ctx))
+    def pushPage(self, page, render_ctx):
+        if len(self._page_stack) > 0:
+            top = self._page_stack[-1]
+            assert top.page is not page
+        self._page_stack.append(ExecutionInfo(page, render_ctx))
 
     def popPage(self):
         del self._page_stack[-1]
@@ -108,12 +116,10 @@
         self.page_repository = MemCache()
         self.rendered_segments_repository = MemCache()
         self.base_asset_url_format = '%uri%'
-        self._use_rendered_segments_fs_cache = False
 
     def initialize(self, app):
-        if self._use_rendered_segments_fs_cache:
-            cache = app.cache.getCache('renders')
-            self.rendered_segments_repository.fs_cache = cache
+        cache = app.cache.getCache('renders')
+        self.rendered_segments_repository.fs_cache = cache
 
 
 class StandardEnvironment(Environment):
--- a/piecrust/page.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/page.py	Sun Sep 07 23:48:57 2014 -0700
@@ -11,7 +11,6 @@
 from werkzeug.utils import cached_property
 from piecrust.configuration import (Configuration, ConfigurationError,
         parse_config_header)
-from piecrust.environment import PHASE_PAGE_PARSING
 
 
 logger = logging.getLogger(__name__)
@@ -33,6 +32,10 @@
         return values
 
 
+FLAG_NONE = 0
+FLAG_RAW_CACHE_VALID = 2**0
+
+
 class Page(object):
     def __init__(self, source, source_metadata, rel_path):
         self.source = source
@@ -40,6 +43,7 @@
         self.rel_path = rel_path
         self._config = None
         self._raw_content = None
+        self._flags = FLAG_NONE
         self._datetime = None
 
     @property
@@ -59,6 +63,10 @@
         return os.path.getmtime(self.path)
 
     @property
+    def flags(self):
+        return self._flags
+
+    @property
     def config(self):
         self._load()
         return self._config
@@ -123,14 +131,12 @@
         if self._config is not None:
             return
 
-        eis = self.app.env.exec_info_stack
-        eis.pushPage(self, PHASE_PAGE_PARSING, None)
-        try:
-            config, content = load_page(self.app, self.path, self.path_mtime)
-            self._config = config
-            self._raw_content = content
-        finally:
-            eis.popPage()
+        config, content, was_cache_valid = load_page(self.app, self.path,
+                                                     self.path_mtime)
+        self._config = config
+        self._raw_content = content
+        if was_cache_valid:
+            self._flags |= FLAG_RAW_CACHE_VALID
 
 
 class PageLoadingError(Exception):
@@ -195,26 +201,19 @@
 
 
 def _do_load_page(app, path, path_mtime):
-    exec_info = app.env.exec_info_stack.current_page_info
-    if exec_info is None:
-        raise Exception("Loading page '%s' but not execution context has "
-                        "been created for it." % path)
-
     # Check the cache first.
     cache = app.cache.getCache('pages')
     cache_path = "%s.json" % hashlib.md5(path.encode('utf8')).hexdigest()
     page_time = path_mtime or os.path.getmtime(path)
     if cache.isValid(cache_path, page_time):
-        exec_info.was_cache_valid = True
         cache_data = json.loads(cache.read(cache_path),
                 object_pairs_hook=collections.OrderedDict)
         config = PageConfiguration(values=cache_data['config'],
                 validate=False)
         content = json_load_segments(cache_data['content'])
-        return config, content
+        return config, content, True
 
     # Nope, load the page from the source file.
-    exec_info.was_cache_valid = False
     logger.debug("Loading page configuration from: %s" % path)
     with codecs.open(path, 'r', 'utf-8') as fp:
         raw = fp.read()
@@ -235,7 +234,7 @@
             'content': json_save_segments(content)}
     cache.write(cache_path, json.dumps(cache_data))
 
-    return config, content
+    return config, content, False
 
 
 segment_pattern = re.compile(
--- a/piecrust/rendering.py	Sun Sep 07 21:37:10 2014 -0700
+++ b/piecrust/rendering.py	Sun Sep 07 23:48:57 2014 -0700
@@ -3,7 +3,6 @@
 import logging
 from piecrust.data.builder import (DataBuildingContext, build_page_data,
         build_layout_data)
-from piecrust.environment import PHASE_PAGE_FORMATTING, PHASE_PAGE_RENDERING
 from piecrust.sources.base import PageSource
 from piecrust.uriutil import get_slug
 
@@ -33,6 +32,11 @@
         return self.page.app
 
 
+PASS_NONE = 0
+PASS_FORMATTING = 1
+PASS_RENDERING = 2
+
+
 class PageRenderingContext(object):
     def __init__(self, page, uri, page_num=1):
         self.page = page
@@ -46,6 +50,7 @@
         self.used_pagination = None
         self.used_source_names = set()
         self.used_taxonomy_terms = set()
+        self.current_pass = PASS_NONE
 
     @property
     def app(self):
@@ -66,15 +71,16 @@
         if self.used_pagination is not None:
             raise Exception("Pagination has already been used.")
         self.used_pagination = paginator
+        self.addUsedSource(paginator._source)
 
     def addUsedSource(self, source):
         if isinstance(source, PageSource):
-            self.used_source_names.add(source.name)
+            self.used_source_names.add((source.name, self.current_pass))
 
 
 def render_page(ctx):
     eis = ctx.app.env.exec_info_stack
-    eis.pushPage(ctx.page, PHASE_PAGE_RENDERING, ctx)
+    eis.pushPage(ctx.page, ctx)
     try:
         page = ctx.page
 
@@ -87,6 +93,7 @@
             page_data.update(ctx.custom_data)
 
         # Render content segments.
+        ctx.current_pass = PASS_FORMATTING
         repo = ctx.app.env.rendered_segments_repository
         if repo:
             cache_key = '%s:%s' % (ctx.uri, ctx.page_num)
@@ -98,6 +105,7 @@
             contents = _do_render_page_segments(page, page_data)
 
         # Render layout.
+        ctx.current_pass = PASS_RENDERING
         layout_name = page.config.get('layout')
         if layout_name is None:
             layout_name = page.source.config.get('default_layout', 'default')
@@ -114,6 +122,7 @@
         rp.execution_info = eis.current_page_info
         return rp
     finally:
+        ctx.current_pass = PASS_NONE
         eis.popPage()
 
 
@@ -130,12 +139,14 @@
 
 def _do_render_page_segments_from_ctx(ctx):
     eis = ctx.app.env.exec_info_stack
-    eis.pushPage(ctx.page, PHASE_PAGE_FORMATTING, ctx)
+    eis.pushPage(ctx.page, ctx)
+    ctx.current_pass = PASS_FORMATTING
     try:
         data_ctx = DataBuildingContext(ctx.page, ctx.uri, ctx.page_num)
         page_data = build_page_data(data_ctx)
         return _do_render_page_segments(ctx.page, page_data)
     finally:
+        ctx.current_pass = PASS_NONE
         eis.popPage()