changeset 991:1857dbd4580f

bake: Fix bugs introduced by bake optimizations, of course.

- Make the execution stats JSON-serializable.
- Re-add ability to differentiate between sources used during segment rendering and during layout rendering. Fixes problems with cache invalidation of pages that use other sources.
- Make taxonomy-related stuff JSON-serializable.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 20 Nov 2017 23:06:47 -0800
parents 22cf13b86cc3
children 4f2e0136123d
files piecrust/environment.py piecrust/page.py piecrust/pipelines/_pagerecords.py piecrust/pipelines/page.py piecrust/rendering.py piecrust/sources/taxonomy.py piecrust/templating/jinja/extensions.py piecrust/workerpool.py tests/test_baking_baker.py
diffstat 9 files changed, 176 insertions(+), 56 deletions(-)
--- a/piecrust/environment.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/environment.py	Mon Nov 20 23:06:47 2017 -0800
@@ -60,6 +60,17 @@
             v = self.manifests.setdefault(oc, [])
             self.manifests[oc] = v + ov
 
+    def toData(self):
+        return {
+            'timers': self.timers.copy(),
+            'counters': self.counters.copy(),
+            'manifests': self.manifests.copy()}
+
+    def fromData(self, data):
+        self.timers = data['timers']
+        self.counters = data['counters']
+        self.manifests = data['manifests']
+
 
 class Environment:
     def __init__(self):
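
The point of toData()/fromData() is that execution stats now cross the worker/master process boundary as plain dicts. A minimal round-trip sketch, using a stand-in class with the same three fields rather than the real ExecutionStats (which also carries merge logic); the timer and counter names are made up:

    import json

    class StatsStub:
        # Stand-in for ExecutionStats: three plain-dict fields only.
        def __init__(self):
            self.timers = {}
            self.counters = {}
            self.manifests = {}

        def toData(self):
            return {
                'timers': self.timers.copy(),
                'counters': self.counters.copy(),
                'manifests': self.manifests.copy()}

        def fromData(self, data):
            self.timers = data['timers']
            self.counters = data['counters']
            self.manifests = data['manifests']

    s = StatsStub()
    s.timers['PageRender'] = 1.25          # illustrative names
    s.counters['pages_baked'] = 42
    wire = json.dumps(s.toData())          # plain dicts serialize cleanly
    s2 = StatsStub()
    s2.fromData(json.loads(wire))
    assert s2.counters['pages_baked'] == 42
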
--- a/piecrust/page.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/page.py	Mon Nov 20 23:06:47 2017 -0800
@@ -106,6 +106,10 @@
     def datetime(self, value):
         self._datetime = value
 
+    @property
+    def was_modified(self):
+        return (self._flags & FLAG_RAW_CACHE_VALID) == 0
+
     def getUri(self, sub_num=1):
         route_params = self.source_metadata['route_params']
         return self.route.getUri(route_params, sub_num=sub_num)
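
The new was_modified property just tests a bit flag: a page counts as modified when its raw-cache-valid flag is unset. A tiny sketch, with an assumed bit value since FLAG_RAW_CACHE_VALID is defined elsewhere in page.py:

    FLAG_RAW_CACHE_VALID = 2 ** 0   # assumed value, for illustration only

    def was_modified(flags):
        # "Modified" means the raw cache for this page was NOT valid.
        return (flags & FLAG_RAW_CACHE_VALID) == 0

    assert was_modified(0) is True
    assert was_modified(FLAG_RAW_CACHE_VALID) is False
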
--- a/piecrust/pipelines/_pagerecords.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/pipelines/_pagerecords.py	Mon Nov 20 23:06:47 2017 -0800
@@ -89,12 +89,15 @@
             yield from o['errors']
 
     def getAllUsedSourceNames(self):
-        res = set()
+        res_segments = set()
+        res_layout = set()
         for o in self.subs:
             pinfo = o.get('render_info')
             if pinfo:
-                res |= pinfo['used_source_names']
-        return res
+                usn = pinfo['used_source_names']
+                res_segments |= set(usn['segments'])
+                res_layout |= set(usn['layout'])
+        return res_segments, res_layout
 
     def getAllOutputPaths(self):
         for o in self.subs:
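
Callers of getAllUsedSourceNames() now get two sets back, one per render pass, and must unpack both. A self-contained sketch of the new contract (the subs data below is made up):

    def get_all_used_source_names(subs):
        res_segments = set()
        res_layout = set()
        for o in subs:
            pinfo = o.get('render_info')
            if pinfo:
                usn = pinfo['used_source_names']
                res_segments |= set(usn['segments'])
                res_layout |= set(usn['layout'])
        return res_segments, res_layout

    subs = [{'render_info': {'used_source_names': {
        'segments': ['posts'], 'layout': ['tags']}}}]
    seg, lay = get_all_used_source_names(subs)
    assert seg == {'posts'} and lay == {'tags'}
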
--- a/piecrust/pipelines/page.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/pipelines/page.py	Mon Nov 20 23:06:47 2017 -0800
@@ -1,3 +1,4 @@
+import copy
 import time
 import logging
 from piecrust.pipelines.base import (
@@ -48,6 +49,8 @@
             cur_entry.route_params = item.metadata['route_params']
             cur_entry.timestamp = page.datetime.timestamp()
 
+            if page.was_modified:
+                cur_entry.flags |= PagePipelineRecordEntry.FLAG_SOURCE_MODIFIED
             if page.config.get(self._draft_setting):
                 cur_entry.flags |= PagePipelineRecordEntry.FLAG_IS_DRAFT
 
@@ -85,8 +88,10 @@
 
             # Skip pages that are known to use other sources... we'll
             # schedule them in the second pass.
-            if prev and prev.getAllUsedSourceNames():
-                continue
+            if prev:
+                usn1, usn2 = prev.getAllUsedSourceNames()
+                if usn1 or usn2:
+                    continue
 
             # Check if this item has been overriden by a previous pipeline
             # run... for instance, we could be the pipeline for a "theme pages"
@@ -94,7 +99,6 @@
             # page that writes out to the same URL.
             uri = uri_getter(cur.route_params)
             path = get_output_path(app, out_dir, uri, pretty_urls)
-
             override = used_paths.get(path)
             if override is not None:
                 override_source_name, override_entry = override
@@ -143,14 +147,25 @@
         history = ctx.record_histories.getHistory(ctx.record_name).copy()
         history.build()
         for prev, cur in history.diffs:
-            if cur and cur.was_any_sub_baked:
+            if not cur:
+                continue
+            if cur.was_any_sub_baked:
                 continue
-            if prev and any(map(
-                    lambda usn: usn in dirty_source_names,
-                    prev.getAllUsedSourceNames())):
-                jobs.append(create_job(self, prev.item_spec,
-                                       pass_num=pass_num,
-                                       force_bake=True))
+            if prev:
+                if any(map(
+                        lambda usn: usn in dirty_source_names,
+                        prev.getAllUsedSourceNames()[0])):
+                    jobs.append(create_job(self, prev.item_spec,
+                                           pass_num=pass_num,
+                                           force_bake=True))
+                else:
+                    # This page uses other sources, but no source was dirty
+                    # this time around (it was a null build, maybe). We
+                    # don't have any work to do, but we need to carry over
+                    # any information we have, otherwise the post bake step
+                    # will think we need to delete last bake's outputs.
+                    cur.subs = copy.deepcopy(prev.subs)
+
         if len(jobs) > 0:
             return jobs
         return None
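
Two things change in this second pass: the dirty-source check now looks only at segment-level usage (index 0 of the tuple), and entries with no dirty sources carry over last bake's sub-entries so the post-bake step doesn't mistake them for deleted outputs. A sketch of the re-bake test alone, with hypothetical source names:

    def needs_rebake(prev_used_segments, dirty_source_names):
        # Mirrors the committed check: only sources used during
        # *segment* rendering count here.
        return any(usn in dirty_source_names for usn in prev_used_segments)

    assert needs_rebake({'posts'}, {'posts', 'assets'})
    assert not needs_rebake({'posts'}, {'assets'})
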
--- a/piecrust/rendering.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/rendering.py	Mon Nov 20 23:06:47 2017 -0800
@@ -52,7 +52,7 @@
         saved to records.
     """
     return {
-        'used_source_names': set(),
+        'used_source_names': {'segments': [], 'layout': []},
         'used_pagination': False,
         'pagination_has_more': False,
         'used_assets': False,
@@ -68,11 +68,27 @@
         self.pagination_filter = None
         self.render_info = create_render_info()
         self.custom_data = {}
+        self._current_used_source_names = None
 
     @property
     def app(self):
         return self.page.app
 
+    @property
+    def current_used_source_names(self):
+        usn = self._current_used_source_names
+        if usn is not None:
+            return usn
+        else:
+            raise Exception("No render pass specified.")
+
+    def setRenderPass(self, name):
+        if name is not None:
+            self._current_used_source_names = \
+                self.render_info['used_source_names'][name]
+        else:
+            self._current_used_source_names = None
+
     def setPagination(self, paginator):
         ri = self.render_info
         if ri.get('used_pagination'):
@@ -83,8 +99,9 @@
         self.addUsedSource(paginator._source)
 
     def addUsedSource(self, source):
-        ri = self.render_info
-        ri['used_source_names'].add(source.name)
+        usn = self.current_used_source_names
+        if source.name not in usn:
+            usn.append(source.name)
 
 
 class RenderingContextStack(object):
@@ -253,6 +270,8 @@
     page = ctx.page
     app = page.app
 
+    ctx.setRenderPass('segments')
+
     engine_name = page.config.get('template_engine')
     format_name = page.config.get('format')
 
@@ -296,6 +315,8 @@
     assert cur_ctx is not None
     assert cur_ctx.page == page
 
+    cur_ctx.setRenderPass('layout')
+
     names = layout_name.split(',')
     full_names = []
     for name in names:
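
The rendering context now buckets source usage by render pass, and raises if something registers a source outside of any pass. A minimal sketch with a stand-in context (the real addUsedSource() takes a source object and stores source.name; this stub takes the name directly):

    class CtxStub:
        def __init__(self):
            self.render_info = {
                'used_source_names': {'segments': [], 'layout': []}}
            self._current = None

        def setRenderPass(self, name):
            # Point at the bucket for this pass, or clear it.
            self._current = (
                self.render_info['used_source_names'][name]
                if name is not None else None)

        def addUsedSource(self, source_name):
            if self._current is None:
                raise Exception("No render pass specified.")
            if source_name not in self._current:
                self._current.append(source_name)

    ctx = CtxStub()
    ctx.setRenderPass('segments')
    ctx.addUsedSource('posts')
    ctx.setRenderPass('layout')
    ctx.addUsedSource('tags')
    assert ctx.render_info['used_source_names'] == {
        'segments': ['posts'], 'layout': ['tags']}
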
--- a/piecrust/sources/taxonomy.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/sources/taxonomy.py	Mon Nov 20 23:06:47 2017 -0800
@@ -175,15 +175,19 @@
             route_val = slugified_values
 
         # We need to register this use of a taxonomy term.
+        # Because the render info gets serialized across bake worker
+        # processes, we can only use basic JSON-able structures, which
+        # excludes `set`... hence the awkward use of `list`.
+        # Also, note that the tuples we're putting in there will be
+        # transformed into lists so we'll have to convert back.
         rcs = self.app.env.render_ctx_stack
         ri = rcs.current_ctx.render_info
         utt = ri.get('used_taxonomy_terms')
         if utt is None:
-            utt = set()
-            utt.add(slugified_values)
-            ri['used_taxonomy_terms'] = utt
+            ri['used_taxonomy_terms'] = [slugified_values]
         else:
-            utt.add(slugified_values)
+            if slugified_values not in utt:
+                utt.append(slugified_values)
 
         # Put the slugified values in the route metadata so they're used to
         # generate the URL.
@@ -481,7 +485,7 @@
         pinfo = o['render_info']
         terms = pinfo.get('used_taxonomy_terms')
         if terms:
-            res |= set(terms)
+            res |= set([tuple(t) for t in terms])
     return res
 
 
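The comment in this hunk is the crux: JSON has neither set nor tuple, so multi-value terms stored as tuples come back as lists after a worker round-trip, and the aggregation code above converts them back. A short stdlib-only demonstration:

    import json

    terms = [('foo',), ('multiple', 'values')]
    roundtripped = json.loads(json.dumps(terms))
    assert roundtripped == [['foo'], ['multiple', 'values']]  # tuples become lists
    res = set(tuple(t) for t in roundtripped)                 # convert back
    assert ('multiple', 'values') in res
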
--- a/piecrust/templating/jinja/extensions.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/templating/jinja/extensions.py	Mon Nov 20 23:06:47 2017 -0800
@@ -146,19 +146,20 @@
         key = self.environment.piecrust_cache_prefix + name
 
         rcs = self.environment.app.env.render_ctx_stack
-        ri = rcs.current_ctx.render_info
+        ctx = rcs.current_ctx
 
         # try to load the block from the cache
         # if there is no fragment in the cache, render it and store
         # it in the cache.
         pair = self.environment.piecrust_cache.get(key)
         if pair is not None:
-            ri['used_source_names'].update(pair[1])
+            for usn in pair[1]:
+                ctx.addUsedSource(usn)
             return pair[0]
 
-        prev_used = ri['used_source_names'].copy()
+        prev_used = set(ctx.current_used_source_names)
         rv = caller()
-        after_used = ri['used_source_names'].copy()
+        after_used = set(ctx.current_used_source_names)
         used_delta = after_used.difference(prev_used)
         self.environment.piecrust_cache[key] = (rv, used_delta)
         return rv
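
The cache extension stores, alongside each rendered fragment, only the sources first used inside that fragment, so a later cache hit can re-register exactly those. The delta logic in isolation, with a hypothetical fragment cache:

    # key -> (rendered_text, sources_used_inside_the_block)
    cache = {}
    prev_used = {'posts'}               # used before entering the block
    after_used = {'posts', 'tags'}      # used by the time it finished
    used_delta = after_used.difference(prev_used)
    cache['header'] = ('<nav>...</nav>', used_delta)
    # On a later hit, only 'tags' gets re-registered as a used source.
    assert cache['header'][1] == {'tags'}
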
--- a/piecrust/workerpool.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/workerpool.py	Mon Nov 20 23:06:47 2017 -0800
@@ -11,7 +11,7 @@
 
 logger = logging.getLogger(__name__)
 
-use_fastqueue = True
+use_fastqueue = False
 use_fastpickle = False
 use_msgpack = False
 use_marshall = False
@@ -202,7 +202,8 @@
             stats.registerTimer('WorkerResultPut', time=time_in_put)
             try:
                 stats.mergeStats(w.getStats())
-                rep = (task_type, wid, [(task_data, (wid, stats), True)])
+                stats_data = stats.toData()
+                rep = (task_type, wid, [(task_data, (wid, stats_data), True)])
             except Exception as e:
                 logger.debug(
                     "Error getting report, sending exception to main process:")
@@ -439,6 +440,7 @@
         self.reports = [None] * worker_count
         self._count = worker_count
         self._received = 0
+        self._lock = threading.Lock()
         self._event = threading.Event()
 
     def wait(self, timeout=None):
@@ -450,11 +452,14 @@
             logger.error("Ignoring report from unknown worker %d." % wid)
             return
 
-        self._received += 1
-        self.reports[wid] = data
+        stats = ExecutionStats()
+        stats.fromData(data)
 
-        if self._received == self._count:
-            self._event.set()
+        with self._lock:
+            self.reports[wid] = stats
+            self._received += 1
+            if self._received == self._count:
+                self._event.set()
 
     def _handleError(self, job, res, _):
         logger.error("Worker %d failed to send its report." % res.wid)
@@ -467,6 +472,7 @@
         self._rlock = multiprocessing.Lock()
         self._wlock = multiprocessing.Lock()
         self._initBuffers()
+        self._initSerializer()
 
     def _initBuffers(self):
         self._rbuf = io.BytesIO()
@@ -474,6 +480,9 @@
         self._wbuf = io.BytesIO()
         self._wbuf.truncate(256)
 
+    def _initSerializer(self):
+        pass
+
     def __getstate__(self):
         return (self._reader, self._writer, self._rlock, self._wlock)
 
@@ -483,6 +492,7 @@
 
     def get(self):
         with self._rlock:
+            self._rbuf.seek(0)
             try:
                 with self._rbuf.getbuffer() as b:
                     bufsize = self._reader.recv_bytes_into(b)
@@ -493,11 +503,11 @@
                 self._rbuf.write(e.args[0])
 
         self._rbuf.seek(0)
-        return _unpickle(self._rbuf, bufsize)
+        return _unpickle(self, self._rbuf, bufsize)
 
     def put(self, obj):
         self._wbuf.seek(0)
-        _pickle(obj, self._wbuf)
+        _pickle(self, obj, self._wbuf)
         size = self._wbuf.tell()
 
         self._wbuf.seek(0)
@@ -506,13 +516,25 @@
                 self._writer.send_bytes(b, 0, size)
 
 
+class _BufferWrapper:
+    def __init__(self, buf, read_size=0):
+        self._buf = buf
+        self._read_size = read_size
+
+    def write(self, data):
+        self._buf.write(data.encode('utf8'))
+
+    def read(self):
+        return self._buf.read(self._read_size).decode('utf8')
+
+
 if use_fastpickle:
     from piecrust import fastpickle
 
-    def _pickle_fast(obj, buf):
+    def _pickle_fast(queue, obj, buf):
         fastpickle.pickle_intob(obj, buf)
 
-    def _unpickle_fast(buf, bufsize):
+    def _unpickle_fast(queue, buf, bufsize):
         return fastpickle.unpickle_fromb(buf, bufsize)
 
     _pickle = _pickle_fast
@@ -521,22 +543,30 @@
 elif use_msgpack:
     import msgpack
 
-    def _pickle_msgpack(obj, buf):
-        msgpack.pack(obj, buf)
+    def _pickle_msgpack(queue, obj, buf):
+        buf.write(queue._packer.pack(obj))
 
-    def _unpickle_msgpack(buf, bufsize):
-        return msgpack.unpack(buf)
+    def _unpickle_msgpack(queue, buf, bufsize):
+        queue._unpacker.feed(buf.getbuffer())
+        for o in queue._unpacker:
+            return o
+        # return msgpack.unpack(buf)
+
+    def _init_msgpack(queue):
+        queue._packer = msgpack.Packer()
+        queue._unpacker = msgpack.Unpacker()
 
     _pickle = _pickle_msgpack
     _unpickle = _unpickle_msgpack
+    FastQueue._initSerializer = _init_msgpack
 
 elif use_marshall:
     import marshal
 
-    def _pickle_marshal(obj, buf):
+    def _pickle_marshal(queue, obj, buf):
         marshal.dump(obj, buf)
 
-    def _unpickle_marshal(buf, bufsize):
+    def _unpickle_marshal(queue, buf, bufsize):
         return marshal.load(buf)
 
     _pickle = _pickle_marshal
@@ -545,22 +575,12 @@
 elif use_json:
     import json
 
-    class _BufferWrapper:
-        def __init__(self, buf):
-            self._buf = buf
-
-        def write(self, data):
-            self._buf.write(data.encode('utf8'))
-
-        def read(self):
-            return self._buf.read().decode('utf8')
-
-    def _pickle_json(obj, buf):
+    def _pickle_json(queue, obj, buf):
         buf = _BufferWrapper(buf)
         json.dump(obj, buf, indent=None, separators=(',', ':'))
 
-    def _unpickle_json(buf, bufsize):
-        buf = _BufferWrapper(buf)
+    def _unpickle_json(queue, buf, bufsize):
+        buf = _BufferWrapper(buf, bufsize)
         return json.load(buf)
 
     _pickle = _pickle_json
@@ -569,10 +589,10 @@
 else:
     import pickle
 
-    def _pickle_default(obj, buf):
+    def _pickle_default(queue, obj, buf):
         pickle.dump(obj, buf, pickle.HIGHEST_PROTOCOL)
 
-    def _unpickle_default(buf, bufsize):
+    def _unpickle_default(queue, buf, bufsize):
         return pickle.load(buf)
 
     _pickle = _pickle_default
--- a/tests/test_baking_baker.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/tests/test_baking_baker.py	Mon Nov 20 23:06:47 2017 -0800
@@ -23,3 +23,44 @@
         structure = fs.getStructure('kitchen/_counter')
         assert structure['index.html'] == 'Second\nFirst\n'
 
+
+def test_bake_four_times():
+    fs = (mock_fs()
+          .withConfig({'site': {
+              'default_format': 'none',
+              'default_page_layout': 'none',
+              'default_post_layout': 'none',
+          }})
+          .withPage('pages/_index.html', {'layout': 'none', 'format': 'none'},
+                    "{% for p in pagination.posts -%}\n"
+                    "{{p.title}}\n"
+                    "{% endfor %}")
+          .withPage('posts/2017-01-01_first.html', {'title': "First"},
+                    "something 1")
+          .withPage('posts/2017-01-02_second.html', {'title': "Second"},
+                    "something 2"))
+    with mock_fs_scope(fs):
+        fs.runChef('bake')
+        structure = fs.getStructure('kitchen/_counter')
+        assert structure['index.html'] == 'Second\nFirst\n'
+        assert structure['2017']['01']['01']['first.html'] == 'something 1'
+        assert structure['2017']['01']['02']['second.html'] == 'something 2'
+
+        fs.runChef('bake')
+        structure = fs.getStructure('kitchen/_counter')
+        assert structure['index.html'] == 'Second\nFirst\n'
+        assert structure['2017']['01']['01']['first.html'] == 'something 1'
+        assert structure['2017']['01']['02']['second.html'] == 'something 2'
+
+        fs.runChef('bake')
+        structure = fs.getStructure('kitchen/_counter')
+        assert structure['index.html'] == 'Second\nFirst\n'
+        assert structure['2017']['01']['01']['first.html'] == 'something 1'
+        assert structure['2017']['01']['02']['second.html'] == 'something 2'
+
+        fs.runChef('bake')
+        structure = fs.getStructure('kitchen/_counter')
+        assert structure['index.html'] == 'Second\nFirst\n'
+        assert structure['2017']['01']['01']['first.html'] == 'something 1'
+        assert structure['2017']['01']['02']['second.html'] == 'something 2'
+