Mercurial > piecrust2
changeset 991:1857dbd4580f
bake: Fix bugs introduced by bake optimizations, of course.
- Make the execution stats JSON-serializable.
- Re-add ability to differentiate between sources used during segment rendering
and during layout rendering. Fixes problems with cache invalidation of
pages that use other sources.
- Make taxonomy-related stuff JSON-serializable.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Mon, 20 Nov 2017 23:06:47 -0800 |
parents | 22cf13b86cc3 |
children | 4f2e0136123d |
files | piecrust/environment.py piecrust/page.py piecrust/pipelines/_pagerecords.py piecrust/pipelines/page.py piecrust/rendering.py piecrust/sources/taxonomy.py piecrust/templating/jinja/extensions.py piecrust/workerpool.py tests/test_baking_baker.py |
diffstat | 9 files changed, 176 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/piecrust/environment.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/environment.py	Mon Nov 20 23:06:47 2017 -0800
@@ -60,6 +60,17 @@
             v = self.manifests.setdefault(oc, [])
             self.manifests[oc] = v + ov
 
+    def toData(self):
+        return {
+            'timers': self.timers.copy(),
+            'counters': self.counters.copy(),
+            'manifests': self.manifests.copy()}
+
+    def fromData(self, data):
+        self.timers = data['timers']
+        self.counters = data['counters']
+        self.manifests = data['manifests']
+
 
 class Environment:
     def __init__(self):
```
```diff
--- a/piecrust/page.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/page.py	Mon Nov 20 23:06:47 2017 -0800
@@ -106,6 +106,10 @@
     def datetime(self, value):
         self._datetime = value
 
+    @property
+    def was_modified(self):
+        return (self._flags & FLAG_RAW_CACHE_VALID) == 0
+
     def getUri(self, sub_num=1):
         route_params = self.source_metadata['route_params']
         return self.route.getUri(route_params, sub_num=sub_num)
```
```diff
--- a/piecrust/pipelines/_pagerecords.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/pipelines/_pagerecords.py	Mon Nov 20 23:06:47 2017 -0800
@@ -89,12 +89,15 @@
             yield from o['errors']
 
     def getAllUsedSourceNames(self):
-        res = set()
+        res_segments = set()
+        res_layout = set()
         for o in self.subs:
             pinfo = o.get('render_info')
             if pinfo:
-                res |= pinfo['used_source_names']
-        return res
+                usn = pinfo['used_source_names']
+                res_segments |= set(usn['segments'])
+                res_layout |= set(usn['layout'])
+        return res_segments, res_layout
 
     def getAllOutputPaths(self):
         for o in self.subs:
```
```diff
--- a/piecrust/pipelines/page.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/pipelines/page.py	Mon Nov 20 23:06:47 2017 -0800
@@ -1,3 +1,4 @@
+import copy
 import time
 import logging
 from piecrust.pipelines.base import (
@@ -48,6 +49,8 @@
             cur_entry.route_params = item.metadata['route_params']
             cur_entry.timestamp = page.datetime.timestamp()
 
+            if page.was_modified:
+                cur_entry.flags |= PagePipelineRecordEntry.FLAG_SOURCE_MODIFIED
             if page.config.get(self._draft_setting):
                 cur_entry.flags |= PagePipelineRecordEntry.FLAG_IS_DRAFT
 
@@ -85,8 +88,10 @@
 
             # Skip pages that are known to use other sources... we'll
             # schedule them in the second pass.
-            if prev and prev.getAllUsedSourceNames():
-                continue
+            if prev:
+                usn1, usn2 = prev.getAllUsedSourceNames()
+                if usn1 or usn2:
+                    continue
 
             # Check if this item has been overriden by a previous pipeline
             # run... for instance, we could be the pipeline for a "theme pages"
@@ -94,7 +99,6 @@
             # page that writes out to the same URL.
             uri = uri_getter(cur.route_params)
             path = get_output_path(app, out_dir, uri, pretty_urls)
-
             override = used_paths.get(path)
             if override is not None:
                 override_source_name, override_entry = override
@@ -143,14 +147,25 @@
         history = ctx.record_histories.getHistory(ctx.record_name).copy()
         history.build()
         for prev, cur in history.diffs:
-            if cur and cur.was_any_sub_baked:
+            if not cur:
+                continue
+            if cur.was_any_sub_baked:
                 continue
-            if prev and any(map(
-                    lambda usn: usn in dirty_source_names,
-                    prev.getAllUsedSourceNames())):
-                jobs.append(create_job(self, prev.item_spec,
-                                       pass_num=pass_num,
-                                       force_bake=True))
+            if prev:
+                if any(map(
+                        lambda usn: usn in dirty_source_names,
+                        prev.getAllUsedSourceNames()[0])):
+                    jobs.append(create_job(self, prev.item_spec,
+                                           pass_num=pass_num,
+                                           force_bake=True))
+                else:
+                    # This page uses other sources, but no source was dirty
+                    # this time around (it was a null build, maybe). We
+                    # don't have any work to do, but we need to carry over
+                    # any information we have, otherwise the post bake step
+                    # will think we need to delete last bake's outputs.
+                    cur.subs = copy.deepcopy(prev.subs)
+
         if len(jobs) > 0:
             return jobs
         return None
```
```diff
--- a/piecrust/rendering.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/rendering.py	Mon Nov 20 23:06:47 2017 -0800
@@ -52,7 +52,7 @@
     saved to records.
     """
     return {
-        'used_source_names': set(),
+        'used_source_names': {'segments': [], 'layout': []},
         'used_pagination': False,
         'pagination_has_more': False,
         'used_assets': False,
@@ -68,11 +68,27 @@
         self.pagination_filter = None
         self.render_info = create_render_info()
         self.custom_data = {}
+        self._current_used_source_names = None
 
     @property
     def app(self):
         return self.page.app
 
+    @property
+    def current_used_source_names(self):
+        usn = self._current_used_source_names
+        if usn is not None:
+            return usn
+        else:
+            raise Exception("No render pass specified.")
+
+    def setRenderPass(self, name):
+        if name is not None:
+            self._current_used_source_names = \
+                self.render_info['used_source_names'][name]
+        else:
+            self._current_used_source_names = None
+
     def setPagination(self, paginator):
         ri = self.render_info
         if ri.get('used_pagination'):
@@ -83,8 +99,9 @@
         self.addUsedSource(paginator._source)
 
     def addUsedSource(self, source):
-        ri = self.render_info
-        ri['used_source_names'].add(source.name)
+        usn = self.current_used_source_names
+        if source.name not in usn:
+            usn.append(source.name)
 
 
 class RenderingContextStack(object):
@@ -253,6 +270,8 @@
     page = ctx.page
     app = page.app
 
+    ctx.setRenderPass('segments')
+
     engine_name = page.config.get('template_engine')
     format_name = page.config.get('format')
 
@@ -296,6 +315,8 @@
     assert cur_ctx is not None
     assert cur_ctx.page == page
 
+    cur_ctx.setRenderPass('layout')
+
     names = layout_name.split(',')
     full_names = []
     for name in names:
```
```diff
--- a/piecrust/sources/taxonomy.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/sources/taxonomy.py	Mon Nov 20 23:06:47 2017 -0800
@@ -175,15 +175,19 @@
             route_val = slugified_values
 
         # We need to register this use of a taxonomy term.
+        # Because the render info gets serialized across bake worker
+        # processes, we can only use basic JSON-able structures, which
+        # excludes `set`... hence the awkward use of `list`.
+        # Also, note that the tuples we're putting in there will be
+        # transformed into lists so we'll have to convert back.
         rcs = self.app.env.render_ctx_stack
         ri = rcs.current_ctx.render_info
         utt = ri.get('used_taxonomy_terms')
         if utt is None:
-            utt = set()
-            utt.add(slugified_values)
-            ri['used_taxonomy_terms'] = utt
+            ri['used_taxonomy_terms'] = [slugified_values]
         else:
-            utt.add(slugified_values)
+            if slugified_values not in utt:
+                utt.append(slugified_values)
 
         # Put the slugified values in the route metadata so they're used to
         # generate the URL.
@@ -481,7 +485,7 @@
             pinfo = o['render_info']
             terms = pinfo.get('used_taxonomy_terms')
             if terms:
-                res |= set(terms)
+                res |= set([tuple(t) for t in terms])
         return res
 
 
```
```diff
--- a/piecrust/templating/jinja/extensions.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/templating/jinja/extensions.py	Mon Nov 20 23:06:47 2017 -0800
@@ -146,19 +146,20 @@
         key = self.environment.piecrust_cache_prefix + name
 
         rcs = self.environment.app.env.render_ctx_stack
-        ri = rcs.current_ctx.render_info
+        ctx = rcs.current_ctx
 
         # try to load the block from the cache
         # if there is no fragment in the cache, render it and store
         # it in the cache.
         pair = self.environment.piecrust_cache.get(key)
         if pair is not None:
-            ri['used_source_names'].update(pair[1])
+            for usn in pair[1]:
+                ctx.addUsedSource(usn)
             return pair[0]
 
-        prev_used = ri['used_source_names'].copy()
+        prev_used = set(ctx.current_used_source_names)
         rv = caller()
-        after_used = ri['used_source_names'].copy()
+        after_used = set(ctx.current_used_source_names)
         used_delta = after_used.difference(prev_used)
         self.environment.piecrust_cache[key] = (rv, used_delta)
         return rv
```
```diff
--- a/piecrust/workerpool.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/piecrust/workerpool.py	Mon Nov 20 23:06:47 2017 -0800
@@ -11,7 +11,7 @@
 
 logger = logging.getLogger(__name__)
 
-use_fastqueue = True
+use_fastqueue = False
 use_fastpickle = False
 use_msgpack = False
 use_marshall = False
@@ -202,7 +202,8 @@
             stats.registerTimer('WorkerResultPut', time=time_in_put)
             try:
                 stats.mergeStats(w.getStats())
-                rep = (task_type, wid, [(task_data, (wid, stats), True)])
+                stats_data = stats.toData()
+                rep = (task_type, wid, [(task_data, (wid, stats_data), True)])
             except Exception as e:
                 logger.debug(
                     "Error getting report, sending exception to main process:")
@@ -439,6 +440,7 @@
         self.reports = [None] * worker_count
         self._count = worker_count
         self._received = 0
+        self._lock = threading.Lock()
         self._event = threading.Event()
 
     def wait(self, timeout=None):
@@ -450,11 +452,14 @@
             logger.error("Ignoring report from unknown worker %d." % wid)
             return
 
-        self._received += 1
-        self.reports[wid] = data
+        stats = ExecutionStats()
+        stats.fromData(data)
 
-        if self._received == self._count:
-            self._event.set()
+        with self._lock:
+            self.reports[wid] = stats
+            self._received += 1
+            if self._received == self._count:
+                self._event.set()
 
     def _handleError(self, job, res, _):
         logger.error("Worker %d failed to send its report." % res.wid)
@@ -467,6 +472,7 @@
         self._rlock = multiprocessing.Lock()
         self._wlock = multiprocessing.Lock()
         self._initBuffers()
+        self._initSerializer()
 
     def _initBuffers(self):
         self._rbuf = io.BytesIO()
@@ -474,6 +480,9 @@
         self._wbuf = io.BytesIO()
         self._wbuf.truncate(256)
 
+    def _initSerializer(self):
+        pass
+
     def __getstate__(self):
         return (self._reader, self._writer, self._rlock, self._wlock)
 
@@ -483,6 +492,7 @@
 
     def get(self):
         with self._rlock:
+            self._rbuf.seek(0)
             try:
                 with self._rbuf.getbuffer() as b:
                     bufsize = self._reader.recv_bytes_into(b)
@@ -493,11 +503,11 @@
                 self._rbuf.write(e.args[0])
 
         self._rbuf.seek(0)
-        return _unpickle(self._rbuf, bufsize)
+        return _unpickle(self, self._rbuf, bufsize)
 
     def put(self, obj):
         self._wbuf.seek(0)
-        _pickle(obj, self._wbuf)
+        _pickle(self, obj, self._wbuf)
         size = self._wbuf.tell()
 
         self._wbuf.seek(0)
@@ -506,13 +516,25 @@
             self._writer.send_bytes(b, 0, size)
 
 
+class _BufferWrapper:
+    def __init__(self, buf, read_size=0):
+        self._buf = buf
+        self._read_size = read_size
+
+    def write(self, data):
+        self._buf.write(data.encode('utf8'))
+
+    def read(self):
+        return self._buf.read(self._read_size).decode('utf8')
+
+
 if use_fastpickle:
     from piecrust import fastpickle
 
-    def _pickle_fast(obj, buf):
+    def _pickle_fast(queue, obj, buf):
         fastpickle.pickle_intob(obj, buf)
 
-    def _unpickle_fast(buf, bufsize):
+    def _unpickle_fast(queue, buf, bufsize):
         return fastpickle.unpickle_fromb(buf, bufsize)
 
     _pickle = _pickle_fast
@@ -521,22 +543,30 @@
 elif use_msgpack:
     import msgpack
 
-    def _pickle_msgpack(obj, buf):
-        msgpack.pack(obj, buf)
+    def _pickle_msgpack(queue, obj, buf):
+        buf.write(queue._packer.pack(obj))
 
-    def _unpickle_msgpack(buf, bufsize):
-        return msgpack.unpack(buf)
+    def _unpickle_msgpack(queue, buf, bufsize):
+        queue._unpacker.feed(buf.getbuffer())
+        for o in queue._unpacker:
+            return o
+        # return msgpack.unpack(buf)
+
+    def _init_msgpack(queue):
+        queue._packer = msgpack.Packer()
+        queue._unpacker = msgpack.Unpacker()
 
     _pickle = _pickle_msgpack
     _unpickle = _unpickle_msgpack
+    FastQueue._initSerializer = _init_msgpack
 
 elif use_marshall:
     import marshal
 
-    def _pickle_marshal(obj, buf):
+    def _pickle_marshal(queue, obj, buf):
         marshal.dump(obj, buf)
 
-    def _unpickle_marshal(buf, bufsize):
+    def _unpickle_marshal(queue, buf, bufsize):
         return marshal.load(buf)
 
     _pickle = _pickle_marshal
@@ -545,22 +575,12 @@
 elif use_json:
     import json
 
-    class _BufferWrapper:
-        def __init__(self, buf):
-            self._buf = buf
-
-        def write(self, data):
-            self._buf.write(data.encode('utf8'))
-
-        def read(self):
-            return self._buf.read().decode('utf8')
-
-    def _pickle_json(obj, buf):
+    def _pickle_json(queue, obj, buf):
         buf = _BufferWrapper(buf)
         json.dump(obj, buf, indent=None, separators=(',', ':'))
 
-    def _unpickle_json(buf, bufsize):
-        buf = _BufferWrapper(buf)
+    def _unpickle_json(queue, buf, bufsize):
+        buf = _BufferWrapper(buf, bufsize)
         return json.load(buf)
 
     _pickle = _pickle_json
@@ -569,10 +589,10 @@
 else:
     import pickle
 
-    def _pickle_default(obj, buf):
+    def _pickle_default(queue, obj, buf):
         pickle.dump(obj, buf, pickle.HIGHEST_PROTOCOL)
 
-    def _unpickle_default(buf, bufsize):
+    def _unpickle_default(queue, buf, bufsize):
         return pickle.load(buf)
 
     _pickle = _pickle_default
```
```diff
--- a/tests/test_baking_baker.py	Sun Nov 19 14:29:52 2017 -0800
+++ b/tests/test_baking_baker.py	Mon Nov 20 23:06:47 2017 -0800
@@ -23,3 +23,44 @@
         structure = fs.getStructure('kitchen/_counter')
         assert structure['index.html'] == 'Second\nFirst\n'
 
+
+def test_bake_four_times():
+    fs = (mock_fs()
+          .withConfig({'site': {
+              'default_format': 'none',
+              'default_page_layout': 'none',
+              'default_post_layout': 'none',
+          }})
+          .withPage('pages/_index.html', {'layout': 'none', 'format': 'none'},
+                    "{% for p in pagination.posts -%}\n"
+                    "{{p.title}}\n"
+                    "{% endfor %}")
+          .withPage('posts/2017-01-01_first.html', {'title': "First"},
+                    "something 1")
+          .withPage('posts/2017-01-02_second.html', {'title': "Second"},
+                    "something 2"))
+    with mock_fs_scope(fs):
+        fs.runChef('bake')
+        structure = fs.getStructure('kitchen/_counter')
+        assert structure['index.html'] == 'Second\nFirst\n'
+        assert structure['2017']['01']['01']['first.html'] == 'something 1'
+        assert structure['2017']['01']['02']['second.html'] == 'something 2'
+
+        fs.runChef('bake')
+        structure = fs.getStructure('kitchen/_counter')
+        assert structure['index.html'] == 'Second\nFirst\n'
+        assert structure['2017']['01']['01']['first.html'] == 'something 1'
+        assert structure['2017']['01']['02']['second.html'] == 'something 2'
+
+        fs.runChef('bake')
+        structure = fs.getStructure('kitchen/_counter')
+        assert structure['index.html'] == 'Second\nFirst\n'
+        assert structure['2017']['01']['01']['first.html'] == 'something 1'
+        assert structure['2017']['01']['02']['second.html'] == 'something 2'
+
+        fs.runChef('bake')
+        structure = fs.getStructure('kitchen/_counter')
+        assert structure['index.html'] == 'Second\nFirst\n'
+        assert structure['2017']['01']['01']['first.html'] == 'something 1'
+        assert structure['2017']['01']['02']['second.html'] == 'something 2'
+
```