piecrust2
changeset 871:504ddb370df8
refactor: Fixing some issues with baking assets.
author       Ludovic Chabant <ludovic@chabant.com>
date         Tue, 13 Jun 2017 22:30:27 -0700
parents      48d25fd68b8d
children     3004ab31bb46
files        piecrust/appconfigdefaults.py
             piecrust/baking/baker.py
             piecrust/baking/worker.py
             piecrust/data/assetor.py
             piecrust/pipelines/_pagebaker.py
             piecrust/pipelines/_pagerecords.py
             piecrust/pipelines/page.py
             piecrust/sources/base.py
             piecrust/sources/blogarchives.py
             piecrust/sources/taxonomy.py
diffstat     10 files changed, 151 insertions(+), 142 deletions(-)
--- a/piecrust/appconfigdefaults.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/appconfigdefaults.py Tue Jun 13 22:30:27 2017 -0700
@@ -24,7 +24,6 @@
         'default_auto_format': 'md',
         'default_pagination_source': None,
         'pagination_suffix': '/%num%',
-        'asset_url_format': '%uri%',
         'slugify_mode': 'encode',
         'themes_sources': [DEFAULT_THEME_SOURCE],
         'use_default_content': True,
--- a/piecrust/baking/baker.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/baking/baker.py Tue Jun 13 22:30:27 2017 -0700
@@ -11,7 +11,7 @@
 from piecrust.pipelines.records import (
     MultiRecordHistory, MultiRecord, RecordEntry,
     load_records)
-from piecrust.sources.base import REALM_USER, REALM_THEME
+from piecrust.sources.base import REALM_USER, REALM_THEME, REALM_NAMES


 logger = logging.getLogger(__name__)
@@ -42,7 +42,7 @@

         # Get into bake mode.
         self.app.config.set('baker/is_baking', True)
-        self.app.config.set('site/base_asset_url_format', '%uri')
+        self.app.config.set('site/asset_url_format', '%page_uri%/%filename%')

         # Make sure the output directory exists.
         if not os.path.isdir(self.out_dir):
@@ -119,13 +119,14 @@
                 ppinfo.pipeline.source.config['realm'], [])
             pplist.append(ppinfo)

-        for pp_pass in sorted(pp_by_pass_and_realm.keys()):
-            logger.debug("Pipelines pass %d" % pp_pass)
-            pp_by_realm = pp_by_pass_and_realm[pp_pass]
+        for pp_pass_num in sorted(pp_by_pass_and_realm.keys()):
+            logger.debug("Pipelines pass %d" % pp_pass_num)
+            pp_by_realm = pp_by_pass_and_realm[pp_pass_num]
             for realm in realm_list:
                 pplist = pp_by_realm.get(realm)
                 if pplist is not None:
-                    self._bakeRealm(pool, pp_pass, record_histories, pplist)
+                    self._bakeRealm(
+                        pool, record_histories, pp_pass_num, realm, pplist)

         # Handle deletions, collapse records, etc.
         ppmngr.postJobRun()
@@ -213,59 +214,75 @@
                 start_time, "cache is assumed valid", colored=False))
             return True

-    def _bakeRealm(self, pool, pppass, record_histories, pplist):
+    def _bakeRealm(self, pool, record_histories, pp_pass_num, realm, pplist):
         # Start with the first pass, where we iterate on the content sources'
         # items and run jobs on those.
         pool.userdata.cur_pass = 0
         next_pass_jobs = {}
         pool.userdata.next_pass_jobs = next_pass_jobs
-        queued_any_job = False
+
+        start_time = time.perf_counter()
+        job_count = 0
+        realm_name = REALM_NAMES[realm].lower()
+
         for ppinfo in pplist:
             src = ppinfo.source
             pp = ppinfo.pipeline

             logger.debug(
-                "Queuing jobs for source '%s' using pipeline '%s' (pass 0)." %
-                (src.name, pp.PIPELINE_NAME))
+                "Queuing jobs for source '%s' using pipeline '%s' "
+                "(%s, pass 0)." %
+                (src.name, pp.PIPELINE_NAME, realm_name))

             next_pass_jobs[src.name] = []
-            jcctx = PipelineJobCreateContext(pppass, record_histories)
+            jcctx = PipelineJobCreateContext(pp_pass_num, record_histories)
             jobs = pp.createJobs(jcctx)
             if jobs is not None:
+                job_count += len(jobs)
                 pool.queueJobs(jobs)
-                queued_any_job = True

-        if not queued_any_job:
+        if job_count == 0:
             logger.debug("No jobs queued! Bailing out of this bake pass.")
             return

         pool.wait()

+        logger.info(format_timed(
+            start_time, "%d pipeline jobs completed (%s, pass 0)." %
+            (job_count, realm_name)))
+
         # Now let's see if any job created a follow-up job. Let's keep
         # processing those jobs as long as they create new ones.
         pool.userdata.cur_pass = 1
         while True:
-            had_any_job = False
-
             # Make a copy of out next pass jobs and reset the list, so
             # the first jobs to be processed don't mess it up as we're
             # still iterating on it.
             next_pass_jobs = pool.userdata.next_pass_jobs
             pool.userdata.next_pass_jobs = {}

+            start_time = time.perf_counter()
+            job_count = 0
+
             for sn, jobs in next_pass_jobs.items():
                 if jobs:
                     logger.debug(
-                        "Queuing jobs for source '%s' (pass %d)." %
-                        (sn, pool.userdata.cur_pass))
+                        "Queuing jobs for source '%s' (%s, pass %d)." %
+                        (sn, realm_name, pool.userdata.cur_pass))
+                    job_count += len(jobs)
                     pool.userdata.next_pass_jobs[sn] = []
                     pool.queueJobs(jobs)
-                    had_any_job = True

-            if not had_any_job:
+            if job_count == 0:
                 break

             pool.wait()
+
+            logger.info(format_timed(
+                start_time,
+                "%d pipeline jobs completed (%s, pass %d)." %
+                (job_count, realm_name, pool.userdata.cur_pass)))
+
             pool.userdata.cur_pass += 1

     def _logErrors(self, item_spec, errors):
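The _bakeRealm() rewrite above replaces the queued_any_job boolean with an explicit job count and one timed log line per (realm, pass). A minimal sketch of that pattern, with a stand-in format_timed so it runs on its own (the pool, pipeline, and job objects here are hypothetical placeholders, not the real PieCrust API):

import time


def format_timed(start_time, message):
    # Stand-in for the timing helper baker.py already uses elsewhere; shown
    # here only to keep the sketch self-contained.
    return '%s (%.2fs)' % (message, time.perf_counter() - start_time)


def run_pass(pool, pipelines, realm_name, pass_num):
    # Queue jobs per pipeline, counting them as we go; skip the wait entirely
    # when nothing was queued, and log one timed summary per (realm, pass).
    start_time = time.perf_counter()
    job_count = 0
    for pp in pipelines:
        jobs = pp.createJobs()
        if jobs:
            job_count += len(jobs)
            pool.queueJobs(jobs)
    if job_count == 0:
        return False
    pool.wait()
    print(format_timed(
        start_time,
        "%d pipeline jobs completed (%s, pass %d)." %
        (job_count, realm_name, pass_num)))
    return True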
--- a/piecrust/baking/worker.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/baking/worker.py Tue Jun 13 22:30:27 2017 -0700
@@ -37,7 +37,7 @@

         app = self.ctx.appfactory.create()
         app.config.set('baker/is_baking', True)
         app.config.set('baker/worker_id', self.wid)
-        app.config.set('site/base_asset_url_format', '%uri')
+        app.config.set('site/asset_url_format', '%page_uri%/%filename%')
         app.env.fs_cache_only_for_main_page = True

--- a/piecrust/data/assetor.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/data/assetor.py Tue Jun 13 22:30:27 2017 -0700
@@ -1,9 +1,7 @@
 import os
 import os.path
-import shutil
 import logging
 import collections.abc
-from piecrust import ASSET_DIR_SUFFIX
 from piecrust.sources.base import REL_ASSETS
 from piecrust.uriutil import multi_replace

@@ -47,6 +45,10 @@
         self._cacheAssets()
         return len(self._cache_list)

+    def _getAssetItems(self):
+        self._cacheAssets()
+        return map(lambda i: i.content_item, self._cache_map.values())
+
     def _debugRenderAssetNames(self):
         self._cacheAssets()
         return list(self._cache_map.keys())
@@ -89,9 +91,9 @@
                     (name, content_item.spec))

            # TODO: this assumes a file-system source!
-            uri_build_tokens['%path%'] = (
-                os.path.relpath(a.spec, root_dir).replace('\\', '/'))
-            uri_build_tokens['%filename%'] = a.metadata['filename'],
+            uri_build_tokens['%path%'] = \
+                os.path.relpath(a.spec, root_dir).replace('\\', '/')
+            uri_build_tokens['%filename%'] = a.metadata['filename']

             uri = multi_replace(asset_url_format, uri_build_tokens)
             self._cache_map[name] = _AssetInfo(a, uri)
@@ -102,13 +104,3 @@
         if cur_ctx is not None:
             cur_ctx.current_pass_info.used_assets = True

-    def copyAssets(self, dest_dir):
-        page_pathname, _ = os.path.splitext(self._page.path)
-        in_assets_dir = page_pathname + ASSET_DIR_SUFFIX
-        for fn in os.listdir(in_assets_dir):
-            full_fn = os.path.join(in_assets_dir, fn)
-            if os.path.isfile(full_fn):
-                dest_ap = os.path.join(dest_dir, fn)
-                logger.debug("  %s -> %s" % (full_fn, dest_ap))
-                shutil.copy(full_fn, dest_ap)
-
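Besides dropping copyAssets() (that work now lives in the page baker below), the hunk above fixes a small bug: the old line ended with a stray trailing comma, which made the %filename% token a one-element tuple instead of a string. A sketch of how the tokens feed into the final asset URL, with a local stand-in for piecrust.uriutil.multi_replace and invented paths (only the token names and the '%page_uri%/%filename%' format string come from this changeset):

import os.path


def multi_replace(text, replacements):
    # Local stand-in for piecrust.uriutil.multi_replace so the sketch runs
    # on its own: apply every token -> value substitution to the string.
    for token, value in replacements.items():
        text = text.replace(token, value)
    return text


# Hypothetical file-system asset (paths invented for illustration).
asset_spec = '/site/pages/my-post-assets/photo.jpg'
root_dir = '/site/pages'

uri_build_tokens = {
    '%page_uri%': 'my-post',  # would come from the owning page
    '%path%': os.path.relpath(asset_spec, root_dir).replace('\\', '/'),
    '%filename%': 'photo.jpg',
}

# With the format the baker now sets at bake time:
print(multi_replace('%page_uri%/%filename%', uri_build_tokens))
# -> my-post/photo.jpg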
--- a/piecrust/pipelines/_pagebaker.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/pipelines/_pagebaker.py Tue Jun 13 22:30:27 2017 -0700
@@ -1,10 +1,11 @@
 import os.path
 import queue
+import shutil
 import logging
 import threading
 import urllib.parse
 from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry
-from piecrust.rendering import RenderingContext, render_page, PASS_FORMATTING
+from piecrust.rendering import RenderingContext, render_page
 from piecrust.uriutil import split_uri

@@ -16,16 +17,17 @@


 class PageBaker(object):
-    def __init__(self, app, out_dir, force=False, copy_assets=True):
+    def __init__(self, app, out_dir, force=False):
         self.app = app
         self.out_dir = out_dir
         self.force = force
-        self.copy_assets = copy_assets
         self.site_root = app.config.get('site/root')
         self.pretty_urls = app.config.get('site/pretty_urls')
+        self._do_write = self._writeDirect
         self._writer_queue = None
         self._writer = None
         self._stats = app.env.stats
+        self._rsr = app.env.rendered_segments_repository

     def startWriterQueue(self):
         self._writer_queue = queue.Queue()
@@ -34,11 +36,19 @@
             target=_text_writer,
             args=(self._writer_queue,))
         self._writer.start()
+        self._do_write = self._sendToWriterQueue

     def stopWriterQueue(self):
         self._writer_queue.put_nowait(None)
         self._writer.join()

+    def _sendToWriterQueue(self, out_path, content):
+        self._writer_queue.put_nowait((out_path, content))
+
+    def _writeDirect(self, out_path, content):
+        with open(out_path, 'w', encoding='utf8') as fp:
+            fp.write(content)
+
     def getOutputPath(self, uri, pretty_urls):
         uri_root, uri_path = split_uri(self.app, uri)

@@ -54,12 +64,12 @@

         return os.path.normpath(os.path.join(*bake_path))

-    def bake(self, page, prev_entry, cur_entry, dirty_source_names):
-        # Start baking the sub-pages.
+    def bake(self, page, prev_entry, cur_entry):
         cur_sub = 1
         has_more_subs = True
         pretty_urls = page.config.get('pretty_urls', self.pretty_urls)

+        # Start baking the sub-pages.
         while has_more_subs:
             sub_uri = page.getUri(sub_num=cur_sub)
             logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub))
@@ -67,8 +77,8 @@
             out_path = self.getOutputPath(sub_uri, pretty_urls)

             # Create the sub-entry for the bake record.
-            sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
-            cur_entry.subs.append(sub_entry)
+            cur_sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
+            cur_entry.subs.append(cur_sub_entry)

             # Find a corresponding sub-entry in the previous bake record.
             prev_sub_entry = None
@@ -78,28 +88,15 @@
                 except IndexError:
                     pass

-            # Figure out if we need to invalidate or force anything.
-            force_this_sub, invalidate_formatting = _compute_force_flags(
-                prev_sub_entry, sub_entry, dirty_source_names)
-            force_this_sub = force_this_sub or self.force
-
-            # Check for up-to-date outputs.
-            do_bake = True
-            if not force_this_sub:
-                try:
-                    in_path_time = page.content_mtime
-                    out_path_time = os.path.getmtime(out_path)
-                    if out_path_time >= in_path_time:
-                        do_bake = False
-                except OSError:
-                    # File doesn't exist, we'll need to bake.
-                    pass
+            # Figure out if we need to bake this page.
+            bake_status = _get_bake_status(page, out_path, self.force,
+                                           prev_sub_entry, cur_sub_entry)

             # If this page didn't bake because it's already up-to-date.
             # Keep trying for as many subs as we know this page has.
-            if not do_bake:
-                sub_entry.render_info = prev_sub_entry.copyRenderInfo()
-                sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE
+            if bake_status == STATUS_CLEAN:
+                cur_sub_entry.render_info = prev_sub_entry.copyRenderInfo()
+                cur_sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE

                 if prev_entry.num_subs >= cur_sub + 1:
                     cur_sub += 1
@@ -113,11 +110,10 @@

             # All good, proceed.
             try:
-                if invalidate_formatting:
+                if bake_status == STATUS_INVALIDATE_AND_BAKE:
                     cache_key = sub_uri
-                    self.app.env.rendered_segments_repository.invalidate(
-                        cache_key)
-                    sub_entry.flags |= \
+                    self._rsr.invalidate(cache_key)
+                    cur_sub_entry.flags |= \
                         SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED

                 logger.debug("  p%d -> %s" % (cur_sub, out_path))
@@ -128,12 +124,12 @@
                     (page.content_spec, sub_uri)) from ex

             # Record what we did.
-            sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
-            sub_entry.render_info = rp.copyRenderInfo()
+            cur_sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
+            cur_sub_entry.render_info = rp.copyRenderInfo()

             # Copy page assets.
-            if (cur_sub == 1 and self.copy_assets and
-                    sub_entry.anyPass(lambda p: p.used_assets)):
+            if (cur_sub == 1 and
+                    cur_sub_entry.anyPass(lambda p: p.used_assets)):
                 if pretty_urls:
                     out_assets_dir = os.path.dirname(out_path)
                 else:
@@ -145,11 +141,17 @@
                 logger.debug("Copying page assets to: %s" % out_assets_dir)
                 _ensure_dir_exists(out_assets_dir)

-                # TODO: copy assets to out dir
+                assetor = rp.data.get('assets')
+                if assetor is not None:
+                    for i in assetor._getAssetItems():
+                        fn = os.path.basename(i.spec)
+                        out_asset_path = os.path.join(out_assets_dir, fn)
+                        logger.debug("  %s -> %s" % (i.spec, out_asset_path))
+                        shutil.copy(i.spec, out_asset_path)

             # Figure out if we have more work.
             has_more_subs = False
-            if sub_entry.anyPass(lambda p: p.pagination_has_more):
+            if cur_sub_entry.anyPass(lambda p: p.pagination_has_more):
                 cur_sub += 1
                 has_more_subs = True

@@ -161,11 +163,7 @@
             rp = render_page(ctx)

         with self._stats.timerScope("PageSerialize"):
-            if self._writer_queue is not None:
-                self._writer_queue.put_nowait((out_path, rp.content))
-            else:
-                with open(out_path, 'w', encoding='utf8') as fp:
-                    fp.write(rp.content)
+            self._do_write(out_path, rp.content)

         return rp

@@ -188,68 +186,49 @@
             break


-def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names):
-    # Figure out what to do with this page.
-    force_this_sub = False
-    invalidate_formatting = False
-    sub_uri = sub_entry.out_uri
-    if (prev_sub_entry and
-            (prev_sub_entry.was_baked_successfully or
-                prev_sub_entry.was_clean)):
-        # If the current page is known to use pages from other sources,
-        # see if any of those got baked, or are going to be baked for
-        # some reason. If so, we need to bake this one too.
-        # (this happens for instance with the main page of a blog).
-        dirty_for_this, invalidated_render_passes = (
-            _get_dirty_source_names_and_render_passes(
-                prev_sub_entry, dirty_source_names))
-        if len(invalidated_render_passes) > 0:
-            logger.debug(
-                "'%s' is known to use sources %s, which have "
-                "items that got (re)baked. Will force bake this "
-                "page. " % (sub_uri, dirty_for_this))
-            sub_entry.flags |= \
-                SubPagePipelineRecordEntry.FLAG_FORCED_BY_SOURCE
-            force_this_sub = True
-
-            if PASS_FORMATTING in invalidated_render_passes:
-                logger.debug(
-                    "Will invalidate cached formatting for '%s' "
-                    "since sources were using during that pass."
-                    % sub_uri)
-                invalidate_formatting = True
-    elif (prev_sub_entry and
-            prev_sub_entry.errors):
-        # Previous bake failed. We'll have to bake it again.
-        logger.debug(
-            "Previous record entry indicates baking failed for "
-            "'%s'. Will bake it again." % sub_uri)
-        sub_entry.flags |= \
-            SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
-        force_this_sub = True
-    elif not prev_sub_entry:
-        # No previous record. We'll have to bake it.
-        logger.debug("No previous record entry found for '%s'. Will "
-                     "force bake it." % sub_uri)
-        sub_entry.flags |= \
-            SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS
-        force_this_sub = True
-
-    return force_this_sub, invalidate_formatting
+STATUS_CLEAN = 0
+STATUS_BAKE = 1
+STATUS_INVALIDATE_AND_BAKE = 2


-def _get_dirty_source_names_and_render_passes(sub_entry, dirty_source_names):
-    dirty_for_this = set()
-    invalidated_render_passes = set()
-    for p, pinfo in enumerate(sub_entry.render_info):
-        if pinfo:
-            for src_name in pinfo.used_source_names:
-                is_dirty = (src_name in dirty_source_names)
-                if is_dirty:
-                    invalidated_render_passes.add(p)
-                    dirty_for_this.add(src_name)
-                    break
-    return dirty_for_this, invalidated_render_passes
+def _get_bake_status(page, out_path, force, prev_sub_entry, cur_sub_entry):
+    # Figure out if we need to invalidate or force anything.
+    status = _compute_force_flags(prev_sub_entry, cur_sub_entry)
+    if status != STATUS_CLEAN:
+        return status
+
+    # Easy test.
+    if force:
+        return STATUS_BAKE
+
+    # Check for up-to-date outputs.
+    in_path_time = page.content_mtime
+    try:
+        out_path_time = os.path.getmtime(out_path)
+    except OSError:
+        # File doesn't exist, we'll need to bake.
+        return STATUS_BAKE
+
+    if out_path_time <= in_path_time:
+        return STATUS_BAKE
+
+    # Nope, all good.
+    return STATUS_CLEAN
+
+
+def _compute_force_flags(prev_sub_entry, cur_sub_entry):
+    if prev_sub_entry and prev_sub_entry.errors:
+        # Previous bake failed. We'll have to bake it again.
+        cur_sub_entry.flags |= \
+            SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
+        return STATUS_BAKE
+
+    if not prev_sub_entry:
+        cur_sub_entry.flags |= \
+            SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS
+        return STATUS_BAKE
+
+    return STATUS_CLEAN


 def _ensure_dir_exists(path):
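The refactor above folds the old force/invalidate flags into a single three-way bake status. Note that, as of this changeset, the simplified _compute_force_flags() only ever returns STATUS_BAKE or STATUS_CLEAN; bake() still handles STATUS_INVALIDATE_AND_BAKE, but nothing in this file produces it. A condensed restatement of the decision, for illustration only (it omits the record-entry flag updates the real code performs):

import os

STATUS_CLEAN = 0
STATUS_BAKE = 1
STATUS_INVALIDATE_AND_BAKE = 2


def decide_bake_status(force, prev_entry_errors, has_prev_entry,
                       in_path_time, out_path):
    # No previous record, or a previous failure, always means a re-bake.
    if prev_entry_errors or not has_prev_entry:
        return STATUS_BAKE
    # An explicit force flag also means a re-bake.
    if force:
        return STATUS_BAKE
    # Otherwise compare the source mtime against the baked output.
    try:
        out_path_time = os.path.getmtime(out_path)
    except OSError:
        return STATUS_BAKE      # output file missing
    if out_path_time <= in_path_time:
        return STATUS_BAKE      # output not newer than the source
    return STATUS_CLEAN         # up to date, skip this sub-page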
--- a/piecrust/pipelines/_pagerecords.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/pipelines/_pagerecords.py Tue Jun 13 22:30:27 2017 -0700
@@ -50,6 +50,7 @@
         super().__init__()
         self.flags = self.FLAG_NONE
         self.config = None
+        self.timestamp = None
         self.subs = []

     @property
@@ -108,7 +109,11 @@
             'URI': sub.out_uri,
             'Path': sub.out_path,
             'Flags': get_flag_descriptions(
-                sub.flags, sub_flag_descriptions)
+                sub.flags, sub_flag_descriptions),
+            'RenderInfo': [
+                _describe_render_info(sub.render_info[0]),
+                _describe_render_info(sub.render_info[1])
+            ]
         }
         return d

@@ -129,3 +134,12 @@
     SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED:
         'formatting invalidated'
 }
+
+
+def _describe_render_info(ri):
+    return {
+        'UsedPagination': ri.used_pagination,
+        'PaginationHasMore': ri.pagination_has_more,
+        'UsedAssets': ri.used_assets,
+        'UsedSourceNames': ri.used_source_names
+    }
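The record debug dump for each sub-page now also describes its render info via _describe_render_info(). A hypothetical example of what one such entry could look like (all values invented; only the key names come from the code above, and the two list items mirror the two render_info indices it reads):

# Hypothetical sub-page debug data, with one dict per render pass.
sub_entry_debug_data = {
    'URI': 'posts/2017/06/my-post.html',
    'Path': '/output/posts/2017/06/my-post.html',
    'Flags': ['baked'],
    'RenderInfo': [
        {'UsedPagination': False,
         'PaginationHasMore': False,
         'UsedAssets': True,
         'UsedSourceNames': set()},
        {'UsedPagination': True,
         'PaginationHasMore': False,
         'UsedAssets': False,
         'UsedSourceNames': {'posts'}},
    ],
}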
--- a/piecrust/pipelines/page.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/pipelines/page.py Tue Jun 13 22:30:27 2017 -0700
@@ -70,6 +70,7 @@

         page = self.app.getPage(self.source, content_item)
         record_entry.config = page.config.getAll()
+        record_entry.timestamp = page.datetime.timestamp()

         rdrctx = RenderingContext(page)
         self.app.env.abort_source_use = True
@@ -93,4 +94,4 @@
         page = self.app.getPage(self.source, content_item)
         prev_entry = ctx.previous_entry
         cur_entry = result.record_entry
-        self._pagebaker.bake(page, prev_entry, cur_entry, [])
+        self._pagebaker.bake(page, prev_entry, cur_entry)
--- a/piecrust/sources/base.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/sources/base.py Tue Jun 13 22:30:27 2017 -0700
@@ -83,6 +83,7 @@
         self.app = app
         self.name = name
         self.config = config or {}
+        self._cache = None

     @property
     def is_theme_source(self):
@@ -99,6 +100,10 @@
         raise NotImplementedError()

     def getAllContents(self):
+        if self._cache is not None:
+            return self._cache
+
+        cache = []
         stack = collections.deque()
         stack.append(None)
         while len(stack) > 0:
@@ -112,7 +117,9 @@
                     if c.is_group:
                         stack.append(c)
                     else:
-                        yield c
+                        cache.append(c)
+        self._cache = cache
+        return cache

     def getContents(self, group):
         raise NotImplementedError("'%s' doesn't implement 'getContents'." %
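getAllContents() above changes from a generator into a method that walks the source once and memoizes the flat item list on the instance. A sketch of the same caching pattern outside of PieCrust (the content-source API is reduced to a toy tree here, so everything except the caching logic is invented):

import collections


class CachedSourceSketch:
    # Toy stand-in for a content source: groups are dicts with a 'children'
    # list, leaves are plain strings. Only the caching pattern matters here.
    def __init__(self, tree):
        self._tree = tree
        self._cache = None

    def getContents(self, group):
        return self._tree if group is None else group['children']

    def getAllContents(self):
        # Return the memoized list if we already walked the source once.
        if self._cache is not None:
            return self._cache

        cache = []
        stack = collections.deque()
        stack.append(None)
        while len(stack) > 0:
            cur = stack.popleft()
            contents = self.getContents(cur)
            if contents is not None:
                for c in contents:
                    if isinstance(c, dict):
                        stack.append(c)   # group: descend later
                    else:
                        cache.append(c)   # leaf: part of the flat list
        self._cache = cache
        return cache


src = CachedSourceSketch([{'children': ['a.md', 'b.md']}, 'index.md'])
assert src.getAllContents() == ['index.md', 'a.md', 'b.md']
assert src.getAllContents() is src.getAllContents()  # cached list reused

One visible consequence for callers: the method now returns a list instead of a one-shot generator, so repeated iteration (and len()) becomes cheap, at the cost of holding every discovered item in memory for the lifetime of the source object.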
--- a/piecrust/sources/blogarchives.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/sources/blogarchives.py Tue Jun 13 22:30:27 2017 -0700
@@ -188,7 +188,7 @@
         prev_entry = ctx.previous_entry
         cur_entry = result.record_entry
         cur_entry.year = job.content_item.metadata['route_params']['year']
-        self._pagebaker.bake(page, prev_entry, cur_entry, [])
+        self._pagebaker.bake(page, prev_entry, cur_entry)

     def postJobRun(self, ctx):
         # Create bake entries for the years that were *not* dirty.
--- a/piecrust/sources/taxonomy.py Mon Jun 12 22:30:06 2017 -0700
+++ b/piecrust/sources/taxonomy.py Tue Jun 13 22:30:27 2017 -0700
@@ -294,7 +294,7 @@
         prev_entry = ctx.previous_entry
         cur_entry = result.record_entry
         cur_entry.term = content_item.metadata['term']
-        self._pagebaker.bake(page, prev_entry, cur_entry, [])
+        self._pagebaker.bake(page, prev_entry, cur_entry)

     def postJobRun(self, ctx):
         # We create bake entries for all the terms that were *not* dirty.