piecrust2: view piecrust/baking/single.py @ 415:0e9a94b7fdfa
bake: Improve bake record information.
* Store things in the bake record that require less interaction between the
master process and the workers. For instance, don't store the paginator
object in the render pass info -- instead, just store whether pagination
  was used, and whether it had more items (see the sketch after this list).
* Simplify information passing between workers and bake passes by saving the
rendering info to the JSON cache. This means the "render first sub" job
doesn't have to return anything except errors now.
* Add more performance counter info.
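
A minimal sketch of the idea behind the first point above, under assumed names (RenderPassInfo, toData, record_pagination and FakePaginator are illustrative only, not necessarily the classes touched by this changeset): the render pass info keeps plain, JSON-serializable flags about pagination instead of a reference to the paginator object, so it can be written to the JSON cache and shared between the master process and the workers.

# Illustrative sketch only; names are assumptions, not PieCrust's actual API.
class RenderPassInfo(object):
    def __init__(self):
        self.used_pagination = False
        self.pagination_has_more = False

    def toData(self):
        # Plain values only, so the info round-trips through the JSON cache
        # instead of requiring the worker to send live objects back.
        return {'used_pagination': self.used_pagination,
                'pagination_has_more': self.pagination_has_more}


def record_pagination(pass_info, paginator):
    # Keep two booleans instead of a reference to the paginator itself.
    pass_info.used_pagination = True
    pass_info.pagination_has_more = paginator.has_more


if __name__ == '__main__':
    import collections
    FakePaginator = collections.namedtuple('FakePaginator', 'has_more')
    pinfo = RenderPassInfo()
    record_pagination(pinfo, FakePaginator(has_more=True))
    print(pinfo.toData())
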
author    Ludovic Chabant <ludovic@chabant.com>
date      Sat, 20 Jun 2015 19:23:16 -0700
parents   e7b865f8f335
children  21e26ed867b6
import os.path
import shutil
import codecs
import logging
import urllib.parse
from piecrust import ASSET_DIR_SUFFIX
from piecrust.baking.records import SubPageBakeInfo
from piecrust.rendering import (
        QualifiedPage, PageRenderingContext, render_page,
        PASS_FORMATTING)
from piecrust.uriutil import split_uri


logger = logging.getLogger(__name__)


def copy_public_page_config(config):
    res = config.getDeepcopy()
    for k in list(res.keys()):
        if k.startswith('__'):
            del res[k]
    return res


class BakingError(Exception):
    pass


class PageBaker(object):
    def __init__(self, app, out_dir, force=False, copy_assets=True):
        self.app = app
        self.out_dir = out_dir
        self.force = force
        self.copy_assets = copy_assets
        self.site_root = app.config.get('site/root')
        self.pretty_urls = app.config.get('site/pretty_urls')

    def getOutputPath(self, uri):
        uri_root, uri_path = split_uri(self.app, uri)

        bake_path = [self.out_dir]
        decoded_uri = urllib.parse.unquote(uri_path)
        if self.pretty_urls:
            bake_path.append(decoded_uri)
            bake_path.append('index.html')
        elif decoded_uri == '':
            bake_path.append('index.html')
        else:
            bake_path.append(decoded_uri)

        return os.path.normpath(os.path.join(*bake_path))

    def bake(self, factory, route, route_metadata, prev_entry,
             dirty_source_names, tax_info=None):
        # Get the page.
        page = factory.buildPage()

        # Start baking the sub-pages.
        cur_sub = 1
        has_more_subs = True
        sub_entries = []

        while has_more_subs:
            # Get the URL and path for this sub-page.
            sub_uri = route.getUri(route_metadata, sub_num=cur_sub,
                                   provider=page)
            logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub))
            out_path = self.getOutputPath(sub_uri)

            # Create the sub-entry for the bake record.
            sub_entry = SubPageBakeInfo(sub_uri, out_path)
            sub_entries.append(sub_entry)

            # Find a corresponding sub-entry in the previous bake record.
            prev_sub_entry = None
            if prev_entry:
                try:
                    prev_sub_entry = prev_entry.getSub(cur_sub)
                except IndexError:
                    pass

            # Figure out if we need to invalidate or force anything.
            force_this_sub, invalidate_formatting = _compute_force_flags(
                    prev_sub_entry, sub_entry, dirty_source_names)
            force_this_sub = force_this_sub or self.force

            # Check for up-to-date outputs.
            do_bake = True
            if not force_this_sub:
                try:
                    in_path_time = page.path_mtime
                    out_path_time = os.path.getmtime(out_path)
                    if out_path_time >= in_path_time:
                        do_bake = False
                except OSError:
                    # File doesn't exist, we'll need to bake.
                    pass

            # If this page didn't bake because it's already up-to-date,
            # keep trying for as many subs as we know this page has.
            if not do_bake:
                sub_entry.render_info = prev_sub_entry.copyRenderInfo()
                sub_entry.flags = SubPageBakeInfo.FLAG_NONE

                if prev_entry.num_subs >= cur_sub + 1:
                    cur_sub += 1
                    has_more_subs = True
                    logger.debug(" %s is up to date, skipping to next "
                                 "sub-page." % out_path)
                    continue

                logger.debug(" %s is up to date, skipping bake." % out_path)
                break

            # All good, proceed.
            try:
                if invalidate_formatting:
                    cache_key = sub_uri
                    self.app.env.rendered_segments_repository.invalidate(
                            cache_key)
                    sub_entry.flags |= \
                        SubPageBakeInfo.FLAG_FORMATTING_INVALIDATED

                logger.debug(" p%d -> %s" % (cur_sub, out_path))
                qp = QualifiedPage(page, route, route_metadata)
                rp = self._bakeSingle(qp, cur_sub, out_path, tax_info)
            except Exception as ex:
                page_rel_path = os.path.relpath(page.path, self.app.root_dir)
                raise BakingError("%s: error baking '%s'." %
                                  (page_rel_path, sub_uri)) from ex

            # Record what we did.
            sub_entry.flags |= SubPageBakeInfo.FLAG_BAKED
            sub_entry.render_info = rp.copyRenderInfo()

            # Copy page assets.
            if (cur_sub == 1 and self.copy_assets and
                    sub_entry.anyPass(lambda p: p.used_assets)):
                if self.pretty_urls:
                    out_assets_dir = os.path.dirname(out_path)
                else:
                    out_assets_dir, out_name = os.path.split(out_path)
                    if sub_uri != self.site_root:
                        out_name_noext, _ = os.path.splitext(out_name)
                        out_assets_dir += out_name_noext

                logger.debug("Copying page assets to: %s" % out_assets_dir)
                _ensure_dir_exists(out_assets_dir)

                page_dirname = os.path.dirname(page.path)
                page_pathname, _ = os.path.splitext(page.path)
                in_assets_dir = page_pathname + ASSET_DIR_SUFFIX
                for fn in os.listdir(in_assets_dir):
                    full_fn = os.path.join(page_dirname, fn)
                    if os.path.isfile(full_fn):
                        dest_ap = os.path.join(out_assets_dir, fn)
                        logger.debug(" %s -> %s" % (full_fn, dest_ap))
                        shutil.copy(full_fn, dest_ap)

            # Figure out if we have more work.
            has_more_subs = False
            if sub_entry.anyPass(lambda p: p.pagination_has_more):
                cur_sub += 1
                has_more_subs = True

        return sub_entries

    def _bakeSingle(self, qualified_page, num, out_path, tax_info=None):
        ctx = PageRenderingContext(qualified_page, page_num=num)
        if tax_info:
            tax = self.app.getTaxonomy(tax_info.taxonomy_name)
            ctx.setTaxonomyFilter(tax, tax_info.term)

        rp = render_page(ctx)

        out_dir = os.path.dirname(out_path)
        _ensure_dir_exists(out_dir)

        with codecs.open(out_path, 'w', 'utf8') as fp:
            fp.write(rp.content)

        return rp


def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names):
    # Figure out what to do with this page.
    force_this_sub = False
    invalidate_formatting = False
    sub_uri = sub_entry.out_uri
    if (prev_sub_entry and
            (prev_sub_entry.was_baked_successfully or
                prev_sub_entry.was_clean)):
        # If the current page is known to use pages from other sources,
        # see if any of those got baked, or are going to be baked for
        # some reason. If so, we need to bake this one too.
        # (this happens for instance with the main page of a blog)
        dirty_for_this, invalidated_render_passes = (
                _get_dirty_source_names_and_render_passes(
                    prev_sub_entry, dirty_source_names))
        if len(invalidated_render_passes) > 0:
            logger.debug(
                    "'%s' is known to use sources %s, which have "
                    "items that got (re)baked. Will force bake this "
                    "page. " % (sub_uri, dirty_for_this))
            sub_entry.flags |= \
                SubPageBakeInfo.FLAG_FORCED_BY_SOURCE
            force_this_sub = True

            if PASS_FORMATTING in invalidated_render_passes:
                logger.debug(
                        "Will invalidate cached formatting for '%s' "
                        "since sources were used during that pass."
                        % sub_uri)
                invalidate_formatting = True
    elif (prev_sub_entry and
            prev_sub_entry.errors):
        # Previous bake failed. We'll have to bake it again.
        logger.debug(
                "Previous record entry indicates baking failed for "
                "'%s'. Will bake it again." % sub_uri)
        sub_entry.flags |= \
            SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS
        force_this_sub = True
    elif not prev_sub_entry:
        # No previous record. We'll have to bake it.
        logger.debug("No previous record entry found for '%s'. Will "
                     "force bake it." % sub_uri)
        sub_entry.flags |= \
            SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS
        force_this_sub = True

    return force_this_sub, invalidate_formatting


def _get_dirty_source_names_and_render_passes(sub_entry, dirty_source_names):
    dirty_for_this = set()
    invalidated_render_passes = set()
    assert sub_entry.render_info is not None
    for p, pinfo in sub_entry.render_info.items():
        for src_name in pinfo.used_source_names:
            is_dirty = (src_name in dirty_source_names)
            if is_dirty:
                invalidated_render_passes.add(p)
                dirty_for_this.add(src_name)
                break
    return dirty_for_this, invalidated_render_passes


def _ensure_dir_exists(path):
    try:
        os.makedirs(path, mode=0o755, exist_ok=True)
    except OSError:
        # In a multiprocess environment, several processes may very
        # occasionally try to create the same directory at the same time.
        # Let's ignore any error and if something's really wrong (like file
        # access permissions or whatever), then it will more legitimately fail
        # just after this when we try to write files.
        pass