view piecrust/baking/single.py @ 338:938be93215cb

bake: Improve render context and bake record, fix incremental bake bugs. * Used sources and taxonomies are now stored on a per-render-pass basis. This fixes bugs where sources/taxonomies were used for one pass, but that pass is skipped on a later bake because its result is cached. * Bake records are now created for all pages even when they're not baked. Record collapsing is gone except for taxonomy index pages. * Bake records now also have sub-entries in order to store information about each sub-page, since some sub-pages could use sources/taxonomies differently than others, or be missing from the output. This lets PieCrust handle clean/dirty states on a sub-page level.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 06 Apr 2015 19:59:54 -0700
parents b034f6f15e22
children b8ff1780b491
line wrap: on
line source

import os.path
import shutil
import codecs
import logging
import urllib.parse
from piecrust.baking.records import (
        BakeRecordPassInfo, BakeRecordPageEntry, BakeRecordSubPageEntry)
from piecrust.data.filters import (
        PaginationFilter, HasFilterClause,
        IsFilterClause, AndBooleanClause,
        page_value_accessor)
from piecrust.rendering import (
        PageRenderingContext, render_page,
        PASS_FORMATTING, PASS_RENDERING)
from piecrust.sources.base import (
        PageFactory,
        REALM_NAMES, REALM_USER, REALM_THEME)
from piecrust.uriutil import split_uri


logger = logging.getLogger(__name__)


def copy_public_page_config(config):
    res = config.get().copy()
    for k in list(res.keys()):
        if k.startswith('__'):
            del res[k]
    return res


class BakingError(Exception):
    pass


class PageBaker(object):
    def __init__(self, app, out_dir, force=False, record=None,
                 copy_assets=True):
        self.app = app
        self.out_dir = out_dir
        self.force = force
        self.record = record
        self.copy_assets = copy_assets
        self.site_root = app.config.get('site/root')
        self.pretty_urls = app.config.get('site/pretty_urls')
        self.pagination_suffix = app.config.get('site/pagination_suffix')

    def getOutputPath(self, uri):
        uri_root, uri_path = split_uri(self.app, uri)

        bake_path = [self.out_dir]
        decoded_uri = urllib.parse.unquote(uri_path)
        if self.pretty_urls:
            bake_path.append(decoded_uri)
            bake_path.append('index.html')
        elif decoded_uri == '':
            bake_path.append('index.html')
        else:
            bake_path.append(decoded_uri)

        return os.path.normpath(os.path.join(*bake_path))

    def bake(self, factory, route, record_entry):
        bake_taxonomy_info = None
        route_metadata = dict(factory.metadata)

        # Add taxonomy metadata for generating the URL if needed.
        if record_entry.taxonomy_info:
            tax_name, tax_term, tax_source_name = record_entry.taxonomy_info
            taxonomy = self.app.getTaxonomy(tax_name)
            slugified_term = route.slugifyTaxonomyTerm(tax_term)
            route_metadata[taxonomy.term_name] = slugified_term
            bake_taxonomy_info = (taxonomy, tax_term)

        # Generate the URL using the route.
        page = factory.buildPage()
        uri = route.getUri(route_metadata, provider=page)

        # See if this URL has been overriden by a previously baked page.
        # If that page is from another realm (e.g. a user page vs. a theme
        # page), we silently skip this page. If they're from the same realm,
        # we don't allow overriding and raise an error (this is probably
        # because of a misconfigured configuration that allows for ambiguous
        # URLs between 2 routes or sources).
        override = self.record.getOverrideEntry(factory, uri)
        if override is not None:
            override_source = self.app.getSource(override.source_name)
            if override_source.realm == factory.source.realm:
                raise BakingError(
                        "Page '%s' maps to URL '%s' but is overriden by page"
                        "'%s:%s'." % (factory.ref_spec, uri,
                                      override.source_name,
                                      override.rel_path))
            logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" %
                         (factory.ref_spec, uri, override.source_name,
                          override.rel_path))
            record_entry.flags |= BakeRecordPageEntry.FLAG_OVERRIDEN
            return

        # Setup the record entry.
        record_entry.config = copy_public_page_config(page.config)

        # Start baking the sub-pages.
        cur_sub = 1
        has_more_subs = True
        force_this = self.force
        invalidate_formatting = False
        prev_record_entry = self.record.getPreviousEntry(
                factory.source.name, factory.rel_path,
                record_entry.taxonomy_info)

        logger.debug("Baking '%s'..." % uri)

        while has_more_subs:
            # Get the URL and path for this sub-page.
            sub_uri = route.getUri(route_metadata, sub_num=cur_sub,
                                   provider=page)
            out_path = self.getOutputPath(sub_uri)

            # Create the sub-entry for the bake record.
            record_sub_entry = BakeRecordSubPageEntry(sub_uri, out_path)
            record_entry.subs.append(record_sub_entry)

            # Find a corresponding sub-entry in the previous bake record.
            prev_record_sub_entry = None
            if prev_record_entry:
                try:
                    prev_record_sub_entry = prev_record_entry.getSub(cur_sub)
                except IndexError:
                    pass

            # Figure out what to do with this page.
            if (prev_record_sub_entry and
                    (prev_record_sub_entry.was_baked_successfully or
                        prev_record_sub_entry.was_clean)):
                # If the current page is known to use pages from other sources,
                # see if any of those got baked, or are going to be baked for
                # some reason. If so, we need to bake this one too.
                # (this happens for instance with the main page of a blog).
                dirty_src_names, invalidated_render_passes = (
                        self._getDirtySourceNamesAndRenderPasses(
                            prev_record_sub_entry))
                if len(invalidated_render_passes) > 0:
                    logger.debug(
                            "'%s' is known to use sources %s, which have "
                            "items that got (re)baked. Will force bake this "
                            "page. " % (uri, dirty_src_names))
                    record_sub_entry.flags |= \
                        BakeRecordSubPageEntry.FLAG_FORCED_BY_SOURCE
                    force_this = True

                    if PASS_FORMATTING in invalidated_render_passes:
                        logger.debug(
                                "Will invalidate cached formatting for '%s' "
                                "since sources were using during that pass."
                                % uri)
                        invalidate_formatting = True
            elif (prev_record_sub_entry and
                    prev_record_sub_entry.errors):
                # Previous bake failed. We'll have to bake it again.
                logger.debug(
                        "Previous record entry indicates baking failed for "
                        "'%s'. Will bake it again." % uri)
                record_sub_entry.flags |= \
                    BakeRecordSubPageEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
                force_this = True
            elif not prev_record_sub_entry:
                # No previous record. We'll have to bake it.
                logger.debug("No previous record entry found for '%s'. Will "
                             "force bake it." % uri)
                record_sub_entry.flags |= \
                    BakeRecordSubPageEntry.FLAG_FORCED_BY_NO_PREVIOUS
                force_this = True

            # Check for up-to-date outputs.
            do_bake = True
            if not force_this:
                try:
                    in_path_time = page.path_mtime
                    out_path_time = os.path.getmtime(out_path)
                    if out_path_time >= in_path_time:
                        do_bake = False
                except OSError:
                    # File doesn't exist, we'll need to bake.
                    pass

            # If this page didn't bake because it's already up-to-date.
            # Keep trying for as many subs as we know this page has.
            if not do_bake:
                prev_record_sub_entry.collapseRenderPasses(record_sub_entry)
                record_sub_entry.flags = BakeRecordSubPageEntry.FLAG_NONE

                if prev_record_entry.num_subs >= cur_sub + 1:
                    cur_sub += 1
                    has_more_subs = True
                    logger.debug("  %s is up to date, skipping to next "
                                 "sub-page." % out_path)
                    continue

                logger.debug("  %s is up to date, skipping bake." % out_path)
                break

            # All good, proceed.
            try:
                if invalidate_formatting:
                    cache_key = sub_uri
                    self.app.env.rendered_segments_repository.invalidate(
                            cache_key)
                    record_sub_entry.flags |= \
                        BakeRecordSubPageEntry.FLAG_FORMATTING_INVALIDATED

                logger.debug("  p%d -> %s" % (cur_sub, out_path))
                ctx, rp = self._bakeSingle(page, sub_uri, cur_sub, out_path,
                                           bake_taxonomy_info)
            except Exception as ex:
                if self.app.debug:
                    logger.exception(ex)
                page_rel_path = os.path.relpath(page.path, self.app.root_dir)
                raise BakingError("%s: error baking '%s'." %
                                  (page_rel_path, uri)) from ex

            # Record what we did.
            record_sub_entry.flags |= BakeRecordSubPageEntry.FLAG_BAKED
            self.record.dirty_source_names.add(record_entry.source_name)
            for p, pinfo in ctx.render_passes.items():
                brpi = BakeRecordPassInfo()
                brpi.used_source_names = set(pinfo.used_source_names)
                brpi.used_taxonomy_terms = set(pinfo.used_taxonomy_terms)
                record_sub_entry.render_passes[p] = brpi
            if prev_record_sub_entry:
                prev_record_sub_entry.collapseRenderPasses(record_sub_entry)

            # Copy page assets.
            if (cur_sub == 1 and self.copy_assets and
                    ctx.used_assets is not None):
                if self.pretty_urls:
                    out_assets_dir = os.path.dirname(out_path)
                else:
                    out_assets_dir, out_name = os.path.split(out_path)
                    if sub_uri != self.site_root:
                        out_name_noext, _ = os.path.splitext(out_name)
                        out_assets_dir += out_name_noext

                logger.debug("Copying page assets to: %s" % out_assets_dir)
                if not os.path.isdir(out_assets_dir):
                    os.makedirs(out_assets_dir, 0o755)
                for ap in ctx.used_assets:
                    dest_ap = os.path.join(out_assets_dir,
                                           os.path.basename(ap))
                    logger.debug("  %s -> %s" % (ap, dest_ap))
                    shutil.copy(ap, dest_ap)
                    record_entry.assets.append(ap)

            # Figure out if we have more work.
            has_more_subs = False
            if ctx.used_pagination is not None:
                if ctx.used_pagination.has_more:
                    cur_sub += 1
                    has_more_subs = True

    def _bakeSingle(self, page, sub_uri, num, out_path,
                    taxonomy_info=None):
        ctx = PageRenderingContext(page, sub_uri)
        ctx.page_num = num
        if taxonomy_info:
            ctx.setTaxonomyFilter(taxonomy_info[0], taxonomy_info[1])

        rp = render_page(ctx)

        out_dir = os.path.dirname(out_path)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir, 0o755)

        with codecs.open(out_path, 'w', 'utf8') as fp:
            fp.write(rp.content)

        return ctx, rp

    def _getDirtySourceNamesAndRenderPasses(self, record_sub_entry):
        dirty_src_names = set()
        invalidated_render_passes = set()
        for p, pinfo in record_sub_entry.render_passes.items():
            for src_name in pinfo.used_source_names:
                is_dirty = (src_name in self.record.dirty_source_names)
                if is_dirty:
                    invalidated_render_passes.add(p)
                    dirty_src_names.add(src_name)
                    break
        return dirty_src_names, invalidated_render_passes