view piecrust/baking/single.py @ 369:4b1019bb2533

serve: Giant refactor to change how we handle data when serving pages. * We need a distinction between source metadata and route metadata. In most cases they're the same, but in cases like taxonomy pages, route metadata contains more things that can't be in source metadata if we want to re-use cached pages. * Create a new `QualifiedPage` type which is a page with a specific route and route metadata. Pass this around in many places. * Instead of passing an URL around, use the route in the `QualifiedPage` to generate URLs. This is better since it removes the guess-work from trying to generate URLs for sub-pages. * Deep-copy app and page configurations before passing them around to things that could modify them, like data builders and such. * Exclude taxonomy pages from iterator data providers. * Properly nest iterator data providers for when the theme and user page sources are merged inside `site.pages`.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 03 May 2015 18:47:10 -0700
parents b8ff1780b491
children e7b865f8f335
line wrap: on
line source

import os.path
import copy
import shutil
import codecs
import logging
import urllib.parse
from piecrust.baking.records import (
        BakeRecordPassInfo, BakeRecordPageEntry, BakeRecordSubPageEntry)
from piecrust.data.filters import (
        PaginationFilter, HasFilterClause,
        IsFilterClause, AndBooleanClause,
        page_value_accessor)
from piecrust.rendering import (
        QualifiedPage, PageRenderingContext, render_page,
        PASS_FORMATTING, PASS_RENDERING)
from piecrust.sources.base import (
        PageFactory,
        REALM_NAMES, REALM_USER, REALM_THEME)
from piecrust.uriutil import split_uri


logger = logging.getLogger(__name__)


def copy_public_page_config(config):
    """Return a deep copy of `config` without its private entries.

    The copy comes from `config.getDeepcopy()`, so the original
    configuration is left untouched. Top-level keys starting with a double
    underscore are considered private and are removed from the copy.
    """
    public = config.getDeepcopy()
    # Collect the keys first: we can't delete entries while iterating.
    private_keys = [key for key in public.keys() if key.startswith('__')]
    for key in private_keys:
        del public[key]
    return public


class BakingError(Exception):
    """Error raised when a page can't be baked."""


class PageBaker(object):
    """Bakes a page, and all of its sub-pages, into files under an output
    directory, while filling in the entries of the current bake record.
    """
    def __init__(self, app, out_dir, force=False, record=None,
                 copy_assets=True):
        """Create a baker that writes into `out_dir`.

        `force` makes pages bake without checking file timestamps.
        `record` is the current bake record; `bake()` needs it to look up
        overrides and previous entries. `copy_assets` controls whether the
        assets used by a page are copied next to its output.
        """
        self.app = app
        self.out_dir = out_dir
        self.force = force
        self.record = record
        self.copy_assets = copy_assets
        self.site_root = app.config.get('site/root')
        self.pretty_urls = app.config.get('site/pretty_urls')

    def getOutputPath(self, uri):
        """Return the normalized path of the file to write for `uri`.

        With pretty URLs, the page is written as an `index.html` inside a
        directory named after the URI. Otherwise the URI path is used as
        the file path, except for the empty URI which maps to `index.html`.
        """
        uri_root, uri_path = split_uri(self.app, uri)

        bake_path = [self.out_dir]
        decoded_uri = urllib.parse.unquote(uri_path)
        if self.pretty_urls:
            bake_path.append(decoded_uri)
            bake_path.append('index.html')
        elif decoded_uri == '':
            bake_path.append('index.html')
        else:
            bake_path.append(decoded_uri)

        return os.path.normpath(os.path.join(*bake_path))

    def bake(self, factory, route, record_entry):
        """Bake the page built by `factory`, using `route` for its URLs.

        Every sub-page is either rendered to disk or skipped when its
        output is up to date, and a sub-entry is appended to
        `record_entry.subs` for each of them. If the page's URL is taken by
        a page from another realm, `record_entry` is flagged as overridden
        and nothing is baked.

        Raises `BakingError` if the URL is taken by a page from the same
        realm, or if rendering a sub-page fails.
        """
        # Get the page.
        page = factory.buildPage()
        route_metadata = copy.deepcopy(factory.metadata)

        # Add taxonomy info in the template data and route metadata if needed.
        bake_taxonomy_info = None
        if record_entry.taxonomy_info:
            tax_name, tax_term, tax_source_name = record_entry.taxonomy_info
            taxonomy = self.app.getTaxonomy(tax_name)
            slugified_term = route.slugifyTaxonomyTerm(tax_term)
            route_metadata[taxonomy.term_name] = slugified_term
            bake_taxonomy_info = (taxonomy, tax_term)

        # Generate the URI.
        uri = route.getUri(route_metadata, provider=page)

        # See if this URL has been overridden by a previously baked page.
        # If that page is from another realm (e.g. a user page vs. a theme
        # page), we silently skip this page. If they're from the same realm,
        # we don't allow overriding and raise an error (this is probably
        # because of a misconfigured configuration that allows for ambiguous
        # URLs between 2 routes or sources).
        override = self.record.getOverrideEntry(factory, uri)
        if override is not None:
            override_source = self.app.getSource(override.source_name)
            if override_source.realm == factory.source.realm:
                raise BakingError(
                        "Page '%s' maps to URL '%s' but is overriden by page "
                        "'%s:%s'." % (factory.ref_spec, uri,
                                      override.source_name,
                                      override.rel_path))
            logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" %
                         (factory.ref_spec, uri, override.source_name,
                          override.rel_path))
            record_entry.flags |= BakeRecordPageEntry.FLAG_OVERRIDEN
            return

        # Setup the record entry.
        record_entry.config = copy_public_page_config(page.config)

        # Start baking the sub-pages.
        cur_sub = 1
        has_more_subs = True
        # Note: once a sub-page is forced, the following ones are too.
        force_this = self.force
        invalidate_formatting = False
        prev_record_entry = self.record.getPreviousEntry(
                factory.source.name, factory.rel_path,
                record_entry.taxonomy_info)

        logger.debug("Baking '%s'..." % uri)

        while has_more_subs:
            # Get the URL and path for this sub-page.
            sub_uri = route.getUri(route_metadata, sub_num=cur_sub,
                                   provider=page)
            out_path = self.getOutputPath(sub_uri)

            # Create the sub-entry for the bake record.
            record_sub_entry = BakeRecordSubPageEntry(sub_uri, out_path)
            record_entry.subs.append(record_sub_entry)

            # Find a corresponding sub-entry in the previous bake record.
            prev_record_sub_entry = None
            if prev_record_entry:
                try:
                    prev_record_sub_entry = prev_record_entry.getSub(cur_sub)
                except IndexError:
                    pass

            # Figure out what to do with this page.
            if (prev_record_sub_entry and
                    (prev_record_sub_entry.was_baked_successfully or
                        prev_record_sub_entry.was_clean)):
                # If the current page is known to use pages from other sources,
                # see if any of those got baked, or are going to be baked for
                # some reason. If so, we need to bake this one too.
                # (this happens for instance with the main page of a blog).
                dirty_src_names, invalidated_render_passes = (
                        self._getDirtySourceNamesAndRenderPasses(
                            prev_record_sub_entry))
                if invalidated_render_passes:
                    logger.debug(
                            "'%s' is known to use sources %s, which have "
                            "items that got (re)baked. Will force bake this "
                            "page. " % (uri, dirty_src_names))
                    record_sub_entry.flags |= \
                        BakeRecordSubPageEntry.FLAG_FORCED_BY_SOURCE
                    force_this = True

                    if PASS_FORMATTING in invalidated_render_passes:
                        logger.debug(
                                "Will invalidate cached formatting for '%s' "
                                "since sources were using during that pass."
                                % uri)
                        invalidate_formatting = True
            elif (prev_record_sub_entry and
                    prev_record_sub_entry.errors):
                # Previous bake failed. We'll have to bake it again.
                logger.debug(
                        "Previous record entry indicates baking failed for "
                        "'%s'. Will bake it again." % uri)
                record_sub_entry.flags |= \
                    BakeRecordSubPageEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
                force_this = True
            elif not prev_record_sub_entry:
                # No previous record. We'll have to bake it.
                logger.debug("No previous record entry found for '%s'. Will "
                             "force bake it." % uri)
                record_sub_entry.flags |= \
                    BakeRecordSubPageEntry.FLAG_FORCED_BY_NO_PREVIOUS
                force_this = True

            # Check for up-to-date outputs.
            do_bake = True
            if not force_this:
                try:
                    in_path_time = page.path_mtime
                    out_path_time = os.path.getmtime(out_path)
                    if out_path_time >= in_path_time:
                        do_bake = False
                except OSError:
                    # File doesn't exist, we'll need to bake.
                    pass

            # If this page didn't bake because it's already up-to-date.
            # Keep trying for as many subs as we know this page has.
            # (we can only get here with a previous sub-entry, since not
            # having one forces the bake above).
            if not do_bake:
                prev_record_sub_entry.collapseRenderPasses(record_sub_entry)
                record_sub_entry.flags = BakeRecordSubPageEntry.FLAG_NONE

                if prev_record_entry.num_subs >= cur_sub + 1:
                    cur_sub += 1
                    has_more_subs = True
                    logger.debug("  %s is up to date, skipping to next "
                                 "sub-page." % out_path)
                    continue

                logger.debug("  %s is up to date, skipping bake." % out_path)
                break

            # All good, proceed.
            try:
                if invalidate_formatting:
                    cache_key = sub_uri
                    self.app.env.rendered_segments_repository.invalidate(
                            cache_key)
                    record_sub_entry.flags |= \
                        BakeRecordSubPageEntry.FLAG_FORMATTING_INVALIDATED

                logger.debug("  p%d -> %s" % (cur_sub, out_path))
                qp = QualifiedPage(page, route, route_metadata)
                ctx, _ = self._bakeSingle(qp, cur_sub, out_path,
                                          bake_taxonomy_info)
            except Exception as ex:
                if self.app.debug:
                    logger.exception(ex)
                page_rel_path = os.path.relpath(page.path, self.app.root_dir)
                raise BakingError("%s: error baking '%s'." %
                                  (page_rel_path, uri)) from ex

            # Record what we did.
            record_sub_entry.flags |= BakeRecordSubPageEntry.FLAG_BAKED
            self.record.dirty_source_names.add(record_entry.source_name)
            for p, pinfo in ctx.render_passes.items():
                brpi = BakeRecordPassInfo()
                brpi.used_source_names = set(pinfo.used_source_names)
                brpi.used_taxonomy_terms = set(pinfo.used_taxonomy_terms)
                record_sub_entry.render_passes[p] = brpi
            if prev_record_sub_entry:
                prev_record_sub_entry.collapseRenderPasses(record_sub_entry)

            # Copy page assets (only once, along with the first sub-page).
            if (cur_sub == 1 and self.copy_assets and
                    ctx.used_assets is not None):
                out_assets_dir = self._getAssetsOutputDir(sub_uri, out_path)

                logger.debug("Copying page assets to: %s" % out_assets_dir)
                if not os.path.isdir(out_assets_dir):
                    os.makedirs(out_assets_dir, 0o755)
                for ap in ctx.used_assets:
                    dest_ap = os.path.join(out_assets_dir,
                                           os.path.basename(ap))
                    logger.debug("  %s -> %s" % (ap, dest_ap))
                    shutil.copy(ap, dest_ap)
                    record_entry.assets.append(ap)

            # Figure out if we have more work.
            has_more_subs = False
            if ctx.used_pagination is not None:
                if ctx.used_pagination.has_more:
                    cur_sub += 1
                    has_more_subs = True

    def _getAssetsOutputDir(self, sub_uri, out_path):
        """Return the directory in which to copy the assets of the page
        baked at `out_path`.

        With pretty URLs this is the directory containing the output file.
        Otherwise it's a directory next to the output file, named like it
        minus the extension, except for the page at the site root, whose
        assets go directly in the output file's directory.
        """
        if self.pretty_urls:
            return os.path.dirname(out_path)

        out_assets_dir, out_name = os.path.split(out_path)
        if sub_uri != self.site_root:
            out_name_noext, _ = os.path.splitext(out_name)
            # `os.path.split` strips the trailing separator from the
            # directory, so the 2 parts must be joined, not concatenated.
            out_assets_dir = os.path.join(out_assets_dir, out_name_noext)
        return out_assets_dir

    def _bakeSingle(self, qualified_page, num, out_path, taxonomy_info=None):
        """Render sub-page `num` of `qualified_page` and write the result
        to `out_path` as UTF-8, creating parent directories as needed.

        `taxonomy_info`, if given, is a `(taxonomy, term)` pair set as the
        taxonomy filter of the rendering context.

        Returns the rendering context and the rendered page.
        """
        ctx = PageRenderingContext(qualified_page, page_num=num)
        if taxonomy_info:
            ctx.setTaxonomyFilter(taxonomy_info[0], taxonomy_info[1])

        rp = render_page(ctx)

        out_dir = os.path.dirname(out_path)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir, 0o755)

        with codecs.open(out_path, 'w', 'utf8') as fp:
            fp.write(rp.content)

        return ctx, rp

    def _getDirtySourceNamesAndRenderPasses(self, record_sub_entry):
        """Find which render passes of `record_sub_entry` used a source
        that is marked as dirty in the current bake record.

        Returns a `(dirty_source_names, invalidated_render_passes)` pair of
        sets. Only the first dirty source found for each pass is reported,
        so the names may not be exhaustive.
        """
        dirty_src_names = set()
        invalidated_render_passes = set()
        for p, pinfo in record_sub_entry.render_passes.items():
            for src_name in pinfo.used_source_names:
                is_dirty = (src_name in self.record.dirty_source_names)
                if is_dirty:
                    invalidated_render_passes.add(p)
                    dirty_src_names.add(src_name)
                    # One dirty source is enough to invalidate the pass.
                    break
        return dirty_src_names, invalidated_render_passes