view piecrust/pipelines/_pagebaker.py @ 1035:1a7f3ae09c53

bake: Allow specifying which asset processors to use on a per-source basis.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 16 Jan 2018 08:40:34 -0800
parents 298b07a899b5
children 3bcb2d446397
line wrap: on
line source

import os.path
import copy
import queue
import shutil
import logging
import threading
import urllib.parse
from piecrust.pipelines._pagerecords import (
    SubPageFlags, create_subpage_job_result)
from piecrust.rendering import RenderingContext, render_page
from piecrust.sources.base import AbortedSourceUseError
from piecrust.uriutil import split_uri


logger = logging.getLogger(__name__)


def get_output_path(app, out_dir, uri, pretty_urls):
    """Compute the on-disk output path for the given URI.

    With pretty URLs, the URI maps to a directory containing an
    ``index.html`` file. Otherwise the URI itself is the file name,
    except for the site root which also maps to ``index.html``.
    """
    _, rel_uri = split_uri(app, uri)
    unquoted = urllib.parse.unquote(rel_uri)

    if pretty_urls:
        parts = [out_dir, unquoted, 'index.html']
    else:
        parts = [out_dir, unquoted or 'index.html']

    return os.path.normpath(os.path.join(*parts))


class BakingError(Exception):
    """Raised when a page fails to bake."""


class PageBaker(object):
    """Renders pages (and their sub-pages) to static files in an output
    directory.

    File writes either happen directly, or are offloaded to a dedicated
    serializer thread (see `startWriterQueue`).
    """

    def __init__(self, app, out_dir, force=False):
        self.app = app
        self.out_dir = out_dir
        self.force = force
        # Cache frequently-used config values and app services.
        self.site_root = app.config.get('site/root')
        self.pretty_urls = app.config.get('site/pretty_urls')
        # Write strategy: direct by default, swapped for the queue-based
        # one when `startWriterQueue` is called.
        self._do_write = self._writeDirect
        self._writer_queue = None
        self._writer = None
        self._stats = app.env.stats
        self._rsr = app.env.rendered_segments_repository

    def startWriterQueue(self):
        """Start a background thread to perform the file writes.

        After this, `_do_write` enqueues `(path, content)` tuples
        instead of writing synchronously.
        """
        self._writer_queue = queue.Queue()
        self._writer = threading.Thread(
            name='PageSerializer',
            daemon=True,
            target=_text_writer,
            args=(self._writer_queue,))
        self._writer.start()
        self._do_write = self._sendToWriterQueue

    def stopWriterQueue(self):
        """Ask the writer thread to exit (via a `None` sentinel) and
        wait for it to finish."""
        self._writer_queue.put_nowait(None)
        self._writer.join()

    def _sendToWriterQueue(self, out_path, content):
        # Queue-based write strategy: the actual write happens in
        # `_text_writer`, on the serializer thread.
        self._writer_queue.put_nowait((out_path, content))

    def _writeDirect(self, out_path, content):
        # Direct write strategy.
        # NOTE(review): unlike `_text_writer`, this doesn't ensure the
        # output directory exists -- presumably the caller does.
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(content)

    def bake(self, page, prev_entry, force=False):
        """Bake all the sub-pages of `page` that need it.

        `prev_entry` is the page's entry from the previous bake record
        (or `None`); it's used to decide which sub-pages are already
        up-to-date. Returns the list of sub-page record entries
        describing what was done. Raises `BakingError` when rendering
        fails.
        """
        cur_sub = 1
        has_more_subs = True
        app = self.app
        out_dir = self.out_dir
        force_bake = self.force or force
        # A page can override the site-wide pretty-URLs setting.
        pretty_urls = page.config.get('pretty_urls', self.pretty_urls)

        rendered_subs = []

        # Start baking the sub-pages.
        while has_more_subs:
            sub_uri = page.getUri(sub_num=cur_sub)
            logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub))

            out_path = get_output_path(app, out_dir, sub_uri, pretty_urls)

            # Create the sub-entry for the bake record.
            cur_sub_entry = create_subpage_job_result(sub_uri, out_path)
            rendered_subs.append(cur_sub_entry)

            # Find a corresponding sub-entry in the previous bake record.
            prev_sub_entry = None
            if prev_entry is not None:
                try:
                    prev_sub_entry = prev_entry.getSub(cur_sub)
                except IndexError:
                    pass

            # Figure out if we need to bake this page.
            bake_status = _get_bake_status(page, out_path, force_bake,
                                           prev_sub_entry, cur_sub_entry)

            # If this page didn't bake because it's already up-to-date.
            # Keep trying for as many subs as we know this page has.
            if bake_status == STATUS_CLEAN:
                # Carry the render info over from the previous bake.
                # NOTE: plain assignment (not `|=`) -- any flags set
                # earlier on this entry are replaced here.
                cur_sub_entry['render_info'] = copy.deepcopy(
                    prev_sub_entry['render_info'])
                cur_sub_entry['flags'] = \
                    SubPageFlags.FLAG_COLLAPSED_FROM_LAST_RUN

                if prev_entry.num_subs >= cur_sub + 1:
                    cur_sub += 1
                    has_more_subs = True
                    logger.debug("  %s is up to date, skipping to next "
                                 "sub-page." % out_path)
                    continue

                logger.debug("  %s is up to date, skipping bake." % out_path)
                break

            # All good, proceed.
            try:
                if bake_status == STATUS_INVALIDATE_AND_BAKE:
                    # Something important changed somewhere: clear any
                    # cached rendered segments for this URI before
                    # re-rendering.
                    cache_key = sub_uri
                    self._rsr.invalidate(cache_key)
                    cur_sub_entry['flags'] |= \
                        SubPageFlags.FLAG_RENDER_CACHE_INVALIDATED

                logger.debug("  p%d -> %s" % (cur_sub, out_path))
                rp = self._bakeSingle(page, cur_sub, out_path)
            except AbortedSourceUseError:
                # Let the pipeline handle aborted source usage itself.
                raise
            except Exception as ex:
                logger.exception(ex)
                raise BakingError("%s: error baking '%s'." %
                                  (page.content_spec, sub_uri)) from ex

            # Record what we did.
            cur_sub_entry['flags'] |= SubPageFlags.FLAG_BAKED
            cur_sub_entry['render_info'] = copy.deepcopy(rp.render_info)

            # Copy page assets. This is only done for the first sub-page.
            if (cur_sub == 1 and
                    cur_sub_entry['render_info']['used_assets']):
                if pretty_urls:
                    # Assets go next to the page's `index.html`.
                    out_assets_dir = os.path.dirname(out_path)
                else:
                    # Assets go in a directory named after the output
                    # file (minus its extension), except at the site
                    # root.
                    out_assets_dir, out_name = os.path.split(out_path)
                    if sub_uri != self.site_root:
                        out_name_noext, _ = os.path.splitext(out_name)
                        out_assets_dir = os.path.join(out_assets_dir,
                                                      out_name_noext)

                logger.debug("Copying page assets to: %s" % out_assets_dir)
                _ensure_dir_exists(out_assets_dir)
                assetor = rp.data.get('assets')
                if assetor is not None:
                    for i in assetor._getAssetItems():
                        fn = os.path.basename(i.spec)
                        out_asset_path = os.path.join(out_assets_dir, fn)
                        logger.debug("  %s -> %s" % (i.spec, out_asset_path))
                        shutil.copy(i.spec, out_asset_path)

            # Figure out if we have more work.
            has_more_subs = False
            if cur_sub_entry['render_info']['pagination_has_more']:
                cur_sub += 1
                has_more_subs = True

        return rendered_subs

    def _bakeSingle(self, page, sub_num, out_path):
        """Render one sub-page and write it to `out_path`.

        Returns the render result, whose `render_info` and `data` the
        caller uses for record-keeping and asset copying.
        """
        ctx = RenderingContext(page, sub_num=sub_num)
        page.source.prepareRenderContext(ctx)

        with self._stats.timerScope("PageRender"):
            rp = render_page(ctx)

        with self._stats.timerScope("PageSerialize"):
            self._do_write(out_path, rp.content)

        return rp


def _text_writer(q):
    """Worker loop that serializes rendered text to disk.

    Consumes `(out_path, text)` tuples from `q` and writes each one,
    creating the destination directory if needed. A `None` item acts as
    a sentinel that terminates the loop.
    """
    while True:
        item = q.get()
        if item is None:
            # Sentinel object: acknowledge it and terminate the thread.
            q.task_done()
            break

        out_path, txt = item
        _ensure_dir_exists(os.path.dirname(out_path))
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(txt)
        q.task_done()


# Bake statuses, as computed by `_get_bake_status` below.
STATUS_CLEAN = 0                 # output is up-to-date; nothing to do
STATUS_BAKE = 1                  # output is stale or missing; re-bake it
STATUS_INVALIDATE_AND_BAKE = 2   # re-bake, clearing the render cache first


def _get_bake_status(page, out_path, force, prev_sub_entry, cur_sub_entry):
    """Decide whether the output at `out_path` needs (re-)baking.

    Returns one of `STATUS_CLEAN`, `STATUS_BAKE` or
    `STATUS_INVALIDATE_AND_BAKE`, recording the reason in
    `cur_sub_entry['flags']` along the way.
    """
    # The previous bake's history may already force a re-bake.
    forced = _compute_force_flags(prev_sub_entry, cur_sub_entry)
    if forced != STATUS_CLEAN:
        return forced

    # Easy test: a general "force" means something important has changed
    # somehow, so any cache we have on this page must be invalidated too.
    if force:
        cur_sub_entry['flags'] |= \
            SubPageFlags.FLAG_FORCED_BY_GENERAL_FORCE
        return STATUS_INVALIDATE_AND_BAKE

    # Compare source vs. output modification times.
    src_time = page.content_mtime
    try:
        dst_time = os.path.getmtime(out_path)
    except OSError:
        # Output file doesn't exist yet; we obviously need to bake.
        cur_sub_entry['flags'] |= \
            SubPageFlags.FLAG_FORCED_BY_NO_PREVIOUS
        return STATUS_BAKE

    if dst_time <= src_time:
        return STATUS_BAKE

    # Nope, all good.
    return STATUS_CLEAN


def _compute_force_flags(prev_sub_entry, cur_sub_entry):
    """Check the previous bake record for conditions that force a re-bake.

    Returns `STATUS_BAKE` when there's no previous record at all, or when
    the previous bake failed; `STATUS_CLEAN` otherwise. The reason is
    recorded in `cur_sub_entry['flags']`.
    """
    if not prev_sub_entry:
        # No previous record, so most probably was never baked. Bake it.
        cur_sub_entry['flags'] |= \
            SubPageFlags.FLAG_FORCED_BY_NO_PREVIOUS
        return STATUS_BAKE

    if prev_sub_entry['errors']:
        # Previous bake failed. We'll have to bake it again.
        cur_sub_entry['flags'] |= \
            SubPageFlags.FLAG_FORCED_BY_PREVIOUS_ERRORS
        return STATUS_BAKE

    return STATUS_CLEAN


def _ensure_dir_exists(path):
    try:
        os.makedirs(path, mode=0o755, exist_ok=True)
    except OSError:
        # In a multiprocess environment, several process may very
        # occasionally try to create the same directory at the same time.
        # Let's ignore any error and if something's really wrong (like file
        # acces permissions or whatever), then it will more legitimately fail
        # just after this when we try to write files.
        pass