piecrust/commands/builtin/baking.py @ 411:e7b865f8f335
bake: Enable multiprocess baking.
Baking is now done by running one worker per CPU and sending jobs to them
(a rough sketch of this model follows the list below).
This changes several things across the codebase:
* Ability to not cache things related to pages other than the 'main' page
(i.e. the page at the bottom of the execution stack).
* Decouple the baking process from the bake records, so that only the main
process keeps track of (and modifies) the bake record.
* Remove the need for 'batch page getters' and for loading a page directly
from the page factories.
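A rough sketch of the worker model, using the standard multiprocessing
module; the function names and job format below are illustrative, not the
actual PieCrust job protocol:

    import multiprocessing

    def bake_page(job):
        # Hypothetical stand-in for rendering one page and writing it
        # to the output directory.
        rel_path, out_dir = job
        return {'path': rel_path, 'success': True}

    def bake_all(page_paths, out_dir):
        jobs = [(p, out_dir) for p in page_paths]
        # One worker per CPU; workers receive jobs, the parent collects
        # results. Only the parent updates the bake record from them.
        with multiprocessing.Pool(
                processes=multiprocessing.cpu_count()) as pool:
            results = pool.map(bake_page, jobs)
        return all(r['success'] for r in results)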
Various smaller changes are also included here, among them support for
scoped performance timers that are saved with the bake record and can be
printed out to the console. Yes, I got carried away.
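As an illustration of the scoped timers idea, a minimal sketch, assuming a
plain dict of accumulated timings like the one stored on the bake record
(the names 'timers' and 'scoped_timer' are made up here, not PieCrust's
actual API):

    import time
    from contextlib import contextmanager

    timers = {}

    @contextmanager
    def scoped_timer(name):
        # Accumulate the elapsed wall-clock time of the 'with' body
        # under the scope's name.
        start = time.perf_counter()
        try:
            yield
        finally:
            timers[name] = timers.get(name, 0.0) + \
                (time.perf_counter() - start)

    # Usage:
    # with scoped_timer('render'):
    #     render_page()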
For testing, the in-memory 'mock' file-system doesn't work anymore since
we're now spawning processes, so it is replaced by a 'tmpfs' file-system that
is saved in temporary files on disk and deleted after the tests have run.
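A minimal sketch of what such a disk-backed test file-system can look like;
'TmpFS' and its methods are illustrative, not the project's actual test
helper:

    import os
    import shutil
    import tempfile

    class TmpFS:
        def __init__(self):
            # Back the 'virtual' file-system with a real temp directory.
            self.root = tempfile.mkdtemp(prefix='piecrust_tests_')

        def write(self, rel_path, contents):
            path = os.path.join(self.root, rel_path)
            dir_path = os.path.dirname(path)
            if dir_path:
                os.makedirs(dir_path, exist_ok=True)
            with open(path, 'w') as fp:
                fp.write(contents)

        def cleanup(self):
            # Delete everything once the tests have run.
            shutil.rmtree(self.root)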
author:   Ludovic Chabant <ludovic@chabant.com>
date:     Fri, 12 Jun 2015 17:09:19 -0700
parents:  2cd2b5d07129
children: 0e9a94b7fdfa
line source
import time
import os.path
import logging
import hashlib
import fnmatch
import datetime
from piecrust.baking.baker import Baker
from piecrust.baking.records import (
        BakeRecord, BakeRecordEntry, SubPageBakeInfo)
from piecrust.chefutil import format_timed
from piecrust.commands.base import ChefCommand
from piecrust.processing.base import ProcessorPipeline
from piecrust.processing.records import (
        ProcessorPipelineRecord,
        FLAG_PREPARED, FLAG_PROCESSED, FLAG_OVERRIDEN,
        FLAG_BYPASSED_STRUCTURED_PROCESSING)
from piecrust.rendering import PASS_FORMATTING, PASS_RENDERING


logger = logging.getLogger(__name__)


class BakeCommand(ChefCommand):
    def __init__(self):
        super(BakeCommand, self).__init__()
        self.name = 'bake'
        self.description = "Bakes your website into static HTML files."

    def setupParser(self, parser, app):
        parser.add_argument(
                '-o', '--output',
                help="The directory to put all the baked HTML files into "
                     "(defaults to `_counter`)")
        parser.add_argument(
                '-f', '--force',
                help="Force re-baking the entire website.",
                action='store_true')
        parser.add_argument(
                '-w', '--workers',
                help="The number of worker processes to spawn.",
                type=int, default=-1)
        parser.add_argument(
                '--assets-only',
                help="Only bake the assets (don't bake the web pages).",
                action='store_true')
        parser.add_argument(
                '--html-only',
                help="Only bake HTML files (don't run the asset pipeline).",
                action='store_true')
        parser.add_argument(
                '--show-timers',
                help="Show detailed timing information.",
                action='store_true')

    def run(self, ctx):
        out_dir = (ctx.args.output or
                   os.path.join(ctx.app.root_dir, '_counter'))

        success = True
        start_time = time.perf_counter()
        try:
            # Bake the site sources.
            if not ctx.args.assets_only:
                success = success & self._bakeSources(ctx, out_dir)

            # Bake the assets.
            if not ctx.args.html_only:
                success = success & self._bakeAssets(ctx, out_dir)

            # All done.
            logger.info('-------------------------')
            logger.info(format_timed(start_time, 'done baking'))
            return 0 if success else 1
        except Exception as ex:
            if ctx.app.debug:
                logger.exception(ex)
            else:
                logger.error(str(ex))
            return 1

    def _bakeSources(self, ctx, out_dir):
        if ctx.args.workers > 0:
            ctx.app.config.set('baker/workers', ctx.args.workers)
        baker = Baker(
                ctx.app, out_dir,
                force=ctx.args.force)
        record = baker.bake()

        if ctx.args.show_timers:
            if record.timers:
                from colorama import Fore
                logger.info("-------------------")
                logger.info("Timing information:")
                for name in sorted(record.timers.keys()):
                    val_str = '%8.1f s' % record.timers[name]
                    logger.info(
                            "[%s%s%s] %s" %
                            (Fore.GREEN, val_str, Fore.RESET, name))
            else:
                logger.warning("Timing information is not available.")

        return record.success

    def _bakeAssets(self, ctx, out_dir):
        proc = ProcessorPipeline(
                ctx.app, out_dir,
                force=ctx.args.force)
        record = proc.run()
        return record.success


class ShowRecordCommand(ChefCommand):
    def __init__(self):
        super(ShowRecordCommand, self).__init__()
        self.name = 'showrecord'
        self.description = ("Shows the bake record for a given output "
                            "directory.")

    def setupParser(self, parser, app):
        parser.add_argument(
                '-o', '--output',
                help="The output directory for which to show the bake record "
                     "(defaults to `_counter`)",
                nargs='?')
        parser.add_argument(
                '-p', '--path',
                help="A pattern that will be used to filter the relative path "
                     "of entries to show.")
        parser.add_argument(
                '-t', '--out',
                help="A pattern that will be used to filter the output path "
                     "of entries to show.")
        parser.add_argument(
                '--last',
                type=int,
                default=0,
                help="Show the last Nth bake record.")

    def run(self, ctx):
        out_dir = ctx.args.output or os.path.join(ctx.app.root_dir,
                                                  '_counter')
        record_id = hashlib.md5(out_dir.encode('utf8')).hexdigest()
        suffix = '' if ctx.args.last == 0 else '.%d' % ctx.args.last
        record_name = '%s%s.record' % (record_id, suffix)

        pattern = None
        if ctx.args.path:
            pattern = '*%s*' % ctx.args.path.strip('*')

        out_pattern = None
        if ctx.args.out:
            out_pattern = '*%s*' % ctx.args.out.strip('*')

        record_cache = ctx.app.cache.getCache('baker')
        if not record_cache.has(record_name):
            raise Exception("No record has been created for this output "
                            "path. Did you bake there yet?")

        # Show the bake record.
        record = BakeRecord.load(record_cache.getCachePath(record_name))
        logging.info("Bake record for: %s" % record.out_dir)
        logging.info("From: %s" % record_name)
        logging.info("Last baked: %s" %
                     datetime.datetime.fromtimestamp(record.bake_time))
        if record.success:
            logging.info("Status: success")
        else:
            logging.error("Status: failed")
        logging.info("Entries:")
        for entry in record.entries:
            if pattern and not fnmatch.fnmatch(entry.rel_path, pattern):
                continue
            if out_pattern and not (
                    any([o for o in entry.out_paths
                         if fnmatch.fnmatch(o, out_pattern)])):
                continue

            flags = []
            if entry.flags & BakeRecordEntry.FLAG_OVERRIDEN:
                flags.append('overriden')
            passes = {PASS_RENDERING: 'render', PASS_FORMATTING: 'format'}

            logging.info(" - ")
            logging.info("   path:      %s" % entry.rel_path)
            logging.info("   spec:      %s:%s" % (entry.source_name,
                                                  entry.rel_path))
            if entry.taxonomy_info:
                tax_name, term, source_name = entry.taxonomy_info
                logging.info("   taxonomy:  %s (%s:%s)" %
                             (term, source_name, tax_name))
            else:
                logging.info("   taxonomy:  <none>")
            logging.info("   flags:     %s" % ', '.join(flags))
            logging.info("   config:    %s" % entry.config)

            logging.info("   %d sub-pages:" % len(entry.subs))
            for sub in entry.subs:
                logging.info("   - ")
                logging.info("     URL:    %s" % sub.out_uri)
                logging.info("     path:   %s" %
                             os.path.relpath(sub.out_path, out_dir))
                logging.info("     baked?: %s" % sub.was_baked)

                sub_flags = []
                if sub.flags & SubPageBakeInfo.FLAG_FORCED_BY_SOURCE:
                    sub_flags.append('forced by source')
                if sub.flags & SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS:
                    sub_flags.append('forced by missing previous record '
                                     'entry')
                if sub.flags & SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS:
                    sub_flags.append('forced by previous errors')
                logging.info("     flags:  %s" % ', '.join(sub_flags))

                for p, pi in sub.render_passes.items():
                    logging.info("     %s pass:" % passes[p])
                    logging.info("       used srcs:  %s" %
                                 ', '.join(pi.used_source_names))
                    logging.info("       used terms: %s" %
                                 ', '.join(
                                        ['%s (%s:%s)' % (t, sn, tn)
                                         for sn, tn, t in
                                         pi.used_taxonomy_terms]))

                if sub.errors:
                    logging.error("   errors: %s" % sub.errors)

            logging.info("   assets: %s" % ', '.join(entry.assets))
            if entry.errors:
                logging.error("   errors: %s" % entry.errors)

        record_cache = ctx.app.cache.getCache('proc')
        if not record_cache.has(record_name):
            return

        # Show the pipeline record.
        record = ProcessorPipelineRecord.load(
                record_cache.getCachePath(record_name))
        logging.info("")
        logging.info("Processing record for: %s" % record.out_dir)
        logging.info("Last baked: %s" %
                     datetime.datetime.fromtimestamp(record.process_time))
        if record.success:
            logging.info("Status: success")
        else:
            logging.error("Status: failed")
        logging.info("Entries:")
        for entry in record.entries:
            if pattern and not fnmatch.fnmatch(entry.rel_input, pattern):
                continue
            if out_pattern and not (
                    any([o for o in entry.rel_outputs
                         if fnmatch.fnmatch(o, out_pattern)])):
                continue

            flags = []
            if entry.flags & FLAG_PREPARED:
                flags.append('prepared')
            if entry.flags & FLAG_PROCESSED:
                flags.append('processed')
            if entry.flags & FLAG_OVERRIDEN:
                flags.append('overriden')
            if entry.flags & FLAG_BYPASSED_STRUCTURED_PROCESSING:
                flags.append('external')
            logger.info(" - ")
            logger.info("   path:      %s" % entry.rel_input)
            logger.info("   out paths: %s" % entry.rel_outputs)
            logger.info("   flags:     %s" % flags)
            logger.info("   proc tree: %s" % format_proc_tree(
                    entry.proc_tree, 14*' '))
            if entry.errors:
                logger.error("   errors: %s" % entry.errors)


def format_proc_tree(tree, margin='', level=0):
    name, children = tree
    res = '%s%s+ %s\n' % (margin if level > 0 else '', level * ' ', name)
    if children:
        for c in children:
            res += format_proc_tree(c, margin, level + 1)
    return res
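
For reference, here is what invoking the two commands defined in this file
might look like, assuming PieCrust's usual `chef` entry point (the filter
pattern is made up):

    # Bake with four worker processes and print the scoped timers:
    chef bake --workers 4 --show-timers

    # Show the previous bake record, filtering entries by relative path:
    chef showrecord --path posts --last 1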