piecrust2: piecrust/baking/baker.py @ 837:ad8f48a31c62
assets: Fix crash when a page doesn't have assets.
| author   | Ludovic Chabant <ludovic@chabant.com> |
|----------|---------------------------------------|
| date     | Sun, 05 Feb 2017 22:52:01 -0800       |
| parents  | 9a92e2804562                          |
| children | 4850f8c21b6e                          |
import time
import os.path
import hashlib
import logging
from piecrust.baking.records import (
        BakeRecordEntry, TransitionalBakeRecord)
from piecrust.baking.worker import (
        save_factory, JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE)
from piecrust.chefutil import (
        format_timed_scope, format_timed)
from piecrust.environment import ExecutionStats
from piecrust.generation.base import PageGeneratorBakeContext
from piecrust.routing import create_route_metadata
from piecrust.sources.base import (
        REALM_NAMES, REALM_USER, REALM_THEME)


logger = logging.getLogger(__name__)


class Baker(object):
    def __init__(self, app, out_dir, force=False,
                 applied_config_variant=None,
                 applied_config_values=None):
        assert app and out_dir
        self.app = app
        self.out_dir = out_dir
        self.force = force
        self.applied_config_variant = applied_config_variant
        self.applied_config_values = applied_config_values

        # Remember what generator pages we should skip.
        self.generator_pages = []
        logger.debug("Gathering generator page paths:")
        for gen in self.app.generators:
            for path in gen.page_ref.possible_paths:
                self.generator_pages.append(path)
                logger.debug(" - %s" % path)

        # Register some timers.
        self.app.env.registerTimer('LoadJob', raise_if_registered=False)
        self.app.env.registerTimer('RenderFirstSubJob',
                                   raise_if_registered=False)
        self.app.env.registerTimer('BakeJob', raise_if_registered=False)

    def bake(self):
        logger.debug(" Bake Output: %s" % self.out_dir)
        logger.debug(" Root URL: %s" % self.app.config.get('site/root'))

        # Get into bake mode.
        start_time = time.perf_counter()
        self.app.config.set('baker/is_baking', True)
        self.app.env.base_asset_url_format = '%uri%'

        # Make sure the output directory exists.
        if not os.path.isdir(self.out_dir):
            os.makedirs(self.out_dir, 0o755)

        # Load/create the bake record.
        record = TransitionalBakeRecord()
        record_cache = self.app.cache.getCache('baker')
        record_id = hashlib.md5(self.out_dir.encode('utf8')).hexdigest()
        record_name = record_id + '.record'
        previous_record_path = None
        if not self.force and record_cache.has(record_name):
            with format_timed_scope(logger, "loaded previous bake record",
                                    level=logging.DEBUG, colored=False):
                previous_record_path = record_cache.getCachePath(record_name)
                record.loadPrevious(previous_record_path)
        record.current.success = True

        # Figure out if we need to clean the cache because important things
        # have changed.
        is_cache_valid = self._handleCacheValidity(record)
        if not is_cache_valid:
            previous_record_path = None

        # Pre-create all caches.
        for cache_name in ['app', 'baker', 'pages', 'renders']:
            self.app.cache.getCache(cache_name)

        # Gather all sources by realm -- we're going to bake each realm
        # separately so we can handle "overriding" (i.e. one realm overrides
        # another realm's pages, like the user realm overriding the theme
        # realm).
        sources_by_realm = {}
        for source in self.app.sources:
            srclist = sources_by_realm.setdefault(source.realm, [])
            srclist.append(source)

        # Create the worker processes.
        pool = self._createWorkerPool(previous_record_path)

        # Bake the realms.
        realm_list = [REALM_USER, REALM_THEME]
        for realm in realm_list:
            srclist = sources_by_realm.get(realm)
            if srclist is not None:
                self._bakeRealm(record, pool, realm, srclist)

        # Call all the page generators.
        self._bakePageGenerators(record, pool)

        # All done with the workers. Close the pool and get reports.
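        # (Each report below is a dictionary returned by one worker process;
        # its 'data' entry carries that worker's `ExecutionStats`, or `None`
        # if the worker had none to report.)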
        reports = pool.close()
        total_stats = ExecutionStats()
        record.current.stats['_Total'] = total_stats
        for i in range(len(reports)):
            worker_stats = reports[i]['data']
            if worker_stats is not None:
                worker_name = 'BakeWorker_%d' % i
                record.current.stats[worker_name] = worker_stats
                total_stats.mergeStats(worker_stats)

        # Delete files from the output.
        self._handleDeletetions(record)

        # Backup previous records.
        for i in range(8, -1, -1):
            suffix = '' if i == 0 else '.%d' % i
            record_path = record_cache.getCachePath(
                    '%s%s.record' % (record_id, suffix))
            if os.path.exists(record_path):
                record_path_next = record_cache.getCachePath(
                        '%s.%s.record' % (record_id, i + 1))
                if os.path.exists(record_path_next):
                    os.remove(record_path_next)
                os.rename(record_path, record_path_next)

        # Save the bake record.
        with format_timed_scope(logger, "saved bake record.",
                                level=logging.DEBUG, colored=False):
            record.current.bake_time = time.time()
            record.current.out_dir = self.out_dir
            record.saveCurrent(record_cache.getCachePath(record_name))

        # All done.
        self.app.config.set('baker/is_baking', False)
        logger.debug(format_timed(start_time, 'done baking'))

        return record.detach()

    def _handleCacheValidity(self, record):
        start_time = time.perf_counter()

        reason = None
        if self.force:
            reason = "ordered to"
        elif not self.app.config.get('__cache_valid'):
            # The configuration file was changed, or we're running a new
            # version of the app.
            reason = "not valid anymore"
        elif (not record.previous.bake_time or
                not record.previous.hasLatestVersion()):
            # We have no valid previous bake record.
            reason = "need bake record regeneration"
        else:
            # Check if any template has changed since the last bake. Since
            # there could be some advanced conditional logic going on, we'd
            # better just force a bake from scratch if that's the case.
            max_time = 0
            for d in self.app.templates_dirs:
                for dpath, _, filenames in os.walk(d):
                    for fn in filenames:
                        full_fn = os.path.join(dpath, fn)
                        max_time = max(max_time, os.path.getmtime(full_fn))
            if max_time >= record.previous.bake_time:
                reason = "templates modified"

        if reason is not None:
            # We have to bake everything from scratch.
            self.app.cache.clearCaches(except_names=['app', 'baker'])
            self.force = True
            record.incremental_count = 0
            record.clearPrevious()
            logger.info(format_timed(
                    start_time, "cleaned cache (reason: %s)" % reason))
            return False
        else:
            record.incremental_count += 1
            logger.debug(format_timed(
                    start_time, "cache is assumed valid", colored=False))
            return True

    def _bakeRealm(self, record, pool, realm, srclist):
        start_time = time.perf_counter()
        try:
            record.current.baked_count[realm] = 0
            record.current.total_baked_count[realm] = 0

            all_factories = []
            for source in srclist:
                factories = source.getPageFactories()
                all_factories += [f for f in factories
                                  if f.path not in self.generator_pages]

            self._loadRealmPages(record, pool, all_factories)
            self._renderRealmPages(record, pool, all_factories)
            self._bakeRealmPages(record, pool, realm, all_factories)
        finally:
            page_count = record.current.baked_count[realm]
            total_page_count = record.current.total_baked_count[realm]
            logger.info(format_timed(
                    start_time,
                    "baked %d %s pages (%d total)." %
                    (page_count, REALM_NAMES[realm].lower(),
                     total_page_count)))

    def _loadRealmPages(self, record, pool, factories):
        def _handler(res):
            # Create the record entry for this page.
            # This will also update the `dirty_source_names` for the record
            # as we add page files whose last modification times are later
            # than the last bake.
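            # (`res` is the result payload of a JOB_LOAD job: the page's
            # source name and path, its parsed config and timestamp, plus
            # any errors encountered while loading it.)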
            record_entry = BakeRecordEntry(res['source_name'], res['path'])
            record_entry.config = res['config']
            record_entry.timestamp = res['timestamp']
            if res['errors']:
                record_entry.errors += res['errors']
                record.current.success = False
                self._logErrors(res['path'], res['errors'])
            record.addEntry(record_entry)

        logger.debug("Loading %d realm pages..." % len(factories))
        with format_timed_scope(logger,
                                "loaded %d pages" % len(factories),
                                level=logging.DEBUG, colored=False,
                                timer_env=self.app.env,
                                timer_category='LoadJob'):
            jobs = []
            for fac in factories:
                job = {
                        'type': JOB_LOAD,
                        'job': save_factory(fac)}
                jobs.append(job)
            ar = pool.queueJobs(jobs, handler=_handler)
            ar.wait()

    def _renderRealmPages(self, record, pool, factories):
        def _handler(res):
            entry = record.getCurrentEntry(res['path'])
            if res['errors']:
                entry.errors += res['errors']
                record.current.success = False
                self._logErrors(res['path'], res['errors'])

        logger.debug("Rendering %d realm pages..." % len(factories))
        with format_timed_scope(logger,
                                "prepared %d pages" % len(factories),
                                level=logging.DEBUG, colored=False,
                                timer_env=self.app.env,
                                timer_category='RenderFirstSubJob'):
            jobs = []
            for fac in factories:
                record_entry = record.getCurrentEntry(fac.path)
                if record_entry.errors:
                    logger.debug("Ignoring %s because it had previous "
                                 "errors." % fac.ref_spec)
                    continue

                # Make sure the source and the route exist for this page,
                # otherwise we add errors to the record entry and we'll skip
                # this page for the rest of the bake.
                source = self.app.getSource(fac.source.name)
                if source is None:
                    record_entry.errors.append(
                            "Can't get source for page: %s" % fac.ref_spec)
                    logger.error(record_entry.errors[-1])
                    continue

                route = self.app.getSourceRoute(fac.source.name,
                                                fac.metadata)
                if route is None:
                    record_entry.errors.append(
                            "Can't get route for page: %s" % fac.ref_spec)
                    logger.error(record_entry.errors[-1])
                    continue

                # All good, queue the job.
                route_index = self.app.routes.index(route)
                job = {
                        'type': JOB_RENDER_FIRST,
                        'job': {
                            'factory_info': save_factory(fac),
                            'route_index': route_index
                            }
                        }
                jobs.append(job)

            ar = pool.queueJobs(jobs, handler=_handler)
            ar.wait()

    def _bakeRealmPages(self, record, pool, realm, factories):
        def _handler(res):
            entry = record.getCurrentEntry(res['path'])
            entry.subs = res['sub_entries']
            if res['errors']:
                entry.errors += res['errors']
                self._logErrors(res['path'], res['errors'])
            if entry.has_any_error:
                record.current.success = False
            if entry.subs and entry.was_any_sub_baked:
                record.current.baked_count[realm] += 1
                record.current.total_baked_count[realm] += len(entry.subs)

        logger.debug("Baking %d realm pages..." % len(factories))
        with format_timed_scope(logger,
                                "baked %d pages" % len(factories),
                                level=logging.DEBUG, colored=False,
                                timer_env=self.app.env,
                                timer_category='BakeJob'):
            jobs = []
            for fac in factories:
                job = self._makeBakeJob(record, fac)
                if job is not None:
                    jobs.append(job)

            ar = pool.queueJobs(jobs, handler=_handler)
            ar.wait()

    def _bakePageGenerators(self, record, pool):
        for gen in self.app.generators:
            ctx = PageGeneratorBakeContext(self.app, record, pool, gen)
            gen.bake(ctx)

    def _makeBakeJob(self, record, fac):
        # Get the previous (if any) and current entry for this page.
        pair = record.getPreviousAndCurrentEntries(fac.path)
        assert pair is not None
        prev_entry, cur_entry = pair
        assert cur_entry is not None

        # Ignore if there were errors in the previous passes.
        if cur_entry.errors:
            logger.debug("Ignoring %s because it had previous "
                         "errors." % fac.ref_spec)
            return None

        # Build the route metadata and find the appropriate route.
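        # (The page must be instantiated first: its source metadata is what
        # `create_route_metadata` uses to resolve the route and final URI.)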
        page = fac.buildPage()
        route_metadata = create_route_metadata(page)
        route = self.app.getSourceRoute(fac.source.name, route_metadata)
        assert route is not None

        # Figure out if this page is overridden by another previously
        # baked page. This happens for example when the user has
        # made a page that has the same page/URL as a theme page.
        uri = route.getUri(route_metadata)
        override_entry = record.getOverrideEntry(page.path, uri)
        if override_entry is not None:
            override_source = self.app.getSource(
                    override_entry.source_name)
            if override_source.realm == fac.source.realm:
                cur_entry.errors.append(
                        "Page '%s' maps to URL '%s' but is overridden "
                        "by page '%s'." %
                        (fac.ref_spec, uri, override_entry.path))
                logger.error(cur_entry.errors[-1])
            cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN
            return None

        route_index = self.app.routes.index(route)
        job = {
                'type': JOB_BAKE,
                'job': {
                    'factory_info': save_factory(fac),
                    'generator_name': None,
                    'generator_record_key': None,
                    'route_index': route_index,
                    'route_metadata': route_metadata,
                    'dirty_source_names': record.dirty_source_names
                    }
                }
        return job

    def _handleDeletetions(self, record):
        logger.debug("Handling deletions...")
        for path, reason in record.getDeletions():
            logger.debug("Removing '%s': %s" % (path, reason))
            record.current.deleted.append(path)
            try:
                os.remove(path)
                logger.info('[delete] %s' % path)
            except OSError:
                # Not a big deal if that file had already been removed
                # by the user.
                pass

    def _logErrors(self, path, errors):
        rel_path = os.path.relpath(path, self.app.root_dir)
        logger.error("Errors found in %s:" % rel_path)
        for e in errors:
            logger.error(" " + e)

    def _createWorkerPool(self, previous_record_path):
        from piecrust.app import PieCrustFactory
        from piecrust.workerpool import WorkerPool
        from piecrust.baking.worker import BakeWorkerContext, BakeWorker

        appfactory = PieCrustFactory(
                self.app.root_dir,
                cache=self.app.cache.enabled,
                cache_key=self.app.cache_key,
                config_variant=self.applied_config_variant,
                config_values=self.applied_config_values,
                debug=self.app.debug,
                theme_site=self.app.theme_site)

        worker_count = self.app.config.get('baker/workers')
        batch_size = self.app.config.get('baker/batch_size')

        ctx = BakeWorkerContext(
                appfactory,
                self.out_dir,
                force=self.force,
                previous_record_path=previous_record_path)
        pool = WorkerPool(
                worker_count=worker_count,
                batch_size=batch_size,
                worker_class=BakeWorker,
                initargs=(ctx,))
        return pool
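
For context, a minimal driver for this class looks roughly like the sketch below. This is illustrative only: the usual entry point is PieCrust's `chef bake` command, and the sketch assumes `app` is an already-initialized `PieCrust` application and that the record returned by `bake()` still exposes the `success` flag set on `record.current` above.

    from piecrust.baking.baker import Baker

    baker = Baker(app, '/path/to/output', force=False)
    record = baker.bake()   # runs the full bake and returns the bake record
    if not record.success:  # assumes the detached record keeps its `success` flag
        print("Bake finished with errors; see the log for details.")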