Mercurial > piecrust2
changeset 687:61d606fbc313
bake: Change `show-timers` to `show-stats`, add stats.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Mon, 14 Mar 2016 08:26:14 -0700 |
parents | 1a6c4c2683fd |
children | e86568a7434d |
files | piecrust/baking/baker.py piecrust/baking/records.py piecrust/baking/worker.py piecrust/cache.py piecrust/commands/builtin/baking.py piecrust/environment.py piecrust/processing/pipeline.py piecrust/processing/records.py piecrust/processing/worker.py piecrust/records.py piecrust/workerpool.py |
diffstat | 11 files changed, 189 insertions(+), 95 deletions(-) [+] |
line wrap: on
line diff
--- a/piecrust/baking/baker.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/baking/baker.py Mon Mar 14 08:26:14 2016 -0700 @@ -9,6 +9,7 @@ JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE) from piecrust.chefutil import ( format_timed_scope, format_timed) +from piecrust.environment import ExecutionStats from piecrust.routing import create_route_metadata from piecrust.sources.base import ( REALM_NAMES, REALM_USER, REALM_THEME) @@ -103,20 +104,16 @@ # Bake taxonomies. self._bakeTaxonomies(record, pool) - # All done with the workers. Close the pool and get timing reports. + # All done with the workers. Close the pool and get reports. reports = pool.close() - record.current.timers = {} + total_stats = ExecutionStats() + record.current.stats['_Total'] = total_stats for i in range(len(reports)): - timers = reports[i] - if timers is None: - continue - - worker_name = 'BakeWorker_%d' % i - record.current.timers[worker_name] = {} - for name, val in timers['data'].items(): - main_val = record.current.timers.setdefault(name, 0) - record.current.timers[name] = main_val + val - record.current.timers[worker_name][name] = val + worker_stats = reports[i]['data'] + if worker_stats is not None: + worker_name = 'BakeWorker_%d' % i + record.current.stats[worker_name] = worker_stats + total_stats.mergeStats(worker_stats) # Delete files from the output. self._handleDeletetions(record)
--- a/piecrust/baking/records.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/baking/records.py Mon Mar 14 08:26:14 2016 -0700 @@ -21,14 +21,13 @@ class BakeRecord(Record): - RECORD_VERSION = 14 + RECORD_VERSION = 16 def __init__(self): super(BakeRecord, self).__init__() self.out_dir = None self.bake_time = None self.baked_count = {} - self.timers = None self.success = True
--- a/piecrust/baking/worker.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/baking/worker.py Mon Mar 14 08:26:14 2016 -0700 @@ -41,6 +41,9 @@ app.env.registerTimer("BakeWorker_%d_Total" % self.wid) app.env.registerTimer("BakeWorkerInit") app.env.registerTimer("JobReceive") + app.env.registerManifest("LoadJobs") + app.env.registerManifest("RenderJobs") + app.env.registerManifest("BakeJobs") self.ctx.app = app # Load previous record @@ -71,9 +74,10 @@ def getReport(self): self.ctx.app.env.stepTimerSince("BakeWorker_%d_Total" % self.wid, self.work_start_time) + data = self.ctx.app.env.getStats() return { - 'type': 'timers', - 'data': self.ctx.app.env._timers} + 'type': 'stats', + 'data': data} JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE = range(0, 3) @@ -116,6 +120,7 @@ # Just make sure the page has been cached. fac = load_factory(self.app, job) logger.debug("Loading page: %s" % fac.ref_spec) + self.app.env.addManifestEntry('LoadJobs', fac.ref_spec) result = { 'source_name': fac.source.name, 'path': fac.path, @@ -137,6 +142,7 @@ def handleJob(self, job): # Render the segments for the first sub-page of this page. fac = load_factory(self.app, job) + self.app.env.addManifestEntry('RenderJobs', fac.ref_spec) # These things should be OK as they're checked upstream by the baker. route = self.app.getRoute(fac.source.name, fac.metadata, @@ -177,6 +183,7 @@ def handleJob(self, job): # Actually bake the page and all its sub-pages to the output folder. fac = load_factory(self.app, job['factory_info']) + self.app.env.addManifestEntry('BakeJobs', fac.ref_spec) route_metadata = job['route_metadata'] tax_info = job['taxonomy_info']
--- a/piecrust/cache.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/cache.py Mon Mar 14 08:26:14 2016 -0700 @@ -162,6 +162,9 @@ self.fs_cache = None self._last_access_hit = None self._invalidated_fs_items = set() + self._missed_keys = [] + self._misses = 0 + self._hits = 0 @property def last_access_hit(self): @@ -185,32 +188,38 @@ def get(self, key, item_maker, fs_cache_time=None, save_to_fs=True): self._last_access_hit = True item = self.cache.get(key) - if item is None: - if (self.fs_cache is not None and - fs_cache_time is not None): - # Try first from the file-system cache. - fs_key = _make_fs_cache_key(key) - if (fs_key not in self._invalidated_fs_items and - self.fs_cache.isValid(fs_key, fs_cache_time)): - logger.debug("'%s' found in file-system cache." % - key) - item_raw = self.fs_cache.read(fs_key) - item = json.loads( - item_raw, - object_pairs_hook=collections.OrderedDict) - self.cache.put(key, item) - return item + if item is not None: + self._hits += 1 + return item - # Look into the mem-cache. - logger.debug("'%s' not found in cache, must build." % key) - item = item_maker() - self.cache.put(key, item) - self._last_access_hit = False + if (self.fs_cache is not None and + fs_cache_time is not None): + # Try first from the file-system cache. + fs_key = _make_fs_cache_key(key) + if (fs_key not in self._invalidated_fs_items and + self.fs_cache.isValid(fs_key, fs_cache_time)): + logger.debug("'%s' found in file-system cache." % + key) + item_raw = self.fs_cache.read(fs_key) + item = json.loads( + item_raw, + object_pairs_hook=collections.OrderedDict) + self.cache.put(key, item) + self._hits += 1 + return item - # Save to the file-system if needed. - if self.fs_cache is not None and save_to_fs: - item_raw = json.dumps(item) - self.fs_cache.write(fs_key, item_raw) + # Look into the mem-cache. + logger.debug("'%s' not found in cache, must build." % key) + item = item_maker() + self.cache.put(key, item) + self._last_access_hit = False + self._misses += 1 + self._missed_keys.append(key) + + # Save to the file-system if needed. + if self.fs_cache is not None and save_to_fs: + item_raw = json.dumps(item) + self.fs_cache.write(fs_key, item_raw) return item
--- a/piecrust/commands/builtin/baking.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/commands/builtin/baking.py Mon Mar 14 08:26:14 2016 -0700 @@ -11,6 +11,7 @@ BakeRecord, BakeRecordEntry, SubPageBakeInfo) from piecrust.chefutil import format_timed from piecrust.commands.base import ChefCommand +from piecrust.environment import ExecutionStats from piecrust.processing.pipeline import ProcessorPipeline from piecrust.processing.records import ( ProcessorPipelineRecord, @@ -55,8 +56,8 @@ help="Only bake the pages (don't run the asset pipeline).", action='store_true') parser.add_argument( - '--show-timers', - help="Show detailed timing information.", + '--show-stats', + help="Show detailed information about the bake.", action='store_true') def run(self, ctx): @@ -64,7 +65,7 @@ os.path.join(ctx.app.root_dir, '_counter')) success = True - ctx.timers = {} + ctx.stats = {} start_time = time.perf_counter() try: # Bake the site sources. @@ -75,11 +76,11 @@ if not ctx.args.html_only: success = success & self._bakeAssets(ctx, out_dir) - # Show merged timers. - if ctx.args.show_timers: + # Show merged stats. + if ctx.args.show_stats: logger.info("-------------------") logger.info("Timing information:") - _show_timers(ctx.timers) + _show_stats(ctx.stats) # All done. logger.info('-------------------------') @@ -103,7 +104,7 @@ applied_config_variant=ctx.config_variant, applied_config_values=ctx.config_values) record = baker.bake() - _merge_timers(record.timers, ctx.timers) + _merge_stats(record.stats, ctx.stats) return record.success def _bakeAssets(self, ctx, out_dir): @@ -113,39 +114,48 @@ applied_config_variant=ctx.config_variant, applied_config_values=ctx.config_values) record = proc.run() - _merge_timers(record.timers, ctx.timers) + _merge_stats(record.stats, ctx.stats) return record.success -def _merge_timers(source, target): +def _merge_stats(source, target): if source is None: return for name, val in source.items(): - if isinstance(val, float): - if name not in target: - target[name] = 0 - target[name] += val - elif isinstance(val, dict): - if name not in target: - target[name] = {} - _merge_timers(val, target[name]) + if name not in target: + target[name] = ExecutionStats() + target[name].mergeStats(val) -def _show_timers(timers, indent=''): - sub_timer_names = [] - for name in sorted(timers.keys()): - if isinstance(timers[name], float): - val_str = '%8.1f s' % timers[name] +def _show_stats(stats, full=False): + indent = ' ' + for name in sorted(stats.keys()): + logger.info('%s:' % name) + s = stats[name] + + logger.info(' Timers:') + for name, val in s.timers.items(): + val_str = '%8.1f s' % val logger.info( "%s[%s%s%s] %s" % (indent, Fore.GREEN, val_str, Fore.RESET, name)) - else: - sub_timer_names.append(name) + + logger.info(' Counters:') + for name, val in s.counters.items(): + val_str = '%8d ' % val + logger.info( + "%s[%s%s%s] %s" % + (indent, Fore.GREEN, val_str, Fore.RESET, name)) - for name in sub_timer_names: - logger.info('%s:' % name) - _show_timers(timers[name], indent + ' ') + logger.info(' Manifests:') + for name, val in s.manifests.items(): + logger.info( + "%s[%s%s%s] [%d entries]" % + (indent, Fore.CYAN, name, Fore.RESET, len(val))) + if full: + for v in val: + logger.info("%s - %s" % (indent, v)) class ShowRecordCommand(ChefCommand): @@ -278,7 +288,7 @@ PASS_FORMATTING: 'formatting pass', PASS_RENDERING: 'rendering pass'} for p, ri in sub.render_info.items(): - logging.info(" - %s" % pass_names[p]) + logging.info(" - %s" % p) logging.info(" used sources: %s" % _join(ri.used_source_names)) pgn_info = 'no'
--- a/piecrust/environment.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/environment.py Mon Mar 14 08:26:14 2016 -0700 @@ -52,6 +52,60 @@ self._page_stack = [] +class ExecutionStats(object): + def __init__(self): + self.timers = {} + self.counters = {} + self.manifests = {} + + def registerTimer(self, category, *, raise_if_registered=True): + if raise_if_registered and category in self.timers: + raise Exception("Timer '%s' has already been registered." % + category) + self.timers[category] = 0 + + @contextlib.contextmanager + def timerScope(self, category): + start = time.perf_counter() + yield + self.timers[category] += time.perf_counter() - start + + def stepTimer(self, category, value): + self.timers[category] += value + + def stepTimerSince(self, category, since): + self.stepTimer(category, time.perf_counter() - since) + + def registerCounter(self, category, *, raise_if_registered=True): + if raise_if_registered and category in self.counters: + raise Exception("Counter '%s' has already been registered." % + category) + self.counters[category] = 0 + + def stepCounter(self, category, inc=1): + self.counters[category] += inc + + def registerManifest(self, name, *, raise_if_registered=True): + if raise_if_registered and name in self.manifests: + raise Exception("Manifest '%s' has already been registered." % + name) + self.manifests[name] = [] + + def addManifestEntry(self, name, entry): + self.manifests[name].append(entry) + + def mergeStats(self, other): + for oc, ov in other.timers.items(): + v = self.timers.setdefault(oc, 0) + self.timers[oc] = v + ov + for oc, ov in other.counters.items(): + v = self.counters.setdefault(oc, 0) + self.counters[oc] = v + ov + for oc, ov in other.manifests.items(): + v = self.manifests.setdefault(oc, []) + self.manifests[oc] = v + ov + + class Environment(object): def __init__(self): self.app = None @@ -66,7 +120,7 @@ self.fs_cache_only_for_main_page = False self.abort_source_use = False self._default_layout_extensions = None - self._timers = {} + self._stats = ExecutionStats() @property def default_layout_extensions(self): @@ -94,22 +148,41 @@ repo.fs_cache = cache def registerTimer(self, category, *, raise_if_registered=True): - if raise_if_registered and category in self._timers: - raise Exception("Timer '%s' has already been registered." % - category) - self._timers[category] = 0 + self._stats.registerTimer( + category, raise_if_registered=raise_if_registered) - @contextlib.contextmanager def timerScope(self, category): - start = time.perf_counter() - yield - self._timers[category] += time.perf_counter() - start + return self._stats.timerScope(category) def stepTimer(self, category, value): - self._timers[category] += value + self._stats.stepTimer(category, value) def stepTimerSince(self, category, since): - self.stepTimer(category, time.perf_counter() - since) + self._stats.stepTimerSince(category, since) + + def registerCounter(self, category, *, raise_if_registered=True): + self._stats.registerCounter( + category, raise_if_registered=raise_if_registered) + + def stepCounter(self, category, inc=1): + self._stats.stepCounter(category, inc) + + def registerManifest(self, name, *, raise_if_registered=True): + self._stats.registerManifest( + name, raise_if_registered=raise_if_registered) + + def addManifestEntry(self, name, entry): + self._stats.addManifestEntry(name, entry) + + def getStats(self): + repos = [ + ('RenderedSegmentsRepo', self.rendered_segments_repository), + ('PagesRepo', self.page_repository)] + for name, repo in repos: + self._stats.counters['%s_hit' % name] = repo._hits + self._stats.counters['%s_miss' % name] = repo._misses + self._stats.manifests['%s_missedKeys' % name] = list(repo._missed_keys) + return self._stats class StandardEnvironment(Environment):
--- a/piecrust/processing/pipeline.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/processing/pipeline.py Mon Mar 14 08:26:14 2016 -0700 @@ -6,6 +6,7 @@ import logging import multiprocessing from piecrust.chefutil import format_timed, format_timed_scope +from piecrust.environment import ExecutionStats from piecrust.processing.base import PipelineContext from piecrust.processing.records import ( ProcessorPipelineRecordEntry, TransitionalProcessorPipelineRecord, @@ -54,7 +55,6 @@ ignores = baker_params.get('ignore', []) ignores += [ '_cache', '_counter', - 'theme_info.yml', '.DS_Store', 'Thumbs.db', '.git*', '.hg*', '.svn'] self.ignore_patterns = make_re(ignores) @@ -148,18 +148,14 @@ # Shutdown the workers and get timing information from them. reports = pool.close() - record.current.timers = {} + total_stats = ExecutionStats() + record.current.stats['_Total'] = total_stats for i in range(len(reports)): - timers = reports[i] - if timers is None: - continue - - worker_name = 'PipelineWorker_%d' % i - record.current.timers[worker_name] = {} - for name, val in timers['data'].items(): - main_val = record.current.timers.setdefault(name, 0) - record.current.timers[name] = main_val + val - record.current.timers[worker_name][name] = val + worker_stats = reports[i]['data'] + if worker_stats is not None: + worker_name = 'PipelineWorker_%d' % i + record.current.stats[worker_name] = worker_stats + total_stats.mergeStats(worker_stats) # Invoke post-processors. pipeline_ctx.record = record.current
--- a/piecrust/processing/records.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/processing/records.py Mon Mar 14 08:26:14 2016 -0700 @@ -4,7 +4,7 @@ class ProcessorPipelineRecord(Record): - RECORD_VERSION = 5 + RECORD_VERSION = 6 def __init__(self): super(ProcessorPipelineRecord, self).__init__() @@ -12,7 +12,6 @@ self.process_time = None self.processed_count = 0 self.success = False - self.timers = None FLAG_NONE = 0
--- a/piecrust/processing/worker.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/processing/worker.py Mon Mar 14 08:26:14 2016 -0700 @@ -144,9 +144,10 @@ self.app.env.stepTimerSince("PipelineWorker_%d_Total" % self.wid, self.work_start_time) + data = self.app.env.getStats() return { - 'type': 'timers', - 'data': self.app.env._timers} + 'type': 'stats', + 'data': data} def get_filtered_processors(processors, authorized_names):
--- a/piecrust/records.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/records.py Mon Mar 14 08:26:14 2016 -0700 @@ -15,6 +15,7 @@ self.entry_added = Event() self.app_version = APP_VERSION self.record_version = self.__class__.RECORD_VERSION + self.stats = {} def hasLatestVersion(self): return (self.app_version == APP_VERSION and
--- a/piecrust/workerpool.py Sun Mar 13 16:55:12 2016 -0700 +++ b/piecrust/workerpool.py Mon Mar 14 08:26:14 2016 -0700 @@ -85,6 +85,7 @@ try: rep = (task_type, True, wid, (wid, w.getReport())) except Exception as e: + logger.debug("Error getting report: %s" % e) if params.wrap_exception: e = multiprocessing.ExceptionWithTraceback( e, e.__traceback__) @@ -265,6 +266,7 @@ if pool._error_callback: pool._error_callback(data) else: + logger.error("Got error data:") logger.error(data) except Exception as ex: logger.exception(ex)