changeset 687:61d606fbc313

bake: Change `show-timers` to `show-stats`, add stats.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 14 Mar 2016 08:26:14 -0700
parents 1a6c4c2683fd
children e86568a7434d
files piecrust/baking/baker.py piecrust/baking/records.py piecrust/baking/worker.py piecrust/cache.py piecrust/commands/builtin/baking.py piecrust/environment.py piecrust/processing/pipeline.py piecrust/processing/records.py piecrust/processing/worker.py piecrust/records.py piecrust/workerpool.py
diffstat 11 files changed, 189 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/piecrust/baking/baker.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/baking/baker.py	Mon Mar 14 08:26:14 2016 -0700
@@ -9,6 +9,7 @@
         JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE)
 from piecrust.chefutil import (
         format_timed_scope, format_timed)
+from piecrust.environment import ExecutionStats
 from piecrust.routing import create_route_metadata
 from piecrust.sources.base import (
         REALM_NAMES, REALM_USER, REALM_THEME)
@@ -103,20 +104,16 @@
         # Bake taxonomies.
         self._bakeTaxonomies(record, pool)
 
-        # All done with the workers. Close the pool and get timing reports.
+        # All done with the workers. Close the pool and get reports.
         reports = pool.close()
-        record.current.timers = {}
+        total_stats = ExecutionStats()
+        record.current.stats['_Total'] = total_stats
         for i in range(len(reports)):
-            timers = reports[i]
-            if timers is None:
-                continue
-
-            worker_name = 'BakeWorker_%d' % i
-            record.current.timers[worker_name] = {}
-            for name, val in timers['data'].items():
-                main_val = record.current.timers.setdefault(name, 0)
-                record.current.timers[name] = main_val + val
-                record.current.timers[worker_name][name] = val
+            worker_stats = reports[i]['data']
+            if worker_stats is not None:
+                worker_name = 'BakeWorker_%d' % i
+                record.current.stats[worker_name] = worker_stats
+                total_stats.mergeStats(worker_stats)
 
         # Delete files from the output.
         self._handleDeletetions(record)
--- a/piecrust/baking/records.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/baking/records.py	Mon Mar 14 08:26:14 2016 -0700
@@ -21,14 +21,13 @@
 
 
 class BakeRecord(Record):
-    RECORD_VERSION = 14
+    RECORD_VERSION = 16
 
     def __init__(self):
         super(BakeRecord, self).__init__()
         self.out_dir = None
         self.bake_time = None
         self.baked_count = {}
-        self.timers = None
         self.success = True
 
 
--- a/piecrust/baking/worker.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/baking/worker.py	Mon Mar 14 08:26:14 2016 -0700
@@ -41,6 +41,9 @@
         app.env.registerTimer("BakeWorker_%d_Total" % self.wid)
         app.env.registerTimer("BakeWorkerInit")
         app.env.registerTimer("JobReceive")
+        app.env.registerManifest("LoadJobs")
+        app.env.registerManifest("RenderJobs")
+        app.env.registerManifest("BakeJobs")
         self.ctx.app = app
 
         # Load previous record
@@ -71,9 +74,10 @@
     def getReport(self):
         self.ctx.app.env.stepTimerSince("BakeWorker_%d_Total" % self.wid,
                                         self.work_start_time)
+        data = self.ctx.app.env.getStats()
         return {
-                'type': 'timers',
-                'data': self.ctx.app.env._timers}
+                'type': 'stats',
+                'data': data}
 
 
 JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE = range(0, 3)
@@ -116,6 +120,7 @@
         # Just make sure the page has been cached.
         fac = load_factory(self.app, job)
         logger.debug("Loading page: %s" % fac.ref_spec)
+        self.app.env.addManifestEntry('LoadJobs', fac.ref_spec)
         result = {
                 'source_name': fac.source.name,
                 'path': fac.path,
@@ -137,6 +142,7 @@
     def handleJob(self, job):
         # Render the segments for the first sub-page of this page.
         fac = load_factory(self.app, job)
+        self.app.env.addManifestEntry('RenderJobs', fac.ref_spec)
 
         # These things should be OK as they're checked upstream by the baker.
         route = self.app.getRoute(fac.source.name, fac.metadata,
@@ -177,6 +183,7 @@
     def handleJob(self, job):
         # Actually bake the page and all its sub-pages to the output folder.
         fac = load_factory(self.app, job['factory_info'])
+        self.app.env.addManifestEntry('BakeJobs', fac.ref_spec)
 
         route_metadata = job['route_metadata']
         tax_info = job['taxonomy_info']
--- a/piecrust/cache.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/cache.py	Mon Mar 14 08:26:14 2016 -0700
@@ -162,6 +162,9 @@
         self.fs_cache = None
         self._last_access_hit = None
         self._invalidated_fs_items = set()
+        self._missed_keys = []
+        self._misses = 0
+        self._hits = 0
 
     @property
     def last_access_hit(self):
@@ -185,32 +188,38 @@
     def get(self, key, item_maker, fs_cache_time=None, save_to_fs=True):
         self._last_access_hit = True
         item = self.cache.get(key)
-        if item is None:
-            if (self.fs_cache is not None and
-                    fs_cache_time is not None):
-                # Try first from the file-system cache.
-                fs_key = _make_fs_cache_key(key)
-                if (fs_key not in self._invalidated_fs_items and
-                        self.fs_cache.isValid(fs_key, fs_cache_time)):
-                    logger.debug("'%s' found in file-system cache." %
-                                 key)
-                    item_raw = self.fs_cache.read(fs_key)
-                    item = json.loads(
-                            item_raw,
-                            object_pairs_hook=collections.OrderedDict)
-                    self.cache.put(key, item)
-                    return item
+        if item is not None:
+            self._hits += 1
+            return item
 
-            # Look into the mem-cache.
-            logger.debug("'%s' not found in cache, must build." % key)
-            item = item_maker()
-            self.cache.put(key, item)
-            self._last_access_hit = False
+        if (self.fs_cache is not None and
+                fs_cache_time is not None):
+            # Try first from the file-system cache.
+            fs_key = _make_fs_cache_key(key)
+            if (fs_key not in self._invalidated_fs_items and
+                    self.fs_cache.isValid(fs_key, fs_cache_time)):
+                logger.debug("'%s' found in file-system cache." %
+                             key)
+                item_raw = self.fs_cache.read(fs_key)
+                item = json.loads(
+                        item_raw,
+                        object_pairs_hook=collections.OrderedDict)
+                self.cache.put(key, item)
+                self._hits += 1
+                return item
 
-            # Save to the file-system if needed.
-            if self.fs_cache is not None and save_to_fs:
-                item_raw = json.dumps(item)
-                self.fs_cache.write(fs_key, item_raw)
+        # Look into the mem-cache.
+        logger.debug("'%s' not found in cache, must build." % key)
+        item = item_maker()
+        self.cache.put(key, item)
+        self._last_access_hit = False
+        self._misses += 1
+        self._missed_keys.append(key)
+
+        # Save to the file-system if needed.
+        if self.fs_cache is not None and save_to_fs:
+            item_raw = json.dumps(item)
+            self.fs_cache.write(fs_key, item_raw)
 
         return item
 
--- a/piecrust/commands/builtin/baking.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/commands/builtin/baking.py	Mon Mar 14 08:26:14 2016 -0700
@@ -11,6 +11,7 @@
         BakeRecord, BakeRecordEntry, SubPageBakeInfo)
 from piecrust.chefutil import format_timed
 from piecrust.commands.base import ChefCommand
+from piecrust.environment import ExecutionStats
 from piecrust.processing.pipeline import ProcessorPipeline
 from piecrust.processing.records import (
         ProcessorPipelineRecord,
@@ -55,8 +56,8 @@
                 help="Only bake the pages (don't run the asset pipeline).",
                 action='store_true')
         parser.add_argument(
-                '--show-timers',
-                help="Show detailed timing information.",
+                '--show-stats',
+                help="Show detailed information about the bake.",
                 action='store_true')
 
     def run(self, ctx):
@@ -64,7 +65,7 @@
                    os.path.join(ctx.app.root_dir, '_counter'))
 
         success = True
-        ctx.timers = {}
+        ctx.stats = {}
         start_time = time.perf_counter()
         try:
             # Bake the site sources.
@@ -75,11 +76,11 @@
             if not ctx.args.html_only:
                 success = success & self._bakeAssets(ctx, out_dir)
 
-            # Show merged timers.
-            if ctx.args.show_timers:
+            # Show merged stats.
+            if ctx.args.show_stats:
                 logger.info("-------------------")
                 logger.info("Timing information:")
-                _show_timers(ctx.timers)
+                _show_stats(ctx.stats)
 
             # All done.
             logger.info('-------------------------')
@@ -103,7 +104,7 @@
                 applied_config_variant=ctx.config_variant,
                 applied_config_values=ctx.config_values)
         record = baker.bake()
-        _merge_timers(record.timers, ctx.timers)
+        _merge_stats(record.stats, ctx.stats)
         return record.success
 
     def _bakeAssets(self, ctx, out_dir):
@@ -113,39 +114,48 @@
                 applied_config_variant=ctx.config_variant,
                 applied_config_values=ctx.config_values)
         record = proc.run()
-        _merge_timers(record.timers, ctx.timers)
+        _merge_stats(record.stats, ctx.stats)
         return record.success
 
 
-def _merge_timers(source, target):
+def _merge_stats(source, target):
     if source is None:
         return
 
     for name, val in source.items():
-        if isinstance(val, float):
-            if name not in target:
-                target[name] = 0
-            target[name] += val
-        elif isinstance(val, dict):
-            if name not in target:
-                target[name] = {}
-            _merge_timers(val, target[name])
+        if name not in target:
+            target[name] = ExecutionStats()
+        target[name].mergeStats(val)
 
 
-def _show_timers(timers, indent=''):
-    sub_timer_names = []
-    for name in sorted(timers.keys()):
-        if isinstance(timers[name], float):
-            val_str = '%8.1f s' % timers[name]
+def _show_stats(stats, full=False):
+    indent = '    '
+    for name in sorted(stats.keys()):
+        logger.info('%s:' % name)
+        s = stats[name]
+
+        logger.info('  Timers:')
+        for name, val in s.timers.items():
+            val_str = '%8.1f s' % val
             logger.info(
                     "%s[%s%s%s] %s" %
                     (indent, Fore.GREEN, val_str, Fore.RESET, name))
-        else:
-            sub_timer_names.append(name)
+
+        logger.info('  Counters:')
+        for name, val in s.counters.items():
+            val_str = '%8d  ' % val
+            logger.info(
+                    "%s[%s%s%s] %s" %
+                    (indent, Fore.GREEN, val_str, Fore.RESET, name))
 
-    for name in sub_timer_names:
-        logger.info('%s:' % name)
-        _show_timers(timers[name], indent + '  ')
+        logger.info('  Manifests:')
+        for name, val in s.manifests.items():
+            logger.info(
+                    "%s[%s%s%s] [%d entries]" %
+                    (indent, Fore.CYAN, name, Fore.RESET, len(val)))
+            if full:
+                for v in val:
+                    logger.info("%s  - %s" % (indent, v))
 
 
 class ShowRecordCommand(ChefCommand):
@@ -278,7 +288,7 @@
                             PASS_FORMATTING: 'formatting pass',
                             PASS_RENDERING: 'rendering pass'}
                     for p, ri in sub.render_info.items():
-                        logging.info("     - %s" % pass_names[p])
+                        logging.info("     - %s" % p)
                         logging.info("       used sources:  %s" %
                                      _join(ri.used_source_names))
                         pgn_info = 'no'
--- a/piecrust/environment.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/environment.py	Mon Mar 14 08:26:14 2016 -0700
@@ -52,6 +52,60 @@
         self._page_stack = []
 
 
+class ExecutionStats(object):
+    def __init__(self):
+        self.timers = {}
+        self.counters = {}
+        self.manifests = {}
+
+    def registerTimer(self, category, *, raise_if_registered=True):
+        if raise_if_registered and category in self.timers:
+            raise Exception("Timer '%s' has already been registered." %
+                            category)
+        self.timers[category] = 0
+
+    @contextlib.contextmanager
+    def timerScope(self, category):
+        start = time.perf_counter()
+        yield
+        self.timers[category] += time.perf_counter() - start
+
+    def stepTimer(self, category, value):
+        self.timers[category] += value
+
+    def stepTimerSince(self, category, since):
+        self.stepTimer(category, time.perf_counter() - since)
+
+    def registerCounter(self, category, *, raise_if_registered=True):
+        if raise_if_registered and category in self.counters:
+            raise Exception("Counter '%s' has already been registered." %
+                            category)
+        self.counters[category] = 0
+
+    def stepCounter(self, category, inc=1):
+        self.counters[category] += inc
+
+    def registerManifest(self, name, *, raise_if_registered=True):
+        if raise_if_registered and name in self.manifests:
+            raise Exception("Manifest '%s' has already been registered." %
+                            name)
+        self.manifests[name] = []
+
+    def addManifestEntry(self, name, entry):
+        self.manifests[name].append(entry)
+
+    def mergeStats(self, other):
+        for oc, ov in other.timers.items():
+            v = self.timers.setdefault(oc, 0)
+            self.timers[oc] = v + ov
+        for oc, ov in other.counters.items():
+            v = self.counters.setdefault(oc, 0)
+            self.counters[oc] = v + ov
+        for oc, ov in other.manifests.items():
+            v = self.manifests.setdefault(oc, [])
+            self.manifests[oc] = v + ov
+
+
 class Environment(object):
     def __init__(self):
         self.app = None
@@ -66,7 +120,7 @@
         self.fs_cache_only_for_main_page = False
         self.abort_source_use = False
         self._default_layout_extensions = None
-        self._timers = {}
+        self._stats = ExecutionStats()
 
     @property
     def default_layout_extensions(self):
@@ -94,22 +148,41 @@
             repo.fs_cache = cache
 
     def registerTimer(self, category, *, raise_if_registered=True):
-        if raise_if_registered and category in self._timers:
-            raise Exception("Timer '%s' has already been registered." %
-                            category)
-        self._timers[category] = 0
+        self._stats.registerTimer(
+                category, raise_if_registered=raise_if_registered)
 
-    @contextlib.contextmanager
     def timerScope(self, category):
-        start = time.perf_counter()
-        yield
-        self._timers[category] += time.perf_counter() - start
+        return self._stats.timerScope(category)
 
     def stepTimer(self, category, value):
-        self._timers[category] += value
+        self._stats.stepTimer(category, value)
 
     def stepTimerSince(self, category, since):
-        self.stepTimer(category, time.perf_counter() - since)
+        self._stats.stepTimerSince(category, since)
+
+    def registerCounter(self, category, *, raise_if_registered=True):
+        self._stats.registerCounter(
+                category, raise_if_registered=raise_if_registered)
+
+    def stepCounter(self, category, inc=1):
+        self._stats.stepCounter(category, inc)
+
+    def registerManifest(self, name, *, raise_if_registered=True):
+        self._stats.registerManifest(
+                name, raise_if_registered=raise_if_registered)
+
+    def addManifestEntry(self, name, entry):
+        self._stats.addManifestEntry(name, entry)
+
+    def getStats(self):
+        repos = [
+                ('RenderedSegmentsRepo', self.rendered_segments_repository),
+                ('PagesRepo', self.page_repository)]
+        for name, repo in repos:
+            self._stats.counters['%s_hit' % name] = repo._hits
+            self._stats.counters['%s_miss' % name] = repo._misses
+            self._stats.manifests['%s_missedKeys' % name] = list(repo._missed_keys)
+        return self._stats
 
 
 class StandardEnvironment(Environment):
--- a/piecrust/processing/pipeline.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/processing/pipeline.py	Mon Mar 14 08:26:14 2016 -0700
@@ -6,6 +6,7 @@
 import logging
 import multiprocessing
 from piecrust.chefutil import format_timed, format_timed_scope
+from piecrust.environment import ExecutionStats
 from piecrust.processing.base import PipelineContext
 from piecrust.processing.records import (
         ProcessorPipelineRecordEntry, TransitionalProcessorPipelineRecord,
@@ -54,7 +55,6 @@
         ignores = baker_params.get('ignore', [])
         ignores += [
                 '_cache', '_counter',
-                'theme_info.yml',
                 '.DS_Store', 'Thumbs.db',
                 '.git*', '.hg*', '.svn']
         self.ignore_patterns = make_re(ignores)
@@ -148,18 +148,14 @@
 
         # Shutdown the workers and get timing information from them.
         reports = pool.close()
-        record.current.timers = {}
+        total_stats = ExecutionStats()
+        record.current.stats['_Total'] = total_stats
         for i in range(len(reports)):
-            timers = reports[i]
-            if timers is None:
-                continue
-
-            worker_name = 'PipelineWorker_%d' % i
-            record.current.timers[worker_name] = {}
-            for name, val in timers['data'].items():
-                main_val = record.current.timers.setdefault(name, 0)
-                record.current.timers[name] = main_val + val
-                record.current.timers[worker_name][name] = val
+            worker_stats = reports[i]['data']
+            if worker_stats is not None:
+                worker_name = 'PipelineWorker_%d' % i
+                record.current.stats[worker_name] = worker_stats
+                total_stats.mergeStats(worker_stats)
 
         # Invoke post-processors.
         pipeline_ctx.record = record.current
--- a/piecrust/processing/records.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/processing/records.py	Mon Mar 14 08:26:14 2016 -0700
@@ -4,7 +4,7 @@
 
 
 class ProcessorPipelineRecord(Record):
-    RECORD_VERSION = 5
+    RECORD_VERSION = 6
 
     def __init__(self):
         super(ProcessorPipelineRecord, self).__init__()
@@ -12,7 +12,6 @@
         self.process_time = None
         self.processed_count = 0
         self.success = False
-        self.timers = None
 
 
 FLAG_NONE = 0
--- a/piecrust/processing/worker.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/processing/worker.py	Mon Mar 14 08:26:14 2016 -0700
@@ -144,9 +144,10 @@
 
         self.app.env.stepTimerSince("PipelineWorker_%d_Total" % self.wid,
                                     self.work_start_time)
+        data = self.app.env.getStats()
         return {
-                'type': 'timers',
-                'data': self.app.env._timers}
+                'type': 'stats',
+                'data': data}
 
 
 def get_filtered_processors(processors, authorized_names):
--- a/piecrust/records.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/records.py	Mon Mar 14 08:26:14 2016 -0700
@@ -15,6 +15,7 @@
         self.entry_added = Event()
         self.app_version = APP_VERSION
         self.record_version = self.__class__.RECORD_VERSION
+        self.stats = {}
 
     def hasLatestVersion(self):
         return (self.app_version == APP_VERSION and
--- a/piecrust/workerpool.py	Sun Mar 13 16:55:12 2016 -0700
+++ b/piecrust/workerpool.py	Mon Mar 14 08:26:14 2016 -0700
@@ -85,6 +85,7 @@
             try:
                 rep = (task_type, True, wid, (wid, w.getReport()))
             except Exception as e:
+                logger.debug("Error getting report: %s" % e)
                 if params.wrap_exception:
                     e = multiprocessing.ExceptionWithTraceback(
                             e, e.__traceback__)
@@ -265,6 +266,7 @@
                     if pool._error_callback:
                         pool._error_callback(data)
                     else:
+                        logger.error("Got error data:")
                         logger.error(data)
             except Exception as ex:
                 logger.exception(ex)