changeset 876:d1095774bfcf

refactor: Fix record/cache entry collisions, add performance counters.
author Ludovic Chabant <ludovic@chabant.com>
date Thu, 15 Jun 2017 07:33:40 -0700
parents 7169bf42ec60
children d6d35b2efd04
files piecrust/app.py piecrust/baking/baker.py piecrust/baking/worker.py piecrust/cache.py piecrust/page.py piecrust/pipelines/base.py piecrust/rendering.py piecrust/serving/procloop.py piecrust/sources/blogarchives.py piecrust/sources/generator.py piecrust/sources/taxonomy.py
diffstat 11 files changed, 109 insertions(+), 35 deletions(-)
--- a/piecrust/app.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/app.py	Thu Jun 15 07:33:40 2017 -0700
@@ -48,6 +48,9 @@
         stats.registerTimer("PageRenderSegments")
         stats.registerTimer("PageRenderLayout")
         stats.registerTimer("PageSerialize")
+        stats.registerCounter('PageLoads')
+        stats.registerCounter('PageRenderSegments')
+        stats.registerCounter('PageRenderLayout')
 
     @cached_property
     def config(self):
@@ -215,7 +218,7 @@
         return None
 
     def getPage(self, source, content_item):
-        cache_key = content_item.spec
+        cache_key = '%s@%s' % (source.name, content_item.spec)
         return self.env.page_repository.get(
             cache_key,
             lambda: Page(source, content_item))
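
The cache-key change above is the core of the collision fix: two sources can
now expose content items with the same spec (the taxonomy and blog-archive
sources below both switch to a shared '_index' spec), so the page repository
key must include the source name to keep their pages apart. A minimal sketch
of the collision, using a hypothetical dictionary-backed repository with the
same get(key, factory) shape as app.env.page_repository:

    # Hypothetical stand-in for the page repository; only the
    # get(key, factory) shape matches the real one.
    class Repository:
        def __init__(self):
            self._items = {}

        def get(self, key, factory):
            if key not in self._items:
                self._items[key] = factory()
            return self._items[key]

    repo = Repository()

    # Before the fix: both sources ask for the same '_index' key, so
    # the second caller silently receives the first source's page.
    a = repo.get('_index', lambda: ('taxonomy', '_index'))
    b = repo.get('_index', lambda: ('archives', '_index'))
    assert a is b  # collision

    # After the fix: keys are '<source.name>@<spec>' and stay apart.
    a = repo.get('taxonomy@_index', lambda: ('taxonomy', '_index'))
    b = repo.get('archives@_index', lambda: ('archives', '_index'))
    assert a is not b
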
--- a/piecrust/baking/baker.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/baking/baker.py	Thu Jun 15 07:33:40 2017 -0700
@@ -44,6 +44,9 @@
         self.app.config.set('baker/is_baking', True)
         self.app.config.set('site/asset_url_format', '%page_uri%/%filename%')
 
+        stats = self.app.env.stats
+        stats.registerTimer('WorkerTaskPut')
+
         # Make sure the output directory exists.
         if not os.path.isdir(self.out_dir):
             os.makedirs(self.out_dir, 0o755)
@@ -111,13 +114,8 @@
         # Bake the realms -- user first, theme second, so that a user item
         # can override a theme item.
         # Do this as many times as there are pipeline passes left to do.
-        pp_by_pass_and_realm = {}
-        for ppinfo in ppmngr.getPipelines():
-            pp_by_realm = pp_by_pass_and_realm.setdefault(
-                ppinfo.pipeline.PASS_NUM, {})
-            pplist = pp_by_realm.setdefault(
-                ppinfo.pipeline.source.config['realm'], [])
-            pplist.append(ppinfo)
+        pp_by_pass_and_realm = _get_pipeline_infos_by_pass_and_realm(
+            ppmngr.getPipelines())
 
         for pp_pass_num in sorted(pp_by_pass_and_realm.keys()):
             logger.debug("Pipelines pass %d" % pp_pass_num)
@@ -136,6 +134,7 @@
         # All done with the workers. Close the pool and get reports.
         pool_stats = pool.close()
         total_stats = ExecutionStats()
+        total_stats.mergeStats(stats)
         for ps in pool_stats:
             if ps is not None:
                 total_stats.mergeStats(ps)
@@ -224,6 +223,7 @@
         start_time = time.perf_counter()
         job_count = 0
         realm_name = REALM_NAMES[realm].lower()
+        stats = self.app.env.stats
 
         for ppinfo in pplist:
             src = ppinfo.source
@@ -241,6 +241,8 @@
                 job_count += len(jobs)
                 pool.queueJobs(jobs)
 
+        stats.stepTimer('WorkerTaskPut', time.perf_counter() - start_time)
+
         if job_count == 0:
             logger.debug("No jobs queued! Bailing out of this bake pass.")
             return
@@ -273,6 +275,8 @@
                     pool.userdata.next_pass_jobs[sn] = []
                     pool.queueJobs(jobs)
 
+            stats.stepTimer('WorkerTaskPut', time.perf_counter() - start_time)
+
             if job_count == 0:
                 break
 
@@ -348,7 +352,7 @@
             e.errors.append(str(exc_data))
             record.addEntry(e)
         else:
-            e = record.getEntry(job.content_item.spec)
+            e = record.getEntry(job.record_entry_spec)
             e.errors.append(str(exc_data))
 
         record.success = False
@@ -366,3 +370,26 @@
         self.records = ppmngr.record_histories.current
         self.cur_pass = 0
         self.next_pass_jobs = {}
+
+
+def _get_pipeline_infos_by_pass_and_realm(pp_infos):
+    pp_by_pass_and_realm = {}
+    for pp_info in pp_infos:
+        pp_pass_num = pp_info.pipeline.PASS_NUM
+        if isinstance(pp_pass_num, list):
+            for ppn in pp_pass_num:
+                _add_pipeline_info_to_pass_and_realm_dict(
+                    ppn, pp_info, pp_by_pass_and_realm)
+        else:
+            _add_pipeline_info_to_pass_and_realm_dict(
+                pp_pass_num, pp_info, pp_by_pass_and_realm)
+    return pp_by_pass_and_realm
+
+
+def _add_pipeline_info_to_pass_and_realm_dict(pp_pass_num, pp_info,
+                                              pp_by_pass_and_realm):
+    pp_by_realm = pp_by_pass_and_realm.setdefault(pp_pass_num, {})
+    pplist = pp_by_realm.setdefault(
+        pp_info.pipeline.source.config['realm'], [])
+    pplist.append(pp_info)
+
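
The two helpers extracted above behave like the old inline loop, with one
addition: a pipeline's PASS_NUM may now be a list, in which case the same
pipeline info is registered under every listed pass. A small sketch of the
resulting dictionary shape, using throwaway stand-ins that expose only the
attributes the helpers read:

    from types import SimpleNamespace

    def make_ppinfo(pass_num, realm):
        # Stand-in exposing just pipeline.PASS_NUM and
        # pipeline.source.config['realm'].
        source = SimpleNamespace(config={'realm': realm})
        return SimpleNamespace(
            pipeline=SimpleNamespace(PASS_NUM=pass_num, source=source))

    infos = [make_ppinfo(0, 'user'), make_ppinfo([0, 1], 'theme')]
    by_pass = _get_pipeline_infos_by_pass_and_realm(infos)
    # by_pass == {0: {'user': [...], 'theme': [...]},
    #             1: {'theme': [...]}}
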
--- a/piecrust/baking/worker.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/baking/worker.py	Thu Jun 15 07:33:40 2017 -0700
@@ -80,14 +80,15 @@
         ppinfo = self.ppmngr.getPipeline(job.source_name)
         pp = ppinfo.pipeline
 
+        runctx = PipelineJobRunContext(job, pp.record_name,
+                                       self.record_histories)
+
         ppres = PipelineJobResult()
         # For subsequent-pass jobs, a record entry is already given; for
         # first-pass jobs there is none, so we get the pipeline to create it.
         ppres.record_entry = job.data.get('record_entry')
         if ppres.record_entry is None:
-            ppres.record_entry = pp.createRecordEntry(job)
-
-        runctx = PipelineJobRunContext(job, pp, self.record_histories)
+            ppres.record_entry = pp.createRecordEntry(job, runctx)
         pp.run(job, runctx, ppres)
         return ppres
 
--- a/piecrust/cache.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/cache.py	Thu Jun 15 07:33:40 2017 -0700
@@ -191,8 +191,12 @@
             self._hits += 1
             return item
 
-        if (self.fs_cache is not None and
-                fs_cache_time is not None):
+        if self.fs_cache is not None:
+            if fs_cache_time is None:
+                raise ValueError(
+                    "No file-system cache time was given for '%s'. "
+                    "This would result in degraded performance." % key)
+
             # Try first from the file-system cache.
             fs_key = _make_fs_cache_key(key)
             if (fs_key not in self._invalidated_fs_items and
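
With this change, attaching a file-system cache while passing a None
fs_cache_time becomes a hard error instead of a silent cache bypass (the
generator-source mtime fix below addresses one such degradation). A sketch
of the calling convention this enforces, assuming get() accepts the
fs_cache_time keyword seen in the hunk above; the caller names are
illustrative:

    import os

    def load_page_data(repo, key, path, parse):
        # The source file's mtime tells the repository whether its
        # on-disk copy is still fresh; forgetting to pass it now
        # raises ValueError instead of quietly re-parsing every time.
        return repo.get(key, lambda: parse(path),
                        fs_cache_time=os.path.getmtime(path))
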
--- a/piecrust/page.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/page.py	Thu Jun 15 07:33:40 2017 -0700
@@ -306,6 +306,8 @@
         'content': json_save_segments(content)}
     cache.write(cache_path, json.dumps(cache_data))
 
+    app.env.stats.stepCounter('PageLoads')
+
     return config, content, False
 
 
--- a/piecrust/pipelines/base.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/pipelines/base.py	Thu Jun 15 07:33:40 2017 -0700
@@ -53,18 +53,28 @@
 class PipelineJobRunContext:
     """ Context for running pipeline baking jobs.
     """
-    def __init__(self, job, pipeline, record_histories):
+    def __init__(self, job, record_name, record_histories):
+        self.job = job
+        self.record_name = record_name
         self.record_histories = record_histories
-        self._job_item_spec = job.content_item.spec
-        self._record_name = pipeline.record_name
+
+    @property
+    def content_item(self):
+        return self.job.content_item
 
     @cached_property
     def previous_record(self):
-        return self.record_histories.getPreviousRecord(self._record_name)
+        return self.record_histories.getPreviousRecord(self.record_name)
+
+    @cached_property
+    def record_entry_spec(self):
+        content_item = self.content_item
+        return content_item.metadata.get('record_entry_spec',
+                                         content_item.spec)
 
     @cached_property
     def previous_entry(self):
-        return self.previous_record.getEntry(self._job_item_spec)
+        return self.previous_record.getEntry(self.record_entry_spec)
 
 
 class PipelineJobResult:
@@ -134,10 +144,10 @@
     def createJob(self, content_item):
         return PipelineJob(self, content_item)
 
-    def createRecordEntry(self, job):
+    def createRecordEntry(self, job, ctx):
         entry_class = self.RECORD_ENTRY_CLASS
         record_entry = entry_class()
-        record_entry.item_spec = job.content_item.spec
+        record_entry.item_spec = ctx.record_entry_spec
         return record_entry
 
     def mergeRecordEntry(self, record_entry, ctx):
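
The new record_entry_spec property is what lets a source bake one physical
page under many logical names: a job's content item can carry a
'record_entry_spec' metadata key that overrides its spec for record-keeping,
with the spec itself as the fallback. A self-contained sketch of that
fallback, using a namedtuple as a stand-in for ContentItem:

    from collections import namedtuple

    # Minimal stand-in for piecrust's ContentItem.
    ContentItem = namedtuple('ContentItem', ['spec', 'metadata'])

    def record_entry_spec(item):
        # Same lookup as PipelineJobRunContext.record_entry_spec.
        return item.metadata.get('record_entry_spec', item.spec)

    plain = ContentItem('pages/foo.md', {})
    term = ContentItem('_index', {'record_entry_spec': '_index[tags]'})
    assert record_entry_spec(plain) == 'pages/foo.md'
    assert record_entry_spec(term) == '_index[tags]'
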
--- a/piecrust/rendering.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/rendering.py	Thu Jun 15 07:33:40 2017 -0700
@@ -322,11 +322,15 @@
     res = {
         'segments': formatted_segments,
         'pass_info': _pickle_object(pass_info)}
+
+    app.env.stats.stepCounter('PageRenderSegments')
+
     return res
 
 
 def _do_render_layout(layout_name, page, layout_data):
-    cur_ctx = page.app.env.render_ctx_stack.current_ctx
+    app = page.app
+    cur_ctx = app.env.render_ctx_stack.current_ctx
     assert cur_ctx is not None
     assert cur_ctx.page == page
 
@@ -340,10 +344,10 @@
 
     _, engine_name = os.path.splitext(full_names[0])
     engine_name = engine_name.lstrip('.')
-    engine = get_template_engine(page.app, engine_name)
+    engine = get_template_engine(app, engine_name)
 
     try:
-        with page.app.env.stats.timerScope(
+        with app.env.stats.timerScope(
                 engine.__class__.__name__ + '_layout'):
             output = engine.renderFile(full_names, layout_data)
     except TemplateNotFoundError as ex:
@@ -354,6 +358,9 @@
 
     pass_info = cur_ctx.render_passes[PASS_RENDERING]
     res = {'content': output, 'pass_info': _pickle_object(pass_info)}
+
+    app.env.stats.stepCounter('PageRenderLayout')
+
     return res
 
 
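
The counters stepped here and in page.py pair with the registerCounter()
calls added in app.py, putting a loads-versus-renders breakdown next to the
existing timers. A rough sketch of the stats surface this changeset relies
on (registerTimer, stepTimer, timerScope, registerCounter, stepCounter);
the class itself is hypothetical, and the real ExecutionStats may be
stricter about unregistered names:

    import time
    from contextlib import contextmanager

    class Stats:
        def __init__(self):
            self.timers = {}
            self.counters = {}

        def registerTimer(self, name):
            self.timers.setdefault(name, 0.0)

        def stepTimer(self, name, dt):
            self.timers[name] = self.timers.get(name, 0.0) + dt

        def registerCounter(self, name):
            self.counters.setdefault(name, 0)

        def stepCounter(self, name, inc=1):
            self.counters[name] = self.counters.get(name, 0) + inc

        @contextmanager
        def timerScope(self, name):
            # Times a with-block, as in the layout-rendering call above.
            start = time.perf_counter()
            try:
                yield
            finally:
                self.stepTimer(name, time.perf_counter() - start)
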
--- a/piecrust/serving/procloop.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/serving/procloop.py	Thu Jun 15 07:33:40 2017 -0700
@@ -239,11 +239,12 @@
         jobctx = PipelineJobCreateContext(0, record_histories)
         jobs = pp.createJobs(jobctx)
         for job in jobs:
-            ppres = PipelineJobResult()
-            ppres.record_entry = pp.createRecordEntry(job)
+            runctx = PipelineJobRunContext(
+                job, pp.record_name, record_histories)
 
-            runctx = PipelineJobRunContext(
-                job, pp, record_histories)
+            ppres = PipelineJobResult()
+            ppres.record_entry = pp.createRecordEntry(job, runctx)
+
             try:
                 pp.run(job, runctx, ppres)
             except Exception as e:
--- a/piecrust/sources/blogarchives.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/sources/blogarchives.py	Thu Jun 15 07:33:40 2017 -0700
@@ -40,8 +40,11 @@
 
     def findContent(self, route_params):
         year = route_params['year']
-        spec = '_index[%04d]' % year
-        metadata = {'route_params': {'year': year}}
+        spec = '_index'
+        metadata = {
+            'record_entry_spec': '_index[%04d]' % year,
+            'route_params': {'year': year}
+        }
         return ContentItem(spec, metadata)
 
     def prepareRenderContext(self, ctx):
@@ -167,6 +170,11 @@
         self._pagebaker.stopWriterQueue()
 
     def createJobs(self, ctx):
+        logger.debug("Caching template page for blog archives '%s'." %
+                     self.inner_source.name)
+        page = self.app.getPage(self.source, ContentItem('_index', {}))
+        page._load()
+
         logger.debug("Building blog archives for: %s" %
                      self.inner_source.name)
         self._buildDirtyYears(ctx)
@@ -176,8 +184,11 @@
         jobs = []
         for y in self._dirty_years:
             item = ContentItem(
-                '_index[%04d]' % y,
-                {'route_params': {'year': y}})
+                '_index',
+                {
+                    'record_entry_spec': '_index[%04d]' % y,
+                    'route_params': {'year': y}
+                })
             jobs.append(self.createJob(item))
         if len(jobs) > 0:
             return jobs
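
Taken together, the blog-archive changes split an item's identity in two:
the content spec collapses to the shared '_index' template (one entry in
the page repository and the file-system cache), while the year-specific
name moves into 'record_entry_spec' metadata so the bake record still
tracks each year separately. A sketch of the fan-out, again with a
namedtuple standing in for ContentItem:

    from collections import namedtuple

    ContentItem = namedtuple('ContentItem', ['spec', 'metadata'])

    dirty_years = [2015, 2016, 2017]
    items = [
        ContentItem('_index', {
            'record_entry_spec': '_index[%04d]' % y,
            'route_params': {'year': y},
        })
        for y in dirty_years
    ]
    # One shared template page, three distinct record entries:
    # '_index[2015]', '_index[2016]', '_index[2017]'.
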
--- a/piecrust/sources/generator.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/sources/generator.py	Thu Jun 15 07:33:40 2017 -0700
@@ -16,6 +16,7 @@
         self._inner_source_name = source_name
 
         self._raw_item = ''
+        self._raw_item_time = time.time()
 
     @cached_property
     def inner_source(self):
@@ -31,5 +32,5 @@
         return io.StringIO(self._raw_item)
 
     def getItemMtime(self, item):
-        return time.time()
+        return self._raw_item_time
 
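
This fix matters because getItemMtime() used to return a fresh time.time()
on every call, so the synthesized item always looked newer than any cached
copy and the file-system cache could never hit; that is exactly the
degradation the new ValueError in cache.py is meant to flag. Capturing one
timestamp at construction keeps the mtime stable for the life of the
source, as in this reduced sketch with illustrative names:

    import io
    import time

    class GeneratedSource:
        def __init__(self):
            self._raw_item = ''
            # Captured once: repeated mtime checks now compare equal,
            # so cached copies of the item can be considered fresh.
            self._raw_item_time = time.time()

        def getItemContents(self, item):
            return io.StringIO(self._raw_item)

        def getItemMtime(self, item):
            return self._raw_item_time
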
--- a/piecrust/sources/taxonomy.py	Thu Jun 15 07:32:19 2017 -0700
+++ b/piecrust/sources/taxonomy.py	Thu Jun 15 07:33:40 2017 -0700
@@ -84,8 +84,9 @@
 
     def findContent(self, route_params):
         slugified_term = route_params[self.taxonomy.term_name]
-        spec = '_index[%s]' % slugified_term
+        spec = '_index'
         metadata = {'term': slugified_term,
+                    'record_entry_spec': '_index[%s]' % slugified_term,
                     'route_params': {
                         self.taxonomy.term_name: slugified_term}
                     }
@@ -264,6 +265,11 @@
         self._pagebaker.stopWriterQueue()
 
     def createJobs(self, ctx):
+        logger.debug("Caching template page for taxonomy '%s'." %
+                     self.taxonomy.name)
+        page = self.app.getPage(self.source, ContentItem('_index', {}))
+        page._load()
+
         logger.debug("Building '%s' taxonomy pages for source: %s" %
                      (self.taxonomy.name, self.inner_source.name))
         self._analyzer = _TaxonomyTermsAnalyzer(self, ctx.record_histories)
@@ -275,8 +281,9 @@
         jobs = []
         for slugified_term in self._analyzer.dirty_slugified_terms:
             item = ContentItem(
-                '_index[%s]' % slugified_term,
+                '_index',
                 {'term': slugified_term,
+                 'record_entry_spec': '_index[%s]' % slugified_term,
                  'route_params': {
                      self.taxonomy.term_name: slugified_term}
                  })
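
As with the blog archives, the taxonomy pipeline now pre-loads the shared
'_index' template page once in createJobs() before fanning out per-term
jobs, so the page is parsed and cached once up front (under the
'<source.name>@_index' key from app.py) instead of on first use by each
term. A compressed sketch of both steps; app, source, and the 'tag' route
parameter are hypothetical stand-ins:

    from collections import namedtuple

    ContentItem = namedtuple('ContentItem', ['spec', 'metadata'])

    def create_jobs(app, source, dirty_terms):
        # Warm the caches with the shared template page up front...
        page = app.getPage(source, ContentItem('_index', {}))
        page._load()
        # ...then emit one job item per dirty term, each carrying its
        # own record_entry_spec so the bake record stays per-term.
        return [
            ContentItem('_index', {
                'term': t,
                'record_entry_spec': '_index[%s]' % t,
                'route_params': {'tag': t},
            })
            for t in dirty_terms
        ]
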