changeset 133:9e4c2e68a129

Optimize server for files that already exist.

* Only try to find new assets if no previously existing asset or page could be used.
* Tidy up the API a bit for passing and returning bake/process records.
* Put the process record in its place.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 18 Nov 2014 21:32:04 -0800
parents 3834e2ef0cf2
children 742009d964ef
files piecrust/baking/baker.py piecrust/processing/base.py piecrust/processing/records.py piecrust/records.py piecrust/serving.py
diffstat 5 files changed, 109 insertions(+), 42 deletions(-)
--- a/piecrust/baking/baker.py	Tue Nov 18 21:29:12 2014 -0800
+++ b/piecrust/baking/baker.py	Tue Nov 18 21:32:04 2014 -0800
@@ -303,7 +303,6 @@
         record = TransitionalBakeRecord()
         record_cache = self.app.cache.getCache('baker')
         record_name = (
-                'pages_' +
                 hashlib.md5(self.out_dir.encode('utf8')).hexdigest() +
                 '.record')
         if not self.force and record_cache.has(record_name):
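
Both this hunk and the matching one in piecrust/processing/base.py below drop the name prefix; the two records no longer share a cache directory, so hashing the output path alone keeps them apart. A minimal sketch of the naming scheme (the helper name is hypothetical):

    import hashlib

    def record_name(out_dir):
        # One record per output directory, keyed by a hash of its path.
        # The old 'pages_'/'assets_' prefixes are redundant now that bake
        # and process records live in separate cache directories.
        return hashlib.md5(out_dir.encode('utf8')).hexdigest() + '.record'

    print(record_name('/var/www/mysite'))  # '<32 hex chars>.record'
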
--- a/piecrust/processing/base.py	Tue Nov 18 21:29:12 2014 -0800
+++ b/piecrust/processing/base.py	Tue Nov 18 21:32:04 2014 -0800
@@ -137,7 +137,9 @@
             lambda p: p.PROCESSOR_NAME in authorized_names,
             self.processors))
 
-    def run(self, src_dir_or_file=None, new_only=False):
+    def run(self, src_dir_or_file=None, *,
+            new_only=False, delete=True,
+            previous_record=None, save_record=True):
         # Invoke pre-processors.
         for proc in self.processors:
             proc.onPipelineStart(self)
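
The bare '*' in the new signature makes every option keyword-only, so call sites stay unambiguous as flags accumulate. A self-contained stand-in with the same signature:

    def run(src_dir_or_file=None, *,
            new_only=False, delete=True,
            previous_record=None, save_record=True):
        # Everything after the bare '*' must be passed by keyword.
        return (src_dir_or_file, new_only, delete, previous_record, save_record)

    print(run('/assets/style.less', delete=False, save_record=False))
    # run('/assets/style.less', False)  # TypeError: flags are keyword-only
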
@@ -148,16 +150,19 @@
 
         # Create the pipeline record.
         record = TransitionalProcessorPipelineRecord()
-        record_cache = self.app.cache.getCache('baker')
+        record_cache = self.app.cache.getCache('proc')
         record_name = (
-                'assets_' +
                 hashlib.md5(self.out_dir.encode('utf8')).hexdigest() +
                 '.record')
-        if not self.force and record_cache.has(record_name):
+        if previous_record:
+            record.setPrevious(previous_record)
+        elif not self.force and record_cache.has(record_name):
             t = time.clock()
             record.loadPrevious(record_cache.getCachePath(record_name))
             logger.debug(format_timed(t, 'loaded previous bake record',
-                    colored=False))
+                         colored=False))
+        logger.debug("Got %d entries in process record." %
+                len(record.previous.entries))
 
         # Create the workers.
         pool = []
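
An explicitly passed previous_record now takes precedence over the on-disk cache, which itself is skipped on a forced rebuild. A standalone sketch of that decision (names are hypothetical):

    def pick_previous(previous_record, force, cached, load_cached):
        # Mirrors the branch above: in-memory record first, then the
        # on-disk cache (unless forced), else start with no history.
        if previous_record:
            return previous_record
        if not force and cached:
            return load_cached()
        return None

    # The dev server passes its long-lived record and skips disk I/O:
    print(pick_previous('in-memory record', force=False, cached=True,
                        load_cached=lambda: 'record from disk'))
    # -> 'in-memory record'
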
@@ -204,7 +209,7 @@
             raise Exception("Worker pool was aborted.")
 
         # Handle deletions.
-        if not new_only:
+        if delete and not new_only:
             for path, reason in record.getDeletions():
                 logger.debug("Removing '%s': %s" % (path, reason))
                 os.remove(path)
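
Deletions are now gated twice: new_only scans never see enough of the tree to prune safely, and delete=False lets a caller (the dev server, below) re-process a single file without touching unrelated outputs. The gate as a tiny standalone predicate:

    def should_delete(delete, new_only):
        # Only a full pass that is allowed to prune may remove outputs.
        return delete and not new_only

    assert should_delete(True, False)       # normal bake: prune
    assert not should_delete(True, True)    # new-only scan: too partial
    assert not should_delete(False, False)  # single-file re-process: hands off
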
@@ -214,15 +219,18 @@
         for proc in self.processors:
             proc.onPipelineEnd(self)
 
-        # Save the process record.
-        t = time.clock()
+        # Finalize the process record.
         record.current.process_time = time.time()
         record.current.out_dir = self.out_dir
         record.collapseRecords()
-        record.saveCurrent(record_cache.getCachePath(record_name))
-        logger.debug(format_timed(t, 'saved bake record', colored=False))
 
-        return record
+        # Save the process record.
+        if save_record:
+            t = time.clock()
+            record.saveCurrent(record_cache.getCachePath(record_name))
+            logger.debug(format_timed(t, 'saved bake record', colored=False))
+
+        return record.detach()
 
     def processDirectory(self, ctx, start_dir, new_only=False):
         for dirpath, dirnames, filenames in os.walk(start_dir):
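
Returning record.detach() changes the contract: callers now get a plain, finished record instead of the transitional wrapper, and persistence is opt-out via save_record. A condensed stand-in for the new tail of run() (types simplified):

    import time

    class Record:
        def __init__(self):
            self.entries = []
            self.process_time = None

    def finish(current, save_record, save=lambda rec: None):
        # Always finalize; only persist when asked. The dev server
        # passes save_record=False to keep its record purely in memory.
        current.process_time = time.time()
        if save_record:
            save(current)
        return current  # the detached record, scaffolding dropped

    r = finish(Record(), save_record=False)
    print(r.entries, r.process_time is not None)  # [] True
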
--- a/piecrust/processing/records.py	Tue Nov 18 21:29:12 2014 -0800
+++ b/piecrust/processing/records.py	Tue Nov 18 21:32:04 2014 -0800
@@ -21,6 +21,15 @@
                     return entry
         return None
 
+    def replaceEntry(self, new_entry):
+        for e in self.entries:
+            if (e.base_dir == new_entry.base_dir and
+                    e.rel_input == new_entry.rel_input):
+                e.flags = new_entry.flags
+                e.rel_outputs = list(new_entry.rel_outputs)
+                e.errors = list(new_entry.errors)
+                break
+
 
 FLAG_NONE = 0
 FLAG_PROCESSED = 2**0
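
replaceEntry updates an entry in place, keyed on (base_dir, rel_input), so long-lived references to the record (the dev server keeps one across requests) see the refreshed outputs without re-fetching anything. A standalone sketch of the idea:

    class Entry:
        def __init__(self, base_dir, rel_input, rel_outputs):
            self.base_dir = base_dir
            self.rel_input = rel_input
            self.rel_outputs = rel_outputs

    def replace_entry(entries, new_entry):
        # Mutate the matching entry rather than swapping it out, so
        # existing references keep pointing at up-to-date data.
        for e in entries:
            if (e.base_dir == new_entry.base_dir and
                    e.rel_input == new_entry.rel_input):
                e.rel_outputs = list(new_entry.rel_outputs)
                break

    entries = [Entry('assets', 'style.less', ['style.css'])]
    replace_entry(entries, Entry('assets', 'style.less',
                                 ['style.css', 'style.css.map']))
    print(entries[0].rel_outputs)  # ['style.css', 'style.css.map']
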
--- a/piecrust/records.py	Tue Nov 18 21:29:12 2014 -0800
+++ b/piecrust/records.py	Tue Nov 18 21:32:04 2014 -0800
@@ -79,22 +79,38 @@
             self.previous = self._record_class()
             return
 
-        for e in self.previous.entries:
-            key = self.getTransitionKey(e)
-            self.transitions[key] = (e, None)
+        self._rebuildTransitions()
+
+    def setPrevious(self, previous_record):
+        self.previous = previous_record
+        self._rebuildTransitions()
 
     def clearPrevious(self):
-        self.previous = self._record_class()
+        self.setPrevious(self._record_class())
 
     def saveCurrent(self, current_path):
         self.current.save(current_path)
 
+    def detach(self):
+        res = self.current
+        self.current.entry_added -= self._onCurrentEntryAdded
+        self.current = None
+        self.previous = None
+        self.transitions = {}
+        return res
+
     def addEntry(self, entry):
         self.current.addEntry(entry)
 
     def getTransitionKey(self, entry):
         raise NotImplementedError()
 
+    def _rebuildTransitions(self):
+        self.transitions = {}
+        for e in self.previous.entries:
+            key = self.getTransitionKey(e)
+            self.transitions[key] = (e, None)
+
     def _onCurrentEntryAdded(self, entry):
         key = self.getTransitionKey(entry)
         te = self.transitions.get(key)
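
setPrevious and clearPrevious both funnel through _rebuildTransitions, which re-keys every previous entry so later additions to the current record can be paired against it; detach then unhooks the listener and hands back just the current record. A condensed model of the transition map:

    class MiniTransitionalRecord:
        def __init__(self):
            self.transitions = {}

        def set_previous(self, entries):
            # Re-key all previous entries; each maps to (previous, current).
            self.transitions = {e: (e, None) for e in entries}

        def add_current(self, entry):
            prev, _ = self.transitions.get(entry, (None, None))
            self.transitions[entry] = (prev, entry)

    t = MiniTransitionalRecord()
    t.set_previous(['a.css', 'b.css'])
    t.add_current('a.css')  # existing asset, paired with its previous entry
    t.add_current('c.css')  # brand-new asset, no previous entry
    print(t.transitions)
    # {'a.css': ('a.css', 'a.css'), 'b.css': ('b.css', None),
    #  'c.css': (None, 'c.css')}
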
--- a/piecrust/serving.py	Tue Nov 18 21:29:12 2014 -0800
+++ b/piecrust/serving.py	Tue Nov 18 21:32:04 2014 -0800
@@ -16,7 +16,6 @@
 from piecrust.data.filters import (PaginationFilter, HasFilterClause,
         IsFilterClause)
 from piecrust.environment import StandardEnvironment
-from piecrust.page import Page
 from piecrust.processing.base import ProcessorPipeline
 from piecrust.rendering import PageRenderingContext, render_page
 from piecrust.sources.base import PageFactory, MODE_PARSING
@@ -125,15 +124,21 @@
         if response is not None:
             return response(environ, start_response)
 
-        # Nope. Let's hope it's an actual page.
+        # Nope. Let's see if it's an actual page.
+        # We trap any exception that says "there's no such page" so we can
+        # try another thing before bailing out. But we let any exception
+        # that says "something's wrong" through.
+        exc = None
         try:
             response = self._try_serve_page(app, environ, request)
             return response(environ, start_response)
-        except HTTPException as ex:
-            raise
         except (RouteNotFoundError, SourceNotFoundError) as ex:
             logger.exception(ex)
-            raise NotFound()
+            exc = NotFound(str(ex))
+        except NotFound as ex:
+            exc = ex
+        except HTTPException:
+            raise
         except Exception as ex:
             if app.debug:
                 logger.exception(ex)
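
The rewritten handler separates "there's no such page" (trapped and kept for later) from "something's wrong" (re-raised immediately). A simplified standalone sketch of the trap-and-defer shape, omitting the HTTPException and debug branches:

    class NotFound(Exception): pass
    class RouteNotFoundError(Exception): pass

    def serve(try_page, try_new_asset):
        # Remember the "no such page" error, try one more strategy, and
        # only re-raise it if that fails too. Others propagate at once.
        exc = None
        try:
            return try_page()
        except RouteNotFoundError as ex:
            exc = NotFound(str(ex))
        except NotFound as ex:
            exc = ex

        response = try_new_asset()
        if response is not None:
            return response
        raise exc

    def no_page():
        raise RouteNotFoundError('no route matches /logo-new.png')

    print(serve(no_page, lambda: 'served freshly processed asset'))
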
@@ -142,41 +147,71 @@
             logger.error(msg)
             raise InternalServerError(msg)
 
+        # Nothing worked so far... let's see if there's a new asset.
+        response = self._try_serve_new_asset(app, environ, request)
+        if response is not None:
+            return response(environ, start_response)
+
+        # Nope. Raise the exception we had in store.
+        raise exc
+
     def _try_serve_asset(self, app, environ, request):
-        logger.debug("Searching for asset with path: %s" % request.path)
+        logger.debug("Searching %d entries for asset with path: %s" %
+                (len(self._asset_record.entries), request.path))
         rel_req_path = request.path.lstrip('/').replace('/', os.sep)
-        entry = self._asset_record.previous.findEntry(rel_req_path)
-        do_synchronous_process = True
-        mounts = app.assets_dirs
+        entry = self._asset_record.findEntry(rel_req_path)
         if entry is None:
-            # We don't know any asset that could have created this path,
-            # but we'll see if there's a new asset that could fit.
-            pipeline = ProcessorPipeline(
-                    app, mounts, self._out_dir,
-                    skip_patterns=self._skip_patterns,
-                    force_patterns=self._force_patterns)
-            record = pipeline.run(new_only=True)
-            entry = record.current.findEntry(rel_req_path)
-            if entry is None:
-                return None
-
-            logger.debug("Found new asset: %s" % entry.path)
-            self._asset_record.addEntry(entry)
-            do_synchronous_process = False
+            # We don't know any asset that could have created this path.
+            # It could be a new asset that the user just created, but we'll
+            # check for that later.
+            return None
 
         # Yep, we know about this URL because we processed an asset that
         # maps to it... make sure it's up to date by re-processing it
         # before serving.
+        mounts = app.assets_dirs
         asset_in_path = entry.path
         asset_out_path = os.path.join(self._out_dir, rel_req_path)
 
-        if self.synchronous_asset_pipeline and do_synchronous_process:
+        if self.synchronous_asset_pipeline:
+            logger.debug("Making sure '%s' is up-to-date." % asset_in_path)
             pipeline = ProcessorPipeline(
                     app, mounts, self._out_dir,
                     skip_patterns=self._skip_patterns,
-                    force_patterns=self._force_patterns)
-            pipeline.run(asset_in_path)
+                    force_patterns=self._force_patterns,
+                    num_workers=1)
+            r = pipeline.run(asset_in_path, delete=False, save_record=False,
+                             previous_record=self._asset_record)
+            assert len(r.entries) == 1
+            self._asset_record.replaceEntry(r.entries[0])
+
+        logger.debug("Serving %s" % asset_out_path)
+        wrapper = wrap_file(environ, open(asset_out_path, 'rb'))
+        response = Response(wrapper)
+        _, ext = os.path.splitext(rel_req_path)
+        response.mimetype = self._mimetype_map.get(
+                ext.lstrip('.'), 'text/plain')
+        return response
 
+    def _try_serve_new_asset(self, app, environ, request):
+        logger.debug("Searching for a new asset with path: %s" % request.path)
+        mounts = app.assets_dirs
+        pipeline = ProcessorPipeline(
+                app, mounts, self._out_dir,
+                skip_patterns=self._skip_patterns,
+                force_patterns=self._force_patterns)
+        r = pipeline.run(new_only=True, delete=False, save_record=False,
+                         previous_record=self._asset_record)
+        for e in r.entries:
+            self._asset_record.addEntry(e)
+
+        rel_req_path = request.path.lstrip('/').replace('/', os.sep)
+        entry = self._asset_record.findEntry(rel_req_path)
+        if entry is None:
+            return None
+
+        asset_out_path = os.path.join(self._out_dir, rel_req_path)
+        logger.debug("Found new asset: %s" % entry.path)
         logger.debug("Serving %s" % asset_out_path)
         wrapper = wrap_file(environ, open(asset_out_path, 'rb'))
         response = Response(wrapper)
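
Both _try_serve_asset and _try_serve_new_asset end with the same serving tail. A hypothetical factored-out helper, using the same Werkzeug calls the module already imports:

    import os
    from werkzeug.wrappers import Response
    from werkzeug.wsgi import wrap_file

    def serve_processed_file(environ, out_path, rel_req_path, mimetype_map):
        # Stream the processed file and pick a MIME type from its
        # extension, falling back to text/plain as both branches above do.
        wrapper = wrap_file(environ, open(out_path, 'rb'))
        response = Response(wrapper)
        _, ext = os.path.splitext(rel_req_path)
        response.mimetype = mimetype_map.get(ext.lstrip('.'), 'text/plain')
        return response
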