diff piecrust/baking/records.py @ 411:e7b865f8f335

bake: Enable multiprocess baking. Baking is now done by running a worker per CPU, and sending jobs to them. This changes several things across the codebase: * Ability to not cache things related to pages other than the 'main' page (i.e. the page at the bottom of the execution stack). * Decouple the baking process from the bake records, so only the main process keeps track of (and modifies) the bake record. * Remove the need for 'batch page getters' and loading a page directly from the page factories. Various smaller changes are also included here, including support for scope performance timers that are saved with the bake record and can be printed out to the console. Yes, I got carried away. For testing, the in-memory 'mock' file-system doesn't work anymore, since we're spawning processes, so this is replaced by a 'tmpfs' file-system which is saved in temporary files on disk and deleted after tests have run.
author Ludovic Chabant <ludovic@chabant.com>
date Fri, 12 Jun 2015 17:09:19 -0700
parents c12ee6936b8c
children 0e9a94b7fdfa
line wrap: on
line diff
--- a/piecrust/baking/records.py	Sat May 30 15:41:52 2015 -0700
+++ b/piecrust/baking/records.py	Fri Jun 12 17:09:19 2015 -0700
@@ -1,5 +1,6 @@
 import copy
 import os.path
+import hashlib
 import logging
 from piecrust.records import Record, TransitionalRecord
 
@@ -7,35 +8,36 @@
 logger = logging.getLogger(__name__)
 
 
-def _get_transition_key(source_name, rel_path, taxonomy_info=None):
-    key = '%s:%s' % (source_name, rel_path)
+def _get_transition_key(path, taxonomy_info=None):
+    key = path
     if taxonomy_info:
-        taxonomy_name, taxonomy_term, taxonomy_source_name = taxonomy_info
-        key += ';%s:%s=' % (taxonomy_source_name, taxonomy_name)
-        if isinstance(taxonomy_term, tuple):
-            key += '/'.join(taxonomy_term)
+        key += '+%s:%s=' % (taxonomy_info.source_name,
+                            taxonomy_info.taxonomy_name)
+        if isinstance(taxonomy_info.term, tuple):
+            key += '/'.join(taxonomy_info.term)
         else:
-            key += taxonomy_term
-    return key
+            key += taxonomy_info.term
+    return hashlib.md5(key.encode('utf8')).hexdigest()
 
 
 class BakeRecord(Record):
-    RECORD_VERSION = 12
+    RECORD_VERSION = 14
 
     def __init__(self):
         super(BakeRecord, self).__init__()
         self.out_dir = None
         self.bake_time = None
+        self.timers = None
         self.success = True
 
 
-class BakeRecordPassInfo(object):
+class BakePassInfo(object):
     def __init__(self):
         self.used_source_names = set()
         self.used_taxonomy_terms = set()
 
 
-class BakeRecordSubPageEntry(object):
+class SubPageBakeInfo(object):
     FLAG_NONE = 0
     FLAG_BAKED = 2**0
     FLAG_FORCED_BY_SOURCE = 2**1
@@ -68,7 +70,27 @@
                 other.render_passes[p] = copy.deepcopy(pinfo)
 
 
-class BakeRecordPageEntry(object):
+class PageBakeInfo(object):
+    def __init__(self):
+        self.subs = []
+        self.assets = []
+
+
+class FirstRenderInfo(object):
+    def __init__(self):
+        self.assets = []
+        self.used_pagination = False
+        self.pagination_has_more = False
+
+
+class TaxonomyInfo(object):
+    def __init__(self, taxonomy_name, source_name, term):
+        self.taxonomy_name = taxonomy_name
+        self.source_name = source_name
+        self.term = term
+
+
+class BakeRecordEntry(object):
     """ An entry in the bake record.
 
         The `taxonomy_info` attribute should be a tuple of the form:
@@ -79,16 +101,15 @@
     FLAG_SOURCE_MODIFIED = 2**1
     FLAG_OVERRIDEN = 2**2
 
-    def __init__(self, source_name, rel_path, path, taxonomy_info=None):
+    def __init__(self, source_name, path, taxonomy_info=None):
         self.source_name = source_name
-        self.rel_path = rel_path
         self.path = path
         self.taxonomy_info = taxonomy_info
         self.flags = self.FLAG_NONE
         self.config = None
-        self.subs = []
-        self.assets = []
         self.errors = []
+        self.bake_info = None
+        self.first_render_info = None
 
     @property
     def path_mtime(self):
@@ -100,35 +121,59 @@
 
     @property
     def num_subs(self):
-        return len(self.subs)
+        if self.bake_info is None:
+            return 0
+        return len(self.bake_info.subs)
 
     @property
     def was_any_sub_baked(self):
-        for o in self.subs:
-            if o.was_baked:
-                return True
+        if self.bake_info is not None:
+            for o in self.bake_info.subs:
+                if o.was_baked:
+                    return True
+        return False
+
+    @property
+    def subs(self):
+        if self.bake_info is not None:
+            return self.bake_info.subs
+        return []
+
+    @property
+    def has_any_error(self):
+        if len(self.errors) > 0:
+            return True
+        if self.bake_info is not None:
+            for o in self.bake_info.subs:
+                if len(o.errors) > 0:
+                    return True
         return False
 
     def getSub(self, sub_index):
-        return self.subs[sub_index - 1]
+        if self.bake_info is None:
+            raise Exception("No bake info available on this entry.")
+        return self.bake_info.subs[sub_index - 1]
 
     def getAllErrors(self):
         yield from self.errors
-        for o in self.subs:
-            yield from o.errors
+        if self.bake_info is not None:
+            for o in self.bake_info.subs:
+                yield from o.errors
 
     def getAllUsedSourceNames(self):
         res = set()
-        for o in self.subs:
-            for p, pinfo in o.render_passes.items():
-                res |= pinfo.used_source_names
+        if self.bake_info is not None:
+            for o in self.bake_info.subs:
+                for p, pinfo in o.render_passes.items():
+                    res |= pinfo.used_source_names
         return res
 
     def getAllUsedTaxonomyTerms(self):
         res = set()
-        for o in self.subs:
-            for p, pinfo in o.render_passes.items():
-                res |= pinfo.used_taxonomy_terms
+        if self.bake_info is not None:
+            for o in self.bake_info.subs:
+                for p, pinfo in o.render_passes.items():
+                    res |= pinfo.used_taxonomy_terms
         return res
 
 
@@ -141,37 +186,44 @@
     def addEntry(self, entry):
         if (self.previous.bake_time and
                 entry.path_mtime >= self.previous.bake_time):
-            entry.flags |= BakeRecordPageEntry.FLAG_SOURCE_MODIFIED
+            entry.flags |= BakeRecordEntry.FLAG_SOURCE_MODIFIED
             self.dirty_source_names.add(entry.source_name)
         super(TransitionalBakeRecord, self).addEntry(entry)
 
     def getTransitionKey(self, entry):
-        return _get_transition_key(entry.source_name, entry.rel_path,
-                                   entry.taxonomy_info)
+        return _get_transition_key(entry.path, entry.taxonomy_info)
 
-    def getOverrideEntry(self, factory, uri):
+    def getPreviousAndCurrentEntries(self, path, taxonomy_info=None):
+        key = _get_transition_key(path, taxonomy_info)
+        pair = self.transitions.get(key)
+        return pair
+
+    def getOverrideEntry(self, path, uri):
         for pair in self.transitions.values():
             cur = pair[1]
-            if (cur and
-                    (cur.source_name != factory.source.name or
-                        cur.rel_path != factory.rel_path)):
-                    for o in cur.subs:
-                        if o.out_uri == uri:
-                            return cur
+            if cur and cur.path != path:
+                for o in cur.subs:
+                    if o.out_uri == uri:
+                        return cur
         return None
 
-    def getPreviousEntry(self, source_name, rel_path, taxonomy_info=None):
-        key = _get_transition_key(source_name, rel_path, taxonomy_info)
-        pair = self.transitions.get(key)
+    def getPreviousEntry(self, path, taxonomy_info=None):
+        pair = self.getPreviousAndCurrentEntries(path, taxonomy_info)
         if pair is not None:
             return pair[0]
         return None
 
+    def getCurrentEntry(self, path, taxonomy_info=None):
+        pair = self.getPreviousAndCurrentEntries(path, taxonomy_info)
+        if pair is not None:
+            return pair[1]
+        return None
+
     def collapseEntry(self, prev_entry):
         cur_entry = copy.deepcopy(prev_entry)
-        cur_entry.flags = BakeRecordPageEntry.FLAG_NONE
+        cur_entry.flags = BakeRecordEntry.FLAG_NONE
         for o in cur_entry.subs:
-            o.flags = BakeRecordSubPageEntry.FLAG_NONE
+            o.flags = SubPageBakeInfo.FLAG_NONE
         self.addEntry(cur_entry)
 
     def getDeletions(self):
@@ -187,5 +239,5 @@
                     yield (p, 'source file changed outputs')
 
     def _onNewEntryAdded(self, entry):
-        entry.flags |= BakeRecordPageEntry.FLAG_NEW
+        entry.flags |= BakeRecordEntry.FLAG_NEW