comparison piecrust/pipelines/_proctree.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0. * Everything is a `ContentSource`, including assets directories. * Most content sources are subclasses of the base file-system source. * A source is processed by a "pipeline", and there are 2 built-in pipelines, one for assets and one for pages. The asset pipeline is vaguely functional, but the page pipeline is completely broken right now. * Rewrite the baking process as just running appropriate pipelines on each content item. This should allow for better parallelization.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents
children
comparison
equal deleted inserted replaced
851:2c7e57d80bba 852:4850f8c21b6e
1 import os
2 import time
3 import os.path
4 import logging
5 from piecrust.chefutil import format_timed
6 from piecrust.processing.base import FORCE_BUILD
7
8
9 logger = logging.getLogger(__name__)
10
11
# Dirtiness states of a processing tree node. A node starts out as
# STATE_UNKNOWN; the tree runner later marks it STATE_DIRTY (needs to be
# processed) or STATE_CLEAN (outputs are up to date).
STATE_UNKNOWN, STATE_DIRTY, STATE_CLEAN = range(3)
15
16
class ProcessingTreeError(Exception):
    """Base class for the errors raised while building or running a
    processing tree.
    """
19
20
class ProcessorNotFoundError(ProcessingTreeError):
    """Raised when none of a node's available processors matches its
    path.
    """
23
24
class ProcessorError(ProcessingTreeError):
    """Error describing the failure of a processor on a given input.

    `proc_name` is the name of the processor that failed and `in_path`
    is the path it was processing. Any extra positional arguments are
    forwarded to the base exception.
    """

    def __init__(self, proc_name, in_path, *args):
        super().__init__(*args)
        self.in_path = in_path
        self.proc_name = proc_name

    def __str__(self):
        # The message only mentions the processor and the input; the
        # underlying cause, if any, is carried by exception chaining.
        details = (self.proc_name, self.in_path)
        return "Processor %s failed on: %s" % details
33
34
class ProcessingTreeNode(object):
    """A file in a processing tree.

    Each node has a `path`, the list of processors still available to
    handle it, the nodes for the files its processor outputs, its depth
    (`level`) in the tree, and a dirtiness `state`.
    """

    def __init__(self, path, available_procs, level=0):
        self._processor = None
        self.state = STATE_UNKNOWN
        self.level = level
        self.outputs = []
        # NOTE: this list is not copied; `getProcessor` removes the
        # chosen processor from it in place.
        self.available_procs = available_procs
        self.path = path

    def getProcessor(self):
        """Return the processor for this node, picking it on first use.

        The first available processor whose `matches` accepts the node's
        path is chosen, cached, and removed from `available_procs`.
        Raises `ProcessorNotFoundError` if none matches.
        """
        if self._processor is not None:
            return self._processor

        nothing = object()
        chosen = next(
            (p for p in self.available_procs if p.matches(self.path)),
            nothing)
        if chosen is nothing:
            raise ProcessorNotFoundError()

        self._processor = chosen
        self.available_procs.remove(chosen)
        return self._processor

    def setState(self, state, recursive=True):
        """Set this node's state, and optionally the state of its whole
        sub-tree.
        """
        self.state = state
        if not recursive:
            return
        for child in self.outputs:
            child.setState(state, True)

    @property
    def is_leaf(self):
        """Whether this node has no outputs."""
        return not self.outputs

    def getLeaves(self):
        """Return the list of leaf nodes found at or below this node."""
        if self.is_leaf:
            return [self]
        found = []
        for child in self.outputs:
            found.extend(child.getLeaves())
        return found
73
74
class ProcessingTreeBuilder(object):
    """Builds the tree of `ProcessingTreeNode`s describing how a file
    gets processed by a set of processors.
    """

    def __init__(self, processors):
        self.processors = processors

    def build(self, path):
        """Build and return the root node of the processing tree for
        `path`.

        Raises `ProcessingTreeError` if the walk runs for too many
        iterations, or if a non-root processor asks to bypass structured
        processing. `ProcessorNotFoundError` propagates from
        `getProcessor` when no processor matches a node.
        """
        # The root gets its own copy of the processors list because
        # nodes remove processors from their list as they pick them.
        root = ProcessingTreeNode(path, list(self.processors))

        iterations_left = 100
        pending = [root]
        while pending:
            iterations_left -= 1
            if iterations_left <= 0:
                raise ProcessingTreeError("Infinite loop detected!")

            node = pending.pop()
            proc = node.getProcessor()

            # Only the root node is allowed to opt out of the tree
            # structure; in that case there is nothing more to build.
            if proc.is_bypassing_structured_processing:
                if node != root:
                    raise ProcessingTreeError("Only root processors can "
                                              "bypass structured processing.")
                break

            # Work out which files this processor outputs, next to the
            # input file.
            rel_dir, basename = os.path.split(node.path)
            out_names = proc.getOutputFilenames(basename)
            if out_names is None:
                continue

            for name in out_names:
                child_path = os.path.join(rel_dir, name)
                # Each child gets its own copy of what's left of the
                # parent's processors.
                child_procs = list(node.available_procs)
                child = ProcessingTreeNode(
                    child_path, child_procs, node.level + 1)
                node.outputs.append(child)

                # Outputs of the 'copy' processor are not walked any
                # further.
                if proc.PROCESSOR_NAME != 'copy':
                    pending.append(child)

        return root
117
118
class ProcessingTreeRunner(object):
    """Runs the processors of a processing tree.

    Root nodes (level 0) are read from `base_dir`, leaf nodes live in
    `out_dir`, and any other node lives in a per-level sub-directory of
    `tmp_dir`.
    """

    def __init__(self, base_dir, tmp_dir, out_dir):
        self.base_dir = base_dir
        self.tmp_dir = tmp_dir
        self.out_dir = out_dir

    def processSubTree(self, tree_root):
        """Process the dirty nodes found at or below `tree_root`.

        Returns a truthy value if at least one node was actually
        processed.
        """
        processed_any = False
        pending = [tree_root]
        while pending:
            node = pending.pop()
            self._computeNodeState(node)

            # Clean nodes are always walked through. Dirty nodes are
            # only walked through if processing them did something.
            descend = True
            if node.state == STATE_DIRTY:
                descend = self.processNode(node)
                processed_any |= descend

            if descend:
                pending.extend(o for o in node.outputs if not o.is_leaf)
        return processed_any

    def processNode(self, node):
        """Run the processor of `node` on its input file.

        Returns `True` if the processor did some work, `False` if it
        reported that there was nothing to do. Any exception raised
        along the way is re-raised as a `ProcessorError` chained to the
        original one.
        """
        full_path = self._getNodePath(node)
        proc = node.getProcessor()
        timer_name = proc.__class__.__name__

        if proc.is_bypassing_structured_processing:
            # The processor handles everything itself: give it the final
            # output directory and don't look at its result.
            try:
                start_time = time.perf_counter()
                with proc.app.env.stats.timerScope(timer_name):
                    proc.process(full_path, self.out_dir)
                timing = format_timed(
                    start_time, "(bypassing structured processing)",
                    colored=False)
                print_node(node, timing)
                return True
            except Exception as e:
                raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e

        # All outputs of a node must go to the same directory, so the
        # first output is enough to figure out where that is.
        out_dir = os.path.join(
            self._getNodeBaseDir(node.outputs[0]),
            os.path.dirname(node.path))
        if not os.path.isdir(out_dir):
            try:
                os.makedirs(out_dir, 0o755, exist_ok=True)
            except OSError:
                # Best effort: if the directory is really unusable, the
                # processor will fail below.
                pass

        try:
            # NOTE: this timestamp is currently not used on this code
            # path; only the bypassing path above reports timings.
            start_time = time.perf_counter()
            with proc.app.env.stats.timerScope(timer_name):
                proc_res = proc.process(full_path, out_dir)
            if proc_res is None:
                raise Exception("Processor '%s' didn't return a boolean "
                                "result value." % proc)
            if not proc_res:
                print_node(node, "-> %s [clean]" % out_dir)
                return False
            print_node(node, "-> %s" % out_dir)
            return True
        except Exception as e:
            raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e

    def _computeNodeState(self, node):
        """Figure out whether `node` is dirty or clean, if that's not
        known yet.

        A node is dirty when its processor handles dependency checks
        itself, requests a forced build, fails to report dependencies,
        or when one of its outputs is missing or older than the newest
        of the input and its dependencies.
        """
        if node.state != STATE_UNKNOWN:
            return

        proc = node.getProcessor()
        handles_own_checks = (
            proc.is_bypassing_structured_processing or
            not proc.is_delegating_dependency_check)
        if handles_own_checks:
            # This processor wants to handle things on its own, so only
            # this node (and not its sub-tree) is marked dirty.
            node.setState(STATE_DIRTY, False)
            return

        start_time = time.perf_counter()

        # Find the most recently modified file among the input and all
        # its dependencies (if any).
        full_path = os.path.join(self._getNodeBaseDir(node), node.path)
        newest_path = full_path
        newest_mtime = os.path.getmtime(full_path)
        force_build = False
        try:
            deps = proc.getDependencies(full_path)
            if deps == FORCE_BUILD:
                force_build = True
            elif deps is not None:
                for dep in deps:
                    dep_mtime = os.path.getmtime(dep)
                    if dep_mtime > newest_mtime:
                        newest_path, newest_mtime = dep, dep_mtime
        except Exception as e:
            logger.warning("%s -- Will force-bake: %s" % (e, node.path))
            node.setState(STATE_DIRTY, True)
            return

        if force_build:
            # Just do what the processor told us to do.
            node.setState(STATE_DIRTY, True)
            print_node(node, "Processor requested a forced build.")
        else:
            message = self._findStaleOutputReason(
                node, newest_path, newest_mtime)
            if message is None:
                node.setState(STATE_CLEAN, False)
            else:
                node.setState(STATE_DIRTY, True)
                message += " Re-processing sub-tree."
                print_node(node, message)

        state_labels = {STATE_DIRTY: "dirty", STATE_CLEAN: "clean"}
        state = state_labels.get(node.state, "unknown")
        logger.debug(format_timed(start_time,
                                  "Computed node dirtyness: %s" % state,
                                  indent_level=node.level, colored=False))

    def _findStaleOutputReason(self, node, newest_path, newest_mtime):
        """Return a message explaining why the outputs of `node` are out
        of date, or `None` if they are all up to date.

        The outputs are checked in order and the first missing or too
        old one is reported.
        """
        for o in node.outputs:
            full_out_path = self._getNodePath(o)
            if not os.path.isfile(full_out_path):
                return "Output '%s' doesn't exist." % o.path
            o_mtime = os.path.getmtime(full_out_path)
            if o_mtime < newest_mtime:
                return "Input '%s' is newer than output '%s'." % (
                    newest_path, o.path)
        return None

    def _getNodeBaseDir(self, node):
        """Return the directory that the path of `node` is relative
        to.
        """
        if node.level == 0:
            base_dir = self.base_dir
        elif node.is_leaf:
            base_dir = self.out_dir
        else:
            base_dir = os.path.join(self.tmp_dir, str(node.level))
        return base_dir

    def _getNodePath(self, node):
        """Return the path of `node` joined to its base directory."""
        return os.path.join(self._getNodeBaseDir(node), node.path)
268
269
def print_node(node, message=None, recursive=False):
    """Log, at debug level, a line describing `node`: its path, the name
    of its processor (or 'n/a' if it has none), and an optional message.

    The line is prefixed with one space per tree level. If `recursive`
    is set, the outputs of the node are logged too, without any message.
    """
    indent = ' ' * node.level
    try:
        proc_name = node.getProcessor().PROCESSOR_NAME
    except ProcessorNotFoundError:
        proc_name = 'n/a'

    text = message if message else ''
    logger.debug('%s%s [%s] %s' % (indent, node.path, proc_name, text))

    if not recursive:
        return
    for child in node.outputs:
        print_node(child, None, True)
283
284
def get_node_name_tree(node):
    """Return a `(processor_name, children)` tuple describing the tree
    under `node`.

    `children` holds the same kind of tuple for each output of `node`
    that has outputs of its own; outputs without any outputs are left
    out. The name is 'n/a' for a node without a matching processor.
    """
    try:
        name = node.getProcessor().PROCESSOR_NAME
    except ProcessorNotFoundError:
        name = 'n/a'

    subtrees = [get_node_name_tree(o) for o in node.outputs if o.outputs]
    return (name, subtrees)
297