Mercurial > piecrust2
comparison piecrust/pipelines/_proctree.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines,
one for assets and one for pages. The asset pipeline is vaguely functional,
but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each
content item. This should allow for better parallelization.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 17 May 2017 00:11:48 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
851:2c7e57d80bba | 852:4850f8c21b6e |
---|---|
1 import os | |
2 import time | |
3 import os.path | |
4 import logging | |
5 from piecrust.chefutil import format_timed | |
6 from piecrust.processing.base import FORCE_BUILD | |
7 | |
8 | |
# Module-level logger shared by the tree runner and the printing helpers.
logger = logging.getLogger(__name__)


# States a processing tree node can be in. A node starts out as "unknown"
# until the runner has computed whether it needs to be (re)processed.
STATE_UNKNOWN, STATE_DIRTY, STATE_CLEAN = range(3)
15 | |
16 | |
class ProcessingTreeError(Exception):
    """Base class for the errors raised while building or running a
    processing tree."""
19 | |
20 | |
class ProcessorNotFoundError(ProcessingTreeError):
    """Raised when none of a node's available processors matches its path."""
23 | |
24 | |
class ProcessorError(ProcessingTreeError):
    """Error raised when a processor fails on an input file.

    The processor name and the input path are kept as attributes, and are
    the only things reported by the string representation of the error.
    """

    def __init__(self, proc_name, in_path, *args):
        # Any extra positional arguments are handed to the base exception.
        super().__init__(*args)
        self.in_path = in_path
        self.proc_name = proc_name

    def __str__(self):
        details = (self.proc_name, self.in_path)
        return "Processor %s failed on: %s" % details
33 | |
34 | |
class ProcessingTreeNode(object):
    """A node in a processing tree.

    A node holds a path, the processors that can still be used on it, the
    child nodes for the outputs of its processor, its depth (`level`) in the
    tree, and its dirtiness state.
    """

    def __init__(self, path, available_procs, level=0):
        self.path = path
        self.level = level
        self.available_procs = available_procs
        self.outputs = []
        self.state = STATE_UNKNOWN
        # Resolved lazily by `getProcessor`.
        self._processor = None

    def getProcessor(self):
        """Return the processor for this node, resolving it on first use.

        The first available processor whose `matches` accepts the node's
        path is picked, cached, and removed from `available_procs`.

        Raises `ProcessorNotFoundError` if no available processor matches.
        """
        if self._processor is not None:
            return self._processor

        not_found = object()
        found = next(
            (p for p in self.available_procs if p.matches(self.path)),
            not_found)
        if found is not_found:
            raise ProcessorNotFoundError()

        self._processor = found
        self.available_procs.remove(found)
        return self._processor

    def setState(self, state, recursive=True):
        """Set the state of this node and, if `recursive` is true, of all
        the nodes below it."""
        self.state = state
        if not recursive:
            return
        for child in self.outputs:
            child.setState(state, True)

    @property
    def is_leaf(self):
        """Whether this node has no outputs."""
        return not self.outputs

    def getLeaves(self):
        """Return the list of leaf nodes found under this node, or a list
        with only this node if it is itself a leaf."""
        if self.is_leaf:
            return [self]
        return [leaf
                for child in self.outputs
                for leaf in child.getLeaves()]
73 | |
74 | |
class ProcessingTreeBuilder(object):
    """Builds the tree of nodes describing how a file gets processed by a
    given list of processors."""

    def __init__(self, processors):
        self.processors = processors

    def build(self, path):
        """Build and return the root node of the processing tree for `path`.

        Raises `ProcessingTreeError` if too many nodes are visited, or if
        a processor other than the root's wants to bypass structured
        processing.
        """
        # Each node gets its own copy of the processors list, because
        # resolving a node's processor removes it from that node's list.
        root = ProcessingTreeNode(path, list(self.processors))

        iterations_left = 100
        pending = [root]
        while pending:
            iterations_left -= 1
            if iterations_left <= 0:
                raise ProcessingTreeError("Infinite loop detected!")

            node = pending.pop()
            proc = node.getProcessor()

            # Only the root node is allowed to opt out of the tree
            # structure; in that case the tree is just the root.
            if proc.is_bypassing_structured_processing:
                if node is root:
                    break
                raise ProcessingTreeError("Only root processors can "
                                          "bypass structured processing.")

            # Outputs are created next to the node's own relative path.
            rel_dir, basename = os.path.split(node.path)
            out_names = proc.getOutputFilenames(basename)
            if out_names is None:
                continue

            for name in out_names:
                child = ProcessingTreeNode(
                    os.path.join(rel_dir, name),
                    list(node.available_procs),
                    node.level + 1)
                node.outputs.append(child)

                # Outputs of the 'copy' processor are not walked further.
                if proc.PROCESSOR_NAME != 'copy':
                    pending.append(child)

        return root
117 | |
118 | |
class ProcessingTreeRunner(object):
    """Runs the processors of a processing tree.

    Root nodes (level 0) are read from `base_dir`, leaf nodes are written to
    `out_dir`, and every other node lives in a per-level sub-directory of
    `tmp_dir`.
    """

    def __init__(self, base_dir, tmp_dir, out_dir):
        self.base_dir = base_dir
        self.tmp_dir = tmp_dir
        self.out_dir = out_dir

    def processSubTree(self, tree_root):
        """Process the dirty nodes found at and under `tree_root`.

        Returns whether at least one node was actually processed.
        """
        did_process = False
        walk_stack = [tree_root]
        while walk_stack:
            cur_node = walk_stack.pop()

            self._computeNodeState(cur_node)
            if cur_node.state == STATE_DIRTY:
                did_process_this_node = self.processNode(cur_node)
                did_process |= did_process_this_node
                if not did_process_this_node:
                    # The processor didn't produce anything new, so there
                    # is nothing to do further down this branch.
                    continue

            # Leaves are only outputs; there's nothing to run on them.
            walk_stack.extend(
                o for o in cur_node.outputs if not o.is_leaf)
        return did_process

    def processNode(self, node):
        """Run the processor of `node` on its input file.

        Returns `True` if the processor did some work, `False` if it
        reported that there was nothing to do.

        Raises `ProcessorError` (chained to the original exception) if the
        processor fails or doesn't return a result value.
        """
        full_path = self._getNodePath(node)
        proc = node.getProcessor()
        if proc.is_bypassing_structured_processing:
            try:
                start_time = time.perf_counter()
                with proc.app.env.stats.timerScope(proc.__class__.__name__):
                    proc.process(full_path, self.out_dir)
                print_node(
                    node,
                    format_timed(
                        start_time, "(bypassing structured processing)",
                        colored=False))
                return True
            except Exception as e:
                raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e

        # All outputs of a node must go to the same directory, so we can get
        # the output directory off of the first output.
        base_out_dir = self._getNodeBaseDir(node.outputs[0])
        rel_out_dir = os.path.dirname(node.path)
        out_dir = os.path.join(base_out_dir, rel_out_dir)
        try:
            # `exist_ok` already covers the directory existing, including
            # when something else creates it at the same time.
            os.makedirs(out_dir, 0o755, exist_ok=True)
        except OSError as e:
            # Keep going (the processor gets a chance to run anyway), but
            # don't hide the reason why the directory couldn't be created.
            logger.warning(
                "Can't create output directory '%s': %s", out_dir, e)

        try:
            with proc.app.env.stats.timerScope(proc.__class__.__name__):
                proc_res = proc.process(full_path, out_dir)
            if proc_res is None:
                raise Exception("Processor '%s' didn't return a boolean "
                                "result value." % proc)
            if proc_res:
                print_node(node, "-> %s" % out_dir)
                return True
            else:
                print_node(node, "-> %s [clean]" % out_dir)
                return False
        except Exception as e:
            raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e

    def _computeNodeState(self, node):
        """Figure out whether `node` is dirty or clean, if not known yet.

        A node is dirty when its processor handles dependency checks on its
        own, when the processor requests a forced build, when getting or
        checking the dependencies fails, when an output file is missing, or
        when an output is older than the input or one of its dependencies.
        """
        if node.state != STATE_UNKNOWN:
            return

        proc = node.getProcessor()
        if (proc.is_bypassing_structured_processing or
                not proc.is_delegating_dependency_check):
            # This processor wants to handle things on its own...
            node.setState(STATE_DIRTY, False)
            return

        start_time = time.perf_counter()

        # Get paths and modification times for the input path and
        # all dependencies (if any). `in_mtime` tracks the most recently
        # modified one as a `(path, mtime)` pair.
        base_dir = self._getNodeBaseDir(node)
        full_path = os.path.join(base_dir, node.path)
        in_mtime = (full_path, os.path.getmtime(full_path))
        force_build = False
        try:
            deps = proc.getDependencies(full_path)
            if deps == FORCE_BUILD:
                force_build = True
            elif deps is not None:
                for dep in deps:
                    dep_mtime = os.path.getmtime(dep)
                    if dep_mtime > in_mtime[1]:
                        in_mtime = (dep, dep_mtime)
        except Exception as e:
            logger.warning("%s -- Will force-bake: %s", e, node.path)
            node.setState(STATE_DIRTY, True)
            return

        if force_build:
            # Just do what the processor told us to do.
            node.setState(STATE_DIRTY, True)
            message = "Processor requested a forced build."
            print_node(node, message)
        else:
            # Get paths and modification times for the outputs.
            message = None
            for o in node.outputs:
                full_out_path = self._getNodePath(o)
                if not os.path.isfile(full_out_path):
                    message = "Output '%s' doesn't exist." % o.path
                    break
                o_mtime = os.path.getmtime(full_out_path)
                if o_mtime < in_mtime[1]:
                    message = "Input '%s' is newer than output '%s'." % (
                        in_mtime[0], o.path)
                    break
            if message is not None:
                node.setState(STATE_DIRTY, True)
                message += " Re-processing sub-tree."
                print_node(node, message)
            else:
                node.setState(STATE_CLEAN, False)

        if node.state == STATE_DIRTY:
            state = "dirty"
        elif node.state == STATE_CLEAN:
            state = "clean"
        else:
            state = "unknown"
        logger.debug(format_timed(start_time,
                                  "Computed node dirtyness: %s" % state,
                                  indent_level=node.level, colored=False))

    def _getNodeBaseDir(self, node):
        """Return the directory the path of `node` is relative to."""
        if node.level == 0:
            return self.base_dir
        if node.is_leaf:
            return self.out_dir
        return os.path.join(self.tmp_dir, str(node.level))

    def _getNodePath(self, node):
        """Return the path of `node` joined to its base directory."""
        base_dir = self._getNodeBaseDir(node)
        return os.path.join(base_dir, node.path)
268 | |
269 | |
def print_node(node, message=None, recursive=False):
    """Log a debug line describing `node`.

    The line has the node's path (indented according to its level), the
    name of its processor (or 'n/a' if none matches), and the optional
    `message`. If `recursive` is true, the nodes under it are logged too,
    without any message.
    """
    indent = ' ' * node.level
    try:
        proc_name = node.getProcessor().PROCESSOR_NAME
    except ProcessorNotFoundError:
        proc_name = 'n/a'

    # Let the logging module do the formatting, so that nothing gets
    # formatted when debug logging is disabled.
    logger.debug('%s%s [%s] %s', indent, node.path, proc_name, message or '')

    if recursive:
        for o in node.outputs:
            print_node(o, None, True)
283 | |
284 | |
def get_node_name_tree(node):
    """Return a `(processor_name, children)` tuple describing `node`.

    The processor name is 'n/a' if no processor matches the node, and
    `children` is the list of the same kind of tuples for each output of
    the node that has outputs of its own.
    """
    try:
        name = node.getProcessor().PROCESSOR_NAME
    except ProcessorNotFoundError:
        name = 'n/a'

    # Outputs without any outputs of their own are left out.
    sub_trees = [get_node_name_tree(child)
                 for child in node.outputs
                 if child.outputs]
    return (name, sub_trees)
297 |