Mercurial > piecrust2
diff piecrust/pipelines/_proctree.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines,
one for assets and one for pages. The asset pipeline is vaguely functional,
but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each
content item. This should allow for better parallelization.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 17 May 2017 00:11:48 -0700 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/_proctree.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,297 @@ +import os +import time +import os.path +import logging +from piecrust.chefutil import format_timed +from piecrust.processing.base import FORCE_BUILD + + +logger = logging.getLogger(__name__) + + +STATE_UNKNOWN = 0 +STATE_DIRTY = 1 +STATE_CLEAN = 2 + + +class ProcessingTreeError(Exception): + pass + + +class ProcessorNotFoundError(ProcessingTreeError): + pass + + +class ProcessorError(ProcessingTreeError): + def __init__(self, proc_name, in_path, *args): + super(ProcessorError, self).__init__(*args) + self.proc_name = proc_name + self.in_path = in_path + + def __str__(self): + return "Processor %s failed on: %s" % (self.proc_name, self.in_path) + + +class ProcessingTreeNode(object): + def __init__(self, path, available_procs, level=0): + self.path = path + self.available_procs = available_procs + self.outputs = [] + self.level = level + self.state = STATE_UNKNOWN + self._processor = None + + def getProcessor(self): + if self._processor is None: + for p in self.available_procs: + if p.matches(self.path): + self._processor = p + self.available_procs.remove(p) + break + else: + raise ProcessorNotFoundError() + return self._processor + + def setState(self, state, recursive=True): + self.state = state + if recursive: + for o in self.outputs: + o.setState(state, True) + + @property + def is_leaf(self): + return len(self.outputs) == 0 + + def getLeaves(self): + if self.is_leaf: + return [self] + leaves = [] + for o in self.outputs: + for l in o.getLeaves(): + leaves.append(l) + return leaves + + +class ProcessingTreeBuilder(object): + def __init__(self, processors): + self.processors = processors + + def build(self, path): + tree_root = ProcessingTreeNode(path, list(self.processors)) + + loop_guard = 100 + walk_stack = [tree_root] + while len(walk_stack) > 0: + loop_guard -= 1 + if loop_guard <= 0: + raise ProcessingTreeError("Infinite loop detected!") + + cur_node = walk_stack.pop() + proc = cur_node.getProcessor() + + # If the root tree node (and only that one) wants to bypass this + # whole tree business, so be it. + if proc.is_bypassing_structured_processing: + if cur_node != tree_root: + raise ProcessingTreeError("Only root processors can " + "bypass structured processing.") + break + + # Get the destination directory and output files. + rel_dir, basename = os.path.split(cur_node.path) + out_names = proc.getOutputFilenames(basename) + if out_names is None: + continue + + for n in out_names: + out_node = ProcessingTreeNode( + os.path.join(rel_dir, n), + list(cur_node.available_procs), + cur_node.level + 1) + cur_node.outputs.append(out_node) + + if proc.PROCESSOR_NAME != 'copy': + walk_stack.append(out_node) + + return tree_root + + +class ProcessingTreeRunner(object): + def __init__(self, base_dir, tmp_dir, out_dir): + self.base_dir = base_dir + self.tmp_dir = tmp_dir + self.out_dir = out_dir + + def processSubTree(self, tree_root): + did_process = False + walk_stack = [tree_root] + while len(walk_stack) > 0: + cur_node = walk_stack.pop() + + self._computeNodeState(cur_node) + if cur_node.state == STATE_DIRTY: + did_process_this_node = self.processNode(cur_node) + did_process |= did_process_this_node + + if did_process_this_node: + for o in cur_node.outputs: + if not o.is_leaf: + walk_stack.append(o) + else: + for o in cur_node.outputs: + if not o.is_leaf: + walk_stack.append(o) + return did_process + + def processNode(self, node): + full_path = self._getNodePath(node) + proc = node.getProcessor() + if proc.is_bypassing_structured_processing: + try: + start_time = time.perf_counter() + with proc.app.env.stats.timerScope(proc.__class__.__name__): + proc.process(full_path, self.out_dir) + print_node( + node, + format_timed( + start_time, "(bypassing structured processing)", + colored=False)) + return True + except Exception as e: + raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e + + # All outputs of a node must go to the same directory, so we can get + # the output directory off of the first output. + base_out_dir = self._getNodeBaseDir(node.outputs[0]) + rel_out_dir = os.path.dirname(node.path) + out_dir = os.path.join(base_out_dir, rel_out_dir) + if not os.path.isdir(out_dir): + try: + os.makedirs(out_dir, 0o755, exist_ok=True) + except OSError: + pass + + try: + start_time = time.perf_counter() + with proc.app.env.stats.timerScope(proc.__class__.__name__): + proc_res = proc.process(full_path, out_dir) + if proc_res is None: + raise Exception("Processor '%s' didn't return a boolean " + "result value." % proc) + if proc_res: + print_node(node, "-> %s" % out_dir) + return True + else: + print_node(node, "-> %s [clean]" % out_dir) + return False + except Exception as e: + raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e + + def _computeNodeState(self, node): + if node.state != STATE_UNKNOWN: + return + + proc = node.getProcessor() + if (proc.is_bypassing_structured_processing or + not proc.is_delegating_dependency_check): + # This processor wants to handle things on its own... + node.setState(STATE_DIRTY, False) + return + + start_time = time.perf_counter() + + # Get paths and modification times for the input path and + # all dependencies (if any). + base_dir = self._getNodeBaseDir(node) + full_path = os.path.join(base_dir, node.path) + in_mtime = (full_path, os.path.getmtime(full_path)) + force_build = False + try: + deps = proc.getDependencies(full_path) + if deps == FORCE_BUILD: + force_build = True + elif deps is not None: + for dep in deps: + dep_mtime = os.path.getmtime(dep) + if dep_mtime > in_mtime[1]: + in_mtime = (dep, dep_mtime) + except Exception as e: + logger.warning("%s -- Will force-bake: %s" % (e, node.path)) + node.setState(STATE_DIRTY, True) + return + + if force_build: + # Just do what the processor told us to do. + node.setState(STATE_DIRTY, True) + message = "Processor requested a forced build." + print_node(node, message) + else: + # Get paths and modification times for the outputs. + message = None + for o in node.outputs: + full_out_path = self._getNodePath(o) + if not os.path.isfile(full_out_path): + message = "Output '%s' doesn't exist." % o.path + break + o_mtime = os.path.getmtime(full_out_path) + if o_mtime < in_mtime[1]: + message = "Input '%s' is newer than output '%s'." % ( + in_mtime[0], o.path) + break + if message is not None: + node.setState(STATE_DIRTY, True) + message += " Re-processing sub-tree." + print_node(node, message) + else: + node.setState(STATE_CLEAN, False) + + if node.state == STATE_DIRTY: + state = "dirty" + elif node.state == STATE_CLEAN: + state = "clean" + else: + state = "unknown" + logger.debug(format_timed(start_time, + "Computed node dirtyness: %s" % state, + indent_level=node.level, colored=False)) + + def _getNodeBaseDir(self, node): + if node.level == 0: + return self.base_dir + if node.is_leaf: + return self.out_dir + return os.path.join(self.tmp_dir, str(node.level)) + + def _getNodePath(self, node): + base_dir = self._getNodeBaseDir(node) + return os.path.join(base_dir, node.path) + + +def print_node(node, message=None, recursive=False): + indent = ' ' * node.level + try: + proc_name = node.getProcessor().PROCESSOR_NAME + except ProcessorNotFoundError: + proc_name = 'n/a' + + message = message or '' + logger.debug('%s%s [%s] %s' % (indent, node.path, proc_name, message)) + + if recursive: + for o in node.outputs: + print_node(o, None, True) + + +def get_node_name_tree(node): + try: + proc_name = node.getProcessor().PROCESSOR_NAME + except ProcessorNotFoundError: + proc_name = 'n/a' + + children = [] + for o in node.outputs: + if not o.outputs: + continue + children.append(get_node_name_tree(o)) + return (proc_name, children) +