piecrust/pipelines/asset.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0.

* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are two built-in pipelines, one for assets and one for pages. The asset pipeline is vaguely functional, but the page pipeline is completely broken right now.
* Rewrite the baking process as just running the appropriate pipelines on each content item. This should allow for better parallelization (see the sketch below the header).
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents 851:2c7e57d80bba
children f070a4fc033c
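The commit message's model reduces baking to one loop: enumerate the items of each `ContentSource` and hand each one to that source's pipeline. Below is a minimal sketch of that model; `SimpleSource`, `UpperCasePipeline` and `bake` are hypothetical stand-ins for illustration, not PieCrust's actual API (only the method names echo the ones used in this module).

    # Hypothetical stand-ins for illustration; not PieCrust's real classes.
    import os

    class SimpleSource:
        """A content source: anything that can enumerate content items."""
        def __init__(self, root):
            self.root = root

        def getAllContents(self):
            for dirpath, _, filenames in os.walk(self.root):
                for name in filenames:
                    yield os.path.join(dirpath, name)

    class UpperCasePipeline:
        """A toy pipeline that processes one item at a time."""
        def initialize(self):
            pass

        def run(self, item, out_dir):
            with open(item, 'r') as fp:
                text = fp.read()
            out_path = os.path.join(out_dir, os.path.basename(item))
            with open(out_path, 'w') as fp:
                fp.write(text.upper())

        def shutdown(self):
            pass

    def bake(source, pipeline, out_dir):
        # Baking is just running the pipeline on every item of the source.
        pipeline.initialize()
        try:
            for item in source.getAllContents():
                pipeline.run(item, out_dir)
        finally:
            pipeline.shutdown()

Because `run` touches one item at a time, items can be dispatched to workers independently, which is the parallelization the commit message mentions.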
import os
import os.path
import re
import logging
from piecrust.pipelines._procrecords import AssetPipelineRecordEntry
from piecrust.pipelines._proctree import (
    ProcessingTreeBuilder, ProcessingTreeRunner,
    get_node_name_tree, print_node,
    STATE_DIRTY)
from piecrust.pipelines.base import ContentPipeline
from piecrust.processing.base import ProcessorContext
from piecrust.sources.fs import FSContentSourceBase


logger = logging.getLogger(__name__)


class AssetPipeline(ContentPipeline):
    PIPELINE_NAME = 'asset'
    RECORD_CLASS = AssetPipelineRecordEntry

    def __init__(self, source):
        if not isinstance(source, FSContentSourceBase):
            raise Exception(
                "The asset pipeline only supports file-system sources.")

        super().__init__(source)
        self.enabled_processors = None
        self.ignore_patterns = []
        self._processors = None
        self._base_dir = source.fs_endpoint_path

    def initialize(self, ctx):
        # Get the list of processors for this run.
        processors = self.app.plugin_loader.getProcessors()
        if self.enabled_processors is not None:
            logger.debug("Filtering processors to: %s" %
                         self.enabled_processors)
            processors = get_filtered_processors(processors,
                                                 self.enabled_processors)

        # Invoke pre-processors.
        proc_ctx = ProcessorContext(self, ctx)
        for proc in processors:
            proc.onPipelineStart(proc_ctx)

        # Add any extra processors registered in the `onPipelineStart` step.
        processors += proc_ctx.extra_processors

        # Sort our processors by priority.
        processors.sort(key=lambda p: p.priority)

        # OK, that's the list of processors for this run.
        self._processors = processors

        # Pre-processors can define additional ignore patterns, so let's
        # add them to what we had already.
        self.ignore_patterns += make_re(proc_ctx.ignore_patterns)

        # Register timers.
        stats = self.app.env.stats
        stats.registerTimer('BuildProcessingTree', raise_if_registered=False)
        stats.registerTimer('RunProcessingTree', raise_if_registered=False)

    def run(self, content_item, ctx, result):
        # See if we need to ignore this item.
        rel_path = os.path.relpath(content_item.spec, self._base_dir)
        if re_matchany(rel_path, self.ignore_patterns):
            return

        record = result.record
        stats = self.app.env.stats

        # Build the processing tree for this job.
        with stats.timerScope('BuildProcessingTree'):
            builder = ProcessingTreeBuilder(self._processors)
            tree_root = builder.build(rel_path)
            record.flags |= AssetPipelineRecordEntry.FLAG_PREPARED

        # Prepare and run the tree.
        print_node(tree_root, recursive=True)
        leaves = tree_root.getLeaves()
        record.rel_outputs = [leaf.path for leaf in leaves]
        record.proc_tree = get_node_name_tree(tree_root)
        if tree_root.getProcessor().is_bypassing_structured_processing:
            record.flags |= (
                AssetPipelineRecordEntry.FLAG_BYPASSED_STRUCTURED_PROCESSING)

        if ctx.force:
            tree_root.setState(STATE_DIRTY, True)

        with stats.timerScope('RunProcessingTree'):
            runner = ProcessingTreeRunner(
                self._base_dir, self.tmp_dir, ctx.out_dir)
            if runner.processSubTree(tree_root):
                record.flags |= (
                    AssetPipelineRecordEntry.FLAG_PROCESSED)

    def shutdown(self, ctx):
        # Invoke post-processors.
        proc_ctx = ProcessorContext(self, ctx)
        for proc in self._processors:
            proc.onPipelineEnd(proc_ctx)

    def collapseRecords(self, record_history):
        for prev, cur in record_history.diffs():
            if prev and cur and not cur.was_processed:
                # This asset wasn't processed, so the information from
                # last time is still valid: clear the `PROCESSED` flag
                # and mark the entry as collapsed from the last run.
                cur.flags = (
                    (prev.flags &
                     ~AssetPipelineRecordEntry.FLAG_PROCESSED) |
                    AssetPipelineRecordEntry.FLAG_COLLAPSED_FROM_LAST_RUN)
                cur.out_paths = list(prev.out_paths)
                cur.errors = list(prev.errors)

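    # For instance (hypothetical flag values): the collapse above turns
    # `prev.flags == FLAG_PREPARED | FLAG_PROCESSED` into
    # `FLAG_PREPARED | FLAG_COLLAPSED_FROM_LAST_RUN`.
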
    def getDeletions(self, record_history):
        for prev, cur in record_history.diffs():
            if prev and not cur:
                for p in prev.out_paths:
                    yield (p, 'previous asset was removed')
            elif prev and cur and cur.was_processed_successfully:
                diff = set(prev.out_paths) - set(cur.out_paths)
                for p in diff:
                    yield (p, 'asset changed outputs')


split_processor_names_re = re.compile(r'[ ,]+')


def get_filtered_processors(processors, authorized_names):
    if not authorized_names or authorized_names == 'all':
        return processors

    if isinstance(authorized_names, str):
        authorized_names = split_processor_names_re.split(authorized_names)

    procs = []
    has_star = 'all' in authorized_names
    for p in processors:
        for name in authorized_names:
            if name == p.PROCESSOR_NAME:
                # Explicitly included.
                procs.append(p)
                break
            if name == ('-%s' % p.PROCESSOR_NAME):
                # Explicitly excluded with a `-` prefix.
                break
        else:
            # No explicit match or exclusion; include the processor only
            # if the list has the `all` wildcard.
            if has_star:
                procs.append(p)
    return procs
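
# For example (hypothetical processor names):
#   get_filtered_processors(procs, 'copy, less') keeps only the processors
#   named 'copy' and 'less', while
#   get_filtered_processors(procs, 'all, -sitemap') keeps every processor
#   except the one named 'sitemap'.

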
def make_re(patterns):
    re_patterns = []
    for pat in patterns:
        if len(pat) > 2 and pat[0] == '/' and pat[-1] == '/':
            # A pattern wrapped in slashes is used verbatim as a regex.
            re_patterns.append(pat[1:-1])
        else:
            # Otherwise, treat it as a glob in which `*` and `?` match
            # anything but a path separator.
            escaped_pat = (
                re.escape(pat)
                .replace(r'\*', r'[^/\\]*')
                .replace(r'\?', r'[^/\\]'))
            re_patterns.append(escaped_pat)
    return [re.compile(p) for p in re_patterns]
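
# For instance, make_re(['*.psd', '/\\.DS_Store/']) (hypothetical patterns)
# compiles the glob '*.psd' to the regex [^/\\]*\.psd, and uses \.DS_Store
# verbatim because it is wrapped in slashes.

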
def re_matchany(rel_path, patterns):
    # Skip patterns use a forward slash regardless of the platform.
    rel_path = rel_path.replace('\\', '/')
    for pattern in patterns:
        if pattern.search(rel_path):
            return True
    return False


re_ansicolors = re.compile('\033\\[\\d+m')


def _get_errors(ex, strip_colors=False):
    # Walk the exception and its `__cause__` chain, collecting messages.
    errors = []
    while ex is not None:
        msg = str(ex)
        if strip_colors:
            msg = re_ansicolors.sub('', msg)
        errors.append(msg)
        ex = ex.__cause__
    return errors