comparison piecrust/commands/builtin/baking.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0.

* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines, one for assets and one for pages. The asset pipeline is vaguely functional, but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each content item. This should allow for better parallelization.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents 08e6484a2600
children f070a4fc033c
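
The commit message above describes the new source/pipeline model this file is being adapted to. As orientation for the diff below, here is a minimal, self-contained sketch of that idea in plain Python. It is hedged: the class and method names (`ContentSource`, `ContentPipeline`, `processItem`, `bake`) are illustrative stand-ins, not the actual PieCrust 3.0 API (which the commit message says is still partly broken); only the `'page'`/`'asset'` pipeline names and the `allowed_pipelines`-style filtering come from the changeset itself.

# Hypothetical sketch of the "everything is a content source, baking runs a
# pipeline per source" model described in the commit message. These class
# names are illustrative, not the real PieCrust 3.0 API; only the
# 'page'/'asset' pipeline names and the allowed_pipelines filtering appear
# in the changeset below.

class ContentSource:
    # A named collection of content items (pages, assets, ...).
    def __init__(self, name, items):
        self.name = name
        self.items = items

    def getAllContents(self):
        yield from self.items


class ContentPipeline:
    # Base class: a pipeline processes every item of one source.
    PIPELINE_NAME = None

    def run(self, source, out_dir):
        for item in source.getAllContents():
            self.processItem(item, out_dir)

    def processItem(self, item, out_dir):
        raise NotImplementedError()


class AssetPipeline(ContentPipeline):
    PIPELINE_NAME = 'asset'

    def processItem(self, item, out_dir):
        print("processing asset %s -> %s" % (item, out_dir))


class PagePipeline(ContentPipeline):
    PIPELINE_NAME = 'page'

    def processItem(self, item, out_dir):
        print("rendering page %s -> %s" % (item, out_dir))


def bake(sources, out_dir, allowed_pipelines=None):
    # Baking is just running the appropriate pipeline on each source's items;
    # allowed_pipelines mirrors the --pipelines/--html-only/--assets-only
    # filtering added to the bake command in this changeset.
    pipelines = {p.PIPELINE_NAME: p for p in (AssetPipeline(), PagePipeline())}
    for source, pipeline_name in sources:
        if allowed_pipelines and pipeline_name not in allowed_pipelines:
            continue
        pipelines[pipeline_name].run(source, out_dir)


if __name__ == '__main__':
    sources = [
        (ContentSource('pages', ['pages/foo.md']), 'page'),
        (ContentSource('theme_assets', ['theme/style.css']), 'asset')]
    bake(sources, '_counter', allowed_pipelines=['asset'])

In the rewritten `bake` command in the diff below, this filtering surfaces as the new `-p`/`--pipelines` option plus the existing `--html-only`/`--assets-only` flags, which are translated into the `allowed_pipelines` list passed to `Baker`.
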
--- piecrust/commands/builtin/baking.py  851:2c7e57d80bba
+++ piecrust/commands/builtin/baking.py  852:4850f8c21b6e
@@ -3,24 +3,11 @@
 import logging
 import hashlib
 import fnmatch
 import datetime
 from colorama import Fore
-from piecrust import CACHE_DIR
-from piecrust.baking.baker import Baker
-from piecrust.baking.records import (
-    BakeRecord, BakeRecordEntry, SubPageBakeInfo)
-from piecrust.chefutil import format_timed
 from piecrust.commands.base import ChefCommand
-from piecrust.environment import ExecutionStats
-from piecrust.processing.pipeline import ProcessorPipeline
-from piecrust.processing.records import (
-    ProcessorPipelineRecord,
-    FLAG_PREPARED, FLAG_PROCESSED, FLAG_BYPASSED_STRUCTURED_PROCESSING,
-    FLAG_COLLAPSED_FROM_LAST_RUN)
-from piecrust.rendering import (
-    PASS_FORMATTING, PASS_RENDERING)
 
 
 logger = logging.getLogger(__name__)
 
 
@@ -30,136 +17,121 @@
         self.name = 'bake'
         self.description = "Bakes your website into static HTML files."
 
     def setupParser(self, parser, app):
         parser.add_argument(
             '-o', '--output',
             help="The directory to put all the baked HTML files into "
                  "(defaults to `_counter`)")
         parser.add_argument(
             '-f', '--force',
             help="Force re-baking the entire website.",
             action='store_true')
         parser.add_argument(
-            '-w', '--workers',
-            help="The number of worker processes to spawn.",
-            type=int, default=-1)
+            '-p', '--pipelines',
+            help="The pipelines to run.",
+            nargs='*')
         parser.add_argument(
-            '--batch-size',
-            help="The number of jobs per batch.",
+            '-w', '--workers',
+            help="The number of worker processes to spawn.",
             type=int, default=-1)
         parser.add_argument(
-            '--assets-only',
-            help="Only bake the assets (don't bake the web pages).",
-            action='store_true')
+            '--batch-size',
+            help="The number of jobs per batch.",
+            type=int, default=-1)
         parser.add_argument(
-            '--html-only',
-            help="Only bake the pages (don't run the asset pipeline).",
+            '--assets-only',
+            help="Only bake the assets (don't bake the web pages).",
             action='store_true')
         parser.add_argument(
-            '--show-stats',
-            help="Show detailed information about the bake.",
+            '--html-only',
+            help="Only bake the pages (don't run the asset pipeline).",
             action='store_true')
+        parser.add_argument(
+            '--show-stats',
+            help="Show detailed information about the bake.",
+            action='store_true')
 
     def run(self, ctx):
+        from piecrust.chefutil import format_timed
+
         out_dir = (ctx.args.output or
                    os.path.join(ctx.app.root_dir, '_counter'))
 
-        success = True
-        ctx.stats = {}
         start_time = time.perf_counter()
         try:
-            # Bake the site sources.
-            if not ctx.args.assets_only:
-                success = success & self._bakeSources(ctx, out_dir)
-
-            # Bake the assets.
-            if not ctx.args.html_only:
-                success = success & self._bakeAssets(ctx, out_dir)
+            records = self._doBake(ctx, out_dir)
 
             # Show merged stats.
             if ctx.args.show_stats:
                 logger.info("-------------------")
                 logger.info("Timing information:")
-                _show_stats(ctx.stats)
+                _show_stats(records.stats)
 
             # All done.
             logger.info('-------------------------')
             logger.info(format_timed(start_time, 'done baking'))
-            return 0 if success else 1
+            return 0 if records.success else 1
         except Exception as ex:
             if ctx.app.debug:
                 logger.exception(ex)
             else:
                 logger.error(str(ex))
             return 1
 
-    def _bakeSources(self, ctx, out_dir):
+    def _doBake(self, ctx, out_dir):
+        from piecrust.baking.baker import Baker
+
         if ctx.args.workers > 0:
             ctx.app.config.set('baker/workers', ctx.args.workers)
         if ctx.args.batch_size > 0:
             ctx.app.config.set('baker/batch_size', ctx.args.batch_size)
+
+        allowed_pipelines = None
+        if ctx.args.html_only:
+            allowed_pipelines = ['page']
+        elif ctx.args.assets_only:
+            allowed_pipelines = ['asset']
+        elif ctx.args.pipelines:
+            allowed_pipelines = ctx.args.pipelines
+
         baker = Baker(
-            ctx.app, out_dir,
+            ctx.appfactory, ctx.app, out_dir,
             force=ctx.args.force,
-            applied_config_variant=ctx.config_variant,
-            applied_config_values=ctx.config_values)
-        record = baker.bake()
-        _merge_stats(record.stats, ctx.stats)
-        return record.success
-
-    def _bakeAssets(self, ctx, out_dir):
-        proc = ProcessorPipeline(
-            ctx.app, out_dir,
-            force=ctx.args.force,
-            applied_config_variant=ctx.config_variant,
-            applied_config_values=ctx.config_values)
-        record = proc.run()
-        _merge_stats(record.stats, ctx.stats)
-        return record.success
-
-
-def _merge_stats(source, target):
-    if source is None:
-        return
-
-    for name, val in source.items():
-        if name not in target:
-            target[name] = ExecutionStats()
-        target[name].mergeStats(val)
+            allowed_pipelines=allowed_pipelines)
+        records = baker.bake()
+
+        return records
 
 
 def _show_stats(stats, *, full=False):
     indent = ' '
-    for name in sorted(stats.keys()):
-        logger.info('%s:' % name)
-        s = stats[name]
-
-        logger.info(' Timers:')
-        for name, val in sorted(s.timers.items(), key=lambda i: i[1],
-                                reverse=True):
-            val_str = '%8.1f s' % val
-            logger.info(
-                "%s[%s%s%s] %s" %
-                (indent, Fore.GREEN, val_str, Fore.RESET, name))
-
-        logger.info(' Counters:')
-        for name in sorted(s.counters.keys()):
-            val_str = '%8d ' % s.counters[name]
-            logger.info(
-                "%s[%s%s%s] %s" %
-                (indent, Fore.GREEN, val_str, Fore.RESET, name))
-
-        logger.info(' Manifests:')
-        for name in sorted(s.manifests.keys()):
-            val = s.manifests[name]
-            logger.info(
-                "%s[%s%s%s] [%d entries]" %
-                (indent, Fore.CYAN, name, Fore.RESET, len(val)))
-            if full:
-                for v in val:
-                    logger.info("%s - %s" % (indent, v))
+
+    logger.info(' Timers:')
+    for name, val in sorted(stats.timers.items(), key=lambda i: i[1],
+                            reverse=True):
+        val_str = '%8.1f s' % val
+        logger.info(
+            "%s[%s%s%s] %s" %
+            (indent, Fore.GREEN, val_str, Fore.RESET, name))
+
+    logger.info(' Counters:')
+    for name in sorted(stats.counters.keys()):
+        val_str = '%8d ' % stats.counters[name]
+        logger.info(
+            "%s[%s%s%s] %s" %
+            (indent, Fore.GREEN, val_str, Fore.RESET, name))
+
+    logger.info(' Manifests:')
+    for name in sorted(stats.manifests.keys()):
+        val = stats.manifests[name]
+        logger.info(
+            "%s[%s%s%s] [%d entries]" %
+            (indent, Fore.CYAN, name, Fore.RESET, len(val)))
+        if full:
+            for v in val:
+                logger.info("%s - %s" % (indent, v))
 
 
 class ShowRecordCommand(ChefCommand):
     def __init__(self):
         super(ShowRecordCommand, self).__init__()
@@ -167,45 +139,51 @@
         self.description = ("Shows the bake record for a given output "
                             "directory.")
 
     def setupParser(self, parser, app):
         parser.add_argument(
             '-o', '--output',
             help="The output directory for which to show the bake record "
                  "(defaults to `_counter`)",
             nargs='?')
         parser.add_argument(
             '-p', '--path',
             help="A pattern that will be used to filter the relative path "
                  "of entries to show.")
         parser.add_argument(
             '-t', '--out',
             help="A pattern that will be used to filter the output path "
                  "of entries to show.")
         parser.add_argument(
             '--last',
             type=int,
             default=0,
             help="Show the last Nth bake record.")
         parser.add_argument(
             '--html-only',
             action='store_true',
             help="Only show records for pages (not from the asset "
                  "pipeline).")
         parser.add_argument(
             '--assets-only',
             action='store_true',
             help="Only show records for assets (not from pages).")
         parser.add_argument(
             '--show-stats',
             action='store_true',
             help="Show stats from the record.")
         parser.add_argument(
             '--show-manifest',
             help="Show manifest entries from the record.")
 
     def run(self, ctx):
+        from piecrust.processing.records import (
+            FLAG_PREPARED, FLAG_PROCESSED, FLAG_BYPASSED_STRUCTURED_PROCESSING,
+            FLAG_COLLAPSED_FROM_LAST_RUN)
+        from piecrust.rendering import (
+            PASS_FORMATTING, PASS_RENDERING)
+
         out_dir = ctx.args.output or os.path.join(ctx.app.root_dir, '_counter')
         record_id = hashlib.md5(out_dir.encode('utf8')).hexdigest()
         suffix = '' if ctx.args.last == 0 else '.%d' % ctx.args.last
         record_name = '%s%s.record' % (record_id, suffix)
 
@@ -218,14 +196,14 @@
             out_pattern = '*%s*' % ctx.args.out.strip('*')
 
         if not ctx.args.show_stats and not ctx.args.show_manifest:
             if not ctx.args.assets_only:
                 self._showBakeRecord(
                     ctx, record_name, pattern, out_pattern)
             if not ctx.args.html_only:
                 self._showProcessingRecord(
                     ctx, record_name, pattern, out_pattern)
             return
 
         stats = {}
         bake_rec = self._getBakeRecord(ctx, record_name)
         if bake_rec:
@@ -247,12 +225,10 @@
                 logger.info(
                     " [%s%s%s] [%d entries]" %
                     (Fore.CYAN, name, Fore.RESET, len(val)))
                 for v in val:
                     logger.info(" - %s" % v)
-
-
 
     def _getBakeRecord(self, ctx, record_name):
         record_cache = ctx.app.cache.getCache('baker')
         if not record_cache.has(record_name):
             logger.warning(
@@ -284,15 +260,15 @@
                     any([o for o in entry.all_out_paths
                          if fnmatch.fnmatch(o, out_pattern)])):
                 continue
 
             flags = _get_flag_descriptions(
                 entry.flags,
                 {
                     BakeRecordEntry.FLAG_NEW: 'new',
                     BakeRecordEntry.FLAG_SOURCE_MODIFIED: 'modified',
                     BakeRecordEntry.FLAG_OVERRIDEN: 'overriden'})
 
             logging.info(" - ")
 
             rel_path = os.path.relpath(entry.path, ctx.app.root_dir)
             logging.info(" path: %s" % rel_path)
@@ -306,21 +282,21 @@
                 logging.error(" errors: %s" % entry.errors)
 
             logging.info(" %d sub-pages:" % len(entry.subs))
             for sub in entry.subs:
                 sub_flags = _get_flag_descriptions(
                     sub.flags,
                     {
                         SubPageBakeInfo.FLAG_BAKED: 'baked',
                         SubPageBakeInfo.FLAG_FORCED_BY_SOURCE:
                             'forced by source',
                         SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS:
                             'forced by missing previous record entry',
                         SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS:
                             'forced by previous errors',
                         SubPageBakeInfo.FLAG_FORMATTING_INVALIDATED:
                             'formatting invalidated'})
 
                 logging.info(" - ")
                 logging.info(" URL: %s" % sub.out_uri)
                 logging.info(" path: %s" % os.path.relpath(
                     sub.out_path, record.out_dir))