comparison piecrust/baking/baker.py @ 120:133845647083

Better error management and removal support in baking/processing. * Baker and processor pipeline now store errors in their records. * They also support deleting output files that are no longer valid. * The basic transitional record class implements more boilerplate code. * The processor pipeline is run from the `bake` command directly. * New unit tests. * Unit test mocking now mocks `os.remove` too.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 09 Nov 2014 14:46:23 -0800
parents 7d2fdf43d7ca
children bc63dc20baa0
comparison
equal deleted inserted replaced
119:0811f92cbdc7 120:133845647083
10 BakeRecordPageEntry, 10 BakeRecordPageEntry,
11 FLAG_OVERRIDEN, FLAG_SOURCE_MODIFIED) 11 FLAG_OVERRIDEN, FLAG_SOURCE_MODIFIED)
12 from piecrust.chefutil import format_timed, log_friendly_exception 12 from piecrust.chefutil import format_timed, log_friendly_exception
13 from piecrust.data.filters import (PaginationFilter, HasFilterClause, 13 from piecrust.data.filters import (PaginationFilter, HasFilterClause,
14 IsFilterClause, AndBooleanClause) 14 IsFilterClause, AndBooleanClause)
15 from piecrust.processing.base import ProcessorPipeline
16 from piecrust.rendering import (PageRenderingContext, render_page, 15 from piecrust.rendering import (PageRenderingContext, render_page,
17 PASS_FORMATTING, PASS_RENDERING) 16 PASS_FORMATTING, PASS_RENDERING)
18 from piecrust.sources.base import (PageFactory, 17 from piecrust.sources.base import (PageFactory,
19 REALM_NAMES, REALM_USER, REALM_THEME) 18 REALM_NAMES, REALM_USER, REALM_THEME)
20 19
142 141
143 # If the current page is known to use pages from other sources, 142 # If the current page is known to use pages from other sources,
144 # see if any of those got baked, or are going to be baked for some 143 # see if any of those got baked, or are going to be baked for some
145 # reason. If so, we need to bake this one too. 144 # reason. If so, we need to bake this one too.
146 # (this happens for instance with the main page of a blog). 145 # (this happens for instance with the main page of a blog).
147 if prev_record_entry: 146 if prev_record_entry and prev_record_entry.was_baked_successfully:
148 invalidated_render_passes = set() 147 invalidated_render_passes = set()
149 used_src_names = list(prev_record_entry.used_source_names) 148 used_src_names = list(prev_record_entry.used_source_names)
150 for src_name, rdr_pass in used_src_names: 149 for src_name, rdr_pass in used_src_names:
151 entries = self.record.getCurrentEntries(src_name) 150 entries = self.record.getCurrentEntries(src_name)
152 for e in entries: 151 for e in entries:
264 return ctx, rp 263 return ctx, rp
265 264
266 265
267 class Baker(object): 266 class Baker(object):
268 def __init__(self, app, out_dir=None, force=False, portable=False, 267 def __init__(self, app, out_dir=None, force=False, portable=False,
269 no_assets=False): 268 no_assets=False, num_workers=4):
270 self.app = app 269 self.app = app
271 self.out_dir = out_dir or os.path.join(app.root_dir, '_counter') 270 self.out_dir = out_dir or os.path.join(app.root_dir, '_counter')
272 self.force = force 271 self.force = force
273 self.portable = portable 272 self.portable = portable
274 self.no_assets = no_assets 273 self.no_assets = no_assets
275 self.num_workers = app.config.get('baker/workers') or 4 274 self.num_workers = num_workers
276 275
277 # Remember what taxonomy pages we should skip 276 # Remember what taxonomy pages we should skip
278 # (we'll bake them repeatedly later with each taxonomy term) 277 # (we'll bake them repeatedly later with each taxonomy term)
279 self.taxonomy_pages = [] 278 self.taxonomy_pages = []
280 logger.debug("Gathering taxonomy page paths:") 279 logger.debug("Gathering taxonomy page paths:")
299 os.makedirs(self.out_dir, 0o755) 298 os.makedirs(self.out_dir, 0o755)
300 299
301 # Load/create the bake record. 300 # Load/create the bake record.
302 record = TransitionalBakeRecord() 301 record = TransitionalBakeRecord()
303 record_cache = self.app.cache.getCache('baker') 302 record_cache = self.app.cache.getCache('baker')
304 record_name = (hashlib.md5(self.out_dir.encode('utf8')).hexdigest() + 303 record_name = (
304 'pages_' +
305 hashlib.md5(self.out_dir.encode('utf8')).hexdigest() +
305 '.record') 306 '.record')
306 if not self.force and record_cache.has(record_name): 307 if not self.force and record_cache.has(record_name):
307 t = time.clock() 308 t = time.clock()
308 record.loadPrevious(record_cache.getCachePath(record_name)) 309 record.loadPrevious(record_cache.getCachePath(record_name))
309 logger.debug(format_timed(t, 'loaded previous bake record', 310 logger.debug(format_timed(t, 'loaded previous bake record',
329 self._bakeRealm(record, realm, srclist) 330 self._bakeRealm(record, realm, srclist)
330 331
331 # Bake taxonomies. 332 # Bake taxonomies.
332 self._bakeTaxonomies(record) 333 self._bakeTaxonomies(record)
333 334
334 # Bake the assets. 335 # Delete files from the output.
335 if not self.no_assets: 336 self._handleDeletetions(record)
336 self._bakeAssets(record)
337 337
338 # Save the bake record. 338 # Save the bake record.
339 t = time.clock() 339 t = time.clock()
340 record.current.bake_time = time.time() 340 record.current.bake_time = time.time()
341 record.current.out_dir = self.out_dir 341 record.current.out_dir = self.out_dir
343 record.saveCurrent(record_cache.getCachePath(record_name)) 343 record.saveCurrent(record_cache.getCachePath(record_name))
344 logger.debug(format_timed(t, 'saved bake record', colored=False)) 344 logger.debug(format_timed(t, 'saved bake record', colored=False))
345 345
346 # All done. 346 # All done.
347 self.app.config.set('baker/is_baking', False) 347 self.app.config.set('baker/is_baking', False)
348 logger.info('-------------------------'); 348 logger.debug(format_timed(start_time, 'done baking'));
349 logger.info(format_timed(start_time, 'done baking'));
350 349
351 def _handleCacheValidity(self, record): 350 def _handleCacheValidity(self, record):
352 start_time = time.clock() 351 start_time = time.clock()
353 352
354 reason = None 353 reason = None
405 if fac.path in self.taxonomy_pages: 404 if fac.path in self.taxonomy_pages:
406 logger.debug("Skipping taxonomy page: %s:%s" % 405 logger.debug("Skipping taxonomy page: %s:%s" %
407 (source.name, fac.ref_spec)) 406 (source.name, fac.ref_spec))
408 continue 407 continue
409 408
409 entry = BakeRecordPageEntry(fac)
410 record.addEntry(entry)
411
410 route = self.app.getRoute(source.name, fac.metadata) 412 route = self.app.getRoute(source.name, fac.metadata)
411 if route is None: 413 if route is None:
412 logger.error("Can't get route for page: %s" % fac.ref_spec) 414 entry.errors.append("Can't get route for page: %s" %
415 fac.ref_spec)
416 logger.error(entry.errors[-1])
413 continue 417 continue
414 418
415 entry = BakeRecordPageEntry(fac)
416 record.addEntry(entry)
417 queue.addJob(BakeWorkerJob(fac, route, entry)) 419 queue.addJob(BakeWorkerJob(fac, route, entry))
418 420
419 self._waitOnWorkerPool(pool, abort) 421 self._waitOnWorkerPool(pool, abort)
420 422
421 def _bakeTaxonomies(self, record): 423 def _bakeTaxonomies(self, record):
437 for prev_entry, cur_entry in record.transitions.values(): 439 for prev_entry, cur_entry in record.transitions.values():
438 for tax in self.app.taxonomies: 440 for tax in self.app.taxonomies:
439 changed_terms = None 441 changed_terms = None
440 # Re-bake all taxonomy pages that include new or changed 442 # Re-bake all taxonomy pages that include new or changed
441 # pages. 443 # pages.
442 if not prev_entry and cur_entry and cur_entry.was_baked: 444 if (not prev_entry and cur_entry and
445 cur_entry.was_baked_successfully):
443 changed_terms = cur_entry.config.get(tax.name) 446 changed_terms = cur_entry.config.get(tax.name)
444 elif prev_entry and cur_entry and cur_entry.was_baked: 447 elif (prev_entry and cur_entry and
448 cur_entry.was_baked_successfully):
445 changed_terms = [] 449 changed_terms = []
446 prev_terms = prev_entry.config.get(tax.name) 450 prev_terms = prev_entry.config.get(tax.name)
447 cur_terms = cur_entry.config.get(tax.name) 451 cur_terms = cur_entry.config.get(tax.name)
448 if tax.is_multiple: 452 if tax.is_multiple:
449 if prev_terms is not None: 453 if prev_terms is not None:
506 queue.addJob( 510 queue.addJob(
507 BakeWorkerJob(fac, route, entry, tax_name, term)) 511 BakeWorkerJob(fac, route, entry, tax_name, term))
508 512
509 self._waitOnWorkerPool(pool, abort) 513 self._waitOnWorkerPool(pool, abort)
510 514
511 def _bakeAssets(self, record): 515 def _handleDeletetions(self, record):
512 mounts = self.app.assets_dirs 516 for path, reason in record.getDeletions():
513 baker_params = self.app.config.get('baker') or {} 517 logger.debug("Removing '%s': %s" % (path, reason))
514 skip_patterns = baker_params.get('skip_patterns') 518 os.remove(path)
515 force_patterns = baker_params.get('force_patterns') 519 logger.info('[delete] %s' % path)
516 proc = ProcessorPipeline(
517 self.app, mounts, self.out_dir, force=self.force,
518 skip_patterns=skip_patterns, force_patterns=force_patterns,
519 num_workers=self.num_workers)
520 proc.run()
521 520
522 def _createWorkerPool(self, record, pool_size=4): 521 def _createWorkerPool(self, record, pool_size=4):
523 pool = [] 522 pool = []
524 queue = BakeScheduler(record) 523 queue = BakeScheduler(record)
525 abort = threading.Event() 524 abort = threading.Event()
695 694
696 def _unsafeRun(self, job): 695 def _unsafeRun(self, job):
697 start_time = time.clock() 696 start_time = time.clock()
698 697
699 entry = job.record_entry 698 entry = job.record_entry
700 self._page_baker.bake(job.factory, job.route, entry, 699 try:
701 taxonomy_name=job.taxonomy_name, 700 self._page_baker.bake(job.factory, job.route, entry,
702 taxonomy_term=job.taxonomy_term) 701 taxonomy_name=job.taxonomy_name,
703 702 taxonomy_term=job.taxonomy_term)
704 if entry.was_baked: 703 except BakingError as ex:
704 logger.debug("Got baking error. Adding it to the record.")
705 while ex:
706 entry.errors.append(str(ex))
707 ex = ex.__cause__
708
709 if entry.was_baked_successfully:
705 uri = entry.out_uris[0] 710 uri = entry.out_uris[0]
706 friendly_uri = uri if uri != '' else '[main page]' 711 friendly_uri = uri if uri != '' else '[main page]'
707 friendly_count = '' 712 friendly_count = ''
708 if entry.num_subs > 1: 713 if entry.num_subs > 1:
709 friendly_count = ' (%d pages)' % entry.num_subs 714 friendly_count = ' (%d pages)' % entry.num_subs
710 logger.info(format_timed(start_time, '[%d] %s%s' % 715 logger.info(format_timed(start_time, '[%d] %s%s' %
711 (self.wid, friendly_uri, friendly_count))) 716 (self.wid, friendly_uri, friendly_count)))
712 717 elif entry.errors:
718 for e in entry.errors:
719 logger.error(e)
720