comparison piecrust/baking/baker.py @ 447:aefe70229fdd

bake: Commonize worker pool code between HTML and asset baking. The `workerpool` package now defines a generic-ish worker pool. It's similar to the pool provided by Python's multiprocessing framework, but with a simpler use case (only one way to queue jobs) and with support for workers sending a final "report" back to the master process, which we use here to collect timing information. The rest of the changes mostly remove duplicated code that's no longer needed.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 05 Jul 2015 00:09:41 -0700
parents 21e26ed867b6
children 838f3964f400
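
The changeset below only touches the baker side of this refactor. From the call sites in the diff one can infer the rough shape of the new pool: `pool.queueJobs(jobs, handler=...)` returns a waitable result, and `pool.close()` shuts the workers down and hands back one final report per worker. The outline below just spells out that inference as a sketch; the class names, signatures, and docstrings are assumptions, not the actual `piecrust.workerpool` code.

    # Hypothetical outline of the pool interface the baker relies on below.
    # Inferred from call sites only; the real piecrust.workerpool module may differ.

    class AsyncResult:
        def wait(self):
            """Block until every job queued with this result has been handled."""
            raise NotImplementedError


    class WorkerPool:
        def __init__(self, worker_class, initargs=()):
            # Each worker process would be built as worker_class(*initargs).
            self.worker_class = worker_class
            self.initargs = initargs

        def queueJobs(self, jobs, handler=None):
            """Send jobs to the workers. `handler(result)` is invoked in the
            master process for each result. Returns an AsyncResult."""
            raise NotImplementedError

        def close(self):
            """Shut the workers down and return their final reports (used for
            timing data in this changeset), one entry per worker."""
            raise NotImplementedError

The full comparison against the previous revision follows.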
--- piecrust/baking/baker.py	446:4cdf6c2157a0
+++ piecrust/baking/baker.py	447:aefe70229fdd
@@ -101,21 +101,17 @@
                 self._bakeRealm(record, pool, realm, srclist)
 
         # Bake taxonomies.
         self._bakeTaxonomies(record, pool)
 
-        # All done with the workers.
-        self._terminateWorkerPool(pool)
-
-        # Get the timing information from the workers.
+        # All done with the workers. Close the pool and get timing reports.
+        reports = pool.close()
         record.current.timers = {}
-        for i in range(len(pool.workers)):
-            try:
-                timers = pool.results.get(True, 0.1)
-            except queue.Empty:
-                logger.error("Didn't get timing information from all workers.")
-                break
+        for i in range(len(reports)):
+            timers = reports[i]
+            if timers is None:
+                continue
 
             worker_name = 'BakeWorker_%d' % i
             record.current.timers[worker_name] = {}
             for name, val in timers['data'].items():
                 main_val = record.current.timers.setdefault(name, 0)
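For the loop above to work, each worker's final report must be either None or a mapping with a 'data' dictionary of timer names to accumulated seconds; that shape is implied by `timers['data'].items()` and the None check, it is not spelled out in this file. A purely illustrative example of folding such reports into a timers dictionary (the report values are made up):

    # Illustrative only: reports shaped the way the loop above expects them.
    reports = [
        {'data': {'LoadJob': 0.8, 'BakeJob': 4.2}},   # worker 0
        None,                                          # worker 1 sent nothing
    ]

    timers = {}
    for i, report in enumerate(reports):
        if report is None:
            continue
        worker_name = 'BakeWorker_%d' % i
        timers[worker_name] = dict(report['data'])
        for name, val in report['data'].items():
            # Keep a per-category total across workers as well.
            timers[name] = timers.setdefault(name, 0) + val

    print(timers)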
@@ -212,45 +208,47 @@
                 start_time,
                 "baked %d %s pages." %
                 (page_count, REALM_NAMES[realm].lower())))
 
     def _loadRealmPages(self, record, pool, factories):
+        def _handler(res):
+            # Create the record entry for this page.
+            record_entry = BakeRecordEntry(res.source_name, res.path)
+            record_entry.config = res.config
+            if res.errors:
+                record_entry.errors += res.errors
+                record.current.success = False
+                self._logErrors(res.path, res.errors)
+            record.addEntry(record_entry)
+
         logger.debug("Loading %d realm pages..." % len(factories))
         with format_timed_scope(logger,
                                 "loaded %d pages" % len(factories),
                                 level=logging.DEBUG, colored=False,
                                 timer_env=self.app.env,
                                 timer_category='LoadJob'):
-            for fac in factories:
-                job = BakeWorkerJob(
-                        JOB_LOAD,
-                        LoadJobPayload(fac))
-                pool.queue.put_nowait(job)
-
-        def _handler(res):
-            # Create the record entry for this page.
-            record_entry = BakeRecordEntry(res.source_name, res.path)
-            record_entry.config = res.config
-            if res.errors:
-                record_entry.errors += res.errors
-                record.current.success = False
-                self._logErrors(res.path, res.errors)
-            record.addEntry(record_entry)
-
-        self._waitOnWorkerPool(
-                pool,
-                expected_result_count=len(factories),
-                result_handler=_handler)
+            jobs = [
+                    BakeWorkerJob(JOB_LOAD, LoadJobPayload(fac))
+                    for fac in factories]
+            ar = pool.queueJobs(jobs, handler=_handler)
+            ar.wait()
 
     def _renderRealmPages(self, record, pool, factories):
+        def _handler(res):
+            entry = record.getCurrentEntry(res.path)
+            if res.errors:
+                entry.errors += res.errors
+                record.current.success = False
+                self._logErrors(res.path, res.errors)
+
         logger.debug("Rendering %d realm pages..." % len(factories))
         with format_timed_scope(logger,
                                 "prepared %d pages" % len(factories),
                                 level=logging.DEBUG, colored=False,
                                 timer_env=self.app.env,
                                 timer_category='RenderFirstSubJob'):
-            expected_result_count = 0
+            jobs = []
             for fac in factories:
                 record_entry = record.getCurrentEntry(fac.path)
                 if record_entry.errors:
                     logger.debug("Ignoring %s because it had previous "
                                  "errors." % fac.ref_spec)
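Every baking phase below now follows the same shape as `_loadRealmPages` above: build a list of `BakeWorkerJob` objects, submit them with `pool.queueJobs()` together with a phase-specific `_handler`, and wait. Because the handler runs in the master process, it can safely fold results into the bake record. A condensed, hypothetical helper showing that shared pattern (the name `run_phase` and its parameters are made up for illustration):

    # Hypothetical sketch of the pattern shared by the load/render/bake phases.
    def run_phase(pool, record, factories, make_job, apply_result):
        def _handler(res):
            # Runs in the master process, so it may mutate the record directly.
            apply_result(record, res)

        # Build all the jobs first, skipping factories that produced no job.
        jobs = [j for j in (make_job(fac) for fac in factories)
                if j is not None]
        ar = pool.queueJobs(jobs, handler=_handler)
        ar.wait()
        return len(jobs)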
@@ -276,53 +274,42 @@
 
                 # All good, queue the job.
                 job = BakeWorkerJob(
                         JOB_RENDER_FIRST,
                         RenderFirstSubJobPayload(fac))
-                pool.queue.put_nowait(job)
-                expected_result_count += 1
-
-        def _handler(res):
-            entry = record.getCurrentEntry(res.path)
-            if res.errors:
-                entry.errors += res.errors
-                record.current.success = False
-                self._logErrors(res.path, res.errors)
-
-        self._waitOnWorkerPool(
-                pool,
-                expected_result_count=expected_result_count,
-                result_handler=_handler)
+                jobs.append(job)
+
+            ar = pool.queueJobs(jobs, handler=_handler)
+            ar.wait()
 
     def _bakeRealmPages(self, record, pool, realm, factories):
+        def _handler(res):
+            entry = record.getCurrentEntry(res.path, res.taxonomy_info)
+            entry.subs = res.sub_entries
+            if res.errors:
+                entry.errors += res.errors
+                self._logErrors(res.path, res.errors)
+            if entry.has_any_error:
+                record.current.success = False
+            if entry.was_any_sub_baked:
+                record.current.baked_count[realm] += 1
+                record.dirty_source_names.add(entry.source_name)
+
         logger.debug("Baking %d realm pages..." % len(factories))
         with format_timed_scope(logger,
                                 "baked %d pages" % len(factories),
                                 level=logging.DEBUG, colored=False,
                                 timer_env=self.app.env,
                                 timer_category='BakeJob'):
-            expected_result_count = 0
+            jobs = []
             for fac in factories:
-                if self._queueBakeJob(record, pool, fac):
-                    expected_result_count += 1
-
-        def _handler(res):
-            entry = record.getCurrentEntry(res.path, res.taxonomy_info)
-            entry.subs = res.sub_entries
-            if res.errors:
-                entry.errors += res.errors
-                self._logErrors(res.path, res.errors)
-            if entry.has_any_error:
-                record.current.success = False
-            if entry.was_any_sub_baked:
-                record.current.baked_count[realm] += 1
-                record.dirty_source_names.add(entry.source_name)
-
-        self._waitOnWorkerPool(
-                pool,
-                expected_result_count=expected_result_count,
-                result_handler=_handler)
+                job = self._makeBakeJob(record, fac)
+                if job is not None:
+                    jobs.append(job)
+
+            ar = pool.queueJobs(jobs, handler=_handler)
+            ar.wait()
 
     def _bakeTaxonomies(self, record, pool):
         logger.debug("Baking taxonomy pages...")
         with format_timed_scope(logger, 'built taxonomy buckets',
                                 level=logging.DEBUG, colored=False):
@@ -398,12 +385,20 @@
                     tt_info.dirty_terms.add(terms)
 
         return buckets
 
     def _bakeTaxonomyBuckets(self, record, pool, buckets):
+        def _handler(res):
+            entry = record.getCurrentEntry(res.path, res.taxonomy_info)
+            entry.subs = res.sub_entries
+            if res.errors:
+                entry.errors += res.errors
+            if entry.has_any_error:
+                record.current.success = False
+
         # Start baking those terms.
-        expected_result_count = 0
+        jobs = []
         for source_name, source_taxonomies in buckets.items():
             for tax_name, tt_info in source_taxonomies.items():
                 terms = tt_info.dirty_terms
                 if len(terms) == 0:
                     continue
@@ -433,25 +428,16 @@
 
                     cur_entry = BakeRecordEntry(
                             fac.source.name, fac.path, tax_info)
                     record.addEntry(cur_entry)
 
-                    if self._queueBakeJob(record, pool, fac, tax_info):
-                        expected_result_count += 1
-
-        def _handler(res):
-            entry = record.getCurrentEntry(res.path, res.taxonomy_info)
-            entry.subs = res.sub_entries
-            if res.errors:
-                entry.errors += res.errors
-            if entry.has_any_error:
-                record.current.success = False
-
-        self._waitOnWorkerPool(
-                pool,
-                expected_result_count=expected_result_count,
-                result_handler=_handler)
+                    job = self._makeBakeJob(record, fac, tax_info)
+                    if job is not None:
+                        jobs.append(job)
+
+        ar = pool.queueJobs(jobs, handler=_handler)
+        ar.wait()
 
         # Now we create bake entries for all the terms that were *not* dirty.
         # This is because otherwise, on the next incremental bake, we wouldn't
         # find any entry for those things, and figure that we need to delete
         # their outputs.
@@ -468,24 +454,24 @@
                 record.collapseEntry(prev_entry)
             else:
                 logger.debug("Taxonomy term '%s:%s' isn't used anymore." %
                              (ti.taxonomy_name, ti.term))
 
-        return expected_result_count
+        return len(jobs)
 
-    def _queueBakeJob(self, record, pool, fac, tax_info=None):
+    def _makeBakeJob(self, record, fac, tax_info=None):
         # Get the previous (if any) and current entry for this page.
         pair = record.getPreviousAndCurrentEntries(fac.path, tax_info)
         assert pair is not None
         prev_entry, cur_entry = pair
         assert cur_entry is not None
 
         # Ignore if there were errors in the previous passes.
         if cur_entry.errors:
             logger.debug("Ignoring %s because it had previous "
                          "errors." % fac.ref_spec)
-            return False
+            return None
 
         # Build the route metadata and find the appropriate route.
         page = fac.buildPage()
         route_metadata = create_route_metadata(page)
         if tax_info is not None:
@@ -513,19 +499,18 @@
                     "Page '%s' maps to URL '%s' but is overriden "
                     "by page '%s'." %
                     (fac.ref_spec, uri, override_entry.path))
             logger.error(cur_entry.errors[-1])
             cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN
-            return False
+            return None
 
         job = BakeWorkerJob(
                 JOB_BAKE,
                 BakeJobPayload(fac, route_metadata, prev_entry,
                                record.dirty_source_names,
                                tax_info))
-        pool.queue.put_nowait(job)
-        return True
+        return job
 
     def _handleDeletetions(self, record):
         logger.debug("Handling deletions...")
         for path, reason in record.getDeletions():
             logger.debug("Removing '%s': %s" % (path, reason))
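The `_queueBakeJob` to `_makeBakeJob` change above is the same refactor applied at the single-page level: instead of pushing onto the pool's queue and returning True/False, the method now just builds the job (or returns None), and the callers batch everything through `queueJobs()`. A tiny, self-contained illustration of that "build first, submit once" shape (toy data, not PieCrust types):

    # Toy illustration of switching from "queue as you go" to
    # "build the job list, then submit it in one call".
    def make_job(item):
        if item.get('skip'):
            return None        # mirrors _makeBakeJob() returning None
        return ('JOB_BAKE', item['name'])

    items = [{'name': 'a'}, {'name': 'b', 'skip': True}, {'name': 'c'}]
    jobs = [j for j in (make_job(it) for it in items) if j is not None]
    assert jobs == [('JOB_BAKE', 'a'), ('JOB_BAKE', 'c')]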
@@ -542,82 +527,20 @@
         logger.error("Errors found in %s:" % rel_path)
         for e in errors:
             logger.error(" " + e)
 
     def _createWorkerPool(self):
-        import sys
-        from piecrust.baking.worker import BakeWorkerContext, worker_func
+        from piecrust.workerpool import WorkerPool
+        from piecrust.baking.worker import BakeWorkerContext, BakeWorker
 
-        main_module = sys.modules['__main__']
-        is_profiling = os.path.basename(main_module.__file__) in [
-                'profile.py', 'cProfile.py']
-
-        pool = _WorkerPool()
-        for i in range(self.num_workers):
-            ctx = BakeWorkerContext(
-                    self.app.root_dir, self.app.cache.base_dir, self.out_dir,
-                    pool.queue, pool.results, pool.abort_event,
-                    force=self.force, debug=self.app.debug,
-                    is_profiling=is_profiling)
-            w = multiprocessing.Process(
-                    name='BakeWorker_%d' % i,
-                    target=worker_func, args=(i, ctx))
-            w.start()
-            pool.workers.append(w)
+        ctx = BakeWorkerContext(
+                self.app.root_dir, self.app.cache.base_dir, self.out_dir,
+                force=self.force, debug=self.app.debug)
+        pool = WorkerPool(
+                worker_class=BakeWorker,
+                initargs=(ctx,))
         return pool
-
-    def _terminateWorkerPool(self, pool):
-        pool.abort_event.set()
-        for w in pool.workers:
-            w.join()
-
-    def _waitOnWorkerPool(self, pool,
-                          expected_result_count=-1, result_handler=None):
-        assert result_handler is None or expected_result_count >= 0
-        abort_with_exception = None
-        try:
-            if result_handler is None:
-                pool.queue.join()
-            else:
-                got_count = 0
-                while got_count < expected_result_count:
-                    try:
-                        res = pool.results.get(True, 10)
-                    except queue.Empty:
-                        logger.error(
-                                "Got %d results, expected %d, and timed-out "
-                                "for 10 seconds. A worker might be stuck?" %
-                                (got_count, expected_result_count))
-                        abort_with_exception = Exception("Worker time-out.")
-                        break
-
-                    if isinstance(res, dict) and res.get('type') == 'error':
-                        abort_with_exception = Exception(
-                                'Worker critical error:\n' +
-                                '\n'.join(res['messages']))
-                        break
-
-                    got_count += 1
-                    result_handler(res)
-        except KeyboardInterrupt as kiex:
-            logger.warning("Bake aborted by user... "
-                           "waiting for workers to stop.")
-            abort_with_exception = kiex
-
-        if abort_with_exception:
-            pool.abort_event.set()
-            for w in pool.workers:
-                w.join(2)
-            raise abort_with_exception
-
-
-class _WorkerPool(object):
-    def __init__(self):
-        self.queue = multiprocessing.JoinableQueue()
-        self.results = multiprocessing.Queue()
-        self.abort_event = multiprocessing.Event()
-        self.workers = []
 
 
 class _TaxonomyTermsInfo(object):
     def __init__(self):
         self.dirty_terms = set()