comparison piecrust/processing/pipeline.py @ 447:aefe70229fdd

bake: Commonize worker pool code between html and asset baking. The `workerpool` package now defines a generic-ish worker pool. It's similar to the Python framework pool but with a simpler use-case (only one way to queue jobs) and support for workers to send a final "report" to the master process, which we use to get timing information here. The rest of the changes basically remove a whole bunch of duplicated code that's not needed anymore.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 05 Jul 2015 00:09:41 -0700
parents 171dde4f61dc
children d90ccdf18156
comparing 446:4cdf6c2157a0 with 447:aefe70229fdd
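
As a reading aid, the calling pattern the description refers to can be pieced together from the hunks below into a minimal sketch. The helper name run_pipeline_jobs and its arguments are placeholders standing in for the pipeline's own attributes (self.app, self.out_dir, self.tmp_dir, ...); only the imports and the queueJobs()/wait()/close() calls are taken from this changeset, and the processor settings copied onto the worker context are noted but omitted.

    from piecrust.workerpool import WorkerPool
    from piecrust.processing.worker import (
            ProcessingWorkerContext, ProcessingWorker)


    def run_pipeline_jobs(app, out_dir, tmp_dir, force, jobs, handler):
        # Hypothetical helper; it mirrors the calls the pipeline makes in the
        # hunks below, nothing more.
        ctx = ProcessingWorkerContext(
                app.root_dir, out_dir, tmp_dir, force, app.debug)
        # The pipeline also copies ctx.enabled_processors and
        # ctx.additional_processors onto the context; skipped here for brevity.
        pool = WorkerPool(worker_class=ProcessingWorker, initargs=(ctx,))

        ar = pool.queueJobs(jobs, handler=handler)  # the single way to queue work
        ar.wait()                                   # block until every job is handled

        # close() shuts the workers down and returns their final "reports",
        # which the pipeline folds into record.current.timers for timing info.
        return pool.close()
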
--- a/piecrust/processing/pipeline.py
+++ b/piecrust/processing/pipeline.py
@@ -1,29 +1,28 @@
 import os
 import os.path
 import re
 import time
-import queue
 import hashlib
 import logging
 import multiprocessing
 from piecrust.chefutil import format_timed, format_timed_scope
 from piecrust.processing.base import PipelineContext
 from piecrust.processing.records import (
         ProcessorPipelineRecordEntry, TransitionalProcessorPipelineRecord,
         FLAG_PROCESSED)
 from piecrust.processing.worker import (
-        ProcessingWorkerContext, ProcessingWorkerJob,
-        worker_func, get_filtered_processors)
+        ProcessingWorkerJob,
+        get_filtered_processors)
 
 
 logger = logging.getLogger(__name__)
 
 
 class _ProcessingContext(object):
-    def __init__(self, pool, record, base_dir, mount_info):
-        self.pool = pool
+    def __init__(self, jobs, record, base_dir, mount_info):
+        self.jobs = jobs
         self.record = record
         self.base_dir = base_dir
         self.mount_info = mount_info
 
 
@@ -91,13 +90,10 @@
             proc.onPipelineStart(pipeline_ctx)
 
         # Pre-processors can define additional ignore patterns.
         self.ignore_patterns += make_re(
                 pipeline_ctx._additional_ignore_patterns)
-
-        # Create the worker pool.
-        pool = _WorkerPool()
 
         # Create the pipeline record.
         record = TransitionalProcessorPipelineRecord()
         record_cache = self.app.cache.getCache('proc')
         record_name = (
@@ -130,23 +126,23 @@
                 rel_path = os.path.relpath(res.path, self.app.root_dir)
                 logger.error("Errors found in %s:" % rel_path)
                 for e in entry.errors:
                     logger.error(" " + e)
 
+        jobs = []
+        self._process(src_dir_or_file, record, jobs)
         pool = self._createWorkerPool()
-        expected_result_count = self._process(src_dir_or_file, pool, record)
-        self._waitOnWorkerPool(pool, expected_result_count, _handler)
-        self._terminateWorkerPool(pool)
+        ar = pool.queueJobs(jobs, handler=_handler)
+        ar.wait()
 
-        # Get timing information from the workers.
+        # Shutdown the workers and get timing information from them.
+        reports = pool.close()
         record.current.timers = {}
-        for i in range(len(pool.workers)):
-            try:
-                timers = pool.results.get(True, 0.1)
-            except queue.Empty:
-                logger.error("Didn't get timing information from all workers.")
-                break
+        for i in range(len(reports)):
+            timers = reports[i]
+            if timers is None:
+                continue
 
             worker_name = 'PipelineWorker_%d' % i
             record.current.timers[worker_name] = {}
             for name, val in timers['data'].items():
                 main_val = record.current.timers.setdefault(name, 0)
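
A note on the reports gathered above: their structure is not defined in this file. From the aggregation loop in the hunk just shown, each entry is either None or a mapping whose 'data' dict maps timer names to accumulated durations. A sketch of that assumed shape, with made-up keys:

    # reports[i] comes from the worker the pipeline names 'PipelineWorker_%d' % i.
    # The keys below are hypothetical; only the 'data' level is implied by the loop.
    example_report = {
        'data': {
            'CopyFileProcessor': 0.42,
            'SitemapProcessor': 0.08,
        },
    }
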
@@ -183,13 +179,11 @@
                 start_time,
                 "processed %d assets." % record.current.processed_count))
 
         return record.detach()
 
-    def _process(self, src_dir_or_file, pool, record):
-        expected_result_count = 0
-
+    def _process(self, src_dir_or_file, record, jobs):
         if src_dir_or_file is not None:
             # Process only the given path.
             # Find out what mount point this is in.
             for name, info in self.mounts.items():
                 path = info['path']
@@ -201,44 +195,37 @@
                 known_roots = [i['path'] for i in self.mounts.values()]
                 raise Exception("Input path '%s' is not part of any known "
                                 "mount point: %s" %
                                 (src_dir_or_file, known_roots))
 
-            ctx = _ProcessingContext(pool, record, base_dir, mount_info)
+            ctx = _ProcessingContext(jobs, record, base_dir, mount_info)
             logger.debug("Initiating processing pipeline on: %s" %
                          src_dir_or_file)
             if os.path.isdir(src_dir_or_file):
-                expected_result_count = self._processDirectory(
-                        ctx, src_dir_or_file)
+                self._processDirectory(ctx, src_dir_or_file)
             elif os.path.isfile(src_dir_or_file):
                 self._processFile(ctx, src_dir_or_file)
-                expected_result_count = 1
 
         else:
             # Process everything.
             for name, info in self.mounts.items():
                 path = info['path']
-                ctx = _ProcessingContext(pool, record, path, info)
+                ctx = _ProcessingContext(jobs, record, path, info)
                 logger.debug("Initiating processing pipeline on: %s" % path)
-                expected_result_count = self._processDirectory(ctx, path)
-
-        return expected_result_count
+                self._processDirectory(ctx, path)
 
     def _processDirectory(self, ctx, start_dir):
-        queued_count = 0
         for dirpath, dirnames, filenames in os.walk(start_dir):
             rel_dirpath = os.path.relpath(dirpath, start_dir)
             dirnames[:] = [d for d in dirnames
                            if not re_matchany(
                                d, self.ignore_patterns, rel_dirpath)]
 
             for filename in filenames:
                 if re_matchany(filename, self.ignore_patterns, rel_dirpath):
                     continue
                 self._processFile(ctx, os.path.join(dirpath, filename))
-                queued_count += 1
-        return queued_count
 
     def _processFile(self, ctx, path):
         # TODO: handle overrides between mount-points.
 
         entry = ProcessorPipelineRecordEntry(path)
@@ -248,83 +235,27 @@
         force_this = (self.force or previous_entry is None or
                       not previous_entry.was_processed_successfully)
 
         job = ProcessingWorkerJob(ctx.base_dir, ctx.mount_info, path,
                                   force=force_this)
-
-        logger.debug("Queuing: %s" % path)
-        ctx.pool.queue.put_nowait(job)
+        ctx.jobs.append(job)
 
     def _createWorkerPool(self):
-        import sys
-
-        main_module = sys.modules['__main__']
-        is_profiling = os.path.basename(main_module.__file__) in [
-                'profile.py', 'cProfile.py']
-
-        pool = _WorkerPool()
-        for i in range(self.num_workers):
-            ctx = ProcessingWorkerContext(
-                    self.app.root_dir, self.out_dir, self.tmp_dir,
-                    pool.queue, pool.results, pool.abort_event,
-                    self.force, self.app.debug)
-            ctx.is_profiling = is_profiling
-            ctx.enabled_processors = self.enabled_processors
-            ctx.additional_processors = self.additional_processors
-            w = multiprocessing.Process(
-                    name='PipelineWorker_%d' % i,
-                    target=worker_func, args=(i, ctx))
-            w.start()
-            pool.workers.append(w)
+        from piecrust.workerpool import WorkerPool
+        from piecrust.processing.worker import (
+                ProcessingWorkerContext, ProcessingWorker)
+
+        ctx = ProcessingWorkerContext(
+                self.app.root_dir, self.out_dir, self.tmp_dir,
+                self.force, self.app.debug)
+        ctx.enabled_processors = self.enabled_processors
+        ctx.additional_processors = self.additional_processors
+
+        pool = WorkerPool(
+                worker_class=ProcessingWorker,
+                initargs=(ctx,))
         return pool
-
-    def _waitOnWorkerPool(self, pool, expected_result_count, result_handler):
-        abort_with_exception = None
-        try:
-            got_count = 0
-            while got_count < expected_result_count:
-                try:
-                    res = pool.results.get(True, 10)
-                except queue.Empty:
-                    logger.error(
-                            "Got %d results, expected %d, and timed-out "
-                            "for 10 seconds. A worker might be stuck?" %
-                            (got_count, expected_result_count))
-                    abort_with_exception = Exception("Worker time-out.")
-                    break
-
-                if isinstance(res, dict) and res.get('type') == 'error':
-                    abort_with_exception = Exception(
-                            'Worker critical error:\n' +
-                            '\n'.join(res['messages']))
-                    break
-
-                got_count += 1
-                result_handler(res)
-        except KeyboardInterrupt as kiex:
-            logger.warning("Bake aborted by user... "
-                           "waiting for workers to stop.")
-            abort_with_exception = kiex
-
-        if abort_with_exception:
-            pool.abort_event.set()
-            for w in pool.workers:
-                w.join(2)
-            raise abort_with_exception
-
-    def _terminateWorkerPool(self, pool):
-        pool.abort_event.set()
-        for w in pool.workers:
-            w.join()
-
-
-class _WorkerPool(object):
-    def __init__(self):
-        self.queue = multiprocessing.JoinableQueue()
-        self.results = multiprocessing.Queue()
-        self.abort_event = multiprocessing.Event()
-        self.workers = []
 
 
 def make_mount_infos(mounts, root_dir):
     if isinstance(mounts, list):
         mounts = {m: {} for m in mounts}