comparison piecrust/baking/baker.py @ 451:838f3964f400

bake: Optimize the bake by not using custom classes for passing info. See previous changeset about pickling performance between processes. Now just use plain standard structures, or the new `fastpickle` when needed.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 06 Jul 2015 21:30:49 -0700
parents aefe70229fdd
children 8351a77e13f5
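
Background on the optimization: pickling an instance of a custom class records a global reference to the class and rebuilds the instance on the receiving side, while plain dicts, lists, and strings go through pickle's optimized native paths, and that cost is paid on every job sent to a worker process. A minimal, self-contained sketch of the trade-off (the `Job` class and payloads are illustrative stand-ins, not PieCrust's actual types):

    import pickle
    import timeit

    class Job:
        # Stand-in for a BakeWorkerJob-style wrapper class.
        def __init__(self, job_type, payload):
            self.job_type = job_type
            self.payload = payload

    job_obj = Job('bake', {'path': '/pages/foo.md'})
    job_dict = {'type': 'bake', 'job': {'path': '/pages/foo.md'}}

    # Round-trip each shape through pickle, as multiprocessing would.
    for name, obj in (('class', job_obj), ('dict', job_dict)):
        t = timeit.timeit(lambda: pickle.loads(pickle.dumps(obj)),
                          number=100000)
        print(name, t)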
diff -r 298f8f46432a -r 838f3964f400 piecrust/baking/baker.py
--- a/piecrust/baking/baker.py	(450:298f8f46432a)
+++ b/piecrust/baking/baker.py	(451:838f3964f400)
@@ -1,17 +1,14 @@
-import copy
 import time
 import os.path
-import queue
 import hashlib
 import logging
 import multiprocessing
 from piecrust.baking.records import (
         BakeRecordEntry, TransitionalBakeRecord, TaxonomyInfo)
 from piecrust.baking.worker import (
-        BakeWorkerJob, LoadJobPayload, RenderFirstSubJobPayload,
-        BakeJobPayload,
+        save_factory,
         JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE)
 from piecrust.chefutil import (
         format_timed_scope, format_timed)
 from piecrust.routing import create_route_metadata
 from piecrust.sources.base import (
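
`save_factory` replaces the removed `LoadJobPayload`/`RenderFirstSubJobPayload` wrappers in the imports above. Its implementation lives in piecrust/baking/worker.py and is outside this diff; a plausible sketch of the idea, with the factory attributes assumed from how factories are used elsewhere in this file:

    # Sketch only; the real save_factory is defined in
    # piecrust/baking/worker.py. The idea: flatten a page factory into
    # plain data that a worker process can rebuild a factory from.
    def save_factory(fac):
        return {
                'source_name': fac.source.name,  # assumed attribute names
                'rel_path': fac.rel_path,
                'metadata': fac.metadata}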
@@ -210,37 +207,43 @@
                     (page_count, REALM_NAMES[realm].lower())))

     def _loadRealmPages(self, record, pool, factories):
         def _handler(res):
             # Create the record entry for this page.
-            record_entry = BakeRecordEntry(res.source_name, res.path)
-            record_entry.config = res.config
-            if res.errors:
-                record_entry.errors += res.errors
+            # This will also update the `dirty_source_names` for the record
+            # as we add page files whose last modification times are later
+            # than the last bake.
+            record_entry = BakeRecordEntry(res['source_name'], res['path'])
+            record_entry.config = res['config']
+            if res['errors']:
+                record_entry.errors += res['errors']
                 record.current.success = False
-                self._logErrors(res.path, res.errors)
+                self._logErrors(res['path'], res['errors'])
             record.addEntry(record_entry)

         logger.debug("Loading %d realm pages..." % len(factories))
         with format_timed_scope(logger,
                                 "loaded %d pages" % len(factories),
                                 level=logging.DEBUG, colored=False,
                                 timer_env=self.app.env,
                                 timer_category='LoadJob'):
-            jobs = [
-                BakeWorkerJob(JOB_LOAD, LoadJobPayload(fac))
-                for fac in factories]
+            jobs = []
+            for fac in factories:
+                job = {
+                        'type': JOB_LOAD,
+                        'job': save_factory(fac)}
+                jobs.append(job)
             ar = pool.queueJobs(jobs, handler=_handler)
             ar.wait()

     def _renderRealmPages(self, record, pool, factories):
         def _handler(res):
-            entry = record.getCurrentEntry(res.path)
-            if res.errors:
-                entry.errors += res.errors
+            entry = record.getCurrentEntry(res['path'])
+            if res['errors']:
+                entry.errors += res['errors']
                 record.current.success = False
-                self._logErrors(res.path, res.errors)
+                self._logErrors(res['path'], res['errors'])

         logger.debug("Rendering %d realm pages..." % len(factories))
         with format_timed_scope(logger,
                                 "prepared %d pages" % len(factories),
                                 level=logging.DEBUG, colored=False,
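
With jobs reduced to `{'type': ..., 'job': ...}`, the worker side only needs to dispatch on one key, and no wrapper class has to be importable in the child process for a job to unpickle. A hypothetical, self-contained sketch of that dispatch (the constants and handlers are stand-ins, not the real ones from piecrust/baking/worker.py):

    # Hypothetical worker-side dispatch on the plain-dict job format.
    JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE = range(3)

    def process_job(job):
        handlers = {
                JOB_LOAD: lambda payload: ('loaded', payload),
                JOB_RENDER_FIRST: lambda payload: ('rendered', payload),
                JOB_BAKE: lambda payload: ('baked', payload)}
        return handlers[job['type']](job['job'])

    print(process_job({'type': JOB_LOAD, 'job': {'source_name': 'pages'}}))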
271 "Can't get route for page: %s" % fac.ref_spec) 274 "Can't get route for page: %s" % fac.ref_spec)
272 logger.error(record_entry.errors[-1]) 275 logger.error(record_entry.errors[-1])
273 continue 276 continue
274 277
275 # All good, queue the job. 278 # All good, queue the job.
276 job = BakeWorkerJob( 279 job = {
277 JOB_RENDER_FIRST, 280 'type': JOB_RENDER_FIRST,
278 RenderFirstSubJobPayload(fac)) 281 'job': save_factory(fac)}
279 jobs.append(job) 282 jobs.append(job)
280 283
281 ar = pool.queueJobs(jobs, handler=_handler) 284 ar = pool.queueJobs(jobs, handler=_handler)
282 ar.wait() 285 ar.wait()
283 286
284 def _bakeRealmPages(self, record, pool, realm, factories): 287 def _bakeRealmPages(self, record, pool, realm, factories):
285 def _handler(res): 288 def _handler(res):
286 entry = record.getCurrentEntry(res.path, res.taxonomy_info) 289 entry = record.getCurrentEntry(res['path'], res['taxonomy_info'])
287 entry.subs = res.sub_entries 290 entry.subs = res['sub_entries']
288 if res.errors: 291 if res['errors']:
289 entry.errors += res.errors 292 entry.errors += res['errors']
290 self._logErrors(res.path, res.errors) 293 self._logErrors(res['path'], res['errors'])
291 if entry.has_any_error: 294 if entry.has_any_error:
292 record.current.success = False 295 record.current.success = False
293 if entry.was_any_sub_baked: 296 if entry.subs and entry.was_any_sub_baked:
294 record.current.baked_count[realm] += 1 297 record.current.baked_count[realm] += 1
295 record.dirty_source_names.add(entry.source_name)
296 298
297 logger.debug("Baking %d realm pages..." % len(factories)) 299 logger.debug("Baking %d realm pages..." % len(factories))
298 with format_timed_scope(logger, 300 with format_timed_scope(logger,
299 "baked %d pages" % len(factories), 301 "baked %d pages" % len(factories),
300 level=logging.DEBUG, colored=False, 302 level=logging.DEBUG, colored=False,
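
Results flow back as plain data too, which is why the `_handler` callbacks now index keys (`res['path']`, `res['sub_entries']`) instead of reading attributes. A fabricated example of the result shape these handlers assume (note that `sub_entries` is a plain list that can legitimately be empty, which may be what the new `entry.subs and entry.was_any_sub_baked` guard accounts for):

    # Fabricated result dict; the keys mirror what the bake handler reads.
    res = {
            'path': '/pages/foo.md',
            'taxonomy_info': None,
            'sub_entries': [],  # plain list, possibly empty
            'errors': []}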
@@ -386,14 +388,14 @@

         return buckets

     def _bakeTaxonomyBuckets(self, record, pool, buckets):
         def _handler(res):
-            entry = record.getCurrentEntry(res.path, res.taxonomy_info)
-            entry.subs = res.sub_entries
-            if res.errors:
-                entry.errors += res.errors
+            entry = record.getCurrentEntry(res['path'], res['taxonomy_info'])
+            entry.subs = res['sub_entries']
+            if res['errors']:
+                entry.errors += res['errors']
             if entry.has_any_error:
                 record.current.success = False

         # Start baking those terms.
         jobs = []
@@ -501,15 +503,20 @@
                         (fac.ref_spec, uri, override_entry.path))
                 logger.error(cur_entry.errors[-1])
                 cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN
                 return None

-        job = BakeWorkerJob(
-                JOB_BAKE,
-                BakeJobPayload(fac, route_metadata, prev_entry,
-                               record.dirty_source_names,
-                               tax_info))
+        job = {
+                'type': JOB_BAKE,
+                'job': {
+                        'factory_info': save_factory(fac),
+                        'taxonomy_info': tax_info,
+                        'route_metadata': route_metadata,
+                        'prev_entry': prev_entry,
+                        'dirty_source_names': record.dirty_source_names
+                }
+        }
         return job

     def _handleDeletetions(self, record):
         logger.debug("Handling deletions...")
         for path, reason in record.getDeletions():
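
The commit message mentions falling back to the new `fastpickle` module 'when needed'; the `prev_entry` carried by the JOB_BAKE payload above is the kind of structured record that plain standard structures do not cover on their own. That module is not part of this diff, so the following is only an illustrative sketch of the general technique, reducing an object graph to plain dicts, lists, and scalars before it crosses the process boundary:

    # Illustrative only; this is not the API of piecrust.fastpickle.
    def to_plain(obj):
        if isinstance(obj, (str, int, float, bool, type(None))):
            return obj
        if isinstance(obj, (list, tuple, set)):
            return [to_plain(v) for v in obj]
        if isinstance(obj, dict):
            return {k: to_plain(v) for k, v in obj.items()}
        # Generic object: keep its attribute dict, tag the type so the
        # receiving side could rebuild an equivalent instance.
        return {'__type__': type(obj).__name__,
                '__data__': {k: to_plain(v) for k, v in vars(obj).items()}}

    class PrevEntry:
        def __init__(self):
            self.path = '/pages/foo.md'
            self.errors = []

    print(to_plain(PrevEntry()))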