comparison piecrust/baking/baker.py @ 158:1187739e5a19

Fix some indentation and line lengths.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 31 Dec 2014 16:56:55 -0800
parents fd146f54bdaa
children 232989a6df36
comparison
legend: equal | deleted | inserted | replaced
157:55910ab4bfea 158:1187739e5a19
2 import os.path 2 import os.path
3 import shutil 3 import shutil
4 import hashlib 4 import hashlib
5 import logging 5 import logging
6 import threading 6 import threading
7 from piecrust.baking.records import (TransitionalBakeRecord, 7 from piecrust.baking.records import (
8 BakeRecordPageEntry) 8 TransitionalBakeRecord, BakeRecordPageEntry)
9 from piecrust.baking.scheduler import BakeScheduler 9 from piecrust.baking.scheduler import BakeScheduler
10 from piecrust.baking.single import (BakingError, PageBaker) 10 from piecrust.baking.single import (BakingError, PageBaker)
11 from piecrust.chefutil import format_timed, log_friendly_exception 11 from piecrust.chefutil import format_timed, log_friendly_exception
12 from piecrust.sources.base import (PageFactory, 12 from piecrust.sources.base import (
13 PageFactory,
13 REALM_NAMES, REALM_USER, REALM_THEME) 14 REALM_NAMES, REALM_USER, REALM_THEME)
14 15
15 16
16 logger = logging.getLogger(__name__) 17 logger = logging.getLogger(__name__)
17 18
18 19
19 class Baker(object): 20 class Baker(object):
20 def __init__(self, app, out_dir, force=False, portable=False, 21 def __init__(self, app, out_dir, force=False, portable=False,
21 no_assets=False, num_workers=4): 22 no_assets=False, num_workers=4):
22 assert app and out_dir 23 assert app and out_dir
23 self.app = app 24 self.app = app
24 self.out_dir = out_dir 25 self.out_dir = out_dir
25 self.force = force 26 self.force = force
26 self.portable = portable 27 self.portable = portable
58 hashlib.md5(self.out_dir.encode('utf8')).hexdigest() + 59 hashlib.md5(self.out_dir.encode('utf8')).hexdigest() +
59 '.record') 60 '.record')
60 if not self.force and record_cache.has(record_name): 61 if not self.force and record_cache.has(record_name):
61 t = time.clock() 62 t = time.clock()
62 record.loadPrevious(record_cache.getCachePath(record_name)) 63 record.loadPrevious(record_cache.getCachePath(record_name))
63 logger.debug(format_timed(t, 'loaded previous bake record', 64 logger.debug(format_timed(
64 colored=False)); 65 t, 'loaded previous bake record',
66 colored=False))
65 67
66 # Figure out if we need to clean the cache because important things 68 # Figure out if we need to clean the cache because important things
67 # have changed. 69 # have changed.
68 self._handleCacheValidity(record) 70 self._handleCacheValidity(record)
69 71
136 logger.debug("Cleaning baker cache: %s" % cache_dir) 138 logger.debug("Cleaning baker cache: %s" % cache_dir)
137 shutil.rmtree(cache_dir) 139 shutil.rmtree(cache_dir)
138 self.force = True 140 self.force = True
139 record.incremental_count = 0 141 record.incremental_count = 0
140 record.clearPrevious() 142 record.clearPrevious()
141 logger.info(format_timed(start_time, 143 logger.info(format_timed(
142 "cleaned cache (reason: %s)" % reason)) 144 start_time,
145 "cleaned cache (reason: %s)" % reason))
143 else: 146 else:
144 record.incremental_count += 1 147 record.incremental_count += 1
145 logger.debug(format_timed(start_time, "cache is assumed valid", 148 logger.debug(format_timed(
146 colored=False)) 149 start_time, "cache is assumed valid",
150 colored=False))
147 151
148 def _bakeRealm(self, record, realm, srclist): 152 def _bakeRealm(self, record, realm, srclist):
149 # Gather all page factories from the sources and queue them 153 # Gather all page factories from the sources and queue them
150 # for the workers to pick up. Just skip taxonomy pages for now. 154 # for the workers to pick up. Just skip taxonomy pages for now.
151 logger.debug("Baking realm %s" % REALM_NAMES[realm]) 155 logger.debug("Baking realm %s" % REALM_NAMES[realm])
153 157
154 for source in srclist: 158 for source in srclist:
155 factories = source.getPageFactories() 159 factories = source.getPageFactories()
156 for fac in factories: 160 for fac in factories:
157 if fac.path in self.taxonomy_pages: 161 if fac.path in self.taxonomy_pages:
158 logger.debug("Skipping taxonomy page: %s:%s" % 162 logger.debug(
163 "Skipping taxonomy page: %s:%s" %
159 (source.name, fac.ref_spec)) 164 (source.name, fac.ref_spec))
160 continue 165 continue
161 166
162 entry = BakeRecordPageEntry(fac) 167 entry = BakeRecordPageEntry(fac)
163 record.addEntry(entry) 168 record.addEntry(entry)
164 169
165 route = self.app.getRoute(source.name, fac.metadata) 170 route = self.app.getRoute(source.name, fac.metadata)
166 if route is None: 171 if route is None:
167 entry.errors.append("Can't get route for page: %s" % 172 entry.errors.append(
168 fac.ref_spec) 173 "Can't get route for page: %s" % fac.ref_spec)
169 logger.error(entry.errors[-1]) 174 logger.error(entry.errors[-1])
170 continue 175 continue
171 176
172 queue.addJob(BakeWorkerJob(fac, route, entry)) 177 queue.addJob(BakeWorkerJob(fac, route, entry))
173 178
236 for source_name, source_taxonomies in buckets.items(): 241 for source_name, source_taxonomies in buckets.items():
237 for tax_name, terms in source_taxonomies.items(): 242 for tax_name, terms in source_taxonomies.items():
238 if len(terms) == 0: 243 if len(terms) == 0:
239 continue 244 continue
240 245
241 logger.debug("Baking '%s' for source '%s': %s" % 246 logger.debug(
247 "Baking '%s' for source '%s': %s" %
242 (tax_name, source_name, terms)) 248 (tax_name, source_name, terms))
243 tax = self.app.getTaxonomy(tax_name) 249 tax = self.app.getTaxonomy(tax_name)
244 route = self.app.getTaxonomyRoute(tax_name, source_name) 250 route = self.app.getTaxonomyRoute(tax_name, source_name)
245 tax_page_ref = tax.getPageRef(source_name) 251 tax_page_ref = tax.getPageRef(source_name)
246 if not tax_page_ref.exists: 252 if not tax_page_ref.exists:
247 logger.debug("No taxonomy page found at '%s', skipping." % 253 logger.debug(
254 "No taxonomy page found at '%s', skipping." %
248 tax.page_ref) 255 tax.page_ref)
249 continue 256 continue
250 257
251 tax_page_source = tax_page_ref.source 258 tax_page_source = tax_page_ref.source
252 tax_page_rel_path = tax_page_ref.rel_path 259 tax_page_rel_path = tax_page_ref.rel_path
253 logger.debug("Using taxonomy page: %s:%s" % 260 logger.debug(
261 "Using taxonomy page: %s:%s" %
254 (tax_page_source.name, tax_page_rel_path)) 262 (tax_page_source.name, tax_page_rel_path))
255 263
256 for term in terms: 264 for term in terms:
257 fac = PageFactory(tax_page_source, tax_page_rel_path, 265 fac = PageFactory(
266 tax_page_source, tax_page_rel_path,
258 {tax.term_name: term}) 267 {tax.term_name: term})
259 logger.debug("Queuing: %s [%s, %s]" % 268 logger.debug(
269 "Queuing: %s [%s, %s]" %
260 (fac.ref_spec, tax_name, term)) 270 (fac.ref_spec, tax_name, term))
261 entry = BakeRecordPageEntry(fac, tax_name, term) 271 entry = BakeRecordPageEntry(fac, tax_name, term)
262 record.addEntry(entry) 272 record.addEntry(entry)
263 queue.addJob( 273 queue.addJob(
264 BakeWorkerJob(fac, route, entry, tax_name, term)) 274 BakeWorkerJob(fac, route, entry, tax_name, term))
279 def _createWorkerPool(self, record, pool_size=4): 289 def _createWorkerPool(self, record, pool_size=4):
280 pool = [] 290 pool = []
281 queue = BakeScheduler(record) 291 queue = BakeScheduler(record)
282 abort = threading.Event() 292 abort = threading.Event()
283 for i in range(pool_size): 293 for i in range(pool_size):
284 ctx = BakeWorkerContext(self.app, self.out_dir, self.force, 294 ctx = BakeWorkerContext(
295 self.app, self.out_dir, self.force,
285 record, queue, abort) 296 record, queue, abort)
286 worker = BakeWorker(i, ctx) 297 worker = BakeWorker(i, ctx)
287 pool.append(worker) 298 pool.append(worker)
288 return pool, queue, abort 299 return pool, queue, abort
289 300
305 raise BakingError("Baking was aborted due to errors.") 316 raise BakingError("Baking was aborted due to errors.")
306 317
307 318
308 class BakeWorkerContext(object): 319 class BakeWorkerContext(object):
309 def __init__(self, app, out_dir, force, record, work_queue, 320 def __init__(self, app, out_dir, force, record, work_queue,
310 abort_event): 321 abort_event):
311 self.app = app 322 self.app = app
312 self.out_dir = out_dir 323 self.out_dir = out_dir
313 self.force = force 324 self.force = force
314 self.record = record 325 self.record = record
315 self.work_queue = work_queue 326 self.work_queue = work_queue
316 self.abort_event = abort_event 327 self.abort_event = abort_event
317 328
318 329
319 class BakeWorkerJob(object): 330 class BakeWorkerJob(object):
320 def __init__(self, factory, route, record_entry, 331 def __init__(self, factory, route, record_entry,
321 taxonomy_name=None, taxonomy_term=None): 332 taxonomy_name=None, taxonomy_term=None):
322 self.factory = factory 333 self.factory = factory
323 self.route = route 334 self.route = route
324 self.record_entry = record_entry 335 self.record_entry = record_entry
325 self.taxonomy_name = taxonomy_name 336 self.taxonomy_name = taxonomy_name
326 self.taxonomy_term = taxonomy_term 337 self.taxonomy_term = taxonomy_term
334 def __init__(self, wid, ctx): 345 def __init__(self, wid, ctx):
335 super(BakeWorker, self).__init__(name=('worker%d' % wid)) 346 super(BakeWorker, self).__init__(name=('worker%d' % wid))
336 self.wid = wid 347 self.wid = wid
337 self.ctx = ctx 348 self.ctx = ctx
338 self.abort_exception = None 349 self.abort_exception = None
339 self._page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force, 350 self._page_baker = PageBaker(
351 ctx.app, ctx.out_dir, ctx.force,
340 ctx.record) 352 ctx.record)
341 353
342 def run(self): 354 def run(self):
343 while(not self.ctx.abort_event.is_set()): 355 while(not self.ctx.abort_event.is_set()):
344 try: 356 try:
345 job = self.ctx.work_queue.getNextJob(wait_timeout=1) 357 job = self.ctx.work_queue.getNextJob(wait_timeout=1)
346 if job is None: 358 if job is None:
347 logger.debug("[%d] No more work... shutting down." % 359 logger.debug(
360 "[%d] No more work... shutting down." %
348 self.wid) 361 self.wid)
349 break 362 break
350
351 self._unsafeRun(job) 363 self._unsafeRun(job)
352 logger.debug("[%d] Done with page." % self.wid) 364 logger.debug("[%d] Done with page." % self.wid)
353 self.ctx.work_queue.onJobFinished(job) 365 self.ctx.work_queue.onJobFinished(job)
354 except Exception as ex: 366 except Exception as ex:
355 self.ctx.abort_event.set() 367 self.ctx.abort_event.set()
362 def _unsafeRun(self, job): 374 def _unsafeRun(self, job):
363 start_time = time.clock() 375 start_time = time.clock()
364 376
365 entry = job.record_entry 377 entry = job.record_entry
366 try: 378 try:
367 self._page_baker.bake(job.factory, job.route, entry, 379 self._page_baker.bake(
380 job.factory, job.route, entry,
368 taxonomy_name=job.taxonomy_name, 381 taxonomy_name=job.taxonomy_name,
369 taxonomy_term=job.taxonomy_term) 382 taxonomy_term=job.taxonomy_term)
370 except BakingError as ex: 383 except BakingError as ex:
371 logger.debug("Got baking error. Adding it to the record.") 384 logger.debug("Got baking error. Adding it to the record.")
372 while ex: 385 while ex:
377 uri = entry.out_uris[0] 390 uri = entry.out_uris[0]
378 friendly_uri = uri if uri != '' else '[main page]' 391 friendly_uri = uri if uri != '' else '[main page]'
379 friendly_count = '' 392 friendly_count = ''
380 if entry.num_subs > 1: 393 if entry.num_subs > 1:
381 friendly_count = ' (%d pages)' % entry.num_subs 394 friendly_count = ' (%d pages)' % entry.num_subs
382 logger.info(format_timed(start_time, '[%d] %s%s' % 395 logger.info(format_timed(
396 start_time, '[%d] %s%s' %
383 (self.wid, friendly_uri, friendly_count))) 397 (self.wid, friendly_uri, friendly_count)))
384 elif entry.errors: 398 elif entry.errors:
385 for e in entry.errors: 399 for e in entry.errors:
386 logger.error(e) 400 logger.error(e)
387 401