Mercurial > piecrust2
diff piecrust/baking/baker.py @ 150:91dcbb5fe1e8
Split baking code in smaller files.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 30 Nov 2014 21:46:42 -0800 |
parents | ab6e7e0e9d44 |
children | fd146f54bdaa |
line wrap: on
line diff
--- a/piecrust/baking/baker.py Sun Nov 30 21:45:55 2014 -0800 +++ b/piecrust/baking/baker.py Sun Nov 30 21:46:42 2014 -0800 @@ -1,19 +1,14 @@ import time import os.path -import codecs import shutil import hashlib import logging import threading -import urllib.request, urllib.error, urllib.parse from piecrust.baking.records import (TransitionalBakeRecord, - BakeRecordPageEntry, - FLAG_OVERRIDEN, FLAG_SOURCE_MODIFIED) + BakeRecordPageEntry) +from piecrust.baking.scheduler import BakeScheduler +from piecrust.baking.single import (BakingError, PageBaker) from piecrust.chefutil import format_timed, log_friendly_exception -from piecrust.data.filters import (PaginationFilter, HasFilterClause, - IsFilterClause, AndBooleanClause) -from piecrust.rendering import (PageRenderingContext, render_page, - PASS_FORMATTING, PASS_RENDERING) from piecrust.sources.base import (PageFactory, REALM_NAMES, REALM_USER, REALM_THEME) @@ -21,250 +16,6 @@ logger = logging.getLogger(__name__) -class BakingError(Exception): - pass - - -class PageBaker(object): - def __init__(self, app, out_dir, force=False, record=None, - copy_assets=True): - self.app = app - self.out_dir = out_dir - self.force = force - self.record = record - self.copy_assets = copy_assets - self.site_root = app.config.get('site/root') - self.pretty_urls = app.config.get('site/pretty_urls') - self.pagination_suffix = app.config.get('site/pagination_suffix') - - def getOutputUri(self, uri, num): - suffix = self.pagination_suffix.replace('%num%', str(num)) - if self.pretty_urls: - # Output will be: - # - `uri/name` - # - `uri/name/2` - # - `uri/name.ext` - # - `uri/name.ext/2` - if num <= 1: - return uri - return uri + suffix - else: - # Output will be: - # - `uri/name.html` - # - `uri/name/2.html` - # - `uri/name.ext` - # - `uri/name/2.ext` - if uri == '/': - if num <= 1: - return '/' - return '/' + suffix.lstrip('/') - else: - if num <= 1: - return uri - #TODO: watch out for tags with dots in them. - base_uri, ext = os.path.splitext(uri) - return base_uri + suffix + ext - - def getOutputPath(self, uri): - bake_path = [self.out_dir] - decoded_uri = urllib.parse.unquote(uri.lstrip('/')) - if self.pretty_urls: - bake_path.append(decoded_uri) - bake_path.append('index.html') - else: - name, ext = os.path.splitext(decoded_uri) - if decoded_uri == '': - bake_path.append('index.html') - elif ext: - bake_path.append(decoded_uri) - else: - bake_path.append(decoded_uri + '.html') - - return os.path.normpath(os.path.join(*bake_path)) - - def bake(self, factory, route, record_entry, - taxonomy_name=None, taxonomy_term=None): - custom_data = None - pagination_filter = None - route_metadata = dict(factory.metadata) - if taxonomy_name and taxonomy_term: - # Must bake a taxonomy listing page... we'll have to add a - # pagination filter for only get matching posts, and the output - # URL will be a bit different. - tax = self.app.getTaxonomy(taxonomy_name) - pagination_filter = PaginationFilter() - if tax.is_multiple: - if isinstance(taxonomy_term, tuple): - abc = AndBooleanClause() - for t in taxonomy_term: - abc.addClause(HasFilterClause(taxonomy_name, t)) - pagination_filter.addClause(abc) - slugified_term = '/'.join(taxonomy_term) - else: - pagination_filter.addClause(HasFilterClause(taxonomy_name, - taxonomy_term)) - slugified_term = taxonomy_term - else: - pagination_filter.addClause(IsFilterClause(taxonomy_name, - taxonomy_term)) - slugified_term = taxonomy_term - custom_data = {tax.term_name: taxonomy_term} - route_metadata.update({tax.term_name: slugified_term}) - - # Generate the URL using the route. - page = factory.buildPage() - uri = route.getUri(route_metadata, page) - - override = self.record.getOverrideEntry(factory, uri) - if override is not None: - override_source = self.app.getSource(override.source_name) - if override_source.realm == factory.source.realm: - raise BakingError( - "Page '%s' maps to URL '%s' but is overriden by page" - "'%s:%s'." % (factory.ref_spec, uri, - override.source_name, override.rel_path)) - logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" % - (factory.ref_spec, uri, override.source_name, - override.rel_path)) - record_entry.flags |= FLAG_OVERRIDEN - return - - cur_sub = 1 - has_more_subs = True - force_this = self.force - invalidate_formatting = False - record_entry.config = page.config.get().copy() - prev_record_entry = self.record.getPreviousEntry( - factory.source.name, factory.rel_path, - taxonomy_name, taxonomy_term) - - logger.debug("Baking '%s'..." % uri) - - # If the current page is known to use pages from other sources, - # see if any of those got baked, or are going to be baked for some - # reason. If so, we need to bake this one too. - # (this happens for instance with the main page of a blog). - if prev_record_entry and prev_record_entry.was_baked_successfully: - invalidated_render_passes = set() - used_src_names = list(prev_record_entry.used_source_names) - for src_name, rdr_pass in used_src_names: - entries = self.record.getCurrentEntries(src_name) - for e in entries: - if e.was_baked or e.flags & FLAG_SOURCE_MODIFIED: - invalidated_render_passes.add(rdr_pass) - break - if len(invalidated_render_passes) > 0: - logger.debug("'%s' is known to use sources %s, at least one " - "of which got baked. Will force bake this page. " - % (uri, used_src_names)) - force_this = True - if PASS_FORMATTING in invalidated_render_passes: - logger.debug("Will invalidate cached formatting for '%s' " - "since sources were using during that pass." - % uri) - invalidate_formatting = True - - while has_more_subs: - sub_uri = self.getOutputUri(uri, cur_sub) - out_path = self.getOutputPath(sub_uri) - - # Check for up-to-date outputs. - do_bake = True - if not force_this: - try: - in_path_time = record_entry.path_mtime - out_path_time = os.path.getmtime(out_path) - if out_path_time > in_path_time: - do_bake = False - except OSError: - # File doesn't exist, we'll need to bake. - pass - - # If this page didn't bake because it's already up-to-date. - # Keep trying for as many subs as we know this page has. - if not do_bake: - if (prev_record_entry is not None and - prev_record_entry.num_subs < cur_sub): - logger.debug("") - cur_sub += 1 - has_more_subs = True - logger.debug(" %s is up to date, skipping to next " - "sub-page." % out_path) - continue - - # We don't know how many subs to expect... just skip. - logger.debug(" %s is up to date, skipping bake." % out_path) - break - - # All good, proceed. - try: - if invalidate_formatting: - cache_key = '%s:%s' % (uri, cur_sub) - self.app.env.rendered_segments_repository.invalidate( - cache_key) - - logger.debug(" p%d -> %s" % (cur_sub, out_path)) - ctx, rp = self._bakeSingle(page, sub_uri, cur_sub, out_path, - pagination_filter, custom_data) - except Exception as ex: - if self.app.debug: - logger.exception(ex) - page_rel_path = os.path.relpath(page.path, self.app.root_dir) - raise BakingError("%s: error baking '%s'." % - (page_rel_path, uri)) from ex - - # Copy page assets. - if (cur_sub == 1 and self.copy_assets and - ctx.used_assets is not None): - if self.pretty_urls: - out_assets_dir = os.path.dirname(out_path) - else: - out_assets_dir, out_name = os.path.split(out_path) - if sub_uri != self.site_root: - out_name_noext, _ = os.path.splitext(out_name) - out_assets_dir += out_name_noext - - logger.debug("Copying page assets to: %s" % out_assets_dir) - if not os.path.isdir(out_assets_dir): - os.makedirs(out_assets_dir, 0o755) - for ap in ctx.used_assets: - dest_ap = os.path.join(out_assets_dir, os.path.basename(ap)) - logger.debug(" %s -> %s" % (ap, dest_ap)) - shutil.copy(ap, dest_ap) - - # Record what we did and figure out if we have more work. - record_entry.out_uris.append(sub_uri) - record_entry.out_paths.append(out_path) - record_entry.used_source_names |= ctx.used_source_names - record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms - - has_more_subs = False - if (ctx.used_pagination is not None and - ctx.used_pagination.has_more): - cur_sub += 1 - has_more_subs = True - - def _bakeSingle(self, page, sub_uri, num, out_path, - pagination_filter=None, custom_data=None): - ctx = PageRenderingContext(page, sub_uri) - ctx.page_num = num - if pagination_filter: - ctx.pagination_filter = pagination_filter - if custom_data: - ctx.custom_data = custom_data - - rp = render_page(ctx) - - out_dir = os.path.dirname(out_path) - if not os.path.isdir(out_dir): - os.makedirs(out_dir, 0o755) - - with codecs.open(out_path, 'w', 'utf8') as fp: - fp.write(rp.content) - - return ctx, rp - - class Baker(object): def __init__(self, app, out_dir, force=False, portable=False, no_assets=False, num_workers=4): @@ -554,97 +305,6 @@ raise BakingError("Baking was aborted due to errors.") -class BakeScheduler(object): - _EMPTY = object() - _WAIT = object() - - def __init__(self, record, jobs=None): - self.record = record - self.jobs = list(jobs) if jobs is not None else [] - self._active_jobs = [] - self._lock = threading.Lock() - self._added_event = threading.Event() - self._done_event = threading.Event() - - def addJob(self, job): - logger.debug("Queuing job '%s:%s'." % ( - job.factory.source.name, job.factory.rel_path)) - with self._lock: - self.jobs.append(job) - self._added_event.set() - - def onJobFinished(self, job): - logger.debug("Removing job '%s:%s'." % ( - job.factory.source.name, job.factory.rel_path)) - with self._lock: - self._active_jobs.remove(job) - self._done_event.set() - - def getNextJob(self, wait_timeout=None, empty_timeout=None): - self._added_event.clear() - self._done_event.clear() - job = self._doGetNextJob() - while job in (self._EMPTY, self._WAIT): - if job == self._EMPTY: - if empty_timeout is None: - return None - logger.debug("Waiting for a new job to be added...") - res = self._added_event.wait(empty_timeout) - elif job == self._WAIT: - if wait_timeout is None: - return None - logger.debug("Waiting for a job to be finished...") - res = self._done_event.wait(wait_timeout) - if not res: - logger.debug("Timed-out. No job found.") - return None - job = self._doGetNextJob() - return job - - def _doGetNextJob(self): - with self._lock: - if len(self.jobs) == 0: - return self._EMPTY - - job = self.jobs.pop(0) - first_job = job - while True: - ready, wait_on_src = self._isJobReady(job) - if ready: - break - - logger.debug("Job '%s:%s' isn't ready yet: waiting on pages " - "from source '%s' to finish baking." % - (job.factory.source.name, - job.factory.rel_path, wait_on_src)) - self.jobs.append(job) - job = self.jobs.pop(0) - if job == first_job: - # None of the jobs are ready... we need to wait. - self.jobs.append(job) - return self._WAIT - - logger.debug("Job '%s:%s' is ready to go, moving to active " - "queue." % (job.factory.source.name, job.factory.rel_path)) - self._active_jobs.append(job) - return job - - def _isJobReady(self, job): - e = self.record.getPreviousEntry(job.factory.source.name, - job.factory.rel_path) - if not e: - return (True, None) - for sn, rp in e.used_source_names: - if sn == job.factory.source.name: - continue - if any(filter(lambda j: j.factory.source.name == sn, self.jobs)): - return (False, sn) - if any(filter(lambda j: j.factory.source.name == sn, - self._active_jobs)): - return (False, sn) - return (True, None) - - class BakeWorkerContext(object): def __init__(self, app, out_dir, force, record, work_queue, abort_event):