piecrust2: diff piecrust/pipelines/page.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines,
one for assets and one for pages. The asset pipeline is vaguely functional,
but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each
content item. This should allow for better parallelization.
author    Ludovic Chabant <ludovic@chabant.com>
date      Wed, 17 May 2017 00:11:48 -0700
parents
children  08e02c2a2a1a
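The model the commit message describes can be summed up in a short, self-contained Python sketch: sources hand out content items, and baking just runs each source's pipeline over those items. Every name below (ContentItem, getAllContents, the pipelines mapping) is an illustrative assumption rather than PieCrust's actual API; the real interface is the ContentPipeline subclass in the diff that follows.

class ContentItem:
    def __init__(self, spec):
        self.spec = spec  # e.g. a file path (hypothetical stand-in)


class ContentSource:
    # Everything is a content source, including assets directories.
    def __init__(self, name, items, pipeline_name):
        self.name = name
        self.items = items
        self.pipeline_name = pipeline_name  # 'asset' or 'page'

    def getAllContents(self):
        # Hypothetical enumeration API; yields this source's items.
        yield from self.items


class ContentPipeline:
    # Mirrors the initialize/run/shutdown shape of the class in the diff.
    def initialize(self, ctx):
        pass

    def run(self, content_item, ctx):
        print("processing %s" % content_item.spec)

    def shutdown(self, ctx):
        pass


def bake(sources, pipelines, ctx=None):
    # Baking is just: for each source, run the matching pipeline on each
    # content item. Each item is an independent job, which is what makes
    # this loop easy to hand off to a pool of worker processes.
    for source in sources:
        pipeline = pipelines[source.pipeline_name]
        pipeline.initialize(ctx)
        try:
            for item in source.getAllContents():
                pipeline.run(item, ctx)
        finally:
            pipeline.shutdown(ctx)


bake([ContentSource('pages', [ContentItem('pages/foo.md')], 'page')],
     {'page': ContentPipeline(), 'asset': ContentPipeline()})

Because the outer loop only dispatches independent items, it is the natural seam for the better parallelization the commit message mentions.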
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/pipelines/page.py	Wed May 17 00:11:48 2017 -0700
@@ -0,0 +1,373 @@
+import hashlib
+from piecrust.pipelines.base import ContentPipeline
+
+
+class PagePipeline(ContentPipeline):
+    PIPELINE_NAME = 'page'
+    PIPELINE_PASSES = 3
+
+    def initialize(self, ctx):
+        pass
+
+    def run(self, content_item, ctx):
+        raise NotImplementedError()
+
+    def shutdown(self, ctx):
+        pass
+
+    def collapseRecords(self, record_history):
+        pass
+
+    def getDeletions(self, record_history):
+        for prev, cur in record_history.diffs():
+            if prev and not cur:
+                for sub in prev.subs:
+                    yield (sub.out_path, 'previous source file was removed')
+            elif prev and cur:
+                prev_out_paths = [o.out_path for o in prev.subs]
+                cur_out_paths = [o.out_path for o in cur.subs]
+                diff = set(prev_out_paths) - set(cur_out_paths)
+                for p in diff:
+                    yield (p, 'source file changed outputs')
+
+
+JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE = range(0, 3)
+
+
+def _get_transition_key(path, extra_key=None):
+    key = path
+    if extra_key:
+        key += '+%s' % extra_key
+    return hashlib.md5(key.encode('utf8')).hexdigest()
+
+
+# def getOverrideEntry(self, path, uri):
+#     for pair in self.transitions.values():
+#         cur = pair[1]
+#         if cur and cur.path != path:
+#             for o in cur.subs:
+#                 if o.out_uri == uri:
+#                     return cur
+#     return None
+
+
+# # Create the job handlers.
+# job_handlers = {
+#     JOB_LOAD: LoadJobHandler(self.ctx),
+#     JOB_RENDER_FIRST: RenderFirstSubJobHandler(self.ctx),
+#     JOB_BAKE: BakeJobHandler(self.ctx)}
+# for jt, jh in job_handlers.items():
+#     app.env.registerTimer(type(jh).__name__)
+# self.job_handlers = job_handlers
+#
+# def process(self, job):
+#     handler = self.job_handlers[job['type']]
+#     with self.ctx.app.env.timerScope(type(handler).__name__):
+#         return handler.handleJob(job['job'])
+
+# def _loadRealmPages(self, record_history, pool, factories):
+#     def _handler(res):
+#         # Create the record entry for this page.
+#         # This will also update the `dirty_source_names` for the record
+#         # as we add page files whose last modification times are later
+#         # than the last bake.
+#         record_entry = BakeRecordEntry(res['source_name'], res['path'])
+#         record_entry.config = res['config']
+#         record_entry.timestamp = res['timestamp']
+#         if res['errors']:
+#             record_entry.errors += res['errors']
+#             record_history.current.success = False
+#             self._logErrors(res['path'], res['errors'])
+#         record_history.addEntry(record_entry)
+#
+#     logger.debug("Loading %d realm pages..." % len(factories))
+#     with format_timed_scope(logger,
+#                             "loaded %d pages" % len(factories),
+#                             level=logging.DEBUG, colored=False,
+#                             timer_env=self.app.env,
+#                             timer_category='LoadJob'):
+#         jobs = []
+#         for fac in factories:
+#             job = {
+#                 'type': JOB_LOAD,
+#                 'job': save_factory(fac)}
+#             jobs.append(job)
+#         ar = pool.queueJobs(jobs, handler=_handler)
+#         ar.wait()
+#
+# def _renderRealmPages(self, record_history, pool, factories):
+#     def _handler(res):
+#         entry = record_history.getCurrentEntry(res['path'])
+#         if res['errors']:
+#             entry.errors += res['errors']
+#             record_history.current.success = False
+#             self._logErrors(res['path'], res['errors'])
+#
+#     logger.debug("Rendering %d realm pages..." % len(factories))
+#     with format_timed_scope(logger,
+#                             "prepared %d pages" % len(factories),
+#                             level=logging.DEBUG, colored=False,
+#                             timer_env=self.app.env,
+#                             timer_category='RenderFirstSubJob'):
+#         jobs = []
+#         for fac in factories:
+#             record_entry = record_history.getCurrentEntry(fac.path)
+#             if record_entry.errors:
+#                 logger.debug("Ignoring %s because it had previous "
+#                              "errors." % fac.ref_spec)
+#                 continue
+#
+#             # Make sure the source and the route exist for this page,
+#             # otherwise we add errors to the record entry and we'll skip
+#             # this page for the rest of the bake.
+#             source = self.app.getSource(fac.source.name)
+#             if source is None:
+#                 record_entry.errors.append(
+#                     "Can't get source for page: %s" % fac.ref_spec)
+#                 logger.error(record_entry.errors[-1])
+#                 continue
+#
+#             route = self.app.getSourceRoute(fac.source.name, fac.metadata)
+#             if route is None:
+#                 record_entry.errors.append(
+#                     "Can't get route for page: %s" % fac.ref_spec)
+#                 logger.error(record_entry.errors[-1])
+#                 continue
+#
+#             # All good, queue the job.
+#             route_index = self.app.routes.index(route)
+#             job = {
+#                 'type': JOB_RENDER_FIRST,
+#                 'job': {
+#                     'factory_info': save_factory(fac),
+#                     'route_index': route_index
+#                 }
+#             }
+#             jobs.append(job)
+#
+#         ar = pool.queueJobs(jobs, handler=_handler)
+#         ar.wait()
+#
+# def _bakeRealmPages(self, record_history, pool, realm, factories):
+#     def _handler(res):
+#         entry = record_history.getCurrentEntry(res['path'])
+#         entry.subs = res['sub_entries']
+#         if res['errors']:
+#             entry.errors += res['errors']
+#             self._logErrors(res['path'], res['errors'])
+#         if entry.has_any_error:
+#             record_history.current.success = False
+#         if entry.subs and entry.was_any_sub_baked:
+#             record_history.current.baked_count[realm] += 1
+#             record_history.current.total_baked_count[realm] += len(entry.subs)
+#
+#     logger.debug("Baking %d realm pages..." % len(factories))
+#     with format_timed_scope(logger,
+#                             "baked %d pages" % len(factories),
+#                             level=logging.DEBUG, colored=False,
+#                             timer_env=self.app.env,
+#                             timer_category='BakeJob'):
+#         jobs = []
+#         for fac in factories:
+#             job = self._makeBakeJob(record_history, fac)
+#             if job is not None:
+#                 jobs.append(job)
+#
+#         ar = pool.queueJobs(jobs, handler=_handler)
+#         ar.wait()
+#
+
+
+# def _makeBakeJob(self, record_history, fac):
+#     # Get the previous (if any) and current entry for this page.
+#     pair = record_history.getPreviousAndCurrentEntries(fac.path)
+#     assert pair is not None
+#     prev_entry, cur_entry = pair
+#     assert cur_entry is not None
+#
+#     # Ignore if there were errors in the previous passes.
+#     if cur_entry.errors:
+#         logger.debug("Ignoring %s because it had previous "
+#                      "errors." % fac.ref_spec)
+#         return None
+#
+#     # Build the route metadata and find the appropriate route.
+#     page = fac.buildPage()
+#     route_metadata = create_route_metadata(page)
+#     route = self.app.getSourceRoute(fac.source.name, route_metadata)
+#     assert route is not None
+#
+#     # Figure out if this page is overriden by another previously
+#     # baked page. This happens for example when the user has
+#     # made a page that has the same page/URL as a theme page.
+#     uri = route.getUri(route_metadata)
+#     override_entry = record_history.getOverrideEntry(page.path, uri)
+#     if override_entry is not None:
+#         override_source = self.app.getSource(
+#             override_entry.source_name)
+#         if override_source.realm == fac.source.realm:
+#             cur_entry.errors.append(
+#                 "Page '%s' maps to URL '%s' but is overriden "
+#                 "by page '%s'." %
+#                 (fac.ref_spec, uri, override_entry.path))
+#             logger.error(cur_entry.errors[-1])
+#         cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN
+#         return None
+#
+#     route_index = self.app.routes.index(route)
+#     job = {
+#         'type': JOB_BAKE,
+#         'job': {
+#             'factory_info': save_factory(fac),
+#             'generator_name': None,
+#             'generator_record_key': None,
+#             'route_index': route_index,
+#             'route_metadata': route_metadata,
+#             'dirty_source_names': record_history.dirty_source_names
+#         }
+#     }
+#     return job
+#
+# def _handleDeletetions(self, record_history):
+#     logger.debug("Handling deletions...")
+#     for path, reason in record_history.getDeletions():
+#         logger.debug("Removing '%s': %s" % (path, reason))
+#         record_history.current.deleted.append(path)
+#         try:
+#             os.remove(path)
+#             logger.info('[delete] %s' % path)
+#         except OSError:
+#             # Not a big deal if that file had already been removed
+#             # by the user.
+#             pass
+#
+
+
+#def save_factory(fac):
+#    return {
+#        'source_name': fac.source.name,
+#        'rel_path': fac.rel_path,
+#        'metadata': fac.metadata}
+#
+#
+#def load_factory(app, info):
+#    source = app.getSource(info['source_name'])
+#    return PageFactory(source, info['rel_path'], info['metadata'])
+#
+#
+#class LoadJobHandler(JobHandler):
+#    def handleJob(self, job):
+#        # Just make sure the page has been cached.
+#        fac = load_factory(self.app, job)
+#        logger.debug("Loading page: %s" % fac.ref_spec)
+#        self.app.env.addManifestEntry('LoadJobs', fac.ref_spec)
+#        result = {
+#            'source_name': fac.source.name,
+#            'path': fac.path,
+#            'config': None,
+#            'timestamp': None,
+#            'errors': None}
+#        try:
+#            page = fac.buildPage()
+#            page._load()
+#            result['config'] = page.config.getAll()
+#            result['timestamp'] = page.datetime.timestamp()
+#        except Exception as ex:
+#            logger.debug("Got loading error. Sending it to master.")
+#            result['errors'] = _get_errors(ex)
+#            if self.ctx.app.debug:
+#                logger.exception(ex)
+#        return result
+#
+#
+#class RenderFirstSubJobHandler(JobHandler):
+#    def handleJob(self, job):
+#        # Render the segments for the first sub-page of this page.
+#        fac = load_factory(self.app, job['factory_info'])
+#        self.app.env.addManifestEntry('RenderJobs', fac.ref_spec)
+#
+#        route_index = job['route_index']
+#        route = self.app.routes[route_index]
+#
+#        page = fac.buildPage()
+#        qp = QualifiedPage(page, route, route_metadata)
+#        ctx = RenderingContext(qp)
+#        self.app.env.abort_source_use = True
+#
+#        result = {
+#            'path': fac.path,
+#            'aborted': False,
+#            'errors': None}
+#        logger.debug("Preparing page: %s" % fac.ref_spec)
+#        try:
+#            render_page_segments(ctx)
+#        except AbortedSourceUseError:
+#            logger.debug("Page %s was aborted." % fac.ref_spec)
+#            self.app.env.stepCounter("SourceUseAbortions")
+#            result['aborted'] = True
+#        except Exception as ex:
+#            logger.debug("Got rendering error. Sending it to master.")
+#            result['errors'] = _get_errors(ex)
+#            if self.ctx.app.debug:
+#                logger.exception(ex)
+#        finally:
+#            self.app.env.abort_source_use = False
+#        return result
+#
+#
+#class BakeJobHandler(JobHandler):
+#    def __init__(self, ctx):
+#        super(BakeJobHandler, self).__init__(ctx)
+#        self.page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force)
+#
+#    def shutdown(self):
+#        self.page_baker.shutdown()
+#
+#    def handleJob(self, job):
+#        # Actually bake the page and all its sub-pages to the output folder.
+#        fac = load_factory(self.app, job['factory_info'])
+#        self.app.env.addManifestEntry('BakeJobs', fac.ref_spec)
+#
+#        route_index = job['route_index']
+#        route_metadata = job['route_metadata']
+#        route = self.app.routes[route_index]
+#
+#        gen_name = job['generator_name']
+#        gen_key = job['generator_record_key']
+#        dirty_source_names = job['dirty_source_names']
+#
+#        page = fac.buildPage()
+#        qp = QualifiedPage(page, route, route_metadata)
+#
+#        result = {
+#            'path': fac.path,
+#            'generator_name': gen_name,
+#            'generator_record_key': gen_key,
+#            'sub_entries': None,
+#            'errors': None}
+#
+#        if job.get('needs_config', False):
+#            result['config'] = page.config.getAll()
+#
+#        previous_entry = None
+#        if self.ctx.previous_record_index is not None:
+#            key = _get_transition_key(fac.path, gen_key)
+#            previous_entry = self.ctx.previous_record_index.get(key)
+#
+#        logger.debug("Baking page: %s" % fac.ref_spec)
+#        logger.debug("With route metadata: %s" % route_metadata)
+#        try:
+#            sub_entries = self.page_baker.bake(
+#                qp, previous_entry, dirty_source_names, gen_name)
+#            result['sub_entries'] = sub_entries
+#
+#        except Exception as ex:
+#            logger.debug("Got baking error. Sending it to master.")
+#            result['errors'] = _get_errors(ex)
+#            if self.ctx.app.debug:
+#                logger.exception(ex)
+#
+#        return result
+#
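Of the new module, only the PagePipeline skeleton, getDeletions(), and _get_transition_key() are live code; everything below them is the old bake machinery kept around as commented-out reference. As a reading aid, here is a self-contained sketch of the deletion diffing, using stand-in record types (Entry, Sub, and the dict-based diffs() pairing are assumptions for illustration, not the real record classes):

import hashlib


class Sub:
    def __init__(self, out_path):
        self.out_path = out_path  # one baked output file


class Entry:
    def __init__(self, subs):
        self.subs = subs  # all outputs produced by one source file


def diffs(prev_entries, cur_entries):
    # Pair previous and current entries by source path, yielding
    # (prev, cur) with None on a missing side, like record_history.diffs().
    for key in set(prev_entries) | set(cur_entries):
        yield prev_entries.get(key), cur_entries.get(key)


def get_deletions(prev_entries, cur_entries):
    # Same two cases as PagePipeline.getDeletions above.
    for prev, cur in diffs(prev_entries, cur_entries):
        if prev and not cur:
            # The source file is gone: every output it produced must go.
            for sub in prev.subs:
                yield (sub.out_path, 'previous source file was removed')
        elif prev and cur:
            # The source still exists but no longer produces some outputs.
            gone = ({o.out_path for o in prev.subs} -
                    {o.out_path for o in cur.subs})
            for p in gone:
                yield (p, 'source file changed outputs')


prev = {'pages/foo.md': Entry([Sub('foo.html'), Sub('foo/2.html')])}
cur = {'pages/foo.md': Entry([Sub('foo.html')])}
print(list(get_deletions(prev, cur)))
# -> [('foo/2.html', 'source file changed outputs')]

# Records are keyed the way _get_transition_key does it: an md5 of the
# source path plus an optional extra key (e.g. for generator pages).
print(hashlib.md5('pages/foo.md+blog'.encode('utf8')).hexdigest())

That md5 key is also what the commented-out BakeJobHandler uses to look up a page's entry from the previous bake record and decide how much needs re-baking.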