piecrust2: changeset 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including asset directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are two built-in pipelines:
  one for assets and one for pages. The asset pipeline is vaguely functional,
  but the page pipeline is completely broken right now. (See the sketch after
  this list for the intended source/pipeline flow.)
* Rewrite the baking process as just running the appropriate pipeline on each
  content item. This should allow for better parallelization.
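To make the new model concrete, here is a minimal sketch of the source/pipeline
relationship described above. The class and method names are simplified
stand-ins for illustration (only `ContentItem.spec`/`.metadata` and
`getAllContents()` mirror names from this changeset); this is not PieCrust's
actual API:

    import os


    class ContentItem:
        # One piece of content exposed by a source: its path (the "spec")
        # plus whatever metadata the source attached to it.
        def __init__(self, spec, metadata):
            self.spec = spec
            self.metadata = metadata


    class FSContentSource:
        # Base file-system source: walks a directory and yields one
        # ContentItem per file. Page and asset sources would subclass this.
        def __init__(self, name, fs_endpoint):
            self.name = name
            self.fs_endpoint = fs_endpoint

        def getAllContents(self):
            for dirpath, _, filenames in os.walk(self.fs_endpoint):
                for fn in filenames:
                    yield ContentItem(os.path.join(dirpath, fn),
                                      {'source': self.name})


    class AssetPipeline:
        # Stand-in for the asset pipeline: every item is processed
        # independently of the others, which is what makes it easy to
        # spread items across workers.
        def run(self, item):
            print("processing asset: %s" % item.spec)


    if __name__ == '__main__':
        source = FSContentSource('assets', 'assets')
        pipeline = AssetPipeline()
        for item in source.getAllContents():
            pipeline.run(item)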
--- a/piecrust/__init__.py Sat Apr 29 21:42:22 2017 -0700
+++ b/piecrust/__init__.py Wed May 17 00:11:48 2017 -0700
@@ -19,13 +19,13 @@
 PIECRUST_URL = 'https://bolt80.com/piecrust/'
 
-CACHE_VERSION = 29
+CACHE_VERSION = 30
 
 try:
     from piecrust.__version__ import APP_VERSION
 except ImportError:
     APP_VERSION = 'unknown'
 
 
-import os.path
+import os.path  # NOQA
 
 RESOURCES_DIR = os.path.join(os.path.dirname(__file__), 'resources')
--- a/piecrust/admin/views/create.py Sat Apr 29 21:42:22 2017 -0700
+++ b/piecrust/admin/views/create.py Wed May 17 00:11:48 2017 -0700
@@ -5,7 +5,6 @@
     g, request, abort, render_template, url_for, redirect, flash)
 from flask.ext.login import login_required
 from piecrust.sources.interfaces import IInteractiveSource
-from piecrust.sources.base import MODE_CREATING
 from piecrust.routing import create_route_metadata
 from ..blueprint import foodtruck_bp
 from ..views import with_menu_context
--- a/piecrust/app.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/app.py Wed May 17 00:11:48 2017 -0700 @@ -1,21 +1,20 @@ import time import os.path -import hashlib import logging import urllib.parse from werkzeug.utils import cached_property from piecrust import ( - RESOURCES_DIR, - CACHE_DIR, TEMPLATES_DIR, ASSETS_DIR, - THEME_DIR, PLUGINS_DIR, - CONFIG_PATH, THEME_CONFIG_PATH) + RESOURCES_DIR, + CACHE_DIR, TEMPLATES_DIR, ASSETS_DIR, + THEME_DIR, PLUGINS_DIR, + CONFIG_PATH, THEME_CONFIG_PATH) from piecrust.appconfig import PieCrustConfiguration from piecrust.cache import ExtensibleCache, NullExtensibleCache -from piecrust.configuration import ConfigurationError, merge_dicts +from piecrust.configuration import ConfigurationError from piecrust.environment import StandardEnvironment +from piecrust.page import Page from piecrust.plugins.base import PluginLoader from piecrust.routing import Route -from piecrust.sources.base import REALM_THEME logger = logging.getLogger(__name__) @@ -39,14 +38,14 @@ if self.env is None: self.env = StandardEnvironment() self.env.initialize(self) - self.env.registerTimer('SiteConfigLoad') - self.env.registerTimer('PageLoad') - self.env.registerTimer("PageDataBuild") - self.env.registerTimer("BuildRenderData") - self.env.registerTimer("PageRender") - self.env.registerTimer("PageRenderSegments") - self.env.registerTimer("PageRenderLayout") - self.env.registerTimer("PageSerialize") + self.env.stats.registerTimer('SiteConfigLoad') + self.env.stats.registerTimer('PageLoad') + self.env.stats.registerTimer("PageDataBuild") + self.env.stats.registerTimer("BuildRenderData") + self.env.stats.registerTimer("PageRender") + self.env.stats.registerTimer("PageRenderSegments") + self.env.stats.registerTimer("PageRenderLayout") + self.env.stats.registerTimer("PageSerialize") @cached_property def config(self): @@ -64,25 +63,26 @@ config_cache = self.cache.getCache('app') config = PieCrustConfiguration( - path=path, theme_path=theme_path, - cache=config_cache, theme_config=self.theme_site) + path=path, theme_path=theme_path, + cache=config_cache, theme_config=self.theme_site) local_path = os.path.join( - self.root_dir, 'configs', 'local.yml') + self.root_dir, 'configs', 'local.yml') config.addVariant(local_path, raise_if_not_found=False) if self.theme_site: variant_path = os.path.join( - self.root_dir, 'configs', 'theme_preview.yml') + self.root_dir, 'configs', 'theme_preview.yml') config.addVariant(variant_path, raise_if_not_found=False) - self.env.stepTimer('SiteConfigLoad', time.perf_counter() - start_time) + self.env.stats.stepTimer('SiteConfigLoad', + time.perf_counter() - start_time) return config @cached_property def assets_dirs(self): assets_dirs = self._get_configurable_dirs( - ASSETS_DIR, 'site/assets_dirs') + ASSETS_DIR, 'site/assets_dirs') # Also add the theme directory, if any. if self.theme_dir: @@ -95,7 +95,7 @@ @cached_property def templates_dirs(self): templates_dirs = self._get_configurable_dirs( - TEMPLATES_DIR, 'site/templates_dirs') + TEMPLATES_DIR, 'site/templates_dirs') # Also, add the theme directory, if any. 
if self.theme_dir: @@ -148,6 +148,7 @@ s['type']) src = cls(self, n, s) sources.append(src) + return sources @cached_property @@ -159,22 +160,6 @@ return routes @cached_property - def generators(self): - defs = {} - for cls in self.plugin_loader.getPageGenerators(): - defs[cls.GENERATOR_NAME] = cls - - gens = [] - for n, g in self.config.get('site/generators').items(): - cls = defs.get(g['type']) - if cls is None: - raise ConfigurationError("No such page generator type: %s" % - g['type']) - gen = cls(self, n, g) - gens.append(gen) - return gens - - @cached_property def publishers(self): defs_by_name = {} defs_by_scheme = {} @@ -197,7 +182,7 @@ pub_type = comps.scheme is_scheme = True cls = (defs_by_scheme.get(pub_type) if is_scheme - else defs_by_name.get(pub_type)) + else defs_by_name.get(pub_type)) if cls is None: raise ConfigurationError("No such publisher: %s" % pub_type) tgt = cls(self, n, t) @@ -210,27 +195,15 @@ return source return None - def getGenerator(self, generator_name): - for gen in self.generators: - if gen.name == generator_name: - return gen - return None - def getSourceRoutes(self, source_name): for route in self.routes: if route.source_name == source_name: yield route - def getSourceRoute(self, source_name, route_metadata): + def getSourceRoute(self, source_name, route_params): for route in self.getSourceRoutes(source_name): - if (route_metadata is None or - route.matchesMetadata(route_metadata)): - return route - return None - - def getGeneratorRoute(self, generator_name): - for route in self.routes: - if route.generator_name == generator_name: + if (route_params is None or + route.matchesParameters(route_params)): return route return None @@ -240,6 +213,12 @@ return pub return None + def getPage(self, content_item): + cache_key = content_item.spec + return self.env.page_repository.get( + cache_key, + lambda: Page(content_item)) + def _get_dir(self, default_rel_dir): abs_dir = os.path.join(self.root_dir, default_rel_dir) if os.path.isdir(abs_dir): @@ -269,16 +248,19 @@ if config_variant is not None: logger.debug("Adding configuration variant '%s'." % config_variant) variant_path = os.path.join( - app.root_dir, 'configs', '%s.yml' % config_variant) + app.root_dir, 'configs', '%s.yml' % config_variant) app.config.addVariant(variant_path) if config_values is not None: for name, value in config_values: - logger.debug("Adding configuration override '%s': %s" % (name, value)) + logger.debug("Adding configuration override '%s': %s" % + (name, value)) app.config.addVariantValue(name, value) class PieCrustFactory(object): + """ A class that builds a PieCrust app instance. + """ def __init__( self, root_dir, *, cache=True, cache_key=None, @@ -294,12 +276,12 @@ def create(self): app = PieCrust( - self.root_dir, - cache=self.cache, - cache_key=self.cache_key, - debug=self.debug, - theme_site=self.theme_site) + self.root_dir, + cache=self.cache, + cache_key=self.cache_key, + debug=self.debug, + theme_site=self.theme_site) apply_variant_and_values( - app, self.config_variant, self.config_values) + app, self.config_variant, self.config_values) return app
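One small but notable addition in `app.py` above is `getPage()`, which
memoizes `Page` objects through `env.page_repository`, keyed on the content
item's spec. A rough equivalent of that caching pattern, with a plain class
standing in for the real repository:

    class PageRepository:
        # Simplified stand-in for env.page_repository: a build-on-first-use
        # cache of Page objects.
        def __init__(self):
            self._cache = {}

        def get(self, key, factory):
            # Return the cached value, or build and remember it.
            if key not in self._cache:
                self._cache[key] = factory()
            return self._cache[key]


    class Page:
        def __init__(self, content_item):
            self.content_item = content_item


    def get_page(repo, content_item):
        # Mirrors PieCrust.getPage: the cache key is the item's spec, so
        # asking twice for the same item returns the same Page object.
        return repo.get(content_item.spec, lambda: Page(content_item))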
--- a/piecrust/appconfig.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/appconfig.py Wed May 17 00:11:48 2017 -0700 @@ -16,10 +16,8 @@ from piecrust.cache import NullCache from piecrust.configuration import ( Configuration, ConfigurationError, ConfigurationLoader, - try_get_dict_values, try_get_dict_value, set_dict_value, - merge_dicts, visit_dict, - MERGE_NEW_VALUES, MERGE_OVERWRITE_VALUES, MERGE_PREPEND_LISTS, - MERGE_APPEND_LISTS) + try_get_dict_values, set_dict_value, + merge_dicts, visit_dict) from piecrust.sources.base import REALM_USER, REALM_THEME @@ -177,7 +175,6 @@ # [custom theme] + [default theme] + [default] if theme_values is not None: self._processThemeLayer(theme_values, values) - merge_dicts(values, theme_values) # Make all sources belong to the "theme" realm at this point. srcc = values['site'].get('sources') @@ -190,7 +187,6 @@ # [default] if site_values is not None: self._processSiteLayer(site_values, values) - merge_dicts(values, site_values) # Set the theme site flag. if self.theme_config: @@ -209,10 +205,14 @@ # Generate the default theme model. gen_default_theme_model = bool(try_get_dict_values( (theme_values, 'site/use_default_theme_content'), - (values, 'site/use_default_theme_content'), default=True)) if gen_default_theme_model: - self._generateDefaultThemeModel(theme_values, values) + logger.debug("Generating default theme content model...") + cc = copy.deepcopy(default_theme_content_model_base) + merge_dicts(values, cc) + + # Merge the theme config into the result config. + merge_dicts(values, theme_values) def _processSiteLayer(self, site_values, values): # Default site content. @@ -221,34 +221,29 @@ (values, 'site/use_default_content'), default=True)) if gen_default_site_model: - self._generateDefaultSiteModel(site_values, values) + logger.debug("Generating default content model...") + cc = copy.deepcopy(default_content_model_base) + merge_dicts(values, cc) - def _generateDefaultThemeModel(self, theme_values, values): - logger.debug("Generating default theme content model...") - cc = copy.deepcopy(default_theme_content_model_base) - merge_dicts(values, cc) - - def _generateDefaultSiteModel(self, site_values, values): - logger.debug("Generating default content model...") - cc = copy.deepcopy(default_content_model_base) - merge_dicts(values, cc) + dcm = get_default_content_model(site_values, values) + merge_dicts(values, dcm) - dcm = get_default_content_model(site_values, values) - merge_dicts(values, dcm) + blogsc = try_get_dict_values( + (site_values, 'site/blogs'), + (values, 'site/blogs')) + if blogsc is None: + blogsc = ['posts'] + set_dict_value(site_values, 'site/blogs', blogsc) - blogsc = try_get_dict_values( - (site_values, 'site/blogs'), - (values, 'site/blogs')) - if blogsc is None: - blogsc = ['posts'] - set_dict_value(site_values, 'site/blogs', blogsc) + is_only_blog = (len(blogsc) == 1) + for blog_name in reversed(blogsc): + blog_cfg = get_default_content_model_for_blog( + blog_name, is_only_blog, site_values, values, + theme_site=self.theme_config) + merge_dicts(values, blog_cfg) - is_only_blog = (len(blogsc) == 1) - for blog_name in reversed(blogsc): - blog_cfg = get_default_content_model_for_blog( - blog_name, is_only_blog, site_values, values, - theme_site=self.theme_config) - merge_dicts(values, blog_cfg) + # Merge the site config into the result config. 
+ merge_dicts(values, site_values) def _validateAll(self, values): if values is None: @@ -304,9 +299,6 @@ taxonomies = v.get('taxonomies') if taxonomies is None: v['taxonomies'] = {} - generators = v.get('generators') - if generators is None: - v['generators'] = {} return v @@ -333,8 +325,8 @@ v.setdefault('html', values['site']['default_format']) auto_formats_re = r"\.(%s)$" % ( - '|'.join( - [re.escape(i) for i in list(v.keys())])) + '|'.join( + [re.escape(i) for i in list(v.keys())])) cache.write('auto_formats_re', auto_formats_re) return v @@ -343,7 +335,7 @@ def _validate_site_default_auto_format(v, values, cache): if v not in values['site']['auto_formats']: raise ConfigurationError( - "Default auto-format '%s' is not declared." % v) + "Default auto-format '%s' is not declared." % v) return v @@ -393,27 +385,20 @@ sc.setdefault('type', 'default') sc.setdefault('fs_endpoint', sn) sc.setdefault('ignore_missing_dir', False) - sc.setdefault('data_endpoint', sn) + sc.setdefault('data_endpoint', None) sc.setdefault('data_type', 'iterator') sc.setdefault('item_name', sn) sc.setdefault('items_per_page', 5) sc.setdefault('date_format', DEFAULT_DATE_FORMAT) sc.setdefault('realm', REALM_USER) + sc.setdefault('pipeline', 'page') # Validate endpoints. endpoint = sc['data_endpoint'] if endpoint in reserved_endpoints: raise ConfigurationError( - "Source '%s' is using a reserved endpoint name: %s" % - (sn, endpoint)) - - # Validate generators. - for gn, gc in sc.get('generators', {}).items(): - if not isinstance(gc, dict): - raise ConfigurationError( - "Generators for source '%s' should be defined in a " - "dictionary." % sn) - gc['source'] = sn + "Source '%s' is using a reserved endpoint name: %s" % + (sn, endpoint)) return v @@ -439,20 +424,14 @@ raise ConfigurationError("Route URLs must start with '/'.") r_source = rc.get('source') - r_generator = rc.get('generator') - if r_source is None and r_generator is None: - raise ConfigurationError("Routes must specify a source or " - "generator.") + if r_source is None: + raise ConfigurationError("Routes must specify a source.") if (r_source and r_source not in list(values['site']['sources'].keys())): raise ConfigurationError("Route is referencing unknown " "source: %s" % r_source) - if (r_generator and - r_generator not in list(values['site']['generators'].keys())): - raise ConfigurationError("Route is referencing unknown " - "generator: %s" % r_generator) - rc.setdefault('generator', None) + rc.setdefault('pass', 0) rc.setdefault('page_suffix', '/%num%') return v @@ -461,7 +440,7 @@ def _validate_site_taxonomies(v, values, cache): if not isinstance(v, dict): raise ConfigurationError( - "The 'site/taxonomies' setting must be a mapping.") + "The 'site/taxonomies' setting must be a mapping.") for tn, tc in v.items(): tc.setdefault('multiple', False) tc.setdefault('term', tn) @@ -469,23 +448,12 @@ return v -def _validate_site_generators(v, values, cache): - if not isinstance(v, dict): - raise ConfigurationError( - "The 'site/generators' setting must be a mapping.") - for gn, gc in v.items(): - if 'type' not in gc: - raise ConfigurationError( - "Generator '%s' doesn't specify a type." % gn) - return v - - def _validate_site_plugins(v, values, cache): if isinstance(v, str): v = v.split(',') elif not isinstance(v, list): raise ConfigurationError( - "The 'site/plugins' setting must be an array, or a " - "comma-separated list.") + "The 'site/plugins' setting must be an array, or a " + "comma-separated list.") return v
--- a/piecrust/appconfigdefaults.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/appconfigdefaults.py Wed May 17 00:11:48 2017 -0700 @@ -4,16 +4,16 @@ DEFAULT_DATE_FORMAT, DEFAULT_THEME_SOURCE) from piecrust.configuration import ( get_dict_values, try_get_dict_values) -from piecrust.sources.base import REALM_THEME +# Default configuration for all websites. +# default_configuration = collections.OrderedDict({ 'site': collections.OrderedDict({ 'title': "Untitled PieCrust website", 'root': '/', 'default_format': DEFAULT_FORMAT, 'default_template_engine': DEFAULT_TEMPLATE_ENGINE, - 'enable_gzip': True, 'pretty_urls': False, 'trailing_slash': False, 'date_format': DEFAULT_DATE_FORMAT, @@ -24,11 +24,9 @@ 'default_auto_format': 'md', 'default_pagination_source': None, 'pagination_suffix': '/%num%', + 'asset_url_format': '%uri%', 'slugify_mode': 'encode', 'themes_sources': [DEFAULT_THEME_SOURCE], - 'cache_time': 28800, - 'enable_debug_info': True, - 'show_debug_info': False, 'use_default_content': True, 'use_default_theme_content': True, 'theme_site': False @@ -37,21 +35,32 @@ 'no_bake_setting': 'draft', 'workers': None, 'batch_size': None + }), + 'server': collections.OrderedDict({ + 'enable_gzip': True, + 'cache_time': 28800, + 'enable_debug_info': True, + 'show_debug_info': False }) }) +# Default content model for themes. +# default_theme_content_model_base = collections.OrderedDict({ 'site': collections.OrderedDict({ 'sources': collections.OrderedDict({ 'theme_pages': { - 'type': 'default', + 'fs_endpoint': 'pages', 'ignore_missing_dir': True, - 'fs_endpoint': 'pages', 'data_endpoint': 'site.pages', - 'default_layout': 'default', 'item_name': 'page', - 'realm': REALM_THEME + }, + 'theme_assets': { + 'fs_endpoint': 'assets', + 'ignore_missing_dir': True, + 'type': 'fs', + 'pipeline': 'asset' } }), 'routes': [ @@ -60,15 +69,47 @@ 'source': 'theme_pages', 'func': 'pcurl' } - ], - 'theme_tag_page': 'theme_pages:_tag.%ext%', - 'theme_category_page': 'theme_pages:_category.%ext%', - 'theme_month_page': 'theme_pages:_month.%ext%', - 'theme_year_page': 'theme_pages:_year.%ext%' + ] }) }) +# Additional theme configuration when previewing a theme by itself, +# so it can show some "sample/preview" content. +# +default_theme_preview_content_model = collections.OrderedDict({ + 'site': collections.OrderedDict({ + 'sources': collections.OrderedDict({ + 'theme_preview_pages': { + 'fs_endpoint': 'preview/pages', + 'ignore_missing_dir': True, + 'data_endpoint': 'site.pages', + 'item_name': 'page', + }, + 'theme_preview_posts': { + 'fs_endpoint': 'preview/posts', + 'ignore_missing_dir': True, + 'data_endpoint': 'blog.posts', + 'item_name': 'post' + } + }), + 'routes': [ + { + 'url': '/posts/%year%/%month%/%slug%', + 'source': 'theme_preview_posts' + }, + { + 'url': '/%slug%', + 'source': 'theme_preview_pages', + 'func': 'pcurl' + } + ] + }) +}) + + +# Default content model for websites. 
+# default_content_model_base = collections.OrderedDict({ 'site': collections.OrderedDict({ 'posts_fs': DEFAULT_POSTS_FS, @@ -77,8 +118,18 @@ 'post_url': '/%year%/%month%/%day%/%slug%', 'year_url': '/archives/%year%', 'tag_url': '/tag/%tag%', + 'tag_feed_url': '/tag/%tag%.xml', 'category_url': '/%category%', - 'posts_per_page': 5 + 'category_feed_url': '/%category%.xml', + 'posts_per_page': 5, + 'sources': { + 'assets': { + 'fs_endpoint': 'assets', + 'ignore_missing_dir': True, + 'type': 'fs', + 'pipeline': 'asset' + } + } }) }) @@ -174,14 +225,7 @@ default_layout = blog_values['default_post_layout'] post_url = '/' + url_prefix + blog_values['post_url'].lstrip('/') year_url = '/' + url_prefix + blog_values['year_url'].lstrip('/') - - year_archive = 'pages:%s_year.%%ext%%' % page_prefix - if not theme_site: - theme_year_page = try_get_dict_values( - (site_values, 'site/theme_year_page'), - (values, 'site/theme_year_page')) - if theme_year_page: - year_archive += ';' + theme_year_page + year_archive_tpl = '%s_year.html' % page_prefix cfg = collections.OrderedDict({ 'site': collections.OrderedDict({ @@ -196,13 +240,11 @@ 'items_per_page': posts_per_page, 'date_format': date_format, 'default_layout': default_layout - }) - }), - 'generators': collections.OrderedDict({ - ('%s_archives' % blog_name): collections.OrderedDict({ + }), + '%s_archives' % blog_name: collections.OrderedDict({ 'type': 'blog_archives', 'source': blog_name, - 'page': year_archive + 'template': year_archive_tpl }) }), 'routes': [ @@ -213,14 +255,14 @@ }, { 'url': year_url, - 'generator': ('%s_archives' % blog_name), + 'source': ('%s_archives' % blog_name), 'func': ('%syearurl' % tpl_func_prefix) } ] }) }) - # Add a generator and a route for each taxonomy. + # Add a source and a route for each taxonomy. taxonomies_cfg = try_get_dict_values( (site_values, 'site/taxonomies'), (values, 'site/taxonomies'), @@ -228,22 +270,16 @@ for tax_name, tax_cfg in taxonomies_cfg.items(): term = tax_cfg.get('term', tax_name) - # Generator. - page_ref = 'pages:%s_%s.%%ext%%' % (page_prefix, term) - if not theme_site: - theme_page_ref = try_get_dict_values( - (site_values, 'site/theme_%s_page' % term), - (values, 'site/theme_%s_page' % term)) - if theme_page_ref: - page_ref += ';' + theme_page_ref - tax_gen_name = '%s_%s' % (blog_name, tax_name) - tax_gen = collections.OrderedDict({ + # Source. + term_tpl = '%s_%s.html' % (page_prefix, term) + tax_src_name = '%s_%s' % (blog_name, tax_name) + tax_src = collections.OrderedDict({ 'type': 'taxonomy', 'source': blog_name, 'taxonomy': tax_name, - 'page': page_ref + 'template': term_tpl }) - cfg['site']['generators'][tax_gen_name] = tax_gen + cfg['site']['sources'][tax_src_name] = tax_src # Route. tax_url_cfg_name = '%s_url' % term @@ -259,8 +295,8 @@ default=('%s%surl' % (tpl_func_prefix, term))) tax_route = collections.OrderedDict({ 'url': tax_url, - 'generator': tax_gen_name, - 'taxonomy': tax_name, + 'pass': 2, + 'source': tax_src_name, 'func': tax_func_name }) cfg['site']['routes'].append(tax_route)
--- a/piecrust/baking/baker.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/baking/baker.py Wed May 17 00:11:48 2017 -0700 @@ -2,79 +2,79 @@ import os.path import hashlib import logging -from piecrust.baking.records import ( - BakeRecordEntry, TransitionalBakeRecord) -from piecrust.baking.worker import ( - save_factory, - JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE) +from piecrust.baking.worker import BakeJob from piecrust.chefutil import ( - format_timed_scope, format_timed) + format_timed_scope, format_timed) from piecrust.environment import ExecutionStats -from piecrust.generation.base import PageGeneratorBakeContext -from piecrust.routing import create_route_metadata -from piecrust.sources.base import ( - REALM_NAMES, REALM_USER, REALM_THEME) +from piecrust.pipelines.base import PipelineContext +from piecrust.pipelines.records import ( + MultiRecordHistory, MultiRecord, RecordEntry, + load_records) +from piecrust.sources.base import REALM_USER, REALM_THEME logger = logging.getLogger(__name__) +def get_bake_records_path(app, out_dir): + records_cache = app.cache.getCache('baker') + records_id = hashlib.md5(out_dir.encode('utf8')).hexdigest() + records_name = records_id + '.record' + return records_cache.getCachePath(records_name) + + class Baker(object): - def __init__(self, app, out_dir, force=False, - applied_config_variant=None, - applied_config_values=None): - assert app and out_dir + def __init__(self, appfactory, app, out_dir, + force=False, allowed_pipelines=None): + self.appfactory = appfactory self.app = app self.out_dir = out_dir self.force = force - self.applied_config_variant = applied_config_variant - self.applied_config_values = applied_config_values + + self._pipeline_classes = {} + for pclass in app.plugin_loader.getPipelines(): + self._pipeline_classes[pclass.PIPELINE_NAME] = pclass - # Remember what generator pages we should skip. - self.generator_pages = [] - logger.debug("Gathering generator page paths:") - for gen in self.app.generators: - for path in gen.page_ref.possible_paths: - self.generator_pages.append(path) - logger.debug(" - %s" % path) + self.allowed_pipelines = allowed_pipelines + if allowed_pipelines is None: + self.allowed_pipelines = list(self._pipeline_classes.keys()) - # Register some timers. - self.app.env.registerTimer('LoadJob', raise_if_registered=False) - self.app.env.registerTimer('RenderFirstSubJob', - raise_if_registered=False) - self.app.env.registerTimer('BakeJob', raise_if_registered=False) + self._records = None def bake(self): + start_time = time.perf_counter() logger.debug(" Bake Output: %s" % self.out_dir) logger.debug(" Root URL: %s" % self.app.config.get('site/root')) # Get into bake mode. - start_time = time.perf_counter() self.app.config.set('baker/is_baking', True) - self.app.env.base_asset_url_format = '%uri%' + self.app.config.set('site/base_asset_url_format', '%uri') # Make sure the output directory exists. if not os.path.isdir(self.out_dir): os.makedirs(self.out_dir, 0o755) - # Load/create the bake record. - record = TransitionalBakeRecord() - record_cache = self.app.cache.getCache('baker') - record_id = hashlib.md5(self.out_dir.encode('utf8')).hexdigest() - record_name = record_id + '.record' - previous_record_path = None - if not self.force and record_cache.has(record_name): - with format_timed_scope(logger, "loaded previous bake record", + # Load/create the bake records. 
+ records_path = get_bake_records_path( + self.app, self.out_dir) + if not self.force and os.path.isfile(records_path): + with format_timed_scope(logger, "loaded previous bake records", level=logging.DEBUG, colored=False): - previous_record_path = record_cache.getCachePath(record_name) - record.loadPrevious(previous_record_path) - record.current.success = True + previous_records = load_records(records_path) + else: + previous_records = MultiRecord() + self._records = MultiRecord() # Figure out if we need to clean the cache because important things # have changed. - is_cache_valid = self._handleCacheValidity(record) + is_cache_valid = self._handleCacheValidity(previous_records, + self._records) if not is_cache_valid: - previous_record_path = None + previous_records = MultiRecord() + + # Create the bake records history which tracks what's up-to-date + # or not since last time we baked to the given output folder. + record_history = MultiRecordHistory(previous_records, self._records) # Pre-create all caches. for cache_name in ['app', 'baker', 'pages', 'renders']: @@ -84,64 +84,82 @@ # separately so we can handle "overriding" (i.e. one realm overrides # another realm's pages, like the user realm overriding the theme # realm). + # + # Also, create and initialize each pipeline for each source. sources_by_realm = {} + ppctx = PipelineContext(self.out_dir, record_history, + force=self.force) for source in self.app.sources: - srclist = sources_by_realm.setdefault(source.realm, []) - srclist.append(source) + pname = source.config['pipeline'] + if pname in self.allowed_pipelines: + srclist = sources_by_realm.setdefault( + source.config['realm'], []) + + pp = self._pipeline_classes[pname](source) + pp.initialize(ppctx) + srclist.append((source, pp)) + else: + logger.debug( + "Skip source '%s' because pipeline '%s' is ignored." % + (source.name, pname)) # Create the worker processes. - pool = self._createWorkerPool(previous_record_path) + pool = self._createWorkerPool(records_path) - # Bake the realms. + # Bake the realms -- user first, theme second, so that a user item + # can override a theme item. realm_list = [REALM_USER, REALM_THEME] for realm in realm_list: srclist = sources_by_realm.get(realm) if srclist is not None: - self._bakeRealm(record, pool, realm, srclist) - - # Call all the page generators. - self._bakePageGenerators(record, pool) + self._bakeRealm(record_history, pool, realm, srclist) # All done with the workers. Close the pool and get reports. - reports = pool.close() + pool_stats = pool.close() total_stats = ExecutionStats() - record.current.stats['_Total'] = total_stats - for i in range(len(reports)): - worker_stats = reports[i]['data'] - if worker_stats is not None: - worker_name = 'BakeWorker_%d' % i - record.current.stats[worker_name] = worker_stats - total_stats.mergeStats(worker_stats) + for ps in pool_stats: + if ps is not None: + total_stats.mergeStats(ps) + record_history.current.stats = total_stats - # Delete files from the output. - self._handleDeletetions(record) + # Shutdown the pipelines. + for realm in realm_list: + srclist = sources_by_realm.get(realm) + if srclist is not None: + for _, pp in srclist: + pp.shutdown(ppctx) # Backup previous records. 
+ records_dir, records_fn = os.path.split(records_path) + records_id, _ = os.path.splitext(records_fn) for i in range(8, -1, -1): suffix = '' if i == 0 else '.%d' % i - record_path = record_cache.getCachePath( - '%s%s.record' % (record_id, suffix)) - if os.path.exists(record_path): - record_path_next = record_cache.getCachePath( - '%s.%s.record' % (record_id, i + 1)) - if os.path.exists(record_path_next): - os.remove(record_path_next) - os.rename(record_path, record_path_next) + records_path_i = os.path.join( + records_dir, + '%s%s.record' % (records_id, suffix)) + if os.path.exists(records_path_i): + records_path_next = os.path.join( + records_dir, + '%s.%s.record' % (records_id, i + 1)) + if os.path.exists(records_path_next): + os.remove(records_path_next) + os.rename(records_path_i, records_path_next) # Save the bake record. - with format_timed_scope(logger, "saved bake record.", + with format_timed_scope(logger, "saved bake records.", level=logging.DEBUG, colored=False): - record.current.bake_time = time.time() - record.current.out_dir = self.out_dir - record.saveCurrent(record_cache.getCachePath(record_name)) + record_history.current.bake_time = time.time() + record_history.current.out_dir = self.out_dir + record_history.current.save(records_path) # All done. self.app.config.set('baker/is_baking', False) logger.debug(format_timed(start_time, 'done baking')) - return record.detach() + self._records = None + return record_history.current - def _handleCacheValidity(self, record): + def _handleCacheValidity(self, previous_records, current_records): start_time = time.perf_counter() reason = None @@ -151,8 +169,7 @@ # The configuration file was changed, or we're running a new # version of the app. reason = "not valid anymore" - elif (not record.previous.bake_time or - not record.previous.hasLatestVersion()): + elif previous_records.invalidated: # We have no valid previous bake record. reason = "need bake record regeneration" else: @@ -165,261 +182,86 @@ for fn in filenames: full_fn = os.path.join(dpath, fn) max_time = max(max_time, os.path.getmtime(full_fn)) - if max_time >= record.previous.bake_time: + if max_time >= previous_records.bake_time: reason = "templates modified" if reason is not None: # We have to bake everything from scratch. 
self.app.cache.clearCaches(except_names=['app', 'baker']) self.force = True - record.incremental_count = 0 - record.clearPrevious() + current_records.incremental_count = 0 + previous_records = MultiRecord() logger.info(format_timed( - start_time, - "cleaned cache (reason: %s)" % reason)) + start_time, "cleaned cache (reason: %s)" % reason)) return False else: - record.incremental_count += 1 + current_records.incremental_count += 1 logger.debug(format_timed( - start_time, "cache is assumed valid", - colored=False)) + start_time, "cache is assumed valid", colored=False)) return True - def _bakeRealm(self, record, pool, realm, srclist): - start_time = time.perf_counter() - try: - record.current.baked_count[realm] = 0 - record.current.total_baked_count[realm] = 0 - - all_factories = [] - for source in srclist: - factories = source.getPageFactories() - all_factories += [f for f in factories - if f.path not in self.generator_pages] - - self._loadRealmPages(record, pool, all_factories) - self._renderRealmPages(record, pool, all_factories) - self._bakeRealmPages(record, pool, realm, all_factories) - finally: - page_count = record.current.baked_count[realm] - total_page_count = record.current.total_baked_count[realm] - logger.info(format_timed( - start_time, - "baked %d %s pages (%d total)." % - (page_count, REALM_NAMES[realm].lower(), - total_page_count))) - - def _loadRealmPages(self, record, pool, factories): - def _handler(res): - # Create the record entry for this page. - # This will also update the `dirty_source_names` for the record - # as we add page files whose last modification times are later - # than the last bake. - record_entry = BakeRecordEntry(res['source_name'], res['path']) - record_entry.config = res['config'] - record_entry.timestamp = res['timestamp'] - if res['errors']: - record_entry.errors += res['errors'] - record.current.success = False - self._logErrors(res['path'], res['errors']) - record.addEntry(record_entry) - - logger.debug("Loading %d realm pages..." % len(factories)) - with format_timed_scope(logger, - "loaded %d pages" % len(factories), - level=logging.DEBUG, colored=False, - timer_env=self.app.env, - timer_category='LoadJob'): - jobs = [] - for fac in factories: - job = { - 'type': JOB_LOAD, - 'job': save_factory(fac)} - jobs.append(job) - ar = pool.queueJobs(jobs, handler=_handler) - ar.wait() - - def _renderRealmPages(self, record, pool, factories): - def _handler(res): - entry = record.getCurrentEntry(res['path']) - if res['errors']: - entry.errors += res['errors'] - record.current.success = False - self._logErrors(res['path'], res['errors']) - - logger.debug("Rendering %d realm pages..." % len(factories)) - with format_timed_scope(logger, - "prepared %d pages" % len(factories), - level=logging.DEBUG, colored=False, - timer_env=self.app.env, - timer_category='RenderFirstSubJob'): - jobs = [] - for fac in factories: - record_entry = record.getCurrentEntry(fac.path) - if record_entry.errors: - logger.debug("Ignoring %s because it had previous " - "errors." % fac.ref_spec) - continue - - # Make sure the source and the route exist for this page, - # otherwise we add errors to the record entry and we'll skip - # this page for the rest of the bake. 
- source = self.app.getSource(fac.source.name) - if source is None: - record_entry.errors.append( - "Can't get source for page: %s" % fac.ref_spec) - logger.error(record_entry.errors[-1]) - continue - - route = self.app.getSourceRoute(fac.source.name, fac.metadata) - if route is None: - record_entry.errors.append( - "Can't get route for page: %s" % fac.ref_spec) - logger.error(record_entry.errors[-1]) - continue + def _bakeRealm(self, record_history, pool, realm, srclist): + for source, pp in srclist: + logger.debug("Queuing jobs for source '%s' using pipeline '%s'." % + (source.name, pp.PIPELINE_NAME)) + jobs = [BakeJob(source.name, item.spec, item.metadata) + for item in source.getAllContents()] + pool.queueJobs(jobs) + pool.wait() - # All good, queue the job. - route_index = self.app.routes.index(route) - job = { - 'type': JOB_RENDER_FIRST, - 'job': { - 'factory_info': save_factory(fac), - 'route_index': route_index - } - } - jobs.append(job) - - ar = pool.queueJobs(jobs, handler=_handler) - ar.wait() - - def _bakeRealmPages(self, record, pool, realm, factories): - def _handler(res): - entry = record.getCurrentEntry(res['path']) - entry.subs = res['sub_entries'] - if res['errors']: - entry.errors += res['errors'] - self._logErrors(res['path'], res['errors']) - if entry.has_any_error: - record.current.success = False - if entry.subs and entry.was_any_sub_baked: - record.current.baked_count[realm] += 1 - record.current.total_baked_count[realm] += len(entry.subs) - - logger.debug("Baking %d realm pages..." % len(factories)) - with format_timed_scope(logger, - "baked %d pages" % len(factories), - level=logging.DEBUG, colored=False, - timer_env=self.app.env, - timer_category='BakeJob'): - jobs = [] - for fac in factories: - job = self._makeBakeJob(record, fac) - if job is not None: - jobs.append(job) - - ar = pool.queueJobs(jobs, handler=_handler) - ar.wait() - - def _bakePageGenerators(self, record, pool): - for gen in self.app.generators: - ctx = PageGeneratorBakeContext(self.app, record, pool, gen) - gen.bake(ctx) - - def _makeBakeJob(self, record, fac): - # Get the previous (if any) and current entry for this page. - pair = record.getPreviousAndCurrentEntries(fac.path) - assert pair is not None - prev_entry, cur_entry = pair - assert cur_entry is not None - - # Ignore if there were errors in the previous passes. - if cur_entry.errors: - logger.debug("Ignoring %s because it had previous " - "errors." % fac.ref_spec) - return None - - # Build the route metadata and find the appropriate route. - page = fac.buildPage() - route_metadata = create_route_metadata(page) - route = self.app.getSourceRoute(fac.source.name, route_metadata) - assert route is not None - - # Figure out if this page is overriden by another previously - # baked page. This happens for example when the user has - # made a page that has the same page/URL as a theme page. - uri = route.getUri(route_metadata) - override_entry = record.getOverrideEntry(page.path, uri) - if override_entry is not None: - override_source = self.app.getSource( - override_entry.source_name) - if override_source.realm == fac.source.realm: - cur_entry.errors.append( - "Page '%s' maps to URL '%s' but is overriden " - "by page '%s'." 
% - (fac.ref_spec, uri, override_entry.path)) - logger.error(cur_entry.errors[-1]) - cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN - return None - - route_index = self.app.routes.index(route) - job = { - 'type': JOB_BAKE, - 'job': { - 'factory_info': save_factory(fac), - 'generator_name': None, - 'generator_record_key': None, - 'route_index': route_index, - 'route_metadata': route_metadata, - 'dirty_source_names': record.dirty_source_names - } - } - return job - - def _handleDeletetions(self, record): - logger.debug("Handling deletions...") - for path, reason in record.getDeletions(): - logger.debug("Removing '%s': %s" % (path, reason)) - record.current.deleted.append(path) - try: - os.remove(path) - logger.info('[delete] %s' % path) - except OSError: - # Not a big deal if that file had already been removed - # by the user. - pass - - def _logErrors(self, path, errors): - rel_path = os.path.relpath(path, self.app.root_dir) - logger.error("Errors found in %s:" % rel_path) + def _logErrors(self, item_spec, errors): + logger.error("Errors found in %s:" % item_spec) for e in errors: logger.error(" " + e) - def _createWorkerPool(self, previous_record_path): - from piecrust.app import PieCrustFactory + def _createWorkerPool(self, previous_records_path): from piecrust.workerpool import WorkerPool from piecrust.baking.worker import BakeWorkerContext, BakeWorker - appfactory = PieCrustFactory( - self.app.root_dir, - cache=self.app.cache.enabled, - cache_key=self.app.cache_key, - config_variant=self.applied_config_variant, - config_values=self.applied_config_values, - debug=self.app.debug, - theme_site=self.app.theme_site) - worker_count = self.app.config.get('baker/workers') batch_size = self.app.config.get('baker/batch_size') ctx = BakeWorkerContext( - appfactory, - self.out_dir, - force=self.force, - previous_record_path=previous_record_path) + self.appfactory, + self.out_dir, + force=self.force, + previous_records_path=previous_records_path, + allowed_pipelines=self.allowed_pipelines) pool = WorkerPool( - worker_count=worker_count, - batch_size=batch_size, - worker_class=BakeWorker, - initargs=(ctx,)) + worker_count=worker_count, + batch_size=batch_size, + worker_class=BakeWorker, + initargs=(ctx,), + callback=self._handleWorkerResult, + error_callback=self._handleWorkerError) return pool + def _handleWorkerResult(self, job, res): + record_name = self._getRecordName(job) + record = self._records.getRecord(record_name) + record.entries.append(res.record) + if not res.record.success: + record.success = False + self._records.success = False + self._logErrors(job.item_spec, res.record.errors) + + def _handleWorkerError(self, job, exc_data): + e = RecordEntry() + e.item_spec = job.item_spec + e.errors.append(str(exc_data)) + + record_name = self._getRecordName(job) + record = self._records.getRecord(record_name) + record.entries.append(e) + + record.success = False + self._records.success = False + + self._logErrors(job.item_spec, e.errors) + if self.app.debug: + logger.error(exc_data.traceback) + + def _getRecordName(self, job): + sn = job.source_name + ppn = self.app.getSource(sn).config['pipeline'] + return '%s@%s' % (sn, ppn)
--- a/piecrust/baking/records.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,204 +0,0 @@ -import copy -import os.path -import hashlib -import logging -from piecrust.records import Record, TransitionalRecord - - -logger = logging.getLogger(__name__) - - -def _get_transition_key(path, extra_key=None): - key = path - if extra_key: - key += '+%s' % extra_key - return hashlib.md5(key.encode('utf8')).hexdigest() - - -class BakeRecord(Record): - RECORD_VERSION = 20 - - def __init__(self): - super(BakeRecord, self).__init__() - self.out_dir = None - self.bake_time = None - self.baked_count = {} - self.total_baked_count = {} - self.deleted = [] - self.success = True - - -class SubPageBakeInfo(object): - FLAG_NONE = 0 - FLAG_BAKED = 2**0 - FLAG_FORCED_BY_SOURCE = 2**1 - FLAG_FORCED_BY_NO_PREVIOUS = 2**2 - FLAG_FORCED_BY_PREVIOUS_ERRORS = 2**3 - FLAG_FORMATTING_INVALIDATED = 2**4 - - def __init__(self, out_uri, out_path): - self.out_uri = out_uri - self.out_path = out_path - self.flags = self.FLAG_NONE - self.errors = [] - self.render_info = [None, None] # Same length as RENDER_PASSES - - @property - def was_clean(self): - return (self.flags & self.FLAG_BAKED) == 0 and len(self.errors) == 0 - - @property - def was_baked(self): - return (self.flags & self.FLAG_BAKED) != 0 - - @property - def was_baked_successfully(self): - return self.was_baked and len(self.errors) == 0 - - def anyPass(self, func): - for pinfo in self.render_info: - if pinfo and func(pinfo): - return True - return False - - def copyRenderInfo(self): - return copy.deepcopy(self.render_info) - - -class BakeRecordEntry(object): - """ An entry in the bake record. - """ - FLAG_NONE = 0 - FLAG_NEW = 2**0 - FLAG_SOURCE_MODIFIED = 2**1 - FLAG_OVERRIDEN = 2**2 - - def __init__(self, source_name, path, extra_key=None): - self.source_name = source_name - self.path = path - self.extra_key = extra_key - self.flags = self.FLAG_NONE - self.config = None - self.timestamp = None - self.errors = [] - self.subs = [] - - @property - def path_mtime(self): - return os.path.getmtime(self.path) - - @property - def was_overriden(self): - return (self.flags & self.FLAG_OVERRIDEN) != 0 - - @property - def num_subs(self): - return len(self.subs) - - @property - def was_any_sub_baked(self): - for o in self.subs: - if o.was_baked: - return True - return False - - @property - def all_assets(self): - for sub in self.subs: - yield from sub.assets - - @property - def all_out_paths(self): - for sub in self.subs: - yield sub.out_path - - @property - def has_any_error(self): - if len(self.errors) > 0: - return True - for o in self.subs: - if len(o.errors) > 0: - return True - return False - - def getSub(self, sub_index): - return self.subs[sub_index - 1] - - def getAllErrors(self): - yield from self.errors - for o in self.subs: - yield from o.errors - - def getAllUsedSourceNames(self): - res = set() - for o in self.subs: - for pinfo in o.render_info: - if pinfo: - res |= pinfo.used_source_names - return res - - -class TransitionalBakeRecord(TransitionalRecord): - def __init__(self, previous_path=None): - super(TransitionalBakeRecord, self).__init__(BakeRecord, - previous_path) - self.dirty_source_names = set() - - def addEntry(self, entry): - if (self.previous.bake_time and - entry.path_mtime >= self.previous.bake_time): - entry.flags |= BakeRecordEntry.FLAG_SOURCE_MODIFIED - self.dirty_source_names.add(entry.source_name) - super(TransitionalBakeRecord, self).addEntry(entry) - - def getTransitionKey(self, entry): - return 
_get_transition_key(entry.path, entry.extra_key) - - def getPreviousAndCurrentEntries(self, path, extra_key=None): - key = _get_transition_key(path, extra_key) - pair = self.transitions.get(key) - return pair - - def getOverrideEntry(self, path, uri): - for pair in self.transitions.values(): - cur = pair[1] - if cur and cur.path != path: - for o in cur.subs: - if o.out_uri == uri: - return cur - return None - - def getPreviousEntry(self, path, extra_key=None): - pair = self.getPreviousAndCurrentEntries(path, extra_key) - if pair is not None: - return pair[0] - return None - - def getCurrentEntry(self, path, extra_key=None): - pair = self.getPreviousAndCurrentEntries(path, extra_key) - if pair is not None: - return pair[1] - return None - - def collapseEntry(self, prev_entry): - cur_entry = copy.deepcopy(prev_entry) - cur_entry.flags = BakeRecordEntry.FLAG_NONE - for o in cur_entry.subs: - o.flags = SubPageBakeInfo.FLAG_NONE - self.addEntry(cur_entry) - - def getDeletions(self): - for prev, cur in self.transitions.values(): - if prev and not cur: - for sub in prev.subs: - yield (sub.out_path, 'previous source file was removed') - elif prev and cur: - prev_out_paths = [o.out_path for o in prev.subs] - cur_out_paths = [o.out_path for o in cur.subs] - diff = set(prev_out_paths) - set(cur_out_paths) - for p in diff: - yield (p, 'source file changed outputs') - - def _onNewEntryAdded(self, entry): - entry.flags |= BakeRecordEntry.FLAG_NEW -
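For orientation, the `MultiRecord`/`Record` shapes that replace the deleted
classes above can be inferred from how `baker.py` uses them (`getRecord()`,
`.entries`, `.success`). The sketch below is that inference only, not the
actual `piecrust/pipelines/records.py` implementation:

    class Record:
        # One pipeline's record for one bake: a flat list of entries.
        def __init__(self):
            self.entries = []
            self.success = True


    class MultiRecord:
        # All records for one bake, keyed by 'source@pipeline' names.
        def __init__(self):
            self.success = True
            self.incremental_count = 0
            self._records = {}

        def getRecord(self, name):
            return self._records.setdefault(name, Record())


    class MultiRecordHistory:
        # Pairs the previous bake's records with the current bake's, much
        # like the old TransitionalBakeRecord paired previous/current.
        def __init__(self, previous, current):
            self.previous = previous
            self.current = current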
--- a/piecrust/baking/single.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,270 +0,0 @@ -import os.path -import queue -import logging -import threading -import urllib.parse -from piecrust import ASSET_DIR_SUFFIX -from piecrust.baking.records import SubPageBakeInfo -from piecrust.rendering import ( - QualifiedPage, PageRenderingContext, render_page, - PASS_FORMATTING) -from piecrust.uriutil import split_uri - - -logger = logging.getLogger(__name__) - - -class BakingError(Exception): - pass - - -def _text_writer(q): - while True: - item = q.get() - if item is not None: - out_path, txt = item - out_dir = os.path.dirname(out_path) - _ensure_dir_exists(out_dir) - - with open(out_path, 'w', encoding='utf8') as fp: - fp.write(txt) - - q.task_done() - else: - # Sentinel object, terminate the thread. - q.task_done() - break - - -class PageBaker(object): - def __init__(self, app, out_dir, force=False, copy_assets=True): - self.app = app - self.out_dir = out_dir - self.force = force - self.copy_assets = copy_assets - self.site_root = app.config.get('site/root') - self.pretty_urls = app.config.get('site/pretty_urls') - self._writer_queue = queue.Queue() - self._writer = threading.Thread( - name='PageSerializer', - target=_text_writer, - args=(self._writer_queue,)) - self._writer.start() - - def shutdown(self): - self._writer_queue.put_nowait(None) - self._writer.join() - - def getOutputPath(self, uri, pretty_urls): - uri_root, uri_path = split_uri(self.app, uri) - - bake_path = [self.out_dir] - decoded_uri = urllib.parse.unquote(uri_path) - if pretty_urls: - bake_path.append(decoded_uri) - bake_path.append('index.html') - elif decoded_uri == '': - bake_path.append('index.html') - else: - bake_path.append(decoded_uri) - - return os.path.normpath(os.path.join(*bake_path)) - - def bake(self, qualified_page, prev_entry, dirty_source_names, - generator_name=None): - # Start baking the sub-pages. - cur_sub = 1 - has_more_subs = True - sub_entries = [] - - while has_more_subs: - # Get the URL and path for this sub-page. - sub_uri = qualified_page.getUri(cur_sub) - logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub)) - pretty_urls = qualified_page.config.get('pretty_urls', - self.pretty_urls) - out_path = self.getOutputPath(sub_uri, pretty_urls) - - # Create the sub-entry for the bake record. - sub_entry = SubPageBakeInfo(sub_uri, out_path) - sub_entries.append(sub_entry) - - # Find a corresponding sub-entry in the previous bake record. - prev_sub_entry = None - if prev_entry: - try: - prev_sub_entry = prev_entry.getSub(cur_sub) - except IndexError: - pass - - # Figure out if we need to invalidate or force anything. - force_this_sub, invalidate_formatting = _compute_force_flags( - prev_sub_entry, sub_entry, dirty_source_names) - force_this_sub = force_this_sub or self.force - - # Check for up-to-date outputs. - do_bake = True - if not force_this_sub: - try: - in_path_time = qualified_page.path_mtime - out_path_time = os.path.getmtime(out_path) - if out_path_time >= in_path_time: - do_bake = False - except OSError: - # File doesn't exist, we'll need to bake. - pass - - # If this page didn't bake because it's already up-to-date. - # Keep trying for as many subs as we know this page has. - if not do_bake: - sub_entry.render_info = prev_sub_entry.copyRenderInfo() - sub_entry.flags = SubPageBakeInfo.FLAG_NONE - - if prev_entry.num_subs >= cur_sub + 1: - cur_sub += 1 - has_more_subs = True - logger.debug(" %s is up to date, skipping to next " - "sub-page." 
% out_path) - continue - - logger.debug(" %s is up to date, skipping bake." % out_path) - break - - # All good, proceed. - try: - if invalidate_formatting: - cache_key = sub_uri - self.app.env.rendered_segments_repository.invalidate( - cache_key) - sub_entry.flags |= \ - SubPageBakeInfo.FLAG_FORMATTING_INVALIDATED - - logger.debug(" p%d -> %s" % (cur_sub, out_path)) - rp = self._bakeSingle(qualified_page, cur_sub, out_path, - generator_name) - except Exception as ex: - logger.exception(ex) - page_rel_path = os.path.relpath(qualified_page.path, - self.app.root_dir) - raise BakingError("%s: error baking '%s'." % - (page_rel_path, sub_uri)) from ex - - # Record what we did. - sub_entry.flags |= SubPageBakeInfo.FLAG_BAKED - sub_entry.render_info = rp.copyRenderInfo() - - # Copy page assets. - if (cur_sub == 1 and self.copy_assets and - sub_entry.anyPass(lambda p: p.used_assets)): - if pretty_urls: - out_assets_dir = os.path.dirname(out_path) - else: - out_assets_dir, out_name = os.path.split(out_path) - if sub_uri != self.site_root: - out_name_noext, _ = os.path.splitext(out_name) - out_assets_dir = os.path.join(out_assets_dir, - out_name_noext) - - logger.debug("Copying page assets to: %s" % out_assets_dir) - _ensure_dir_exists(out_assets_dir) - - qualified_page.source.buildAssetor(qualified_page, sub_uri).copyAssets(out_assets_dir) - - # Figure out if we have more work. - has_more_subs = False - if sub_entry.anyPass(lambda p: p.pagination_has_more): - cur_sub += 1 - has_more_subs = True - - return sub_entries - - def _bakeSingle(self, qp, num, out_path, - generator_name=None): - ctx = PageRenderingContext(qp, page_num=num) - if qp.route.is_generator_route: - qp.route.generator.prepareRenderContext(ctx) - - with self.app.env.timerScope("PageRender"): - rp = render_page(ctx) - - with self.app.env.timerScope("PageSerialize"): - self._writer_queue.put_nowait((out_path, rp.content)) - - return rp - - -def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names): - # Figure out what to do with this page. - force_this_sub = False - invalidate_formatting = False - sub_uri = sub_entry.out_uri - if (prev_sub_entry and - (prev_sub_entry.was_baked_successfully or - prev_sub_entry.was_clean)): - # If the current page is known to use pages from other sources, - # see if any of those got baked, or are going to be baked for - # some reason. If so, we need to bake this one too. - # (this happens for instance with the main page of a blog). - dirty_for_this, invalidated_render_passes = ( - _get_dirty_source_names_and_render_passes( - prev_sub_entry, dirty_source_names)) - if len(invalidated_render_passes) > 0: - logger.debug( - "'%s' is known to use sources %s, which have " - "items that got (re)baked. Will force bake this " - "page. " % (sub_uri, dirty_for_this)) - sub_entry.flags |= \ - SubPageBakeInfo.FLAG_FORCED_BY_SOURCE - force_this_sub = True - - if PASS_FORMATTING in invalidated_render_passes: - logger.debug( - "Will invalidate cached formatting for '%s' " - "since sources were using during that pass." - % sub_uri) - invalidate_formatting = True - elif (prev_sub_entry and - prev_sub_entry.errors): - # Previous bake failed. We'll have to bake it again. - logger.debug( - "Previous record entry indicates baking failed for " - "'%s'. Will bake it again." % sub_uri) - sub_entry.flags |= \ - SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS - force_this_sub = True - elif not prev_sub_entry: - # No previous record. We'll have to bake it. - logger.debug("No previous record entry found for '%s'. 
Will " - "force bake it." % sub_uri) - sub_entry.flags |= \ - SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS - force_this_sub = True - - return force_this_sub, invalidate_formatting - - -def _get_dirty_source_names_and_render_passes(sub_entry, dirty_source_names): - dirty_for_this = set() - invalidated_render_passes = set() - for p, pinfo in enumerate(sub_entry.render_info): - if pinfo: - for src_name in pinfo.used_source_names: - is_dirty = (src_name in dirty_source_names) - if is_dirty: - invalidated_render_passes.add(p) - dirty_for_this.add(src_name) - break - return dirty_for_this, invalidated_render_passes - - -def _ensure_dir_exists(path): - try: - os.makedirs(path, mode=0o755, exist_ok=True) - except OSError: - # In a multiprocess environment, several process may very - # occasionally try to create the same directory at the same time. - # Let's ignore any error and if something's really wrong (like file - # acces permissions or whatever), then it will more legitimately fail - # just after this when we try to write files. - pass -
--- a/piecrust/baking/worker.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/baking/worker.py Wed May 17 00:11:48 2017 -0700 @@ -1,13 +1,9 @@ import time import logging -from piecrust.app import PieCrust, apply_variant_and_values -from piecrust.baking.records import BakeRecord, _get_transition_key -from piecrust.baking.single import PageBaker, BakingError -from piecrust.environment import AbortedSourceUseError -from piecrust.rendering import ( - QualifiedPage, PageRenderingContext, render_page_segments) -from piecrust.routing import create_route_metadata -from piecrust.sources.base import PageFactory +from piecrust.pipelines.base import PipelineContext, PipelineResult +from piecrust.pipelines.records import ( + MultiRecordHistory, MultiRecord, Record, load_records) +from piecrust.sources.base import ContentItem from piecrust.workerpool import IWorker @@ -16,80 +12,109 @@ class BakeWorkerContext(object): def __init__(self, appfactory, out_dir, *, - force=False, previous_record_path=None): + force=False, previous_records_path=None, + allowed_pipelines=None): self.appfactory = appfactory self.out_dir = out_dir self.force = force - self.previous_record_path = previous_record_path - self.app = None - self.previous_record = None - self.previous_record_index = None + self.previous_records_path = previous_records_path + self.allowed_pipelines = allowed_pipelines class BakeWorker(IWorker): def __init__(self, ctx): self.ctx = ctx - self.work_start_time = time.perf_counter() + self.app = None + self.record_history = None + self._work_start_time = time.perf_counter() + self._sources = {} + self._ppctx = None def initialize(self): # Create the app local to this worker. app = self.ctx.appfactory.create() app.config.set('baker/is_baking', True) app.config.set('baker/worker_id', self.wid) - app.env.base_asset_url_format = '%uri%' + app.config.set('site/base_asset_url_format', '%uri') + app.env.fs_cache_only_for_main_page = True - app.env.registerTimer("BakeWorker_%d_Total" % self.wid) - app.env.registerTimer("BakeWorkerInit") - app.env.registerTimer("JobReceive") - app.env.registerCounter("SourceUseAbortions") - app.env.registerManifest("LoadJobs") - app.env.registerManifest("RenderJobs") - app.env.registerManifest("BakeJobs") - self.ctx.app = app + + stats = app.env.stats + stats.registerTimer("BakeWorker_%d_Total" % self.wid) + stats.registerTimer("BakeWorkerInit") + stats.registerTimer("JobReceive") + stats.registerTimer('LoadJob', raise_if_registered=False) + stats.registerTimer('RenderFirstSubJob', + raise_if_registered=False) + stats.registerTimer('BakeJob', raise_if_registered=False) + + stats.registerCounter("SourceUseAbortions") + + stats.registerManifest("LoadJobs") + stats.registerManifest("RenderJobs") + stats.registerManifest("BakeJobs") + + self.app = app # Load previous record - if self.ctx.previous_record_path: - self.ctx.previous_record = BakeRecord.load( - self.ctx.previous_record_path) - self.ctx.previous_record_index = {} - for e in self.ctx.previous_record.entries: - key = _get_transition_key(e.path, e.extra_key) - self.ctx.previous_record_index[key] = e + if self.ctx.previous_records_path: + previous_records = load_records(self.ctx.previous_records_path) + else: + previous_records = MultiRecord() + current_records = MultiRecord() + self.record_history = MultiRecordHistory( + previous_records, current_records) + + # Cache sources and create pipelines. 
+ ppclasses = {} + for ppclass in app.plugin_loader.getPipelines(): + ppclasses[ppclass.PIPELINE_NAME] = ppclass - # Create the job handlers. - job_handlers = { - JOB_LOAD: LoadJobHandler(self.ctx), - JOB_RENDER_FIRST: RenderFirstSubJobHandler(self.ctx), - JOB_BAKE: BakeJobHandler(self.ctx)} - for jt, jh in job_handlers.items(): - app.env.registerTimer(type(jh).__name__) - self.job_handlers = job_handlers + self._ppctx = PipelineContext(self.ctx.out_dir, self.record_history, + worker_id=self.wid, + force=self.ctx.force) + for src in app.sources: + ppname = src.config['pipeline'] + if (self.ctx.allowed_pipelines is not None and + ppname not in self.ctx.allowed_pipelines): + continue - app.env.stepTimerSince("BakeWorkerInit", self.work_start_time) + pp = ppclasses[ppname](src) + pp.initialize(self._ppctx) + self._sources[src.name] = (src, pp) + + stats.stepTimerSince("BakeWorkerInit", self._work_start_time) def process(self, job): - handler = self.job_handlers[job['type']] - with self.ctx.app.env.timerScope(type(handler).__name__): - return handler.handleJob(job['job']) + logger.debug("Received job: %s@%s" % (job.source_name, job.item_spec)) + src, pp = self._sources[job.source_name] + item = ContentItem(job.item_spec, job.item_metadata) - def getReport(self, pool_reports): - self.ctx.app.env.stepTimerSince("BakeWorker_%d_Total" % self.wid, - self.work_start_time) - data = self.ctx.app.env.getStats() - data.timers.update(pool_reports) - return { - 'type': 'stats', - 'data': data} + record_class = pp.RECORD_CLASS or Record + ppres = PipelineResult(record_class()) + ppres.record.item_spec = job.item_spec + pp.run(item, self._ppctx, ppres) + return ppres + + def getStats(self): + stats = self.app.env.stats + stats.stepTimerSince("BakeWorker_%d_Total" % self.wid, + self._work_start_time) + return stats def shutdown(self): - for jh in self.job_handlers.values(): - jh.shutdown() + for src, pp in self._sources.values(): + pp.shutdown(self._ppctx) -JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE = range(0, 3) +class BakeJob: + def __init__(self, source_name, item_spec, item_metadata): + self.source_name = source_name + self.item_spec = item_spec + self.item_metadata = item_metadata -class JobHandler(object): +class JobHandler: def __init__(self, ctx): self.ctx = ctx @@ -111,131 +136,3 @@ ex = ex.__cause__ return errors - -def save_factory(fac): - return { - 'source_name': fac.source.name, - 'rel_path': fac.rel_path, - 'metadata': fac.metadata} - - -def load_factory(app, info): - source = app.getSource(info['source_name']) - return PageFactory(source, info['rel_path'], info['metadata']) - - -class LoadJobHandler(JobHandler): - def handleJob(self, job): - # Just make sure the page has been cached. - fac = load_factory(self.app, job) - logger.debug("Loading page: %s" % fac.ref_spec) - self.app.env.addManifestEntry('LoadJobs', fac.ref_spec) - result = { - 'source_name': fac.source.name, - 'path': fac.path, - 'config': None, - 'timestamp': None, - 'errors': None} - try: - page = fac.buildPage() - page._load() - result['config'] = page.config.getAll() - result['timestamp'] = page.datetime.timestamp() - except Exception as ex: - logger.debug("Got loading error. Sending it to master.") - result['errors'] = _get_errors(ex) - if self.ctx.app.debug: - logger.exception(ex) - return result - - -class RenderFirstSubJobHandler(JobHandler): - def handleJob(self, job): - # Render the segments for the first sub-page of this page. 
- fac = load_factory(self.app, job['factory_info']) - self.app.env.addManifestEntry('RenderJobs', fac.ref_spec) - - route_index = job['route_index'] - route = self.app.routes[route_index] - - page = fac.buildPage() - route_metadata = create_route_metadata(page) - qp = QualifiedPage(page, route, route_metadata) - ctx = PageRenderingContext(qp) - self.app.env.abort_source_use = True - - result = { - 'path': fac.path, - 'aborted': False, - 'errors': None} - logger.debug("Preparing page: %s" % fac.ref_spec) - try: - render_page_segments(ctx) - except AbortedSourceUseError: - logger.debug("Page %s was aborted." % fac.ref_spec) - self.app.env.stepCounter("SourceUseAbortions") - result['aborted'] = True - except Exception as ex: - logger.debug("Got rendering error. Sending it to master.") - result['errors'] = _get_errors(ex) - if self.ctx.app.debug: - logger.exception(ex) - finally: - self.app.env.abort_source_use = False - return result - - -class BakeJobHandler(JobHandler): - def __init__(self, ctx): - super(BakeJobHandler, self).__init__(ctx) - self.page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force) - - def shutdown(self): - self.page_baker.shutdown() - - def handleJob(self, job): - # Actually bake the page and all its sub-pages to the output folder. - fac = load_factory(self.app, job['factory_info']) - self.app.env.addManifestEntry('BakeJobs', fac.ref_spec) - - route_index = job['route_index'] - route_metadata = job['route_metadata'] - route = self.app.routes[route_index] - - gen_name = job['generator_name'] - gen_key = job['generator_record_key'] - dirty_source_names = job['dirty_source_names'] - - page = fac.buildPage() - qp = QualifiedPage(page, route, route_metadata) - - result = { - 'path': fac.path, - 'generator_name': gen_name, - 'generator_record_key': gen_key, - 'sub_entries': None, - 'errors': None} - - if job.get('needs_config', False): - result['config'] = page.config.getAll() - - previous_entry = None - if self.ctx.previous_record_index is not None: - key = _get_transition_key(fac.path, gen_key) - previous_entry = self.ctx.previous_record_index.get(key) - - logger.debug("Baking page: %s" % fac.ref_spec) - logger.debug("With route metadata: %s" % route_metadata) - try: - sub_entries = self.page_baker.bake( - qp, previous_entry, dirty_source_names, gen_name) - result['sub_entries'] = sub_entries - - except Exception as ex: - logger.debug("Got baking error. Sending it to master.") - result['errors'] = _get_errors(ex) - if self.ctx.app.debug: - logger.exception(ex) - - return result -
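The worker above no longer knows about distinct load/render/bake job types; it just routes every ContentItem to the pipeline registered for its source. Below is a minimal sketch of the contract `BakeWorker` relies on, inferred only from the calls in this hunk (the real base class lives in `piecrust.pipelines.base` and may differ):

from piecrust.pipelines.records import Record


class SketchRecord(Record):
    # RECORD_CLASS instances are what `process()` wraps in a PipelineResult.
    pass


class SketchPipeline:
    # Looked up via plugin_loader.getPipelines() and matched against the
    # source's `pipeline` config entry.
    PIPELINE_NAME = 'sketch'
    RECORD_CLASS = SketchRecord

    def __init__(self, source):
        # One pipeline instance is created per content source.
        self.source = source

    def initialize(self, ctx):
        # `ctx` is the shared PipelineContext(out_dir, record_history, ...).
        pass

    def run(self, content_item, ctx, result):
        # Process a single ContentItem; report through `result.record`.
        result.record.item_spec = content_item.spec

    def shutdown(self, ctx):
        pass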
--- a/piecrust/cache.py	Sat Apr 29 21:42:22 2017 -0700
+++ b/piecrust/cache.py	Wed May 17 00:11:48 2017 -0700
@@ -5,7 +5,6 @@
 import codecs
 import hashlib
 import logging
-import collections
 import repoze.lru
--- a/piecrust/commands/base.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/commands/base.py Wed May 17 00:11:48 2017 -0700 @@ -8,12 +8,11 @@ class CommandContext(object): - def __init__(self, app, parser, args): + def __init__(self, appfactory, app, parser, args): + self.appfactory = appfactory self.app = app self.parser = parser self.args = args - self.config_variant = None - self.config_values = None class ChefCommand(object): @@ -27,8 +26,9 @@ raise NotImplementedError() def run(self, ctx): - raise NotImplementedError("Command '%s' doesn't implement the `run` " - "method." % type(self)) + raise NotImplementedError( + "Command '%s' doesn't implement the `run` " + "method." % type(self)) def checkedRun(self, ctx): if ctx.app.root_dir is None and self.requires_website: @@ -83,8 +83,9 @@ return [(n, d) for (n, d, e) in self._topic_providers] def setupParser(self, parser, app): - parser.add_argument('topic', nargs='?', - help="The command name or topic on which to get help.") + parser.add_argument( + 'topic', nargs='?', + help="The command name or topic on which to get help.") extensions = self.getExtensions(app) for ext in extensions: @@ -106,8 +107,8 @@ for c in ctx.app.plugin_loader.getCommands(): if c.name == topic: fake = argparse.ArgumentParser( - prog='%s %s' % (ctx.parser.prog, c.name), - description=c.description) + prog='%s %s' % (ctx.parser.prog, c.name), + description=c.description) c.setupParser(fake, ctx.app) fake.print_help() return 0
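With `appfactory` now carried on the context, commands that spawn servers or worker pools can hand out a ready-made factory instead of reconstructing one from `ctx.app` by hand (compare the `admin run` and `serve` changes below). A hypothetical command using it, for illustration only:

import logging
from piecrust.commands.base import ChefCommand

logger = logging.getLogger(__name__)


class ExampleCommand(ChefCommand):
    def __init__(self):
        super().__init__()
        self.name = 'example'
        self.description = "Hypothetical command showing `ctx.appfactory`."

    def setupParser(self, parser, app):
        pass

    def run(self, ctx):
        # Any sub-process or processing loop can build an
        # identically-configured app from the factory.
        app = ctx.appfactory.create()
        logger.info("Created app rooted at: %s" % app.root_dir)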
--- a/piecrust/commands/builtin/admin.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/commands/builtin/admin.py Wed May 17 00:11:48 2017 -0700 @@ -20,33 +20,33 @@ subparsers = parser.add_subparsers() p = subparsers.add_parser( - 'init', - help="Creates a new administration panel website.") + 'init', + help="Creates a new administration panel website.") p.set_defaults(sub_func=self._initFoodTruck) p = subparsers.add_parser( - 'genpass', - help=("Generates the hashed password for use as an " - "admin password")) + 'genpass', + help=("Generates the hashed password for use as an " + "admin password")) p.add_argument('password', help="The password to hash.") p.set_defaults(sub_func=self._generatePassword) p = subparsers.add_parser( - 'run', - help="Runs the administrative panel website.") + 'run', + help="Runs the administrative panel website.") p.add_argument( - '-p', '--port', - help="The port for the administrative panel website.", - default=8090) + '-p', '--port', + help="The port for the administrative panel website.", + default=8090) p.add_argument( - '-a', '--address', - help="The host for the administrative panel website.", - default='localhost') + '-a', '--address', + help="The host for the administrative panel website.", + default='localhost') p.add_argument( - '--no-assets', - help="Don't process and monitor the asset folder(s).", - dest='monitor_assets', - action='store_false') + '--no-assets', + help="Don't process and monitor the asset folder(s).", + dest='monitor_assets', + action='store_false') p.set_defaults(sub_func=self._runFoodTruck) def checkedRun(self, ctx): @@ -59,34 +59,26 @@ return ctx.args.sub_func(ctx) def _runFoodTruck(self, ctx): - # See `_run_sse_check` in `piecrust.serving.wrappers` for an explanation - # of this check. + # See `_run_sse_check` in `piecrust.serving.wrappers` for an + # explanation of this check. if (ctx.args.monitor_assets and ( not ctx.args.debug or os.environ.get('WERKZEUG_RUN_MAIN') == 'true')): - from piecrust.app import PieCrustFactory from piecrust.serving.procloop import ProcessingLoop - appfactory = PieCrustFactory( - ctx.app.root_dir, - cache=ctx.app.cache.enabled, - cache_key=ctx.app.cache_key, - config_variant=ctx.config_variant, - config_values=ctx.config_values, - debug=ctx.app.debug, - theme_site=ctx.app.theme_site) - out_dir = os.path.join(ctx.app.root_dir, CACHE_DIR, 'foodtruck', 'server') - proc_loop = ProcessingLoop(appfactory, out_dir) + out_dir = os.path.join( + ctx.app.root_dir, CACHE_DIR, 'foodtruck', 'server') + proc_loop = ProcessingLoop(ctx.appfactory, out_dir) proc_loop.start() es = { - 'FOODTRUCK_CMDLINE_MODE': True, - 'FOODTRUCK_ROOT': ctx.app.root_dir} + 'FOODTRUCK_CMDLINE_MODE': True, + 'FOODTRUCK_ROOT': ctx.app.root_dir} from piecrust.admin.main import run_foodtruck run_foodtruck( - host=ctx.args.address, - port=ctx.args.port, - debug=ctx.args.debug, - extra_settings=es) + host=ctx.args.address, + port=ctx.args.port, + debug=ctx.args.debug, + extra_settings=es) def _initFoodTruck(self, ctx): import getpass @@ -113,9 +105,9 @@ password: %(password)s """ ft_config = ft_config % { - 'username': admin_username, - 'password': admin_password - } + 'username': admin_username, + 'password': admin_password + } with open('foodtruck.yml', 'w', encoding='utf8') as fp: fp.write(ft_config)
--- a/piecrust/commands/builtin/baking.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/commands/builtin/baking.py Wed May 17 00:11:48 2017 -0700 @@ -5,20 +5,7 @@ import fnmatch import datetime from colorama import Fore -from piecrust import CACHE_DIR -from piecrust.baking.baker import Baker -from piecrust.baking.records import ( - BakeRecord, BakeRecordEntry, SubPageBakeInfo) -from piecrust.chefutil import format_timed from piecrust.commands.base import ChefCommand -from piecrust.environment import ExecutionStats -from piecrust.processing.pipeline import ProcessorPipeline -from piecrust.processing.records import ( - ProcessorPipelineRecord, - FLAG_PREPARED, FLAG_PROCESSED, FLAG_BYPASSED_STRUCTURED_PROCESSING, - FLAG_COLLAPSED_FROM_LAST_RUN) -from piecrust.rendering import ( - PASS_FORMATTING, PASS_RENDERING) logger = logging.getLogger(__name__) @@ -32,60 +19,58 @@ def setupParser(self, parser, app): parser.add_argument( - '-o', '--output', - help="The directory to put all the baked HTML files into " - "(defaults to `_counter`)") + '-o', '--output', + help="The directory to put all the baked HTML files into " + "(defaults to `_counter`)") + parser.add_argument( + '-f', '--force', + help="Force re-baking the entire website.", + action='store_true') parser.add_argument( - '-f', '--force', - help="Force re-baking the entire website.", - action='store_true') + '-p', '--pipelines', + help="The pipelines to run.", + nargs='*') parser.add_argument( - '-w', '--workers', - help="The number of worker processes to spawn.", - type=int, default=-1) + '-w', '--workers', + help="The number of worker processes to spawn.", + type=int, default=-1) parser.add_argument( - '--batch-size', - help="The number of jobs per batch.", - type=int, default=-1) + '--batch-size', + help="The number of jobs per batch.", + type=int, default=-1) parser.add_argument( - '--assets-only', - help="Only bake the assets (don't bake the web pages).", - action='store_true') + '--assets-only', + help="Only bake the assets (don't bake the web pages).", + action='store_true') parser.add_argument( - '--html-only', - help="Only bake the pages (don't run the asset pipeline).", - action='store_true') + '--html-only', + help="Only bake the pages (don't run the asset pipeline).", + action='store_true') parser.add_argument( - '--show-stats', - help="Show detailed information about the bake.", - action='store_true') + '--show-stats', + help="Show detailed information about the bake.", + action='store_true') def run(self, ctx): + from piecrust.chefutil import format_timed + out_dir = (ctx.args.output or os.path.join(ctx.app.root_dir, '_counter')) - success = True - ctx.stats = {} start_time = time.perf_counter() try: - # Bake the site sources. - if not ctx.args.assets_only: - success = success & self._bakeSources(ctx, out_dir) - - # Bake the assets. - if not ctx.args.html_only: - success = success & self._bakeAssets(ctx, out_dir) + records = self._doBake(ctx, out_dir) # Show merged stats. if ctx.args.show_stats: logger.info("-------------------") logger.info("Timing information:") - _show_stats(ctx.stats) + _show_stats(records.stats) # All done. 
logger.info('-------------------------') logger.info(format_timed(start_time, 'done baking')) - return 0 if success else 1 + return 0 if records.success else 1 except Exception as ex: if ctx.app.debug: logger.exception(ex) @@ -93,71 +78,58 @@ logger.error(str(ex)) return 1 - def _bakeSources(self, ctx, out_dir): + def _doBake(self, ctx, out_dir): + from piecrust.baking.baker import Baker + if ctx.args.workers > 0: ctx.app.config.set('baker/workers', ctx.args.workers) if ctx.args.batch_size > 0: ctx.app.config.set('baker/batch_size', ctx.args.batch_size) - baker = Baker( - ctx.app, out_dir, - force=ctx.args.force, - applied_config_variant=ctx.config_variant, - applied_config_values=ctx.config_values) - record = baker.bake() - _merge_stats(record.stats, ctx.stats) - return record.success - def _bakeAssets(self, ctx, out_dir): - proc = ProcessorPipeline( - ctx.app, out_dir, - force=ctx.args.force, - applied_config_variant=ctx.config_variant, - applied_config_values=ctx.config_values) - record = proc.run() - _merge_stats(record.stats, ctx.stats) - return record.success + allowed_pipelines = None + if ctx.args.html_only: + allowed_pipelines = ['page'] + elif ctx.args.assets_only: + allowed_pipelines = ['asset'] + elif ctx.args.pipelines: + allowed_pipelines = ctx.args.pipelines - -def _merge_stats(source, target): - if source is None: - return + baker = Baker( + ctx.appfactory, ctx.app, out_dir, + force=ctx.args.force, + allowed_pipelines=allowed_pipelines) + records = baker.bake() - for name, val in source.items(): - if name not in target: - target[name] = ExecutionStats() - target[name].mergeStats(val) + return records def _show_stats(stats, *, full=False): indent = ' ' - for name in sorted(stats.keys()): - logger.info('%s:' % name) - s = stats[name] - logger.info(' Timers:') - for name, val in sorted(s.timers.items(), key=lambda i: i[1], - reverse=True): - val_str = '%8.1f s' % val - logger.info( - "%s[%s%s%s] %s" % - (indent, Fore.GREEN, val_str, Fore.RESET, name)) + logger.info(' Timers:') + for name, val in sorted(stats.timers.items(), key=lambda i: i[1], + reverse=True): + val_str = '%8.1f s' % val + logger.info( + "%s[%s%s%s] %s" % + (indent, Fore.GREEN, val_str, Fore.RESET, name)) - logger.info(' Counters:') - for name in sorted(s.counters.keys()): - val_str = '%8d ' % s.counters[name] - logger.info( - "%s[%s%s%s] %s" % - (indent, Fore.GREEN, val_str, Fore.RESET, name)) + logger.info(' Counters:') + for name in sorted(stats.counters.keys()): + val_str = '%8d ' % stats.counters[name] + logger.info( + "%s[%s%s%s] %s" % + (indent, Fore.GREEN, val_str, Fore.RESET, name)) - logger.info(' Manifests:') - for name in sorted(s.manifests.keys()): - val = s.manifests[name] - logger.info( - "%s[%s%s%s] [%d entries]" % - (indent, Fore.CYAN, name, Fore.RESET, len(val))) - if full: - for v in val: - logger.info("%s - %s" % (indent, v)) + logger.info(' Manifests:') + for name in sorted(stats.manifests.keys()): + val = stats.manifests[name] + logger.info( + "%s[%s%s%s] [%d entries]" % + (indent, Fore.CYAN, name, Fore.RESET, len(val))) + if full: + for v in val: + logger.info("%s - %s" % (indent, v)) class ShowRecordCommand(ChefCommand): @@ -169,41 +141,47 @@ def setupParser(self, parser, app): parser.add_argument( - '-o', '--output', - help="The output directory for which to show the bake record " - "(defaults to `_counter`)", - nargs='?') + '-o', '--output', + help="The output directory for which to show the bake record " + "(defaults to `_counter`)", + nargs='?') parser.add_argument( - '-p', 
'--path', - help="A pattern that will be used to filter the relative path " - "of entries to show.") + '-p', '--path', + help="A pattern that will be used to filter the relative path " + "of entries to show.") parser.add_argument( - '-t', '--out', - help="A pattern that will be used to filter the output path " - "of entries to show.") + '-t', '--out', + help="A pattern that will be used to filter the output path " + "of entries to show.") parser.add_argument( - '--last', - type=int, - default=0, - help="Show the last Nth bake record.") + '--last', + type=int, + default=0, + help="Show the last Nth bake record.") parser.add_argument( - '--html-only', - action='store_true', - help="Only show records for pages (not from the asset " - "pipeline).") + '--html-only', + action='store_true', + help="Only show records for pages (not from the asset " + "pipeline).") parser.add_argument( - '--assets-only', - action='store_true', - help="Only show records for assets (not from pages).") + '--assets-only', + action='store_true', + help="Only show records for assets (not from pages).") parser.add_argument( - '--show-stats', - action='store_true', - help="Show stats from the record.") + '--show-stats', + action='store_true', + help="Show stats from the record.") parser.add_argument( - '--show-manifest', - help="Show manifest entries from the record.") + '--show-manifest', + help="Show manifest entries from the record.") def run(self, ctx): + from piecrust.processing.records import ( + FLAG_PREPARED, FLAG_PROCESSED, FLAG_BYPASSED_STRUCTURED_PROCESSING, + FLAG_COLLAPSED_FROM_LAST_RUN) + from piecrust.rendering import ( + PASS_FORMATTING, PASS_RENDERING) + out_dir = ctx.args.output or os.path.join(ctx.app.root_dir, '_counter') record_id = hashlib.md5(out_dir.encode('utf8')).hexdigest() suffix = '' if ctx.args.last == 0 else '.%d' % ctx.args.last @@ -220,10 +198,10 @@ if not ctx.args.show_stats and not ctx.args.show_manifest: if not ctx.args.assets_only: self._showBakeRecord( - ctx, record_name, pattern, out_pattern) + ctx, record_name, pattern, out_pattern) if not ctx.args.html_only: self._showProcessingRecord( - ctx, record_name, pattern, out_pattern) + ctx, record_name, pattern, out_pattern) return stats = {} @@ -250,8 +228,6 @@ for v in val: logger.info(" - %s" % v) - - def _getBakeRecord(self, ctx, record_name): record_cache = ctx.app.cache.getCache('baker') if not record_cache.has(record_name): @@ -286,11 +262,11 @@ continue flags = _get_flag_descriptions( - entry.flags, - { - BakeRecordEntry.FLAG_NEW: 'new', - BakeRecordEntry.FLAG_SOURCE_MODIFIED: 'modified', - BakeRecordEntry.FLAG_OVERRIDEN: 'overriden'}) + entry.flags, + { + BakeRecordEntry.FLAG_NEW: 'new', + BakeRecordEntry.FLAG_SOURCE_MODIFIED: 'modified', + BakeRecordEntry.FLAG_OVERRIDEN: 'overriden'}) logging.info(" - ") @@ -308,17 +284,17 @@ logging.info(" %d sub-pages:" % len(entry.subs)) for sub in entry.subs: sub_flags = _get_flag_descriptions( - sub.flags, - { - SubPageBakeInfo.FLAG_BAKED: 'baked', - SubPageBakeInfo.FLAG_FORCED_BY_SOURCE: - 'forced by source', - SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS: - 'forced by missing previous record entry', - SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS: - 'forced by previous errors', - SubPageBakeInfo.FLAG_FORMATTING_INVALIDATED: - 'formatting invalidated'}) + sub.flags, + { + SubPageBakeInfo.FLAG_BAKED: 'baked', + SubPageBakeInfo.FLAG_FORCED_BY_SOURCE: + 'forced by source', + SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS: + 'forced by missing previous record entry', + 
SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS: + 'forced by previous errors', + SubPageBakeInfo.FLAG_FORMATTING_INVALIDATED: + 'formatting invalidated'}) logging.info(" - ") logging.info(" URL: %s" % sub.out_uri)
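The `--html-only` and `--assets-only` switches now reduce to a whitelist of pipeline names handed to the `Baker`; 'page' and 'asset' are the two built-in pipelines named in this changeset. The selection logic from `_doBake`, isolated as a standalone sketch:

def get_allowed_pipelines(args):
    # Mirrors the branch in _doBake above; `None` means "run all pipelines".
    if args.html_only:
        return ['page']
    if args.assets_only:
        return ['asset']
    if args.pipelines:
        return list(args.pipelines)
    return None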
--- a/piecrust/commands/builtin/info.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/commands/builtin/info.py Wed May 17 00:11:48 2017 -0700 @@ -3,6 +3,7 @@ import fnmatch from piecrust.commands.base import ChefCommand from piecrust.configuration import ConfigurationDumper +from piecrust.sources.fs import FSContentSourceBase logger = logging.getLogger(__name__) @@ -29,9 +30,9 @@ def setupParser(self, parser, app): parser.add_argument( - 'path', - help="The path to a config section or value", - nargs='?') + 'path', + help="The path to a config section or value", + nargs='?') def run(self, ctx): if ctx.args.path: @@ -65,7 +66,11 @@ for src in ctx.app.sources: logger.info("%s:" % src.name) logger.info(" type: %s" % src.config.get('type')) - logger.info(" class: %s" % type(src)) + logger.debug(" class: %s" % type(src)) + desc = src.describe() + if isinstance(desc, dict): + for k, v in desc.items(): + logger.info(" %s: %s" % (k, v)) class ShowRoutesCommand(ChefCommand): @@ -81,7 +86,6 @@ for route in ctx.app.routes: logger.info("%s:" % route.uri_pattern) logger.info(" source: %s" % (route.source_name or '')) - logger.info(" generator: %s" % (route.generator_name or '')) logger.info(" regex: %s" % route.uri_re.pattern) logger.info(" function: %s(%s)" % ( route.func_name, @@ -118,29 +122,29 @@ def setupParser(self, parser, app): parser.add_argument( - 'pattern', - help="The pattern to match with page filenames", - nargs='?') + 'pattern', + help="The pattern to match with page filenames", + nargs='?') parser.add_argument( - '-n', '--name', - help="Limit the search to sources matching this name") + '-n', '--name', + help="Limit the search to sources matching this name") parser.add_argument( - '--full-path', - help="Return full paths instead of root-relative paths", - action='store_true') + '--full-path', + help="Return full paths instead of root-relative paths", + action='store_true') parser.add_argument( - '--metadata', - help="Return metadata about the page instead of just the path", - action='store_true') + '--metadata', + help="Return metadata about the page instead of just the path", + action='store_true') parser.add_argument( - '--include-theme', - help="Include theme pages to the search", - action='store_true') + '--include-theme', + help="Include theme pages to the search", + action='store_true') parser.add_argument( - '--exact', - help=("Match the exact given pattern, instead of any page " - "containing the pattern"), - action='store_true') + '--exact', + help=("Match the exact given pattern, instead of any page " + "containing the pattern"), + action='store_true') def run(self, ctx): pattern = ctx.args.pattern @@ -154,17 +158,28 @@ if ctx.args.name and not fnmatch.fnmatch(src.name, ctx.args.name): continue - page_facs = src.getPageFactories() - for pf in page_facs: - name = os.path.relpath(pf.path, ctx.app.root_dir) - if pattern is None or fnmatch.fnmatch(name, pattern): - if ctx.args.full_path: - name = pf.path - if ctx.args.metadata: - logger.info("path:%s" % pf.path) - for key, val in pf.metadata.items(): - logger.info("%s:%s" % (key, val)) - logger.info("---") + is_fs_src = isinstance(src, FSContentSourceBase) + items = src.getAllContents() + for item in items: + if ctx.args.metadata: + logger.info("spec:%s" % item.spec) + for key, val in item.metadata.items(): + logger.info("%s:%s" % (key, val)) + logger.info("---") + else: + if is_fs_src: + name = os.path.relpath(item.spec, ctx.app.root_dir) + if pattern is None or fnmatch.fnmatch(name, pattern): + if ctx.args.metadata: + 
logger.info("path:%s" % item.spec) + for key, val in item.metadata.items(): + logger.info("%s:%s" % (key, val)) + logger.info("---") + else: + if ctx.args.full_path: + name = item.spec + logger.info(name) else: - logger.info(name) + if pattern is None or fnmatch.fnmatch(name, pattern): + logger.info(item.spec)
--- a/piecrust/commands/builtin/scaffolding.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/commands/builtin/scaffolding.py Wed May 17 00:11:48 2017 -0700 @@ -1,6 +1,5 @@ import os import os.path -import re import io import time import glob @@ -9,19 +8,13 @@ from piecrust import RESOURCES_DIR from piecrust.chefutil import print_help_item from piecrust.commands.base import ExtendableChefCommand, ChefCommandExtension -from piecrust.sources.base import MODE_CREATING -from piecrust.sources.interfaces import IPreparingSource -from piecrust.uriutil import multi_replace +from piecrust.pathutil import SiteNotFoundError +from piecrust.sources.fs import FSContentSourceBase logger = logging.getLogger(__name__) -def make_title(slug): - slug = re.sub(r'[\-_]', ' ', slug) - return slug.title() - - class PrepareCommand(ExtendableChefCommand): """ Chef command for creating pages with some default content. """ @@ -36,6 +29,8 @@ if app.root_dir is None: return + from piecrust.sources.interfaces import IPreparingSource + subparsers = parser.add_subparsers() for src in app.sources: if not isinstance(src, IPreparingSource): @@ -47,14 +42,16 @@ "source." % src.name) continue p = subparsers.add_parser( - src.item_name, - help=("Creates an empty page in the '%s' source." % - src.name)) + src.config['item_name'], + help=("Creates an empty page in the '%s' source." % + src.name)) src.setupPrepareParser(p, app) p.add_argument('-t', '--template', default='default', help="The template to use, which will change the " - "generated text and header. Run `chef help " - "scaffolding` for more information.") + "generated text and header. Run `chef help " + "scaffolding` for more information.") + p.add_argument('-f', '--force', action='store_true', + help="Overwrite any existing content.") p.set_defaults(source=src) p.set_defaults(sub_func=self._doRun) @@ -68,60 +65,55 @@ ctx.args.sub_func(ctx) def _doRun(self, ctx): + from piecrust.uriutil import multi_replace + if not hasattr(ctx.args, 'source'): raise Exception("No source specified. " "Please run `chef prepare -h` for usage.") app = ctx.app - source = ctx.args.source - metadata = source.buildMetadata(ctx.args) - factory = source.findPageFactory(metadata, MODE_CREATING) - path = factory.path - name, ext = os.path.splitext(path) - if ext == '.*': - path = '%s.%s' % ( - name, - app.config.get('site/default_auto_format')) - if os.path.exists(path): - raise Exception("'%s' already exists." 
% path) - tpl_name = ctx.args.template extensions = self.getExtensions(app) ext = next( - filter( - lambda e: tpl_name in e.getTemplateNames(ctx.app), - extensions), - None) + filter( + lambda e: tpl_name in e.getTemplateNames(app), + extensions), + None) if ext is None: raise Exception("No such page template: %s" % tpl_name) - - tpl_text = ext.getTemplate(ctx.app, tpl_name) + tpl_text = ext.getTemplate(app, tpl_name) if tpl_text is None: raise Exception("Error loading template: %s" % tpl_name) - title = (metadata.get('slug') or metadata.get('path') or - 'Untitled page') - title = make_title(title) - tokens = { - '%title%': title, - '%time.today%': time.strftime('%Y/%m/%d'), - '%time.now%': time.strftime('%H:%M:%S')} - tpl_text = multi_replace(tpl_text, tokens) + + source = ctx.args.source + content_item = source.createContent(ctx.args) - logger.info("Creating page: %s" % os.path.relpath(path, app.root_dir)) - if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path), 0o755) + config_tokens = { + '%title%': "Untitled Content", + '%time.today%': time.strftime('%Y/%m/%d'), + '%time.now%': time.strftime('%H:%M:%S') + } + config = content_item.metadata.get('config') + if config: + for k, v in config.items(): + config_tokens['%%%s%%' % k] = v + tpl_text = multi_replace(tpl_text, config_tokens) - with open(path, 'w') as f: + logger.info("Creating content: %s" % content_item.spec) + mode = 'w' if ctx.args.force else 'x' + with content_item.open(mode) as f: f.write(tpl_text) + # If this was a file-system content item, see if we need to auto-open + # an editor on it. editor = ctx.app.config.get('prepare/editor') editor_type = ctx.app.config.get('prepare/editor_type', 'exe') - if editor: + if editor and isinstance(source, FSContentSourceBase): import shlex shell = False - args = '%s "%s"' % (editor, path) + args = '%s "%s"' % (editor, content_item.spec) if '%path%' in editor: - args = editor.replace('%path%', path) + args = editor.replace('%path%', content_item.spec) if editor_type.lower() == 'shell': shell = True @@ -146,9 +138,9 @@ def getTemplateDescription(self, app, name): descs = { - 'default': "The default template, for a simple page.", - 'rss': "A fully functional RSS feed.", - 'atom': "A fully functional Atom feed."} + 'default': "The default template, for a simple page.", + 'rss': "A fully functional RSS feed.", + 'atom': "A fully functional Atom feed."} return descs[name] def getTemplate(self, app, name): @@ -189,7 +181,7 @@ raise Exception("No such page scaffolding template: %s" % name) if len(matches) > 1: raise Exception( - "More than one scaffolding template has name: %s" % name) + "More than one scaffolding template has name: %s" % name) with open(matches[0], 'r', encoding='utf8') as fp: return fp.read() @@ -214,16 +206,16 @@ help_list = tplh.getvalue() help_txt = ( - textwrap.fill( - "Running the 'prepare' command will let " - "PieCrust setup a page for you in the correct place, with " - "some hopefully useful default text.") + - "\n\n" + - textwrap.fill("The following templates are available:") + - "\n\n" + - help_list + - "\n" + - "You can add user-defined templates by creating pages in a " - "`scaffold/pages` sub-directory in your website.") + textwrap.fill( + "Running the 'prepare' command will let " + "PieCrust setup a page for you in the correct place, with " + "some hopefully useful default text.") + + "\n\n" + + textwrap.fill("The following templates are available:") + + "\n\n" + + help_list + + "\n" + + "You can add user-defined templates by creating 
pages in a " + "`scaffold/pages` sub-directory in your website.") return help_txt
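The prepare flow is thus: ask the source to create the content item, expand the `%...%` tokens in the chosen template, and write through `content_item.open()` ('x' mode makes accidental overwrites fail loudly unless `--force` is given). A small sketch of the token expansion, assuming only that `multi_replace` performs plain substring substitution as it is used above:

import time
from piecrust.uriutil import multi_replace

tpl_text = "---\ntitle: %title%\ndate: %time.today% %time.now%\n---\n"
tokens = {
    '%title%': "Untitled Content",
    '%time.today%': time.strftime('%Y/%m/%d'),
    '%time.now%': time.strftime('%H:%M:%S'),
}
print(multi_replace(tpl_text, tokens))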
--- a/piecrust/commands/builtin/serving.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/commands/builtin/serving.py Wed May 17 00:11:48 2017 -0700 @@ -1,6 +1,5 @@ import logging from piecrust.commands.base import ChefCommand -from piecrust.serving.wrappers import run_werkzeug_server, run_gunicorn_server logger = logging.getLogger(__name__) @@ -15,54 +14,46 @@ def setupParser(self, parser, app): parser.add_argument( - '-p', '--port', - help="The port for the web server", - default=8080) + '-p', '--port', + help="The port for the web server", + default=8080) parser.add_argument( - '-a', '--address', - help="The host for the web server", - default='localhost') + '-a', '--address', + help="The host for the web server", + default='localhost') parser.add_argument( - '--use-reloader', - help="Restart the server when PieCrust code changes", - action='store_true') + '--use-reloader', + help="Restart the server when PieCrust code changes", + action='store_true') parser.add_argument( - '--use-debugger', - help="Show the debugger when an error occurs", - action='store_true') + '--use-debugger', + help="Show the debugger when an error occurs", + action='store_true') parser.add_argument( - '--wsgi', - help="The WSGI server implementation to use", - choices=['werkzeug', 'gunicorn'], - default='werkzeug') + '--wsgi', + help="The WSGI server implementation to use", + choices=['werkzeug', 'gunicorn'], + default='werkzeug') def run(self, ctx): - root_dir = ctx.app.root_dir host = ctx.args.address port = int(ctx.args.port) debug = ctx.args.debug or ctx.args.use_debugger - - from piecrust.app import PieCrustFactory - appfactory = PieCrustFactory( - ctx.app.root_dir, - cache=ctx.app.cache.enabled, - cache_key=ctx.app.cache_key, - config_variant=ctx.config_variant, - config_values=ctx.config_values, - debug=ctx.app.debug, - theme_site=ctx.app.theme_site) + appfactory = ctx.appfactory if ctx.args.wsgi == 'werkzeug': + from piecrust.serving.wrappers import run_werkzeug_server run_werkzeug_server( - appfactory, host, port, - use_debugger=debug, - use_reloader=ctx.args.use_reloader) + appfactory, host, port, + use_debugger=debug, + use_reloader=ctx.args.use_reloader) elif ctx.args.wsgi == 'gunicorn': + from piecrust.serving.wrappers import run_gunicorn_server options = { - 'bind': '%s:%s' % (host, port), - 'accesslog': '-', # print access log to stderr - } + 'bind': '%s:%s' % (host, port), + 'accesslog': '-', # print access log to stderr + } if debug: options['loglevel'] = 'debug' if ctx.args.use_reloader:
--- a/piecrust/configuration.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/configuration.py Wed May 17 00:11:48 2017 -0700 @@ -73,7 +73,7 @@ other_values = other._values else: raise Exception( - "Unsupported value type to merge: %s" % type(other)) + "Unsupported value type to merge: %s" % type(other)) merge_dicts(self._values, other_values, validator=self._validateValue) @@ -96,7 +96,7 @@ return if not isinstance(v, allowed_types): raise ConfigurationError( - "Value '%s' is of forbidden type: %s" % (v, type(v))) + "Value '%s' is of forbidden type: %s" % (v, type(v))) if isinstance(v, dict): self._validateDictTypesRecursive(v, allowed_types) elif isinstance(v, list): @@ -223,7 +223,7 @@ header_regex = re.compile( - r'(---\s*\n)(?P<header>(.*\n)*?)^(---\s*\n)', re.MULTILINE) + r'(---\s*\n)(?P<header>(.*\n)*?)^(---\s*\n)', re.MULTILINE) def parse_config_header(text): @@ -239,17 +239,18 @@ class ConfigurationLoader(SafeLoader): - """ A YAML loader that loads mappings into ordered dictionaries. + """ A YAML loader that loads mappings into ordered dictionaries, + and supports sexagesimal notations for timestamps. """ def __init__(self, *args, **kwargs): super(ConfigurationLoader, self).__init__(*args, **kwargs) self.add_constructor('tag:yaml.org,2002:map', - type(self).construct_yaml_map) + type(self).construct_yaml_map) self.add_constructor('tag:yaml.org,2002:omap', - type(self).construct_yaml_map) + type(self).construct_yaml_map) self.add_constructor('tag:yaml.org,2002:sexagesimal', - type(self).construct_yaml_time) + type(self).construct_yaml_time) def construct_yaml_map(self, node): data = collections.OrderedDict() @@ -259,21 +260,23 @@ def construct_mapping(self, node, deep=False): if not isinstance(node, yaml.MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) + raise ConstructorError( + None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) mapping = collections.OrderedDict() for key_node, value_node in node.value: key = self.construct_object(key_node, deep=deep) if not isinstance(key, collections.Hashable): - raise ConstructorError("while constructing a mapping", node.start_mark, - "found unhashable key", key_node.start_mark) + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) value = self.construct_object(value_node, deep=deep) mapping[key] = value return mapping time_regexp = re.compile( - r'''^(?P<hour>[0-9][0-9]?) + r'''^(?P<hour>[0-9][0-9]?) :(?P<minute>[0-9][0-9]) (:(?P<second>[0-9][0-9]) (\.(?P<fraction>[0-9]+))?)?$''', re.X) @@ -294,10 +297,10 @@ ConfigurationLoader.add_implicit_resolver( - 'tag:yaml.org,2002:sexagesimal', - re.compile(r'''^[0-9][0-9]?:[0-9][0-9] + 'tag:yaml.org,2002:sexagesimal', + re.compile(r'''^[0-9][0-9]?:[0-9][0-9] (:[0-9][0-9](\.[0-9]+)?)?$''', re.X), - list('0123456789')) + list('0123456789')) # We need to add our `sexagesimal` resolver before the `int` one, which @@ -319,5 +322,5 @@ ConfigurationDumper.add_representer(collections.OrderedDict, - ConfigurationDumper.represent_ordered_dict) + ConfigurationDumper.represent_ordered_dict)
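The point of registering the `sexagesimal` resolver before the stock `int` one: plain YAML 1.1 reads `21:35` as the base-60 integer 1295, which is almost never what a config author means. With `ConfigurationLoader`, mappings come back as `OrderedDict` and such values go through `construct_yaml_time` instead. A quick check (the exact output shape is assumed from the constructors above):

import yaml
from piecrust.configuration import ConfigurationLoader

doc = "posts_per_page: 5\nupdated_at: 21:35\n"
data = yaml.load(doc, Loader=ConfigurationLoader)
print(type(data).__name__)   # OrderedDict, from construct_yaml_map
print(data['updated_at'])    # handled by construct_yaml_time, not int 1295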
--- a/piecrust/data/assetor.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/data/assetor.py Wed May 17 00:11:48 2017 -0700 @@ -3,6 +3,7 @@ import shutil import logging from piecrust import ASSET_DIR_SUFFIX +from piecrust.sources.base import REL_ASSETS from piecrust.uriutil import multi_replace @@ -14,7 +15,7 @@ def build_base_url(app, uri, rel_assets_path): - base_url_format = app.env.base_asset_url_format + base_url_format = app.config.get('site/base_asset_url_format') rel_assets_path = rel_assets_path.replace('\\', '/') # Remove any extension since we'll be copying assets into the 1st @@ -24,15 +25,20 @@ uri, _ = os.path.splitext(uri) base_url = multi_replace( - base_url_format, - { - '%path%': rel_assets_path, - '%uri%': uri}) + base_url_format, + { + '%path%': rel_assets_path, + '%uri%': uri}) return base_url.rstrip('/') + '/' -class AssetorBase(object): +class Assetor(object): + debug_render_doc = """Helps render URLs to files in the current page's + asset folder.""" + debug_render = [] + debug_render_dynamic = ['_debugRenderAssetNames'] + def __init__(self, page, uri): self._page = page self._uri = uri @@ -65,44 +71,22 @@ if self._cache is not None: return - self._cache = dict(self.findAssets()) - - def findAssets(self): - raise NotImplementedError() - - def copyAssets(self, dest_dir): - raise NotImplementedError() - -class Assetor(AssetorBase): - debug_render_doc = """Helps render URLs to files in the current page's - asset folder.""" - debug_render = [] - debug_render_dynamic = ['_debugRenderAssetNames'] + self._cache = self.findAssets() or {} def findAssets(self): - assets = {} - name, ext = os.path.splitext(self._page.path) - assets_dir = name + ASSET_DIR_SUFFIX - if not os.path.isdir(assets_dir): - return assets + content_item = self._page.content_item + source = content_item.source + assets = source.getRelatedContent(content_item, REL_ASSETS) + if assets is None: + return {} - rel_assets_dir = os.path.relpath(assets_dir, self._page.app.root_dir) - base_url = build_base_url(self._page.app, self._uri, rel_assets_dir) - for fn in os.listdir(assets_dir): - full_fn = os.path.join(assets_dir, fn) - if not os.path.isfile(full_fn): - raise Exception("Skipping: %s" % full_fn) - continue + app = source.app + stack = app.env.render_ctx_stack + cur_ctx = stack.current_ctx + if cur_ctx is not None: + cur_ctx.current_pass_info.used_assets = True - name, ext = os.path.splitext(fn) - if name in assets: - raise UnsupportedAssetsError( - "Multiple asset files are named '%s'." % name) - assets[name] = (base_url + fn, full_fn) - - cpi = self._page.app.env.exec_info_stack.current_page_info - if cpi is not None: - cpi.render_ctx.current_pass_info.used_assets = True + # base_url = build_base_url(app, self._uri, rel_assets_dir) return assets @@ -115,3 +99,4 @@ dest_ap = os.path.join(dest_dir, fn) logger.debug(" %s -> %s" % (full_fn, dest_ap)) shutil.copy(full_fn, dest_ap) +
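Asset discovery now goes through the source instead of poking at the file system directly: the page's ContentItem is handed back to its own source with the `REL_ASSETS` relationship key imported above. A hedged sketch of that lookup, assuming `getRelatedContent` returns a name-keyed mapping or `None` (which is how `findAssets`'s `or {}` fallback treats it):

from piecrust.sources.base import REL_ASSETS


def list_page_assets(page):
    # Returns the asset names attached to `page`, or an empty list.
    item = page.content_item
    assets = item.source.getRelatedContent(item, REL_ASSETS)
    return sorted(assets) if assets else []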
--- a/piecrust/data/builder.py	Sat Apr 29 21:42:22 2017 -0700
+++ b/piecrust/data/builder.py	Wed May 17 00:11:48 2017 -0700
@@ -1,53 +1,43 @@
 import logging
-from werkzeug.utils import cached_property
 from piecrust.data.base import MergedMapping
 from piecrust.data.linker import PageLinkerData
 from piecrust.data.pagedata import PageData
 from piecrust.data.paginator import Paginator
 from piecrust.data.piecrustdata import PieCrustData
 from piecrust.data.providersdata import DataProvidersData
-from piecrust.routing import CompositeRouteFunction
+from piecrust.routing import RouteFunction


 logger = logging.getLogger(__name__)


 class DataBuildingContext(object):
-    def __init__(self, qualified_page, page_num=1):
-        self.page = qualified_page
-        self.page_num = page_num
+    def __init__(self, qualified_page):
+        self.qualified_page = qualified_page
         self.pagination_source = None
         self.pagination_filter = None

-    @property
-    def app(self):
-        return self.page.app
-
-    @cached_property
-    def uri(self):
-        return self.page.getUri(self.page_num)
-

 def build_page_data(ctx):
-    app = ctx.app
-    page = ctx.page
+    qpage = ctx.qualified_page
+    page = qpage.page
+    app = page.app
     pgn_source = ctx.pagination_source or get_default_pagination_source(page)
-    first_uri = ctx.page.getUri(1)
+    first_uri = qpage.getUri(1)

     pc_data = PieCrustData()
     config_data = PageData(page, ctx)
-    paginator = Paginator(page, pgn_source,
-                          page_num=ctx.page_num,
+    paginator = Paginator(qpage, pgn_source,
                           pgn_filter=ctx.pagination_filter)
     assetor = page.source.buildAssetor(page, first_uri)
     linker = PageLinkerData(page.source, page.rel_path)
     data = {
-            'piecrust': pc_data,
-            'page': config_data,
-            'assets': assetor,
-            'pagination': paginator,
-            'family': linker
-            }
+        'piecrust': pc_data,
+        'page': config_data,
+        'assets': assetor,
+        'pagination': paginator,
+        'family': linker
+    }

     for route in app.routes:
         name = route.func_name
@@ -56,17 +46,13 @@

         func = data.get(name)
         if func is None:
-            func = CompositeRouteFunction()
-            func.addFunc(route)
-            data[name] = func
-        elif isinstance(func, CompositeRouteFunction):
-            func.addFunc(route)
+            data[name] = RouteFunction(route)
         else:
             raise Exception("Route function '%s' collides with an "
                             "existing function or template data." %
                             name)

-    #TODO: handle slugified taxonomy terms.
+    # TODO: handle slugified taxonomy terms.
     site_data = app.config.getAll()
     providers_data = DataProvidersData(page)
@@ -81,7 +67,7 @@
     return data


-def build_layout_data(page, page_data, contents):
+def add_layout_data(page_data, contents):
     for name, txt in contents.items():
         if name in page_data:
             logger.warning("Content segment '%s' will hide existing data." %
--- a/piecrust/data/iterators.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/data/iterators.py Wed May 17 00:11:48 2017 -0700 @@ -1,8 +1,7 @@ import logging -from piecrust.data.filters import PaginationFilter, IsFilterClause, NotClause -from piecrust.environment import AbortedSourceUseError +from piecrust.data.filters import PaginationFilter from piecrust.events import Event -from piecrust.sources.base import PageSource +from piecrust.sources.base import ContentSource, AbortedSourceUseError from piecrust.sources.interfaces import IPaginationSource @@ -146,20 +145,6 @@ if src_it is not None: self._pages = src_it - # If we're currently baking, apply the default baker filter - # to exclude things like draft posts. - if (isinstance(source, PageSource) and - source.app.config.get('baker/is_baking')): - setting_name = source.app.config.get('baker/no_bake_setting', - 'draft') - accessor = self._getSettingAccessor() - draft_filter = PaginationFilter(accessor) - draft_filter.root_clause = NotClause() - draft_filter.root_clause.addClause( - IsFilterClause(setting_name, True)) - self._simpleNonSortedWrap( - PaginationFilterIterator, draft_filter) - # Apply any filter first, before we start sorting or slicing. if pagination_filter is not None: self._simpleNonSortedWrap(PaginationFilterIterator, @@ -325,7 +310,7 @@ if (self._current_page is not None and self._current_page.app.env.abort_source_use and - isinstance(self._source, PageSource)): + isinstance(self._source, ContentSource)): logger.debug("Aborting iteration from %s." % self._current_page.ref_spec) raise AbortedSourceUseError()
--- a/piecrust/data/linker.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/data/linker.py Wed May 17 00:11:48 2017 -0700 @@ -3,7 +3,7 @@ from piecrust.data.iterators import PageIterator from piecrust.data.pagedata import LazyPageConfigLoaderHasNoValue from piecrust.data.paginationdata import PaginationData -from piecrust.sources.interfaces import IPaginationSource, IListableSource +from piecrust.sources.interfaces import IPaginationSource logger = logging.getLogger(__name__) @@ -11,17 +11,17 @@ class PageLinkerData(object): """ Entry template data to get access to related pages from a given - root page. + root page. """ debug_render = ['parent', 'ancestors', 'siblings', 'children', 'root', 'forpath'] debug_render_invoke = ['parent', 'ancestors', 'siblings', 'children', 'root'] debug_render_redirect = { - 'ancestors': '_debugRenderAncestors', - 'siblings': '_debugRenderSiblings', - 'children': '_debugRenderChildren', - 'root': '_debugRenderRoot'} + 'ancestors': '_debugRenderAncestors', + 'siblings': '_debugRenderSiblings', + 'children': '_debugRenderChildren', + 'root': '_debugRenderRoot'} def __init__(self, source, page_path): self._source = source @@ -82,10 +82,6 @@ self._is_loaded = True - is_listable = isinstance(self._source, IListableSource) - if not is_listable: - return - dir_path = self._source.getDirpath(self._root_page_path) self._linker = Linker(self._source, dir_path, root_page_path=self._root_page_path) @@ -260,8 +256,8 @@ item = _LinkedPage(parent_page) item._linker_info.name = parent_name item._linker_info.child_linker = Linker( - self._source, parent_dir_path, - root_page_path=self._root_page_path) + self._source, parent_dir_path, + root_page_path=self._root_page_path) self._parent = LinkedPageData(item) break else: @@ -308,10 +304,6 @@ if self._items is not None: return - is_listable = isinstance(self._source, IListableSource) - if not is_listable: - raise Exception("Source '%s' can't be listed." % self._source.name) - items = list(self._source.listPath(self._dir_path)) self._items = collections.OrderedDict() for is_dir, name, data in items:
--- a/piecrust/data/pagedata.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/data/pagedata.py Wed May 17 00:11:48 2017 -0700 @@ -75,8 +75,8 @@ except Exception as ex: logger.exception(ex) raise Exception( - "Error while loading attribute '%s' for: %s" % - (name, self._page.rel_path)) from ex + "Error while loading attribute '%s' for: %s" % + (name, self._page.rel_path)) from ex # Forget this loader now that it served its purpose. try: @@ -95,8 +95,8 @@ except Exception as ex: logger.exception(ex) raise Exception( - "Error while loading attribute '%s' for: %s" % - (name, self._page.rel_path)) from ex + "Error while loading attribute '%s' for: %s" % + (name, self._page.rel_path)) from ex # We always keep the wildcard loader in the loaders list. return self._values[name] @@ -116,12 +116,14 @@ if not override_existing and attr_name in self._loaders: raise Exception( - "A loader has already been mapped for: %s" % attr_name) + "A loader has already been mapped for: %s" % attr_name) self._loaders[attr_name] = loader def _mapValue(self, attr_name, value, override_existing=False): - loader = lambda _, __: value - self._mapLoader(attr_name, loader, override_existing=override_existing) + self._mapLoader( + attr_name, + lambda _, __: value, + override_existing=override_existing) def _ensureLoaded(self): if self._is_loaded: @@ -133,8 +135,8 @@ except Exception as ex: logger.exception(ex) raise Exception( - "Error while loading data for: %s" % - self._page.rel_path) from ex + "Error while loading data for: %s" % + self._page.rel_path) from ex def _load(self): pass @@ -152,7 +154,7 @@ """ Template data for a page. """ def __init__(self, page, ctx): - super(PageData, self).__init__(page) + super().__init__(page) self._ctx = ctx def _load(self):
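`_mapValue` is just `_mapLoader` with a constant wrapped in a loader; the two-argument lambda matches the `loader(data, attr_name)` call signature used elsewhere in the class. The caching pattern in miniature (a standalone mock, not the PieCrust class):

class LazyData:
    def __init__(self):
        self._values = {}
        self._loaders = {}

    def _mapLoader(self, name, loader):
        self._loaders[name] = loader

    def _mapValue(self, name, value):
        # Same trick as above: a constant becomes a trivial loader.
        self._mapLoader(name, lambda _, __: value)

    def __getitem__(self, name):
        if name not in self._values:
            # Loaders run once; the result is cached and the loader dropped.
            loader = self._loaders.pop(name)
            self._values[name] = loader(self, name)
        return self._values[name]


d = LazyData()
d._mapValue('title', "Hello")
assert d['title'] == "Hello"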
--- a/piecrust/data/paginationdata.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/data/paginationdata.py Wed May 17 00:11:48 2017 -0700 @@ -1,42 +1,28 @@ import time import logging -from piecrust.data.assetor import Assetor from piecrust.data.pagedata import LazyPageConfigData -from piecrust.routing import create_route_metadata -from piecrust.uriutil import split_uri logger = logging.getLogger(__name__) class PaginationData(LazyPageConfigData): - def __init__(self, page): - super(PaginationData, self).__init__(page) - self._route = None - self._route_metadata = None - - def _get_uri(self): - page = self._page - if self._route is None: - # TODO: this is not quite correct, as we're missing parts of the - # route metadata if the current page is a taxonomy page. - route_metadata = create_route_metadata(page) - self._route = page.app.getSourceRoute(page.source.name, route_metadata) - self._route_metadata = route_metadata - if self._route is None: - raise Exception("Can't get route for page: %s" % page.path) - return self._route.getUri(self._route_metadata) + def __init__(self, qualified_page): + super(PaginationData, self).__init__(qualified_page.page) + self._qualified_page = qualified_page def _load(self): + from piecrust.uriutil import split_uri + page = self._page dt = page.datetime - page_url = self._get_uri() + page_url = self._qualified_page.uri _, slug = split_uri(page.app, page_url) self._setValue('url', page_url) self._setValue('slug', slug) self._setValue( - 'timestamp', - time.mktime(page.datetime.timetuple())) + 'timestamp', + time.mktime(page.datetime.timetuple())) self._setValue('datetime', { 'year': dt.year, 'month': dt.month, 'day': dt.day, 'hour': dt.hour, 'minute': dt.minute, 'second': dt.second}) @@ -54,8 +40,8 @@ def _load_rendered_segment(self, data, name): do_render = True - eis = self._page.app.env.exec_info_stack - if eis is not None and eis.hasPage(self._page): + stack = self._page.app.env.render_ctx_stack + if stack.hasPage(self._page): # This is the pagination data for the page that is currently # being rendered! Inception! But this is possible... so just # prevent infinite recursion. @@ -64,20 +50,17 @@ assert self is data if do_render: - uri = self._get_uri() + uri = self._qualified_page.uri try: from piecrust.rendering import ( - QualifiedPage, PageRenderingContext, - render_page_segments) - qp = QualifiedPage(self._page, self._route, - self._route_metadata) - ctx = PageRenderingContext(qp) + RenderingContext, render_page_segments) + ctx = RenderingContext(self._qualified_page) render_result = render_page_segments(ctx) segs = render_result.segments except Exception as ex: logger.exception(ex) raise Exception( - "Error rendering segments for '%s'" % uri) from ex + "Error rendering segments for '%s'" % uri) from ex else: segs = {} for name in self._page.config.get('segments'):
--- a/piecrust/data/paginator.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/data/paginator.py Wed May 17 00:11:48 2017 -0700 @@ -11,23 +11,23 @@ class Paginator(object): debug_render = [ - 'has_more', 'items', 'has_items', 'items_per_page', - 'items_this_page', 'prev_page_number', 'this_page_number', - 'next_page_number', 'prev_page', 'next_page', - 'total_item_count', 'total_page_count', - 'next_item', 'prev_item'] + 'has_more', 'items', 'has_items', 'items_per_page', + 'items_this_page', 'prev_page_number', 'this_page_number', + 'next_page_number', 'prev_page', 'next_page', + 'total_item_count', 'total_page_count', + 'next_item', 'prev_item'] debug_render_invoke = [ - 'has_more', 'items', 'has_items', 'items_per_page', - 'items_this_page', 'prev_page_number', 'this_page_number', - 'next_page_number', 'prev_page', 'next_page', - 'total_item_count', 'total_page_count', - 'next_item', 'prev_item'] + 'has_more', 'items', 'has_items', 'items_per_page', + 'items_this_page', 'prev_page_number', 'this_page_number', + 'next_page_number', 'prev_page', 'next_page', + 'total_item_count', 'total_page_count', + 'next_item', 'prev_item'] def __init__(self, qualified_page, source, *, - page_num=1, pgn_filter=None, items_per_page=-1): + pgn_filter=None, items_per_page=-1): self._parent_page = qualified_page + self._page_num = qualified_page.page_num self._source = source - self._page_num = page_num self._iterator = None self._pgn_filter = pgn_filter self._items_per_page = items_per_page @@ -89,7 +89,7 @@ if self._items_per_page > 0: return self._items_per_page if self._parent_page: - ipp = self._parent_page.config.get('items_per_page') + ipp = self._parent_page.page.config.get('items_per_page') if ipp is not None: return ipp if isinstance(self._source, IPaginationSource): @@ -195,11 +195,11 @@ if self._parent_page: current_page = self._parent_page.page self._iterator = PageIterator( - self._source, - current_page=current_page, - pagination_filter=pag_filter, - offset=offset, limit=self.items_per_page, - locked=True) + self._source, + current_page=current_page, + pagination_filter=pag_filter, + offset=offset, limit=self.items_per_page, + locked=True) self._iterator._iter_event += self._onIteration def _getPaginationFilter(self):
--- a/piecrust/environment.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/environment.py Wed May 17 00:11:48 2017 -0700 @@ -1,68 +1,23 @@ import time import logging import contextlib -from piecrust.cache import MemCache logger = logging.getLogger(__name__) -class AbortedSourceUseError(Exception): - pass - - -class ExecutionInfo(object): - def __init__(self, page, render_ctx): - self.page = page - self.render_ctx = render_ctx - self.was_cache_valid = False - self.start_time = time.perf_counter() - - -class ExecutionInfoStack(object): - def __init__(self): - self._page_stack = [] - - @property - def current_page_info(self): - if len(self._page_stack) == 0: - return None - return self._page_stack[-1] - - @property - def is_main_page(self): - return len(self._page_stack) == 1 - - def hasPage(self, page): - for ei in self._page_stack: - if ei.page == page: - return True - return False - - def pushPage(self, page, render_ctx): - if len(self._page_stack) > 0: - top = self._page_stack[-1] - assert top.page is not page - self._page_stack.append(ExecutionInfo(page, render_ctx)) - - def popPage(self): - del self._page_stack[-1] - - def clear(self): - self._page_stack = [] - - -class ExecutionStats(object): +class ExecutionStats: def __init__(self): self.timers = {} self.counters = {} self.manifests = {} - def registerTimer(self, category, *, raise_if_registered=True): + def registerTimer(self, category, *, + raise_if_registered=True, time=0): if raise_if_registered and category in self.timers: raise Exception("Timer '%s' has already been registered." % category) - self.timers[category] = 0 + self.timers[category] = time @contextlib.contextmanager def timerScope(self, category): @@ -106,83 +61,42 @@ self.manifests[oc] = v + ov -class Environment(object): +class Environment: def __init__(self): + from piecrust.cache import MemCache + from piecrust.rendering import RenderingContextStack + self.app = None self.start_time = None - self.exec_info_stack = ExecutionInfoStack() self.was_cache_cleaned = False - self.base_asset_url_format = '%uri%' self.page_repository = MemCache() self.rendered_segments_repository = MemCache() - self.fs_caches = { - 'renders': self.rendered_segments_repository} + self.render_ctx_stack = RenderingContextStack() self.fs_cache_only_for_main_page = False self.abort_source_use = False self._default_layout_extensions = None self._stats = ExecutionStats() @property - def default_layout_extensions(self): - if self._default_layout_extensions is not None: - return self._default_layout_extensions - - if self.app is None: - raise Exception("This environment has not been initialized yet.") - - from piecrust.rendering import get_template_engine - dte = get_template_engine(self.app, None) - self._default_layout_extensions = ['.' 
+ e.lstrip('.') - for e in dte.EXTENSIONS] - return self._default_layout_extensions + def stats(self): + return self._stats def initialize(self, app): self.app = app self.start_time = time.perf_counter() - self.exec_info_stack.clear() - self.was_cache_cleaned = False - self.base_asset_url_format = '%uri%' - for name, repo in self.fs_caches.items(): - cache = app.cache.getCache(name) - repo.fs_cache = cache - - def registerTimer(self, category, *, raise_if_registered=True): - self._stats.registerTimer( - category, raise_if_registered=raise_if_registered) - - def timerScope(self, category): - return self._stats.timerScope(category) - - def stepTimer(self, category, value): - self._stats.stepTimer(category, value) + self.rendered_segments_repository.fs_cache = \ + app.cache.getCache('renders') - def stepTimerSince(self, category, since): - self._stats.stepTimerSince(category, since) - - def registerCounter(self, category, *, raise_if_registered=True): - self._stats.registerCounter( - category, raise_if_registered=raise_if_registered) - - def stepCounter(self, category, inc=1): - self._stats.stepCounter(category, inc) - - def registerManifest(self, name, *, raise_if_registered=True): - self._stats.registerManifest( - name, raise_if_registered=raise_if_registered) - - def addManifestEntry(self, name, entry): - self._stats.addManifestEntry(name, entry) - - def getStats(self): + def _mergeCacheStats(self): repos = [ - ('RenderedSegmentsRepo', self.rendered_segments_repository), - ('PagesRepo', self.page_repository)] + ('RenderedSegmentsRepo', self.rendered_segments_repository), + ('PagesRepo', self.page_repository)] for name, repo in repos: self._stats.counters['%s_hit' % name] = repo._hits self._stats.counters['%s_miss' % name] = repo._misses - self._stats.manifests['%s_missedKeys' % name] = list(repo._missed_keys) - return self._stats + self._stats.manifests['%s_missedKeys' % name] = \ + list(repo._missed_keys) class StandardEnvironment(Environment):
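All timing, counter, and manifest bookkeeping now lives on `env.stats` (an `ExecutionStats`) rather than on the environment itself, which is why every `env.registerTimer(...)` call site in this changeset became `env.stats.registerTimer(...)`. A usage sketch, limited to methods visible in this changeset:

import time
from piecrust.environment import ExecutionStats

stats = ExecutionStats()

stats.registerTimer('PageRender')
with stats.timerScope('PageRender'):
    time.sleep(0.01)          # stand-in for real work

start = time.perf_counter()
stats.registerTimer('Total', raise_if_registered=False)
stats.stepTimerSince('Total', start)
print(stats.timers)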
--- a/piecrust/fastpickle.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/fastpickle.py Wed May 17 00:11:48 2017 -0700 @@ -102,7 +102,7 @@ 'day': obj.day} elif op == _UNPICKLING: return datetime.date( - obj['year'], obj['month'], obj['day']) + obj['year'], obj['month'], obj['day']) def _datetime_convert(obj, func, op): @@ -117,8 +117,8 @@ 'microsecond': obj.microsecond} elif op == _UNPICKLING: return datetime.datetime( - obj['year'], obj['month'], obj['day'], - obj['hour'], obj['minute'], obj['second'], obj['microsecond']) + obj['year'], obj['month'], obj['day'], + obj['hour'], obj['minute'], obj['second'], obj['microsecond']) def _time_convert(obj, func, op): @@ -130,47 +130,47 @@ 'microsecond': obj.microsecond} elif op == _UNPICKLING: return datetime.time( - obj['hour'], obj['minute'], obj['second'], obj['microsecond']) + obj['hour'], obj['minute'], obj['second'], obj['microsecond']) _type_convert = { - type(None): _identity_dispatch, - bool: _identity_dispatch, - int: _identity_dispatch, - float: _identity_dispatch, - str: _identity_dispatch, - datetime.date: _date_convert, - datetime.datetime: _datetime_convert, - datetime.time: _time_convert, - tuple: _tuple_convert, - list: _list_convert, - dict: _dict_convert, - set: _set_convert, - collections.OrderedDict: _ordered_dict_convert, - } + type(None): _identity_dispatch, + bool: _identity_dispatch, + int: _identity_dispatch, + float: _identity_dispatch, + str: _identity_dispatch, + datetime.date: _date_convert, + datetime.datetime: _datetime_convert, + datetime.time: _time_convert, + tuple: _tuple_convert, + list: _list_convert, + dict: _dict_convert, + set: _set_convert, + collections.OrderedDict: _ordered_dict_convert, +} _type_unconvert = { - type(None): _identity_dispatch, - bool: _identity_dispatch, - int: _identity_dispatch, - float: _identity_dispatch, - str: _identity_dispatch, - 'date': _date_convert, - 'datetime': _datetime_convert, - 'time': _time_convert, - } + type(None): _identity_dispatch, + bool: _identity_dispatch, + int: _identity_dispatch, + float: _identity_dispatch, + str: _identity_dispatch, + 'date': _date_convert, + 'datetime': _datetime_convert, + 'time': _time_convert, +} _collection_unconvert = { - '__type__:tuple': _tuple_convert, - '__type__:set': _set_convert, - } + '__type__:tuple': _tuple_convert, + '__type__:set': _set_convert, +} _mapping_unconvert = { - 'OrderedDict': _ordered_dict_convert - } + 'OrderedDict': _ordered_dict_convert +} def _pickle_object(obj):
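The tables above are the whole dispatch mechanism: a value's runtime type picks a converter, and the same converter runs in both directions, keyed by an operation flag. A standalone sketch of the pattern (signatures simplified; the real converters also take the recursive pickling function as a second argument):

import datetime

_PICKLING, _UNPICKLING = range(2)


def _date_convert(obj, op):
    # To primitives on the way out, back to a date on the way in.
    if op == _PICKLING:
        return {'year': obj.year, 'month': obj.month, 'day': obj.day}
    return datetime.date(obj['year'], obj['month'], obj['day'])


flat = _date_convert(datetime.date(2017, 5, 17), _PICKLING)
assert _date_convert(flat, _UNPICKLING) == datetime.date(2017, 5, 17)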
--- a/piecrust/generation/base.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,163 +0,0 @@ -import logging -from werkzeug.utils import cached_property -from piecrust.baking.records import BakeRecordEntry -from piecrust.baking.worker import save_factory, JOB_BAKE -from piecrust.configuration import ConfigurationError -from piecrust.routing import create_route_metadata -from piecrust.sources.pageref import PageRef - - -logger = logging.getLogger(__name__) - - -class InvalidRecordExtraKey(Exception): - pass - - -class PageGeneratorBakeContext(object): - def __init__(self, app, record, pool, generator): - self._app = app - self._record = record - self._pool = pool - self._generator = generator - self._job_queue = [] - self._is_running = False - - def getRecordExtraKey(self, seed): - return '%s:%s' % (self._generator.name, seed) - - def matchesRecordExtraKey(self, extra_key): - return (extra_key is not None and - extra_key.startswith(self._generator.name + ':')) - - def getSeedFromRecordExtraKey(self, extra_key): - if not self.matchesRecordExtraKey(extra_key): - raise InvalidRecordExtraKey("Invalid extra key: %s" % extra_key) - return extra_key[len(self._generator.name) + 1:] - - def getAllPageRecords(self): - return self._record.transitions.values() - - def getBakedPageRecords(self): - for prev, cur in self.getAllPageRecords(): - if cur and cur.was_any_sub_baked: - yield (prev, cur) - - def collapseRecord(self, entry): - self._record.collapseEntry(entry) - - def queueBakeJob(self, page_fac, route, extra_route_metadata, seed): - if self._is_running: - raise Exception("The job queue is running.") - - extra_key = self.getRecordExtraKey(seed) - entry = BakeRecordEntry( - page_fac.source.name, - page_fac.path, - extra_key) - self._record.addEntry(entry) - - page = page_fac.buildPage() - route_metadata = create_route_metadata(page) - route_metadata.update(extra_route_metadata) - uri = route.getUri(route_metadata) - override_entry = self._record.getOverrideEntry(page.path, uri) - if override_entry is not None: - override_source = self.app.getSource( - override_entry.source_name) - if override_source.realm == page_fac.source.realm: - entry.errors.append( - "Page '%s' maps to URL '%s' but is overriden " - "by page '%s'." 
% - (page_fac.ref_spec, uri, override_entry.path)) - logger.error(entry.errors[-1]) - entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN - return - - route_index = self._app.routes.index(route) - job = { - 'type': JOB_BAKE, - 'job': { - 'factory_info': save_factory(page_fac), - 'generator_name': self._generator.name, - 'generator_record_key': extra_key, - 'route_index': route_index, - 'route_metadata': route_metadata, - 'dirty_source_names': self._record.dirty_source_names, - 'needs_config': True - } - } - self._job_queue.append(job) - - def runJobQueue(self): - def _handler(res): - entry = self._record.getCurrentEntry( - res['path'], res['generator_record_key']) - entry.config = res['config'] - entry.subs = res['sub_entries'] - if res['errors']: - entry.errors += res['errors'] - if entry.has_any_error: - self._record.current.success = False - - self._is_running = True - try: - ar = self._pool.queueJobs(self._job_queue, handler=_handler) - ar.wait() - finally: - self._is_running = False - - -class PageGenerator(object): - def __init__(self, app, name, config): - self.app = app - self.name = name - self.config = config or {} - - self.source_name = config.get('source') - if self.source_name is None: - raise ConfigurationError( - "Generator '%s' requires a source name" % name) - - page_ref = config.get('page') - if page_ref is None: - raise ConfigurationError( - "Generator '%s' requires a listing page ref." % name) - self.page_ref = PageRef(app, page_ref) - - self.data_endpoint = config.get('data_endpoint') - self.data_type = config.get('data_type') - if self.data_endpoint and not self.data_type: - raise ConfigurationError( - "Generator '%s' requires a data type because it has " - "a data endpoint." % name) - - self._provider_type = None - - @cached_property - def source(self): - for src in self.app.sources: - if src.name == self.source_name: - return src - raise Exception("Can't find source '%s' for generator '%s'." % ( - self.source_name, self.name)) - - def getSupportedRouteParameters(self): - raise NotImplementedError() - - def getPageFactory(self, route_metadata): - # This will raise `PageNotFoundError` naturally if not found. - return self.page_ref.getFactory() - - def bake(self, ctx): - raise NotImplementedError() - - def onRouteFunctionUsed(self, route, route_metadata): - pass - - def buildDataProvider(self, page, override): - if not self._provider_type: - from piecrust.data.provider import get_data_provider_class - self._provider_type = get_data_provider_class(self.app, - self.data_type) - return self._provider_type(self, page, override)
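The deleted generator machinery namespaced its bake record entries with `<generator name>:<seed>` extra keys. Here is that scheme, pulled out of the removed `getRecordExtraKey` and `getSeedFromRecordExtraKey` methods into a standalone sketch:

    def make_extra_key(generator_name, seed):
        # '%s:%s' pattern, as in the deleted getRecordExtraKey().
        return '%s:%s' % (generator_name, seed)

    def get_seed(generator_name, extra_key):
        # Inverse operation, as in the deleted getSeedFromRecordExtraKey().
        prefix = generator_name + ':'
        if not extra_key.startswith(prefix):
            raise ValueError("Invalid extra key: %s" % extra_key)
        return extra_key[len(prefix):]

    key = make_extra_key('blog_archives', '2017')
    assert get_seed('blog_archives', key) == '2017'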
--- a/piecrust/generation/blogarchives.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ -import logging -import datetime -from piecrust.chefutil import format_timed_scope -from piecrust.data.filters import PaginationFilter, IFilterClause -from piecrust.data.iterators import PageIterator -from piecrust.generation.base import PageGenerator, InvalidRecordExtraKey -from piecrust.routing import RouteParameter - - -logger = logging.getLogger(__name__) - - -class BlogArchivesPageGenerator(PageGenerator): - GENERATOR_NAME = 'blog_archives' - - def __init__(self, app, name, config): - super(BlogArchivesPageGenerator, self).__init__(app, name, config) - - def getSupportedRouteParameters(self): - return [RouteParameter('year', RouteParameter.TYPE_INT4)] - - def onRouteFunctionUsed(self, route, route_metadata): - pass - - def prepareRenderContext(self, ctx): - ctx.pagination_source = self.source - - year = ctx.page.route_metadata.get('year') - if year is None: - raise Exception( - "Can't find the archive year in the route metadata") - if type(year) is not int: - raise Exception( - "The route for generator '%s' should specify an integer " - "parameter for 'year'." % self.name) - - flt = PaginationFilter() - flt.addClause(IsFromYearFilterClause(year)) - ctx.pagination_filter = flt - - ctx.custom_data['year'] = year - - flt2 = PaginationFilter() - flt2.addClause(IsFromYearFilterClause(year)) - it = PageIterator(self.source, pagination_filter=flt2, - sorter=_date_sorter) - ctx.custom_data['archives'] = it - - def bake(self, ctx): - if not self.page_ref.exists: - logger.debug( - "No page found at '%s', skipping %s archives." % - (self.page_ref, self.source_name)) - return - - logger.debug("Baking %s archives...", self.source_name) - with format_timed_scope(logger, 'gathered archive years', - level=logging.DEBUG, colored=False): - all_years, dirty_years = self._buildDirtyYears(ctx) - - with format_timed_scope(logger, "baked %d %s archives." % - (len(dirty_years), self.source_name)): - self._bakeDirtyYears(ctx, all_years, dirty_years) - - def _buildDirtyYears(self, ctx): - logger.debug("Gathering dirty post years.") - all_years = set() - dirty_years = set() - for _, cur_entry in ctx.getAllPageRecords(): - if cur_entry and cur_entry.source_name == self.source_name: - dt = datetime.datetime.fromtimestamp(cur_entry.timestamp) - all_years.add(dt.year) - if cur_entry.was_any_sub_baked: - dirty_years.add(dt.year) - return all_years, dirty_years - - def _bakeDirtyYears(self, ctx, all_years, dirty_years): - route = self.app.getGeneratorRoute(self.name) - if route is None: - raise Exception( - "No routes have been defined for generator: %s" % - self.name) - - logger.debug("Using archive page: %s" % self.page_ref) - fac = self.page_ref.getFactory() - - for y in dirty_years: - extra_route_metadata = {'year': y} - - logger.debug("Queuing: %s [%s]" % (fac.ref_spec, y)) - ctx.queueBakeJob(fac, route, extra_route_metadata, str(y)) - ctx.runJobQueue() - - # Create bake entries for the years that were *not* dirty. - # Otherwise, when checking for deleted pages, we would not find any - # outputs and would delete those files. - all_str_years = [str(y) for y in all_years] - for prev_entry, cur_entry in ctx.getAllPageRecords(): - if prev_entry and not cur_entry: - try: - y = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) - except InvalidRecordExtraKey: - continue - if y in all_str_years: - logger.debug( - "Creating unbaked entry for year %s archive." 
% y) - ctx.collapseRecord(prev_entry) - else: - logger.debug( - "No page references year %s anymore." % y) - - -class IsFromYearFilterClause(IFilterClause): - def __init__(self, year): - self.year = year - - def pageMatches(self, fil, page): - return (page.datetime.year == self.year) - - -def _date_sorter(it): - return sorted(it, key=lambda x: x.datetime) -
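The dirty-year bookkeeping in the deleted `_buildDirtyYears` reduces to a few lines. In this sketch, `records` is a hypothetical simplification of the bake record entries it iterated over, reduced to `(timestamp, was_any_sub_baked)` pairs:

    import datetime

    def build_dirty_years(records):
        all_years = set()
        dirty_years = set()
        for timestamp, was_baked in records:
            year = datetime.datetime.fromtimestamp(timestamp).year
            all_years.add(year)
            if was_baked:
                dirty_years.add(year)
        return all_years, dirty_years

    def ts(year):
        return datetime.datetime(year, 6, 1).timestamp()

    print(build_dirty_years([(ts(2016), False), (ts(2017), True)]))
    # ({2016, 2017}, {2017})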
--- a/piecrust/generation/taxonomy.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,427 +0,0 @@ -import re -import time -import logging -import unidecode -from piecrust.chefutil import format_timed, format_timed_scope -from piecrust.configuration import ConfigurationError -from piecrust.data.filters import ( - PaginationFilter, SettingFilterClause, - page_value_accessor) -from piecrust.generation.base import PageGenerator, InvalidRecordExtraKey -from piecrust.routing import RouteParameter - - -logger = logging.getLogger(__name__) - - -SLUGIFY_ENCODE = 1 -SLUGIFY_TRANSLITERATE = 2 -SLUGIFY_LOWERCASE = 4 -SLUGIFY_DOT_TO_DASH = 8 -SLUGIFY_SPACE_TO_DASH = 16 - - -re_first_dot_to_dash = re.compile(r'^\.+') -re_dot_to_dash = re.compile(r'\.+') -re_space_to_dash = re.compile(r'\s+') - - -class Taxonomy(object): - def __init__(self, name, config): - self.name = name - self.config = config - self.term_name = config.get('term', name) - self.is_multiple = bool(config.get('multiple', False)) - self.separator = config.get('separator', '/') - self.page_ref = config.get('page') - - @property - def setting_name(self): - if self.is_multiple: - return self.name - return self.term_name - - -class TaxonomyPageGenerator(PageGenerator): - """ A page generator that handles taxonomies, _i.e._ lists of keywords - that pages are labelled with, and for which we need to generate - listing pages. - """ - GENERATOR_NAME = 'taxonomy' - - def __init__(self, app, name, config): - super(TaxonomyPageGenerator, self).__init__(app, name, config) - - tax_name = config.get('taxonomy') - if tax_name is None: - raise ConfigurationError( - "Generator '%s' requires a taxonomy name." % name) - tax_config = app.config.get('site/taxonomies/' + tax_name) - if tax_config is None: - raise ConfigurationError( - "Error initializing generator '%s', no such taxonomy: %s", - (name, tax_name)) - self.taxonomy = Taxonomy(tax_name, tax_config) - - sm = config.get('slugify_mode') - if not sm: - sm = app.config.get('site/slugify_mode', 'encode') - self.slugify_mode = _parse_slugify_mode(sm) - self.slugifier = _Slugifier(self.taxonomy, self.slugify_mode) - - def getSupportedRouteParameters(self): - name = self.taxonomy.term_name - param_type = (RouteParameter.TYPE_PATH if self.taxonomy.is_multiple - else RouteParameter.TYPE_STRING) - return [RouteParameter(name, param_type, - variadic=self.taxonomy.is_multiple)] - - def slugify(self, term): - return self.slugifier.slugify(term) - - def slugifyMultiple(self, terms): - return self.slugifier.slugifyMultiple(terms) - - def prepareRenderContext(self, ctx): - # Set the pagination source as the source we're generating for. - ctx.pagination_source = self.source - - # Get the taxonomy terms from the route metadata... this can come from - # the browser's URL (while serving) or from the baking (see `bake` - # method below). In both cases, we expect to have the *slugified* - # version of the term, because we're going to set a filter that also - # slugifies the terms found on each page. - # - # This is because: - # * while serving, we get everything from the request URL, so we only - # have the slugified version. - # * if 2 slightly different terms "collide" into the same slugified - # term, we'll get a merge of the 2 on the listing page, which is - # what the user expects. - # - tax_terms, is_combination = self._getTaxonomyTerms( - ctx.page.route_metadata) - self._setTaxonomyFilter(ctx, tax_terms, is_combination) - - # Add some custom data for rendering. 
- ctx.custom_data.update({ - self.taxonomy.term_name: tax_terms, - 'is_multiple_%s' % self.taxonomy.term_name: is_combination}) - # Add some "plural" version of the term... so for instance, if this - # is the "tags" taxonomy, "tag" will have one term most of the time, - # except when it's a combination. Here, we add "tags" as something that - # is always a tuple, even when it's not a combination. - if (self.taxonomy.is_multiple and - self.taxonomy.name != self.taxonomy.term_name): - mult_val = tax_terms - if not is_combination: - mult_val = (mult_val,) - ctx.custom_data[self.taxonomy.name] = mult_val - - def _getTaxonomyTerms(self, route_metadata): - # Get the individual slugified terms from the route metadata. - all_values = route_metadata.get(self.taxonomy.term_name) - if all_values is None: - raise Exception("'%s' values couldn't be found in route metadata" % - self.taxonomy.term_name) - - # If it's a "multiple" taxonomy, we need to potentially split the - # route value into the individual terms (_e.g._ when listing all pages - # that have 2 given tags, we need to get each of those 2 tags). - if self.taxonomy.is_multiple: - sep = self.taxonomy.separator - if sep in all_values: - return tuple(all_values.split(sep)), True - # Not a "multiple" taxonomy, so there's only the one value. - return all_values, False - - def _setTaxonomyFilter(self, ctx, term_value, is_combination): - # Set up the filter that will check the pages' terms. - flt = PaginationFilter(value_accessor=page_value_accessor) - flt.addClause(HasTaxonomyTermsFilterClause( - self.taxonomy, self.slugify_mode, term_value, is_combination)) - ctx.pagination_filter = flt - - def onRouteFunctionUsed(self, route, route_metadata): - # Get the values, and slugify them appropriately. - values = route_metadata[self.taxonomy.term_name] - if self.taxonomy.is_multiple: - # TODO: here we assume the route has been properly configured. - slugified_values = self.slugifyMultiple((str(v) for v in values)) - route_val = self.taxonomy.separator.join(slugified_values) - else: - slugified_values = self.slugify(str(values)) - route_val = slugified_values - - # We need to register this use of a taxonomy term. - eis = self.app.env.exec_info_stack - cpi = eis.current_page_info.render_ctx.current_pass_info - if cpi: - utt = cpi.getCustomInfo('used_taxonomy_terms', [], True) - utt.append(slugified_values) - - # Put the slugified values in the route metadata so they're used to - # generate the URL. - route_metadata[self.taxonomy.term_name] = route_val - - def bake(self, ctx): - if not self.page_ref.exists: - logger.debug( - "No page found at '%s', skipping taxonomy '%s'." % - (self.page_ref, self.taxonomy.name)) - return - - logger.debug("Baking %s pages...", self.taxonomy.name) - analyzer = _TaxonomyTermsAnalyzer(self.source_name, self.taxonomy, - self.slugify_mode) - with format_timed_scope(logger, 'gathered taxonomy terms', - level=logging.DEBUG, colored=False): - analyzer.analyze(ctx) - - start_time = time.perf_counter() - page_count = self._bakeTaxonomyTerms(ctx, analyzer) - if page_count > 0: - logger.info(format_timed( - start_time, - "baked %d %s pages for %s." % ( - page_count, self.taxonomy.term_name, self.source_name))) - - def _bakeTaxonomyTerms(self, ctx, analyzer): - # Start baking those terms. 
- logger.debug( - "Baking '%s' for source '%s': %d terms" % - (self.taxonomy.name, self.source_name, - len(analyzer.dirty_slugified_terms))) - - route = self.app.getGeneratorRoute(self.name) - if route is None: - raise Exception("No routes have been defined for generator: %s" % - self.name) - - logger.debug("Using taxonomy page: %s" % self.page_ref) - fac = self.page_ref.getFactory() - - job_count = 0 - for slugified_term in analyzer.dirty_slugified_terms: - extra_route_metadata = { - self.taxonomy.term_name: slugified_term} - - # Use the slugified term as the record's extra key seed. - logger.debug( - "Queuing: %s [%s=%s]" % - (fac.ref_spec, self.taxonomy.name, slugified_term)) - ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term) - job_count += 1 - ctx.runJobQueue() - - # Now we create bake entries for all the terms that were *not* dirty. - # This is because otherwise, on the next incremental bake, we wouldn't - # find any entry for those things, and figure that we need to delete - # their outputs. - for prev_entry, cur_entry in ctx.getAllPageRecords(): - # Only consider taxonomy-related entries that don't have any - # current version (i.e. they weren't baked just now). - if prev_entry and not cur_entry: - try: - t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) - except InvalidRecordExtraKey: - continue - - if analyzer.isKnownSlugifiedTerm(t): - logger.debug("Creating unbaked entry for %s term: %s" % - (self.name, t)) - ctx.collapseRecord(prev_entry) - else: - logger.debug("Term %s in %s isn't used anymore." % - (self.name, t)) - - return job_count - - -class HasTaxonomyTermsFilterClause(SettingFilterClause): - def __init__(self, taxonomy, slugify_mode, value, is_combination): - super(HasTaxonomyTermsFilterClause, self).__init__( - taxonomy.setting_name, value) - self._taxonomy = taxonomy - self._is_combination = is_combination - self._slugifier = _Slugifier(taxonomy, slugify_mode) - - def pageMatches(self, fil, page): - if self._taxonomy.is_multiple: - # Multiple taxonomy, i.e. it supports multiple terms, like tags. - page_values = fil.value_accessor(page, self.name) - if page_values is None or not isinstance(page_values, list): - return False - - page_set = set(map(self._slugifier.slugify, page_values)) - if self._is_combination: - # Multiple taxonomy, and multiple terms to match. Check that - # the ones to match are all in the page's terms. - value_set = set(self.value) - return value_set.issubset(page_set) - else: - # Multiple taxonomy, one term to match. - return self.value in page_set - else: - # Single taxonomy. Just compare the values. - page_value = fil.value_accessor(page, self.name) - if page_value is None: - return False - page_value = self._slugifier.slugify(page_value) - return page_value == self.value - - -class _TaxonomyTermsAnalyzer(object): - def __init__(self, source_name, taxonomy, slugify_mode): - self.source_name = source_name - self.taxonomy = taxonomy - self.slugifier = _Slugifier(taxonomy, slugify_mode) - self._all_terms = {} - self._single_dirty_slugified_terms = set() - self._all_dirty_slugified_terms = None - - @property - def dirty_slugified_terms(self): - """ Returns the slugified terms that have been 'dirtied' during - this bake. - """ - return self._all_dirty_slugified_terms - - def isKnownSlugifiedTerm(self, term): - """ Returns whether the given slugified term has been seen during - this bake. 
- """ - return term in self._all_terms - - def analyze(self, ctx): - # Build the list of terms for our taxonomy, and figure out which ones - # are 'dirty' for the current bake. - # - # Remember all terms used. - for _, cur_entry in ctx.getAllPageRecords(): - if cur_entry and not cur_entry.was_overriden: - cur_terms = cur_entry.config.get(self.taxonomy.setting_name) - if cur_terms: - if not self.taxonomy.is_multiple: - self._addTerm(cur_entry.path, cur_terms) - else: - self._addTerms(cur_entry.path, cur_terms) - - # Re-bake all taxonomy terms that include new or changed pages, by - # marking them as 'dirty'. - for prev_entry, cur_entry in ctx.getBakedPageRecords(): - if cur_entry.source_name != self.source_name: - continue - - entries = [cur_entry] - if prev_entry: - entries.append(prev_entry) - - for e in entries: - entry_terms = e.config.get(self.taxonomy.setting_name) - if entry_terms: - if not self.taxonomy.is_multiple: - self._single_dirty_slugified_terms.add( - self.slugifier.slugify(entry_terms)) - else: - self._single_dirty_slugified_terms.update( - (self.slugifier.slugify(t) - for t in entry_terms)) - - self._all_dirty_slugified_terms = list( - self._single_dirty_slugified_terms) - logger.debug("Gathered %d dirty taxonomy terms", - len(self._all_dirty_slugified_terms)) - - # Re-bake the combination pages for terms that are 'dirty'. - # We make all terms into tuple, even those that are not actual - # combinations, so that we have less things to test further down the - # line. - # - # Add the combinations to that list. We get those combinations from - # wherever combinations were used, so they're coming from the - # `onRouteFunctionUsed` method. - if self.taxonomy.is_multiple: - known_combinations = set() - for _, cur_entry in ctx.getAllPageRecords(): - if cur_entry: - used_terms = _get_all_entry_taxonomy_terms(cur_entry) - for terms in used_terms: - if len(terms) > 1: - known_combinations.add(terms) - - dcc = 0 - for terms in known_combinations: - if not self._single_dirty_slugified_terms.isdisjoint( - set(terms)): - self._all_dirty_slugified_terms.append( - self.taxonomy.separator.join(terms)) - dcc += 1 - logger.debug("Gathered %d term combinations, with %d dirty." % - (len(known_combinations), dcc)) - - def _addTerms(self, entry_path, terms): - for t in terms: - self._addTerm(entry_path, t) - - def _addTerm(self, entry_path, term): - st = self.slugifier.slugify(term) - orig_terms = self._all_terms.setdefault(st, []) - if orig_terms and orig_terms[0] != term: - logger.warning( - "Term '%s' in '%s' is slugified to '%s' which conflicts with " - "previously existing '%s'. The two will be merged." 
% - (term, entry_path, st, orig_terms[0])) - orig_terms.append(term) - - -def _get_all_entry_taxonomy_terms(entry): - res = set() - for o in entry.subs: - for pinfo in o.render_info: - if pinfo: - terms = pinfo.getCustomInfo('used_taxonomy_terms') - if terms: - res |= set(terms) - return res - - -class _Slugifier(object): - def __init__(self, taxonomy, mode): - self.taxonomy = taxonomy - self.mode = mode - - def slugifyMultiple(self, terms): - return tuple(map(self.slugify, terms)) - - def slugify(self, term): - if self.mode & SLUGIFY_TRANSLITERATE: - term = unidecode.unidecode(term) - if self.mode & SLUGIFY_LOWERCASE: - term = term.lower() - if self.mode & SLUGIFY_DOT_TO_DASH: - term = re_first_dot_to_dash.sub('', term) - term = re_dot_to_dash.sub('-', term) - if self.mode & SLUGIFY_SPACE_TO_DASH: - term = re_space_to_dash.sub('-', term) - return term - - -def _parse_slugify_mode(value): - mapping = { - 'encode': SLUGIFY_ENCODE, - 'transliterate': SLUGIFY_TRANSLITERATE, - 'lowercase': SLUGIFY_LOWERCASE, - 'dot_to_dash': SLUGIFY_DOT_TO_DASH, - 'space_to_dash': SLUGIFY_SPACE_TO_DASH} - mode = 0 - for v in value.split(','): - f = mapping.get(v.strip()) - if f is None: - if v == 'iconv': - raise Exception("'iconv' is not supported as a slugify mode " - "in PieCrust2. Use 'transliterate'.") - raise Exception("Unknown slugify flag: %s" % v) - mode |= f - return mode -
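The slugifier in this deleted module is self-contained enough to demonstrate on its own. A runnable reduction using two of the flags, with values copied from the deleted code:

    import re

    SLUGIFY_LOWERCASE = 4      # flag values from the deleted module
    SLUGIFY_SPACE_TO_DASH = 16

    re_space_to_dash = re.compile(r'\s+')

    def parse_mode(value):
        # Same idea as the deleted _parse_slugify_mode(), for two flags only.
        mapping = {'lowercase': SLUGIFY_LOWERCASE,
                   'space_to_dash': SLUGIFY_SPACE_TO_DASH}
        mode = 0
        for v in value.split(','):
            mode |= mapping[v.strip()]
        return mode

    def slugify(term, mode):
        if mode & SLUGIFY_LOWERCASE:
            term = term.lower()
        if mode & SLUGIFY_SPACE_TO_DASH:
            term = re_space_to_dash.sub('-', term)
        return term

    mode = parse_mode('lowercase, space_to_dash')
    assert slugify('Static Sites', mode) == 'static-sites'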
--- a/piecrust/importing/wordpress.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/importing/wordpress.py Wed May 17 00:11:48 2017 -0700 @@ -5,9 +5,8 @@ from collections import OrderedDict from piecrust import CONFIG_PATH from piecrust.configuration import ( - ConfigurationLoader, ConfigurationDumper, merge_dicts) + ConfigurationLoader, ConfigurationDumper, merge_dicts) from piecrust.importing.base import Importer, create_page, download_asset -from piecrust.sources.base import MODE_CREATING logger = logging.getLogger(__name__) @@ -16,25 +15,25 @@ class WordpressImporterBase(Importer): def setupParser(self, parser, app): parser.add_argument( - '--pages-source', - default="pages", - help="The source to store pages in.") + '--pages-source', + default="pages", + help="The source to store pages in.") parser.add_argument( - '--posts-source', - default="posts", - help="The source to store posts in.") + '--posts-source', + default="posts", + help="The source to store posts in.") parser.add_argument( - '--default-post-layout', - help="The default layout to use for posts.") + '--default-post-layout', + help="The default layout to use for posts.") parser.add_argument( - '--default-post-category', - help="The default category to use for posts.") + '--default-post-category', + help="The default category to use for posts.") parser.add_argument( - '--default-page-layout', - help="The default layout to use for pages.") + '--default-page-layout', + help="The default layout to use for pages.") parser.add_argument( - '--default-page-category', - help="The default category to use for pages.") + '--default-page-category', + help="The default category to use for pages.") def importWebsite(self, app, args): impl = self._getImplementation(app, args) @@ -60,8 +59,8 @@ site_config = self._getSiteConfig(ctx) site_config.setdefault('site', {}) site_config['site'].update({ - 'post_url': '%year%/%month%/%slug%', - 'category_url': 'category/%category%'}) + 'post_url': '%year%/%month%/%slug%', + 'category_url': 'category/%category%'}) site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH) with open(site_config_path, 'r') as fp: @@ -102,10 +101,10 @@ def _createPost(self, post_info): post_dt = post_info['datetime'] finder = { - 'year': post_dt.year, - 'month': post_dt.month, - 'day': post_dt.day, - 'slug': post_info['slug']} + 'year': post_dt.year, + 'month': post_dt.month, + 'day': post_dt.day, + 'slug': post_info['slug']} if post_info['type'] == 'post': source = self._posts_source elif post_info['type'] == 'page': @@ -174,25 +173,25 @@ title = find_text(channel, 'title') description = find_text(channel, 'description') site_config = OrderedDict({ - 'site': { - 'title': title, - 'description': description} - }) + 'site': { + 'title': title, + 'description': description} + }) # Get authors' names. 
authors = {} for a in channel.findall('wp:author', self.ns_wp): login = find_text(a, 'wp:author_login', self.ns_wp) authors[login] = { - 'email': find_text(a, 'wp:author_email', self.ns_wp), - 'display_name': find_text(a, 'wp:author_display_name', - self.ns_wp), - 'first_name': find_text(a, 'wp:author_first_name', - self.ns_wp), - 'last_name': find_text(a, 'wp:author_last_name', - self.ns_wp), - 'author_id': find_text(a, 'wp:author_id', - self.ns_wp)} + 'email': find_text(a, 'wp:author_email', self.ns_wp), + 'display_name': find_text(a, 'wp:author_display_name', + self.ns_wp), + 'first_name': find_text(a, 'wp:author_first_name', + self.ns_wp), + 'last_name': find_text(a, 'wp:author_last_name', + self.ns_wp), + 'author_id': find_text(a, 'wp:author_id', + self.ns_wp)} site_config['site']['authors'] = authors return site_config @@ -216,9 +215,9 @@ post_name = find_text(node, 'wp:post_name', self.ns_wp) post_type = find_text(node, 'wp:post_type', self.ns_wp) post_info = { - 'type': post_type, - 'slug': post_name, - 'datetime': post_date} + 'type': post_type, + 'slug': post_name, + 'datetime': post_date} title = find_text(node, 'title') creator = find_text(node, 'dc:creator', self.ns_dc) @@ -228,12 +227,12 @@ description = find_text(node, 'description') # TODO: menu order, parent, password, sticky post_info.update({ - 'title': title, - 'author': creator, - 'status': status, - 'post_id': post_id, - 'post_guid': guid, - 'description': description}) + 'title': title, + 'author': creator, + 'status': status, + 'post_id': post_id, + 'post_guid': guid, + 'description': description}) categories = [] for c in node.findall('category'): @@ -250,8 +249,8 @@ content = find_text(node, 'content:encoded', self.ns_content) excerpt = find_text(node, 'excerpt:encoded', self.ns_excerpt) post_info.update({ - 'content': content, - 'excerpt': excerpt}) + 'content': content, + 'excerpt': excerpt}) return post_info
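The importer walks namespaced elements of a WordPress WXR export; `find_text` is a helper defined elsewhere in the importer code. A self-contained sketch with a simplified stand-in for that helper and an assumed WXR namespace URI:

    import xml.etree.ElementTree as ET

    NS_WP = {'wp': 'http://wordpress.org/export/1.2/'}  # assumed WXR version

    def find_text(parent, path, ns=None):
        # Simplified stand-in for the importer's helper of the same name.
        el = parent.find(path, ns)
        return el.text if el is not None else None

    xml = ('<channel xmlns:wp="http://wordpress.org/export/1.2/">'
           '<wp:author><wp:author_login>alice</wp:author_login></wp:author>'
           '</channel>')
    channel = ET.fromstring(xml)
    author = channel.find('wp:author', NS_WP)
    print(find_text(author, 'wp:author_login', NS_WP))  # alice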
--- a/piecrust/main.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/main.py Wed May 17 00:11:48 2017 -0700 @@ -9,9 +9,9 @@ import colorama from piecrust import APP_VERSION from piecrust.app import ( - PieCrust, PieCrustConfiguration, apply_variant_and_values) + PieCrustFactory, PieCrustConfiguration) from piecrust.chefutil import ( - format_timed, log_friendly_exception, print_help_item) + format_timed, log_friendly_exception, print_help_item) from piecrust.commands.base import CommandContext from piecrust.pathutil import SiteNotFoundError, find_app_root from piecrust.plugins.base import PluginLoader @@ -22,12 +22,12 @@ class ColoredFormatter(logging.Formatter): COLORS = { - 'DEBUG': colorama.Fore.BLACK + colorama.Style.BRIGHT, - 'INFO': '', - 'WARNING': colorama.Fore.YELLOW, - 'ERROR': colorama.Fore.RED, - 'CRITICAL': colorama.Back.RED + colorama.Fore.WHITE - } + 'DEBUG': colorama.Fore.BLACK + colorama.Style.BRIGHT, + 'INFO': '', + 'WARNING': colorama.Fore.YELLOW, + 'ERROR': colorama.Fore.RED, + 'CRITICAL': colorama.Back.RED + colorama.Fore.WHITE + } def __init__(self, fmt=None, datefmt=None): super(ColoredFormatter, self).__init__(fmt, datefmt) @@ -79,67 +79,67 @@ def _setup_main_parser_arguments(parser): parser.add_argument( - '--version', - action='version', - version=('%(prog)s ' + APP_VERSION)) + '--version', + action='version', + version=('%(prog)s ' + APP_VERSION)) parser.add_argument( - '--root', - help="The root directory of the website.") + '--root', + help="The root directory of the website.") parser.add_argument( - '--theme', - action='store_true', - help="Makes the current command apply to a theme website.") + '--theme', + action='store_true', + help="Makes the current command apply to a theme website.") parser.add_argument( - '--config', - dest='config_variant', - help="The configuration variant to use for this command.") + '--config', + dest='config_variant', + help="The configuration variant to use for this command.") parser.add_argument( - '--config-set', - nargs=2, - metavar=('NAME', 'VALUE'), - action='append', - dest='config_values', - help="Sets a specific site configuration setting.") + '--config-set', + nargs=2, + metavar=('NAME', 'VALUE'), + action='append', + dest='config_values', + help="Sets a specific site configuration setting.") parser.add_argument( - '--debug', - help="Show debug information.", action='store_true') + '--debug', + help="Show debug information.", action='store_true') parser.add_argument( - '--debug-only', - action='append', - help="Only show debug information for the given categories.") + '--debug-only', + action='append', + help="Only show debug information for the given categories.") parser.add_argument( - '--no-cache', - help="When applicable, disable caching.", - action='store_true') + '--no-cache', + help="When applicable, disable caching.", + action='store_true') parser.add_argument( - '--quiet', - help="Print only important information.", - action='store_true') + '--quiet', + help="Print only important information.", + action='store_true') parser.add_argument( - '--log', - dest='log_file', - help="Send log messages to the specified file.") + '--log', + dest='log_file', + help="Send log messages to the specified file.") parser.add_argument( - '--log-debug', - help="Log debug messages to the log file.", - action='store_true') + '--log-debug', + help="Log debug messages to the log file.", + action='store_true') parser.add_argument( - '--no-color', - help="Don't use colorized output.", - action='store_true') + '--no-color', + help="Don't 
use colorized output.", + action='store_true') parser.add_argument( - '--pid-file', - dest='pid_file', - help="Write a PID file for the current process.") + '--pid-file', + dest='pid_file', + help="Write a PID file for the current process.") """ Kinda hacky, but we want the `serve` command to use a different cache - so that PieCrust doesn't need to re-render all the pages when going - between `serve` and `bake` (or, worse, *not* re-render them all correctly - and end up serving or baking the wrong version). +so that PieCrust doesn't need to re-render all the pages when going +between `serve` and `bake` (or, worse, *not* re-render them all correctly +and end up serving or baking the wrong version). """ _command_caches = { - 'serve': 'server'} + 'serve': 'server'} def _pre_parse_chef_args(argv): @@ -235,30 +235,32 @@ # Can't apply custom configuration stuff if there's no website. if (pre_args.config_variant or pre_args.config_values) and not root: raise SiteNotFoundError( - "Can't apply any configuration variant or value overrides, " - "there is no website here.") + "Can't apply any configuration variant or value overrides, " + "there is no website here.") if root: cache_key = None if not pre_args.no_cache: cache_key = _build_cache_key(pre_args) - app = PieCrust( - root, - theme_site=pre_args.theme, - cache=(not pre_args.no_cache), - cache_key=cache_key, - debug=pre_args.debug) - apply_variant_and_values( - app, pre_args.config_variant, pre_args.config_values) + appfactory = PieCrustFactory( + root, + theme_site=pre_args.theme, + cache=(not pre_args.no_cache), + cache_key=cache_key, + debug=pre_args.debug, + config_variant=pre_args.config_variant, + config_values=pre_args.config_values) + app = appfactory.create() else: + appfactory = None app = NullPieCrust( - theme_site=pre_args.theme) + theme_site=pre_args.theme) # Setup the arg parser. parser = argparse.ArgumentParser( - prog='chef', - description="The PieCrust chef manages your website.", - formatter_class=argparse.RawDescriptionHelpFormatter) + prog='chef', + description="The PieCrust chef manages your website.", + formatter_class=argparse.RawDescriptionHelpFormatter) _setup_main_parser_arguments(parser) commands = sorted(app.plugin_loader.getCommands(), @@ -289,10 +291,7 @@ return 0 # Run the command! - ctx = CommandContext(app, parser, result) - ctx.config_variant = pre_args.config_variant - ctx.config_values = pre_args.config_values - + ctx = CommandContext(appfactory, app, parser, result) exit_code = result.func(ctx) if exit_code is None: return 0
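Commands now receive a factory that can recreate the app (which should help worker processes rebuild it later) instead of a `PieCrust` instance patched up with `apply_variant_and_values`. A hypothetical reconstruction of the factory from this call site alone; the real `PieCrustFactory` lives in `piecrust.app` and may differ:

    class PieCrustFactory:
        # Hypothetical sketch: bundles every argument the call site above
        # passes, so the app can be rebuilt on demand.
        def __init__(self, root_dir, *, theme_site=False, cache=True,
                     cache_key=None, debug=False,
                     config_variant=None, config_values=None):
            self.root_dir = root_dir
            self.theme_site = theme_site
            self.cache = cache
            self.cache_key = cache_key
            self.debug = debug
            self.config_variant = config_variant
            self.config_values = config_values

        def create(self):
            from piecrust.app import PieCrust
            app = PieCrust(self.root_dir, theme_site=self.theme_site,
                           cache=self.cache, cache_key=self.cache_key,
                           debug=self.debug)
            # The variant/value overrides are presumably applied here, which
            # is what removes the need for apply_variant_and_values.
            return app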
--- a/piecrust/page.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/page.py Wed May 17 00:11:48 2017 -0700 @@ -9,8 +9,8 @@ import collections from werkzeug.utils import cached_property from piecrust.configuration import ( - Configuration, ConfigurationError, - parse_config_header) + Configuration, ConfigurationError, + parse_config_header) logger = logging.getLogger(__name__) @@ -36,32 +36,61 @@ FLAG_RAW_CACHE_VALID = 2**0 +class PageNotFoundError(Exception): + pass + + +class QualifiedPage(object): + def __init__(self, page, route, route_params, *, page_num=1): + self.page = page + self.page_num = page_num + self.route = route + self.route_params = route_params + + @property + def app(self): + return self.page.app + + @property + def source(self): + return self.page.source + + @cached_property + def uri(self): + return self.route.getUri(self.route_params, self.page_num) + + def getSubPage(self, page_num): + return QualifiedPage(self.page, self.route, self.route_params, + page_num=page_num) + + class Page(object): - def __init__(self, source, source_metadata, rel_path): - self.source = source - self.source_metadata = source_metadata - self.rel_path = rel_path + def __init__(self, content_item): + self.content_item = content_item self._config = None self._segments = None self._flags = FLAG_NONE self._datetime = None @property - def app(self): - return self.source.app + def source(self): + return self.content_item.source @property - def ref_spec(self): - return '%s:%s' % (self.source.name, self.rel_path) + def source_metadata(self): + return self.content_item.metadata + + @property + def content_spec(self): + return self.content_item.spec + + @property + def app(self): + return self.content_item.source.app @cached_property - def path(self): - path, _ = self.source.resolveRef(self.rel_path) - return path - - @cached_property - def path_mtime(self): - return os.path.getmtime(self.path) + def content_mtime(self): + return self.content_item.getmtime() @property def flags(self): @@ -91,20 +120,20 @@ page_time = _parse_config_time(self.config.get('time')) if page_time is not None: self._datetime = datetime.datetime( - page_date.year, - page_date.month, - page_date.day) + page_time + page_date.year, + page_date.month, + page_date.day) + page_time else: self._datetime = datetime.datetime( - page_date.year, page_date.month, page_date.day) + page_date.year, page_date.month, page_date.day) elif 'date' in self.config: # Get the date from the page config, and maybe the # time too.
page_date = _parse_config_date(self.config.get('date')) self._datetime = datetime.datetime( - page_date.year, - page_date.month, - page_date.day) + page_date.year, + page_date.month, + page_date.day) page_time = _parse_config_time(self.config.get('time')) if page_time is not None: self._datetime += page_time @@ -114,8 +143,8 @@ except Exception as ex: logger.exception(ex) raise Exception( - "Error computing time for page: %s" % - self.path) from ex + "Error computing time for page: %s" % + self.path) from ex return self._datetime @datetime.setter @@ -129,8 +158,9 @@ if self._config is not None: return - config, content, was_cache_valid = load_page(self.app, self.path, - self.path_mtime) + config, content, was_cache_valid = load_page( + self.app, self.path, self.path_mtime) + if 'config' in self.source_metadata: config.merge(self.source_metadata['config']) @@ -141,6 +171,7 @@ self.source.finalizeConfig(self) + def _parse_config_date(page_date): if page_date is None: return None @@ -152,9 +183,9 @@ logger.exception(ex) raise ConfigurationError("Invalid date: %s" % page_date) from ex return datetime.date( - year=parsed_d.year, - month=parsed_d.month, - day=parsed_d.day) + year=parsed_d.year, + month=parsed_d.month, + day=parsed_d.day) raise ConfigurationError("Invalid date: %s" % page_date) @@ -173,9 +204,9 @@ logger.exception(ex) raise ConfigurationError("Invalid time: %s" % page_time) from ex return datetime.timedelta( - hours=parsed_t.hour, - minutes=parsed_t.minute, - seconds=parsed_t.second) + hours=parsed_t.hour, + minutes=parsed_t.minute, + seconds=parsed_t.second) if isinstance(page_time, int): # Total seconds... convert to a time struct. @@ -187,8 +218,8 @@ class PageLoadingError(Exception): def __init__(self, path, inner=None): super(PageLoadingError, self).__init__( - "Error loading page: %s" % path, - inner) + "Error loading page: %s" % path, + inner) class ContentSegment(object): @@ -242,8 +273,8 @@ return _do_load_page(app, path, path_mtime) except Exception as e: logger.exception( - "Error loading page: %s" % - os.path.relpath(path, app.root_dir)) + "Error loading page: %s" % + os.path.relpath(path, app.root_dir)) _, __, traceback = sys.exc_info() raise PageLoadingError(path, e).with_traceback(traceback) @@ -255,11 +286,11 @@ page_time = path_mtime or os.path.getmtime(path) if cache.isValid(cache_path, page_time): cache_data = json.loads( - cache.read(cache_path), - object_pairs_hook=collections.OrderedDict) + cache.read(cache_path), + object_pairs_hook=collections.OrderedDict) config = PageConfiguration( - values=cache_data['config'], - validate=False) + values=cache_data['config'], + validate=False) content = json_load_segments(cache_data['content']) return config, content, True @@ -280,19 +311,19 @@ # Save to the cache. 
cache_data = { - 'config': config.getAll(), - 'content': json_save_segments(content)} + 'config': config.getAll(), + 'content': json_save_segments(content)} cache.write(cache_path, json.dumps(cache_data)) return config, content, False segment_pattern = re.compile( - r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""", - re.M) + r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""", + re.M) part_pattern = re.compile( - r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""", - re.M) + r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""", + re.M) def _count_lines(s): @@ -323,7 +354,7 @@ if not do_parse: seg = ContentSegment() seg.parts = [ - ContentSegmentPart(raw[offset:], None, offset, current_line)] + ContentSegmentPart(raw[offset:], None, offset, current_line)] return {'content': seg} # Start parsing segments and parts. @@ -337,7 +368,7 @@ # There's some default content segment at the beginning. seg = ContentSegment() seg.parts, current_line = parse_segment_parts( - raw, offset, first_offset, current_line) + raw, offset, first_offset, current_line) contents['content'] = seg for i in range(1, num_matches): @@ -345,16 +376,16 @@ m2 = matches[i] seg = ContentSegment() seg.parts, current_line = parse_segment_parts( - raw, m1.end() + 1, m2.start(), current_line, - m1.group('fmt')) + raw, m1.end() + 1, m2.start(), current_line, + m1.group('fmt')) contents[m1.group('name')] = seg # Handle text past the last match. lastm = matches[-1] seg = ContentSegment() seg.parts, current_line = parse_segment_parts( - raw, lastm.end() + 1, len(raw), current_line, - lastm.group('fmt')) + raw, lastm.end() + 1, len(raw), current_line, + lastm.group('fmt')) contents[lastm.group('name')] = seg return contents @@ -362,7 +393,7 @@ # No segments, just content. seg = ContentSegment() seg.parts, current_line = parse_segment_parts( - raw, offset, len(raw), current_line) + raw, offset, len(raw), current_line) return {'content': seg} @@ -375,8 +406,8 @@ # First part, before the first format change. part_text = raw[start:matches[0].start()] parts.append( - ContentSegmentPart(part_text, first_part_fmt, start, - line_offset)) + ContentSegmentPart(part_text, first_part_fmt, start, + line_offset)) line_offset += _count_lines(part_text) for i in range(1, num_matches): @@ -384,16 +415,16 @@ m2 = matches[i] part_text = raw[m1.end() + 1:m2.start()] parts.append( - ContentSegmentPart( - part_text, m1.group('fmt'), m1.end() + 1, - line_offset)) + ContentSegmentPart( + part_text, m1.group('fmt'), m1.end() + 1, + line_offset)) line_offset += _count_lines(part_text) lastm = matches[-1] part_text = raw[lastm.end() + 1:end] parts.append(ContentSegmentPart( - part_text, lastm.group('fmt'), lastm.end() + 1, - line_offset)) + part_text, lastm.group('fmt'), lastm.end() + 1, + line_offset)) return parts, line_offset else:
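The two regexes above define the raw page syntax: `---name:format---` opens a named content segment, and `<--format-->` switches formatters within a segment. A quick check of what they capture:

    import re

    segment_pattern = re.compile(
        r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""", re.M)
    part_pattern = re.compile(
        r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""", re.M)

    raw = "Main text.\n---excerpt:textile---\nShort.\n<-- markdown -->\nMore."
    seg = segment_pattern.search(raw)
    print(seg.group('name'), seg.group('fmt'))    # excerpt textile
    print(part_pattern.search(raw).group('fmt'))  # markdown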
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/_pagebaker.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,272 @@ +import os.path +import queue +import logging +import threading +import urllib.parse +from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry +from piecrust.rendering import RenderingContext, render_page, PASS_FORMATTING +from piecrust.uriutil import split_uri + + +logger = logging.getLogger(__name__) + + +class BakingError(Exception): + pass + + +class PageBaker(object): + def __init__(self, app, out_dir, force=False, copy_assets=True): + self.app = app + self.out_dir = out_dir + self.force = force + self.copy_assets = copy_assets + self.site_root = app.config.get('site/root') + self.pretty_urls = app.config.get('site/pretty_urls') + self._writer_queue = None + self._writer = None + + def startWriterQueue(self): + self._writer_queue = queue.Queue() + self._writer = threading.Thread( + name='PageSerializer', + target=_text_writer, + args=(self._writer_queue,)) + self._writer.start() + + def stopWriterQueue(self): + self._writer_queue.put_nowait(None) + self._writer.join() + + def getOutputPath(self, uri, pretty_urls): + uri_root, uri_path = split_uri(self.app, uri) + + bake_path = [self.out_dir] + decoded_uri = urllib.parse.unquote(uri_path) + if pretty_urls: + bake_path.append(decoded_uri) + bake_path.append('index.html') + elif decoded_uri == '': + bake_path.append('index.html') + else: + bake_path.append(decoded_uri) + + return os.path.normpath(os.path.join(*bake_path)) + + def bake(self, qualified_page, prev_entry, dirty_source_names): + # Start baking the sub-pages. + cur_sub = 1 + has_more_subs = True + sub_entries = [] + pretty_urls = qualified_page.config.get( + 'pretty_urls', self.pretty_urls) + + while has_more_subs: + sub_page = qualified_page.getSubPage(cur_sub) + sub_uri = sub_page.uri + logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub)) + + out_path = self.getOutputPath(sub_uri, pretty_urls) + + # Create the sub-entry for the bake record. + sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path) + sub_entries.append(sub_entry) + + # Find a corresponding sub-entry in the previous bake record. + prev_sub_entry = None + if prev_entry is not None: + try: + prev_sub_entry = prev_entry.getSub(cur_sub) + except IndexError: + pass + + # Figure out if we need to invalidate or force anything. + force_this_sub, invalidate_formatting = _compute_force_flags( + prev_sub_entry, sub_entry, dirty_source_names) + force_this_sub = force_this_sub or self.force + + # Check for up-to-date outputs. + do_bake = True + if not force_this_sub: + try: + in_path_time = qualified_page.path_mtime + out_path_time = os.path.getmtime(out_path) + if out_path_time >= in_path_time: + do_bake = False + except OSError: + # File doesn't exist, we'll need to bake. + pass + + # If this page didn't bake because it's already up-to-date. + # Keep trying for as many subs as we know this page has. + if not do_bake: + sub_entry.render_info = prev_sub_entry.copyRenderInfo() + sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE + + if prev_entry.num_subs >= cur_sub + 1: + cur_sub += 1 + has_more_subs = True + logger.debug(" %s is up to date, skipping to next " + "sub-page." % out_path) + continue + + logger.debug(" %s is up to date, skipping bake." % out_path) + break + + # All good, proceed. 
+ try: + if invalidate_formatting: + cache_key = sub_uri + self.app.env.rendered_segments_repository.invalidate( + cache_key) + sub_entry.flags |= \ + SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED + + logger.debug(" p%d -> %s" % (cur_sub, out_path)) + rp = self._bakeSingle(sub_page, out_path) + except Exception as ex: + logger.exception(ex) + page_rel_path = os.path.relpath(qualified_page.path, + self.app.root_dir) + raise BakingError("%s: error baking '%s'." % + (page_rel_path, sub_uri)) from ex + + # Record what we did. + sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED + sub_entry.render_info = rp.copyRenderInfo() + + # Copy page assets. + if (cur_sub == 1 and self.copy_assets and + sub_entry.anyPass(lambda p: p.used_assets)): + if pretty_urls: + out_assets_dir = os.path.dirname(out_path) + else: + out_assets_dir, out_name = os.path.split(out_path) + if sub_uri != self.site_root: + out_name_noext, _ = os.path.splitext(out_name) + out_assets_dir = os.path.join(out_assets_dir, + out_name_noext) + + logger.debug("Copying page assets to: %s" % out_assets_dir) + _ensure_dir_exists(out_assets_dir) + + assetor = qualified_page.source.buildAssetor( + qualified_page, sub_uri) + assetor.copyAssets(out_assets_dir) + + # Figure out if we have more work. + has_more_subs = False + if sub_entry.anyPass(lambda p: p.pagination_has_more): + cur_sub += 1 + has_more_subs = True + + return sub_entries + + def _bakeSingle(self, qp, out_path): + ctx = RenderingContext(qp) + qp.source.prepareRenderContext(ctx) + + with self.app.env.stats.timerScope("PageRender"): + rp = render_page(ctx) + + with self.app.env.stats.timerScope("PageSerialize"): + if self._writer_queue is not None: + self._writer_queue.put_nowait((out_path, rp.content)) + else: + with open(out_path, 'w', encoding='utf8') as fp: + fp.write(rp.content) + + return rp + + +def _text_writer(q): + while True: + item = q.get() + if item is not None: + out_path, txt = item + out_dir = os.path.dirname(out_path) + _ensure_dir_exists(out_dir) + + with open(out_path, 'w', encoding='utf8') as fp: + fp.write(txt) + + q.task_done() + else: + # Sentinel object, terminate the thread. + q.task_done() + break + + +def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names): + # Figure out what to do with this page. + force_this_sub = False + invalidate_formatting = False + sub_uri = sub_entry.out_uri + if (prev_sub_entry and + (prev_sub_entry.was_baked_successfully or + prev_sub_entry.was_clean)): + # If the current page is known to use pages from other sources, + # see if any of those got baked, or are going to be baked for + # some reason. If so, we need to bake this one too. + # (this happens for instance with the main page of a blog). + dirty_for_this, invalidated_render_passes = ( + _get_dirty_source_names_and_render_passes( + prev_sub_entry, dirty_source_names)) + if len(invalidated_render_passes) > 0: + logger.debug( + "'%s' is known to use sources %s, which have " + "items that got (re)baked. Will force bake this " + "page. " % (sub_uri, dirty_for_this)) + sub_entry.flags |= \ + SubPagePipelineRecordEntry.FLAG_FORCED_BY_SOURCE + force_this_sub = True + + if PASS_FORMATTING in invalidated_render_passes: + logger.debug( + "Will invalidate cached formatting for '%s' " + "since sources were used during that pass." + % sub_uri) + invalidate_formatting = True + elif (prev_sub_entry and + prev_sub_entry.errors): + # Previous bake failed. We'll have to bake it again. + logger.debug( + "Previous record entry indicates baking failed for " + "'%s'. Will bake it again." % sub_uri) + sub_entry.flags |= \ + SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS + force_this_sub = True + elif not prev_sub_entry: + # No previous record. We'll have to bake it. + logger.debug("No previous record entry found for '%s'. Will " + "force bake it." % sub_uri) + sub_entry.flags |= \ + SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS + force_this_sub = True + + return force_this_sub, invalidate_formatting + + +def _get_dirty_source_names_and_render_passes(sub_entry, dirty_source_names): + dirty_for_this = set() + invalidated_render_passes = set() + for p, pinfo in enumerate(sub_entry.render_info): + if pinfo: + for src_name in pinfo.used_source_names: + is_dirty = (src_name in dirty_source_names) + if is_dirty: + invalidated_render_passes.add(p) + dirty_for_this.add(src_name) + break + return dirty_for_this, invalidated_render_passes + + +def _ensure_dir_exists(path): + try: + os.makedirs(path, mode=0o755, exist_ok=True) + except OSError: + # In a multiprocess environment, several processes may very + # occasionally try to create the same directory at the same time. + # Let's ignore any error; if something's really wrong (like file + # access permissions or whatever), it will more legitimately fail + # just after this when we try to write files. + pass +
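The path mapping in `getOutputPath` is easy to sanity-check in isolation. This standalone replica keeps the same decision tree but skips the site-root splitting that the real method does through `split_uri`:

    import os.path
    import urllib.parse

    def output_path(out_dir, uri_path, pretty_urls):
        parts = [out_dir]
        decoded = urllib.parse.unquote(uri_path)
        if pretty_urls:
            parts += [decoded, 'index.html']
        elif decoded == '':
            parts.append('index.html')
        else:
            parts.append(decoded)
        return os.path.normpath(os.path.join(*parts))

    print(output_path('_counter', 'blog/foo', True))        # _counter/blog/foo/index.html
    print(output_path('_counter', 'blog/foo.html', False))  # _counter/blog/foo.html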
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/_pagerecords.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,103 @@ +import copy +from piecrust.pipelines.records import RecordEntry + + +class SubPagePipelineRecordEntry: + FLAG_NONE = 0 + FLAG_BAKED = 2**0 + FLAG_FORCED_BY_SOURCE = 2**1 + FLAG_FORCED_BY_NO_PREVIOUS = 2**2 + FLAG_FORCED_BY_PREVIOUS_ERRORS = 2**3 + FLAG_FORMATTING_INVALIDATED = 2**4 + + def __init__(self, out_uri, out_path): + self.out_uri = out_uri + self.out_path = out_path + self.flags = self.FLAG_NONE + self.errors = [] + self.render_info = [None, None] # Same length as RENDER_PASSES + + @property + def was_clean(self): + return (self.flags & self.FLAG_BAKED) == 0 and len(self.errors) == 0 + + @property + def was_baked(self): + return (self.flags & self.FLAG_BAKED) != 0 + + @property + def was_baked_successfully(self): + return self.was_baked and len(self.errors) == 0 + + def anyPass(self, func): + for pinfo in self.render_info: + if pinfo and func(pinfo): + return True + return False + + def copyRenderInfo(self): + return copy.deepcopy(self.render_info) + + +class PagePipelineRecordEntry(RecordEntry): + FLAG_NONE = 0 + FLAG_NEW = 2**0 + FLAG_SOURCE_MODIFIED = 2**1 + FLAG_OVERRIDEN = 2**2 + + def __init__(self): + super().__init__() + self.flags = self.FLAG_NONE + self.config = None + self.subs = [] + + @property + def was_overriden(self): + return (self.flags & self.FLAG_OVERRIDEN) != 0 + + @property + def num_subs(self): + return len(self.subs) + + @property + def was_any_sub_baked(self): + for o in self.subs: + if o.was_baked: + return True + return False + + @property + def all_assets(self): + for sub in self.subs: + yield from sub.assets + + @property + def all_out_paths(self): + for sub in self.subs: + yield sub.out_path + + @property + def has_any_error(self): + if len(self.errors) > 0: + return True + for o in self.subs: + if len(o.errors) > 0: + return True + return False + + def getSub(self, page_num): + return self.subs[page_num - 1] + + def getAllErrors(self): + yield from self.errors + for o in self.subs: + yield from o.errors + + def getAllUsedSourceNames(self): + res = set() + for o in self.subs: + for pinfo in o.render_info: + if pinfo: + res |= pinfo.used_source_names + return res +
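How the sub-page flags compose, runnable once this changeset is applied (the URI and output path are made-up values):

    from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry

    sub = SubPagePipelineRecordEntry('/blog/foo/', '_counter/blog/foo/index.html')
    assert sub.was_clean and not sub.was_baked

    sub.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
    assert sub.was_baked_successfully  # baked, and no errors recorded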
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/_procrecords.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,35 @@ +from piecrust.pipelines.records import RecordEntry + + +class AssetPipelineRecordEntry(RecordEntry): + FLAG_NONE = 0 + FLAG_PREPARED = 2**0 + FLAG_PROCESSED = 2**1 + FLAG_BYPASSED_STRUCTURED_PROCESSING = 2**3 + FLAG_COLLAPSED_FROM_LAST_RUN = 2**4 + + def __init__(self): + super().__init__() + self.out_paths = [] + self.flags = self.FLAG_NONE + self.proc_tree = None + + @property + def was_prepared(self): + return bool(self.flags & self.FLAG_PREPARED) + + @property + def was_processed(self): + return (self.was_prepared and + (bool(self.flags & self.FLAG_PROCESSED) or + len(self.errors) > 0)) + + @property + def was_processed_successfully(self): + return self.was_processed and not self.errors + + @property + def was_collapsed_from_last_run(self): + return self.flags & self.FLAG_COLLAPSED_FROM_LAST_RUN + +
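Note the asymmetry in `was_processed`: an entry also counts as processed when it was prepared and then recorded errors, so failed items don't look like skipped ones. A small demonstration, assuming the base `RecordEntry` (from `piecrust.pipelines.records`, not shown here) initializes the `errors` list these properties rely on; the error message is made up:

    from piecrust.pipelines._procrecords import AssetPipelineRecordEntry

    entry = AssetPipelineRecordEntry()
    entry.flags |= AssetPipelineRecordEntry.FLAG_PREPARED
    assert not entry.was_processed          # prepared, but nothing ran yet

    entry.errors.append('sass compiler failed')  # hypothetical failure
    assert entry.was_processed
    assert not entry.was_processed_successfully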
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/_proctree.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,297 @@ +import os +import time +import os.path +import logging +from piecrust.chefutil import format_timed +from piecrust.processing.base import FORCE_BUILD + + +logger = logging.getLogger(__name__) + + +STATE_UNKNOWN = 0 +STATE_DIRTY = 1 +STATE_CLEAN = 2 + + +class ProcessingTreeError(Exception): + pass + + +class ProcessorNotFoundError(ProcessingTreeError): + pass + + +class ProcessorError(ProcessingTreeError): + def __init__(self, proc_name, in_path, *args): + super(ProcessorError, self).__init__(*args) + self.proc_name = proc_name + self.in_path = in_path + + def __str__(self): + return "Processor %s failed on: %s" % (self.proc_name, self.in_path) + + +class ProcessingTreeNode(object): + def __init__(self, path, available_procs, level=0): + self.path = path + self.available_procs = available_procs + self.outputs = [] + self.level = level + self.state = STATE_UNKNOWN + self._processor = None + + def getProcessor(self): + if self._processor is None: + for p in self.available_procs: + if p.matches(self.path): + self._processor = p + self.available_procs.remove(p) + break + else: + raise ProcessorNotFoundError() + return self._processor + + def setState(self, state, recursive=True): + self.state = state + if recursive: + for o in self.outputs: + o.setState(state, True) + + @property + def is_leaf(self): + return len(self.outputs) == 0 + + def getLeaves(self): + if self.is_leaf: + return [self] + leaves = [] + for o in self.outputs: + for l in o.getLeaves(): + leaves.append(l) + return leaves + + +class ProcessingTreeBuilder(object): + def __init__(self, processors): + self.processors = processors + + def build(self, path): + tree_root = ProcessingTreeNode(path, list(self.processors)) + + loop_guard = 100 + walk_stack = [tree_root] + while len(walk_stack) > 0: + loop_guard -= 1 + if loop_guard <= 0: + raise ProcessingTreeError("Infinite loop detected!") + + cur_node = walk_stack.pop() + proc = cur_node.getProcessor() + + # If the root tree node (and only that one) wants to bypass this + # whole tree business, so be it. + if proc.is_bypassing_structured_processing: + if cur_node != tree_root: + raise ProcessingTreeError("Only root processors can " + "bypass structured processing.") + break + + # Get the destination directory and output files. 
+ rel_dir, basename = os.path.split(cur_node.path) + out_names = proc.getOutputFilenames(basename) + if out_names is None: + continue + + for n in out_names: + out_node = ProcessingTreeNode( + os.path.join(rel_dir, n), + list(cur_node.available_procs), + cur_node.level + 1) + cur_node.outputs.append(out_node) + + if proc.PROCESSOR_NAME != 'copy': + walk_stack.append(out_node) + + return tree_root + + +class ProcessingTreeRunner(object): + def __init__(self, base_dir, tmp_dir, out_dir): + self.base_dir = base_dir + self.tmp_dir = tmp_dir + self.out_dir = out_dir + + def processSubTree(self, tree_root): + did_process = False + walk_stack = [tree_root] + while len(walk_stack) > 0: + cur_node = walk_stack.pop() + + self._computeNodeState(cur_node) + if cur_node.state == STATE_DIRTY: + did_process_this_node = self.processNode(cur_node) + did_process |= did_process_this_node + + if did_process_this_node: + for o in cur_node.outputs: + if not o.is_leaf: + walk_stack.append(o) + else: + for o in cur_node.outputs: + if not o.is_leaf: + walk_stack.append(o) + return did_process + + def processNode(self, node): + full_path = self._getNodePath(node) + proc = node.getProcessor() + if proc.is_bypassing_structured_processing: + try: + start_time = time.perf_counter() + with proc.app.env.stats.timerScope(proc.__class__.__name__): + proc.process(full_path, self.out_dir) + print_node( + node, + format_timed( + start_time, "(bypassing structured processing)", + colored=False)) + return True + except Exception as e: + raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e + + # All outputs of a node must go to the same directory, so we can get + # the output directory off of the first output. + base_out_dir = self._getNodeBaseDir(node.outputs[0]) + rel_out_dir = os.path.dirname(node.path) + out_dir = os.path.join(base_out_dir, rel_out_dir) + if not os.path.isdir(out_dir): + try: + os.makedirs(out_dir, 0o755, exist_ok=True) + except OSError: + pass + + try: + start_time = time.perf_counter() + with proc.app.env.stats.timerScope(proc.__class__.__name__): + proc_res = proc.process(full_path, out_dir) + if proc_res is None: + raise Exception("Processor '%s' didn't return a boolean " + "result value." % proc) + if proc_res: + print_node(node, "-> %s" % out_dir) + return True + else: + print_node(node, "-> %s [clean]" % out_dir) + return False + except Exception as e: + raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e + + def _computeNodeState(self, node): + if node.state != STATE_UNKNOWN: + return + + proc = node.getProcessor() + if (proc.is_bypassing_structured_processing or + not proc.is_delegating_dependency_check): + # This processor wants to handle things on its own... + node.setState(STATE_DIRTY, False) + return + + start_time = time.perf_counter() + + # Get paths and modification times for the input path and + # all dependencies (if any). + base_dir = self._getNodeBaseDir(node) + full_path = os.path.join(base_dir, node.path) + in_mtime = (full_path, os.path.getmtime(full_path)) + force_build = False + try: + deps = proc.getDependencies(full_path) + if deps == FORCE_BUILD: + force_build = True + elif deps is not None: + for dep in deps: + dep_mtime = os.path.getmtime(dep) + if dep_mtime > in_mtime[1]: + in_mtime = (dep, dep_mtime) + except Exception as e: + logger.warning("%s -- Will force-bake: %s" % (e, node.path)) + node.setState(STATE_DIRTY, True) + return + + if force_build: + # Just do what the processor told us to do. 
+ node.setState(STATE_DIRTY, True) + message = "Processor requested a forced build." + print_node(node, message) + else: + # Get paths and modification times for the outputs. + message = None + for o in node.outputs: + full_out_path = self._getNodePath(o) + if not os.path.isfile(full_out_path): + message = "Output '%s' doesn't exist." % o.path + break + o_mtime = os.path.getmtime(full_out_path) + if o_mtime < in_mtime[1]: + message = "Input '%s' is newer than output '%s'." % ( + in_mtime[0], o.path) + break + if message is not None: + node.setState(STATE_DIRTY, True) + message += " Re-processing sub-tree." + print_node(node, message) + else: + node.setState(STATE_CLEAN, False) + + if node.state == STATE_DIRTY: + state = "dirty" + elif node.state == STATE_CLEAN: + state = "clean" + else: + state = "unknown" + logger.debug(format_timed(start_time, + "Computed node dirtiness: %s" % state, + indent_level=node.level, colored=False)) + + def _getNodeBaseDir(self, node): + if node.level == 0: + return self.base_dir + if node.is_leaf: + return self.out_dir + return os.path.join(self.tmp_dir, str(node.level)) + + def _getNodePath(self, node): + base_dir = self._getNodeBaseDir(node) + return os.path.join(base_dir, node.path) + + +def print_node(node, message=None, recursive=False): + indent = ' ' * node.level + try: + proc_name = node.getProcessor().PROCESSOR_NAME + except ProcessorNotFoundError: + proc_name = 'n/a' + + message = message or '' + logger.debug('%s%s [%s] %s' % (indent, node.path, proc_name, message)) + + if recursive: + for o in node.outputs: + print_node(o, None, True) + + +def get_node_name_tree(node): + try: + proc_name = node.getProcessor().PROCESSOR_NAME + except ProcessorNotFoundError: + proc_name = 'n/a' + + children = [] + for o in node.outputs: + if not o.outputs: + continue + children.append(get_node_name_tree(o)) + return (proc_name, children) +
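To make the tree mechanics concrete, here is a minimal sketch (not part of the changeset) of how `ProcessingTreeBuilder` chains processors. The two stub classes are hypothetical stand-ins for real `Processor` implementations: `build()` only needs `matches()`, `getOutputFilenames()`, `PROCESSOR_NAME` and `is_bypassing_structured_processing`, and it deliberately stops expanding the tree below the 'copy' processor.

from piecrust.pipelines._proctree import (
    ProcessingTreeBuilder, get_node_name_tree)


class FakeLessProcessor:
    # Hypothetical stand-in: turns 'foo.less' into 'foo.css'.
    PROCESSOR_NAME = 'fakeless'
    is_bypassing_structured_processing = False

    def matches(self, path):
        return path.endswith('.less')

    def getOutputFilenames(self, filename):
        return [filename[:-5] + '.css']


class FakeCopyProcessor:
    # Catch-all terminator, like piecrust.processing.copy.
    PROCESSOR_NAME = 'copy'
    is_bypassing_structured_processing = False

    def matches(self, path):
        return True

    def getOutputFilenames(self, filename):
        return [filename]


builder = ProcessingTreeBuilder([FakeLessProcessor(), FakeCopyProcessor()])
tree_root = builder.build('css/foo.less')
print(get_node_name_tree(tree_root))  # ('fakeless', [('copy', [])])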
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/asset.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,188 @@ +import os +import os.path +import re +import logging +from piecrust.pipelines._procrecords import AssetPipelineRecordEntry +from piecrust.pipelines._proctree import ( + ProcessingTreeBuilder, ProcessingTreeRunner, + get_node_name_tree, print_node, + STATE_DIRTY) +from piecrust.pipelines.base import ContentPipeline +from piecrust.processing.base import ProcessorContext +from piecrust.sources.fs import FSContentSourceBase + + +logger = logging.getLogger(__name__) + + +class AssetPipeline(ContentPipeline): + PIPELINE_NAME = 'asset' + RECORD_CLASS = AssetPipelineRecordEntry + + def __init__(self, source): + if not isinstance(source, FSContentSourceBase): + raise Exception( + "The asset pipeline only supports file-system sources.") + + super().__init__(source) + self.enabled_processors = None + self.ignore_patterns = [] + self._processors = None + self._base_dir = source.fs_endpoint_path + + def initialize(self, ctx): + # Get the list of processors for this run. + processors = self.app.plugin_loader.getProcessors() + if self.enabled_processors is not None: + logger.debug("Filtering processors to: %s" % + self.enabled_processors) + processors = get_filtered_processors(processors, + self.enabled_processors) + + # Invoke pre-processors. + proc_ctx = ProcessorContext(self, ctx) + for proc in processors: + proc.onPipelineStart(proc_ctx) + + # Add any extra processors registered in the `onPipelineStart` step. + processors += proc_ctx.extra_processors + + # Sort our processors by priority. + processors.sort(key=lambda p: p.priority) + + # Ok, that's the list of processors for this run. + self._processors = processors + + # Pre-processors can define additional ignore patterns so let's + # add them to what we had already. + self.ignore_patterns += make_re(proc_ctx.ignore_patterns) + + # Register timers. + stats = self.app.env.stats + stats.registerTimer('BuildProcessingTree', raise_if_registered=False) + stats.registerTimer('RunProcessingTree', raise_if_registered=False) + + def run(self, content_item, ctx, result): + # See if we need to ignore this item. + rel_path = os.path.relpath(content_item.spec, self._base_dir) + if re_matchany(rel_path, self.ignore_patterns): + return + + record = result.record + stats = self.app.env.stats + + # Build the processing tree for this job. + with stats.timerScope('BuildProcessingTree'): + builder = ProcessingTreeBuilder(self._processors) + tree_root = builder.build(rel_path) + record.flags |= AssetPipelineRecordEntry.FLAG_PREPARED + + # Prepare and run the tree. + print_node(tree_root, recursive=True) + leaves = tree_root.getLeaves() + record.rel_outputs = [l.path for l in leaves] + record.proc_tree = get_node_name_tree(tree_root) + if tree_root.getProcessor().is_bypassing_structured_processing: + record.flags |= ( + AssetPipelineRecordEntry.FLAG_BYPASSED_STRUCTURED_PROCESSING) + + if ctx.force: + tree_root.setState(STATE_DIRTY, True) + + with stats.timerScope('RunProcessingTree'): + runner = ProcessingTreeRunner( + self._base_dir, self.tmp_dir, ctx.out_dir) + if runner.processSubTree(tree_root): + record.flags |= ( + AssetPipelineRecordEntry.FLAG_PROCESSED) + + def shutdown(self, ctx): + # Invoke post-processors.
+ proc_ctx = ProcessorContext(self, ctx) + for proc in self._processors: + proc.onPipelineEnd(proc_ctx) + + def collapseRecords(self, record_history): + for prev, cur in record_history.diffs: + if prev and cur and not cur.was_processed: + # This asset wasn't processed, so the information from + # last time is still valid. + cur.flags = ( + (prev.flags & + ~AssetPipelineRecordEntry.FLAG_PROCESSED) | + AssetPipelineRecordEntry.FLAG_COLLAPSED_FROM_LAST_RUN) + cur.out_paths = list(prev.out_paths) + cur.errors = list(prev.errors) + + def getDeletions(self, record_history): + for prev, cur in record_history.diffs: + if prev and not cur: + for p in prev.out_paths: + yield (p, 'previous asset was removed') + elif prev and cur and cur.was_processed_successfully: + diff = set(prev.out_paths) - set(cur.out_paths) + for p in diff: + yield (p, 'asset changed outputs') + + +split_processor_names_re = re.compile(r'[ ,]+') + + +def get_filtered_processors(processors, authorized_names): + if not authorized_names or authorized_names == 'all': + return processors + + if isinstance(authorized_names, str): + authorized_names = split_processor_names_re.split(authorized_names) + + procs = [] + has_star = 'all' in authorized_names + for p in processors: + for name in authorized_names: + if name == p.PROCESSOR_NAME: + procs.append(p) + break + if name == ('-%s' % p.PROCESSOR_NAME): + break + else: + if has_star: + procs.append(p) + return procs + + +def make_re(patterns): + re_patterns = [] + for pat in patterns: + if pat[0] == '/' and pat[-1] == '/' and len(pat) > 2: + re_patterns.append(pat[1:-1]) + else: + escaped_pat = ( + re.escape(pat) + .replace(r'\*', r'[^/\\]*') + .replace(r'\?', r'[^/\\]')) + re_patterns.append(escaped_pat) + return [re.compile(p) for p in re_patterns] + + +def re_matchany(rel_path, patterns): + # skip patterns use a forward slash regardless of the platform. + rel_path = rel_path.replace('\\', '/') + for pattern in patterns: + if pattern.search(rel_path): + return True + return False + + +re_ansicolors = re.compile('\033\\[\d+m') + + +def _get_errors(ex, strip_colors=False): + errors = [] + while ex is not None: + msg = str(ex) + if strip_colors: + msg = re_ansicolors.sub('', msg) + errors.append(msg) + ex = ex.__cause__ + return errors +
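A quick illustration of the pattern helpers above (assuming `piecrust.pipelines.asset` is importable; `make_re` and `re_matchany` only depend on `re`). Glob-style patterns are escaped so that `*` and `?` never cross a directory separator, while slash-delimited patterns are compiled as raw regexes:

from piecrust.pipelines.asset import make_re, re_matchany

# Glob-style patterns: '*' and '?' won't match across '/' or '\'.
pats = make_re(['_*.scss', 'Thumbs.db'])
print(re_matchany('sass/_mixins.scss', pats))  # True
print(re_matchany('sass/main.scss', pats))     # False

# Slash-delimited patterns are used verbatim as regexes.
pats = make_re([r'/\.(git|hg|svn)/'])
print(re_matchany('.hgignore', pats))          # True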
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/base.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,73 @@ +import os.path +import logging + + +logger = logging.getLogger(__name__) + + +class PipelineContext: + """ The context for running a content pipeline. + """ + def __init__(self, out_dir, record_history, *, + worker_id=-1, force=None): + self.out_dir = out_dir + self.record_history = record_history + self.worker_id = worker_id + self.force = force + + @property + def is_worker(self): + """ Returns `True` if the content pipeline is running inside + a worker process. + """ + return self.worker_id >= 0 + + @property + def is_main_process(self): + """ Returns `True` if the content pipeline is running inside + the main process (and not a worker process). This is the case + when there are no worker processes at all. + """ + return self.worker_id < 0 + + +class PipelineResult: + def __init__(self, record): + self.record = record + + +class ContentPipeline: + """ A pipeline that processes content from a `ContentSource`. + """ + PIPELINE_NAME = None + PIPELINE_PASSES = 1 + RECORD_CLASS = None + + def __init__(self, source): + self.source = source + + app = source.app + tmp_dir = app.cache_dir + if not tmp_dir: + import tempfile + tmp_dir = os.path.join(tempfile.gettempdir(), 'piecrust') + self.tmp_dir = os.path.join(tmp_dir, self.PIPELINE_NAME) + + @property + def app(self): + return self.source.app + + def initialize(self, ctx): + pass + + def run(self, content_item, ctx, result): + raise NotImplementedError() + + def shutdown(self, ctx): + pass + + def collapseRecords(self, record_history): + pass + + def getDeletions(self, record_history): + pass
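The contract for pipeline implementors is intentionally small. Here is a hypothetical sketch (illustrative names, not part of the changeset) of the methods most pipelines override; `run()` receives one item from the pipeline's `ContentSource`, the `PipelineContext`, and a `PipelineResult` whose `record` carries the entry to fill in, as `AssetPipeline.run()` does above.

from piecrust.pipelines.base import ContentPipeline


class NoopPipeline(ContentPipeline):
    PIPELINE_NAME = 'noop'

    def initialize(self, ctx):
        # One-time setup; ctx is a PipelineContext, so ctx.out_dir,
        # ctx.record_history and ctx.force are available here.
        pass

    def run(self, content_item, ctx, result):
        # Process a single content item; write outputs under
        # ctx.out_dir and report through result.record.
        pass

    def shutdown(self, ctx):
        # Tear down anything created in initialize().
        pass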
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/page.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,373 @@ +import hashlib +from piecrust.pipelines.base import ContentPipeline + + +class PagePipeline(ContentPipeline): + PIPELINE_NAME = 'page' + PIPELINE_PASSES = 3 + + def initialize(self, ctx): + pass + + def run(self, content_item, ctx, result): + raise NotImplementedError() + + def shutdown(self, ctx): + pass + + def collapseRecords(self, record_history): + pass + + def getDeletions(self, record_history): + for prev, cur in record_history.diffs: + if prev and not cur: + for sub in prev.subs: + yield (sub.out_path, 'previous source file was removed') + elif prev and cur: + prev_out_paths = [o.out_path for o in prev.subs] + cur_out_paths = [o.out_path for o in cur.subs] + diff = set(prev_out_paths) - set(cur_out_paths) + for p in diff: + yield (p, 'source file changed outputs') + + +JOB_LOAD, JOB_RENDER_FIRST, JOB_BAKE = range(0, 3) + + +def _get_transition_key(path, extra_key=None): + key = path + if extra_key: + key += '+%s' % extra_key + return hashlib.md5(key.encode('utf8')).hexdigest() + + +# def getOverrideEntry(self, path, uri): +# for pair in self.transitions.values(): +# cur = pair[1] +# if cur and cur.path != path: +# for o in cur.subs: +# if o.out_uri == uri: +# return cur +# return None + + + +# # Create the job handlers. +# job_handlers = { +# JOB_LOAD: LoadJobHandler(self.ctx), +# JOB_RENDER_FIRST: RenderFirstSubJobHandler(self.ctx), +# JOB_BAKE: BakeJobHandler(self.ctx)} +# for jt, jh in job_handlers.items(): +# app.env.registerTimer(type(jh).__name__) +# self.job_handlers = job_handlers +# +# def process(self, job): +# handler = self.job_handlers[job['type']] +# with self.ctx.app.env.timerScope(type(handler).__name__): +# return handler.handleJob(job['job']) + +# def _loadRealmPages(self, record_history, pool, factories): +# def _handler(res): +# # Create the record entry for this page. +# # This will also update the `dirty_source_names` for the record +# # as we add page files whose last modification times are later +# # than the last bake. +# record_entry = BakeRecordEntry(res['source_name'], res['path']) +# record_entry.config = res['config'] +# record_entry.timestamp = res['timestamp'] +# if res['errors']: +# record_entry.errors += res['errors'] +# record_history.current.success = False +# self._logErrors(res['path'], res['errors']) +# record_history.addEntry(record_entry) +# +# logger.debug("Loading %d realm pages..." % len(factories)) +# with format_timed_scope(logger, +# "loaded %d pages" % len(factories), +# level=logging.DEBUG, colored=False, +# timer_env=self.app.env, +# timer_category='LoadJob'): +# jobs = [] +# for fac in factories: +# job = { +# 'type': JOB_LOAD, +# 'job': save_factory(fac)} +# jobs.append(job) +# ar = pool.queueJobs(jobs, handler=_handler) +# ar.wait() +# +# def _renderRealmPages(self, record_history, pool, factories): +# def _handler(res): +# entry = record_history.getCurrentEntry(res['path']) +# if res['errors']: +# entry.errors += res['errors'] +# record_history.current.success = False +# self._logErrors(res['path'], res['errors']) +# +# logger.debug("Rendering %d realm pages..."
% len(factories)) +# with format_timed_scope(logger, +# "prepared %d pages" % len(factories), +# level=logging.DEBUG, colored=False, +# timer_env=self.app.env, +# timer_category='RenderFirstSubJob'): +# jobs = [] +# for fac in factories: +# record_entry = record_history.getCurrentEntry(fac.path) +# if record_entry.errors: +# logger.debug("Ignoring %s because it had previous " +# "errors." % fac.ref_spec) +# continue +# +# # Make sure the source and the route exist for this page, +# # otherwise we add errors to the record entry and we'll skip +# # this page for the rest of the bake. +# source = self.app.getSource(fac.source.name) +# if source is None: +# record_entry.errors.append( +# "Can't get source for page: %s" % fac.ref_spec) +# logger.error(record_entry.errors[-1]) +# continue +# +# route = self.app.getSourceRoute(fac.source.name, fac.metadata) +# if route is None: +# record_entry.errors.append( +# "Can't get route for page: %s" % fac.ref_spec) +# logger.error(record_entry.errors[-1]) +# continue +# +# # All good, queue the job. +# route_index = self.app.routes.index(route) +# job = { +# 'type': JOB_RENDER_FIRST, +# 'job': { +# 'factory_info': save_factory(fac), +# 'route_index': route_index +# } +# } +# jobs.append(job) +# +# ar = pool.queueJobs(jobs, handler=_handler) +# ar.wait() +# +# def _bakeRealmPages(self, record_history, pool, realm, factories): +# def _handler(res): +# entry = record_history.getCurrentEntry(res['path']) +# entry.subs = res['sub_entries'] +# if res['errors']: +# entry.errors += res['errors'] +# self._logErrors(res['path'], res['errors']) +# if entry.has_any_error: +# record_history.current.success = False +# if entry.subs and entry.was_any_sub_baked: +# record_history.current.baked_count[realm] += 1 +# record_history.current.total_baked_count[realm] += len(entry.subs) +# +# logger.debug("Baking %d realm pages..." % len(factories)) +# with format_timed_scope(logger, +# "baked %d pages" % len(factories), +# level=logging.DEBUG, colored=False, +# timer_env=self.app.env, +# timer_category='BakeJob'): +# jobs = [] +# for fac in factories: +# job = self._makeBakeJob(record_history, fac) +# if job is not None: +# jobs.append(job) +# +# ar = pool.queueJobs(jobs, handler=_handler) +# ar.wait() +# + + +# def _makeBakeJob(self, record_history, fac): +# # Get the previous (if any) and current entry for this page. +# pair = record_history.getPreviousAndCurrentEntries(fac.path) +# assert pair is not None +# prev_entry, cur_entry = pair +# assert cur_entry is not None +# +# # Ignore if there were errors in the previous passes. +# if cur_entry.errors: +# logger.debug("Ignoring %s because it had previous " +# "errors." % fac.ref_spec) +# return None +# +# # Build the route metadata and find the appropriate route. +# page = fac.buildPage() +# route_metadata = create_route_metadata(page) +# route = self.app.getSourceRoute(fac.source.name, route_metadata) +# assert route is not None +# +# # Figure out if this page is overriden by another previously +# # baked page. This happens for example when the user has +# # made a page that has the same page/URL as a theme page. +# uri = route.getUri(route_metadata) +# override_entry = record_history.getOverrideEntry(page.path, uri) +# if override_entry is not None: +# override_source = self.app.getSource( +# override_entry.source_name) +# if override_source.realm == fac.source.realm: +# cur_entry.errors.append( +# "Page '%s' maps to URL '%s' but is overriden " +# "by page '%s'." 
% +# (fac.ref_spec, uri, override_entry.path)) +# logger.error(cur_entry.errors[-1]) +# cur_entry.flags |= BakeRecordEntry.FLAG_OVERRIDEN +# return None +# +# route_index = self.app.routes.index(route) +# job = { +# 'type': JOB_BAKE, +# 'job': { +# 'factory_info': save_factory(fac), +# 'generator_name': None, +# 'generator_record_key': None, +# 'route_index': route_index, +# 'route_metadata': route_metadata, +# 'dirty_source_names': record_history.dirty_source_names +# } +# } +# return job +# +# def _handleDeletetions(self, record_history): +# logger.debug("Handling deletions...") +# for path, reason in record_history.getDeletions(): +# logger.debug("Removing '%s': %s" % (path, reason)) +# record_history.current.deleted.append(path) +# try: +# os.remove(path) +# logger.info('[delete] %s' % path) +# except OSError: +# # Not a big deal if that file had already been removed +# # by the user. +# pass +# + + + +#def save_factory(fac): +# return { +# 'source_name': fac.source.name, +# 'rel_path': fac.rel_path, +# 'metadata': fac.metadata} +# +# +#def load_factory(app, info): +# source = app.getSource(info['source_name']) +# return PageFactory(source, info['rel_path'], info['metadata']) +# +# +#class LoadJobHandler(JobHandler): +# def handleJob(self, job): +# # Just make sure the page has been cached. +# fac = load_factory(self.app, job) +# logger.debug("Loading page: %s" % fac.ref_spec) +# self.app.env.addManifestEntry('LoadJobs', fac.ref_spec) +# result = { +# 'source_name': fac.source.name, +# 'path': fac.path, +# 'config': None, +# 'timestamp': None, +# 'errors': None} +# try: +# page = fac.buildPage() +# page._load() +# result['config'] = page.config.getAll() +# result['timestamp'] = page.datetime.timestamp() +# except Exception as ex: +# logger.debug("Got loading error. Sending it to master.") +# result['errors'] = _get_errors(ex) +# if self.ctx.app.debug: +# logger.exception(ex) +# return result +# +# +#class RenderFirstSubJobHandler(JobHandler): +# def handleJob(self, job): +# # Render the segments for the first sub-page of this page. +# fac = load_factory(self.app, job['factory_info']) +# self.app.env.addManifestEntry('RenderJobs', fac.ref_spec) +# +# route_index = job['route_index'] +# route = self.app.routes[route_index] +# +# page = fac.buildPage() +# qp = QualifiedPage(page, route, route_metadata) +# ctx = RenderingContext(qp) +# self.app.env.abort_source_use = True +# +# result = { +# 'path': fac.path, +# 'aborted': False, +# 'errors': None} +# logger.debug("Preparing page: %s" % fac.ref_spec) +# try: +# render_page_segments(ctx) +# except AbortedSourceUseError: +# logger.debug("Page %s was aborted." % fac.ref_spec) +# self.app.env.stepCounter("SourceUseAbortions") +# result['aborted'] = True +# except Exception as ex: +# logger.debug("Got rendering error. Sending it to master.") +# result['errors'] = _get_errors(ex) +# if self.ctx.app.debug: +# logger.exception(ex) +# finally: +# self.app.env.abort_source_use = False +# return result +# +# +#class BakeJobHandler(JobHandler): +# def __init__(self, ctx): +# super(BakeJobHandler, self).__init__(ctx) +# self.page_baker = PageBaker(ctx.app, ctx.out_dir, ctx.force) +# +# def shutdown(self): +# self.page_baker.shutdown() +# +# def handleJob(self, job): +# # Actually bake the page and all its sub-pages to the output folder. 
+# fac = load_factory(self.app, job['factory_info']) +# self.app.env.addManifestEntry('BakeJobs', fac.ref_spec) +# +# route_index = job['route_index'] +# route_metadata = job['route_metadata'] +# route = self.app.routes[route_index] +# +# gen_name = job['generator_name'] +# gen_key = job['generator_record_key'] +# dirty_source_names = job['dirty_source_names'] +# +# page = fac.buildPage() +# qp = QualifiedPage(page, route, route_metadata) +# +# result = { +# 'path': fac.path, +# 'generator_name': gen_name, +# 'generator_record_key': gen_key, +# 'sub_entries': None, +# 'errors': None} +# +# if job.get('needs_config', False): +# result['config'] = page.config.getAll() +# +# previous_entry = None +# if self.ctx.previous_record_index is not None: +# key = _get_transition_key(fac.path, gen_key) +# previous_entry = self.ctx.previous_record_index.get(key) +# +# logger.debug("Baking page: %s" % fac.ref_spec) +# logger.debug("With route metadata: %s" % route_metadata) +# try: +# sub_entries = self.page_baker.bake( +# qp, previous_entry, dirty_source_names, gen_name) +# result['sub_entries'] = sub_entries +# +# except Exception as ex: +# logger.debug("Got baking error. Sending it to master.") +# result['errors'] = _get_errors(ex) +# if self.ctx.app.debug: +# logger.exception(ex) +# +# return result +#
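Little of this file is live yet, but `_get_transition_key` already shows how record entries will be matched across bakes: the key is an MD5 of the page path, optionally disambiguated by an extra key (the commented-out bake handler passes the generator record key). The helper is duplicated below so the illustration runs standalone:

import hashlib


def _get_transition_key(path, extra_key=None):
    # Same helper as in piecrust/pipelines/page.py above.
    key = path
    if extra_key:
        key += '+%s' % extra_key
    return hashlib.md5(key.encode('utf8')).hexdigest()


# The same path yields a different record slot per extra key:
print(_get_transition_key('posts/2017-05-17_hello.md'))
print(_get_transition_key('posts/2017-05-17_hello.md', 'tags/piecrust'))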
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/pipelines/records.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,182 @@ +import os +import os.path +import pickle +import hashlib +import logging +from piecrust import APP_VERSION + + +logger = logging.getLogger(__name__) + + +class MultiRecord: + RECORD_VERSION = 12 + + def __init__(self): + self.records = [] + self.success = True + self.bake_time = 0 + self.incremental_count = 0 + self.invalidated = False + self.stats = None + self._app_version = APP_VERSION + self._record_version = self.RECORD_VERSION + + def getRecord(self, record_name, auto_create=True): + for r in self.records: + if r.name == record_name: + return r + if not auto_create: + return None + record = Record() + record.name = record_name + self.records.append(record) + return record + + def save(self, path): + path_dir = os.path.dirname(path) + if not os.path.isdir(path_dir): + os.makedirs(path_dir, 0o755) + + with open(path, 'wb') as fp: + pickle.dump(self, fp, pickle.HIGHEST_PROTOCOL) + + @staticmethod + def load(path): + logger.debug("Loading bake records from: %s" % path) + with open(path, 'rb') as fp: + return pickle.load(fp) + + +class Record: + def __init__(self): + self.name = None + self.entries = [] + self.stats = {} + self.out_dir = None + self.success = True + + +class RecordEntry: + def __init__(self): + self.item_spec = None + self.errors = [] + + @property + def success(self): + return len(self.errors) == 0 + + +def _are_records_valid(multi_record): + return (multi_record._app_version == APP_VERSION and + multi_record._record_version == MultiRecord.RECORD_VERSION) + + +def load_records(path): + try: + multi_record = MultiRecord.load(path) + except Exception as ex: + logger.debug("Error loading records from: %s" % path) + logger.debug(ex) + logger.debug("Will use empty records.") + multi_record = None + + was_invalid = False + if multi_record is not None and not _are_records_valid(multi_record): + logger.debug( + "Records from '%s' have old version: %s/%s." % + (path, multi_record._app_version, multi_record._record_version)) + logger.debug("Will use empty records.") + multi_record = None + was_invalid = True + + if multi_record is None: + multi_record = MultiRecord() + multi_record.invalidated = was_invalid + + return multi_record + + +def _build_diff_key(item_spec): + return hashlib.md5(item_spec.encode('utf8')).hexdigest() + + +class MultiRecordHistory: + def __init__(self, previous, current): + if previous is None or current is None: + raise ValueError() + + self.previous = previous + self.current = current + self.histories = [] + self._buildHistories(previous, current) + + def getHistory(self, record_name): + for h in self.histories: + if h.name == record_name: + return h + return None + + def _buildHistories(self, previous, current): + pairs = {} + if previous: + for r in previous.records: + pairs[r.name] = (r, None) + if current: + for r in current.records: + p = pairs.get(r.name, (None, None)) + if p[1] is not None: + raise Exception("Got several records named: %s" % r.name) + pairs[r.name] = (p[0], r) + + for p, c in pairs.values(): + self.histories.append(RecordHistory(p, c)) + + +class RecordHistory: + def __init__(self, previous, current): + self._diffs = {} + self._previous = previous + self._current = current + + if previous and current and previous.name != current.name: + raise Exception("The two records must have the same name! " + "Got '%s' and '%s'."
% + (previous.name, current.name)) + + self._buildDiffs() + + @property + def name(self): + return self._current.name + + @property + def current(self): + return self._current + + @property + def previous(self): + return self._previous + + @property + def diffs(self): + return self._diffs.values() + + def _buildDiffs(self): + if self._previous is not None: + for e in self._previous.entries: + key = _build_diff_key(e.item_spec) + self._diffs[key] = (e, None) + + if self._current is not None: + for e in self._current.entries: + key = _build_diff_key(e.item_spec) + diff = self._diffs.get(key) + if diff is None: + self._diffs[key] = (None, e) + elif diff[1] is None: + self._diffs[key] = (diff[0], e) + else: + raise Exception( + "A current record entry already exists for: %s" % key) +
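A small sketch of the diffing machinery (assuming `piecrust.pipelines.records` is importable): build a previous and a current `Record` by hand, then walk the `(previous, current)` pairs; this is exactly what `collapseRecords()` and `getDeletions()` in the pipelines above iterate over.

from piecrust.pipelines.records import Record, RecordEntry, RecordHistory


def make_entry(spec):
    e = RecordEntry()
    e.item_spec = spec
    return e


previous = Record()
previous.name = 'asset'
previous.entries = [make_entry('assets/foo.less'),
                    make_entry('assets/old.js')]

current = Record()
current.name = 'asset'
current.entries = [make_entry('assets/foo.less')]

history = RecordHistory(previous, current)
for prev, cur in history.diffs:
    if prev and not cur:
        print("deleted:", prev.item_spec)      # assets/old.js
    elif prev and cur:
        print("carried over:", cur.item_spec)  # assets/foo.less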
--- a/piecrust/plugins/base.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/plugins/base.py Wed May 17 00:11:48 2017 -0700 @@ -39,7 +39,7 @@ def getSources(self): return [] - def getPageGenerators(self): + def getPipelines(self): return [] def getPublishers(self): @@ -62,15 +62,15 @@ def getFormatters(self): return self._getPluginComponents( - 'getFormatters', - initialize=True, register_timer=True, - order_key=lambda f: f.priority) + 'getFormatters', + initialize=True, register_timer=True, + order_key=lambda f: f.priority) def getTemplateEngines(self): return self._getPluginComponents( - 'getTemplateEngines', - initialize=True, register_timer=True, - register_timer_suffixes=['_segment', '_layout']) + 'getTemplateEngines', + initialize=True, register_timer=True, + register_timer_suffixes=['_segment', '_layout']) def getTemplateEngineExtensions(self, engine_name): return self._getPluginComponents('getTemplateEngineExtensions', @@ -81,9 +81,9 @@ def getProcessors(self): return self._getPluginComponents( - 'getProcessors', - initialize=True, register_timer=True, - order_key=lambda p: p.priority) + 'getProcessors', + initialize=True, register_timer=True, + order_key=lambda p: p.priority) def getImporters(self): return self._getPluginComponents('getImporters') @@ -100,8 +100,8 @@ def getSources(self): return self._getPluginComponents('getSources') - def getPageGenerators(self): - return self._getPluginComponents('getPageGenerators') + def getPipelines(self): + return self._getPluginComponents('getPipelines') def getPublishers(self): return self._getPluginComponents('getPublishers') @@ -142,7 +142,6 @@ if mod is None: logger.error("Failed to load plugin '%s'." % plugin_name) - logger.error(ex) return plugin_class = getattr(mod, '__piecrust_plugin__', None) @@ -180,10 +179,11 @@ if register_timer: for comp in plugin_components: if not register_timer_suffixes: - self.app.env.registerTimer(comp.__class__.__name__) + self.app.env.stats.registerTimer( + comp.__class__.__name__) else: for s in register_timer_suffixes: - self.app.env.registerTimer( + self.app.env.stats.registerTimer( comp.__class__.__name__ + s) if order_key is not None:
--- a/piecrust/plugins/builtin.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/plugins/builtin.py Wed May 17 00:11:48 2017 -0700 @@ -53,29 +53,35 @@ DefaultPrepareTemplatesHelpTopic()] def getSources(self): - from piecrust.sources.default import DefaultPageSource + from piecrust.sources.autoconfig import ( + AutoConfigContentSource, OrderedContentSource) + from piecrust.sources.blogarchives import BlogArchivesSource + from piecrust.sources.default import DefaultContentSource + from piecrust.sources.fs import FSContentSource from piecrust.sources.posts import ( - FlatPostsSource, ShallowPostsSource, HierarchyPostsSource) - from piecrust.sources.autoconfig import ( - AutoConfigSource, OrderedPageSource) + FlatPostsSource, ShallowPostsSource, HierarchyPostsSource) from piecrust.sources.prose import ProseSource + from piecrust.sources.taxonomy import TaxonomySource return [ - DefaultPageSource, + AutoConfigContentSource, + BlogArchivesSource, + DefaultContentSource, + FSContentSource, FlatPostsSource, - ShallowPostsSource, HierarchyPostsSource, - AutoConfigSource, - OrderedPageSource, - ProseSource] + OrderedContentSource, + ProseSource, + ShallowPostsSource, + TaxonomySource] - def getPageGenerators(self): - from piecrust.generation.blogarchives import BlogArchivesPageGenerator - from piecrust.generation.taxonomy import TaxonomyPageGenerator + def getPipelines(self): + from piecrust.pipelines.page import PagePipeline + from piecrust.pipelines.asset import AssetPipeline return [ - TaxonomyPageGenerator, - BlogArchivesPageGenerator] + PagePipeline, + AssetPipeline] def getDataProviders(self): from piecrust.data.provider import ( @@ -107,10 +113,10 @@ TextileFormatter()] def getProcessors(self): - from piecrust.processing.base import CopyFileProcessor from piecrust.processing.compass import CompassProcessor from piecrust.processing.compressors import ( CleanCssProcessor, UglifyJSProcessor) + from piecrust.processing.copy import CopyFileProcessor from piecrust.processing.less import LessProcessor from piecrust.processing.pygments_style import PygmentsStyleProcessor from piecrust.processing.requirejs import RequireJSProcessor
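With `getPageGenerators` gone, third-party plugins contribute pipelines through the renamed hook. A hypothetical plugin module (all names illustrative; `__piecrust_plugin__` is the attribute the loader looks up, as seen in `plugins/base.py` above):

# myplugin.py -- hypothetical third-party plugin (illustrative only).
from piecrust.plugins.base import PieCrustPlugin
from piecrust.pipelines.base import ContentPipeline


class ThumbnailPipeline(ContentPipeline):
    PIPELINE_NAME = 'thumbnail'

    def run(self, content_item, ctx, result):
        pass  # e.g. resize images into ctx.out_dir


class MyPlugin(PieCrustPlugin):
    name = 'myplugin'

    def getPipelines(self):
        # Return pipeline classes, like the built-in plugin does.
        return [ThumbnailPipeline]


__piecrust_plugin__ = MyPlugin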
--- a/piecrust/processing/base.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/processing/base.py Wed May 17 00:11:48 2017 -0700 @@ -1,4 +1,3 @@ -import shutil import os.path import logging @@ -11,26 +10,31 @@ PRIORITY_LAST = 1 -class PipelineContext(object): - def __init__(self, worker_id, app, out_dir, tmp_dir, force=None): - self.worker_id = worker_id - self.app = app - self.out_dir = out_dir - self.tmp_dir = tmp_dir - self.force = force - self.record = None - self._additional_ignore_patterns = [] +FORCE_BUILD = object() + + +class ProcessorContext: + def __init__(self, pipeline, pipeline_ctx): + self.ignore_patterns = [] + self.extra_processors = [] + self._pipeline = pipeline + self._pipeline_ctx = pipeline_ctx @property - def is_first_worker(self): - return self.worker_id == 0 + def tmp_dir(self): + return self._pipeline.tmp_dir @property - def is_pipeline_process(self): - return self.worker_id < 0 + def out_dir(self): + return self._pipeline_ctx.out_dir - def addIgnorePatterns(self, patterns): - self._additional_ignore_patterns += patterns + @property + def worker_id(self): + return self._pipeline_ctx.worker_id + + @property + def is_main_process(self): + return self._pipeline_ctx.is_main_process class Processor(object): @@ -63,24 +67,12 @@ pass -class CopyFileProcessor(Processor): - PROCESSOR_NAME = 'copy' - - def __init__(self): - super(CopyFileProcessor, self).__init__() - self.priority = PRIORITY_LAST +class ExternalProcessException(Exception): + def __init__(self, stderr_data): + self.stderr_data = stderr_data - def matches(self, path): - return True - - def getOutputFilenames(self, filename): - return [filename] - - def process(self, path, out_dir): - out_path = os.path.join(out_dir, os.path.basename(path)) - logger.debug("Copying: %s -> %s" % (path, out_path)) - shutil.copyfile(path, out_path) - return True + def __str__(self): + return self.stderr_data class SimpleFileProcessor(Processor): @@ -109,12 +101,3 @@ def _doProcess(self, in_path, out_path): raise NotImplementedError() - -class ExternalProcessException(Exception): - def __init__(self, stderr_data): - self.stderr_data = stderr_data - - def __str__(self): - return self.stderr_data - -
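The new `ProcessorContext` replaces the `PipelineContext` that processors used to receive in `onPipelineStart`/`onPipelineEnd`: ignore patterns and extra processors are now plain lists to mutate, and `FORCE_BUILD` is the sentinel `getDependencies()` can return to opt out of mtime-based dependency checking (see the LESS, RequireJS and Sass diffs below). A hypothetical processor sketch:

from piecrust.processing.base import Processor, FORCE_BUILD


class MyScssProcessor(Processor):
    # Illustrative only; not part of this changeset.
    PROCESSOR_NAME = 'myscss'

    def onPipelineStart(self, ctx):
        super().onPipelineStart(ctx)
        # Pre-processors can extend the pipeline's skip list
        # (and append to ctx.extra_processors, like RequireJS does).
        ctx.ignore_patterns += ['_*.scss']

    def matches(self, path):
        return path.endswith('.scss')

    def getOutputFilenames(self, filename):
        return [filename[:-5] + '.css']

    def getDependencies(self, path):
        # @import graphs are hard to track; force a rebuild instead.
        return FORCE_BUILD

    def process(self, path, out_dir):
        # ...compile here; return True if an output was written.
        return True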
--- a/piecrust/processing/compass.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/processing/compass.py Wed May 17 00:11:48 2017 -0700 @@ -28,13 +28,13 @@ def initialize(self, app): super(CompassProcessor, self).initialize(app) - def onPipelineStart(self, pipeline): - super(CompassProcessor, self).onPipelineStart(pipeline) - self._maybeActivate(pipeline) + def onPipelineStart(self, ctx): + super(CompassProcessor, self).onPipelineStart(ctx) + self._maybeActivate(ctx) - def onPipelineEnd(self, pipeline): - super(CompassProcessor, self).onPipelineEnd(pipeline) - self._maybeRunCompass(pipeline) + def onPipelineEnd(self, ctx): + super(CompassProcessor, self).onPipelineEnd(ctx) + self._maybeRunCompass(ctx) def matches(self, path): if self._state != self.STATE_ACTIVE: @@ -62,7 +62,7 @@ "is done.") self._runInSite = True - def _maybeActivate(self, pipeline): + def _maybeActivate(self, ctx): if self._state != self.STATE_UNKNOWN: return @@ -95,17 +95,17 @@ if custom_args: self._args += ' ' + custom_args - out_dir = pipeline.out_dir - tmp_dir = os.path.join(pipeline.tmp_dir, 'compass') + out_dir = ctx.out_dir + tmp_dir = os.path.join(ctx.tmp_dir, 'compass') self._args = multi_replace( - self._args, - {'%out_dir%': out_dir, - '%tmp_dir%': tmp_dir}) + self._args, + {'%out_dir%': out_dir, + '%tmp_dir%': tmp_dir}) self._runInSite = False self._runInTheme = False - def _maybeRunCompass(self, pipeline): + def _maybeRunCompass(self, ctx): if self._state != self.STATE_ACTIVE: return
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/processing/copy.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,27 @@ +import os.path +import shutil +import logging +from piecrust.processing.base import Processor, PRIORITY_LAST + + +logger = logging.getLogger(__name__) + + +class CopyFileProcessor(Processor): + PROCESSOR_NAME = 'copy' + + def __init__(self): + super(CopyFileProcessor, self).__init__() + self.priority = PRIORITY_LAST + + def matches(self, path): + return True + + def getOutputFilenames(self, filename): + return [filename] + + def process(self, path, out_dir): + out_path = os.path.join(out_dir, os.path.basename(path)) + logger.debug("Copying: %s -> %s" % (path, out_path)) + shutil.copyfile(path, out_path) + return True
--- a/piecrust/processing/less.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/processing/less.py Wed May 17 00:11:48 2017 -0700 @@ -7,8 +7,7 @@ import platform import subprocess from piecrust.processing.base import ( - SimpleFileProcessor, ExternalProcessException) -from piecrust.processing.tree import FORCE_BUILD + SimpleFileProcessor, ExternalProcessException, FORCE_BUILD) logger = logging.getLogger(__name__) @@ -22,9 +21,9 @@ self._conf = None self._map_dir = None - def onPipelineStart(self, pipeline): - self._map_dir = os.path.join(pipeline.tmp_dir, 'less') - if (pipeline.is_first_worker and + def onPipelineStart(self, ctx): + self._map_dir = os.path.join(ctx.tmp_dir, 'less') + if (ctx.is_main_process and not os.path.isdir(self._map_dir)): os.makedirs(self._map_dir) @@ -59,7 +58,7 @@ map_path = self._getMapPath(in_path) map_url = '/' + os.path.relpath( - map_path, self.app.root_dir).replace('\\', '/') + map_path, self.app.root_dir).replace('\\', '/') # On Windows, it looks like LESSC is confused with paths when the # map file is not to be created in the same directory as the input @@ -67,8 +66,8 @@ # a mix of relative and absolute paths stuck together). # So create it there and move it afterwards... :( temp_map_path = os.path.join( - os.path.dirname(in_path), - os.path.basename(map_path)) + os.path.dirname(in_path), + os.path.basename(map_path)) args = [self._conf['bin'], '--source-map=%s' % temp_map_path, @@ -83,8 +82,8 @@ shell = (platform.system() == 'Windows') try: proc = subprocess.Popen( - args, shell=shell, - stderr=subprocess.PIPE) + args, shell=shell, + stderr=subprocess.PIPE) stdout_data, stderr_data = proc.communicate() except FileNotFoundError as ex: logger.error("Tried running LESS processor with command: %s" % @@ -93,7 +92,7 @@ "Did you install it?") from ex if proc.returncode != 0: raise ExternalProcessException( - stderr_data.decode(sys.stderr.encoding)) + stderr_data.decode(sys.stderr.encoding)) logger.debug("Moving map file: %s -> %s" % (temp_map_path, map_path)) if os.path.exists(map_path): @@ -115,8 +114,8 @@ def _getMapPath(self, path): map_name = "%s_%s.map" % ( - os.path.basename(path), - hashlib.md5(path.encode('utf8')).hexdigest()) + os.path.basename(path), + hashlib.md5(path.encode('utf8')).hexdigest()) map_path = os.path.join(self._map_dir, map_name) return map_path
--- a/piecrust/processing/pipeline.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,325 +0,0 @@ -import os -import os.path -import re -import time -import hashlib -import logging -import multiprocessing -from piecrust.chefutil import format_timed, format_timed_scope -from piecrust.environment import ExecutionStats -from piecrust.processing.base import PipelineContext -from piecrust.processing.records import ( - ProcessorPipelineRecordEntry, TransitionalProcessorPipelineRecord, - FLAG_PROCESSED) -from piecrust.processing.worker import ( - ProcessingWorkerJob, - get_filtered_processors) - - -logger = logging.getLogger(__name__) - - -class _ProcessingContext(object): - def __init__(self, jobs, record, base_dir, mount_info): - self.jobs = jobs - self.record = record - self.base_dir = base_dir - self.mount_info = mount_info - - -class ProcessorPipeline(object): - def __init__(self, app, out_dir, force=False, - applied_config_variant=None, - applied_config_values=None): - assert app and out_dir - self.app = app - self.out_dir = out_dir - self.force = force - self.applied_config_variant = applied_config_variant - self.applied_config_values = applied_config_values - - tmp_dir = app.cache_dir - if not tmp_dir: - import tempfile - tmp_dir = os.path.join(tempfile.gettempdir(), 'piecrust') - self.tmp_dir = os.path.join(tmp_dir, 'proc') - - baker_params = app.config.get('baker', {}) - - mount_params = baker_params.get('assets_dirs', {}) - self.mounts = make_mount_infos(app, mount_params) - - self.num_workers = baker_params.get( - 'workers', multiprocessing.cpu_count()) - - ignores = baker_params.get('ignore', []) - ignores += [ - '_cache', '_counter', - '.DS_Store', 'Thumbs.db', - '.git*', '.hg*', '.svn'] - self.ignore_patterns = make_re(ignores) - self.force_patterns = make_re(baker_params.get('force', [])) - - # Those things are mostly for unit-testing. - # - # Note that additiona processors can't be passed as instances. - # Instead, we need some factory functions because we need to create - # one instance right away to use during the initialization phase, and - # another instance to pass to the worker pool. The initialized one will - # be tied to the PieCrust app instance, which can't be pickled across - # processes. - self.enabled_processors = None - self.additional_processors_factories = None - - def addIgnorePatterns(self, patterns): - self.ignore_patterns += make_re(patterns) - - def run(self, src_dir_or_file=None, *, - delete=True, previous_record=None, save_record=True): - start_time = time.perf_counter() - - # Get the list of processors for this run. - processors = self.app.plugin_loader.getProcessors() - if self.enabled_processors is not None: - logger.debug("Filtering processors to: %s" % - self.enabled_processors) - processors = get_filtered_processors(processors, - self.enabled_processors) - if self.additional_processors_factories is not None: - logger.debug("Adding %s additional processors." % - len(self.additional_processors_factories)) - for proc_fac in self.additional_processors_factories: - proc = proc_fac() - self.app.env.registerTimer(proc.__class__.__name__, - raise_if_registered=False) - proc.initialize(self.app) - processors.append(proc) - - # Invoke pre-processors. - pipeline_ctx = PipelineContext(-1, self.app, self.out_dir, - self.tmp_dir, self.force) - for proc in processors: - proc.onPipelineStart(pipeline_ctx) - - # Pre-processors can define additional ignore patterns. 
- self.ignore_patterns += make_re( - pipeline_ctx._additional_ignore_patterns) - - # Create the pipeline record. - record = TransitionalProcessorPipelineRecord() - record_cache = self.app.cache.getCache('proc') - record_name = ( - hashlib.md5(self.out_dir.encode('utf8')).hexdigest() + - '.record') - if previous_record: - record.setPrevious(previous_record) - elif not self.force and record_cache.has(record_name): - with format_timed_scope(logger, 'loaded previous bake record', - level=logging.DEBUG, colored=False): - record.loadPrevious(record_cache.getCachePath(record_name)) - logger.debug("Got %d entries in process record." % - len(record.previous.entries)) - record.current.success = True - record.current.processed_count = 0 - - # Work! - def _handler(res): - entry = record.getCurrentEntry(res.path) - assert entry is not None - entry.flags = res.flags - entry.proc_tree = res.proc_tree - entry.rel_outputs = res.rel_outputs - if entry.flags & FLAG_PROCESSED: - record.current.processed_count += 1 - if res.errors: - entry.errors += res.errors - record.current.success = False - - rel_path = os.path.relpath(res.path, self.app.root_dir) - logger.error("Errors found in %s:" % rel_path) - for e in entry.errors: - logger.error(" " + e) - - jobs = [] - self._process(src_dir_or_file, record, jobs) - pool = self._createWorkerPool() - ar = pool.queueJobs(jobs, handler=_handler) - ar.wait() - - # Shutdown the workers and get timing information from them. - reports = pool.close() - total_stats = ExecutionStats() - record.current.stats['_Total'] = total_stats - for i in range(len(reports)): - worker_stats = reports[i]['data'] - if worker_stats is not None: - worker_name = 'PipelineWorker_%d' % i - record.current.stats[worker_name] = worker_stats - total_stats.mergeStats(worker_stats) - - # Invoke post-processors. - pipeline_ctx.record = record.current - for proc in processors: - proc.onPipelineEnd(pipeline_ctx) - - # Handle deletions. - if delete: - for path, reason in record.getDeletions(): - logger.debug("Removing '%s': %s" % (path, reason)) - record.current.deleted.append(path) - try: - os.remove(path) - except FileNotFoundError: - pass - logger.info('[delete] %s' % path) - - # Finalize the process record. - record.current.process_time = time.time() - record.current.out_dir = self.out_dir - record.collapseRecords() - - # Save the process record. - if save_record: - with format_timed_scope(logger, 'saved bake record', - level=logging.DEBUG, colored=False): - record.saveCurrent(record_cache.getCachePath(record_name)) - - logger.info(format_timed( - start_time, - "processed %d assets." % record.current.processed_count)) - - return record.detach() - - def _process(self, src_dir_or_file, record, jobs): - if src_dir_or_file is not None: - # Process only the given path. - # Find out what mount point this is in. - for path, info in self.mounts.items(): - if src_dir_or_file[:len(path)] == path: - base_dir = path - mount_info = info - break - else: - known_roots = list(self.mounts.keys()) - raise Exception("Input path '%s' is not part of any known " - "mount point: %s" % - (src_dir_or_file, known_roots)) - - ctx = _ProcessingContext(jobs, record, base_dir, mount_info) - logger.debug("Initiating processing pipeline on: %s" % - src_dir_or_file) - if os.path.isdir(src_dir_or_file): - self._processDirectory(ctx, src_dir_or_file) - elif os.path.isfile(src_dir_or_file): - self._processFile(ctx, src_dir_or_file) - - else: - # Process everything. 
- for path, info in self.mounts.items(): - ctx = _ProcessingContext(jobs, record, path, info) - logger.debug("Initiating processing pipeline on: %s" % path) - self._processDirectory(ctx, path) - - def _processDirectory(self, ctx, start_dir): - for dirpath, dirnames, filenames in os.walk(start_dir): - rel_dirpath = os.path.relpath(dirpath, start_dir) - dirnames[:] = [d for d in dirnames - if not re_matchany( - d, self.ignore_patterns, rel_dirpath)] - - for filename in filenames: - if re_matchany(filename, self.ignore_patterns, rel_dirpath): - continue - self._processFile(ctx, os.path.join(dirpath, filename)) - - def _processFile(self, ctx, path): - # TODO: handle overrides between mount-points. - - entry = ProcessorPipelineRecordEntry(path) - ctx.record.addEntry(entry) - - previous_entry = ctx.record.getPreviousEntry(path) - force_this = (self.force or previous_entry is None or - not previous_entry.was_processed_successfully) - - job = ProcessingWorkerJob(ctx.base_dir, ctx.mount_info, path, - force=force_this) - ctx.jobs.append(job) - - def _createWorkerPool(self): - from piecrust.app import PieCrustFactory - from piecrust.workerpool import WorkerPool - from piecrust.processing.worker import ( - ProcessingWorkerContext, ProcessingWorker) - - appfactory = PieCrustFactory( - self.app.root_dir, - cache=self.app.cache.enabled, - cache_key=self.app.cache_key, - config_variant=self.applied_config_variant, - config_values=self.applied_config_values, - debug=self.app.debug, - theme_site=self.app.theme_site) - - ctx = ProcessingWorkerContext( - appfactory, - self.out_dir, self.tmp_dir, - force=self.force) - ctx.enabled_processors = self.enabled_processors - if self.additional_processors_factories is not None: - ctx.additional_processors = [ - proc_fac() - for proc_fac in self.additional_processors_factories] - - pool = WorkerPool( - worker_class=ProcessingWorker, - initargs=(ctx,)) - return pool - - -def make_mount_infos(app, mount_params): - mounts = {d: {} for d in app.assets_dirs} - - for name, cfg in mount_params.items(): - mdir = os.path.join(app.root_dir, name) - mounts[mdir] = cfg - - for mdir, info in mounts.items(): - mname = os.path.basename(mdir) - info_from_config = mount_params.get(mname) - if info_from_config is not None: - if not isinstance(info, dict): - raise Exception("Asset directory info for '%s' is not a " - "dictionary." % mname) - info.update(info_from_config) - info.setdefault('processors', 'all -uglifyjs -cleancss') - info['name'] = mname - - return mounts - - -def make_re(patterns): - re_patterns = [] - for pat in patterns: - if pat[0] == '/' and pat[-1] == '/' and len(pat) > 2: - re_patterns.append(pat[1:-1]) - else: - escaped_pat = ( - re.escape(pat) - .replace(r'\*', r'[^/\\]*') - .replace(r'\?', r'[^/\\]')) - re_patterns.append(escaped_pat) - return [re.compile(p) for p in re_patterns] - - -def re_matchany(filename, patterns, dirname=None): - if dirname and dirname != '.': - filename = os.path.join(dirname, filename) - - # skip patterns use a forward slash regardless of the platform. - filename = filename.replace('\\', '/') - for pattern in patterns: - if pattern.search(filename): - return True - return False -
--- a/piecrust/processing/records.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -import os.path -import hashlib -from piecrust.records import Record, TransitionalRecord - - -class ProcessorPipelineRecord(Record): - RECORD_VERSION = 7 - - def __init__(self): - super(ProcessorPipelineRecord, self).__init__() - self.out_dir = None - self.process_time = None - self.processed_count = 0 - self.deleted = [] - self.success = False - - -FLAG_NONE = 0 -FLAG_PREPARED = 2**0 -FLAG_PROCESSED = 2**1 -FLAG_BYPASSED_STRUCTURED_PROCESSING = 2**3 -FLAG_COLLAPSED_FROM_LAST_RUN = 2**4 - - -def _get_transition_key(path): - return hashlib.md5(path.encode('utf8')).hexdigest() - - -class ProcessorPipelineRecordEntry(object): - def __init__(self, path): - self.path = path - - self.flags = FLAG_NONE - self.rel_outputs = [] - self.proc_tree = None - self.errors = [] - - @property - def was_prepared(self): - return bool(self.flags & FLAG_PREPARED) - - @property - def was_processed(self): - return (self.was_prepared and - (bool(self.flags & FLAG_PROCESSED) or len(self.errors) > 0)) - - @property - def was_processed_successfully(self): - return self.was_processed and not self.errors - - @property - def was_collapsed_from_last_run(self): - return self.flags & FLAG_COLLAPSED_FROM_LAST_RUN - - -class TransitionalProcessorPipelineRecord(TransitionalRecord): - def __init__(self, previous_path=None): - super(TransitionalProcessorPipelineRecord, self).__init__( - ProcessorPipelineRecord, previous_path) - - def getTransitionKey(self, entry): - return _get_transition_key(entry.path) - - def getCurrentEntry(self, path): - key = _get_transition_key(path) - pair = self.transitions.get(key) - if pair is not None: - return pair[1] - return None - - def getPreviousEntry(self, path): - key = _get_transition_key(path) - pair = self.transitions.get(key) - if pair is not None: - return pair[0] - return None - - def collapseRecords(self): - for prev, cur in self.transitions.values(): - if prev and cur and not cur.was_processed: - # This asset wasn't processed, so the information from - # last time is still valid. - cur.flags = (prev.flags - & ~FLAG_PROCESSED - | FLAG_COLLAPSED_FROM_LAST_RUN) - cur.rel_outputs = list(prev.rel_outputs) - cur.errors = list(prev.errors) - - def getDeletions(self): - for prev, cur in self.transitions.values(): - if prev and not cur: - for p in prev.rel_outputs: - abs_p = os.path.join(self.previous.out_dir, p) - yield (abs_p, 'previous asset was removed') - elif prev and cur and cur.was_processed_successfully: - diff = set(prev.rel_outputs) - set(cur.rel_outputs) - for p in diff: - abs_p = os.path.join(self.previous.out_dir, p) - yield (abs_p, 'asset changed outputs') -
--- a/piecrust/processing/requirejs.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/processing/requirejs.py Wed May 17 00:11:48 2017 -0700 @@ -1,12 +1,9 @@ import os import os.path -import json -import hashlib import logging import platform import subprocess -from piecrust.processing.base import Processor, PRIORITY_FIRST -from piecrust.processing.tree import FORCE_BUILD +from piecrust.processing.base import Processor, PRIORITY_FIRST, FORCE_BUILD logger = logging.getLogger(__name__) @@ -33,15 +30,15 @@ self._conf.setdefault('bin', 'r.js') self._conf.setdefault('out_path', self._conf['build_path']) - def onPipelineStart(self, pipeline): - super(RequireJSProcessor, self).onPipelineStart(pipeline) + def onPipelineStart(self, ctx): + super(RequireJSProcessor, self).onPipelineStart(ctx) if self._conf is None: return logger.debug("Adding Javascript suppressor to build pipeline.") skip = _JavascriptSkipProcessor(self._conf['build_path']) - pipeline.processors.append(skip) + ctx.extra_processors.append(skip) def matches(self, path): if self._conf is None:
--- a/piecrust/processing/sass.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/processing/sass.py Wed May 17 00:11:48 2017 -0700 @@ -5,8 +5,7 @@ import logging import platform import subprocess -from piecrust.processing.base import SimpleFileProcessor -from piecrust.processing.tree import FORCE_BUILD +from piecrust.processing.base import SimpleFileProcessor, FORCE_BUILD logger = logging.getLogger(__name__) @@ -17,23 +16,23 @@ def __init__(self): super(SassProcessor, self).__init__( - extensions={'scss': 'css', 'sass': 'css'}) + extensions={'scss': 'css', 'sass': 'css'}) self._conf = None self._map_dir = None def initialize(self, app): super(SassProcessor, self).initialize(app) - def onPipelineStart(self, pipeline): - super(SassProcessor, self).onPipelineStart(pipeline) + def onPipelineStart(self, ctx): + super(SassProcessor, self).onPipelineStart(ctx) - self._map_dir = os.path.join(pipeline.tmp_dir, 'sass') - if pipeline.is_first_worker: + self._map_dir = os.path.join(ctx.tmp_dir, 'sass') + if ctx.is_main_process: if not os.path.isdir(self._map_dir): os.makedirs(self._map_dir) # Ignore include-only Sass files. - pipeline.addIgnorePatterns(['_*.scss', '_*.sass']) + ctx.ignore_patterns += ['_*.scss', '_*.sass'] def getDependencies(self, path): if _is_include_only(path):
--- a/piecrust/processing/sitemap.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/processing/sitemap.py Wed May 17 00:11:48 2017 -0700 @@ -3,16 +3,15 @@ import yaml from piecrust.data.iterators import PageIterator from piecrust.processing.base import SimpleFileProcessor -from piecrust.routing import create_route_metadata logger = logging.getLogger(__name__) SITEMAP_HEADER = \ -"""<?xml version="1.0" encoding="utf-8"?> -<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> -""" + ('<?xml version="1.0" encoding="utf-8"?>\n' + '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n' + ) SITEMAP_FOOTER = "</urlset>\n" SITEURL_HEADER = " <url>\n" @@ -30,7 +29,7 @@ super(SitemapProcessor, self).__init__({'sitemap': 'xml'}) self._start_time = None - def onPipelineStart(self, pipeline): + def onPipelineStart(self, ctx): self._start_time = time.time() def _doProcess(self, in_path, out_path):
--- a/piecrust/processing/tree.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,299 +0,0 @@ -import os -import time -import os.path -import logging -from piecrust.chefutil import format_timed - - -logger = logging.getLogger(__name__) - - -STATE_UNKNOWN = 0 -STATE_DIRTY = 1 -STATE_CLEAN = 2 - - -FORCE_BUILD = object() - - -class ProcessingTreeError(Exception): - pass - - -class ProcessorNotFoundError(ProcessingTreeError): - pass - - -class ProcessorError(ProcessingTreeError): - def __init__(self, proc_name, in_path, *args): - super(ProcessorError, self).__init__(*args) - self.proc_name = proc_name - self.in_path = in_path - - def __str__(self): - return "Processor %s failed on: %s" % (self.proc_name, self.in_path) - - -class ProcessingTreeNode(object): - def __init__(self, path, available_procs, level=0): - self.path = path - self.available_procs = available_procs - self.outputs = [] - self.level = level - self.state = STATE_UNKNOWN - self._processor = None - - def getProcessor(self): - if self._processor is None: - for p in self.available_procs: - if p.matches(self.path): - self._processor = p - self.available_procs.remove(p) - break - else: - raise ProcessorNotFoundError() - return self._processor - - def setState(self, state, recursive=True): - self.state = state - if recursive: - for o in self.outputs: - o.setState(state, True) - - @property - def is_leaf(self): - return len(self.outputs) == 0 - - def getLeaves(self): - if self.is_leaf: - return [self] - leaves = [] - for o in self.outputs: - for l in o.getLeaves(): - leaves.append(l) - return leaves - - -class ProcessingTreeBuilder(object): - def __init__(self, processors): - self.processors = processors - - def build(self, path): - tree_root = ProcessingTreeNode(path, list(self.processors)) - - loop_guard = 100 - walk_stack = [tree_root] - while len(walk_stack) > 0: - loop_guard -= 1 - if loop_guard <= 0: - raise ProcessingTreeError("Infinite loop detected!") - - cur_node = walk_stack.pop() - proc = cur_node.getProcessor() - - # If the root tree node (and only that one) wants to bypass this - # whole tree business, so be it. - if proc.is_bypassing_structured_processing: - if cur_node != tree_root: - raise ProcessingTreeError("Only root processors can " - "bypass structured processing.") - break - - # Get the destination directory and output files. 
- rel_dir, basename = os.path.split(cur_node.path) - out_names = proc.getOutputFilenames(basename) - if out_names is None: - continue - - for n in out_names: - out_node = ProcessingTreeNode( - os.path.join(rel_dir, n), - list(cur_node.available_procs), - cur_node.level + 1) - cur_node.outputs.append(out_node) - - if proc.PROCESSOR_NAME != 'copy': - walk_stack.append(out_node) - - return tree_root - - -class ProcessingTreeRunner(object): - def __init__(self, base_dir, tmp_dir, out_dir): - self.base_dir = base_dir - self.tmp_dir = tmp_dir - self.out_dir = out_dir - - def processSubTree(self, tree_root): - did_process = False - walk_stack = [tree_root] - while len(walk_stack) > 0: - cur_node = walk_stack.pop() - - self._computeNodeState(cur_node) - if cur_node.state == STATE_DIRTY: - did_process_this_node = self.processNode(cur_node) - did_process |= did_process_this_node - - if did_process_this_node: - for o in cur_node.outputs: - if not o.is_leaf: - walk_stack.append(o) - else: - for o in cur_node.outputs: - if not o.is_leaf: - walk_stack.append(o) - return did_process - - def processNode(self, node): - full_path = self._getNodePath(node) - proc = node.getProcessor() - if proc.is_bypassing_structured_processing: - try: - start_time = time.perf_counter() - with proc.app.env.timerScope(proc.__class__.__name__): - proc.process(full_path, self.out_dir) - print_node( - node, - format_timed( - start_time, "(bypassing structured processing)", - colored=False)) - return True - except Exception as e: - raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e - - # All outputs of a node must go to the same directory, so we can get - # the output directory off of the first output. - base_out_dir = self._getNodeBaseDir(node.outputs[0]) - rel_out_dir = os.path.dirname(node.path) - out_dir = os.path.join(base_out_dir, rel_out_dir) - if not os.path.isdir(out_dir): - try: - os.makedirs(out_dir, 0o755, exist_ok=True) - except OSError: - pass - - try: - start_time = time.perf_counter() - with proc.app.env.timerScope(proc.__class__.__name__): - proc_res = proc.process(full_path, out_dir) - if proc_res is None: - raise Exception("Processor '%s' didn't return a boolean " - "result value." % proc) - if proc_res: - print_node(node, "-> %s" % out_dir) - return True - else: - print_node(node, "-> %s [clean]" % out_dir) - return False - except Exception as e: - raise ProcessorError(proc.PROCESSOR_NAME, full_path) from e - - def _computeNodeState(self, node): - if node.state != STATE_UNKNOWN: - return - - proc = node.getProcessor() - if (proc.is_bypassing_structured_processing or - not proc.is_delegating_dependency_check): - # This processor wants to handle things on its own... - node.setState(STATE_DIRTY, False) - return - - start_time = time.perf_counter() - - # Get paths and modification times for the input path and - # all dependencies (if any). - base_dir = self._getNodeBaseDir(node) - full_path = os.path.join(base_dir, node.path) - in_mtime = (full_path, os.path.getmtime(full_path)) - force_build = False - try: - deps = proc.getDependencies(full_path) - if deps == FORCE_BUILD: - force_build = True - elif deps is not None: - for dep in deps: - dep_mtime = os.path.getmtime(dep) - if dep_mtime > in_mtime[1]: - in_mtime = (dep, dep_mtime) - except Exception as e: - logger.warning("%s -- Will force-bake: %s" % (e, node.path)) - node.setState(STATE_DIRTY, True) - return - - if force_build: - # Just do what the processor told us to do. 
- node.setState(STATE_DIRTY, True) - message = "Processor requested a forced build." - print_node(node, message) - else: - # Get paths and modification times for the outputs. - message = None - for o in node.outputs: - full_out_path = self._getNodePath(o) - if not os.path.isfile(full_out_path): - message = "Output '%s' doesn't exist." % o.path - break - o_mtime = os.path.getmtime(full_out_path) - if o_mtime < in_mtime[1]: - message = "Input '%s' is newer than output '%s'." % ( - in_mtime[0], o.path) - break - if message is not None: - node.setState(STATE_DIRTY, True) - message += " Re-processing sub-tree." - print_node(node, message) - else: - node.setState(STATE_CLEAN, False) - - if node.state == STATE_DIRTY: - state = "dirty" - elif node.state == STATE_CLEAN: - state = "clean" - else: - state = "unknown" - logger.debug(format_timed(start_time, - "Computed node dirtyness: %s" % state, - indent_level=node.level, colored=False)) - - def _getNodeBaseDir(self, node): - if node.level == 0: - return self.base_dir - if node.is_leaf: - return self.out_dir - return os.path.join(self.tmp_dir, str(node.level)) - - def _getNodePath(self, node): - base_dir = self._getNodeBaseDir(node) - return os.path.join(base_dir, node.path) - - -def print_node(node, message=None, recursive=False): - indent = ' ' * node.level - try: - proc_name = node.getProcessor().PROCESSOR_NAME - except ProcessorNotFoundError: - proc_name = 'n/a' - - message = message or '' - logger.debug('%s%s [%s] %s' % (indent, node.path, proc_name, message)) - - if recursive: - for o in node.outputs: - print_node(o, None, True) - - -def get_node_name_tree(node): - try: - proc_name = node.getProcessor().PROCESSOR_NAME - except ProcessorNotFoundError: - proc_name = 'n/a' - - children = [] - for o in node.outputs: - if not o.outputs: - continue - children.append(get_node_name_tree(o)) - return (proc_name, children) -
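The `_computeNodeState` method deleted above held the incremental-processing rule that any replacement pipeline has to reproduce: an output is dirty if it is missing, or older than the newest of its input and that input's declared dependencies. A minimal standalone sketch of that mtime comparison, with illustrative names that are not part of PieCrust:

    import os.path

    def is_dirty(in_path, out_paths, dep_paths=()):
        # Newest modification time among the input and its dependencies.
        newest_in = os.path.getmtime(in_path)
        for dep in dep_paths:
            newest_in = max(newest_in, os.path.getmtime(dep))
        # Dirty if any output is missing or older than that time.
        for out in out_paths:
            if not os.path.isfile(out):
                return True
            if os.path.getmtime(out) < newest_in:
                return True
        return False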
--- a/piecrust/processing/worker.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,185 +0,0 @@ -import re -import os.path -import time -import logging -from piecrust.app import PieCrust, apply_variant_and_values -from piecrust.processing.base import PipelineContext -from piecrust.processing.records import ( - FLAG_NONE, FLAG_PREPARED, FLAG_PROCESSED, - FLAG_BYPASSED_STRUCTURED_PROCESSING) -from piecrust.processing.tree import ( - ProcessingTreeBuilder, ProcessingTreeRunner, - ProcessingTreeError, ProcessorError, - get_node_name_tree, print_node, - STATE_DIRTY) -from piecrust.workerpool import IWorker - - -logger = logging.getLogger(__name__) - - -split_processor_names_re = re.compile(r'[ ,]+') -re_ansicolors = re.compile('\033\\[\d+m') - - -class ProcessingWorkerContext(object): - def __init__(self, appfactory, out_dir, tmp_dir, *, - force=False): - self.appfactory = appfactory - self.out_dir = out_dir - self.tmp_dir = tmp_dir - self.force = force - self.is_profiling = False - self.enabled_processors = None - self.additional_processors = None - - -class ProcessingWorkerJob(object): - def __init__(self, base_dir, mount_info, path, *, force=False): - self.base_dir = base_dir - self.mount_info = mount_info - self.path = path - self.force = force - - -class ProcessingWorkerResult(object): - def __init__(self, path): - self.path = path - self.flags = FLAG_NONE - self.proc_tree = None - self.rel_outputs = None - self.errors = None - - -class ProcessingWorker(IWorker): - def __init__(self, ctx): - self.ctx = ctx - self.work_start_time = time.perf_counter() - - def initialize(self): - # Create the app local to this worker. - app = self.ctx.appfactory.create() - app.env.registerTimer("PipelineWorker_%d_Total" % self.wid) - app.env.registerTimer("PipelineWorkerInit") - app.env.registerTimer("JobReceive") - app.env.registerTimer('BuildProcessingTree') - app.env.registerTimer('RunProcessingTree') - self.app = app - - processors = app.plugin_loader.getProcessors() - if self.ctx.enabled_processors: - logger.debug("Filtering processors to: %s" % - self.ctx.enabled_processors) - processors = get_filtered_processors(processors, - self.ctx.enabled_processors) - if self.ctx.additional_processors: - logger.debug("Adding %s additional processors." % - len(self.ctx.additional_processors)) - for proc in self.ctx.additional_processors: - app.env.registerTimer(proc.__class__.__name__) - proc.initialize(app) - processors.append(proc) - self.processors = processors - - # Invoke pre-processors. - pipeline_ctx = PipelineContext(self.wid, self.app, self.ctx.out_dir, - self.ctx.tmp_dir, self.ctx.force) - for proc in processors: - proc.onPipelineStart(pipeline_ctx) - - # Sort our processors again in case the pre-process step involved - # patching the processors with some new ones. - processors.sort(key=lambda p: p.priority) - - app.env.stepTimerSince("PipelineWorkerInit", self.work_start_time) - - def process(self, job): - result = ProcessingWorkerResult(job.path) - - processors = get_filtered_processors( - self.processors, job.mount_info['processors']) - - # Build the processing tree for this job. - rel_path = os.path.relpath(job.path, job.base_dir) - try: - with self.app.env.timerScope('BuildProcessingTree'): - builder = ProcessingTreeBuilder(processors) - tree_root = builder.build(rel_path) - result.flags |= FLAG_PREPARED - except ProcessingTreeError as ex: - result.errors = _get_errors(ex) - return result - - # Prepare and run the tree. 
- print_node(tree_root, recursive=True) - leaves = tree_root.getLeaves() - result.rel_outputs = [l.path for l in leaves] - result.proc_tree = get_node_name_tree(tree_root) - if tree_root.getProcessor().is_bypassing_structured_processing: - result.flags |= FLAG_BYPASSED_STRUCTURED_PROCESSING - - if job.force: - tree_root.setState(STATE_DIRTY, True) - - try: - with self.app.env.timerScope('RunProcessingTree'): - runner = ProcessingTreeRunner( - job.base_dir, self.ctx.tmp_dir, self.ctx.out_dir) - if runner.processSubTree(tree_root): - result.flags |= FLAG_PROCESSED - except ProcessingTreeError as ex: - if isinstance(ex, ProcessorError): - ex = ex.__cause__ - # Need to strip out colored errors from external processes. - result.errors = _get_errors(ex, strip_colors=True) - - return result - - def getReport(self, pool_reports): - # Invoke post-processors. - pipeline_ctx = PipelineContext(self.wid, self.app, self.ctx.out_dir, - self.ctx.tmp_dir, self.ctx.force) - for proc in self.processors: - proc.onPipelineEnd(pipeline_ctx) - - self.app.env.stepTimerSince("PipelineWorker_%d_Total" % self.wid, - self.work_start_time) - data = self.app.env.getStats() - data.timers.update(pool_reports) - return { - 'type': 'stats', - 'data': data} - - -def get_filtered_processors(processors, authorized_names): - if not authorized_names or authorized_names == 'all': - return processors - - if isinstance(authorized_names, str): - authorized_names = split_processor_names_re.split(authorized_names) - - procs = [] - has_star = 'all' in authorized_names - for p in processors: - for name in authorized_names: - if name == p.PROCESSOR_NAME: - procs.append(p) - break - if name == ('-%s' % p.PROCESSOR_NAME): - break - else: - if has_star: - procs.append(p) - return procs - - -def _get_errors(ex, strip_colors=False): - errors = [] - while ex is not None: - msg = str(ex) - if strip_colors: - msg = re_ansicolors.sub('', msg) - errors.append(msg) - ex = ex.__cause__ - return errors -
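The deleted `get_filtered_processors` implemented the `enabled_processors` syntax: the value can be 'all', or a comma- or space-separated list of names, where a `-name` entry disables a processor even when 'all' is present. A standalone restatement of that filtering logic, with illustrative names:

    import re

    SPLIT_RE = re.compile(r'[ ,]+')

    def filter_names(names, authorized):
        # 'all' (or an empty value) authorizes every name.
        if not authorized or authorized == 'all':
            return list(names)
        if isinstance(authorized, str):
            authorized = SPLIT_RE.split(authorized)
        has_star = 'all' in authorized
        res = []
        for n in names:
            if n in authorized:
                res.append(n)           # explicitly enabled
            elif ('-' + n) in authorized:
                continue                # explicitly disabled
            elif has_star:
                res.append(n)           # enabled via 'all'
        return res

    # e.g. filter_names(['copy', 'less', 'sass'], 'all, -less')
    #      -> ['copy', 'sass']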
--- a/piecrust/records.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,133 +0,0 @@ -import os -import os.path -import pickle -import logging -from piecrust import APP_VERSION -from piecrust.events import Event - - -logger = logging.getLogger(__name__) - - -class Record(object): - def __init__(self): - self.entries = [] - self.entry_added = Event() - self.app_version = APP_VERSION - self.record_version = self.__class__.RECORD_VERSION - self.stats = {} - - def hasLatestVersion(self): - return (self.app_version == APP_VERSION and - self.record_version == self.__class__.RECORD_VERSION) - - def addEntry(self, entry): - self.entries.append(entry) - self.entry_added.fire(entry) - - def save(self, path): - path_dir = os.path.dirname(path) - if not os.path.isdir(path_dir): - os.makedirs(path_dir, 0o755) - - with open(path, 'wb') as fp: - pickle.dump(self, fp, pickle.HIGHEST_PROTOCOL) - - def __getstate__(self): - odict = self.__dict__.copy() - del odict['entry_added'] - return odict - - def __setstate__(self, state): - state['entry_added'] = Event() - self.__dict__.update(state) - - @staticmethod - def load(path): - logger.debug("Loading bake record from: %s" % path) - with open(path, 'rb') as fp: - return pickle.load(fp) - - -class TransitionalRecord(object): - def __init__(self, record_class, previous_path=None): - self._record_class = record_class - self.transitions = {} - self.incremental_count = 0 - self.current = record_class() - if previous_path: - self.loadPrevious(previous_path) - else: - self.previous = record_class() - self.current.entry_added += self._onCurrentEntryAdded - - def loadPrevious(self, previous_path): - previous_record_valid = True - try: - self.previous = self._record_class.load(previous_path) - except Exception as ex: - logger.debug("Error loading previous record: %s" % ex) - logger.debug("Will reset to an empty one.") - previous_record_valid = False - - if self.previous.record_version != self._record_class.RECORD_VERSION: - logger.debug( - "Previous record has old version %s." % - self.previous.record_version) - logger.debug("Will reset to an empty one.") - previous_record_valid = False - - if not previous_record_valid: - self.previous = self._record_class() - return - - self._rebuildTransitions() - - def setPrevious(self, previous_record): - self.previous = previous_record - self._rebuildTransitions() - - def clearPrevious(self): - self.setPrevious(self._record_class()) - - def saveCurrent(self, current_path): - self.current.save(current_path) - - def detach(self): - res = self.current - self.current.entry_added -= self._onCurrentEntryAdded - self.current = None - self.previous = None - self.transitions = {} - return res - - def addEntry(self, entry): - self.current.addEntry(entry) - - def getTransitionKey(self, entry): - raise NotImplementedError() - - def _rebuildTransitions(self): - self.transitions = {} - for e in self.previous.entries: - key = self.getTransitionKey(e) - self.transitions[key] = (e, None) - - def _onCurrentEntryAdded(self, entry): - key = self.getTransitionKey(entry) - te = self.transitions.get(key) - if te is None: - logger.debug("Adding new record entry: %s" % key) - self.transitions[key] = (None, entry) - self._onNewEntryAdded(entry) - return - - if te[1] is not None: - raise Exception("A current entry already exists for: %s" % - key) - logger.debug("Setting current record entry: %s" % key) - self.transitions[key] = (te[0], entry) - - def _onNewEntryAdded(self, entry): - pass -
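The deleted `TransitionalRecord` keyed every entry from the previous and current bakes into `(previous, current)` pairs, which is what made incremental bakes and deleted-file cleanup possible: `(entry, None)` means the page disappeared since the last bake, `(None, entry)` means it is new, and a full pair allows change detection. A minimal sketch of that pairing, using illustrative names:

    def pair_entries(previous_entries, current_entries, key):
        # Map transition key -> (previous entry, current entry).
        transitions = {}
        for e in previous_entries:
            transitions[key(e)] = (e, None)
        for e in current_entries:
            old, _ = transitions.get(key(e), (None, None))
            transitions[key(e)] = (old, e)
        return transitions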
--- a/piecrust/rendering.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/rendering.py Wed May 17 00:11:48 2017 -0700 @@ -2,13 +2,10 @@ import os.path import copy import logging -from werkzeug.utils import cached_property from piecrust.data.builder import ( - DataBuildingContext, build_page_data, build_layout_data) -from piecrust.data.filters import ( - PaginationFilter, SettingFilterClause, page_value_accessor) + DataBuildingContext, build_page_data, add_layout_data) from piecrust.fastpickle import _pickle_object, _unpickle_object -from piecrust.sources.base import PageSource +from piecrust.sources.base import ContentSource from piecrust.templating.base import TemplateNotFoundError, TemplatingError @@ -19,7 +16,7 @@ re.MULTILINE) -class PageRenderingError(Exception): +class RenderingError(Exception): pass @@ -27,19 +24,6 @@ pass -class QualifiedPage(object): - def __init__(self, page, route, route_metadata): - self.page = page - self.route = route - self.route_metadata = route_metadata - - def getUri(self, sub_num=1): - return self.route.getUri(self.route_metadata, sub_num=sub_num) - - def __getattr__(self, name): - return getattr(self.page, name) - - class RenderedSegments(object): def __init__(self, segments, render_pass_info): self.segments = segments @@ -53,17 +37,15 @@ class RenderedPage(object): - def __init__(self, page, uri, num=1): - self.page = page - self.uri = uri - self.num = num + def __init__(self, qualified_page): + self.qualified_page = qualified_page self.data = None self.content = None self.render_info = [None, None] @property def app(self): - return self.page.app + return self.qualified_page.app def copyRenderInfo(self): return copy.deepcopy(self.render_info) @@ -94,13 +76,10 @@ return self._custom_info.get(key, default) -class PageRenderingContext(object): - def __init__(self, qualified_page, page_num=1, - force_render=False, is_from_request=False): - self.page = qualified_page - self.page_num = page_num +class RenderingContext(object): + def __init__(self, qualified_page, force_render=False): + self.qualified_page = qualified_page self.force_render = force_render - self.is_from_request = is_from_request self.pagination_source = None self.pagination_filter = None self.custom_data = {} @@ -109,15 +88,7 @@ @property def app(self): - return self.page.app - - @property - def source_metadata(self): - return self.page.source_metadata - - @cached_property - def uri(self): - return self.page.getUri(self.page_num) + return self.qualified_page.app @property def current_pass_info(self): @@ -142,7 +113,7 @@ def addUsedSource(self, source): self._raiseIfNoCurrentPass() - if isinstance(source, PageSource): + if isinstance(source, ContentSource): pass_info = self.current_pass_info pass_info.used_source_names.add(source.name) @@ -151,103 +122,149 @@ raise Exception("No rendering pass is currently active.") +class RenderingContextStack(object): + def __init__(self): + self._ctx_stack = [] + + @property + def current_ctx(self): + if len(self._ctx_stack) == 0: + return None + return self._ctx_stack[-1] + + @property + def is_main_ctx(self): + return len(self._ctx_stack) == 1 + + def hasPage(self, page): + for ei in self._ctx_stack: + if ei.qualified_page.page == page: + return True + return False + + def pushCtx(self, render_ctx): + for ctx in self._ctx_stack: + if ctx.qualified_page.page == render_ctx.qualified_page.page: + raise Exception("Loop detected during rendering!") + self._ctx_stack.append(render_ctx) + + def popCtx(self): + del self._ctx_stack[-1] + + def clear(self): + 
self._ctx_stack = [] + + def render_page(ctx): - eis = ctx.app.env.exec_info_stack - eis.pushPage(ctx.page, ctx) + env = ctx.app.env + + stack = env.render_ctx_stack + stack.pushCtx(ctx) + + qpage = ctx.qualified_page + try: # Build the data for both segment and layout rendering. - with ctx.app.env.timerScope("BuildRenderData"): + with env.timerScope("BuildRenderData"): page_data = _build_render_data(ctx) # Render content segments. ctx.setCurrentPass(PASS_FORMATTING) - repo = ctx.app.env.rendered_segments_repository + repo = env.rendered_segments_repository save_to_fs = True - if ctx.app.env.fs_cache_only_for_main_page and not eis.is_main_page: + if env.fs_cache_only_for_main_page and not stack.is_main_ctx: save_to_fs = False - with ctx.app.env.timerScope("PageRenderSegments"): - if repo and not ctx.force_render: + with env.timerScope("PageRenderSegments"): + if repo is not None and not ctx.force_render: render_result = repo.get( - ctx.uri, - lambda: _do_render_page_segments(ctx.page, page_data), - fs_cache_time=ctx.page.path_mtime, - save_to_fs=save_to_fs) + qpage.uri, + lambda: _do_render_page_segments(ctx, page_data), + fs_cache_time=qpage.page.content_mtime, + save_to_fs=save_to_fs) else: - render_result = _do_render_page_segments(ctx.page, page_data) + render_result = _do_render_page_segments(ctx, page_data) if repo: - repo.put(ctx.uri, render_result, save_to_fs) + repo.put(qpage.uri, render_result, save_to_fs) # Render layout. - page = ctx.page ctx.setCurrentPass(PASS_RENDERING) - layout_name = page.config.get('layout') + layout_name = qpage.page.config.get('layout') if layout_name is None: - layout_name = page.source.config.get('default_layout', 'default') + layout_name = qpage.page.source.config.get( + 'default_layout', 'default') null_names = ['', 'none', 'nil'] if layout_name not in null_names: with ctx.app.env.timerScope("BuildRenderData"): - build_layout_data(page, page_data, render_result['segments']) + add_layout_data(page_data, render_result['segments']) with ctx.app.env.timerScope("PageRenderLayout"): - layout_result = _do_render_layout(layout_name, page, page_data) + layout_result = _do_render_layout( + layout_name, qpage, page_data) else: layout_result = { - 'content': render_result['segments']['content'], - 'pass_info': None} + 'content': render_result['segments']['content'], + 'pass_info': None} - rp = RenderedPage(page, ctx.uri, ctx.page_num) + rp = RenderedPage(qpage) rp.data = page_data rp.content = layout_result['content'] rp.render_info[PASS_FORMATTING] = _unpickle_object( - render_result['pass_info']) + render_result['pass_info']) if layout_result['pass_info'] is not None: rp.render_info[PASS_RENDERING] = _unpickle_object( - layout_result['pass_info']) + layout_result['pass_info']) return rp + except Exception as ex: if ctx.app.debug: raise logger.exception(ex) page_rel_path = os.path.relpath(ctx.page.path, ctx.app.root_dir) raise Exception("Error rendering page: %s" % page_rel_path) from ex + finally: ctx.setCurrentPass(PASS_NONE) - eis.popPage() + stack.popCtx() def render_page_segments(ctx): - eis = ctx.app.env.exec_info_stack - eis.pushPage(ctx.page, ctx) + env = ctx.app.env + + stack = env.render_ctx_stack + stack.pushCtx(ctx) + + qpage = ctx.qualified_page + try: ctx.setCurrentPass(PASS_FORMATTING) repo = ctx.app.env.rendered_segments_repository save_to_fs = True - if ctx.app.env.fs_cache_only_for_main_page and not eis.is_main_page: + if ctx.app.env.fs_cache_only_for_main_page and not stack.is_main_ctx: save_to_fs = False with 
ctx.app.env.timerScope("PageRenderSegments"): - if repo and not ctx.force_render: + if repo is not None and not ctx.force_render: render_result = repo.get( - ctx.uri, + qpage.uri, lambda: _do_render_page_segments_from_ctx(ctx), - fs_cache_time=ctx.page.path_mtime, + fs_cache_time=qpage.page.content_mtime, save_to_fs=save_to_fs) else: render_result = _do_render_page_segments_from_ctx(ctx) if repo: - repo.put(ctx.uri, render_result, save_to_fs) + repo.put(qpage.uri, render_result, save_to_fs) finally: ctx.setCurrentPass(PASS_NONE) - eis.popPage() + stack.popCtx() rs = RenderedSegments( - render_result['segments'], - _unpickle_object(render_result['pass_info'])) + render_result['segments'], + _unpickle_object(render_result['pass_info'])) return rs def _build_render_data(ctx): with ctx.app.env.timerScope("PageDataBuild"): - data_ctx = DataBuildingContext(ctx.page, page_num=ctx.page_num) + data_ctx = DataBuildingContext(ctx.qualified_page) data_ctx.pagination_source = ctx.pagination_source data_ctx.pagination_filter = ctx.pagination_filter page_data = build_page_data(data_ctx) @@ -258,16 +275,13 @@ def _do_render_page_segments_from_ctx(ctx): page_data = _build_render_data(ctx) - return _do_render_page_segments(ctx.page, page_data) + return _do_render_page_segments(ctx, page_data) -def _do_render_page_segments(page, page_data): +def _do_render_page_segments(ctx, page_data): + page = ctx.qualified_page.page app = page.app - cpi = app.env.exec_info_stack.current_page_info - assert cpi is not None - assert cpi.page == page - engine_name = page.config.get('template_engine') format_name = page.config.get('format') @@ -282,7 +296,7 @@ with app.env.timerScope( engine.__class__.__name__ + '_segment'): part_text = engine.renderSegmentPart( - page.path, seg_part, page_data) + page.path, seg_part, page_data) except TemplatingError as err: err.lineno += seg_part.line raise err @@ -298,10 +312,10 @@ content_abstract = seg_text[:offset] formatted_segments['content.abstract'] = content_abstract - pass_info = cpi.render_ctx.render_passes[PASS_FORMATTING] + pass_info = ctx.render_passes[PASS_FORMATTING] res = { - 'segments': formatted_segments, - 'pass_info': _pickle_object(pass_info)} + 'segments': formatted_segments, + 'pass_info': _pickle_object(pass_info)} return res
--- a/piecrust/resources/theme/pages/_category.html Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ ---- -title: -format: none ---- -<h2>Posts in {{ category }}</h2> - -<section> - {% for post in pagination.posts %} - {% include 'partial_post.html' %} - {% endfor %} -</section> -<section> - {% if pagination.prev_page %}<div class="prev"><a href="{{ pagination.prev_page }}">Next Posts</a></div>{% endif %} - {% if pagination.next_page %}<div class="next"><a href="{{ pagination.next_page }}">Previous Posts</a></div>{% endif %} -</section> -
--- a/piecrust/resources/theme/pages/_tag.html Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ ---- -title: -format: none ---- -{% set display_tag = tag %} -{% if is_multiple_tag %} - {% set display_tag = tag|join(', ') %} -{% endif %} -<h2>Posts tagged with {{ display_tag }}</h2> - -<section> - {% for post in pagination.posts %} - {% include 'partial_post.html' %} - {% endfor %} -</section> -<section> - {% if pagination.prev_page %}<div class="prev"><a href="{{ pagination.prev_page }}">Next Posts</a></div>{% endif %} - {% if pagination.next_page %}<div class="next"><a href="{{ pagination.next_page }}">Previous Posts</a></div>{% endif %} -</section> -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/resources/theme/templates/_category.html Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,16 @@ +--- +title: +format: none +--- +<h2>Posts in {{ category }}</h2> + +<section> + {% for post in pagination.posts %} + {% include 'partial_post.html' %} + {% endfor %} +</section> +<section> + {% if pagination.prev_page %}<div class="prev"><a href="{{ pagination.prev_page }}">Next Posts</a></div>{% endif %} + {% if pagination.next_page %}<div class="next"><a href="{{ pagination.next_page }}">Previous Posts</a></div>{% endif %} +</section> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/resources/theme/templates/_tag.html Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,20 @@ +--- +title: +format: none +--- +{% set display_tag = tag %} +{% if is_multiple_tag %} + {% set display_tag = tag|join(', ') %} +{% endif %} +<h2>Posts tagged with {{ display_tag }}</h2> + +<section> + {% for post in pagination.posts %} + {% include 'partial_post.html' %} + {% endfor %} +</section> +<section> + {% if pagination.prev_page %}<div class="prev"><a href="{{ pagination.prev_page }}">Next Posts</a></div>{% endif %} + {% if pagination.next_page %}<div class="next"><a href="{{ pagination.next_page }}">Previous Posts</a></div>{% endif %} +</section> +
--- a/piecrust/routing.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/routing.py Wed May 17 00:11:48 2017 -0700 @@ -1,6 +1,5 @@ import re import os.path -import copy import logging import urllib.parse from werkzeug.utils import cached_property @@ -10,7 +9,8 @@ route_re = re.compile(r'%((?P<qual>[\w\d]+):)?(?P<var>\+)?(?P<name>\w+)%') -route_esc_re = re.compile(r'\\%((?P<qual>[\w\d]+)\\:)?(?P<var>\\\+)?(?P<name>\w+)\\%') +route_esc_re = re.compile( + r'\\%((?P<qual>[\w\d]+)\\:)?(?P<var>\\\+)?(?P<name>\w+)\\%') ugly_url_cleaner = re.compile(r'\.html$') @@ -22,15 +22,6 @@ pass -def create_route_metadata(page): - route_metadata = copy.deepcopy(page.source_metadata) - return route_metadata - - -ROUTE_TYPE_SOURCE = 0 -ROUTE_TYPE_GENERATOR = 1 - - class RouteParameter(object): TYPE_STRING = 0 TYPE_PATH = 1 @@ -46,29 +37,21 @@ class Route(object): """ Information about a route for a PieCrust application. Each route defines the "shape" of an URL and how it maps to - sources and generators. + content sources. """ def __init__(self, app, cfg): self.app = app - self.source_name = cfg.get('source') - self.generator_name = cfg.get('generator') - if not self.source_name and not self.generator_name: - raise InvalidRouteError( - "Both `source` and `generator` are specified.") - + self.source_name = cfg['source'] self.uri_pattern = cfg['url'].lstrip('/') - if self.is_source_route: - self.supported_params = self.source.getSupportedRouteParameters() - else: - self.supported_params = self.generator.getSupportedRouteParameters() + self.supported_params = self.source.getSupportedRouteParameters() self.pretty_urls = app.config.get('site/pretty_urls') self.trailing_slash = app.config.get('site/trailing_slash') self.show_debug_info = app.config.get('site/show_debug_info') self.pagination_suffix_format = app.config.get( - '__cache/pagination_suffix_format') + '__cache/pagination_suffix_format') self.uri_root = app.config.get('site/root') self.uri_params = [] @@ -87,9 +70,9 @@ # (maybe there's a better way to do it but I can't think of any # right now) uri_pattern_no_path = ( - route_re.sub(self._uriNoPathRepl, self.uri_pattern) - .replace('//', '/') - .rstrip('/')) + route_re.sub(self._uriNoPathRepl, self.uri_pattern) + .replace('//', '/') + .rstrip('/')) if uri_pattern_no_path != self.uri_pattern: p = route_esc_re.sub(self._uriPatternRepl, re.escape(uri_pattern_no_path)) + '$' @@ -109,43 +92,15 @@ last_param = self.getParameter(self.uri_params[-1]) self.func_has_variadic_parameter = last_param.variadic - @property - def route_type(self): - if self.source_name: - return ROUTE_TYPE_SOURCE - elif self.generator_name: - return ROUTE_TYPE_GENERATOR - else: - raise InvalidRouteError() - - @property - def is_source_route(self): - return self.route_type == ROUTE_TYPE_SOURCE - - @property - def is_generator_route(self): - return self.route_type == ROUTE_TYPE_GENERATOR - @cached_property def source(self): - if not self.is_source_route: - return InvalidRouteError("This is not a source route.") for src in self.app.sources: if src.name == self.source_name: return src - raise Exception("Can't find source '%s' for route '%s'." % ( + raise Exception( + "Can't find source '%s' for route '%s'." % ( self.source_name, self.uri_pattern)) - @cached_property - def generator(self): - if not self.is_generator_route: - return InvalidRouteError("This is not a generator route.") - for gen in self.app.generators: - if gen.name == self.generator_name: - return gen - raise Exception("Can't find generator '%s' for route '%s'." 
% ( - self.generator_name, self.uri_pattern)) - def hasParameter(self, name): return any(lambda p: p.param_name == name, self.supported_params) @@ -159,8 +114,8 @@ def getParameterType(self, name): return self.getParameter(name).param_type - def matchesMetadata(self, route_metadata): - return set(self.uri_params).issubset(route_metadata.keys()) + def matchesParameters(self, route_params): + return set(self.uri_params).issubset(route_params.keys()) def matchUri(self, uri, strict=False): if not uri.startswith(self.uri_root): @@ -172,42 +127,42 @@ elif self.trailing_slash: uri = uri.rstrip('/') - route_metadata = None + route_params = None m = self.uri_re.match(uri) if m: - route_metadata = m.groupdict() + route_params = m.groupdict() if self.uri_re_no_path: m = self.uri_re_no_path.match(uri) if m: - route_metadata = m.groupdict() - if route_metadata is None: + route_params = m.groupdict() + if route_params is None: return None if not strict: # When matching URIs, if the URI is a match but is missing some - # metadata, fill those up with empty strings. This can happen if, + # parameters, fill those up with empty strings. This can happen if, # say, a route's pattern is `/foo/%slug%`, and we're matching an # URL like `/foo`. - matched_keys = set(route_metadata.keys()) + matched_keys = set(route_params.keys()) missing_keys = set(self.uri_params) - matched_keys for k in missing_keys: if self.getParameterType(k) != RouteParameter.TYPE_PATH: return None - route_metadata[k] = '' + route_params[k] = '' - for k in route_metadata: - route_metadata[k] = self._coerceRouteParameter( - k, route_metadata[k]) + for k in route_params: + route_params[k] = self._coerceRouteParameter( + k, route_params[k]) - return route_metadata + return route_params - def getUri(self, route_metadata, *, sub_num=1): - route_metadata = dict(route_metadata) - for k in route_metadata: - route_metadata[k] = self._coerceRouteParameter( - k, route_metadata[k]) + def getUri(self, route_params, *, sub_num=1): + route_params = dict(route_params) + for k in route_params: + route_params[k] = self._coerceRouteParameter( + k, route_params[k]) - uri = self.uri_format % route_metadata + uri = self.uri_format % route_params suffix = None if sub_num > 1: # Note that we know the pagination suffix starts with a slash. 
@@ -258,9 +213,9 @@ if len(args) < fixed_param_count: raise Exception( - "Route function '%s' expected %d arguments, " - "got %d: %s" % - (self.func_name, fixed_param_count, len(args), args)) + "Route function '%s' expected %d arguments, " + "got %d: %s" % + (self.func_name, fixed_param_count, len(args), args)) if self.func_has_variadic_parameter: coerced_args = list(args[:fixed_param_count]) @@ -270,15 +225,14 @@ else: coerced_args = args - metadata = {} + route_params = {} for arg_name, arg_val in zip(self.uri_params, coerced_args): - metadata[arg_name] = self._coerceRouteParameter( - arg_name, arg_val) + route_params[arg_name] = self._coerceRouteParameter( + arg_name, arg_val) - if self.is_generator_route: - self.generator.onRouteFunctionUsed(self, metadata) + self.source.onRouteFunctionUsed(route_params) - return self.getUri(metadata) + return self.getUri(route_params) def _uriFormatRepl(self, m): if m.group('qual') or m.group('var'): @@ -350,32 +304,9 @@ return name -class CompositeRouteFunction(object): - def __init__(self): - self._routes = [] - self._arg_names = None - - def addFunc(self, route): - if self._arg_names is None: - self._arg_names = list(route.uri_params) - - if route.uri_params != self._arg_names: - raise Exception("Cannot merge route function with arguments '%s' " - "with route function with arguments '%s'." % - (route.uri_params, self._arg_names)) - self._routes.append(route) +class RouteFunction: + def __init__(self, route): + self._route = route def __call__(self, *args, **kwargs): - if len(self._routes) == 1 or len(args) == len(self._arg_names): - return self._routes[0].execTemplateFunc(*args, **kwargs) - - if len(args) == len(self._arg_names) + 1: - f_args = args[:-1] - for r in self._routes: - if r.source_name == args[-1]: - return r.execTemplateFunc(*f_args, **kwargs) - raise Exception("No such source: %s" % args[-1]) - - raise Exception("Incorrect number of arguments for route function. " - "Expected '%s', got '%s'" % (self._arg_names, args)) - + return self._route.execTemplateFunc(*args, **kwargs)
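Route URL patterns use `%name%` placeholders (optionally qualified, with `%+name%` marking a variadic path parameter), which `route_re` compiles into a named-group regex; `matchUri` then returns the captured groups as the route parameters. A much-simplified standalone sketch of that mapping, ignoring qualifiers, variadics and the pagination suffix, and assuming the literal parts of the pattern contain no regex metacharacters:

    import re

    placeholder_re = re.compile(r'%(\w+)%')

    def compile_route(uri_pattern):
        # '/posts/%year%/%slug%'
        #   -> '^/posts/(?P<year>[^/]+)/(?P<slug>[^/]+)$'
        regex = placeholder_re.sub(
            lambda m: '(?P<%s>[^/]+)' % m.group(1), uri_pattern)
        return re.compile('^' + regex + '$')

    def match_uri(uri_pattern, uri):
        m = compile_route(uri_pattern).match(uri)
        return m.groupdict() if m else None

    # match_uri('/posts/%year%/%slug%', '/posts/2017/hello')
    #   -> {'year': '2017', 'slug': 'hello'}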
--- a/piecrust/serving/middlewares.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/serving/middlewares.py Wed May 17 00:11:48 2017 -0700 @@ -4,12 +4,12 @@ from werkzeug.wsgi import ClosingIterator from piecrust import RESOURCES_DIR, CACHE_DIR from piecrust.data.builder import ( - DataBuildingContext, build_page_data) + DataBuildingContext, build_page_data) from piecrust.data.debug import build_var_debug_info +from piecrust.page import PageNotFoundError from piecrust.routing import RouteNotFoundError from piecrust.serving.util import ( - make_wrapped_file_response, get_requested_page, get_app_for_server) -from piecrust.sources.pageref import PageNotFoundError + make_wrapped_file_response, get_requested_page, get_app_for_server) class StaticResourcesMiddleware(object): @@ -29,7 +29,7 @@ full_path = os.path.join(mount, rel_req_path) try: response = make_wrapped_file_response( - environ, request, full_path) + environ, request, full_path) return response(environ, start_response) except OSError: pass @@ -47,11 +47,11 @@ self.run_sse_check = run_sse_check self._proc_loop = None self._out_dir = os.path.join( - appfactory.root_dir, CACHE_DIR, appfactory.cache_key, 'server') + appfactory.root_dir, CACHE_DIR, appfactory.cache_key, 'server') self._handlers = { - 'debug_info': self._getDebugInfo, - 'werkzeug_shutdown': self._shutdownWerkzeug, - 'pipeline_status': self._startSSEProvider} + 'debug_info': self._getDebugInfo, + 'werkzeug_shutdown': self._shutdownWerkzeug, + 'pipeline_status': self._startSSEProvider} if not self.run_sse_check or self.run_sse_check(): # When using a server with code reloading, some implementations @@ -111,15 +111,15 @@ def _startSSEProvider(self, request, start_response): from piecrust.serving.procloop import ( - PipelineStatusServerSentEventProducer) + PipelineStatusServerSentEventProducer) provider = PipelineStatusServerSentEventProducer( - self._proc_loop) + self._proc_loop) it = provider.run() response = Response(it, mimetype='text/event-stream') response.headers['Cache-Control'] = 'no-cache' response.headers['Last-Event-ID'] = \ self._proc_loop.last_status_id return ClosingIterator( - response(request.environ, start_response), - [provider.close]) + response(request.environ, start_response), + [provider.close])
--- a/piecrust/serving/server.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/serving/server.py Wed May 17 00:11:48 2017 -0700 @@ -6,15 +6,15 @@ import hashlib import logging from werkzeug.exceptions import ( - NotFound, MethodNotAllowed, InternalServerError, HTTPException) + NotFound, MethodNotAllowed, InternalServerError, HTTPException) from werkzeug.wrappers import Request, Response from jinja2 import FileSystemLoader, Environment from piecrust import CACHE_DIR, RESOURCES_DIR -from piecrust.rendering import PageRenderingContext, render_page +from piecrust.rendering import RenderingContext, render_page from piecrust.routing import RouteNotFoundError from piecrust.serving.util import ( - content_type_map, make_wrapped_file_response, get_requested_page, - get_app_for_server) + content_type_map, make_wrapped_file_response, get_requested_page, + get_app_for_server) from piecrust.sources.base import SourceNotFoundError @@ -22,6 +22,8 @@ class WsgiServer(object): + """ A WSGI application that serves a PieCrust website. + """ def __init__(self, appfactory, **kwargs): self.server = Server(appfactory, **kwargs) @@ -29,30 +31,11 @@ return self.server._run_request(environ, start_response) -class ServeRecord(object): - def __init__(self): - self.entries = {} - - def addEntry(self, entry): - key = self._makeKey(entry.uri, entry.sub_num) - self.entries[key] = entry - - def getEntry(self, uri, sub_num): - key = self._makeKey(uri, sub_num) - return self.entries.get(key) - - def _makeKey(self, uri, sub_num): - return "%s:%s" % (uri, sub_num) - - -class ServeRecordPageEntry(object): - def __init__(self, uri, sub_num): - self.uri = uri - self.sub_num = sub_num - self.used_source_names = set() - - class MultipleNotFound(HTTPException): + """ Represents a 404 (not found) error that tried to serve one or + more pages. It will report which pages it tried to serve + before failing. + """ code = 404 def __init__(self, description, nfes): @@ -70,6 +53,8 @@ class Server(object): + """ The PieCrust server. + """ def __init__(self, appfactory, enable_debug_info=True, root_url='/', @@ -78,12 +63,11 @@ self.enable_debug_info = enable_debug_info self.root_url = root_url self.static_preview = static_preview - self._page_record = ServeRecord() self._out_dir = os.path.join( - appfactory.root_dir, - CACHE_DIR, - (appfactory.cache_key or 'default'), - 'server') + appfactory.root_dir, + CACHE_DIR, + (appfactory.cache_key or 'default'), + 'server') def _run_request(self, environ, start_response): try: @@ -104,11 +88,17 @@ request.method) raise MethodNotAllowed() - # Also handle requests to a pipeline-built asset right away. + # Handle requests to a pipeline-built asset right away. response = self._try_serve_asset(environ, request) if response is not None: return response + # Same for page assets. + response = self._try_serve_page_asset( + self.appfactory.root_dir, environ, request) + if response is not None: + return response + # Create the app for this request. app = get_app_for_server(self.appfactory, root_url=self.root_url) @@ -118,14 +108,10 @@ app.config.set('site/show_debug_info', True) # We'll serve page assets directly from where they are. - app.env.base_asset_url_format = self.root_url + '_asset/%path%' + app.config.set('site/asset_url_format', + self.root_url + '_asset/%path%') - # Let's see if it can be a page asset. - response = self._try_serve_page_asset(app, environ, request) - if response is not None: - return response - - # Nope. Let's see if it's an actual page. + # Let's try to serve a page. 
try: response = self._try_serve_page(app, environ, request) return response @@ -152,23 +138,22 @@ full_path = os.path.join(self._out_dir, rel_req_path) try: - response = make_wrapped_file_response(environ, request, full_path) - return response + return make_wrapped_file_response(environ, request, full_path) except OSError: - pass - return None + return None - def _try_serve_page_asset(self, app, environ, request): + def _try_serve_page_asset(self, app_root_dir, environ, request): if not request.path.startswith(self.root_url + '_asset/'): return None offset = len(self.root_url + '_asset/') - full_path = os.path.join(app.root_dir, request.path[offset:]) - if not os.path.isfile(full_path): + full_path = os.path.join(app_root_dir, request.path[offset:]) + + try: + return make_wrapped_file_response(environ, request, full_path) + except OSError: return None - return make_wrapped_file_response(environ, request, full_path) - def _try_serve_page(self, app, environ, request): # Find a matching page. req_page = get_requested_page(app, request.path) @@ -181,33 +166,12 @@ raise MultipleNotFound(msg, req_page.not_found_errors) # We have a page, let's try to render it. - render_ctx = PageRenderingContext(qp, - page_num=req_page.page_num, - force_render=True, - is_from_request=True) - if qp.route.is_generator_route: - qp.route.generator.prepareRenderContext(render_ctx) - - # See if this page is known to use sources. If that's the case, - # just don't use cached rendered segments for that page (but still - # use them for pages that are included in it). - uri = qp.getUri() - entry = self._page_record.getEntry(uri, req_page.page_num) - if (qp.route.is_generator_route or entry is None or - entry.used_source_names): - cache_key = '%s:%s' % (uri, req_page.page_num) - app.env.rendered_segments_repository.invalidate(cache_key) + render_ctx = RenderingContext(qp, force_render=True) + qp.page.source.prepareRenderContext(render_ctx) # Render the page. rendered_page = render_page(render_ctx) - # Remember stuff for next time. - if entry is None: - entry = ServeRecordPageEntry(req_page.req_path, req_page.page_num) - self._page_record.addEntry(entry) - for pinfo in render_ctx.render_passes: - entry.used_source_names |= pinfo.used_source_names - # Start doing stuff. page = rendered_page.page rp_content = rendered_page.content @@ -216,10 +180,10 @@ if app.config.get('site/show_debug_info'): now_time = time.perf_counter() timing_info = ( - '%8.1f ms' % - ((now_time - app.env.start_time) * 1000.0)) + '%8.1f ms' % + ((now_time - app.env.start_time) * 1000.0)) rp_content = rp_content.replace( - '__PIECRUST_TIMING_INFORMATION__', timing_info) + '__PIECRUST_TIMING_INFORMATION__', timing_info) # Build the response. response = Response() @@ -311,4 +275,3 @@ template += '.html' return super(ErrorMessageLoader, self).get_source(env, template) -
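With the serve record gone, `Server._run_request` boils down to a fixed dispatch order. A condensed sketch of that order, calling the methods shown above; the wrapper function itself is hypothetical, not part of the changeset:

    from piecrust.serving.util import get_app_for_server

    def handle_request(server, environ, request):
        # 1. Assets baked by the pipeline, served from the 'server' cache dir.
        response = server._try_serve_asset(environ, request)
        if response is not None:
            return response
        # 2. Page assets, served directly from the site's root directory.
        response = server._try_serve_page_asset(
            server.appfactory.root_dir, environ, request)
        if response is not None:
            return response
        # 3. Otherwise, build an app for this request and render the page.
        app = get_app_for_server(server.appfactory, root_url=server.root_url)
        return server._try_serve_page(app, environ, request)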
--- a/piecrust/serving/util.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/serving/util.py Wed May 17 00:11:48 2017 -0700 @@ -5,11 +5,8 @@ import datetime from werkzeug.wrappers import Response from werkzeug.wsgi import wrap_file -from piecrust.app import PieCrust, apply_variant_and_values -from piecrust.rendering import QualifiedPage +from piecrust.page import QualifiedPage, PageNotFoundError from piecrust.routing import RouteNotFoundError -from piecrust.sources.base import MODE_PARSING -from piecrust.sources.pageref import PageNotFoundError from piecrust.uriutil import split_sub_uri @@ -27,24 +24,18 @@ def __init__(self): self.qualified_page = None self.req_path = None - self.page_num = 1 self.not_found_errors = [] -def find_routes(routes, uri, is_sub_page=False): - """ Returns routes matching the given URL, but puts generator routes - at the end. +def find_routes(routes, uri, sub_num=1): + """ Returns routes matching the given URL. """ res = [] - gen_res = [] for route in routes: - metadata = route.matchUri(uri) - if metadata is not None: - if route.is_source_route: - res.append((route, metadata, is_sub_page)) - else: - gen_res.append((route, metadata, is_sub_page)) - return res + gen_res + route_params = route.matchUri(uri) + if route_params is not None: + res.append((route, route_params, sub_num)) + return res def get_requested_page(app, req_path): @@ -60,49 +51,40 @@ # we try to also match the base URL (without the number). req_path_no_num, page_num = split_sub_uri(app, req_path) if page_num > 1: - routes += find_routes(app.routes, req_path_no_num, True) + routes += find_routes(app.routes, req_path_no_num, page_num) if len(routes) == 0: raise RouteNotFoundError("Can't find route for: %s" % req_path) req_page = RequestedPage() - for route, route_metadata, is_sub_page in routes: - try: - cur_req_path = req_path - if is_sub_page: - cur_req_path = req_path_no_num + for route, route_params, route_sub_num in routes: + cur_req_path = req_path + if route_sub_num > 1: + cur_req_path = req_path_no_num - qp = _get_requested_page_for_route( - app, route, route_metadata, cur_req_path) - if qp is not None: - req_page.qualified_page = qp - req_page.req_path = cur_req_path - if is_sub_page: - req_page.page_num = page_num - break - except PageNotFoundError as nfe: - req_page.not_found_errors.append(nfe) + qp = _get_requested_page_for_route(app, route, route_params, + route_sub_num) + if qp is not None: + req_page.qualified_page = qp + req_page.req_path = cur_req_path + break + + req_page.not_found_errors.append(PageNotFoundError( + "No path found for '%s' in source '%s'." % + (cur_req_path, route.source_name))) + return req_page -def _get_requested_page_for_route(app, route, route_metadata, req_path): - if not route.is_generator_route: - source = app.getSource(route.source_name) - factory = source.findPageFactory(route_metadata, MODE_PARSING) - if factory is None: - raise PageNotFoundError( - "No path found for '%s' in source '%s'." % - (req_path, source.name)) - else: - factory = route.generator.getPageFactory(route_metadata) - if factory is None: - raise PageNotFoundError( - "No path found for '%s' in generator '%s'." % - (req_path, route.generator.name)) +def _get_requested_page_for_route(app, route, route_params, sub_num): + source = app.getSource(route.source_name) + item = source.findContent(route_params) + if item is None: + return None # Build the page. 
- page = factory.buildPage() - qp = QualifiedPage(page, route, route_metadata) + page = app.getPage(item) + qp = QualifiedPage(page, route, route_params, sub_num) return qp
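Sub-page URLs carry a trailing page number (e.g. `/blog/2` for the second sub-page), and `get_requested_page` retries the stripped URL with that number. A simplified sketch of the split; the real `split_sub_uri` honors the app's configured pagination suffix format rather than a bare trailing number:

    def split_sub_uri(uri):
        # '/blog/2' -> ('/blog', 2); '/blog' -> ('/blog', 1)
        parts = uri.rstrip('/').rsplit('/', 1)
        if len(parts) == 2 and parts[1].isdigit():
            return parts[0] or '/', int(parts[1])
        return uri, 1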
--- a/piecrust/serving/wrappers.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/serving/wrappers.py Wed May 17 00:11:48 2017 -0700 @@ -1,7 +1,6 @@ import os import signal import logging -import urllib.request logger = logging.getLogger(__name__) @@ -99,11 +98,11 @@ def _get_piecrust_server(appfactory, run_sse_check=None): from piecrust.serving.middlewares import ( - StaticResourcesMiddleware, PieCrustDebugMiddleware) + StaticResourcesMiddleware, PieCrustDebugMiddleware) from piecrust.serving.server import WsgiServer app = WsgiServer(appfactory) app = StaticResourcesMiddleware(app) app = PieCrustDebugMiddleware( - app, appfactory, run_sse_check=run_sse_check) + app, appfactory, run_sse_check=run_sse_check) return app
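`_get_piecrust_server` composes the WSGI stack inside-out: the PieCrust server, wrapped by static-resource serving, wrapped by the debug middleware. A usage sketch with Werkzeug's development server; the `appfactory` value is whatever the chef command line builds, shown here only for shape:

    # Sketch: running the composed WSGI stack under Werkzeug's dev server.
    from werkzeug.serving import run_simple

    wsgi_app = _get_piecrust_server(appfactory)  # appfactory built elsewhere
    run_simple('localhost', 8080, wsgi_app, use_reloader=True)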
--- a/piecrust/sources/array.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -from piecrust.sources.base import PageSource -from piecrust.sources.mixins import SimplePaginationSourceMixin -from piecrust.sources.pageref import PageRef - - -class CachedPageFactory(object): - """ A `PageFactory` (in appearance) that already has a page built. - """ - def __init__(self, page): - self._page = page - - @property - def rel_path(self): - return self._page.rel_path - - @property - def metadata(self): - return self._page.source_metadata - - @property - def ref_spec(self): - return self._page.ref_spec - - @property - def path(self): - return self._page.path - - def buildPage(self): - return self._page - - -class ArraySource(PageSource, SimplePaginationSourceMixin): - def __init__(self, app, inner_source, name='array', config=None): - super(ArraySource, self).__init__(app, name, config) - self.inner_source = inner_source - - @property - def page_count(self): - return len(self.inner_source) - - def getPageFactories(self): - for p in self.inner_source: - yield CachedPageFactory(p) -
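`ArraySource` wrapped an in-memory list of already-built pages behind the old `PageSource` interface. Its moral equivalent under the new model would be a `ContentSource` whose `getContents` yields ready-made `ContentItem`s; a hedged illustration against the new base class (this class is not part of the changeset):

    from piecrust.sources.base import ContentSource, ContentItem

    class InMemorySource(ContentSource):
        """ Illustrative only: serves a fixed list of (spec, metadata)
            pairs as content items, with no grouping.
        """
        def __init__(self, app, items, name='array', config=None):
            super().__init__(app, name, config)
            self._items = items

        def getContents(self, group):
            # No sub-groups: a single flat list of items.
            if group is None:
                return [ContentItem(spec, dict(meta))
                        for spec, meta in self._items]
            return None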
--- a/piecrust/sources/autoconfig.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/sources/autoconfig.py Wed May 17 00:11:48 2017 -0700 @@ -3,30 +3,19 @@ import os.path import logging from piecrust.configuration import ConfigurationError -from piecrust.routing import RouteParameter -from piecrust.sources.base import ( - PageSource, PageFactory, InvalidFileSystemEndpointError) -from piecrust.sources.default import ( - filter_page_dirname, filter_page_filename) -from piecrust.sources.interfaces import IListableSource -from piecrust.sources.mixins import SimplePaginationSourceMixin +from piecrust.sources.base import ContentItem +from piecrust.sources.default import DefaultContentSource logger = logging.getLogger(__name__) -class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin, - IListableSource): - """ Base class for page sources that automatically apply configuration +class AutoConfigContentSourceBase(DefaultContentSource): + """ Base class for content sources that automatically apply configuration settings to their generated pages based on those pages' paths. """ def __init__(self, app, name, config): - super(AutoConfigSourceBase, self).__init__(app, name, config) - self.fs_endpoint = config.get('fs_endpoint', name) - self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) - self.supported_extensions = list( - app.config.get('site/auto_formats').keys()) - self.default_auto_format = app.config.get('site/default_auto_format') + DefaultContentSource.__init__(app, name, config) self.capture_mode = config.get('capture_mode', 'path') if self.capture_mode not in ['path', 'dirname', 'filename']: @@ -34,91 +23,34 @@ "one of: path, dirname, filename" % name) - def getSupportedRouteParameters(self): - return [ - RouteParameter('slug', RouteParameter.TYPE_PATH)] - - def buildPageFactories(self): - logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path) - if not os.path.isdir(self.fs_endpoint_path): - raise InvalidFileSystemEndpointError(self.name, - self.fs_endpoint_path) - - for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): - rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path) - dirnames[:] = list(filter(filter_page_dirname, dirnames)) + def _finalizeContent(self, parent_group, items, groups): + DefaultContentSource._finalizeContent(parent_group, items, groups) - # If `capture_mode` is `dirname`, we don't need to recompute it - # for each filename, so we do it here. - if self.capture_mode == 'dirname': - config = self._extractConfigFragment(rel_dirpath) - - for f in filter(filter_page_filename, filenames): - if self.capture_mode == 'path': - path = os.path.join(rel_dirpath, f) - config = self._extractConfigFragment(path) - elif self.capture_mode == 'filename': - config = self._extractConfigFragment(f) - - fac_path = f - if rel_dirpath != '.': - fac_path = os.path.join(rel_dirpath, f) - - slug = self._makeSlug(fac_path) - - metadata = { - 'slug': slug, - 'config': config} - yield PageFactory(self, fac_path, metadata) + # If `capture_mode` is `dirname`, we don't need to recompute it + # for each filename, so we do it here. 
+ if self.capture_mode == 'dirname': + rel_dirpath = os.path.relpath(parent_group.spec, + self.fs_endpoint_path) + config = self._extractConfigFragment(rel_dirpath) - def resolveRef(self, ref_path): - path = os.path.normpath( - os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/"))) - - config = None - if self.capture_mode == 'dirname': - config = self._extractConfigFragment(os.path.dirname(ref_path)) - elif self.capture_mode == 'path': - config = self._extractConfigFragment(ref_path) - elif self.capture_mode == 'filename': - config = self._extractConfigFragment(os.path.basename(ref_path)) - - slug = self._makeSlug(ref_path) - metadata = {'slug': slug, 'config': config} - return path, metadata - - def listPath(self, rel_path): - raise NotImplementedError() + for i in items: + # Compute the config for the other capture modes. + if self.capture_mode == 'path': + rel_path = os.path.relpath(i.spec, self.fs_endpoint_path) + config = self._extractConfigFragment(rel_path) + elif self.capture_mode == 'filename': + fname = os.path.basename(i.spec) + config = self._extractConfigFragment(fname) - def getDirpath(self, rel_path): - return os.path.dirname(rel_path) - - def getBasename(self, rel_path): - filename = os.path.basename(rel_path) - name, _ = os.path.splitext(filename) - return name - - def _makeSlug(self, rel_path): - slug = rel_path.replace('\\', '/') - slug = self._cleanSlug(slug) - slug, ext = os.path.splitext(slug) - if ext.lstrip('.') not in self.supported_extensions: - slug += ext - if slug.startswith('./'): - slug = slug[2:] - if slug == '_index': - slug = '' - return slug - - def _cleanSlug(self, slug): - return slug + # Set the config on the content item's metadata. + i.metadata.setdefault('config', {}).update(config) def _extractConfigFragment(self, rel_path): raise NotImplementedError() -class AutoConfigSource(AutoConfigSourceBase): - """ Page source that extracts configuration settings from the sub-folders +class AutoConfigContentSource(AutoConfigContentSourceBase): + """ Content source that extracts configuration settings from the sub-folders each page resides in. This is ideal for setting tags or categories on pages based on the folders they're in. """ @@ -126,13 +58,12 @@ def __init__(self, app, name, config): config['capture_mode'] = 'dirname' - super(AutoConfigSource, self).__init__(app, name, config) + AutoConfigContentSourceBase.__init__(app, name, config) + self.setting_name = config.get('setting_name', name) self.only_single_values = config.get('only_single_values', False) self.collapse_single_values = config.get('collapse_single_values', False) - self.supported_extensions = list( - app.config.get('site/auto_formats').keys()) def _extractConfigFragment(self, rel_path): if rel_path == '.': @@ -157,48 +88,27 @@ return {self.setting_name: values} - def findPageFactory(self, metadata, mode): + def findContent(self, route_params): # Pages from this source are effectively flattened, so we need to # find pages using a brute-force kinda way.
+ route_slug = route_params.get('slug', '') + if not route_slug: + route_slug = '_index' + for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): for f in filenames: slug, _ = os.path.splitext(f) - if slug == metadata['slug']: + if slug == route_slug: path = os.path.join(dirpath, f) rel_path = os.path.relpath(path, self.fs_endpoint_path) config = self._extractConfigFragment(rel_path) metadata = {'slug': slug, 'config': config} - return PageFactory(self, rel_path, metadata) + return ContentItem(path, metadata) return None - def listPath(self, rel_path): - rel_path = rel_path.lstrip('\\/') - path = os.path.join(self.fs_endpoint_path, rel_path) - names = sorted(os.listdir(path)) - items = [] - for name in names: - if os.path.isdir(os.path.join(path, name)): - if filter_page_dirname(name): - rel_subdir = os.path.join(rel_path, name) - items.append((True, name, rel_subdir)) - else: - if filter_page_filename(name): - cur_rel_path = os.path.join(rel_path, name) - slug = self._makeSlug(cur_rel_path) - config = self._extractConfigFragment(cur_rel_path) - metadata = {'slug': slug, 'config': config} - fac = PageFactory(self, cur_rel_path, metadata) - name, _ = os.path.splitext(name) - items.append((False, name, fac)) - return items - - def _cleanSlug(self, slug): - return os.path.basename(slug) - - -class OrderedPageSource(AutoConfigSourceBase): - """ A page source that assigns an "order" to its pages based on a +class OrderedContentSource(AutoConfigContentSourceBase): + """ A content source that assigns an "order" to its pages based on a numerical prefix in their filename. Page iterators will automatically sort pages using that order. """ @@ -208,14 +118,13 @@ def __init__(self, app, name, config): config['capture_mode'] = 'path' - super(OrderedPageSource, self).__init__(app, name, config) + AutoConfigContentSourceBase.__init__(app, name, config) + self.setting_name = config.get('setting_name', 'order') self.default_value = config.get('default_value', 0) - self.supported_extensions = list( - app.config.get('site/auto_formats').keys()) - def findPageFactory(self, metadata, mode): - uri_path = metadata.get('slug', '') + def findContent(self, route_params): + uri_path = route_params.get('slug', '') if uri_path == '': uri_path = '_index' @@ -253,60 +162,16 @@ if not found: return None - fac_path = os.path.relpath(path, self.fs_endpoint_path) - config = self._extractConfigFragment(fac_path) + rel_path = os.path.relpath(path, self.fs_endpoint_path) + config = self._extractConfigFragment(rel_path) metadata = {'slug': uri_path, 'config': config} - - return PageFactory(self, fac_path, metadata) + return ContentItem(path, metadata) def getSorterIterator(self, it): accessor = self.getSettingAccessor() return OrderTrailSortIterator(it, self.setting_name + '_trail', value_accessor=accessor) - def listPath(self, rel_path): - rel_path = rel_path.lstrip('/') - path = self.fs_endpoint_path - if rel_path != '': - parts = rel_path.split('/') - for p in parts: - p_pat = r'(\d+_)?' 
+ re.escape(p) + '$' - for name in os.listdir(path): - if re.match(p_pat, name): - path = os.path.join(path, name) - break - else: - raise Exception("No such path: %s" % rel_path) - - items = [] - names = sorted(os.listdir(path)) - for name in names: - clean_name = self.re_pattern.sub('', name) - clean_name, _ = os.path.splitext(clean_name) - if os.path.isdir(os.path.join(path, name)): - if filter_page_dirname(name): - rel_subdir = os.path.join(rel_path, name) - items.append((True, clean_name, rel_subdir)) - else: - if filter_page_filename(name): - slug = self._makeSlug(os.path.join(rel_path, name)) - - fac_path = name - if rel_path != '.': - fac_path = os.path.join(rel_path, name) - fac_path = fac_path.replace('\\', '/') - - config = self._extractConfigFragment(fac_path) - metadata = {'slug': slug, 'config': config} - fac = PageFactory(self, fac_path, metadata) - - name, _ = os.path.splitext(name) - items.append((False, clean_name, fac)) - return items - - def _cleanSlug(self, slug): - return self.re_pattern.sub(r'\1', slug) - def _extractConfigFragment(self, rel_path): values = [] for m in self.re_pattern.finditer(rel_path): @@ -317,15 +182,12 @@ values.append(self.default_value) return { - self.setting_name: values[-1], - self.setting_name + '_trail': values} + self.setting_name: values[-1], + self.setting_name + '_trail': values} - def _populateMetadata(self, rel_path, metadata, mode=None): - _, filename = os.path.split(rel_path) - config = self._extractConfigFragment(filename) - metadata['config'] = config - slug = metadata['slug'] - metadata['slug'] = self.re_pattern.sub(r'\1', slug) + def _makeSlug(self, path): + slug = super()._makeSlug(path) + return self.re_pattern.sub(r'\1', slug) class OrderTrailSortIterator(object):
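`OrderedContentSource` turns numeric filename prefixes into an `order` setting plus an `order_trail` listing every prefix along the path. Assuming a prefix pattern equivalent to `(\d+)_` anchored at each path component (the actual `re_pattern` is defined outside the hunks shown above), the extraction behaves roughly like this standalone sketch:

    import re

    # Assumed stand-in for OrderedContentSource.re_pattern.
    re_pattern = re.compile(r'(?:^|[/\\])(\d+)_')

    def extract_order(rel_path, default_value=0):
        values = [int(m.group(1)) for m in re_pattern.finditer(rel_path)]
        if not values:
            values.append(default_value)
        return {'order': values[-1], 'order_trail': values}

    # extract_order('04_section/02_page.md')
    #   -> {'order': 2, 'order_trail': [4, 2]}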
--- a/piecrust/sources/base.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/sources/base.py Wed May 17 00:11:48 2017 -0700 @@ -1,88 +1,78 @@ -import copy import logging -from werkzeug.utils import cached_property -from piecrust.page import Page -from piecrust.data.assetor import Assetor +import collections +# Source realms, to differentiate sources in the site itself ('User') +# and sources in the site's theme ('Theme'). REALM_USER = 0 REALM_THEME = 1 REALM_NAMES = { - REALM_USER: 'User', - REALM_THEME: 'Theme'} + REALM_USER: 'User', + REALM_THEME: 'Theme'} -MODE_PARSING = 0 -MODE_CREATING = 1 +# Types of relationships a content source can be asked for. +REL_ASSETS = 1 logger = logging.getLogger(__name__) -def build_pages(app, factories): - for f in factories: - yield f.buildPage() - - class SourceNotFoundError(Exception): pass -class InvalidFileSystemEndpointError(Exception): - def __init__(self, source_name, fs_endpoint): - super(InvalidFileSystemEndpointError, self).__init__( - "Invalid file-system endpoint for source '%s': %s" % - (source_name, fs_endpoint)) +class InsufficientRouteParameters(Exception): + pass + + +class AbortedSourceUseError(Exception): + pass -class PageFactory(object): - """ A class responsible for creating a page. +class GeneratedContentException(Exception): + pass + + +CONTENT_TYPE_PAGE = 0 +CONTENT_TYPE_ASSET = 1 + + +class ContentItem: + """ Describes a piece of content. """ - def __init__(self, source, rel_path, metadata): - self.source = source - self.rel_path = rel_path + def __init__(self, spec, metadata): + self.spec = spec self.metadata = metadata - @cached_property - def ref_spec(self): - return '%s:%s' % (self.source.name, self.rel_path) - - @cached_property - def path(self): - path, _ = self.source.resolveRef(self.rel_path) - return path - - def buildPage(self): - repo = self.source.app.env.page_repository - cache_key = '%s:%s' % (self.source.name, self.rel_path) - return repo.get(cache_key, self._doBuildPage) - - def _doBuildPage(self): - logger.debug("Building page: %s" % self.path) - page = Page(self.source, copy.deepcopy(self.metadata), self.rel_path) - return page + @property + def is_group(self): + return False -class PageSource(object): - """ A source for pages, e.g. a directory with one file per page. +class ContentGroup: + """ Describes a group of `ContentItem`s. + """ + def __init__(self, spec, metadata): + self.spec = spec + self.metadata = metadata + + @property + def is_group(self): + return True + + +class ContentSource: + """ A source for content. 
""" def __init__(self, app, name, config): self.app = app self.name = name self.config = config or {} - self.config.setdefault('realm', REALM_USER) - self._factories = None - self._provider_type = None - - def __getattr__(self, name): - try: - return self.config[name] - except KeyError: - raise AttributeError() @property def is_theme_source(self): - return self.realm == REALM_THEME + return self.config['realm'] == REALM_THEME @property def root_dir(self): @@ -90,48 +80,47 @@ return self.app.theme_dir return self.app.root_dir - def getPages(self): - return build_pages(self.app, self.getPageFactories()) + def openItem(self, item, mode='r'): + raise NotImplementedError() + + def getItemMtime(self, item): + raise NotImplementedError() - def getPage(self, metadata): - factory = self.findPageFactory(metadata, MODE_PARSING) - if factory is None: - return None - return factory.buildPage() + def getAllContents(self): + stack = collections.deque() + stack.append(None) + while len(stack) > 0: + cur = stack.popleft() + try: + contents = self.getContents(cur) + except GeneratedContentException: + continue + if contents is not None: + for c in contents: + if c.is_group: + stack.append(c) + else: + yield c - def getPageFactories(self): - if self._factories is None: - self._factories = list(self.buildPageFactories()) - return self._factories + def getContents(self, group): + raise NotImplementedError("'%s' doesn't implement 'getContents'." % + self.__class__) + + def getRelatedContents(self, item, relationship): + raise NotImplementedError() + + def findContent(self, route_params): + raise NotImplementedError() def getSupportedRouteParameters(self): raise NotImplementedError() - def buildPageFactories(self): - raise NotImplementedError() - - def buildPageFactory(self, path): - raise NotImplementedError() - - def resolveRef(self, ref_path): - """ Returns the full path and source metadata given a source - (relative) path, like a ref-spec. - """ - raise NotImplementedError() - - def findPageFactory(self, metadata, mode): - raise NotImplementedError() - - def buildDataProvider(self, page, override): - if not self._provider_type: - from piecrust.data.provider import get_data_provider_class - self._provider_type = get_data_provider_class(self.app, - self.data_type) - return self._provider_type(self, page, override) - - def finalizeConfig(self, page): + def prepareRenderContext(self, ctx): pass - def buildAssetor(self, page, uri): - return Assetor(page, uri) + def onRouteFunctionUsed(self, route_params): + pass + def describe(self): + return None +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/sources/blogarchives.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,126 @@ +import logging +import datetime +from piecrust.chefutil import format_timed_scope +from piecrust.data.filters import PaginationFilter, IFilterClause +from piecrust.data.iterators import PageIterator +from piecrust.routing import RouteParameter +from piecrust.sources.base import ContentSource, GeneratedContentException + + +logger = logging.getLogger(__name__) + + +class BlogArchivesSource(ContentSource): + SOURCE_NAME = 'blog_archives' + + def __init__(self, app, name, config): + super().__init__(app, name, config) + + def getContents(self, group): + raise GeneratedContentException() + + def prepareRenderContext(self, ctx): + ctx.pagination_source = self.source + + year = ctx.page.route_metadata.get('year') + if year is None: + raise Exception( + "Can't find the archive year in the route metadata") + if type(year) is not int: + raise Exception( + "The route for generator '%s' should specify an integer " + "parameter for 'year'." % self.name) + + flt = PaginationFilter() + flt.addClause(IsFromYearFilterClause(year)) + ctx.pagination_filter = flt + + ctx.custom_data['year'] = year + + flt2 = PaginationFilter() + flt2.addClause(IsFromYearFilterClause(year)) + it = PageIterator(self.source, pagination_filter=flt2, + sorter=_date_sorter) + ctx.custom_data['archives'] = it + + def bake(self, ctx): + if not self.page_ref.exists: + logger.debug( + "No page found at '%s', skipping %s archives." % + (self.page_ref, self.source_name)) + return + + logger.debug("Baking %s archives...", self.source_name) + with format_timed_scope(logger, 'gathered archive years', + level=logging.DEBUG, colored=False): + all_years, dirty_years = self._buildDirtyYears(ctx) + + with format_timed_scope(logger, "baked %d %s archives." % + (len(dirty_years), self.source_name)): + self._bakeDirtyYears(ctx, all_years, dirty_years) + + def _getSource(self): + return self.app.getSource(self.config['source']) + + def _buildDirtyYears(self, ctx): + logger.debug("Gathering dirty post years.") + all_years = set() + dirty_years = set() + for _, cur_entry in ctx.getAllPageRecords(): + if cur_entry and cur_entry.source_name == self.source_name: + dt = datetime.datetime.fromtimestamp(cur_entry.timestamp) + all_years.add(dt.year) + if cur_entry.was_any_sub_baked: + dirty_years.add(dt.year) + return all_years, dirty_years + + def _bakeDirtyYears(self, ctx, all_years, dirty_years): + route = self.app.getGeneratorRoute(self.name) + if route is None: + raise Exception( + "No routes have been defined for generator: %s" % + self.name) + + logger.debug("Using archive page: %s" % self.page_ref) + fac = self.page_ref.getFactory() + + for y in dirty_years: + extra_route_metadata = {'year': y} + + logger.debug("Queuing: %s [%s]" % (fac.ref_spec, y)) + ctx.queueBakeJob(fac, route, extra_route_metadata, str(y)) + ctx.runJobQueue() + + # Create bake entries for the years that were *not* dirty. + # Otherwise, when checking for deleted pages, we would not find any + # outputs and would delete those files. + all_str_years = [str(y) for y in all_years] + for prev_entry, cur_entry in ctx.getAllPageRecords(): + if prev_entry and not cur_entry: + try: + y = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) + except InvalidRecordExtraKey: + continue + if y in all_str_years: + logger.debug( + "Creating unbaked entry for year %s archive." 
% y) + ctx.collapseRecord(prev_entry) + else: + logger.debug( + "No page references year %s anymore." % y) + + def getSupportedRouteParameters(self): + return [RouteParameter('year', RouteParameter.TYPE_INT4)] + + +class IsFromYearFilterClause(IFilterClause): + def __init__(self, year): + self.year = year + + def pageMatches(self, fil, page): + return (page.datetime.year == self.year) + + +def _date_sorter(it): + return sorted(it, key=lambda x: x.datetime) +
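The year filter that both the render context and the `archives` iterator rely on can be exercised in isolation. A small sketch, where `FakePage` is a stand-in because the clause only looks at a page's `datetime` attribute:

import datetime
from piecrust.sources.blogarchives import IsFromYearFilterClause

class FakePage:
    def __init__(self, dt):
        self.datetime = dt

clause = IsFromYearFilterClause(2017)
# The first argument is the owning filter; this clause ignores it.
print(clause.pageMatches(None, FakePage(datetime.datetime(2017, 5, 17))))
# -> True
print(clause.pageMatches(None, FakePage(datetime.datetime(2016, 1, 1))))
# -> False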
--- a/piecrust/sources/default.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/sources/default.py Wed May 17 00:11:48 2017 -0700 @@ -1,164 +1,49 @@ import os.path import logging -from piecrust import osutil from piecrust.routing import RouteParameter -from piecrust.sources.base import ( - PageFactory, PageSource, InvalidFileSystemEndpointError, - MODE_CREATING) +from piecrust.sources.base import REL_ASSETS, ContentItem +from piecrust.sources.fs import FSContentSource from piecrust.sources.interfaces import ( - IListableSource, IPreparingSource, IInteractiveSource, - InteractiveField) -from piecrust.sources.mixins import SimplePaginationSourceMixin + IPreparingSource, IInteractiveSource, InteractiveField) +from piecrust.sources.mixins import SimpleAssetsSubDirMixin +from piecrust.uriutil import uri_to_title logger = logging.getLogger(__name__) -def filter_page_dirname(d): - return not (d.startswith('.') or d.endswith('-assets')) - - -def filter_page_filename(f): - return (f[0] != '.' and # .DS_store and other crap - f[-1] != '~' and # Vim temp files and what-not - f not in ['Thumbs.db']) # Windows bullshit - - -class DefaultPageSource(PageSource, - IListableSource, IPreparingSource, IInteractiveSource, - SimplePaginationSourceMixin): +class DefaultContentSource(FSContentSource, + SimpleAssetsSubDirMixin, + IPreparingSource, IInteractiveSource): SOURCE_NAME = 'default' def __init__(self, app, name, config): - super(DefaultPageSource, self).__init__(app, name, config) - self.fs_endpoint = config.get('fs_endpoint', name) - self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) - self.supported_extensions = list( - app.config.get('site/auto_formats').keys()) + super().__init__(app, name, config) + self.auto_formats = app.config.get('site/auto_formats') self.default_auto_format = app.config.get('site/default_auto_format') - - def getSupportedRouteParameters(self): - return [ - RouteParameter('slug', RouteParameter.TYPE_PATH)] - - def buildPageFactories(self): - logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path) - if not os.path.isdir(self.fs_endpoint_path): - if self.ignore_missing_dir: - return - raise InvalidFileSystemEndpointError(self.name, - self.fs_endpoint_path) - - for dirpath, dirnames, filenames in osutil.walk(self.fs_endpoint_path): - rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path) - dirnames[:] = list(filter(filter_page_dirname, dirnames)) - for f in sorted(filter(filter_page_filename, filenames)): - fac_path = f - if rel_dirpath != '.': - fac_path = os.path.join(rel_dirpath, f) + self.supported_extensions = list(self.auto_formats) - slug = self._makeSlug(fac_path) - metadata = {'slug': slug} - fac_path = fac_path.replace('\\', '/') - self._populateMetadata(fac_path, metadata) - yield PageFactory(self, fac_path, metadata) + def _createItemMetadata(self, path): + return self._doCreateItemMetadata(path) - def buildPageFactory(self, path): - if not path.startswith(self.fs_endpoint_path): - raise Exception("Page path '%s' isn't inside '%s'." 
% ( - path, self.fs_enpoint_path)) - rel_path = path[len(self.fs_endpoint_path):].lstrip('\\/') - slug = self._makeSlug(rel_path) - metadata = {'slug': slug} - fac_path = rel_path.replace('\\', '/') - self._populateMetadata(fac_path, metadata) - return PageFactory(self, fac_path, metadata) - - def resolveRef(self, ref_path): - path = os.path.normpath( - os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/"))) - slug = self._makeSlug(ref_path) - metadata = {'slug': slug} - self._populateMetadata(ref_path, metadata) - return path, metadata - - def findPageFactory(self, metadata, mode): - uri_path = metadata.get('slug', '') - if not uri_path: - uri_path = '_index' - path = os.path.join(self.fs_endpoint_path, uri_path) - _, ext = os.path.splitext(path) + def _finalizeContent(self, parent_group, items, groups): + SimpleAssetsSubDirMixin._onFinalizeContent( + self, parent_group, items, groups) - if mode == MODE_CREATING: - if ext == '': - path = '%s.%s' % (path, self.default_auto_format) - rel_path = os.path.relpath(path, self.fs_endpoint_path) - rel_path = rel_path.replace('\\', '/') - self._populateMetadata(rel_path, metadata, mode) - return PageFactory(self, rel_path, metadata) - - if ext == '': - paths_to_check = [ - '%s.%s' % (path, e) - for e in self.supported_extensions] - else: - paths_to_check = [path] - for path in paths_to_check: - if os.path.isfile(path): - rel_path = os.path.relpath(path, self.fs_endpoint_path) - rel_path = rel_path.replace('\\', '/') - self._populateMetadata(rel_path, metadata, mode) - return PageFactory(self, rel_path, metadata) - - return None + def _doCreateItemMetadata(self, path): + slug = self._makeSlug(path) + metadata = { + 'slug': slug + } + _, ext = os.path.splitext(path) + if ext: + fmt = self.auto_formats.get(ext.lstrip('.')) + if fmt: + metadata['config'] = {'format': fmt} + return metadata - def listPath(self, rel_path): - rel_path = rel_path.lstrip('\\/') - path = os.path.join(self.fs_endpoint_path, rel_path) - names = sorted(osutil.listdir(path)) - items = [] - for name in names: - if os.path.isdir(os.path.join(path, name)): - if filter_page_dirname(name): - rel_subdir = os.path.join(rel_path, name) - items.append((True, name, rel_subdir)) - else: - if filter_page_filename(name): - slug = self._makeSlug(os.path.join(rel_path, name)) - metadata = {'slug': slug} - - fac_path = name - if rel_path != '.': - fac_path = os.path.join(rel_path, name) - fac_path = fac_path.replace('\\', '/') - - self._populateMetadata(fac_path, metadata) - fac = PageFactory(self, fac_path, metadata) - - name, _ = os.path.splitext(name) - items.append((False, name, fac)) - return items - - def getDirpath(self, rel_path): - return os.path.dirname(rel_path) - - def getBasename(self, rel_path): - filename = os.path.basename(rel_path) - name, _ = os.path.splitext(filename) - return name - - def setupPrepareParser(self, parser, app): - parser.add_argument('uri', help='The URI for the new page.') - - def buildMetadata(self, args): - return {'slug': args.uri} - - def getInteractiveFields(self): - return [ - InteractiveField('slug', InteractiveField.TYPE_STRING, - 'new-page')] - - def _makeSlug(self, rel_path): + def _makeSlug(self, path): + rel_path = os.path.relpath(path, self.fs_endpoint_path) slug, ext = os.path.splitext(rel_path) slug = slug.replace('\\', '/') if ext.lstrip('.') not in self.supported_extensions: @@ -169,6 +54,56 @@ slug = '' return slug - def _populateMetadata(self, rel_path, metadata, mode=None): - pass + def getRelatedContents(self, item, relationship): + 
if relationship == REL_ASSETS: + return SimpleAssetsSubDirMixin._getRelatedAssetsContents( + self, item, relationship) + raise NotImplementedError() + + def getSupportedRouteParameters(self): + return [ + RouteParameter('slug', RouteParameter.TYPE_PATH)] + + def findContent(self, route_params): + uri_path = route_params.get('slug', '') + if not uri_path: + uri_path = '_index' + path = os.path.join(self.fs_endpoint_path, uri_path) + _, ext = os.path.splitext(path) + if ext == '': + paths_to_check = [ + '%s.%s' % (path, e) + for e in self.supported_extensions] + else: + paths_to_check = [path] + for path in paths_to_check: + if os.path.isfile(path): + metadata = self._doCreateItemMetadata(path) + return ContentItem(path, metadata) + return None + + def setupPrepareParser(self, parser, app): + parser.add_argument('uri', help='The URI for the new page.') + + def createContent(self, args): + if not hasattr(args, 'uri'): + uri = None + else: + uri = args.uri + if not uri: + uri = '_index' + path = os.path.join(self.fs_endpoint_path, uri) + _, ext = os.path.splitext(path) + if ext == '': + path = '%s.%s' % (path, self.default_auto_format) + + metadata = self._doCreateItemMetadata(path) + config = metadata.setdefault('config', {}) + config.update({'title': uri_to_title( + os.path.basename(metadata['slug']))}) + return ContentItem(path, metadata) + + def getInteractiveFields(self): + return [ + InteractiveField('slug', InteractiveField.TYPE_STRING, + 'new-page')]
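The lookup in `findContent()` above reduces to trying an extension-less slug against every supported auto-format. A standalone sketch, with the endpoint path and extension list as assumed examples:

import os.path

fs_endpoint_path = '/site/pages'          # assumed endpoint
supported_extensions = ['md', 'textile']  # from site/auto_formats

def candidate_paths(slug):
    path = os.path.join(fs_endpoint_path, slug or '_index')
    _, ext = os.path.splitext(path)
    if ext == '':
        # No extension given: try every supported auto-format.
        return ['%s.%s' % (path, e) for e in supported_extensions]
    return [path]

print(candidate_paths('recipes/cake'))
# -> ['/site/pages/recipes/cake.md', '/site/pages/recipes/cake.textile']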
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/sources/fs.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,111 @@ +import os.path +import logging +from piecrust import osutil +from piecrust.routing import RouteParameter +from piecrust.sources.base import ContentItem, ContentGroup, ContentSource + + +logger = logging.getLogger(__name__) + + +class InvalidFileSystemEndpointError(Exception): + def __init__(self, source_name, fs_endpoint): + super(InvalidFileSystemEndpointError, self).__init__( + "Invalid file-system endpoint for source '%s': %s" % + (source_name, fs_endpoint)) + + +def _filter_crap_files(f): + return (f[-1] != '~' and # Vim temp files and what-not + f not in ['.DS_Store', 'Thumbs.db']) # OSX and Windows bullshit + + +class FSContentSourceBase(ContentSource): + """ Implements some basic stuff for a `ContentSource` that stores its + items as files on disk. + """ + def __init__(self, app, name, config): + super().__init__(app, name, config) + self.fs_endpoint = config.get('fs_endpoint', name) + self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) + self._fs_filter = None + + def _checkFSEndpoint(self): + if not os.path.isdir(self.fs_endpoint_path): + if self.config.get('ignore_missing_dir'): + return False + raise InvalidFileSystemEndpointError(self.name, + self.fs_endpoint_path) + return True + + def openItem(self, item, mode='r'): + for m in 'wxa+': + if m in mode: + # If opening the file for writing, let's make sure the + # directory exists. + dirname = os.path.dirname(item.spec) + if not os.path.exists(dirname): + os.makedirs(dirname, 0o755) + break + return open(item.spec, mode) + + def getItemMtime(self, item): + return os.path.getmtime(item.spec) + + +class FSContentSource(FSContentSourceBase): + """ Implements a `ContentSource` that simply returns files on disk + under a given root directory. + """ + SOURCE_NAME = 'fs' + + def getContents(self, group): + logger.debug("Scanning for content in: %s" % self.fs_endpoint_path) + if not self._checkFSEndpoint(): + return None + + parent_path = self.fs_endpoint_path + if group is not None: + parent_path = group.spec + + names = filter(_filter_crap_files, osutil.listdir(parent_path)) + if self._fs_filter is not None: + names = filter(self._fs_filter, names) + + items = [] + groups = [] + for name in names: + path = os.path.join(parent_path, name) + if os.path.isdir(path): + metadata = self._createGroupMetadata(path) + groups.append(ContentGroup(path, metadata)) + else: + metadata = self._createItemMetadata(path) + items.append(ContentItem(path, metadata)) + self._finalizeContent(group, items, groups) + return items + groups + + def _createGroupMetadata(self, path): + return {} + + def _createItemMetadata(self, path): + return {} + + def _finalizeContent(self, parent_group, items, groups): + pass + + def getRelatedContents(self, item, relationship): + return None + + def findContent(self, route_params): + rel_path = route_params['path'] + path = os.path.join(self.fs_endpoint_path, rel_path) + metadata = self._createItemMetadata(path) + return ContentItem(path, metadata) + + def getSupportedRouteParameters(self): + return [ + RouteParameter('path', RouteParameter.TYPE_PATH)] + + def describe(self): + return {'endpoint_path': self.fs_endpoint_path}
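Driving the plain file-system source end to end looks roughly like this. A hedged sketch: `app` is assumed to be an initialized PieCrust application and 'docs' a hypothetical directory under the site root; the explicit 'realm' key is needed because `root_dir` consults it through `is_theme_source`:

from piecrust.sources.base import REALM_USER
from piecrust.sources.fs import FSContentSource

src = FSContentSource(app, 'docs', {'fs_endpoint': 'docs',
                                    'realm': REALM_USER})
for item in src.getAllContents():
    with src.openItem(item) as f:
        print(item.spec, len(f.read()), src.getItemMtime(item))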
--- a/piecrust/sources/interfaces.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/sources/interfaces.py Wed May 17 00:11:48 2017 -0700 @@ -16,27 +16,10 @@ def getTailIterator(self, it): raise NotImplementedError() - def getPaginationFilter(self, page): - raise NotImplementedError() - def getSettingAccessor(self): raise NotImplementedError() -class IListableSource(object): - """ Defines the interface for a source that can be iterated on in a - hierarchical manner, for use with the `family` data endpoint. - """ - def listPath(self, rel_path): - raise NotImplementedError() - - def getDirpath(self, rel_path): - raise NotImplementedError() - - def getBasename(self, rel_path): - raise NotImplementedError() - - class IPreparingSource(object): """ Defines the interface for a source whose pages can be created by the `chef prepare` command. @@ -44,11 +27,13 @@ def setupPrepareParser(self, parser, app): raise NotImplementedError() - def buildMetadata(self, args): + def createContent(self, args): raise NotImplementedError() class InteractiveField(object): + """ A field to display in the administration web UI. + """ TYPE_STRING = 0 TYPE_INT = 1 @@ -59,6 +44,9 @@ class IInteractiveSource(object): + """ A content source that a user can interact with in the administration + web UI. + """ def getInteractiveFields(self): raise NotImplementedError()
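The renamed contract in a nutshell: `chef prepare` now expects `createContent()` to hand back a content item rather than `buildMetadata()` returning a metadata dict. A minimal sketch, where `SimpleSource` and its page path are hypothetical; real implementations (see default.py and posts.py below) mix this into a `ContentSource` subclass:

from piecrust.sources.base import ContentItem
from piecrust.sources.interfaces import IPreparingSource

class SimpleSource(IPreparingSource):
    def setupPrepareParser(self, parser, app):
        parser.add_argument('uri', help="The URI for the new page.")

    def createContent(self, args):
        # Return the item to create instead of just its metadata.
        return ContentItem('/site/pages/%s.md' % args.uri,
                           {'slug': args.uri})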
--- a/piecrust/sources/mixins.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/sources/mixins.py Wed May 17 00:11:48 2017 -0700 @@ -1,17 +1,17 @@ -import os import os.path import logging -from piecrust.data.filters import PaginationFilter, page_value_accessor +from piecrust import osutil from piecrust.data.paginationdata import PaginationData -from piecrust.sources.base import PageFactory -from piecrust.sources.interfaces import IPaginationSource, IListableSource -from piecrust.sources.pageref import PageRef +from piecrust.sources.base import ContentItem +from piecrust.sources.interfaces import IPaginationSource logger = logging.getLogger(__name__) +assets_suffix = '-assets' -class SourceFactoryIterator(object): + +class ContentSourceIterator(object): def __init__(self, source): self.source = source @@ -20,34 +20,7 @@ self.it = None def __iter__(self): - return self.source.getPages() - - -class SourceFactoryWithoutGeneratorsIterator(object): - def __init__(self, source): - self.source = source - self._generator_pages = None - # See comment above. - self.it = None - - def __iter__(self): - self._cacheGeneratorPages() - for p in self.source.getPages(): - if p.rel_path in self._generator_pages: - continue - yield p - - def _cacheGeneratorPages(self): - if self._generator_pages is not None: - return - - app = self.source.app - self._generator_pages = set() - for src in app.sources: - for gen in app.generators: - for sn, rp in gen.page_ref.possible_split_ref_specs: - if sn == self.source.name: - self._generator_pages.add(rp) + return self.source.getAllContents() class DateSortIterator(object): @@ -66,10 +39,10 @@ def __iter__(self): for page in self.it: - if page is None: + if page is not None: + yield PaginationData(page) + else: yield None - else: - yield PaginationData(page) class SimplePaginationSourceMixin(IPaginationSource): @@ -80,9 +53,7 @@ return self.config['items_per_page'] def getSourceIterator(self): - if self.config.get('iteration_includes_generator_pages', False): - return SourceFactoryIterator(self) - return SourceFactoryWithoutGeneratorsIterator(self) + return ContentSourceIterator(self) def getSorterIterator(self, it): return DateSortIterator(it) @@ -90,76 +61,33 @@ def getTailIterator(self, it): return PaginationDataBuilderIterator(it) - def getPaginationFilter(self, page): - conf = (page.config.get('items_filters') or - self.config.get('items_filters')) - if conf == 'none' or conf == 'nil' or conf == '': - conf = None - if conf is not None: - f = PaginationFilter(value_accessor=page_value_accessor) - f.addClausesFromConfig(conf) - return f - return None - def getSettingAccessor(self): - return page_value_accessor - - -class SimpleListableSourceMixin(IListableSource): - """ Implements the `IListableSource` interface for sources that map to - simple file-system structures.
- """ - def listPath(self, rel_path): - rel_path = rel_path.lstrip('\\/') - path = self._getFullPath(rel_path) - names = self._sortFilenames(os.listdir(path)) - - items = [] - for name in names: - if os.path.isdir(os.path.join(path, name)): - if self._filterPageDirname(name): - rel_subdir = os.path.join(rel_path, name) - items.append((True, name, rel_subdir)) - else: - if self._filterPageFilename(name): - slug = self._makeSlug(os.path.join(rel_path, name)) - metadata = {'slug': slug} +class SimpleAssetsSubDirMixin: + def _getRelatedAssetsContents(self, item, relationship): + if not item.metadata.get('__has_assets', False): + return None - fac_path = name - if rel_path != '.': - fac_path = os.path.join(rel_path, name) - fac_path = fac_path.replace('\\', '/') - - self._populateMetadata(fac_path, metadata) - fac = PageFactory(self, fac_path, metadata) - - name, _ = os.path.splitext(name) - items.append((False, name, fac)) - return items - - def getDirpath(self, rel_path): - return os.path.dirname(rel_path) + assets = {} + assets_dir = item.spec + assets_suffix + for f in osutil.listdir(assets_dir): + fpath = os.path.join(assets_dir, f) + name, _ = os.path.splitext(f) + if name in assets: + raise Exception("Multiple assets are named '%s'." % + name) + assets[name] = ContentItem(fpath, {'__is_asset': True}) + return assets - def getBasename(self, rel_path): - filename = os.path.basename(rel_path) - name, _ = os.path.splitext(filename) - return name - - def _getFullPath(self, rel_path): - return os.path.join(self.fs_endpoint_path, rel_path) - - def _sortFilenames(self, names): - return sorted(names) + def _onFinalizeContent(self, parent_group, items, groups): + assetsGroups = [] + for g in groups: + if not g.spec.endswith(assets_suffix): + continue + match = g.spec[:-len(assets_suffix)] + item = next(filter(lambda i: i.spec == match), None) + if item: + item.metadata['__has_assets'] = True + assetsGroups.append(g) + for g in assetsGroups: + groups.remove(g) - def _filterPageDirname(self, name): - return True - - def _filterPageFilename(self, name): - return True - - def _makeSlug(self, rel_path): - return rel_path.replace('\\', '/') - - def _populateMetadata(self, rel_path, metadata, mode=None): - pass -
--- a/piecrust/sources/pageref.py Sat Apr 29 21:42:22 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,137 +0,0 @@ -import re -import os.path -import copy -from piecrust.sources.base import PageFactory - - -page_ref_pattern = re.compile(r'(?P<src>[\w]+)\:(?P<path>.*?)(;|$)') - - -class PageNotFoundError(Exception): - pass - - -class PageRef(object): - """ A reference to a page, with support for looking a page in different - realms. - """ - _INDEX_NEEDS_LOADING = -2 - _INDEX_NOT_FOUND = -1 - - class _HitInfo(object): - def __init__(self, source_name, rel_path, path, metadata): - self.source_name = source_name - self.rel_path = rel_path - self.path = path - self.metadata = metadata - - def __init__(self, app, page_ref): - self.app = app - self._page_ref = page_ref - self._hits = None - self._first_valid_hit_index = self._INDEX_NEEDS_LOADING - self._exts = list(app.config.get('site/auto_formats').keys()) - - def __str__(self): - return self._page_ref - - @property - def exists(self): - try: - self._checkHits() - return True - except PageNotFoundError: - return False - - @property - def source_name(self): - return self._first_valid_hit.source_name - - @property - def source(self): - return self.app.getSource(self.source_name) - - @property - def rel_path(self): - return self._first_valid_hit.rel_path - - @property - def path(self): - return self._first_valid_hit.path - - @property - def metadata(self): - return self._first_valid_hit.metadata - - @property - def possible_ref_specs(self): - self._load() - return ['%s:%s' % (h.source_name, h.rel_path) for h in self._hits] - - @property - def possible_split_ref_specs(self): - self._load() - return [(h.source_name, h.rel_path) for h in self._hits] - - @property - def possible_paths(self): - self._load() - return [h.path for h in self._hits] - - def getFactory(self): - return PageFactory(self.source, self.rel_path, - copy.deepcopy(self.metadata)) - - @property - def _first_valid_hit(self): - self._checkHits() - return self._hits[self._first_valid_hit_index] - - def _load(self): - if self._hits is not None: - return - - self._hits = [] - - if self._page_ref is None: - self._first_valid_hit_index = self._INDEX_NOT_FOUND - return - - it = list(page_ref_pattern.finditer(self._page_ref)) - if len(it) == 0: - raise Exception("Invalid page ref: %s" % self._page_ref) - - for m in it: - source_name = m.group('src') - source = self.app.getSource(source_name) - if source is None: - raise Exception("No such source: %s" % source_name) - rel_path = m.group('path') - if '%ext%' in rel_path: - for e in self._exts: - cur_rel_path = rel_path.replace('%ext%', e) - path, metadata = source.resolveRef(cur_rel_path) - self._hits.append(self._HitInfo( - source_name, cur_rel_path, path, metadata)) - else: - path, metadata = source.resolveRef(rel_path) - self._hits.append( - self._HitInfo(source_name, rel_path, path, metadata)) - - def _checkHits(self): - if self._first_valid_hit_index >= 0: - return - - if self._first_valid_hit_index == self._INDEX_NEEDS_LOADING: - self._load() - self._first_valid_hit_index = self._INDEX_NOT_FOUND - for i, hit in enumerate(self._hits): - if os.path.isfile(hit.path): - self._first_valid_hit_index = i - break - - if self._first_valid_hit_index == self._INDEX_NOT_FOUND: - raise PageNotFoundError( - "No valid paths were found for page reference: %s" % - self._page_ref) -
--- a/piecrust/sources/posts.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/sources/posts.py Wed May 17 00:11:48 2017 -0700 @@ -5,80 +5,48 @@ import datetime from piecrust import osutil from piecrust.routing import RouteParameter -from piecrust.sources.base import ( - PageSource, InvalidFileSystemEndpointError, PageFactory, - MODE_CREATING, MODE_PARSING) +from piecrust.sources.base import REL_ASSETS, ContentItem +from piecrust.sources.fs import ( + FSContentSource, InvalidFileSystemEndpointError) from piecrust.sources.interfaces import ( - IPreparingSource, IInteractiveSource, InteractiveField) + IPreparingSource, IInteractiveSource, InteractiveField) -from piecrust.sources.mixins import SimplePaginationSourceMixin +from piecrust.sources.mixins import ( + SimplePaginationSourceMixin, SimpleAssetsSubDirMixin) -from piecrust.uriutil import multi_replace +from piecrust.uriutil import uri_to_title logger = logging.getLogger(__name__) -class PostsSource(PageSource, IPreparingSource, IInteractiveSource, - SimplePaginationSourceMixin): +class PostsSource(FSContentSource, + SimpleAssetsSubDirMixin, + IPreparingSource, IInteractiveSource): PATH_FORMAT = None def __init__(self, app, name, config): - PageSource.__init__(self, app, name, config) - self.fs_endpoint = config.get('fs_endpoint', name) - self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) - self.supported_extensions = list(app.config.get('site/auto_formats').keys()) + FSContentSource.__init__(self, app, name, config) + self.auto_formats = app.config.get('site/auto_formats') self.default_auto_format = app.config.get('site/default_auto_format') - self._source_it_cache = None + self.supported_extensions = list(self.auto_formats) @property def path_format(self): return self.__class__.PATH_FORMAT - def resolveRef(self, ref_path): - path = os.path.normpath(os.path.join(self.fs_endpoint_path, ref_path)) - metadata = self._parseMetadataFromPath(ref_path) - return path, metadata - - def getSupportedRouteParameters(self): - return [ - RouteParameter('slug', RouteParameter.TYPE_STRING), - RouteParameter('day', RouteParameter.TYPE_INT2), - RouteParameter('month', RouteParameter.TYPE_INT2), - RouteParameter('year', RouteParameter.TYPE_INT4)] + def _finalizeContent(self, parent_group, items, groups): + SimpleAssetsSubDirMixin._onFinalizeContent( + self, parent_group, items, groups) - def buildPageFactory(self, path): - if not path.startswith(self.fs_endpoint_path): - raise Exception("Page path '%s' isn't inside '%s'." % ( - path, self.fs_endpoint_path)) - rel_path = path[len(self.fs_endpoint_path):].lstrip('\\/') - pat = self.PATH_FORMAT % { - 'year': 'YEAR', - 'month': 'MONTH', - 'day': 'DAY', - 'slug': 'SLUG', - 'ext': 'EXT'} - pat = re.escape(pat) - pat = multi_replace(pat, { - 'YEAR': '(\d{4})', - 'MONTH': '(\d{2})', - 'DAY': '(\d{2})', - 'SLUG': '(.*)', - 'EXT': '(.*)'}) - m = re.match(pat, rel_path) - if m is None: - raise Exception("'%s' isn't a proper %s page path."
% ( - rel_path, self.SOURCE_NAME)) - return self._makeFactory( - rel_path, - m.group(4), - int(m.group(1)), - int(m.group(2)), - int(m.group(3))) + def getRelatedContents(self, item, relationship): + if relationship == REL_ASSETS: + return SimpleAssetsSubDirMixin._getRelatedAssetsContents( + self, item, relationship) + raise NotImplementedError() - def findPageFactory(self, metadata, mode): - year = metadata.get('year') - month = metadata.get('month') - day = metadata.get('day') - slug = metadata.get('slug') + def findContent(self, route_params): + year = route_params.get('year') + month = route_params.get('month') + day = route_params.get('day') + slug = route_params.get('slug') try: if year is not None: @@ -90,20 +58,18 @@ except ValueError: return None - ext = metadata.get('ext') + ext = route_params.get('ext') if ext is None: if len(self.supported_extensions) == 1: ext = self.supported_extensions[0] - elif mode == MODE_CREATING and self.default_auto_format: - ext = self.default_auto_format replacements = { - 'year': '%04d' % year if year is not None else None, - 'month': '%02d' % month if month is not None else None, - 'day': '%02d' % day if day is not None else None, - 'slug': slug, - 'ext': ext - } + 'year': '%04d' % year if year is not None else None, + 'month': '%02d' % month if month is not None else None, + 'day': '%02d' % day if day is not None else None, + 'slug': slug, + 'ext': ext + } needs_recapture = False if year is None: needs_recapture = True @@ -121,22 +87,53 @@ needs_recapture = True replacements['ext'] = '*' path = os.path.normpath(os.path.join( - self.fs_endpoint_path, self.path_format % replacements)) + self.fs_endpoint_path, self.path_format % replacements)) if needs_recapture: - if mode == MODE_CREATING: - raise ValueError("Not enough information to find a post path.") possible_paths = osutil.glob(path) if len(possible_paths) != 1: return None path = possible_paths[0] - elif mode == MODE_PARSING and not os.path.isfile(path): + elif not os.path.isfile(path): return None - rel_path = os.path.relpath(path, self.fs_endpoint_path) - rel_path = rel_path.replace('\\', '/') - fac_metadata = self._parseMetadataFromPath(rel_path) - return PageFactory(self, rel_path, fac_metadata) + metadata = self._parseMetadataFromPath(path) + return ContentItem(path, metadata) + + def _parseMetadataFromPath(self, path): + regex_repl = { + 'year': '(?P<year>\d{4})', + 'month': '(?P<month>\d{2})', + 'day': '(?P<day>\d{2})', + 'slug': '(?P<slug>.*)', + 'ext': '(?P<ext>.*)' + } + path_format_re = re.sub(r'([\-\.])', r'\\\1', self.path_format) + pattern = path_format_re % regex_repl + '$' + m = re.search(pattern, path.replace('\\', '/')) + if not m: + raise Exception("Expected to be able to match path with path " + "format: %s" % path) + + year = int(m.group('year')) + month = int(m.group('month')) + day = int(m.group('day')) + timestamp = datetime.date(year, month, day) + metadata = { + 'year': year, + 'month': month, + 'day': day, + 'slug': m.group('slug'), + 'date': timestamp + } + return metadata + + def getSupportedRouteParameters(self): + return [ + RouteParameter('slug', RouteParameter.TYPE_STRING), + RouteParameter('day', RouteParameter.TYPE_INT2), + RouteParameter('month', RouteParameter.TYPE_INT2), + RouteParameter('year', RouteParameter.TYPE_INT4)] def getSourceIterator(self): if self._source_it_cache is None: @@ -146,11 +143,11 @@ def setupPrepareParser(self, parser, app): parser.add_argument( - '-d', '--date', help="The date of the post, " - "in `year/month/day` format (defaults to today).") + '-d', '--date',
help="The date of the post, " + "in `year/month/day` format (defaults to today).") parser.add_argument('slug', help="The URL slug for the new post.") - def buildMetadata(self, args): + def createContent(self, args): dt = datetime.date.today() if args.date: if args.date == 'today': @@ -170,8 +167,23 @@ "YEAR/MONTH/DAY.") dt = datetime.date(year, month, day) + slug, ext = os.path.splitext(args.slug) + if not ext: + ext = self.default_auto_format year, month, day = dt.year, dt.month, dt.day - return {'year': year, 'month': month, 'day': day, 'slug': args.slug} + tokens = { + 'slug': args.slug, + 'ext': ext, + 'year': '%04d' % year, + 'month': '%02d' % month, + 'day': '%02d' % day + } + rel_path = self.path_format % tokens + path = os.path.join(self.fs_endpoint_path, rel_path) + metadata = { + 'config': {'title': uri_to_title(slug)} + } + return ContentItem(path, metadata) def getInteractiveFields(self): dt = datetime.date.today() @@ -185,96 +197,78 @@ if not os.path.isdir(self.fs_endpoint_path): if self.ignore_missing_dir: return False - raise InvalidFileSystemEndpointError(self.name, self.fs_endpoint_path) + raise InvalidFileSystemEndpointError(self.name, + self.fs_endpoint_path) return True - def _parseMetadataFromPath(self, path): - regex_repl = { - 'year': '(?P<year>\d{4})', - 'month': '(?P<month>\d{2})', - 'day': '(?P<day>\d{2})', - 'slug': '(?P<slug>.*)', - 'ext': '(?P<ext>.*)' - } - path_format_re = re.sub(r'([\-\.])', r'\\\1', self.path_format) - pattern = path_format_re % regex_repl + '$' - m = re.search(pattern, path.replace('\\', '/')) - if not m: - raise Exception("Expected to be able to match path with path " - "format: %s" % path) - - year = int(m.group('year')) - month = int(m.group('month')) - day = int(m.group('day')) - timestamp = datetime.date(year, month, day) - metadata = { - 'year': year, - 'month': month, - 'day': day, - 'slug': m.group('slug'), - 'date': timestamp - } - return metadata - - def _makeFactory(self, path, slug, year, month, day): + def _makeContentItem(self, path, slug, year, month, day): path = path.replace('\\', '/') timestamp = datetime.date(year, month, day) metadata = { - 'slug': slug, - 'year': year, - 'month': month, - 'day': day, - 'date': timestamp} - return PageFactory(self, path, metadata) + 'slug': slug, + 'year': year, + 'month': month, + 'day': day, + 'date': timestamp} + return ContentItem(path, metadata) class FlatPostsSource(PostsSource): SOURCE_NAME = 'posts/flat' PATH_FORMAT = '%(year)s-%(month)s-%(day)s_%(slug)s.%(ext)s' + PATTERN = re.compile(r'(\d{4})-(\d{2})-(\d{2})_(.*)\.(\w+)$') def __init__(self, app, name, config): - super(FlatPostsSource, self).__init__(app, name, config) + super().__init__(app, name, config) - def buildPageFactories(self): - if not self._checkFsEndpointPath(): - return - logger.debug("Scanning for posts (flat) in: %s" % self.fs_endpoint_path) - pattern = re.compile(r'(\d{4})-(\d{2})-(\d{2})_(.*)\.(\w+)$') + def getContents(self, group): + if not self._checkFSEndpoint(): + return None + + logger.debug("Scanning for posts (flat) in: %s" % + self.fs_endpoint_path) + pattern = FlatPostsSource.PATTERN _, __, filenames = next(osutil.walk(self.fs_endpoint_path)) for f in filenames: match = pattern.match(f) if match is None: name, ext = os.path.splitext(f) - logger.warning("'%s' is not formatted as 'YYYY-MM-DD_slug-title.%s' " - "and will be ignored. Is that a typo?" % (f, ext)) + logger.warning( + "'%s' is not formatted as 'YYYY-MM-DD_slug-title.%s' " + "and will be ignored. Is that a typo?" 
% (f, ext)) continue - yield self._makeFactory( - f, - match.group(4), - int(match.group(1)), - int(match.group(2)), - int(match.group(3))) + yield self._makeContentItem( + f, + match.group(4), + int(match.group(1)), + int(match.group(2)), + int(match.group(3))) class ShallowPostsSource(PostsSource): SOURCE_NAME = 'posts/shallow' PATH_FORMAT = '%(year)s/%(month)s-%(day)s_%(slug)s.%(ext)s' + YEAR_PATTERN = re.compile(r'(\d{4})$') + FILE_PATTERN = re.compile(r'(\d{2})-(\d{2})_(.*)\.(\w+)$') def __init__(self, app, name, config): super(ShallowPostsSource, self).__init__(app, name, config) - def buildPageFactories(self): + def getContents(self, group): if not self._checkFsEndpointPath(): return - logger.debug("Scanning for posts (shallow) in: %s" % self.fs_endpoint_path) - year_pattern = re.compile(r'(\d{4})$') - file_pattern = re.compile(r'(\d{2})-(\d{2})_(.*)\.(\w+)$') + + logger.debug("Scanning for posts (shallow) in: %s" % + self.fs_endpoint_path) + year_pattern = ShallowPostsSource.YEAR_PATTERN + file_pattern = ShallowPostsSource.FILE_PATTERN _, year_dirs, __ = next(osutil.walk(self.fs_endpoint_path)) year_dirs = [d for d in year_dirs if year_pattern.match(d)] for yd in year_dirs: if year_pattern.match(yd) is None: - logger.warning("'%s' is not formatted as 'YYYY' and will be ignored. " - "Is that a typo?") + logger.warning( + "'%s' is not formatted as 'YYYY' and will be ignored. " + "Is that a typo?") continue year = int(yd) year_dir = os.path.join(self.fs_endpoint_path, yd) @@ -284,31 +278,37 @@ match = file_pattern.match(f) if match is None: name, ext = os.path.splitext(f) - logger.warning("'%s' is not formatted as 'MM-DD_slug-title.%s' " - "and will be ignored. Is that a typo?" % (f, ext)) + logger.warning( + "'%s' is not formatted as 'MM-DD_slug-title.%s' " + "and will be ignored. Is that a typo?" % (f, ext)) continue - yield self._makeFactory( - os.path.join(yd, f), - match.group(3), - year, - int(match.group(1)), - int(match.group(2))) + yield self._makeContentItem( + os.path.join(yd, f), + match.group(3), + year, + int(match.group(1)), + int(match.group(2))) class HierarchyPostsSource(PostsSource): SOURCE_NAME = 'posts/hierarchy' PATH_FORMAT = '%(year)s/%(month)s/%(day)s_%(slug)s.%(ext)s' + YEAR_PATTERN = re.compile(r'(\d{4})$') + MONTH_PATTERN = re.compile(r'(\d{2})$') + FILE_PATTERN = re.compile(r'(\d{2})_(.*)\.(\w+)$') def __init__(self, app, name, config): super(HierarchyPostsSource, self).__init__(app, name, config) - def buildPageFactories(self): + def getContents(self, group): if not self._checkFsEndpointPath(): return - logger.debug("Scanning for posts (hierarchy) in: %s" % self.fs_endpoint_path) - year_pattern = re.compile(r'(\d{4})$') - month_pattern = re.compile(r'(\d{2})$') - file_pattern = re.compile(r'(\d{2})_(.*)\.(\w+)$') + + logger.debug("Scanning for posts (hierarchy) in: %s" % + self.fs_endpoint_path) + year_pattern = HierarchyPostsSource.YEAR_PATTERN + month_pattern = HierarchyPostsSource.MONTH_PATTERN + file_pattern = HierarchyPostsSource.FILE_PATTERN _, year_dirs, __ = next(osutil.walk(self.fs_endpoint_path)) year_dirs = [d for d in year_dirs if year_pattern.match(d)] for yd in year_dirs: @@ -326,14 +326,15 @@ match = file_pattern.match(f) if match is None: name, ext = os.path.splitext(f) - logger.warning("'%s' is not formatted as 'DD_slug-title.%s' " - "and will be ignored. Is that a typo?" % (f, ext)) + logger.warning( + "'%s' is not formatted as 'DD_slug-title.%s' " + "and will be ignored. Is that a typo?" 
% (f, ext)) continue rel_name = os.path.join(yd, md, f) - yield self._makeFactory( - rel_name, - match.group(2), - year, - month, - int(match.group(1))) + yield self._makeContentItem( + rel_name, + match.group(2), + year, + month, + int(match.group(1)))
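What the flat posts pattern captures from a filename, shown directly against the class attribute introduced above:

from piecrust.sources.posts import FlatPostsSource

# PATTERN is r'(\d{4})-(\d{2})-(\d{2})_(.*)\.(\w+)$', per the diff above.
m = FlatPostsSource.PATTERN.match('2017-05-17_big-refactor.md')
year, month, day = int(m.group(1)), int(m.group(2)), int(m.group(3))
slug, ext = m.group(4), m.group(5)
print(year, month, day, slug, ext)
# -> 2017 5 17 big-refactor md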
--- a/piecrust/sources/prose.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/sources/prose.py Wed May 17 00:11:48 2017 -0700 @@ -2,14 +2,13 @@ import os.path import copy import logging -from piecrust.sources.base import MODE_CREATING, MODE_PARSING -from piecrust.sources.default import DefaultPageSource +from piecrust.sources.default import DefaultContentSource logger = logging.getLogger(__name__) -class ProseSource(DefaultPageSource): +class ProseSource(DefaultContentSource): SOURCE_NAME = 'prose' def __init__(self, app, name, config):
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/sources/taxonomy.py Wed May 17 00:11:48 2017 -0700 @@ -0,0 +1,433 @@ +import re +import time +import logging +import unidecode +from piecrust.chefutil import format_timed, format_timed_scope +from piecrust.configuration import ConfigurationError +from piecrust.data.filters import ( + PaginationFilter, SettingFilterClause, + page_value_accessor) +from piecrust.routing import RouteParameter +from piecrust.sources.base import ContentSource, GeneratedContentException + + +logger = logging.getLogger(__name__) + + +SLUGIFY_ENCODE = 1 +SLUGIFY_TRANSLITERATE = 2 +SLUGIFY_LOWERCASE = 4 +SLUGIFY_DOT_TO_DASH = 8 +SLUGIFY_SPACE_TO_DASH = 16 + + +re_first_dot_to_dash = re.compile(r'^\.+') +re_dot_to_dash = re.compile(r'\.+') +re_space_to_dash = re.compile(r'\s+') + + +class Taxonomy(object): + def __init__(self, name, config): + self.name = name + self.config = config + self.term_name = config.get('term', name) + self.is_multiple = bool(config.get('multiple', False)) + self.separator = config.get('separator', '/') + self.page_ref = config.get('page') + + @property + def setting_name(self): + if self.is_multiple: + return self.name + return self.term_name + + +class TaxonomySource(ContentSource): + """ A page generator that handles taxonomies, _i.e._ lists of keywords + that pages are labelled with, and for which we need to generate + listing pages. + """ + SOURCE_NAME = 'taxonomy' + + def __init__(self, app, name, config): + super().__init__(app, name, config) + + tax_name = config.get('taxonomy') + if tax_name is None: + raise ConfigurationError( + "Generator '%s' requires a taxonomy name." % name) + tax_config = app.config.get('site/taxonomies/' + tax_name) + if tax_config is None: + raise ConfigurationError( + "Error initializing generator '%s', no such taxonomy: %s", + (name, tax_name)) + self.taxonomy = Taxonomy(tax_name, tax_config) + + sm = config.get('slugify_mode') + if not sm: + sm = app.config.get('site/slugify_mode', 'encode') + self.slugify_mode = _parse_slugify_mode(sm) + self.slugifier = _Slugifier(self.taxonomy, self.slugify_mode) + + def getContents(self, group): + raise GeneratedContentException() + + def getSupportedRouteParameters(self): + name = self.taxonomy.term_name + param_type = (RouteParameter.TYPE_PATH if self.taxonomy.is_multiple + else RouteParameter.TYPE_STRING) + return [RouteParameter(name, param_type, + variadic=self.taxonomy.is_multiple)] + + def slugify(self, term): + return self.slugifier.slugify(term) + + def slugifyMultiple(self, terms): + return self.slugifier.slugifyMultiple(terms) + + def prepareRenderContext(self, ctx): + # Set the pagination source as the source we're generating for. + ctx.pagination_source = self.source + + # Get the taxonomy terms from the route metadata... this can come from + # the browser's URL (while serving) or from the baking (see `bake` + # method below). In both cases, we expect to have the *slugified* + # version of the term, because we're going to set a filter that also + # slugifies the terms found on each page. + # + # This is because: + # * while serving, we get everything from the request URL, so we only + # have the slugified version. + # * if 2 slightly different terms "collide" into the same slugified + # term, we'll get a merge of the 2 on the listing page, which is + # what the user expects. 
+ # + tax_terms, is_combination = self._getTaxonomyTerms( + ctx.page.route_metadata) + self._setTaxonomyFilter(ctx, tax_terms, is_combination) + + # Add some custom data for rendering. + ctx.custom_data.update({ + self.taxonomy.term_name: tax_terms, + 'is_multiple_%s' % self.taxonomy.term_name: is_combination}) + # Add some "plural" version of the term... so for instance, if this + # is the "tags" taxonomy, "tag" will have one term most of the time, + # except when it's a combination. Here, we add "tags" as something that + # is always a tuple, even when it's not a combination. + if (self.taxonomy.is_multiple and + self.taxonomy.name != self.taxonomy.term_name): + mult_val = tax_terms + if not is_combination: + mult_val = (mult_val,) + ctx.custom_data[self.taxonomy.name] = mult_val + + def _getSource(self): + return self.app.getSource(self.config['source']) + + def _getTaxonomyTerms(self, route_metadata): + # Get the individual slugified terms from the route metadata. + all_values = route_metadata.get(self.taxonomy.term_name) + if all_values is None: + raise Exception("'%s' values couldn't be found in route metadata" % + self.taxonomy.term_name) + + # If it's a "multiple" taxonomy, we need to potentially split the + # route value into the individual terms (_e.g._ when listing all pages + # that have 2 given tags, we need to get each of those 2 tags). + if self.taxonomy.is_multiple: + sep = self.taxonomy.separator + if sep in all_values: + return tuple(all_values.split(sep)), True + # Not a "multiple" taxonomy, so there's only the one value. + return all_values, False + + def _setTaxonomyFilter(self, ctx, term_value, is_combination): + # Set up the filter that will check the pages' terms. + flt = PaginationFilter(value_accessor=page_value_accessor) + flt.addClause(HasTaxonomyTermsFilterClause( + self.taxonomy, self.slugify_mode, term_value, is_combination)) + ctx.pagination_filter = flt + + def onRouteFunctionUsed(self, route, route_metadata): + # Get the values, and slugify them appropriately. + values = route_metadata[self.taxonomy.term_name] + if self.taxonomy.is_multiple: + # TODO: here we assume the route has been properly configured. + slugified_values = self.slugifyMultiple((str(v) for v in values)) + route_val = self.taxonomy.separator.join(slugified_values) + else: + slugified_values = self.slugify(str(values)) + route_val = slugified_values + + # We need to register this use of a taxonomy term. + eis = self.app.env.exec_info_stack + cpi = eis.current_page_info.render_ctx.current_pass_info + if cpi: + utt = cpi.getCustomInfo('used_taxonomy_terms', [], True) + utt.append(slugified_values) + + # Put the slugified values in the route metadata so they're used to + # generate the URL. + route_metadata[self.taxonomy.term_name] = route_val + + def bake(self, ctx): + if not self.page_ref.exists: + logger.debug( + "No page found at '%s', skipping taxonomy '%s'." % + (self.page_ref, self.taxonomy.name)) + return + + logger.debug("Baking %s pages...", self.taxonomy.name) + analyzer = _TaxonomyTermsAnalyzer(self.source_name, self.taxonomy, + self.slugify_mode) + with format_timed_scope(logger, 'gathered taxonomy terms', + level=logging.DEBUG, colored=False): + analyzer.analyze(ctx) + + start_time = time.perf_counter() + page_count = self._bakeTaxonomyTerms(ctx, analyzer) + if page_count > 0: + logger.info(format_timed( + start_time, + "baked %d %s pages for %s." 
% ( + page_count, self.taxonomy.term_name, self.source_name))) + + def _bakeTaxonomyTerms(self, ctx, analyzer): + # Start baking those terms. + logger.debug( + "Baking '%s' for source '%s': %d terms" % + (self.taxonomy.name, self.source_name, + len(analyzer.dirty_slugified_terms))) + + route = self.app.getGeneratorRoute(self.name) + if route is None: + raise Exception("No routes have been defined for generator: %s" % + self.name) + + logger.debug("Using taxonomy page: %s" % self.page_ref) + fac = self.page_ref.getFactory() + + job_count = 0 + for slugified_term in analyzer.dirty_slugified_terms: + extra_route_metadata = { + self.taxonomy.term_name: slugified_term} + + # Use the slugified term as the record's extra key seed. + logger.debug( + "Queuing: %s [%s=%s]" % + (fac.ref_spec, self.taxonomy.name, slugified_term)) + ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term) + job_count += 1 + ctx.runJobQueue() + + # Now we create bake entries for all the terms that were *not* dirty. + # This is because otherwise, on the next incremental bake, we wouldn't + # find any entry for those things, and figure that we need to delete + # their outputs. + for prev_entry, cur_entry in ctx.getAllPageRecords(): + # Only consider taxonomy-related entries that don't have any + # current version (i.e. they weren't baked just now). + if prev_entry and not cur_entry: + try: + t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) + except InvalidRecordExtraKey: + continue + + if analyzer.isKnownSlugifiedTerm(t): + logger.debug("Creating unbaked entry for %s term: %s" % + (self.name, t)) + ctx.collapseRecord(prev_entry) + else: + logger.debug("Term %s in %s isn't used anymore." % + (self.name, t)) + + return job_count + + +class HasTaxonomyTermsFilterClause(SettingFilterClause): + def __init__(self, taxonomy, slugify_mode, value, is_combination): + super(HasTaxonomyTermsFilterClause, self).__init__( + taxonomy.setting_name, value) + self._taxonomy = taxonomy + self._is_combination = is_combination + self._slugifier = _Slugifier(taxonomy, slugify_mode) + + def pageMatches(self, fil, page): + if self._taxonomy.is_multiple: + # Multiple taxonomy, i.e. it supports multiple terms, like tags. + page_values = fil.value_accessor(page, self.name) + if page_values is None or not isinstance(page_values, list): + return False + + page_set = set(map(self._slugifier.slugify, page_values)) + if self._is_combination: + # Multiple taxonomy, and multiple terms to match. Check that + # the ones to match are all in the page's terms. + value_set = set(self.value) + return value_set.issubset(page_set) + else: + # Multiple taxonomy, one term to match. + return self.value in page_set + else: + # Single taxonomy. Just compare the values. + page_value = fil.value_accessor(page, self.name) + if page_value is None: + return False + page_value = self._slugifier.slugify(page_value) + return page_value == self.value + + +class _TaxonomyTermsAnalyzer(object): + def __init__(self, source_name, taxonomy, slugify_mode): + self.source_name = source_name + self.taxonomy = taxonomy + self.slugifier = _Slugifier(taxonomy, slugify_mode) + self._all_terms = {} + self._single_dirty_slugified_terms = set() + self._all_dirty_slugified_terms = None + + @property + def dirty_slugified_terms(self): + """ Returns the slugified terms that have been 'dirtied' during + this bake. + """ + return self._all_dirty_slugified_terms + + def isKnownSlugifiedTerm(self, term): + """ Returns whether the given slugified term has been seen during + this bake. 
+ """ + return term in self._all_terms + + def analyze(self, ctx): + # Build the list of terms for our taxonomy, and figure out which ones + # are 'dirty' for the current bake. + # + # Remember all terms used. + for _, cur_entry in ctx.getAllPageRecords(): + if cur_entry and not cur_entry.was_overriden: + cur_terms = cur_entry.config.get(self.taxonomy.setting_name) + if cur_terms: + if not self.taxonomy.is_multiple: + self._addTerm(cur_entry.path, cur_terms) + else: + self._addTerms(cur_entry.path, cur_terms) + + # Re-bake all taxonomy terms that include new or changed pages, by + # marking them as 'dirty'. + for prev_entry, cur_entry in ctx.getBakedPageRecords(): + if cur_entry.source_name != self.source_name: + continue + + entries = [cur_entry] + if prev_entry: + entries.append(prev_entry) + + for e in entries: + entry_terms = e.config.get(self.taxonomy.setting_name) + if entry_terms: + if not self.taxonomy.is_multiple: + self._single_dirty_slugified_terms.add( + self.slugifier.slugify(entry_terms)) + else: + self._single_dirty_slugified_terms.update( + (self.slugifier.slugify(t) + for t in entry_terms)) + + self._all_dirty_slugified_terms = list( + self._single_dirty_slugified_terms) + logger.debug("Gathered %d dirty taxonomy terms", + len(self._all_dirty_slugified_terms)) + + # Re-bake the combination pages for terms that are 'dirty'. + # We make all terms into tuple, even those that are not actual + # combinations, so that we have less things to test further down the + # line. + # + # Add the combinations to that list. We get those combinations from + # wherever combinations were used, so they're coming from the + # `onRouteFunctionUsed` method. + if self.taxonomy.is_multiple: + known_combinations = set() + for _, cur_entry in ctx.getAllPageRecords(): + if cur_entry: + used_terms = _get_all_entry_taxonomy_terms(cur_entry) + for terms in used_terms: + if len(terms) > 1: + known_combinations.add(terms) + + dcc = 0 + for terms in known_combinations: + if not self._single_dirty_slugified_terms.isdisjoint( + set(terms)): + self._all_dirty_slugified_terms.append( + self.taxonomy.separator.join(terms)) + dcc += 1 + logger.debug("Gathered %d term combinations, with %d dirty." % + (len(known_combinations), dcc)) + + def _addTerms(self, entry_path, terms): + for t in terms: + self._addTerm(entry_path, t) + + def _addTerm(self, entry_path, term): + st = self.slugifier.slugify(term) + orig_terms = self._all_terms.setdefault(st, []) + if orig_terms and orig_terms[0] != term: + logger.warning( + "Term '%s' in '%s' is slugified to '%s' which conflicts with " + "previously existing '%s'. The two will be merged." 
% + (term, entry_path, st, orig_terms[0])) + orig_terms.append(term) + + +def _get_all_entry_taxonomy_terms(entry): + res = set() + for o in entry.subs: + for pinfo in o.render_info: + if pinfo: + terms = pinfo.getCustomInfo('used_taxonomy_terms') + if terms: + res |= set(terms) + return res + + +class _Slugifier(object): + def __init__(self, taxonomy, mode): + self.taxonomy = taxonomy + self.mode = mode + + def slugifyMultiple(self, terms): + return tuple(map(self.slugify, terms)) + + def slugify(self, term): + if self.mode & SLUGIFY_TRANSLITERATE: + term = unidecode.unidecode(term) + if self.mode & SLUGIFY_LOWERCASE: + term = term.lower() + if self.mode & SLUGIFY_DOT_TO_DASH: + term = re_first_dot_to_dash.sub('', term) + term = re_dot_to_dash.sub('-', term) + if self.mode & SLUGIFY_SPACE_TO_DASH: + term = re_space_to_dash.sub('-', term) + return term + + +def _parse_slugify_mode(value): + mapping = { + 'encode': SLUGIFY_ENCODE, + 'transliterate': SLUGIFY_TRANSLITERATE, + 'lowercase': SLUGIFY_LOWERCASE, + 'dot_to_dash': SLUGIFY_DOT_TO_DASH, + 'space_to_dash': SLUGIFY_SPACE_TO_DASH} + mode = 0 + for v in value.split(','): + f = mapping.get(v.strip()) + if f is None: + if v == 'iconv': + raise Exception("'iconv' is not supported as a slugify mode " + "in PieCrust2. Use 'transliterate'.") + raise Exception("Unknown slugify flag: %s" % v) + mode |= f + return mode +
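Slugify modes are bit flags parsed from a comma-separated string, such as `site/slugify_mode: transliterate,lowercase,space_to_dash` in the site configuration. A short sketch using the module's own helpers (`_parse_slugify_mode` and `_Slugifier` are internal to taxonomy.py):

from piecrust.sources.taxonomy import (
    Taxonomy, _Slugifier, _parse_slugify_mode)

tax = Taxonomy('tags', {'multiple': True})
mode = _parse_slugify_mode('transliterate,lowercase,space_to_dash')
slugifier = _Slugifier(tax, mode)
print(slugifier.slugify('Crème Brûlée'))
# -> 'creme-brulee'
print(slugifier.slugifyMultiple(['Foo Bar', 'Baz']))
# -> ('foo-bar', 'baz')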
--- a/piecrust/templating/jinjaengine.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/templating/jinjaengine.py Wed May 17 00:11:48 2017 -0700 @@ -1,6 +1,6 @@ import os.path import logging -from piecrust.environment import AbortedSourceUseError +from piecrust.sources.base import AbortedSourceUseError from piecrust.templating.base import (TemplateEngine, TemplateNotFoundError, TemplatingError)
--- a/piecrust/uriutil.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/uriutil.py Wed May 17 00:11:48 2017 -0700 @@ -1,6 +1,5 @@ import re import os.path -import string import logging @@ -57,3 +56,8 @@ return uri, page_num + +def uri_to_title(slug): + slug = re.sub(r'[\-_]', ' ', slug) + return slug.title() +
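The new helper turns dashes and underscores into spaces and title-cases the result, which is how `createContent()` derives a default page title from a slug:

from piecrust.uriutil import uri_to_title

print(uri_to_title('my-first_post'))
# -> 'My First Post'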
--- a/piecrust/workerpool.py Sat Apr 29 21:42:22 2017 -0700 +++ b/piecrust/workerpool.py Wed May 17 00:11:48 2017 -0700 @@ -2,37 +2,61 @@ import os import sys import time -import zlib -import queue +import pickle import logging -import itertools import threading +import traceback import multiprocessing from piecrust import fastpickle +from piecrust.environment import ExecutionStats logger = logging.getLogger(__name__) use_fastqueue = True +use_fastpickle = False class IWorker(object): + """ Interface for a pool worker. + """ def initialize(self): raise NotImplementedError() def process(self, job): raise NotImplementedError() - def getReport(self, pool_reports): + def getStats(self): return None def shutdown(self): pass +class WorkerExceptionData: + def __init__(self, wid): + super().__init__() + self.wid = wid + t, v, tb = sys.exc_info() + self.type = t + self.value = '\n'.join(_get_errors(v)) + self.traceback = ''.join(traceback.format_exception(t, v, tb)) + + def __str__(self): + return str(self.value) + + +def _get_errors(ex): + errors = [] + while ex is not None: + msg = str(ex) + errors.append(msg) + ex = ex.__cause__ + return errors + + TASK_JOB = 0 -TASK_BATCH = 1 -TASK_END = 2 +TASK_END = 1 def worker_func(params): @@ -52,6 +76,12 @@ def _real_worker_func(params): + wid = params.wid + + stats = ExecutionStats() + stats.registerTimer('WorkerInit') + init_start_time = time.perf_counter() + # In a context where `multiprocessing` is using the `spawn` forking model, # the new process doesn't inherit anything, so we lost all our logging # configuration here. Let's set it up again. @@ -60,7 +90,6 @@ from piecrust.main import _pre_parse_chef_args _pre_parse_chef_args(sys.argv[1:]) - wid = params.wid logger.debug("Worker %d initializing..." % wid) # We don't need those. @@ -78,67 +107,49 @@ params.outqueue.put(None) return - use_threads = False - if use_threads: - # Create threads to read/write the jobs and results from/to the - # main arbitrator process. - local_job_queue = queue.Queue() - reader_thread = threading.Thread( - target=_job_queue_reader, - args=(params.inqueue.get, local_job_queue), - name="JobQueueReaderThread") - reader_thread.start() - - local_result_queue = queue.Queue() - writer_thread = threading.Thread( - target=_job_results_writer, - args=(local_result_queue, params.outqueue.put), - name="JobResultWriterThread") - writer_thread.start() - - get = local_job_queue.get - put = local_result_queue.put_nowait - else: - get = params.inqueue.get - put = params.outqueue.put + stats.stepTimerSince('WorkerInit', init_start_time) # Start pumping! completed = 0 time_in_get = 0 time_in_put = 0 + get = params.inqueue.get + put = params.outqueue.put + while True: get_start_time = time.perf_counter() task = get() time_in_get += (time.perf_counter() - get_start_time) task_type, task_data = task + + # End task... gather stats to send back to the main process. if task_type == TASK_END: logger.debug("Worker %d got end task, exiting." 
% wid) - wprep = { - 'WorkerTaskGet': time_in_get, - 'WorkerResultPut': time_in_put} + stats.registerTimer('WorkerTaskGet', time=time_in_get) + stats.registerTimer('WorkerResultPut', time=time_in_put) try: - rep = (task_type, True, wid, (wid, w.getReport(wprep))) + stats.mergeStats(w.getStats()) + rep = (task_type, task_data, True, wid, (wid, stats)) except Exception as e: - logger.debug("Error getting report: %s" % e) - if params.wrap_exception: - e = multiprocessing.ExceptionWithTraceback( - e, e.__traceback__) - rep = (task_type, False, wid, (wid, e)) + logger.debug( + "Error getting report, sending exception to main process:") + logger.debug(traceback.format_exc()) + we = WorkerExceptionData(wid) + rep = (task_type, task_data, False, wid, (wid, we)) put(rep) break - if task_type == TASK_JOB: - task_data = (task_data,) - - for t in task_data: + # Job task... just do it. + elif task_type == TASK_JOB: try: - res = (TASK_JOB, True, wid, w.process(t)) + res = (task_type, task_data, True, wid, w.process(task_data)) except Exception as e: - if params.wrap_exception: - e = multiprocessing.ExceptionWithTraceback( - e, e.__traceback__) - res = (TASK_JOB, False, wid, e) + logger.debug( + "Error processing job, sending exception to main process:") + logger.debug(traceback.format_exc()) + we = WorkerExceptionData(wid) + res = (task_type, task_data, False, wid, we) put_start_time = time.perf_counter() put(res) @@ -146,62 +157,28 @@ completed += 1 - if use_threads: - logger.debug("Worker %d waiting for reader/writer threads." % wid) - local_result_queue.put_nowait(None) - reader_thread.join() - writer_thread.join() + else: + raise Exception("Unknown task type: %s" % task_type) w.shutdown() - logger.debug("Worker %d completed %d tasks." % (wid, completed)) -def _job_queue_reader(getter, out_queue): - while True: - try: - task = getter() - except (EOFError, OSError): - logger.debug("Worker encountered connection problem.") - break - - out_queue.put_nowait(task) - - if task[0] == TASK_END: - # Done reading jobs from the main process. - logger.debug("Got end task, exiting task queue reader thread.") - break - - -def _job_results_writer(in_queue, putter): - while True: - res = in_queue.get() - if res is not None: - putter(res) - in_queue.task_done() - else: - # Got sentinel. Exit. 
- in_queue.task_done() - break - logger.debug("Exiting result queue writer thread.") - - -class _WorkerParams(object): +class _WorkerParams: def __init__(self, wid, inqueue, outqueue, worker_class, initargs=(), - wrap_exception=False, is_profiling=False): + is_profiling=False): self.wid = wid self.inqueue = inqueue self.outqueue = outqueue self.worker_class = worker_class self.initargs = initargs - self.wrap_exception = wrap_exception self.is_profiling = is_profiling -class WorkerPool(object): - def __init__(self, worker_class, initargs=(), - worker_count=None, batch_size=None, - wrap_exception=False): +class WorkerPool: + def __init__(self, worker_class, initargs=(), *, + callback=None, error_callback=None, + worker_count=None, batch_size=None): worker_count = worker_count or os.cpu_count() or 1 if use_fastqueue: @@ -215,22 +192,22 @@ self._quick_put = self._task_queue._writer.send self._quick_get = self._result_queue._reader.recv + self._callback = callback + self._error_callback = error_callback self._batch_size = batch_size - self._callback = None - self._error_callback = None - self._listener = None + self._jobs_left = 0 + self._event = threading.Event() main_module = sys.modules['__main__'] is_profiling = os.path.basename(main_module.__file__) in [ - 'profile.py', 'cProfile.py'] + 'profile.py', 'cProfile.py'] self._pool = [] for i in range(worker_count): worker_params = _WorkerParams( - i, self._task_queue, self._result_queue, - worker_class, initargs, - wrap_exception=wrap_exception, - is_profiling=is_profiling) + i, self._task_queue, self._result_queue, + worker_class, initargs, + is_profiling=is_profiling) w = multiprocessing.Process(target=worker_func, args=(worker_params,)) w.name = w.name.replace('Process', 'PoolWorker') @@ -239,66 +216,35 @@ self._pool.append(w) self._result_handler = threading.Thread( - target=WorkerPool._handleResults, - args=(self,)) + target=WorkerPool._handleResults, + args=(self,)) self._result_handler.daemon = True self._result_handler.start() self._closed = False - def setHandler(self, callback=None, error_callback=None): - self._callback = callback - self._error_callback = error_callback - - def queueJobs(self, jobs, handler=None, chunk_size=None): + def queueJobs(self, jobs): if self._closed: raise Exception("This worker pool has been closed.") - if self._listener is not None: - raise Exception("A previous job queue has not finished yet.") - if any([not p.is_alive() for p in self._pool]): - raise Exception("Some workers have prematurely exited.") - - if handler is not None: - self.setHandler(handler) - - if not hasattr(jobs, '__len__'): - jobs = list(jobs) - job_count = len(jobs) - - res = AsyncResult(self, job_count) - if res._count == 0: - res._event.set() - return res + for job in jobs: + self._jobs_left += 1 + self._quick_put((TASK_JOB, job)) - self._listener = res - - if chunk_size is None: - chunk_size = self._batch_size - if chunk_size is None: - chunk_size = max(1, job_count // 50) - logger.debug("Using chunk size of %d" % chunk_size) + if self._jobs_left > 0: + self._event.clear() - if chunk_size is None or chunk_size == 1: - for job in jobs: - self._quick_put((TASK_JOB, job)) - else: - it = iter(jobs) - while True: - batch = tuple([i for i in itertools.islice(it, chunk_size)]) - if not batch: - break - self._quick_put((TASK_BATCH, batch)) - - return res + def wait(self, timeout=None): + return self._event.wait(timeout) def close(self): - if self._listener is not None: + if self._jobs_left > 0 or not self._event.is_set(): raise Exception("A 
previous job queue has not finished yet.") logger.debug("Closing worker pool...") handler = _ReportHandler(len(self._pool)) self._callback = handler._handle + self._error_callback = handler._handleError for w in self._pool: self._quick_put((TASK_END, None)) for w in self._pool: @@ -308,8 +254,8 @@ if not handler.wait(2): missing = handler.reports.index(None) logger.warning( - "Didn't receive all worker reports before timeout. " - "Missing report from worker %d." % missing) + "Didn't receive all worker reports before timeout. " + "Missing report from worker %d." % missing) logger.debug("Exiting result handler thread...") self._result_queue.put(None) @@ -318,6 +264,11 @@ return handler.reports + def _onTaskDone(self): + self._jobs_left -= 1 + if self._jobs_left == 0: + self._event.set() + @staticmethod def _handleResults(pool): while True: @@ -332,44 +283,26 @@ logger.debug("Result handler exiting.") break - task_type, success, wid, data = res + task_type, task_data, success, wid, data = res try: - if success and pool._callback: - pool._callback(data) - elif not success: + if success: + if pool._callback: + pool._callback(task_data, data) + else: if pool._error_callback: - pool._error_callback(data) + pool._error_callback(task_data, data) else: - logger.error("Got error data:") + logger.error( + "Worker %d failed to process a job:" % wid) logger.error(data) except Exception as ex: logger.exception(ex) if task_type == TASK_JOB: - pool._listener._onTaskDone() + pool._onTaskDone() -class AsyncResult(object): - def __init__(self, pool, count): - self._pool = pool - self._count = count - self._event = threading.Event() - - def ready(self): - return self._event.is_set() - - def wait(self, timeout=None): - return self._event.wait(timeout) - - def _onTaskDone(self): - self._count -= 1 - if self._count == 0: - self._pool.setHandler(None) - self._pool._listener = None - self._event.set() - - -class _ReportHandler(object): +class _ReportHandler: def __init__(self, worker_count): self.reports = [None] * worker_count self._count = worker_count @@ -379,7 +312,7 @@ def wait(self, timeout=None): return self._event.wait(timeout) - def _handle(self, res): + def _handle(self, job, res): wid, data = res if wid < 0 or wid > self._count: logger.error("Ignoring report from unknown worker %d." % wid) @@ -391,13 +324,12 @@ if self._received == self._count: self._event.set() - def _handleError(self, res): - wid, data = res - logger.error("Worker %d failed to send its report." % wid) - logger.exception(data) + def _handleError(self, job, res): + logger.error("Worker %d failed to send its report." 
% res.wid) + logger.error(res) -class FastQueue(object): +class FastQueue: def __init__(self): self._reader, self._writer = multiprocessing.Pipe(duplex=False) self._rlock = multiprocessing.Lock() @@ -429,11 +361,11 @@ self._rbuf.write(e.args[0]) self._rbuf.seek(0) - return self._unpickle(self._rbuf, bufsize) + return _unpickle(self._rbuf, bufsize) def put(self, obj): self._wbuf.seek(0) - self._pickle(obj, self._wbuf) + _pickle(obj, self._wbuf) size = self._wbuf.tell() self._wbuf.seek(0) @@ -441,9 +373,27 @@ with self._wbuf.getbuffer() as b: self._writer.send_bytes(b, 0, size) - def _pickle(self, obj, buf): - fastpickle.pickle_intob(obj, buf) + +def _pickle_fast(obj, buf): + fastpickle.pickle_intob(obj, buf) + + +def _unpickle_fast(buf, bufsize): + return fastpickle.unpickle_fromb(buf, bufsize) + + +def _pickle_default(obj, buf): + pickle.dump(obj, buf) - def _unpickle(self, buf, bufsize): - return fastpickle.unpickle_fromb(buf, bufsize) + +def _unpickle_default(buf, bufsize): + return pickle.load(buf) + +if use_fastpickle: + _pickle = _pickle_fast + _unpickle = _unpickle_fast +else: + _pickle = _pickle_default + _unpickle = _unpickle_default +
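For reference, a rough usage sketch of the reworked pool protocol: jobs are queued fire-and-forget, `wait()` blocks on the event that `_onTaskDone` sets once `_jobs_left` drops to zero, and `close()` returns one `ExecutionStats` report per worker. The `SquareWorker` class and the callbacks below are hypothetical, purely for illustration, and this assumes a normal PieCrust process (spawned workers re-run `_pre_parse_chef_args` on `sys.argv`):

    from piecrust.environment import ExecutionStats
    from piecrust.workerpool import IWorker, WorkerPool

    class SquareWorker(IWorker):
        def initialize(self):
            pass  # open caches, connections, etc.

        def process(self, job):
            return job * job  # returned value becomes the callback's `data`

        def getStats(self):
            # Returning real stats lets the main process merge them into
            # the per-worker reports handed back by close().
            return ExecutionStats()

    def _on_result(job, data):
        print("job %r -> %r" % (job, data))

    def _on_error(job, exc_data):
        # exc_data is a WorkerExceptionData with the formatted traceback.
        print("job %r failed:\n%s" % (job, exc_data.traceback))

    if __name__ == '__main__':
        pool = WorkerPool(SquareWorker,
                          callback=_on_result,
                          error_callback=_on_error,
                          worker_count=2)
        pool.queueJobs(range(10))
        pool.wait()
        reports = pool.close()  # one ExecutionStats per worker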
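And a minimal sketch of the round-trip `FastQueue` performs through the module-level pickler selected above (the stdlib `pickle` pair, since `use_fastpickle` is off by default):

    import io

    buf = io.BytesIO()
    _pickle({'uri': '/foo', 'num': 2}, buf)  # serialize into the buffer
    size = buf.tell()
    buf.seek(0)
    # _unpickle_default ignores `bufsize` and just reads one object back.
    assert _unpickle(buf, size) == {'uri': '/foo', 'num': 2}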