Mercurial > piecrust2
view piecrust/sources/autoconfig.py @ 411:e7b865f8f335
bake: Enable multiprocess baking.
Baking is now done by running a worker per CPU, and sending jobs to them.
This changes several things across the codebase:
* Ability to not cache things related to pages other than the 'main' page
(i.e. the page at the bottom of the execution stack).
* Decouple the baking process from the bake records, so only the main process
keeps track (and modifies) the bake record.
* Remove the need for 'batch page getters' and loading a page directly from
the page factories.
There are various smaller changes too included here, including support for
scope performance timers that are saved with the bake record and can be
printed out to the console. Yes I got carried away.
For testing, the in-memory 'mock' file-system doesn't work anymore, since
we're spawning processes, so this is replaced by a 'tmpfs' file-system which
is saved in temporary files on disk and deleted after tests have run.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Fri, 12 Jun 2015 17:09:19 -0700 |
parents | dd25bd3ce1f9 |
children | 883a5544cd7f |
line wrap: on
line source
import re import os import os.path import logging from piecrust.configuration import ConfigurationError from piecrust.sources.base import ( PageSource, PageFactory, InvalidFileSystemEndpointError) from piecrust.sources.default import ( filter_page_dirname, filter_page_filename) from piecrust.sources.interfaces import IListableSource from piecrust.sources.mixins import SimplePaginationSourceMixin logger = logging.getLogger(__name__) class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin, IListableSource): """ Base class for page sources that automatically apply configuration settings to their generated pages based on those pages' paths. """ def __init__(self, app, name, config): super(AutoConfigSourceBase, self).__init__(app, name, config) self.fs_endpoint = config.get('fs_endpoint', name) self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) self.supported_extensions = list( app.config.get('site/auto_formats').keys()) self.default_auto_format = app.config.get('site/default_auto_format') self.capture_mode = config.get('capture_mode', 'path') if self.capture_mode not in ['path', 'dirname', 'filename']: raise ConfigurationError("Capture mode in source '%s' must be " "one of: path, dirname, filename" % name) def buildPageFactories(self): logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path) if not os.path.isdir(self.fs_endpoint_path): raise InvalidFileSystemEndpointError(self.name, self.fs_endpoint_path) for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path) dirnames[:] = list(filter(filter_page_dirname, dirnames)) # If `capture_mode` is `dirname`, we don't need to recompute it # for each filename, so we do it here. if self.capture_mode == 'dirname': config = self._extractConfigFragment(rel_dirpath) for f in filter(filter_page_filename, filenames): if self.capture_mode == 'path': path = os.path.join(rel_dirpath, f) config = self._extractConfigFragment(path) elif self.capture_mode == 'filename': config = self._extractConfigFragment(f) fac_path = f if rel_dirpath != '.': fac_path = os.path.join(rel_dirpath, f) slug = self._makeSlug(fac_path) metadata = { 'slug': slug, 'config': config} yield PageFactory(self, fac_path, metadata) def resolveRef(self, ref_path): path = os.path.normpath( os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/"))) config = None if self.capture_mode == 'dirname': config = self._extractConfigFragment(os.path.dirname(ref_path)) elif self.capture_mode == 'path': config = self._extractConfigFragment(ref_path) elif self.capture_mode == 'filename': config = self._extractConfigFragment(os.path.basename(ref_path)) slug = self._makeSlug(ref_path) metadata = {'slug': slug, 'config': config} return path, metadata def listPath(self, rel_path): raise NotImplementedError() def getDirpath(self, rel_path): return os.path.dirname(rel_path) def getBasename(self, rel_path): filename = os.path.basename(rel_path) name, _ = os.path.splitext(filename) return name def _makeSlug(self, rel_path): slug = rel_path.replace('\\', '/') slug = self._cleanSlug(slug) slug, ext = os.path.splitext(slug) if ext.lstrip('.') not in self.supported_extensions: slug += ext if slug.startswith('./'): slug = slug[2:] if slug == '_index': slug = '' return slug def _cleanSlug(self, slug): return slug def _extractConfigFragment(self, rel_path): raise NotImplementedError() class AutoConfigSource(AutoConfigSourceBase): """ Page source that extracts configuration settings from the sub-folders each page resides in. This is ideal for setting tags or categories on pages based on the folders they're in. """ SOURCE_NAME = 'autoconfig' def __init__(self, app, name, config): config['capture_mode'] = 'dirname' super(AutoConfigSource, self).__init__(app, name, config) self.setting_name = config.get('setting_name', name) self.only_single_values = config.get('only_single_values', False) self.collapse_single_values = config.get('collapse_single_values', False) self.supported_extensions = list( app.config.get('site/auto_formats').keys()) def _extractConfigFragment(self, rel_path): if rel_path == '.': values = [] else: values = rel_path.split(os.sep) if self.only_single_values: if len(values) > 1: raise Exception("Only one folder level is allowed for pages " "in source '%s'." % self.name) elif len(values) == 1: values = values[0] else: values = None if self.collapse_single_values: if len(values) == 1: values = values[0] elif len(values) == 0: values = None return {self.setting_name: values} def findPageFactory(self, metadata, mode): # Pages from this source are effectively flattened, so we need to # find pages using a brute-force kinda way. for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): for f in filenames: slug, _ = os.path.splitext(f) if slug == metadata['slug']: path = os.path.join(dirpath, f) rel_path = os.path.relpath(path, self.fs_endpoint_path) config = self._extractConfigFragment(rel_path) metadata = {'slug': slug, 'config': config} return PageFactory(self, rel_path, metadata) return None def listPath(self, rel_path): rel_path = rel_path.lstrip('\\/') path = os.path.join(self.fs_endpoint_path, rel_path) names = sorted(os.listdir(path)) items = [] for name in names: if os.path.isdir(os.path.join(path, name)): if filter_page_dirname(name): rel_subdir = os.path.join(rel_path, name) items.append((True, name, rel_subdir)) else: if filter_page_filename(name): cur_rel_path = os.path.join(rel_path, name) slug = self._makeSlug(cur_rel_path) config = self._extractConfigFragment(cur_rel_path) metadata = {'slug': slug, 'config': config} fac = PageFactory(self, cur_rel_path, metadata) name, _ = os.path.splitext(name) items.append((False, name, fac)) return items def _cleanSlug(self, slug): return os.path.basename(slug) class OrderedPageSource(AutoConfigSourceBase): """ A page source that assigns an "order" to its pages based on a numerical prefix in their filename. Page iterators will automatically sort pages using that order. """ SOURCE_NAME = 'ordered' re_pattern = re.compile(r'(^|[/\\])(?P<num>\d+)_') def __init__(self, app, name, config): config['capture_mode'] = 'path' super(OrderedPageSource, self).__init__(app, name, config) self.setting_name = config.get('setting_name', 'order') self.default_value = config.get('default_value', 0) self.supported_extensions = list( app.config.get('site/auto_formats').keys()) def findPageFactory(self, metadata, mode): uri_path = metadata.get('slug', '') if uri_path == '': uri_path = '_index' path = self.fs_endpoint_path uri_parts = uri_path.split('/') for i, p in enumerate(uri_parts): if i == len(uri_parts) - 1: # Last part, this is the filename. We need to check for either # the name, or the name with the prefix, but also handle a # possible extension. p_pat = r'(\d+_)?' + re.escape(p) _, ext = os.path.splitext(uri_path) if ext == '': p_pat += r'\.[\w\d]+' found = False for name in os.listdir(path): if re.match(p_pat, name): path = os.path.join(path, name) found = True break if not found: return None else: # Find each sub-directory. It can either be a directory with # the name itself, or the name with a number prefix. p_pat = r'(\d+_)?' + re.escape(p) found = False for name in os.listdir(path): if re.match(p_pat, name): path = os.path.join(path, name) found = True break if not found: return None fac_path = os.path.relpath(path, self.fs_endpoint_path) config = self._extractConfigFragment(fac_path) metadata = {'slug': uri_path, 'config': config} return PageFactory(self, fac_path, metadata) def getSorterIterator(self, it): accessor = self.getSettingAccessor() return OrderTrailSortIterator(it, self.setting_name + '_trail', value_accessor=accessor) def listPath(self, rel_path): rel_path = rel_path.lstrip('/') path = self.fs_endpoint_path if rel_path != '': parts = rel_path.split('/') for p in parts: p_pat = r'(\d+_)?' + re.escape(p) + '$' for name in os.listdir(path): if re.match(p_pat, name): path = os.path.join(path, name) break else: raise Exception("No such path: %s" % rel_path) items = [] names = sorted(os.listdir(path)) for name in names: clean_name = self.re_pattern.sub('', name) clean_name, _ = os.path.splitext(clean_name) if os.path.isdir(os.path.join(path, name)): if filter_page_dirname(name): rel_subdir = os.path.join(rel_path, name) items.append((True, clean_name, rel_subdir)) else: if filter_page_filename(name): slug = self._makeSlug(os.path.join(rel_path, name)) fac_path = name if rel_path != '.': fac_path = os.path.join(rel_path, name) fac_path = fac_path.replace('\\', '/') config = self._extractConfigFragment(fac_path) metadata = {'slug': slug, 'config': config} fac = PageFactory(self, fac_path, metadata) name, _ = os.path.splitext(name) items.append((False, clean_name, fac)) return items def _cleanSlug(self, slug): return self.re_pattern.sub(r'\1', slug) def _extractConfigFragment(self, rel_path): values = [] for m in self.re_pattern.finditer(rel_path): val = int(m.group('num')) values.append(val) if len(values) == 0: values.append(self.default_value) return { self.setting_name: values[-1], self.setting_name + '_trail': values} def _populateMetadata(self, rel_path, metadata, mode=None): _, filename = os.path.split(rel_path) config = self._extractConfigFragment(filename) metadata['config'] = config slug = metadata['slug'] metadata['slug'] = self.re_pattern.sub(r'\1', slug) class OrderTrailSortIterator(object): def __init__(self, it, trail_name, value_accessor): self.it = it self.trail_name = trail_name self.value_accessor = value_accessor def __iter__(self): return iter(sorted(self.it, key=self._key_getter)) def _key_getter(self, item): values = self.value_accessor(item, self.trail_name) key = ''.join(values) return key