view piecrust/sources/autoconfig.py @ 281:0641fe5c3ef9

serve: Don't crash when a post URL doesn't match our expectations. It can happen that a posts source's URL pattern matches that of something else, like a taxonomy page. In this case, don't crash when what we think will be integers turn out to be something else. Instead, just return that no page has been found.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 01 Mar 2015 21:41:05 -0800
parents f512905ae812
children a2d283d1033d
line wrap: on
line source

import re
import os
import os.path
import logging
from piecrust.configuration import ConfigurationError
from piecrust.sources.base import (
        PageSource, PageFactory, InvalidFileSystemEndpointError)
from piecrust.sources.default import (
        filter_page_dirname, filter_page_filename)
from piecrust.sources.interfaces import IListableSource
from piecrust.sources.mixins import SimplePaginationSourceMixin


logger = logging.getLogger(__name__)


class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin,
                           IListableSource):
    """ Base class for page sources that automatically apply configuration
        settings to their generated pages based on those pages' paths.
    """
    def __init__(self, app, name, config):
        super(AutoConfigSourceBase, self).__init__(app, name, config)
        self.fs_endpoint = config.get('fs_endpoint', name)
        self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
        self.supported_extensions = list(
                app.config.get('site/auto_formats').keys())
        self.default_auto_format = app.config.get('site/default_auto_format')

        self.capture_mode = config.get('capture_mode', 'path')
        if self.capture_mode not in ['path', 'dirname', 'filename']:
            raise ConfigurationError("Capture mode in source '%s' must be "
                                     "one of: path, dirname, filename" %
                                     name)

    def buildPageFactories(self):
        logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path)
        if not os.path.isdir(self.fs_endpoint_path):
            raise InvalidFileSystemEndpointError(self.name,
                                                 self.fs_endpoint_path)

        for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
            rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path)
            dirnames[:] = list(filter(filter_page_dirname, dirnames))

            # If `capture_mode` is `dirname`, we don't need to recompute it
            # for each filename, so we do it here.
            if self.capture_mode == 'dirname':
                config = self._extractConfigFragment(rel_dirpath)

            for f in filter(filter_page_filename, filenames):
                if self.capture_mode == 'path':
                    path = os.path.join(rel_dirpath, f)
                    config = self._extractConfigFragment(path)
                elif self.capture_mode == 'filename':
                    config = self._extractConfigFragment(f)

                fac_path = f
                if rel_dirpath != '.':
                    fac_path = os.path.join(rel_dirpath, f)

                slug = self._makeSlug(fac_path)

                metadata = {
                        'slug': slug,
                        'config': config}
                yield PageFactory(self, fac_path, metadata)

    def resolveRef(self, ref_path):
        return os.path.normpath(
                os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/")))

    def listPath(self, rel_path):
        raise NotImplementedError()

    def getDirpath(self, rel_path):
        return os.path.dirname(rel_path)

    def getBasename(self, rel_path):
        filename = os.path.basename(rel_path)
        name, _ = os.path.splitext(filename)
        return name

    def _makeSlug(self, rel_path):
        slug = rel_path.replace('\\', '/')
        slug = self._cleanSlug(slug)
        slug, ext = os.path.splitext(slug)
        if ext.lstrip('.') not in self.supported_extensions:
            slug += ext
        if slug.startswith('./'):
            slug = slug[2:]
        if slug == '_index':
            slug = ''
        return slug

    def _cleanSlug(self, slug):
        return slug

    def _extractConfigFragment(self, rel_path):
        raise NotImplementedError()


class AutoConfigSource(AutoConfigSourceBase):
    """ Page source that extracts configuration settings from the sub-folders
        each page resides in. This is ideal for setting tags or categories
        on pages based on the folders they're in.
    """
    SOURCE_NAME = 'autoconfig'

    def __init__(self, app, name, config):
        config['capture_mode'] = 'dirname'
        super(AutoConfigSource, self).__init__(app, name, config)
        self.setting_name = config.get('setting_name', name)
        self.only_single_values = config.get('only_single_values', False)
        self.collapse_single_values = config.get('collapse_single_values',
                                                 False)
        self.supported_extensions = list(
                app.config.get('site/auto_formats').keys())

    def _extractConfigFragment(self, rel_path):
        if rel_path == '.':
            values = []
        else:
            values = rel_path.split(os.sep)

        if self.only_single_values:
            if len(values) > 1:
                raise Exception("Only one folder level is allowed for pages "
                                "in source '%s'." % self.name)
            elif len(values) == 1:
                values = values[0]
            else:
                values = None

        if self.collapse_single_values:
            if len(values) == 1:
                values = values[0]
            elif len(values) == 0:
                values = None

        return {self.setting_name: values}

    def findPagePath(self, metadata, mode):
        # Pages from this source are effectively flattened, so we need to
        # find pages using a brute-force kinda way.
        for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
            for f in filenames:
                slug, _ = os.path.splitext(f)
                if slug == metadata['slug']:
                    path = os.path.join(dirpath, f)
                    rel_path = os.path.relpath(path, self.fs_endpoint_path)
                    config = self._extractConfigFragment(rel_path)
                    metadata = {'slug': slug, 'config': config}
                    return rel_path, metadata

    def listPath(self, rel_path):
        rel_path = rel_path.lstrip('\\/')
        path = os.path.join(self.fs_endpoint_path, rel_path)
        names = sorted(os.listdir(path))
        items = []
        for name in names:
            if os.path.isdir(os.path.join(path, name)):
                if filter_page_dirname(name):
                    rel_subdir = os.path.join(rel_path, name)
                    items.append((True, name, rel_subdir))
            else:
                if filter_page_filename(name):
                    cur_rel_path = os.path.join(rel_path, name)
                    slug = self._makeSlug(cur_rel_path)
                    config = self._extractConfigFragment(cur_rel_path)
                    metadata = {'slug': slug, 'config': config}
                    fac = PageFactory(self, cur_rel_path, metadata)

                    name, _ = os.path.splitext(name)
                    items.append((False, name, fac))
        return items

    def _cleanSlug(self, slug):
        return os.path.basename(slug)


class OrderedPageSource(AutoConfigSourceBase):
    """ A page source that assigns an "order" to its pages based on a
        numerical prefix in their filename. Page iterators will automatically
        sort pages using that order.
    """
    SOURCE_NAME = 'ordered'

    re_pattern = re.compile(r'(^|/)(?P<num>\d+)_')

    def __init__(self, app, name, config):
        config['capture_mode'] = 'path'
        super(OrderedPageSource, self).__init__(app, name, config)
        self.setting_name = config.get('setting_name', 'order')
        self.default_value = config.get('default_value', 0)
        self.supported_extensions = list(
                app.config.get('site/auto_formats').keys())

    def findPagePath(self, metadata, mode):
        uri_path = metadata.get('slug', '')
        if uri_path == '':
            uri_path = '_index'

        path = self.fs_endpoint_path
        uri_parts = uri_path.split('/')
        for i, p in enumerate(uri_parts):
            if i == len(uri_parts) - 1:
                # Last part, this is the filename. We need to check for either
                # the name, or the name with the prefix, but also handle a
                # possible extension.
                p_pat = r'(\d+_)?' + re.escape(p)

                _, ext = os.path.splitext(uri_path)
                if ext == '':
                    p_pat += r'\.[\w\d]+'

                found = False
                for name in os.listdir(path):
                    if re.match(p_pat, name):
                        path = os.path.join(path, name)
                        found = True
                        break
                if not found:
                    return None, None
            else:
                # Find each sub-directory. It can either be a directory with
                # the name itself, or the name with a number prefix.
                p_pat = r'(\d+_)?' + re.escape(p)
                found = False
                for name in os.listdir(path):
                    if re.match(p_pat, name):
                        path = os.path.join(path, name)
                        found = True
                        break
                if not found:
                    return None, None

        fac_path = os.path.relpath(path, self.fs_endpoint_path)
        config = self._extractConfigFragment(fac_path)
        metadata = {'slug': uri_path, 'config': config}

        return fac_path, metadata

    def getSorterIterator(self, it):
        accessor = self.getSettingAccessor()
        return OrderTrailSortIterator(it, self.setting_name + '_trail',
                                      value_accessor=accessor)

    def listPath(self, rel_path):
        rel_path = rel_path.lstrip('/')
        path = self.fs_endpoint_path
        if rel_path != '':
            parts = rel_path.split('/')
            for p in parts:
                p_pat = r'(\d+_)?' + re.escape(p)
                for name in os.listdir(path):
                    if re.match(p_pat, name):
                        path = os.path.join(path, name)
                        break
                else:
                    raise Exception("No such path: %s" % rel_path)

        items = []
        names = sorted(os.listdir(path))
        for name in names:
            clean_name = self.re_pattern.sub('', name)
            clean_name, _ = os.path.splitext(clean_name)
            if os.path.isdir(os.path.join(path, name)):
                if filter_page_dirname(name):
                    rel_subdir = os.path.join(rel_path, name)
                    items.append((True, clean_name, rel_subdir))
            else:
                if filter_page_filename(name):
                    slug = self._makeSlug(os.path.join(rel_path, name))

                    fac_path = name
                    if rel_path != '.':
                        fac_path = os.path.join(rel_path, name)
                    fac_path = fac_path.replace('\\', '/')

                    config = self._extractConfigFragment(fac_path)
                    metadata = {'slug': slug, 'config': config}
                    fac = PageFactory(self, fac_path, metadata)

                    name, _ = os.path.splitext(name)
                    items.append((False, clean_name, fac))
        return items

    def _cleanSlug(self, slug):
        return self.re_pattern.sub(r'\1', slug)

    def _extractConfigFragment(self, rel_path):
        values = []
        for m in self.re_pattern.finditer(rel_path):
            val = int(m.group('num'))
            values.append(val)

        if len(values) == 0:
            values.append(self.default_value)

        return {
                self.setting_name: values[-1],
                self.setting_name + '_trail': values}

    def _populateMetadata(self, rel_path, metadata, mode=None):
        _, filename = os.path.split(rel_path)
        config = self._extractConfigFragment(filename)
        metadata['config'] = config
        slug = metadata['slug']
        metadata['slug'] = self.re_pattern.sub(r'\1', slug)


class OrderTrailSortIterator(object):
    def __init__(self, it, trail_name, value_accessor):
        self.it = it
        self.trail_name = trail_name
        self.value_accessor = value_accessor

    def __iter__(self):
        return iter(sorted(self.it, key=self._key_getter))

    def _key_getter(self, item):
        values = self.value_accessor(item, self.trail_name)
        key = ''.join(values)
        return key