Mercurial > piecrust2
changeset 856:9bb22bbe093c
refactor: Make the blog archives functional again.
The blog archives are using the same pattern as the taxonomy support.
| author | Ludovic Chabant <ludovic@chabant.com> |
|---|---|
| date | Tue, 06 Jun 2017 01:23:25 -0700 |
| parents | 448710d84121 |
| children | d231a10d18f9 |
| files | piecrust/dataproviders/pageiterator.py piecrust/page.py piecrust/resources/theme/templates/_year.html piecrust/sources/blogarchives.py piecrust/sources/generator.py piecrust/sources/taxonomy.py |
| diffstat | 6 files changed, 219 insertions(+), 127 deletions(-) [+] |
line wrap: on
line diff
--- a/piecrust/dataproviders/pageiterator.py Tue Jun 06 00:26:21 2017 -0700 +++ b/piecrust/dataproviders/pageiterator.py Tue Jun 06 01:23:25 2017 -0700 @@ -190,6 +190,13 @@ self._it = it_class(self._it, *args, **kwargs) return self + def _wrapAsSort(self, sort_it_class, *args, **kwargs): + self._ensureUnlocked() + self._ensureUnloaded() + self._it = sort_it_class(self._it, *args, **kwargs) + self._has_sorter = True + return self + def _lockIterator(self): self._ensureUnlocked() self._locked = True
--- a/piecrust/page.py Tue Jun 06 00:26:21 2017 -0700 +++ b/piecrust/page.py Tue Jun 06 01:23:25 2017 -0700 @@ -89,7 +89,8 @@ def datetime(self): if self._datetime is None: try: - self._datetime = self._computeDateTime() + self._datetime = _compute_datetime(self.source_metadata, + self.config) except Exception as ex: logger.exception(ex) raise Exception( @@ -113,38 +114,6 @@ def getSegment(self, name='content'): return self.segments[name] - def _computeDateTime(self): - if 'datetime' in self.source_metadata: - # Get the date/time from the source. - self._datetime = self.source_metadata['datetime'] - elif 'date' in self.source_metadata: - # Get the date from the source. Potentially get the - # time from the page config. - page_date = self.source_metadata['date'] - page_time = _parse_config_time(self.config.get('time')) - if page_time is not None: - self._datetime = datetime.datetime( - page_date.year, - page_date.month, - page_date.day) + page_time - else: - self._datetime = datetime.datetime( - page_date.year, page_date.month, page_date.day) - elif 'date' in self.config: - # Get the date from the page config, and maybe the - # time too. - page_date = _parse_config_date(self.config.get('date')) - self._datetime = datetime.datetime( - page_date.year, - page_date.month, - page_date.day) - page_time = _parse_config_time(self.config.get('time')) - if page_time is not None: - self._datetime += page_time - else: - # No idea what the date/time for this page is. - self._datetime = datetime.datetime.fromtimestamp(0) - def _load(self): if self._config is not None: return @@ -166,6 +135,42 @@ self._flags |= FLAG_RAW_CACHE_VALID +def _compute_datetime(source_metadata, config): + # Get the date/time from the source. + dt = source_metadata.get('datetime') + if dt is not None: + return dt + + # Get the date from the source. Potentially get the + # time from the page config. 
+ page_date = source_metadata.get('date') + if page_date is not None: + dt = datetime.datetime( + page_date.year, page_date.month, page_date.day) + + page_time = _parse_config_time(config.get('time')) + if page_time is not None: + dt += page_time + + return dt + + # Get the date from the page config, and maybe the + # time too. + page_date = _parse_config_date(config.get('date')) + if page_date is not None: + dt = datetime.datetime( + page_date.year, page_date.month, page_date.day) + + page_time = _parse_config_time(config.get('time')) + if page_time is not None: + dt += page_time + + return dt + + # No idea what the date/time for this page is. + return datetime.datetime.fromtimestamp(0) + + def _parse_config_date(page_date): if page_date is None: return None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/resources/theme/templates/_year.html Tue Jun 06 01:23:25 2017 -0700 @@ -0,0 +1,15 @@ +{% extends "default.html" %} + +{% block main %} +<h2>Posts in {{ year }}</h2> + +<section> + {% for post in pagination.posts %} + <p><em>{{post.timestamp|date('%d %B')}}</em> – <a href="{{ post.url }}">{{ post.title }}</a></p> + {% endfor %} +</section> +<section> + {% if pagination.prev_page %}<div class="prev"><a href="{{ pagination.prev_page }}">Next Posts</a></div>{% endif %} + {% if pagination.next_page %}<div class="next"><a href="{{ pagination.next_page }}">Previous Posts</a></div>{% endif %} +</section> +{% endblock %}
--- a/piecrust/sources/blogarchives.py Tue Jun 06 00:26:21 2017 -0700 +++ b/piecrust/sources/blogarchives.py Tue Jun 06 01:23:25 2017 -0700 @@ -1,30 +1,51 @@ import logging import datetime -from piecrust.chefutil import format_timed_scope from piecrust.data.filters import PaginationFilter, IFilterClause -from piecrust.dataproviders.pageiterator import PageIterator -from piecrust.pipelines.base import ContentPipeline +from piecrust.dataproviders.pageiterator import ( + PageIterator, HardCodedFilterIterator, DateSortIterator) +from piecrust.page import Page +from piecrust.pipelines._pagebaker import PageBaker +from piecrust.pipelines._pagerecords import PagePipelineRecordEntry +from piecrust.pipelines.base import ( + ContentPipeline, get_record_name_for_source) from piecrust.routing import RouteParameter -from piecrust.sources.base import ContentSource, GeneratedContentException +from piecrust.sources.base import ContentItem +from piecrust.sources.generator import GeneratorSourceBase logger = logging.getLogger(__name__) -class BlogArchivesSource(ContentSource): +_year_index = """--- +layout: %(template)s +--- +""" + + +class BlogArchivesSource(GeneratorSourceBase): SOURCE_NAME = 'blog_archives' DEFAULT_PIPELINE_NAME = 'blog_archives' def __init__(self, app, name, config): super().__init__(app, name, config) - def getContents(self, group): - raise GeneratedContentException() + tpl_name = config.get('template', '_year.html') + self._raw_item = _year_index % {'template': tpl_name} + + def getSupportedRouteParameters(self): + return [RouteParameter('year', RouteParameter.TYPE_INT4)] + + def findContent(self, route_params): + year = route_params['year'] + spec = '_index[%04d]' % year + metadata = {'route_params': {'year': year}} + return ContentItem(spec, metadata) def prepareRenderContext(self, ctx): - ctx.pagination_source = self.source + ctx.pagination_source = self.inner_source - year = ctx.page.route_metadata.get('year') + route_params = 
ctx.page.source_metadata['route_params'] + year = route_params.get('year') if year is None: raise Exception( "Can't find the archive year in the route metadata") @@ -41,41 +62,11 @@ flt2 = PaginationFilter() flt2.addClause(IsFromYearFilterClause(year)) - it = PageIterator(self.source, pagination_filter=flt2, - sorter=_date_sorter) + it = PageIterator(self.inner_source) + it._simpleNonSortedWrap(HardCodedFilterIterator, flt2) + it._wrapAsSort(DateSortIterator, reverse=False) ctx.custom_data['archives'] = it - def bake(self, ctx): - if not self.page_ref.exists: - logger.debug( - "No page found at '%s', skipping %s archives." % - (self.page_ref, self.source_name)) - return - - logger.debug("Baking %s archives...", self.source_name) - with format_timed_scope(logger, 'gathered archive years', - level=logging.DEBUG, colored=False): - all_years, dirty_years = self._buildDirtyYears(ctx) - - with format_timed_scope(logger, "baked %d %s archives." % - (len(dirty_years), self.source_name)): - self._bakeDirtyYears(ctx, all_years, dirty_years) - - def _getSource(self): - return self.app.getSource(self.config['source']) - - def _buildDirtyYears(self, ctx): - logger.debug("Gathering dirty post years.") - all_years = set() - dirty_years = set() - for _, cur_entry in ctx.getAllPageRecords(): - if cur_entry and cur_entry.source_name == self.source_name: - dt = datetime.datetime.fromtimestamp(cur_entry.timestamp) - all_years.add(dt.year) - if cur_entry.was_any_sub_baked: - dirty_years.add(dt.year) - return all_years, dirty_years - def _bakeDirtyYears(self, ctx, all_years, dirty_years): route = self.app.getGeneratorRoute(self.name) if route is None: @@ -93,27 +84,6 @@ ctx.queueBakeJob(fac, route, extra_route_metadata, str(y)) ctx.runJobQueue() - # Create bake entries for the years that were *not* dirty. - # Otherwise, when checking for deleted pages, we would not find any - # outputs and would delete those files. 
- all_str_years = [str(y) for y in all_years] - for prev_entry, cur_entry in ctx.getAllPageRecords(): - if prev_entry and not cur_entry: - try: - y = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key) - except InvalidRecordExtraKey: - continue - if y in all_str_years: - logger.debug( - "Creating unbaked entry for year %s archive." % y) - ctx.collapseRecord(prev_entry) - else: - logger.debug( - "No page references year %s anymore." % y) - - def getSupportedRouteParameters(self): - return [RouteParameter('year', RouteParameter.TYPE_INT4)] - class IsFromYearFilterClause(IFilterClause): def __init__(self, year): @@ -127,6 +97,93 @@ return sorted(it, key=lambda x: x.datetime) +class BlogArchivesPipelineRecordEntry(PagePipelineRecordEntry): + def __init__(self): + super().__init__() + self.year = None + + class BlogArchivesPipeline(ContentPipeline): PIPELINE_NAME = 'blog_archives' PASS_NUM = 1 + RECORD_ENTRY_CLASS = BlogArchivesPipelineRecordEntry + + def __init__(self, source, ctx): + if not isinstance(source, BlogArchivesSource): + raise Exception("The blog archives pipeline only supports blog " + "archives content sources.") + + super().__init__(source, ctx) + self.inner_source = source.inner_source + self._tpl_name = source.config['template'] + self._all_years = None + self._dirty_years = None + self._pagebaker = None + + def initialize(self): + self._pagebaker = PageBaker(self.app, + self.ctx.out_dir, + force=self.ctx.force) + self._pagebaker.startWriterQueue() + + def shutdown(self): + self._pagebaker.stopWriterQueue() + + def createJobs(self, ctx): + logger.debug("Building blog archives for: %s" % + self.inner_source.name) + self._buildDirtyYears(ctx) + logger.debug("Got %d dirty years out of %d." 
% + (len(self._dirty_years), len(self._all_years))) + + jobs = [] + for y in self._dirty_years: + item = ContentItem( + '_index[%04d]' % y, + {'route_params': {'year': y}}) + jobs.append(self.createJob(item)) + if len(jobs) > 0: + return jobs + return None + + def run(self, job, ctx, result): + page = Page(self.source, job.content_item) + prev_entry = ctx.previous_entry + cur_entry = result.record_entry + cur_entry.year = job.content_item.metadata['route_params']['year'] + self._pagebaker.bake(page, prev_entry, cur_entry, []) + + def postJobRun(self, ctx): + # Create bake entries for the years that were *not* dirty. + # Otherwise, when checking for deleted pages, we would not find any + # outputs and would delete those files. + all_str_years = [str(y) for y in self._all_years] + for prev, cur in ctx.record_history.diffs: + if prev and not cur: + y = prev.year + if y in all_str_years: + logger.debug( + "Creating unbaked entry for year %s archive." % y) + cur.year = y + cur.out_paths = list(prev.out_paths) + cur.errors = list(prev.errors) + else: + logger.debug( + "No page references year %s anymore." % y) + + def _buildDirtyYears(self, ctx): + all_years = set() + dirty_years = set() + + record_name = get_record_name_for_source(self.inner_source) + current_records = ctx.record_histories.current + cur_rec = current_records.getRecord(record_name) + for cur_entry in cur_rec.getEntries(): + dt = datetime.datetime.fromtimestamp(cur_entry.timestamp) + all_years.add(dt.year) + if cur_entry.was_any_sub_baked: + dirty_years.add(dt.year) + + self._all_years = all_years + self._dirty_years = dirty_years +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/piecrust/sources/generator.py Tue Jun 06 01:23:25 2017 -0700 @@ -0,0 +1,35 @@ +import io +import time +from werkzeug.utils import cached_property +from piecrust.configuration import ConfigurationError +from piecrust.sources.base import ContentSource, GeneratedContentException + + +class GeneratorSourceBase(ContentSource): + def __init__(self, app, name, config): + super().__init__(app, name, config) + + source_name = config.get('source') + if source_name is None: + raise ConfigurationError( + "Taxonomy source '%s' requires an inner source." % name) + self._inner_source_name = source_name + + self._raw_item = '' + + @cached_property + def inner_source(self): + return self.app.getSource(self._inner_source_name) + + def getContents(self, group): + # Our content is procedurally generated from other content sources, + # so we really don't support listing anything here -- it would be + # typically quite costly. + raise GeneratedContentException() + + def openItem(self, item, mode='r', **kwargs): + return io.StringIO(self._raw_item) + + def getItemMtime(self, item): + return time.time() +
--- a/piecrust/sources/taxonomy.py Tue Jun 06 00:26:21 2017 -0700 +++ b/piecrust/sources/taxonomy.py Tue Jun 06 01:23:25 2017 -0700 @@ -1,9 +1,6 @@ -import io import re -import time import logging import unidecode -from werkzeug.utils import cached_property from piecrust.configuration import ConfigurationError from piecrust.data.filters import ( PaginationFilter, SettingFilterClause) @@ -14,8 +11,8 @@ ContentPipeline, get_record_name_for_source) from piecrust.pipelines.records import RecordHistory from piecrust.routing import RouteParameter -from piecrust.sources.base import ( - ContentItem, ContentSource, GeneratedContentException) +from piecrust.sources.base import ContentItem +from piecrust.sources.generator import GeneratorSourceBase logger = logging.getLogger(__name__) @@ -57,7 +54,7 @@ """ -class TaxonomySource(ContentSource): +class TaxonomySource(GeneratorSourceBase): """ A content source that generates taxonomy listing pages. """ SOURCE_NAME = 'taxonomy' @@ -66,12 +63,6 @@ def __init__(self, app, name, config): super().__init__(app, name, config) - source_name = config.get('source') - if source_name is None: - raise ConfigurationError( - "Taxonomy source '%s' requires an inner source." % name) - self._inner_source_name = source_name - tax_name = config.get('taxonomy') if tax_name is None: raise ConfigurationError( @@ -84,25 +75,6 @@ tpl_name = config.get('template', '_%s.html' % tax_name) self._raw_item = _taxonomy_index % {'template': tpl_name} - @cached_property - def inner_source(self): - return self.app.getSource(self._inner_source_name) - - def openItem(self, item, mode='r', **kwargs): - return io.StringIO(self._raw_item) - - def getItemMtime(self, item): - return time.time() - - def getContents(self, group): - # Our content is procedurally generated from other content sources, - # so we really don't support listing anything here -- it would be - # quite costly. 
- # - # Instead, our pipeline (the `TaxonomyPipeline`) will generate - # content items for us when it is asked to produce bake jobs. - raise GeneratedContentException() - def getSupportedRouteParameters(self): name = self.taxonomy.term_name param_type = (RouteParameter.TYPE_PATH if self.taxonomy.is_multiple @@ -321,6 +293,7 @@ page = Page(self.source, job.content_item) prev_entry = ctx.previous_entry cur_entry = result.record_entry + cur_entry.term = content_item.metadata['term'] self._pagebaker.bake(page, prev_entry, cur_entry, []) def postJobRun(self, ctx):