comparison piecrust/sources/blogarchives.py @ 856:9bb22bbe093c

refactor: Make the blog archives functional again. The blog archives now use the same pattern as the taxonomy support.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 06 Jun 2017 01:23:25 -0700
parents 08e02c2a2a1a
children d231a10d18f9
comparison: 855:448710d84121 -> 856:9bb22bbe093c

 import logging
 import datetime
-from piecrust.chefutil import format_timed_scope
 from piecrust.data.filters import PaginationFilter, IFilterClause
-from piecrust.dataproviders.pageiterator import PageIterator
-from piecrust.pipelines.base import ContentPipeline
+from piecrust.dataproviders.pageiterator import (
+    PageIterator, HardCodedFilterIterator, DateSortIterator)
+from piecrust.page import Page
+from piecrust.pipelines._pagebaker import PageBaker
+from piecrust.pipelines._pagerecords import PagePipelineRecordEntry
+from piecrust.pipelines.base import (
+    ContentPipeline, get_record_name_for_source)
 from piecrust.routing import RouteParameter
-from piecrust.sources.base import ContentSource, GeneratedContentException
+from piecrust.sources.base import ContentItem
+from piecrust.sources.generator import GeneratorSourceBase
 
 
 logger = logging.getLogger(__name__)
 
 
-class BlogArchivesSource(ContentSource):
+_year_index = """---
+layout: %(template)s
+---
+"""
+
+
+class BlogArchivesSource(GeneratorSourceBase):
     SOURCE_NAME = 'blog_archives'
     DEFAULT_PIPELINE_NAME = 'blog_archives'
 
     def __init__(self, app, name, config):
         super().__init__(app, name, config)
 
-    def getContents(self, group):
-        raise GeneratedContentException()
+        tpl_name = config.get('template', '_year.html')
+        self._raw_item = _year_index % {'template': tpl_name}
+
+    def getSupportedRouteParameters(self):
+        return [RouteParameter('year', RouteParameter.TYPE_INT4)]
+
+    def findContent(self, route_params):
+        year = route_params['year']
+        spec = '_index[%04d]' % year
+        metadata = {'route_params': {'year': year}}
+        return ContentItem(spec, metadata)
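
Note: the new findContent() is what lets the generator source answer a route
like /archives/2017 -- it fabricates a content item instead of looking one up
on disk. A minimal illustration of the values it builds (the year is just an
example):

    year = 2017
    spec = '_index[%04d]' % year            # -> '_index[2017]'
    metadata = {'route_params': {'year': year}}

The page body behind that spec is the in-memory _year_index string above, i.e.
a stub page whose front matter points at the configured '_year.html' layout.
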
 
     def prepareRenderContext(self, ctx):
-        ctx.pagination_source = self.source
+        ctx.pagination_source = self.inner_source
 
-        year = ctx.page.route_metadata.get('year')
+        route_params = ctx.page.source_metadata['route_params']
+        year = route_params.get('year')
         if year is None:
             raise Exception(
                 "Can't find the archive year in the route metadata")
         if type(year) is not int:
             raise Exception(
@@ ... @@
         ctx.custom_data['year'] = year
 
         flt2 = PaginationFilter()
         flt2.addClause(IsFromYearFilterClause(year))
-        it = PageIterator(self.source, pagination_filter=flt2,
-                          sorter=_date_sorter)
+        it = PageIterator(self.inner_source)
+        it._simpleNonSortedWrap(HardCodedFilterIterator, flt2)
+        it._wrapAsSort(DateSortIterator, reverse=False)
         ctx.custom_data['archives'] = it
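
Note: the old constructor arguments (pagination_filter=, sorter=) are now
expressed as explicit wrapper steps on the iterator: the hard-coded year
filter first, then an ascending date sort (reverse=False). A rough
plain-Python equivalent of the chain, assuming `posts` stands in for the inner
source's pages (a sketch, not the lazy PageIterator implementation):

    archives = sorted(
        (p for p in posts if p.datetime.year == year),
        key=lambda p: p.datetime)
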
-
-    def bake(self, ctx):
-        if not self.page_ref.exists:
-            logger.debug(
-                "No page found at '%s', skipping %s archives." %
-                (self.page_ref, self.source_name))
-            return
-
-        logger.debug("Baking %s archives...", self.source_name)
-        with format_timed_scope(logger, 'gathered archive years',
-                                level=logging.DEBUG, colored=False):
-            all_years, dirty_years = self._buildDirtyYears(ctx)
-
-        with format_timed_scope(logger, "baked %d %s archives." %
-                                (len(dirty_years), self.source_name)):
-            self._bakeDirtyYears(ctx, all_years, dirty_years)
-
-    def _getSource(self):
-        return self.app.getSource(self.config['source'])
-
-    def _buildDirtyYears(self, ctx):
-        logger.debug("Gathering dirty post years.")
-        all_years = set()
-        dirty_years = set()
-        for _, cur_entry in ctx.getAllPageRecords():
-            if cur_entry and cur_entry.source_name == self.source_name:
-                dt = datetime.datetime.fromtimestamp(cur_entry.timestamp)
-                all_years.add(dt.year)
-                if cur_entry.was_any_sub_baked:
-                    dirty_years.add(dt.year)
-        return all_years, dirty_years
 
-    def _bakeDirtyYears(self, ctx, all_years, dirty_years):
-        route = self.app.getGeneratorRoute(self.name)
-        if route is None:
-            raise Exception(
@@ ... @@
-            logger.debug("Queuing: %s [%s]" % (fac.ref_spec, y))
-            ctx.queueBakeJob(fac, route, extra_route_metadata, str(y))
-        ctx.runJobQueue()
-
-        # Create bake entries for the years that were *not* dirty.
-        # Otherwise, when checking for deleted pages, we would not find any
-        # outputs and would delete those files.
-        all_str_years = [str(y) for y in all_years]
-        for prev_entry, cur_entry in ctx.getAllPageRecords():
-            if prev_entry and not cur_entry:
-                try:
-                    y = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
-                except InvalidRecordExtraKey:
-                    continue
-                if y in all_str_years:
-                    logger.debug(
-                        "Creating unbaked entry for year %s archive." % y)
-                    ctx.collapseRecord(prev_entry)
-                else:
-                    logger.debug(
-                        "No page references year %s anymore." % y)
-
-    def getSupportedRouteParameters(self):
-        return [RouteParameter('year', RouteParameter.TYPE_INT4)]
-
 
 class IsFromYearFilterClause(IFilterClause):
     def __init__(self, year):
         self.year = year
 
@@ ... @@
 def _date_sorter(it):
     return sorted(it, key=lambda x: x.datetime)
 
 
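
Note: the elided pageMatches() body presumably keeps pages whose
page.datetime.year equals the clause's year (the comparison skips those
unchanged lines). Usage sketch under that assumption:

    flt = PaginationFilter()
    flt.addClause(IsFromYearFilterClause(2017))
    # flt now admits only pages dated in 2017.

Also worth flagging: with the sorter=_date_sorter argument gone from
prepareRenderContext(), _date_sorter() no longer appears to have a caller in
this file.
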
+class BlogArchivesPipelineRecordEntry(PagePipelineRecordEntry):
+    def __init__(self):
+        super().__init__()
+        self.year = None
+
+
 class BlogArchivesPipeline(ContentPipeline):
     PIPELINE_NAME = 'blog_archives'
     PASS_NUM = 1
+    RECORD_ENTRY_CLASS = BlogArchivesPipelineRecordEntry
+
+    def __init__(self, source, ctx):
+        if not isinstance(source, BlogArchivesSource):
+            raise Exception("The blog archives pipeline only supports blog "
+                            "archives content sources.")
+
+        super().__init__(source, ctx)
+        self.inner_source = source.inner_source
+        self._tpl_name = source.config['template']
+        self._all_years = None
+        self._dirty_years = None
+        self._pagebaker = None
+
+    def initialize(self):
+        self._pagebaker = PageBaker(self.app,
+                                    self.ctx.out_dir,
+                                    force=self.ctx.force)
+        self._pagebaker.startWriterQueue()
+
+    def shutdown(self):
+        self._pagebaker.stopWriterQueue()
+
+    def createJobs(self, ctx):
+        logger.debug("Building blog archives for: %s" %
+                     self.inner_source.name)
+        self._buildDirtyYears(ctx)
+        logger.debug("Got %d dirty years out of %d." %
+                     (len(self._dirty_years), len(self._all_years)))
+
+        jobs = []
+        for y in self._dirty_years:
+            item = ContentItem(
+                '_index[%04d]' % y,
+                {'route_params': {'year': y}})
+            jobs.append(self.createJob(item))
+        if len(jobs) > 0:
+            return jobs
+        return None
+
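
Note: to make the fan-out concrete, createJobs() emits one job per dirty year,
each wrapping a synthetic _index[YYYY] item; with two dirty years the specs
would be (illustrative values only):

    dirty_years = {2016, 2017}
    specs = ['_index[%04d]' % y for y in sorted(dirty_years)]
    # -> ['_index[2016]', '_index[2017]']
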
+    def run(self, job, ctx, result):
+        page = Page(self.source, job.content_item)
+        prev_entry = ctx.previous_entry
+        cur_entry = result.record_entry
+        cur_entry.year = job.content_item.metadata['route_params']['year']
+        self._pagebaker.bake(page, prev_entry, cur_entry, [])
+
+    def postJobRun(self, ctx):
+        # Create bake entries for the years that were *not* dirty.
+        # Otherwise, when checking for deleted pages, we would not find any
+        # outputs and would delete those files.
+        all_str_years = [str(y) for y in self._all_years]
+        for prev, cur in ctx.record_history.diffs:
+            if prev and not cur:
+                y = prev.year
+                if y in all_str_years:
+                    logger.debug(
+                        "Creating unbaked entry for year %s archive." % y)
+                    cur.year = y
+                    cur.out_paths = list(prev.out_paths)
+                    cur.errors = list(prev.errors)
+                else:
+                    logger.debug(
+                        "No page references year %s anymore." % y)
+
+    def _buildDirtyYears(self, ctx):
+        all_years = set()
+        dirty_years = set()
+
+        record_name = get_record_name_for_source(self.inner_source)
+        current_records = ctx.record_histories.current
+        cur_rec = current_records.getRecord(record_name)
+        for cur_entry in cur_rec.getEntries():
+            dt = datetime.datetime.fromtimestamp(cur_entry.timestamp)
+            all_years.add(dt.year)
+            if cur_entry.was_any_sub_baked:
+                dirty_years.add(dt.year)
+
+        self._all_years = all_years
+        self._dirty_years = dirty_years
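
Note: a self-contained sketch of the bookkeeping _buildDirtyYears() performs --
collect every year seen in the inner source's current bake record, and mark a
year dirty when any of its posts had a sub-page baked. The dicts below stand
in for the real record entries:

    import datetime

    entries = [  # stand-ins for the record entries read above
        {'timestamp': datetime.datetime(2016, 5, 1).timestamp(),
         'was_any_sub_baked': True},
        {'timestamp': datetime.datetime(2017, 6, 6).timestamp(),
         'was_any_sub_baked': False},
    ]
    all_years, dirty_years = set(), set()
    for e in entries:
        year = datetime.datetime.fromtimestamp(e['timestamp']).year
        all_years.add(year)
        if e['was_any_sub_baked']:
            dirty_years.add(year)
    assert all_years == {2016, 2017} and dirty_years == {2016}

One thing a reviewer may want to double-check in postJobRun(): prev.year is an
int (as set in run()), while all_str_years holds strings, so the
`y in all_str_years` test looks like it can never match as written.
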