Mercurial > piecrust2
comparison piecrust/generation/taxonomy.py @ 711:ab5c6a8ae90a
bake: Replace hard-coded taxonomy support with "generator" system.
* Taxonomies are now implemented with one or more `TaxonomyGenerator`s.
* A `BlogArchivesGenerator` stub is there but non-functional.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Thu, 26 May 2016 19:52:47 -0700 |
parents | |
children | 3e188d88a9ac |
comparison
equal
deleted
inserted
replaced
710:e85f29b28b84 | 711:ab5c6a8ae90a |
---|---|
1 import re | |
2 import time | |
3 import logging | |
4 import unidecode | |
5 from piecrust.chefutil import format_timed, format_timed_scope | |
6 from piecrust.configuration import ConfigurationError | |
7 from piecrust.data.filters import ( | |
8 PaginationFilter, SettingFilterClause, | |
9 page_value_accessor) | |
10 from piecrust.generation.base import PageGenerator, InvalidRecordExtraKey | |
11 from piecrust.sources.pageref import PageRef, PageNotFoundError | |
12 | |
13 | |
logger = logging.getLogger(__name__)


# Bit flags that compose a slugify mode bitmask; combined with `|` by
# `_parse_slugify_mode` and tested with `&` in `_Slugifier._slugifyOne`.
SLUGIFY_ENCODE = 1
SLUGIFY_TRANSLITERATE = 2
SLUGIFY_LOWERCASE = 4
SLUGIFY_DOT_TO_DASH = 8
SLUGIFY_SPACE_TO_DASH = 16


# Patterns used by `_Slugifier` when dots/spaces are converted to dashes.
re_first_dot_to_dash = re.compile(r'^\.+')
re_dot_to_dash = re.compile(r'\.+')
re_space_to_dash = re.compile(r'\s+')
27 | |
28 | |
class Taxonomy(object):
    """A taxonomy (tags, categories, etc.) as declared in the site config."""

    def __init__(self, name, config):
        self.name = name
        self.config = config
        # Name under which a single term is exposed; defaults to the
        # taxonomy name itself.
        self.term_name = config.get('term', name)
        # Whether a page can carry several terms for this taxonomy.
        self.is_multiple = bool(config.get('multiple', False))
        # Separator used to combine several terms into one URL component.
        self.separator = config.get('separator', '/')
        # Reference to the page that renders this taxonomy's listings.
        self.page_ref = config.get('page')
        self._source_page_refs = {}

    @property
    def setting_name(self):
        """Name of the page config setting that holds this taxonomy's
        term(s) — plural for multi-valued taxonomies, singular otherwise."""
        return self.name if self.is_multiple else self.term_name
45 | |
class TaxonomyPageGenerator(PageGenerator):
    """Generates taxonomy listing pages: one page per term used in the
    taxonomy (tags, categories, etc.), plus combination pages when the
    taxonomy supports multiple terms per page.
    """
    GENERATOR_NAME = 'taxonomy'

    def __init__(self, app, name, config):
        super(TaxonomyPageGenerator, self).__init__(app, name, config)

        tax_name = config.get('taxonomy')
        if tax_name is None:
            raise ConfigurationError(
                    "Generator '%s' requires a taxonomy name." % name)
        tax_config = app.config.get('site/taxonomies/' + tax_name)
        if tax_config is None:
            # Fix: the format string and its arguments used to be passed as
            # two separate constructor arguments, so the '%s' placeholders
            # were never interpolated into the error message.
            raise ConfigurationError(
                    "Error initializing generator '%s', no such taxonomy: %s" %
                    (name, tax_name))
        self.taxonomy = Taxonomy(tax_name, tax_config)

        # Slugify mode can be set per-generator, falling back to the
        # site-wide setting ('encode' by default).
        sm = config.get('slugify_mode')
        if not sm:
            sm = app.config.get('site/slugify_mode', 'encode')
        self.slugify_mode = _parse_slugify_mode(sm)

    @property
    def page_ref_path(self):
        """Path of the page that renders this taxonomy, or `None` if no
        such page exists."""
        try:
            return self.page_ref.path
        except PageNotFoundError:
            return None

    def getPageFactory(self, route_metadata):
        """Returns the factory for the taxonomy page. `route_metadata` is
        unused because every term renders through the same page.

        This will raise `PageNotFoundError` naturally if not found.
        """
        return self.page_ref.getFactory()

    def prepareRenderContext(self, ctx):
        """Sets up rendering for one taxonomy page: installs the pagination
        filter for the current term(s) and exposes them to templates."""
        tax_terms, is_combination = self._getTaxonomyTerms(
                ctx.page.route_metadata)
        self._setTaxonomyFilter(ctx, tax_terms, is_combination)

        ctx.custom_data = {
                self.taxonomy.term_name: tax_terms,
                'is_multiple_%s' % self.taxonomy.term_name: is_combination}
        logger.debug("Prepared render context with: %s" % ctx.custom_data)

    def _getTaxonomyTerms(self, route_metadata):
        """Extracts the term(s) for the current page from the route
        metadata.

        Returns a `(terms, is_combination)` pair where `terms` is a tuple
        when several terms are combined (multi-valued taxonomies only) and
        the raw value otherwise.
        """
        all_values = route_metadata.get(self.taxonomy.term_name)
        if all_values is None:
            raise Exception("'%s' values couldn't be found in route metadata" %
                            self.taxonomy.term_name)

        if self.taxonomy.is_multiple:
            sep = self.taxonomy.separator
            if sep in all_values:
                return tuple(all_values.split(sep)), True
        return all_values, False

    def _setTaxonomyFilter(self, ctx, term_value, is_combination):
        """Installs a pagination filter that keeps only pages tagged with
        `term_value`."""
        flt = PaginationFilter(value_accessor=page_value_accessor)
        flt.addClause(HasTaxonomyTermsFilterClause(
                self.taxonomy, self.slugify_mode, term_value, is_combination))
        ctx.pagination_filter = flt

    def onRouteFunctionUsed(self, route, route_metadata):
        """Called when a page uses this generator's route function.

        Records the used term(s) so incremental bakes can detect dirty
        combinations, and slugifies the terms in-place in the route
        metadata before they become URL bits.
        """
        # Get the values.
        values = route_metadata[self.taxonomy.term_name]
        if self.taxonomy.is_multiple:
            #TODO: here we assume the route has been properly configured.
            values = tuple([str(v) for v in values])
        else:
            values = (str(values),)

        # We need to register this use of a taxonomy term.
        eis = self.app.env.exec_info_stack
        cpi = eis.current_page_info.render_ctx.current_pass_info
        if cpi:
            utt = cpi.getCustomInfo('used_taxonomy_terms', [], True)
            utt.append(values)

        # We need to slugify the terms before they get transformed
        # into URL-bits.
        s = _Slugifier(self.taxonomy, self.slugify_mode)
        str_values = s.slugify(values)
        route_metadata[self.taxonomy.term_name] = str_values
        logger.debug("Changed route metadata to: %s" % route_metadata)

    def bake(self, ctx):
        """Bakes the taxonomy pages for all terms that are dirty in the
        current bake. Returns nothing; progress is logged."""
        logger.debug("Baking taxonomy pages...")
        with format_timed_scope(logger, 'gathered taxonomy terms',
                                level=logging.DEBUG, colored=False):
            all_terms, dirty_terms = self._buildDirtyTaxonomyTerms(ctx)

        start_time = time.perf_counter()
        page_count = self._bakeTaxonomyTerms(ctx, all_terms, dirty_terms)
        logger.info(format_timed(start_time,
                                 "baked %d taxonomy pages." % page_count))

    def _buildDirtyTaxonomyTerms(self, ctx):
        # Build the list of terms for our taxonomy, and figure out which ones
        # are 'dirty' for the current bake. Both sets contain tuples of
        # terms: 1-tuples for single terms, longer tuples for combinations.
        logger.debug("Gathering dirty taxonomy terms")
        all_terms = set()
        dirty_terms = set()

        # Re-bake all taxonomy terms that include new or changed pages.
        for prev_entry, cur_entry in ctx.getBakedPageRecords():
            entries = [cur_entry]
            if prev_entry:
                entries.append(prev_entry)

            terms = []
            for e in entries:
                entry_terms = e.config.get(self.taxonomy.setting_name)
                if entry_terms:
                    if not self.taxonomy.is_multiple:
                        terms.append(entry_terms)
                    else:
                        terms += entry_terms
            if terms:
                dirty_terms.update([(t,) for t in terms])

        # Remember all terms used.
        for _, cur_entry in ctx.getAllPageRecords():
            if cur_entry and not cur_entry.was_overriden:
                cur_terms = cur_entry.config.get(self.taxonomy.setting_name)
                if cur_terms:
                    if not self.taxonomy.is_multiple:
                        all_terms.add(cur_terms)
                    else:
                        all_terms |= set(cur_terms)

        # Re-bake the combination pages for terms that are 'dirty'.
        if self.taxonomy.is_multiple:
            known_combinations = set()
            logger.debug("Gathering dirty term combinations")
            for _, cur_entry in ctx.getAllPageRecords():
                if cur_entry:
                    used_terms = _get_all_entry_taxonomy_terms(cur_entry)
                    for terms in used_terms:
                        if len(terms) > 1:
                            known_combinations.add(terms)

            for terms in known_combinations:
                # Fix: `dirty_terms` stores 1-tuples, so each single term
                # must be wrapped before the intersection test; comparing
                # raw terms against 1-tuples never matched, which meant
                # combination pages were never re-baked.
                if not dirty_terms.isdisjoint({(t,) for t in terms}):
                    dirty_terms.add(terms)

        return all_terms, dirty_terms

    def _bakeTaxonomyTerms(self, ctx, all_terms, dirty_terms):
        # Start baking those terms.
        logger.debug(
                "Baking '%s' for source '%s': %s" %
                (self.taxonomy.name, self.source_name, dirty_terms))

        if not self.page_ref.exists:
            logger.debug(
                    "No taxonomy page found at '%s', skipping." %
                    self.page_ref)
            return 0

        route = self.app.getGeneratorRoute(self.name)
        if route is None:
            raise Exception("No routes have been defined for generator: %s" %
                            self.name)

        logger.debug("Using taxonomy page: %s" % self.page_ref)
        fac = self.page_ref.getFactory()

        job_count = 0
        s = _Slugifier(self.taxonomy, self.slugify_mode)
        for term in dirty_terms:
            if not self.taxonomy.is_multiple:
                term = term[0]
            slugified_term = s.slugify(term)

            logger.debug(
                    "Queuing: %s [%s=%s]" %
                    (fac.ref_spec, self.taxonomy.name, slugified_term))

            extra_route_metadata = {self.taxonomy.term_name: slugified_term}
            ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term)
            job_count += 1
        ctx.runJobQueue()

        # Now we create bake entries for all the terms that were *not* dirty.
        # This is because otherwise, on the next incremental bake, we wouldn't
        # find any entry for those things, and figure that we need to delete
        # their outputs.
        for prev_entry, cur_entry in ctx.getAllPageRecords():
            # Only consider taxonomy-related entries that don't have any
            # current version (i.e. they weren't baked just now).
            if (prev_entry and not cur_entry):
                try:
                    t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
                except InvalidRecordExtraKey:
                    continue

                if t in all_terms:
                    logger.debug("Creating unbaked entry for %s term: %s" %
                                 (self.name, t))
                    ctx.collapseRecord(prev_entry)
                else:
                    logger.debug("Term %s in %s isn't used anymore." %
                                 (self.name, t))

        return job_count
250 | |
251 | |
252 def _get_all_entry_taxonomy_terms(entry): | |
253 res = set() | |
254 for o in entry.subs: | |
255 for pinfo in o.render_info: | |
256 if pinfo: | |
257 res |= set(pinfo.getCustomInfo('used_taxonomy_terms', [])) | |
258 return res | |
259 | |
260 | |
class HasTaxonomyTermsFilterClause(SettingFilterClause):
    """Pagination filter clause that matches pages carrying the given
    taxonomy term, or combination of terms."""

    def __init__(self, taxonomy, slugify_mode, value, is_combination):
        super(HasTaxonomyTermsFilterClause, self).__init__(
                taxonomy.setting_name, value)
        self._taxonomy = taxonomy
        self._is_combination = is_combination
        self._slugifier = _Slugifier(taxonomy, slugify_mode)

    def pageMatches(self, fil, page):
        page_values = fil.value_accessor(page, self.name)

        if not self._taxonomy.is_multiple:
            # Single taxonomy. Just compare the values.
            if page_values is None:
                return False
            return self._slugifier.slugify(page_values) == self.value

        # Multiple taxonomy, i.e. it supports multiple terms, like tags.
        # Anything that isn't a list of terms can't match.
        if not isinstance(page_values, list):
            return False

        page_set = {self._slugifier.slugify(v) for v in page_values}
        if self._is_combination:
            # Multiple terms to match: all of them must appear among the
            # page's terms.
            return set(self.value).issubset(page_set)
        # One term to match.
        return self.value in page_set
292 | |
293 | |
class _Slugifier(object):
    """Turns taxonomy terms into URL-safe slugs according to a bitmask of
    SLUGIFY_* flags."""

    def __init__(self, taxonomy, mode):
        self.taxonomy = taxonomy
        self.mode = mode

    def slugify(self, term):
        """Slugifies a single term, or a tuple of terms joined with the
        taxonomy's separator."""
        if not isinstance(term, tuple):
            return self._slugifyOne(term)
        sep = self.taxonomy.separator
        return sep.join(self._slugifyOne(t) for t in term)

    def _slugifyOne(self, term):
        mode = self.mode
        if mode & SLUGIFY_TRANSLITERATE:
            term = unidecode.unidecode(term)
        if mode & SLUGIFY_LOWERCASE:
            term = term.lower()
        if mode & SLUGIFY_DOT_TO_DASH:
            # Leading dots are stripped, inner runs of dots become a dash.
            term = re_first_dot_to_dash.sub('', term)
            term = re_dot_to_dash.sub('-', term)
        if mode & SLUGIFY_SPACE_TO_DASH:
            term = re_space_to_dash.sub('-', term)
        return term
316 | |
317 | |
def _parse_slugify_mode(value):
    """Parses a comma-separated list of slugify flag names into a bitmask
    of SLUGIFY_* values.

    Raises an exception on unknown flags; `iconv` (a PieCrust 1 mode) gets
    a dedicated message pointing at `transliterate`.
    """
    mapping = {
            'encode': SLUGIFY_ENCODE,
            'transliterate': SLUGIFY_TRANSLITERATE,
            'lowercase': SLUGIFY_LOWERCASE,
            'dot_to_dash': SLUGIFY_DOT_TO_DASH,
            'space_to_dash': SLUGIFY_SPACE_TO_DASH}
    mode = 0
    for raw in value.split(','):
        # Fix: strip once and use the stripped token everywhere. The
        # 'iconv' check used to compare the unstripped token, so e.g.
        # "lowercase, iconv" skipped the dedicated migration message.
        flag = raw.strip()
        f = mapping.get(flag)
        if f is None:
            if flag == 'iconv':
                raise Exception("'iconv' is not supported as a slugify mode "
                                "in PieCrust2. Use 'transliterate'.")
            raise Exception("Unknown slugify flag: %s" % flag)
        mode |= f
    return mode
335 |