comparison piecrust/generation/taxonomy.py @ 711:ab5c6a8ae90a

bake: Replace hard-coded taxonomy support with "generator" system. * Taxonomies are now implemented by one or more `TaxonomyGenerator`s. * A `BlogArchivesGenerator` stub is there but non-functional.
author Ludovic Chabant <ludovic@chabant.com>
date Thu, 26 May 2016 19:52:47 -0700
parents
children 3e188d88a9ac
comparison
equal deleted inserted replaced
710:e85f29b28b84 711:ab5c6a8ae90a
1 import re
2 import time
3 import logging
4 import unidecode
5 from piecrust.chefutil import format_timed, format_timed_scope
6 from piecrust.configuration import ConfigurationError
7 from piecrust.data.filters import (
8 PaginationFilter, SettingFilterClause,
9 page_value_accessor)
10 from piecrust.generation.base import PageGenerator, InvalidRecordExtraKey
11 from piecrust.sources.pageref import PageRef, PageNotFoundError
12
13
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Slugify mode bit flags. They are OR-ed together by `_parse_slugify_mode`
# and tested with `&` by `_Slugifier`.
SLUGIFY_ENCODE = 1 << 0
SLUGIFY_TRANSLITERATE = 1 << 1
SLUGIFY_LOWERCASE = 1 << 2
SLUGIFY_DOT_TO_DASH = 1 << 3
SLUGIFY_SPACE_TO_DASH = 1 << 4


# Pre-compiled patterns used when slugifying a term:
# one or more dots at the very start of the string...
re_first_dot_to_dash = re.compile(r'^\.+')
# ...any run of dots...
re_dot_to_dash = re.compile(r'\.+')
# ...and any run of whitespace.
re_space_to_dash = re.compile(r'\s+')
27
28
class Taxonomy(object):
    """Holds the settings of one taxonomy, read from its config mapping.

    `config` only needs to support `.get(key, default)`. The following
    keys are read:

    * `term`: name of a single term (defaults to the taxonomy name).
    * `multiple`: whether a page can carry several terms (default `False`).
    * `separator`: string placed between terms of a combination
      (default `'/'`).
    * `page`: reference to the page used for this taxonomy (default `None`).
    """
    def __init__(self, name, config):
        self.name = name
        self.config = config
        self.page_ref = config.get('page')
        self.separator = config.get('separator', '/')
        self.is_multiple = bool(config.get('multiple', False))
        self.term_name = config.get('term', name)
        self._source_page_refs = {}

    @property
    def setting_name(self):
        """Name of the page setting that holds this taxonomy's term(s).

        This is the taxonomy name for a multiple-term taxonomy, and the
        term name otherwise.
        """
        return self.name if self.is_multiple else self.term_name
44
45
class TaxonomyPageGenerator(PageGenerator):
    """Page generator that bakes listing pages for the terms of a taxonomy.

    The generator is bound to one taxonomy, named by the `taxonomy` key of
    its own config and described under `site/taxonomies/<name>` in the
    application config.

    NOTE(review): `self.app`, `self.name`, `self.source_name` and
    `self.page_ref` are used below but not assigned in this class; they are
    presumably set up by `PageGenerator.__init__` -- confirm against the
    base class.
    """
    GENERATOR_NAME = 'taxonomy'

    def __init__(self, app, name, config):
        """Bind the generator to its taxonomy and resolve the slugify mode.

        Raises `ConfigurationError` if the generator config has no
        `taxonomy` entry, or if that taxonomy is not declared under
        `site/taxonomies`.
        """
        super(TaxonomyPageGenerator, self).__init__(app, name, config)

        tax_name = config.get('taxonomy')
        if tax_name is None:
            raise ConfigurationError(
                "Generator '%s' requires a taxonomy name." % name)
        tax_config = app.config.get('site/taxonomies/' + tax_name)
        if tax_config is None:
            # The arguments must be interpolated with `%`; passing them as
            # a second positional argument leaves the message unformatted.
            raise ConfigurationError(
                "Error initializing generator '%s', no such taxonomy: %s" %
                (name, tax_name))
        self.taxonomy = Taxonomy(tax_name, tax_config)

        # The generator's own setting wins over the site-wide one.
        sm = config.get('slugify_mode')
        if not sm:
            sm = app.config.get('site/slugify_mode', 'encode')
        self.slugify_mode = _parse_slugify_mode(sm)

    @property
    def page_ref_path(self):
        """Path of the referenced taxonomy page, or `None` if not found."""
        try:
            return self.page_ref.path
        except PageNotFoundError:
            return None

    def getPageFactory(self, route_metadata):
        """Return the factory of the referenced taxonomy page."""
        # This will raise `PageNotFoundError` naturally if not found.
        return self.page_ref.getFactory()

    def prepareRenderContext(self, ctx):
        """Install the pagination filter and custom data on `ctx`.

        The terms are read from the route metadata of `ctx.page`.
        `ctx.custom_data` is replaced with a dict holding the term(s) under
        the taxonomy's term name, plus an `is_multiple_<term name>` flag
        telling whether the terms are a combination.
        """
        tax_terms, is_combination = self._getTaxonomyTerms(
            ctx.page.route_metadata)
        self._setTaxonomyFilter(ctx, tax_terms, is_combination)

        ctx.custom_data = {
            self.taxonomy.term_name: tax_terms,
            'is_multiple_%s' % self.taxonomy.term_name: is_combination}
        logger.debug("Prepared render context with: %s", ctx.custom_data)

    def _getTaxonomyTerms(self, route_metadata):
        """Extract the term(s) from route metadata.

        Returns a `(terms, is_combination)` pair. For a multiple-term
        taxonomy whose value contains the separator, `terms` is a tuple of
        the split parts and `is_combination` is `True`. Otherwise the raw
        value is returned with `False`.

        Raises `Exception` if the metadata has no value for the term name.
        """
        all_values = route_metadata.get(self.taxonomy.term_name)
        if all_values is None:
            raise Exception("'%s' values couldn't be found in route metadata" %
                            self.taxonomy.term_name)

        if self.taxonomy.is_multiple:
            sep = self.taxonomy.separator
            if sep in all_values:
                return tuple(all_values.split(sep)), True
        return all_values, False

    def _setTaxonomyFilter(self, ctx, term_value, is_combination):
        """Set a pagination filter on `ctx` matching pages with the term(s)."""
        flt = PaginationFilter(value_accessor=page_value_accessor)
        flt.addClause(HasTaxonomyTermsFilterClause(
            self.taxonomy, self.slugify_mode, term_value, is_combination))
        ctx.pagination_filter = flt

    def onRouteFunctionUsed(self, route, route_metadata):
        """Record the terms used by a route function and slugify them.

        The raw terms are appended to the current render pass'
        `used_taxonomy_terms` custom info (when there is a current pass),
        and `route_metadata` is modified in place so that the term entry
        holds the slugified value.
        """
        # Get the values.
        values = route_metadata[self.taxonomy.term_name]
        if self.taxonomy.is_multiple:
            # TODO: here we assume the route has been properly configured.
            values = tuple(str(v) for v in values)
        else:
            values = (str(values),)

        # We need to register this use of a taxonomy term.
        eis = self.app.env.exec_info_stack
        cpi = eis.current_page_info.render_ctx.current_pass_info
        if cpi:
            utt = cpi.getCustomInfo('used_taxonomy_terms', [], True)
            utt.append(values)

        # We need to slugify the terms before they get transformed
        # into URL-bits.
        s = _Slugifier(self.taxonomy, self.slugify_mode)
        str_values = s.slugify(values)
        route_metadata[self.taxonomy.term_name] = str_values
        logger.debug("Changed route metadata to: %s", route_metadata)

    def bake(self, ctx):
        """Gather the dirty terms and bake their taxonomy pages."""
        logger.debug("Baking taxonomy pages...")
        with format_timed_scope(logger, 'gathered taxonomy terms',
                                level=logging.DEBUG, colored=False):
            all_terms, dirty_terms = self._buildDirtyTaxonomyTerms(ctx)

        start_time = time.perf_counter()
        page_count = self._bakeTaxonomyTerms(ctx, all_terms, dirty_terms)
        logger.info(format_timed(start_time,
                                 "baked %d taxonomy pages." % page_count))

    def _buildDirtyTaxonomyTerms(self, ctx):
        """Collect every term in use and the ones that need re-baking.

        Returns an `(all_terms, dirty_terms)` pair of sets. `all_terms`
        holds the raw terms found on current, non-overridden page records.
        `dirty_terms` holds tuples: a 1-tuple per term found on a baked
        page record (current or previous version), plus, for a
        multiple-term taxonomy, every known combination that includes at
        least one of those terms.
        """
        # Build the list of terms for our taxonomy, and figure out which ones
        # are 'dirty' for the current bake.
        logger.debug("Gathering dirty taxonomy terms")
        all_terms = set()
        dirty_terms = set()
        # `dirty_terms` only ever contains tuples, so the individual dirty
        # terms are tracked separately to be able to match them against the
        # members of a combination. They are stringified because the
        # combinations recorded by `onRouteFunctionUsed` are tuples of
        # `str`.
        single_dirty_terms = set()

        setting_name = self.taxonomy.setting_name
        is_multiple = self.taxonomy.is_multiple

        # Re-bake all taxonomy terms that include new or changed pages.
        for prev_entry, cur_entry in ctx.getBakedPageRecords():
            entries = [cur_entry]
            if prev_entry:
                entries.append(prev_entry)

            terms = []
            for e in entries:
                entry_terms = e.config.get(setting_name)
                if entry_terms:
                    if not is_multiple:
                        terms.append(entry_terms)
                    else:
                        terms += entry_terms
            if terms:
                dirty_terms.update((t,) for t in terms)
                single_dirty_terms.update(str(t) for t in terms)

        # Remember all terms used.
        for _, cur_entry in ctx.getAllPageRecords():
            if cur_entry and not cur_entry.was_overriden:
                cur_terms = cur_entry.config.get(setting_name)
                if cur_terms:
                    if not is_multiple:
                        all_terms.add(cur_terms)
                    else:
                        all_terms |= set(cur_terms)

        # Re-bake the combination pages for terms that are 'dirty'.
        if is_multiple:
            known_combinations = set()
            logger.debug("Gathering dirty term combinations")
            for _, cur_entry in ctx.getAllPageRecords():
                if cur_entry:
                    used_terms = _get_all_entry_taxonomy_terms(cur_entry)
                    for terms in used_terms:
                        if len(terms) > 1:
                            known_combinations.add(terms)

            for terms in known_combinations:
                # A combination is dirty as soon as one of its terms is.
                if not single_dirty_terms.isdisjoint(terms):
                    dirty_terms.add(terms)

        return all_terms, dirty_terms

    def _bakeTaxonomyTerms(self, ctx, all_terms, dirty_terms):
        """Queue and run one bake job per dirty term.

        Returns the number of queued jobs, or 0 when the taxonomy page
        doesn't exist. Raises `Exception` if no route is defined for this
        generator.
        """
        # Start baking those terms.
        logger.debug(
            "Baking '%s' for source '%s': %s",
            self.taxonomy.name, self.source_name, dirty_terms)

        if not self.page_ref.exists:
            logger.debug(
                "No taxonomy page found at '%s', skipping.",
                self.page_ref)
            return 0

        route = self.app.getGeneratorRoute(self.name)
        if route is None:
            raise Exception("No routes have been defined for generator: %s" %
                            self.name)

        logger.debug("Using taxonomy page: %s", self.page_ref)
        fac = self.page_ref.getFactory()

        job_count = 0
        s = _Slugifier(self.taxonomy, self.slugify_mode)
        for term in dirty_terms:
            # Dirty terms are always tuples; a single-term taxonomy only
            # ever has 1-tuples, so unwrap them.
            if not self.taxonomy.is_multiple:
                term = term[0]
            slugified_term = s.slugify(term)

            logger.debug(
                "Queuing: %s [%s=%s]",
                fac.ref_spec, self.taxonomy.name, slugified_term)

            extra_route_metadata = {self.taxonomy.term_name: slugified_term}
            ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term)
            job_count += 1
        ctx.runJobQueue()

        # Now we create bake entries for all the terms that were *not* dirty.
        # This is because otherwise, on the next incremental bake, we wouldn't
        # find any entry for those things, and figure that we need to delete
        # their outputs.
        for prev_entry, cur_entry in ctx.getAllPageRecords():
            # Only consider taxonomy-related entries that don't have any
            # current version (i.e. they weren't baked just now).
            if prev_entry and not cur_entry:
                try:
                    t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
                except InvalidRecordExtraKey:
                    continue

                if t in all_terms:
                    logger.debug("Creating unbaked entry for %s term: %s",
                                 self.name, t)
                    ctx.collapseRecord(prev_entry)
                else:
                    logger.debug("Term %s in %s isn't used anymore.",
                                 self.name, t)

        return job_count
250
251
def _get_all_entry_taxonomy_terms(entry):
    """Return the set of all taxonomy terms recorded as used by `entry`.

    Walks every item of `entry.subs` and every non-empty pass info in that
    item's `render_info`, and merges their `used_taxonomy_terms` custom
    info into one set.
    """
    used = set()
    for sub in entry.subs:
        for pass_info in sub.render_info:
            if not pass_info:
                continue
            used.update(pass_info.getCustomInfo('used_taxonomy_terms', []))
    return used
259
260
class HasTaxonomyTermsFilterClause(SettingFilterClause):
    """Filter clause matching pages that carry the given taxonomy term(s).

    Page values are slugified before being compared to the clause value.

    NOTE(review): `self.name` and `self.value` are presumably set by
    `SettingFilterClause.__init__` from the two arguments passed to it --
    confirm against the base class.
    """
    def __init__(self, taxonomy, slugify_mode, value, is_combination):
        super(HasTaxonomyTermsFilterClause, self).__init__(
            taxonomy.setting_name, value)
        self._taxonomy = taxonomy
        self._is_combination = is_combination
        self._slugifier = _Slugifier(taxonomy, slugify_mode)

    def pageMatches(self, fil, page):
        """Tell whether `page` has the term(s) this clause looks for."""
        raw = fil.value_accessor(page, self.name)

        if not self._taxonomy.is_multiple:
            # Single taxonomy. Just compare the values.
            if raw is None:
                return False
            return self._slugifier.slugify(raw) == self.value

        # Multiple taxonomy, i.e. it supports multiple terms, like tags.
        # Anything other than a list (including a missing value) can't
        # match.
        if not isinstance(raw, list):
            return False

        slugs = {self._slugifier.slugify(v) for v in raw}
        if not self._is_combination:
            # Multiple taxonomy, one term to match.
            return self.value in slugs

        # Multiple taxonomy, and multiple terms to match. Check that
        # the ones to match are all in the page's terms.
        return set(self.value).issubset(slugs)
292
293
class _Slugifier(object):
    """Turns taxonomy terms into slugs according to a slugify mode.

    `mode` is a combination of the `SLUGIFY_*` bit flags.
    """
    def __init__(self, taxonomy, mode):
        self.taxonomy = taxonomy
        self.mode = mode

    def slugify(self, term):
        """Slugify one term, or a tuple of terms.

        A tuple is slugified item by item and joined with the taxonomy's
        separator. Anything else is slugified as a single term.
        """
        if not isinstance(term, tuple):
            return self._slugifyOne(term)
        slugs = [self._slugifyOne(t) for t in term]
        return self.taxonomy.separator.join(slugs)

    def _slugifyOne(self, term):
        """Apply each transformation enabled in `self.mode`, in order."""
        flags = self.mode
        if flags & SLUGIFY_TRANSLITERATE:
            term = unidecode.unidecode(term)
        if flags & SLUGIFY_LOWERCASE:
            term = term.lower()
        if flags & SLUGIFY_DOT_TO_DASH:
            # Leading dots are dropped; other runs of dots become a dash.
            without_leading_dots = re_first_dot_to_dash.sub('', term)
            term = re_dot_to_dash.sub('-', without_leading_dots)
        if flags & SLUGIFY_SPACE_TO_DASH:
            term = re_space_to_dash.sub('-', term)
        return term
316
317
318 def _parse_slugify_mode(value):
319 mapping = {
320 'encode': SLUGIFY_ENCODE,
321 'transliterate': SLUGIFY_TRANSLITERATE,
322 'lowercase': SLUGIFY_LOWERCASE,
323 'dot_to_dash': SLUGIFY_DOT_TO_DASH,
324 'space_to_dash': SLUGIFY_SPACE_TO_DASH}
325 mode = 0
326 for v in value.split(','):
327 f = mapping.get(v.strip())
328 if f is None:
329 if v == 'iconv':
330 raise Exception("'iconv' is not supported as a slugify mode "
331 "in PieCrust2. Use 'transliterate'.")
332 raise Exception("Unknown slugify flag: %s" % v)
333 mode |= f
334 return mode
335