Mercurial > piecrust2
comparison piecrust/page.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines,
one for assets and one for pages. The asset pipeline is vaguely functional,
but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each
content item. This should allow for better parallelization.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 17 May 2017 00:11:48 -0700 |
parents | e01473c3ea7e |
children | f070a4fc033c |
comparison legend: equal | deleted | inserted | replaced
851:2c7e57d80bba | 852:4850f8c21b6e |
---|---|
7 import datetime | 7 import datetime |
8 import dateutil.parser | 8 import dateutil.parser |
9 import collections | 9 import collections |
10 from werkzeug.utils import cached_property | 10 from werkzeug.utils import cached_property |
11 from piecrust.configuration import ( | 11 from piecrust.configuration import ( |
12 Configuration, ConfigurationError, | 12 Configuration, ConfigurationError, |
13 parse_config_header) | 13 parse_config_header) |
14 | 14 |
15 | 15 |
16 logger = logging.getLogger(__name__) | 16 logger = logging.getLogger(__name__) |
17 | 17 |
18 | 18 |
34 | 34 |
35 FLAG_NONE = 0 | 35 FLAG_NONE = 0 |
36 FLAG_RAW_CACHE_VALID = 2**0 | 36 FLAG_RAW_CACHE_VALID = 2**0 |
37 | 37 |
38 | 38 |
39 class PageNotFoundError(Exception): | |
40 pass | |
41 | |
42 | |
43 class QualifiedPage(object): | |
44 def __init__(self, page, route, route_params, *, page_num=1): | |
45 self.page = page | |
46 self.page_num = page_num | |
47 self.route = route | |
48 self.route_params = route_params | |
49 | |
50 @property | |
51 def app(self): | |
52 return self.page.app | |
53 | |
54 @property | |
55 def source(self): | |
56 return self.page.source | |
57 | |
58 @cached_property | |
59 def uri(self): | |
60 return self.route.getUri(self.route_params, self.page_num) | |
61 | |
62 def getSubPage(self, page_num): | |
63 return QualifiedPage(self.page, self.route, self.route_params, | |
64 page_num=self.page_num + 1) | |
65 | |
66 | |
39 class Page(object): | 67 class Page(object): |
40 def __init__(self, source, source_metadata, rel_path): | 68 def __init__(self, content_item): |
41 self.source = source | 69 self.content_item = content_item |
42 self.source_metadata = source_metadata | |
43 self.rel_path = rel_path | |
44 self._config = None | 70 self._config = None |
45 self._segments = None | 71 self._segments = None |
46 self._flags = FLAG_NONE | 72 self._flags = FLAG_NONE |
47 self._datetime = None | 73 self._datetime = None |
48 | 74 |
49 @property | 75 @property |
76 def source(self): | |
77 return self.content_item.source | |
78 | |
79 @property | |
80 def source_metadata(self): | |
81 return self.content_item.metadata | |
82 | |
83 @property | |
84 def content_spec(self): | |
85 return self.content_item.spec | |
86 | |
87 @property | |
50 def app(self): | 88 def app(self): |
51 return self.source.app | 89 return self.content_item.source.app |
52 | |
53 @property | |
54 def ref_spec(self): | |
55 return '%s:%s' % (self.source.name, self.rel_path) | |
56 | 90 |
57 @cached_property | 91 @cached_property |
58 def path(self): | 92 def content_mtime(self): |
59 path, _ = self.source.resolveRef(self.rel_path) | 93 return self.content_item.getmtime() |
60 return path | |
61 | |
62 @cached_property | |
63 def path_mtime(self): | |
64 return os.path.getmtime(self.path) | |
65 | 94 |
66 @property | 95 @property |
67 def flags(self): | 96 def flags(self): |
68 return self._flags | 97 return self._flags |
69 | 98 |
89 # time from the page config. | 118 # time from the page config. |
90 page_date = self.source_metadata['date'] | 119 page_date = self.source_metadata['date'] |
91 page_time = _parse_config_time(self.config.get('time')) | 120 page_time = _parse_config_time(self.config.get('time')) |
92 if page_time is not None: | 121 if page_time is not None: |
93 self._datetime = datetime.datetime( | 122 self._datetime = datetime.datetime( |
94 page_date.year, | 123 page_date.year, |
95 page_date.month, | 124 page_date.month, |
96 page_date.day) + page_time | 125 page_date.day) + page_time |
97 else: | 126 else: |
98 self._datetime = datetime.datetime( | 127 self._datetime = datetime.datetime( |
99 page_date.year, page_date.month, page_date.day) | 128 page_date.year, page_date.month, page_date.day) |
100 elif 'date' in self.config: | 129 elif 'date' in self.config: |
101 # Get the date from the page config, and maybe the | 130 # Get the date from the page config, and maybe the |
102 # time too. | 131 # time too. |
103 page_date = _parse_config_date(self.config.get('date')) | 132 page_date = _parse_config_date(self.config.get('date')) |
104 self._datetime = datetime.datetime( | 133 self._datetime = datetime.datetime( |
105 page_date.year, | 134 page_date.year, |
106 page_date.month, | 135 page_date.month, |
107 page_date.day) | 136 page_date.day) |
108 page_time = _parse_config_time(self.config.get('time')) | 137 page_time = _parse_config_time(self.config.get('time')) |
109 if page_time is not None: | 138 if page_time is not None: |
110 self._datetime += page_time | 139 self._datetime += page_time |
111 else: | 140 else: |
112 # No idea what the date/time for this page is. | 141 # No idea what the date/time for this page is. |
113 self._datetime = datetime.datetime.fromtimestamp(0) | 142 self._datetime = datetime.datetime.fromtimestamp(0) |
114 except Exception as ex: | 143 except Exception as ex: |
115 logger.exception(ex) | 144 logger.exception(ex) |
116 raise Exception( | 145 raise Exception( |
117 "Error computing time for page: %s" % | 146 "Error computing time for page: %s" % |
118 self.path) from ex | 147 self.path) from ex |
119 return self._datetime | 148 return self._datetime |
120 | 149 |
121 @datetime.setter | 150 @datetime.setter |
122 def datetime(self, value): | 151 def datetime(self, value): |
123 self._datetime = value | 152 self._datetime = value |
127 | 156 |
128 def _load(self): | 157 def _load(self): |
129 if self._config is not None: | 158 if self._config is not None: |
130 return | 159 return |
131 | 160 |
132 config, content, was_cache_valid = load_page(self.app, self.path, | 161 config, content, was_cache_valid = load_page( |
133 self.path_mtime) | 162 self.app, self.path, self.path_mtime) |
163 | |
134 if 'config' in self.source_metadata: | 164 if 'config' in self.source_metadata: |
135 config.merge(self.source_metadata['config']) | 165 config.merge(self.source_metadata['config']) |
136 | 166 |
137 self._config = config | 167 self._config = config |
138 self._segments = content | 168 self._segments = content |
139 if was_cache_valid: | 169 if was_cache_valid: |
140 self._flags |= FLAG_RAW_CACHE_VALID | 170 self._flags |= FLAG_RAW_CACHE_VALID |
141 | 171 |
142 self.source.finalizeConfig(self) | 172 self.source.finalizeConfig(self) |
173 | |
143 | 174 |
144 def _parse_config_date(page_date): | 175 def _parse_config_date(page_date): |
145 if page_date is None: | 176 if page_date is None: |
146 return None | 177 return None |
147 | 178 |
150 parsed_d = dateutil.parser.parse(page_date) | 181 parsed_d = dateutil.parser.parse(page_date) |
151 except Exception as ex: | 182 except Exception as ex: |
152 logger.exception(ex) | 183 logger.exception(ex) |
153 raise ConfigurationError("Invalid date: %s" % page_date) from ex | 184 raise ConfigurationError("Invalid date: %s" % page_date) from ex |
154 return datetime.date( | 185 return datetime.date( |
155 year=parsed_d.year, | 186 year=parsed_d.year, |
156 month=parsed_d.month, | 187 month=parsed_d.month, |
157 day=parsed_d.day) | 188 day=parsed_d.day) |
158 | 189 |
159 raise ConfigurationError("Invalid date: %s" % page_date) | 190 raise ConfigurationError("Invalid date: %s" % page_date) |
160 | 191 |
161 | 192 |
162 def _parse_config_time(page_time): | 193 def _parse_config_time(page_time): |
171 parsed_t = dateutil.parser.parse(page_time) | 202 parsed_t = dateutil.parser.parse(page_time) |
172 except Exception as ex: | 203 except Exception as ex: |
173 logger.exception(ex) | 204 logger.exception(ex) |
174 raise ConfigurationError("Invalid time: %s" % page_time) from ex | 205 raise ConfigurationError("Invalid time: %s" % page_time) from ex |
175 return datetime.timedelta( | 206 return datetime.timedelta( |
176 hours=parsed_t.hour, | 207 hours=parsed_t.hour, |
177 minutes=parsed_t.minute, | 208 minutes=parsed_t.minute, |
178 seconds=parsed_t.second) | 209 seconds=parsed_t.second) |
179 | 210 |
180 if isinstance(page_time, int): | 211 if isinstance(page_time, int): |
181 # Total seconds... convert to a time struct. | 212 # Total seconds... convert to a time struct. |
182 return datetime.timedelta(seconds=page_time) | 213 return datetime.timedelta(seconds=page_time) |
183 | 214 |
185 | 216 |
186 | 217 |
187 class PageLoadingError(Exception): | 218 class PageLoadingError(Exception): |
188 def __init__(self, path, inner=None): | 219 def __init__(self, path, inner=None): |
189 super(PageLoadingError, self).__init__( | 220 super(PageLoadingError, self).__init__( |
190 "Error loading page: %s" % path, | 221 "Error loading page: %s" % path, |
191 inner) | 222 inner) |
192 | 223 |
193 | 224 |
194 class ContentSegment(object): | 225 class ContentSegment(object): |
195 debug_render_func = 'debug_render' | 226 debug_render_func = 'debug_render' |
196 | 227 |
240 try: | 271 try: |
241 with app.env.timerScope('PageLoad'): | 272 with app.env.timerScope('PageLoad'): |
242 return _do_load_page(app, path, path_mtime) | 273 return _do_load_page(app, path, path_mtime) |
243 except Exception as e: | 274 except Exception as e: |
244 logger.exception( | 275 logger.exception( |
245 "Error loading page: %s" % | 276 "Error loading page: %s" % |
246 os.path.relpath(path, app.root_dir)) | 277 os.path.relpath(path, app.root_dir)) |
247 _, __, traceback = sys.exc_info() | 278 _, __, traceback = sys.exc_info() |
248 raise PageLoadingError(path, e).with_traceback(traceback) | 279 raise PageLoadingError(path, e).with_traceback(traceback) |
249 | 280 |
250 | 281 |
251 def _do_load_page(app, path, path_mtime): | 282 def _do_load_page(app, path, path_mtime): |
253 cache = app.cache.getCache('pages') | 284 cache = app.cache.getCache('pages') |
254 cache_path = hashlib.md5(path.encode('utf8')).hexdigest() + '.json' | 285 cache_path = hashlib.md5(path.encode('utf8')).hexdigest() + '.json' |
255 page_time = path_mtime or os.path.getmtime(path) | 286 page_time = path_mtime or os.path.getmtime(path) |
256 if cache.isValid(cache_path, page_time): | 287 if cache.isValid(cache_path, page_time): |
257 cache_data = json.loads( | 288 cache_data = json.loads( |
258 cache.read(cache_path), | 289 cache.read(cache_path), |
259 object_pairs_hook=collections.OrderedDict) | 290 object_pairs_hook=collections.OrderedDict) |
260 config = PageConfiguration( | 291 config = PageConfiguration( |
261 values=cache_data['config'], | 292 values=cache_data['config'], |
262 validate=False) | 293 validate=False) |
263 content = json_load_segments(cache_data['content']) | 294 content = json_load_segments(cache_data['content']) |
264 return config, content, True | 295 return config, content, True |
265 | 296 |
266 # Nope, load the page from the source file. | 297 # Nope, load the page from the source file. |
267 logger.debug("Loading page configuration from: %s" % path) | 298 logger.debug("Loading page configuration from: %s" % path) |
278 content = parse_segments(raw, offset) | 309 content = parse_segments(raw, offset) |
279 config.set('segments', list(content.keys())) | 310 config.set('segments', list(content.keys())) |
280 | 311 |
281 # Save to the cache. | 312 # Save to the cache. |
282 cache_data = { | 313 cache_data = { |
283 'config': config.getAll(), | 314 'config': config.getAll(), |
284 'content': json_save_segments(content)} | 315 'content': json_save_segments(content)} |
285 cache.write(cache_path, json.dumps(cache_data)) | 316 cache.write(cache_path, json.dumps(cache_data)) |
286 | 317 |
287 return config, content, False | 318 return config, content, False |
288 | 319 |
289 | 320 |
290 segment_pattern = re.compile( | 321 segment_pattern = re.compile( |
291 r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""", | 322 r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""", |
292 re.M) | 323 re.M) |
293 part_pattern = re.compile( | 324 part_pattern = re.compile( |
294 r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""", | 325 r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""", |
295 re.M) | 326 re.M) |
296 | 327 |
297 | 328 |
298 def _count_lines(s): | 329 def _count_lines(s): |
299 return len(s.split('\n')) | 330 return len(s.split('\n')) |
300 | 331 |
321 # Figure out if we need any parsing. | 352 # Figure out if we need any parsing. |
322 do_parse = _string_needs_parsing(raw, offset) | 353 do_parse = _string_needs_parsing(raw, offset) |
323 if not do_parse: | 354 if not do_parse: |
324 seg = ContentSegment() | 355 seg = ContentSegment() |
325 seg.parts = [ | 356 seg.parts = [ |
326 ContentSegmentPart(raw[offset:], None, offset, current_line)] | 357 ContentSegmentPart(raw[offset:], None, offset, current_line)] |
327 return {'content': seg} | 358 return {'content': seg} |
328 | 359 |
329 # Start parsing segments and parts. | 360 # Start parsing segments and parts. |
330 matches = list(segment_pattern.finditer(raw, offset)) | 361 matches = list(segment_pattern.finditer(raw, offset)) |
331 num_matches = len(matches) | 362 num_matches = len(matches) |
335 first_offset = matches[0].start() | 366 first_offset = matches[0].start() |
336 if first_offset > 0: | 367 if first_offset > 0: |
337 # There's some default content segment at the beginning. | 368 # There's some default content segment at the beginning. |
338 seg = ContentSegment() | 369 seg = ContentSegment() |
339 seg.parts, current_line = parse_segment_parts( | 370 seg.parts, current_line = parse_segment_parts( |
340 raw, offset, first_offset, current_line) | 371 raw, offset, first_offset, current_line) |
341 contents['content'] = seg | 372 contents['content'] = seg |
342 | 373 |
343 for i in range(1, num_matches): | 374 for i in range(1, num_matches): |
344 m1 = matches[i - 1] | 375 m1 = matches[i - 1] |
345 m2 = matches[i] | 376 m2 = matches[i] |
346 seg = ContentSegment() | 377 seg = ContentSegment() |
347 seg.parts, current_line = parse_segment_parts( | 378 seg.parts, current_line = parse_segment_parts( |
348 raw, m1.end() + 1, m2.start(), current_line, | 379 raw, m1.end() + 1, m2.start(), current_line, |
349 m1.group('fmt')) | 380 m1.group('fmt')) |
350 contents[m1.group('name')] = seg | 381 contents[m1.group('name')] = seg |
351 | 382 |
352 # Handle text past the last match. | 383 # Handle text past the last match. |
353 lastm = matches[-1] | 384 lastm = matches[-1] |
354 seg = ContentSegment() | 385 seg = ContentSegment() |
355 seg.parts, current_line = parse_segment_parts( | 386 seg.parts, current_line = parse_segment_parts( |
356 raw, lastm.end() + 1, len(raw), current_line, | 387 raw, lastm.end() + 1, len(raw), current_line, |
357 lastm.group('fmt')) | 388 lastm.group('fmt')) |
358 contents[lastm.group('name')] = seg | 389 contents[lastm.group('name')] = seg |
359 | 390 |
360 return contents | 391 return contents |
361 else: | 392 else: |
362 # No segments, just content. | 393 # No segments, just content. |
363 seg = ContentSegment() | 394 seg = ContentSegment() |
364 seg.parts, current_line = parse_segment_parts( | 395 seg.parts, current_line = parse_segment_parts( |
365 raw, offset, len(raw), current_line) | 396 raw, offset, len(raw), current_line) |
366 return {'content': seg} | 397 return {'content': seg} |
367 | 398 |
368 | 399 |
369 def parse_segment_parts(raw, start, end, line_offset, first_part_fmt=None): | 400 def parse_segment_parts(raw, start, end, line_offset, first_part_fmt=None): |
370 matches = list(part_pattern.finditer(raw, start, end)) | 401 matches = list(part_pattern.finditer(raw, start, end)) |
373 parts = [] | 404 parts = [] |
374 | 405 |
375 # First part, before the first format change. | 406 # First part, before the first format change. |
376 part_text = raw[start:matches[0].start()] | 407 part_text = raw[start:matches[0].start()] |
377 parts.append( | 408 parts.append( |
378 ContentSegmentPart(part_text, first_part_fmt, start, | 409 ContentSegmentPart(part_text, first_part_fmt, start, |
379 line_offset)) | 410 line_offset)) |
380 line_offset += _count_lines(part_text) | 411 line_offset += _count_lines(part_text) |
381 | 412 |
382 for i in range(1, num_matches): | 413 for i in range(1, num_matches): |
383 m1 = matches[i - 1] | 414 m1 = matches[i - 1] |
384 m2 = matches[i] | 415 m2 = matches[i] |
385 part_text = raw[m1.end() + 1:m2.start()] | 416 part_text = raw[m1.end() + 1:m2.start()] |
386 parts.append( | 417 parts.append( |
387 ContentSegmentPart( | 418 ContentSegmentPart( |
388 part_text, m1.group('fmt'), m1.end() + 1, | 419 part_text, m1.group('fmt'), m1.end() + 1, |
389 line_offset)) | 420 line_offset)) |
390 line_offset += _count_lines(part_text) | 421 line_offset += _count_lines(part_text) |
391 | 422 |
392 lastm = matches[-1] | 423 lastm = matches[-1] |
393 part_text = raw[lastm.end() + 1:end] | 424 part_text = raw[lastm.end() + 1:end] |
394 parts.append(ContentSegmentPart( | 425 parts.append(ContentSegmentPart( |
395 part_text, lastm.group('fmt'), lastm.end() + 1, | 426 part_text, lastm.group('fmt'), lastm.end() + 1, |
396 line_offset)) | 427 line_offset)) |
397 | 428 |
398 return parts, line_offset | 429 return parts, line_offset |
399 else: | 430 else: |
400 part_text = raw[start:end] | 431 part_text = raw[start:end] |
401 parts = [ContentSegmentPart(part_text, first_part_fmt, start, | 432 parts = [ContentSegmentPart(part_text, first_part_fmt, start, |