comparison piecrust/page.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0.

* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines,
  one for assets and one for pages. The asset pipeline is vaguely functional,
  but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each
  content item. This should allow for better parallelization.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents e01473c3ea7e
children f070a4fc033c
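
The commit message describes the new content model: every piece of content, including asset directories, comes from a `ContentSource`; a source is processed by a pipeline; and baking is just running the appropriate pipeline over each content item. The diff below shows the `Page` side of that change, where a page now wraps a single `content_item` and exposes its `source`, `metadata`, `spec`, and `getmtime()`. The following is a minimal, non-authoritative sketch of that shape; the class and method names are stand-ins for illustration, not PieCrust's actual API.

# Illustrative sketch only: stand-in names, not the real PieCrust classes.
import os


class ContentItem:
    """One piece of content handed out by a source. It mirrors the attributes
    the new Page accessors rely on: source, spec, metadata, getmtime()."""
    def __init__(self, source, spec, metadata):
        self.source = source      # the content source that produced this item
        self.spec = spec          # item identifier, e.g. a file path
        self.metadata = metadata  # per-item metadata (route params, config, ...)

    def getmtime(self):
        # A file-system-backed source can answer this from the spec.
        return os.path.getmtime(self.spec)


class SimpleFileSystemSource:
    """Stand-in for a file-system content source: one item per file."""
    def __init__(self, name, root_dir):
        self.name = name
        self.root_dir = root_dir

    def getAllContents(self):
        for name in sorted(os.listdir(self.root_dir)):
            path = os.path.join(self.root_dir, name)
            if os.path.isfile(path):
                yield ContentItem(self, path, {'slug': os.path.splitext(name)[0]})


def run_pipeline(source, process_item):
    """Baking in the new model: run a pipeline callback over each item of a source."""
    for item in source.getAllContents():
        process_item(item)

With this shape, the old `Page(source, source_metadata, rel_path)` constructor collapses into `Page(content_item)`, as the first hunks of the diff below show.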
comparison: 851:2c7e57d80bba vs. 852:4850f8c21b6e

--- piecrust/page.py (851:2c7e57d80bba)
+++ piecrust/page.py (852:4850f8c21b6e)
@@ -7,12 +7,12 @@
 import datetime
 import dateutil.parser
 import collections
 from werkzeug.utils import cached_property
 from piecrust.configuration import (
     Configuration, ConfigurationError,
     parse_config_header)
 
 
 logger = logging.getLogger(__name__)
 
 
@@ -34,36 +34,65 @@
 
 FLAG_NONE = 0
 FLAG_RAW_CACHE_VALID = 2**0
 
 
+class PageNotFoundError(Exception):
+    pass
+
+
+class QualifiedPage(object):
+    def __init__(self, page, route, route_params, *, page_num=1):
+        self.page = page
+        self.page_num = page_num
+        self.route = route
+        self.route_params = route_params
+
+    @property
+    def app(self):
+        return self.page.app
+
+    @property
+    def source(self):
+        return self.page.source
+
+    @cached_property
+    def uri(self):
+        return self.route.getUri(self.route_params, self.page_num)
+
+    def getSubPage(self, page_num):
+        return QualifiedPage(self.page, self.route, self.route_params,
+                             page_num=self.page_num + 1)
+
+
 class Page(object):
-    def __init__(self, source, source_metadata, rel_path):
-        self.source = source
-        self.source_metadata = source_metadata
-        self.rel_path = rel_path
+    def __init__(self, content_item):
+        self.content_item = content_item
         self._config = None
         self._segments = None
         self._flags = FLAG_NONE
         self._datetime = None
 
     @property
+    def source(self):
+        return self.content_item.source
+
+    @property
+    def source_metadata(self):
+        return self.content_item.metadata
+
+    @property
+    def content_spec(self):
+        return self.content_item.spec
+
+    @property
     def app(self):
-        return self.source.app
-
-    @property
-    def ref_spec(self):
-        return '%s:%s' % (self.source.name, self.rel_path)
+        return self.content_item.source.app
 
     @cached_property
-    def path(self):
-        path, _ = self.source.resolveRef(self.rel_path)
-        return path
-
-    @cached_property
-    def path_mtime(self):
-        return os.path.getmtime(self.path)
+    def content_mtime(self):
+        return self.content_item.getmtime()
 
     @property
     def flags(self):
         return self._flags
 
@@ -89,35 +118,35 @@
                     # time from the page config.
                     page_date = self.source_metadata['date']
                     page_time = _parse_config_time(self.config.get('time'))
                     if page_time is not None:
                         self._datetime = datetime.datetime(
                             page_date.year,
                             page_date.month,
                             page_date.day) + page_time
                     else:
                         self._datetime = datetime.datetime(
                             page_date.year, page_date.month, page_date.day)
                 elif 'date' in self.config:
                     # Get the date from the page config, and maybe the
                     # time too.
                     page_date = _parse_config_date(self.config.get('date'))
                     self._datetime = datetime.datetime(
                         page_date.year,
                         page_date.month,
                         page_date.day)
                     page_time = _parse_config_time(self.config.get('time'))
                     if page_time is not None:
                         self._datetime += page_time
                 else:
                     # No idea what the date/time for this page is.
                     self._datetime = datetime.datetime.fromtimestamp(0)
             except Exception as ex:
                 logger.exception(ex)
                 raise Exception(
                     "Error computing time for page: %s" %
                     self.path) from ex
         return self._datetime
 
     @datetime.setter
     def datetime(self, value):
         self._datetime = value
@@ -127,21 +156,23 @@
 
     def _load(self):
         if self._config is not None:
             return
 
-        config, content, was_cache_valid = load_page(self.app, self.path,
-                                                     self.path_mtime)
+        config, content, was_cache_valid = load_page(
+            self.app, self.path, self.path_mtime)
+
         if 'config' in self.source_metadata:
             config.merge(self.source_metadata['config'])
 
         self._config = config
         self._segments = content
         if was_cache_valid:
             self._flags |= FLAG_RAW_CACHE_VALID
 
         self.source.finalizeConfig(self)
+
 
 
 def _parse_config_date(page_date):
     if page_date is None:
         return None
 
@@ -150,13 +181,13 @@
             parsed_d = dateutil.parser.parse(page_date)
         except Exception as ex:
             logger.exception(ex)
             raise ConfigurationError("Invalid date: %s" % page_date) from ex
         return datetime.date(
             year=parsed_d.year,
             month=parsed_d.month,
             day=parsed_d.day)
 
     raise ConfigurationError("Invalid date: %s" % page_date)
 
 
 def _parse_config_time(page_time):
@@ -171,13 +202,13 @@
             parsed_t = dateutil.parser.parse(page_time)
         except Exception as ex:
             logger.exception(ex)
             raise ConfigurationError("Invalid time: %s" % page_time) from ex
         return datetime.timedelta(
             hours=parsed_t.hour,
             minutes=parsed_t.minute,
             seconds=parsed_t.second)
 
     if isinstance(page_time, int):
         # Total seconds... convert to a time struct.
         return datetime.timedelta(seconds=page_time)
 
@@ -185,12 +216,12 @@
 
 
 class PageLoadingError(Exception):
     def __init__(self, path, inner=None):
         super(PageLoadingError, self).__init__(
             "Error loading page: %s" % path,
             inner)
 
 
 class ContentSegment(object):
     debug_render_func = 'debug_render'
 
@@ -240,12 +271,12 @@
     try:
         with app.env.timerScope('PageLoad'):
             return _do_load_page(app, path, path_mtime)
     except Exception as e:
         logger.exception(
             "Error loading page: %s" %
             os.path.relpath(path, app.root_dir))
         _, __, traceback = sys.exc_info()
         raise PageLoadingError(path, e).with_traceback(traceback)
 
 
 def _do_load_page(app, path, path_mtime):
@@ -253,15 +284,15 @@
     cache = app.cache.getCache('pages')
     cache_path = hashlib.md5(path.encode('utf8')).hexdigest() + '.json'
     page_time = path_mtime or os.path.getmtime(path)
     if cache.isValid(cache_path, page_time):
         cache_data = json.loads(
             cache.read(cache_path),
             object_pairs_hook=collections.OrderedDict)
         config = PageConfiguration(
             values=cache_data['config'],
             validate=False)
         content = json_load_segments(cache_data['content'])
         return config, content, True
 
     # Nope, load the page from the source file.
     logger.debug("Loading page configuration from: %s" % path)
@@ -278,23 +309,23 @@
     content = parse_segments(raw, offset)
     config.set('segments', list(content.keys()))
 
     # Save to the cache.
     cache_data = {
         'config': config.getAll(),
         'content': json_save_segments(content)}
     cache.write(cache_path, json.dumps(cache_data))
 
     return config, content, False
 
 
 segment_pattern = re.compile(
     r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""",
     re.M)
 part_pattern = re.compile(
     r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""",
     re.M)
 
 
 def _count_lines(s):
     return len(s.split('\n'))
 
@@ -321,11 +352,11 @@
     # Figure out if we need any parsing.
     do_parse = _string_needs_parsing(raw, offset)
     if not do_parse:
         seg = ContentSegment()
         seg.parts = [
             ContentSegmentPart(raw[offset:], None, offset, current_line)]
         return {'content': seg}
 
     # Start parsing segments and parts.
     matches = list(segment_pattern.finditer(raw, offset))
     num_matches = len(matches)
@@ -335,36 +366,36 @@
         first_offset = matches[0].start()
         if first_offset > 0:
             # There's some default content segment at the beginning.
             seg = ContentSegment()
             seg.parts, current_line = parse_segment_parts(
                 raw, offset, first_offset, current_line)
             contents['content'] = seg
 
         for i in range(1, num_matches):
             m1 = matches[i - 1]
             m2 = matches[i]
             seg = ContentSegment()
             seg.parts, current_line = parse_segment_parts(
                 raw, m1.end() + 1, m2.start(), current_line,
                 m1.group('fmt'))
             contents[m1.group('name')] = seg
 
         # Handle text past the last match.
         lastm = matches[-1]
         seg = ContentSegment()
         seg.parts, current_line = parse_segment_parts(
             raw, lastm.end() + 1, len(raw), current_line,
             lastm.group('fmt'))
         contents[lastm.group('name')] = seg
 
         return contents
     else:
         # No segments, just content.
         seg = ContentSegment()
         seg.parts, current_line = parse_segment_parts(
             raw, offset, len(raw), current_line)
         return {'content': seg}
 
 
 def parse_segment_parts(raw, start, end, line_offset, first_part_fmt=None):
     matches = list(part_pattern.finditer(raw, start, end))
@@ -373,29 +404,29 @@
         parts = []
 
         # First part, before the first format change.
         part_text = raw[start:matches[0].start()]
         parts.append(
             ContentSegmentPart(part_text, first_part_fmt, start,
                                line_offset))
         line_offset += _count_lines(part_text)
 
         for i in range(1, num_matches):
             m1 = matches[i - 1]
             m2 = matches[i]
             part_text = raw[m1.end() + 1:m2.start()]
             parts.append(
                 ContentSegmentPart(
                     part_text, m1.group('fmt'), m1.end() + 1,
                     line_offset))
             line_offset += _count_lines(part_text)
 
         lastm = matches[-1]
         part_text = raw[lastm.end() + 1:end]
         parts.append(ContentSegmentPart(
             part_text, lastm.group('fmt'), lastm.end() + 1,
             line_offset))
 
         return parts, line_offset
     else:
         part_text = raw[start:end]
         parts = [ContentSegmentPart(part_text, first_part_fmt, start,