comparison piecrust/page.py @ 853:f070a4fc033c

core: Continue PieCrust3 refactor, simplify pages. The asset pipeline is still the only functional pipeline at this point. * No more `QualifiedPage`, and several other pieces of code deleted. * Data providers are simpler and more focused. For instance, the page iterator doesn't try to support other types of items. * Route parameters are proper known source metadata to remove the confusion between the two. * Make the baker and pipeline more correctly manage records and record histories. * Add support for record collapsing and deleting stale outputs in the asset pipeline.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 21 May 2017 00:06:59 -0700
parents 4850f8c21b6e
children 9bb22bbe093c
comparison
equal deleted inserted replaced
852:4850f8c21b6e 853:f070a4fc033c
1 import re 1 import re
2 import sys
3 import json 2 import json
4 import os.path
5 import hashlib 3 import hashlib
6 import logging 4 import logging
7 import datetime 5 import datetime
8 import dateutil.parser 6 import dateutil.parser
9 import collections 7 import collections
10 from werkzeug.utils import cached_property 8 from werkzeug.utils import cached_property
11 from piecrust.configuration import ( 9 from piecrust.configuration import (
12 Configuration, ConfigurationError, 10 Configuration, ConfigurationError,
13 parse_config_header) 11 parse_config_header,
12 MERGE_PREPEND_LISTS)
14 13
15 14
16 logger = logging.getLogger(__name__) 15 logger = logging.getLogger(__name__)
17 16
18 17
38 37
39 class PageNotFoundError(Exception): 38 class PageNotFoundError(Exception):
40 pass 39 pass
41 40
42 41
43 class QualifiedPage(object): 42 class Page:
44 def __init__(self, page, route, route_params, *, page_num=1): 43 """ Represents a page that is text content with an optional YAML
45 self.page = page 44 front-matter, and that goes through the page pipeline.
46 self.page_num = page_num 45 """
47 self.route = route 46 def __init__(self, source, content_item):
48 self.route_params = route_params 47 self.source = source
49
50 @property
51 def app(self):
52 return self.page.app
53
54 @property
55 def source(self):
56 return self.page.source
57
58 @cached_property
59 def uri(self):
60 return self.route.getUri(self.route_params, self.page_num)
61
62 def getSubPage(self, page_num):
63 return QualifiedPage(self.page, self.route, self.route_params,
64 page_num=self.page_num + 1)
65
66
67 class Page(object):
68 def __init__(self, content_item):
69 self.content_item = content_item 48 self.content_item = content_item
70 self._config = None 49 self._config = None
71 self._segments = None 50 self._segments = None
72 self._flags = FLAG_NONE 51 self._flags = FLAG_NONE
73 self._datetime = None 52 self._datetime = None
74 53
75 @property 54 @cached_property
76 def source(self): 55 def app(self):
77 return self.content_item.source 56 return self.source.app
57
58 @cached_property
59 def route(self):
60 return self.source.route
78 61
79 @property 62 @property
80 def source_metadata(self): 63 def source_metadata(self):
81 return self.content_item.metadata 64 return self.content_item.metadata
82 65
83 @property 66 @property
84 def content_spec(self): 67 def content_spec(self):
85 return self.content_item.spec 68 return self.content_item.spec
86 69
87 @property
88 def app(self):
89 return self.content_item.source.app
90
91 @cached_property 70 @cached_property
92 def content_mtime(self): 71 def content_mtime(self):
93 return self.content_item.getmtime() 72 return self.source.getItemMtime(self.content_item)
94 73
95 @property 74 @property
96 def flags(self): 75 def flags(self):
97 return self._flags 76 return self._flags
98 77
108 87
109 @property 88 @property
110 def datetime(self): 89 def datetime(self):
111 if self._datetime is None: 90 if self._datetime is None:
112 try: 91 try:
113 if 'datetime' in self.source_metadata: 92 self._datetime = self._computeDateTime()
114 # Get the date/time from the source.
115 self._datetime = self.source_metadata['datetime']
116 elif 'date' in self.source_metadata:
117 # Get the date from the source. Potentially get the
118 # time from the page config.
119 page_date = self.source_metadata['date']
120 page_time = _parse_config_time(self.config.get('time'))
121 if page_time is not None:
122 self._datetime = datetime.datetime(
123 page_date.year,
124 page_date.month,
125 page_date.day) + page_time
126 else:
127 self._datetime = datetime.datetime(
128 page_date.year, page_date.month, page_date.day)
129 elif 'date' in self.config:
130 # Get the date from the page config, and maybe the
131 # time too.
132 page_date = _parse_config_date(self.config.get('date'))
133 self._datetime = datetime.datetime(
134 page_date.year,
135 page_date.month,
136 page_date.day)
137 page_time = _parse_config_time(self.config.get('time'))
138 if page_time is not None:
139 self._datetime += page_time
140 else:
141 # No idea what the date/time for this page is.
142 self._datetime = datetime.datetime.fromtimestamp(0)
143 except Exception as ex: 93 except Exception as ex:
144 logger.exception(ex) 94 logger.exception(ex)
145 raise Exception( 95 raise Exception(
146 "Error computing time for page: %s" % 96 "Error computing time for page: %s" %
147 self.path) from ex 97 self.content_spec) from ex
98
99 if self._datetime is None:
100 self._datetime = datetime.datetime.fromtimestamp(
101 self.content_mtime)
102
148 return self._datetime 103 return self._datetime
149 104
150 @datetime.setter 105 @datetime.setter
151 def datetime(self, value): 106 def datetime(self, value):
152 self._datetime = value 107 self._datetime = value
153 108
109 def getUri(self, sub_num=1):
110 route_params = self.source_metadata['route_params']
111 return self.route.getUri(route_params, sub_num=sub_num)
112
154 def getSegment(self, name='content'): 113 def getSegment(self, name='content'):
155 return self.segments[name] 114 return self.segments[name]
115
116 def _computeDateTime(self):
117 if 'datetime' in self.source_metadata:
118 # Get the date/time from the source.
119 self._datetime = self.source_metadata['datetime']
120 elif 'date' in self.source_metadata:
121 # Get the date from the source. Potentially get the
122 # time from the page config.
123 page_date = self.source_metadata['date']
124 page_time = _parse_config_time(self.config.get('time'))
125 if page_time is not None:
126 self._datetime = datetime.datetime(
127 page_date.year,
128 page_date.month,
129 page_date.day) + page_time
130 else:
131 self._datetime = datetime.datetime(
132 page_date.year, page_date.month, page_date.day)
133 elif 'date' in self.config:
134 # Get the date from the page config, and maybe the
135 # time too.
136 page_date = _parse_config_date(self.config.get('date'))
137 self._datetime = datetime.datetime(
138 page_date.year,
139 page_date.month,
140 page_date.day)
141 page_time = _parse_config_time(self.config.get('time'))
142 if page_time is not None:
143 self._datetime += page_time
144 else:
145 # No idea what the date/time for this page is.
146 self._datetime = datetime.datetime.fromtimestamp(0)
156 147
157 def _load(self): 148 def _load(self):
158 if self._config is not None: 149 if self._config is not None:
159 return 150 return
160 151
161 config, content, was_cache_valid = load_page( 152 config, content, was_cache_valid = load_page(
162 self.app, self.path, self.path_mtime) 153 self.source, self.content_item)
163 154
164 if 'config' in self.source_metadata: 155 extra_config = self.source_metadata.get('config')
165 config.merge(self.source_metadata['config']) 156 if extra_config is not None:
157 # Merge the source metadata configuration settings with the
158 # configuration settings from the page's contents. We only
159 # prepend to lists, i.e. we don't overwrite values because we
160 # want to keep what the user wrote in the file.
161 config.merge(extra_config, mode=MERGE_PREPEND_LISTS)
166 162
167 self._config = config 163 self._config = config
168 self._segments = content 164 self._segments = content
169 if was_cache_valid: 165 if was_cache_valid:
170 self._flags |= FLAG_RAW_CACHE_VALID 166 self._flags |= FLAG_RAW_CACHE_VALID
171
172 self.source.finalizeConfig(self)
173 167
174 168
175 def _parse_config_date(page_date): 169 def _parse_config_date(page_date):
176 if page_date is None: 170 if page_date is None:
177 return None 171 return None
214 208
215 raise ConfigurationError("Invalid time: %s" % page_time) 209 raise ConfigurationError("Invalid time: %s" % page_time)
216 210
217 211
218 class PageLoadingError(Exception): 212 class PageLoadingError(Exception):
219 def __init__(self, path, inner=None): 213 def __init__(self, spec):
220 super(PageLoadingError, self).__init__( 214 super().__init__("Error loading page: %s" % spec)
221 "Error loading page: %s" % path,
222 inner)
223 215
224 216
225 class ContentSegment(object): 217 class ContentSegment(object):
226 debug_render_func = 'debug_render' 218 debug_render_func = 'debug_render'
227 219
265 seg_data.append(p_data) 257 seg_data.append(p_data)
266 data[key] = seg_data 258 data[key] = seg_data
267 return data 259 return data
268 260
269 261
270 def load_page(app, path, path_mtime=None): 262 def load_page(source, content_item):
271 try: 263 try:
272 with app.env.timerScope('PageLoad'): 264 with source.app.env.stats.timerScope('PageLoad'):
273 return _do_load_page(app, path, path_mtime) 265 return _do_load_page(source, content_item)
274 except Exception as e: 266 except Exception as e:
275 logger.exception( 267 logger.exception("Error loading page: %s" % content_item.spec)
276 "Error loading page: %s" % 268 raise PageLoadingError(content_item.spec) from e
277 os.path.relpath(path, app.root_dir)) 269
278 _, __, traceback = sys.exc_info() 270
279 raise PageLoadingError(path, e).with_traceback(traceback) 271 def _do_load_page(source, content_item):
280
281
282 def _do_load_page(app, path, path_mtime):
283 # Check the cache first. 272 # Check the cache first.
273 app = source.app
284 cache = app.cache.getCache('pages') 274 cache = app.cache.getCache('pages')
285 cache_path = hashlib.md5(path.encode('utf8')).hexdigest() + '.json' 275 cache_token = "%s@%s" % (source.name, content_item.spec)
286 page_time = path_mtime or os.path.getmtime(path) 276 cache_path = hashlib.md5(cache_token.encode('utf8')).hexdigest() + '.json'
277 page_time = source.getItemMtime(content_item)
287 if cache.isValid(cache_path, page_time): 278 if cache.isValid(cache_path, page_time):
288 cache_data = json.loads( 279 cache_data = json.loads(
289 cache.read(cache_path), 280 cache.read(cache_path),
290 object_pairs_hook=collections.OrderedDict) 281 object_pairs_hook=collections.OrderedDict)
291 config = PageConfiguration( 282 config = PageConfiguration(
293 validate=False) 284 validate=False)
294 content = json_load_segments(cache_data['content']) 285 content = json_load_segments(cache_data['content'])
295 return config, content, True 286 return config, content, True
296 287
297 # Nope, load the page from the source file. 288 # Nope, load the page from the source file.
298 logger.debug("Loading page configuration from: %s" % path) 289 logger.debug("Loading page configuration from: %s" % content_item.spec)
299 with open(path, 'r', encoding='utf-8') as fp: 290 with source.openItem(content_item, 'r', encoding='utf-8') as fp:
300 raw = fp.read() 291 raw = fp.read()
301 header, offset = parse_config_header(raw) 292 header, offset = parse_config_header(raw)
302
303 if 'format' not in header:
304 auto_formats = app.config.get('site/auto_formats')
305 name, ext = os.path.splitext(path)
306 header['format'] = auto_formats.get(ext, None)
307 293
308 config = PageConfiguration(header) 294 config = PageConfiguration(header)
309 content = parse_segments(raw, offset) 295 content = parse_segments(raw, offset)
310 config.set('segments', list(content.keys())) 296 config.set('segments', list(content.keys()))
311 297