Mercurial > piecrust2
comparison piecrust/page.py @ 853:f070a4fc033c
core: Continue PieCrust3 refactor, simplify pages.
The asset pipeline is still the only functional pipeline at this point.
* No more `QualifiedPage`, and several other pieces of code deleted.
* Data providers are simpler and more focused. For instance, the page iterator
doesn't try to support other types of items.
* Route parameters are now proper, known source metadata, which removes the
confusion between the two.
* Make the baker and pipeline manage records and record histories more
correctly.
* Add support for record collapsing and deleting stale outputs in the asset
pipeline.
| author | Ludovic Chabant <ludovic@chabant.com> |
|---|---|
| date | Sun, 21 May 2017 00:06:59 -0700 |
| parents | 4850f8c21b6e |
| children | 9bb22bbe093c |
comparison
equal
deleted
inserted
replaced
| 852:4850f8c21b6e | 853:f070a4fc033c |
|---|---|
| 1 import re | 1 import re |
| 2 import sys | |
| 3 import json | 2 import json |
| 4 import os.path | |
| 5 import hashlib | 3 import hashlib |
| 6 import logging | 4 import logging |
| 7 import datetime | 5 import datetime |
| 8 import dateutil.parser | 6 import dateutil.parser |
| 9 import collections | 7 import collections |
| 10 from werkzeug.utils import cached_property | 8 from werkzeug.utils import cached_property |
| 11 from piecrust.configuration import ( | 9 from piecrust.configuration import ( |
| 12 Configuration, ConfigurationError, | 10 Configuration, ConfigurationError, |
| 13 parse_config_header) | 11 parse_config_header, |
| 12 MERGE_PREPEND_LISTS) | |
| 14 | 13 |
| 15 | 14 |
| 16 logger = logging.getLogger(__name__) | 15 logger = logging.getLogger(__name__) |
| 17 | 16 |
| 18 | 17 |
| 38 | 37 |
| 39 class PageNotFoundError(Exception): | 38 class PageNotFoundError(Exception): |
| 40 pass | 39 pass |
| 41 | 40 |
| 42 | 41 |
| 43 class QualifiedPage(object): | 42 class Page: |
| 44 def __init__(self, page, route, route_params, *, page_num=1): | 43 """ Represents a page that is text content with an optional YAML |
| 45 self.page = page | 44 front-matter, and that goes through the page pipeline. |
| 46 self.page_num = page_num | 45 """ |
| 47 self.route = route | 46 def __init__(self, source, content_item): |
| 48 self.route_params = route_params | 47 self.source = source |
| 49 | |
| 50 @property | |
| 51 def app(self): | |
| 52 return self.page.app | |
| 53 | |
| 54 @property | |
| 55 def source(self): | |
| 56 return self.page.source | |
| 57 | |
| 58 @cached_property | |
| 59 def uri(self): | |
| 60 return self.route.getUri(self.route_params, self.page_num) | |
| 61 | |
| 62 def getSubPage(self, page_num): | |
| 63 return QualifiedPage(self.page, self.route, self.route_params, | |
| 64 page_num=self.page_num + 1) | |
| 65 | |
| 66 | |
| 67 class Page(object): | |
| 68 def __init__(self, content_item): | |
| 69 self.content_item = content_item | 48 self.content_item = content_item |
| 70 self._config = None | 49 self._config = None |
| 71 self._segments = None | 50 self._segments = None |
| 72 self._flags = FLAG_NONE | 51 self._flags = FLAG_NONE |
| 73 self._datetime = None | 52 self._datetime = None |
| 74 | 53 |
| 75 @property | 54 @cached_property |
| 76 def source(self): | 55 def app(self): |
| 77 return self.content_item.source | 56 return self.source.app |
| 57 | |
| 58 @cached_property | |
| 59 def route(self): | |
| 60 return self.source.route | |
| 78 | 61 |
| 79 @property | 62 @property |
| 80 def source_metadata(self): | 63 def source_metadata(self): |
| 81 return self.content_item.metadata | 64 return self.content_item.metadata |
| 82 | 65 |
| 83 @property | 66 @property |
| 84 def content_spec(self): | 67 def content_spec(self): |
| 85 return self.content_item.spec | 68 return self.content_item.spec |
| 86 | 69 |
| 87 @property | |
| 88 def app(self): | |
| 89 return self.content_item.source.app | |
| 90 | |
| 91 @cached_property | 70 @cached_property |
| 92 def content_mtime(self): | 71 def content_mtime(self): |
| 93 return self.content_item.getmtime() | 72 return self.source.getItemMtime(self.content_item) |
| 94 | 73 |
| 95 @property | 74 @property |
| 96 def flags(self): | 75 def flags(self): |
| 97 return self._flags | 76 return self._flags |
| 98 | 77 |
| 108 | 87 |
| 109 @property | 88 @property |
| 110 def datetime(self): | 89 def datetime(self): |
| 111 if self._datetime is None: | 90 if self._datetime is None: |
| 112 try: | 91 try: |
| 113 if 'datetime' in self.source_metadata: | 92 self._datetime = self._computeDateTime() |
| 114 # Get the date/time from the source. | |
| 115 self._datetime = self.source_metadata['datetime'] | |
| 116 elif 'date' in self.source_metadata: | |
| 117 # Get the date from the source. Potentially get the | |
| 118 # time from the page config. | |
| 119 page_date = self.source_metadata['date'] | |
| 120 page_time = _parse_config_time(self.config.get('time')) | |
| 121 if page_time is not None: | |
| 122 self._datetime = datetime.datetime( | |
| 123 page_date.year, | |
| 124 page_date.month, | |
| 125 page_date.day) + page_time | |
| 126 else: | |
| 127 self._datetime = datetime.datetime( | |
| 128 page_date.year, page_date.month, page_date.day) | |
| 129 elif 'date' in self.config: | |
| 130 # Get the date from the page config, and maybe the | |
| 131 # time too. | |
| 132 page_date = _parse_config_date(self.config.get('date')) | |
| 133 self._datetime = datetime.datetime( | |
| 134 page_date.year, | |
| 135 page_date.month, | |
| 136 page_date.day) | |
| 137 page_time = _parse_config_time(self.config.get('time')) | |
| 138 if page_time is not None: | |
| 139 self._datetime += page_time | |
| 140 else: | |
| 141 # No idea what the date/time for this page is. | |
| 142 self._datetime = datetime.datetime.fromtimestamp(0) | |
| 143 except Exception as ex: | 93 except Exception as ex: |
| 144 logger.exception(ex) | 94 logger.exception(ex) |
| 145 raise Exception( | 95 raise Exception( |
| 146 "Error computing time for page: %s" % | 96 "Error computing time for page: %s" % |
| 147 self.path) from ex | 97 self.content_spec) from ex |
| 98 | |
| 99 if self._datetime is None: | |
| 100 self._datetime = datetime.datetime.fromtimestamp( | |
| 101 self.content_mtime) | |
| 102 | |
| 148 return self._datetime | 103 return self._datetime |
| 149 | 104 |
| 150 @datetime.setter | 105 @datetime.setter |
| 151 def datetime(self, value): | 106 def datetime(self, value): |
| 152 self._datetime = value | 107 self._datetime = value |
| 153 | 108 |
| 109 def getUri(self, sub_num=1): | |
| 110 route_params = self.source_metadata['route_params'] | |
| 111 return self.route.getUri(route_params, sub_num=sub_num) | |
| 112 | |
| 154 def getSegment(self, name='content'): | 113 def getSegment(self, name='content'): |
| 155 return self.segments[name] | 114 return self.segments[name] |
| 115 | |
| 116 def _computeDateTime(self): | |
| 117 if 'datetime' in self.source_metadata: | |
| 118 # Get the date/time from the source. | |
| 119 self._datetime = self.source_metadata['datetime'] | |
| 120 elif 'date' in self.source_metadata: | |
| 121 # Get the date from the source. Potentially get the | |
| 122 # time from the page config. | |
| 123 page_date = self.source_metadata['date'] | |
| 124 page_time = _parse_config_time(self.config.get('time')) | |
| 125 if page_time is not None: | |
| 126 self._datetime = datetime.datetime( | |
| 127 page_date.year, | |
| 128 page_date.month, | |
| 129 page_date.day) + page_time | |
| 130 else: | |
| 131 self._datetime = datetime.datetime( | |
| 132 page_date.year, page_date.month, page_date.day) | |
| 133 elif 'date' in self.config: | |
| 134 # Get the date from the page config, and maybe the | |
| 135 # time too. | |
| 136 page_date = _parse_config_date(self.config.get('date')) | |
| 137 self._datetime = datetime.datetime( | |
| 138 page_date.year, | |
| 139 page_date.month, | |
| 140 page_date.day) | |
| 141 page_time = _parse_config_time(self.config.get('time')) | |
| 142 if page_time is not None: | |
| 143 self._datetime += page_time | |
| 144 else: | |
| 145 # No idea what the date/time for this page is. | |
| 146 self._datetime = datetime.datetime.fromtimestamp(0) | |
| 156 | 147 |
| 157 def _load(self): | 148 def _load(self): |
| 158 if self._config is not None: | 149 if self._config is not None: |
| 159 return | 150 return |
| 160 | 151 |
| 161 config, content, was_cache_valid = load_page( | 152 config, content, was_cache_valid = load_page( |
| 162 self.app, self.path, self.path_mtime) | 153 self.source, self.content_item) |
| 163 | 154 |
| 164 if 'config' in self.source_metadata: | 155 extra_config = self.source_metadata.get('config') |
| 165 config.merge(self.source_metadata['config']) | 156 if extra_config is not None: |
| 157 # Merge the source metadata configuration settings with the | |
| 158 # configuration settings from the page's contents. We only | |
| 159 # prepend to lists, i.e. we don't overwrite values because we | |
| 160 # want to keep what the user wrote in the file. | |
| 161 config.merge(extra_config, mode=MERGE_PREPEND_LISTS) | |
| 166 | 162 |
| 167 self._config = config | 163 self._config = config |
| 168 self._segments = content | 164 self._segments = content |
| 169 if was_cache_valid: | 165 if was_cache_valid: |
| 170 self._flags |= FLAG_RAW_CACHE_VALID | 166 self._flags |= FLAG_RAW_CACHE_VALID |
| 171 | |
| 172 self.source.finalizeConfig(self) | |
| 173 | 167 |
| 174 | 168 |
| 175 def _parse_config_date(page_date): | 169 def _parse_config_date(page_date): |
| 176 if page_date is None: | 170 if page_date is None: |
| 177 return None | 171 return None |
| 214 | 208 |
| 215 raise ConfigurationError("Invalid time: %s" % page_time) | 209 raise ConfigurationError("Invalid time: %s" % page_time) |
| 216 | 210 |
| 217 | 211 |
| 218 class PageLoadingError(Exception): | 212 class PageLoadingError(Exception): |
| 219 def __init__(self, path, inner=None): | 213 def __init__(self, spec): |
| 220 super(PageLoadingError, self).__init__( | 214 super().__init__("Error loading page: %s" % spec) |
| 221 "Error loading page: %s" % path, | |
| 222 inner) | |
| 223 | 215 |
| 224 | 216 |
| 225 class ContentSegment(object): | 217 class ContentSegment(object): |
| 226 debug_render_func = 'debug_render' | 218 debug_render_func = 'debug_render' |
| 227 | 219 |
| 265 seg_data.append(p_data) | 257 seg_data.append(p_data) |
| 266 data[key] = seg_data | 258 data[key] = seg_data |
| 267 return data | 259 return data |
| 268 | 260 |
| 269 | 261 |
| 270 def load_page(app, path, path_mtime=None): | 262 def load_page(source, content_item): |
| 271 try: | 263 try: |
| 272 with app.env.timerScope('PageLoad'): | 264 with source.app.env.stats.timerScope('PageLoad'): |
| 273 return _do_load_page(app, path, path_mtime) | 265 return _do_load_page(source, content_item) |
| 274 except Exception as e: | 266 except Exception as e: |
| 275 logger.exception( | 267 logger.exception("Error loading page: %s" % content_item.spec) |
| 276 "Error loading page: %s" % | 268 raise PageLoadingError(content_item.spec) from e |
| 277 os.path.relpath(path, app.root_dir)) | 269 |
| 278 _, __, traceback = sys.exc_info() | 270 |
| 279 raise PageLoadingError(path, e).with_traceback(traceback) | 271 def _do_load_page(source, content_item): |
| 280 | |
| 281 | |
| 282 def _do_load_page(app, path, path_mtime): | |
| 283 # Check the cache first. | 272 # Check the cache first. |
| 273 app = source.app | |
| 284 cache = app.cache.getCache('pages') | 274 cache = app.cache.getCache('pages') |
| 285 cache_path = hashlib.md5(path.encode('utf8')).hexdigest() + '.json' | 275 cache_token = "%s@%s" % (source.name, content_item.spec) |
| 286 page_time = path_mtime or os.path.getmtime(path) | 276 cache_path = hashlib.md5(cache_token.encode('utf8')).hexdigest() + '.json' |
| 277 page_time = source.getItemMtime(content_item) | |
| 287 if cache.isValid(cache_path, page_time): | 278 if cache.isValid(cache_path, page_time): |
| 288 cache_data = json.loads( | 279 cache_data = json.loads( |
| 289 cache.read(cache_path), | 280 cache.read(cache_path), |
| 290 object_pairs_hook=collections.OrderedDict) | 281 object_pairs_hook=collections.OrderedDict) |
| 291 config = PageConfiguration( | 282 config = PageConfiguration( |
| 293 validate=False) | 284 validate=False) |
| 294 content = json_load_segments(cache_data['content']) | 285 content = json_load_segments(cache_data['content']) |
| 295 return config, content, True | 286 return config, content, True |
| 296 | 287 |
| 297 # Nope, load the page from the source file. | 288 # Nope, load the page from the source file. |
| 298 logger.debug("Loading page configuration from: %s" % path) | 289 logger.debug("Loading page configuration from: %s" % content_item.spec) |
| 299 with open(path, 'r', encoding='utf-8') as fp: | 290 with source.openItem(content_item, 'r', encoding='utf-8') as fp: |
| 300 raw = fp.read() | 291 raw = fp.read() |
| 301 header, offset = parse_config_header(raw) | 292 header, offset = parse_config_header(raw) |
| 302 | |
| 303 if 'format' not in header: | |
| 304 auto_formats = app.config.get('site/auto_formats') | |
| 305 name, ext = os.path.splitext(path) | |
| 306 header['format'] = auto_formats.get(ext, None) | |
| 307 | 293 |
| 308 config = PageConfiguration(header) | 294 config = PageConfiguration(header) |
| 309 content = parse_segments(raw, offset) | 295 content = parse_segments(raw, offset) |
| 310 config.set('segments', list(content.keys())) | 296 config.set('segments', list(content.keys())) |
| 311 | 297 |
