piecrust2: piecrust/page.py @ 803:bf9f4e55f751
rendering: Separate performance timers for rendering segments and layouts.
author:   Ludovic Chabant <ludovic@chabant.com>
date:     Thu, 29 Sep 2016 21:59:20 -0700
parents:  58ebf50235a5
children: e01473c3ea7e
import re
import sys
import json
import os.path
import hashlib
import logging
import datetime
import dateutil.parser
import collections
from werkzeug.utils import cached_property
from piecrust.configuration import (
    Configuration, ConfigurationError,
    parse_config_header)


logger = logging.getLogger(__name__)


class PageConfiguration(Configuration):
    def __init__(self, values=None, validate=True):
        super(PageConfiguration, self).__init__(values, validate)

    def _validateAll(self, values):
        values.setdefault('title', 'Untitled Page')
        values.setdefault('content_type', 'html')
        ppp = values.get('posts_per_page')
        if ppp is not None:
            values.setdefault('items_per_page', ppp)
        pf = values.get('posts_filters')
        if pf is not None:
            values.setdefault('items_filters', pf)
        return values


FLAG_NONE = 0
FLAG_RAW_CACHE_VALID = 2**0


class Page(object):
    def __init__(self, source, source_metadata, rel_path):
        self.source = source
        self.source_metadata = source_metadata
        self.rel_path = rel_path
        self._config = None
        self._segments = None
        self._flags = FLAG_NONE
        self._datetime = None

    @property
    def app(self):
        return self.source.app

    @property
    def ref_spec(self):
        return '%s:%s' % (self.source.name, self.rel_path)

    @cached_property
    def path(self):
        path, _ = self.source.resolveRef(self.rel_path)
        return path

    @cached_property
    def path_mtime(self):
        return os.path.getmtime(self.path)

    @property
    def flags(self):
        return self._flags

    @property
    def config(self):
        self._load()
        return self._config

    @property
    def segments(self):
        self._load()
        return self._segments

    @property
    def datetime(self):
        if self._datetime is None:
            try:
                if 'datetime' in self.source_metadata:
                    # Get the date/time from the source.
                    self._datetime = self.source_metadata['datetime']
                elif 'date' in self.source_metadata:
                    # Get the date from the source. Potentially get the
                    # time from the page config.
                    page_date = self.source_metadata['date']
                    page_time = _parse_config_time(self.config.get('time'))
                    if page_time is not None:
                        self._datetime = datetime.datetime(
                            page_date.year,
                            page_date.month,
                            page_date.day) + page_time
                    else:
                        self._datetime = datetime.datetime(
                            page_date.year, page_date.month, page_date.day)
                elif 'date' in self.config:
                    # Get the date from the page config, and maybe the
                    # time too.
                    page_date = _parse_config_date(self.config.get('date'))
                    self._datetime = datetime.datetime(
                        page_date.year,
                        page_date.month,
                        page_date.day)
                    page_time = _parse_config_time(self.config.get('time'))
                    if page_time is not None:
                        self._datetime += page_time
                else:
                    # No idea what the date/time for this page is.
                    self._datetime = datetime.datetime.fromtimestamp(0)
            except Exception as ex:
                logger.exception(ex)
                raise Exception(
                    "Error computing time for page: %s" % self.path) from ex
        return self._datetime

    @datetime.setter
    def datetime(self, value):
        self._datetime = value

    def getSegment(self, name='content'):
        return self.segments[name]

    def _load(self):
        if self._config is not None:
            return

        config, content, was_cache_valid = load_page(self.app, self.path,
                                                     self.path_mtime)
        if 'config' in self.source_metadata:
            config.merge(self.source_metadata['config'])
        self._config = config
        self._segments = content
        if was_cache_valid:
            self._flags |= FLAG_RAW_CACHE_VALID


def _parse_config_date(page_date):
    if page_date is None:
        return None

    if isinstance(page_date, str):
        try:
            parsed_d = dateutil.parser.parse(page_date)
        except Exception as ex:
            logger.exception(ex)
            raise ConfigurationError("Invalid date: %s" % page_date) from ex
        return datetime.date(
            year=parsed_d.year,
            month=parsed_d.month,
            day=parsed_d.day)

    raise ConfigurationError("Invalid date: %s" % page_date)


def _parse_config_time(page_time):
    if page_time is None:
        return None

    if isinstance(page_time, datetime.timedelta):
        return page_time

    if isinstance(page_time, str):
        try:
            parsed_t = dateutil.parser.parse(page_time)
        except Exception as ex:
            logger.exception(ex)
            raise ConfigurationError("Invalid time: %s" % page_time) from ex
        return datetime.timedelta(
            hours=parsed_t.hour,
            minutes=parsed_t.minute,
            seconds=parsed_t.second)

    if isinstance(page_time, int):
        # Total seconds... convert to a time struct.
        return datetime.timedelta(seconds=page_time)

    raise ConfigurationError("Invalid time: %s" % page_time)


class PageLoadingError(Exception):
    def __init__(self, path, inner=None):
        super(PageLoadingError, self).__init__(
            "Error loading page: %s" % path, inner)


class ContentSegment(object):
    debug_render_func = 'debug_render'

    def __init__(self):
        self.parts = []

    def debug_render(self):
        return '\n'.join([p.content for p in self.parts])


class ContentSegmentPart(object):
    def __init__(self, content, fmt=None, offset=-1, line=-1):
        self.content = content
        self.fmt = fmt
        self.offset = offset
        self.line = line

    def __str__(self):
        return '%s [%s]' % (self.content, self.fmt or '<default>')


def json_load_segments(data):
    segments = {}
    for key, seg_data in data.items():
        seg = ContentSegment()
        for p_data in seg_data:
            part = ContentSegmentPart(p_data['c'], p_data['f'],
                                      p_data['o'], p_data['l'])
            seg.parts.append(part)
        segments[key] = seg
    return segments


def json_save_segments(segments):
    data = {}
    for key, seg in segments.items():
        seg_data = []
        for part in seg.parts:
            p_data = {'c': part.content, 'f': part.fmt,
                      'o': part.offset, 'l': part.line}
            seg_data.append(p_data)
        data[key] = seg_data
    return data


def load_page(app, path, path_mtime=None):
    try:
        with app.env.timerScope('PageLoad'):
            return _do_load_page(app, path, path_mtime)
    except Exception as e:
        logger.exception(
            "Error loading page: %s" %
            os.path.relpath(path, app.root_dir))
        _, __, traceback = sys.exc_info()
        raise PageLoadingError(path, e).with_traceback(traceback)


def _do_load_page(app, path, path_mtime):
    # Check the cache first.
    cache = app.cache.getCache('pages')
    cache_path = hashlib.md5(path.encode('utf8')).hexdigest() + '.json'
    page_time = path_mtime or os.path.getmtime(path)
    if cache.isValid(cache_path, page_time):
        cache_data = json.loads(
            cache.read(cache_path),
            object_pairs_hook=collections.OrderedDict)
        config = PageConfiguration(
            values=cache_data['config'],
            validate=False)
        content = json_load_segments(cache_data['content'])
        return config, content, True

    # Nope, load the page from the source file.
    logger.debug("Loading page configuration from: %s" % path)
    with open(path, 'r', encoding='utf-8') as fp:
        raw = fp.read()
    header, offset = parse_config_header(raw)

    if 'format' not in header:
        auto_formats = app.config.get('site/auto_formats')
        name, ext = os.path.splitext(path)
        header['format'] = auto_formats.get(ext, None)

    config = PageConfiguration(header)
    content = parse_segments(raw, offset)
    config.set('segments', list(content.keys()))

    # Save to the cache.
    cache_data = {
        'config': config.getAll(),
        'content': json_save_segments(content)}
    cache.write(cache_path, json.dumps(cache_data))

    return config, content, False


segment_pattern = re.compile(
    r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""",
    re.M)
part_pattern = re.compile(
    r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""",
    re.M)


def _count_lines(s):
    return len(s.split('\n'))


def _string_needs_parsing(txt, offset):
    txtlen = len(txt)
    index = txt.find('-', offset)
    while index >= 0 and index < txtlen - 8:
        # Look for a potential `<--format-->`
        if index > 0 and txt[index - 1] == '<' and txt[index + 1] == '-':
            return True
        # Look for a potential `---segment---`
        if txt[index + 1] == '-' and txt[index + 2] == '-':
            return True
        index = txt.find('-', index + 1)
    return False


def parse_segments(raw, offset=0):
    # Get the number of lines in the header.
    header_lines = _count_lines(raw[:offset].rstrip())
    current_line = header_lines

    # Figure out if we need any parsing.
    do_parse = _string_needs_parsing(raw, offset)
    if not do_parse:
        seg = ContentSegment()
        seg.parts = [
            ContentSegmentPart(raw[offset:], None, offset, current_line)]
        return {'content': seg}

    # Start parsing segments and parts.
    matches = list(segment_pattern.finditer(raw, offset))
    num_matches = len(matches)
    if num_matches > 0:
        contents = {}

        first_offset = matches[0].start()
        if first_offset > 0:
            # There's some default content segment at the beginning.
            seg = ContentSegment()
            seg.parts, current_line = parse_segment_parts(
                raw, offset, first_offset, current_line)
            contents['content'] = seg

        for i in range(1, num_matches):
            m1 = matches[i - 1]
            m2 = matches[i]
            seg = ContentSegment()
            seg.parts, current_line = parse_segment_parts(
                raw, m1.end() + 1, m2.start(), current_line,
                m1.group('fmt'))
            contents[m1.group('name')] = seg

        # Handle text past the last match.
        lastm = matches[-1]
        seg = ContentSegment()
        seg.parts, current_line = parse_segment_parts(
            raw, lastm.end() + 1, len(raw), current_line,
            lastm.group('fmt'))
        contents[lastm.group('name')] = seg

        return contents
    else:
        # No segments, just content.
        seg = ContentSegment()
        seg.parts, current_line = parse_segment_parts(
            raw, offset, len(raw), current_line)
        return {'content': seg}


def parse_segment_parts(raw, start, end, line_offset, first_part_fmt=None):
    matches = list(part_pattern.finditer(raw, start, end))
    num_matches = len(matches)
    if num_matches > 0:
        parts = []

        # First part, before the first format change.
        part_text = raw[start:matches[0].start()]
        parts.append(
            ContentSegmentPart(part_text, first_part_fmt, start, line_offset))
        line_offset += _count_lines(part_text)

        for i in range(1, num_matches):
            m1 = matches[i - 1]
            m2 = matches[i]
            part_text = raw[m1.end() + 1:m2.start()]
            parts.append(
                ContentSegmentPart(
                    part_text, m1.group('fmt'), m1.end() + 1,
                    line_offset))
            line_offset += _count_lines(part_text)

        lastm = matches[-1]
        part_text = raw[lastm.end() + 1:end]
        parts.append(ContentSegmentPart(
            part_text, lastm.group('fmt'), lastm.end() + 1,
            line_offset))

        return parts, line_offset
    else:
        part_text = raw[start:end]
        parts = [
            ContentSegmentPart(part_text, first_part_fmt, start, line_offset)]
        return parts, line_offset
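
For context, a minimal usage sketch (not part of page.py) of how parse_segments() splits a raw page body: a `---name:fmt---` line opens a new named segment, and a `<--fmt-->` line switches the format of the following part within the current segment. The sample page text below is invented purely for illustration, and the expected output is approximate.

# Usage sketch only: the sample text is made up; assumes piecrust is on the
# Python path so this module (piecrust/page.py) is importable.
from piecrust.page import parse_segments

raw = (
    "Main body in the default format.\n"
    "---sidebar:textile---\n"
    "Sidebar text, parsed as Textile.\n"
    "<--markdown-->\n"
    "Back to Markdown for this last part.\n")

for name, seg in parse_segments(raw).items():
    for part in seg.parts:
        print(name, part.fmt, repr(part.content))

# Roughly expected: one default-format part under 'content', then a
# 'textile' part followed by a 'markdown' part under 'sidebar'.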