comparison piecrust/page.py @ 3:f485ba500df3

Gigantic change to basically make PieCrust 2 vaguely functional. - Serving works, with debug window. - Baking works, multi-threading, with dependency handling. - Various things not implemented yet.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 10 Aug 2014 23:43:16 -0700
parents
children 474c9882decf
comparison
equal deleted inserted replaced
2:40fa08b261b9 3:f485ba500df3
1 import re
2 import sys
3 import json
4 import codecs
5 import os.path
6 import hashlib
7 import logging
8 import datetime
9 import dateutil.parser
10 import threading
11 from piecrust.configuration import (Configuration, ConfigurationError,
12 parse_config_header)
13 from piecrust.environment import PHASE_PAGE_PARSING
14
15
16 logger = logging.getLogger(__name__)
17
18
class PageConfiguration(Configuration):
    """Configuration values attached to a single page.

    `_validateAll` fills in a default title and content type, and uses the
    `posts_per_page` / `posts_filters` values as defaults for
    `items_per_page` / `items_filters` when those are not already set.
    """

    def __init__(self, values=None, validate=True):
        """Forward `values` and `validate` unchanged to `Configuration`."""
        super(PageConfiguration, self).__init__(values, validate)

    def _validateAll(self, values):
        """Apply page defaults to `values` in place and return it.

        NOTE(review): presumably invoked by the base `Configuration` when
        validation is enabled -- confirm against that class.
        """
        values.setdefault('title', 'Untitled Page')
        values.setdefault('content_type', 'html')

        # Each (read_key, default_key) pair: when `read_key` holds a
        # non-None value, use it as the default for `default_key`.
        key_pairs = (
            ('posts_per_page', 'items_per_page'),
            ('posts_filters', 'items_filters'),
        )
        for read_key, default_key in key_pairs:
            found = values.get(read_key)
            if found is not None:
                values.setdefault(default_key, found)
        return values
33
34
class Page(object):
    """A page from a source, with lazily loaded configuration and content.

    The page file is only read (through `load_page`) the first time
    `config` or `raw_content` is accessed.
    """

    def __init__(self, source, source_metadata, rel_path):
        """Store the page's origin and resolve its path.

        `source` must provide `resolveRef(rel_path)`, which supplies the
        value kept in `self.path`; its `app` and `name` attributes are read
        by the `app` and `ref_spec` properties.
        """
        self.source = source
        self.source_metadata = source_metadata
        self.rel_path = rel_path
        self.path = source.resolveRef(rel_path)
        # Filled in by `_load()` on first access.
        self._config = None
        self._raw_content = None
        # Filled in by the `datetime` property (or its setter).
        self._datetime = None

    @property
    def app(self):
        """The application object exposed by the page's source."""
        return self.source.app

    @property
    def ref_spec(self):
        """The page reference, formatted as `<source name>:<rel_path>`."""
        return '%s:%s' % (self.source.name, self.rel_path)

    @property
    def config(self):
        """The page configuration, loading the page first if needed."""
        self._load()
        return self._config

    @property
    def raw_content(self):
        """The page's content segments, loading the page first if needed."""
        self._load()
        return self._raw_content

    @property
    def datetime(self):
        """The page's date and time, computed once and then remembered.

        Resolution order: a value already set or computed; the `datetime`
        entry of the source metadata; the `date` entry of the source
        metadata combined with the optional `time` page setting (midnight
        when absent); finally the modification time of the page file.

        Raises `ConfigurationError` when the `time` setting can't be parsed.
        """
        if self._datetime is not None:
            return self._datetime

        metadata = self.source_metadata
        if 'datetime' in metadata:
            self._datetime = metadata['datetime']
        elif 'date' in metadata:
            day = metadata['date']
            time_text = self.config.get('time')
            if time_text is None:
                time_of_day = datetime.time(0, 0, 0)
            else:
                try:
                    parsed = dateutil.parser.parse(time_text)
                except Exception as e:
                    raise ConfigurationError(
                        "Invalid time '%s' in page: %s" %
                        (time_text, self.path), e)
                # Only the time-of-day fields of the parsed value are kept.
                time_of_day = datetime.time(
                    parsed.hour, parsed.minute, parsed.second)
            self._datetime = datetime.datetime.combine(day, time_of_day)
        else:
            modified = os.path.getmtime(self.path)
            self._datetime = datetime.datetime.fromtimestamp(modified)
        return self._datetime

    @datetime.setter
    def datetime(self, value):
        self._datetime = value

    def getSegment(self, name='content'):
        """Return the raw content segment stored under `name`."""
        return self.raw_content[name]

    def _load(self):
        """Load configuration and content once, on first use.

        The page is pushed on the environment's execution info stack for
        the duration of the load, and popped again even if loading fails.
        """
        if self._config is None:
            stack = self.app.env.exec_info_stack
            stack.pushPage(self, PHASE_PAGE_PARSING, None)
            try:
                loaded_config, loaded_content = load_page(self.app, self.path)
                self._config = loaded_config
                self._raw_content = loaded_content
            finally:
                stack.popPage()
105
106
class PageLoadingError(Exception):
    """Raised when a page can't be loaded.

    The exception arguments are the message (which embeds `path`) followed
    by `inner`, the underlying error if there is one.
    """

    def __init__(self, path, inner=None):
        message = "Error loading page: %s" % path
        super(PageLoadingError, self).__init__(message, inner)
112
113
class ContentSegment(object):
    """A named piece of page content, stored as an ordered list of parts."""

    # Name of the method below.
    # NOTE(review): presumably looked up by debugging tools -- confirm.
    debug_render_func = 'debug_render'

    def __init__(self, content=None, fmt=None):
        """Start empty, or with one part holding `content` in format `fmt`."""
        if content is None:
            self.parts = []
        else:
            self.parts = [ContentSegmentPart(content, fmt)]

    def debug_render(self):
        """Return the text of every part, joined by newlines."""
        texts = [part.content for part in self.parts]
        return '\n'.join(texts)
124
125
class ContentSegmentPart(object):
    """A run of text inside a content segment, with its own format."""

    def __init__(self, content, fmt=None, line=-1):
        """Keep the text, its format (None for the default) and `line`.

        `line` defaults to -1 when the caller doesn't provide a position.
        """
        self.content = content
        self.fmt = fmt
        self.line = line

    def __str__(self):
        # A missing or empty format is displayed as '<default>'.
        shown_fmt = self.fmt or '<default>'
        return '%s [%s]' % (self.content, shown_fmt)
134
135
def json_load_segments(data):
    """Rebuild content segments from their JSON-friendly representation.

    `data` maps each segment name to a list of dictionaries, each with the
    keys 'c' (content), 'f' (format) and 'l' (line), which is the layout
    produced by `json_save_segments`.

    Returns a dictionary mapping each segment name to a `ContentSegment`.
    Raises `KeyError` if a part dictionary is missing one of those keys.
    """
    segments = {}
    # `items()` rather than `iteritems()`: the latter only exists on
    # Python 2 dictionaries, while `items()` works on Python 2 and 3.
    for key, seg_data in data.items():
        seg = ContentSegment()
        seg.parts = [
            ContentSegmentPart(p_data['c'], p_data['f'], p_data['l'])
            for p_data in seg_data]
        segments[key] = seg
    return segments
145
146
def json_save_segments(segments):
    """Convert content segments to plain data that `json` can serialize.

    `segments` maps each segment name to an object with a `parts` list,
    where every part has `content`, `fmt` and `line` attributes.

    Returns a dictionary mapping each segment name to a list of
    dictionaries with the keys 'c' (content), 'f' (format) and 'l' (line).
    """
    data = {}
    # `items()` rather than `iteritems()`: the latter only exists on
    # Python 2 dictionaries, while `items()` works on Python 2 and 3.
    for key, seg in segments.items():
        data[key] = [
            {'c': part.content, 'f': part.fmt, 'l': part.line}
            for part in seg.parts]
    return data
156
157
158 def load_page(app, path):
159 try:
160 return _do_load_page(app, path)
161 except Exception as e:
162 logger.exception("Error loading page: %s" %
163 os.path.relpath(path, app.root_dir))
164 _, __, traceback = sys.exc_info()
165 raise PageLoadingError(path, e), None, traceback
166
167
def _do_load_page(app, path):
    """Return the `(config, content)` pair for the page file at `path`.

    The result is read from the application's 'pages' cache when the cache
    reports the entry as valid for the file's modification time. Otherwise
    the file is read as UTF-8, its configuration header and content
    segments are parsed, and the result is written to the cache.

    The current page execution info gets its `was_cache_valid` flag set
    accordingly. Raises `Exception` if there is no current page execution
    info.
    """
    exec_info = app.env.exec_info_stack.current_page_info
    if exec_info is None:
        raise Exception("Loading page '%s' but no execution context has "
                        "been created for it." % path)

    # Check the cache first.
    cache = app.cache.getCache('pages')
    # Hash the UTF-8 bytes of the path: hashing a text path directly fails
    # on non-ASCII characters (and always fails on Python 3). ASCII paths
    # produce the same cache file name as before.
    path_bytes = path if isinstance(path, bytes) else path.encode('utf-8')
    cache_path = "%s.json" % hashlib.md5(path_bytes).hexdigest()
    page_time = os.path.getmtime(path)
    if cache.isValid(cache_path, page_time):
        exec_info.was_cache_valid = True
        cache_data = json.loads(cache.read(cache_path))
        # Cached values are loaded with validation turned off.
        config = PageConfiguration(values=cache_data['config'],
                                   validate=False)
        content = json_load_segments(cache_data['content'])
        return config, content

    # Nope, load the page from the source file.
    exec_info.was_cache_valid = False
    logger.debug("Loading page configuration from: %s", path)
    with codecs.open(path, 'r', 'utf-8') as fp:
        raw = fp.read()
    header, offset = parse_config_header(raw)

    if 'format' not in header:
        # No explicit format: pick one from the file extension. A missing
        # table behaves like an unknown extension (format is None).
        auto_formats = app.config.get('site/auto_formats') or {}
        ext = os.path.splitext(path)[1]
        header['format'] = auto_formats.get(ext, None)

    config = PageConfiguration(header)
    content = parse_segments(raw, offset)
    config.set('segments', list(content))

    # Save to the cache.
    cache_data = {
        'config': config.get(),
        'content': json_save_segments(content)}
    cache.write(cache_path, json.dumps(cache_data))

    return config, content
209
210
# Matches a whole line of the form `---name---` or `---name:fmt---`, with
# optional whitespace around the name/format and after the closing dashes.
segment_pattern = re.compile(
    r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""",
    re.MULTILINE)

# Matches a whole line of the form `<--fmt-->`, with optional whitespace
# around the format name and after the closing `-->`.
part_pattern = re.compile(
    r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""",
    re.MULTILINE)
217
218
def parse_segments(raw, offset=0):
    """Split `raw`, from `offset` on, into named content segments.

    Segment marker lines are located with `segment_pattern`. Without any
    marker, everything from `offset` is returned as the 'content' segment.
    Otherwise each marker starts a segment named after it, running up to
    the next marker (or the end of `raw`); text between `offset` and the
    first marker is stored under 'content' whenever that marker does not
    start at index 0 of `raw`.

    Returns a dictionary mapping segment names to `ContentSegment` objects.
    """
    markers = list(segment_pattern.finditer(raw, offset))
    if not markers:
        # No segments, just content.
        only_seg = ContentSegment()
        only_seg.parts = parse_segment_parts(raw, offset, len(raw))
        return {'content': only_seg}

    contents = {}

    leading_end = markers[0].start()
    if leading_end > 0:
        # There's some default content segment at the beginning.
        default_seg = ContentSegment()
        default_seg.parts = parse_segment_parts(raw, offset, leading_end)
        contents['content'] = default_seg

    # Each segment body stops where the next marker starts; the body of
    # the last one stops at the end of the text.
    body_ends = [m.start() for m in markers[1:]]
    body_ends.append(len(raw))
    for marker, body_end in zip(markers, body_ends):
        named_seg = ContentSegment()
        # The body starts one character past the end of the marker match
        # (presumably to skip the line break -- confirm).
        named_seg.parts = parse_segment_parts(
            raw, marker.end() + 1, body_end, marker.group('fmt'))
        contents[marker.group('name')] = named_seg

    return contents
253
254
def parse_segment_parts(raw, start, end, first_part_fmt=None):
    """Split `raw[start:end]` into parts at format marker lines.

    Format marker lines are located with `part_pattern`. The text before
    the first marker (all of the text when there is no marker) gets the
    format `first_part_fmt`; the text after each marker gets the format
    named by that marker.

    Returns a list of `ContentSegmentPart` objects. The third constructor
    argument (`line`) of each part is the index in `raw` where that part's
    text begins.
    """
    markers = list(part_pattern.finditer(raw, start, end))
    if not markers:
        # A single part. Record where it starts, the same way the first
        # part is recorded below when format markers are present.
        return [ContentSegmentPart(raw[start:end], first_part_fmt, start)]

    # First part, before the first format change.
    parts = [
        ContentSegmentPart(raw[start:markers[0].start()],
                           first_part_fmt,
                           start)]

    # Each following part stops where the next marker starts; the last
    # one stops at `end`.
    part_ends = [m.start() for m in markers[1:]]
    part_ends.append(end)
    for marker, part_end in zip(markers, part_ends):
        # The text starts one character past the end of the marker match
        # (presumably to skip the line break -- confirm).
        part_start = marker.end() + 1
        parts.append(
            ContentSegmentPart(raw[part_start:part_end],
                               marker.group('fmt'),
                               part_start))

    return parts
284