piecrust2: comparison of piecrust/pipelines/_pagebaker.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.

* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are two built-in
  pipelines: one for assets and one for pages. The asset pipeline is
  vaguely functional, but the page pipeline is completely broken right now.
* Rewrite the baking process as just running the appropriate pipeline on
  each content item (a rough sketch follows the metadata below). This
  should allow for better parallelization.
author   | Ludovic Chabant <ludovic@chabant.com>
date     | Wed, 17 May 2017 00:11:48 -0700
parents  |
children | 08e02c2a2a1a
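
For context, the baking model described in the message above might look
roughly like the following; `get_pipeline` and `getAllContents` are
hypothetical names used for illustration, not actual PieCrust 3.0 API:

# Hypothetical sketch of "run the appropriate pipeline on each content
# item"; `get_pipeline` and `getAllContents` are assumed names.
for source in app.sources:
    pipeline = get_pipeline(source)   # the asset or the page pipeline
    for item in source.getAllContents():
        pipeline.run(item)

Because each content item is handled independently, such a loop can be
farmed out to worker processes, which is the parallelization the commit
message alludes to.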
comparison: 851:2c7e57d80bba vs. 852:4850f8c21b6e
import os.path
import queue
import logging
import threading
import urllib.parse
from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry
from piecrust.rendering import RenderingContext, render_page, PASS_FORMATTING
from piecrust.uriutil import split_uri


logger = logging.getLogger(__name__)


class BakingError(Exception):
    pass


class PageBaker(object):
    def __init__(self, app, out_dir, force=False, copy_assets=True):
        self.app = app
        self.out_dir = out_dir
        self.force = force
        self.copy_assets = copy_assets
        self.site_root = app.config.get('site/root')
        self.pretty_urls = app.config.get('site/pretty_urls')
        self._writer_queue = None
        self._writer = None
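
    # The writer queue, when started, hands rendered page content to a
    # background thread so that rendering and disk writes can overlap.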
    def startWriterQueue(self):
        self._writer_queue = queue.Queue()
        self._writer = threading.Thread(
            name='PageSerializer',
            target=_text_writer,
            args=(self._writer_queue,))
        self._writer.start()

    def stopWriterQueue(self):
        self._writer_queue.put_nowait(None)
        self._writer.join()
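
    # Map a page URI to an output file path: with pretty URLs, 'foo/bar'
    # bakes to 'foo/bar/index.html'; otherwise the decoded URI is used as
    # the file name, and the site root becomes 'index.html'.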
    def getOutputPath(self, uri, pretty_urls):
        uri_root, uri_path = split_uri(self.app, uri)

        bake_path = [self.out_dir]
        decoded_uri = urllib.parse.unquote(uri_path)
        if pretty_urls:
            bake_path.append(decoded_uri)
            bake_path.append('index.html')
        elif decoded_uri == '':
            bake_path.append('index.html')
        else:
            bake_path.append(decoded_uri)

        return os.path.normpath(os.path.join(*bake_path))
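
    # Bake a page and all of its sub-pages (pagination), skipping outputs
    # that are already up to date according to the previous bake record,
    # and return the sub-entries recording what was done.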
    def bake(self, qualified_page, prev_entry, dirty_source_names):
        # Start baking the sub-pages.
        cur_sub = 1
        has_more_subs = True
        sub_entries = []
        pretty_urls = qualified_page.config.get(
            'pretty_urls', self.pretty_urls)

        while has_more_subs:
            sub_page = qualified_page.getSubPage(cur_sub)
            sub_uri = sub_page.uri
            logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub))

            out_path = self.getOutputPath(sub_uri, pretty_urls)

            # Create the sub-entry for the bake record.
            sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
            sub_entries.append(sub_entry)

            # Find a corresponding sub-entry in the previous bake record.
            prev_sub_entry = None
            if prev_entry is not None:
                try:
                    prev_sub_entry = prev_entry.getSub(cur_sub)
                except IndexError:
                    pass

            # Figure out if we need to invalidate or force anything.
            force_this_sub, invalidate_formatting = _compute_force_flags(
                prev_sub_entry, sub_entry, dirty_source_names)
            force_this_sub = force_this_sub or self.force

            # Check for up-to-date outputs.
            do_bake = True
            if not force_this_sub:
                try:
                    in_path_time = qualified_page.path_mtime
                    out_path_time = os.path.getmtime(out_path)
                    if out_path_time >= in_path_time:
                        do_bake = False
                except OSError:
                    # File doesn't exist, we'll need to bake.
                    pass

            # If this page didn't bake because it's already up to date,
            # keep trying for as many subs as we know this page has.
            if not do_bake:
                sub_entry.render_info = prev_sub_entry.copyRenderInfo()
                sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE

                if prev_entry.num_subs >= cur_sub + 1:
                    cur_sub += 1
                    has_more_subs = True
                    logger.debug(" %s is up to date, skipping to next "
                                 "sub-page." % out_path)
                    continue

                logger.debug(" %s is up to date, skipping bake." % out_path)
                break

            # All good, proceed.
            try:
                if invalidate_formatting:
                    cache_key = sub_uri
                    self.app.env.rendered_segments_repository.invalidate(
                        cache_key)
                    sub_entry.flags |= \
                        SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED

                logger.debug(" p%d -> %s" % (cur_sub, out_path))
                rp = self._bakeSingle(qualified_page, cur_sub, out_path)
            except Exception as ex:
                logger.exception(ex)
                page_rel_path = os.path.relpath(qualified_page.path,
                                                self.app.root_dir)
                raise BakingError("%s: error baking '%s'." %
                                  (page_rel_path, sub_uri)) from ex

            # Record what we did.
            sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
            sub_entry.render_info = rp.copyRenderInfo()

            # Copy page assets.
            if (cur_sub == 1 and self.copy_assets and
                    sub_entry.anyPass(lambda p: p.used_assets)):
                if pretty_urls:
                    out_assets_dir = os.path.dirname(out_path)
                else:
                    out_assets_dir, out_name = os.path.split(out_path)
                    if sub_uri != self.site_root:
                        out_name_noext, _ = os.path.splitext(out_name)
                        out_assets_dir = os.path.join(out_assets_dir,
                                                      out_name_noext)

                logger.debug("Copying page assets to: %s" % out_assets_dir)
                _ensure_dir_exists(out_assets_dir)

                assetor = qualified_page.source.buildAssetor(
                    qualified_page, sub_uri)
                assetor.copyAssets(out_assets_dir)

            # Figure out if we have more work.
            has_more_subs = False
            if sub_entry.anyPass(lambda p: p.pagination_has_more):
                cur_sub += 1
                has_more_subs = True

        return sub_entries
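
    # Render a single sub-page and serialize it, either through the writer
    # queue or synchronously.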
    def _bakeSingle(self, qp, num, out_path):
        ctx = RenderingContext(qp)
        qp.source.prepareRenderContext(ctx)

        with self.app.env.timerScope("PageRender"):
            rp = render_page(ctx)

        with self.app.env.timerScope("PageSerialize"):
            if self._writer_queue is not None:
                self._writer_queue.put_nowait((out_path, rp.content))
            else:
                with open(out_path, 'w', encoding='utf8') as fp:
                    fp.write(rp.content)

        return rp
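

# Writer-thread target: pulls (out_path, text) tuples off the queue and
# writes them to disk until the None sentinel arrives.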
def _text_writer(q):
    while True:
        item = q.get()
        if item is not None:
            out_path, txt = item
            out_dir = os.path.dirname(out_path)
            _ensure_dir_exists(out_dir)

            with open(out_path, 'w', encoding='utf8') as fp:
                fp.write(txt)

            q.task_done()
        else:
            # Sentinel object, terminate the thread.
            q.task_done()
            break
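

# Decide whether a sub-page must be re-baked (because it uses dirty sources,
# its previous bake failed, or there is no previous record) and whether its
# cached formatting must be invalidated.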
def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names):
    # Figure out what to do with this page.
    force_this_sub = False
    invalidate_formatting = False
    sub_uri = sub_entry.out_uri
    if (prev_sub_entry and
            (prev_sub_entry.was_baked_successfully or
             prev_sub_entry.was_clean)):
        # If the current page is known to use pages from other sources,
        # see if any of those got baked, or are going to be baked for
        # some reason. If so, we need to bake this one too.
        # (this happens for instance with the main page of a blog)
        dirty_for_this, invalidated_render_passes = (
            _get_dirty_source_names_and_render_passes(
                prev_sub_entry, dirty_source_names))
        if len(invalidated_render_passes) > 0:
            logger.debug(
                "'%s' is known to use sources %s, which have "
                "items that got (re)baked. Will force bake this "
                "page." % (sub_uri, dirty_for_this))
            sub_entry.flags |= \
                SubPagePipelineRecordEntry.FLAG_FORCED_BY_SOURCE
            force_this_sub = True

            if PASS_FORMATTING in invalidated_render_passes:
                logger.debug(
                    "Will invalidate cached formatting for '%s' "
                    "since sources were used during that pass."
                    % sub_uri)
                invalidate_formatting = True
    elif (prev_sub_entry and
          prev_sub_entry.errors):
        # The previous bake failed. We'll have to bake it again.
        logger.debug(
            "Previous record entry indicates baking failed for "
            "'%s'. Will bake it again." % sub_uri)
        sub_entry.flags |= \
            SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
        force_this_sub = True
    elif not prev_sub_entry:
        # No previous record. We'll have to bake it.
        logger.debug("No previous record entry found for '%s'. Will "
                     "force bake it." % sub_uri)
        sub_entry.flags |= \
            SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS
        force_this_sub = True

    return force_this_sub, invalidate_formatting
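

# For each render pass recorded in the previous bake, collect which of its
# used sources are dirty and which render passes that invalidates.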
def _get_dirty_source_names_and_render_passes(sub_entry, dirty_source_names):
    dirty_for_this = set()
    invalidated_render_passes = set()
    for p, pinfo in enumerate(sub_entry.render_info):
        if pinfo:
            for src_name in pinfo.used_source_names:
                is_dirty = (src_name in dirty_source_names)
                if is_dirty:
                    invalidated_render_passes.add(p)
                    dirty_for_this.add(src_name)
                    break
    return dirty_for_this, invalidated_render_passes


def _ensure_dir_exists(path):
    try:
        os.makedirs(path, mode=0o755, exist_ok=True)
    except OSError:
        # In a multiprocess environment, several processes may very
        # occasionally try to create the same directory at the same time.
        # Let's ignore any error; if something's really wrong (like file
        # access permissions or whatever), then it will more legitimately
        # fail just after this when we try to write files.
        pass
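
For orientation, here is a minimal sketch of how a page pipeline might drive
this class; the `app`, `out_dir`, `qualified_page`, and `prev_entry` objects
are assumptions standing in for whatever the calling pipeline provides, not
API shown in this file:

# Hypothetical driver code, not part of this file: bake one page through
# the writer queue. `app`, `out_dir`, `qualified_page`, and `prev_entry`
# are assumed to come from the surrounding pipeline.
baker = PageBaker(app, out_dir, force=False)
baker.startWriterQueue()
try:
    sub_entries = baker.bake(qualified_page, prev_entry,
                             dirty_source_names=set())
finally:
    baker.stopWriterQueue()

Running the writer on its own thread means bake() can move on to rendering
the next sub-page while the previous one is still being flushed to disk.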