comparison piecrust/pipelines/_pagebaker.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0. * Everything is a `ContentSource`, including assets directories. * Most content sources are subclasses of the base file-system source. * A source is processed by a "pipeline", and there are 2 built-in pipelines, one for assets and one for pages. The asset pipeline is vaguely functional, but the page pipeline is completely broken right now. * Rewrite the baking process as just running appropriate pipelines on each content item. This should allow for better parallelization.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents
children 08e02c2a2a1a
comparison
equal deleted inserted replaced
851:2c7e57d80bba 852:4850f8c21b6e
1 import os.path
2 import queue
3 import logging
4 import threading
5 import urllib.parse
6 from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry
7 from piecrust.rendering import RenderingContext, render_page, PASS_FORMATTING
8 from piecrust.uriutil import split_uri
9
10
11 logger = logging.getLogger(__name__)
12
13
class BakingError(Exception):
    """Raised when a page (or one of its sub-pages) fails to bake."""
16
17
class PageBaker(object):
    """Bakes pages (and all their sub-pages) into static files on disk.

    Serialization of the rendered text can optionally be offloaded to a
    background thread (see `startWriterQueue`/`stopWriterQueue`) so that
    rendering and disk I/O overlap.
    """

    def __init__(self, app, out_dir, force=False, copy_assets=True):
        self.app = app
        self.out_dir = out_dir
        # When True, bake pages even if their output looks up-to-date.
        self.force = force
        self.copy_assets = copy_assets
        self.site_root = app.config.get('site/root')
        self.pretty_urls = app.config.get('site/pretty_urls')
        self._writer_queue = None
        self._writer = None

    def startWriterQueue(self):
        """Spawn the background thread that serializes baked text to disk."""
        self._writer_queue = queue.Queue()
        self._writer = threading.Thread(
            name='PageSerializer',
            target=_text_writer,
            args=(self._writer_queue,))
        self._writer.start()

    def stopWriterQueue(self):
        """Send the sentinel to the writer thread and wait for it to exit."""
        self._writer_queue.put_nowait(None)
        self._writer.join()

    def getOutputPath(self, uri, pretty_urls):
        """Compute the output file path for `uri` under `out_dir`.

        With pretty URLs a page bakes to `<uri>/index.html`; otherwise
        the URI maps directly to a file (the empty root URI maps to
        `index.html`).
        """
        uri_root, uri_path = split_uri(self.app, uri)

        bake_path = [self.out_dir]
        decoded_uri = urllib.parse.unquote(uri_path)
        if pretty_urls:
            bake_path.append(decoded_uri)
            bake_path.append('index.html')
        elif decoded_uri == '':
            bake_path.append('index.html')
        else:
            bake_path.append(decoded_uri)

        return os.path.normpath(os.path.join(*bake_path))

    def bake(self, qualified_page, prev_entry, dirty_source_names):
        """Bake `qualified_page` and all of its sub-pages.

        `prev_entry` is this page's entry from the previous bake record
        (or None), used to skip outputs that are already up-to-date.
        `dirty_source_names` is the set of source names that have
        (re)baked items this run.

        Returns the list of `SubPagePipelineRecordEntry` objects created,
        one per sub-page. Raises `BakingError` when rendering fails.
        """
        # Start baking the sub-pages.
        cur_sub = 1
        has_more_subs = True
        sub_entries = []
        # The page's own config can override the site-wide setting.
        pretty_urls = qualified_page.config.get(
            'pretty_urls', self.pretty_urls)

        while has_more_subs:
            sub_page = qualified_page.getSubPage(cur_sub)
            sub_uri = sub_page.uri
            logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub))

            out_path = self.getOutputPath(sub_uri, pretty_urls)

            # Create the sub-entry for the bake record.
            sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
            sub_entries.append(sub_entry)

            # Find a corresponding sub-entry in the previous bake record.
            prev_sub_entry = None
            if prev_entry is not None:
                try:
                    prev_sub_entry = prev_entry.getSub(cur_sub)
                except IndexError:
                    pass

            # Figure out if we need to invalidate or force anything.
            force_this_sub, invalidate_formatting = _compute_force_flags(
                prev_sub_entry, sub_entry, dirty_source_names)
            force_this_sub = force_this_sub or self.force

            # Check for up-to-date outputs (compare source mtime against
            # the existing output file's mtime).
            do_bake = True
            if not force_this_sub:
                try:
                    in_path_time = qualified_page.path_mtime
                    out_path_time = os.path.getmtime(out_path)
                    if out_path_time >= in_path_time:
                        do_bake = False
                except OSError:
                    # File doesn't exist, we'll need to bake.
                    pass

            # If this page didn't bake because it's already up-to-date.
            # Keep trying for as many subs as we know this page has.
            if not do_bake:
                # `prev_sub_entry` is non-None here: if there were no
                # previous entry, `_compute_force_flags` would have
                # forced the bake.
                sub_entry.render_info = prev_sub_entry.copyRenderInfo()
                sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE

                if prev_entry.num_subs >= cur_sub + 1:
                    cur_sub += 1
                    has_more_subs = True
                    logger.debug(" %s is up to date, skipping to next "
                                 "sub-page." % out_path)
                    continue

                logger.debug(" %s is up to date, skipping bake." % out_path)
                break

            # All good, proceed.
            try:
                if invalidate_formatting:
                    cache_key = sub_uri
                    self.app.env.rendered_segments_repository.invalidate(
                        cache_key)
                    sub_entry.flags |= \
                        SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED

                logger.debug(" p%d -> %s" % (cur_sub, out_path))
                # BUG FIX: `_bakeSingle` was declared without the sub-page
                # number parameter although it's passed here, which made
                # this call raise a `TypeError` on every bake.
                rp = self._bakeSingle(qualified_page, cur_sub, out_path)
            except Exception as ex:
                logger.exception(ex)
                page_rel_path = os.path.relpath(qualified_page.path,
                                                self.app.root_dir)
                raise BakingError("%s: error baking '%s'." %
                                  (page_rel_path, sub_uri)) from ex

            # Record what we did.
            sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
            sub_entry.render_info = rp.copyRenderInfo()

            # Copy page assets, but only alongside the first sub-page.
            if (cur_sub == 1 and self.copy_assets and
                    sub_entry.anyPass(lambda p: p.used_assets)):
                if pretty_urls:
                    # Assets go next to the page's `index.html`.
                    out_assets_dir = os.path.dirname(out_path)
                else:
                    # Assets go in a directory named after the page file
                    # (without extension), except for the site root.
                    out_assets_dir, out_name = os.path.split(out_path)
                    if sub_uri != self.site_root:
                        out_name_noext, _ = os.path.splitext(out_name)
                        out_assets_dir = os.path.join(out_assets_dir,
                                                      out_name_noext)

                logger.debug("Copying page assets to: %s" % out_assets_dir)
                _ensure_dir_exists(out_assets_dir)

                qualified_page.source.buildAssetor(
                    qualified_page, sub_uri).copyAssets(out_assets_dir)

            # Figure out if we have more work.
            has_more_subs = False
            if sub_entry.anyPass(lambda p: p.pagination_has_more):
                cur_sub += 1
                has_more_subs = True

        return sub_entries

    def _bakeSingle(self, qp, num, out_path):
        """Render one sub-page of `qp` and serialize it to `out_path`.

        `num` is the 1-based sub-page number passed by `bake()`. It was
        previously missing from this signature (causing a `TypeError`);
        it is currently unused here — presumably it should be threaded
        into the rendering context. TODO(review): confirm.
        """
        ctx = RenderingContext(qp)
        qp.source.prepareRenderContext(ctx)

        with self.app.env.timerScope("PageRender"):
            rp = render_page(ctx)

        with self.app.env.timerScope("PageSerialize"):
            if self._writer_queue is not None:
                # Hand the text off to the background writer thread.
                self._writer_queue.put_nowait((out_path, rp.content))
            else:
                with open(out_path, 'w', encoding='utf8') as fp:
                    fp.write(rp.content)

        return rp
178
179
180 def _text_writer(q):
181 while True:
182 item = q.get()
183 if item is not None:
184 out_path, txt = item
185 out_dir = os.path.dirname(out_path)
186 _ensure_dir_exists(out_dir)
187
188 with open(out_path, 'w', encoding='utf8') as fp:
189 fp.write(txt)
190
191 q.task_done()
192 else:
193 # Sentinel object, terminate the thread.
194 q.task_done()
195 break
196
197
198 def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names):
199 # Figure out what to do with this page.
200 force_this_sub = False
201 invalidate_formatting = False
202 sub_uri = sub_entry.out_uri
203 if (prev_sub_entry and
204 (prev_sub_entry.was_baked_successfully or
205 prev_sub_entry.was_clean)):
206 # If the current page is known to use pages from other sources,
207 # see if any of those got baked, or are going to be baked for
208 # some reason. If so, we need to bake this one too.
209 # (this happens for instance with the main page of a blog).
210 dirty_for_this, invalidated_render_passes = (
211 _get_dirty_source_names_and_render_passes(
212 prev_sub_entry, dirty_source_names))
213 if len(invalidated_render_passes) > 0:
214 logger.debug(
215 "'%s' is known to use sources %s, which have "
216 "items that got (re)baked. Will force bake this "
217 "page. " % (sub_uri, dirty_for_this))
218 sub_entry.flags |= \
219 SubPagePipelineRecordEntry.FLAG_FORCED_BY_SOURCE
220 force_this_sub = True
221
222 if PASS_FORMATTING in invalidated_render_passes:
223 logger.debug(
224 "Will invalidate cached formatting for '%s' "
225 "since sources were using during that pass."
226 % sub_uri)
227 invalidate_formatting = True
228 elif (prev_sub_entry and
229 prev_sub_entry.errors):
230 # Previous bake failed. We'll have to bake it again.
231 logger.debug(
232 "Previous record entry indicates baking failed for "
233 "'%s'. Will bake it again." % sub_uri)
234 sub_entry.flags |= \
235 SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
236 force_this_sub = True
237 elif not prev_sub_entry:
238 # No previous record. We'll have to bake it.
239 logger.debug("No previous record entry found for '%s'. Will "
240 "force bake it." % sub_uri)
241 sub_entry.flags |= \
242 SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS
243 force_this_sub = True
244
245 return force_this_sub, invalidate_formatting
246
247
248 def _get_dirty_source_names_and_render_passes(sub_entry, dirty_source_names):
249 dirty_for_this = set()
250 invalidated_render_passes = set()
251 for p, pinfo in enumerate(sub_entry.render_info):
252 if pinfo:
253 for src_name in pinfo.used_source_names:
254 is_dirty = (src_name in dirty_source_names)
255 if is_dirty:
256 invalidated_render_passes.add(p)
257 dirty_for_this.add(src_name)
258 break
259 return dirty_for_this, invalidated_render_passes
260
261
262 def _ensure_dir_exists(path):
263 try:
264 os.makedirs(path, mode=0o755, exist_ok=True)
265 except OSError:
266 # In a multiprocess environment, several process may very
267 # occasionally try to create the same directory at the same time.
268 # Let's ignore any error and if something's really wrong (like file
269 # acces permissions or whatever), then it will more legitimately fail
270 # just after this when we try to write files.
271 pass
272