comparison piecrust/pipelines/_pagebaker.py @ 871:504ddb370df8

refactor: Fixing some issues with baking assets.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 13 Jun 2017 22:30:27 -0700
parents 08e02c2a2a1a
children d6d35b2efd04
--- a/piecrust/pipelines/_pagebaker.py	870:48d25fd68b8d
+++ b/piecrust/pipelines/_pagebaker.py	871:504ddb370df8
@@ -1,12 +1,13 @@
 import os.path
 import queue
+import shutil
 import logging
 import threading
 import urllib.parse
 from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry
-from piecrust.rendering import RenderingContext, render_page, PASS_FORMATTING
+from piecrust.rendering import RenderingContext, render_page
 from piecrust.uriutil import split_uri


 logger = logging.getLogger(__name__)

@@ -14,32 +15,41 @@
 class BakingError(Exception):
     pass


 class PageBaker(object):
-    def __init__(self, app, out_dir, force=False, copy_assets=True):
+    def __init__(self, app, out_dir, force=False):
         self.app = app
         self.out_dir = out_dir
         self.force = force
-        self.copy_assets = copy_assets
         self.site_root = app.config.get('site/root')
         self.pretty_urls = app.config.get('site/pretty_urls')
+        self._do_write = self._writeDirect
         self._writer_queue = None
         self._writer = None
         self._stats = app.env.stats
+        self._rsr = app.env.rendered_segments_repository

     def startWriterQueue(self):
         self._writer_queue = queue.Queue()
         self._writer = threading.Thread(
             name='PageSerializer',
             target=_text_writer,
             args=(self._writer_queue,))
         self._writer.start()
+        self._do_write = self._sendToWriterQueue

     def stopWriterQueue(self):
         self._writer_queue.put_nowait(None)
         self._writer.join()
+
+    def _sendToWriterQueue(self, out_path, content):
+        self._writer_queue.put_nowait((out_path, content))
+
+    def _writeDirect(self, out_path, content):
+        with open(out_path, 'w', encoding='utf8') as fp:
+            fp.write(content)

     def getOutputPath(self, uri, pretty_urls):
         uri_root, uri_path = split_uri(self.app, uri)

         bake_path = [self.out_dir]
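Note on the hunk above: the constructor now picks a write strategy once, binding `self._do_write` to `_writeDirect` by default and letting `startWriterQueue()` rebind it to `_sendToWriterQueue`; it also caches `app.env.rendered_segments_repository` in `self._rsr`. A minimal standalone sketch of that rebinding pattern (class and method names here are illustrative, not piecrust's):

```python
import queue


class Serializer:
    """Sketch only: pick the write strategy once instead of branching per write."""

    def __init__(self):
        self._queue = None
        self._do_write = self._write_direct      # default: synchronous writes

    def start_queue(self):
        self._queue = queue.Queue()
        self._do_write = self._send_to_queue     # rebound once, not per call

    def _write_direct(self, path, content):
        with open(path, 'w', encoding='utf8') as fp:
            fp.write(content)

    def _send_to_queue(self, path, content):
        self._queue.put_nowait((path, content))

    def write(self, path, content):
        self._do_write(path, content)            # hot path: no branching
```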
@@ -52,56 +62,43 @@
         else:
             bake_path.append(decoded_uri)

         return os.path.normpath(os.path.join(*bake_path))

-    def bake(self, page, prev_entry, cur_entry, dirty_source_names):
-        # Start baking the sub-pages.
+    def bake(self, page, prev_entry, cur_entry):
         cur_sub = 1
         has_more_subs = True
         pretty_urls = page.config.get('pretty_urls', self.pretty_urls)

+        # Start baking the sub-pages.
         while has_more_subs:
             sub_uri = page.getUri(sub_num=cur_sub)
             logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub))

             out_path = self.getOutputPath(sub_uri, pretty_urls)

             # Create the sub-entry for the bake record.
-            sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
-            cur_entry.subs.append(sub_entry)
+            cur_sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
+            cur_entry.subs.append(cur_sub_entry)

             # Find a corresponding sub-entry in the previous bake record.
             prev_sub_entry = None
             if prev_entry is not None:
                 try:
                     prev_sub_entry = prev_entry.getSub(cur_sub)
                 except IndexError:
                     pass

-            # Figure out if we need to invalidate or force anything.
-            force_this_sub, invalidate_formatting = _compute_force_flags(
-                prev_sub_entry, sub_entry, dirty_source_names)
-            force_this_sub = force_this_sub or self.force
-
-            # Check for up-to-date outputs.
-            do_bake = True
-            if not force_this_sub:
-                try:
-                    in_path_time = page.content_mtime
-                    out_path_time = os.path.getmtime(out_path)
-                    if out_path_time >= in_path_time:
-                        do_bake = False
-                except OSError:
-                    # File doesn't exist, we'll need to bake.
-                    pass
+            # Figure out if we need to bake this page.
+            bake_status = _get_bake_status(page, out_path, self.force,
+                                           prev_sub_entry, cur_sub_entry)

             # If this page didn't bake because it's already up-to-date.
             # Keep trying for as many subs as we know this page has.
-            if not do_bake:
-                sub_entry.render_info = prev_sub_entry.copyRenderInfo()
-                sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE
+            if bake_status == STATUS_CLEAN:
+                cur_sub_entry.render_info = prev_sub_entry.copyRenderInfo()
+                cur_sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE

                 if prev_entry.num_subs >= cur_sub + 1:
                     cur_sub += 1
                     has_more_subs = True
                     logger.debug(" %s is up to date, skipping to next "
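The `dirty_source_names` bookkeeping is gone from `bake()`; each sub-page now gets a single tri-state answer from `_get_bake_status()` (defined near the end of this file). A toy illustration of the three-way protocol, using the same constants (the handler bodies are made up):

```python
STATUS_CLEAN = 0                 # output is up to date: reuse previous record
STATUS_BAKE = 1                  # re-render and re-write the output
STATUS_INVALIDATE_AND_BAKE = 2   # also drop cached formatting segments first


def describe(bake_status):
    # Hypothetical consumer of the status, mirroring the loop above.
    if bake_status == STATUS_CLEAN:
        return "skip: copy render info from the previous bake record"
    if bake_status == STATUS_INVALIDATE_AND_BAKE:
        return "invalidate the rendered-segments cache, then bake"
    return "bake"
```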
@@ -111,31 +108,30 @@
                 logger.debug(" %s is up to date, skipping bake." % out_path)
                 break

             # All good, proceed.
             try:
-                if invalidate_formatting:
+                if bake_status == STATUS_INVALIDATE_AND_BAKE:
                     cache_key = sub_uri
-                    self.app.env.rendered_segments_repository.invalidate(
-                        cache_key)
-                    sub_entry.flags |= \
+                    self._rsr.invalidate(cache_key)
+                    cur_sub_entry.flags |= \
                         SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED

                 logger.debug(" p%d -> %s" % (cur_sub, out_path))
                 rp = self._bakeSingle(page, cur_sub, out_path)
             except Exception as ex:
                 logger.exception(ex)
                 raise BakingError("%s: error baking '%s'." %
                                   (page.content_spec, sub_uri)) from ex

             # Record what we did.
-            sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
-            sub_entry.render_info = rp.copyRenderInfo()
+            cur_sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
+            cur_sub_entry.render_info = rp.copyRenderInfo()

             # Copy page assets.
-            if (cur_sub == 1 and self.copy_assets and
-                    sub_entry.anyPass(lambda p: p.used_assets)):
+            if (cur_sub == 1 and
+                    cur_sub_entry.anyPass(lambda p: p.used_assets)):
                 if pretty_urls:
                     out_assets_dir = os.path.dirname(out_path)
                 else:
                     out_assets_dir, out_name = os.path.split(out_path)
                     if sub_uri != self.site_root:
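The asset-directory logic above mirrors the two URL styles: with pretty URLs the page bakes to `.../foo/index.html`, so assets land beside it in `.../foo/`; without them the page bakes to `.../foo.html` and assets go into a sibling `.../foo/` directory (the line that computes `out_name_noext` is elided by the comparison view). A hedged restatement as a standalone helper, with the elided step assumed to be a `splitext`:

```python
import os.path


def guess_assets_dir(out_path, pretty_urls, is_site_root=False):
    # Illustrative helper, not the changeset's code.
    if pretty_urls:
        # e.g. out/blog/foo/index.html -> assets go to out/blog/foo/
        return os.path.dirname(out_path)
    out_assets_dir, out_name = os.path.split(out_path)
    if not is_site_root:
        # Assumed elided step: strip the extension to name the directory.
        out_name_noext, _ext = os.path.splitext(out_name)
        out_assets_dir = os.path.join(out_assets_dir, out_name_noext)
    # e.g. out/blog/foo.html -> assets go to out/blog/foo/
    return out_assets_dir
```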
@@ -143,15 +139,21 @@
                         out_assets_dir = os.path.join(out_assets_dir,
                                                       out_name_noext)

                 logger.debug("Copying page assets to: %s" % out_assets_dir)
                 _ensure_dir_exists(out_assets_dir)
-                # TODO: copy assets to out dir
+                assetor = rp.data.get('assets')
+                if assetor is not None:
+                    for i in assetor._getAssetItems():
+                        fn = os.path.basename(i.spec)
+                        out_asset_path = os.path.join(out_assets_dir, fn)
+                        logger.debug(" %s -> %s" % (i.spec, out_asset_path))
+                        shutil.copy(i.spec, out_asset_path)

             # Figure out if we have more work.
             has_more_subs = False
-            if sub_entry.anyPass(lambda p: p.pagination_has_more):
+            if cur_sub_entry.anyPass(lambda p: p.pagination_has_more):
                 cur_sub += 1
                 has_more_subs = True

     def _bakeSingle(self, page, sub_num, out_path):
         ctx = RenderingContext(page, sub_num=sub_num)
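The hunk above fills in the old `# TODO: copy assets to out dir`: it pulls the assetor out of the render data and flattens each asset into the output directory by base name via `shutil.copy` (note that `_getAssetItems()` is an underscore-private helper, and flattening by base name means two assets with the same file name would silently collide). The copy step in isolation, under the same assumption about the item shape (objects carrying a filesystem path in `.spec`):

```python
import os.path
import shutil


def copy_page_assets(items, out_assets_dir):
    # Sketch only, mirroring the loop above. shutil.copy copies file data
    # and permission bits; it does not preserve mtimes (copy2 would).
    for item in items:
        fn = os.path.basename(item.spec)
        shutil.copy(item.spec, os.path.join(out_assets_dir, fn))
```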
@@ -159,15 +161,11 @@

         with self._stats.timerScope("PageRender"):
             rp = render_page(ctx)

         with self._stats.timerScope("PageSerialize"):
-            if self._writer_queue is not None:
-                self._writer_queue.put_nowait((out_path, rp.content))
-            else:
-                with open(out_path, 'w', encoding='utf8') as fp:
-                    fp.write(rp.content)
+            self._do_write(out_path, rp.content)

         return rp


 def _text_writer(q):
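`_bakeSingle()` no longer branches on whether a queue exists; it just calls the pre-bound `self._do_write`. The body of the `_text_writer` consumer is elided by the comparison view except for its sentinel-handling tail (the first lines of the next hunk); a sketch of the usual shape of such a loop, assumed rather than taken from the changeset:

```python
def _text_writer(q):
    # Assumed shape of the elided loop: drain (out_path, content) tuples
    # until the None sentinel posted by stopWriterQueue() arrives.
    while True:
        item = q.get()
        if item is not None:
            out_path, content = item
            with open(out_path, 'w', encoding='utf8') as fp:
                fp.write(content)
            q.task_done()
        else:
            # Sentinel object, terminate the thread.
            q.task_done()
            break
```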
@@ -186,72 +184,53 @@
             # Sentinel object, terminate the thread.
             q.task_done()
             break


-def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names):
-    # Figure out what to do with this page.
-    force_this_sub = False
-    invalidate_formatting = False
-    sub_uri = sub_entry.out_uri
-    if (prev_sub_entry and
-            (prev_sub_entry.was_baked_successfully or
-             prev_sub_entry.was_clean)):
-        # If the current page is known to use pages from other sources,
-        # see if any of those got baked, or are going to be baked for
-        # some reason. If so, we need to bake this one too.
-        # (this happens for instance with the main page of a blog).
-        dirty_for_this, invalidated_render_passes = (
-            _get_dirty_source_names_and_render_passes(
-                prev_sub_entry, dirty_source_names))
-        if len(invalidated_render_passes) > 0:
-            logger.debug(
-                "'%s' is known to use sources %s, which have "
-                "items that got (re)baked. Will force bake this "
-                "page. " % (sub_uri, dirty_for_this))
-            sub_entry.flags |= \
-                SubPagePipelineRecordEntry.FLAG_FORCED_BY_SOURCE
-            force_this_sub = True
-
-            if PASS_FORMATTING in invalidated_render_passes:
-                logger.debug(
-                    "Will invalidate cached formatting for '%s' "
-                    "since sources were using during that pass."
-                    % sub_uri)
-                invalidate_formatting = True
-    elif (prev_sub_entry and
-            prev_sub_entry.errors):
+STATUS_CLEAN = 0
+STATUS_BAKE = 1
+STATUS_INVALIDATE_AND_BAKE = 2
+
+
+def _get_bake_status(page, out_path, force, prev_sub_entry, cur_sub_entry):
+    # Figure out if we need to invalidate or force anything.
+    status = _compute_force_flags(prev_sub_entry, cur_sub_entry)
+    if status != STATUS_CLEAN:
+        return status
+
+    # Easy test.
+    if force:
+        return STATUS_BAKE
+
+    # Check for up-to-date outputs.
+    in_path_time = page.content_mtime
+    try:
+        out_path_time = os.path.getmtime(out_path)
+    except OSError:
+        # File doesn't exist, we'll need to bake.
+        return STATUS_BAKE
+
+    if out_path_time <= in_path_time:
+        return STATUS_BAKE
+
+    # Nope, all good.
+    return STATUS_CLEAN
+
+
+def _compute_force_flags(prev_sub_entry, cur_sub_entry):
+    if prev_sub_entry and prev_sub_entry.errors:
         # Previous bake failed. We'll have to bake it again.
-        logger.debug(
-            "Previous record entry indicates baking failed for "
-            "'%s'. Will bake it again." % sub_uri)
-        sub_entry.flags |= \
+        cur_sub_entry.flags |= \
             SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
-        force_this_sub = True
-    elif not prev_sub_entry:
-        # No previous record. We'll have to bake it.
-        logger.debug("No previous record entry found for '%s'. Will "
-                     "force bake it." % sub_uri)
-        sub_entry.flags |= \
+        return STATUS_BAKE
+
+    if not prev_sub_entry:
+        cur_sub_entry.flags |= \
             SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS
-        force_this_sub = True
-
-    return force_this_sub, invalidate_formatting
-
-
-def _get_dirty_source_names_and_render_passes(sub_entry, dirty_source_names):
-    dirty_for_this = set()
-    invalidated_render_passes = set()
-    for p, pinfo in enumerate(sub_entry.render_info):
-        if pinfo:
-            for src_name in pinfo.used_source_names:
-                is_dirty = (src_name in dirty_source_names)
-                if is_dirty:
-                    invalidated_render_passes.add(p)
-                    dirty_for_this.add(src_name)
-                    break
-    return dirty_for_this, invalidated_render_passes
+        return STATUS_BAKE
+
+    return STATUS_CLEAN


 def _ensure_dir_exists(path):
     try:
         os.makedirs(path, mode=0o755, exist_ok=True)
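One behavioral detail worth noting in `_get_bake_status()`: the old inline check skipped the bake when `out_path_time >= in_path_time`, while the new code re-bakes when `out_path_time <= in_path_time`, so an output whose mtime exactly equals the source's is now considered stale. A compact restatement of the new rule (hypothetical helper, not piecrust code):

```python
import os


def needs_bake(in_mtime, out_path, force=False):
    # force wins; a missing output always bakes; equal mtimes now bake too.
    if force:
        return True
    try:
        out_mtime = os.path.getmtime(out_path)
    except OSError:
        return True
    return out_mtime <= in_mtime
```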