Mercurial > piecrust2
comparison piecrust/pipelines/_pagebaker.py @ 871:504ddb370df8
refactor: Fixing some issues with baking assets.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 13 Jun 2017 22:30:27 -0700 |
parents | 08e02c2a2a1a |
children | d6d35b2efd04 |
comparison legend: equal | deleted | inserted | replaced
870:48d25fd68b8d | 871:504ddb370df8 |
---|---|
1 import os.path | 1 import os.path |
2 import queue | 2 import queue |
3 import shutil | |
3 import logging | 4 import logging |
4 import threading | 5 import threading |
5 import urllib.parse | 6 import urllib.parse |
6 from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry | 7 from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry |
7 from piecrust.rendering import RenderingContext, render_page, PASS_FORMATTING | 8 from piecrust.rendering import RenderingContext, render_page |
8 from piecrust.uriutil import split_uri | 9 from piecrust.uriutil import split_uri |
9 | 10 |
10 | 11 |
11 logger = logging.getLogger(__name__) | 12 logger = logging.getLogger(__name__) |
12 | 13 |
14 class BakingError(Exception): | 15 class BakingError(Exception): |
15 pass | 16 pass |
16 | 17 |
17 | 18 |
18 class PageBaker(object): | 19 class PageBaker(object): |
19 def __init__(self, app, out_dir, force=False, copy_assets=True): | 20 def __init__(self, app, out_dir, force=False): |
20 self.app = app | 21 self.app = app |
21 self.out_dir = out_dir | 22 self.out_dir = out_dir |
22 self.force = force | 23 self.force = force |
23 self.copy_assets = copy_assets | |
24 self.site_root = app.config.get('site/root') | 24 self.site_root = app.config.get('site/root') |
25 self.pretty_urls = app.config.get('site/pretty_urls') | 25 self.pretty_urls = app.config.get('site/pretty_urls') |
26 self._do_write = self._writeDirect | |
26 self._writer_queue = None | 27 self._writer_queue = None |
27 self._writer = None | 28 self._writer = None |
28 self._stats = app.env.stats | 29 self._stats = app.env.stats |
30 self._rsr = app.env.rendered_segments_repository | |
29 | 31 |
30 def startWriterQueue(self): | 32 def startWriterQueue(self): |
31 self._writer_queue = queue.Queue() | 33 self._writer_queue = queue.Queue() |
32 self._writer = threading.Thread( | 34 self._writer = threading.Thread( |
33 name='PageSerializer', | 35 name='PageSerializer', |
34 target=_text_writer, | 36 target=_text_writer, |
35 args=(self._writer_queue,)) | 37 args=(self._writer_queue,)) |
36 self._writer.start() | 38 self._writer.start() |
39 self._do_write = self._sendToWriterQueue | |
37 | 40 |
38 def stopWriterQueue(self): | 41 def stopWriterQueue(self): |
39 self._writer_queue.put_nowait(None) | 42 self._writer_queue.put_nowait(None) |
40 self._writer.join() | 43 self._writer.join() |
44 | |
45 def _sendToWriterQueue(self, out_path, content): | |
46 self._writer_queue.put_nowait((out_path, content)) | |
47 | |
48 def _writeDirect(self, out_path, content): | |
49 with open(out_path, 'w', encoding='utf8') as fp: | |
50 fp.write(content) | |
41 | 51 |
42 def getOutputPath(self, uri, pretty_urls): | 52 def getOutputPath(self, uri, pretty_urls): |
43 uri_root, uri_path = split_uri(self.app, uri) | 53 uri_root, uri_path = split_uri(self.app, uri) |
44 | 54 |
45 bake_path = [self.out_dir] | 55 bake_path = [self.out_dir] |
52 else: | 62 else: |
53 bake_path.append(decoded_uri) | 63 bake_path.append(decoded_uri) |
54 | 64 |
55 return os.path.normpath(os.path.join(*bake_path)) | 65 return os.path.normpath(os.path.join(*bake_path)) |
56 | 66 |
57 def bake(self, page, prev_entry, cur_entry, dirty_source_names): | 67 def bake(self, page, prev_entry, cur_entry): |
58 # Start baking the sub-pages. | |
59 cur_sub = 1 | 68 cur_sub = 1 |
60 has_more_subs = True | 69 has_more_subs = True |
61 pretty_urls = page.config.get('pretty_urls', self.pretty_urls) | 70 pretty_urls = page.config.get('pretty_urls', self.pretty_urls) |
62 | 71 |
72 # Start baking the sub-pages. | |
63 while has_more_subs: | 73 while has_more_subs: |
64 sub_uri = page.getUri(sub_num=cur_sub) | 74 sub_uri = page.getUri(sub_num=cur_sub) |
65 logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub)) | 75 logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub)) |
66 | 76 |
67 out_path = self.getOutputPath(sub_uri, pretty_urls) | 77 out_path = self.getOutputPath(sub_uri, pretty_urls) |
68 | 78 |
69 # Create the sub-entry for the bake record. | 79 # Create the sub-entry for the bake record. |
70 sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path) | 80 cur_sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path) |
71 cur_entry.subs.append(sub_entry) | 81 cur_entry.subs.append(cur_sub_entry) |
72 | 82 |
73 # Find a corresponding sub-entry in the previous bake record. | 83 # Find a corresponding sub-entry in the previous bake record. |
74 prev_sub_entry = None | 84 prev_sub_entry = None |
75 if prev_entry is not None: | 85 if prev_entry is not None: |
76 try: | 86 try: |
77 prev_sub_entry = prev_entry.getSub(cur_sub) | 87 prev_sub_entry = prev_entry.getSub(cur_sub) |
78 except IndexError: | 88 except IndexError: |
79 pass | 89 pass |
80 | 90 |
81 # Figure out if we need to invalidate or force anything. | 91 # Figure out if we need to bake this page. |
82 force_this_sub, invalidate_formatting = _compute_force_flags( | 92 bake_status = _get_bake_status(page, out_path, self.force, |
83 prev_sub_entry, sub_entry, dirty_source_names) | 93 prev_sub_entry, cur_sub_entry) |
84 force_this_sub = force_this_sub or self.force | |
85 | |
86 # Check for up-to-date outputs. | |
87 do_bake = True | |
88 if not force_this_sub: | |
89 try: | |
90 in_path_time = page.content_mtime | |
91 out_path_time = os.path.getmtime(out_path) | |
92 if out_path_time >= in_path_time: | |
93 do_bake = False | |
94 except OSError: | |
95 # File doesn't exist, we'll need to bake. | |
96 pass | |
97 | 94 |
98 # If this page didn't bake because it's already up-to-date. | 95 # If this page didn't bake because it's already up-to-date. |
99 # Keep trying for as many subs as we know this page has. | 96 # Keep trying for as many subs as we know this page has. |
100 if not do_bake: | 97 if bake_status == STATUS_CLEAN: |
101 sub_entry.render_info = prev_sub_entry.copyRenderInfo() | 98 cur_sub_entry.render_info = prev_sub_entry.copyRenderInfo() |
102 sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE | 99 cur_sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE |
103 | 100 |
104 if prev_entry.num_subs >= cur_sub + 1: | 101 if prev_entry.num_subs >= cur_sub + 1: |
105 cur_sub += 1 | 102 cur_sub += 1 |
106 has_more_subs = True | 103 has_more_subs = True |
107 logger.debug(" %s is up to date, skipping to next " | 104 logger.debug(" %s is up to date, skipping to next " |
111 logger.debug(" %s is up to date, skipping bake." % out_path) | 108 logger.debug(" %s is up to date, skipping bake." % out_path) |
112 break | 109 break |
113 | 110 |
114 # All good, proceed. | 111 # All good, proceed. |
115 try: | 112 try: |
116 if invalidate_formatting: | 113 if bake_status == STATUS_INVALIDATE_AND_BAKE: |
117 cache_key = sub_uri | 114 cache_key = sub_uri |
118 self.app.env.rendered_segments_repository.invalidate( | 115 self._rsr.invalidate(cache_key) |
119 cache_key) | 116 cur_sub_entry.flags |= \ |
120 sub_entry.flags |= \ | |
121 SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED | 117 SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED |
122 | 118 |
123 logger.debug(" p%d -> %s" % (cur_sub, out_path)) | 119 logger.debug(" p%d -> %s" % (cur_sub, out_path)) |
124 rp = self._bakeSingle(page, cur_sub, out_path) | 120 rp = self._bakeSingle(page, cur_sub, out_path) |
125 except Exception as ex: | 121 except Exception as ex: |
126 logger.exception(ex) | 122 logger.exception(ex) |
127 raise BakingError("%s: error baking '%s'." % | 123 raise BakingError("%s: error baking '%s'." % |
128 (page.content_spec, sub_uri)) from ex | 124 (page.content_spec, sub_uri)) from ex |
129 | 125 |
130 # Record what we did. | 126 # Record what we did. |
131 sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED | 127 cur_sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED |
132 sub_entry.render_info = rp.copyRenderInfo() | 128 cur_sub_entry.render_info = rp.copyRenderInfo() |
133 | 129 |
134 # Copy page assets. | 130 # Copy page assets. |
135 if (cur_sub == 1 and self.copy_assets and | 131 if (cur_sub == 1 and |
136 sub_entry.anyPass(lambda p: p.used_assets)): | 132 cur_sub_entry.anyPass(lambda p: p.used_assets)): |
137 if pretty_urls: | 133 if pretty_urls: |
138 out_assets_dir = os.path.dirname(out_path) | 134 out_assets_dir = os.path.dirname(out_path) |
139 else: | 135 else: |
140 out_assets_dir, out_name = os.path.split(out_path) | 136 out_assets_dir, out_name = os.path.split(out_path) |
141 if sub_uri != self.site_root: | 137 if sub_uri != self.site_root: |
143 out_assets_dir = os.path.join(out_assets_dir, | 139 out_assets_dir = os.path.join(out_assets_dir, |
144 out_name_noext) | 140 out_name_noext) |
145 | 141 |
146 logger.debug("Copying page assets to: %s" % out_assets_dir) | 142 logger.debug("Copying page assets to: %s" % out_assets_dir) |
147 _ensure_dir_exists(out_assets_dir) | 143 _ensure_dir_exists(out_assets_dir) |
148 # TODO: copy assets to out dir | 144 assetor = rp.data.get('assets') |
145 if assetor is not None: | |
146 for i in assetor._getAssetItems(): | |
147 fn = os.path.basename(i.spec) | |
148 out_asset_path = os.path.join(out_assets_dir, fn) | |
149 logger.debug(" %s -> %s" % (i.spec, out_asset_path)) | |
150 shutil.copy(i.spec, out_asset_path) | |
149 | 151 |
150 # Figure out if we have more work. | 152 # Figure out if we have more work. |
151 has_more_subs = False | 153 has_more_subs = False |
152 if sub_entry.anyPass(lambda p: p.pagination_has_more): | 154 if cur_sub_entry.anyPass(lambda p: p.pagination_has_more): |
153 cur_sub += 1 | 155 cur_sub += 1 |
154 has_more_subs = True | 156 has_more_subs = True |
155 | 157 |
156 def _bakeSingle(self, page, sub_num, out_path): | 158 def _bakeSingle(self, page, sub_num, out_path): |
157 ctx = RenderingContext(page, sub_num=sub_num) | 159 ctx = RenderingContext(page, sub_num=sub_num) |
159 | 161 |
160 with self._stats.timerScope("PageRender"): | 162 with self._stats.timerScope("PageRender"): |
161 rp = render_page(ctx) | 163 rp = render_page(ctx) |
162 | 164 |
163 with self._stats.timerScope("PageSerialize"): | 165 with self._stats.timerScope("PageSerialize"): |
164 if self._writer_queue is not None: | 166 self._do_write(out_path, rp.content) |
165 self._writer_queue.put_nowait((out_path, rp.content)) | |
166 else: | |
167 with open(out_path, 'w', encoding='utf8') as fp: | |
168 fp.write(rp.content) | |
169 | 167 |
170 return rp | 168 return rp |
171 | 169 |
172 | 170 |
173 def _text_writer(q): | 171 def _text_writer(q): |
186 # Sentinel object, terminate the thread. | 184 # Sentinel object, terminate the thread. |
187 q.task_done() | 185 q.task_done() |
188 break | 186 break |
189 | 187 |
190 | 188 |
191 def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names): | 189 STATUS_CLEAN = 0 |
192 # Figure out what to do with this page. | 190 STATUS_BAKE = 1 |
193 force_this_sub = False | 191 STATUS_INVALIDATE_AND_BAKE = 2 |
194 invalidate_formatting = False | 192 |
195 sub_uri = sub_entry.out_uri | 193 |
196 if (prev_sub_entry and | 194 def _get_bake_status(page, out_path, force, prev_sub_entry, cur_sub_entry): |
197 (prev_sub_entry.was_baked_successfully or | 195 # Figure out if we need to invalidate or force anything. |
198 prev_sub_entry.was_clean)): | 196 status = _compute_force_flags(prev_sub_entry, cur_sub_entry) |
199 # If the current page is known to use pages from other sources, | 197 if status != STATUS_CLEAN: |
200 # see if any of those got baked, or are going to be baked for | 198 return status |
201 # some reason. If so, we need to bake this one too. | 199 |
202 # (this happens for instance with the main page of a blog). | 200 # Easy test. |
203 dirty_for_this, invalidated_render_passes = ( | 201 if force: |
204 _get_dirty_source_names_and_render_passes( | 202 return STATUS_BAKE |
205 prev_sub_entry, dirty_source_names)) | 203 |
206 if len(invalidated_render_passes) > 0: | 204 # Check for up-to-date outputs. |
207 logger.debug( | 205 in_path_time = page.content_mtime |
208 "'%s' is known to use sources %s, which have " | 206 try: |
209 "items that got (re)baked. Will force bake this " | 207 out_path_time = os.path.getmtime(out_path) |
210 "page. " % (sub_uri, dirty_for_this)) | 208 except OSError: |
211 sub_entry.flags |= \ | 209 # File doesn't exist, we'll need to bake. |
212 SubPagePipelineRecordEntry.FLAG_FORCED_BY_SOURCE | 210 return STATUS_BAKE |
213 force_this_sub = True | 211 |
214 | 212 if out_path_time <= in_path_time: |
215 if PASS_FORMATTING in invalidated_render_passes: | 213 return STATUS_BAKE |
216 logger.debug( | 214 |
217 "Will invalidate cached formatting for '%s' " | 215 # Nope, all good. |
218 "since sources were using during that pass." | 216 return STATUS_CLEAN |
219 % sub_uri) | 217 |
220 invalidate_formatting = True | 218 |
221 elif (prev_sub_entry and | 219 def _compute_force_flags(prev_sub_entry, cur_sub_entry): |
222 prev_sub_entry.errors): | 220 if prev_sub_entry and prev_sub_entry.errors: |
223 # Previous bake failed. We'll have to bake it again. | 221 # Previous bake failed. We'll have to bake it again. |
224 logger.debug( | 222 cur_sub_entry.flags |= \ |
225 "Previous record entry indicates baking failed for " | |
226 "'%s'. Will bake it again." % sub_uri) | |
227 sub_entry.flags |= \ | |
228 SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS | 223 SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS |
229 force_this_sub = True | 224 return STATUS_BAKE |
230 elif not prev_sub_entry: | 225 |
231 # No previous record. We'll have to bake it. | 226 if not prev_sub_entry: |
232 logger.debug("No previous record entry found for '%s'. Will " | 227 cur_sub_entry.flags |= \ |
233 "force bake it." % sub_uri) | |
234 sub_entry.flags |= \ | |
235 SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS | 228 SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS |
236 force_this_sub = True | 229 return STATUS_BAKE |
237 | 230 |
238 return force_this_sub, invalidate_formatting | 231 return STATUS_CLEAN |
239 | |
240 | |
241 def _get_dirty_source_names_and_render_passes(sub_entry, dirty_source_names): | |
242 dirty_for_this = set() | |
243 invalidated_render_passes = set() | |
244 for p, pinfo in enumerate(sub_entry.render_info): | |
245 if pinfo: | |
246 for src_name in pinfo.used_source_names: | |
247 is_dirty = (src_name in dirty_source_names) | |
248 if is_dirty: | |
249 invalidated_render_passes.add(p) | |
250 dirty_for_this.add(src_name) | |
251 break | |
252 return dirty_for_this, invalidated_render_passes | |
253 | 232 |
254 | 233 |
255 def _ensure_dir_exists(path): | 234 def _ensure_dir_exists(path): |
256 try: | 235 try: |
257 os.makedirs(path, mode=0o755, exist_ok=True) | 236 os.makedirs(path, mode=0o755, exist_ok=True) |