comparison piecrust/pipelines/_pagebaker.py @ 989:8adc27285d93

bake: Big pass on bake performance.

- Reduce the amount of data passed between processes.
- Make inter-process data simple objects to make it easier to test with
  alternatives to pickle.
- Make sources have the basic requirement to be able to find a content item
  from an item spec (path).
- Make Hoedown the default Markdown formatter.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 19 Nov 2017 14:29:17 -0800
parents 45ad976712ec
children 298b07a899b5
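
The heart of this change, as it shows up in _pagebaker.py, is that per-sub-page bake results are no longer SubPagePipelineRecordEntry instances but plain dictionaries built by create_subpage_job_result(), so less state crosses process boundaries and the payload stays trivially serializable. A minimal sketch of that idea follows; the diff only shows the 'flags', 'errors' and 'render_info' keys, so the other key names and default values here are assumptions for illustration, not the actual PieCrust implementation.

import pickle


def create_subpage_job_result(sub_uri, out_path):
    # A plain dict travels cheaply between worker processes and can be
    # serialized with pickle or any alternative without custom support.
    return {
        'out_uri': sub_uri,     # key names other than 'flags', 'errors'
        'out_path': out_path,   # and 'render_info' are assumed
        'flags': 0,
        'errors': [],
        'render_info': None}


entry = create_subpage_job_result('/blog/2/', 'counter/blog/2/index.html')
assert pickle.loads(pickle.dumps(entry)) == entry  # round-trips as-is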
--- a/piecrust/pipelines/_pagebaker.py  988:f83ae0a5d793
+++ b/piecrust/pipelines/_pagebaker.py  989:8adc27285d93
@@ -1,12 +1,14 @@
 import os.path
+import copy
 import queue
 import shutil
 import logging
 import threading
 import urllib.parse
-from piecrust.pipelines._pagerecords import SubPagePipelineRecordEntry
+from piecrust.pipelines._pagerecords import (
+    SubPageFlags, create_subpage_job_result)
 from piecrust.rendering import RenderingContext, render_page
 from piecrust.sources.base import AbortedSourceUseError
 from piecrust.uriutil import split_uri
 
 
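
The new import pulls in SubPageFlags, which the rest of the diff ORs into the job result's 'flags' value. A rough sketch of what such a flag namespace looks like is below; the flag names are the ones used in this diff, but the numeric values are assumptions for illustration.

class SubPageFlags:
    # Bit flags for a sub-page job result; the values here are assumed.
    FLAG_NONE = 0
    FLAG_BAKED = 2**0
    FLAG_FORCED_BY_NO_PREVIOUS = 2**1
    FLAG_FORCED_BY_PREVIOUS_ERRORS = 2**2
    FLAG_FORCED_BY_GENERAL_FORCE = 2**3
    FLAG_RENDER_CACHE_INVALIDATED = 2**4


entry = {'flags': SubPageFlags.FLAG_NONE}
entry['flags'] |= SubPageFlags.FLAG_BAKED | SubPageFlags.FLAG_RENDER_CACHE_INVALIDATED
assert entry['flags'] & SubPageFlags.FLAG_BAKED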
@@ -65,45 +67,49 @@
 
     def _writeDirect(self, out_path, content):
         with open(out_path, 'w', encoding='utf8') as fp:
             fp.write(content)
 
-    def bake(self, page, prev_entry, cur_entry):
+    def bake(self, page, prev_entry, force=False):
         cur_sub = 1
         has_more_subs = True
         app = self.app
         out_dir = self.out_dir
+        force_bake = self.force or force
         pretty_urls = page.config.get('pretty_urls', self.pretty_urls)
+
+        rendered_subs = []
 
         # Start baking the sub-pages.
         while has_more_subs:
             sub_uri = page.getUri(sub_num=cur_sub)
             logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub))
 
             out_path = get_output_path(app, out_dir, sub_uri, pretty_urls)
 
             # Create the sub-entry for the bake record.
-            cur_sub_entry = SubPagePipelineRecordEntry(sub_uri, out_path)
+            cur_sub_entry = create_subpage_job_result(sub_uri, out_path)
+            rendered_subs.append(cur_sub_entry)
 
             # Find a corresponding sub-entry in the previous bake record.
             prev_sub_entry = None
             if prev_entry is not None:
                 try:
                     prev_sub_entry = prev_entry.getSub(cur_sub)
                 except IndexError:
                     pass
 
             # Figure out if we need to bake this page.
-            bake_status = _get_bake_status(page, out_path, self.force,
+            bake_status = _get_bake_status(page, out_path, force_bake,
                                            prev_sub_entry, cur_sub_entry)
 
             # If this page didn't bake because it's already up-to-date.
             # Keep trying for as many subs as we know this page has.
             if bake_status == STATUS_CLEAN:
-                cur_sub_entry.render_info = prev_sub_entry.copyRenderInfo()
-                cur_sub_entry.flags = SubPagePipelineRecordEntry.FLAG_NONE
-                cur_entry.subs.append(cur_sub_entry)
+                cur_sub_entry['render_info'] = copy.deepcopy(
+                    prev_sub_entry['render_info'])
+                cur_sub_entry['flags'] = SubPageFlags.FLAG_NONE
 
                 if prev_entry.num_subs >= cur_sub + 1:
                     cur_sub += 1
                     has_more_subs = True
                     logger.debug("  %s is up to date, skipping to next "
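
When a sub-page is already up to date, the new code copies the previous record's render info into the current dict with copy.deepcopy() instead of calling copyRenderInfo() on a record-entry object. The deep copy presumably keeps the previous bake record untouched even if the fresh entry is mutated later; a small sketch of that property, using the 'used_assets' and 'pagination_has_more' keys seen elsewhere in this diff:

import copy

prev_render_info = {'used_assets': True, 'pagination_has_more': False}
cur_sub_entry = {'flags': 0,
                 'render_info': copy.deepcopy(prev_render_info)}

# Mutating the current entry does not leak back into the previous record.
cur_sub_entry['render_info']['pagination_has_more'] = True
assert prev_render_info['pagination_has_more'] is False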
@@ -116,30 +122,29 @@
             # All good, proceed.
             try:
                 if bake_status == STATUS_INVALIDATE_AND_BAKE:
                     cache_key = sub_uri
                     self._rsr.invalidate(cache_key)
-                    cur_sub_entry.flags |= \
-                        SubPagePipelineRecordEntry.FLAG_FORMATTING_INVALIDATED
+                    cur_sub_entry['flags'] |= \
+                        SubPageFlags.FLAG_RENDER_CACHE_INVALIDATED
 
                 logger.debug("  p%d -> %s" % (cur_sub, out_path))
                 rp = self._bakeSingle(page, cur_sub, out_path)
             except AbortedSourceUseError:
                 raise
             except Exception as ex:
                 logger.exception(ex)
                 raise BakingError("%s: error baking '%s'." %
                                   (page.content_spec, sub_uri)) from ex
 
             # Record what we did.
-            cur_sub_entry.flags |= SubPagePipelineRecordEntry.FLAG_BAKED
-            cur_sub_entry.render_info = rp.copyRenderInfo()
-            cur_entry.subs.append(cur_sub_entry)
+            cur_sub_entry['flags'] |= SubPageFlags.FLAG_BAKED
+            cur_sub_entry['render_info'] = copy.deepcopy(rp.render_info)
 
             # Copy page assets.
             if (cur_sub == 1 and
-                    cur_sub_entry.anyPass(lambda p: p.used_assets)):
+                    cur_sub_entry['render_info']['used_assets']):
                 if pretty_urls:
                     out_assets_dir = os.path.dirname(out_path)
                 else:
                     out_assets_dir, out_name = os.path.split(out_path)
                     if sub_uri != self.site_root:
@@ -157,13 +162,15 @@
                         logger.debug("  %s -> %s" % (i.spec, out_asset_path))
                         shutil.copy(i.spec, out_asset_path)
 
             # Figure out if we have more work.
             has_more_subs = False
-            if cur_sub_entry.anyPass(lambda p: p.pagination_has_more):
+            if cur_sub_entry['render_info']['pagination_has_more']:
                 cur_sub += 1
                 has_more_subs = True
+
+        return rendered_subs
 
     def _bakeSingle(self, page, sub_num, out_path):
         ctx = RenderingContext(page, sub_num=sub_num)
         page.source.prepareRenderContext(ctx)
 
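
bake() no longer appends sub-entries to a caller-provided cur_entry; it collects them in rendered_subs and returns that list, leaving the caller to decide how to fold the results into its own record. A hedged sketch of what a caller might look like; the function and attribute names below are hypothetical, not the actual pipeline API:

def run_page_bake(baker, page, prev_entry, record_entry, force=False):
    # Hypothetical caller: the baker returns plain-dict sub-results and
    # the pipeline attaches them to its own record entry afterwards.
    rendered_subs = baker.bake(page, prev_entry, force=force)
    record_entry.subs = rendered_subs
    return rendered_subs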
@@ -205,41 +212,44 @@
     if status != STATUS_CLEAN:
         return status
 
     # Easy test.
     if force:
-        cur_sub_entry.flags |= \
-            SubPagePipelineRecordEntry.FLAG_FORCED_BY_GENERAL_FORCE
-        return STATUS_BAKE
+        cur_sub_entry['flags'] |= \
+            SubPageFlags.FLAG_FORCED_BY_GENERAL_FORCE
+        # We need to invalidate any cache we have on this page because
+        # it's being forced, so something important has changed somehow.
+        return STATUS_INVALIDATE_AND_BAKE
 
     # Check for up-to-date outputs.
     in_path_time = page.content_mtime
     try:
         out_path_time = os.path.getmtime(out_path)
     except OSError:
         # File doesn't exist, we'll need to bake.
-        cur_sub_entry.flags |= \
-            SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS
+        cur_sub_entry['flags'] |= \
+            SubPageFlags.FLAG_FORCED_BY_NO_PREVIOUS
         return STATUS_BAKE
 
     if out_path_time <= in_path_time:
         return STATUS_BAKE
 
     # Nope, all good.
     return STATUS_CLEAN
 
 
 def _compute_force_flags(prev_sub_entry, cur_sub_entry):
-    if prev_sub_entry and prev_sub_entry.errors:
+    if prev_sub_entry and len(prev_sub_entry['errors']) > 0:
         # Previous bake failed. We'll have to bake it again.
-        cur_sub_entry.flags |= \
-            SubPagePipelineRecordEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
+        cur_sub_entry['flags'] |= \
+            SubPageFlags.FLAG_FORCED_BY_PREVIOUS_ERRORS
         return STATUS_BAKE
 
     if not prev_sub_entry:
+        # No previous record, so most probably was never baked. Bake it.
-        cur_sub_entry.flags |= \
-            SubPagePipelineRecordEntry.FLAG_FORCED_BY_NO_PREVIOUS
+        cur_sub_entry['flags'] |= \
+            SubPageFlags.FLAG_FORCED_BY_NO_PREVIOUS
         return STATUS_BAKE
 
     return STATUS_CLEAN
 
 
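
Two behavioural points fall out of the last hunk: a forced bake now returns STATUS_INVALIDATE_AND_BAKE so the render cache is wiped before re-rendering, and _compute_force_flags() operates on plain dicts, which makes it straightforward to exercise in isolation. A test sketch under that assumption (STATUS_BAKE, STATUS_CLEAN and _compute_force_flags are module-level names in _pagebaker.py, as used above):

from piecrust.pipelines._pagebaker import (
    _compute_force_flags, STATUS_BAKE, STATUS_CLEAN)
from piecrust.pipelines._pagerecords import SubPageFlags


def test_previous_errors_force_a_rebake():
    prev = {'errors': ['boom'], 'flags': 0}
    cur = {'errors': [], 'flags': SubPageFlags.FLAG_NONE}
    assert _compute_force_flags(prev, cur) == STATUS_BAKE
    assert cur['flags'] & SubPageFlags.FLAG_FORCED_BY_PREVIOUS_ERRORS


def test_clean_previous_entry_needs_no_rebake():
    prev = {'errors': [], 'flags': 0}
    cur = {'errors': [], 'flags': SubPageFlags.FLAG_NONE}
    assert _compute_force_flags(prev, cur) == STATUS_CLEAN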