comparison piecrust/page.py @ 924:1bb704434ee2

formatting: Remove segment parts, you can use template tags instead. Segment parts were used to switch formatters inside a given content segment, but that's also achievable with template tags like `pcformat` in Jinja to some degree. It's not totally the same but removing it simplifies the code and improves performance.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 01 Oct 2017 20:36:04 -0700
parents d1095774bfcf
children 84d8fadf9e67
comparison
equal deleted inserted replaced
923:5713b6a2850d 924:1bb704434ee2
220 220
221 221
222 class ContentSegment(object): 222 class ContentSegment(object):
223 debug_render_func = 'debug_render' 223 debug_render_func = 'debug_render'
224 224
225 def __init__(self):
226 self.parts = []
227
228 def debug_render(self):
229 return '\n'.join([p.content for p in self.parts])
230
231
232 class ContentSegmentPart(object):
233 def __init__(self, content, fmt=None, offset=-1, line=-1): 225 def __init__(self, content, fmt=None, offset=-1, line=-1):
234 self.content = content 226 self.content = content
235 self.fmt = fmt 227 self.fmt = fmt
236 self.offset = offset 228 self.offset = offset
237 self.line = line 229 self.line = line
238 230
239 def __str__(self): 231 def debug_render(self):
240 return '%s [%s]' % (self.content, self.fmt or '<default>') 232 return '[%s] %s' % (self.fmt or '<none>', self.content)
241 233
242 234
243 def json_load_segments(data): 235 def json_load_segments(data):
244 segments = {} 236 segments = {}
245 for key, seg_data in data.items(): 237 for key, sd in data.items():
246 seg = ContentSegment() 238 seg = ContentSegment(sd['c'], sd['f'], sd['o'], sd['l'])
247 for p_data in seg_data:
248 part = ContentSegmentPart(p_data['c'], p_data['f'], p_data['o'],
249 p_data['l'])
250 seg.parts.append(part)
251 segments[key] = seg 239 segments[key] = seg
252 return segments 240 return segments
253 241
254 242
255 def json_save_segments(segments): 243 def json_save_segments(segments):
256 data = {} 244 data = {}
257 for key, seg in segments.items(): 245 for key, seg in segments.items():
258 seg_data = [] 246 seg_data = {
259 for part in seg.parts: 247 'c': seg.content, 'f': seg.fmt, 'o': seg.offset, 'l': seg.line}
260 p_data = {'c': part.content, 'f': part.fmt, 'o': part.offset,
261 'l': part.line}
262 seg_data.append(p_data)
263 data[key] = seg_data 248 data[key] = seg_data
264 return data 249 return data
265 250
266 251
267 def load_page(source, content_item): 252 def load_page(source, content_item):
312 297
313 298
314 segment_pattern = re.compile( 299 segment_pattern = re.compile(
315 r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""", 300 r"""^\-\-\-\s*(?P<name>\w+)(\:(?P<fmt>\w+))?\s*\-\-\-\s*$""",
316 re.M) 301 re.M)
317 part_pattern = re.compile( 302
318 r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""", 303
319 re.M) 304 def _count_lines(txt, start=0, end=-1):
320 305 cur = start
321 306 line_count = 1
322 def _count_lines(s): 307 while True:
323 return len(s.split('\n')) 308 nex = txt.find('\n', cur)
309 if nex < 0:
310 break
311
312 cur = nex + 1
313 line_count += 1
314
315 if end >= 0 and cur >= end:
316 break
317
318 return line_count
324 319
325 320
326 def _string_needs_parsing(txt, offset): 321 def _string_needs_parsing(txt, offset):
327 txtlen = len(txt) 322 txtlen = len(txt)
328 index = txt.find('-', offset) 323 index = txt.find('-', offset)
329 while index >= 0 and index < txtlen - 8: 324 while index >= 0 and index < txtlen - 8:
330 # Look for a potential `<--format-->`
331 if index > 0 and txt[index - 1] == '<' and txt[index + 1] == '-':
332 return True
333 # Look for a potential `---segment---` 325 # Look for a potential `---segment---`
334 if txt[index + 1] == '-' and txt[index + 2] == '-': 326 if (index > 0 and
327 txt[index - 1] == '\n' and
328 txt[index + 1] == '-' and txt[index + 2] == '-'):
335 return True 329 return True
336 index = txt.find('-', index + 1) 330 index = txt.find('-', index + 1)
337 return False 331 return False
338 332
339 333
340 def parse_segments(raw, offset=0): 334 def parse_segments(raw, offset=0):
341 # Get the number of lines in the header. 335 # Get the number of lines in the header.
342 header_lines = _count_lines(raw[:offset].rstrip()) 336 header_lines = _count_lines(raw, 0, offset)
343 current_line = header_lines 337 current_line = header_lines
344 338
345 # Figure out if we need any parsing. 339 # Figure out if we need any parsing.
346 do_parse = _string_needs_parsing(raw, offset) 340 do_parse = _string_needs_parsing(raw, offset)
347 if not do_parse: 341 if not do_parse:
348 seg = ContentSegment() 342 seg = ContentSegment(raw[offset:], None, offset, current_line)
349 seg.parts = [
350 ContentSegmentPart(raw[offset:], None, offset, current_line)]
351 return {'content': seg} 343 return {'content': seg}
352 344
353 # Start parsing segments and parts. 345 # Start parsing segments.
354 matches = list(segment_pattern.finditer(raw, offset)) 346 matches = list(segment_pattern.finditer(raw, offset))
355 num_matches = len(matches) 347 num_matches = len(matches)
356 if num_matches > 0: 348 if num_matches > 0:
357 contents = {} 349 contents = {}
358 350
359 first_offset = matches[0].start() 351 first_offset = matches[0].start()
360 if first_offset > 0: 352 if first_offset > 0:
361 # There's some default content segment at the beginning. 353 # There's some default content segment at the beginning.
362 seg = ContentSegment() 354 seg = ContentSegment(
363 seg.parts, current_line = parse_segment_parts( 355 raw[offset:first_offset], None, offset, current_line)
364 raw, offset, first_offset, current_line) 356 current_line += _count_lines(seg.content)
365 contents['content'] = seg 357 contents['content'] = seg
366 358
367 for i in range(1, num_matches): 359 for i in range(1, num_matches):
368 m1 = matches[i - 1] 360 m1 = matches[i - 1]
369 m2 = matches[i] 361 m2 = matches[i]
370 seg = ContentSegment() 362
371 seg.parts, current_line = parse_segment_parts( 363 cur_seg_start = m1.end() + 1
372 raw, m1.end() + 1, m2.start(), current_line, 364 cur_seg_end = m2.start()
373 m1.group('fmt')) 365
366 seg = ContentSegment(
367 raw[cur_seg_start:cur_seg_end],
368 m1.group('fmt'),
369 cur_seg_start,
370 current_line)
371 current_line += _count_lines(seg.content)
374 contents[m1.group('name')] = seg 372 contents[m1.group('name')] = seg
375 373
376 # Handle text past the last match. 374 # Handle text past the last match.
377 lastm = matches[-1] 375 lastm = matches[-1]
378 seg = ContentSegment() 376
379 seg.parts, current_line = parse_segment_parts( 377 last_seg_start = lastm.end()
380 raw, lastm.end() + 1, len(raw), current_line, 378
381 lastm.group('fmt')) 379 seg = ContentSegment(
380 raw[last_seg_start:],
381 lastm.group('fmt'),
382 last_seg_start,
383 current_line)
382 contents[lastm.group('name')] = seg 384 contents[lastm.group('name')] = seg
385 # No need to count lines for the last one.
383 386
384 return contents 387 return contents
385 else: 388 else:
386 # No segments, just content. 389 # No segments, just content.
387 seg = ContentSegment() 390 seg = ContentSegment(raw[offset:], None, offset, current_line)
388 seg.parts, current_line = parse_segment_parts(
389 raw, offset, len(raw), current_line)
390 return {'content': seg} 391 return {'content': seg}
391
392
393 def parse_segment_parts(raw, start, end, line_offset, first_part_fmt=None):
394 matches = list(part_pattern.finditer(raw, start, end))
395 num_matches = len(matches)
396 if num_matches > 0:
397 parts = []
398
399 # First part, before the first format change.
400 part_text = raw[start:matches[0].start()]
401 parts.append(
402 ContentSegmentPart(part_text, first_part_fmt, start,
403 line_offset))
404 line_offset += _count_lines(part_text)
405
406 for i in range(1, num_matches):
407 m1 = matches[i - 1]
408 m2 = matches[i]
409 part_text = raw[m1.end() + 1:m2.start()]
410 parts.append(
411 ContentSegmentPart(
412 part_text, m1.group('fmt'), m1.end() + 1,
413 line_offset))
414 line_offset += _count_lines(part_text)
415
416 lastm = matches[-1]
417 part_text = raw[lastm.end() + 1:end]
418 parts.append(ContentSegmentPart(
419 part_text, lastm.group('fmt'), lastm.end() + 1,
420 line_offset))
421
422 return parts, line_offset
423 else:
424 part_text = raw[start:end]
425 parts = [ContentSegmentPart(part_text, first_part_fmt, start,
426 line_offset)]
427 return parts, line_offset
428