comparison piecrust/page.py @ 128:28444014ce7d

Fix error reporting and counting of lines.
author Ludovic Chabant <ludovic@chabant.com>
date Fri, 14 Nov 2014 22:49:50 +0100
parents d31cbbdb4ecc
children 0bdd938d6b9f
comparison
equal deleted inserted replaced
127:bc63dc20baa0 128:28444014ce7d
250 part_pattern = re.compile( 250 part_pattern = re.compile(
251 r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""", 251 r"""^<\-\-\s*(?P<fmt>\w+)\s*\-\->\s*$""",
252 re.M) 252 re.M)
253 253
254 254
255 def _count_lines(s):
256 return len(s.split('\n'))
257
258
255 def parse_segments(raw, offset=0): 259 def parse_segments(raw, offset=0):
260 # Get the number of lines in the header.
261 header_lines = _count_lines(raw[:offset].rstrip())
262 current_line = header_lines
263
264 # Start parsing.
256 matches = list(segment_pattern.finditer(raw, offset)) 265 matches = list(segment_pattern.finditer(raw, offset))
257 num_matches = len(matches) 266 num_matches = len(matches)
258 if num_matches > 0: 267 if num_matches > 0:
259 contents = {} 268 contents = {}
260 269
261 first_offset = matches[0].start() 270 first_offset = matches[0].start()
262 if first_offset > 0: 271 if first_offset > 0:
263 # There's some default content segment at the beginning. 272 # There's some default content segment at the beginning.
264 seg = ContentSegment() 273 seg = ContentSegment()
265 seg.parts = parse_segment_parts(raw, offset, first_offset) 274 seg.parts, current_line = parse_segment_parts(
275 raw, offset, first_offset, current_line)
266 contents['content'] = seg 276 contents['content'] = seg
267 277
268 for i in range(1, num_matches): 278 for i in range(1, num_matches):
269 m1 = matches[i - 1] 279 m1 = matches[i - 1]
270 m2 = matches[i] 280 m2 = matches[i]
271 seg = ContentSegment() 281 seg = ContentSegment()
272 seg.parts = parse_segment_parts(raw, m1.end() + 1, 282 seg.parts, current_line = parse_segment_parts(
273 m2.start(), m1.group('fmt')) 283 raw, m1.end() + 1, m2.start(), current_line,
284 m1.group('fmt'))
274 contents[m1.group('name')] = seg 285 contents[m1.group('name')] = seg
275 286
276 # Handle text past the last match. 287 # Handle text past the last match.
277 lastm = matches[-1] 288 lastm = matches[-1]
278 seg = ContentSegment() 289 seg = ContentSegment()
279 seg.parts = parse_segment_parts(raw, lastm.end() + 1, 290 seg.parts, current_line = parse_segment_parts(
280 len(raw), lastm.group('fmt')) 291 raw, lastm.end() + 1, len(raw), current_line,
292 lastm.group('fmt'))
281 contents[lastm.group('name')] = seg 293 contents[lastm.group('name')] = seg
282 294
283 return contents 295 return contents
284 else: 296 else:
285 # No segments, just content. 297 # No segments, just content.
286 seg = ContentSegment() 298 seg = ContentSegment()
287 seg.parts = parse_segment_parts(raw, offset, len(raw)) 299 seg.parts, current_line = parse_segment_parts(
300 raw, offset, len(raw), current_line)
288 return {'content': seg} 301 return {'content': seg}
289 302
290 303
291 def parse_segment_parts(raw, start, end, first_part_fmt=None): 304 def parse_segment_parts(raw, start, end, line_offset, first_part_fmt=None):
292 matches = list(part_pattern.finditer(raw, start, end)) 305 matches = list(part_pattern.finditer(raw, start, end))
293 num_matches = len(matches) 306 num_matches = len(matches)
294 if num_matches > 0: 307 if num_matches > 0:
295 parts = [] 308 parts = []
296 309
297 # First part, before the first format change. 310 # First part, before the first format change.
311 part_text = raw[start:matches[0].start()]
298 parts.append( 312 parts.append(
299 ContentSegmentPart(raw[start:matches[0].start()], 313 ContentSegmentPart(part_text, first_part_fmt, line_offset))
300 first_part_fmt, 314 line_offset += _count_lines(part_text)
301 start))
302 315
303 for i in range(1, num_matches): 316 for i in range(1, num_matches):
304 m1 = matches[i - 1] 317 m1 = matches[i - 1]
305 m2 = matches[i] 318 m2 = matches[i]
319 part_text = raw[m1.end() + 1:m2.start()]
306 parts.append( 320 parts.append(
307 ContentSegmentPart( 321 ContentSegmentPart(
308 raw[m1.end() + 1:m2.start()], 322 part_text, m1.group('fmt'), line_offset))
309 m1.group('fmt'), 323 line_offset += _count_lines(part_text)
310 m1.end() + 1))
311 324
312 lastm = matches[-1] 325 lastm = matches[-1]
313 parts.append(ContentSegmentPart(raw[lastm.end() + 1:end], 326 part_text = raw[lastm.end() + 1:end]
314 lastm.group('fmt'), 327 parts.append(ContentSegmentPart(
315 lastm.end() + 1)) 328 part_text, lastm.group('fmt'), line_offset))
316 329
317 return parts 330 return parts, line_offset
318 else: 331 else:
319 return [ContentSegmentPart(raw[start:end], first_part_fmt)] 332 part_text = raw[start:end]
320 333 parts = [ContentSegmentPart(part_text, first_part_fmt, line_offset)]
334 return parts, line_offset
335