comparison fontaine/parser.py @ 2:59fe8cb6190d

Add lots of tests, fix lots of bugs.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 03 Jan 2017 09:05:28 -0800
parents 74b83e3d921e
children
comparison
equal deleted inserted replaced
1:74b83e3d921e 2:59fe8cb6190d
1 import re 1 import re
2 import logging 2 import logging
3 from .document import TYPE_ACTION
3 4
4 5
5 logger = logging.getLogger(__name__) 6 logger = logging.getLogger(__name__)
6 7
7 8
8 class FontaineState: 9 class FontaineState:
9 can_merge = False 10 def __init__(self):
10 needs_pending_empty_lines = True 11 pass
11
12 def __init__(self):
13 self.has_pending_empty_line = False
14 12
15 def match(self, fp, ctx): 13 def match(self, fp, ctx):
16 return False 14 return False
17 15
18 def consume(self, fp, ctx): 16 def consume(self, fp, ctx):
19 raise NotImplementedError() 17 raise NotImplementedError()
20 18
21 def merge(self): 19 def exit(self, ctx, next_state):
22 pass
23
24 def exit(self, ctx):
25 pass 20 pass
26 21
27 22
28 class _PassThroughState(FontaineState): 23 class _PassThroughState(FontaineState):
29 def consume(self, fp, ctx): 24 def consume(self, fp, ctx):
40 35
41 36
42 RE_EMPTY_LINE = re.compile(r"^$", re.M) 37 RE_EMPTY_LINE = re.compile(r"^$", re.M)
43 RE_BLANK_LINE = re.compile(r"^\s*$", re.M) 38 RE_BLANK_LINE = re.compile(r"^\s*$", re.M)
44 39
45 RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:") 40 RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:\s*")
46 41
47 42
48 class _TitlePageState(FontaineState): 43 class _TitlePageState(FontaineState):
49 def __init__(self): 44 def __init__(self):
50 super().__init__() 45 super().__init__()
63 58
64 m = RE_TITLE_KEY_VALUE.match(line) 59 m = RE_TITLE_KEY_VALUE.match(line)
65 if m: 60 if m:
66 # Commit current value, start new one. 61 # Commit current value, start new one.
67 self._commit(ctx) 62 self._commit(ctx)
68 self._cur_key = m.group('key') 63 self._cur_key = m.group('key').lower()
69 self._cur_val = line[m.end():].strip() 64 self._cur_val = line[m.end():]
70 else: 65 else:
71 # Keep accumulating the value of one of the title page's 66 # Keep accumulating the value of one of the title page's
72 # values. 67 # values.
73 self._cur_val += line.strip() 68 self._cur_val += line.lstrip()
74 69
75 if RE_EMPTY_LINE.match(fp.peekline()): 70 if RE_EMPTY_LINE.match(fp.peekline()):
76 self._commit(ctx) 71 self._commit(ctx)
77 # Finished with the page title, now move on to the first scene. 72 # Finished with the page title, now move on to the first scene.
78 self.has_pending_empty_line = True
79 break 73 break
80 74
81 return ANY_STATE 75 return ANY_STATE
82 76
83 def exit(self, ctx): 77 def exit(self, ctx, next_state):
84 self._commit(ctx) 78 self._commit(ctx)
85 79
86 def _commit(self, ctx): 80 def _commit(self, ctx):
87 if self._cur_key is not None: 81 if self._cur_key is not None:
88 ctx.document.title_values[self._cur_key] = self._cur_val 82 val = self._cur_val.rstrip('\r\n')
83 ctx.document.title_values[self._cur_key] = val
89 self._cur_key = None 84 self._cur_key = None
90 self._cur_val = None 85 self._cur_val = None
91 86
92 87
93 RE_SCENE_HEADER_PATTERN = re.compile( 88 RE_SCENE_HEADER_PATTERN = re.compile(
105 def consume(self, fp, ctx): 100 def consume(self, fp, ctx):
106 fp.readline() # Get past the blank line. 101 fp.readline() # Get past the blank line.
107 line = fp.readline().rstrip('\r\n') 102 line = fp.readline().rstrip('\r\n')
108 line = line.lstrip('.') # In case it was forced. 103 line = line.lstrip('.') # In case it was forced.
109 ctx.document.addScene(line) 104 ctx.document.addScene(line)
110 self.has_pending_empty_line = True
111 return ANY_STATE 105 return ANY_STATE
112 106
113 107
114 class _ActionState(FontaineState): 108 class _ActionState(FontaineState):
115 can_merge = True
116 needs_pending_empty_lines = False
117
118 def __init__(self): 109 def __init__(self):
119 super().__init__() 110 super().__init__()
120 self.text = '' 111 self.text = ''
121 self._to_merge = None
122 self._was_merged = False
123 112
124 def match(self, fp, ctx): 113 def match(self, fp, ctx):
125 return True 114 return True
126 115
127 def consume(self, fp, ctx): 116 def consume(self, fp, ctx):
130 line = fp.readline() 119 line = fp.readline()
131 if not line: 120 if not line:
132 return EOF_STATE 121 return EOF_STATE
133 122
134 if is_first_line: 123 if is_first_line:
124 # Ignore the fake blank line at 0 if it's there.
125 if fp.line_no == 0:
126 continue
127
135 line = line.lstrip('!') # In case it was forced. 128 line = line.lstrip('!') # In case it was forced.
136 is_first_line = False 129 is_first_line = False
137 130
138 # If the next line is empty, strip the carriage return from 131 # If the next line is empty, strip the carriage return from
139 # the line we just got because it's probably gonna be the 132 # the line we just got because it's probably gonna be the
140 # last one. 133 # last one.
141 if RE_EMPTY_LINE.match(fp.peekline()): 134 if RE_EMPTY_LINE.match(fp.peekline()):
142 stripped_line = line.rstrip("\r\n") 135 self.text += line.rstrip("\r\n")
143 self.text += stripped_line
144 self._to_merge = line[len(stripped_line):]
145 break 136 break
146 # ...otherwise, add the line in full. 137 # ...otherwise, add the line in full.
147 self.text += line 138 self.text += line
148 139
149 return ANY_STATE 140 return ANY_STATE
150 141
151 def merge(self): 142 def exit(self, ctx, next_state):
152 # Put back the stuff we stripped from what we thought was the 143 last_para = ctx.document.lastParagraph()
153 # last line. 144 if last_para and last_para.type == TYPE_ACTION:
154 self.text += self._to_merge 145 last_para.text += '\n' + self.text
155 self._was_merged = True 146 else:
156 147 ctx.document.lastScene().addAction(self.text)
157 def exit(self, ctx):
158 ctx.document.lastScene().addAction(self.text)
159 148
160 149
161 RE_CENTERED_LINE = re.compile(r"^\s*>\s*.*\s*<\s*$", re.M) 150 RE_CENTERED_LINE = re.compile(r"^\s*>\s*.*\s*<\s*$", re.M)
162 151
163 152
188 if clean_line[0] != '>' or clean_line[-1] != '<': 177 if clean_line[0] != '>' or clean_line[-1] != '<':
189 # The whole paragraph must have `>` and `<` wrappers, so 178 # The whole paragraph must have `>` and `<` wrappers, so
190 # if we detect a line that doesn't have them, we make this 179 # if we detect a line that doesn't have them, we make this
191 # paragraph be a normal action instead. 180 # paragraph be a normal action instead.
192 fp.restore(snapshot) 181 fp.restore(snapshot)
193 self.has_pending_empty_line = True
194 self._aborted = True 182 self._aborted = True
195 return _ActionState() 183 return _ActionState()
196 else: 184 else:
197 # Remove wrapping `>`/`<`, and spaces. 185 # Remove wrapping `>`/`<`, and spaces.
198 clean_line = clean_line[1:-1].strip() 186 clean_line = clean_line[1:-1].strip()
199 187
200 if RE_EMPTY_LINE.match(fp.peekline()): 188 if RE_EMPTY_LINE.match(fp.peekline()):
201 self.text += clean_line 189 self.text += clean_line
202 self.has_pending_empty_line = True
203 break 190 break
204 self.text += clean_line + eol 191 self.text += clean_line + eol
205 192
206 return ANY_STATE 193 return ANY_STATE
207 194
208 def exit(self, ctx): 195 def exit(self, ctx, next_state):
209 if not self._aborted: 196 if not self._aborted:
210 ctx.document.lastScene().addCenteredAction(self.text) 197 ctx.document.lastScene().addCenteredAction(self.text)
211 198
212 199
213 RE_CHARACTER_LINE = re.compile(r"^\s*[A-Z\-]+\s*(\(.*\))?$", re.M) 200 RE_CHARACTER_LINE = re.compile(r"^\s*[A-Z][A-Z\-\._\s]+\s*(\(.*\))?$", re.M)
214 201
215 202
216 class _CharacterState(FontaineState): 203 class _CharacterState(FontaineState):
217 def match(self, fp, ctx): 204 def match(self, fp, ctx):
218 lines = fp.peeklines(3) 205 lines = fp.peeklines(3)
245 232
246 next_line = fp.peekline() 233 next_line = fp.peekline()
247 if not RE_EMPTY_LINE.match(next_line): 234 if not RE_EMPTY_LINE.match(next_line):
248 return _DialogState() 235 return _DialogState()
249 236
250 self.has_pending_empty_line = True
251 return ANY_STATE 237 return ANY_STATE
252 238
253 239
254 class _DialogState(FontaineState): 240 class _DialogState(FontaineState):
255 def __init__(self): 241 def __init__(self):
278 self.text += line.rstrip('\r\n') 264 self.text += line.rstrip('\r\n')
279 return _ParentheticalState() 265 return _ParentheticalState()
280 266
281 if RE_EMPTY_LINE.match(next_line): 267 if RE_EMPTY_LINE.match(next_line):
282 self.text += line.rstrip('\r\n') 268 self.text += line.rstrip('\r\n')
283 self.has_pending_empty_line = True
284 break 269 break
285 self.text += line 270 self.text += line
286 271
287 return ANY_STATE 272 return ANY_STATE
288 273
289 def exit(self, ctx): 274 def exit(self, ctx, next_state):
290 ctx.document.lastScene().addDialog(self.text.rstrip('\r\n')) 275 ctx.document.lastScene().addDialog(self.text.rstrip('\r\n'))
291 276
292 277
293 class _LyricsState(FontaineState): 278 class _LyricsState(FontaineState):
294 def __init__(self): 279 def __init__(self):
310 if line.startswith('~'): 295 if line.startswith('~'):
311 line = line.lstrip('~') 296 line = line.lstrip('~')
312 else: 297 else:
313 logger.debug("Rolling back lyrics into action paragraph.") 298 logger.debug("Rolling back lyrics into action paragraph.")
314 fp.restore(snapshot) 299 fp.restore(snapshot)
315 self.has_pending_empty_line = True
316 self._aborted = True 300 self._aborted = True
317 return _ActionState() 301 return _ActionState()
318 302
319 if RE_EMPTY_LINE.match(fp.peekline()): 303 if RE_EMPTY_LINE.match(fp.peekline()):
320 self.text += line.rstrip('\r\n') 304 self.text += line.rstrip('\r\n')
321 self.has_pending_empty_line = True
322 break 305 break
323 self.text += line 306 self.text += line
324 307
325 return ANY_STATE 308 return ANY_STATE
326 309
327 def exit(self, ctx): 310 def exit(self, ctx, next_state):
328 if not self._aborted: 311 if not self._aborted:
329 ctx.document.lastScene().addLyrics(self.text) 312 ctx.document.lastScene().addLyrics(self.text)
330 313
331 314
332 RE_TRANSITION_LINE = re.compile(r"^\s*[^a-z]+TO\:$", re.M) 315 RE_TRANSITION_LINE = re.compile(r"^\s*[^a-z]+TO\:$", re.M)
343 def consume(self, fp, ctx): 326 def consume(self, fp, ctx):
344 fp.readline() # Get past the empty line. 327 fp.readline() # Get past the empty line.
345 line = fp.readline().lstrip().rstrip('\r\n') 328 line = fp.readline().lstrip().rstrip('\r\n')
346 line = line.lstrip('>') # In case it was forced. 329 line = line.lstrip('>') # In case it was forced.
347 ctx.document.lastScene().addTransition(line) 330 ctx.document.lastScene().addTransition(line)
348 self.has_pending_empty_line = True 331 return ANY_STATE
349 332
350 333
351 RE_PAGE_BREAK_LINE = re.compile(r"^\=\=\=+$", re.M) 334 RE_PAGE_BREAK_LINE = re.compile(r"^\=\=\=+$", re.M)
352 335
353 336
361 344
362 def consume(self, fp, ctx): 345 def consume(self, fp, ctx):
363 fp.readline() 346 fp.readline()
364 fp.readline() 347 fp.readline()
365 ctx.document.lastScene().addPageBreak() 348 ctx.document.lastScene().addPageBreak()
366 self.has_pending_empty_line = True
367 return ANY_STATE 349 return ANY_STATE
368 350
369 351
370 class _ForcedParagraphStates(FontaineState): 352 class _ForcedParagraphStates(FontaineState):
371 STATE_SYMBOLS = { 353 STATE_SYMBOLS = {
405 if self._consume_empty_line: 387 if self._consume_empty_line:
406 fp.readline() 388 fp.readline()
407 return self._state_cls() 389 return self._state_cls()
408 390
409 391
392 class _EmptyLineState(FontaineState):
393 def __init__(self):
394 super().__init__()
395 self.line_count = 0
396
397 def match(self, fp, ctx):
398 return RE_EMPTY_LINE.match(fp.peekline())
399
400 def consume(self, fp, ctx):
401 fp.readline()
402 if fp.line_no > 1: # Don't take into account the fake blank at 0
403 self.line_count += 1
404 return ANY_STATE
405
406 def exit(self, ctx, next_state):
407 if self.line_count > 0:
408 text = self.line_count * '\n'
409 last_para = ctx.document.lastParagraph()
410 if last_para and last_para.type == TYPE_ACTION:
411 last_para.text += text
412 else:
413 ctx.document.lastScene().addAction(text[1:])
414
415
410 ROOT_STATES = [ 416 ROOT_STATES = [
411 _ForcedParagraphStates, # Must be first. 417 _ForcedParagraphStates, # Must be first.
412 _SceneHeaderState, 418 _SceneHeaderState,
413 _CharacterState, 419 _CharacterState,
414 _TransitionState, 420 _TransitionState,
415 _PageBreakState, 421 _PageBreakState,
416 _CenteredActionState, 422 _CenteredActionState,
423 _EmptyLineState, # Must be second to last.
417 _ActionState, # Must be last. 424 _ActionState, # Must be last.
418 ] 425 ]
419 426
420 427
421 class _PeekableFile: 428 class _PeekableFile:
422 def __init__(self, fp): 429 def __init__(self, fp):
423 self.line_no = 1 430 self.line_no = 1
424 self._fp = fp 431 self._fp = fp
425 self._blankAt0 = False 432 self._blankAt0 = False
426 433
427 def readline(self, size=-1): 434 def readline(self):
428 if self._blankAt0: 435 if self._blankAt0:
429 self._blankAt0 = False 436 self._blankAt0 = False
437 self.line_no = 0
430 return '\n' 438 return '\n'
431 439
432 data = self._fp.readline(size) 440 data = self._fp.readline()
433 self.line_no += 1 441 self.line_no += 1
434 return data 442 return data
435 443
436 def peekline(self): 444 def peekline(self):
437 if self._blankAt0: 445 if self._blankAt0:
464 def _addBlankAt0(self): 472 def _addBlankAt0(self):
465 if self._fp.tell() != 0: 473 if self._fp.tell() != 0:
466 raise Exception( 474 raise Exception(
467 "Can't add blank line at 0 if reading has started.") 475 "Can't add blank line at 0 if reading has started.")
468 self._blankAt0 = True 476 self._blankAt0 = True
469 477 self.line_no = 0
470 def _read(self, size, advance_line_no):
471 data = self._fp.read(size)
472 if advance_line_no:
473 self.line_no += data.count('\n')
474 return data
475 478
476 479
477 class _FontaineStateMachine: 480 class _FontaineStateMachine:
478 def __init__(self, fp, doc): 481 def __init__(self, fp, doc):
479 self.fp = _PeekableFile(fp) 482 self.fp = _PeekableFile(fp)
495 self.state = _PassThroughState() 498 self.state = _PassThroughState()
496 if not RE_EMPTY_LINE.match(self.fp.peekline()): 499 if not RE_EMPTY_LINE.match(self.fp.peekline()):
497 # Add a fake empty line at the beginning of the text if 500 # Add a fake empty line at the beginning of the text if
498 # there's not one already. This makes state matching easier. 501 # there's not one already. This makes state matching easier.
499 self.fp._addBlankAt0() 502 self.fp._addBlankAt0()
500 # Make this added empty line "pending" so if the first line
501 # is an action paragraph, it doesn't include it.
502 self.state.has_pending_empty_line = True
503 503
504 # Start parsing! Here we try to do a mostly-forward-only parser with 504 # Start parsing! Here we try to do a mostly-forward-only parser with
505 # non-overlapping regexes to make it decently fast. 505 # non-overlapping regexes to make it decently fast.
506 while True: 506 while True:
507 logger.debug("State '%s' consuming from '%s'..." % 507 logger.debug("State '%s' consuming from '%s'..." %
514 res = EOF_STATE 514 res = EOF_STATE
515 515
516 # Figure out what to do next... 516 # Figure out what to do next...
517 517
518 if res is None: 518 if res is None:
519 raise Exception( 519 raise FontaineParserError(
520 self.line_no,
521 "State '%s' returned a `None` result. "
520 "States need to return `ANY_STATE`, one or more specific " 522 "States need to return `ANY_STATE`, one or more specific "
521 "states, or `EOF_STATE` if they reached the end of the " 523 "states, or `EOF_STATE` if they reached the end of the "
522 "file.") 524 "file." % self.state.__class__.__name__)
523 525
524 elif res is ANY_STATE or isinstance(res, list): 526 elif res is ANY_STATE or isinstance(res, list):
525 # State wants to exit, we need to figure out what is the 527 # State wants to exit, we need to figure out what is the
526 # next state. 528 # next state.
527 pos = self.fp._fp.tell() 529 pos = self.fp._fp.tell()
542 raise Exception("Can't match following state after: %s" % 544 raise Exception("Can't match following state after: %s" %
543 self.state) 545 self.state)
544 546
545 # Handle the current state before we move on to the new one. 547 # Handle the current state before we move on to the new one.
546 if self.state: 548 if self.state:
547 if type(self.state) == type(res) and self.state.can_merge: 549 self.state.exit(self, res)
548 # Don't switch states if the next state is the same
549 # type and that type supports merging.
550 self.state.merge()
551 continue
552
553 self.state.exit(self)
554 if (self.state.has_pending_empty_line and
555 not res.needs_pending_empty_lines):
556 logger.debug("Skipping pending blank line from %s" %
557 self.state.__class__.__name__)
558 self.fp.readline()
559
560 self.state = res 550 self.state = res
561 551
562 elif isinstance(res, FontaineState): 552 elif isinstance(res, FontaineState):
563 # State wants to exit, wants a specific state to be next. 553 # State wants to exit, wants a specific state to be next.
564 if self.state: 554 if self.state:
565 self.state.exit(self) 555 self.state.exit(self, res)
566 if (self.state.has_pending_empty_line and
567 not res.needs_pending_empty_lines):
568 logger.debug("Skipping pending blank line from %s" %
569 self.state.__class__.__name__)
570 self.fp.readline()
571 self.state = res 556 self.state = res
572 557
573 elif res is EOF_STATE: 558 elif res is EOF_STATE:
574 # Reached end of file. 559 # Reached end of file.
575 if self.state: 560 if self.state:
576 self.state.exit(self) 561 self.state.exit(self, res)
577 break 562 break
578 563
579 else: 564 else:
580 raise Exception("Unsupported state result: %s" % res) 565 raise Exception("Unsupported state result: %s" % res)
581 566