Mercurial > jouvence
comparison fontaine/parser.py @ 2:59fe8cb6190d
Add lots of tests, fix lots of bugs.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 03 Jan 2017 09:05:28 -0800 |
parents | 74b83e3d921e |
children |
comparison
equal
deleted
inserted
replaced
1:74b83e3d921e | 2:59fe8cb6190d |
---|---|
1 import re | 1 import re |
2 import logging | 2 import logging |
3 from .document import TYPE_ACTION | |
3 | 4 |
4 | 5 |
5 logger = logging.getLogger(__name__) | 6 logger = logging.getLogger(__name__) |
6 | 7 |
7 | 8 |
8 class FontaineState: | 9 class FontaineState: |
9 can_merge = False | 10 def __init__(self): |
10 needs_pending_empty_lines = True | 11 pass |
11 | |
12 def __init__(self): | |
13 self.has_pending_empty_line = False | |
14 | 12 |
15 def match(self, fp, ctx): | 13 def match(self, fp, ctx): |
16 return False | 14 return False |
17 | 15 |
18 def consume(self, fp, ctx): | 16 def consume(self, fp, ctx): |
19 raise NotImplementedError() | 17 raise NotImplementedError() |
20 | 18 |
21 def merge(self): | 19 def exit(self, ctx, next_state): |
22 pass | |
23 | |
24 def exit(self, ctx): | |
25 pass | 20 pass |
26 | 21 |
27 | 22 |
28 class _PassThroughState(FontaineState): | 23 class _PassThroughState(FontaineState): |
29 def consume(self, fp, ctx): | 24 def consume(self, fp, ctx): |
40 | 35 |
41 | 36 |
42 RE_EMPTY_LINE = re.compile(r"^$", re.M) | 37 RE_EMPTY_LINE = re.compile(r"^$", re.M) |
43 RE_BLANK_LINE = re.compile(r"^\s*$", re.M) | 38 RE_BLANK_LINE = re.compile(r"^\s*$", re.M) |
44 | 39 |
45 RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:") | 40 RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:\s*") |
46 | 41 |
47 | 42 |
48 class _TitlePageState(FontaineState): | 43 class _TitlePageState(FontaineState): |
49 def __init__(self): | 44 def __init__(self): |
50 super().__init__() | 45 super().__init__() |
63 | 58 |
64 m = RE_TITLE_KEY_VALUE.match(line) | 59 m = RE_TITLE_KEY_VALUE.match(line) |
65 if m: | 60 if m: |
66 # Commit current value, start new one. | 61 # Commit current value, start new one. |
67 self._commit(ctx) | 62 self._commit(ctx) |
68 self._cur_key = m.group('key') | 63 self._cur_key = m.group('key').lower() |
69 self._cur_val = line[m.end():].strip() | 64 self._cur_val = line[m.end():] |
70 else: | 65 else: |
71 # Keep accumulating the value of one of the title page's | 66 # Keep accumulating the value of one of the title page's |
72 # values. | 67 # values. |
73 self._cur_val += line.strip() | 68 self._cur_val += line.lstrip() |
74 | 69 |
75 if RE_EMPTY_LINE.match(fp.peekline()): | 70 if RE_EMPTY_LINE.match(fp.peekline()): |
76 self._commit(ctx) | 71 self._commit(ctx) |
77 # Finished with the page title, now move on to the first scene. | 72 # Finished with the page title, now move on to the first scene. |
78 self.has_pending_empty_line = True | |
79 break | 73 break |
80 | 74 |
81 return ANY_STATE | 75 return ANY_STATE |
82 | 76 |
83 def exit(self, ctx): | 77 def exit(self, ctx, next_state): |
84 self._commit(ctx) | 78 self._commit(ctx) |
85 | 79 |
86 def _commit(self, ctx): | 80 def _commit(self, ctx): |
87 if self._cur_key is not None: | 81 if self._cur_key is not None: |
88 ctx.document.title_values[self._cur_key] = self._cur_val | 82 val = self._cur_val.rstrip('\r\n') |
83 ctx.document.title_values[self._cur_key] = val | |
89 self._cur_key = None | 84 self._cur_key = None |
90 self._cur_val = None | 85 self._cur_val = None |
91 | 86 |
92 | 87 |
93 RE_SCENE_HEADER_PATTERN = re.compile( | 88 RE_SCENE_HEADER_PATTERN = re.compile( |
105 def consume(self, fp, ctx): | 100 def consume(self, fp, ctx): |
106 fp.readline() # Get past the blank line. | 101 fp.readline() # Get past the blank line. |
107 line = fp.readline().rstrip('\r\n') | 102 line = fp.readline().rstrip('\r\n') |
108 line = line.lstrip('.') # In case it was forced. | 103 line = line.lstrip('.') # In case it was forced. |
109 ctx.document.addScene(line) | 104 ctx.document.addScene(line) |
110 self.has_pending_empty_line = True | |
111 return ANY_STATE | 105 return ANY_STATE |
112 | 106 |
113 | 107 |
114 class _ActionState(FontaineState): | 108 class _ActionState(FontaineState): |
115 can_merge = True | |
116 needs_pending_empty_lines = False | |
117 | |
118 def __init__(self): | 109 def __init__(self): |
119 super().__init__() | 110 super().__init__() |
120 self.text = '' | 111 self.text = '' |
121 self._to_merge = None | |
122 self._was_merged = False | |
123 | 112 |
124 def match(self, fp, ctx): | 113 def match(self, fp, ctx): |
125 return True | 114 return True |
126 | 115 |
127 def consume(self, fp, ctx): | 116 def consume(self, fp, ctx): |
130 line = fp.readline() | 119 line = fp.readline() |
131 if not line: | 120 if not line: |
132 return EOF_STATE | 121 return EOF_STATE |
133 | 122 |
134 if is_first_line: | 123 if is_first_line: |
124 # Ignore the fake blank line at 0 if it's there. | |
125 if fp.line_no == 0: | |
126 continue | |
127 | |
135 line = line.lstrip('!') # In case it was forced. | 128 line = line.lstrip('!') # In case it was forced. |
136 is_first_line = False | 129 is_first_line = False |
137 | 130 |
138 # If the next line is empty, strip the carriage return from | 131 # If the next line is empty, strip the carriage return from |
139 # the line we just got because it's probably gonna be the | 132 # the line we just got because it's probably gonna be the |
140 # last one. | 133 # last one. |
141 if RE_EMPTY_LINE.match(fp.peekline()): | 134 if RE_EMPTY_LINE.match(fp.peekline()): |
142 stripped_line = line.rstrip("\r\n") | 135 self.text += line.rstrip("\r\n") |
143 self.text += stripped_line | |
144 self._to_merge = line[len(stripped_line):] | |
145 break | 136 break |
146 # ...otherwise, add the line in full. | 137 # ...otherwise, add the line in full. |
147 self.text += line | 138 self.text += line |
148 | 139 |
149 return ANY_STATE | 140 return ANY_STATE |
150 | 141 |
151 def merge(self): | 142 def exit(self, ctx, next_state): |
152 # Put back the stuff we stripped from what we thought was the | 143 last_para = ctx.document.lastParagraph() |
153 # last line. | 144 if last_para and last_para.type == TYPE_ACTION: |
154 self.text += self._to_merge | 145 last_para.text += '\n' + self.text |
155 self._was_merged = True | 146 else: |
156 | 147 ctx.document.lastScene().addAction(self.text) |
157 def exit(self, ctx): | |
158 ctx.document.lastScene().addAction(self.text) | |
159 | 148 |
160 | 149 |
161 RE_CENTERED_LINE = re.compile(r"^\s*>\s*.*\s*<\s*$", re.M) | 150 RE_CENTERED_LINE = re.compile(r"^\s*>\s*.*\s*<\s*$", re.M) |
162 | 151 |
163 | 152 |
188 if clean_line[0] != '>' or clean_line[-1] != '<': | 177 if clean_line[0] != '>' or clean_line[-1] != '<': |
189 # The whole paragraph must have `>` and `<` wrappers, so | 178 # The whole paragraph must have `>` and `<` wrappers, so |
190 # if we detect a line that doesn't have them, we make this | 179 # if we detect a line that doesn't have them, we make this |
191 # paragraph be a normal action instead. | 180 # paragraph be a normal action instead. |
192 fp.restore(snapshot) | 181 fp.restore(snapshot) |
193 self.has_pending_empty_line = True | |
194 self._aborted = True | 182 self._aborted = True |
195 return _ActionState() | 183 return _ActionState() |
196 else: | 184 else: |
197 # Remove wrapping `>`/`<`, and spaces. | 185 # Remove wrapping `>`/`<`, and spaces. |
198 clean_line = clean_line[1:-1].strip() | 186 clean_line = clean_line[1:-1].strip() |
199 | 187 |
200 if RE_EMPTY_LINE.match(fp.peekline()): | 188 if RE_EMPTY_LINE.match(fp.peekline()): |
201 self.text += clean_line | 189 self.text += clean_line |
202 self.has_pending_empty_line = True | |
203 break | 190 break |
204 self.text += clean_line + eol | 191 self.text += clean_line + eol |
205 | 192 |
206 return ANY_STATE | 193 return ANY_STATE |
207 | 194 |
208 def exit(self, ctx): | 195 def exit(self, ctx, next_state): |
209 if not self._aborted: | 196 if not self._aborted: |
210 ctx.document.lastScene().addCenteredAction(self.text) | 197 ctx.document.lastScene().addCenteredAction(self.text) |
211 | 198 |
212 | 199 |
213 RE_CHARACTER_LINE = re.compile(r"^\s*[A-Z\-]+\s*(\(.*\))?$", re.M) | 200 RE_CHARACTER_LINE = re.compile(r"^\s*[A-Z][A-Z\-\._\s]+\s*(\(.*\))?$", re.M) |
214 | 201 |
215 | 202 |
216 class _CharacterState(FontaineState): | 203 class _CharacterState(FontaineState): |
217 def match(self, fp, ctx): | 204 def match(self, fp, ctx): |
218 lines = fp.peeklines(3) | 205 lines = fp.peeklines(3) |
245 | 232 |
246 next_line = fp.peekline() | 233 next_line = fp.peekline() |
247 if not RE_EMPTY_LINE.match(next_line): | 234 if not RE_EMPTY_LINE.match(next_line): |
248 return _DialogState() | 235 return _DialogState() |
249 | 236 |
250 self.has_pending_empty_line = True | |
251 return ANY_STATE | 237 return ANY_STATE |
252 | 238 |
253 | 239 |
254 class _DialogState(FontaineState): | 240 class _DialogState(FontaineState): |
255 def __init__(self): | 241 def __init__(self): |
278 self.text += line.rstrip('\r\n') | 264 self.text += line.rstrip('\r\n') |
279 return _ParentheticalState() | 265 return _ParentheticalState() |
280 | 266 |
281 if RE_EMPTY_LINE.match(next_line): | 267 if RE_EMPTY_LINE.match(next_line): |
282 self.text += line.rstrip('\r\n') | 268 self.text += line.rstrip('\r\n') |
283 self.has_pending_empty_line = True | |
284 break | 269 break |
285 self.text += line | 270 self.text += line |
286 | 271 |
287 return ANY_STATE | 272 return ANY_STATE |
288 | 273 |
289 def exit(self, ctx): | 274 def exit(self, ctx, next_state): |
290 ctx.document.lastScene().addDialog(self.text.rstrip('\r\n')) | 275 ctx.document.lastScene().addDialog(self.text.rstrip('\r\n')) |
291 | 276 |
292 | 277 |
293 class _LyricsState(FontaineState): | 278 class _LyricsState(FontaineState): |
294 def __init__(self): | 279 def __init__(self): |
310 if line.startswith('~'): | 295 if line.startswith('~'): |
311 line = line.lstrip('~') | 296 line = line.lstrip('~') |
312 else: | 297 else: |
313 logger.debug("Rolling back lyrics into action paragraph.") | 298 logger.debug("Rolling back lyrics into action paragraph.") |
314 fp.restore(snapshot) | 299 fp.restore(snapshot) |
315 self.has_pending_empty_line = True | |
316 self._aborted = True | 300 self._aborted = True |
317 return _ActionState() | 301 return _ActionState() |
318 | 302 |
319 if RE_EMPTY_LINE.match(fp.peekline()): | 303 if RE_EMPTY_LINE.match(fp.peekline()): |
320 self.text += line.rstrip('\r\n') | 304 self.text += line.rstrip('\r\n') |
321 self.has_pending_empty_line = True | |
322 break | 305 break |
323 self.text += line | 306 self.text += line |
324 | 307 |
325 return ANY_STATE | 308 return ANY_STATE |
326 | 309 |
327 def exit(self, ctx): | 310 def exit(self, ctx, next_state): |
328 if not self._aborted: | 311 if not self._aborted: |
329 ctx.document.lastScene().addLyrics(self.text) | 312 ctx.document.lastScene().addLyrics(self.text) |
330 | 313 |
331 | 314 |
332 RE_TRANSITION_LINE = re.compile(r"^\s*[^a-z]+TO\:$", re.M) | 315 RE_TRANSITION_LINE = re.compile(r"^\s*[^a-z]+TO\:$", re.M) |
343 def consume(self, fp, ctx): | 326 def consume(self, fp, ctx): |
344 fp.readline() # Get past the empty line. | 327 fp.readline() # Get past the empty line. |
345 line = fp.readline().lstrip().rstrip('\r\n') | 328 line = fp.readline().lstrip().rstrip('\r\n') |
346 line = line.lstrip('>') # In case it was forced. | 329 line = line.lstrip('>') # In case it was forced. |
347 ctx.document.lastScene().addTransition(line) | 330 ctx.document.lastScene().addTransition(line) |
348 self.has_pending_empty_line = True | 331 return ANY_STATE |
349 | 332 |
350 | 333 |
351 RE_PAGE_BREAK_LINE = re.compile(r"^\=\=\=+$", re.M) | 334 RE_PAGE_BREAK_LINE = re.compile(r"^\=\=\=+$", re.M) |
352 | 335 |
353 | 336 |
361 | 344 |
362 def consume(self, fp, ctx): | 345 def consume(self, fp, ctx): |
363 fp.readline() | 346 fp.readline() |
364 fp.readline() | 347 fp.readline() |
365 ctx.document.lastScene().addPageBreak() | 348 ctx.document.lastScene().addPageBreak() |
366 self.has_pending_empty_line = True | |
367 return ANY_STATE | 349 return ANY_STATE |
368 | 350 |
369 | 351 |
370 class _ForcedParagraphStates(FontaineState): | 352 class _ForcedParagraphStates(FontaineState): |
371 STATE_SYMBOLS = { | 353 STATE_SYMBOLS = { |
405 if self._consume_empty_line: | 387 if self._consume_empty_line: |
406 fp.readline() | 388 fp.readline() |
407 return self._state_cls() | 389 return self._state_cls() |
408 | 390 |
409 | 391 |
392 class _EmptyLineState(FontaineState): | |
393 def __init__(self): | |
394 super().__init__() | |
395 self.line_count = 0 | |
396 | |
397 def match(self, fp, ctx): | |
398 return RE_EMPTY_LINE.match(fp.peekline()) | |
399 | |
400 def consume(self, fp, ctx): | |
401 fp.readline() | |
402 if fp.line_no > 1: # Don't take into account the fake blank at 0 | |
403 self.line_count += 1 | |
404 return ANY_STATE | |
405 | |
406 def exit(self, ctx, next_state): | |
407 if self.line_count > 0: | |
408 text = self.line_count * '\n' | |
409 last_para = ctx.document.lastParagraph() | |
410 if last_para and last_para.type == TYPE_ACTION: | |
411 last_para.text += text | |
412 else: | |
413 ctx.document.lastScene().addAction(text[1:]) | |
414 | |
415 | |
410 ROOT_STATES = [ | 416 ROOT_STATES = [ |
411 _ForcedParagraphStates, # Must be first. | 417 _ForcedParagraphStates, # Must be first. |
412 _SceneHeaderState, | 418 _SceneHeaderState, |
413 _CharacterState, | 419 _CharacterState, |
414 _TransitionState, | 420 _TransitionState, |
415 _PageBreakState, | 421 _PageBreakState, |
416 _CenteredActionState, | 422 _CenteredActionState, |
423 _EmptyLineState, # Must be second to last. | |
417 _ActionState, # Must be last. | 424 _ActionState, # Must be last. |
418 ] | 425 ] |
419 | 426 |
420 | 427 |
421 class _PeekableFile: | 428 class _PeekableFile: |
422 def __init__(self, fp): | 429 def __init__(self, fp): |
423 self.line_no = 1 | 430 self.line_no = 1 |
424 self._fp = fp | 431 self._fp = fp |
425 self._blankAt0 = False | 432 self._blankAt0 = False |
426 | 433 |
427 def readline(self, size=-1): | 434 def readline(self): |
428 if self._blankAt0: | 435 if self._blankAt0: |
429 self._blankAt0 = False | 436 self._blankAt0 = False |
437 self.line_no = 0 | |
430 return '\n' | 438 return '\n' |
431 | 439 |
432 data = self._fp.readline(size) | 440 data = self._fp.readline() |
433 self.line_no += 1 | 441 self.line_no += 1 |
434 return data | 442 return data |
435 | 443 |
436 def peekline(self): | 444 def peekline(self): |
437 if self._blankAt0: | 445 if self._blankAt0: |
464 def _addBlankAt0(self): | 472 def _addBlankAt0(self): |
465 if self._fp.tell() != 0: | 473 if self._fp.tell() != 0: |
466 raise Exception( | 474 raise Exception( |
467 "Can't add blank line at 0 if reading has started.") | 475 "Can't add blank line at 0 if reading has started.") |
468 self._blankAt0 = True | 476 self._blankAt0 = True |
469 | 477 self.line_no = 0 |
470 def _read(self, size, advance_line_no): | |
471 data = self._fp.read(size) | |
472 if advance_line_no: | |
473 self.line_no += data.count('\n') | |
474 return data | |
475 | 478 |
476 | 479 |
477 class _FontaineStateMachine: | 480 class _FontaineStateMachine: |
478 def __init__(self, fp, doc): | 481 def __init__(self, fp, doc): |
479 self.fp = _PeekableFile(fp) | 482 self.fp = _PeekableFile(fp) |
495 self.state = _PassThroughState() | 498 self.state = _PassThroughState() |
496 if not RE_EMPTY_LINE.match(self.fp.peekline()): | 499 if not RE_EMPTY_LINE.match(self.fp.peekline()): |
497 # Add a fake empty line at the beginning of the text if | 500 # Add a fake empty line at the beginning of the text if |
498 # there's not one already. This makes state matching easier. | 501 # there's not one already. This makes state matching easier. |
499 self.fp._addBlankAt0() | 502 self.fp._addBlankAt0() |
500 # Make this added empty line "pending" so if the first line | |
501 # is an action paragraph, it doesn't include it. | |
502 self.state.has_pending_empty_line = True | |
503 | 503 |
504 # Start parsing! Here we try to do a mostly-forward-only parser with | 504 # Start parsing! Here we try to do a mostly-forward-only parser with |
505 # non overlapping regexes to make it decently fast. | 505 # non overlapping regexes to make it decently fast. |
506 while True: | 506 while True: |
507 logger.debug("State '%s' consuming from '%s'..." % | 507 logger.debug("State '%s' consuming from '%s'..." % |
514 res = EOF_STATE | 514 res = EOF_STATE |
515 | 515 |
516 # Figure out what to do next... | 516 # Figure out what to do next... |
517 | 517 |
518 if res is None: | 518 if res is None: |
519 raise Exception( | 519 raise FontaineParserError( |
520 self.line_no, | |
521 "State '%s' returned a `None` result. " | |
520 "States need to return `ANY_STATE`, one or more specific " | 522 "States need to return `ANY_STATE`, one or more specific " |
521 "states, or `EOF_STATE` if they reached the end of the " | 523 "states, or `EOF_STATE` if they reached the end of the " |
522 "file.") | 524 "file." % self.state.__class__.__name__) |
523 | 525 |
524 elif res is ANY_STATE or isinstance(res, list): | 526 elif res is ANY_STATE or isinstance(res, list): |
525 # State wants to exit, we need to figure out what is the | 527 # State wants to exit, we need to figure out what is the |
526 # next state. | 528 # next state. |
527 pos = self.fp._fp.tell() | 529 pos = self.fp._fp.tell() |
542 raise Exception("Can't match following state after: %s" % | 544 raise Exception("Can't match following state after: %s" % |
543 self.state) | 545 self.state) |
544 | 546 |
545 # Handle the current state before we move on to the new one. | 547 # Handle the current state before we move on to the new one. |
546 if self.state: | 548 if self.state: |
547 if type(self.state) == type(res) and self.state.can_merge: | 549 self.state.exit(self, res) |
548 # Don't switch states if the next state is the same | |
549 # type and that type supports merging. | |
550 self.state.merge() | |
551 continue | |
552 | |
553 self.state.exit(self) | |
554 if (self.state.has_pending_empty_line and | |
555 not res.needs_pending_empty_lines): | |
556 logger.debug("Skipping pending blank line from %s" % | |
557 self.state.__class__.__name__) | |
558 self.fp.readline() | |
559 | |
560 self.state = res | 550 self.state = res |
561 | 551 |
562 elif isinstance(res, FontaineState): | 552 elif isinstance(res, FontaineState): |
563 # State wants to exit, wants a specific state to be next. | 553 # State wants to exit, wants a specific state to be next. |
564 if self.state: | 554 if self.state: |
565 self.state.exit(self) | 555 self.state.exit(self, res) |
566 if (self.state.has_pending_empty_line and | |
567 not res.needs_pending_empty_lines): | |
568 logger.debug("Skipping pending blank line from %s" % | |
569 self.state.__class__.__name__) | |
570 self.fp.readline() | |
571 self.state = res | 556 self.state = res |
572 | 557 |
573 elif res is EOF_STATE: | 558 elif res is EOF_STATE: |
574 # Reached end of file. | 559 # Reached end of file. |
575 if self.state: | 560 if self.state: |
576 self.state.exit(self) | 561 self.state.exit(self, res) |
577 break | 562 break |
578 | 563 |
579 else: | 564 else: |
580 raise Exception("Unsupported state result: %s" % res) | 565 raise Exception("Unsupported state result: %s" % res) |
581 | 566 |