diff fontaine/parser.py @ 2:59fe8cb6190d

Add lots of tests, fix lots of bugs.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 03 Jan 2017 09:05:28 -0800
parents 74b83e3d921e
children
line wrap: on
line diff
--- a/fontaine/parser.py	Mon Jan 02 21:54:59 2017 -0800
+++ b/fontaine/parser.py	Tue Jan 03 09:05:28 2017 -0800
@@ -1,16 +1,14 @@
 import re
 import logging
+from .document import TYPE_ACTION
 
 
 logger = logging.getLogger(__name__)
 
 
 class FontaineState:
-    can_merge = False
-    needs_pending_empty_lines = True
-
     def __init__(self):
-        self.has_pending_empty_line = False
+        pass
 
     def match(self, fp, ctx):
         return False
@@ -18,10 +16,7 @@
     def consume(self, fp, ctx):
         raise NotImplementedError()
 
-    def merge(self):
-        pass
-
-    def exit(self, ctx):
+    def exit(self, ctx, next_state):
         pass
 
 
@@ -42,7 +37,7 @@
 RE_EMPTY_LINE = re.compile(r"^$", re.M)
 RE_BLANK_LINE = re.compile(r"^\s*$", re.M)
 
-RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:")
+RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:\s*")
 
 
 class _TitlePageState(FontaineState):
@@ -65,27 +60,27 @@
             if m:
                 # Commit current value, start new one.
                 self._commit(ctx)
-                self._cur_key = m.group('key')
-                self._cur_val = line[m.end():].strip()
+                self._cur_key = m.group('key').lower()
+                self._cur_val = line[m.end():]
             else:
                 # Keep accumulating the value of one of the title page's
                 # values.
-                self._cur_val += line.strip()
+                self._cur_val += line.lstrip()
 
             if RE_EMPTY_LINE.match(fp.peekline()):
                 self._commit(ctx)
                 # Finished with the page title, now move on to the first scene.
-                self.has_pending_empty_line = True
                 break
 
         return ANY_STATE
 
-    def exit(self, ctx):
+    def exit(self, ctx, next_state):
         self._commit(ctx)
 
     def _commit(self, ctx):
         if self._cur_key is not None:
-            ctx.document.title_values[self._cur_key] = self._cur_val
+            val = self._cur_val.rstrip('\r\n')
+            ctx.document.title_values[self._cur_key] = val
             self._cur_key = None
             self._cur_val = None
 
@@ -107,19 +102,13 @@
         line = fp.readline().rstrip('\r\n')
         line = line.lstrip('.')  # In case it was forced.
         ctx.document.addScene(line)
-        self.has_pending_empty_line = True
         return ANY_STATE
 
 
 class _ActionState(FontaineState):
-    can_merge = True
-    needs_pending_empty_lines = False
-
     def __init__(self):
         super().__init__()
         self.text = ''
-        self._to_merge = None
-        self._was_merged = False
 
     def match(self, fp, ctx):
         return True
@@ -132,6 +121,10 @@
                 return EOF_STATE
 
             if is_first_line:
+                # Ignore the fake blank line at 0 if it's threre.
+                if fp.line_no == 0:
+                    continue
+
                 line = line.lstrip('!')  # In case it was forced.
                 is_first_line = False
 
@@ -139,23 +132,19 @@
             # the line we just got because it's probably gonna be the
             # last one.
             if RE_EMPTY_LINE.match(fp.peekline()):
-                stripped_line = line.rstrip("\r\n")
-                self.text += stripped_line
-                self._to_merge = line[len(stripped_line):]
+                self.text += line.rstrip("\r\n")
                 break
             # ...otherwise, add the line with in full.
             self.text += line
 
         return ANY_STATE
 
-    def merge(self):
-        # Put back the stuff we stripped from what we thought was the
-        # last line.
-        self.text += self._to_merge
-        self._was_merged = True
-
-    def exit(self, ctx):
-        ctx.document.lastScene().addAction(self.text)
+    def exit(self, ctx, next_state):
+        last_para = ctx.document.lastParagraph()
+        if last_para and last_para.type == TYPE_ACTION:
+            last_para.text += '\n' + self.text
+        else:
+            ctx.document.lastScene().addAction(self.text)
 
 
 RE_CENTERED_LINE = re.compile(r"^\s*>\s*.*\s*<\s*$", re.M)
@@ -190,7 +179,6 @@
                 # if we detect a line that doesn't have them, we make this
                 # paragraph be a normal action instead.
                 fp.restore(snapshot)
-                self.has_pending_empty_line = True
                 self._aborted = True
                 return _ActionState()
             else:
@@ -199,18 +187,17 @@
 
             if RE_EMPTY_LINE.match(fp.peekline()):
                 self.text += clean_line
-                self.has_pending_empty_line = True
                 break
             self.text += clean_line + eol
 
         return ANY_STATE
 
-    def exit(self, ctx):
+    def exit(self, ctx, next_state):
         if not self._aborted:
             ctx.document.lastScene().addCenteredAction(self.text)
 
 
-RE_CHARACTER_LINE = re.compile(r"^\s*[A-Z\-]+\s*(\(.*\))?$", re.M)
+RE_CHARACTER_LINE = re.compile(r"^\s*[A-Z][A-Z\-\._\s]+\s*(\(.*\))?$", re.M)
 
 
 class _CharacterState(FontaineState):
@@ -247,7 +234,6 @@
         if not RE_EMPTY_LINE.match(next_line):
             return _DialogState()
 
-        self.has_pending_empty_line = True
         return ANY_STATE
 
 
@@ -280,13 +266,12 @@
 
             if RE_EMPTY_LINE.match(next_line):
                 self.text += line.rstrip('\r\n')
-                self.has_pending_empty_line = True
                 break
             self.text += line
 
         return ANY_STATE
 
-    def exit(self, ctx):
+    def exit(self, ctx, next_state):
         ctx.document.lastScene().addDialog(self.text.rstrip('\r\n'))
 
 
@@ -312,19 +297,17 @@
             else:
                 logger.debug("Rolling back lyrics into action paragraph.")
                 fp.restore(snapshot)
-                self.has_pending_empty_line = True
                 self._aborted = True
                 return _ActionState()
 
             if RE_EMPTY_LINE.match(fp.peekline()):
                 self.text += line.rstrip('\r\n')
-                self.has_pending_empty_line = True
                 break
             self.text += line
 
         return ANY_STATE
 
-    def exit(self, ctx):
+    def exit(self, ctx, next_state):
         if not self._aborted:
             ctx.document.lastScene().addLyrics(self.text)
 
@@ -345,7 +328,7 @@
         line = fp.readline().lstrip().rstrip('\r\n')
         line = line.lstrip('>')  # In case it was forced.
         ctx.document.lastScene().addTransition(line)
-        self.has_pending_empty_line = True
+        return ANY_STATE
 
 
 RE_PAGE_BREAK_LINE = re.compile(r"^\=\=\=+$", re.M)
@@ -363,7 +346,6 @@
         fp.readline()
         fp.readline()
         ctx.document.lastScene().addPageBreak()
-        self.has_pending_empty_line = True
         return ANY_STATE
 
 
@@ -407,6 +389,30 @@
         return self._state_cls()
 
 
+class _EmptyLineState(FontaineState):
+    def __init__(self):
+        super().__init__()
+        self.line_count = 0
+
+    def match(self, fp, ctx):
+        return RE_EMPTY_LINE.match(fp.peekline())
+
+    def consume(self, fp, ctx):
+        fp.readline()
+        if fp.line_no > 1:  # Don't take into account the fake blank at 0
+            self.line_count += 1
+        return ANY_STATE
+
+    def exit(self, ctx, next_state):
+        if self.line_count > 0:
+            text = self.line_count * '\n'
+            last_para = ctx.document.lastParagraph()
+            if last_para and last_para.type == TYPE_ACTION:
+                last_para.text += text
+            else:
+                ctx.document.lastScene().addAction(text[1:])
+
+
 ROOT_STATES = [
     _ForcedParagraphStates,  # Must be first.
     _SceneHeaderState,
@@ -414,6 +420,7 @@
     _TransitionState,
     _PageBreakState,
     _CenteredActionState,
+    _EmptyLineState,   # Must be second to last.
     _ActionState,  # Must be last.
 ]
 
@@ -424,12 +431,13 @@
         self._fp = fp
         self._blankAt0 = False
 
-    def readline(self, size=-1):
+    def readline(self):
         if self._blankAt0:
             self._blankAt0 = False
+            self.line_no = 0
             return '\n'
 
-        data = self._fp.readline(size)
+        data = self._fp.readline()
         self.line_no += 1
         return data
 
@@ -466,12 +474,7 @@
             raise Exception(
                 "Can't add blank line at 0 if reading has started.")
         self._blankAt0 = True
-
-    def _read(self, size, advance_line_no):
-        data = self._fp.read(size)
-        if advance_line_no:
-            self.line_no += data.count('\n')
-        return data
+        self.line_no = 0
 
 
 class _FontaineStateMachine:
@@ -497,9 +500,6 @@
                 # Add a fake empty line at the beginning of the text if
                 # there's not one already. This makes state matching easier.
                 self.fp._addBlankAt0()
-                # Make this added empty line "pending" so if the first line
-                # is an action paragraph, it doesn't include it.
-                self.state.has_pending_empty_line = True
 
         # Start parsing! Here we try to do a mostly-forward-only parser with
         # non overlapping regexes to make it decently fast.
@@ -516,10 +516,12 @@
             # Figure out what to do next...
 
             if res is None:
-                raise Exception(
+                raise FontaineParserError(
+                    self.line_no,
+                    "State '%s' returned a `None` result. "
                     "States need to return `ANY_STATE`, one or more specific "
                     "states, or `EOF_STATE` if they reached the end of the "
-                    "file.")
+                    "file." % self.state.__class__.__name__)
 
             elif res is ANY_STATE or isinstance(res, list):
                 # State wants to exit, we need to figure out what is the
@@ -544,36 +546,19 @@
 
                 # Handle the current state before we move on to the new one.
                 if self.state:
-                    if type(self.state) == type(res) and self.state.can_merge:
-                        # Don't switch states if the next state is the same
-                        # type and that type supports merging.
-                        self.state.merge()
-                        continue
-
-                    self.state.exit(self)
-                    if (self.state.has_pending_empty_line and
-                            not res.needs_pending_empty_lines):
-                        logger.debug("Skipping pending blank line from %s" %
-                                     self.state.__class__.__name__)
-                        self.fp.readline()
-
+                    self.state.exit(self, res)
                 self.state = res
 
             elif isinstance(res, FontaineState):
                 # State wants to exit, wants a specific state to be next.
                 if self.state:
-                    self.state.exit(self)
-                    if (self.state.has_pending_empty_line and
-                            not res.needs_pending_empty_lines):
-                        logger.debug("Skipping pending blank line from %s" %
-                                     self.state.__class__.__name__)
-                        self.fp.readline()
+                    self.state.exit(self, res)
                 self.state = res
 
             elif res is EOF_STATE:
                 # Reached end of file.
                 if self.state:
-                    self.state.exit(self)
+                    self.state.exit(self, res)
                 break
 
             else: