comparison fontaine/parser.py @ 0:243401c49520

Initial commit.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 02 Jan 2017 12:30:49 -0800
parents
children 74b83e3d921e
comparison
equal deleted inserted replaced
-1:000000000000 0:243401c49520
1 import re
2 import logging
3
4
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
6
7
class FontaineState:
    """Base class for the states driven by the parser's state machine.

    Subclasses override `match` to say whether they apply at the current
    file position, `consume` to read their input, and `exit` to flush
    whatever they accumulated.
    """

    # When true, the state machine keeps the current instance instead of
    # switching to a freshly matched instance of the very same type.
    can_merge = False

    def match(self, fp, ctx):
        """Tell whether this state applies at the current position.

        The base implementation never matches.
        """
        return False

    def consume(self, fp, ctx):
        """Read input for this state; subclasses must provide this."""
        raise NotImplementedError()

    def exit(self, ctx):
        """Hook called when the state is left; does nothing by default."""
        return None
19
20
class FontaineParserError(Exception):
    """Parsing error whose message is prefixed with a line number."""

    def __init__(self, line_no, message):
        text = "Error line %d: %s" % (line_no, message)
        super().__init__(text)
24
25
# Sentinels returned by `consume()`; they are compared by identity.
# ANY_STATE: the next state must be picked among all the known states.
ANY_STATE = object()
# EOF_STATE: the end of the input was reached.
EOF_STATE = object()


# A line with no characters at all before its line ending.
RE_EMPTY_LINE = re.compile(r"^$", re.MULTILINE)
# A line made only of whitespace (or nothing).
RE_BLANK_LINE = re.compile(r"^\s*$", re.MULTILINE)

# The `key:` prefix of a title page entry; the key is captured as `key`.
RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:")
34
35
class _TitlePageState(FontaineState):
    """Reads the `key: value` entries found at the top of the file.

    Finished entries are stored in `ctx.document.title_values`, indexed
    by their key.
    """

    def __init__(self):
        super().__init__()
        # Entry being accumulated; both are `None` between entries.
        self._cur_key = None
        self._cur_val = None

    def consume(self, fp, ctx):
        """Consume one line of the title page.

        Returns `EOF_STATE` when nothing could be read, `ANY_STATE` when
        the title page is over (or was never there), and `True` when the
        state wants to keep consuming.

        Raises `FontaineParserError` when a line is neither a new entry
        nor the continuation of one while some entries were already
        stored.
        """
        line = fp.readline()
        if not line:
            return EOF_STATE

        if RE_EMPTY_LINE.match(line):
            self._commit(ctx)
            # Finished with the page title, now move on to the first
            # scene. However, if we never had any page title, go back to
            # the beginning so we don't consume anybody else's empty
            # lines.
            if len(ctx.document.title_values) == 0:
                fp.seek0()
            return ANY_STATE

        key_match = RE_TITLE_KEY_VALUE.match(line)
        if key_match:
            # Commit current value, start new one.
            self._commit(ctx)
            self._cur_key = key_match.group('key')
            self._cur_val = line[key_match.end():].strip()
            return True

        if self._cur_val is not None:
            # Keep accumulating the value of the current entry.
            self._cur_val += line.strip()
            return True

        if len(ctx.document.title_values) == 0:
            # Early exit because there's no title page. Go back to the
            # beginning so we don't consume somebody's first line of
            # text.
            fp.seek0()
            return ANY_STATE

        raise FontaineParserError(
            fp.line_no,
            "Page title needs to be followed by 2 empty lines.")

    def exit(self, ctx):
        """Store the entry in progress, if any, before leaving."""
        self._commit(ctx)

    def _commit(self, ctx):
        """Store the entry in progress in the document and reset it."""
        if self._cur_key is None:
            return
        ctx.document.title_values[self._cur_key] = self._cur_val
        self._cur_key = self._cur_val = None
87
88
# Start of a scene header: one of the known location prefixes (case
# insensitive) followed by whitespace or a period. The period inside
# `int./ext` is escaped so that it only matches a literal '.', not any
# character.
RE_SCENE_HEADER_PATTERN = re.compile(
    r"^(int|ext|est|int/ext|int\./ext|i/e)[\s\.]", re.I)
91
92
class _SceneHeaderState(FontaineState):
    """Scene header: an empty line followed by a scene header line."""

    def match(self, fp, ctx):
        """Match when the next two lines are an empty line and a line
        recognized by `RE_SCENE_HEADER_PATTERN`.

        The result is truthy or falsy rather than a strict boolean.
        """
        blank, header = fp.peeklines(2)
        if not RE_EMPTY_LINE.match(blank):
            return None
        return RE_SCENE_HEADER_PATTERN.match(header)

    def consume(self, fp, ctx):
        """Read the header line and add a scene to the document."""
        fp.readline()  # Get past the blank line.
        # Leading periods are dropped in case the header was forced.
        heading = fp.readline().rstrip('\r\n').lstrip('.')
        ctx.document.addScene(heading)
        return ANY_STATE
106
107
class _ActionState(FontaineState):
    """Fallback state that accumulates lines of action text."""

    # Consecutive action states are kept as a single instance by the
    # state machine.
    can_merge = True

    def __init__(self):
        super().__init__()
        self.text = ''

    def match(self, fp, ctx):
        """Action matches anything."""
        return True

    def consume(self, fp, ctx):
        """Append lines to `self.text` until the next line is empty.

        Leading '!' characters are removed from the first line read by
        this call. Returns `EOF_STATE` if a read comes back empty,
        `ANY_STATE` otherwise.
        """
        line = fp.readline()
        if not line:
            return EOF_STATE
        self.text += line.lstrip('!')

        while not RE_EMPTY_LINE.match(fp.peekline()):
            line = fp.readline()
            if not line:
                return EOF_STATE
            self.text += line

        return ANY_STATE

    def exit(self, ctx):
        """Add the accumulated text to the document's last scene."""
        ctx.document.lastScene().addAction(self.text)
138
139
# A character cue: upper-case letters and dashes, optionally followed by
# a parenthesized extension.
RE_CHARACTER_LINE = re.compile(r"^[A-Z\-]+\s*(\(.*\))?$", re.MULTILINE)
141
142
class _CharacterState(FontaineState):
    """Character cue introducing a parenthetical or some dialog."""

    def match(self, fp, ctx):
        """Match an empty line, then a character line, then a line that
        is not empty.

        The result is truthy or falsy rather than a strict boolean.
        """
        before, name, after = fp.peeklines(3)
        if not RE_EMPTY_LINE.match(before):
            return None
        if not RE_CHARACTER_LINE.match(name):
            return None
        return not RE_EMPTY_LINE.match(after)

    def consume(self, fp, ctx):
        """Read the cue and add the character to the last scene.

        Only a parenthetical or dialog may follow.
        """
        fp.readline()  # Get past the empty line.
        # Leading '@' characters are dropped in case the cue was forced.
        name = fp.readline().rstrip('\r\n').lstrip('@')
        ctx.document.lastScene().addCharacter(name)
        return [_ParentheticalState, _DialogState]
156
157
# A line wrapped in parentheses, with optional surrounding whitespace.
RE_PARENTHETICAL_LINE = re.compile(r"^\s*\(.*\)\s*$", re.MULTILINE)
159
160
class _ParentheticalState(FontaineState):
    """Parenthetical line placed after a character cue."""

    def match(self, fp, ctx):
        """Match when the next line is wrapped in parentheses.

        Returns the regex match object, or `None`.
        """
        # We only get here from a `_CharacterState` so we know the
        # previous one is already that.
        return RE_PARENTHETICAL_LINE.match(fp.peekline())

    def consume(self, fp, ctx):
        """Read the line and add it to the last scene."""
        text = fp.readline().rstrip('\r\n')
        ctx.document.lastScene().addParenthetical(text)
        return [_DialogState, _CharacterState, _ActionState]
172
173
class _DialogState(FontaineState):
    """Dialog lines, read until the next empty line."""

    def __init__(self):
        super().__init__()
        self.text = ''

    def match(self, fp, ctx):
        """Match when the next line is not empty."""
        return RE_EMPTY_LINE.match(fp.peekline()) is None

    def consume(self, fp, ctx):
        """Append lines to `self.text` until the next line is empty.

        Returns `EOF_STATE` if a read comes back empty, `ANY_STATE`
        otherwise.
        """
        line = fp.readline()
        while line:
            self.text += line
            if RE_EMPTY_LINE.match(fp.peekline()):
                return ANY_STATE
            line = fp.readline()
        return EOF_STATE

    def exit(self, ctx):
        """Add the text, minus trailing line endings, to the last scene."""
        dialog = self.text.rstrip('\r\n')
        ctx.document.lastScene().addDialog(dialog)
195
196
class _LyricsState(FontaineState):
    """Placeholder state, presumably for lyrics.

    Nothing is overridden, so `match` and `consume` are the ones
    inherited from `FontaineState`.
    """
    pass
199
200
class _TransitionState(FontaineState):
    """Placeholder state, presumably for transitions.

    Nothing is overridden, so `match` and `consume` are the ones
    inherited from `FontaineState`.
    """
    pass
203
204
class _ForcedParagraphStates(FontaineState):
    """Dispatches to the state forced by a line's first character."""

    # First character of a line -> state class it forces.
    STATE_SYMBOLS = {
        '.': _SceneHeaderState,
        '!': _ActionState,
        '@': _CharacterState,
        '~': _LyricsState,
        '>': _TransitionState
    }

    def __init__(self):
        super().__init__()
        # State class selected by the last successful `match`.
        self._state_cls = None

    def match(self, fp, ctx):
        """Match an empty line followed by a line starting with one of
        the `STATE_SYMBOLS`, and remember the state class it maps to.
        """
        blank, forced = fp.peeklines(2)
        if not RE_EMPTY_LINE.match(blank):
            return False
        state_cls = self.STATE_SYMBOLS.get(forced[:1])
        if state_cls is None:
            return False
        self._state_cls = state_cls
        return True

    def consume(self, fp, ctx):
        """Read nothing; return a new instance of the selected state."""
        return self._state_cls()
228
229
# State classes tried, in this order, when a state returns `ANY_STATE`.
# The first one whose `match()` succeeds becomes the next state, so the
# order matters.
STATES = [
    _ForcedParagraphStates,  # Must be first.
    _SceneHeaderState,
    _CharacterState,
    _TransitionState,
    _ActionState,  # Must be last.
]
237
238
239 class _PeekableFile:
240 def __init__(self, fp):
241 self.line_no = 1
242 self._fp = fp
243
244 def read(self, size=-1):
245 return self._doRead(size, True)
246
247 def read1(self):
248 return self.read(1)
249
250 def peek1(self):
251 pos = self._fp.tell()
252 c = self._doRead(1, False)
253 self._fp.seek(pos)
254 return c
255
256 def readline(self, size=-1):
257 data = self._fp.readline(size)
258 self.line_no += 1
259 return data
260
261 def peekline(self):
262 pos = self._fp.tell()
263 line = self._fp.readline()
264 self._fp.seek(pos)
265 return line
266
267 def peeklines(self, count):
268 pos = self._fp.tell()
269 lines = []
270 for i in range(count):
271 lines.append(self._fp.readline())
272 self._fp.seek(pos)
273 return lines
274
275 def seek0(self):
276 self._fp.seek(0)
277 self.line_no = 1
278
279 def _doRead(self, size, advance_line_no):
280 data = self._fp.read(size)
281 if advance_line_no:
282 self.line_no += data.count('\n')
283 return data
284
285
286 class _FontaineStateMachine:
287 def __init__(self, fp, doc):
288 self.fp = _PeekableFile(fp)
289 self.state = None
290 self.document = doc
291
292 @property
293 def line_no(self):
294 return self.fp.line_no
295
296 def run(self):
297 self.state = _TitlePageState()
298 while True:
299 logger.debug("State '%s' consuming from '%s'..." %
300 (self.state.__class__.__name__, self.fp.peekline()))
301 res = self.state.consume(self.fp, self)
302
303 # See if we reached the end of the file.
304 if not self.fp.peekline():
305 logger.debug("Reached end of line... ending parsing.")
306 res = EOF_STATE
307
308 # Figure out what to do next...
309
310 if res is None:
311 raise Exception(
312 "States need to return `ANY_STATE`, one or more specific "
313 "states, or `EOF_STATE` if they reached the end of the "
314 "file.")
315
316 if res is True:
317 # State continues to consume.
318 continue
319
320 if res is ANY_STATE or isinstance(res, list):
321 # State wants to exit, we need to figure out what is the
322 # next state.
323 pos = self.fp._fp.tell()
324 next_states = res
325 if next_states is ANY_STATE:
326 next_states = STATES
327 logger.debug("Trying to match next state from: %s" %
328 [t.__name__ for t in next_states])
329 for sc in next_states:
330 s = sc()
331 if s.match(self.fp, self):
332 logger.debug("Matched state %s" %
333 s.__class__.__name__)
334 self.fp._fp.seek(pos)
335 res = s
336 break
337 else:
338 raise Exception("Can't match following state after: %s" %
339 self.state)
340 if self.state:
341 if type(self.state) == type(res) and self.state.can_merge:
342 # Don't switch states if the next state is the same
343 # type and that type supports merging.
344 continue
345
346 self.state.exit(self)
347
348 self.state = res
349 continue
350
351 if isinstance(res, FontaineState):
352 # State wants to exit, wants a specific state to be next.
353 if self.state:
354 self.state.exit(self)
355 self.state = res
356 continue
357
358 if res is EOF_STATE:
359 # Reached end of file.
360 if self.state:
361 self.state.exit(self)
362 break
363
364 raise Exception("Unsupported state result: %s" % res)
365
366
class FontaineParser:
    """Entry point that parses a file or a string into a document."""

    def __init__(self):
        pass

    def parse(self, filein):
        """Parse a file and return the resulting document.

        `filein` is either a path, which is opened for reading, or an
        already opened file object, which is parsed as is and left open.
        """
        if isinstance(filein, str):
            with open(filein, 'r') as fp:
                return self._doParse(fp)
        # Anything that is not a path is the file object to parse.
        return self._doParse(filein)

    def parseString(self, text):
        """Parse text held in memory and return the resulting document."""
        import io
        with io.StringIO(text) as fp:
            return self._doParse(fp)

    def _doParse(self, fp):
        """Run the state machine over `fp` and return the new document."""
        from .document import FontaineDocument
        doc = FontaineDocument()
        machine = _FontaineStateMachine(fp, doc)
        machine.run()
        return doc