0
|
1 import re
|
|
2 import logging
|
|
3
|
|
4
|
|
# Logger for this module; it takes the module's own name so applications can
# configure its level independently.
logger = logging.getLogger(__name__)
|
|
6
|
|
7
|
|
class FontaineState:
    """Base class for the parser's states.

    The defaults describe a state that never matches, cannot consume
    anything, and has nothing to flush when it is left.
    """

    # When true, the state machine keeps using the current instance instead
    # of switching to a freshly matched state of the same type.
    can_merge = False

    def match(self, fp, ctx):
        """Tell whether this state applies to the upcoming input.

        The base implementation always answers ``False``.
        """
        return False

    def consume(self, fp, ctx):
        """Read input belonging to this state; subclasses must override.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError()

    def exit(self, ctx):
        """Hook called when the state is left; does nothing by default."""
        return None
|
|
19
|
|
20
|
|
class FontaineParserError(Exception):
    """Parsing error whose message is prefixed with a line number."""

    def __init__(self, line_no, message):
        """Build the error text from ``line_no`` and ``message``."""
        text = "Error line %d: %s" % (line_no, message)
        super().__init__(text)
|
|
24
|
|
25
|
|
# Sentinels a state's `consume` can hand back to the state machine:
# `ANY_STATE` asks it to pick the next state from `STATES`, `EOF_STATE`
# tells it the input is exhausted.
ANY_STATE = object()
EOF_STATE = object()


# A line holding nothing at all (a lone line terminator still matches).
RE_EMPTY_LINE = re.compile(r"^$", re.MULTILINE)
# A line holding only whitespace, or nothing.
RE_BLANK_LINE = re.compile(r"^\s*$", re.MULTILINE)

# The `key:` prefix of a title page entry; the key is captured as `key`.
RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:")
|
|
34
|
|
35
|
|
class _TitlePageState(FontaineState):
    """First state of the parser: collects `key: value` title page entries.

    Each call to `consume` handles exactly one line. The entry being built is
    kept in `_cur_key` / `_cur_val` until it is committed into
    `ctx.document.title_values`.
    """

    def __init__(self):
        super().__init__()
        self._cur_key = None
        self._cur_val = None

    def consume(self, fp, ctx):
        """Process one line of the title page.

        Returns `True` to be called again, `ANY_STATE` once the title page is
        over (or turned out not to exist), or `EOF_STATE` when there is
        nothing left to read.

        Raises:
            FontaineParserError: a line that is neither empty nor a
                `key:` entry shows up while no entry is open, although some
                title values were already stored.
        """
        line = fp.readline()
        if not line:
            return EOF_STATE

        if RE_EMPTY_LINE.match(line):
            # An empty line ends the title page.
            self._commit(ctx)
            if len(ctx.document.title_values) == 0:
                # There never was a title page: rewind so that the empty
                # line is left for whichever state comes next.
                fp.seek0()
            return ANY_STATE

        key_match = RE_TITLE_KEY_VALUE.match(line)
        if key_match:
            # A new entry starts; store the previous one first.
            self._commit(ctx)
            self._cur_key = key_match.group('key')
            self._cur_val = line[key_match.end():].strip()
            return True

        if self._cur_val is not None:
            # Continuation of the entry currently being built.
            # NOTE(review): continuation lines are appended with no separator
            # between them -- confirm this is the intended result.
            self._cur_val += line.strip()
            return True

        if len(ctx.document.title_values) == 0:
            # Plain text right away means there is no title page. Rewind so
            # the first line of text is not lost.
            fp.seek0()
            return ANY_STATE

        raise FontaineParserError(
            fp.line_no,
            "Page title needs to be followed by 2 empty lines.")

    def exit(self, ctx):
        """Store whatever entry is still pending when the state is left."""
        self._commit(ctx)

    def _commit(self, ctx):
        """Write the pending entry, if any, to the document and reset it."""
        if self._cur_key is None:
            return
        ctx.document.title_values[self._cur_key] = self._cur_val
        self._cur_key = None
        self._cur_val = None
|
|
87
|
|
88
|
|
# Start of a scene heading: one of the recognised prefixes (case-insensitive)
# followed by whitespace or a period. The period inside `int./ext` is escaped
# so that it only matches a literal dot and not any character.
RE_SCENE_HEADER_PATTERN = re.compile(
    r"^(int|ext|est|int/ext|int\./ext|i/e)[\s\.]", re.I)
|
|
91
|
|
92
|
|
class _SceneHeaderState(FontaineState):
    """State for a scene heading that follows an empty line."""

    def match(self, fp, ctx):
        """Match when the next two lines are an empty line then a heading."""
        separator, heading = fp.peeklines(2)
        return (
            RE_EMPTY_LINE.match(separator) and
            RE_SCENE_HEADER_PATTERN.match(heading))

    def consume(self, fp, ctx):
        """Skip the separator line and add the heading as a new scene."""
        fp.readline()  # The separator line.
        # Drop the line terminator, then any leading dots (forced heading).
        heading = fp.readline().rstrip('\r\n').lstrip('.')
        ctx.document.addScene(heading)
        return ANY_STATE
|
|
106
|
|
107
|
|
class _ActionState(FontaineState):
    """Catch-all state that accumulates action text.

    `match` always succeeds. Because `can_merge` is set, the state machine
    keeps feeding the same instance when another action block follows, so
    `text` can span several paragraphs before `exit` hands it to the scene.
    """

    can_merge = True

    def __init__(self):
        super().__init__()
        self.text = ''

    def match(self, fp, ctx):
        """Always match; this is the fallback state."""
        return True

    def consume(self, fp, ctx):
        """Read lines into `text` until the upcoming line is empty.

        Leading `!` characters (the forced-action marker) are removed from
        the first non-empty line. The other states only match after peeking
        an empty separator line, so the first line read here is usually that
        separator; stripping only the very first line read would leave the
        marker on the real text line.

        Returns `EOF_STATE` when the input runs out, `ANY_STATE` otherwise.
        """
        marker_pending = True
        while True:
            line = fp.readline()
            if not line:
                return EOF_STATE

            if marker_pending and not RE_EMPTY_LINE.match(line):
                line = line.lstrip('!')
                marker_pending = False

            self.text += line

            if RE_EMPTY_LINE.match(fp.peekline()):
                break

        return ANY_STATE

    def exit(self, ctx):
        """Add the accumulated text to the document's last scene."""
        ctx.document.lastScene().addAction(self.text)
|
|
138
|
|
139
|
|
# A character cue: upper-case letters and dashes only, optionally followed by
# a parenthesised extension.
RE_CHARACTER_LINE = re.compile(r"^[A-Z\-]+\s*(\(.*\))?$", re.MULTILINE)
|
|
141
|
|
142
|
|
class _CharacterState(FontaineState):
    """State for a character cue introducing dialog."""

    def match(self, fp, ctx):
        """Match an empty line, then a cue, then a line that is not empty."""
        before, cue, after = fp.peeklines(3)
        return (RE_EMPTY_LINE.match(before) and
                RE_CHARACTER_LINE.match(cue) and
                not RE_EMPTY_LINE.match(after))

    def consume(self, fp, ctx):
        """Skip the separator line and add the cue to the last scene.

        Returns the states allowed next: a parenthetical or dialog.
        """
        fp.readline()  # The separator line.
        # Drop the line terminator, then any leading `@` (forced cue).
        cue = fp.readline().rstrip('\r\n').lstrip('@')
        ctx.document.lastScene().addCharacter(cue)
        return [_ParentheticalState, _DialogState]
|
|
156
|
|
157
|
|
# A line that is entirely wrapped in parentheses, ignoring the whitespace
# around it.
RE_PARENTHETICAL_LINE = re.compile(r"^\s*\(.*\)\s*$", re.MULTILINE)
|
|
159
|
|
160
|
|
class _ParentheticalState(FontaineState):
    """State for a parenthetical line inside a dialog block."""

    def match(self, fp, ctx):
        """Match when the upcoming line is wrapped in parentheses.

        This state is only offered by `_CharacterState`, so no check of the
        preceding element is made here.
        """
        return RE_PARENTHETICAL_LINE.match(fp.peekline())

    def consume(self, fp, ctx):
        """Add the line, without its terminator, to the last scene.

        Returns the states allowed next: dialog, a character cue or action.
        """
        text = fp.readline().rstrip('\r\n')
        ctx.document.lastScene().addParenthetical(text)
        return [_DialogState, _CharacterState, _ActionState]
|
|
172
|
|
173
|
|
class _DialogState(FontaineState):
    """State that accumulates the lines of a dialog block."""

    def __init__(self):
        super().__init__()
        self.text = ''

    def match(self, fp, ctx):
        """Match whenever the upcoming line is not empty."""
        upcoming = fp.peekline()
        return RE_EMPTY_LINE.match(upcoming) is None

    def consume(self, fp, ctx):
        """Read lines into `text` until the upcoming line is empty.

        Returns `EOF_STATE` when the input runs out, `ANY_STATE` otherwise.
        """
        line = fp.readline()
        while line:
            self.text += line
            if RE_EMPTY_LINE.match(fp.peekline()):
                return ANY_STATE
            line = fp.readline()
        return EOF_STATE

    def exit(self, ctx):
        """Add the text, minus trailing line terminators, to the last scene."""
        dialog = self.text.rstrip('\r\n')
        ctx.document.lastScene().addDialog(dialog)
|
|
195
|
|
196
|
|
class _LyricsState(FontaineState):
    """Placeholder for lyrics; it only has the base class behaviour."""
|
|
199
|
|
200
|
|
class _TransitionState(FontaineState):
    """Placeholder for transitions; it only has the base class behaviour."""
|
|
203
|
|
204
|
|
class _ForcedParagraphStates(FontaineState):
    """Dispatcher for lines whose first character forces an element type."""

    # First character of the line -> state class it forces.
    STATE_SYMBOLS = {
        '.': _SceneHeaderState,
        '!': _ActionState,
        '@': _CharacterState,
        '~': _LyricsState,
        '>': _TransitionState
    }

    def __init__(self):
        super().__init__()
        self._state_cls = None

    def match(self, fp, ctx):
        """Match an empty line followed by a line starting with a symbol.

        On success the forced state class is remembered for `consume`.
        """
        separator, candidate = fp.peeklines(2)
        symbol = candidate[:1]
        if not (RE_EMPTY_LINE.match(separator) and
                symbol in self.STATE_SYMBOLS):
            return False
        self._state_cls = self.STATE_SYMBOLS[symbol]
        return True

    def consume(self, fp, ctx):
        """Read nothing; return a new instance of the forced state."""
        forced_state = self._state_cls()
        return forced_state
|
|
228
|
|
229
|
|
# Candidate states tried, in this order, when a state returns `ANY_STATE`.
# The forced-paragraph dispatcher has to be tried before everything else,
# and the action state, which matches anything, has to come last.
STATES = [
    _ForcedParagraphStates,
    _SceneHeaderState,
    _CharacterState,
    _TransitionState,
    _ActionState,
]
|
|
237
|
|
238
|
|
239 class _PeekableFile:
|
|
240 def __init__(self, fp):
|
|
241 self.line_no = 1
|
|
242 self._fp = fp
|
|
243
|
|
244 def read(self, size=-1):
|
|
245 return self._doRead(size, True)
|
|
246
|
|
247 def read1(self):
|
|
248 return self.read(1)
|
|
249
|
|
250 def peek1(self):
|
|
251 pos = self._fp.tell()
|
|
252 c = self._doRead(1, False)
|
|
253 self._fp.seek(pos)
|
|
254 return c
|
|
255
|
|
256 def readline(self, size=-1):
|
|
257 data = self._fp.readline(size)
|
|
258 self.line_no += 1
|
|
259 return data
|
|
260
|
|
261 def peekline(self):
|
|
262 pos = self._fp.tell()
|
|
263 line = self._fp.readline()
|
|
264 self._fp.seek(pos)
|
|
265 return line
|
|
266
|
|
267 def peeklines(self, count):
|
|
268 pos = self._fp.tell()
|
|
269 lines = []
|
|
270 for i in range(count):
|
|
271 lines.append(self._fp.readline())
|
|
272 self._fp.seek(pos)
|
|
273 return lines
|
|
274
|
|
275 def seek0(self):
|
|
276 self._fp.seek(0)
|
|
277 self.line_no = 1
|
|
278
|
|
279 def _doRead(self, size, advance_line_no):
|
|
280 data = self._fp.read(size)
|
|
281 if advance_line_no:
|
|
282 self.line_no += data.count('\n')
|
|
283 return data
|
|
284
|
|
285
|
|
class _FontaineStateMachine:
    """Drives the parser states over a file and fills in a document.

    The machine itself is passed to the states as their `ctx` argument, which
    is how they reach `document`.
    """

    def __init__(self, fp, doc):
        self.fp = _PeekableFile(fp)
        self.state = None
        self.document = doc

    @property
    def line_no(self):
        """Current line number, as tracked by the wrapped file."""
        return self.fp.line_no

    def run(self):
        """Run the states until the end of the input.

        Starts in `_TitlePageState` and repeatedly calls `consume` on the
        current state. The value returned decides what happens next:

        * `True`: the same state consumes again.
        * `ANY_STATE` or a list of state classes: the first candidate whose
          `match` succeeds becomes the next state.
        * a `FontaineState` instance: that instance becomes the next state.
        * `EOF_STATE`: the current state is exited and parsing stops.

        Raises:
            Exception: a state returned `None` or an unsupported value, or
                none of the candidate states matched.
        """
        self.state = _TitlePageState()
        while True:
            logger.debug("State '%s' consuming from '%s'...",
                         self.state.__class__.__name__, self.fp.peekline())
            res = self.state.consume(self.fp, self)

            # Nothing left to peek means the end of the file was reached,
            # whatever the state itself returned.
            if not self.fp.peekline():
                logger.debug("Reached end of file... ending parsing.")
                res = EOF_STATE

            # Figure out what to do next...

            if res is None:
                raise Exception(
                    "States need to return `ANY_STATE`, one or more specific "
                    "states, or `EOF_STATE` if they reached the end of the "
                    "file.")

            if res is True:
                # State continues to consume.
                continue

            if res is ANY_STATE or isinstance(res, list):
                # State wants to exit, we need to figure out what is the
                # next state.
                pos = self.fp._fp.tell()
                next_states = STATES if res is ANY_STATE else res
                logger.debug("Trying to match next state from: %s",
                             [t.__name__ for t in next_states])
                for sc in next_states:
                    s = sc()
                    if s.match(self.fp, self):
                        logger.debug("Matched state %s",
                                     s.__class__.__name__)
                        # Put the file back where it was before matching.
                        self.fp._fp.seek(pos)
                        res = s
                        break
                else:
                    raise Exception("Can't match following state after: %s" %
                                    self.state)
                if self.state:
                    if type(self.state) is type(res) and self.state.can_merge:
                        # Don't switch states if the next state is the same
                        # type and that type supports merging.
                        continue

                    self.state.exit(self)

                self.state = res
                continue

            if isinstance(res, FontaineState):
                # State wants to exit, wants a specific state to be next.
                if self.state:
                    self.state.exit(self)
                self.state = res
                continue

            if res is EOF_STATE:
                # Reached end of file.
                if self.state:
                    self.state.exit(self)
                break

            raise Exception("Unsupported state result: %s" % res)
|
|
365
|
|
366
|
|
class FontaineParser:
    """Entry point for parsing a screenplay into a `FontaineDocument`."""

    def __init__(self):
        pass

    def parse(self, filein):
        """Parse a file given either as a path or as an open file object.

        Args:
            filein: a `str` path, which is opened for reading and closed
                afterwards, or an already opened file object, which is used
                as is and left open.

        Returns:
            The document produced by `_doParse`.
        """
        if isinstance(filein, str):
            with open(filein, 'r') as fp:
                return self._doParse(fp)
        # Anything that is not a path is treated as the file object itself.
        return self._doParse(filein)

    def parseString(self, text):
        """Parse screenplay source held in a string."""
        import io
        with io.StringIO(text) as fp:
            return self._doParse(fp)

    def _doParse(self, fp):
        """Run the state machine over `fp` and return the new document."""
        # Imported here rather than at module level, as in the original code.
        from .document import FontaineDocument
        doc = FontaineDocument()
        machine = _FontaineStateMachine(fp, doc)
        machine.run()
        return doc
|