Mercurial > jouvence
comparison fontaine/parser.py @ 0:243401c49520
Initial commit.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Mon, 02 Jan 2017 12:30:49 -0800 |
parents | |
children | 74b83e3d921e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:243401c49520 |
---|---|
1 import re | |
2 import logging | |
3 | |
4 | |
# Module-level logger; the state machine below traces its work at DEBUG level.
logger = logging.getLogger(__name__)
6 | |
7 | |
class FontaineState:
    """Base class for the parser's states.

    Subclasses override `match` to claim the upcoming input, `consume` to
    read it, and `exit` to flush whatever they accumulated.
    """

    # When true, the state machine keeps using the current instance if the
    # next matched state has the same type, instead of exiting and switching.
    can_merge = False

    def match(self, fp, ctx):
        """Tell whether this state applies to the upcoming input.

        The base implementation never matches.
        """
        return False

    def consume(self, fp, ctx):
        """Read input for this state; concrete states must override this."""
        raise NotImplementedError()

    def exit(self, ctx):
        """Hook called when the state machine leaves this state; no-op here."""
        return None
19 | |
20 | |
class FontaineParserError(Exception):
    """Error raised by the parser, tagged with a line number.

    The exception text is built from `line_no` and `message`; neither is
    stored as a separate attribute.
    """

    def __init__(self, line_no, message):
        text = "Error line %d: %s" % (line_no, message)
        super().__init__(text)
24 | |
25 | |
# Sentinel results returned by a state's `consume()`. The state machine
# compares them by identity, so plain `object()` instances are enough.
ANY_STATE = object()  # Leave the current state; pick the next one from STATES.
EOF_STATE = object()  # The input is exhausted.


# A line with nothing at all before its line terminator.
RE_EMPTY_LINE = re.compile(r"^$", flags=re.MULTILINE)
# A line holding only whitespace (or nothing).
RE_BLANK_LINE = re.compile(r"^\s*$", flags=re.MULTILINE)

# The `key:` prefix of a title page entry; the key is captured as `key`.
RE_TITLE_KEY_VALUE = re.compile(r"^(?P<key>[\w\s\-]+)\s*:")
34 | |
35 | |
class _TitlePageState(FontaineState):
    """Reads the optional `key: value` title page at the top of the input.

    Each call to `consume` handles one line. Values are stored into
    `ctx.document.title_values` when the next key starts, when the title
    page ends, or when the state is exited.
    """

    def __init__(self):
        super().__init__()
        # Key and accumulated value of the entry currently being read.
        self._cur_key = None
        self._cur_val = None

    def consume(self, fp, ctx):
        """Process one line of the title page.

        Returns `True` to keep consuming, `ANY_STATE` once the title page is
        over (or turned out not to exist), or `EOF_STATE` at end of input.

        Raises:
            FontaineParserError: a non key/value line shows up while no entry
                is being read although title values were already recorded.
        """
        line = fp.readline()
        if not line:
            return EOF_STATE

        if RE_EMPTY_LINE.match(line):
            self._commit(ctx)
            # Finished with the page title, now move on to the first scene.
            # However, if we never had any page title, go back to the
            # beginning so we don't consume anybody else's empty lines.
            if len(ctx.document.title_values) == 0:
                fp.seek0()
            return ANY_STATE

        m = RE_TITLE_KEY_VALUE.match(line)
        if m:
            # Commit current value, start new one.
            self._commit(ctx)
            self._cur_key = m.group('key')
            self._cur_val = line[m.end():].strip()
            return True

        if self._cur_val is None:
            if len(ctx.document.title_values) == 0:
                # Early exit because there's no title page.
                # Go back to the beginning so we don't consume somebody's
                # first line of text.
                fp.seek0()
                return ANY_STATE

            raise FontaineParserError(
                fp.line_no,
                "Page title needs to be followed by 2 empty lines.")

        # Keep accumulating the value of one of the title page's values.
        # Continuation lines are joined with a newline so that a multi-line
        # value keeps its line breaks instead of being fused together.
        extra = line.strip()
        if extra:
            if self._cur_val:
                self._cur_val += '\n' + extra
            else:
                self._cur_val = extra
        return True

    def exit(self, ctx):
        """Store the entry still being read, if any."""
        self._commit(ctx)

    def _commit(self, ctx):
        """Store the pending key/value into the document and reset."""
        if self._cur_key is not None:
            ctx.document.title_values[self._cur_key] = self._cur_val
            self._cur_key = None
            self._cur_val = None
87 | |
88 | |
# Start of a scene heading: one of the location prefixes (case-insensitive)
# followed by whitespace or a period. The period in `int./ext` is escaped so
# it only matches a literal dot, not any character.
RE_SCENE_HEADER_PATTERN = re.compile(
    r"^(int|ext|est|int/ext|int\./ext|i/e)[\s\.]", re.I)
91 | |
92 | |
class _SceneHeaderState(FontaineState):
    """Recognises a scene heading that follows an empty line."""

    def match(self, fp, ctx):
        """Match when the next two lines are an empty line and a heading."""
        separator, heading = fp.peeklines(2)
        return (
            RE_EMPTY_LINE.match(separator) and
            RE_SCENE_HEADER_PATTERN.match(heading))

    def consume(self, fp, ctx):
        """Read the heading and add a new scene to the document."""
        fp.readline()  # Skip the empty separator line.
        heading = fp.readline().rstrip('\r\n')
        # Drop leading periods, present when the heading was forced.
        heading = heading.lstrip('.')
        ctx.document.addScene(heading)
        return ANY_STATE
106 | |
107 | |
class _ActionState(FontaineState):
    """Catch-all state that gathers lines of action text."""

    # Back-to-back action matches keep feeding the same instance.
    can_merge = True

    def __init__(self):
        super().__init__()
        self.text = ''

    def match(self, fp, ctx):
        """Always match; this state accepts any input."""
        return True

    def consume(self, fp, ctx):
        """Append lines to `self.text` until the next line is empty.

        Returns `EOF_STATE` if a read hits the end of the input, otherwise
        `ANY_STATE` once the upcoming line is empty.
        """
        at_start = True
        while True:
            line = fp.readline()
            if not line:
                return EOF_STATE

            if at_start:
                # Drop leading '!' markers from the first line read.
                line = line.lstrip('!')
                at_start = False

            self.text += line

            if RE_EMPTY_LINE.match(fp.peekline()):
                return ANY_STATE

    def exit(self, ctx):
        """Hand the gathered text to the document's last scene."""
        scene = ctx.document.lastScene()
        scene.addAction(self.text)
138 | |
139 | |
# A character cue: upper-case letters and dashes, optionally followed by a
# parenthesised extension.
RE_CHARACTER_LINE = re.compile(
    r"^[A-Z\-]+\s*(\(.*\))?$",
    flags=re.MULTILINE)
141 | |
142 | |
class _CharacterState(FontaineState):
    """Recognises a character cue introducing dialog."""

    def match(self, fp, ctx):
        """Match an empty line, then a cue, then a non-empty line."""
        separator, cue, following = fp.peeklines(3)
        return (RE_EMPTY_LINE.match(separator) and
                RE_CHARACTER_LINE.match(cue) and
                not RE_EMPTY_LINE.match(following))

    def consume(self, fp, ctx):
        """Read the cue and register the character on the last scene.

        Returns the list of states allowed to follow a character cue.
        """
        fp.readline()  # Skip the empty separator line.
        cue = fp.readline().rstrip('\r\n')
        # Drop leading '@' markers, present when the cue was forced.
        cue = cue.lstrip('@')
        ctx.document.lastScene().addCharacter(cue)
        return [_ParentheticalState, _DialogState]
156 | |
157 | |
# A line whose only content, apart from surrounding whitespace, is wrapped
# in parentheses.
RE_PARENTHETICAL_LINE = re.compile(
    r"^\s*\(.*\)\s*$",
    flags=re.MULTILINE)
159 | |
160 | |
class _ParentheticalState(FontaineState):
    """Recognises a parenthetical line inside a dialog block."""

    def match(self, fp, ctx):
        """Match when the next line is wrapped in parentheses."""
        # We only get here from a `_CharacterState` so we know the previous
        # one is already that.
        upcoming = fp.peekline()
        return RE_PARENTHETICAL_LINE.match(upcoming)

    def consume(self, fp, ctx):
        """Read the line and add it to the last scene as a parenthetical.

        Returns the list of states allowed to follow a parenthetical.
        """
        remark = fp.readline().rstrip('\r\n')
        scene = ctx.document.lastScene()
        scene.addParenthetical(remark)
        return [_DialogState, _CharacterState, _ActionState]
172 | |
173 | |
class _DialogState(FontaineState):
    """Gathers the lines of a dialog block."""

    def __init__(self):
        super().__init__()
        self.text = ''

    def match(self, fp, ctx):
        """Match when the next line is not empty."""
        upcoming = fp.peekline()
        return not RE_EMPTY_LINE.match(upcoming)

    def consume(self, fp, ctx):
        """Append lines to `self.text` until the next line is empty.

        Returns `EOF_STATE` if a read hits the end of the input, otherwise
        `ANY_STATE` once the upcoming line is empty.
        """
        while True:
            line = fp.readline()
            if not line:
                return EOF_STATE
            self.text += line
            if RE_EMPTY_LINE.match(fp.peekline()):
                return ANY_STATE

    def exit(self, ctx):
        """Hand the dialog, minus trailing line breaks, to the last scene."""
        spoken = self.text.rstrip('\r\n')
        ctx.document.lastScene().addDialog(spoken)
195 | |
196 | |
class _LyricsState(FontaineState):
    """State for lyrics; adds nothing to the `FontaineState` base yet."""
199 | |
200 | |
class _TransitionState(FontaineState):
    """State for transitions; adds nothing to the `FontaineState` base yet."""
203 | |
204 | |
class _ForcedParagraphStates(FontaineState):
    """Dispatches to a specific state when a line starts with a force symbol."""

    # First character of a forced line -> state class it selects.
    STATE_SYMBOLS = {
        '.': _SceneHeaderState,
        '!': _ActionState,
        '@': _CharacterState,
        '~': _LyricsState,
        '>': _TransitionState
    }

    def __init__(self):
        super().__init__()
        # State class picked by the last successful `match`.
        self._state_cls = None

    def match(self, fp, ctx):
        """Match an empty line followed by a line starting with a symbol.

        On success the selected state class is remembered for `consume`.
        """
        separator, candidate = fp.peeklines(2)
        if not RE_EMPTY_LINE.match(separator):
            return False

        symbol = candidate[:1]
        if symbol not in self.STATE_SYMBOLS:
            return False

        self._state_cls = self.STATE_SYMBOLS[symbol]
        return True

    def consume(self, fp, ctx):
        """Read nothing; return a new instance of the selected state."""
        return self._state_cls()
228 | |
229 | |
# Candidate states tried, in this order, when a state returns `ANY_STATE`.
# The first whose `match()` succeeds wins.
STATES = [
    # Must be first.
    _ForcedParagraphStates,
    _SceneHeaderState,
    _CharacterState,
    _TransitionState,
    # Must be last.
    _ActionState,
]
237 | |
238 | |
239 class _PeekableFile: | |
240 def __init__(self, fp): | |
241 self.line_no = 1 | |
242 self._fp = fp | |
243 | |
244 def read(self, size=-1): | |
245 return self._doRead(size, True) | |
246 | |
247 def read1(self): | |
248 return self.read(1) | |
249 | |
250 def peek1(self): | |
251 pos = self._fp.tell() | |
252 c = self._doRead(1, False) | |
253 self._fp.seek(pos) | |
254 return c | |
255 | |
256 def readline(self, size=-1): | |
257 data = self._fp.readline(size) | |
258 self.line_no += 1 | |
259 return data | |
260 | |
261 def peekline(self): | |
262 pos = self._fp.tell() | |
263 line = self._fp.readline() | |
264 self._fp.seek(pos) | |
265 return line | |
266 | |
267 def peeklines(self, count): | |
268 pos = self._fp.tell() | |
269 lines = [] | |
270 for i in range(count): | |
271 lines.append(self._fp.readline()) | |
272 self._fp.seek(pos) | |
273 return lines | |
274 | |
275 def seek0(self): | |
276 self._fp.seek(0) | |
277 self.line_no = 1 | |
278 | |
279 def _doRead(self, size, advance_line_no): | |
280 data = self._fp.read(size) | |
281 if advance_line_no: | |
282 self.line_no += data.count('\n') | |
283 return data | |
284 | |
285 | |
class _FontaineStateMachine:
    """Drives the parser states over a file until the input is exhausted.

    The instance is also the `ctx` handed to every state, which gives states
    access to `document`.
    """

    def __init__(self, fp, doc):
        self.fp = _PeekableFile(fp)
        self.state = None
        self.document = doc

    @property
    def line_no(self):
        """Current line number, as tracked by the wrapped file."""
        return self.fp.line_no

    def run(self):
        """Run the states, starting with the title page, until end of input.

        Raises:
            Exception: a state returned `None` or an unsupported value, or
                no candidate state matched the upcoming input.
        """
        self.state = _TitlePageState()
        while True:
            logger.debug(
                "State '%s' consuming from '%s'...",
                self.state.__class__.__name__, self.fp.peekline())
            outcome = self.state.consume(self.fp, self)

            # Nothing left to read: this overrides whatever the state said.
            if not self.fp.peekline():
                logger.debug("Reached end of line... ending parsing.")
                outcome = EOF_STATE

            if outcome is None:
                raise Exception(
                    "States need to return `ANY_STATE`, one or more specific "
                    "states, or `EOF_STATE` if they reached the end of the "
                    "file.")

            if outcome is True:
                # The current state has more to consume.
                continue

            if outcome is ANY_STATE or isinstance(outcome, list):
                # The state is done; work out which state comes next.
                candidates = STATES if outcome is ANY_STATE else outcome
                upcoming = self._matchNext(candidates)
                if self.state:
                    if (type(self.state) == type(upcoming) and
                            self.state.can_merge):
                        # Same mergeable type: keep the current instance.
                        continue
                    self.state.exit(self)
                self.state = upcoming
                continue

            if isinstance(outcome, FontaineState):
                # The state is done and named its successor itself.
                self._exitCurrent()
                self.state = outcome
                continue

            if outcome is EOF_STATE:
                self._exitCurrent()
                break

            raise Exception("Unsupported state result: %s" % outcome)

    def _matchNext(self, candidates):
        """Return a new instance of the first candidate class that matches."""
        start = self.fp._fp.tell()
        logger.debug(
            "Trying to match next state from: %s",
            [cls.__name__ for cls in candidates])
        for cls in candidates:
            candidate = cls()
            if candidate.match(self.fp, self):
                logger.debug("Matched state %s", candidate.__class__.__name__)
                # Put the stream back where it was before matching.
                self.fp._fp.seek(start)
                return candidate
        raise Exception("Can't match following state after: %s" %
                        self.state)

    def _exitCurrent(self):
        """Call `exit` on the current state, if there is one."""
        if self.state:
            self.state.exit(self)
365 | |
366 | |
class FontaineParser:
    """Public entry point: parses input into a `FontaineDocument`."""

    def __init__(self):
        pass

    def parse(self, filein):
        """Parse a file given by path or as an already opened file object.

        Args:
            filein: a `str` path, opened here for reading, or a file-like
                object that is parsed as given (and not closed here).

        Returns:
            The document produced by `_doParse`.
        """
        if isinstance(filein, str):
            with open(filein, 'r') as fp:
                return self._doParse(fp)
        # Not a path: `filein` is itself the file object to parse.
        return self._doParse(filein)

    def parseString(self, text):
        """Parse text held in memory."""
        import io
        with io.StringIO(text) as fp:
            return self._doParse(fp)

    def _doParse(self, fp):
        """Run the state machine over `fp` and return the filled document."""
        from .document import FontaineDocument
        doc = FontaineDocument()
        machine = _FontaineStateMachine(fp, doc)
        machine.run()
        return doc
388 return doc |