Mercurial > piecrust2
comparison piecrust/importing/jekyll.py @ 60:6e60e0fef2be
Add `import` command, Jekyll importer.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 26 Aug 2014 23:20:48 -0700 |
parents | |
children | 52e4d9a1f917 |
comparison
equal
deleted
inserted
replaced
59:e3e3de44377c | 60:6e60e0fef2be |
---|---|
1 import os | |
2 import os.path | |
3 import re | |
4 import shutil | |
5 import yaml | |
6 import logging | |
7 from piecrust.configuration import parse_config_header | |
8 from piecrust.importing.base import Importer | |
9 from piecrust.uriutil import multi_replace | |
10 | |
11 | |
12 logger = logging.getLogger(__name__) | |
13 | |
14 | |
class JekyllImporter(Importer):
    """Importer that converts a Jekyll or Octopress website.

    The source tree is walked once and every file is routed to one of the
    `convert*` methods, which write their output below `app.root_dir`:

    - `_config.yml`  -> `config.yml`
    - `_layouts/*`   -> `layouts/`
    - `_includes/*`  -> `includes/`
    - `_posts/*`     -> `posts/`
    - other files starting with `---` -> `pages/` (or `assets/` when the
      extension is not a known page extension)
    - everything else -> `assets/`
    """

    # Extensions, in the form returned by `os.path.splitext` (leading dot
    # included), of the files that are converted as pages. Anything else with
    # a header is treated as a static file whose header gets stripped.
    # NOTE(review): `.md` is not in this list, same as the original code --
    # confirm whether that is intended.
    _PAGE_EXTENSIONS = ('.htm', '.html', '.textile', '.markdown')

    # Jekyll's named permalink styles, expanded to their template form.
    _PERMALINK_PRESETS = {
        'date': '/:categories/:year/:month/:day/:title.html',
        'pretty': '/:categories/:year/:month/:day/:title/',
        'none': '/:categories/:title.html',
    }

    # Matches right after the opening of a `{% ... %}` or `{{ ... }}` tag and
    # captures everything (within that tag) that precedes the name being
    # replaced, so the replacement can put it back with `\1`.
    _IN_TAG = r'(?<=\{%|\{\{)([^\}]*)'

    # Ordered (pattern, replacement) pairs applied to every converted text.
    # The order is significant and matches the original sequence of
    # substitutions.
    _LIQUID_TO_JINJA = (
        # - list of posts
        (_IN_TAG + r'site\.posts', r'\1blog.posts'),
        (_IN_TAG + r'paginator\.posts', r'\1pagination.posts'),
        # - list of categories or tags
        (_IN_TAG + r'site\.categories', r'\1blog.categories'),
        (_IN_TAG + r'site\.tags', r'\1blog.tags'),
        # - list of related posts
        (_IN_TAG + r'site\.related_posts', r'\1pagination.related_posts'),
        # - enumeration limits
        (r'\{%\s*for\s+([^}]+)\s+limit:\s*(\d+)', r'{% for \1[:\2]'),
        (r'\{%\s*for\s+([^}]+)\s+offset:\s*(\d+)', r'{% for \1[\2:]'),
        # - code highlighting
        (r'\{%\s*highlight\s+([\w\d]+)\s*%\}', r"{% geshi '\1' %}"),
        (r'\{%\s*endhighlight\s*%\}', '{% endgeshi %}'),
        # - unless tag
        (r'\{%\s*unless\s+([^}]+)\s*%\}', r'{% if not \1 %}'),
        (r'\{%\s*endunless\s*%\}', '{% endif %}'),
        # - variable assignment
        (r'\{%\s*assign\s+', '{% set '),
        # - include tag
        (r'\{%\s*include\s+([\w\d\.\-_]+)\s*%\}', r'{% include "\1" %}'),
        # - truncate filter
        (r'\|\s*truncate:\s*(\d+)', r'|truncate(\1)'),
        # - date filter
        (r'\|\s*date:\s*"([^"]+)"', r'|date("\1")'),
        # - some filters we don't need
        (r'\|\s*date_to_string', ''),
    )

    def __init__(self):
        super().__init__()
        self.name = 'jekyll'
        self.description = "Imports content from a Jekyll or Octopress blog."

    def setupParser(self, parser, app):
        """Register the `root_dir` positional argument on `parser`."""
        parser.add_argument(
            'root_dir',
            help="The root directory of the Jekyll or Octopress website.")

    def importWebsite(self, app, args):
        """Walk `args.root_dir` and convert every non-hidden file.

        Hidden files and directories (names starting with a dot) are skipped.
        """
        logger.debug("Importing Jekyll site from: %s", args.root_dir)
        for dirpath, dirnames, filenames in os.walk(args.root_dir):
            # Prune in place so `os.walk` doesn't descend into hidden dirs.
            dirnames[:] = [d for d in dirnames if not d.startswith('.')]
            for fn in filenames:
                if fn.startswith('.'):
                    continue

                full_fn = os.path.join(dirpath, fn)
                rel_fn = os.path.relpath(full_fn, args.root_dir)
                if rel_fn.startswith('.' + os.sep):
                    rel_fn = fn

                logger.debug("- %s", rel_fn)
                self._convertFile(app, full_fn, rel_fn)

        logger.info("The Jekyll website was successfully imported.")

    def _convertFile(self, app, full_fn, rel_fn):
        """Route one file to the converter matching its relative path."""
        if rel_fn == '_config.yml':
            self.convertConfig(app, full_fn)
            return

        special_dirs = (
            ('_layouts', self.convertLayout),
            ('_includes', self.convertInclude),
            ('_posts', self.convertPost),
        )
        for dirname, convert in special_dirs:
            # Include the separator so a root-level file that merely starts
            # with the directory name (e.g. `_layouts_old.html`) is not
            # mistaken for a member of that directory.
            prefix = dirname + os.sep
            if rel_fn.startswith(prefix):
                convert(app, full_fn, rel_fn[len(prefix):])
                return

        # The file is opened in binary mode (it may not be text at all), so
        # the header marker must be compared as bytes.
        with open(full_fn, 'rb') as fp:
            firstline = fp.read(3)
        if firstline == b'---':
            self.convertPage(app, full_fn, rel_fn)
        else:
            self.convertStatic(app, full_fn, rel_fn)

    def convertConfig(self, app, src_path):
        """Translate Jekyll's `_config.yml` into `config.yml` in the app root.

        The loaded settings are kept and the `site`, `baker` and `jinja`
        sections are added or adjusted. A non-dictionary `markdown` setting
        is discarded with a warning.
        """
        logger.debug(" Converting configuration file.")
        with open(src_path, 'r', encoding='utf8') as fp:
            # `safe_load` replaces the original `yaml.load(fp)`: the latter
            # can construct arbitrary Python objects and is rejected without
            # an explicit Loader by recent PyYAML versions. An empty file
            # loads as `None`, hence the fallback.
            config = yaml.safe_load(fp) or {}

        site = config.setdefault('site', {})
        site['related_posts'] = []
        site['posts_fs'] = 'flat'
        site['templates_dirs'] = ['includes', 'layouts']
        site['tag_url'] = 'tags/%tag%'

        if 'permalink' in config:
            permalink = config['permalink']
            permalink = self._PERMALINK_PRESETS.get(permalink, permalink)

            # TODO: handle `:categories` token.
            post_url = multi_replace(
                permalink,
                {':year': '%year%', ':month': '%month%', ':day': '%day%',
                 ':title': '%slug%', ':categories': ''})
            # Dropping `:categories` can leave doubled slashes behind.
            post_url = post_url.replace('//', '/').strip('/')
            site['post_url'] = post_url

        if 'exclude' in config:
            # assumes `exclude` is a list of strings -- TODO confirm.
            baker = config.setdefault('baker', {})
            baker['skip_patterns'] = [
                '^/_%s/' % re.escape(i) for i in config['exclude']]

        config.setdefault('jinja', {})['auto_escape'] = False

        if 'markdown' in config and not isinstance(config['markdown'], dict):
            logger.warning("Discarding markdown setting: %s",
                           config['markdown'])
            del config['markdown']

        out_path = os.path.join(app.root_dir, 'config.yml')
        with open(out_path, 'w', encoding='utf8') as fp:
            yaml.dump(config, stream=fp)

    def convertPage(self, app, path, rel_path):
        """Convert a file that starts with a `---` header.

        Files whose extension is not a page extension are forwarded to
        `convertStatic` with header stripping enabled. A root `index.*` page
        is written as `pages/_index.*` and also copied to `pages/_tag.*`.
        """
        logger.debug(" Converting page: %s", rel_path)
        _, ext = os.path.splitext(rel_path)

        if ext not in self._PAGE_EXTENSIONS:
            # There could be static files (SCSS or Less files) that look like
            # pages because they have a YAML front matter.
            logger.debug(" Actually a static file... forwarding converstion.")
            self.convertStatic(app, path, rel_path, True)
            return

        pages_dir = os.path.join(app.root_dir, 'pages')
        is_index = re.search(
            r'^index\.(html?|textile|markdown)$', rel_path) is not None
        if is_index:
            out_path = os.path.join(pages_dir, '_index' + ext)
        else:
            out_path = os.path.join(pages_dir, rel_path)

        self._doConvertPage(app, path, out_path)
        if is_index:
            # `ext` already carries its leading dot.
            # NOTE(review): presumably the tag listing page reuses the index
            # template -- confirm.
            shutil.copy2(out_path, os.path.join(pages_dir, '_tag' + ext))

    def convertPost(self, app, path, rel_path):
        """Convert a post, renaming `YYYY-MM-DD-title` to `YYYY-MM-DD_title`."""
        logger.debug(" Converting post: %s", rel_path)
        out_name = re.sub(
            r'(\d{4}\-\d{2}\-\d{2})\-(.*)$',
            r'\1_\2',
            rel_path)
        out_path = os.path.join(app.root_dir, 'posts', out_name)
        self._doConvertPage(app, path, out_path)

    def convertLayout(self, app, path, rel_path):
        """Convert a layout template into the `layouts` directory."""
        logger.debug(" Converting layout: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'layouts', rel_path)
        self._doConvertPage(app, path, out_path, True)

    def convertInclude(self, app, path, rel_path):
        """Convert an include template into the `includes` directory."""
        logger.debug(" Converting include: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'includes', rel_path)
        self._doConvertPage(app, path, out_path, True)

    def convertStatic(self, app, path, rel_path, strip_header=False):
        """Copy a static file into the `assets` directory.

        When `strip_header` is true the file is read as UTF-8 text and
        written back without the leading part reported by
        `parse_config_header`; otherwise it is copied unchanged.
        """
        logger.debug(" Converting static: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'assets', rel_path)
        logger.debug(" %s -> %s", path, out_path)
        os.makedirs(os.path.dirname(out_path), 0o755, True)

        if not strip_header:
            shutil.copy2(path, out_path)
            return

        with open(path, 'r', encoding='utf8') as fp:
            content = fp.read()
        _, offset = parse_config_header(content)
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(content[offset:])

    def _doConvertPage(self, app, path, out_path, is_template=False):
        """Convert one page or template and write it to `out_path`.

        The header is split off with `parse_config_header`, the Liquid
        constructs that have a known counterpart are rewritten, and the
        result is written out. For non-templates the header is written back
        between `---` lines. If the text was modified, the source file is
        also copied to `<out_path>.orig` as a backup.
        """
        logger.debug(" %s -> %s", path, out_path)
        os.makedirs(os.path.dirname(out_path), 0o755, True)

        with open(path, 'r', encoding='utf8') as fp:
            contents = fp.read()

        config, offset = parse_config_header(contents)
        text = contents[offset:]
        text_before = text

        wrap_content_tag = True

        if is_template:
            if 'layout' in config:
                # Liquid doesn't support template inheritance but
                # Jinja does.
                text = ("{%% extends '%s.html' %%}\n\n"
                        "{%% block jekyllcontent %%}\n"
                        "%s\n"
                        "{%% endblock %%}\n" % (config['layout'], text))
                wrap_content_tag = False
        elif config.get('layout') == 'nil':
            config['layout'] = 'none'

        # Convert the template stuff we can:
        # - content tag may have to be wrapped in a `jekyllcontent`
        #   because Jekyll uses implicit layout inheritance
        #   placements.
        if wrap_content_tag:
            text = re.sub(
                r'\{\{\s*content\s*\}\}',
                r'{% block jekyllcontent %}{{ content }}{% endblock %}',
                text)
        for pattern, replacement in self._LIQUID_TO_JINJA:
            text = re.sub(pattern, replacement, text)

        if text != text_before:
            # We changed the text, so create a backup.
            shutil.copy2(path, '%s.orig' % out_path)

        with open(out_path, 'w', encoding='utf8') as fp:
            if not is_template:
                fp.write("---\n")
                fp.write(yaml.dump(config))
                fp.write("---\n")
            fp.write(text)
285 |