comparison piecrust/importing/jekyll.py @ 60:6e60e0fef2be

Add `import` command, Jekyll importer.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 26 Aug 2014 23:20:48 -0700
parents
children 52e4d9a1f917
comparison
equal deleted inserted replaced
59:e3e3de44377c 60:6e60e0fef2be
1 import os
2 import os.path
3 import re
4 import shutil
5 import yaml
6 import logging
7 from piecrust.configuration import parse_config_header
8 from piecrust.importing.base import Importer
9 from piecrust.uriutil import multi_replace
10
11
12 logger = logging.getLogger(__name__)
13
14
class JekyllImporter(Importer):
    """Importer that converts a Jekyll or Octopress website to PieCrust.

    The source tree is walked once and every file is routed to a converter
    based on where it lives (`_config.yml`, `_layouts`, `_includes`,
    `_posts`) or, for everything else, on whether it starts with a `---`
    front matter marker.
    """

    # Extensions, as returned by `os.path.splitext` (leading dot included),
    # of the files that are converted as pages. Any other file carrying a
    # front matter is forwarded to `convertStatic`.
    _PAGE_EXTENSIONS = ('.htm', '.html', '.textile', '.markdown')

    # Ordered (pattern, replacement) pairs rewriting Liquid constructs into
    # their Jinja counterparts. Order matters: each rewrite is applied to
    # the output of the previous one.
    _LIQUID_TO_JINJA = (
        # - list of posts
        (r'(?<=\{%|\{\{)([^\}]*)site\.posts', r'\1blog.posts'),
        (r'(?<=\{%|\{\{)([^\}]*)paginator\.posts', r'\1pagination.posts'),
        # - list of categories or tags
        (r'(?<=\{%|\{\{)([^\}]*)site\.categories', r'\1blog.categories'),
        (r'(?<=\{%|\{\{)([^\}]*)site\.tags', r'\1blog.tags'),
        # - list of related posts
        (r'(?<=\{%|\{\{)([^\}]*)site\.related_posts',
         r'\1pagination.related_posts'),
        # - enumeration limits
        (r'\{%\s*for\s+([^}]+)\s+limit:\s*(\d+)', r'{% for \1[:\2]'),
        (r'\{%\s*for\s+([^}]+)\s+offset:\s*(\d+)', r'{% for \1[\2:]'),
        # - code highlighting
        (r'\{%\s*highlight\s+([\w\d]+)\s*%\}', r"{% geshi '\1' %}"),
        (r'\{%\s*endhighlight\s*%\}', '{% endgeshi %}'),
        # - unless tag
        (r'\{%\s*unless\s+([^}]+)\s*%\}', r'{% if not \1 %}'),
        (r'\{%\s*endunless\s*%\}', '{% endif %}'),
        # - variable assignment
        (r'\{%\s*assign\s+', '{% set '),
        # - include tag
        (r'\{%\s*include\s+([\w\d\.\-_]+)\s*%\}', r'{% include "\1" %}'),
        # - truncate filter
        (r'\|\s*truncate:\s*(\d+)', r'|truncate(\1)'),
        # - date filter
        (r'\|\s*date:\s*"([^"]+)"', r'|date("\1")'),
        # - some filters we don't need
        (r'\|\s*date_to_string', ''),
    )

    def __init__(self):
        super(JekyllImporter, self).__init__()
        self.name = 'jekyll'
        self.description = "Imports content from a Jekyll or Octopress blog."

    def setupParser(self, parser, app):
        """Register the `root_dir` positional argument on `parser`."""
        parser.add_argument(
            'root_dir',
            help="The root directory of the Jekyll or Octopress website.")

    def importWebsite(self, app, args):
        """Walk `args.root_dir` and convert every non-hidden file.

        Files and directories whose name starts with a dot are skipped.
        `_config.yml` at the root goes to `convertConfig`; files under the
        top-level `_layouts`, `_includes` and `_posts` directories go to
        their dedicated converters; any other file is a page when it starts
        with `---` and a static file otherwise.
        """
        logger.debug("Importing Jekyll site from: %s", args.root_dir)
        for dirpath, dirnames, filenames in os.walk(args.root_dir):
            # Prune in place so `os.walk` doesn't descend into hidden
            # directories.
            dirnames[:] = [d for d in dirnames if not d.startswith('.')]
            for fn in filenames:
                if fn.startswith('.'):
                    continue

                full_fn = os.path.join(dirpath, fn)
                rel_fn = os.path.relpath(full_fn, args.root_dir)
                if rel_fn.startswith('.' + os.sep):
                    rel_fn = fn

                logger.debug("- %s", rel_fn)
                # Dispatch on the first path component rather than on a raw
                # string prefix, so `_layouts_old/x` is not mistaken for a
                # layout and the platform separator is honoured.
                top_dir, _, sub_path = rel_fn.partition(os.sep)
                if rel_fn == '_config.yml':
                    self.convertConfig(app, full_fn)
                elif sub_path and top_dir == '_layouts':
                    self.convertLayout(app, full_fn, sub_path)
                elif sub_path and top_dir == '_includes':
                    self.convertInclude(app, full_fn, sub_path)
                elif sub_path and top_dir == '_posts':
                    self.convertPost(app, full_fn, sub_path)
                else:
                    with open(full_fn, 'rb') as fp:
                        marker = fp.read(3)
                    # The file is read in binary mode, so the marker must
                    # be compared against bytes.
                    if marker == b'---':
                        self.convertPage(app, full_fn, rel_fn)
                    else:
                        self.convertStatic(app, full_fn, rel_fn)

        logger.info("The Jekyll website was successfully imported.")

    def convertConfig(self, app, src_path):
        """Translate the YAML file at `src_path` into `config.yml`.

        The loaded mapping is amended with PieCrust settings (`site`,
        `baker`, `jinja` sections) and dumped to `config.yml` inside
        `app.root_dir`. The permalink shortcuts `date`, `pretty` and `none`
        are expanded before being rewritten into a `post_url` pattern. A
        non-dict `markdown` setting is discarded with a warning.
        """
        logger.debug(" Converting configuration file.")
        with open(src_path, 'r', encoding='utf8') as fp:
            # `yaml.load` without an explicit Loader is rejected by recent
            # PyYAML versions and can build arbitrary objects; the safe
            # loader is enough for a plain configuration file. An empty
            # file loads as None, hence the fallback.
            config = yaml.safe_load(fp) or {}

        site_config = config.setdefault('site', {})
        site_config['related_posts'] = []
        site_config['posts_fs'] = 'flat'
        site_config['templates_dirs'] = ['includes', 'layouts']
        site_config['tag_url'] = 'tags/%tag%'
        if 'permalink' in config:
            permalink = config['permalink']
            if permalink == 'date':
                permalink = '/:categories/:year/:month/:day/:title.html'
            elif permalink == 'pretty':
                permalink = '/:categories/:year/:month/:day/:title/'
            elif permalink == 'none':
                permalink = '/:categories/:title.html'

            # TODO: handle `:categories` token.
            post_url = multi_replace(
                permalink,
                {':year': '%year%', ':month': '%month%', ':day': '%day%',
                 ':title': '%slug%', ':categories': ''})
            # Dropping `:categories` can leave doubled slashes behind.
            post_url = post_url.replace('//', '/').strip('/')
            site_config['post_url'] = post_url
        if 'exclude' in config:
            baker_config = config.setdefault('baker', {})
            baker_config['skip_patterns'] = [
                '^/_%s/' % re.escape(str(i))
                for i in (config['exclude'] or [])]
        config.setdefault('jinja', {})['auto_escape'] = False
        if 'markdown' in config:
            if not isinstance(config['markdown'], dict):
                logger.warning("Discarding markdown setting: %s",
                               config['markdown'])
                del config['markdown']

        out_path = os.path.join(app.root_dir, 'config.yml')
        with open(out_path, 'w', encoding='utf8') as fp:
            yaml.dump(config, stream=fp)

    def convertPage(self, app, path, rel_path):
        """Convert a file with a front matter found outside special dirs.

        A root-level `index.(htm|html|textile|markdown)` becomes
        `pages/_index<ext>` and is also copied to `pages/_tag<ext>`. Other
        files with a page extension go to `pages/<rel_path>`. Files with
        any other extension are forwarded to `convertStatic` with their
        header stripped.
        """
        logger.debug(" Converting page: %s", rel_path)
        # `splitext` keeps the leading dot in `ext`.
        _, ext = os.path.splitext(rel_path)

        if ext not in self._PAGE_EXTENSIONS:
            # There could be static files (SCSS or Less files) that look
            # like pages because they have a YAML front matter.
            logger.debug(" Actually a static file... forwarding converstion.")
            self.convertStatic(app, path, rel_path, True)
            return

        pages_dir = os.path.join(app.root_dir, 'pages')
        is_index = bool(
            re.search(r'^index\.(html?|textile|markdown)$', rel_path))
        if is_index:
            out_path = os.path.join(pages_dir, '_index' + ext)
        else:
            out_path = os.path.join(pages_dir, rel_path)

        self._doConvertPage(app, path, out_path)
        if is_index:
            shutil.copy2(out_path, os.path.join(pages_dir, '_tag' + ext))

    def convertPost(self, app, path, rel_path):
        """Convert a post, renaming `YYYY-MM-DD-slug` to `YYYY-MM-DD_slug`.

        The result is written under the `posts` directory of
        `app.root_dir`.
        """
        logger.debug(" Converting post: %s", rel_path)
        post_name = re.sub(
            r'(\d{4}\-\d{2}\-\d{2})\-(.*)$',
            r'\1_\2',
            rel_path)
        out_path = os.path.join(app.root_dir, 'posts', post_name)
        self._doConvertPage(app, path, out_path)

    def convertLayout(self, app, path, rel_path):
        """Convert a layout template into the `layouts` directory."""
        logger.debug(" Converting layout: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'layouts', rel_path)
        self._doConvertPage(app, path, out_path, True)

    def convertInclude(self, app, path, rel_path):
        """Convert an include template into the `includes` directory."""
        logger.debug(" Converting include: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'includes', rel_path)
        self._doConvertPage(app, path, out_path, True)

    def convertStatic(self, app, path, rel_path, strip_header=False):
        """Copy a static file to `assets/<rel_path>` under `app.root_dir`.

        When `strip_header` is true the file is read as UTF-8 text and only
        the part after the offset returned by `parse_config_header` is
        written; otherwise the file is copied as-is with `shutil.copy2`.
        """
        logger.debug(" Converting static: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'assets', rel_path)
        logger.debug(" %s -> %s", path, out_path)
        os.makedirs(os.path.dirname(out_path), mode=0o755, exist_ok=True)

        if strip_header:
            with open(path, 'r', encoding='utf8') as fp:
                content = fp.read()
            config, offset = parse_config_header(content)
            content = content[offset:]
            with open(out_path, 'w', encoding='utf8') as fp:
                fp.write(content)
            return

        shutil.copy2(path, out_path)

    def _doConvertPage(self, app, path, out_path, is_template=False):
        """Rewrite the Liquid file at `path` as a Jinja file at `out_path`.

        The header is split off with `parse_config_header`. For templates
        (`is_template` true) that declare a `layout`, the text is wrapped
        in an `extends`/`block jekyllcontent` pair and no header is
        written back. For other files a `nil` layout is renamed `none` and
        the header is re-emitted as YAML between `---` lines. If the text
        was modified, the source file is also copied to `<out_path>.orig`.
        """
        logger.debug(" %s -> %s", path, out_path)
        os.makedirs(os.path.dirname(out_path), mode=0o755, exist_ok=True)

        with open(path, 'r', encoding='utf8') as fp:
            contents = fp.read()

        config, offset = parse_config_header(contents)
        text = contents[offset:]
        text_before = text

        wrap_content_tag = True
        has_layout = 'layout' in config
        if is_template:
            if has_layout:
                # Liquid doesn't support template inheritance but
                # Jinja does.
                text = ("{%% extends '%s.html' %%}\n\n"
                        "{%% block jekyllcontent %%}\n"
                        "%s\n"
                        "{%% endblock %%}\n" % (config['layout'], text))
                wrap_content_tag = False
        elif has_layout and config['layout'] == 'nil':
            config['layout'] = 'none'

        text = self._convertLiquidToJinja(text, wrap_content_tag)

        if text != text_before:
            # We changed the text, so create a backup.
            shutil.copy2(path, '%s.orig' % out_path)

        with open(out_path, 'w', encoding='utf8') as fp:
            if not is_template:
                fp.write("---\n")
                fp.write(yaml.dump(config))
                fp.write("---\n")
            fp.write(text)

    def _convertLiquidToJinja(self, text, wrap_content_tag):
        """Return `text` with the Liquid constructs we know about rewritten.

        When `wrap_content_tag` is true, `{{ content }}` is first wrapped
        in a `jekyllcontent` block, because Jekyll uses implicit layout
        inheritance placements. The `_LIQUID_TO_JINJA` rewrites are then
        applied in order.
        """
        if wrap_content_tag:
            text = re.sub(
                r'\{\{\s*content\s*\}\}',
                r'{% block jekyllcontent %}{{ content }}{% endblock %}',
                text)
        for pattern, replacement in self._LIQUID_TO_JINJA:
            text = re.sub(pattern, replacement, text)
        return text
285