Mercurial > piecrust2
comparison piecrust/importing/wordpress.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines,
one for assets and one for pages. The asset pipeline is vaguely functional,
but the page pipeline is completely broken right now.
* Rewrite the baking process to simply run the appropriate pipelines on each
content item. This should allow for better parallelization.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 17 May 2017 00:11:48 -0700 |
parents | dd25bd3ce1f9 |
children |
comparison legend: equal | deleted | inserted | replaced
851:2c7e57d80bba | 852:4850f8c21b6e |
---|---|
3 import datetime | 3 import datetime |
4 import yaml | 4 import yaml |
5 from collections import OrderedDict | 5 from collections import OrderedDict |
6 from piecrust import CONFIG_PATH | 6 from piecrust import CONFIG_PATH |
7 from piecrust.configuration import ( | 7 from piecrust.configuration import ( |
8 ConfigurationLoader, ConfigurationDumper, merge_dicts) | 8 ConfigurationLoader, ConfigurationDumper, merge_dicts) |
9 from piecrust.importing.base import Importer, create_page, download_asset | 9 from piecrust.importing.base import Importer, create_page, download_asset |
10 from piecrust.sources.base import MODE_CREATING | |
11 | 10 |
12 | 11 |
13 logger = logging.getLogger(__name__) | 12 logger = logging.getLogger(__name__) |
14 | 13 |
15 | 14 |
16 class WordpressImporterBase(Importer): | 15 class WordpressImporterBase(Importer): |
17 def setupParser(self, parser, app): | 16 def setupParser(self, parser, app): |
18 parser.add_argument( | 17 parser.add_argument( |
19 '--pages-source', | 18 '--pages-source', |
20 default="pages", | 19 default="pages", |
21 help="The source to store pages in.") | 20 help="The source to store pages in.") |
22 parser.add_argument( | 21 parser.add_argument( |
23 '--posts-source', | 22 '--posts-source', |
24 default="posts", | 23 default="posts", |
25 help="The source to store posts in.") | 24 help="The source to store posts in.") |
26 parser.add_argument( | 25 parser.add_argument( |
27 '--default-post-layout', | 26 '--default-post-layout', |
28 help="The default layout to use for posts.") | 27 help="The default layout to use for posts.") |
29 parser.add_argument( | 28 parser.add_argument( |
30 '--default-post-category', | 29 '--default-post-category', |
31 help="The default category to use for posts.") | 30 help="The default category to use for posts.") |
32 parser.add_argument( | 31 parser.add_argument( |
33 '--default-page-layout', | 32 '--default-page-layout', |
34 help="The default layout to use for pages.") | 33 help="The default layout to use for pages.") |
35 parser.add_argument( | 34 parser.add_argument( |
36 '--default-page-category', | 35 '--default-page-category', |
37 help="The default category to use for pages.") | 36 help="The default category to use for pages.") |
38 | 37 |
39 def importWebsite(self, app, args): | 38 def importWebsite(self, app, args): |
40 impl = self._getImplementation(app, args) | 39 impl = self._getImplementation(app, args) |
41 return impl.importWebsite() | 40 return impl.importWebsite() |
42 | 41 |
58 # Site configuration. | 57 # Site configuration. |
59 logger.info("Generating site configuration...") | 58 logger.info("Generating site configuration...") |
60 site_config = self._getSiteConfig(ctx) | 59 site_config = self._getSiteConfig(ctx) |
61 site_config.setdefault('site', {}) | 60 site_config.setdefault('site', {}) |
62 site_config['site'].update({ | 61 site_config['site'].update({ |
63 'post_url': '%year%/%month%/%slug%', | 62 'post_url': '%year%/%month%/%slug%', |
64 'category_url': 'category/%category%'}) | 63 'category_url': 'category/%category%'}) |
65 | 64 |
66 site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH) | 65 site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH) |
67 with open(site_config_path, 'r') as fp: | 66 with open(site_config_path, 'r') as fp: |
68 cfg_data = yaml.load(fp, Loader=ConfigurationLoader) | 67 cfg_data = yaml.load(fp, Loader=ConfigurationLoader) |
69 | 68 |
100 download_asset(self.app, asset_info['url']) | 99 download_asset(self.app, asset_info['url']) |
101 | 100 |
102 def _createPost(self, post_info): | 101 def _createPost(self, post_info): |
103 post_dt = post_info['datetime'] | 102 post_dt = post_info['datetime'] |
104 finder = { | 103 finder = { |
105 'year': post_dt.year, | 104 'year': post_dt.year, |
106 'month': post_dt.month, | 105 'month': post_dt.month, |
107 'day': post_dt.day, | 106 'day': post_dt.day, |
108 'slug': post_info['slug']} | 107 'slug': post_info['slug']} |
109 if post_info['type'] == 'post': | 108 if post_info['type'] == 'post': |
110 source = self._posts_source | 109 source = self._posts_source |
111 elif post_info['type'] == 'page': | 110 elif post_info['type'] == 'page': |
112 source = self._pages_source | 111 source = self._pages_source |
113 else: | 112 else: |
172 def _getSiteConfig(self, channel): | 171 def _getSiteConfig(self, channel): |
173 # Get basic site information | 172 # Get basic site information |
174 title = find_text(channel, 'title') | 173 title = find_text(channel, 'title') |
175 description = find_text(channel, 'description') | 174 description = find_text(channel, 'description') |
176 site_config = OrderedDict({ | 175 site_config = OrderedDict({ |
177 'site': { | 176 'site': { |
178 'title': title, | 177 'title': title, |
179 'description': description} | 178 'description': description} |
180 }) | 179 }) |
181 | 180 |
182 # Get authors' names. | 181 # Get authors' names. |
183 authors = {} | 182 authors = {} |
184 for a in channel.findall('wp:author', self.ns_wp): | 183 for a in channel.findall('wp:author', self.ns_wp): |
185 login = find_text(a, 'wp:author_login', self.ns_wp) | 184 login = find_text(a, 'wp:author_login', self.ns_wp) |
186 authors[login] = { | 185 authors[login] = { |
187 'email': find_text(a, 'wp:author_email', self.ns_wp), | 186 'email': find_text(a, 'wp:author_email', self.ns_wp), |
188 'display_name': find_text(a, 'wp:author_display_name', | 187 'display_name': find_text(a, 'wp:author_display_name', |
189 self.ns_wp), | 188 self.ns_wp), |
190 'first_name': find_text(a, 'wp:author_first_name', | 189 'first_name': find_text(a, 'wp:author_first_name', |
191 self.ns_wp), | 190 self.ns_wp), |
192 'last_name': find_text(a, 'wp:author_last_name', | 191 'last_name': find_text(a, 'wp:author_last_name', |
193 self.ns_wp), | 192 self.ns_wp), |
194 'author_id': find_text(a, 'wp:author_id', | 193 'author_id': find_text(a, 'wp:author_id', |
195 self.ns_wp)} | 194 self.ns_wp)} |
196 site_config['site']['authors'] = authors | 195 site_config['site']['authors'] = authors |
197 | 196 |
198 return site_config | 197 return site_config |
199 | 198 |
200 def _getPosts(self, channel): | 199 def _getPosts(self, channel): |
214 post_date = datetime.datetime.strptime(post_date_str, | 213 post_date = datetime.datetime.strptime(post_date_str, |
215 '%Y-%m-%d %H:%M:%S') | 214 '%Y-%m-%d %H:%M:%S') |
216 post_name = find_text(node, 'wp:post_name', self.ns_wp) | 215 post_name = find_text(node, 'wp:post_name', self.ns_wp) |
217 post_type = find_text(node, 'wp:post_type', self.ns_wp) | 216 post_type = find_text(node, 'wp:post_type', self.ns_wp) |
218 post_info = { | 217 post_info = { |
219 'type': post_type, | 218 'type': post_type, |
220 'slug': post_name, | 219 'slug': post_name, |
221 'datetime': post_date} | 220 'datetime': post_date} |
222 | 221 |
223 title = find_text(node, 'title') | 222 title = find_text(node, 'title') |
224 creator = find_text(node, 'dc:creator', self.ns_dc) | 223 creator = find_text(node, 'dc:creator', self.ns_dc) |
225 status = find_text(node, 'wp:status', self.ns_wp) | 224 status = find_text(node, 'wp:status', self.ns_wp) |
226 post_id = find_text(node, 'wp:post_id', self.ns_wp) | 225 post_id = find_text(node, 'wp:post_id', self.ns_wp) |
227 guid = find_text(node, 'guid') | 226 guid = find_text(node, 'guid') |
228 description = find_text(node, 'description') | 227 description = find_text(node, 'description') |
229 # TODO: menu order, parent, password, sticky | 228 # TODO: menu order, parent, password, sticky |
230 post_info.update({ | 229 post_info.update({ |
231 'title': title, | 230 'title': title, |
232 'author': creator, | 231 'author': creator, |
233 'status': status, | 232 'status': status, |
234 'post_id': post_id, | 233 'post_id': post_id, |
235 'post_guid': guid, | 234 'post_guid': guid, |
236 'description': description}) | 235 'description': description}) |
237 | 236 |
238 categories = [] | 237 categories = [] |
239 for c in node.findall('category'): | 238 for c in node.findall('category'): |
240 nicename = str(c.attrib.get('nicename')) | 239 nicename = str(c.attrib.get('nicename')) |
241 categories.append(nicename) | 240 categories.append(nicename) |
248 post_info['metadata'] = metadata | 247 post_info['metadata'] = metadata |
249 | 248 |
250 content = find_text(node, 'content:encoded', self.ns_content) | 249 content = find_text(node, 'content:encoded', self.ns_content) |
251 excerpt = find_text(node, 'excerpt:encoded', self.ns_excerpt) | 250 excerpt = find_text(node, 'excerpt:encoded', self.ns_excerpt) |
252 post_info.update({ | 251 post_info.update({ |
253 'content': content, | 252 'content': content, |
254 'excerpt': excerpt}) | 253 'excerpt': excerpt}) |
255 | 254 |
256 return post_info | 255 return post_info |
257 | 256 |
258 | 257 |
259 class WordpressXmlImporter(WordpressImporterBase): | 258 class WordpressXmlImporter(WordpressImporterBase): |