comparison piecrust/importing/wordpress.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0.

* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines, one for assets and one for pages. The asset pipeline is vaguely functional, but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each content item. This should allow for better parallelization.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents dd25bd3ce1f9
children
comparison
equal deleted inserted replaced
851:2c7e57d80bba 852:4850f8c21b6e
3 import datetime 3 import datetime
4 import yaml 4 import yaml
5 from collections import OrderedDict 5 from collections import OrderedDict
6 from piecrust import CONFIG_PATH 6 from piecrust import CONFIG_PATH
7 from piecrust.configuration import ( 7 from piecrust.configuration import (
8 ConfigurationLoader, ConfigurationDumper, merge_dicts) 8 ConfigurationLoader, ConfigurationDumper, merge_dicts)
9 from piecrust.importing.base import Importer, create_page, download_asset 9 from piecrust.importing.base import Importer, create_page, download_asset
10 from piecrust.sources.base import MODE_CREATING
11 10
12 11
13 logger = logging.getLogger(__name__) 12 logger = logging.getLogger(__name__)
14 13
15 14
16 class WordpressImporterBase(Importer): 15 class WordpressImporterBase(Importer):
17 def setupParser(self, parser, app): 16 def setupParser(self, parser, app):
18 parser.add_argument( 17 parser.add_argument(
19 '--pages-source', 18 '--pages-source',
20 default="pages", 19 default="pages",
21 help="The source to store pages in.") 20 help="The source to store pages in.")
22 parser.add_argument( 21 parser.add_argument(
23 '--posts-source', 22 '--posts-source',
24 default="posts", 23 default="posts",
25 help="The source to store posts in.") 24 help="The source to store posts in.")
26 parser.add_argument( 25 parser.add_argument(
27 '--default-post-layout', 26 '--default-post-layout',
28 help="The default layout to use for posts.") 27 help="The default layout to use for posts.")
29 parser.add_argument( 28 parser.add_argument(
30 '--default-post-category', 29 '--default-post-category',
31 help="The default category to use for posts.") 30 help="The default category to use for posts.")
32 parser.add_argument( 31 parser.add_argument(
33 '--default-page-layout', 32 '--default-page-layout',
34 help="The default layout to use for pages.") 33 help="The default layout to use for pages.")
35 parser.add_argument( 34 parser.add_argument(
36 '--default-page-category', 35 '--default-page-category',
37 help="The default category to use for pages.") 36 help="The default category to use for pages.")
38 37
39 def importWebsite(self, app, args): 38 def importWebsite(self, app, args):
40 impl = self._getImplementation(app, args) 39 impl = self._getImplementation(app, args)
41 return impl.importWebsite() 40 return impl.importWebsite()
42 41
58 # Site configuration. 57 # Site configuration.
59 logger.info("Generating site configuration...") 58 logger.info("Generating site configuration...")
60 site_config = self._getSiteConfig(ctx) 59 site_config = self._getSiteConfig(ctx)
61 site_config.setdefault('site', {}) 60 site_config.setdefault('site', {})
62 site_config['site'].update({ 61 site_config['site'].update({
63 'post_url': '%year%/%month%/%slug%', 62 'post_url': '%year%/%month%/%slug%',
64 'category_url': 'category/%category%'}) 63 'category_url': 'category/%category%'})
65 64
66 site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH) 65 site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH)
67 with open(site_config_path, 'r') as fp: 66 with open(site_config_path, 'r') as fp:
68 cfg_data = yaml.load(fp, Loader=ConfigurationLoader) 67 cfg_data = yaml.load(fp, Loader=ConfigurationLoader)
69 68
100 download_asset(self.app, asset_info['url']) 99 download_asset(self.app, asset_info['url'])
101 100
102 def _createPost(self, post_info): 101 def _createPost(self, post_info):
103 post_dt = post_info['datetime'] 102 post_dt = post_info['datetime']
104 finder = { 103 finder = {
105 'year': post_dt.year, 104 'year': post_dt.year,
106 'month': post_dt.month, 105 'month': post_dt.month,
107 'day': post_dt.day, 106 'day': post_dt.day,
108 'slug': post_info['slug']} 107 'slug': post_info['slug']}
109 if post_info['type'] == 'post': 108 if post_info['type'] == 'post':
110 source = self._posts_source 109 source = self._posts_source
111 elif post_info['type'] == 'page': 110 elif post_info['type'] == 'page':
112 source = self._pages_source 111 source = self._pages_source
113 else: 112 else:
172 def _getSiteConfig(self, channel): 171 def _getSiteConfig(self, channel):
173 # Get basic site information 172 # Get basic site information
174 title = find_text(channel, 'title') 173 title = find_text(channel, 'title')
175 description = find_text(channel, 'description') 174 description = find_text(channel, 'description')
176 site_config = OrderedDict({ 175 site_config = OrderedDict({
177 'site': { 176 'site': {
178 'title': title, 177 'title': title,
179 'description': description} 178 'description': description}
180 }) 179 })
181 180
182 # Get authors' names. 181 # Get authors' names.
183 authors = {} 182 authors = {}
184 for a in channel.findall('wp:author', self.ns_wp): 183 for a in channel.findall('wp:author', self.ns_wp):
185 login = find_text(a, 'wp:author_login', self.ns_wp) 184 login = find_text(a, 'wp:author_login', self.ns_wp)
186 authors[login] = { 185 authors[login] = {
187 'email': find_text(a, 'wp:author_email', self.ns_wp), 186 'email': find_text(a, 'wp:author_email', self.ns_wp),
188 'display_name': find_text(a, 'wp:author_display_name', 187 'display_name': find_text(a, 'wp:author_display_name',
189 self.ns_wp), 188 self.ns_wp),
190 'first_name': find_text(a, 'wp:author_first_name', 189 'first_name': find_text(a, 'wp:author_first_name',
191 self.ns_wp), 190 self.ns_wp),
192 'last_name': find_text(a, 'wp:author_last_name', 191 'last_name': find_text(a, 'wp:author_last_name',
193 self.ns_wp), 192 self.ns_wp),
194 'author_id': find_text(a, 'wp:author_id', 193 'author_id': find_text(a, 'wp:author_id',
195 self.ns_wp)} 194 self.ns_wp)}
196 site_config['site']['authors'] = authors 195 site_config['site']['authors'] = authors
197 196
198 return site_config 197 return site_config
199 198
200 def _getPosts(self, channel): 199 def _getPosts(self, channel):
214 post_date = datetime.datetime.strptime(post_date_str, 213 post_date = datetime.datetime.strptime(post_date_str,
215 '%Y-%m-%d %H:%M:%S') 214 '%Y-%m-%d %H:%M:%S')
216 post_name = find_text(node, 'wp:post_name', self.ns_wp) 215 post_name = find_text(node, 'wp:post_name', self.ns_wp)
217 post_type = find_text(node, 'wp:post_type', self.ns_wp) 216 post_type = find_text(node, 'wp:post_type', self.ns_wp)
218 post_info = { 217 post_info = {
219 'type': post_type, 218 'type': post_type,
220 'slug': post_name, 219 'slug': post_name,
221 'datetime': post_date} 220 'datetime': post_date}
222 221
223 title = find_text(node, 'title') 222 title = find_text(node, 'title')
224 creator = find_text(node, 'dc:creator', self.ns_dc) 223 creator = find_text(node, 'dc:creator', self.ns_dc)
225 status = find_text(node, 'wp:status', self.ns_wp) 224 status = find_text(node, 'wp:status', self.ns_wp)
226 post_id = find_text(node, 'wp:post_id', self.ns_wp) 225 post_id = find_text(node, 'wp:post_id', self.ns_wp)
227 guid = find_text(node, 'guid') 226 guid = find_text(node, 'guid')
228 description = find_text(node, 'description') 227 description = find_text(node, 'description')
229 # TODO: menu order, parent, password, sticky 228 # TODO: menu order, parent, password, sticky
230 post_info.update({ 229 post_info.update({
231 'title': title, 230 'title': title,
232 'author': creator, 231 'author': creator,
233 'status': status, 232 'status': status,
234 'post_id': post_id, 233 'post_id': post_id,
235 'post_guid': guid, 234 'post_guid': guid,
236 'description': description}) 235 'description': description})
237 236
238 categories = [] 237 categories = []
239 for c in node.findall('category'): 238 for c in node.findall('category'):
240 nicename = str(c.attrib.get('nicename')) 239 nicename = str(c.attrib.get('nicename'))
241 categories.append(nicename) 240 categories.append(nicename)
248 post_info['metadata'] = metadata 247 post_info['metadata'] = metadata
249 248
250 content = find_text(node, 'content:encoded', self.ns_content) 249 content = find_text(node, 'content:encoded', self.ns_content)
251 excerpt = find_text(node, 'excerpt:encoded', self.ns_excerpt) 250 excerpt = find_text(node, 'excerpt:encoded', self.ns_excerpt)
252 post_info.update({ 251 post_info.update({
253 'content': content, 252 'content': content,
254 'excerpt': excerpt}) 253 'excerpt': excerpt})
255 254
256 return post_info 255 return post_info
257 256
258 257
259 class WordpressXmlImporter(WordpressImporterBase): 258 class WordpressXmlImporter(WordpressImporterBase):