annotate piecrust/importing/wordpress.py @ 300:2daa05a21026

import: Add an XML-based Wordpress importer.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 11 Mar 2015 23:48:35 -0700
parents
children 103abb08755e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 import os.path
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
2 import logging
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3 import datetime
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
4 import yaml
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
5 from urllib.parse import urlparse
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
6 from piecrust import CONFIG_PATH
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
7 from piecrust.importing.base import Importer, create_page, download_asset
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
8 from piecrust.sources.base import MODE_CREATING
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
9
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
10
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
11 logger = logging.getLogger(__name__)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
12
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
13
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class WordpressImporter(Importer):
    """Importer for Wordpress blogs.

    Declares the command-line options of the ``wordpress`` importer and
    hands the actual work over to ``_XmlImporter`` or ``_SqlImporter``,
    depending on what the positional ``xml_or_db_url`` argument looks like.
    """
    name = 'wordpress'
    description = "Imports a Wordpress blog."

    def setupParser(self, parser, app):
        """Register the importer's command-line arguments on ``parser``.

        Only ``parser.add_argument`` is used; ``app`` is not used here.
        """
        parser.add_argument(
            '--posts_fs',
            default="hierarchy",
            choices=['flat', 'shallow', 'hierarchy'],
            help="The blog file-system type to use.")
        parser.add_argument(
            '--prefix',
            default="wp_",
            help="The SQL table prefix. Defaults to `wp_`.")
        parser.add_argument(
            '--default-post-layout',
            help="The default layout to use for posts.")
        parser.add_argument(
            '--default-post-category',
            help="The default category to use for posts.")
        parser.add_argument(
            '--default-page-layout',
            help="The default layout to use for pages.")
        parser.add_argument(
            '--default-page-category',
            help="The default category to use for pages.")
        parser.add_argument(
            'xml_or_db_url',
            help=("The exported XML archive of the Wordpress site, or "
                  "the URL of the SQL database.\n"
                  "\n"
                  "If an SQL database URL, it should be of the "
                  "form: type://user:password@server/database\n"
                  "\n"
                  "For example:\n"
                  "mysql://user:password@example.org/my_database"))

    def importWebsite(self, app, args):
        """Import the website described by ``args.xml_or_db_url``.

        A value without a URL scheme is treated as the path of an XML
        archive; anything else is treated as a database URL.

        Returns:
            Whatever the selected implementation's ``importWebsite()``
            returns.
        """
        scheme = urlparse(args.xml_or_db_url).scheme
        # ``urlparse`` reports the drive letter of a Windows path such as
        # ``C:\export.xml`` as a one-character scheme. No database type is a
        # single letter, so such values are file paths, not database URLs.
        looks_like_path = not scheme or len(scheme) == 1
        if looks_like_path:
            impl = _XmlImporter(app, args)
        else:
            # `_SqlImporter` is not defined in the visible part of this
            # module; presumably it lives further down -- TODO confirm.
            impl = _SqlImporter(app, args)
        return impl.importWebsite()
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
58
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
59
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class _XmlImporter(object):
    """Import a Wordpress site from an exported XML archive.

    The constructor looks up the ``posts/<posts_fs>`` source class among the
    sources exposed by the application's plugin loader. ``importWebsite``
    then parses the archive, writes the site configuration file and creates
    one page per post, plus one asset download per attachment.
    """
    # Prefix -> namespace URI maps handed to ElementTree's find()/findall().
    ns_wp = {'wp': 'http://wordpress.org/export/1.2/'}
    ns_dc = {'dc': "http://purl.org/dc/elements/1.1/"}
    ns_excerpt = {'excerpt': "http://wordpress.org/export/1.2/excerpt/"}
    ns_content = {'content': "http://purl.org/rss/1.0/modules/content/"}

    def __init__(self, app, args):
        """Store the import settings and instantiate the posts source.

        Args:
            app: The application object; ``plugin_loader`` is read here and
                ``root_dir`` later on.
            args: Parsed command-line arguments; ``xml_or_db_url`` (used as
                the archive path) and ``posts_fs`` are read.

        Raises:
            Exception: If no available source is named
                ``posts/<posts_fs>``.
        """
        self.app = app
        self.path = args.xml_or_db_url
        self.posts_fs = args.posts_fs
        self._cat_map = {}
        self._author_map = {}

        source_name = 'posts/%s' % self.posts_fs
        for cls in self.app.plugin_loader.getSources():
            if cls.SOURCE_NAME == source_name:
                src_config = {
                    'type': source_name,
                    'fs_endpoint': 'posts',
                    'data_type': 'blog'}
                self.posts_source = cls(app, 'posts', src_config)
                break
        else:
            # The original message had no `%s` placeholder, so formatting it
            # raised a TypeError instead of reporting the bad file-system.
            raise Exception(
                "No such posts file-system: %s" % self.posts_fs)

    def importWebsite(self):
        """Parse the archive and create the site configuration and content.

        Returns:
            ``1`` if ElementTree cannot be imported, ``None`` otherwise.

        Raises:
            Exception: If the archive file does not exist, or if the parsed
                document has no ``channel`` element under its root.
        """
        if not os.path.exists(self.path):
            raise Exception("No such file: %s" % self.path)

        try:
            import xml.etree.ElementTree as ET
        except ImportError:
            logger.error("You don't seem to have any support for ElementTree "
                         "XML parsing.")
            return 1

        with open(self.path, 'r', encoding='utf8') as fp:
            xml_text = fp.read()
        # U+001E and U+0010 are control characters that XML 1.0 does not
        # allow; strip them before handing the text to the parser.
        for bad_char in (chr(0x1e), chr(0x10)):
            xml_text = xml_text.replace(bad_char, '')
        channel = ET.fromstring(xml_text).find('channel')
        if channel is None:
            # Fail with a clear message instead of an obscure error from the
            # first lookup below.
            raise Exception("No <channel> element found in: %s" % self.path)

        # Get basic site information.
        title = find_text(channel, 'title')
        description = find_text(channel, 'description')
        site_config = {
            'site': {
                'title': title,
                'description': description,
                'posts_fs': self.posts_fs}
        }
        logger.info("Importing '%s'", title)

        # Get authors' names, keyed by login.
        authors = {}
        for a in channel.findall('wp:author', self.ns_wp):
            login = find_text(a, 'wp:author_login', self.ns_wp)
            authors[login] = {
                'email': find_text(a, 'wp:author_email', self.ns_wp),
                'display_name': find_text(a, 'wp:author_display_name',
                                          self.ns_wp),
                'first_name': find_text(a, 'wp:author_first_name',
                                        self.ns_wp),
                'last_name': find_text(a, 'wp:author_last_name',
                                       self.ns_wp),
                'author_id': find_text(a, 'wp:author_id',
                                       self.ns_wp)}
        site_config['site']['authors'] = authors

        # Other stuff.
        site_config['site'].update({
            'post_url': '%year%/%month%/%slug%',
            'category_url': 'category/%category%'})

        logger.info("Generating site configuration...")
        site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH)
        # The dump keeps non-ASCII characters as-is (`allow_unicode=True`),
        # so the file must be written as UTF-8 rather than in the platform's
        # default encoding.
        with open(site_config_path, 'w', encoding='utf8') as fp:
            yaml.safe_dump(site_config, fp, default_flow_style=False,
                           allow_unicode=True)

        # Content.
        # NOTE(review): only 'attachment' and 'post' items are handled;
        # `_createPage` exists but 'page' items are skipped -- confirm
        # whether that is intended.
        for item in channel.findall('item'):
            post_type = find_text(item, 'wp:post_type', self.ns_wp)
            if post_type == 'attachment':
                self._createAsset(item)
            elif post_type == 'post':
                self._createPost(item)

        self._cat_map = None
        self._author_map = None

    def _createAsset(self, node):
        """Pass the attachment URL of ``node`` to ``download_asset``."""
        url = find_text(node, 'wp:attachment_url', self.ns_wp)
        download_asset(self.app, url)

    def _getPageMetadata(self, node):
        """Build the metadata dictionary for the item ``node``.

        The result holds the title, author, status, post id, guid,
        description and category nicenames, plus one entry per
        ``wp:postmeta`` child (which may overwrite the standard keys).
        """
        # TODO: menu order, parent, password, sticky

        # NOTE(review): WXR exports also describe tags with <category>
        # elements (domain="post_tag"), so they would land in this list
        # too -- confirm that is intended.
        categories = [str(c.attrib.get('nicename'))
                      for c in node.findall('category')]

        metadata = {
            'title': find_text(node, 'title'),
            'author': find_text(node, 'dc:creator', self.ns_dc),
            'status': find_text(node, 'wp:status', self.ns_wp),
            'post_id': find_text(node, 'wp:post_id', self.ns_wp),
            'post_guid': find_text(node, 'guid'),
            'description': find_text(node, 'description'),
            'categories': categories}

        for m in node.findall('wp:postmeta', self.ns_wp):
            key = find_text(m, 'wp:meta_key', self.ns_wp)
            metadata[key] = find_text(m, 'wp:meta_value', self.ns_wp)

        return metadata

    def _getPageContents(self, node):
        """Return the item's content, followed by its excerpt if any.

        A non-blank excerpt is appended after an ``---excerpt---``
        separator line.
        """
        content = find_text(node, 'content:encoded', self.ns_content)
        excerpt = find_text(node, 'excerpt:encoded', self.ns_excerpt)
        if not excerpt.strip():
            return content
        return "%s\n\n---excerpt---\n\n%s" % (content, excerpt)

    def _getPageInfo(self, node):
        """Return the item's ``url``, ``slug`` and ``datetime``.

        ``wp:post_date`` must be formatted as ``YYYY-MM-DD HH:MM:SS``;
        ``datetime.strptime`` raises ``ValueError`` otherwise.
        """
        post_date_str = find_text(node, 'wp:post_date', self.ns_wp)
        post_date = datetime.datetime.strptime(post_date_str,
                                               '%Y-%m-%d %H:%M:%S')
        return {
            'url': find_text(node, 'link'),
            'slug': find_text(node, 'wp:post_name', self.ns_wp),
            'datetime': post_date}

    def _createPage(self, node):
        """Create a page under ``pages/<slug>`` from the item ``node``."""
        info = self._getPageInfo(node)
        rel_path = os.path.join('pages', info['slug'])
        metadata = self._getPageMetadata(node)
        contents = self._getPageContents(node)
        create_page(self.app, rel_path, metadata, contents)

    def _createPost(self, node):
        """Create a post from the item ``node``.

        The path comes from the posts source's ``findPagePath``, called
        with the post's year, month, day and slug, and is placed under
        ``posts``.
        """
        info = self._getPageInfo(node)
        post_dt = info['datetime']
        finder = {
            'year': post_dt.year,
            'month': post_dt.month,
            'day': post_dt.day,
            'slug': info['slug']}
        # The second item returned by the source is not needed here.
        rel_path, _ = self.posts_source.findPagePath(finder, MODE_CREATING)
        rel_path = os.path.join('posts', rel_path)
        metadata = self._getPageMetadata(node)
        contents = self._getPageContents(node)
        create_page(self.app, rel_path, metadata, contents)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
223
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
224
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class _SqlImporter:
    """Stub for an importer that reads from a database URL.

    It only records its settings; `importWebsite` is not implemented
    and always raises `NotImplementedError`.
    """

    def __init__(self, app, args):
        """Remember the application and the relevant parsed arguments.

        `args` must expose `xml_or_db_url` and `prefix` attributes.
        """
        self.app = app
        # Here the shared "XML file or DB URL" argument is kept as a DB URL.
        self.db_url = args.xml_or_db_url
        # NOTE(review): presumably the database table prefix -- confirm.
        self.prefix = args.prefix

    def importWebsite(self):
        """Not implemented: always raises `NotImplementedError`."""
        raise NotImplementedError
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
233
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
234
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def find_text(parent, child_name, namespaces=None):
    """Return the text of the first `child_name` child of `parent`.

    `parent` is an element exposing `find(path, namespaces)`;
    `namespaces` is forwarded to that call unchanged.

    An element that exists but has no text (e.g. `<title/>`) yields an
    empty string. A missing child still raises `AttributeError`, since
    `find` returns `None` in that case.
    """
    text = parent.find(child_name, namespaces).text
    # An empty element has `text` set to None; `str(None)` would produce
    # the literal string 'None' instead of an empty value.
    if text is None:
        return ''
    return str(text)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
237