Mercurial > piecrust2
annotate piecrust/importing/wordpress.py @ 300:2daa05a21026
import: Add an XML-based Wordpress importer.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 11 Mar 2015 23:48:35 -0700 |
parents | |
children | 103abb08755e |
rev | line source |
---|---|
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
1 import os.path |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
2 import logging |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
3 import datetime |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
4 import yaml |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
5 from urllib.parse import urlparse |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
6 from piecrust import CONFIG_PATH |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
7 from piecrust.importing.base import Importer, create_page, download_asset |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
8 from piecrust.sources.base import MODE_CREATING |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
9 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
10 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
11 logger = logging.getLogger(__name__) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
12 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
13 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
14 class WordpressImporter(Importer): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
15 name = 'wordpress' |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
16 description = "Imports a Wordpress blog." |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
17 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
18 def setupParser(self, parser, app): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
19 parser.add_argument( |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
20 '--posts_fs', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
21 default="hierarchy", |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
22 choices=['flat', 'shallow', 'hierarchy'], |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
23 help="The blog file-system type to use.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
24 parser.add_argument( |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
25 '--prefix', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
26 default="wp_", |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
27 help="The SQL table prefix. Defaults to `wp_`.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
28 parser.add_argument( |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
29 '--default-post-layout', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
30 help="The default layout to use for posts.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
31 parser.add_argument( |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
32 '--default-post-category', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
33 help="The default category to use for posts.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
34 parser.add_argument( |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
35 '--default-page-layout', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
36 help="The default layout to use for pages.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
37 parser.add_argument( |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
38 '--default-page-category', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
39 help="The default category to use for pages.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
40 parser.add_argument( |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
41 'xml_or_db_url', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
42 help=("The exported XML archive of the Wordpress site, or " |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
43 "the URL of the SQL database.\n" |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
44 "\n" |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
45 "If an SQL database URL, it should be of the " |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
46 "form: type://user:password@server/database\n" |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
47 "\n" |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
48 "For example:\n" |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
49 "mysql://user:password@example.org/my_database")) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
50 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
51 def importWebsite(self, app, args): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
52 parsed_url = urlparse(args.xml_or_db_url) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
53 if not parsed_url.scheme: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
54 impl = _XmlImporter(app, args) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
55 else: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
56 impl = _SqlImporter(app, args) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
57 return impl.importWebsite() |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
58 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
59 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
60 class _XmlImporter(object): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
61 ns_wp = {'wp': 'http://wordpress.org/export/1.2/'} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
62 ns_dc = {'dc': "http://purl.org/dc/elements/1.1/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
63 ns_excerpt = {'excerpt': "http://wordpress.org/export/1.2/excerpt/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
64 ns_content = {'content': "http://purl.org/rss/1.0/modules/content/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
65 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
66 def __init__(self, app, args): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
67 self.app = app |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
68 self.path = args.xml_or_db_url |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
69 self.posts_fs = args.posts_fs |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
70 self._cat_map = {} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
71 self._author_map = {} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
72 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
73 for cls in self.app.plugin_loader.getSources(): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
74 if cls.SOURCE_NAME == ('posts/%s' % self.posts_fs): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
75 src_config = { |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
76 'type': 'posts/%s' % self.posts_fs, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
77 'fs_endpoint': 'posts', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
78 'data_type': 'blog'} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
79 self.posts_source = cls(app, 'posts', src_config) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
80 break |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
81 else: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
82 raise Exception("No such posts file-system: " % self.posts_fs) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
83 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
84 def importWebsite(self): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
85 if not os.path.exists(self.path): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
86 raise Exception("No such file: %s" % self.path) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
87 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
88 try: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
89 import xml.etree.ElementTree as ET |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
90 except ImportError: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
91 logger.error("You don't seem to have any support for ElementTree " |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
92 "XML parsing.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
93 return 1 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
94 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
95 with open(self.path, 'r', encoding='utf8') as fp: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
96 xml = fp.read() |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
97 xml = xml.replace(chr(0x1e), '') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
98 xml = xml.replace(chr(0x10), '') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
99 tree = ET.fromstring(xml) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
100 channel = tree.find('channel') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
101 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
102 # Get basic site information |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
103 title = find_text(channel, 'title') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
104 description = find_text(channel, 'description') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
105 site_config = { |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
106 'site': { |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
107 'title': title, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
108 'description': description, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
109 'posts_fs': self.posts_fs} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
110 } |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
111 logger.info("Importing '%s'" % title) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
112 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
113 # Get authors' names. |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
114 authors = {} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
115 for a in channel.findall('wp:author', self.ns_wp): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
116 login = find_text(a, 'wp:author_login', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
117 authors[login] = { |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
118 'email': find_text(a, 'wp:author_email', self.ns_wp), |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
119 'display_name': find_text(a, 'wp:author_display_name', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
120 self.ns_wp), |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
121 'first_name': find_text(a, 'wp:author_first_name', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
122 self.ns_wp), |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
123 'last_name': find_text(a, 'wp:author_last_name', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
124 self.ns_wp), |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
125 'author_id': find_text(a, 'wp:author_id', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
126 self.ns_wp)} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
127 site_config['site']['authors'] = authors |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
128 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
129 # Other stuff. |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
130 site_config['site'].update({ |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
131 'post_url': '%year%/%month%/%slug%', |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
132 'category_url': 'category/%category%'}) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
133 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
134 logger.info("Generating site configuration...") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
135 site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
136 with open(site_config_path, 'w') as fp: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
137 yaml.safe_dump(site_config, fp, default_flow_style=False, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
138 allow_unicode=True) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
139 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
140 # Content. |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
141 for i in channel.findall('item'): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
142 post_type = find_text(i, 'wp:post_type', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
143 if post_type == 'attachment': |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
144 self._createAsset(i) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
145 elif post_type == 'post': |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
146 self._createPost(i) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
147 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
148 self._cat_map = None |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
149 self._author_map = None |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
150 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
151 def _createAsset(self, node): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
152 url = find_text(node, 'wp:attachment_url', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
153 download_asset(self.app, url) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
154 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
155 def _getPageMetadata(self, node): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
156 title = find_text(node, 'title') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
157 creator = find_text(node, 'dc:creator', self.ns_dc) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
158 status = find_text(node, 'wp:status', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
159 post_id = find_text(node, 'wp:post_id', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
160 guid = find_text(node, 'guid') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
161 description = find_text(node, 'description') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
162 # TODO: menu order, parent, password, sticky |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
163 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
164 categories = [] |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
165 for c in node.findall('category'): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
166 nicename = str(c.attrib.get('nicename')) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
167 categories.append(nicename) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
168 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
169 metadata = { |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
170 'title': title, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
171 'author': creator, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
172 'status': status, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
173 'post_id': post_id, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
174 'post_guid': guid, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
175 'description': description, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
176 'categories': categories} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
177 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
178 for m in node.findall('wp:postmeta', self.ns_wp): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
179 key = find_text(m, 'wp:meta_key', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
180 metadata[key] = find_text(m, 'wp:meta_value', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
181 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
182 return metadata |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
183 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
184 def _getPageContents(self, node): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
185 content = find_text(node, 'content:encoded', self.ns_content) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
186 excerpt = find_text(node, 'excerpt:encoded', self.ns_excerpt) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
187 if not excerpt.strip(): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
188 return content |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
189 return "%s\n\n---excerpt---\n\n%s" % (content, excerpt) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
190 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
191 def _getPageInfo(self, node): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
192 url = find_text(node, 'link') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
193 post_date_str = find_text(node, 'wp:post_date', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
194 post_date = datetime.datetime.strptime(post_date_str, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
195 '%Y-%m-%d %H:%M:%S') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
196 post_name = find_text(node, 'wp:post_name', self.ns_wp) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
197 return { |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
198 'url': url, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
199 'slug': post_name, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
200 'datetime': post_date} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
201 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
202 def _createPage(self, node): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
203 info = self._getPageInfo(node) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
204 rel_path = os.path.join('pages', info['slug']) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
205 metadata = self._getPageMetadata(node) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
206 contents = self._getPageContents(node) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
207 create_page(self.app, rel_path, metadata, contents) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
208 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
209 def _createPost(self, node): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
210 info = self._getPageInfo(node) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
211 post_dt = info['datetime'] |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
212 finder = { |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
213 'year': post_dt.year, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
214 'month': post_dt.month, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
215 'day': post_dt.day, |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
216 'slug': info['slug']} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
217 rel_path, fac_metadata = self.posts_source.findPagePath( |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
218 finder, MODE_CREATING) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
219 rel_path = os.path.join('posts', rel_path) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
220 metadata = self._getPageMetadata(node) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
221 contents = self._getPageContents(node) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
222 create_page(self.app, rel_path, metadata, contents) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
223 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
224 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
225 class _SqlImporter(object): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
226 def __init__(self, app, args): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
227 self.app = app |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
228 self.db_url = args.xml_or_db_url |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
229 self.prefix = args.prefix |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
230 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
231 def importWebsite(self): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
232 raise NotImplementedError() |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
233 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
234 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
235 def find_text(parent, child_name, namespaces=None): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
236 return str(parent.find(child_name, namespaces).text) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
237 |