Mercurial > piecrust2
annotate piecrust/importing/wordpress.py @ 314:1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Fri, 27 Mar 2015 20:46:36 -0700 |
parents | 103abb08755e |
children | dd25bd3ce1f9 |
rev | line source |
---|---|
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
1 import os.path |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
2 import logging |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
3 import datetime |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
4 import yaml |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
5 from collections import OrderedDict |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
6 from piecrust import CONFIG_PATH |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
7 from piecrust.configuration import ( |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
8 ConfigurationLoader, ConfigurationDumper, merge_dicts) |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
9 from piecrust.importing.base import Importer, create_page, download_asset |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
10 from piecrust.sources.base import MODE_CREATING |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
11 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
12 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
13 logger = logging.getLogger(__name__) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
14 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
15 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
class WordpressImporterBase(Importer):
    """Base class for the Wordpress importers.

    It declares the command-line options shared by Wordpress imports and
    hands the import itself over to the object returned by
    `_getImplementation`, which sub-classes must provide.
    """
    def setupParser(self, parser, app):
        """Register the importer's command-line options on `parser`."""
        # Options naming the sources that receive the imported content.
        source_options = [
            ('--pages-source', "pages", "The source to store pages in."),
            ('--posts-source', "posts", "The source to store posts in.")]
        for flag, default, help_text in source_options:
            parser.add_argument(flag, default=default, help=help_text)

        # Options that are declared without an explicit default value.
        fallback_options = [
            ('--default-post-layout',
             "The default layout to use for posts."),
            ('--default-post-category',
             "The default category to use for posts."),
            ('--default-page-layout',
             "The default layout to use for pages."),
            ('--default-page-category',
             "The default category to use for pages.")]
        for flag, help_text in fallback_options:
            parser.add_argument(flag, help=help_text)

    def importWebsite(self, app, args):
        """Run the import and return whatever the implementation returns."""
        return self._getImplementation(app, args).importWebsite()

    def _getImplementation(self, app, args):
        """Return the object doing the import; sub-classes must override."""
        raise NotImplementedError()
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
45 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
46 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
47 class _ImporterBase(object): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
48 def __init__(self, app, args): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
49 self.app = app |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
50 self._cat_map = {} |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
51 self._author_map = {} |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
52 self._pages_source = app.getSource(args.pages_source) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
53 self._posts_source = app.getSource(args.posts_source) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
54 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
55 def importWebsite(self): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
56 ctx = self._open() |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
57 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
58 # Site configuration. |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
59 logger.info("Generating site configuration...") |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
60 site_config = self._getSiteConfig(ctx) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
61 site_config.setdefault('site', {}) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
62 site_config['site'].update({ |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
63 'post_url': '%year%/%month%/%slug%', |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
64 'category_url': 'category/%category%'}) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
65 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
66 site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
67 with open(site_config_path, 'r') as fp: |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
68 cfg_data = yaml.load(fp, Loader=ConfigurationLoader) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
69 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
70 cfg_data = cfg_data or {} |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
71 merge_dicts(cfg_data, site_config) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
72 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
73 with open(site_config_path, 'w') as fp: |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
74 yaml.dump(cfg_data, fp, default_flow_style=False, |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
75 allow_unicode=True, |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
76 Dumper=ConfigurationDumper) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
77 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
78 # Content |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
79 for p in self._getPosts(ctx): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
80 if p['type'] == 'attachment': |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
81 self._createAsset(p) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
82 else: |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
83 self._createPost(p) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
84 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
85 self._close(ctx) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
86 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
87 def _open(self): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
88 raise NotImplementedError() |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
89 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
90 def _close(self, ctx): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
91 pass |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
92 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
93 def _getSiteConfig(self, ctx): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
94 raise NotImplementedError() |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
95 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
96 def _getPosts(self, ctx): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
97 raise NotImplementedError() |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
98 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
99 def _createAsset(self, asset_info): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
100 download_asset(self.app, asset_info['url']) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
101 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
102 def _createPost(self, post_info): |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
103 post_dt = post_info['datetime'] |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
104 finder = { |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
105 'year': post_dt.year, |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
106 'month': post_dt.month, |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
107 'day': post_dt.day, |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
108 'slug': post_info['slug']} |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
109 if post_info['type'] == 'post': |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
110 source = self._posts_source |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
111 elif post_info['type'] == 'page': |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
112 source = self._pages_source |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
113 else: |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
114 raise Exception("Unknown post type: %s" % post_info['type']) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
115 rel_path, fac_metadata = source.findPagePath(finder, MODE_CREATING) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
116 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
117 metadata = post_info['metadata'].copy() |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
118 for name in ['title', 'author', 'status', 'post_id', 'post_guid', |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
119 'description', 'categories']: |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
120 val = post_info.get(name) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
121 if val is not None: |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
122 metadata[name] = val |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
123 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
124 content = post_info['content'] |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
125 excerpt = post_info['excerpt'] |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
126 text = content |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
127 if excerpt is not None and excerpt.strip() != '': |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
128 text = "%s\n\n---excerpt---\n\n%s" % (content, excerpt) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
129 |
314
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
130 status = metadata.get('status') |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
131 if status == 'publish': |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
132 path = source.resolveRef(rel_path) |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
133 create_page(self.app, path, metadata, text) |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
134 elif status == 'draft': |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
135 filename = '-'.join(metadata['title'].split(' ')) + '.html' |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
136 path = os.path.join(self.app.root_dir, 'drafts', filename) |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
137 create_page(self.app, path, metadata, text) |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
138 else: |
1ddd18ad5e76
import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents:
302
diff
changeset
|
139 logger.warning("Ignoring post with status: %s" % status) |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
140 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
141 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
142 class _XmlImporter(_ImporterBase): |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
143 ns_wp = {'wp': 'http://wordpress.org/export/1.2/'} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
144 ns_dc = {'dc': "http://purl.org/dc/elements/1.1/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
145 ns_excerpt = {'excerpt': "http://wordpress.org/export/1.2/excerpt/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
146 ns_content = {'content': "http://purl.org/rss/1.0/modules/content/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
147 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
def __init__(self, app, args):
    """Set up the base importer and remember where the XML export is.

    The path of the export is read from `args.xml_path`.
    """
    super(_XmlImporter, self).__init__(app, args)
    xml_path = args.xml_path
    self.path = xml_path
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
151 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
152 def _open(self): |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
153 if not os.path.exists(self.path): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
154 raise Exception("No such file: %s" % self.path) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
155 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
156 try: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
157 import xml.etree.ElementTree as ET |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
158 except ImportError: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
159 logger.error("You don't seem to have any support for ElementTree " |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
160 "XML parsing.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
161 return 1 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
162 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
163 with open(self.path, 'r', encoding='utf8') as fp: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
164 xml = fp.read() |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
165 xml = xml.replace(chr(0x1e), '') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
166 xml = xml.replace(chr(0x10), '') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
167 tree = ET.fromstring(xml) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
168 channel = tree.find('channel') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
169 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
170 return channel |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
171 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
def _getSiteConfig(self, channel):
    """Build the site configuration from the export's channel element.

    Returns an ``OrderedDict`` with a single ``site`` entry. That entry
    holds the channel's ``title`` and ``description`` plus an ``authors``
    mapping keyed by author login, one record per ``wp:author`` child of
    `channel`.
    """
    # Basic site information.
    site_section = {
        'title': find_text(channel, 'title'),
        'description': find_text(channel, 'description')}

    # Authors: (key in the author record, XML tag it is read from).
    wp = self.ns_wp
    author_fields = (
        ('email', 'wp:author_email'),
        ('display_name', 'wp:author_display_name'),
        ('first_name', 'wp:author_first_name'),
        ('last_name', 'wp:author_last_name'),
        ('author_id', 'wp:author_id'))
    authors = {}
    for author_node in channel.findall('wp:author', wp):
        login = find_text(author_node, 'wp:author_login', wp)
        authors[login] = {
            field: find_text(author_node, tag, wp)
            for field, tag in author_fields}
    site_section['authors'] = authors

    return OrderedDict([('site', site_section)])
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
199 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
def _getPosts(self, channel):
    """Yield one info dictionary per ``item`` child of `channel`.

    Items whose ``wp:post_type`` is ``attachment`` are described by
    `_getAssetInfo`; every other item is described by `_getPostInfo`.
    """
    for item in channel.findall('item'):
        kind = find_text(item, 'wp:post_type', self.ns_wp)
        describe = (self._getAssetInfo if kind == 'attachment'
                    else self._getPostInfo)
        yield describe(item)
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
207 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
def _getAssetInfo(self, node):
    """Describe an attachment item as a dictionary.

    The result has ``type`` set to ``'attachment'`` and ``url`` set to
    the text of the node's ``wp:attachment_url`` child.
    """
    return {
        'type': 'attachment',
        'url': find_text(node, 'wp:attachment_url', self.ns_wp)}
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
211 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
def _getPostInfo(self, node):
    """Collect everything known about a non-attachment item.

    Returns a dictionary with the keys ``type``, ``slug``, ``datetime``
    (parsed from ``wp:post_date`` with the ``%Y-%m-%d %H:%M:%S`` format),
    ``title``, ``author``, ``status``, ``post_id``, ``post_guid``,
    ``description``, ``categories`` (list of ``nicename`` attributes),
    ``metadata`` (``wp:postmeta`` key/value pairs), ``content`` and
    ``excerpt``.
    """
    wp = self.ns_wp

    def wp_text(tag):
        # Text of a child element living in the `wp` namespace.
        return find_text(node, tag, wp)

    # The date is parsed first so a malformed date fails before anything
    # else is read from the node.
    when = datetime.datetime.strptime(
        wp_text('wp:post_date'), '%Y-%m-%d %H:%M:%S')
    slug = wp_text('wp:post_name')

    # TODO: menu order, parent, password, sticky
    info = {
        'type': wp_text('wp:post_type'),
        'slug': slug,
        'datetime': when,
        'title': find_text(node, 'title'),
        'author': find_text(node, 'dc:creator', self.ns_dc),
        'status': wp_text('wp:status'),
        'post_id': wp_text('wp:post_id'),
        'post_guid': find_text(node, 'guid'),
        'description': find_text(node, 'description')}

    info['categories'] = [
        str(category.attrib.get('nicename'))
        for category in node.findall('category')]

    meta_values = {}
    for meta in node.findall('wp:postmeta', wp):
        meta_key = find_text(meta, 'wp:meta_key', wp)
        meta_values[meta_key] = find_text(meta, 'wp:meta_value', wp)
    info['metadata'] = meta_values

    info['content'] = find_text(node, 'content:encoded', self.ns_content)
    info['excerpt'] = find_text(node, 'excerpt:encoded', self.ns_excerpt)
    return info
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
257 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
258 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
class WordpressXmlImporter(WordpressImporterBase):
    """Importer that reads a Wordpress blog from an exported XML archive."""

    name = 'wordpress-xml'
    description = "Imports a Wordpress blog from an exported XML archive."

    def setupParser(self, parser, app):
        """Add the positional ``xml_path`` argument after the base setup."""
        base = super(WordpressXmlImporter, self)
        base.setupParser(parser, app)
        parser.add_argument(
            'xml_path', help="The path to the exported XML archive file.")

    def _getImplementation(self, app, args):
        """Create the `_XmlImporter` for the given app and arguments."""
        return _XmlImporter(app, args)
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
271 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
272 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
def find_text(parent, child_name, namespaces=None):
    """Return the text of the first `child_name` element under `parent`.

    `namespaces` is passed straight to ``parent.find`` and maps the
    prefixes used in `child_name` to namespace URIs.

    An empty string is returned when the child element does not exist or
    has no text. Without that guard a missing child raised an opaque
    ``AttributeError`` on ``None``, and an empty element produced the
    literal string ``'None'``, which then leaked into the imported data.
    """
    child = parent.find(child_name, namespaces)
    if child is None or child.text is None:
        return ''
    return str(child.text)
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
275 |