annotate piecrust/importing/wordpress.py @ 1188:a7c43131d871

bake: Fix file write flushing problem with Python 3.8+ Writing the cache files fails in Python 3.8 because it looks like flushing behaviour has changed. We need to explicitly flush. And even then, in very rare occurrences, it looks like it can still run into racing conditions, so we do a very hacky and ugly "retry" loop when fetching cached data :(
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 15 Jun 2021 22:36:23 -0700
parents 4850f8c21b6e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 import os.path
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
2 import logging
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3 import datetime
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
4 import yaml
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
5 from collections import OrderedDict
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
6 from piecrust import CONFIG_PATH
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
7 from piecrust.configuration import (
852
4850f8c21b6e core: Start of the big refactor for PieCrust 3.0.
Ludovic Chabant <ludovic@chabant.com>
parents: 363
diff changeset
8 ConfigurationLoader, ConfigurationDumper, merge_dicts)
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
9 from piecrust.importing.base import Importer, create_page, download_asset
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
10
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
11
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
12 logger = logging.getLogger(__name__)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
13
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
14
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
class WordpressImporterBase(Importer):
    """Common front-end for the WordPress importers.

    Registers the command-line options the importers share and hands the
    actual import over to the object returned by `_getImplementation()`,
    which concrete importers must override.
    """

    def setupParser(self, parser, app):
        """Add the shared source/layout/category options to `parser`.

        `app` is accepted for interface compatibility and is not used here.
        """
        # One row per option; they are registered in exactly this order.
        shared_options = [
            ('--pages-source', {
                'default': "pages",
                'help': "The source to store pages in."}),
            ('--posts-source', {
                'default': "posts",
                'help': "The source to store posts in."}),
            ('--default-post-layout', {
                'help': "The default layout to use for posts."}),
            ('--default-post-category', {
                'help': "The default category to use for posts."}),
            ('--default-page-layout', {
                'help': "The default layout to use for pages."}),
            ('--default-page-category', {
                'help': "The default category to use for pages."}),
        ]
        for flag, settings in shared_options:
            parser.add_argument(flag, **settings)

    def importWebsite(self, app, args):
        """Run the import and return whatever the implementation returns."""
        return self._getImplementation(app, args).importWebsite()

    def _getImplementation(self, app, args):
        """Return the object that performs the import; must be overridden."""
        raise NotImplementedError()
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
44
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
45
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
46 class _ImporterBase(object):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
47 def __init__(self, app, args):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
48 self.app = app
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
49 self._cat_map = {}
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
50 self._author_map = {}
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
51 self._pages_source = app.getSource(args.pages_source)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
52 self._posts_source = app.getSource(args.posts_source)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
53
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
54 def importWebsite(self):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
55 ctx = self._open()
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
56
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
57 # Site configuration.
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
58 logger.info("Generating site configuration...")
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
59 site_config = self._getSiteConfig(ctx)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
60 site_config.setdefault('site', {})
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
61 site_config['site'].update({
852
4850f8c21b6e core: Start of the big refactor for PieCrust 3.0.
Ludovic Chabant <ludovic@chabant.com>
parents: 363
diff changeset
62 'post_url': '%year%/%month%/%slug%',
4850f8c21b6e core: Start of the big refactor for PieCrust 3.0.
Ludovic Chabant <ludovic@chabant.com>
parents: 363
diff changeset
63 'category_url': 'category/%category%'})
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
64
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
65 site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
66 with open(site_config_path, 'r') as fp:
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
67 cfg_data = yaml.load(fp, Loader=ConfigurationLoader)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
68
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
69 cfg_data = cfg_data or {}
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
70 merge_dicts(cfg_data, site_config)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
71
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
72 with open(site_config_path, 'w') as fp:
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
73 yaml.dump(cfg_data, fp, default_flow_style=False,
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
74 allow_unicode=True,
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
75 Dumper=ConfigurationDumper)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
76
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
77 # Content
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
78 for p in self._getPosts(ctx):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
79 if p['type'] == 'attachment':
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
80 self._createAsset(p)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
81 else:
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
82 self._createPost(p)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
83
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
84 self._close(ctx)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
85
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
86 def _open(self):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
87 raise NotImplementedError()
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
88
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
89 def _close(self, ctx):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
90 pass
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
91
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
92 def _getSiteConfig(self, ctx):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
93 raise NotImplementedError()
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
94
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
95 def _getPosts(self, ctx):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
96 raise NotImplementedError()
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
97
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
98 def _createAsset(self, asset_info):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
99 download_asset(self.app, asset_info['url'])
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
100
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
101 def _createPost(self, post_info):
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
102 post_dt = post_info['datetime']
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
103 finder = {
852
4850f8c21b6e core: Start of the big refactor for PieCrust 3.0.
Ludovic Chabant <ludovic@chabant.com>
parents: 363
diff changeset
104 'year': post_dt.year,
4850f8c21b6e core: Start of the big refactor for PieCrust 3.0.
Ludovic Chabant <ludovic@chabant.com>
parents: 363
diff changeset
105 'month': post_dt.month,
4850f8c21b6e core: Start of the big refactor for PieCrust 3.0.
Ludovic Chabant <ludovic@chabant.com>
parents: 363
diff changeset
106 'day': post_dt.day,
4850f8c21b6e core: Start of the big refactor for PieCrust 3.0.
Ludovic Chabant <ludovic@chabant.com>
parents: 363
diff changeset
107 'slug': post_info['slug']}
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
108 if post_info['type'] == 'post':
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
109 source = self._posts_source
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
110 elif post_info['type'] == 'page':
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
111 source = self._pages_source
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
112 else:
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
113 raise Exception("Unknown post type: %s" % post_info['type'])
363
dd25bd3ce1f9 serve: Refactoring and fixes to be able to serve taxonomy pages.
Ludovic Chabant <ludovic@chabant.com>
parents: 314
diff changeset
114 factory = source.findPageFactory(finder, MODE_CREATING)
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
115
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
116 metadata = post_info['metadata'].copy()
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
117 for name in ['title', 'author', 'status', 'post_id', 'post_guid',
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
118 'description', 'categories']:
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
119 val = post_info.get(name)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
120 if val is not None:
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
121 metadata[name] = val
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
122
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
123 content = post_info['content']
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
124 excerpt = post_info['excerpt']
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
125 text = content
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
126 if excerpt is not None and excerpt.strip() != '':
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
127 text = "%s\n\n---excerpt---\n\n%s" % (content, excerpt)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
128
314
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
129 status = metadata.get('status')
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
130 if status == 'publish':
363
dd25bd3ce1f9 serve: Refactoring and fixes to be able to serve taxonomy pages.
Ludovic Chabant <ludovic@chabant.com>
parents: 314
diff changeset
131 path = factory.path
314
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
132 create_page(self.app, path, metadata, text)
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
133 elif status == 'draft':
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
134 filename = '-'.join(metadata['title'].split(' ')) + '.html'
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
135 path = os.path.join(self.app.root_dir, 'drafts', filename)
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
136 create_page(self.app, path, metadata, text)
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
137 else:
1ddd18ad5e76 import: Wordpress importer puts drafts in a `draft` folder. Ignore other statuses.
Ludovic Chabant <ludovic@chabant.com>
parents: 302
diff changeset
138 logger.warning("Ignoring post with status: %s" % status)
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
139
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
140
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
141 class _XmlImporter(_ImporterBase):
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
142 ns_wp = {'wp': 'http://wordpress.org/export/1.2/'}
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
143 ns_dc = {'dc': "http://purl.org/dc/elements/1.1/"}
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
144 ns_excerpt = {'excerpt': "http://wordpress.org/export/1.2/excerpt/"}
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
145 ns_content = {'content': "http://purl.org/rss/1.0/modules/content/"}
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
146
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def __init__(self, app, args):
    """Set up the shared importer state and remember the XML file path.

    On top of what the base initializer needs, `args` must provide
    `xml_path`.
    """
    super(_XmlImporter, self).__init__(app, args)
    # Location of the XML file that `_open()` reads.
    self.path = args.xml_path
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
150
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
151 def _open(self):
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
152 if not os.path.exists(self.path):
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
153 raise Exception("No such file: %s" % self.path)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
154
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
155 try:
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
156 import xml.etree.ElementTree as ET
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
157 except ImportError:
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
158 logger.error("You don't seem to have any support for ElementTree "
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
159 "XML parsing.")
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
160 return 1
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
161
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
162 with open(self.path, 'r', encoding='utf8') as fp:
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
163 xml = fp.read()
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
164 xml = xml.replace(chr(0x1e), '')
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
165 xml = xml.replace(chr(0x10), '')
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
166 tree = ET.fromstring(xml)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
167 channel = tree.find('channel')
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
168
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
169 return channel
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
170
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
def _getSiteConfig(self, channel):
    """Build the site configuration from the export's `channel` node.

    Returns an `OrderedDict` with a single `site` entry holding the
    site title, its description, and an `authors` mapping keyed by
    author login.
    """
    # Output key -> tag looked up on each `wp:author` node.
    author_fields = (
        ('email', 'wp:author_email'),
        ('display_name', 'wp:author_display_name'),
        ('first_name', 'wp:author_first_name'),
        ('last_name', 'wp:author_last_name'),
        ('author_id', 'wp:author_id'),
    )

    # Basic site information.
    site = {
        'title': find_text(channel, 'title'),
        'description': find_text(channel, 'description')}

    # Authors' details, one entry per `wp:author` node.
    authors = {}
    for author_node in channel.findall('wp:author', self.ns_wp):
        login = find_text(author_node, 'wp:author_login', self.ns_wp)
        details = {}
        for key, tag in author_fields:
            details[key] = find_text(author_node, tag, self.ns_wp)
        authors[login] = details
    site['authors'] = authors

    return OrderedDict({'site': site})
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
198
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
def _getPosts(self, channel):
    """Yield one info dictionary per `item` node found in `channel`.

    Items whose `wp:post_type` is `attachment` are described by
    `_getAssetInfo`; every other item goes through `_getPostInfo`.
    """
    for item in channel.findall('item'):
        kind = find_text(item, 'wp:post_type', self.ns_wp)
        extract = (self._getAssetInfo if kind == 'attachment'
                   else self._getPostInfo)
        yield extract(item)
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
206
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
def _getAssetInfo(self, node):
    """Describe an attachment item by its `wp:attachment_url` text."""
    return {
        'type': 'attachment',
        'url': find_text(node, 'wp:attachment_url', self.ns_wp)}
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
210
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
def _getPostInfo(self, node):
    """Collect everything known about a post/page item into a dict.

    The result holds the post type, slug and publication datetime
    (parsed from `wp:post_date` as `YYYY-MM-DD HH:MM:SS`), followed by
    the title, author, status, ids, description, category nicenames,
    the `wp:postmeta` key/value pairs, and finally the encoded content
    and excerpt.
    """
    wp = self.ns_wp

    # Identity of the post: when it was published, its slug, its type.
    published = datetime.datetime.strptime(
        find_text(node, 'wp:post_date', wp),
        '%Y-%m-%d %H:%M:%S')
    slug = find_text(node, 'wp:post_name', wp)
    kind = find_text(node, 'wp:post_type', wp)
    info = {'type': kind, 'slug': slug, 'datetime': published}

    # General attributes.
    # TODO: menu order, parent, password, sticky
    info['title'] = find_text(node, 'title')
    info['author'] = find_text(node, 'dc:creator', self.ns_dc)
    info['status'] = find_text(node, 'wp:status', wp)
    info['post_id'] = find_text(node, 'wp:post_id', wp)
    info['post_guid'] = find_text(node, 'guid')
    info['description'] = find_text(node, 'description')

    # Every `category` child contributes its `nicename` attribute.
    info['categories'] = [
        str(category.attrib.get('nicename'))
        for category in node.findall('category')]

    # Custom fields; a later duplicate key overwrites an earlier one.
    custom_fields = {}
    for entry in node.findall('wp:postmeta', wp):
        meta_key = find_text(entry, 'wp:meta_key', wp)
        meta_value = find_text(entry, 'wp:meta_value', wp)
        custom_fields[meta_key] = meta_value
    info['metadata'] = custom_fields

    # Body text.
    info['content'] = find_text(node, 'content:encoded', self.ns_content)
    info['excerpt'] = find_text(node, 'excerpt:encoded', self.ns_excerpt)

    return info
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
256
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
257
302
103abb08755e import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
class WordpressXmlImporter(WordpressImporterBase):
    """Wordpress importer that reads an exported XML archive."""

    name = 'wordpress-xml'
    description = "Imports a Wordpress blog from an exported XML archive."

    def setupParser(self, parser, app):
        """Let the base class set up `parser`, then add `xml_path`."""
        super().setupParser(parser, app)
        xml_path_help = "The path to the exported XML archive file."
        parser.add_argument('xml_path', help=xml_path_help)

    def _getImplementation(self, app, args):
        """Return the `_XmlImporter` that does the work for `args`."""
        implementation = _XmlImporter(app, args)
        return implementation
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
270
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
271
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def find_text(parent, child_name, namespaces=None):
    """Return the text of the first `child_name` child of `parent`.

    `namespaces` is passed straight to `parent.find()` so prefixed
    names such as `wp:post_type` can be resolved.

    An empty string is returned when the child does not exist or has
    no text. Previously a missing child raised `AttributeError`, and an
    empty element (whose `.text` is `None`) produced the literal string
    `'None'`, which then leaked into the imported data.
    """
    child = parent.find(child_name, namespaces)
    if child is None or child.text is None:
        return ''
    return str(child.text)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
274