Mercurial > wikked
annotate wikked/fs.py @ 24:8a83b0e91633
The wiki `fs` only scans known file extensions.
Those file extensions are provided by the registered formatters.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Fri, 04 Jan 2013 16:02:46 -0800 |
parents | 67c150d5ed53 |
children | 2b35d719f342 |
rev | line source |
---|---|
0 | 1 import os |
2 import os.path | |
3 import re | |
4 import string | |
24
8a83b0e91633
The wiki `fs` only scans known file extensions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
5 import codecs |
0 | 6 |
7 | |
class PageNotFoundError(Exception):
    """ Signals that a page URL could not be resolved
        to any physical file on disk.
    """
13 | |
14 | |
class FileSystem(object):
    """ A class responsible for mapping page URLs to
        file-system paths, and for scanning the file-system
        to list existing pages.

        Attributes:
            root: directory under which all pages live.
            excluded: list of paths that must never be reported as
                pages (empty by default).
            page_extensions: collection of file extensions, compared
                against the result of `os.path.splitext` (so including
                the leading dot), that are considered pages when
                scanning. `None` (the default) accepts every file.
    """
    def __init__(self, root):
        self.root = root
        self.excluded = []
        self.page_extensions = None

    def getPageInfos(self, subdir=None):
        """ Walks the file-system and yields an info dictionary (see
            `_getPageInfo`) for every page found.

            If `subdir` is given, it is a namespace URL and only the
            matching directory is scanned; otherwise the whole root is.
        """
        basepath = self.root
        if subdir is not None:
            basepath = self.getPhysicalNamespacePath(subdir)

        for dirpath, dirnames, filenames in os.walk(basepath):
            # Prune excluded directories in place so that `os.walk`
            # doesn't descend into them.
            dirnames[:] = [d for d in dirnames
                           if os.path.join(dirpath, d) not in self.excluded]
            for filename in filenames:
                path = os.path.join(dirpath, filename)
                if path in self.excluded:
                    continue
                page_info = self.getPageInfo(path)
                if page_info is not None:
                    yield page_info

    def getPageInfo(self, path):
        """ Returns the info dictionary for the file at `path`, or
            `None` if the path is excluded or doesn't have one of the
            known page extensions.
        """
        # NOTE(review): this is a plain string-prefix test, so an
        # excluded entry also hides siblings sharing that prefix.
        for e in self.excluded:
            if path.startswith(e):
                return None
        return self._getPageInfo(path)

    def getPage(self, url):
        """ Loads the page for the given URL.

            Returns a dictionary with the `url`, the physical `path`,
            the file `name` (without extension), the `ext` (with its
            leading dot) and the UTF-8 decoded `content`.

            Raises `PageNotFoundError` if no file matches the URL, and
            `ValueError` if the URL contains `..`.
        """
        path = self.getPhysicalPagePath(url)
        with codecs.open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        name = os.path.basename(path)
        name_split = os.path.splitext(name)
        return {
            'url': url,
            'path': path,
            'name': name_split[0],
            'ext': name_split[1],
            'content': content
        }

    def pageExists(self, url):
        """ Returns whether a physical file can be found for the
            given URL.
        """
        try:
            self.getPhysicalPagePath(url)
            return True
        except PageNotFoundError:
            return False

    def getPhysicalNamespacePath(self, url):
        """ Returns the path of the directory matching a namespace URL.
        """
        return self._getPhysicalPath(url, False)

    def _getPageInfo(self, path):
        """ Builds the info dictionary for the file at `path`.

            Returns `None` when `page_extensions` is set and the file's
            extension is not part of it. Otherwise returns a dictionary
            with the slugified `url`, the `path`, the `name` (the path
            relative to the root, without extension) and the `ext`.
        """
        rel_path = os.path.relpath(path, self.root)
        rel_path_split = os.path.splitext(rel_path)
        if (self.page_extensions is not None and
                rel_path_split[1] not in self.page_extensions):
            return None
        # Slugify: lower-case, and collapse every run of characters
        # that aren't URL-friendly into a single dash.
        url = re.sub(r'[^A-Za-z0-9_\.\-\(\)/]+', '-',
                     rel_path_split[0].lower())
        return {
            'url': url,
            'path': path,
            'name': rel_path_split[0],
            'ext': rel_path_split[1]
        }

    def getPhysicalPagePath(self, url):
        """ Returns the path of the file matching a page URL.
        """
        return self._getPhysicalPath(url, True)

    def _getPhysicalPath(self, url, is_file):
        """ Resolves a URL to a physical path under the root.

            If `is_file` is true, the last part of the URL must match
            an entry that has an extension; otherwise every part must
            match an entry's slugified name exactly.

            Raises `ValueError` if the URL contains `..`, and
            `PageNotFoundError` if any part of the URL has no match.
        """
        # `string.find` no longer exists in Python 3; the `in`
        # operator behaves the same on both Python 2 and 3.
        if '..' in url:
            raise ValueError("Page URLs can't contain '..': " + url)

        # For each "part" in the given URL, find the first
        # file-system entry that would get slugified to an
        # equal string.
        current = self.root
        parts = url.lower().split('/')
        last_index = len(parts) - 1
        for i, part in enumerate(parts):
            # If we're looking for a file and this is the last part,
            # look for something similar but with an extension.
            want_file = is_file and i == last_index
            if want_file:
                # NOTE(review): the pattern isn't anchored at the end,
                # so `foo` also matches `foo.bar.md` or `foo.md-old`.
                file_pattern = re.compile(r"%s\.[a-z]+" % re.escape(part))
            for name in os.listdir(current):
                name_formatted = re.sub(r'[^A-Za-z0-9_\.\-\(\)]+', '-',
                                        name.lower())
                if want_file:
                    matched = file_pattern.match(name_formatted) is not None
                else:
                    matched = name_formatted == part
                if matched:
                    current = os.path.join(current, name)
                    break
            else:
                # Failed to find a part of the URL.
                raise PageNotFoundError("No such page: " + url)
        return current
113 |