0
|
1 import os
|
|
2 import os.path
|
|
3 import re
|
|
4 import string
|
|
5
|
|
6
|
|
class PageNotFoundError(Exception):
    """Raised when no physical file is found for a given page URL."""
|
|
12
|
|
13
|
|
class FileSystem(object):
    """Map page URLs to file-system paths and list existing pages.

    URLs are '/'-separated and are matched against a "slugified" form of
    each file-system entry name (lower-cased, with every run of characters
    outside ``A-Za-z0-9_.-()`` replaced by a single '-').

    Attributes:
        root: Directory under which all pages live.
        excluded: List of directory paths (as produced by joining the
            walked directory path with the sub-directory name) that
            ``getPageNames`` must not descend into.
    """

    def __init__(self, root):
        self.root = root
        self.excluded = []

    def getPageNames(self, subdir=None):
        """Yield the name of every page found under the root.

        Args:
            subdir: Optional namespace URL; when given, only that
                sub-tree is scanned.

        Yields:
            The path of each file relative to ``self.root`` with its
            extension stripped. Files without an extension are skipped.
        """
        basepath = self.root
        if subdir is not None:
            basepath = self.getPhysicalNamespacePath(subdir)

        for dirpath, dirnames, filenames in os.walk(basepath):
            # Prune in place so os.walk does not descend into excluded
            # directories.
            dirnames[:] = [
                d for d in dirnames
                if os.path.join(dirpath, d) not in self.excluded
            ]
            for filename in filenames:
                path = os.path.join(dirpath, filename)
                rel_name, ext = os.path.splitext(
                    os.path.relpath(path, self.root))
                if ext != '':
                    yield rel_name

    def getPage(self, url):
        """Read the page file matching ``url``.

        Returns:
            A dict with keys ``url``, ``path`` (physical path), ``name``
            (file name without extension), ``ext`` (extension including
            the leading dot) and ``content`` (the file's text).

        Raises:
            ValueError: If ``url`` contains '..'.
            PageNotFoundError: If no file matches ``url``.
        """
        path = self.getPhysicalPagePath(url)
        with open(path, 'r') as f:
            content = f.read()
        name, ext = os.path.splitext(os.path.basename(path))
        return {
            'url': url,
            'path': path,
            'name': name,
            'ext': ext,
            'content': content
        }

    def getPhysicalNamespacePath(self, url):
        """Return the physical path of the entry matching namespace ``url``."""
        return self._getPhysicalPath(url, False)

    def getPhysicalPagePath(self, url):
        """Return the physical path of the page file matching ``url``."""
        return self._getPhysicalPath(url, True)

    def _getPhysicalPath(self, url, is_file):
        """Resolve ``url`` to a path under ``self.root``.

        Args:
            url: '/'-separated page or namespace URL.
            is_file: When true, the last URL part must match an entry
                whose slugified name is that part followed by a
                ``.<letters>`` extension; otherwise every part must
                equal the slugified entry name exactly.

        Raises:
            ValueError: If ``url`` contains '..'.
            PageNotFoundError: If some part of ``url`` has no matching
                file-system entry.
        """
        # `string.find` only exists on Python 2; the `in` operator is
        # equivalent here and works on every Python version.
        if '..' in url:
            raise ValueError("Page URLs can't contain '..': " + url)

        # For each "part" in the given URL, find the first
        # file-system entry that would get slugified to an
        # equal string.
        current = self.root
        parts = url.lower().split('/')
        last_index = len(parts) - 1
        for i, part in enumerate(parts):
            # If we're looking for a file and this is the last part,
            # look for something similar but with an extension.
            want_file = is_file and i == last_index
            # os.listdir returns entries in arbitrary order; sort them so
            # that "first match" is deterministic.
            for name in sorted(os.listdir(current)):
                name_formatted = re.sub(
                    r'[^A-Za-z0-9_\.\-\(\)]+', '-', name.lower())
                if want_file:
                    matched = re.match(
                        r"%s\.[a-z]+" % re.escape(part),
                        name_formatted) is not None
                else:
                    matched = name_formatted == part
                if matched:
                    current = os.path.join(current, name)
                    break
            else:
                # Failed to find a part of the URL.
                raise PageNotFoundError("No such page: " + url)
        return current
|
|
83
|