Mercurial > wikked
diff wikked/fs.py @ 0:c946f4facfa2
Initial commit.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Mon, 10 Dec 2012 22:40:52 -0800 |
parents | |
children | 59cad6ce1a1c |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wikked/fs.py Mon Dec 10 22:40:52 2012 -0800 @@ -0,0 +1,83 @@ +import os +import os.path +import re +import string + + +class PageNotFoundError(Exception): + """ An error raised when no physical file + is found for a given URL. + """ + pass + + +class FileSystem(object): + """ A class responsible for mapping page URLs to + file-system paths, and for scanning the file-system + to list existing pages. + """ + def __init__(self, root): + self.root = root + self.excluded = [] + + def getPageNames(self, subdir=None): + basepath = self.root + if subdir is not None: + basepath = self.getPhysicalNamespacePath(subdir) + + for dirpath, dirnames, filenames in os.walk(basepath): + dirnames[:] = [d for d in dirnames if os.path.join(dirpath, d) not in self.excluded] + for filename in filenames: + path = os.path.join(dirpath, filename) + path_split = os.path.splitext(os.path.relpath(path, self.root)) + if path_split[1] != '': + yield path_split[0] + + def getPage(self, url): + path = self.getPhysicalPagePath(url) + with open(path, 'r') as f: + content = f.read() + name = os.path.basename(path) + name_split = os.path.splitext(name) + return { + 'url': url, + 'path': path, + 'name': name_split[0], + 'ext': name_split[1], + 'content': content + } + + def getPhysicalNamespacePath(self, url): + return self._getPhysicalPath(url, False) + + def getPhysicalPagePath(self, url): + return self._getPhysicalPath(url, True) + + def _getPhysicalPath(self, url, is_file): + if string.find(url, '..') >= 0: + raise ValueError("Page URLs can't contain '..': " + url) + + # For each "part" in the given URL, find the first + # file-system entry that would get slugified to an + # equal string. + current = self.root + parts = url.lower().split('/') + for i, part in enumerate(parts): + names = os.listdir(current) + for name in names: + name_formatted = re.sub(r'[^A-Za-z0-9_\.\-\(\)]+', '-', name.lower()) + if is_file and i == len(parts) - 1: + # If we're looking for a file and this is the last part, + # look for something similar but with an extension. + if re.match("%s\.[a-z]+" % re.escape(part), name_formatted): + current = os.path.join(current, name) + break + else: + if name_formatted == part: + current = os.path.join(current, name) + break + else: + # Failed to find a part of the URL. + raise PageNotFoundError("No such page: " + url) + return current +