view wikked/fs.py @ 132:e5d4b61e7a4c

Fixed file-system parsing for Windows.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 03 Dec 2013 09:46:30 -0800
parents 9d22cf4d2412
children d29007463b70
line wrap: on
line source

import os
import os.path
import re
import string
import codecs
import fnmatch
import logging
import itertools
from utils import PageNotFoundError, NamespaceNotFoundError


META_ENDPOINT = '_meta'


logger = logging.getLogger(__name__)


class PageInfo(object):
    """ Associates a page URL with the path of the file backing it,
        and gives lazy access to that file's text.
    """
    def __init__(self, url, path):
        """ Stores the URL and file path; the file is not opened here. """
        self.url, self.path = url, path
        # Filled in by the first access to `content`.
        self._content = None

    @property
    def content(self):
        """ The text of the page file, decoded as UTF-8.

            The file is read on first access only; the text is then
            kept on the instance and returned as-is afterwards.
        """
        if self._content is not None:
            return self._content
        with codecs.open(self.path, 'r', encoding='utf-8') as page_file:
            self._content = page_file.read()
        return self._content


class FileSystem(object):
    """ A class responsible for mapping page URLs to
        file-system paths, and for scanning the file-system
        to list existing pages.

        Page URLs are absolute (`/foo/bar`). They can be prefixed with
        an endpoint (`endpoint:/foo/bar`), in which case the page is
        looked up under the `_meta/<endpoint>` directory of the root.
    """
    def __init__(self, root):
        """ Creates a file-system rooted at the `root` directory. """
        self.root = unicode(root)

        # Paths or `fnmatch` patterns to ignore when scanning.
        self.excluded = []
        # Accepted page extensions, without the leading dot.
        # `None` accepts any non-empty extension.
        self.page_extensions = None
        # Extension for newly created pages; a leading dot is optional.
        self.default_extension = '.txt'

    def initFs(self, wiki):
        """ Configures this file-system from the given wiki: page
            extensions come from its formatters, exclusions from its
            parameters and SCM, and the default extension from its
            configuration.
        """
        self.page_extensions = list(set(
            itertools.chain.from_iterable(wiki.formatters.itervalues())))

        self.excluded += wiki.parameters.getSpecialFilenames()
        self.excluded += wiki.scm.getSpecialFilenames()

        self.default_extension = wiki.config.get('wiki', 'default_extension')

    def getPageInfos(self, subdir=None):
        """ Yields a `PageInfo` for each page file found under the root,
            or under the namespace URL `subdir` when one is given.

            Excluded directories are not walked into. Raises
            `NamespaceNotFoundError` if `subdir` doesn't exist.
        """
        basepath = self.root
        if subdir is not None:
            basepath = self.getPhysicalNamespacePath(subdir)

        for dirpath, dirnames, filenames in os.walk(basepath):
            # Prune in place so that `os.walk` skips excluded directories.
            # Use the same matching rule as for files, since exclusions
            # can be patterns and not just literal paths.
            dirnames[:] = [
                d for d in dirnames
                if not self._isExcluded(os.path.join(dirpath, d))]
            for filename in filenames:
                path = os.path.join(dirpath, filename)
                page_info = self.getPageInfo(path)
                if page_info is not None:
                    yield page_info

    def getPageInfo(self, path):
        """ Returns the `PageInfo` for the file at `path`, or `None` if
            the path is excluded or isn't a page file.
        """
        if not isinstance(path, unicode):
            path = unicode(path)
        if self._isExcluded(path):
            return None
        return self._getPageInfo(path)

    def getPage(self, url):
        """ Returns the `PageInfo` for the existing page at `url`.
            Raises `PageNotFoundError` if there's no such page.
        """
        path = self.getPhysicalPagePath(url)
        return PageInfo(url, path)

    def setPage(self, url, content):
        """ Writes `content` (as UTF-8) to the file for the page at `url`,
            creating parent directories as needed, and returns the
            corresponding `PageInfo`.
        """
        path = self.getPhysicalPagePath(url, make_new=True)
        logger.debug("Saving page '%s' to: %s", url, path)
        # A page in a brand new namespace has no directory yet.
        dirname = os.path.dirname(path)
        if dirname and not os.path.isdir(dirname):
            try:
                os.makedirs(dirname)
            except OSError:
                # Somebody else may have created it in the meantime.
                if not os.path.isdir(dirname):
                    raise
        with codecs.open(path, 'w', encoding='utf-8') as f:
            f.write(content)
        return PageInfo(url, path)

    def pageExists(self, url):
        """ Returns whether a page file exists for `url`. """
        try:
            self.getPhysicalPagePath(url)
            return True
        except PageNotFoundError:
            return False

    def getPhysicalPagePath(self, url, make_new=False):
        """ Returns the path of the file for the page at `url`.

            With `make_new`, the path is built with the default
            extension without looking at the file-system. Otherwise,
            raises `PageNotFoundError` if no page file is found.
        """
        return self._getPhysicalPath(url, is_file=True, make_new=make_new)

    def getPhysicalNamespacePath(self, url, make_new=False):
        """ Returns the path of the directory for the namespace at `url`.
            Raises `NamespaceNotFoundError` if it doesn't exist, unless
            `make_new` is set.
        """
        return self._getPhysicalPath(url, is_file=False, make_new=make_new)

    def _isExcluded(self, path):
        """ Returns whether `path` equals, or matches as an `fnmatch`
            pattern, one of the exclusions.
        """
        for pattern in self.excluded:
            if path == pattern or fnmatch.fnmatch(path, pattern):
                return True
        return False

    def _isPageExtension(self, ext):
        """ Returns whether `ext` (without leading dot) is a valid
            extension for a page file.
        """
        if not ext:
            return False
        if self.page_extensions is None:
            return True
        return ext in self.page_extensions

    def _getPageInfo(self, path):
        """ Builds the `PageInfo` for the file at `path` by turning its
            location relative to the root into a URL. Returns `None` if
            the file doesn't look like a page.
        """
        meta = None
        rel_path = os.path.relpath(path, self.root)
        meta_prefix = META_ENDPOINT + os.sep
        if rel_path.startswith(meta_prefix):
            parts = rel_path[len(meta_prefix):].split(os.sep, 1)
            if len(parts) != 2:
                # A file sitting directly in the meta directory doesn't
                # belong to any endpoint, so it can't be a page.
                return None
            meta, rel_path = parts
        name, ext = os.path.splitext(rel_path)
        if not self._isPageExtension(ext.lstrip('.')):
            return None

        # URLs always use forward slashes, whatever the platform.
        name = name.replace(os.sep, '/')
        url = '/' + name
        if meta:
            url = u"%s:/%s" % (meta.lower(), name)
        return PageInfo(url, path)

    def _getPhysicalPath(self, url, is_file=True, make_new=False):
        """ Resolves `url` to a file path (`is_file`) or to a directory
            path (otherwise).

            Raises `ValueError` if the URL isn't absolute or contains
            `..`. Unless `make_new` is set, raises `PageNotFoundError`
            or `NamespaceNotFoundError` if nothing matches on disk.
        """
        endpoint = None
        m = re.match(r'(\w[\w\d]+)\:(.*)', url)
        if m:
            # Keep the text type of the URL: forcing it to `str` breaks
            # on non-ASCII page names.
            endpoint = m.group(1)
            url = m.group(2).strip()

        if not url.startswith('/'):
            raise ValueError("Page URLs need to be absolute: " + url)
        if '..' in url:
            raise ValueError("Page URLs can't contain '..': " + url)

        # Find the root directory in which we'll be searching for the
        # page file.
        root = self.root
        if endpoint:
            root = os.path.join(self.root, META_ENDPOINT, endpoint)

        # Make the URL into a relative file-system path.
        url_path = url[1:].replace('/', os.sep)

        # If we want a non-existing file's path, just build that.
        if make_new:
            # The extension may or may not have been given with its dot.
            extension = self.default_extension.lstrip('.')
            return os.path.join(root, url_path + '.' + extension)

        # Find the right file-system entry for this URL.
        url_path = os.path.join(root, url_path)
        if is_file:
            dirname, basename = os.path.split(url_path)
            if not os.path.isdir(dirname):
                self._throwNotFoundError(url, root, is_file)
            # Sorted so that the result doesn't depend on listing order.
            for filename in sorted(os.listdir(dirname)):
                name, ext = os.path.splitext(filename)
                if name != basename:
                    continue
                # Apply the same rule as when scanning for pages, so that
                # other files sharing the page's name are ignored.
                if not self._isPageExtension(ext.lstrip('.')):
                    continue
                candidate = os.path.join(dirname, filename)
                # A namespace directory can be named like a page.
                if os.path.isfile(candidate):
                    return candidate
            self._throwNotFoundError(url, root, is_file)
        else:
            if os.path.isdir(url_path):
                return url_path
            self._throwNotFoundError(url, root, is_file)

    def _throwNotFoundError(self, url, searched, is_file):
        """ Raises `PageNotFoundError` (`is_file`) or
            `NamespaceNotFoundError` for `url`, mentioning the directory
            that was searched.
        """
        if is_file:
            raise PageNotFoundError("No such page '%s' in: %s" % (url, searched))
        else:
            raise NamespaceNotFoundError("No such namespace '%s' in: %s" % (url, searched))