view wikked/wiki.py @ 292:0b0f3486719c

More efficient invalidation of the cache when a page is edited. More efficient auto-reload. Remove `force_resolve` option from the web frontend.
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 27 Sep 2014 22:49:15 -0700
parents 3eef607e62d4
children 6bd9d44fc535
line wrap: on
line source

import os
import os.path
import time
import logging
import importlib
from ConfigParser import SafeConfigParser, NoOptionError
from wikked.page import FileSystemPage
from wikked.fs import FileSystem
from wikked.auth import UserManager
from wikked.scheduler import ResolveScheduler


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def passthrough_formatter(text):
    """ Formatter that hands the given text back untouched. """
    return text


class InitializationError(Exception):
    """ Raised when something goes wrong while the wiki is being
        initialized.
    """


# Contexts a wiki can be created in. `WikiParameters` takes one of these
# as its `ctx` argument, `NORMAL_CONTEXT` being the default.
# NOTE(review): presumably `INIT_CONTEXT` is for creating a new wiki and
# `BACKGROUND_CONTEXT` for running background tasks -- confirm against
# the callers.
NORMAL_CONTEXT = 0
INIT_CONTEXT = 1
BACKGROUND_CONTEXT = 2


class WikiParameters(object):
    """ An object that defines how a wiki gets initialized.

        It holds the root directory of the wiki, lazily loads its
        configuration, and exposes the factory methods used to create
        the wiki's sub-systems (file-system, index, database,
        source-control and authentication).
    """
    def __init__(self, root=None, ctx=NORMAL_CONTEXT):
        """ Creates the parameters for the wiki found at `root` (which
            defaults to the current working directory), to be used in
            the given context (one of the `*_CONTEXT` constants).
        """
        if root is None:
            root = os.getcwd()
        self.root = root
        self.context = ctx
        # Set up the lazily-created state first, so that overridden
        # `getFormatters`/`getWikiUpdater` hooks can safely use `config`.
        self._config = None
        self._index_factory = None
        self._scm_factory = None
        self.formatters = self.getFormatters()
        self.wiki_updater = self.getWikiUpdater()

    @property
    def config(self):
        """ The wiki configuration, loaded the first time it's needed.
        """
        if self._config is None:
            self._config = self._loadConfig()
        return self._config

    def fs_factory(self):
        """ Creates the file-system object for the wiki root.
        """
        return FileSystem(self.root, self.config)

    def index_factory(self):
        """ Creates the index configured with the `indexer` option.
            Raises `InitializationError` if that indexer is unknown.
        """
        self._ensureIndexFactory()
        return self._index_factory()

    def db_factory(self):
        """ Creates the SQL database object.
        """
        # Imported here so the SQL layer is only loaded when needed.
        from wikked.db.sql import SQLDatabase
        return SQLDatabase(self.config)

    def scm_factory(self):
        """ Creates the source-control object, either the configured one
            or an auto-detected one.
            Raises `InitializationError` if the configured source-control
            is unknown.
        """
        self._ensureScmFactory()
        return self._scm_factory()

    def auth_factory(self):
        """ Creates the user manager.
        """
        return UserManager(self.config)

    def getFormatters(self):
        """ Returns a dictionary mapping each available formatter function
            to the list of file extensions it handles. Formatters whose
            module can't be imported are left out.
        """
        formatters = {passthrough_formatter: ['txt', 'html']}
        self.tryAddFormatter(formatters, 'markdown', 'markdown',
                             ['md', 'mdown', 'markdown'])
        self.tryAddFormatter(formatters, 'textile', 'textile',
                             ['tl', 'text', 'textile'])
        self.tryAddFormatter(formatters, 'creole', 'creole2html',
                             ['cr', 'creole'])
        return formatters

    def getSpecialFilenames(self):
        """ Yields the built-in special file names, followed by every
            value of the `ignore` configuration section (if any).
            This is a generator: it can only be iterated once per call.
        """
        yield '.wikirc'
        yield '.wiki'
        yield '_files'
        if self.config.has_section('ignore'):
            for _, val in self.config.items('ignore'):
                yield val

    def tryAddFormatter(self, formatters, module_name, module_func,
                        extensions):
        """ Adds the function `module_func` of the module `module_name`
            to `formatters`, as the formatter for the given extensions.
            Nothing happens if the module can't be imported.
        """
        try:
            module = importlib.import_module(module_name)
            func = getattr(module, module_func)
            formatters[func] = extensions
        except ImportError:
            # The formatter is optional: skip it if it's not installed.
            pass

    def getWikiUpdater(self):
        """ Returns the function stored as `wiki_updater`. It takes a
            wiki as its only argument, and by default calls `updateAll`
            on it.
        """
        return lambda wiki: wiki.updateAll()

    def _loadConfig(self):
        """ Loads and returns the configuration of the wiki.
        """
        # Merge the default settings with any settings provided by
        # the local config file(s).
        config_path = os.path.join(self.root, '.wikirc')
        local_config_path = os.path.join(self.root, '.wiki', 'wikirc')
        default_config_path = os.path.join(
            os.path.dirname(__file__), 'resources', 'defaults.cfg')

        config = SafeConfigParser()
        # Close the defaults file as soon as it has been parsed, instead
        # of leaking the file handle.
        with open(default_config_path) as fp:
            config.readfp(fp)
        config.set('wiki', 'root', self.root)
        # Missing files are silently ignored by `read`.
        config.read([config_path, local_config_path])
        return config

    def _ensureIndexFactory(self):
        """ Creates the index factory, if needed, based on the `indexer`
            option of the `wiki` configuration section.
            Raises `InitializationError` if the indexer is unknown.
        """
        if self._index_factory is not None:
            return

        index_type = self.config.get('wiki', 'indexer')
        # The indexer modules are only imported when the factory runs.
        if index_type == 'whoosh':
            def impl():
                from wikked.indexer.whooshidx import WhooshWikiIndex
                return WhooshWikiIndex()
        elif index_type == 'elastic':
            def impl():
                from wikked.indexer.elastic import ElasticWikiIndex
                return ElasticWikiIndex()
        else:
            raise InitializationError("No such indexer: " + index_type)
        self._index_factory = impl

    def _ensureScmFactory(self):
        """ Creates the source-control factory, if needed, based on the
            `sourcecontrol` option of the `wiki` configuration section,
            or on what repository is found at the wiki root.
            Raises `InitializationError` if the source-control is unknown.
        """
        if self._scm_factory is not None:
            return

        try:
            scm_type = self.config.get('wiki', 'sourcecontrol')
        except NoOptionError:
            # Auto-detect
            if os.path.isdir(os.path.join(self.root, '.hg')):
                scm_type = 'hg'
            elif os.path.isdir(os.path.join(self.root, '.git')):
                scm_type = 'git'
            else:
                # Default to Mercurial. Yes. I just decided that myself.
                scm_type = 'hg'

        if self.context != NORMAL_CONTEXT and scm_type == 'hg':
            # Quick workaround for when we're creating a new repo,
            # or running background tasks.
            # We'll be using the `hg` process instead of the command
            # server, since there's no repo there yet, or we just don't
            # want to spawn a new process unless we want to.
            logger.debug("Forcing `hgexe` source-control for new repo.")
            scm_type = 'hgexe'

        # The source-control modules are only imported when the factory
        # runs.
        if scm_type == 'hg':
            def impl():
                from wikked.scm.mercurial import MercurialCommandServerSourceControl
                return MercurialCommandServerSourceControl(self.root)
        elif scm_type == 'hgexe':
            def impl():
                from wikked.scm.mercurial import MercurialSourceControl
                return MercurialSourceControl(self.root)
        elif scm_type == 'git':
            def impl():
                from wikked.scm.git import GitLibSourceControl
                return GitLibSourceControl(self.root)
        else:
            raise InitializationError(
                "No such source control: " + scm_type)
        self._scm_factory = impl


class EndpointInfo(object):
    """ Holds the settings of a named endpoint: whether it can be
        queried (`query`, true by default) and its `default` value
        (none by default).
    """
    def __init__(self, name):
        self.name, self.query, self.default = name, True, None


class Wiki(object):
    """ The wiki class! This is where the magic happens.
    """
    def __init__(self, parameters):
        """ Creates a new wiki instance. It won't be fully functional
            until you call `start`, which does the actual initialization.
            This gives you a chance to customize a few more things before
            getting started.
        """
        if parameters is None:
            raise ValueError("No parameters were given to the wiki.")

        self.formatters = parameters.formatters
        self.special_filenames = parameters.getSpecialFilenames()

        self.main_page_url = (
            '/' +
            parameters.config.get('wiki', 'main_page').strip('/'))
        self.templates_url = (
            parameters.config.get('wiki', 'templates_endpoint') +
            ':/')
        self.endpoints = self._createEndpointInfos(parameters.config)

        self.fs = parameters.fs_factory()
        self.index = parameters.index_factory()
        self.db = parameters.db_factory()
        self.scm = parameters.scm_factory()
        self.auth = parameters.auth_factory()

        self._wiki_updater = parameters.wiki_updater

    @property
    def root(self):
        return self.fs.root

    def start(self, update=False):
        """ Properly initializes the wiki and all its sub-systems.
        """
        order = [self.fs, self.scm, self.index, self.db, self.auth]
        for o in order:
            o.start(self)

        if update:
            self.updateAll()

    def init(self):
        """ Creates a new wiki at the specified root directory.
        """
        order = [self.fs, self.scm, self.index, self.db, self.auth]
        for o in order:
            o.init(self)
        self.start()
        for o in order:
            o.postInit()

    def stop(self):
        """ De-initializes the wiki and its sub-systems.
        """
        self.db.close()

    def reset(self):
        """ Clears all the cached data and rebuilds it from scratch.
        """
        logger.info("Resetting wiki data...")
        page_infos = self.fs.getPageInfos()
        factory = lambda pi: FileSystemPage(self, pi)
        self.db.reset(page_infos, factory)
        self.resolve(force=True)
        self.index.reset(self.getPages())

    def resolve(self, only_urls=None, force=False, parallel=False):
        """ Compute the final info (text, meta, links) of all or a subset of
            the pages, and caches it in the DB.
        """
        logger.debug("Resolving pages...")
        if only_urls:
            page_urls = only_urls
        else:
            page_urls = self.db.getPageUrls(uncached_only=(not force))

        num_workers = 4 if parallel else 1
        s = ResolveScheduler(self, page_urls)
        s.run(num_workers)

    def updatePage(self, url=None, path=None):
        """ Completely updates a single page, i.e. read it from the file-system
            and have it fully resolved and cached in the DB.
        """
        if url and path:
            raise Exception("Can't specify both an URL and a path.")
        logger.info("Updating page: %s" % (url or path))
        if path:
            page_info = self.fs.getPageInfo(path)
        else:
            page_info = self.fs.findPageInfo(url)
        self.db.updatePage(page_info)
        self.resolve(only_urls=[page_info.url])
        self.index.updatePage(self.db.getPage(
            page_info.url,
            fields=['url', 'path', 'title', 'text']))

    def updateAll(self):
        """ Completely updates all pages, i.e. read them from the file-system
            and have them fully resolved and cached in the DB.
            This function will check for timestamps to only update pages that
            need it.
        """
        logger.info("Updating all pages...")
        page_infos = self.fs.getPageInfos()
        self.db.updateAll(page_infos)
        self.resolve()
        self.index.updateAll(self.db.getPages(
            fields=['url', 'path', 'title', 'text']))

    def getPageUrls(self, subdir=None):
        """ Returns all the page URLs in the wiki, or in the given
            sub-directory.
        """
        for url in self.db.getPageUrls(subdir):
            yield url

    def getPages(self, subdir=None, meta_query=None,
                 endpoint_only=None, no_endpoint_only=False, fields=None):
        """ Gets all the pages in the wiki, or in the given sub-directory.
        """
        for page in self.db.getPages(subdir=subdir, meta_query=meta_query,
                endpoint_only=endpoint_only, no_endpoint_only=no_endpoint_only,
                fields=fields):
            yield page

    def getPage(self, url, fields=None):
        """ Gets the page for a given URL.
        """
        return self.db.getPage(url, fields=fields)

    def setPage(self, url, page_fields):
        """ Updates or creates a page for a given URL.
        """
        # Validate the parameters.
        if 'text' not in page_fields:
            raise ValueError(
                "No text specified for editing page '%s'." % url)
        if 'author' not in page_fields:
            raise ValueError(
                "No author specified for editing page '%s'." % url)
        if 'message' not in page_fields:
            raise ValueError(
                "No commit message specified for editing page '%s'." % url)

        # Save the new/modified text.
        page_info = self.fs.setPage(url, page_fields['text'])

        # Commit the file to the source-control.
        commit_meta = {
            'author': page_fields['author'],
            'message': page_fields['message']}
        self.scm.commit([page_info.path], commit_meta)

        # Update the DB and index with the new/modified page.
        self.updatePage(path=page_info.path)

        # Update all the other pages.
        self._wiki_updater(self)

    def revertPage(self, url, page_fields):
        """ Reverts the page with the given URL to an older revision.
        """
        # Validate the parameters.
        if 'rev' not in page_fields:
            raise ValueError(
                "No revision specified for reverting page '%s'." % url)
        if 'author' not in page_fields:
            raise ValueError(
                "No author specified for reverting page '%s'." % url)
        if 'message' not in page_fields:
            raise ValueError(
                "No commit message specified for reverting page '%s'." % url)

        # Get the revision.
        path = self.fs.getPhysicalPagePath(url)
        rev_text = self.scm.getRevision(path, page_fields['rev'])

        # Write to the file and commit.
        self.fs.setPage(url, rev_text)

        # Commit to source-control.
        commit_meta = {
            'author': page_fields['author'],
            'message': page_fields['message']}
        self.scm.commit([path], commit_meta)

        # Update the DB and index with the modified page.
        self.updatePage(url)

        # Update all the other pages.
        self._wiki_updater(self)

    def pageExists(self, url):
        """ Returns whether a page exists at the given URL.
        """
        return self.db.pageExists(url)

    def getHistory(self, limit=10, after_rev=None):
        """ Shorthand method to get the history from the source-control.
        """
        return self.scm.getHistory(limit=limit, after_rev=after_rev)

    def getSpecialFilenames(self):
        return self.special_filenames

    def _createEndpointInfos(self, config):
        endpoints = {}
        sections = [s for s in config.sections() if s.startswith('endpoint:')]
        for s in sections:
            ep = EndpointInfo(s[9:])   # 9 = len('endpoint:')
            if config.has_option(s, 'query'):
                ep.query = config.getboolean(s, 'query')
            if config.has_option(s, 'default'):
                ep.default = config.get(s, 'default')
            endpoints[ep.name] = ep
        return endpoints

    def _setupPostSetPageUpdater(self, async):
        if async:
            logger.debug("Setting up asynchronous updater.")
            from tasks import update_wiki
            self._postSetPageUpdate = lambda wiki: update_wiki.delay(self.root)
        else:
            logger.debug("Setting up simple updater.")
            self._postSetPageUpdate = lambda wiki: wiki._simplePostSetPageUpdate()

    def _simpleWikiUpdater(self):
        page_urls = self.db.getPageUrls(uncached_only=True)
        self.resolve(only_urls=page_urls)
        pages = [self.db.getPage(url=pu,
                                 fields=['url', 'path', 'title', 'text'])
                 for pu in page_urls]
        self.index.updateAll(pages)


def reloader_stat_loop(wiki, interval=1):
    """ Watches the page files of the given wiki for changes, forever.

        On each pass, this stats the file of every page info returned
        by the wiki's file-system, and prints a message for each file
        whose modification time is newer than the last one seen. Files
        that can't be stat'ed are skipped. It then sleeps for `interval`
        seconds and starts over: this function never returns.
    """
    mtimes = {}
    while True:
        for page_info in wiki.fs.getPageInfos():
            # Page infos expose their path as an attribute, like
            # everywhere else in this module.
            path = page_info.path
            try:
                mtime = os.stat(path).st_mtime
            except OSError:
                continue

            old_time = mtimes.get(path)
            if old_time is None:
                # First time we see this file: just remember its time.
                mtimes[path] = mtime
                continue
            if mtime > old_time:
                # Remember the new time, so that the same change doesn't
                # get reported again on every pass.
                mtimes[path] = mtime
                # Written so that it works both as a Python 2 statement
                # and as a Python 3 function call.
                print("Change detected in '%s'." % path)
        time.sleep(interval)