view wikked/utils.py @ 464:1dc6a0a74da3

wiki: Improve consistency of absolute/relative links. - Make links from endpoint pages go to the same endpoint by default. - Add support for `:` (empty) endpoint to link outside of endpoints. - Add unit tests.
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 06 Oct 2018 19:40:52 -0700
parents 6cd51ea6dfcf
children 71114096433c
line wrap: on
line source

import re
import os
import os.path
import urllib.error
import urllib.parse
import urllib.request
import unicodedata
from xml.sax.saxutils import escape, unescape


re_terminal_path = re.compile(r'[/\\]|(\w\:)')
endpoint_regex = re.compile(r'(\w[\w\d]+)?\:(.*)')
endpoint_prefix_regex = re.compile(r'^(\w[\w\d]+)\:')


class PageNotFoundError(Exception):
    """ An error raised when no physical file
       is found for a given URL.
    """
    def __init__(self, url, message=None, *args):
        Exception.__init__(self, url, message, *args)

    @property
    def url(self):
        return self.args[0]

    def __str__(self):
        url = self.args[0]
        message = self.args[1]
        res = "Can't find page '%s'." % url
        if message:
            res += ' ' + message
        return res


class NamespaceNotFoundError(Exception):
    """ An error raised when no physical directory is found
        for a given URL.
    """
    pass


def find_wiki_root(path=None):
    if not path:
        path = os.getcwd()
    while True:
        if os.path.isfile(os.path.join(path, '.wikirc')):
            return path
        if (os.path.isdir(os.path.join(path, '.git')) or
                os.path.isdir(os.path.join(path, '.hg'))):
            return path
        path = os.path.dirname(path)
        if not path or re_terminal_path.match(path):
            break
    return None


def get_absolute_url(base_url, url, *, force_endpoint=None, quote=False):
    base_endpoint, base_url = split_page_url(base_url)
    if base_url[0] != '/':
        raise ValueError("The base URL must be absolute. Got: %s" % base_url)

    endpoint, url = split_page_url(url)
    if endpoint is None:
        endpoint = force_endpoint
        if endpoint is None:
            endpoint = base_endpoint

    if url.startswith('/'):
        # Absolute page URL.
        abs_url = url
    elif url.startswith('./'):
        # URL wants to be relative to the base url's name, instead
        # of its directory.
        abs_url = base_url + url[1:]
    else:
        # Relative page URL. Let's normalize all `..` in it,
        # which could also replace forward slashes by backslashes
        # on Windows, so we need to convert that back.
        urldir = os.path.dirname(base_url)
        raw_abs_url = os.path.join(urldir, url)
        abs_url = os.path.normpath(raw_abs_url).replace('\\', '/')

    if quote:
        abs_url = urllib.parse.quote(abs_url.encode('utf-8'))
    if endpoint:
        return '%s:%s' % (endpoint, abs_url)
    return abs_url


def get_url_folder(url):
    if url:
        head = os.path.dirname(url)
        if head:
            return head
    return '/'


def is_endpoint_url(url):
    return endpoint_prefix_regex.match(url) is not None


def split_page_url(url):
    m = endpoint_regex.match(url)
    if m is None:
        return (None, url)
    endpoint = m.group(1)
    path = m.group(2)
    return (endpoint, path)


def lower_url(url):
    return unicodedata.normalize("NFKD", url.casefold())


def get_meta_name_and_modifiers(name):
    """ Strips a meta name from any leading modifiers like `__` or `+`
        and returns both as a tuple. If no modifier was found, the
        second tuple value is `None`.
    """
    clean_name = name
    modifiers = None
    if name[:2] == '__':
        modifiers = '__'
        clean_name = name[3:]
    elif name[0] == '+':
        modifiers = '+'
        clean_name = name[1:]
    return (clean_name, modifiers)


def flatten_single_metas(meta):
    items = list(meta.items())
    for k, v in items:
        if isinstance(v, list):
            lv = len(v)
            if lv == 0:
                del meta[k]
            elif lv == 1:
                meta[k] = v[0]
    return meta


html_escape_table = {'"': "&quot;", "'": "&apos;"}
html_unescape_table = {v: k for k, v in list(html_escape_table.items())}


def html_escape(text):
    return escape(text, html_escape_table)


def html_unescape(text):
    return unescape(text, html_unescape_table)