changeset 427:bbe048e682ec

core: Be nice and handle case-sensitivity when linking to pages. Now, creating a wiki link to an existing page will still work even if it doesn't use the correct casing.
author Ludovic Chabant <ludovic@chabant.com>
date Thu, 30 Mar 2017 08:21:23 -0700
parents bf65fba2854c
children 58f3120f7e78
files wikked/db/base.py wikked/db/sql.py wikked/resolver.py wikked/utils.py
diffstat 4 files changed, 37 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/wikked/db/base.py	Tue Mar 28 21:25:00 2017 -0700
+++ b/wikked/db/base.py	Thu Mar 30 08:21:23 2017 -0700
@@ -78,10 +78,15 @@
         """ Invalidates resolved information for pages in the wiki. """
         pass
 
-    def pageExists(self, url=None, path=None):
+    def pageExists(self, url):
         """ Returns whether a given page exists. """
         raise NotImplementedError()
 
+    def validateUrl(self, url):
+        """ Returns the proper URL, given a URL with potentially different
+            casing, or `None` if that URL doesn't exist. """
+        raise NotImplementedError()
+
     def getLinksTo(self, url):
         """ Gets the list of links to a given page. """
         raise NotImplementedError()
--- a/wikked/db/sql.py	Tue Mar 28 21:25:00 2017 -0700
+++ b/wikked/db/sql.py	Thu Mar 30 08:21:23 2017 -0700
@@ -18,7 +18,7 @@
 from sqlalchemy.orm.session import Session
 from wikked.db.base import Database, PageListNotFound, NoWantedPages
 from wikked.page import Page, PageData, FileSystemPage, WantedPage
-from wikked.utils import split_page_url
+from wikked.utils import split_page_url, lower_url
 
 
 logger = logging.getLogger(__name__)
@@ -36,6 +36,7 @@
     # this length (but it's good because it makes those 2 columns short enough
     # to be indexable by SQL).
     url = Column(String(260), unique=True)
+    url_ci = Column(String(260), unique=True)
     path = Column(String(260), unique=True)
     endpoint = Column(String(64))
     title = Column(UnicodeText)
@@ -240,7 +241,7 @@
 class SQLDatabase(Database):
     """ A database cache based on SQL.
     """
-    schema_version = 9
+    schema_version = 10
 
     def __init__(self, config):
         Database.__init__(self)
@@ -520,11 +521,21 @@
         logger.debug("Uncaching: %s" % ', '.join(uncached_urls))
         self.session.commit()
 
-    def pageExists(self, url=None, path=None):
-        q = self.session.query(SQLPage.id, SQLPage.url).filter_by(url=url)
+    def pageExists(self, url):
+        l = lower_url(url)
+        q = self.session.query(SQLPage.id, SQLPage.url_ci).filter_by(url_ci=l)
         res = self.session.query(q.exists())
         return res.scalar()
 
+    def validateUrl(self, url):
+        l = lower_url(url)
+        try:
+            q = self.session.query(SQLPage.id, SQLPage.url, SQLPage.url_ci).\
+                filter_by(url_ci=l).one()
+            return q.url
+        except NoResultFound:
+            return None
+
     def getLinksTo(self, url):
         q = self.session.query(SQLReadyLink).\
                 filter(SQLReadyLink.target_url == url).\
@@ -598,6 +609,7 @@
         po = SQLPage()
         po.cache_time = datetime.datetime.now()
         po.url = page.url
+        po.url_ci = lower_url(page.url)
         po.endpoint, _ = split_page_url(page.url)
         po.path = page.path
         po.title = page.title
--- a/wikked/resolver.py	Tue Mar 28 21:25:00 2017 -0700
+++ b/wikked/resolver.py	Thu Mar 30 08:21:23 2017 -0700
@@ -213,13 +213,18 @@
                 raw_url = m.group('url')
                 is_edit = bool(m.group('isedit'))
                 url = self.ctx.getAbsoluteUrl(raw_url)
+                validated_url = self.wiki.db.validateUrl(url)
+                if validated_url:
+                    url = validated_url
                 self.output.out_links.append(url)
                 action = 'edit' if is_edit else 'read'
                 quoted_url = urllib.parse.quote(url.encode('utf-8'))
-                if self.wiki.pageExists(url):
+
+                if validated_url:
                     actual_url = '/%s/%s' % (action, quoted_url.lstrip('/'))
                     return ('<a class="wiki-link" data-wiki-url="%s" '
                             'href="%s"' % (quoted_url, actual_url))
+
                 actual_url = '/%s/%s' % (action, quoted_url.lstrip('/'))
                 return ('<a class="wiki-link missing" data-wiki-url="%s" '
                         'href="%s"' % (quoted_url, actual_url))
--- a/wikked/utils.py	Tue Mar 28 21:25:00 2017 -0700
+++ b/wikked/utils.py	Thu Mar 30 08:21:23 2017 -0700
@@ -1,7 +1,10 @@
 import re
 import os
 import os.path
-import urllib.request, urllib.parse, urllib.error
+import urllib.error
+import urllib.parse
+import urllib.request
+import unicodedata
 from xml.sax.saxutils import escape, unescape
 
 
@@ -92,6 +95,10 @@
     return (endpoint, path)
 
 
+def lower_url(url):
+    return unicodedata.normalize("NFKD", url.casefold())
+
+
 def get_meta_name_and_modifiers(name):
     """ Strips a meta name from any leading modifiers like `__` or `+`
         and returns both as a tuple. If no modifier was found, the
@@ -123,10 +130,10 @@
 html_escape_table = {'"': "&quot;", "'": "&apos;"}
 html_unescape_table = {v: k for k, v in list(html_escape_table.items())}
 
+
 def html_escape(text):
     return escape(text, html_escape_table)
 
 
 def html_unescape(text):
     return unescape(text, html_unescape_table)
-