changeset 425:e28f2c76691c

web: Add "broken links" and "wanted pages" lists. * Broken links relies on usual page list stuff. * Wanted pages relies on new DB tables and information.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 28 Mar 2017 21:24:44 -0700
parents 1066d0986082
children bf65fba2854c
files wikked/db/base.py wikked/db/sql.py wikked/page.py wikked/templates/special-broken-links.html wikked/templates/special-wanted-pages.html wikked/views/special.py wikked/webimpl/__init__.py wikked/webimpl/edit.py wikked/webimpl/special.py
diffstat 9 files changed, 297 insertions(+), 87 deletions(-) [+]
line wrap: on
line diff
--- a/wikked/db/base.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/db/base.py	Tue Mar 28 21:24:44 2017 -0700
@@ -7,6 +7,10 @@
                 "No such page list: %s" % list_name)
 
 
+class NoWantedPages(Exception):
+    """ Raised when the database has no valid wanted pages cached. """
+
+
 class Database(object):
     """ The base class for a database cache.
     """
@@ -88,6 +92,12 @@
     def _getPageByPath(self, path, fields):
         raise NotImplementedError()
 
+    def saveWantedPages(self, wanted_pages):
+        """ Stores the wanted pages cache. Does nothing by default. """
+
+    def getWantedPages(self, valid_only=True):
+        """ Returns the cached wanted pages.
+
+            Sub-classes must implement this. The `valid_only` parameter
+            matches the one accepted by the SQL implementation, so callers
+            can use the same signature with any database.
+        """
+        raise NotImplementedError()
+
     def addPageList(self, list_name, pages):
         pass
 
@@ -105,4 +115,3 @@
 
     def removeAllPageLists(self):
         pass
-
--- a/wikked/db/sql.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/db/sql.py	Tue Mar 28 21:24:44 2017 -0700
@@ -16,8 +16,8 @@
     Load)
 from sqlalchemy.orm.exc import NoResultFound
 from sqlalchemy.orm.session import Session
-from wikked.db.base import Database, PageListNotFound
-from wikked.page import Page, PageData, FileSystemPage
+from wikked.db.base import Database, PageListNotFound, NoWantedPages
+from wikked.page import Page, PageData, FileSystemPage, WantedPage
 from wikked.utils import split_page_url
 
 
@@ -127,6 +127,16 @@
     time_value = Column(DateTime)
 
 
+class SQLWantedPage(Base):
+    """ A cached "wanted page": the URL of a page that doesn't exist,
+        along with a page that links to it.
+    """
+    __tablename__ = 'wanted_pages'
+
+    id = Column(Integer, primary_key=True)
+    # URL of the wanted page.
+    url = Column(Text)
+    # ID, in the `pages` table, of a page that links to `url`.
+    wanted_by_id = Column(Integer, ForeignKey('pages.id'))
+
+    # The page pointed to by `wanted_by_id`.
+    wanted_by = relationship('SQLPage')
+
+
 class SQLPageList(Base):
     __tablename__ = 'page_lists'
 
@@ -147,8 +157,7 @@
     list_id = Column(Integer, ForeignKey('page_lists.id'))
     page_id = Column(Integer, ForeignKey('pages.id'))
 
-    page = relationship(
-            'SQLPage')
+    page = relationship('SQLPage')
 
 
 class _WikkedSQLSession(Session):
@@ -231,7 +240,7 @@
 class SQLDatabase(Database):
     """ A database cache based on SQL.
     """
-    schema_version = 8
+    schema_version = 9
 
     def __init__(self, config):
         Database.__init__(self)
@@ -309,6 +318,12 @@
         ver.name = 'schema_version'
         ver.int_value = self.schema_version
         self.session.add(ver)
+
+        wanted_valid = SQLInfo()
+        wanted_valid.name = 'wanted_pages_is_valid'
+        wanted_valid.int_value = 0
+        self.session.add(wanted_valid)
+
         self.session.commit()
 
     def _getSchemaVersion(self):
@@ -496,7 +511,7 @@
         if except_url:
             q = q.filter(SQLPage.url != except_url)
         if only_required:
-            q = q.filter(SQLPage.needs_invalidate == True)
+            # Python evaluates `is True` right away (to `False`, here), so
+            # use `is_()` to have SQLAlchemy put the test in the query.
+            q = q.filter(SQLPage.needs_invalidate.is_(True))
 
         uncached_urls = []
         for p in q.all():
@@ -607,6 +622,47 @@
 
         return po
 
+    def saveWantedPages(self, wanted_pages):
+        """ Replaces the cached wanted pages with the given ones, and flags
+            that cache as valid.
+        """
+        # Start from a clean slate.
+        self.session.query(SQLWantedPage).delete()
+
+        for wanted in wanted_pages:
+            row = SQLWantedPage()
+            row.url = wanted.url
+            row.wanted_by_id = wanted.wanted_by._id
+            self.session.add(row)
+
+        flag = self.session.query(SQLInfo)\
+            .filter(SQLInfo.name == 'wanted_pages_is_valid')\
+            .first()
+        if flag is None:
+            # The flag doesn't exist yet: create it.
+            flag = SQLInfo()
+            flag.name = 'wanted_pages_is_valid'
+            self.session.add(flag)
+        flag.int_value = 1
+
+        self.session.commit()
+
+    def getWantedPages(self, valid_only=True):
+        """ Gets the cached wanted pages as an iterable of `WantedPage`.
+
+            If `valid_only` is true, raises `NoWantedPages` when the cache
+            isn't flagged as valid. This check is done right away, and not
+            when the returned iterable is first consumed.
+        """
+        if valid_only:
+            valid = self.session.query(SQLInfo)\
+                .filter(SQLInfo.name == 'wanted_pages_is_valid')\
+                .first()
+            if valid is None or valid.int_value != 1:
+                raise NoWantedPages()
+
+        fields = ['url', 'title']
+
+        q = self.session.query(SQLWantedPage)\
+            .join(SQLWantedPage.wanted_by)
+        q = self._addFieldOptions(q, fields, use_load_obj=True)
+
+        # Return a generator expression instead of making this method itself
+        # a generator, which would delay the validity check above.
+        return (
+            WantedPage(wp.url, SQLDatabasePage(self, wp.wanted_by, fields))
+            for wp in q.all())
+
     def addPageList(self, list_name, pages):
         page_list = self.session.query(SQLPageList)\
                 .filter(SQLPageList.list_name == list_name)\
@@ -733,4 +789,3 @@
                 data.ext_links = [l.target_url for l in db_obj.ready_links]
 
         return data
-
--- a/wikked/page.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/page.py	Tue Mar 28 21:24:44 2017 -0700
@@ -162,3 +162,9 @@
             data.title = data.title[0]
 
         return data
+
+
+class WantedPage:
+    """ A wanted page's URL, along with a page that wants it.
+    """
+    def __init__(self, url, wanted_by):
+        self.url, self.wanted_by = url, wanted_by
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wikked/templates/special-broken-links.html	Tue Mar 28 21:24:44 2017 -0700
@@ -0,0 +1,8 @@
+{% extends "special-pagelist.html" %}
+{% block message %}
+<p>Here is a list of pages with at least one broken link in them. A broken link is a link to a missing page.</p>
+{% endblock %}
+{% block empty %}
+<p>No broken links!</p>
+{% endblock %}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wikked/templates/special-wanted-pages.html	Tue Mar 28 21:24:44 2017 -0700
@@ -0,0 +1,26 @@
+{% extends 'index.html' %}
+{% block nav %}{% include 'special-nav.html' %}{% endblock %}
+{% block content %}
+<article>
+    <header>
+        <h1>Wanted Pages</h1>
+    </header>
+    <section>
+        <p>This is a list of pages that are linked to by other pages in this wiki,
+           but which don't exist.</p>
+        {%if wanted_pages%}
+        <ul>
+        {%for p in wanted_pages%}
+            <li><a class="wiki-link missing" href="{{get_edit_url(p.url)}}">{{p.title}}</a> 
+            {%if p.wanted_by%}
+                (wanted by <a class="wiki-link" href="{{get_read_url(p.wanted_by.url)}}">{{p.wanted_by.title}}</a>)
+            {%endif%}
+            </li>
+        {%endfor%}
+        </ul>
+        {%else%}
+        <p>No wanted pages!</p>
+        {%endif%}
+    </section>
+</article>
+{% endblock %}
--- a/wikked/views/special.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/views/special.py	Tue Mar 28 21:24:44 2017 -0700
@@ -6,69 +6,84 @@
 from wikked.web import app, get_wiki
 from wikked.webimpl.special import (
         get_orphans, get_broken_redirects, get_double_redirects,
-        get_dead_ends)
+        get_dead_ends, get_broken_links, get_wanted_pages)
 
 
 special_sections = [
-        {
-            'name': 'wiki',
-            'title': 'Wiki'
-            },
-        {
-            'name': 'lists',
-            'title': 'Page Lists'
-            },
-        {
-            'name': 'users',
-            'title': 'Users'
-            }
-        ]
+    {
+        'name': 'wiki',
+        'title': 'Wiki'
+    },
+    {
+        'name': 'lists',
+        'title': 'Page Lists'
+    },
+    {
+        'name': 'users',
+        'title': 'Users'
+    }
+]
 
 special_pages = {
-        'changes': {
-            "title": "Recent Changes",
-            "view": 'site_history',
-            "description": "See all changes in the wiki.",
-            "section": "wiki",
-            },
-        'orphans': {
-            "title": "Orphaned Pages",
-            "view": 'special_list_orphans',
-            "description": ("Lists pages in the wiki that have no "
-                            "links to them."),
-            "section": "lists",
-            "template": "special-orphans.html"
-            },
-        'broken-redirects': {
-            "title": "Broken Redirects",
-            "view": 'special_list_broken_redirects',
-            "description": ("Lists pages that redirect to a missing "
-                            "page."),
-            "section": "lists",
-            "template": "special-broken-redirects.html"
-            },
-        'double-redirects': {
-            "title": "Double Redirects",
-            "view": 'special_list_double_redirects',
-            "description": "Lists pages that redirect twice or more.",
-            "section": "lists",
-            "template": "special-double-redirects.html"
-            },
-        'dead-ends': {
-            "title": "Dead-End Pages",
-            "view": 'special_list_dead_ends',
-            "description": ("Lists pages that don't have any "
-                            "outgoing links."),
-            "section": "lists",
-            "template": "special-dead-ends.html"
-            },
-        'users': {
-            "title": "All Users",
-            "view": 'special_users',
-            "description": "A list of all registered users.",
-            "section": "users",
-            }
-        }
+    'changes': {
+        "title": "Recent Changes",
+        "view": 'site_history',
+        "description": "See all changes in the wiki.",
+        "section": "wiki",
+    },
+    'orphans': {
+        "title": "Orphaned Pages",
+        "view": 'special_list_orphans',
+        "description": ("Lists pages in the wiki that have no "
+                        "links to them."),
+        "section": "lists",
+        "template": "special-orphans.html"
+    },
+    'broken-redirects': {
+        "title": "Broken Redirects",
+        "view": 'special_list_broken_redirects',
+        "description": ("Lists pages that redirect to a missing "
+                        "page."),
+        "section": "lists",
+        "template": "special-broken-redirects.html"
+    },
+    'double-redirects': {
+        "title": "Double Redirects",
+        "view": 'special_list_double_redirects',
+        "description": "Lists pages that redirect twice or more.",
+        "section": "lists",
+        "template": "special-double-redirects.html"
+    },
+    'dead-ends': {
+        "title": "Dead-End Pages",
+        "view": 'special_list_dead_ends',
+        "description": ("Lists pages that don't have any "
+                        "outgoing links."),
+        "section": "lists",
+        "template": "special-dead-ends.html"
+    },
+    'broken-links': {
+        "title": "Broken Links",
+        "view": 'special_list_broken_links',
+        "description": ("Lists pages that have broken links in them."),
+        "section": "lists",
+        "template": "special-broken-links.html"
+    },
+    'wanted-pages': {
+        "title": "Wanted Pages",
+        "view": 'special_list_wanted_pages',
+        "description": ("Lists pages that don't exist yet but already have "
+                        "incoming links to them."),
+        "section": "lists",
+        "template": "special-wanted-pages.html"
+    },
+    'users': {
+        "title": "All Users",
+        "view": 'special_users',
+        "description": "A list of all registered users.",
+        "section": "users",
+    }
+}
 
 
 @app.route('/special')
@@ -101,17 +116,22 @@
     if 'raw_url' in kwargs:
         raw_url = kwargs['raw_url']
         del kwargs['raw_url']
+    refresh = True
+    if 'refresh' in kwargs:
+        refresh = kwargs['refresh']
+        del kwargs['refresh']
 
     data = api_func(wiki, user, *args, **kwargs)
     add_auth_data(data)
     add_navigation_data(None, data, raw_url=raw_url)
     data['title'] = info['title']
     data['is_special_page'] = True
-    data['refresh'] = {
-        'url': url_for('special_list_refresh'),
-        'list_name': page_name.replace('-', '_'),
-        'postback': page_name
-    }
+    if refresh:
+        data['refresh'] = {
+            'url': url_for('special_list_refresh'),
+            'list_name': page_name.replace('-', '_'),
+            'postback': page_name
+        }
     return render_template(info['template'], **data)
 
 
@@ -143,6 +163,21 @@
                     raw_url='/api/dead-ends')
 
 
+@app.route('/special/list/broken-links')
+@requires_reader_auth
+def special_list_broken_links():
+    """ Shows the list of pages that have broken links in them. """
+    api_url = '/api/broken-links'
+    return call_api('broken-links', get_broken_links, raw_url=api_url)
+
+
+@app.route('/special/list/wanted-pages')
+@requires_reader_auth
+def special_list_wanted_pages():
+    """ Shows the list of wanted pages. """
+    # `refresh=False` makes `call_api` leave the list-refresh data out of
+    # what's passed to the template.
+    options = {'raw_url': '/api/wanted-pages', 'refresh': False}
+    return call_api('wanted-pages', get_wanted_pages, **options)
+
+
 @app.route('/special/list-refresh', methods=['POST'])
 @requires_auth('administrators')
 def special_list_refresh():
--- a/wikked/webimpl/__init__.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/webimpl/__init__.py	Tue Mar 28 21:24:44 2017 -0700
@@ -199,3 +199,14 @@
 
     return builder
 
+
+def make_page_title(url):
+    """ Makes a title out of a page URL.
+
+        The title is the last component of the URL's path, prefixed with
+        the URL's endpoint (as in `endpoint: title`) if it has one.
+    """
+    endpoint, path = split_page_url(url)
+    # Strip slashes on both ends before taking the last component,
+    # otherwise a path like `/foo/bar/` would yield `bar/` as the title.
+    title = path.strip('/').rsplit('/', 1)[-1]
+    if endpoint:
+        return '%s: %s' % (endpoint, title)
+    return title
--- a/wikked/webimpl/edit.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/webimpl/edit.py	Tue Mar 28 21:24:44 2017 -0700
@@ -3,10 +3,10 @@
 from wikked.page import Page, PageData
 from wikked.formatter import PageFormatter, FormattingContext
 from wikked.resolver import PageResolver
-from wikked.utils import PageNotFoundError, split_page_url
+from wikked.utils import PageNotFoundError
 from wikked.webimpl import (
         CHECK_FOR_WRITE,
-        get_page_or_raise, get_page_meta)
+        get_page_or_raise, get_page_meta, make_page_title)
 
 
 logger = logging.getLogger(__name__)
@@ -40,18 +40,6 @@
         return data
 
 
-def make_page_title(url):
-    endpoint, path = split_page_url(url)
-    last_slash = path.rstrip('/').rfind('/')
-    if last_slash < 0 or last_slash == 0:
-        title = path.lstrip('/')
-    else:
-        title = path[last_slash + 1:]
-    if endpoint:
-        return '%s: %s' % (endpoint, title)
-    return title
-
-
 def get_edit_page(wiki, user, url, author=None, custom_data=None):
     page = None
     try:
@@ -114,4 +102,3 @@
     resolver = PageResolver(dummy)
     dummy._setExtendedData(resolver.run())
     return dummy.text
-
--- a/wikked/webimpl/special.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/webimpl/special.py	Tue Mar 28 21:24:44 2017 -0700
@@ -1,7 +1,10 @@
+import urllib.parse
+from wikked.db.base import NoWantedPages
+from wikked.page import WantedPage
 from wikked.utils import get_absolute_url
 from wikked.webimpl import (
         CHECK_FOR_READ,
-        get_page_meta, get_page_or_raise,
+        get_page_meta, get_page_or_raise, make_page_title,
         is_page_readable, get_redirect_target,
         get_or_build_pagelist, get_generic_pagelist_builder,
         CircularRedirectError, RedirectNotFound)
@@ -109,6 +112,76 @@
             fields=['url', 'title', 'meta', 'links'])
 
 
+def get_broken_links(wiki, user):
+    """ Gets the view data for the list of pages that have at least one
+        link to a page that doesn't exist.
+    """
+    def builder_func():
+        wiki.resolve()
+
+        broken = set()
+        # Cache of absolute URL -> whether that page exists, so that the
+        # wiki is only asked once per link target.
+        existence = {}
+        for page in wiki.getPages(
+                no_endpoint_only=True,
+                fields=['url', 'title', 'meta', 'links']):
+            for link in page.links:
+                target = get_absolute_url(page.url, link)
+                exists = existence.get(target)
+                if exists is None:
+                    exists = wiki.pageExists(target)
+                    existence[target] = exists
+                if not exists:
+                    # One broken link is enough to list the page, no need
+                    # to check its other links.
+                    broken.add(page)
+                    break
+        return broken
+
+    fields = ['url', 'title', 'meta']
+    pages = get_or_build_pagelist(wiki, 'broken_links', builder_func, fields)
+    return build_pagelist_view_data(pages, user)
+
+
+def get_wanted_pages(wiki, user):
+    """ Gets the view data for the list of pages that don't exist but are
+        linked to by other pages.
+
+        The list comes from the database if it has a valid one cached.
+        Otherwise, it's built from the links of all pages and saved in the
+        database. Either way, the result is sorted by URL.
+    """
+    def builder_func():
+        wiki.resolve()
+
+        # Maps each missing URL to the first page found linking to it.
+        wanted = {}
+        # Cache of absolute URL -> whether that page exists.
+        existence = {}
+        for page in wiki.getPages(
+                no_endpoint_only=True,
+                fields=['url', 'title', 'meta', 'links']):
+            for link in page.links:
+                target = get_absolute_url(page.url, link)
+                exists = existence.get(target)
+                if exists is None:
+                    exists = wiki.pageExists(target)
+                    existence[target] = exists
+                if not exists:
+                    wanted.setdefault(target, page)
+
+        return [WantedPage(u, p) for u, p in wanted.items()]
+
+    try:
+        # Consume the result inside the `try` since it may be a generator,
+        # which would only raise once iterated.
+        wanted = list(wiki.db.getWantedPages())
+    except NoWantedPages:
+        wanted = None
+
+    if wanted is None:
+        wanted = builder_func()
+        wiki.db.saveWantedPages(wanted)
+
+    # Sort here so that a freshly built list is ordered like a cached one.
+    wanted.sort(key=lambda w: w.url)
+
+    data = [
+        {'url': urllib.parse.quote(w.url.encode('utf-8')),
+         'title': make_page_title(w.url),
+         'wanted_by': {
+             'url': urllib.parse.quote(w.wanted_by.url.encode('utf-8')),
+             'title': w.wanted_by.title}
+         }
+        for w in wanted]
+    return {'wanted_pages': data}
+
+
 def list_pages(wiki, user, url=None):
     pages = list(filter(is_page_readable, wiki.getPages(url)))
     page_metas = [get_page_meta(page) for page in pages]