changeset 425:e28f2c76691c
web: Add "broken links" and "wanted pages" lists.
* The broken links list relies on the usual page list infrastructure.
* The wanted pages list relies on a new DB table and info entry.
author    Ludovic Chabant <ludovic@chabant.com>
date      Tue, 28 Mar 2017 21:24:44 -0700
parents   1066d0986082
children  bf65fba2854c
files     wikked/db/base.py wikked/db/sql.py wikked/page.py
          wikked/templates/special-broken-links.html
          wikked/templates/special-wanted-pages.html
          wikked/views/special.py wikked/webimpl/__init__.py
          wikked/webimpl/edit.py wikked/webimpl/special.py
diffstat  9 files changed, 297 insertions(+), 87 deletions(-)
--- a/wikked/db/base.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/db/base.py	Tue Mar 28 21:24:44 2017 -0700
@@ -7,6 +7,10 @@
             "No such page list: %s" % list_name)
 
 
+class NoWantedPages(Exception):
+    pass
+
+
 class Database(object):
     """ The base class for a database cache.
     """
@@ -88,6 +92,12 @@
     def _getPageByPath(self, path, fields):
         raise NotImplementedError()
 
+    def saveWantedPages(self, wanted_pages):
+        pass
+
+    def getWantedPages(self):
+        raise NotImplementedError()
+
     def addPageList(self, list_name, pages):
         pass
 
@@ -105,4 +115,3 @@
 
     def removeAllPageLists(self):
         pass
-
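The two new base-class methods define the caching contract: saveWantedPages() is a no-op by default, getWantedPages() must be overridden, and NoWantedPages signals that no valid cached list exists. A minimal sketch of how a caller is expected to use this contract (the helper name and the rebuild callback are hypothetical, for illustration only):

    from wikked.db.base import NoWantedPages

    def wanted_pages_or_rebuild(wiki, rebuild):
        try:
            # Fast path: the database holds a valid cached list.
            return list(wiki.db.getWantedPages())
        except NoWantedPages:
            # Cache miss: rebuild the list and store it for next time.
            wanted = rebuild(wiki)
            wiki.db.saveWantedPages(wanted)
            return wanted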
--- a/wikked/db/sql.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/db/sql.py	Tue Mar 28 21:24:44 2017 -0700
@@ -16,8 +16,8 @@
     Load)
 from sqlalchemy.orm.exc import NoResultFound
 from sqlalchemy.orm.session import Session
-from wikked.db.base import Database, PageListNotFound
-from wikked.page import Page, PageData, FileSystemPage
+from wikked.db.base import Database, PageListNotFound, NoWantedPages
+from wikked.page import Page, PageData, FileSystemPage, WantedPage
 from wikked.utils import split_page_url
 
 
@@ -127,6 +127,16 @@
     time_value = Column(DateTime)
 
 
+class SQLWantedPage(Base):
+    __tablename__ = 'wanted_pages'
+
+    id = Column(Integer, primary_key=True)
+    url = Column(Text)
+    wanted_by_id = Column(Integer, ForeignKey('pages.id'))
+
+    wanted_by = relationship('SQLPage')
+
+
 class SQLPageList(Base):
     __tablename__ = 'page_lists'
 
@@ -147,8 +157,7 @@
     list_id = Column(Integer, ForeignKey('page_lists.id'))
     page_id = Column(Integer, ForeignKey('pages.id'))
 
-    page = relationship(
-        'SQLPage')
+    page = relationship('SQLPage')
 
 
 class _WikkedSQLSession(Session):
@@ -231,7 +240,7 @@
 class SQLDatabase(Database):
     """ A database cache based on SQL.
     """
-    schema_version = 8
+    schema_version = 9
 
     def __init__(self, config):
         Database.__init__(self)
@@ -309,6 +318,12 @@
         ver.name = 'schema_version'
         ver.int_value = self.schema_version
         self.session.add(ver)
+
+        wanted_valid = SQLInfo()
+        wanted_valid.name = 'wanted_pages_is_valid'
+        wanted_valid.int_value = 0
+        self.session.add(wanted_valid)
+
         self.session.commit()
 
     def _getSchemaVersion(self):
@@ -496,7 +511,7 @@
         if except_url:
             q = q.filter(SQLPage.url != except_url)
         if only_required:
-            q = q.filter(SQLPage.needs_invalidate == True)
+            q = q.filter(SQLPage.needs_invalidate is True)
 
         uncached_urls = []
         for p in q.all():
@@ -607,6 +622,47 @@
 
         return po
 
+    def saveWantedPages(self, wanted_pages):
+        # Delete previously cached wanted pages.
+        self.session.query(SQLWantedPage).delete()
+
+        for p in wanted_pages:
+            item = SQLWantedPage()
+            item.url = p.url
+            item.wanted_by_id = p.wanted_by._id
+            self.session.add(item)
+
+        valid = self.session.query(SQLInfo)\
+            .filter(SQLInfo.name == 'wanted_pages_is_valid')\
+            .first()
+        if valid is not None:
+            valid.int_value = 1
+        else:
+            valid = SQLInfo()
+            valid.name = 'wanted_pages_is_valid'
+            valid.int_value = 1
+            self.session.add(valid)
+
+        self.session.commit()
+
+    def getWantedPages(self, valid_only=True):
+        if valid_only:
+            valid = self.session.query(SQLInfo)\
+                .filter(SQLInfo.name == 'wanted_pages_is_valid')\
+                .first()
+            if valid is None or valid.int_value != 1:
+                raise NoWantedPages()
+
+        fields = ['url', 'title']
+
+        q = self.session.query(SQLWantedPage)\
+            .join(SQLWantedPage.wanted_by)
+        q = self._addFieldOptions(q, fields, use_load_obj=True)
+
+        for wp in q.all():
+            yield WantedPage(wp.url,
+                             SQLDatabasePage(self, wp.wanted_by, fields))
+
     def addPageList(self, list_name, pages):
         page_list = self.session.query(SQLPageList)\
             .filter(SQLPageList.list_name == list_name)\
             .first()
@@ -733,4 +789,3 @@
             data.ext_links = [l.target_url for l in db_obj.ready_links]
 
         return data
-
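One subtlety worth noting: SQLDatabase.getWantedPages() is a generator, so the 'wanted_pages_is_valid' check only runs once iteration starts, and callers must consume the iterator before NoWantedPages can surface. A rough illustration of that behavior (assumes a configured SQLDatabase instance `db`; this helper is not part of the commit):

    from wikked.db.base import NoWantedPages

    def has_valid_wanted_pages(db):
        try:
            # The generator body -- and thus the validity check -- doesn't
            # execute until the first item is requested.
            next(iter(db.getWantedPages()), None)
            return True
        except NoWantedPages:
            return False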
--- a/wikked/page.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/page.py	Tue Mar 28 21:24:44 2017 -0700
@@ -162,3 +162,9 @@
             data.title = data.title[0]
 
         return data
+
+
+class WantedPage:
+    def __init__(self, url, wanted_by):
+        self.url = url
+        self.wanted_by = wanted_by
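WantedPage is just a small value object pairing a missing page's URL with the page that links to it. For example (the stub class and sample values are made up for illustration):

    from wikked.page import WantedPage

    class _StubPage:
        # Stand-in for a real Page; only the attributes used here.
        url = '/recipes'
        title = 'Recipes'

    wp = WantedPage('/recipes/brownies', _StubPage())
    print(wp.url, wp.wanted_by.title)  # /recipes/brownies Recipes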
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wikked/templates/special-broken-links.html	Tue Mar 28 21:24:44 2017 -0700
@@ -0,0 +1,8 @@
+{% extends "special-pagelist.html" %}
+{% block message %}
+<p>Here is a list of pages with at least one broken link in them. A broken link is a link to a missing page.</p>
+{% endblock %}
+{% block empty %}
+<p>No broken links!</p>
+{% endblock %}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wikked/templates/special-wanted-pages.html	Tue Mar 28 21:24:44 2017 -0700
@@ -0,0 +1,26 @@
+{% extends 'index.html' %}
+{% block nav %}{% include 'special-nav.html' %}{% endblock %}
+{% block content %}
+<article>
+    <header>
+        <h1>Wanted Pages</h1>
+    </header>
+    <section>
+        <p>This is a list of pages that are linked to by other pages in this wiki,
+        but which don't exist.</p>
+        {%if wanted_pages%}
+        <ul>
+            {%for p in wanted_pages%}
+            <li><a class="wiki-link missing" href="{{get_edit_url(p.url)}}">{{p.title}}</a>
+                {%if p.wanted_by%}
+                (wanted by <a class="wiki-link" href="{{get_read_url(p.wanted_by.url)}}">{{p.wanted_by.title}}</a>)
+                {%endif%}
+            </li>
+            {%endfor%}
+        </ul>
+        {%else%}
+        <p>No wanted pages!</p>
+        {%endif%}
+    </section>
+</article>
+{% endblock %}
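For reference, the view data this template consumes has the shape built by get_wanted_pages() in wikked/webimpl/special.py below; the sample values here are made up:

    data = {
        'wanted_pages': [
            {'url': '/recipes/brownies',
             'title': 'brownies',
             'wanted_by': {'url': '/recipes', 'title': 'Recipes'}},
        ]
    }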
--- a/wikked/views/special.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/views/special.py	Tue Mar 28 21:24:44 2017 -0700
@@ -6,69 +6,84 @@
 from wikked.web import app, get_wiki
 from wikked.webimpl.special import (
     get_orphans, get_broken_redirects, get_double_redirects,
-    get_dead_ends)
+    get_dead_ends, get_broken_links, get_wanted_pages)
 
 
 special_sections = [
-        {
-            'name': 'wiki',
-            'title': 'Wiki'
-        },
-        {
-            'name': 'lists',
-            'title': 'Page Lists'
-        },
-        {
-            'name': 'users',
-            'title': 'Users'
-        }
-        ]
+    {
+        'name': 'wiki',
+        'title': 'Wiki'
+    },
+    {
+        'name': 'lists',
+        'title': 'Page Lists'
+    },
+    {
+        'name': 'users',
+        'title': 'Users'
+    }
+]
 
 special_pages = {
-        'changes': {
-            "title": "Recent Changes",
-            "view": 'site_history',
-            "description": "See all changes in the wiki.",
-            "section": "wiki",
-            },
-        'orphans': {
-            "title": "Orphaned Pages",
-            "view": 'special_list_orphans',
-            "description": ("Lists pages in the wiki that have no "
-                            "links to them."),
-            "section": "lists",
-            "template": "special-orphans.html"
-            },
-        'broken-redirects': {
-            "title": "Broken Redirects",
-            "view": 'special_list_broken_redirects',
-            "description": ("Lists pages that redirect to a missing "
-                            "page."),
-            "section": "lists",
-            "template": "special-broken-redirects.html"
-            },
-        'double-redirects': {
-            "title": "Double Redirects",
-            "view": 'special_list_double_redirects',
-            "description": "Lists pages that redirect twice or more.",
-            "section": "lists",
-            "template": "special-double-redirects.html"
-            },
-        'dead-ends': {
-            "title": "Dead-End Pages",
-            "view": 'special_list_dead_ends',
-            "description": ("Lists pages that don't have any "
-                            "outgoing links."),
-            "section": "lists",
-            "template": "special-dead-ends.html"
-            },
-        'users': {
-            "title": "All Users",
-            "view": 'special_users',
-            "description": "A list of all registered users.",
-            "section": "users",
-            }
-        }
+    'changes': {
+        "title": "Recent Changes",
+        "view": 'site_history',
+        "description": "See all changes in the wiki.",
+        "section": "wiki",
+    },
+    'orphans': {
+        "title": "Orphaned Pages",
+        "view": 'special_list_orphans',
+        "description": ("Lists pages in the wiki that have no "
+                        "links to them."),
+        "section": "lists",
+        "template": "special-orphans.html"
+    },
+    'broken-redirects': {
+        "title": "Broken Redirects",
+        "view": 'special_list_broken_redirects',
+        "description": ("Lists pages that redirect to a missing "
+                        "page."),
+        "section": "lists",
+        "template": "special-broken-redirects.html"
+    },
+    'double-redirects': {
+        "title": "Double Redirects",
+        "view": 'special_list_double_redirects',
+        "description": "Lists pages that redirect twice or more.",
+        "section": "lists",
+        "template": "special-double-redirects.html"
+    },
+    'dead-ends': {
+        "title": "Dead-End Pages",
+        "view": 'special_list_dead_ends',
+        "description": ("Lists pages that don't have any "
+                        "outgoing links."),
+        "section": "lists",
+        "template": "special-dead-ends.html"
+    },
+    'broken-links': {
+        "title": "Broken Links",
+        "view": 'special_list_broken_links',
+        "description": ("Lists pages that have broken links in them."),
+        "section": "lists",
+        "template": "special-broken-links.html"
+    },
+    'wanted-pages': {
+        "title": "Wanted Pages",
+        "view": 'special_list_wanted_pages',
+        "description": ("Lists pages that don't exist yet but already have "
+                        "incoming links to them."),
+        "section": "lists",
+        "template": "special-wanted-pages.html"
+    },
+    'users': {
+        "title": "All Users",
+        "view": 'special_users',
+        "description": "A list of all registered users.",
+        "section": "users",
+    }
+}
 
 
 @app.route('/special')
@@ -101,17 +116,22 @@
     if 'raw_url' in kwargs:
         raw_url = kwargs['raw_url']
         del kwargs['raw_url']
+    refresh = True
+    if 'refresh' in kwargs:
+        refresh = kwargs['refresh']
+        del kwargs['refresh']
 
     data = api_func(wiki, user, *args, **kwargs)
     add_auth_data(data)
     add_navigation_data(None, data, raw_url=raw_url)
     data['title'] = info['title']
     data['is_special_page'] = True
-    data['refresh'] = {
-        'url': url_for('special_list_refresh'),
-        'list_name': page_name.replace('-', '_'),
-        'postback': page_name
-    }
+    if refresh:
+        data['refresh'] = {
+            'url': url_for('special_list_refresh'),
+            'list_name': page_name.replace('-', '_'),
+            'postback': page_name
+        }
     return render_template(info['template'], **data)
 
 
@@ -143,6 +163,21 @@
                     raw_url='/api/dead-ends')
 
 
+@app.route('/special/list/broken-links')
+@requires_reader_auth
+def special_list_broken_links():
+    return call_api('broken-links', get_broken_links,
+                    raw_url='/api/broken-links')
+
+
+@app.route('/special/list/wanted-pages')
+@requires_reader_auth
+def special_list_wanted_pages():
+    return call_api('wanted-pages', get_wanted_pages,
+                    raw_url='/api/wanted-pages',
+                    refresh=False)
+
+
 @app.route('/special/list-refresh', methods=['POST'])
 @requires_auth('administrators')
 def special_list_refresh():
--- a/wikked/webimpl/__init__.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/webimpl/__init__.py	Tue Mar 28 21:24:44 2017 -0700
@@ -199,3 +199,14 @@
 
     return builder
 
+
+def make_page_title(url):
+    endpoint, path = split_page_url(url)
+    last_slash = path.rstrip('/').rfind('/')
+    if last_slash < 0 or last_slash == 0:
+        title = path.lstrip('/')
+    else:
+        title = path[last_slash + 1:]
+    if endpoint:
+        return '%s: %s' % (endpoint, title)
+    return title
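make_page_title(), moved here from wikked/webimpl/edit.py, derives a display title from the last path segment of a URL, prefixing the endpoint when there is one. Expected behavior, assuming wikked's usual 'endpoint:/path' URL convention:

    from wikked.webimpl import make_page_title

    print(make_page_title('/sandbox'))           # sandbox
    print(make_page_title('/recipes/brownies'))  # brownies
    print(make_page_title('meta:/foo/bar'))      # meta: bar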
--- a/wikked/webimpl/edit.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/webimpl/edit.py	Tue Mar 28 21:24:44 2017 -0700
@@ -3,10 +3,10 @@
 from wikked.page import Page, PageData
 from wikked.formatter import PageFormatter, FormattingContext
 from wikked.resolver import PageResolver
-from wikked.utils import PageNotFoundError, split_page_url
+from wikked.utils import PageNotFoundError
 from wikked.webimpl import (
     CHECK_FOR_WRITE,
-    get_page_or_raise, get_page_meta)
+    get_page_or_raise, get_page_meta, make_page_title)
 
 
 logger = logging.getLogger(__name__)
@@ -40,18 +40,6 @@
     return data
 
 
-def make_page_title(url):
-    endpoint, path = split_page_url(url)
-    last_slash = path.rstrip('/').rfind('/')
-    if last_slash < 0 or last_slash == 0:
-        title = path.lstrip('/')
-    else:
-        title = path[last_slash + 1:]
-    if endpoint:
-        return '%s: %s' % (endpoint, title)
-    return title
-
-
 def get_edit_page(wiki, user, url, author=None, custom_data=None):
     page = None
     try:
@@ -114,4 +102,3 @@
         resolver = PageResolver(dummy)
         dummy._setExtendedData(resolver.run())
         return dummy.text
-
--- a/wikked/webimpl/special.py	Fri Mar 24 16:46:37 2017 -0700
+++ b/wikked/webimpl/special.py	Tue Mar 28 21:24:44 2017 -0700
@@ -1,7 +1,10 @@
+import urllib.parse
+from wikked.db.base import NoWantedPages
+from wikked.page import WantedPage
 from wikked.utils import get_absolute_url
 from wikked.webimpl import (
     CHECK_FOR_READ,
-    get_page_meta, get_page_or_raise,
+    get_page_meta, get_page_or_raise, make_page_title,
     is_page_readable, get_redirect_target,
     get_or_build_pagelist, get_generic_pagelist_builder,
     CircularRedirectError, RedirectNotFound)
@@ -109,6 +112,76 @@
             fields=['url', 'title', 'meta', 'links'])
 
 
+def get_broken_links(wiki, user):
+    def builder_func():
+        wiki.resolve()
+
+        pages = set()
+        page_existence = {}
+        for p in wiki.getPages(
+                no_endpoint_only=True,
+                fields=['url', 'title', 'meta', 'links']):
+            # Gather all outgoing links from each page, then check which
+            # of those match another page in the dictionary.
+            for l in p.links:
+                abs_l = get_absolute_url(p.url, l)
+                exists = page_existence.get(abs_l, None)
+                if exists is None:
+                    # Don't know yet if this URL is valid, so let's ask the
+                    # database and cache the result.
+                    exists = wiki.pageExists(abs_l)
+                    page_existence[abs_l] = exists
+                if not exists:
+                    pages.add(p)
+        return pages
+
+    fields = ['url', 'title', 'meta']
+    pages = get_or_build_pagelist(wiki, 'broken_links', builder_func, fields)
+    return build_pagelist_view_data(pages, user)
+
+
+def get_wanted_pages(wiki, user):
+    def builder_func():
+        wiki.resolve()
+
+        wanted = {}
+        page_existence = {}
+        for p in wiki.getPages(
+                no_endpoint_only=True,
+                fields=['url', 'title', 'meta', 'links']):
+            for l in p.links:
+                abs_l = get_absolute_url(p.url, l)
+                exists = page_existence.get(abs_l, None)
+                if exists is None:
+                    exists = wiki.pageExists(abs_l)
+                    page_existence[abs_l] = exists
+                if not exists:
+                    wanted.setdefault(abs_l, p)
+
+        return [WantedPage(u, p) for u, p in wanted.items()]
+
+    try:
+        wanted = sorted(wiki.db.getWantedPages(), key=lambda p: p.url)
+    except NoWantedPages:
+        wanted = None
+
+    if wanted is None:
+        wanted = builder_func()
+        wiki.db.saveWantedPages(wanted)
+
+    data = []
+    for w in wanted:
+        d = {'url': urllib.parse.quote(w.url.encode('utf-8')),
+             'title': make_page_title(w.url),
+             'wanted_by': {
+                 'url': urllib.parse.quote(w.wanted_by.url.encode('utf-8')),
+                 'title': w.wanted_by.title}
+             }
+        data.append(d)
+    result = {'wanted_pages': data}
+    return result
+
+
 def list_pages(wiki, user, url=None):
     pages = list(filter(is_page_readable, wiki.getPages(url)))
     page_metas = [get_page_meta(page) for page in pages]
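Note the split the commit message describes: get_broken_links() reuses the generic get_or_build_pagelist() cache, while get_wanted_pages() manages its own cache via getWantedPages()/saveWantedPages(), since it returns URL/wanted-by pairs rather than pages. Both builders memoize wiki.pageExists() results in a local page_existence dict so each target URL hits the database only once, no matter how many pages link to it. The same idea as a standalone helper (illustrative only; not part of the commit):

    def make_exists_checker(wiki):
        cache = {}

        def exists(url):
            # Ask the wiki database only on the first lookup of each URL.
            if url not in cache:
                cache[url] = wiki.pageExists(url)
            return cache[url]

        return exists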