Mercurial > wikked
changeset 111:e5dea315583b
First phase of fixes:
- Use unicode strings in more places.
- Fixed updating/resetting of the wiki DB.
- Fixed incoming links query.
- Fixed incorrect URL trail and outgoing links while resolving includes.
- Validate URLs correctly in the public API.
| author | Ludovic Chabant <ludovic@chabant.com> |
|---|---|
| date | Sat, 16 Nov 2013 08:32:57 -0800 |
| parents | 827e236aa7c6 |
| children | a65cedc183d6 |
| files | wikked/db.py wikked/formatter.py wikked/resolver.py wikked/views.py wikked/wiki.py |
| diffstat | 5 files changed, 119 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/wikked/db.py Thu Nov 14 15:21:48 2013 -0800 +++ b/wikked/db.py Sat Nov 16 08:32:57 2013 -0800 @@ -31,7 +31,7 @@ def reset(self, pages): raise NotImplementedError() - def update(self, pages): + def update(self, pages, force=False): raise NotImplementedError() def getPageUrls(self, subdir=None): @@ -184,7 +184,7 @@ self._addPage(page) db.session.commit() - def update(self, pages): + def update(self, pages, force=False): self.logger.debug("Updating SQL database...") to_update = set() already_added = set() @@ -199,7 +199,7 @@ already_added.add(p.path) path_time = datetime.datetime.fromtimestamp( os.path.getmtime(p.path)) - if path_time > p.time: + if path_time > p.time or force: # File has changed since last index. to_remove.append(p) to_update.add(p.path) @@ -208,6 +208,14 @@ db.session.commit() + db_pages = db.session.query(SQLPage).\ + add_columns('id', 'is_ready').\ + all() + for p in db_pages: + p.is_ready = False + + db.session.commit() + added_db_objs = [] for p in pages: if (p.path in to_update or @@ -253,11 +261,12 @@ return self.getPage(url) is not None def getLinksTo(self, url): - q = db.session.query(SQLLink, SQLPage).\ - filter(SQLLink.target_url == SQLPage.url).\ + q = db.session.query(SQLReadyLink).\ + filter(SQLReadyLink.target_url == url).\ + join(SQLReadyLink.source).\ all() for l in q: - yield l.source + yield l.source.url def _createSchema(self): db.drop_all()
--- a/wikked/formatter.py Thu Nov 14 15:21:48 2013 -0800 +++ b/wikked/formatter.py Sat Nov 16 08:32:57 2013 -0800 @@ -48,8 +48,8 @@ def _processWikiMeta(self, ctx, text): def repl(m): - meta_name = str(m.group('name')).lower() - meta_value = str(m.group('value')) + meta_name = unicode(m.group('name')).lower() + meta_value = unicode(m.group('value')) if meta_value is None or meta_value == '': # No value provided: this is a "flag" meta. @@ -83,7 +83,7 @@ flags=re.MULTILINE) # Multi-line meta. text = re.sub( - r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*)^\}\}\s*$', + r'^\{\{(?P<name>(__|\+)?[a-zA-Z][a-zA-Z0-9_\-]+):\s*(?P<value>.*)^\s*\}\}\s*$', repl, text, flags=re.MULTILINE | re.DOTALL) @@ -149,8 +149,8 @@ arg_pattern = r"(^|\|)\s*(?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*="\ r"(?P<value>[^\|]+)" for m in re.finditer(arg_pattern, query): - name = str(m.group('name')).strip() - value = str(m.group('value')).strip() + name = unicode(m.group('name')).strip() + value = unicode(m.group('value')).strip() processed_args += '%s=%s' % (name, value) mod_attr = '' @@ -167,6 +167,6 @@ urls = [] pattern = r"<a class=\"[^\"]*\" data-wiki-url=\"(?P<url>[^\"]+)\">" for m in re.finditer(pattern, text): - urls.append(str(m.group('url'))) + urls.append(unicode(m.group('url'))) return urls
--- a/wikked/resolver.py Thu Nov 14 15:21:48 2013 -0800 +++ b/wikked/resolver.py Sat Nov 16 08:32:57 2013 -0800 @@ -16,18 +16,26 @@ """ An exception raised when a circular include is found while rendering a page. """ - def __init__(self, message, url_trail): - Exception.__init__(self, message) - self.url_trail = url_trail + def __init__(self, current_url, url_trail, message=None): + Exception.__init__(self, current_url, url_trail, message) + + def __str__(self): + current_url = self.args[0] + url_trail = self.args[1] + message = self.args[2] + res = "Circular include detected at '%s' (after %s)" % (current_url, url_trail) + if message: + res += ": %s" % message + return res class ResolveContext(object): """ The context for resolving page queries. """ def __init__(self, root_page=None): self.root_page = root_page - self.url_trail = set() + self.url_trail = [] if root_page: - self.url_trail.add(root_page.url) + self.url_trail.append(root_page.url) def shouldRunMeta(self, modifier): if modifier is None: @@ -52,10 +60,8 @@ self.out_links = [] if page: self.meta = dict(page.getLocalMeta()) - self.out_links = list(page.getLocalLinks()) def add(self, other): - self.out_links = list(set(self.out_links + other.out_links)) for original_key, val in other.meta.iteritems(): # Ignore internal properties. Strip include-only properties # from their prefix. @@ -134,15 +140,15 @@ # Resolve queries, includes, etc. 
def repl2(m): - meta_name = str(m.group('name')) - meta_value = str(m.group('value')) + meta_name = unicode(m.group('name')) + meta_value = unicode(m.group('value')) meta_opts = {} if m.group('opts'): for c in re.finditer( r'data-wiki-(?P<name>[a-z]+)="(?P<value>[^"]+)"', - str(m.group('opts'))): - opt_name = str(c.group('name')) - opt_value = str(c.group('value')) + unicode(m.group('opts'))): + opt_name = unicode(c.group('name')) + opt_value = unicode(c.group('value')) meta_opts[opt_name] = opt_value resolver = self.resolvers.get(meta_name) @@ -167,9 +173,10 @@ # Resolve link states. def repl1(m): - raw_url = str(m.group('url')) - abs_raw_url = self.ctx.getAbsoluteUrl(raw_url) - url = namespace_title_to_url(abs_raw_url) + raw_url = unicode(m.group('url')) + raw_url = self.ctx.getAbsoluteUrl(raw_url) + url = namespace_title_to_url(raw_url) + self.output.out_links.append(url) if self.wiki.pageExists(url): return '<a class="wiki-link" data-wiki-url="%s">' % url return '<a class="wiki-link missing" data-wiki-url="%s">' % url @@ -196,7 +203,7 @@ # Check for circular includes. include_url = self.ctx.getAbsoluteUrl(opts['url'], self.page.url) if include_url in self.ctx.url_trail: - raise CircularIncludeError("Circular include detected at: %s" % include_url, self.ctx.url_trail) + raise CircularIncludeError(include_url, self.ctx.url_trail) # Parse the templating parameters. parameters = dict(self.parameters) @@ -208,19 +215,21 @@ # root page. arg_pattern = r"(^|\|)\s*((?P<name>[a-zA-Z][a-zA-Z0-9_\-]+)\s*=)?(?P<value>[^\|]+)" for i, m in enumerate(re.finditer(arg_pattern, args)): - key = str(m.group('name')).lower() - value = str(m.group('value')).strip() + key = unicode(m.group('name')).lower() + value = unicode(m.group('value')).strip() value = self._renderTemplate(value, parameters, error_url=self.page.url) parameters[key] = value parameters['__args'].append(value) # Re-run the resolver on the included page to get its final # formatted text. 
+ current_url_trail = list(self.ctx.url_trail) page = self.wiki.getPage(include_url) - self.ctx.url_trail.add(page.url) + self.ctx.url_trail.append(page.url) child = PageResolver(page, self.ctx, parameters) child_output = child.run() self.output.add(child_output) + self.ctx.url_trail = current_url_trail # Run the templating. text = child_output.text @@ -242,9 +251,9 @@ for m in re.finditer(arg_pattern, query): key = m.group('name').lower() if key in parameters: - parameters[key] = str(m.group('value')) + parameters[key] = unicode(m.group('value')) else: - meta_query[key] = str(m.group('value')) + meta_query[key] = unicode(m.group('value')) # Find pages that match the query, excluding any page # that is in the URL trail. @@ -319,11 +328,11 @@ for v in include_meta_values: pipe_idx = v.find('|') if pipe_idx > 0: - incl_url = v[:pipe_idx] + abs_url = self.ctx.getAbsoluteUrl(v[:pipe_idx], page.url) + included_urls.append(abs_url) else: - incl_url = v - abs_incl_url = get_absolute_url(page.url, incl_url) - included_urls.append(abs_incl_url) + abs_url = self.ctx.getAbsoluteUrl(v, page.url) + included_urls.append(abs_url) # Recurse into included pages. for url in included_urls: @@ -368,4 +377,3 @@ if title is None: title = value return '<a class="wiki-link" data-wiki-url="%s" data-action="edit">%s</a>' % (value, title) -
--- a/wikked/views.py Thu Nov 14 15:21:48 2013 -0800 +++ b/wikked/views.py Sat Nov 16 08:32:57 2013 -0800 @@ -1,5 +1,6 @@ import time import urllib +import string import os.path from flask import render_template, abort, request, g, jsonify from flask.ext.login import login_user, logout_user, current_user @@ -10,7 +11,7 @@ from page import Page, PageData from fs import PageNotFoundError from formatter import PageFormatter, FormattingContext -from utils import namespace_title_to_url, get_absolute_url +from utils import title_to_url import scm @@ -19,12 +20,7 @@ CHECK_FOR_WRITE = 2 -def coerce_redirect(page, redirect): - target_url = get_absolute_url(page.url, redirect[0]) - return namespace_title_to_url(target_url) - - -def coerce_category(page, category): +def get_category_meta(category): result = [] for item in category: result.append({ @@ -34,8 +30,8 @@ return result COERCE_META = { - 'redirect': coerce_redirect, - 'category': coerce_category + 'redirect': title_to_url, + 'category': get_category_meta } @@ -64,8 +60,6 @@ def get_page_or_none(url, force_resolve=False): - if url[0] != '/': - url = '/' + url try: page = g.wiki.getPage(url) if force_resolve: @@ -87,6 +81,10 @@ abort(404) +def make_absolute(url): + return '/' + string.lstrip(url, '/') + + def is_page_readable(page, user=current_user): return page.wiki.auth.isPageReadable(page, user.get_id()) @@ -104,7 +102,7 @@ meta['url'] = page.url for name in COERCE_META: if name in meta: - meta[name] = COERCE_META[name](page, meta[name]) + meta[name] = COERCE_META[name](meta[name]) return meta @@ -131,6 +129,13 @@ continue except PageNotFoundError: pass + continue + page = g.wiki.getPage(f_info.url) + try: + if not is_page_readable(page): + continue + except PageNotFoundError: + pass rev_data['pages'].append({ 'url': f_info.url, 'action': scm.ACTION_NAMES[f['action']] @@ -182,7 +187,7 @@ @app.route('/api/list/<path:url>') def api_list_pages(url): - pages = filter(is_page_readable, g.wiki.getPages(url)) + pages = 
filter(is_page_readable, g.wiki.getPages(make_absolute(url))) page_metas = [get_page_meta(page) for page in pages] result = {'path': url, 'pages': list(page_metas)} return make_auth_response(result) @@ -191,7 +196,7 @@ @app.route('/api/read/<path:url>') def api_read_page(url): page = get_page_or_404( - url, + make_absolute(url), check_perms=CHECK_FOR_READ, force_resolve=('force_resolve' in request.args)) result = {'meta': get_page_meta(page), 'text': page.text} @@ -200,7 +205,7 @@ @app.route('/api/raw/<path:url>') def api_read_page_raw(url): - page = get_page_or_404(url, CHECK_FOR_READ) + page = get_page_or_404(make_absolute(url), CHECK_FOR_READ) result = {'meta': get_page_meta(page), 'text': page.raw_text} return make_auth_response(result) @@ -210,7 +215,7 @@ rev = request.args.get('rev') if rev is None: abort(400) - page = get_page_or_404(url, CHECK_FOR_READ) + page = get_page_or_404(make_absolute(url), CHECK_FOR_READ) page_rev = page.getRevision(rev) meta = dict(get_page_meta(page, True), rev=rev) result = {'meta': meta, 'text': page_rev} @@ -234,7 +239,7 @@ rev2 = request.args.get('rev2') if rev1 is None: abort(400) - page = get_page_or_404(url, CHECK_FOR_READ) + page = get_page_or_404(make_absolute(url), CHECK_FOR_READ) diff = page.getDiff(rev1, rev2) if 'raw' not in request.args: lexer = get_lexer_by_name('diff') @@ -250,7 +255,7 @@ @app.route('/api/state/<path:url>') def api_get_state(url): - page = get_page_or_404(url, CHECK_FOR_READ) + page = get_page_or_404(make_absolute(url), CHECK_FOR_READ) state = page.getState() return make_auth_response({ 'meta': get_page_meta(page, True), @@ -260,13 +265,13 @@ @app.route('/api/outlinks/<path:url>') def api_get_outgoing_links(url): - page = get_page_or_404(url, CHECK_FOR_READ) + page = get_page_or_404(make_absolute(url), CHECK_FOR_READ) links = [] for link in page.links: other = get_page_or_none(link) if other is not None: links.append({ - 'url': link, + 'url': other.url, 'title': other.title }) else: @@ -278,7 
+283,7 @@ @app.route('/api/inlinks/<path:url>') def api_get_incoming_links(url): - page = get_page_or_404(url, CHECK_FOR_READ) + page = get_page_or_404(make_absolute(url), CHECK_FOR_READ) links = [] for link in page.getIncomingLinks(): other = get_page_or_none(link) @@ -296,6 +301,7 @@ @app.route('/api/edit/<path:url>', methods=['GET', 'POST']) def api_edit_page(url): + url = make_absolute(url) if request.method == 'GET': page = get_page_or_none(url) if page is None: @@ -354,7 +360,7 @@ if 'message' in request.form and len(request.form['message']) > 0: message = request.form['message'] - url = '/' + url + url = make_absolute(url) page_fields = { 'rev': rev, 'author': author, @@ -419,6 +425,7 @@ @app.route('/api/history/<path:url>') def api_page_history(url): + url = make_absolute(url) page = get_page_or_404(url, CHECK_FOR_READ) history = page.getHistory() hist_data = get_history_data(history) @@ -494,3 +501,4 @@ result = {'username': user.username, 'groups': user.groups} return make_auth_response(result) abort(404) +
--- a/wikked/wiki.py Thu Nov 14 15:21:48 2013 -0800 +++ b/wikked/wiki.py Sat Nov 16 08:32:57 2013 -0800 @@ -122,13 +122,39 @@ self.db.initDb() if update: + self.update() + + def stop(self): + self.db.close() + + def reset(self, cache_ext_data=True): + self.logger.debug("Resetting wiki data...") + page_infos = self.fs.getPageInfos() + fs_pages = FileSystemPage.fromPageInfos(self, page_infos) + self.db.reset(fs_pages) + self.index.reset(self.getPages()) + + if cache_ext_data: + self._cacheAllPages() + + def update(self, url=None, cache_ext_data=True): + updated_urls = [] + self.logger.debug("Updating pages...") + if url: + page_info = self.fs.getPage(url) + fs_page = FileSystemPage(self, page_info=page_info) + self.db.update([fs_page]) + updated_urls.append(url) + self.index.update([self.getPage(url)]) + else: page_infos = self.fs.getPageInfos() fs_pages = FileSystemPage.fromPageInfos(self, page_infos) self.db.update(fs_pages) + updated_urls += [p.url for p in fs_pages] self.index.update(self.getPages()) - def stop(self): - self.db.close() + if cache_ext_data: + self._cacheAllPages() def getPageUrls(self, subdir=None): """ Returns all the page URLs in the wiki, or in the given @@ -220,6 +246,12 @@ """ return self.scm.getHistory() + def _cacheAllPages(self): + self.logger.debug("Caching extended page data...") + for url in self.getPageUrls(): + page = self.getPage(url) + page._ensureExtendedData() + def _loadConfig(self, parameters): # Merge the default settings with any settings provided by # the parameters.