wikked changeset 292:0b0f3486719c
More efficient invalidation of the cache when a page is edited.
More efficient auto-reload.
Remove `force_resolve` option from the web frontend.
| author   | Ludovic Chabant <ludovic@chabant.com> |
|----------|---------------------------------------|
| date     | Sat, 27 Sep 2014 22:49:15 -0700 |
| parents  | 035c7a58e9aa |
| children | f5e49462b10b |
| files    | wikked/db/base.py wikked/db/sql.py wikked/page.py wikked/views/__init__.py wikked/views/read.py wikked/wiki.py |
| diffstat | 6 files changed, 51 insertions(+), 62 deletions(-) |
```diff
--- a/wikked/db/base.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/db/base.py	Sat Sep 27 22:49:15 2014 -0700
@@ -50,9 +50,6 @@
             raise PageNotFoundError(url or path)
         return page
 
-    def isPageValid(self, url):
-        return True
-
     def cachePage(self, page):
         pass
```
```diff
--- a/wikked/db/sql.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/db/sql.py	Sat Sep 27 22:49:15 2014 -0700
@@ -29,7 +29,7 @@
     __tablename__ = 'pages'
 
     id = Column(Integer, primary_key=True)
-    time = Column(DateTime)
+    cache_time = Column(DateTime)
     # In the spirit of cross-platformness we let Windows' suckiness dictacte
     # this length (but it's good because it makes those 2 columns short enough
     # to be indexable by SQL).
@@ -53,6 +53,7 @@
 
     ready_text = Column(UnicodeText(length=2 ** 31))
     is_ready = Column(Boolean)
+    needs_invalidate = Column(Boolean)
 
     ready_meta = relationship(
         'SQLReadyMeta',
@@ -127,7 +128,7 @@
 
 class SQLDatabase(Database):
     """ A database cache based on SQL. """
-    schema_version = 5
+    schema_version = 6
 
     def __init__(self, config):
         Database.__init__(self)
@@ -236,7 +237,17 @@
             self.session.commit()
 
         page = FileSystemPage(self.wiki, page_info)
-        self._addPage(page)
+        added_p = self._addPage(page)
+        self.session.commit()
+
+        # Invalidate all the appropriate pages.
+        q = self.session.query(SQLPage)\
+            .options(load_only('id', 'needs_invalidate', 'is_ready'))\
+            .filter(SQLPage.needs_invalidate is True)
+        for p in q.all():
+            if p.id == added_p.id:
+                continue
+            p.is_ready = False
         self.session.commit()
 
     def updateAll(self, page_infos, force=False):
@@ -252,7 +263,7 @@
             page_infos = list(page_infos)
             page_urls = set([p.url for p in page_infos])
             db_pages = self.session.query(SQLPage).\
-                options(load_only('id', 'url', 'path', 'time')).\
+                options(load_only('id', 'url', 'path', 'cache_time')).\
                 all()
             for p in db_pages:
                 if not os.path.isfile(p.path):
@@ -262,7 +273,7 @@
                     already_added.add(p.path)
                     path_time = datetime.datetime.fromtimestamp(
                         os.path.getmtime(p.path))
-                    if path_time > p.time or (force and p.url in page_urls):
+                    if path_time > p.cache_time or (force and p.url in page_urls):
                         # File has changed since last index.
                         to_remove.append(p)
                         to_update.add(p.path)
@@ -317,30 +328,6 @@
         for p in q.all():
             yield SQLDatabasePage(self, p, fields)
 
-    def isPageValid(self, url):
-        db_obj = self.session.query(SQLPage).\
-            options(load_only('id', 'url', 'path', 'time')).\
-            filter(SQLPage.url == url).\
-            first()
-        if not db_obj:
-            return False
-        path_time = datetime.datetime.fromtimestamp(
-            os.path.getmtime(db_obj.path))
-        return path_time < db_obj.time
-
-    def invalidateCache(self, ids):
-        if not isinstance(ids, list):
-            ids = list(ids)
-        logger.debug("Invalidating %d page caches in SQL database." % len(ids))
-
-        db_pages = self.session.query(SQLPage).\
-            options(load_only('id', 'url', 'is_ready')).\
-            filter(SQLPage.id.in_(ids)).\
-            all()
-        for p in db_pages:
-            p.is_ready = False
-        self.session.commit()
-
     def cachePage(self, page):
         if not hasattr(page, '_id') or not page._id:
             raise Exception("Given page '%s' has no `_id` attribute set." % page.url)
@@ -361,6 +348,7 @@
                 raise
 
         db_obj.ready_text = page._data.text
+        db_obj.needs_invalidate = False
 
         del db_obj.ready_meta[:]
         for name, value in page._data.ext_meta.iteritems():
@@ -371,6 +359,8 @@
             else:
                 for v in value:
                     db_obj.ready_meta.append(SQLReadyMeta(name, v))
+            if name in ['include', 'query']:
+                db_obj.needs_invalidate = True
 
         del db_obj.ready_links[:]
         for link_url in page._data.ext_links:
@@ -444,7 +434,7 @@
         logger.debug("Adding page '%s' to SQL database." % page.url)
 
         po = SQLPage()
-        po.time = datetime.datetime.now()
+        po.cache_time = datetime.datetime.now()
         po.url = page.url
         po.endpoint, _ = split_page_url(page.url)
         po.path = page.path
@@ -489,6 +479,8 @@
             data.url = db_obj.url
         if fields is None or 'path' in fields:
             data.path = db_obj.path
+        if fields is None or 'cache_time' in fields:
+            data.cache_time = db_obj.cache_time
         if fields is None or 'title' in fields:
             data.title = db_obj.title
         if fields is None or 'raw_text' in fields:
```
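Taken together, the `sql.py` hunks replace the scan-everything `invalidateCache()` with a `needs_invalidate` flag computed once, at cache time: a page is flagged if its metadata contains `include` or `query`, and an edit then only has to flip `is_ready` on that pre-flagged subset. A minimal, self-contained sketch of the scheme follows, with hypothetical names (`Page`, `cache_page`, `on_page_edited`) on SQLAlchemy 1.4+; note that column filters must use `.is_(True)` or `== True`, since Python's `is` operator cannot be turned into a SQL expression:

```python
from sqlalchemy import Boolean, Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()


class Page(Base):
    __tablename__ = 'pages'

    id = Column(Integer, primary_key=True)
    url = Column(String)
    is_ready = Column(Boolean, default=False)
    # Set at cache time: does this page pull in others via include/query?
    needs_invalidate = Column(Boolean, default=False)


def cache_page(session, page, ext_meta):
    # Flag dependent pages once, while caching, instead of re-scanning
    # every page's metadata on each edit.
    page.needs_invalidate = any(name in ('include', 'query')
                                for name in ext_meta)
    page.is_ready = True
    session.commit()


def on_page_edited(session, edited_page_id):
    # Columns compare with `.is_(True)` or `== True`, never Python's `is`.
    q = session.query(Page).filter(Page.needs_invalidate.is_(True))
    for p in q:
        if p.id != edited_page_id:
            p.is_ready = False  # re-resolved lazily on next access
    session.commit()


if __name__ == '__main__':
    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    front = Page(url='/main', is_ready=True, needs_invalidate=True)
    about = Page(url='/about', is_ready=True, needs_invalidate=False)
    session.add_all([front, about])
    session.commit()
    on_page_edited(session, about.id)
    assert front.is_ready is False and about.is_ready is True
```

The `schema_version` bump to 6 is what forces existing caches to be rebuilt so the new column exists before anything queries it.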
```diff
--- a/wikked/page.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/page.py	Sat Sep 27 22:49:15 2014 -0700
@@ -30,6 +30,7 @@
     def __init__(self):
         self.url = None
         self.path = None
+        self.cache_time = None
         self.title = None
         self.raw_text = None
         self.formatted_text = None
@@ -57,6 +58,10 @@
         return self._data.path
 
     @property
+    def cache_time(self):
+        return self._data.cache_time
+
+    @property
     def extension(self):
         if self._data.path is None:
             raise Exception("The 'path' field was not loaded.")
@@ -133,6 +138,7 @@
         data = PageData()
         data.url = page_info.url
         data.path = page_info.path
+        data.cache_time = None
         data.raw_text = page_info.content
 
         # Format the page and get the meta properties.
```
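The new `cache_time` rides the same field-projection path as the rest of `PageData`: callers pass a `fields` list and only those columns get copied off the database row. A toy sketch of that pattern, with hypothetical names (`project`, `row`):

```python
# Hypothetical illustration of the field-projection pattern behind
# PageData: only the requested fields are copied from the row, mirroring
# the `if fields is None or '<name>' in fields` chains in sql.py.
class PageData:
    def __init__(self):
        self.url = None
        self.path = None
        self.cache_time = None


def project(row, fields=None):
    # `fields=None` means "everything"; otherwise copy only what was asked.
    data = PageData()
    for name in ('url', 'path', 'cache_time'):
        if fields is None or name in fields:
            setattr(data, name, getattr(row, name))
    return data
```

This is why the views code below has to force `'path'` and `'cache_time'` into the requested fields whenever auto-reload is on: the staleness check reads both.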
```diff
--- a/wikked/views/__init__.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/views/__init__.py	Sat Sep 27 22:49:15 2014 -0700
@@ -1,5 +1,7 @@
+import os.path
 import urllib
 import string
+import datetime
 from flask import g, abort, jsonify
 from flask.ext.login import current_user
 from wikked.fs import PageNotFoundError
@@ -32,24 +34,30 @@
 
 def get_page_or_none(url, fields=None, convert_url=True,
-                     check_perms=DONT_CHECK, force_resolve=False):
+                     check_perms=DONT_CHECK):
     if convert_url:
         url = url_from_viewarg(url)
 
+    auto_reload = app.config.get('WIKI_AUTO_RELOAD')
+    if auto_reload and fields is not None:
+        if 'path' not in fields:
+            fields.append('path')
+        if 'cache_time' not in fields:
+            fields.append('cache_time')
+
     try:
-        if app.config.get('WIKI_AUTO_RELOAD'):
-            if not g.wiki.db.isPageValid(url):
-                app.logger.info("Page '%s' has changed, reloading." % url)
-                g.wiki.updatePage(url=url)
-            else:
-                app.logger.debug("Page '%s' is up to date." % url)
-        elif force_resolve:
-            g.wiki.resolve(only_urls=[url], force=True)
-
         page = g.wiki.getPage(url, fields=fields)
     except PageNotFoundError:
         return None
 
+    if auto_reload:
+        path_time = datetime.datetime.fromtimestamp(
+            os.path.getmtime(page.path))
+        if path_time >= page.cache_time:
+            app.logger.info("Page '%s' has changed, reloading." % url)
+            g.wiki.updatePage(path=page.path)
+            page = g.wiki.getPage(url, fields=fields)
+
     if check_perms == CHECK_FOR_READ and not is_page_readable(page):
         abort(401)
     elif check_perms == CHECK_FOR_WRITE and not is_page_writable(page):
@@ -59,9 +67,8 @@
 
 def get_page_or_404(url, fields=None, convert_url=True,
-                    check_perms=DONT_CHECK, force_resolve=False):
-    page = get_page_or_none(url, fields, convert_url, check_perms,
-                            force_resolve)
+                    check_perms=DONT_CHECK):
+    page = get_page_or_none(url, fields, convert_url, check_perms)
     if page is not None:
         return page
     app.logger.error("No such page: " + url)
```
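With `cache_time` loaded alongside the page, the auto-reload test reduces to a single mtime comparison after the fetch, instead of the old per-request `isPageValid()` round-trip through the database. A standalone sketch of the check, assuming a `page` object exposing `path` and `cache_time` and a `wiki` with the `getPage`/`updatePage` API from the diff:

```python
import datetime
import os.path


def is_stale(page):
    # The page is stale if its source file was modified at or after the
    # moment it was cached; `>=` errs toward reloading on a tie.
    path_time = datetime.datetime.fromtimestamp(os.path.getmtime(page.path))
    return path_time >= page.cache_time


def get_fresh_page(wiki, url, fields):
    page = wiki.getPage(url, fields=fields)
    if is_stale(page):
        wiki.updatePage(path=page.path)        # re-read from disk, re-cache
        page = wiki.getPage(url, fields=fields)  # fetch the fresh copy
    return page
```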
```diff
--- a/wikked/views/read.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/views/read.py	Sat Sep 27 22:49:15 2014 -0700
@@ -68,7 +68,6 @@
     else:
         additional_info['user'] = False
 
-    force_resolve = ('force_resolve' in request.args)
     no_redirect = ('no_redirect' in request.args)
 
     endpoint, value, path = split_url_from_viewarg(url)
@@ -80,8 +79,7 @@
                 path,
                 fields=['url', 'title', 'text', 'meta'],
                 convert_url=False,
-                check_perms=CHECK_FOR_READ,
-                force_resolve=force_resolve)
+                check_perms=CHECK_FOR_READ)
             visited_paths.append(path)
             redirect_meta = page.getMeta('redirect')
             if redirect_meta is None:
@@ -107,8 +105,7 @@
                 meta_page_url,
                 fields=['url', 'title', 'text', 'meta'],
                 convert_url=False,
-                check_perms=CHECK_FOR_READ,
-                force_resolve=force_resolve)
+                check_perms=CHECK_FOR_READ)
 
     endpoint_info = g.wiki.endpoints.get(endpoint)
     if endpoint_info is not None:
@@ -119,8 +116,7 @@
                 endpoint_info.default,
                 fields=['url', 'title', 'text', 'meta'],
                 convert_url=False,
-                check_perms=CHECK_FOR_READ,
-                force_resolve=force_resolve)
+                check_perms=CHECK_FOR_READ)
 
         if not endpoint_info.query:
             # Not a query-based endpoint (like categories). Let's just
```
```diff
--- a/wikked/wiki.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/wiki.py	Sat Sep 27 22:49:15 2014 -0700
@@ -281,15 +281,6 @@
                 page_info.url,
                 fields=['url', 'path', 'title', 'text']))
 
-        # Invalidate all the appropriate pages.
-        logger.info("Handling dependencies...")
-        invalidate_ids = []
-        db_pages = self.db.getPages(fields=['local_meta'])
-        for p in db_pages:
-            if p.getLocalMeta('include') or p.getLocalMeta('query'):
-                invalidate_ids.append(p._id)
-        self.db.invalidateCache(invalidate_ids)
-
     def updateAll(self):
         """ Completely updates all pages, i.e. read them from the
             file-system and have them fully resolved and cached in the DB.
```