changeset 292:0b0f3486719c

More efficient invalidation of the cache when a page is edited. More efficient auto-reload. Remove `force_resolve` option from the web frontend.
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 27 Sep 2014 22:49:15 -0700
parents 035c7a58e9aa
children f5e49462b10b
files wikked/db/base.py wikked/db/sql.py wikked/page.py wikked/views/__init__.py wikked/views/read.py wikked/wiki.py
diffstat 6 files changed, 51 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/wikked/db/base.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/db/base.py	Sat Sep 27 22:49:15 2014 -0700
@@ -50,9 +50,6 @@
             raise PageNotFoundError(url or path)
         return page
 
-    def isPageValid(self, url):
-        return True
-
     def cachePage(self, page):
         pass
 
--- a/wikked/db/sql.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/db/sql.py	Sat Sep 27 22:49:15 2014 -0700
@@ -29,7 +29,7 @@
     __tablename__ = 'pages'
 
     id = Column(Integer, primary_key=True)
-    time = Column(DateTime)
+    cache_time = Column(DateTime)
     # In the spirit of cross-platformness we let Windows' suckiness dictacte
     # this length (but it's good because it makes those 2 columns short enough
     # to be indexable by SQL).
@@ -53,6 +53,7 @@
 
     ready_text = Column(UnicodeText(length=2 ** 31))
     is_ready = Column(Boolean)
+    needs_invalidate = Column(Boolean)
 
     ready_meta = relationship(
         'SQLReadyMeta',
@@ -127,7 +128,7 @@
 class SQLDatabase(Database):
     """ A database cache based on SQL.
     """
-    schema_version = 5
+    schema_version = 6
 
     def __init__(self, config):
         Database.__init__(self)
@@ -236,7 +237,17 @@
             self.session.commit()
 
         page = FileSystemPage(self.wiki, page_info)
-        self._addPage(page)
+        added_p = self._addPage(page)
+        self.session.commit()
+
+        # Invalidate all the appropriate pages.
+        q = self.session.query(SQLPage)\
+                .options(load_only('id', 'needs_invalidate', 'is_ready'))\
+                .filter(SQLPage.needs_invalidate.is_(True))
+        for p in q.all():
+            if p.id == added_p.id:
+                continue
+            p.is_ready = False
         self.session.commit()
 
     def updateAll(self, page_infos, force=False):
@@ -252,7 +263,7 @@
         page_infos = list(page_infos)
         page_urls = set([p.url for p in page_infos])
         db_pages = self.session.query(SQLPage).\
-            options(load_only('id', 'url', 'path', 'time')).\
+            options(load_only('id', 'url', 'path', 'cache_time')).\
             all()
         for p in db_pages:
             if not os.path.isfile(p.path):
@@ -262,7 +273,7 @@
                 already_added.add(p.path)
                 path_time = datetime.datetime.fromtimestamp(
                     os.path.getmtime(p.path))
-                if path_time > p.time or (force and p.url in page_urls):
+                if path_time > p.cache_time or (force and p.url in page_urls):
                     # File has changed since last index.
                     to_remove.append(p)
                     to_update.add(p.path)
@@ -317,30 +328,6 @@
         for p in q.all():
             yield SQLDatabasePage(self, p, fields)
 
-    def isPageValid(self, url):
-        db_obj = self.session.query(SQLPage).\
-            options(load_only('id', 'url', 'path', 'time')).\
-            filter(SQLPage.url == url).\
-            first()
-        if not db_obj:
-            return False
-        path_time = datetime.datetime.fromtimestamp(
-            os.path.getmtime(db_obj.path))
-        return path_time < db_obj.time
-
-    def invalidateCache(self, ids):
-        if not isinstance(ids, list):
-            ids = list(ids)
-        logger.debug("Invalidating %d page caches in SQL database." % len(ids))
-
-        db_pages = self.session.query(SQLPage).\
-            options(load_only('id', 'url', 'is_ready')).\
-            filter(SQLPage.id.in_(ids)).\
-            all()
-        for p in db_pages:
-            p.is_ready = False
-        self.session.commit()
-
     def cachePage(self, page):
         if not hasattr(page, '_id') or not page._id:
             raise Exception("Given page '%s' has no `_id` attribute set." % page.url)
@@ -361,6 +348,7 @@
             raise
 
         db_obj.ready_text = page._data.text
+        db_obj.needs_invalidate = False
 
         del db_obj.ready_meta[:]
         for name, value in page._data.ext_meta.iteritems():
@@ -371,6 +359,8 @@
             else:
                 for v in value:
                     db_obj.ready_meta.append(SQLReadyMeta(name, v))
+            if name in ['include', 'query']:
+                db_obj.needs_invalidate = True
 
         del db_obj.ready_links[:]
         for link_url in page._data.ext_links:
@@ -444,7 +434,7 @@
         logger.debug("Adding page '%s' to SQL database." % page.url)
 
         po = SQLPage()
-        po.time = datetime.datetime.now()
+        po.cache_time = datetime.datetime.now()
         po.url = page.url
         po.endpoint, _ = split_page_url(page.url)
         po.path = page.path
@@ -489,6 +479,8 @@
             data.url = db_obj.url
         if fields is None or 'path' in fields:
             data.path = db_obj.path
+        if fields is None or 'cache_time' in fields:
+            data.cache_time = db_obj.cache_time
         if fields is None or 'title' in fields:
             data.title = db_obj.title
         if fields is None or 'raw_text' in fields:
--- a/wikked/page.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/page.py	Sat Sep 27 22:49:15 2014 -0700
@@ -30,6 +30,7 @@
     def __init__(self):
         self.url = None
         self.path = None
+        self.cache_time = None
         self.title = None
         self.raw_text = None
         self.formatted_text = None
@@ -57,6 +58,10 @@
         return self._data.path
 
     @property
+    def cache_time(self):
+        return self._data.cache_time
+
+    @property
     def extension(self):
         if self._data.path is None:
             raise Exception("The 'path' field was not loaded.")
@@ -133,6 +138,7 @@
         data = PageData()
         data.url = page_info.url
         data.path = page_info.path
+        data.cache_time = None
         data.raw_text = page_info.content
 
         # Format the page and get the meta properties.
--- a/wikked/views/__init__.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/views/__init__.py	Sat Sep 27 22:49:15 2014 -0700
@@ -1,5 +1,7 @@
+import os.path
 import urllib
 import string
+import datetime
 from flask import g, abort, jsonify
 from flask.ext.login import current_user
 from wikked.fs import PageNotFoundError
@@ -32,24 +34,30 @@
 
 
 def get_page_or_none(url, fields=None, convert_url=True,
-        check_perms=DONT_CHECK, force_resolve=False):
+        check_perms=DONT_CHECK):
     if convert_url:
         url = url_from_viewarg(url)
 
+    auto_reload = app.config.get('WIKI_AUTO_RELOAD')
+    if auto_reload and fields is not None:
+        if 'path' not in fields:
+            fields.append('path')
+        if 'cache_time' not in fields:
+            fields.append('cache_time')
+
     try:
-        if app.config.get('WIKI_AUTO_RELOAD'):
-            if not g.wiki.db.isPageValid(url):
-                app.logger.info("Page '%s' has changed, reloading." % url)
-                g.wiki.updatePage(url=url)
-            else:
-                app.logger.debug("Page '%s' is up to date." % url)
-        elif force_resolve:
-            g.wiki.resolve(only_urls=[url], force=True)
-
         page = g.wiki.getPage(url, fields=fields)
     except PageNotFoundError:
         return None
 
+    if auto_reload:
+        path_time = datetime.datetime.fromtimestamp(
+            os.path.getmtime(page.path))
+        if path_time >= page.cache_time:
+            app.logger.info("Page '%s' has changed, reloading." % url)
+            g.wiki.updatePage(path=page.path)
+            page = g.wiki.getPage(url, fields=fields)
+
     if check_perms == CHECK_FOR_READ and not is_page_readable(page):
         abort(401)
     elif check_perms == CHECK_FOR_WRITE and not is_page_writable(page):
@@ -59,9 +67,8 @@
 
 
 def get_page_or_404(url, fields=None, convert_url=True,
-        check_perms=DONT_CHECK, force_resolve=False):
-    page = get_page_or_none(url, fields, convert_url, check_perms,
-            force_resolve)
+        check_perms=DONT_CHECK):
+    page = get_page_or_none(url, fields, convert_url, check_perms)
     if page is not None:
         return page
     app.logger.error("No such page: " + url)
--- a/wikked/views/read.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/views/read.py	Sat Sep 27 22:49:15 2014 -0700
@@ -68,7 +68,6 @@
         else:
             additional_info['user'] = False
 
-    force_resolve = ('force_resolve' in request.args)
     no_redirect = ('no_redirect' in request.args)
 
     endpoint, value, path = split_url_from_viewarg(url)
@@ -80,8 +79,7 @@
                     path,
                     fields=['url', 'title', 'text', 'meta'],
                     convert_url=False,
-                    check_perms=CHECK_FOR_READ,
-                    force_resolve=force_resolve)
+                    check_perms=CHECK_FOR_READ)
             visited_paths.append(path)
             redirect_meta = page.getMeta('redirect')
             if redirect_meta is None:
@@ -107,8 +105,7 @@
             meta_page_url,
             fields=['url', 'title', 'text', 'meta'],
             convert_url=False,
-            check_perms=CHECK_FOR_READ,
-            force_resolve=force_resolve)
+            check_perms=CHECK_FOR_READ)
 
     endpoint_info = g.wiki.endpoints.get(endpoint)
     if endpoint_info is not None:
@@ -119,8 +116,7 @@
                     endpoint_info.default,
                     fields=['url', 'title', 'text', 'meta'],
                     convert_url=False,
-                    check_perms=CHECK_FOR_READ,
-                    force_resolve=force_resolve)
+                    check_perms=CHECK_FOR_READ)
 
         if not endpoint_info.query:
             # Not a query-based endpoint (like categories). Let's just
--- a/wikked/wiki.py	Sat Sep 27 22:47:48 2014 -0700
+++ b/wikked/wiki.py	Sat Sep 27 22:49:15 2014 -0700
@@ -281,15 +281,6 @@
             page_info.url,
             fields=['url', 'path', 'title', 'text']))
 
-        # Invalidate all the appropriate pages.
-        logger.info("Handling dependencies...")
-        invalidate_ids = []
-        db_pages = self.db.getPages(fields=['local_meta'])
-        for p in db_pages:
-            if p.getLocalMeta('include') or p.getLocalMeta('query'):
-                invalidate_ids.append(p._id)
-        self.db.invalidateCache(invalidate_ids)
-
     def updateAll(self):
         """ Completely updates all pages, i.e. read them from the file-system
             and have them fully resolved and cached in the DB.