changeset 221:4306c6b56b30

Some optimizations for `wk update`: don't always load/format the text contents; better naming for a few functions.
author Ludovic Chabant <ludovic@chabant.com>
date Mon, 03 Mar 2014 22:03:06 -0800
parents 6ef588e2e748
children 31ac8bd02ddd
files wikked/db/base.py wikked/db/sql.py wikked/fs.py wikked/page.py wikked/wiki.py
diffstat 5 files changed, 24 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/wikked/db/base.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/db/base.py	Mon Mar 03 22:03:06 2014 -0800
@@ -19,10 +19,10 @@
     def close(self):
         raise NotImplementedError()
 
-    def reset(self, pages):
+    def reset(self, page_infos, page_factory):
         raise NotImplementedError()
 
-    def update(self, pages, force=False):
+    def update(self, page_infos, page_factory, force=False):
         raise NotImplementedError()
 
     def getPageUrls(self, subdir=None, uncached_only=False):
--- a/wikked/db/sql.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/db/sql.py	Mon Mar 03 22:03:06 2014 -0800
@@ -208,25 +208,26 @@
                 self._session.commit()
             self._session.remove()
 
-    def reset(self, pages):
+    def reset(self, page_infos, page_factory):
         logger.debug("Re-creating SQL database.")
         self._createSchema()
-        for page in pages:
+        for pi in page_infos:
+            page = page_factory(pi)
             self._addPage(page)
         self.session.commit()
 
-    def update(self, pages, force=False):
+    def update(self, page_infos, page_factory, force=False):
         if self._needsSchemaUpdate():
             raise Exception("This wiki needs a database upgrade. "
                             "Please run `wk reset`.")
 
+        logger.debug("Updating SQL database...")
+
         to_update = set()
         already_added = set()
         to_remove = []
-        pages = list(pages)
-
-        logger.debug("Updating SQL database...")
-        page_urls = [p.url for p in pages]
+        page_infos = list(page_infos)
+        page_urls = set([p.url for p in page_infos])
         db_pages = self.session.query(SQLPage).\
             options(load_only('id', 'url', 'path', 'time')).\
             all()
@@ -250,10 +251,11 @@
         self.session.commit()
 
         added_db_objs = []
-        for p in pages:
-            if (p.path in to_update or
-                    p.path not in already_added):
-                added_db_objs.append(self._addPage(p))
+        for pi in page_infos:
+            if (pi.path in to_update or
+                    pi.path not in already_added):
+                page = page_factory(pi)
+                added_db_objs.append(self._addPage(page))
 
         self.session.commit()
 
@@ -276,12 +278,12 @@
         return [o.id for o in added_db_objs]
 
     def getPageUrls(self, subdir=None, uncached_only=False):
-        q = self.session.query(SQLPage.url)
+        q = self.session.query(SQLPage.url, SQLPage.is_ready)
         if subdir:
             subdir = string.rstrip(subdir, '/') + '/%'
             q = q.filter(SQLPage.url.like(subdir))
         if uncached_only:
-            q = q.filter(SQLPage.is_ready is False)
+            q = q.filter(SQLPage.is_ready == False)
         for p in q.all():
             yield p.url
 
--- a/wikked/fs.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/fs.py	Mon Mar 03 22:03:06 2014 -0800
@@ -91,7 +91,7 @@
                 return None
         return self._getPageInfo(path)
 
-    def getPage(self, url):
+    def findPageInfo(self, url):
         logger.debug("Searching for page: %s" % url)
         path = self.getPhysicalPagePath(url)
         return PageInfo(url, path)
--- a/wikked/page.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/page.py	Mon Mar 03 22:03:06 2014 -0800
@@ -135,8 +135,3 @@
             data.title = re.sub(r'\-', ' ', filename_split[0])
 
         return data
-
-    @staticmethod
-    def fromPageInfos(wiki, page_infos):
-        for p in page_infos:
-            yield FileSystemPage(wiki, p)
--- a/wikked/wiki.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/wiki.py	Mon Mar 03 22:03:06 2014 -0800
@@ -240,27 +240,23 @@
     def reset(self):
         logger.info("Resetting wiki data...")
         page_infos = self.fs.getPageInfos()
-        fs_pages = FileSystemPage.fromPageInfos(self, page_infos)
-        self.db.reset(fs_pages)
+        factory = lambda pi: FileSystemPage(self, pi)
+        self.db.reset(page_infos, factory)
         self._cachePages(force_resolve=True)
         self.index.reset(self.getPages())
 
     def update(self, url=None, cache_ext_data=True):
-        updated_urls = []
         logger.info("Updating pages...")
+        factory = lambda pi: FileSystemPage(self, pi)
         if url:
-            page_info = self.fs.getPage(url)
-            fs_page = FileSystemPage(self, page_info)
-            self.db.update([fs_page], force=True)
-            updated_urls.append(url)
+            page_info = self.fs.findPageInfo(url)
+            self.db.update([page_info], factory, force=True)
             self._cachePages([url])
             self.index.update([self.getPage(url)])
         else:
             page_infos = self.fs.getPageInfos()
-            fs_pages = FileSystemPage.fromPageInfos(self, page_infos)
-            self.db.update(fs_pages)
+            self.db.update(page_infos, factory)
             self._cachePages()
-            updated_urls += [p.url for p in fs_pages]
             self.index.update(self.getPages())
 
     def getPageUrls(self, subdir=None):