Mercurial > wikked
changeset 221:4306c6b56b30
Some optimizations for `wk update`:
- don't always load/format the text contents.
- better naming for a few functions.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Mon, 03 Mar 2014 22:03:06 -0800 |
parents | 6ef588e2e748 |
children | 31ac8bd02ddd |
files | wikked/db/base.py wikked/db/sql.py wikked/fs.py wikked/page.py wikked/wiki.py |
diffstat | 5 files changed, 24 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/wikked/db/base.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/db/base.py	Mon Mar 03 22:03:06 2014 -0800
@@ -19,10 +19,10 @@
     def close(self):
         raise NotImplementedError()
 
-    def reset(self, pages):
+    def reset(self, page_infos, page_factory):
         raise NotImplementedError()
 
-    def update(self, pages, force=False):
+    def update(self, page_infos, page_factory, force=False):
         raise NotImplementedError()
 
     def getPageUrls(self, subdir=None, uncached_only=False):
--- a/wikked/db/sql.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/db/sql.py	Mon Mar 03 22:03:06 2014 -0800
@@ -208,25 +208,26 @@
         self._session.commit()
         self._session.remove()
 
-    def reset(self, pages):
+    def reset(self, page_infos, page_factory):
         logger.debug("Re-creating SQL database.")
         self._createSchema()
-        for page in pages:
+        for pi in page_infos:
+            page = page_factory(pi)
             self._addPage(page)
         self.session.commit()
 
-    def update(self, pages, force=False):
+    def update(self, page_infos, page_factory, force=False):
         if self._needsSchemaUpdate():
             raise Exception("This wiki needs a database upgrade. "
                             "Please run `wk reset`.")
 
+        logger.debug("Updating SQL database...")
+
         to_update = set()
         already_added = set()
         to_remove = []
-        pages = list(pages)
-
-        logger.debug("Updating SQL database...")
-        page_urls = [p.url for p in pages]
+        page_infos = list(page_infos)
+        page_urls = set([p.url for p in page_infos])
         db_pages = self.session.query(SQLPage).\
                 options(load_only('id', 'url', 'path', 'time')).\
                 all()
@@ -250,10 +251,11 @@
         self.session.commit()
 
         added_db_objs = []
-        for p in pages:
-            if (p.path in to_update or
-                    p.path not in already_added):
-                added_db_objs.append(self._addPage(p))
+        for pi in page_infos:
+            if (pi.path in to_update or
+                    pi.path not in already_added):
+                page = page_factory(pi)
+                added_db_objs.append(self._addPage(page))
 
         self.session.commit()
 
@@ -276,12 +278,12 @@
         return [o.id for o in added_db_objs]
 
     def getPageUrls(self, subdir=None, uncached_only=False):
-        q = self.session.query(SQLPage.url)
+        q = self.session.query(SQLPage.url, SQLPage.is_ready)
         if subdir:
             subdir = string.rstrip(subdir, '/') + '/%'
             q = q.filter(SQLPage.url.like(subdir))
         if uncached_only:
-            q = q.filter(SQLPage.is_ready is False)
+            q = q.filter(SQLPage.is_ready == False)
         for p in q.all():
             yield p.url
 
--- a/wikked/fs.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/fs.py	Mon Mar 03 22:03:06 2014 -0800
@@ -91,7 +91,7 @@
             return None
         return self._getPageInfo(path)
 
-    def getPage(self, url):
+    def findPageInfo(self, url):
         logger.debug("Searching for page: %s" % url)
         path = self.getPhysicalPagePath(url)
         return PageInfo(url, path)
--- a/wikked/page.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/page.py	Mon Mar 03 22:03:06 2014 -0800
@@ -135,8 +135,3 @@
             data.title = re.sub(r'\-', ' ', filename_split[0])
 
         return data
-
-    @staticmethod
-    def fromPageInfos(wiki, page_infos):
-        for p in page_infos:
-            yield FileSystemPage(wiki, p)
--- a/wikked/wiki.py	Mon Mar 03 22:01:57 2014 -0800
+++ b/wikked/wiki.py	Mon Mar 03 22:03:06 2014 -0800
@@ -240,27 +240,23 @@
     def reset(self):
         logger.info("Resetting wiki data...")
         page_infos = self.fs.getPageInfos()
-        fs_pages = FileSystemPage.fromPageInfos(self, page_infos)
-        self.db.reset(fs_pages)
+        factory = lambda pi: FileSystemPage(self, pi)
+        self.db.reset(page_infos, factory)
         self._cachePages(force_resolve=True)
         self.index.reset(self.getPages())
 
     def update(self, url=None, cache_ext_data=True):
-        updated_urls = []
         logger.info("Updating pages...")
+        factory = lambda pi: FileSystemPage(self, pi)
         if url:
-            page_info = self.fs.getPage(url)
-            fs_page = FileSystemPage(self, page_info)
-            self.db.update([fs_page], force=True)
-            updated_urls.append(url)
+            page_info = self.fs.findPage(url)
+            self.db.update([page_info], factory, force=True)
             self._cachePages([url])
             self.index.update([self.getPage(url)])
         else:
             page_infos = self.fs.getPageInfos()
-            fs_pages = FileSystemPage.fromPageInfos(self, page_infos)
-            self.db.update(fs_pages)
+            self.db.update(page_infos, factory)
             self._cachePages()
-            updated_urls += [p.url for p in fs_pages]
             self.index.update(self.getPages())
 
     def getPageUrls(self, subdir=None):