Mercurial > wikked
view wikked/db/sql.py @ 142:7e4287d9b3bb
Changes to make it easier to deploy:
- Local config can go in `.wikirc.local`.
- Remove path dependencies from `WikiParameters`.
- More configurability in the SQL database backend.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 10 Dec 2013 13:49:03 -0800 |
parents | df04e6a875ba |
children | 391dee401693 |
line wrap: on
line source
import os import os.path import types import string import logging import datetime from sqlalchemy import ( create_engine, and_, or_, Column, Boolean, Integer, String, Text, DateTime, ForeignKey) from sqlalchemy.orm import ( scoped_session, sessionmaker, relationship, backref, defer) from sqlalchemy.ext.declarative import declarative_base from base import Database from wikked.page import Page, FileSystemPage, PageData, PageLoadingError from wikked.formatter import SINGLE_METAS from wikked.utils import PageNotFoundError logger = logging.getLogger(__name__) Base = declarative_base() class SQLPage(Base): __tablename__ = 'pages' id = Column(Integer, primary_key=True) time = Column(DateTime) url = Column(Text) path = Column(Text) title = Column(Text) raw_text = Column(Text) formatted_text = Column(Text) meta = relationship('SQLMeta', order_by='SQLMeta.id', backref=backref('page'), cascade='all, delete, delete-orphan') links = relationship('SQLLink', order_by='SQLLink.id', backref=backref('source'), cascade='all, delete, delete-orphan') ready_text = Column(Text) is_ready = Column(Boolean) ready_meta = relationship('SQLReadyMeta', order_by='SQLReadyMeta.id', backref=backref('page'), cascade='all, delete, delete-orphan') ready_links = relationship('SQLReadyLink', order_by='SQLReadyLink.id', backref=backref('source'), cascade='all, delete, delete-orphan') class SQLMeta(Base): __tablename__ = 'meta' id = Column(Integer, primary_key=True) page_id = Column(Integer, ForeignKey('pages.id')) name = Column(String(128)) value = Column(Text) def __init__(self, name=None, value=None): self.name = name self.value = value class SQLReadyMeta(Base): __tablename__ = 'ready_meta' id = Column(Integer, primary_key=True) page_id = Column(Integer, ForeignKey('pages.id')) name = Column(String(128)) value = Column(Text) def __init__(self, name=None, value=None): self.name = name self.value = value class SQLLink(Base): __tablename__ = 'links' id = Column(Integer, primary_key=True) source_id = Column(Integer, ForeignKey('pages.id')) target_url = Column(Text) def __init__(self, target_url=None): self.target_url = target_url class SQLReadyLink(Base): __tablename__ = 'ready_links' id = Column(Integer, primary_key=True) source_id = Column(Integer, ForeignKey('pages.id')) target_url = Column(Text) def __init__(self, target_url=None): self.target_url = target_url class SQLInfo(Base): __tablename__ = 'info' id = Column(Integer, primary_key=True) name = Column(String(64)) str_value = Column(String(256)) int_value = Column(Integer) time_value = Column(DateTime) class SQLDatabase(Database): """ A database cache based on SQL. """ schema_version = 3 def __init__(self): Database.__init__(self) self.engine = None def initDb(self, wiki): self.wiki = wiki url_params = {'root': wiki.root} engine_url = wiki.config.get('wiki', 'database_url') % url_params logger.info("Using database from URL: %s" % engine_url) self.engine = create_engine(engine_url, convert_unicode=True) self.session = scoped_session(sessionmaker( autocommit=False, autoflush=False, bind=self.engine)) Base.query = self.session.query_property() create_schema = False if engine_url != 'sqlite:///:memory:': # The existing schema is outdated, re-create it. schema_version = self._getSchemaVersion() if schema_version < self.schema_version: logger.debug( "SQL database is outdated (got version %s), will re-create.", schema_version) create_schema = True else: logger.debug( "SQL database has up-to-date schema.") else: create_schema = True if create_schema: self._createSchema() def open(self): logger.debug("Opening connection") def close(self): logger.debug("Closing connection") def reset(self, pages): logger.debug("Re-creating SQL database.") self._createSchema() for page in pages: self._addPage(page) self.session.commit() def update(self, pages, force=False): to_update = set() already_added = set() to_remove = [] pages = list(pages) logger.debug("Updating SQL database...") page_urls = [p.url for p in pages] db_pages = self.session.query(SQLPage).\ all() for p in db_pages: if not os.path.isfile(p.path): # File was deleted. to_remove.append(p) else: already_added.add(p.path) path_time = datetime.datetime.fromtimestamp( os.path.getmtime(p.path)) if path_time > p.time or (force and p.url in page_urls): # File has changed since last index. to_remove.append(p) to_update.add(p.path) for p in to_remove: self._removePage(p) self.session.commit() added_db_objs = [] for p in pages: if (p.path in to_update or p.path not in already_added): added_db_objs.append(self._addPage(p)) self.session.commit() if to_remove or added_db_objs: # If pages have been added/removed/updated, invalidate everything # in the wiki that has includes or queries. db_pages = self.session.query(SQLPage).\ options( defer(SQLPage.title), defer(SQLPage.raw_text), defer(SQLPage.formatted_text), defer(SQLPage.ready_text)).\ join(SQLReadyMeta).\ filter(or_(SQLReadyMeta.name == 'include', SQLReadyMeta.name == 'query')).\ all() for p in db_pages: p.is_ready = False self.session.commit() logger.debug("...done updating SQL database.") return [o.id for o in added_db_objs] def getPageUrls(self, subdir=None): q = self.session.query(SQLPage.url) if subdir: subdir = string.rstrip(subdir, '/') + '/%' q = q.filter(SQLPage.url.like(subdir)) urls = [] for p in q.all(): urls.append(p.url) return urls def getPages(self, subdir=None, meta_query=None): q = self.session.query(SQLPage) if meta_query: q = q.join(SQLReadyMeta) for name, values in meta_query.iteritems(): for v in values: q = q.filter(and_(SQLReadyMeta.name == name, SQLReadyMeta.value == v)) if subdir: subdir = string.rstrip(subdir, '/') + '/%' q = q.filter(SQLPage.url.like(subdir)) for p in q.all(): yield SQLDatabasePage(self.wiki, db_obj=p) def pageExists(self, url=None, path=None): # TODO: replace with an `EXIST` query. return self.getPage(url, path, raise_if_none=False) is not None def getLinksTo(self, url): q = self.session.query(SQLReadyLink).\ filter(SQLReadyLink.target_url == url).\ join(SQLReadyLink.source).\ all() for l in q: yield l.source.url def getUncachedPages(self): q = self.session.query(SQLPage).\ filter(SQLPage.is_ready == False).\ all() for p in q: yield SQLDatabasePage(self.wiki, db_obj=p) def _getPageByUrl(self, url): q = self.session.query(SQLPage).filter_by(url=url) page = q.first() if page is None: return None return SQLDatabasePage(self.wiki, db_obj=page) def _getPageByPath(self, path): q = self.session.query(SQLPage).filter_by(path=path) page = q.first() if page is None: return None return SQLDatabasePage(self.wiki, db_obj=page) def _createSchema(self): Base.metadata.drop_all(self.engine) Base.metadata.create_all(self.engine) ver = SQLInfo() ver.name = 'schema_version' ver.int_value = self.schema_version self.session.add(ver) self.session.commit() def _getSchemaVersion(self): try: q = self.session.query(SQLInfo).\ filter(SQLInfo.name == 'schema_version').\ first() if q is None: return 0 except: return -1 return q.int_value def _addPage(self, page): logger.debug("Adding page '%s' to SQL database." % page.url) po = SQLPage() po.time = datetime.datetime.now() po.url = page.url po.path = page.path po.title = page.title po.raw_text = page.raw_text po.formatted_text = page.getFormattedText() po.ready_text = None po.is_ready = False for name, value in page.getLocalMeta().iteritems(): if isinstance(value, bool): value = "" if isinstance(value, types.StringTypes): po.meta.append(SQLMeta(name, value)) else: for v in value: po.meta.append(SQLMeta(name, v)) for link_url in page.getLocalLinks(): po.links.append(SQLLink(link_url)) self.session.add(po) return po def _cacheExtendedData(self, page): logger.info("Caching extended data for page '%s' [%d]." % (page.url, page._id)) if not hasattr(page, '_id') or not page._id: raise Exception("Given page '%s' has no `_id` attribute set." % page.url) db_obj = self.session.query(SQLPage).filter(SQLPage.id == page._id).one() db_obj.ready_text = page._data.text del db_obj.ready_meta[:] for name, value in page._data.ext_meta.iteritems(): if isinstance(value, bool): value = "" if isinstance(value, types.StringTypes): db_obj.ready_meta.append(SQLReadyMeta(name, value)) else: for v in value: db_obj.ready_meta.append(SQLReadyMeta(name, v)) del db_obj.ready_links[:] for link_url in page._data.ext_links: db_obj.ready_links.append(SQLReadyLink(link_url)) db_obj.is_ready = True self.session.commit() def _removePage(self, page): logger.debug("Removing page '%s' [%d] from SQL database." % (page.url, page.id)) self.session.delete(page) class SQLDatabasePage(Page): """ A page that can load its properties from a database. """ def __init__(self, wiki, url=None, db_obj=None): if url and db_obj: raise Exception("You can't specify both an url and a database object.") if not url and not db_obj: raise Exception("You need to specify either a url or a database object.") super(SQLDatabasePage, self).__init__(wiki, url or db_obj.url) self.auto_update = wiki.config.get('wiki', 'auto_update') self._db_obj = db_obj @property def path(self): if self._db_obj: return self._db_obj.path return super(SQLDatabasePage, self).path @property def _id(self): if self._db_obj: return self._db_obj.id self._ensureData() return self._data._db_id def _loadData(self): try: db_obj = self._db_obj or self.wiki.db.getPage(self.url) except PageNotFoundError: raise PageNotFoundError(self.url, "Please run `update` or `reset`.") data = self._loadFromDbObject(db_obj) self._db_obj = None return data def _onExtendedDataLoaded(self): self.wiki.db._cacheExtendedData(self) def _loadFromDbObject(self, db_obj, bypass_auto_update=False): if not bypass_auto_update and self.auto_update: path_time = datetime.datetime.fromtimestamp( os.path.getmtime(db_obj.path)) if path_time >= db_obj.time: logger.debug( "Updating database cache for page '%s'." % self.url) try: fs_page = FileSystemPage(self.wiki, self.url) fs_page._ensureData() added_ids = self.wiki.db.update([fs_page]) fs_page._data._db_id = added_ids[0] return fs_page._data except Exception as e: msg = "Error updating database cache from the file-system: %s" % e raise PageLoadingError(msg, e) data = PageData() data._db_id = db_obj.id data.path = db_obj.path split = os.path.splitext(data.path) data.filename = split[0] data.extension = split[1].lstrip('.') data.title = db_obj.title data.raw_text = db_obj.raw_text data.formatted_text = db_obj.formatted_text data.local_meta = {} for m in db_obj.meta: value = data.local_meta.get(m.name) if m.name in SINGLE_METAS: data.local_meta[m.name] = m.value else: if value is None: data.local_meta[m.name] = [m.value] else: data.local_meta[m.name].append(m.value) data.local_links = [l.target_url for l in db_obj.links] if db_obj.is_ready and not self._force_resolve: # If we have extended cache data from the database, we might as # well load it now too. data.text = db_obj.ready_text for m in db_obj.ready_meta: value = data.ext_meta.get(m.name) if value is None: data.ext_meta[m.name] = [m.value] else: data.ext_meta[m.name].append(m.value) data.ext_links = [l.target_url for l in db_obj.ready_links] # Flag this data as completely loaded. data.has_extended_data = True return data