Mercurial > piecrust2
view piecrust/dataproviders/pageiterator.py @ 1188:a7c43131d871
bake: Fix file write flushing problem with Python 3.8+
Writing the cache files fails in Python 3.8 because it looks like flushing
behaviour has changed. We need to explicitly flush. And even then, in very
rare occurrences, it looks like it can still run into racing conditions,
so we do a very hacky and ugly "retry" loop when fetching cached data :(
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 15 Jun 2021 22:36:23 -0700 |
parents | a9a592f655e3 |
children |
line wrap: on
line source
import logging from piecrust.data.filters import PaginationFilter from piecrust.data.paginationdata import PaginationData from piecrust.events import Event from piecrust.dataproviders.base import DataProvider from piecrust.sources.base import ContentSource logger = logging.getLogger(__name__) class _CombinedSource: def __init__(self, sources): self.sources = sources self.app = sources[0].app self.name = None # This is for recursive traversal of the iterator chain. # See later in `PageIterator`. self.it = None def __iter__(self): sources = self.sources if len(sources) == 1: source = sources[0] self.name = source.name yield from source.getAllPages() self.name = None return # Return the pages from all the combined sources, but skip # those that are "overridden" -- e.g. a theme page that gets # replaced by a user page of the same name. used_uris = set() for source in sources: self.name = source.name for page in source.getAllPages(): page_uri = page.getUri() if page_uri not in used_uris: used_uris.add(page_uri) yield page self.name = None class PageIteratorDataProvider(DataProvider): """ A data provider that reads a content source as a list of pages. This class supports wrapping another `PageIteratorDataProvider` instance because several sources may want to be merged under the same data endpoint (e.g. `site.pages` which lists both the user pages and the theme pages). """ PROVIDER_NAME = 'page_iterator' debug_render_doc_dynamic = ['_debugRenderDoc'] debug_render_not_empty = True def __init__(self, source, page): super().__init__(source, page) self._app = source.app self._it = None self._iterated = False def __len__(self): self._load() return len(self._it) def __iter__(self): self._load() yield from self._it def _load(self): if self._it is not None: return combined_source = _CombinedSource(list(reversed(self._sources))) self._it = PageIterator(combined_source, current_page=self._page) self._it._load_event += self._onIteration def _onIteration(self, it): if not self._iterated: rcs = self._app.env.render_ctx_stack if rcs.current_ctx is not None: rcs.current_ctx.addUsedSource(it._source) self._iterated = True def _addSource(self, source): if self._it is not None: raise Exception("Can't add sources after the data provider " "has been loaded.") super()._addSource(source) def _debugRenderDoc(self): return 'Provides a list of %d items' % len(self) class PageIterator: def __init__(self, source, *, current_page=None): self._source = source self._is_content_source = isinstance( source, (ContentSource, _CombinedSource)) self._cache = None self._pagination_slicer = None self._has_sorter = False self._next_page = None self._prev_page = None self._locked = False self._load_event = Event() self._iter_event = Event() self._current_page = current_page self._initIterator() @property def total_count(self): self._load() if self._pagination_slicer is not None: return self._pagination_slicer.inner_count return len(self._cache) @property def next_page(self): self._load() return self._next_page @property def prev_page(self): self._load() return self._prev_page def __len__(self): self._load() return len(self._cache) def __getitem__(self, key): self._load() return self._cache[key] def __iter__(self): self._load() self._iter_event.fire(self) return iter(self._cache) def __getattr__(self, name): if name[:3] == 'is_' or name[:3] == 'in_': def is_filter(value): conf = {'is_%s' % name[3:]: value} return self._simpleNonSortedWrap(SettingFilterIterator, conf) return is_filter if name[:4] == 'has_': def has_filter(value): conf = {name: value} return self._simpleNonSortedWrap(SettingFilterIterator, conf) return has_filter if name[:5] == 'with_': def has_filter(value): conf = {'has_%s' % name[5:]: value} return self._simpleNonSortedWrap(SettingFilterIterator, conf) return has_filter return self.__getattribute__(name) def skip(self, count): return self._simpleWrap(SliceIterator, count) def limit(self, count): return self._simpleWrap(SliceIterator, 0, count) def slice(self, skip, limit): return self._simpleWrap(SliceIterator, skip, limit) def filter(self, filter_name): if self._current_page is None: raise Exception("Can't use `filter()` because no parent page was " "set for this page iterator.") filter_conf = self._current_page.config.get(filter_name) if filter_conf is None: raise Exception("Couldn't find filter '%s' in the configuration " "header for page: %s" % (filter_name, self._current_page.path)) return self._simpleNonSortedWrap(SettingFilterIterator, filter_conf) def sort(self, setting_name=None, reverse=False): if setting_name: self._wrapAsSort(SettingSortIterator, setting_name, reverse) else: self._wrapAsSort(NaturalSortIterator, reverse) return self def reset(self): self._ensureUnlocked() self._unload() return self @property def _is_loaded(self): return self._cache is not None @property def _has_more(self): self._load() if self._pagination_slicer: return self._pagination_slicer.has_more return False def _simpleWrap(self, it_class, *args, **kwargs): self._ensureUnlocked() self._ensureUnloaded() self._ensureSorter() self._it = it_class(self._it, *args, **kwargs) if self._pagination_slicer is None and it_class is SliceIterator: self._pagination_slicer = self._it self._pagination_slicer.current_page = self._current_page return self def _simpleNonSortedWrap(self, it_class, *args, **kwargs): self._ensureUnlocked() self._ensureUnloaded() self._it = it_class(self._it, *args, **kwargs) return self def _wrapAsSort(self, sort_it_class, *args, **kwargs): self._ensureUnlocked() self._ensureUnloaded() self._it = sort_it_class(self._it, *args, **kwargs) self._has_sorter = True return self def _lockIterator(self): self._ensureUnlocked() self._locked = True def _ensureUnlocked(self): if self._locked: raise Exception( "This page iterator has been locked and can't be modified.") def _ensureUnloaded(self): if self._cache: raise Exception( "This page iterator has already been iterated upon and " "can't be modified anymore.") def _ensureSorter(self): if self._has_sorter: return if self._is_content_source: # For content sources, the default sorting is reverse # date/time sorting. self._it = DateSortIterator(self._it, reverse=True) self._has_sorter = True def _initIterator(self): if self._is_content_source: if isinstance(self._source, _CombinedSource): self._it = self._source else: self._it = PageContentSourceIterator(self._source) app = self._source.app if app.config.get('baker/is_baking'): # While baking, automatically exclude any page with # the `draft` setting. draft_setting = app.config['baker/no_bake_setting'] self._it = NoDraftsIterator(self._it, draft_setting) if not app.config.get('baker/bake_future'): # Don't bake pages from the future. self._it = PruneFutureIterator(self._it, app.env.start_datetime) elif app.config.get('server/is_serving'): if not app.config.get('server/serve_future'): # Don't serve pages from the future. self._it = PruneFutureIterator(self._it, app.env.start_datetime) else: self._it = GenericSourceIterator(self._source) def _unload(self): self._initIterator() self._cache = None self._paginationSlicer = None self._has_sorter = False self._next_page = None self._prev_page = None def _load(self): if self._cache is not None: return self._ensureSorter() if self._is_content_source: self._it = PaginationDataBuilderIterator(self._it) self._cache = list(self._it) if (self._current_page is not None and self._pagination_slicer is not None): pn = [self._pagination_slicer.prev_page, self._pagination_slicer.next_page] pn_it = PaginationDataBuilderIterator(iter(pn)) self._prev_page, self._next_page = (list(pn_it)) self._load_event.fire(self) def _debugRenderDoc(self): return "Contains %d items" % len(self) class SettingFilterIterator: def __init__(self, it, fil_conf): self.it = it self.fil_conf = fil_conf self._fil = None def __iter__(self): if self._fil is None: self._fil = PaginationFilter() self._fil.addClausesFromConfig(self.fil_conf) for i in self.it: if self._fil.pageMatches(i): yield i class HardCodedFilterIterator: def __init__(self, it, fil): self.it = it self._fil = fil def __iter__(self): for i in self.it: if self._fil.pageMatches(i): yield i class SliceIterator: def __init__(self, it, offset=0, limit=-1): self.it = it self.offset = offset self.limit = limit self.current_page = None self.has_more = False self.inner_count = -1 self.next_page = None self.prev_page = None self._cache = None def __iter__(self): if self._cache is None: inner_list = list(self.it) self.inner_count = len(inner_list) if self.limit > 0: self.has_more = self.inner_count > (self.offset + self.limit) self._cache = inner_list[self.offset:self.offset + self.limit] else: self.has_more = False self._cache = inner_list[self.offset:] if self.current_page: try: idx = inner_list.index(self.current_page) except ValueError: idx = -1 if idx >= 0: if idx < self.inner_count - 1: self.next_page = inner_list[idx + 1] if idx > 0: self.prev_page = inner_list[idx - 1] return iter(self._cache) class NaturalSortIterator: def __init__(self, it, reverse=False): self.it = it self.reverse = reverse def __iter__(self): return iter(sorted(self.it, reverse=self.reverse)) class SettingSortIterator: def __init__(self, it, name, reverse=False): self.it = it self.name = name self.reverse = reverse def __iter__(self): return iter(sorted(self.it, key=self._key_getter, reverse=self.reverse)) def _key_getter(self, item): key = item.config.get(self.name) if key is None: return 0 return key class DateSortIterator: def __init__(self, it, reverse=True): self.it = it self.reverse = reverse def __iter__(self): return iter(sorted(self.it, key=lambda x: x.datetime, reverse=self.reverse)) class PageContentSourceIterator: def __init__(self, source): self.source = source # This is to permit recursive traversal of the # iterator chain. It acts as the end. self.it = None def __iter__(self): source = self.source yield from source.getAllPages() class NoDraftsIterator: def __init__(self, source, no_draft_setting): self.it = source self.no_draft_setting = no_draft_setting def __iter__(self): nds = self.no_draft_setting yield from filter(lambda i: not i.config.get(nds), self.it) class PruneFutureIterator: def __init__(self, source, now_dt): self.it = source self.now_dt = now_dt def __iter__(self): now_dt = self.now_dt for i in self.it: if i.datetime <= now_dt: yield i class PaginationDataBuilderIterator: def __init__(self, it): self.it = it def __iter__(self): for page in self.it: if page is not None: yield PaginationData(page) else: yield None class GenericSourceIterator: def __init__(self, source): self.source = source self.it = None def __iter__(self): yield from self.source