changeset 853:f070a4fc033c

core: Continue PieCrust3 refactor, simplify pages.

The asset pipeline is still the only functional pipeline at this point.

* No more `QualifiedPage`, and several other pieces of code deleted.
* Data providers are simpler and more focused. For instance, the page
  iterator doesn't try to support other types of items.
* Route parameters are proper known source metadata, which removes the
  confusion between the two.
* Make the baker and pipelines manage records and record histories more
  correctly.
* Add support for record collapsing and for deleting stale outputs in the
  asset pipeline.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 21 May 2017 00:06:59 -0700
parents 4850f8c21b6e
children 08e02c2a2a1a
files piecrust/app.py piecrust/appconfig.py piecrust/baking/baker.py piecrust/baking/worker.py piecrust/commands/builtin/baking.py piecrust/configuration.py piecrust/data/assetor.py piecrust/data/builder.py piecrust/data/iterators.py piecrust/data/linker.py piecrust/data/paginationdata.py piecrust/data/paginator.py piecrust/data/provider.py piecrust/dataproviders/__init__.py piecrust/dataproviders/asset_iterator.py piecrust/dataproviders/base.py piecrust/dataproviders/blog.py piecrust/dataproviders/page_iterator.py piecrust/events.py piecrust/page.py piecrust/pipelines/_procrecords.py piecrust/pipelines/asset.py piecrust/pipelines/base.py piecrust/pipelines/records.py piecrust/processing/compressors.py piecrust/processing/sitemap.py piecrust/rendering.py piecrust/serving/middlewares.py piecrust/serving/server.py piecrust/serving/util.py piecrust/sources/autoconfig.py piecrust/sources/base.py piecrust/sources/blogarchives.py piecrust/sources/default.py piecrust/sources/fs.py piecrust/sources/interfaces.py piecrust/sources/mixins.py piecrust/sources/posts.py piecrust/sources/prose.py
diffstat 37 files changed, 1251 insertions(+), 1361 deletions(-)
--- a/piecrust/app.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/app.py	Sun May 21 00:06:59 2017 -0700
@@ -34,18 +34,20 @@
         else:
             self.cache = NullExtensibleCache()
 
+        if env is None:
+            env = StandardEnvironment()
         self.env = env
-        if self.env is None:
-            self.env = StandardEnvironment()
-        self.env.initialize(self)
-        self.env.stats.registerTimer('SiteConfigLoad')
-        self.env.stats.registerTimer('PageLoad')
-        self.env.stats.registerTimer("PageDataBuild")
-        self.env.stats.registerTimer("BuildRenderData")
-        self.env.stats.registerTimer("PageRender")
-        self.env.stats.registerTimer("PageRenderSegments")
-        self.env.stats.registerTimer("PageRenderLayout")
-        self.env.stats.registerTimer("PageSerialize")
+        env.initialize(self)
+
+        stats = env.stats
+        stats.registerTimer('SiteConfigLoad')
+        stats.registerTimer('PageLoad')
+        stats.registerTimer("PageDataBuild")
+        stats.registerTimer("BuildRenderData")
+        stats.registerTimer("PageRender")
+        stats.registerTimer("PageRenderSegments")
+        stats.registerTimer("PageRenderLayout")
+        stats.registerTimer("PageSerialize")
 
     @cached_property
     def config(self):
@@ -193,19 +195,17 @@
         for source in self.sources:
             if source.name == source_name:
                 return source
-        return None
 
-    def getSourceRoutes(self, source_name):
+        from piecrust.sources.base import SourceNotFoundError
+        raise SourceNotFoundError(source_name)
+
+    def getSourceRoute(self, source_name):
         for route in self.routes:
             if route.source_name == source_name:
-                yield route
+                return route
 
-    def getSourceRoute(self, source_name, route_params):
-        for route in self.getSourceRoutes(source_name):
-            if (route_params is None or
-                    route.matchesParameters(route_params)):
-                return route
-        return None
+        from piecrust.routing import RouteNotFoundError
+        raise RouteNotFoundError(source_name)
 
     def getPublisher(self, target_name):
         for pub in self.publishers:
@@ -213,11 +213,11 @@
                 return pub
         return None
 
-    def getPage(self, content_item):
+    def getPage(self, source, content_item):
         cache_key = content_item.spec
         return self.env.page_repository.get(
             cache_key,
-            lambda: Page(content_item))
+            lambda: Page(source, content_item))
 
     def _get_dir(self, default_rel_dir):
         abs_dir = os.path.join(self.root_dir, default_rel_dir)
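
A quick usage sketch (not part of this changeset) of the lookup change above: `getSource()` and `getSourceRoute()` now raise instead of returning None, so call sites that tolerate a missing source must catch the new exceptions explicitly. Here `app` is assumed to be a PieCrust application instance:

    from piecrust.sources.base import SourceNotFoundError

    try:
        source = app.getSource('posts')
    except SourceNotFoundError:
        source = None  # restore the old "not found" behavior at this call site
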
--- a/piecrust/appconfig.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/appconfig.py	Sun May 21 00:06:59 2017 -0700
@@ -386,7 +386,7 @@
         sc.setdefault('fs_endpoint', sn)
         sc.setdefault('ignore_missing_dir', False)
         sc.setdefault('data_endpoint', None)
-        sc.setdefault('data_type', 'iterator')
+        sc.setdefault('data_type', None)
         sc.setdefault('item_name', sn)
         sc.setdefault('items_per_page', 5)
         sc.setdefault('date_format', DEFAULT_DATE_FORMAT)
@@ -412,6 +412,8 @@
 
     # Check routes are referencing correct sources, have default
     # values, etc.
+    used_sources = set()
+    existing_sources = set(values['site']['sources'].keys())
     for rc in v:
         if not isinstance(rc, dict):
             raise ConfigurationError("All routes in 'site/routes' must be "
@@ -426,12 +428,14 @@
         r_source = rc.get('source')
         if r_source is None:
             raise ConfigurationError("Routes must specify a source.")
-        if (r_source and
-                r_source not in list(values['site']['sources'].keys())):
+        if r_source not in existing_sources:
             raise ConfigurationError("Route is referencing unknown "
                                      "source: %s" % r_source)
+        if r_source in used_sources:
+            raise ConfigurationError("Source '%s' already has a route." %
+                                     r_source)
+        used_sources.add(r_source)
 
-        rc.setdefault('pass', 0)
         rc.setdefault('page_suffix', '/%num%')
 
     return v
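
For illustration, a routes list that the stricter validation above now rejects; since each source may only be claimed by one route, the second entry raises a ConfigurationError (the source name and URL patterns are made up):

    routes = [
        {'source': 'posts', 'url': '/%year%/%slug%'},
        {'source': 'posts', 'url': '/blog/%slug%'},
    ]
    # -> ConfigurationError: Source 'posts' already has a route.
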
--- a/piecrust/baking/baker.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/baking/baker.py	Sun May 21 00:06:59 2017 -0700
@@ -16,10 +16,10 @@
 logger = logging.getLogger(__name__)
 
 
-def get_bake_records_path(app, out_dir):
+def get_bake_records_path(app, out_dir, *, suffix=''):
     records_cache = app.cache.getCache('baker')
     records_id = hashlib.md5(out_dir.encode('utf8')).hexdigest()
-    records_name = records_id + '.record'
+    records_name = '%s%s.records' % (records_id, suffix)
     return records_cache.getCachePath(records_name)
 
 
@@ -74,7 +74,7 @@
 
         # Create the bake records history which tracks what's up-to-date
         # or not since last time we baked to the given output folder.
-        record_history = MultiRecordHistory(previous_records, self._records)
+        record_histories = MultiRecordHistory(previous_records, self._records)
 
         # Pre-create all caches.
         for cache_name in ['app', 'baker', 'pages', 'renders']:
@@ -87,8 +87,6 @@
         #
         # Also, create and initialize each pipeline for each source.
         sources_by_realm = {}
-        ppctx = PipelineContext(self.out_dir, record_history,
-                                force=self.force)
         for source in self.app.sources:
             pname = source.config['pipeline']
             if pname in self.allowed_pipelines:
@@ -96,8 +94,14 @@
                     source.config['realm'], [])
 
                 pp = self._pipeline_classes[pname](source)
+
+                record_name = _get_record_name(source.name, pname)
+                record_history = record_histories.getHistory(record_name)
+                ppctx = PipelineContext(self.out_dir, record_history,
+                                        force=self.force)
                 pp.initialize(ppctx)
-                srclist.append((source, pp))
+
+                srclist.append((source, pp, ppctx))
             else:
                 logger.debug(
                     "Skip source '%s' because pipeline '%s' is ignored." %
@@ -112,7 +116,19 @@
         for realm in realm_list:
             srclist = sources_by_realm.get(realm)
             if srclist is not None:
-                self._bakeRealm(record_history, pool, realm, srclist)
+                self._bakeRealm(pool, srclist)
+
+        # Handle deletions.
+        for realm in realm_list:
+            srclist = sources_by_realm.get(realm)
+            if srclist is not None:
+                self._deleteStaleOutputs(pool, srclist)
+
+        # Collapse records.
+        for realm in realm_list:
+            srclist = sources_by_realm.get(realm)
+            if srclist is not None:
+                self._collapseRecords(srclist)
 
         # All done with the workers. Close the pool and get reports.
         pool_stats = pool.close()
@@ -120,13 +136,13 @@
         for ps in pool_stats:
             if ps is not None:
                 total_stats.mergeStats(ps)
-        record_history.current.stats = total_stats
+        record_histories.current.stats = total_stats
 
         # Shutdown the pipelines.
         for realm in realm_list:
             srclist = sources_by_realm.get(realm)
             if srclist is not None:
-                for _, pp in srclist:
+                for _, pp, ppctx in srclist:
                     pp.shutdown(ppctx)
 
         # Backup previous records.
@@ -136,28 +152,28 @@
             suffix = '' if i == 0 else '.%d' % i
             records_path_i = os.path.join(
                 records_dir,
-                '%s%s.record' % (records_id, suffix))
+                '%s%s.records' % (records_id, suffix))
             if os.path.exists(records_path_i):
                 records_path_next = os.path.join(
                     records_dir,
-                    '%s.%s.record' % (records_id, i + 1))
+                    '%s.%s.records' % (records_id, i + 1))
                 if os.path.exists(records_path_next):
                     os.remove(records_path_next)
                 os.rename(records_path_i, records_path_next)
 
-        # Save the bake record.
+        # Save the bake records.
         with format_timed_scope(logger, "saved bake records.",
                                 level=logging.DEBUG, colored=False):
-            record_history.current.bake_time = time.time()
-            record_history.current.out_dir = self.out_dir
-            record_history.current.save(records_path)
+            record_histories.current.bake_time = time.time()
+            record_histories.current.out_dir = self.out_dir
+            record_histories.current.save(records_path)
 
         # All done.
         self.app.config.set('baker/is_baking', False)
         logger.debug(format_timed(start_time, 'done baking'))
 
         self._records = None
-        return record_history.current
+        return record_histories.current
 
     def _handleCacheValidity(self, previous_records, current_records):
         start_time = time.perf_counter()
@@ -170,8 +186,8 @@
             # version of the app.
             reason = "not valid anymore"
         elif previous_records.invalidated:
-            # We have no valid previous bake record.
-            reason = "need bake record regeneration"
+            # We have no valid previous bake records.
+            reason = "need bake records regeneration"
         else:
             # Check if any template has changed since the last bake. Since
             # there could be some advanced conditional logic going on, we'd
@@ -200,8 +216,8 @@
                 start_time, "cache is assumed valid", colored=False))
             return True
 
-    def _bakeRealm(self, record_history, pool, realm, srclist):
-        for source, pp in srclist:
+    def _bakeRealm(self, pool, srclist):
+        for source, pp, ppctx in srclist:
             logger.debug("Queuing jobs for source '%s' using pipeline '%s'." %
                          (source.name, pp.PIPELINE_NAME))
             jobs = [BakeJob(source.name, item.spec, item.metadata)
@@ -209,6 +225,25 @@
             pool.queueJobs(jobs)
         pool.wait()
 
+    def _deleteStaleOutputs(self, pool, srclist):
+        for source, pp, ppctx in srclist:
+            ppctx.record_history.build()
+
+            to_delete = pp.getDeletions(ppctx)
+            if to_delete is not None:
+                for path, reason in to_delete:
+                    logger.debug("Removing '%s': %s" % (path, reason))
+                    ppctx.current_record.deleted_out_paths.append(path)
+                    try:
+                        os.remove(path)
+                    except FileNotFoundError:
+                        pass
+                    logger.info('[delete] %s' % path)
+
+    def _collapseRecords(self, srclist):
+        for source, pp, ppctx in srclist:
+            pp.collapseRecords(ppctx)
+
     def _logErrors(self, item_spec, errors):
         logger.error("Errors found in %s:" % item_spec)
         for e in errors:
@@ -237,19 +272,20 @@
         return pool
 
     def _handleWorkerResult(self, job, res):
-        record_name = self._getRecordName(job)
+        record_name = _get_record_name(job.source_name, res.pipeline_name)
         record = self._records.getRecord(record_name)
-        record.entries.append(res.record)
-        if not res.record.success:
+        record.entries.append(res.record_entry)
+        if not res.record_entry.success:
             record.success = False
             self._records.success = False
-            self._logErrors(job.item_spec, res.record.errors)
+            self._logErrors(job.item_spec, res.record_entry.errors)
 
     def _handleWorkerError(self, job, exc_data):
         e = RecordEntry()
         e.item_spec = job.item_spec
         e.errors.append(str(exc_data))
 
+        ppname = self.app.getSource(job.source_name).config['pipeline']
+        record_name = _get_record_name(job.source_name, ppname)
-        record_name = self._getRecordName(job)
         record = self._records.getRecord(record_name)
         record.entries.append(e)
@@ -261,7 +298,6 @@
         if self.app.debug:
             logger.error(exc_data.traceback)
 
-    def _getRecordName(self, job):
-        sn = job.source_name
-        ppn = self.app.getSource(sn).config['pipeline']
-        return '%s@%s' % (sn, ppn)
+
+def _get_record_name(source_name, pipeline_name):
+    return '%s@%s' % (source_name, pipeline_name)
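
With records now kept per source/pipeline pair, the helper above produces names such as these (the source and pipeline names are examples only):

    _get_record_name('posts', 'page')          # -> 'posts@page'
    _get_record_name('theme_assets', 'asset')  # -> 'theme_assets@asset'
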
--- a/piecrust/baking/worker.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/baking/worker.py	Sun May 21 00:06:59 2017 -0700
@@ -2,7 +2,7 @@
 import logging
 from piecrust.pipelines.base import PipelineContext, PipelineResult
 from piecrust.pipelines.records import (
-    MultiRecordHistory, MultiRecord, Record, load_records)
+    MultiRecordHistory, MultiRecord, RecordEntry, load_records)
 from piecrust.sources.base import ContentItem
 from piecrust.workerpool import IWorker
 
@@ -42,17 +42,6 @@
         stats = app.env.stats
         stats.registerTimer("BakeWorker_%d_Total" % self.wid)
         stats.registerTimer("BakeWorkerInit")
-        stats.registerTimer("JobReceive")
-        stats.registerTimer('LoadJob', raise_if_registered=False)
-        stats.registerTimer('RenderFirstSubJob',
-                            raise_if_registered=False)
-        stats.registerTimer('BakeJob', raise_if_registered=False)
-
-        stats.registerCounter("SourceUseAbortions")
-
-        stats.registerManifest("LoadJobs")
-        stats.registerManifest("RenderJobs")
-        stats.registerManifest("BakeJobs")
 
         self.app = app
 
@@ -90,9 +79,12 @@
         src, pp = self._sources[job.source_name]
         item = ContentItem(job.item_spec, job.item_metadata)
 
-        record_class = pp.RECORD_CLASS or Record
-        ppres = PipelineResult(record_class())
-        ppres.record.item_spec = job.item_spec
+        entry_class = pp.RECORD_ENTRY_CLASS or RecordEntry
+        ppres = PipelineResult()
+        ppres.pipeline_name = pp.PIPELINE_NAME
+        ppres.record_entry = entry_class()
+        ppres.record_entry.item_spec = job.item_spec
+
         pp.run(item, self._ppctx, ppres)
         return ppres
 
@@ -113,26 +105,3 @@
         self.item_spec = item_spec
         self.item_metadata = item_metadata
 
-
-class JobHandler:
-    def __init__(self, ctx):
-        self.ctx = ctx
-
-    @property
-    def app(self):
-        return self.ctx.app
-
-    def handleJob(self, job):
-        raise NotImplementedError()
-
-    def shutdown(self):
-        pass
-
-
-def _get_errors(ex):
-    errors = []
-    while ex is not None:
-        errors.append(str(ex))
-        ex = ex.__cause__
-    return errors
-
--- a/piecrust/commands/builtin/baking.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/commands/builtin/baking.py	Sun May 21 00:06:59 2017 -0700
@@ -1,7 +1,6 @@
 import time
 import os.path
 import logging
-import hashlib
 import fnmatch
 import datetime
 from colorama import Fore
@@ -103,6 +102,132 @@
         return records
 
 
+class ShowRecordCommand(ChefCommand):
+    def __init__(self):
+        super(ShowRecordCommand, self).__init__()
+        self.name = 'showrecord'
+        self.description = ("Shows the bake record for a given output "
+                            "directory.")
+
+    def setupParser(self, parser, app):
+        parser.add_argument(
+            '-o', '--output',
+            help="The output directory for which to show the bake record "
+            "(defaults to `_counter`)",
+            nargs='?')
+        parser.add_argument(
+            '-i', '--in-path',
+            help="A pattern that will be used to filter the relative path "
+            "of entries to show.")
+        parser.add_argument(
+            '-t', '--out-path',
+            help="A pattern that will be used to filter the output path "
+            "of entries to show.")
+        parser.add_argument(
+            '--fails',
+            action='store_true',
+            help="Only show record entries for failures.")
+        parser.add_argument(
+            '--last',
+            type=int,
+            default=0,
+            help="Show the bake records from N bakes ago (defaults to the "
+            "latest).")
+        parser.add_argument(
+            '--html-only',
+            action='store_true',
+            help="Only show records for pages (not from the asset "
+            "pipeline).")
+        parser.add_argument(
+            '--assets-only',
+            action='store_true',
+            help="Only show records for assets (not from pages).")
+        parser.add_argument(
+            '-p', '--pipelines',
+            nargs='*',
+            help="Only show records for the given pipeline(s).")
+        parser.add_argument(
+            '--show-stats',
+            action='store_true',
+            help="Show stats from the record.")
+        parser.add_argument(
+            '--show-manifest',
+            help="Show manifest entries from the record.")
+
+    def run(self, ctx):
+        from piecrust.baking.baker import get_bake_records_path
+        from piecrust.pipelines.records import load_records
+
+        out_dir = ctx.args.output or os.path.join(ctx.app.root_dir, '_counter')
+        suffix = '' if ctx.args.last == 0 else '.%d' % ctx.args.last
+        records_path = get_bake_records_path(ctx.app, out_dir, suffix=suffix)
+        records = load_records(records_path)
+        if records.invalidated:
+            raise Exception(
+                "The bake record was saved by a previous version of "
+                "PieCrust and can't be shown.")
+
+        in_pattern = None
+        if ctx.args.in_path:
+            in_pattern = '*%s*' % ctx.args.in_path.strip('*')
+
+        out_pattern = None
+        if ctx.args.out_path:
+            out_pattern = '*%s*' % ctx.args.out_path.strip('*')
+
+        pipelines = ctx.args.pipelines
+        if not pipelines:
+            pipelines = [p.PIPELINE_NAME
+                         for p in ctx.app.plugin_loader.getPipelines()]
+        if ctx.args.assets_only:
+            pipelines = ['asset']
+        if ctx.args.html_only:
+            pipelines = ['page']
+
+        logger.info("Bake record for: %s" % out_dir)
+        logger.info("Status: %s" % ('SUCCESS' if records.success
+                                    else 'FAILURE'))
+        logger.info("Date/time: %s" %
+                    datetime.datetime.fromtimestamp(records.bake_time))
+        logger.info("Incremental count: %d" % records.incremental_count)
+        logger.info("Versions: %s/%s" % (records._app_version,
+                                         records._record_version))
+        logger.info("")
+
+        for rec in records.records:
+            if ctx.args.fails and rec.success:
+                continue
+
+            logger.info("Record: %s" % rec.name)
+            logger.info("Status: %s" % ('SUCCESS' if rec.success
+                                        else 'FAILURE'))
+            for e in rec.entries:
+                if ctx.args.fails and e.success:
+                    continue
+                if in_pattern and not fnmatch.fnmatch(e.item_spec, in_pattern):
+                    continue
+                if out_pattern and not any(
+                        [fnmatch.fnmatch(op, out_pattern)
+                         for op in e.out_paths]):
+                    continue
+                _print_record_entry(e)
+
+            logger.info("")
+
+        stats = records.stats
+        if ctx.args.show_stats:
+            _show_stats(stats)
+
+        if ctx.args.show_manifest:
+            for name in sorted(stats.manifests.keys()):
+                if ctx.args.show_manifest.lower() in name.lower():
+                    val = stats.manifests[name]
+                    logger.info(
+                        "    [%s%s%s] [%d entries]" %
+                        (Fore.CYAN, name, Fore.RESET, len(val)))
+                    for v in val:
+                        logger.info("      - %s" % v)
+
+
 def _show_stats(stats, *, full=False):
     indent = '    '
 
@@ -132,275 +257,20 @@
                 logger.info("%s  - %s" % (indent, v))
 
 
-class ShowRecordCommand(ChefCommand):
-    def __init__(self):
-        super(ShowRecordCommand, self).__init__()
-        self.name = 'showrecord'
-        self.description = ("Shows the bake record for a given output "
-                            "directory.")
-
-    def setupParser(self, parser, app):
-        parser.add_argument(
-            '-o', '--output',
-            help="The output directory for which to show the bake record "
-            "(defaults to `_counter`)",
-            nargs='?')
-        parser.add_argument(
-            '-p', '--path',
-            help="A pattern that will be used to filter the relative path "
-            "of entries to show.")
-        parser.add_argument(
-            '-t', '--out',
-            help="A pattern that will be used to filter the output path "
-            "of entries to show.")
-        parser.add_argument(
-            '--last',
-            type=int,
-            default=0,
-            help="Show the last Nth bake record.")
-        parser.add_argument(
-            '--html-only',
-            action='store_true',
-            help="Only show records for pages (not from the asset "
-            "pipeline).")
-        parser.add_argument(
-            '--assets-only',
-            action='store_true',
-            help="Only show records for assets (not from pages).")
-        parser.add_argument(
-            '--show-stats',
-            action='store_true',
-            help="Show stats from the record.")
-        parser.add_argument(
-            '--show-manifest',
-            help="Show manifest entries from the record.")
-
-    def run(self, ctx):
-        from piecrust.processing.records import (
-            FLAG_PREPARED, FLAG_PROCESSED, FLAG_BYPASSED_STRUCTURED_PROCESSING,
-            FLAG_COLLAPSED_FROM_LAST_RUN)
-        from piecrust.rendering import (
-            PASS_FORMATTING, PASS_RENDERING)
-
-        out_dir = ctx.args.output or os.path.join(ctx.app.root_dir, '_counter')
-        record_id = hashlib.md5(out_dir.encode('utf8')).hexdigest()
-        suffix = '' if ctx.args.last == 0 else '.%d' % ctx.args.last
-        record_name = '%s%s.record' % (record_id, suffix)
-
-        pattern = None
-        if ctx.args.path:
-            pattern = '*%s*' % ctx.args.path.strip('*')
-
-        out_pattern = None
-        if ctx.args.out:
-            out_pattern = '*%s*' % ctx.args.out.strip('*')
-
-        if not ctx.args.show_stats and not ctx.args.show_manifest:
-            if not ctx.args.assets_only:
-                self._showBakeRecord(
-                    ctx, record_name, pattern, out_pattern)
-            if not ctx.args.html_only:
-                self._showProcessingRecord(
-                    ctx, record_name, pattern, out_pattern)
-            return
-
-        stats = {}
-        bake_rec = self._getBakeRecord(ctx, record_name)
-        if bake_rec:
-            _merge_stats(bake_rec.stats, stats)
-        proc_rec = self._getProcessingRecord(ctx, record_name)
-        if proc_rec:
-            _merge_stats(proc_rec.stats, stats)
-
-        if ctx.args.show_stats:
-            _show_stats(stats, full=False)
-
-        if ctx.args.show_manifest:
-            for name in sorted(stats.keys()):
-                logger.info('%s:' % name)
-                s = stats[name]
-                for name in sorted(s.manifests.keys()):
-                    if ctx.args.show_manifest.lower() in name.lower():
-                        val = s.manifests[name]
-                        logger.info(
-                            "    [%s%s%s] [%d entries]" %
-                            (Fore.CYAN, name, Fore.RESET, len(val)))
-                        for v in val:
-                            logger.info("      - %s" % v)
-
-    def _getBakeRecord(self, ctx, record_name):
-        record_cache = ctx.app.cache.getCache('baker')
-        if not record_cache.has(record_name):
-            logger.warning(
-                    "No page bake record has been created for this output "
-                    "path.")
-            return None
-
-        record = BakeRecord.load(record_cache.getCachePath(record_name))
-        return record
-
-    def _showBakeRecord(self, ctx, record_name, pattern, out_pattern):
-        record = self._getBakeRecord(ctx, record_name)
-        if record is None:
-            return
-
-        logging.info("Bake record for: %s" % record.out_dir)
-        logging.info("From: %s" % record_name)
-        logging.info("Last baked: %s" %
-                     datetime.datetime.fromtimestamp(record.bake_time))
-        if record.success:
-            logging.info("Status: success")
-        else:
-            logging.error("Status: failed")
-        logging.info("Entries:")
-        for entry in record.entries:
-            if pattern and not fnmatch.fnmatch(entry.path, pattern):
-                continue
-            if out_pattern and not (
-                    any([o for o in entry.all_out_paths
-                         if fnmatch.fnmatch(o, out_pattern)])):
-                continue
-
-            flags = _get_flag_descriptions(
-                entry.flags,
-                {
-                    BakeRecordEntry.FLAG_NEW: 'new',
-                    BakeRecordEntry.FLAG_SOURCE_MODIFIED: 'modified',
-                    BakeRecordEntry.FLAG_OVERRIDEN: 'overriden'})
+def _print_record_entry(e):
+    logger.info(" - %s" % e.item_spec)
+    logger.info("   Outputs:")
+    if e.out_paths:
+        for op in e.out_paths:
+            logger.info("    - %s" % op)
+    else:
+        logger.info("      <none>")
 
-            logging.info(" - ")
-
-            rel_path = os.path.relpath(entry.path, ctx.app.root_dir)
-            logging.info("   path:      %s" % rel_path)
-            logging.info("   source:    %s" % entry.source_name)
-            if entry.extra_key:
-                logging.info("   extra key: %s" % entry.extra_key)
-            logging.info("   flags:     %s" % _join(flags))
-            logging.info("   config:    %s" % entry.config)
-
-            if entry.errors:
-                logging.error("   errors: %s" % entry.errors)
-
-            logging.info("   %d sub-pages:" % len(entry.subs))
-            for sub in entry.subs:
-                sub_flags = _get_flag_descriptions(
-                    sub.flags,
-                    {
-                        SubPageBakeInfo.FLAG_BAKED: 'baked',
-                        SubPageBakeInfo.FLAG_FORCED_BY_SOURCE:
-                        'forced by source',
-                        SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS:
-                        'forced by missing previous record entry',
-                        SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS:
-                        'forced by previous errors',
-                        SubPageBakeInfo.FLAG_FORMATTING_INVALIDATED:
-                        'formatting invalidated'})
-
-                logging.info("   - ")
-                logging.info("     URL:    %s" % sub.out_uri)
-                logging.info("     path:   %s" % os.path.relpath(
-                        sub.out_path, record.out_dir))
-                logging.info("     flags:  %s" % _join(sub_flags))
-
-                pass_names = {
-                        PASS_FORMATTING: 'formatting pass',
-                        PASS_RENDERING: 'rendering pass'}
-                for p, ri in enumerate(sub.render_info):
-                    logging.info("     - %s" % pass_names[p])
-                    if not ri:
-                        logging.info("       no info")
-                        continue
-
-                    logging.info("       used sources:  %s" %
-                                 _join(ri.used_source_names))
-                    pgn_info = 'no'
-                    if ri.used_pagination:
-                        pgn_info = 'yes'
-                    if ri.pagination_has_more:
-                        pgn_info += ', has more'
-                    logging.info("       used pagination: %s", pgn_info)
-                    logging.info("       used assets: %s",
-                                 'yes' if ri.used_assets else 'no')
-                    logging.info("       other info:")
-                    for k, v in ri._custom_info.items():
-                        logging.info("       - %s: %s" % (k, v))
-
-                if sub.errors:
-                    logging.error("   errors: %s" % sub.errors)
-
-    def _getProcessingRecord(self, ctx, record_name):
-        record_cache = ctx.app.cache.getCache('proc')
-        if not record_cache.has(record_name):
-            logger.warning(
-                    "No asset processing record has been created for this "
-                    "output path.")
-            return None
+    e_desc = e.describe()
+    for k in sorted(e_desc.keys()):
+        logger.info("   %s: %s" % (k, e_desc[k]))
 
-        record = ProcessorPipelineRecord.load(
-                record_cache.getCachePath(record_name))
-        return record
-
-    def _showProcessingRecord(self, ctx, record_name, pattern, out_pattern):
-        record = self._getProcessingRecord(ctx, record_name)
-        if record is None:
-            return
-
-        logging.info("")
-        logging.info("Processing record for: %s" % record.out_dir)
-        logging.info("Last baked: %s" %
-                     datetime.datetime.fromtimestamp(record.process_time))
-        if record.success:
-            logging.info("Status: success")
-        else:
-            logging.error("Status: failed")
-        logging.info("Entries:")
-        for entry in record.entries:
-            rel_path = os.path.relpath(entry.path, ctx.app.root_dir)
-            if pattern and not fnmatch.fnmatch(rel_path, pattern):
-                continue
-            if out_pattern and not (
-                    any([o for o in entry.rel_outputs
-                         if fnmatch.fnmatch(o, out_pattern)])):
-                continue
-
-            flags = _get_flag_descriptions(
-                    entry.flags,
-                    {
-                        FLAG_PREPARED: 'prepared',
-                        FLAG_PROCESSED: 'processed',
-                        FLAG_BYPASSED_STRUCTURED_PROCESSING: 'external',
-                        FLAG_COLLAPSED_FROM_LAST_RUN: 'from last run'})
-
-            logger.info(" - ")
-            logger.info("   path:      %s" % rel_path)
-            logger.info("   out paths: %s" % entry.rel_outputs)
-            logger.info("   flags:     %s" % _join(flags))
-            logger.info("   proc tree: %s" % _format_proc_tree(
-                    entry.proc_tree, 14*' '))
-
-            if entry.errors:
-                logger.error("   errors: %s" % entry.errors)
-
-
-def _join(items, sep=', ', text_if_none='none'):
-    if items:
-        return sep.join(items)
-    return text_if_none
-
-
-def _get_flag_descriptions(flags, descriptions):
-    res = []
-    for k, v in descriptions.items():
-        if flags & k:
-            res.append(v)
-    return res
-
-
-def _format_proc_tree(tree, margin='', level=0):
-    name, children = tree
-    res = '%s%s+ %s\n' % (margin if level > 0 else '', level * '  ', name)
-    if children:
-        for c in children:
-            res += _format_proc_tree(c, margin, level + 1)
-    return res
-
+    if e.errors:
+        logger.error("   Errors:")
+        for err in e.errors:
+            logger.error("    - %s" % err)
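
A minimal sketch, assuming the naming scheme from `get_bake_records_path` and the baker's backup rotation, of which file `--last` ends up reading (the helper name is hypothetical):

    import hashlib

    def records_file_name(out_dir, last=0):
        # Records are named after an MD5 of the output directory; the
        # baker's rotation gives older bakes a '.N' suffix.
        records_id = hashlib.md5(out_dir.encode('utf8')).hexdigest()
        suffix = '' if last == 0 else '.%d' % last
        return '%s%s.records' % (records_id, suffix)

    records_file_name('_counter')     # the current bake's records
    records_file_name('_counter', 2)  # the records from two bakes ago
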
--- a/piecrust/configuration.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/configuration.py	Sun May 21 00:06:59 2017 -0700
@@ -15,6 +15,13 @@
 default_allowed_types = (dict, list, tuple, float, int, bool, str)
 
 
+MERGE_NEW_VALUES = 0
+MERGE_OVERWRITE_VALUES = 1
+MERGE_PREPEND_LISTS = 2
+MERGE_APPEND_LISTS = 4
+MERGE_ALL = MERGE_OVERWRITE_VALUES | MERGE_PREPEND_LISTS
+
+
 class ConfigurationError(Exception):
     pass
 
@@ -64,7 +71,7 @@
         self._ensureLoaded()
         return self._values
 
-    def merge(self, other):
+    def merge(self, other, mode=MERGE_ALL):
         self._ensureLoaded()
 
         if isinstance(other, dict):
@@ -162,13 +169,6 @@
             cur = cur[b]
 
 
-MERGE_NEW_VALUES = 0
-MERGE_OVERWRITE_VALUES = 1
-MERGE_PREPEND_LISTS = 2
-MERGE_APPEND_LISTS = 4
-MERGE_ALL = MERGE_OVERWRITE_VALUES | MERGE_PREPEND_LISTS
-
-
 def merge_dicts(source, merging, *args,
                 validator=None, mode=MERGE_ALL):
     _recurse_merge_dicts(source, merging, None, validator, mode)
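
A hedged example of the merge modes now declared at the top of the module: assuming MERGE_PREPEND_LISTS puts the incoming items first, the default MERGE_ALL mode overwrites scalar values and prepends lists:

    from piecrust.configuration import merge_dicts, MERGE_ALL

    cfg = {'site': {'title': 'Old', 'tags': ['a']}}
    merge_dicts(cfg, {'site': {'title': 'New', 'tags': ['b']}}, mode=MERGE_ALL)
    # cfg is now {'site': {'title': 'New', 'tags': ['b', 'a']}}
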
--- a/piecrust/data/assetor.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/data/assetor.py	Sun May 21 00:06:59 2017 -0700
@@ -33,15 +33,14 @@
     return base_url.rstrip('/') + '/'
 
 
-class Assetor(object):
+class Assetor:
     debug_render_doc = """Helps render URLs to files in the current page's
                           asset folder."""
     debug_render = []
     debug_render_dynamic = ['_debugRenderAssetNames']
 
-    def __init__(self, page, uri):
+    def __init__(self, page):
         self._page = page
-        self._uri = uri
         self._cache = None
 
     def __getattr__(self, name):
--- a/piecrust/data/builder.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/data/builder.py	Sun May 21 00:06:59 2017 -0700
@@ -1,6 +1,7 @@
 import logging
+from piecrust.data.assetor import Assetor
 from piecrust.data.base import MergedMapping
-from piecrust.data.linker import PageLinkerData
+# from piecrust.data.linker import PageLinkerData
 from piecrust.data.pagedata import PageData
 from piecrust.data.paginator import Paginator
 from piecrust.data.piecrustdata import PieCrustData
@@ -11,32 +12,33 @@
 logger = logging.getLogger(__name__)
 
 
-class DataBuildingContext(object):
-    def __init__(self, qualified_page):
-        self.qualified_page = qualified_page
+class DataBuildingContext:
+    def __init__(self, page, sub_num):
+        self.page = page
+        self.sub_num = sub_num
         self.pagination_source = None
         self.pagination_filter = None
 
 
 def build_page_data(ctx):
-    qpage = ctx.qualified_page
-    page = qpage.page
+    page = ctx.page
+    sub_num = ctx.sub_num
     app = page.app
+
     pgn_source = ctx.pagination_source or get_default_pagination_source(page)
-    first_uri = ctx.page.getUri(1)
 
     pc_data = PieCrustData()
     config_data = PageData(page, ctx)
-    paginator = Paginator(qpage, pgn_source,
+    paginator = Paginator(pgn_source, page, sub_num,
                           pgn_filter=ctx.pagination_filter)
-    assetor = page.source.buildAssetor(page, first_uri)
-    linker = PageLinkerData(page.source, page.rel_path)
+    assetor = Assetor(page)
+    # linker = PageLinkerData(page.source, page.rel_path)
     data = {
         'piecrust': pc_data,
         'page': config_data,
         'assets': assetor,
         'pagination': paginator,
-        'family': linker
+        # 'family': linker
     }
 
     for route in app.routes:
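
As a rough sketch, the template data built above now exposes these top-level entries, with the `family` linker temporarily disabled (`page` is assumed to be a loaded Page, and `build_page_data` is assumed to return the data mapping):

    ctx = DataBuildingContext(page, sub_num=1)
    data = build_page_data(ctx)
    data['piecrust']    # PieCrustData()
    data['page']        # PageData(page, ctx)
    data['assets']      # Assetor(page)
    data['pagination']  # Paginator(pgn_source, page, 1)
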
--- a/piecrust/data/iterators.py	Wed May 17 00:11:48 2017 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,338 +0,0 @@
-import logging
-from piecrust.data.filters import PaginationFilter
-from piecrust.events import Event
-from piecrust.sources.base import ContentSource, AbortedSourceUseError
-from piecrust.sources.interfaces import IPaginationSource
-
-
-logger = logging.getLogger(__name__)
-
-
-class SliceIterator(object):
-    def __init__(self, it, offset=0, limit=-1):
-        self.it = it
-        self.offset = offset
-        self.limit = limit
-        self.current_page = None
-        self.has_more = False
-        self.inner_count = -1
-        self.next_page = None
-        self.prev_page = None
-        self._cache = None
-
-    def __iter__(self):
-        if self._cache is None:
-            inner_list = list(self.it)
-            self.inner_count = len(inner_list)
-
-            if self.limit > 0:
-                self.has_more = self.inner_count > (self.offset + self.limit)
-                self._cache = inner_list[self.offset:self.offset + self.limit]
-            else:
-                self.has_more = False
-                self._cache = inner_list[self.offset:]
-
-            if self.current_page:
-                try:
-                    idx = inner_list.index(self.current_page)
-                except ValueError:
-                    idx = -1
-                if idx >= 0:
-                    if idx < self.inner_count - 1:
-                        self.next_page = inner_list[idx + 1]
-                    if idx > 0:
-                        self.prev_page = inner_list[idx - 1]
-
-        return iter(self._cache)
-
-
-class SettingFilterIterator(object):
-    def __init__(self, it, fil_conf, setting_accessor=None):
-        self.it = it
-        self.fil_conf = fil_conf
-        self._fil = None
-        self.setting_accessor = setting_accessor
-
-    def __iter__(self):
-        if self._fil is None:
-            self._fil = PaginationFilter(value_accessor=self.setting_accessor)
-            self._fil.addClausesFromConfig(self.fil_conf)
-
-        for i in self.it:
-            if self._fil.pageMatches(i):
-                yield i
-
-
-class NaturalSortIterator(object):
-    def __init__(self, it, reverse=False):
-        self.it = it
-        self.reverse = reverse
-
-    def __iter__(self):
-        return iter(sorted(self.it, reverse=self.reverse))
-
-
-class SettingSortIterator(object):
-    def __init__(self, it, name, reverse=False, value_accessor=None):
-        self.it = it
-        self.name = name
-        self.reverse = reverse
-        self.value_accessor = value_accessor or self._default_value_accessor
-
-    def __iter__(self):
-        return iter(sorted(self.it, key=self._key_getter,
-                           reverse=self.reverse))
-
-    def _key_getter(self, item):
-        key = self.value_accessor(item, self.name)
-        if key is None:
-            return 0
-        return key
-
-    @staticmethod
-    def _default_value_accessor(item, name):
-        try:
-            return getattr(item, name)
-        except AttributeError:
-            return None
-
-
-class PaginationFilterIterator(object):
-    def __init__(self, it, fil):
-        self.it = it
-        self._fil = fil
-
-    def __iter__(self):
-        for page in self.it:
-            if self._fil.pageMatches(page):
-                yield page
-
-
-class GenericSortIterator(object):
-    def __init__(self, it, sorter):
-        self.it = it
-        self.sorter = sorter
-        self._sorted_it = None
-
-    def __iter__(self):
-        if self._sorted_it is None:
-            self._sorted_it = self.sorter(self.it)
-        return iter(self._sorted_it)
-
-
-class PageIterator(object):
-    debug_render = []
-    debug_render_doc_dynamic = ['_debugRenderDoc']
-    debug_render_not_empty = True
-
-    def __init__(self, source, *,
-                 current_page=None,
-                 pagination_filter=None, sorter=None,
-                 offset=0, limit=-1, locked=False):
-        self._source = source
-        self._current_page = current_page
-        self._locked = False
-        self._pages = source
-        self._pagesData = None
-        self._pagination_slicer = None
-        self._has_sorter = False
-        self._next_page = None
-        self._prev_page = None
-        self._iter_event = Event()
-
-        if isinstance(source, IPaginationSource):
-            src_it = source.getSourceIterator()
-            if src_it is not None:
-                self._pages = src_it
-
-        # Apply any filter first, before we start sorting or slicing.
-        if pagination_filter is not None:
-            self._simpleNonSortedWrap(PaginationFilterIterator,
-                                      pagination_filter)
-
-        if sorter is not None:
-            self._simpleNonSortedWrap(GenericSortIterator, sorter)
-            self._has_sorter = True
-
-        if offset > 0 or limit > 0:
-            self.slice(offset, limit)
-
-        self._locked = locked
-
-    @property
-    def total_count(self):
-        self._load()
-        if self._pagination_slicer is not None:
-            return self._pagination_slicer.inner_count
-        return len(self._pagesData)
-
-    @property
-    def next_page(self):
-        self._load()
-        return self._next_page
-
-    @property
-    def prev_page(self):
-        self._load()
-        return self._prev_page
-
-    def __len__(self):
-        self._load()
-        return len(self._pagesData)
-
-    def __getitem__(self, key):
-        self._load()
-        return self._pagesData[key]
-
-    def __iter__(self):
-        self._load()
-        self._iter_event.fire()
-        return iter(self._pagesData)
-
-    def __getattr__(self, name):
-        if name[:3] == 'is_' or name[:3] == 'in_':
-            def is_filter(value):
-                conf = {'is_%s' % name[3:]: value}
-                accessor = self._getSettingAccessor()
-                return self._simpleNonSortedWrap(SettingFilterIterator, conf,
-                                                 accessor)
-            return is_filter
-
-        if name[:4] == 'has_':
-            def has_filter(value):
-                conf = {name: value}
-                accessor = self._getSettingAccessor()
-                return self._simpleNonSortedWrap(SettingFilterIterator, conf,
-                                                 accessor)
-            return has_filter
-
-        if name[:5] == 'with_':
-            def has_filter(value):
-                conf = {'has_%s' % name[5:]: value}
-                accessor = self._getSettingAccessor()
-                return self._simpleNonSortedWrap(SettingFilterIterator, conf,
-                                                 accessor)
-            return has_filter
-
-        return self.__getattribute__(name)
-
-    def skip(self, count):
-        return self._simpleWrap(SliceIterator, count)
-
-    def limit(self, count):
-        return self._simpleWrap(SliceIterator, 0, count)
-
-    def slice(self, skip, limit):
-        return self._simpleWrap(SliceIterator, skip, limit)
-
-    def filter(self, filter_name):
-        if self._current_page is None:
-            raise Exception("Can't use `filter()` because no parent page was "
-                            "set for this page iterator.")
-        filter_conf = self._current_page.config.get(filter_name)
-        if filter_conf is None:
-            raise Exception("Couldn't find filter '%s' in the configuration "
-                            "header for page: %s" %
-                            (filter_name, self._current_page.path))
-        accessor = self._getSettingAccessor()
-        return self._simpleNonSortedWrap(SettingFilterIterator, filter_conf,
-                                         accessor)
-
-    def sort(self, setting_name=None, reverse=False):
-        self._ensureUnlocked()
-        self._unload()
-        if setting_name is not None:
-            accessor = self._getSettingAccessor()
-            self._pages = SettingSortIterator(self._pages, setting_name,
-                                              reverse, accessor)
-        else:
-            self._pages = NaturalSortIterator(self._pages, reverse)
-        self._has_sorter = True
-        return self
-
-    def reset(self):
-        self._ensureUnlocked()
-        self._unload
-        return self
-
-    @property
-    def _has_more(self):
-        self._load()
-        if self._pagination_slicer:
-            return self._pagination_slicer.has_more
-        return False
-
-    def _simpleWrap(self, it_class, *args, **kwargs):
-        self._ensureUnlocked()
-        self._unload()
-        self._ensureSorter()
-        self._pages = it_class(self._pages, *args, **kwargs)
-        if self._pagination_slicer is None and it_class is SliceIterator:
-            self._pagination_slicer = self._pages
-            self._pagination_slicer.current_page = self._current_page
-        return self
-
-    def _simpleNonSortedWrap(self, it_class, *args, **kwargs):
-        self._ensureUnlocked()
-        self._unload()
-        self._pages = it_class(self._pages, *args, **kwargs)
-        return self
-
-    def _getSettingAccessor(self):
-        accessor = None
-        if isinstance(self._source, IPaginationSource):
-            accessor = self._source.getSettingAccessor()
-        return accessor
-
-    def _ensureUnlocked(self):
-        if self._locked:
-            raise Exception(
-                    "This page iterator has been locked, probably because "
-                    "you're trying to tamper with pagination data.")
-
-    def _ensureSorter(self):
-        if self._has_sorter:
-            return
-        if isinstance(self._source, IPaginationSource):
-            sort_it = self._source.getSorterIterator(self._pages)
-            if sort_it is not None:
-                self._pages = sort_it
-        self._has_sorter = True
-
-    def _unload(self):
-        self._pagesData = None
-        self._next_page = None
-        self._prev_page = None
-
-    def _load(self):
-        if self._pagesData is not None:
-            return
-
-        if (self._current_page is not None and
-                self._current_page.app.env.abort_source_use and
-                isinstance(self._source, ContentSource)):
-            logger.debug("Aborting iteration from %s." %
-                         self._current_page.ref_spec)
-            raise AbortedSourceUseError()
-
-        self._ensureSorter()
-
-        it_chain = self._pages
-        is_pgn_source = False
-        if isinstance(self._source, IPaginationSource):
-            is_pgn_source = True
-            tail_it = self._source.getTailIterator(self._pages)
-            if tail_it is not None:
-                it_chain = tail_it
-
-        self._pagesData = list(it_chain)
-
-        if is_pgn_source and self._current_page and self._pagination_slicer:
-            pn = [self._pagination_slicer.prev_page,
-                    self._pagination_slicer.next_page]
-            pn_it = self._source.getTailIterator(iter(pn))
-            self._prev_page, self._next_page = (list(pn_it))
-
-    def _debugRenderDoc(self):
-        return "Contains %d items" % len(self)
-
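
The iterator machinery deleted above is not gone for good: PageIterator resurfaces in `piecrust.dataproviders.page_iterator` (imported by linker.py below). A sketch of how the new Paginator constructs it, with slicing now a separate step; `source`, `page`, `pag_filter`, `offset` and `limit` stand in for the values Paginator computes:

    from piecrust.dataproviders.page_iterator import PageIterator

    it = PageIterator(source, current_page=page,
                      pagination_filter=pag_filter, locked=True)
    it.slice(offset, limit)
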
--- a/piecrust/data/linker.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/data/linker.py	Sun May 21 00:06:59 2017 -0700
@@ -1,9 +1,8 @@
 import logging
 import collections
-from piecrust.data.iterators import PageIterator
 from piecrust.data.pagedata import LazyPageConfigLoaderHasNoValue
 from piecrust.data.paginationdata import PaginationData
-from piecrust.sources.interfaces import IPaginationSource
+from piecrust.dataproviders.page_iterator import PageIterator
 
 
 logger = logging.getLogger(__name__)
@@ -11,7 +10,7 @@
 
 class PageLinkerData(object):
     """ Entry template data to get access to related pages from a given
-    root page.
+        root page.
     """
     debug_render = ['parent', 'ancestors', 'siblings', 'children', 'root',
                     'forpath']
--- a/piecrust/data/paginationdata.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/data/paginationdata.py	Sun May 21 00:06:59 2017 -0700
@@ -7,31 +7,30 @@
 
 
 class PaginationData(LazyPageConfigData):
-    def __init__(self, qualified_page):
-        super(PaginationData, self).__init__(qualified_page.page)
-        self._qualified_page = qualified_page
+    def __init__(self, page):
+        super().__init__(page)
 
     def _load(self):
+        from piecrust.data.assetor import Assetor
         from piecrust.uriutil import split_uri
 
         page = self._page
         dt = page.datetime
-        page_url = self._qualified_page.uri
+        page_url = page.getUri()
         _, slug = split_uri(page.app, page_url)
         self._setValue('url', page_url)
         self._setValue('slug', slug)
-        self._setValue(
-            'timestamp',
-            time.mktime(page.datetime.timetuple()))
+        self._setValue('timestamp',
+                       time.mktime(page.datetime.timetuple()))
         self._setValue('datetime', {
             'year': dt.year, 'month': dt.month, 'day': dt.day,
             'hour': dt.hour, 'minute': dt.minute, 'second': dt.second})
         date_format = page.app.config.get('site/date_format')
         if date_format:
             self._setValue('date', page.datetime.strftime(date_format))
-        self._setValue('mtime', page.path_mtime)
+        self._setValue('mtime', page.content_mtime)
 
-        assetor = page.source.buildAssetor(page, page_url)
+        assetor = Assetor(page)
         self._setValue('assets', assetor)
 
         segment_names = page.config.get('segments')
@@ -50,11 +49,11 @@
         assert self is data
 
         if do_render:
-            uri = self._qualified_page.uri
+            uri = self._page.getUri()
             try:
                 from piecrust.rendering import (
                     RenderingContext, render_page_segments)
-                ctx = RenderingContext(self._qualified_page)
+                ctx = RenderingContext(self._page)
                 render_result = render_page_segments(ctx)
                 segs = render_result.segments
             except Exception as ex:
--- a/piecrust/data/paginator.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/data/paginator.py	Sun May 21 00:06:59 2017 -0700
@@ -1,9 +1,6 @@
 import math
 import logging
 from werkzeug.utils import cached_property
-from piecrust.data.filters import PaginationFilter, page_value_accessor
-from piecrust.data.iterators import PageIterator
-from piecrust.sources.interfaces import IPaginationSource
 
 
 logger = logging.getLogger(__name__)
@@ -23,11 +20,11 @@
         'total_item_count', 'total_page_count',
         'next_item', 'prev_item']
 
-    def __init__(self, qualified_page, source, *,
+    def __init__(self, source, current_page, sub_num, *,
                  pgn_filter=None, items_per_page=-1):
-        self._parent_page = qualified_page
-        self._page_num = qualified_page.page_num
         self._source = source
+        self._page = current_page
+        self._sub_num = sub_num
         self._iterator = None
         self._pgn_filter = pgn_filter
         self._items_per_page = items_per_page
@@ -88,12 +85,11 @@
     def items_per_page(self):
         if self._items_per_page > 0:
             return self._items_per_page
-        if self._parent_page:
-            ipp = self._parent_page.page.config.get('items_per_page')
-            if ipp is not None:
-                return ipp
-        if isinstance(self._source, IPaginationSource):
-            return self._source.getItemsPerPage()
+
+        ipp = self._page.config.get('items_per_page')
+        if ipp is not None:
+            return ipp
+
         raise Exception("No way to figure out how many items to display "
                         "per page.")
 
@@ -104,19 +100,19 @@
 
     @property
     def prev_page_number(self):
-        if self._page_num > 1:
-            return self._page_num - 1
+        if self._sub_num > 1:
+            return self._sub_num - 1
         return None
 
     @property
     def this_page_number(self):
-        return self._page_num
+        return self._sub_num
 
     @property
     def next_page_number(self):
         self._load()
         if self._iterator._has_more:
-            return self._page_num + 1
+            return self._sub_num + 1
         return None
 
     @property
@@ -128,7 +124,7 @@
 
     @property
     def this_page(self):
-        return self._getPageUri(self._page_num)
+        return self._getPageUri(self._sub_num)
 
     @property
     def next_page(self):
@@ -166,8 +162,8 @@
         if radius <= 0 or total_page_count < (2 * radius + 1):
             return list(range(1, total_page_count + 1))
 
-        first_num = self._page_num - radius
-        last_num = self._page_num + radius
+        first_num = self._sub_num - radius
+        last_num = self._sub_num + radius
         if first_num <= 0:
             last_num += 1 - first_num
             first_num = 1
@@ -185,42 +181,30 @@
         if self._iterator is not None:
             return
 
-        if self._source is None:
-            raise Exception("Can't load pagination data: no source has "
-                            "been defined.")
+        from piecrust.data.filters import PaginationFilter
+        from piecrust.dataproviders.page_iterator import PageIterator
 
-        pag_filter = self._getPaginationFilter()
-        offset = (self._page_num - 1) * self.items_per_page
-        current_page = None
-        if self._parent_page:
-            current_page = self._parent_page.page
+        pag_filter = PaginationFilter()
+        if self._pgn_filter is not None:
+            pag_filter.addClause(self._pgn_filter.root_clause)
+
         self._iterator = PageIterator(
             self._source,
-            current_page=current_page,
+            current_page=self._page,
             pagination_filter=pag_filter,
-            offset=offset, limit=self.items_per_page,
             locked=True)
         self._iterator._iter_event += self._onIteration
 
-    def _getPaginationFilter(self):
-        f = PaginationFilter(value_accessor=page_value_accessor)
-
-        if self._pgn_filter is not None:
-            f.addClause(self._pgn_filter.root_clause)
-
-        if self._parent_page and isinstance(self._source, IPaginationSource):
-            sf = self._source.getPaginationFilter(self._parent_page)
-            if sf is not None:
-                f.addClause(sf.root_clause)
-
-        return f
+        offset = (self._sub_num - 1) * self.items_per_page
+        limit = self.items_per_page
+        self._iterator.slice(offset, limit)
 
     def _getPageUri(self, index):
-        return self._parent_page.getUri(index)
+        return self._page.getUri(index)
 
     def _onIteration(self):
-        if self._parent_page is not None and not self._pgn_set_on_ctx:
-            eis = self._parent_page.app.env.exec_info_stack
+        if not self._pgn_set_on_ctx:
+            eis = self._page.app.env.exec_info_stack
             eis.current_page_info.render_ctx.setPagination(self)
             self._pgn_set_on_ctx = True
 
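
The offset arithmetic in `_load` above, worked through for a concrete case: sub-page 3 of a source paginated five items per page skips the first ten items.

    items_per_page = 5
    sub_num = 3
    offset = (sub_num - 1) * items_per_page  # 10
    # iterator.slice(10, 5) yields items 10..14, i.e. the third sub-page
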
--- a/piecrust/data/provider.py	Wed May 17 00:11:48 2017 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,274 +0,0 @@
-import time
-import collections.abc
-from piecrust.configuration import ConfigurationError
-from piecrust.data.iterators import PageIterator
-from piecrust.generation.taxonomy import Taxonomy
-from piecrust.sources.array import ArraySource
-
-
-def get_data_provider_class(app, provider_type):
-    if not provider_type:
-        raise Exception("No data provider type specified.")
-    for prov in app.plugin_loader.getDataProviders():
-        if prov.PROVIDER_NAME == provider_type:
-            return prov
-    raise ConfigurationError(
-        "Unknown data provider type: %s" % provider_type)
-
-
-class DataProvider(object):
-    debug_render_dynamic = []
-    debug_render_invoke_dynamic = []
-
-    def __init__(self, source, page, override):
-        if source.app is not page.app:
-            raise Exception("The given source and page don't belong to "
-                            "the same application.")
-        self._source = source
-        self._page = page
-
-
-class IteratorDataProvider(DataProvider):
-    PROVIDER_NAME = 'iterator'
-
-    debug_render_doc_dynamic = ['_debugRenderDoc']
-    debug_render_not_empty = True
-
-    def __init__(self, source, page, override):
-        super(IteratorDataProvider, self).__init__(source, page, override)
-
-        self._innerIt = None
-        if isinstance(override, IteratorDataProvider):
-            # Iterator providers can be chained, like for instance with
-            # `site.pages` listing both the theme pages and the user site's
-            # pages.
-            self._innerIt = override
-
-        self._pages = PageIterator(source, current_page=page)
-        self._pages._iter_event += self._onIteration
-        self._ctx_set = False
-
-    def __len__(self):
-        return len(self._pages)
-
-    def __getitem__(self, key):
-        return self._pages[key]
-
-    def __iter__(self):
-        yield from iter(self._pages)
-        if self._innerIt:
-            yield from self._innerIt
-
-    def _onIteration(self):
-        if not self._ctx_set:
-            eis = self._page.app.env.exec_info_stack
-            eis.current_page_info.render_ctx.addUsedSource(self._source.name)
-            self._ctx_set = True
-
-    def _debugRenderDoc(self):
-        return 'Provides a list of %d items' % len(self)
-
-
-class BlogDataProvider(DataProvider, collections.abc.Mapping):
-    PROVIDER_NAME = 'blog'
-
-    debug_render_doc = """Provides a list of blog posts and yearly/monthly
-                          archives."""
-    debug_render_dynamic = (['_debugRenderTaxonomies'] +
-            DataProvider.debug_render_dynamic)
-
-    def __init__(self, source, page, override):
-        super(BlogDataProvider, self).__init__(source, page, override)
-        self._yearly = None
-        self._monthly = None
-        self._taxonomies = {}
-        self._ctx_set = False
-
-    @property
-    def posts(self):
-        return self._posts()
-
-    @property
-    def years(self):
-        return self._buildYearlyArchive()
-
-    @property
-    def months(self):
-        return self._buildMonthlyArchive()
-
-    def __getitem__(self, name):
-        if name == 'posts':
-            return self._posts()
-        elif name == 'years':
-            return self._buildYearlyArchive()
-        elif name == 'months':
-            return self._buildMonthlyArchive()
-
-        if self._source.app.config.get('site/taxonomies/' + name) is not None:
-            return self._buildTaxonomy(name)
-
-        raise KeyError("No such item: %s" % name)
-
-    def __iter__(self):
-        keys = ['posts', 'years', 'months']
-        keys += list(self._source.app.config.get('site/taxonomies').keys())
-        return iter(keys)
-
-    def __len__(self):
-        return 3 + len(self._source.app.config.get('site/taxonomies'))
-
-    def _debugRenderTaxonomies(self):
-        return list(self._source.app.config.get('site/taxonomies').keys())
-
-    def _posts(self):
-        it = PageIterator(self._source, current_page=self._page)
-        it._iter_event += self._onIteration
-        return it
-
-    def _buildYearlyArchive(self):
-        if self._yearly is not None:
-            return self._yearly
-
-        self._yearly = []
-        yearly_index = {}
-        for post in self._source.getPages():
-            year = post.datetime.strftime('%Y')
-
-            posts_this_year = yearly_index.get(year)
-            if posts_this_year is None:
-                timestamp = time.mktime(
-                        (post.datetime.year, 1, 1, 0, 0, 0, 0, 0, -1))
-                posts_this_year = BlogArchiveEntry(self._page, year, timestamp)
-                self._yearly.append(posts_this_year)
-                yearly_index[year] = posts_this_year
-
-            posts_this_year._data_source.append(post)
-        self._yearly = sorted(self._yearly,
-                key=lambda e: e.timestamp,
-                reverse=True)
-        self._onIteration()
-        return self._yearly
-
-    def _buildMonthlyArchive(self):
-        if self._monthly is not None:
-            return self._monthly
-
-        self._monthly = []
-        for post in self._source.getPages():
-            month = post.datetime.strftime('%B %Y')
-
-            posts_this_month = next(
-                    filter(lambda m: m.name == month, self._monthly),
-                    None)
-            if posts_this_month is None:
-                timestamp = time.mktime(
-                        (post.datetime.year, post.datetime.month, 1,
-                            0, 0, 0, 0, 0, -1))
-                posts_this_month = BlogArchiveEntry(self._page, month, timestamp)
-                self._monthly.append(posts_this_month)
-
-            posts_this_month._data_source.append(post)
-        self._monthly = sorted(self._monthly,
-                key=lambda e: e.timestamp,
-                reverse=True)
-        self._onIteration()
-        return self._monthly
-
-    def _buildTaxonomy(self, tax_name):
-        if tax_name in self._taxonomies:
-            return self._taxonomies[tax_name]
-
-        tax_cfg = self._page.app.config.get('site/taxonomies/' + tax_name)
-        tax = Taxonomy(tax_name, tax_cfg)
-
-        posts_by_tax_value = {}
-        for post in self._source.getPages():
-            tax_values = post.config.get(tax.setting_name)
-            if tax_values is None:
-                continue
-            if not isinstance(tax_values, list):
-                tax_values = [tax_values]
-            for val in tax_values:
-                posts = posts_by_tax_value.setdefault(val, [])
-                posts.append(post)
-
-        entries = []
-        for value, ds in posts_by_tax_value.items():
-            source = ArraySource(self._page.app, ds)
-            entries.append(BlogTaxonomyEntry(self._page, source, value))
-        self._taxonomies[tax_name] = sorted(entries, key=lambda k: k.name)
-
-        self._onIteration()
-        return self._taxonomies[tax_name]
-
-    def _onIteration(self):
-        if not self._ctx_set:
-            eis = self._page.app.env.exec_info_stack
-            if eis.current_page_info:
-                eis.current_page_info.render_ctx.addUsedSource(self._source)
-            self._ctx_set = True
-
-
-class BlogArchiveEntry(object):
-    debug_render = ['name', 'timestamp', 'posts']
-    debug_render_invoke = ['name', 'timestamp', 'posts']
-
-    def __init__(self, page, name, timestamp):
-        self.name = name
-        self.timestamp = timestamp
-        self._page = page
-        self._data_source = []
-        self._iterator = None
-
-    def __str__(self):
-        return self.name
-
-    def __int__(self):
-        return int(self.name)
-
-    @property
-    def posts(self):
-        self._load()
-        self._iterator.reset()
-        return self._iterator
-
-    def _load(self):
-        if self._iterator is not None:
-            return
-        source = ArraySource(self._page.app, self._data_source)
-        self._iterator = PageIterator(source, current_page=self._page)
-
-
-class BlogTaxonomyEntry(object):
-    debug_render = ['name', 'post_count', 'posts']
-    debug_render_invoke = ['name', 'post_count', 'posts']
-
-    def __init__(self, page, source, property_value):
-        self._page = page
-        self._source = source
-        self._property_value = property_value
-        self._iterator = None
-
-    def __str__(self):
-        return self._property_value
-
-    @property
-    def name(self):
-        return self._property_value
-
-    @property
-    def posts(self):
-        self._load()
-        self._iterator.reset()
-        return self._iterator
-
-    @property
-    def post_count(self):
-        return self._source.page_count
-
-    def _load(self):
-        if self._iterator is not None:
-            return
-
-        self._iterator = PageIterator(self._source, current_page=self._page)
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/dataproviders/base.py	Sun May 21 00:06:59 2017 -0700
@@ -0,0 +1,31 @@
+from piecrust.configuration import ConfigurationError
+
+
+class UnsupportedWrappedDataProviderError(Exception):
+    pass
+
+
+class DataProvider:
+    """ The base class for a data provider.
+    """
+    PROVIDER_NAME = None
+
+    debug_render_dynamic = []
+    debug_render_invoke_dynamic = []
+
+    def __init__(self, source):
+        self._source = source
+
+    def _wrapDataProvider(self, provider):
+        raise UnsupportedWrappedDataProviderError()
+
+
+def get_data_provider_class(app, provider_type):
+    if not provider_type:
+        raise Exception("No data provider type specified.")
+    for prov in app.plugin_loader.getDataProviders():
+        if prov.PROVIDER_NAME == provider_type:
+            return prov
+    raise ConfigurationError(
+        "Unknown data provider type: %s" % provider_type)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/dataproviders/blog.py	Sun May 21 00:06:59 2017 -0700
@@ -0,0 +1,209 @@
+import time
+import collections.abc
+from piecrust.dataproviders.base import DataProvider
+from piecrust.dataproviders.page_iterator import PageIterator
+from piecrust.generation.taxonomy import Taxonomy
+from piecrust.sources.array import ArraySource
+
+
+class BlogDataProvider(DataProvider, collections.abc.Mapping):
+    PROVIDER_NAME = 'blog'
+
+    debug_render_doc = """Provides a list of blog posts and yearly/monthly
+                          archives."""
+    debug_render_dynamic = (['_debugRenderTaxonomies'] +
+                            DataProvider.debug_render_dynamic)
+
+    def __init__(self, source, page, override):
+        super().__init__(source)
+        self._page = page
+        self._yearly = None
+        self._monthly = None
+        self._taxonomies = {}
+        self._ctx_set = False
+
+    @property
+    def posts(self):
+        return self._posts()
+
+    @property
+    def years(self):
+        return self._buildYearlyArchive()
+
+    @property
+    def months(self):
+        return self._buildMonthlyArchive()
+
+    def __getitem__(self, name):
+        if name == 'posts':
+            return self._posts()
+        elif name == 'years':
+            return self._buildYearlyArchive()
+        elif name == 'months':
+            return self._buildMonthlyArchive()
+
+        if self._source.app.config.get('site/taxonomies/' + name) is not None:
+            return self._buildTaxonomy(name)
+
+        raise KeyError("No such item: %s" % name)
+
+    def __iter__(self):
+        keys = ['posts', 'years', 'months']
+        keys += list(self._source.app.config.get('site/taxonomies').keys())
+        return iter(keys)
+
+    def __len__(self):
+        return 3 + len(self._source.app.config.get('site/taxonomies'))
+
+    def _debugRenderTaxonomies(self):
+        return list(self._source.app.config.get('site/taxonomies').keys())
+
+    def _posts(self):
+        it = PageIterator(self._source, current_page=self._page)
+        it._iter_event += self._onIteration
+        return it
+
+    def _buildYearlyArchive(self):
+        if self._yearly is not None:
+            return self._yearly
+
+        self._yearly = []
+        yearly_index = {}
+        for post in self._source.getPages():
+            year = post.datetime.strftime('%Y')
+
+            posts_this_year = yearly_index.get(year)
+            if posts_this_year is None:
+                timestamp = time.mktime(
+                        (post.datetime.year, 1, 1, 0, 0, 0, 0, 0, -1))
+                posts_this_year = BlogArchiveEntry(self._page, year, timestamp)
+                self._yearly.append(posts_this_year)
+                yearly_index[year] = posts_this_year
+
+            posts_this_year._data_source.append(post)
+        self._yearly = sorted(self._yearly,
+                key=lambda e: e.timestamp,
+                reverse=True)
+        self._onIteration()
+        return self._yearly
+
+    def _buildMonthlyArchive(self):
+        if self._monthly is not None:
+            return self._monthly
+
+        self._monthly = []
+        for post in self._source.getPages():
+            month = post.datetime.strftime('%B %Y')
+
+            posts_this_month = next(
+                    filter(lambda m: m.name == month, self._monthly),
+                    None)
+            if posts_this_month is None:
+                timestamp = time.mktime(
+                        (post.datetime.year, post.datetime.month, 1,
+                            0, 0, 0, 0, 0, -1))
+                posts_this_month = BlogArchiveEntry(self._page, month, timestamp)
+                self._monthly.append(posts_this_month)
+
+            posts_this_month._data_source.append(post)
+        self._monthly = sorted(self._monthly,
+                key=lambda e: e.timestamp,
+                reverse=True)
+        self._onIteration()
+        return self._monthly
+
+    def _buildTaxonomy(self, tax_name):
+        if tax_name in self._taxonomies:
+            return self._taxonomies[tax_name]
+
+        tax_cfg = self._page.app.config.get('site/taxonomies/' + tax_name)
+        tax = Taxonomy(tax_name, tax_cfg)
+
+        posts_by_tax_value = {}
+        for post in self._source.getPages():
+            tax_values = post.config.get(tax.setting_name)
+            if tax_values is None:
+                continue
+            if not isinstance(tax_values, list):
+                tax_values = [tax_values]
+            for val in tax_values:
+                posts = posts_by_tax_value.setdefault(val, [])
+                posts.append(post)
+
+        entries = []
+        for value, ds in posts_by_tax_value.items():
+            source = ArraySource(self._page.app, ds)
+            entries.append(BlogTaxonomyEntry(self._page, source, value))
+        self._taxonomies[tax_name] = sorted(entries, key=lambda k: k.name)
+
+        self._onIteration()
+        return self._taxonomies[tax_name]
+
+    def _onIteration(self):
+        if not self._ctx_set:
+            rcs = self._page.app.env.render_ctx_stack
+            if rcs.current_ctx is not None:
+                rcs.current_ctx.addUsedSource(self._source.name)
+            self._ctx_set = True
+
+
+class BlogArchiveEntry(object):
+    debug_render = ['name', 'timestamp', 'posts']
+    debug_render_invoke = ['name', 'timestamp', 'posts']
+
+    def __init__(self, page, name, timestamp):
+        self.name = name
+        self.timestamp = timestamp
+        self._page = page
+        self._data_source = []
+        self._iterator = None
+
+    def __str__(self):
+        return self.name
+
+    def __int__(self):
+        return int(self.name)
+
+    @property
+    def posts(self):
+        self._load()
+        self._iterator.reset()
+        return self._iterator
+
+    def _load(self):
+        if self._iterator is not None:
+            return
+        source = ArraySource(self._page.app, self._data_source)
+        self._iterator = PageIterator(source, current_page=self._page)
+
+
+class BlogTaxonomyEntry(object):
+    debug_render = ['name', 'post_count', 'posts']
+    debug_render_invoke = ['name', 'post_count', 'posts']
+
+    def __init__(self, page, source, property_value):
+        self._page = page
+        self._source = source
+        self._property_value = property_value
+        self._iterator = None
+
+    def __str__(self):
+        return self._property_value
+
+    @property
+    def name(self):
+        return self._property_value
+
+    @property
+    def posts(self):
+        self._load()
+        self._iterator.reset()
+        return self._iterator
+
+    @property
+    def post_count(self):
+        return self._source.page_count
+
+    def _load(self):
+        if self._iterator is not None:
+            return
+
+        self._iterator = PageIterator(self._source, current_page=self._page)
+
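
`BlogDataProvider` exposes a read-only mapping interface. A short usage
sketch, assuming a constructed `provider` for a posts source and a `tags`
taxonomy declared under `site/taxonomies` (both hypothetical here):

    # `provider['years']` yields BlogArchiveEntry objects; a taxonomy
    # name yields BlogTaxonomyEntry objects sorted by name.
    for year in provider['years']:
        print(year.name, len(list(year.posts)))
    for tag in provider['tags']:
        print(tag.name, tag.post_count)
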
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/dataproviders/page_iterator.py	Sun May 21 00:06:59 2017 -0700
@@ -0,0 +1,345 @@
+import logging
+from piecrust.data.filters import PaginationFilter
+from piecrust.data.paginationdata import PaginationData
+from piecrust.events import Event
+from piecrust.dataproviders.base import DataProvider
+from piecrust.sources.base import AbortedSourceUseError
+
+
+logger = logging.getLogger(__name__)
+
+
+class PageIteratorDataProvider(DataProvider):
+    """ A data provider that reads a content source as a list of pages.
+
+        This class supports wrapping another `PageIteratorDataProvider`
+        instance because several sources may want to be merged under the
+        same data endpoint (e.g. `site.pages` which lists both the user
+        pages and the theme pages).
+    """
+    PROVIDER_NAME = 'page_iterator'
+
+    debug_render_doc_dynamic = ['_debugRenderDoc']
+    debug_render_not_empty = True
+
+    def __init__(self, source, current_page=None):
+        super().__init__(source)
+        self._it = PageIterator(source, current_page=current_page)
+        self._it._iter_event += self._onIteration
+        self._innerProvider = None
+        self._iterated = False
+
+    def _wrapDataProvider(self, provider):
+        # Chain another `PageIteratorDataProvider` after this one, as
+        # described in the class docstring.
+        self._innerProvider = provider
+
+    def __len__(self):
+        res = len(self._it)
+        if self._innerProvider is not None:
+            res += len(self._innerProvider)
+        return res
+
+    def __iter__(self):
+        yield from self._it
+        if self._innerProvider is not None:
+            yield from self._innerProvider
+
+    def _onIteration(self):
+        if not self._iterated:
+            rcs = self._source.app.env.render_ctx_stack
+            rcs.current_ctx.addUsedSource(self._source.name)
+            self._iterated = True
+
+    def _debugRenderDoc(self):
+        return 'Provides a list of %d items' % len(self)
+
+
+class PageIterator:
+    def __init__(self, source, *,
+                 current_page=None, pagination_filter=None, locked=False):
+        self._source = source
+        self._cache = None
+        self._pagination_slicer = None
+        self._has_sorter = False
+        self._next_page = None
+        self._prev_page = None
+        self._locked = locked
+        self._iter_event = Event()
+        self._current_page = current_page
+        self._it = PageContentSourceIterator(self._source)
+        if pagination_filter is not None:
+            # The paginator passes a pre-built `PaginationFilter`;
+            # apply it at the bottom of the iterator chain.
+            self._it = PaginationFilterIterator(
+                self._it, pagination_filter)
+
+    @property
+    def total_count(self):
+        self._load()
+        if self._pagination_slicer is not None:
+            return self._pagination_slicer.inner_count
+        return len(self._cache)
+
+    @property
+    def next_page(self):
+        self._load()
+        return self._next_page
+
+    @property
+    def prev_page(self):
+        self._load()
+        return self._prev_page
+
+    def __len__(self):
+        self._load()
+        return len(self._cache)
+
+    def __getitem__(self, key):
+        self._load()
+        return self._cache[key]
+
+    def __iter__(self):
+        self._load()
+        return iter(self._cache)
+
+    def __getattr__(self, name):
+        if name[:3] == 'is_' or name[:3] == 'in_':
+            def is_filter(value):
+                conf = {'is_%s' % name[3:]: value}
+                return self._simpleNonSortedWrap(SettingFilterIterator, conf)
+            return is_filter
+
+        if name[:4] == 'has_':
+            def has_filter(value):
+                conf = {name: value}
+                return self._simpleNonSortedWrap(SettingFilterIterator, conf)
+            return has_filter
+
+        if name[:5] == 'with_':
+            def has_filter(value):
+                conf = {'has_%s' % name[5:]: value}
+                return self._simpleNonSortedWrap(SettingFilterIterator, conf)
+            return has_filter
+
+        return self.__getattribute__(name)
+
+    def skip(self, count):
+        return self._simpleWrap(SliceIterator, count)
+
+    def limit(self, count):
+        return self._simpleWrap(SliceIterator, 0, count)
+
+    def slice(self, skip, limit):
+        return self._simpleWrap(SliceIterator, skip, limit)
+
+    def filter(self, filter_name):
+        if self._current_page is None:
+            raise Exception("Can't use `filter()` because no parent page was "
+                            "set for this page iterator.")
+        filter_conf = self._current_page.config.get(filter_name)
+        if filter_conf is None:
+            raise Exception("Couldn't find filter '%s' in the configuration "
+                            "header for page: %s" %
+                            (filter_name, self._current_page.path))
+        return self._simpleNonSortedWrap(SettingFilterIterator, filter_conf)
+
+    def sort(self, setting_name, reverse=False):
+        if not setting_name:
+            raise Exception("You need to specify a configuration setting "
+                            "to sort by.")
+        self._ensureUnlocked()
+        self._ensureUnloaded()
+        self._it = SettingSortIterator(self._it, setting_name, reverse)
+        self._has_sorter = True
+        return self
+
+    def reset(self):
+        self._ensureUnlocked()
+        self._unload()
+        return self
+
+    @property
+    def _is_loaded(self):
+        return self._cache is not None
+
+    @property
+    def _has_more(self):
+        if self._cache is None:
+            return False
+        if self._pagination_slicer:
+            return self._pagination_slicer.has_more
+        return False
+
+    def _simpleWrap(self, it_class, *args, **kwargs):
+        self._ensureUnlocked()
+        self._ensureUnloaded()
+        self._ensureSorter()
+        self._it = it_class(self._it, *args, **kwargs)
+        if self._pagination_slicer is None and it_class is SliceIterator:
+            self._pagination_slicer = self._it
+            self._pagination_slicer.current_page = self._current_page
+        return self
+
+    def _simpleNonSortedWrap(self, it_class, *args, **kwargs):
+        self._ensureUnlocked()
+        self._ensureUnloaded()
+        self._it = it_class(self._it, *args, **kwargs)
+        return self
+
+    def _ensureUnlocked(self):
+        if self._locked:
+            raise Exception(
+                "This page iterator has been locked and can't be modified.")
+
+    def _ensureUnloaded(self):
+        if self._cache:
+            raise Exception(
+                "This page iterator has already been iterated upon and "
+                "can't be modified anymore.")
+
+    def _ensureSorter(self):
+        if self._has_sorter:
+            return
+        self._it = DateSortIterator(self._it, reverse=True)
+        self._has_sorter = True
+
+    def _unload(self):
+        self._it = PageContentSourceIterator(self._source)
+        self._cache = None
+        self._pagination_slicer = None
+        self._has_sorter = False
+        self._next_page = None
+        self._prev_page = None
+
+    def _load(self):
+        if self._cache is not None:
+            return
+
+        if self._source.app.env.abort_source_use:
+            if self._current_page is not None:
+                logger.debug("Aborting iteration of '%s' from: %s." %
+                             (self._source.name,
+                              self._current_page.content_spec))
+            else:
+                logger.debug("Aborting iteration of '%s'." %
+                             self._source.name)
+            raise AbortedSourceUseError()
+
+        self._ensureSorter()
+
+        tail_it = PaginationDataBuilderIterator(self._it, self._source.route)
+        self._cache = list(tail_it)
+
+        if (self._current_page is not None and
+                self._pagination_slicer is not None):
+            pn = [self._pagination_slicer.prev_page,
+                  self._pagination_slicer.next_page]
+            pn_it = PaginationDataBuilderIterator(iter(pn),
+                                                  self._source.route)
+            self._prev_page, self._next_page = list(pn_it)
+
+        self._iter_event.fire()
+
+    def _debugRenderDoc(self):
+        return "Contains %d items" % len(self)
+
+
+class SettingFilterIterator:
+    def __init__(self, it, fil_conf):
+        self.it = it
+        self.fil_conf = fil_conf
+        self._fil = None
+
+    def __iter__(self):
+        if self._fil is None:
+            self._fil = PaginationFilter()
+            self._fil.addClausesFromConfig(self.fil_conf)
+
+        for i in self.it:
+            if self._fil.pageMatches(i):
+                yield i
+
+
+class PaginationFilterIterator:
+    def __init__(self, it, fil):
+        self.it = it
+        self._fil = fil
+
+    def __iter__(self):
+        for page in self.it:
+            if self._fil.pageMatches(page):
+                yield page
+
+
+class SliceIterator:
+    def __init__(self, it, offset=0, limit=-1):
+        self.it = it
+        self.offset = offset
+        self.limit = limit
+        self.current_page = None
+        self.has_more = False
+        self.inner_count = -1
+        self.next_page = None
+        self.prev_page = None
+        self._cache = None
+
+    def __iter__(self):
+        if self._cache is None:
+            inner_list = list(self.it)
+            self.inner_count = len(inner_list)
+
+            if self.limit > 0:
+                self.has_more = self.inner_count > (self.offset + self.limit)
+                self._cache = inner_list[self.offset:self.offset + self.limit]
+            else:
+                self.has_more = False
+                self._cache = inner_list[self.offset:]
+
+            if self.current_page:
+                try:
+                    idx = inner_list.index(self.current_page)
+                except ValueError:
+                    idx = -1
+                if idx >= 0:
+                    if idx < self.inner_count - 1:
+                        self.next_page = inner_list[idx + 1]
+                    if idx > 0:
+                        self.prev_page = inner_list[idx - 1]
+
+        return iter(self._cache)
+
+
+class SettingSortIterator:
+    def __init__(self, it, name, reverse=False):
+        self.it = it
+        self.name = name
+        self.reverse = reverse
+
+    def __iter__(self):
+        return iter(sorted(self.it, key=self._key_getter,
+                           reverse=self.reverse))
+
+    def _key_getter(self, item):
+        key = item.config.get(self.name)
+        if key is None:
+            return 0
+        return key
+
+
+class DateSortIterator:
+    def __init__(self, it, reverse=True):
+        self.it = it
+        self.reverse = reverse
+
+    def __iter__(self):
+        return iter(sorted(self.it,
+                           key=lambda x: x.datetime, reverse=self.reverse))
+
+
+class PageContentSourceIterator:
+    def __init__(self, source):
+        self.source = source
+
+        # This is to permit recursive traversal of the
+        # iterator chain. It acts as the end.
+        self.it = None
+
+    def __iter__(self):
+        source = self.source
+        app = source.app
+        for item in source.getAllContents():
+            yield app.getPage(source, item)
+
+
+class PaginationDataBuilderIterator:
+    def __init__(self, it, route):
+        self.it = it
+        self.route = route
+
+    def __iter__(self):
+        for page in self.it:
+            if page is not None:
+                yield PaginationData(page)
+            else:
+                yield None
+
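
`PageIterator` builds its result lazily: each call such as `has_tags(...)` or
`slice(...)` wraps another iterator around the chain, and `_load()` only runs
(adding the implicit date sort and the `PaginationData` conversion) when the
iterator is first consumed. A hedged sketch, assuming a PieCrust `app` with a
`posts` source:

    from piecrust.dataproviders.page_iterator import PageIterator

    it = PageIterator(app.getSource('posts'))
    it.has_tags('piecrust')   # wraps a SettingFilterIterator
    it.slice(0, 5)            # wraps a SliceIterator (pagination slicer)
    for data in it:           # triggers _load(): sort, slice, convert
        print(data)
    print(it.total_count)     # item count before the slice was applied
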
--- a/piecrust/events.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/events.py	Sun May 21 00:06:59 2017 -0700
@@ -1,5 +1,7 @@
 
 class Event(object):
+    """ A simple implementation of a subscribable event.
+    """
     def __init__(self):
         self._handlers = []
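
`Event` is the subscription primitive used throughout this changeset (e.g.
`PageIterator._iter_event`). Handlers are added with `+=` and invoked by
`fire()`; a minimal sketch:

    from piecrust.events import Event

    def on_iteration():
        print("iterated!")

    ev = Event()
    ev += on_iteration   # subscribe, as the data providers do
    ev.fire()            # calls every handler; prints "iterated!"
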
 
--- a/piecrust/page.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/page.py	Sun May 21 00:06:59 2017 -0700
@@ -1,7 +1,5 @@
 import re
-import sys
 import json
-import os.path
 import hashlib
 import logging
 import datetime
@@ -10,7 +8,8 @@
 from werkzeug.utils import cached_property
 from piecrust.configuration import (
     Configuration, ConfigurationError,
-    parse_config_header)
+    parse_config_header,
+    MERGE_PREPEND_LISTS)
 
 
 logger = logging.getLogger(__name__)
@@ -40,41 +39,25 @@
     pass
 
 
-class QualifiedPage(object):
-    def __init__(self, page, route, route_params, *, page_num=1):
-        self.page = page
-        self.page_num = page_num
-        self.route = route
-        self.route_params = route_params
-
-    @property
-    def app(self):
-        return self.page.app
-
-    @property
-    def source(self):
-        return self.page.source
-
-    @cached_property
-    def uri(self):
-        return self.route.getUri(self.route_params, self.page_num)
-
-    def getSubPage(self, page_num):
-        return QualifiedPage(self.page, self.route, self.route_params,
-                             page_num=self.page_num + 1)
-
-
-class Page(object):
-    def __init__(self, content_item):
+class Page:
+    """ Represents a page: a piece of text content with optional YAML
+        front-matter, which gets rendered through the page pipeline.
+    """
+    def __init__(self, source, content_item):
+        self.source = source
         self.content_item = content_item
         self._config = None
         self._segments = None
         self._flags = FLAG_NONE
         self._datetime = None
 
-    @property
-    def source(self):
-        return self.content_item.source
+    @cached_property
+    def app(self):
+        return self.source.app
+
+    @cached_property
+    def route(self):
+        return self.source.route
 
     @property
     def source_metadata(self):
@@ -84,13 +67,9 @@
     def content_spec(self):
         return self.content_item.spec
 
-    @property
-    def app(self):
-        return self.content_item.source.app
-
     @cached_property
     def content_mtime(self):
-        return self.content_item.getmtime()
+        return self.source.getItemMtime(self.content_item)
 
     @property
     def flags(self):
@@ -110,67 +89,82 @@
     def datetime(self):
         if self._datetime is None:
             try:
-                if 'datetime' in self.source_metadata:
-                    # Get the date/time from the source.
-                    self._datetime = self.source_metadata['datetime']
-                elif 'date' in self.source_metadata:
-                    # Get the date from the source. Potentially get the
-                    # time from the page config.
-                    page_date = self.source_metadata['date']
-                    page_time = _parse_config_time(self.config.get('time'))
-                    if page_time is not None:
-                        self._datetime = datetime.datetime(
-                            page_date.year,
-                            page_date.month,
-                            page_date.day) + page_time
-                    else:
-                        self._datetime = datetime.datetime(
-                            page_date.year, page_date.month, page_date.day)
-                elif 'date' in self.config:
-                    # Get the date from the page config, and maybe the
-                    # time too.
-                    page_date = _parse_config_date(self.config.get('date'))
-                    self._datetime = datetime.datetime(
-                        page_date.year,
-                        page_date.month,
-                        page_date.day)
-                    page_time = _parse_config_time(self.config.get('time'))
-                    if page_time is not None:
-                        self._datetime += page_time
-                else:
-                    # No idea what the date/time for this page is.
-                    self._datetime = datetime.datetime.fromtimestamp(0)
+                self._datetime = self._computeDateTime()
             except Exception as ex:
                 logger.exception(ex)
                 raise Exception(
                     "Error computing time for page: %s" %
-                    self.path) from ex
+                    self.content_spec) from ex
+
+            if self._datetime is None:
+                self._datetime = datetime.datetime.fromtimestamp(
+                    self.content_mtime)
+
         return self._datetime
 
     @datetime.setter
     def datetime(self, value):
         self._datetime = value
 
+    def getUri(self, sub_num=1):
+        route_params = self.source_metadata['route_params']
+        return self.route.getUri(route_params, sub_num=sub_num)
+
     def getSegment(self, name='content'):
         return self.segments[name]
 
+    def _computeDateTime(self):
+        if 'datetime' in self.source_metadata:
+            # Get the date/time from the source.
+            return self.source_metadata['datetime']
+        elif 'date' in self.source_metadata:
+            # Get the date from the source. Potentially get the
+            # time from the page config.
+            page_date = self.source_metadata['date']
+            page_time = _parse_config_time(self.config.get('time'))
+            if page_time is not None:
+                return datetime.datetime(
+                    page_date.year,
+                    page_date.month,
+                    page_date.day) + page_time
+            return datetime.datetime(
+                page_date.year, page_date.month, page_date.day)
+        elif 'date' in self.config:
+            # Get the date from the page config, and maybe the
+            # time too.
+            page_date = _parse_config_date(self.config.get('date'))
+            page_datetime = datetime.datetime(
+                page_date.year,
+                page_date.month,
+                page_date.day)
+            page_time = _parse_config_time(self.config.get('time'))
+            if page_time is not None:
+                page_datetime += page_time
+            return page_datetime
+        # No idea what the date/time for this page is; the caller
+        # falls back to the content's modification time.
+        return None
+
     def _load(self):
         if self._config is not None:
             return
 
         config, content, was_cache_valid = load_page(
-            self.app, self.path, self.path_mtime)
+            self.source, self.content_item)
 
-        if 'config' in self.source_metadata:
-            config.merge(self.source_metadata['config'])
+        extra_config = self.source_metadata.get('config')
+        if extra_config is not None:
+            # Merge the source metadata configuration settings with the
+            # configuration settings from the page's contents. We only
+            # prepend to lists, i.e. we don't overwrite values because we
+            # want to keep what the user wrote in the file.
+            config.merge(extra_config, mode=MERGE_PREPEND_LISTS)
 
         self._config = config
         self._segments = content
         if was_cache_valid:
             self._flags |= FLAG_RAW_CACHE_VALID
 
-        self.source.finalizeConfig(self)
-
 
 def _parse_config_date(page_date):
     if page_date is None:
@@ -216,10 +210,8 @@
 
 
 class PageLoadingError(Exception):
-    def __init__(self, path, inner=None):
-        super(PageLoadingError, self).__init__(
-            "Error loading page: %s" % path,
-            inner)
+    def __init__(self, spec):
+        super().__init__("Error loading page: %s" % spec)
 
 
 class ContentSegment(object):
@@ -267,23 +259,22 @@
     return data
 
 
-def load_page(app, path, path_mtime=None):
+def load_page(source, content_item):
     try:
-        with app.env.timerScope('PageLoad'):
-            return _do_load_page(app, path, path_mtime)
+        with source.app.env.stats.timerScope('PageLoad'):
+            return _do_load_page(source, content_item)
     except Exception as e:
-        logger.exception(
-            "Error loading page: %s" %
-            os.path.relpath(path, app.root_dir))
-        _, __, traceback = sys.exc_info()
-        raise PageLoadingError(path, e).with_traceback(traceback)
+        logger.exception("Error loading page: %s" % content_item.spec)
+        raise PageLoadingError(content_item.spec) from e
 
 
-def _do_load_page(app, path, path_mtime):
+def _do_load_page(source, content_item):
     # Check the cache first.
+    app = source.app
     cache = app.cache.getCache('pages')
-    cache_path = hashlib.md5(path.encode('utf8')).hexdigest() + '.json'
-    page_time = path_mtime or os.path.getmtime(path)
+    cache_token = "%s@%s" % (source.name, content_item.spec)
+    cache_path = hashlib.md5(cache_token.encode('utf8')).hexdigest() + '.json'
+    page_time = source.getItemMtime(content_item)
     if cache.isValid(cache_path, page_time):
         cache_data = json.loads(
             cache.read(cache_path),
@@ -295,16 +286,11 @@
         return config, content, True
 
     # Nope, load the page from the source file.
-    logger.debug("Loading page configuration from: %s" % path)
-    with open(path, 'r', encoding='utf-8') as fp:
+    logger.debug("Loading page configuration from: %s" % content_item.spec)
+    with source.openItem(content_item, 'r', encoding='utf-8') as fp:
         raw = fp.read()
     header, offset = parse_config_header(raw)
 
-    if 'format' not in header:
-        auto_formats = app.config.get('site/auto_formats')
-        name, ext = os.path.splitext(path)
-        header['format'] = auto_formats.get(ext, None)
-
     config = PageConfiguration(header)
     content = parse_segments(raw, offset)
     config.set('segments', list(content.keys()))
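
The new merge step keeps user-authored settings intact: values injected by a
content source no longer overwrite what is in the page file, and lists are
prepended. An illustration of the intended outcome (the exact semantics live
in `piecrust.configuration.Configuration.merge` and `MERGE_PREPEND_LISTS`, so
treat the details as an assumption):

    # Settings parsed from the page file itself:
    page_config = {'title': 'Hello', 'tags': ['from-page']}
    # Settings injected by the content source:
    source_config = {'tags': ['from-source']}

    # After config.merge(source_config, mode=MERGE_PREPEND_LISTS),
    # list values are prepended rather than replaced:
    #   {'title': 'Hello', 'tags': ['from-source', 'from-page']}
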
--- a/piecrust/pipelines/_procrecords.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/pipelines/_procrecords.py	Sun May 21 00:06:59 2017 -0700
@@ -10,7 +10,6 @@
 
     def __init__(self):
         super().__init__()
-        self.out_paths = []
         self.flags = self.FLAG_NONE
         self.proc_tree = None
 
@@ -32,4 +31,35 @@
     def was_collapsed_from_last_run(self):
         return self.flags & self.FLAG_COLLAPSED_FROM_LAST_RUN
 
+    def describe(self):
+        d = super().describe()
+        d['Flags'] = _get_flag_descriptions(self.flags)
+        d['Processing Tree'] = _format_proc_tree(self.proc_tree, 20 * ' ')
+        return d
 
+
+flag_descriptions = {
+    AssetPipelineRecordEntry.FLAG_PREPARED: 'prepared',
+    AssetPipelineRecordEntry.FLAG_PROCESSED: 'processed',
+    AssetPipelineRecordEntry.FLAG_BYPASSED_STRUCTURED_PROCESSING: 'external',
+    AssetPipelineRecordEntry.FLAG_COLLAPSED_FROM_LAST_RUN: 'from last run'}
+
+
+def _get_flag_descriptions(flags):
+    res = []
+    for k, v in flag_descriptions.items():
+        if flags & k:
+            res.append(v)
+    if res:
+        return ', '.join(res)
+    return 'none'
+
+
+def _format_proc_tree(tree, margin='', level=0):
+    name, children = tree
+    res = '%s%s+ %s\n' % (margin if level > 0 else '', level * '  ', name)
+    if children:
+        for c in children:
+            res += _format_proc_tree(c, margin, level + 1)
+    return res
+
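
`_format_proc_tree` expects the `(name, children)` tuples produced by
`get_node_name_tree`. A small illustration of the input and output shape,
assuming the module context above:

    tree = ('foo.scss', [('foo.css', [('foo.min.css', [])])])
    print(_format_proc_tree(tree, margin=20 * ' '))
    # + foo.scss
    # ...children printed indented under the 20-column margin, so the
    # tree lines up with the 'Processing Tree' label in describe().
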
--- a/piecrust/pipelines/asset.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/pipelines/asset.py	Sun May 21 00:06:59 2017 -0700
@@ -17,7 +17,7 @@
 
 class AssetPipeline(ContentPipeline):
     PIPELINE_NAME = 'asset'
-    RECORD_CLASS = AssetPipelineRecordEntry
+    RECORD_ENTRY_CLASS = AssetPipelineRecordEntry
 
     def __init__(self, source):
         if not isinstance(source, FSContentSourceBase):
@@ -68,22 +68,23 @@
         if re_matchany(rel_path, self.ignore_patterns):
             return
 
-        record = result.record
+        record_entry = result.record_entry
         stats = self.app.env.stats
 
         # Build the processing tree for this job.
         with stats.timerScope('BuildProcessingTree'):
             builder = ProcessingTreeBuilder(self._processors)
             tree_root = builder.build(rel_path)
-            record.flags |= AssetPipelineRecordEntry.FLAG_PREPARED
+            record_entry.flags |= AssetPipelineRecordEntry.FLAG_PREPARED
 
         # Prepare and run the tree.
         print_node(tree_root, recursive=True)
         leaves = tree_root.getLeaves()
-        record.rel_outputs = [l.path for l in leaves]
-        record.proc_tree = get_node_name_tree(tree_root)
+        record_entry.out_paths = [os.path.join(ctx.out_dir, l.path)
+                                  for l in leaves]
+        record_entry.proc_tree = get_node_name_tree(tree_root)
         if tree_root.getProcessor().is_bypassing_structured_processing:
-            record.flags |= (
+            record_entry.flags |= (
                 AssetPipelineRecordEntry.FLAG_BYPASSED_STRUCTURED_PROCESSING)
 
         if ctx.force:
@@ -93,29 +94,11 @@
             runner = ProcessingTreeRunner(
                 self._base_dir, self.tmp_dir, ctx.out_dir)
             if runner.processSubTree(tree_root):
-                record.flags |= (
+                record_entry.flags |= (
                     AssetPipelineRecordEntry.FLAG_PROCESSED)
 
-    def shutdown(self, ctx):
-        # Invoke post-processors.
-        proc_ctx = ProcessorContext(self, ctx)
-        for proc in self._processors:
-            proc.onPipelineEnd(proc_ctx)
-
-    def collapseRecords(self, record_history):
-        for prev, cur in record_history.diffs():
-            if prev and cur and not cur.was_processed:
-                # This asset wasn't processed, so the information from
-                # last time is still valid.
-                cur.flags = (
-                    prev.flags &
-                    (~AssetPipelineRecordEntry.FLAG_PROCESSED |
-                     AssetPipelineRecordEntry.FLAG_COLLAPSED_FROM_LAST_RUN))
-                cur.out_paths = list(prev.out_paths)
-                cur.errors = list(prev.errors)
-
-    def getDeletions(self, record_history):
-        for prev, cur in record_history.diffs():
+    def getDeletions(self, ctx):
+        for prev, cur in ctx.record_history.diffs:
             if prev and not cur:
                 for p in prev.out_paths:
                     yield (p, 'previous asset was removed')
@@ -124,6 +107,23 @@
                 for p in diff:
                     yield (p, 'asset changed outputs')
 
+    def collapseRecords(self, ctx):
+        for prev, cur in ctx.record_history.diffs:
+            if prev and cur and not cur.was_processed:
+                # This asset wasn't processed, so the information from
+                # last time is still valid.
+                cur.flags = (
+                    (prev.flags & ~AssetPipelineRecordEntry.FLAG_PROCESSED) |
+                    AssetPipelineRecordEntry.FLAG_COLLAPSED_FROM_LAST_RUN)
+                cur.out_paths = list(prev.out_paths)
+                cur.errors = list(prev.errors)
+
+    def shutdown(self, ctx):
+        # Invoke post-processors.
+        proc_ctx = ProcessorContext(self, ctx)
+        for proc in self._processors:
+            proc.onPipelineEnd(proc_ctx)
+
 
 split_processor_names_re = re.compile(r'[ ,]+')
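
`getDeletions` and `collapseRecords` both walk the `(previous, current)` entry
pairs yielded by `ctx.record_history.diffs`. A tiny sketch of the set
arithmetic behind the second deletion case above (a hypothetical helper, not
part of the pipeline API):

    def stale_outputs(prev_out_paths, cur_out_paths):
        # Outputs the previous bake produced that the current one no
        # longer does; these get deleted as 'asset changed outputs'.
        return set(prev_out_paths) - set(cur_out_paths)

    print(stale_outputs(['a.css', 'b.css'], ['a.css']))  # {'b.css'}
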
 
--- a/piecrust/pipelines/base.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/pipelines/base.py	Sun May 21 00:06:59 2017 -0700
@@ -30,10 +30,17 @@
         """
         return self.worker_id < 0
 
+    @property
+    def current_record(self):
+        return self.record_history.current
+
 
 class PipelineResult:
-    def __init__(self, record):
-        self.record = record
+    """ Result of running a pipeline on a content item.
+    """
+    def __init__(self):
+        self.pipeline_name = None
+        self.record_entry = None
 
 
 class ContentPipeline:
@@ -41,7 +48,7 @@
     """
     PIPELINE_NAME = None
     PIPELINE_PASSES = 1
-    RECORD_CLASS = None
+    RECORD_ENTRY_CLASS = None
 
     def __init__(self, source):
         self.source = source
@@ -63,11 +70,11 @@
     def run(self, content_item, ctx, result):
         raise NotImplementedError()
 
-    def shutdown(self, ctx):
+    def getDeletions(self, ctx):
         pass
 
-    def collapseRecords(self, record_history):
+    def collapseRecords(self, ctx):
         pass
 
-    def getDeletions(self, record_history):
+    def shutdown(self, ctx):
         pass
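
The reordered hooks now read in life-cycle order: `run` per content item, then
`getDeletions`, `collapseRecords`, and `shutdown`. A minimal sketch of a
custom pipeline under this API, assuming subclasses are registered the same
way the asset pipeline is:

    from piecrust.pipelines.base import ContentPipeline
    from piecrust.pipelines.records import RecordEntry

    class NoopPipeline(ContentPipeline):
        PIPELINE_NAME = 'noop'
        RECORD_ENTRY_CLASS = RecordEntry

        def run(self, content_item, ctx, result):
            # Record the item without producing any outputs.
            result.record_entry.item_spec = content_item.spec
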
--- a/piecrust/pipelines/records.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/pipelines/records.py	Sun May 21 00:06:59 2017 -0700
@@ -10,6 +10,8 @@
 
 
 class MultiRecord:
+    """ A container that includes multiple `Record` instances.
+    """
     RECORD_VERSION = 12
 
     def __init__(self):
@@ -28,7 +30,7 @@
                 return r
         if not auto_create:
             return None
-        record = Record()
+        record = Record(record_name)
         self.records.append(record)
         return record
 
@@ -48,23 +50,31 @@
 
 
 class Record:
-    def __init__(self):
-        self.name = None
+    """ A basic class that represents a 'record' of a bake operation on a
+        content source.
+    """
+    def __init__(self, name):
+        self.name = name
         self.entries = []
-        self.stats = {}
-        self.out_dir = None
+        self.deleted_out_paths = []
         self.success = True
 
 
 class RecordEntry:
+    """ An entry in a record, for a specific content item.
+    """
     def __init__(self):
         self.item_spec = None
+        self.out_paths = []
         self.errors = []
 
     @property
     def success(self):
         return len(self.errors) == 0
 
+    def describe(self):
+        return {}
+
 
 def _are_records_valid(multi_record):
     return (multi_record._app_version == APP_VERSION and
@@ -101,6 +111,9 @@
 
 
 class MultiRecordHistory:
+    """ Tracks the differences between an 'old' and a 'new' record
+        container.
+    """
     def __init__(self, previous, current):
         if previous is None or current is None:
             raise ValueError()
@@ -114,7 +127,13 @@
         for h in self.histories:
             if h.name == record_name:
                 return h
-        return None
+        rh = RecordHistory(
+            Record(record_name),
+            Record(record_name))
+        self.histories.append(rh)
+        self.previous.records.append(rh.previous)
+        self.current.records.append(rh.current)
+        return rh
 
     def _buildHistories(self, previous, current):
         pairs = {}
@@ -128,22 +147,30 @@
                     raise Exception("Got several records named: %s" % r.name)
                 pairs[r.name] = (p[0], r)
 
-        for p, c in pairs.values():
+        for name, pair in pairs.items():
+            p, c = pair
+            if p is None:
+                p = Record(name)
+                previous.records.append(p)
+            if c is None:
+                c = Record(name)
+                current.records.append(c)
             self.histories.append(RecordHistory(p, c))
 
 
 class RecordHistory:
     def __init__(self, previous, current):
-        self._diffs = {}
-        self._previous = previous
-        self._current = current
+        if previous is None or current is None:
+            raise ValueError()
 
-        if previous and current and previous.name != current.name:
+        if previous.name != current.name:
             raise Exception("The two records must have the same name! "
                             "Got '%s' and '%s'." %
                             (previous.name, current.name))
 
-        self._buildDiffs()
+        self._previous = previous
+        self._current = current
+        self._diffs = None
 
     @property
     def name(self):
@@ -159,9 +186,15 @@
 
     @property
     def diffs(self):
+        if self._diffs is None:
+            raise Exception("This record history hasn't been built yet.")
         return self._diffs.values()
 
-    def _buildDiffs(self):
+    def build(self):
+        if self._diffs is not None:
+            raise Exception("This record history has already been built.")
+
+        self._diffs = {}
         if self._previous is not None:
             for e in self._previous.entries:
                 key = _build_diff_key(e.item_spec)
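
A short sketch of how a pipeline is expected to drive the record history,
using only the API shown above (`build()` must run before `diffs` can be
read):

    from piecrust.pipelines.records import (
        Record, RecordEntry, RecordHistory)

    prev, cur = Record('asset'), Record('asset')
    entry = RecordEntry()
    entry.item_spec = 'assets/theme.css'
    prev.entries.append(entry)

    history = RecordHistory(prev, cur)
    history.build()
    for prev_entry, cur_entry in history.diffs:
        # Yields (entry, None) here: the asset existed in the previous
        # bake but not in the current one.
        print(prev_entry, cur_entry)
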
--- a/piecrust/processing/compressors.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/processing/compressors.py	Sun May 21 00:06:59 2017 -0700
@@ -17,7 +17,7 @@
         self._conf = None
 
     def matches(self, path):
-        return path.endswith('.css')
+        return path.endswith('.css') and not path.endswith('.min.css')
 
     def getOutputFilenames(self, filename):
         self._ensureInitialized()
@@ -73,6 +73,9 @@
         super(UglifyJSProcessor, self).__init__({'js': 'js'})
         self._conf = None
 
+    def matches(self, path):
+        return path.endswith('.js') and not path.endswith('.min.js')
+
     def _doProcess(self, in_path, out_path):
         self._ensureInitialized()
 
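
The extra guards keep the minifiers from re-processing their own output. The
predicate boils down to this standalone illustration:

    def is_minifiable(path, ext):
        # Mirrors the new matches() overrides above.
        return (path.endswith('.' + ext) and
                not path.endswith('.min.' + ext))

    assert is_minifiable('css/site.css', 'css')
    assert not is_minifiable('css/site.min.css', 'css')
    assert not is_minifiable('js/app.min.js', 'js')
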
--- a/piecrust/processing/sitemap.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/processing/sitemap.py	Sun May 21 00:06:59 2017 -0700
@@ -1,7 +1,9 @@
+import os
+import os.path
 import time
 import logging
 import yaml
-from piecrust.data.iterators import PageIterator
+from piecrust.dataproviders.page_iterator import PageIterator
 from piecrust.processing.base import SimpleFileProcessor
 
 
@@ -36,11 +38,19 @@
         with open(in_path, 'r') as fp:
             sitemap = yaml.load(fp)
 
-        with open(out_path, 'w') as fp:
-            fp.write(SITEMAP_HEADER)
-            self._writeManualLocs(sitemap, fp)
-            self._writeAutoLocs(sitemap, fp)
-            fp.write(SITEMAP_FOOTER)
+        try:
+            with open(out_path, 'w') as fp:
+                fp.write(SITEMAP_HEADER)
+                self._writeManualLocs(sitemap, fp)
+                self._writeAutoLocs(sitemap, fp)
+                fp.write(SITEMAP_FOOTER)
+        except Exception:
+            # If an exception occurs, delete the output file, otherwise
+            # the pipeline will think the output was correctly produced.
+            if os.path.isfile(out_path):
+                logger.debug("Error occurred, removing output sitemap.")
+                os.unlink(out_path)
+            raise
 
         return True
 
--- a/piecrust/rendering.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/rendering.py	Sun May 21 00:06:59 2017 -0700
@@ -37,15 +37,16 @@
 
 
 class RenderedPage(object):
-    def __init__(self, qualified_page):
-        self.qualified_page = qualified_page
+    def __init__(self, page, sub_num):
+        self.page = page
+        self.sub_num = sub_num
         self.data = None
         self.content = None
         self.render_info = [None, None]
 
     @property
     def app(self):
-        return self.qualified_page.app
+        return self.page.app
 
     def copyRenderInfo(self):
         return copy.deepcopy(self.render_info)
@@ -77,8 +78,9 @@
 
 
 class RenderingContext(object):
-    def __init__(self, qualified_page, force_render=False):
-        self.qualified_page = qualified_page
+    def __init__(self, page, *, sub_num=1, force_render=False):
+        self.page = page
+        self.sub_num = sub_num
         self.force_render = force_render
         self.pagination_source = None
         self.pagination_filter = None
@@ -88,7 +90,7 @@
 
     @property
     def app(self):
-        return self.qualified_page.app
+        return self.page.app
 
     @property
     def current_pass_info(self):
@@ -138,13 +140,13 @@
 
     def hasPage(self, page):
         for ei in self._ctx_stack:
-            if ei.qualified_page.page == page:
+            if ei.page == page:
                 return True
         return False
 
     def pushCtx(self, render_ctx):
         for ctx in self._ctx_stack:
-            if ctx.qualified_page.page == render_ctx.qualified_page.page:
+            if ctx.page == render_ctx.page:
                 raise Exception("Loop detected during rendering!")
         self._ctx_stack.append(render_ctx)
 
@@ -161,7 +163,8 @@
     stack = env.render_ctx_stack
     stack.pushCtx(ctx)
 
-    qpage = ctx.qualified_page
+    page = ctx.page
+    page_uri = page.getUri(ctx.sub_num)
 
     try:
         # Build the data for both segment and layout rendering.
@@ -177,20 +180,20 @@
         with env.timerScope("PageRenderSegments"):
             if repo is not None and not ctx.force_render:
                 render_result = repo.get(
-                    qpage.uri,
+                    page_uri,
                     lambda: _do_render_page_segments(ctx, page_data),
-                    fs_cache_time=qpage.page.content_mtime,
+                    fs_cache_time=page.content_mtime,
                     save_to_fs=save_to_fs)
             else:
                 render_result = _do_render_page_segments(ctx, page_data)
                 if repo:
-                    repo.put(qpage.uri, render_result, save_to_fs)
+                    repo.put(page_uri, render_result, save_to_fs)
 
         # Render layout.
         ctx.setCurrentPass(PASS_RENDERING)
-        layout_name = qpage.page.config.get('layout')
+        layout_name = page.config.get('layout')
         if layout_name is None:
-            layout_name = qpage.page.source.config.get(
+            layout_name = page.source.config.get(
                 'default_layout', 'default')
         null_names = ['', 'none', 'nil']
         if layout_name not in null_names:
@@ -199,13 +202,13 @@
 
             with ctx.app.env.timerScope("PageRenderLayout"):
                 layout_result = _do_render_layout(
-                    layout_name, qpage, page_data)
+                    layout_name, page, page_data)
         else:
             layout_result = {
                 'content': render_result['segments']['content'],
                 'pass_info': None}
 
-        rp = RenderedPage(qpage)
+        rp = RenderedPage(page, ctx.sub_num)
         rp.data = page_data
         rp.content = layout_result['content']
         rp.render_info[PASS_FORMATTING] = _unpickle_object(
@@ -233,7 +236,8 @@
     stack = env.render_ctx_stack
     stack.pushCtx(ctx)
 
-    qpage = ctx.qualified_page
+    page = ctx.page
+    page_uri = page.getUri(ctx.sub_num)
 
     try:
         ctx.setCurrentPass(PASS_FORMATTING)
@@ -244,14 +248,14 @@
         with ctx.app.env.timerScope("PageRenderSegments"):
             if repo is not None and not ctx.force_render:
                 render_result = repo.get(
-                    qpage.uri,
+                    page_uri,
                     lambda: _do_render_page_segments_from_ctx(ctx),
-                    fs_cache_time=qpage.page.content_mtime,
+                    fs_cache_time=page.content_mtime,
                     save_to_fs=save_to_fs)
             else:
                 render_result = _do_render_page_segments_from_ctx(ctx)
                 if repo:
-                    repo.put(qpage.uri, render_result, save_to_fs)
+                    repo.put(page_uri, render_result, save_to_fs)
     finally:
         ctx.setCurrentPass(PASS_NONE)
         stack.popCtx()
@@ -264,7 +268,7 @@
 
 def _build_render_data(ctx):
     with ctx.app.env.timerScope("PageDataBuild"):
-        data_ctx = DataBuildingContext(ctx.qualified_page)
+        data_ctx = DataBuildingContext(ctx.page, ctx.sub_num)
         data_ctx.pagination_source = ctx.pagination_source
         data_ctx.pagination_filter = ctx.pagination_filter
         page_data = build_page_data(data_ctx)
@@ -279,7 +283,7 @@
 
 
 def _do_render_page_segments(ctx, page_data):
-    page = ctx.qualified_page.page
+    page = ctx.page
     app = page.app
 
     engine_name = page.config.get('template_engine')
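
With `QualifiedPage` gone, callers now hand `render_page` a bare `Page` plus a
sub-page number. A hedged sketch of the new calling convention, assuming
`app`, `source`, and `content_item` are in scope (the names come from the
serving code in this changeset):

    from piecrust.rendering import RenderingContext, render_page

    page = app.getPage(source, content_item)
    ctx = RenderingContext(page, sub_num=2, force_render=True)
    page.source.prepareRenderContext(ctx)   # as the dev server does
    rendered = render_page(ctx)
    print(rendered.content)
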
--- a/piecrust/serving/middlewares.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/serving/middlewares.py	Sun May 21 00:06:59 2017 -0700
@@ -89,8 +89,8 @@
         if not found:
             return NotFound("No such page: %s" % page_path)
 
-        ctx = DataBuildingContext(req_page.qualified_page,
-                                  page_num=req_page.page_num)
+        ctx = DataBuildingContext(req_page.page,
+                                  sub_num=req_page.sub_num)
         data = build_page_data(ctx)
 
         var_path = request.args.getlist('var')
--- a/piecrust/serving/server.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/serving/server.py	Sun May 21 00:06:59 2017 -0700
@@ -160,14 +160,15 @@
 
         # If we haven't found any good match, report all the places we didn't
         # find it at.
-        qp = req_page.qualified_page
-        if qp is None:
+        if req_page.page is None:
             msg = "Can't find path for '%s':" % request.path
             raise MultipleNotFound(msg, req_page.not_found_errors)
 
         # We have a page, let's try to render it.
-        render_ctx = RenderingContext(qp, force_render=True)
-        qp.page.source.prepareRenderContext(render_ctx)
+        render_ctx = RenderingContext(req_page.page,
+                                      sub_num=req_page.sub_num,
+                                      force_render=True)
+        req_page.page.source.prepareRenderContext(render_ctx)
 
         # Render the page.
         rendered_page = render_page(render_ctx)
--- a/piecrust/serving/util.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/serving/util.py	Sun May 21 00:06:59 2017 -0700
@@ -5,7 +5,7 @@
 import datetime
 from werkzeug.wrappers import Response
 from werkzeug.wsgi import wrap_file
-from piecrust.page import QualifiedPage, PageNotFoundError
+from piecrust.page import PageNotFoundError
 from piecrust.routing import RouteNotFoundError
 from piecrust.uriutil import split_sub_uri
 
@@ -22,7 +22,8 @@
 
 class RequestedPage(object):
     def __init__(self):
-        self.qualified_page = None
+        self.page = None
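+        # Sub-page number; 1 is the main page, numbers above 1 are
+        # pagination sub-pages (as suggested by the `> 1` checks below).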
+        self.sub_num = 1
         self.req_path = None
         self.not_found_errors = []
 
@@ -62,10 +63,11 @@
         if route_sub_num > 1:
             cur_req_path = req_path_no_num
 
-        qp = _get_requested_page_for_route(app, route, route_params,
-                                           route_sub_num)
-        if qp is not None:
-            req_page.qualified_page = qp
+        page = _get_requested_page_for_route(app, route, route_params)
+        if page is not None:
+            req_page.page = page
+            req_page.sub_num = route_sub_num
             req_page.req_path = cur_req_path
             break
 
@@ -76,16 +78,12 @@
     return req_page
 
 
-def _get_requested_page_for_route(app, route, route_params, sub_num):
+def _get_requested_page_for_route(app, route, route_params):
     source = app.getSource(route.source_name)
     item = source.findContent(route_params)
-    if item is None:
-        return None
-
-    # Build the page.
-    page = app.getPage(item)
-    qp = QualifiedPage(page, route, route_params, sub_num)
-    return qp
+    if item is not None:
+        return app.getPage(item)
+    return None
 
 
 def load_mimetype_map():
--- a/piecrust/sources/autoconfig.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/autoconfig.py	Sun May 21 00:06:59 2017 -0700
@@ -15,7 +15,9 @@
         settings to their generated pages based on those pages' paths.
     """
     def __init__(self, app, name, config):
-        DefaultContentSource.__init__(app, name, config)
+        super().__init__(app, name, config)
+
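+        # Presumably selects the data provider that exposes this source
+        # to templates (see piecrust/dataproviders/page_iterator.py).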
+        config.setdefault('data_type', 'page_iterator')
 
         self.capture_mode = config.get('capture_mode', 'path')
         if self.capture_mode not in ['path', 'dirname', 'filename']:
--- a/piecrust/sources/base.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/base.py	Sun May 21 00:06:59 2017 -0700
@@ -1,5 +1,6 @@
 import logging
 import collections
+from werkzeug.utils import cached_property
 
 
 # Source realms, to differentiate sources in the site itself ('User')
@@ -40,6 +41,12 @@
 
 class ContentItem:
     """ Describes a piece of content.
+
+        Known metadata entries that PieCrust will use include:
+        - `route_params`: A dictionary of route parameters to generate
+              the URL to the content.
+        - `config`: A dictionary of configuration settings to merge
+              into the settings found in the content itself.
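+
+        A purely illustrative example (hypothetical spec and values):
+
+            ContentItem(
+                '/site/pages/foo.md',
+                {'route_params': {'slug': 'foo'},
+                 'config': {'title': 'Foo'}})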
     """
     def __init__(self, spec, metadata):
         self.spec = spec
@@ -80,7 +87,11 @@
             return self.app.theme_dir
         return self.app.root_dir
 
-    def openItem(self, item, mode='r'):
+    @cached_property
+    def route(self):
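+        # Lazily resolved and cached; a source is assumed to keep the
+        # same route for the lifetime of the app object.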
+        return self.app.getSourceRoute(self.name)
+
+    def openItem(self, item, mode='r', **kwargs):
         raise NotImplementedError()
 
     def getItemMtime(self, item):
--- a/piecrust/sources/blogarchives.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/blogarchives.py	Sun May 21 00:06:59 2017 -0700
@@ -2,7 +2,7 @@
 import datetime
 from piecrust.chefutil import format_timed_scope
 from piecrust.data.filters import PaginationFilter, IFilterClause
-from piecrust.data.iterators import PageIterator
+from piecrust.dataproviders.page_iterator import PageIterator
 from piecrust.routing import RouteParameter
 from piecrust.sources.base import ContentSource, GeneratedContentException
 
--- a/piecrust/sources/default.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/default.py	Sun May 21 00:06:59 2017 -0700
@@ -19,6 +19,9 @@
 
     def __init__(self, app, name, config):
         super().__init__(app, name, config)
+
+        config.setdefault('data_type', 'page_iterator')
+
         self.auto_formats = app.config.get('site/auto_formats')
         self.default_auto_format = app.config.get('site/default_auto_format')
         self.supported_extensions = list(self.auto_formats)
@@ -33,7 +36,9 @@
     def _doCreateItemMetadata(self, path):
         slug = self._makeSlug(path)
         metadata = {
-            'slug': slug
+            'route_params': {
+                'slug': slug
+            }
         }
         _, ext = os.path.splitext(path)
         if ext:
--- a/piecrust/sources/fs.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/fs.py	Sun May 21 00:06:59 2017 -0700
@@ -1,4 +1,6 @@
 import os.path
+import re
+import fnmatch
 import logging
 from piecrust import osutil
 from piecrust.routing import RouteParameter
@@ -28,7 +30,6 @@
         super().__init__(app, name, config)
         self.fs_endpoint = config.get('fs_endpoint', name)
         self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
-        self._fs_filter = None
 
     def _checkFSEndpoint(self):
         if not os.path.isdir(self.fs_endpoint_path):
@@ -38,7 +39,7 @@
                                                  self.fs_endpoint_path)
         return True
 
-    def openItem(self, item, mode='r'):
+    def openItem(self, item, mode='r', encoding=None):
         for m in 'wxa+':
             if m in mode:
                 # If opening the file for writing, let's make sure the
@@ -47,11 +48,14 @@
                 if not os.path.exists(dirname):
                     os.makedirs(dirname, 0o755)
                 break
-        return open(item.spec, mode)
+        return open(item.spec, mode, encoding=encoding)
 
     def getItemMtime(self, item):
         return os.path.getmtime(item.spec)
 
+    def describe(self):
+        return {'endpoint_path': self.fs_endpoint_path}
+
 
 class FSContentSource(FSContentSourceBase):
     """ Implements a `ContentSource` that simply returns files on disk
@@ -59,6 +63,15 @@
     """
     SOURCE_NAME = 'fs'
 
+    def __init__(self, app, name, config):
+        super().__init__(app, name, config)
+
+        config.setdefault('data_type', 'asset_iterator')
+
+        ig, ir = _parse_ignores(config.get('ignore'))
+        self._ignore_globs = ig
+        self._ignore_regexes = ir
+
     def getContents(self, group):
         logger.debug("Scanning for content in: %s" % self.fs_endpoint_path)
         if not self._checkFSEndpoint():
@@ -69,12 +82,16 @@
             parent_path = group.spec
 
         names = filter(_filter_crap_files, osutil.listdir(parent_path))
-        if self._fs_filter is not None:
-            names = filter(self._fs_filter, names)
+
+        final_names = []
+        for name in names:
+            path = os.path.join(parent_path, name)
+            if not self._filterIgnored(path):
+                final_names.append(name)
 
         items = []
         groups = []
-        for name in names:
+        for name in final_names:
             path = os.path.join(parent_path, name)
             if os.path.isdir(path):
                 metadata = self._createGroupMetadata(path)
@@ -85,6 +102,16 @@
         self._finalizeContent(group, items, groups)
         return items + groups
 
+    def _filterIgnored(self, path):
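+        # Patterns are matched against the endpoint-relative path:
+        # globs via fnmatch, slash-delimited patterns as regexes.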
+        rel_path = os.path.relpath(path, self.fs_endpoint_path)
+        for g in self._ignore_globs:
+            if fnmatch.fnmatch(rel_path, g):
+                return True
+        for r in self._ignore_regexes:
+            if r.search(rel_path):
+                return True
+        return False
+
     def _createGroupMetadata(self, path):
         return {}
 
@@ -107,5 +134,14 @@
         return [
             RouteParameter('path', RouteParameter.TYPE_PATH)]
 
-    def describe(self):
-        return {'endpoint_path': self.fs_endpoint_path}
+
+def _parse_ignores(patterns):
+    globs = []
+    regexes = []
+    if patterns:
+        for pat in patterns:
+            if len(pat) > 2 and pat[0] == '/' and pat[-1] == '/':
+                regexes.append(re.compile(pat[1:-1]))
+            else:
+                globs.append(pat)
+    return globs, regexes
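+
+
+# A minimal usage sketch (hypothetical patterns, not part of this change):
+#
+#   globs, regexes = _parse_ignores(['*.tmp', '/\\.bak$/'])
+#   assert globs == ['*.tmp']
+#   assert regexes[0].pattern == '\\.bak$'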
--- a/piecrust/sources/interfaces.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/interfaces.py	Sun May 21 00:06:59 2017 -0700
@@ -1,24 +1,3 @@
-
-
-class IPaginationSource(object):
-    """ Defines the interface for a source that can be used as the data
-        for an iterator or a pagination.
-    """
-    def getItemsPerPage(self):
-        raise NotImplementedError()
-
-    def getSourceIterator(self):
-        raise NotImplementedError()
-
-    def getSorterIterator(self, it):
-        raise NotImplementedError()
-
-    def getTailIterator(self, it):
-        raise NotImplementedError()
-
-    def getSettingAccessor(self):
-        raise NotImplementedError()
-
 
 class IPreparingSource(object):
     """ Defines the interface for a source whose pages can be created by the
--- a/piecrust/sources/mixins.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/mixins.py	Sun May 21 00:06:59 2017 -0700
@@ -1,9 +1,7 @@
 import os.path
 import logging
 from piecrust import osutil
-from piecrust.data.paginationdata import PaginationData
 from piecrust.sources.base import ContentItem
-from piecrust.sources.interfaces import IPaginationSource
 
 
 logger = logging.getLogger(__name__)
@@ -11,57 +9,6 @@
 assets_suffix = '-assets'
 
 
-class ContentSourceIterator(object):
-    def __init__(self, source):
-        self.source = source
-
-        # This is to permit recursive traversal of the
-        # iterator chain. It acts as the end.
-        self.it = None
-
-    def __iter__(self):
-        return self.source.getAllContentItems()
-
-
-class DateSortIterator(object):
-    def __init__(self, it, reverse=True):
-        self.it = it
-        self.reverse = reverse
-
-    def __iter__(self):
-        return iter(sorted(self.it,
-                           key=lambda x: x.datetime, reverse=self.reverse))
-
-
-class PaginationDataBuilderIterator(object):
-    def __init__(self, it):
-        self.it = it
-
-    def __iter__(self):
-        for page in self.it:
-            if page is not None:
-                yield PaginationData(page)
-            else:
-                yield None
-
-
-class SimplePaginationSourceMixin(IPaginationSource):
-    """ Implements the `IPaginationSource` interface in a standard way that
-        should fit most page sources.
-    """
-    def getItemsPerPage(self):
-        return self.config['items_per_page']
-
-    def getSourceIterator(self):
-        return ContentSourceIterator(self)
-
-    def getSorterIterator(self, it):
-        return DateSortIterator(it)
-
-    def getTailIterator(self, it):
-        return PaginationDataBuilderIterator(it)
-
-
 class SimpleAssetsSubDirMixin:
     def _getRelatedAssetsContents(self, item, relationship):
         if not item.metadata.get('__has_assets', False):
--- a/piecrust/sources/posts.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/posts.py	Sun May 21 00:06:59 2017 -0700
@@ -10,8 +10,7 @@
     FSContentSource, InvalidFileSystemEndpointError)
 from piecrust.sources.interfaces import (
     IPreparingSource, IInteractiveSource, InteractiveField)
-from piecrust.sources.mixins import (
-    SimplePaginationSourceMixin, SimpleAssetsSubDirMixin)
+from piecrust.sources.mixins import SimpleAssetsSubDirMixin
 from piecrust.uriutil import uri_to_title
 
 
@@ -24,7 +23,10 @@
     PATH_FORMAT = None
 
     def __init__(self, app, name, config):
-        FSContentSource.__init__(self, app, name, config)
+        super().__init__(app, name, config)
+
+        config.setdefault('data_type', 'page_iterator')
+
         self.auto_formats = app.config.get('site/auto_formats')
         self.default_auto_format = app.config.get('site/default_auto_format')
         self.supported_extensions = list(self.auto_formats)
@@ -135,12 +137,6 @@
             RouteParameter('month', RouteParameter.TYPE_INT2),
             RouteParameter('year', RouteParameter.TYPE_INT4)]
 
-    def getSourceIterator(self):
-        if self._source_it_cache is None:
-            it = SimplePaginationSourceMixin.getSourceIterator(self)
-            self._source_it_cache = list(it)
-        return self._source_it_cache
-
     def setupPrepareParser(self, parser, app):
         parser.add_argument(
             '-d', '--date', help="The date of the post, "
@@ -201,15 +197,24 @@
                                                  self.fs_endpoint_path)
         return True
 
-    def _makeContentItem(self, path, slug, year, month, day):
-        path = path.replace('\\', '/')
+    def _makeContentItem(self, rel_path, slug, year, month, day):
+        path = os.path.join(self.fs_endpoint_path, rel_path)
         timestamp = datetime.date(year, month, day)
         metadata = {
-            'slug': slug,
-            'year': year,
-            'month': month,
-            'day': day,
-            'date': timestamp}
+            'route_params': {
+                'slug': slug,
+                'year': year,
+                'month': month,
+                'day': day},
+            'date': timestamp
+        }
+
+        _, ext = os.path.splitext(path)
+        if ext:
+            fmt = self.auto_formats.get(ext.lstrip('.'))
+            if fmt:
+                metadata['config'] = {'format': fmt}
+
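+        # For example (hypothetical post file '2017-05-21_hello.md'),
+        # route_params would be {'slug': 'hello', 'year': 2017,
+        # 'month': 5, 'day': 21}, and config would gain
+        # {'format': 'markdown'} if '.md' maps to an auto-format.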
         return ContentItem(path, metadata)
 
 
--- a/piecrust/sources/prose.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/prose.py	Sun May 21 00:06:59 2017 -0700
@@ -1,5 +1,3 @@
-import os
-import os.path
 import copy
 import logging
 from piecrust.sources.default import DefaultContentSource
@@ -12,21 +10,19 @@
     SOURCE_NAME = 'prose'
 
     def __init__(self, app, name, config):
-        super(ProseSource, self).__init__(app, name, config)
+        super().__init__(app, name, config)
         self.config_recipe = config.get('config', {})
 
-    def _populateMetadata(self, rel_path, metadata, mode=None):
-        metadata['config'] = self._makeConfig(rel_path, mode)
+    def _doCreateItemMetadata(self, path):
+        metadata = super()._doCreateItemMetadata(path)
+        config = metadata.setdefault('config', {})
+        config.update(self._makeConfig(path))
+        return metadata
 
-    def _makeConfig(self, rel_path, mode):
+    def _makeConfig(self, path):
         c = copy.deepcopy(self.config_recipe)
-        if c.get('title') == '%first_line%' and mode != MODE_CREATING:
-            path = os.path.join(self.fs_endpoint_path, rel_path)
-            try:
-                c['title'] = get_first_line(path)
-            except IOError:
-                if mode == MODE_PARSING:
-                    raise
+        if c.get('title') == '%first_line%':
+            c['title'] = get_first_line(path)
         return c