changeset 723:606f6d57b5df

routing: Cleanup URL routing and improve page matching. * Add new types of route parameters for integers (int4, int2, int). * Remove hard-coded hacks around converting year/month/day values. * Make the blog post routes use the new typed parameters. * Fix problems with matching routes with integer parameters when they can get confused with a sub-page number.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 29 May 2016 20:19:28 -0700
parents f0a3af3fbea2
children 09115f0900f0
files piecrust/appconfig.py piecrust/data/builder.py piecrust/generation/base.py piecrust/generation/taxonomy.py piecrust/rendering.py piecrust/routing.py piecrust/serving/util.py tests/test_serving.py
diffstat 8 files changed, 99 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/piecrust/appconfig.py	Sun May 29 20:15:56 2016 -0700
+++ b/piecrust/appconfig.py	Sun May 29 20:19:28 2016 -0700
@@ -336,8 +336,8 @@
             'posts_fs': DEFAULT_POSTS_FS,
             'default_page_layout': 'default',
             'default_post_layout': 'post',
-            'post_url': '/%year%/%month%/%day%/%slug%',
-            'year_url': '/%year%',
+            'post_url': '/%int4:year%/%int2:month%/%int2:day%/%slug%',
+            'year_url': '/archives/%int4:year%',
             'tag_url': '/tag/%path:tag%',
             'category_url': '/%category%',
             'posts_per_page': 5
@@ -459,12 +459,12 @@
                     {
                         'url': post_url,
                         'source': blog_name,
-                        'func': 'pcposturl(year,month,day,slug)'
+                        'func': 'pcposturl(int:year,int:month,int:day,slug)'
                         },
                     {
                         'url': year_url,
                         'generator': ('%s_archives' % blog_name),
-                        'func': 'pcyearurl(year)'
+                        'func': 'pcyearurl(archive_year)'
                         }
                     ]
                 })
--- a/piecrust/data/builder.py	Sun May 29 20:15:56 2016 -0700
+++ b/piecrust/data/builder.py	Sun May 29 20:19:28 2016 -0700
@@ -7,7 +7,6 @@
 from piecrust.data.paginator import Paginator
 from piecrust.data.piecrustdata import PieCrustData
 from piecrust.data.providersdata import DataProvidersData
-from piecrust.uriutil import split_sub_uri
 
 
 logger = logging.getLogger(__name__)
@@ -32,8 +31,8 @@
 def build_page_data(ctx):
     app = ctx.app
     page = ctx.page
-    first_uri, _ = split_sub_uri(app, ctx.uri)
     pgn_source = ctx.pagination_source or get_default_pagination_source(page)
+    first_uri = ctx.page.getUri(1)
 
     pc_data = PieCrustData()
     config_data = PageData(page, ctx)
--- a/piecrust/generation/base.py	Sun May 29 20:15:56 2016 -0700
+++ b/piecrust/generation/base.py	Sun May 29 20:19:28 2016 -0700
@@ -129,6 +129,10 @@
         raise Exception("Can't find source '%s' for generator '%s'." % (
             self.source_name, self.name))
 
+    def getPageFactory(self, route_metadata):
+        # This will raise `PageNotFoundError` naturally if not found.
+        return self.page_ref.getFactory()
+
     def bake(self, ctx):
         raise NotImplementedError()
 
--- a/piecrust/generation/taxonomy.py	Sun May 29 20:15:56 2016 -0700
+++ b/piecrust/generation/taxonomy.py	Sun May 29 20:19:28 2016 -0700
@@ -65,25 +65,16 @@
             sm = app.config.get('site/slugify_mode', 'encode')
         self.slugify_mode = _parse_slugify_mode(sm)
 
-    @property
-    def page_ref_path(self):
-        try:
-            return self.page_ref.path
-        except PageNotFoundError:
-            return None
+    def prepareRenderContext(self, ctx):
+        self._setPaginationSource(ctx)
 
-    def getPageFactory(self, route_metadata):
-        # This will raise `PageNotFoundError` naturally if not found.
-        return self.page_ref.getFactory()
-
-    def prepareRenderContext(self, ctx):
         tax_terms, is_combination = self._getTaxonomyTerms(
                 ctx.page.route_metadata)
         self._setTaxonomyFilter(ctx, tax_terms, is_combination)
 
-        ctx.custom_data = {
+        ctx.custom_data.update({
                 self.taxonomy.term_name: tax_terms,
-                'is_multiple_%s' % self.taxonomy.term_name: is_combination}
+                'is_multiple_%s' % self.taxonomy.term_name: is_combination})
         if (self.taxonomy.is_multiple and
                 self.taxonomy.name != self.taxonomy.term_name):
             mult_val = tax_terms
@@ -110,6 +101,9 @@
                 self.taxonomy, self.slugify_mode, term_value, is_combination))
         ctx.pagination_filter = flt
 
+    def _setPaginationSource(self, ctx):
+        ctx.pagination_source = self.source
+
     def onRouteFunctionUsed(self, route, route_metadata):
         # Get the values.
         values = route_metadata[self.taxonomy.term_name]
@@ -134,7 +128,13 @@
         logger.debug("Changed route metadata to: %s" % route_metadata)
 
     def bake(self, ctx):
-        logger.debug("Baking taxonomy pages...")
+        if not self.page_ref.exists:
+            logger.debug(
+                    "No page found at '%s', skipping taxonomy '%s'." %
+                    (self.page_ref, self.taxonomy.name))
+            return
+
+        logger.debug("Baking %s pages...", self.taxonomy.name)
         with format_timed_scope(logger, 'gathered taxonomy terms',
                                 level=logging.DEBUG, colored=False):
             all_terms, dirty_terms = self._buildDirtyTaxonomyTerms(ctx)
@@ -206,12 +206,6 @@
                 "Baking '%s' for source '%s': %s" %
                 (self.taxonomy.name, self.source_name, dirty_terms))
 
-        if not self.page_ref.exists:
-            logger.debug(
-                    "No taxonomy page found at '%s', skipping." %
-                    self.page_ref)
-            return 0
-
         route = self.app.getGeneratorRoute(self.name)
         if route is None:
             raise Exception("No routes have been defined for generator: %s" %
@@ -226,12 +220,12 @@
             if not self.taxonomy.is_multiple:
                 term = term[0]
             slugified_term = s.slugify(term)
+            extra_route_metadata = {self.taxonomy.term_name: slugified_term}
 
+            # Use the slugified term as the record extra key.
             logger.debug(
                     "Queuing: %s [%s=%s]" %
                     (fac.ref_spec, self.taxonomy.name, slugified_term))
-
-            extra_route_metadata = {self.taxonomy.term_name: slugified_term}
             ctx.queueBakeJob(fac, route, extra_route_metadata, slugified_term)
             job_count += 1
         ctx.runJobQueue()
@@ -243,7 +237,7 @@
         for prev_entry, cur_entry in ctx.getAllPageRecords():
             # Only consider taxonomy-related entries that don't have any
             # current version (i.e. they weren't baked just now).
-            if (prev_entry and not cur_entry):
+            if prev_entry and not cur_entry:
                 try:
                     t = ctx.getSeedFromRecordExtraKey(prev_entry.extra_key)
                 except InvalidRecordExtraKey:
--- a/piecrust/rendering.py	Sun May 29 20:15:56 2016 -0700
+++ b/piecrust/rendering.py	Sun May 29 20:19:28 2016 -0700
@@ -103,7 +103,7 @@
         self.is_from_request = is_from_request
         self.pagination_source = None
         self.pagination_filter = None
-        self.custom_data = None
+        self.custom_data = {}
         self.render_passes = [None, None]  # Same length as RENDER_PASSES
         self._current_pass = PASS_NONE
 
--- a/piecrust/routing.py	Sun May 29 20:15:56 2016 -0700
+++ b/piecrust/routing.py	Sun May 29 20:19:28 2016 -0700
@@ -9,8 +9,8 @@
 logger = logging.getLogger(__name__)
 
 
-route_re = re.compile(r'%((?P<qual>path):)?(?P<name>\w+)%')
-route_esc_re = re.compile(r'\\%((?P<qual>path)\\:)?(?P<name>\w+)\\%')
+route_re = re.compile(r'%((?P<qual>[\w\d]+):)?(?P<name>\w+)%')
+route_esc_re = re.compile(r'\\%((?P<qual>[\w\d]+)\\:)?(?P<name>\w+)\\%')
 template_func_re = re.compile(r'^(?P<name>\w+)\((?P<args>.*)\)\s*$')
 template_func_arg_re = re.compile(r'(?P<arg>\+?\w+)')
 ugly_url_cleaner = re.compile(r'\.html$')
@@ -27,12 +27,6 @@
 def create_route_metadata(page):
     route_metadata = copy.deepcopy(page.source_metadata)
     route_metadata.update(page.getRouteMetadata())
-
-    # TODO: fix this hard-coded shit
-    for key in ['year', 'month', 'day']:
-        if key in route_metadata and isinstance(route_metadata[key], str):
-            route_metadata[key] = int(route_metadata[key])
-
     return route_metadata
 
 
@@ -75,6 +69,13 @@
                              re.escape(self.uri_pattern)) + '$'
         self.uri_re = re.compile(p)
 
+        # Get the types of the route parameters.
+        self.param_types = {}
+        for m in route_re.finditer(self.uri_pattern):
+            qual = m.group('qual')
+            if qual:
+                self.param_types[str(m.group('name'))] = qual
+
         # If the URI pattern has a 'path'-type component, we'll need to match
         # the versions for which that component is empty. So for instance if
         # we have `/foo/%path:bar%`, we may need to match `/foo` (note the
@@ -174,17 +175,18 @@
             for k in missing_keys:
                 route_metadata[k] = ''
 
-        # TODO: fix this hard-coded shit
-        for key in ['year', 'month', 'day']:
-            if key in route_metadata and isinstance(route_metadata[key], str):
-                try:
-                    route_metadata[key] = int(route_metadata[key])
-                except ValueError:
-                    pass
+        for k in route_metadata:
+            route_metadata[k] = self._coerceRouteParameter(
+                    k, route_metadata[k])
 
         return route_metadata
 
     def getUri(self, route_metadata, *, sub_num=1):
+        route_metadata = dict(route_metadata)
+        for k in route_metadata:
+            route_metadata[k] = self._coerceRouteParameter(
+                    k, route_metadata[k])
+
         uri = self.uri_format % route_metadata
         suffix = None
         if sub_num > 1:
@@ -230,21 +232,23 @@
         return uri
 
     def _uriFormatRepl(self, m):
+        qual = m.group('qual')
         name = m.group('name')
-        #TODO: fix this hard-coded shit
-        if name == 'year':
-            return '%(year)04d'
-        if name == 'month':
-            return '%(month)02d'
-        if name == 'day':
-            return '%(day)02d'
-        return '%(' + name + ')s'
+        if qual == 'int4':
+            return '%%(%s)04d' % name
+        elif qual == 'int2':
+            return '%%(%s)02d' % name
+        return '%%(%s)s' % name
 
     def _uriPatternRepl(self, m):
         name = m.group('name')
-        qualifier = m.group('qual')
-        if qualifier == 'path':
+        qual = m.group('qual')
+        if qual == 'path':
             return r'(?P<%s>[^\?]*)' % name
+        elif qual == 'int4':
+            return r'(?P<%s>\d{4})' % name
+        elif qual == 'int2':
+            return r'(?P<%s>\d{2})' % name
         return r'(?P<%s>[^/\?]+)' % name
 
     def _uriNoPathRepl(self, m):
@@ -254,6 +258,22 @@
             return ''
         return r'(?P<%s>[^/\?]+)' % name
 
+    def _coerceRouteParameter(self, name, val):
+        param_type = self.param_types.get(name)
+        if param_type is None:
+            return val
+        if param_type in ['int', 'int2', 'int4']:
+            try:
+                return int(val)
+            except ValueError:
+                raise Exception(
+                        "Expected route parameter '%s' to be of type "
+                        "'%s', but was: %s" %
+                        (k, param_type, route_metadata[k]))
+        if param_type == 'path':
+            return val
+        raise Exception("Unknown route parameter type: %s" % param_type)
+
     def _createTemplateFunc(self, func_def):
         if func_def is None:
             return
@@ -299,10 +319,8 @@
                 del non_var_args[-1]
 
             for arg_name, arg_val in zip(non_var_args, args):
-                #TODO: fix this hard-coded shit.
-                if arg_name in ['year', 'month', 'day']:
-                    arg_val = int(arg_val)
-                metadata[arg_name] = arg_val
+                metadata[arg_name] = self._coerceRouteParameter(
+                        arg_name, arg_val)
 
             if is_variable:
                 metadata[self.template_func_vararg] = []
--- a/piecrust/serving/util.py	Sun May 29 20:15:56 2016 -0700
+++ b/piecrust/serving/util.py	Sun May 29 20:19:28 2016 -0700
@@ -24,14 +24,14 @@
 
 
 class RequestedPage(object):
-    def __init__(self, qualified_page):
-        self.qualified_page = qualified_page
+    def __init__(self):
+        self.qualified_page = None
         self.req_path = None
         self.page_num = 1
         self.not_found_errors = []
 
 
-def find_routes(routes, uri):
+def find_routes(routes, uri, is_sub_page=False):
     """ Returns routes matching the given URL, but puts generator routes
         at the end.
     """
@@ -41,35 +41,42 @@
         metadata = route.matchUri(uri)
         if metadata is not None:
             if route.is_source_route:
-                res.append((route, metadata))
+                res.append((route, metadata, is_sub_page))
             else:
-                gen_res.append((route, metadata))
+                gen_res.append((route, metadata, is_sub_page))
     return res + gen_res
 
 
 def get_requested_page(app, req_path):
     # Try to find what matches the requested URL.
-    req_path, page_num = split_sub_uri(app, req_path)
+    routes = find_routes(app.routes, req_path)
 
-    routes = find_routes(app.routes, req_path)
+    # It could also be a sub-page (i.e. the URL ends with a page number), so
+    # we try to also match the base URL (without the number).
+    req_path_no_num, page_num = split_sub_uri(app, req_path)
+    if page_num > 1:
+        routes += find_routes(app.routes, req_path_no_num, True)
+
     if len(routes) == 0:
         raise RouteNotFoundError("Can't find route for: %s" % req_path)
 
-    qp = None
-    not_found_errors = []
-    for route, route_metadata in routes:
+    req_page = RequestedPage()
+    for route, route_metadata, is_sub_page in routes:
         try:
+            cur_req_path = req_path
+            if is_sub_page:
+                cur_req_path = req_path_no_num
+
             qp = _get_requested_page_for_route(
-                    app, route, route_metadata, req_path)
+                    app, route, route_metadata, cur_req_path)
             if qp is not None:
+                req_page.qualified_page = qp
+                req_page.req_path = cur_req_path
+                if is_sub_page:
+                    req_page.page_num = page_num
                 break
         except PageNotFoundError as nfe:
-            not_found_errors.append(nfe)
-
-    req_page = RequestedPage(qp)
-    req_page.req_path = req_path
-    req_page.page_num = page_num
-    req_page.not_found_errors = not_found_errors
+            req_page.not_found_errors.append(nfe)
     return req_page
 
 
--- a/tests/test_serving.py	Sun May 29 20:15:56 2016 -0700
+++ b/tests/test_serving.py	Sun May 29 20:19:28 2016 -0700
@@ -33,7 +33,7 @@
 
     assert len(matching) == len(expected)
     for i in range(len(matching)):
-        route, metadata = matching[i]
+        route, metadata, is_sub_page = matching[i]
         exp_source, exp_md = expected[i]
         assert route.source_name == exp_source
         assert metadata == exp_md