changeset 32:2265920c4688

Improve populate cache command: make it use the same feed parsing code as the process command, and add support for the --dry-run argument.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 10 May 2023 16:06:46 -0700
parents 6a014f7c6eea
children 9e4eb3f2754e
files silorider/commands/utils.py silorider/main.py
diffstat 2 files changed, 29 insertions(+), 24 deletions(-)
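
For context: the populate cache command pre-fills the posting cache so that entries already present in the feed are not re-posted to the configured silos. After this change it goes through the same parse_url feed parsing as the process command, and with the new --dry-run flag it only logs "Would add entry to '<silo>' cache: <url>" instead of writing to the cache. A typical invocation would look something like "silorider populate --dry-run --until 2023-05-10"; that exact command line is an assumption for illustration, since the subcommand name and configuration handling may differ in your setup.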
--- a/silorider/commands/utils.py	Wed May 10 16:03:31 2023 -0700
+++ b/silorider/commands/utils.py	Wed May 10 16:06:46 2023 -0700
@@ -1,5 +1,5 @@
 import logging
-from ..parse import parse_mf2
+from ..parse import parse_url
 
 
 logger = logging.getLogger(__name__)
@@ -33,41 +33,36 @@
 
 
 def populate_cache(ctx):
-    urls = get_named_urls(ctx.config, ctx.args.url)
-    for url in urls:
-        logger.info("Caching entries from %s" % url)
-        _populate_cache_for_url(url, ctx)
-
-
-def _populate_cache_for_url(url, ctx):
-    import mf2util
     import dateutil.parser
 
-    silos = get_named_silos(ctx.silos, ctx.args.silo)
+    urls = get_named_urls(ctx.config, ctx.args.url)
 
     until_dt = None
     if ctx.args.until:
         until_dt = dateutil.parser.parse(ctx.args.until).date()
         logger.debug("Populating cache until: %s" % until_dt)
 
-    mf_obj = parse_mf2(url)
-    mf_dict = mf_obj.to_dict()
-    for entry in mf_dict.get('items', []):
-        entry_props = entry.get('properties')
-        if not entry_props:
-            logger.warning("Found entry without any properties.")
-            continue
+    for url in urls:
+        logger.info("Caching entries from %s" % url)
+        _populate_cache_for_url(url, ctx, until_dt=until_dt)
+
 
-        entry_url = entry_props.get('url')
+def _populate_cache_for_url(url, ctx, until_dt=None):
+    silos = get_named_silos(ctx.silos, ctx.args.silo)
+
+    feed = parse_url(url)
+
+    for entry in feed.entries:
+        entry_url = entry.get('url')
         if not entry_url:
-            logger.warning("Found entry without any URL.")
+            logger.warning("Found entry without any URL: %s" % repr(entry._mf_entry))
             continue
 
         if isinstance(entry_url, list):
             entry_url = entry_url[0]
 
         if until_dt:
-            entry_published = entry_props.get('published')
+            entry_published = entry.get('published')
             if not entry_published:
                 logger.warning("Entry '%s' has not published date." %
                                entry_url)
@@ -76,10 +71,16 @@
             if isinstance(entry_published, list):
                 entry_published = entry_published[0]
 
-            entry_published_dt = mf2util.parse_datetime(entry_published)
-            if entry_published_dt and entry_published_dt.date() > until_dt:
+            if entry_published and entry_published.date() > until_dt:
                 continue
 
-        logger.debug("Adding entry to cache: %s" % entry_url)
         for silo in silos:
-            ctx.cache.addPost(silo.name, entry_url)
+            if ctx.cache.wasPosted(silo.name, entry_url):
+                logger.debug("Entry is already in '%s' cache: %s" % (silo.name, entry_url))
+                continue
+
+            if not ctx.args.dry_run:
+                logger.debug("Adding entry to '%s' cache: %s" % (silo.name, entry_url))
+                ctx.cache.addPost(silo.name, entry_url)
+            else:
+                logger.debug("Would add entry to '%s' cache: %s" % (silo.name, entry_url))
--- a/silorider/main.py	Wed May 10 16:03:31 2023 -0700
+++ b/silorider/main.py	Wed May 10 16:06:46 2023 -0700
@@ -83,6 +83,10 @@
     parser.add_argument(
         '--until',
         help="The date until which to populate the cache (included).")
+    parser.add_argument(
+        '--dry-run',
+        action='store_true',
+        help="Only report what would be posted, but don't post anything.")
     parser.set_defaults(func=_run)
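
A note on the new flag: with argparse, action='store_true' gives the option a default of False and flips it to True when the flag is passed, and the dash in '--dry-run' becomes an underscore in the attribute name, which is what the ctx.args.dry_run check in _populate_cache_for_url relies on. A minimal standalone sketch of that behavior (the parser below is illustrative only, not silorider's actual argument setup):

import argparse

# Illustrative sketch: mirrors the two options added to the populate
# command's parser in silorider/main.py.
parser = argparse.ArgumentParser(prog='populate')
parser.add_argument(
    '--until',
    help="The date until which to populate the cache (included).")
parser.add_argument(
    '--dry-run',
    action='store_true',
    help="Only report what would be posted, but don't post anything.")

args = parser.parse_args(['--dry-run', '--until', '2023-05-10'])
print(args.dry_run)   # True (False when the flag is omitted)
print(args.until)     # '2023-05-10' (turned into a date with dateutil inside populate_cache)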