comparison silorider/commands/process.py @ 66:4caf6720d1dd

Only process silos that did not throw an exception in pre-process.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 26 Dec 2023 16:27:28 -0800
parents b7da3d97ea99
children c5bf03406a33
comparison
Legend: equal, deleted, inserted, replaced
65:412ff72ba091 66:4caf6720d1dd
29 @property 29 @property
30 def silos(self): 30 def silos(self):
31 return self._silos 31 return self._silos
32 32
33 def process(self): 33 def process(self):
34 self.preProcess() 34 ok_silos = self.preProcess()
35 35
36 # Get all silos to return a profile URL handler. 36 # Get all silos to return a profile URL handler.
37 profile_url_handlers = {} 37 profile_url_handlers = {}
38 for silo in self.ctx.silos: 38 for silo in ok_silos:
39 handler = silo.getProfileUrlHandler() 39 handler = silo.getProfileUrlHandler()
40 if handler: 40 if handler:
41 profile_url_handlers[silo.SILO_TYPE] = handler 41 profile_url_handlers[silo.SILO_TYPE] = handler
42 42
43 postctx = SiloPostingContext(self.ctx, profile_url_handlers) 43 postctx = SiloPostingContext(self.ctx, profile_url_handlers)
44 feed = parse_url(self.url, self.name, self.config) 44 feed = parse_url(self.url, self.name, self.config)
45 for entry in feed.entries: 45 for entry in feed.entries:
46 self.processEntry(postctx, entry) 46 self.processEntry(ok_silos, postctx, entry)
47 47
48 self.postProcess() 48 self.postProcess(ok_silos)
49 49
50 def preProcess(self): 50 def preProcess(self):
51 # Pre-parse the "since" and "until" dates/times. 51 # Pre-parse the "since" and "until" dates/times.
52 if self.ctx.args.since: 52 if self.ctx.args.since:
53 self.ctx.args.since = dateparser.parse(self.ctx.args.since) 53 self.ctx.args.since = dateparser.parse(self.ctx.args.since)
54 if self.ctx.args.until: 54 if self.ctx.args.until:
55 self.ctx.args.until = dateparser.parse(self.ctx.args.until) 55 self.ctx.args.until = dateparser.parse(self.ctx.args.until)
56 56
57 # Go over the silos needed for this command (i.e. potentially 57 # Go over the silos needed for this command (i.e. potentially
58 # filtered by passing `-s`) and call their `onPostStart`. 58 # filtered by passing `-s`) and call their `onPostStart`.
59 ok_silos = []
59 for silo in self.silos: 60 for silo in self.silos:
60 silo.onPostStart(self.ctx) 61 try:
62 silo.onPostStart(self.ctx)
63 ok_silos.append(silo)
64 except Exception as ex:
65 logger.error("Error during pre-process of silo '%s'" % silo.name)
66 logger.error(ex)
67 return ok_silos
61 68
62 def postProcess(self): 69 def postProcess(self, silos):
63 for silo in self.silos: 70 for silo in silos:
64 silo.onPostEnd(self.ctx) 71 silo.onPostEnd(self.ctx)
65 72
66 def processEntry(self, postctx, entry): 73 def processEntry(self, silos, postctx, entry):
67 entry_url = entry.get('url') 74 entry_url = entry.get('url')
68 if not entry_url: 75 if not entry_url:
69 logger.warning("Found entry without a URL: %s" % repr(entry._mf_entry)) 76 logger.warning("Found entry without a URL: %s" % repr(entry._mf_entry))
70 return 77 return
71 78
76 no_cache = self.ctx.args.no_cache 83 no_cache = self.ctx.args.no_cache
77 only_since = self.ctx.args.since 84 only_since = self.ctx.args.since
78 only_until = self.ctx.args.until 85 only_until = self.ctx.args.until
79 86
80 logger.debug("Processing entry: %s" % entry_url) 87 logger.debug("Processing entry: %s" % entry_url)
81 for silo in self.silos: 88 for silo in silos:
82 if only_since or only_until: 89 if only_since or only_until:
83 entry_dt = entry.get('published') 90 entry_dt = entry.get('published')
84 if not entry_dt: 91 if not entry_dt:
85 logger.warning( 92 logger.warning(
86 "Skipping entry with no published date/time " 93 "Skipping entry with no published date/time "