Mercurial > silorider
changeset 28:69a6a8c9d33d
Fix article feed not being found for some markups
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 19 Apr 2023 12:48:25 -0700 |
parents | c898b4df0f29 |
children | 20d4cf433704 |
files | silorider/parse.py |
diffstat | 1 files changed, 10 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/silorider/parse.py Wed Apr 19 12:46:58 2023 -0700
+++ b/silorider/parse.py Wed Apr 19 12:48:25 2023 -0700
@@ -8,7 +8,7 @@ def parse_url(url_or_path):
     mf_obj = parse_mf2(url_or_path)
-    matcher = EntryMatcher(mf_obj)
+    matcher = EntryMatcher(mf_obj.to_dict(), mf_obj.__doc__)
     feed = Feed(url_or_path, matcher.mf_dict)
@@ -146,21 +146,25 @@
     """ A class that matches `mf2util` results along with the original
         BeautifulSoup document, so we have HTML objects on hand if needed.
     """
-    def __init__(self, mf_obj):
-        self.mf_dict = mf_obj.to_dict()
+    def __init__(self, mf_dict, bf_doc):
+        self.mf_dict = mf_dict
         self.entries = []
         els_by_type = {}
         next_el = {}
-        bf = mf_obj.__doc__
-        for e in self.mf_dict.get('items', []):
+
+        items = mf_dict.get('items', [])
+        if len(items) == 1 and items[0]['type'][0] == 'h-feed':
+            items = items[0].get('children', [])
+
+        for e in items:
             types = e.get('type')
             if not types:
                 continue
             entry_type = types[0]
             if entry_type not in els_by_type:
-                ebt = list(bf.find_all(class_=entry_type))
+                ebt = list(bf_doc.find_all(class_=entry_type))
                 els_by_type[entry_type] = ebt
                 next_el[entry_type] = 0