Mercurial > silorider
changeset 75:2e57d0fd8ceb
Add more options for dynamic feed DOM manipulation
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sat, 10 Aug 2024 21:19:44 -0700 |
parents | 08ee3ffbe508 |
children | 268b287461c0 |
files | silorider/parse.py |
diffstat | 1 files changed, 25 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/silorider/parse.py Sat Aug 10 21:19:23 2024 -0700
+++ b/silorider/parse.py Sat Aug 10 21:19:44 2024 -0700
@@ -80,18 +80,30 @@
 
     logger.debug("Modifying HTML doc:")
     for selector, to_add in class_mods:
+        # Hack to support semicolon and equal signs in .ini files...
+        # Note that Python seems to make all key names lowercase.
+        selector = selector.replace('$semicolon$', ':')
+        selector = selector.replace('$equals$', '=')
+
         elems = list(doc.select(selector))
         if not elems:
             logger.warning("No elements matched by rule: %s" % selector)
             continue
+
         for elem in elems:
-            logger.debug("Adding %s to %s" % (to_add, elem.name))
-            if to_add == 'dt-published':
+            if to_add == '$MOVE_UP$':
+                _move_element_up(doc, elem)
+            elif to_add == 'dt-published':
                 _insert_html_datetime_published(doc, elem)
             else:
-                if 'class' not in elem.attrs:
-                    elem['class'] = []
-                elem['class'].append(to_add)
+                _add_class_to_element(doc, elem, to_add)
+
+
+def _move_element_up(doc, elem):
+    dest = elem.parent.parent
+    logger.debug("Moving '%s' up to '%s'" % (elem.name, dest.name))
+    elem.extract()
+    dest.append(elem)
 
 
 def _insert_html_datetime_published(doc, elem):
@@ -116,6 +128,13 @@
     logger.debug("Adding datetime attribute: %s" % dt)
 
 
+def _add_class_to_element(doc, elem, to_add):
+    logger.debug("Adding %s to %s" % (to_add, elem.name))
+    if 'class' not in elem.attrs:
+        elem['class'] = []
+    elem['class'].append(to_add)
+
+
 class InvalidEntryException(Exception):
     pass
 
@@ -226,6 +245,7 @@
     for item in items:
         item_types = item.get('type', [])
         if 'h-feed' not in item_types:
+            logger.debug("Rejecting item of types: %s" % item_types)
             continue
 
         children = item.get('children', [])