changeset 0:a1b7a459326a
Initial commit.
author    Ludovic Chabant <ludovic@chabant.com>
date      Wed, 18 Jul 2018 20:46:04 -0700
parents
children  169aa24a8442
files     .hgignore LICENSE.rst MANIFEST.in Pipfile Pipfile.lock README.rst
          requirements.txt setup.py silorider/__init__.py
          silorider/cache/__init__.py silorider/cache/base.py
          silorider/cache/memory.py silorider/cache/sqlite.py
          silorider/commands/__init__.py silorider/commands/auth.py
          silorider/commands/process.py silorider/commands/utils.py
          silorider/default.cfg silorider/format.py silorider/main.py
          silorider/parse.py silorider/silos/__init__.py
          silorider/silos/base.py silorider/silos/mastodon.py
          silorider/silos/print.py silorider/version.py tests/__init__.py
          tests/conftest.py tests/test_commands_populate.py
          tests/test_format.py tests/test_silos_mastodon.py
diffstat  26 files changed, 2029 insertions(+), 0 deletions(-)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,9 @@
+syntax: glob
+*.pyc
+*.egg-info
+.eggs
+.pytest_cache
+__pycache__
+build
+dist
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE.rst Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,14 @@
+Copyright 2018 Ludovic Chabant
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MANIFEST.in Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,7 @@
+include LICENSE.rst
+include README.rst
+
+include requirements.txt
+
+recursive-include silorider *
+recursive-include tests *
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Pipfile Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,22 @@
+[[source]]
+
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+name = "pypi"
+
+
+[packages]
+
+"mf2py" = "*"
+"mastodon.py" = "*"
+coloredlogs = "*"
+"mf2util" = "*"
+python-dateutil = "*"
+python-twitter = "*"
+
+
+[dev-packages]
+
+pytest = "*"
+setuptools-scm = "*"
+pytest-runner = "*"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Pipfile.lock Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,328 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "c5dd0999f78dfe324dac4ec22397addd20a4d29d660fc201e97d678d8c61a55b"
+        },
+        "host-environment-markers": {
+            "implementation_name": "cpython",
+            "implementation_version": "3.6.3",
+            "os_name": "posix",
+            "platform_machine": "x86_64",
+            "platform_python_implementation": "CPython",
+            "platform_release": "17.6.0",
+            "platform_system": "Darwin",
+            "platform_version": "Darwin Kernel Version 17.6.0: Tue May 8 15:22:16 PDT 2018; root:xnu-4570.61.1~1/RELEASE_X86_64",
+            "python_full_version": "3.6.3",
+            "python_version": "3.6",
+            "sys_platform": "darwin"
+        },
+        "pipfile-spec": 6,
+        "requires": {},
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.python.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "0cc8643": {
+            "editable": true,
+            "path": "../mf2util"
+        },
+        "asn1crypto": {
+            "hashes": [
+                "sha256:2f1adbb7546ed199e3c90ef23ec95c5cf3585bac7d11fb7eb562a3fe89c64e87",
+                "sha256:9d5c20441baf0cb60a4ac34cc447c6c189024b6b4c6cd7877034f4965c464e49"
+            ],
+            "version": "==0.24.0"
+        },
+        "beautifulsoup4": {
+            "hashes": [
+                "sha256:7015e76bf32f1f574636c4288399a6de66ce08fb7b2457f628a8d70c0fbabb11",
+                "sha256:11a9a27b7d3bddc6d86f59fb76afb70e921a25ac2d6cc55b40d072bd68435a76",
+                "sha256:808b6ac932dccb0a4126558f7dfdcf41710dd44a4ef497a0bb59a77f9f078e89"
+            ],
+            "version": "==4.6.0"
+        },
+        "certifi": {
+            "hashes": [
+                "sha256:9fa520c1bacfb634fa7af20a76bcbd3d5fb390481724c597da32c719a7dca4b0",
+                "sha256:13e698f54293db9f89122b0581843a782ad0934a4fe0172d2a980ba77fc61bb7"
+            ],
+            "version": "==2018.4.16"
+        },
+        "cffi": {
+            "hashes": [
+                "sha256:1b0493c091a1898f1136e3f4f991a784437fac3673780ff9de3bcf46c80b6b50",
+                "sha256:87f37fe5130574ff76c17cab61e7d2538a16f843bb7bca8ebbc4b12de3078596",
+                "sha256:1553d1e99f035ace1c0544050622b7bc963374a00c467edafac50ad7bd276aef",
+                "sha256:151b7eefd035c56b2b2e1eb9963c90c6302dc15fbd8c1c0a83a163ff2c7d7743",
+                "sha256:edabd457cd23a02965166026fd9bfd196f4324fe6032e866d0f3bd0301cd486f",
+                "sha256:ba5e697569f84b13640c9e193170e89c13c6244c24400fc57e88724ef610cd31",
+                "sha256:79f9b6f7c46ae1f8ded75f68cf8ad50e5729ed4d590c74840471fc2823457d04",
+                "sha256:b0f7d4a3df8f06cf49f9f121bead236e328074de6449866515cea4907bbc63d6",
+                "sha256:4c91af6e967c2015729d3e69c2e51d92f9898c330d6a851bf8f121236f3defd3",
+                "sha256:7a33145e04d44ce95bcd71e522b478d282ad0eafaf34fe1ec5bbd73e662f22b6",
+                "sha256:95d5251e4b5ca00061f9d9f3d6fe537247e145a8524ae9fd30a2f8fbce993b5b",
+                "sha256:b75110fb114fa366b29a027d0c9be3709579602ae111ff61674d28c93606acca",
+                "sha256:ae5e35a2c189d397b91034642cb0eab0e346f776ec2eb44a49a459e6615d6e2e",
+                "sha256:fdf1c1dc5bafc32bc5d08b054f94d659422b05aba244d6be4ddc1c72d9aa70fb",
+                "sha256:9d1d3e63a4afdc29bd76ce6aa9d58c771cd1599fbba8cf5057e7860b203710dd",
+                "sha256:be2a9b390f77fd7676d80bc3cdc4f8edb940d8c198ed2d8c0be1319018c778e1",
+                "sha256:ed01918d545a38998bfa5902c7c00e0fee90e957ce036a4000a88e3fe2264917",
+                "sha256:857959354ae3a6fa3da6651b966d13b0a8bed6bbc87a0de7b38a549db1d2a359",
+                "sha256:2ba8a45822b7aee805ab49abfe7eec16b90587f7f26df20c71dd89e45a97076f",
+                "sha256:a36c5c154f9d42ec176e6e620cb0dd275744aa1d804786a71ac37dc3661a5e95",
+                "sha256:e55e22ac0a30023426564b1059b035973ec82186ddddbac867078435801c7801",
+                "sha256:3eb6434197633b7748cea30bf0ba9f66727cdce45117a712b29a443943733257",
+                "sha256:ecbb7b01409e9b782df5ded849c178a0aa7c906cf8c5a67368047daab282b184",
+                "sha256:770f3782b31f50b68627e22f91cb182c48c47c02eb405fd689472aa7b7aa16dc",
+                "sha256:d5d8555d9bfc3f02385c1c37e9f998e2011f0db4f90e250e5bc0c0a85a813085",
+                "sha256:3c85641778460581c42924384f5e68076d724ceac0f267d66c757f7535069c93",
+                "sha256:ca1bd81f40adc59011f58159e4aa6445fc585a32bb8ac9badf7a2c1aa23822f2",
+                "sha256:3bb6bd7266598f318063e584378b8e27c67de998a43362e8fce664c54ee52d30",
+                "sha256:a6a5cb8809091ec9ac03edde9304b3ad82ad4466333432b16d78ef40e0cce0d5",
+                "sha256:57b2533356cb2d8fac1555815929f7f5f14d68ac77b085d2326b571310f34f6e",
+                "sha256:495c5c2d43bf6cebe0178eb3e88f9c4aa48d8934aa6e3cddb865c058da76756b",
+                "sha256:e90f17980e6ab0f3c2f3730e56d1fe9bcba1891eeea58966e89d352492cc74f4"
+            ],
+            "markers": "platform_python_implementation != 'PyPy'",
+            "version": "==1.11.5"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691",
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"
+            ],
+            "version": "==3.0.4"
+        },
+        "coloredlogs": {
+            "hashes": [
+                "sha256:34fad2e342d5a559c31b6c889e8d14f97cb62c47d9a2ae7b5ed14ea10a79eff8",
+                "sha256:b869a2dda3fa88154b9dd850e27828d8755bfab5a838a1c97fbc850c6e377c36"
+            ],
+            "version": "==10.0"
+        },
+        "cryptography": {
+            "hashes": [
+                "sha256:abd070b5849ed64e6d349199bef955ee0ad99aefbad792f0c587f8effa681a5e",
+                "sha256:3f3b65d5a16e6b52fba63dc860b62ca9832f51f1a2ae5083c78b6840275f12dd",
+                "sha256:77d0ad229d47a6e0272d00f6bf8ac06ce14715a9fd02c9a97f5a2869aab3ccb2",
+                "sha256:808fe471b1a6b777f026f7dc7bd9a4959da4bfab64972f2bbe91e22527c1c037",
+                "sha256:6fef51ec447fe9f8351894024e94736862900d3a9aa2961528e602eb65c92bdb",
+                "sha256:60bda7f12ecb828358be53095fc9c6edda7de8f1ef571f96c00b2363643fa3cd",
+                "sha256:5cb990056b7cadcca26813311187ad751ea644712022a3976443691168781b6f",
+                "sha256:c332118647f084c983c6a3e1dba0f3bcb051f69d12baccac68db8d62d177eb8a",
+                "sha256:f57008eaff597c69cf692c3518f6d4800f0309253bb138b526a37fe9ef0c7471",
+                "sha256:551a3abfe0c8c6833df4192a63371aa2ff43afd8f570ed345d31f251d78e7e04",
+                "sha256:db6013746f73bf8edd9c3d1d3f94db635b9422f503db3fc5ef105233d4c011ab",
+                "sha256:d6f46e862ee36df81e6342c2177ba84e70f722d9dc9c6c394f9f1f434c4a5563",
+                "sha256:9b62fb4d18529c84b961efd9187fecbb48e89aa1a0f9f4161c61b7fc42a101bd",
+                "sha256:9e5bed45ec6b4f828866ac6a6bedf08388ffcfa68abe9e94b34bb40977aba531",
+                "sha256:f6c821ac253c19f2ad4c8691633ae1d1a17f120d5b01ea1d256d7b602bc59887",
+                "sha256:ba6a774749b6e510cffc2fb98535f717e0e5fd91c7c99a61d223293df79ab351",
+                "sha256:5251e7de0de66810833606439ca65c9b9e45da62196b0c88bfadf27740aac09f",
+                "sha256:64b5c67acc9a7c83fbb4b69166f3105a0ab722d27934fac2cb26456718eec2ba",
+                "sha256:9fc295bf69130a342e7a19a39d7bbeb15c0bcaabc7382ec33ef3b2b7d18d2f63"
+            ],
+            "version": "==2.2.2"
+        },
+        "decorator": {
+            "hashes": [
+                "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
+                "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
+            ],
+            "version": "==4.3.0"
+        },
+        "future": {
+            "hashes": [
+                "sha256:e39ced1ab767b5936646cedba8bcce582398233d6a627067d4c6a454c90cfedb"
+            ],
+            "version": "==0.16.0"
+        },
+        "html5lib": {
+            "hashes": [
+                "sha256:20b159aa3badc9d5ee8f5c647e5efd02ed2a66ab8d354930bd9ff139fc1dc0a3",
+                "sha256:66cb0dcfdbbc4f9c3ba1a63fdb511ffdbd4f513b2b6d81b80cd26ce6b3fb3736"
+            ],
+            "version": "==1.0.1"
+        },
+        "http-ece": {
+            "hashes": [
+                "sha256:2f31a0640c31a0c2934ab1e37005dd9a559ae854a16304f9b839e062074106cc"
+            ],
+            "version": "==1.0.5"
+        },
+        "humanfriendly": {
+            "hashes": [
+                "sha256:acaaae671773dd5c70c6042b3442730e7d52804d9d645189093a15d208b86255",
+                "sha256:f772c455f8de6af1185c8fea7d2920ba469ea9f7ccfc42635bbdd7f115cdd575"
+            ],
+            "version": "==4.15.1"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
+                "sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
+            ],
+            "version": "==2.7"
+        },
+        "mastodon.py": {
+            "hashes": [
+                "sha256:0d426c37795ed24cdf7affec7b3465cb76f9afc3f1d4dfbfd389b0b4459dbf4d",
+                "sha256:339a60c4ea505dd5b6c8f6ac076ce40f9e7bdfcd72d9466869da8bf631e4b9f5"
+            ],
+            "version": "==1.3.0"
+        },
+        "mf2py": {
+            "hashes": [
+                "sha256:2dc2e2a18ac457829936c24ffce8d66b44ea63f834aea1ed48dfbdf7b4951585",
+                "sha256:64cadc8a271382e075a72bca1c0dc09297f8e1de62279849bd3915418f7087de"
+            ],
+            "version": "==1.1.1"
+        },
+        "oauthlib": {
+            "hashes": [
+                "sha256:d883b36b21a6ad813953803edfa563b1b579d79ca758fe950d1bc9e8b326025b",
+                "sha256:ac35665a61c1685c56336bda97d5eefa246f1202618a1d6f34fccb1bdd404162"
+            ],
+            "version": "==2.1.0"
+        },
+        "pycparser": {
+            "hashes": [
+                "sha256:99a8ca03e29851d96616ad0404b4aad7d9ee16f25c9f9708a11faf2810f7b226"
+            ],
+            "version": "==2.18"
+        },
+        "python-dateutil": {
+            "hashes": [
+                "sha256:1adb80e7a782c12e52ef9a8182bebeb73f1d7e24e374397af06fb4956c8dc5c0",
+                "sha256:e27001de32f627c22380a688bcc43ce83504a7bc5da472209b4c70f02829f0b8"
+            ],
+            "version": "==2.7.3"
+        },
+        "python-twitter": {
+            "hashes": [
+                "sha256:29b536a59edfb0b1f634ea8d98b91c4c5bd7514e91fab16af086604c627bebae",
+                "sha256:77ebcf2344b622d2fa1e54a851971e030ae313c754863b435e5c1827be97a721"
+            ],
+            "version": "==3.4.2"
+        },
+        "pytz": {
+            "hashes": [
+                "sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053",
+                "sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277"
+            ],
+            "version": "==2018.5"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1",
+                "sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a"
+            ],
+            "version": "==2.19.1"
+        },
+        "requests-oauthlib": {
+            "hashes": [
+                "sha256:e21232e2465808c0e892e0e4dbb8c2faafec16ac6dc067dd546e9b466f3deac8",
+                "sha256:fe3282f48fb134ee0035712159f5429215459407f6d5484013343031ff1a400d",
+                "sha256:8886bfec5ad7afb391ed5443b1f697c6f4ae98d0e5620839d8b4499c032ada3f"
+            ],
+            "version": "==1.0.0"
+        },
+        "six": {
+            "hashes": [
+                "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
+                "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9"
+            ],
+            "version": "==1.11.0"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5",
+                "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf"
+            ],
+            "version": "==1.23"
+        },
+        "webencodings": {
+            "hashes": [
+                "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78",
+                "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"
+            ],
+            "version": "==0.5.1"
+        }
+    },
+    "develop": {
+        "atomicwrites": {
+            "hashes": [
+                "sha256:a24da68318b08ac9c9c45029f4a10371ab5b20e4226738e150e6e7c571630ae6",
+                "sha256:240831ea22da9ab882b551b31d4225591e5e447a68c5e188db5b89ca1d487585"
+            ],
+            "version": "==1.1.5"
+        },
+        "attrs": {
+            "hashes": [
+                "sha256:4b90b09eeeb9b88c35bc642cbac057e45a5fd85367b985bd2809c62b7b939265",
+                "sha256:e0d0eb91441a3b53dab4d9b743eafc1ac44476296a2053b6ca3af0b139faf87b"
+            ],
+            "version": "==18.1.0"
+        },
+        "more-itertools": {
+            "hashes": [
+                "sha256:a18d870ef2ffca2b8463c0070ad17b5978056f403fb64e3f15fe62a52db21cc0",
+                "sha256:6703844a52d3588f951883005efcf555e49566a48afd4db4e965d69b883980d3",
+                "sha256:2b6b9893337bfd9166bee6a62c2b0c9fe7735dcf85948b387ec8cba30e85d8e8"
+            ],
+            "version": "==4.2.0"
+        },
+        "pluggy": {
+            "hashes": [
+                "sha256:d345c8fe681115900d6da8d048ba67c25df42973bda370783cd58826442dcd7c",
+                "sha256:e160a7fcf25762bb60efc7e171d4497ff1d8d2d75a3d0df7a21b76821ecbf5c5",
+                "sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff"
+            ],
+            "version": "==0.6.0"
+        },
+        "py": {
+            "hashes": [
+                "sha256:e31fb2767eb657cbde86c454f02e99cb846d3cd9d61b318525140214fdc0e98e",
+                "sha256:3fd59af7435864e1a243790d322d763925431213b6b8529c6ca71081ace3bbf7"
+            ],
+            "version": "==1.5.4"
+        },
+        "pytest": {
+            "hashes": [
+                "sha256:4b208614ae6d98195430ad6bde03641c78553acee7c83cec2e85d613c0cd383d",
+                "sha256:0453c8676c2bee6feb0434748b068d5510273a916295fd61d306c4f22fbfd752"
+            ],
+            "version": "==3.6.3"
+        },
+        "pytest-runner": {
+            "hashes": [
+                "sha256:d987fec1e31287592ffe1cb823a8c613c533db4c6aaca0ee1191dbc91e2fcc61",
+                "sha256:d23f117be39919f00dd91bffeb4f15e031ec797501b717a245e377aee0f577be"
+            ],
+            "version": "==4.2"
+        },
+        "setuptools-scm": {
+            "hashes": [
+                "sha256:0f386524bb99d959e0d98381d7fe1f0a810e04eace5d2cc6297e701d64de9a7d",
+                "sha256:1261fb48def5ac5e4d04cb6196886cb8c2de5dc066ed2bfee99d4bb21aecb781",
+                "sha256:fda84172bd4dca0b671c1569eef6d4458d7d006c66a5adb41aa7a88462bcb6c0",
+                "sha256:e2ab256c944e66f063a020a56b4269010d772ce3af757cc703fe56e6fdc2dda1",
+                "sha256:95ff5ca2cb1e48a3b92080c90fac35ac015c3f1be185f401f0941b11279fdae8",
+                "sha256:a767141fecdab1c0b3c8e4c788ac912d7c94a0d6c452d40777ba84f918316379"
+            ],
+            "version": "==2.1.0"
+        },
+        "six": {
+            "hashes": [
+                "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
+                "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9"
+            ],
+            "version": "==1.11.0"
+        }
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,60 @@
+
+SiloRider
+=========
+
+SiloRider is a command-line utility that lets you implement the `POSSE`_ model
+on a website. This is how it works:
+
+- It reads your website's main page (or whatever URL you supply) and looks for
+  `Microformats`_ markup.
+- It reads a configuration file where you describe which "silos" (*i.e.*
+  external services) you want to post your content to.
+- It reads a local cache file to figure out which content has already been
+  posted where, so it only posts new content.
+- It actually posts that content to each silo.
+
+
+Supported Silos
+---------------
+
+Right now, the following silos are supported:
+
+- `Mastodon`_: an open, federated microblogging service.
+- Print: a debug silo that just prints entries in the console.
+
+
+Quickstart
+----------
+
+SiloRider needs to read a configuration file in `INI`_ format. At a minimum, you must define one "silo" using a ``silo:<name>`` section::
+
+    [silo:my_mastodon]
+    type: mastodon
+    url: https://mastodon.social
+
+This defines one Mastodon silo to which you want to post your entries.
+
+You can then run::
+
+    silorider auth my_mastodon
+
+This command authenticates your Mastodon account and gives SiloRider permission to post to your timeline. The authorization tokens are stored in a cache file that defaults to ``silorider.db``, next to the configuration file. Later, this cache will also contain the list of entries already posted to each silo.
+
+Once authenticated, you can run::
+
+    silorider populate https://yourwebsite
+
+This populates the cache with your existing entries, since you probably don't want the first run of SiloRider to cross-post your last dozen or so entries in one go.
+
+Later, when you post something new, you can run::
+
+    silorider process https://yourwebsite
+
+This picks up the new entries and posts them to Mastodon. You can run this command regularly: if there's something new, SiloRider will cross-post it to the configured silos. If not, it will just exit.
+
+
+.. _POSSE: https://indieweb.org/POSSE
+.. _Microformats: http://microformats.org/
+.. _Mastodon: https://joinmastodon.org/
+.. _INI: https://en.wikipedia.org/wiki/INI_file
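The README's two snippets can be combined into a single working configuration file. A minimal sketch (the silo name and instance URL are placeholders; the cache URI mirrors silorider/default.cfg below):

    [cache]
    uri=sqlite://silorider.db

    [silo:my_mastodon]
    type: mastodon
    url: https://mastodon.social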
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/requirements.txt Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,34 @@
+asn1crypto==0.24.0
+atomicwrites==1.1.5
+attrs==18.1.0
+beautifulsoup4==4.6.0
+certifi==2018.4.16
+cffi==1.11.5
+chardet==3.0.4
+coloredlogs==10.0
+cryptography==2.2.2
+decorator==4.3.0
+future==0.16.0
+html5lib==1.0.1
+http-ece==1.0.5
+humanfriendly==4.12.1
+idna==2.7
+Mastodon.py==1.3.0
+mf2py==1.1.0
+mf2util==0.5.0
+more-itertools==4.2.0
+oauthlib==2.1.0
+pluggy==0.6.0
+py==1.5.4
+pycparser==2.18
+pytest==3.6.2
+pytest-runner==4.2
+python-dateutil==2.7.3
+python-twitter==3.4.2
+pytz==2018.5
+requests==2.19.1
+requests-oauthlib==1.0.0
+setuptools-scm==2.1.0
+six==1.11.0
+urllib3==1.23
+webencodings==0.5.1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/setup.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,48 @@
+import os.path
+from setuptools import setup, find_packages
+
+
+def read(fname):
+    with open(os.path.join(os.path.dirname(__file__), fname)) as fp:
+        return fp.read()
+
+
+long_description = read('README.rst')
+
+install_requires = [
+    'coloredlogs>=10.0',
+    'Mastodon.py>=1.3.0',
+    'mf2py>=1.1.0',
+    'mf2util>=0.5.0',
+    'python-dateutil>=2.7.0',
+    'python-twitter>=3.4.0'
+]
+
+tests_require = [
+    'pytest>=3.6.2'
+]
+
+setup_requires = [
+    'setuptools-scm',
+    'pytest-runner'
+]
+
+
+setup(
+    name='silorider',
+    use_scm_version={'write_to': 'silorider/version.py'},
+    description=("Scans a website's microformats and cross-posts content "
+                 "to 'silo' services."),
+    long_description=long_description,
+    author='Ludovic Chabant',
+    author_email='ludovic@chabant.com',
+    license="Apache License 2.0",
+    url='https://bolt80.com/silorider',
+    packages=find_packages(),
+    setup_requires=setup_requires,
+    tests_require=tests_require,
+    install_requires=install_requires,
+    entry_points={'console_scripts': [
+        'silorider = silorider.main:main'
+    ]}
+)
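For reference, the ``console_scripts`` entry point declared above means that, once the package is installed, invoking the ``silorider`` command is roughly equivalent to this two-liner (a sketch, not part of the commit):

    from silorider.main import main

    main()  # parses sys.argv, runs the chosen sub-command, exits with its status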
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/cache/base.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,60 @@
+import os.path
+import urllib.parse
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+class Cache:
+    def getCustomValue(self, name, valtype=str):
+        raise NotImplementedError()
+
+    def setCustomValue(self, name, val):
+        raise NotImplementedError()
+
+    def wasPosted(self, silo_name, entry_uri):
+        raise NotImplementedError()
+
+    def addPost(self, silo_name, entry_uri):
+        raise NotImplementedError()
+
+
+class NullCache(Cache):
+    def __init__(self):
+        self._vals = {}
+
+    def getCustomValue(self, name, valtype=str):
+        return self._vals.get(name)
+
+    def setCustomValue(self, name, val):
+        self._vals[name] = val
+
+    def wasPosted(self, silo_name, entry_uri):
+        return False
+
+    def addPost(self, silo_name, entry_uri):
+        pass
+
+
+def load_cache(config, cfg_dir):
+    if not config.has_section('cache'):
+        logger.warning("No cache configured!")
+        return NullCache()
+
+    cache_uri = config.get('cache', 'uri', fallback=None)
+    if not cache_uri:
+        return NullCache()
+
+    res = urllib.parse.urlparse(cache_uri)
+    if res.scheme == 'sqlite':
+        from .sqlite import SqliteCache
+        dbpath = res.netloc + res.path
+        if cfg_dir:
+            dbpath = os.path.join(cfg_dir, dbpath)
+        return SqliteCache(dbpath, config)
+    elif res.scheme == 'memory':
+        from .memory import MemoryCache
+        return MemoryCache()
+
+    raise Exception("Unknown cache URI: %s" % cache_uri)
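A quick sketch of the URI parsing that load_cache() relies on: for a relative path like sqlite://silorider.db, urlparse puts the file name in the netloc component, which is why the code joins netloc and path back together (standard library only, runnable as-is):

    import urllib.parse

    res = urllib.parse.urlparse('sqlite://silorider.db')
    assert res.scheme == 'sqlite'
    # The file name lands in netloc, not path.
    assert (res.netloc, res.path) == ('silorider.db', '')

    # With three slashes, the path is absolute and netloc is empty.
    res = urllib.parse.urlparse('sqlite:///var/lib/silorider.db')
    assert res.netloc + res.path == '/var/lib/silorider.db'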
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/cache/memory.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,23 @@
+from .base import Cache
+
+
+class MemoryCache(Cache):
+    def __init__(self):
+        self._vals = {}
+        self._posted = {}
+
+    def getCustomValue(self, name, valtype=str):
+        return self._vals.get(name)
+
+    def setCustomValue(self, name, val):
+        self._vals[name] = val
+
+    def wasPosted(self, silo_name, entry_uri):
+        uris = self._posted.get(silo_name)
+        if uris:
+            return entry_uri in uris
+        return False
+
+    def addPost(self, silo_name, entry_uri):
+        uris = self._posted.setdefault(silo_name, set())
+        uris.add(entry_uri)
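A minimal usage sketch for MemoryCache, exercising the interface defined in cache/base.py (assumes the silorider package is importable):

    from silorider.cache.memory import MemoryCache

    cache = MemoryCache()
    assert not cache.wasPosted('mastodon', 'https://example.org/post')
    cache.addPost('mastodon', 'https://example.org/post')
    assert cache.wasPosted('mastodon', 'https://example.org/post')

    cache.setCustomValue('mastodon_accesstoken', 'TOKEN')
    assert cache.getCustomValue('mastodon_accesstoken') == 'TOKEN'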
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/cache/sqlite.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,122 @@
+import os.path
+import logging
+from .base import Cache
+
+
+logger = logging.getLogger(__name__)
+
+
+class SqliteCache(Cache):
+    SCHEMA_VERSION = 1
+
+    def __init__(self, dbpath, config):
+        self.path = dbpath
+        self.config = config
+
+        import sqlite3
+        logger.debug("Opening SQL DB: %s" % dbpath)
+        self.conn = sqlite3.connect(dbpath,
+                                    detect_types=sqlite3.PARSE_DECLTYPES)
+
+        if (not os.path.exists(dbpath) or
+                self._getSchemaVersion() != self.SCHEMA_VERSION):
+            self._initDb()
+
+    def _getSchemaVersion(self):
+        import sqlite3
+        try:
+            return self.getCustomValue('schema_version', valtype=int)
+        except sqlite3.Error:
+            return None
+
+    def _initDb(self):
+        c = self.conn.cursor()
+        c.execute('''DROP TABLE IF EXISTS info''')
+        c.execute(
+            '''CREATE TABLE info (
+                name text PRIMARY KEY NOT NULL,
+                str_val text,
+                real_val real,
+                int_val int
+            )''')
+        c.execute(
+            '''INSERT INTO info (name, int_val)
+               VALUES ('schema_version', ?)''',
+            (self.SCHEMA_VERSION,))
+
+        c.execute('''DROP TABLE IF EXISTS posted''')
+        c.execute(
+            '''CREATE TABLE posted (
+                id integer PRIMARY KEY,
+                silo text NOT NULL,
+                uri text NOT NULL,
+                posted_on timestamp
+            )''')
+        c.execute(
+            '''CREATE INDEX index_silo ON posted(silo)''')
+        c.execute(
+            '''CREATE INDEX index_uri ON posted(uri)''')
+        self.conn.commit()
+        c.close()
+
+    def getCustomValue(self, name, valtype=str):
+        c = self.conn.cursor()
+        if valtype is str:
+            c.execute(
+                '''SELECT str_val FROM info WHERE (name = ?)''', (name,))
+        elif valtype is float:
+            c.execute(
+                '''SELECT real_val FROM info WHERE (name = ?)''', (name,))
+        elif valtype in (int, bool):
+            c.execute(
+                '''SELECT int_val FROM info WHERE (name = ?)''', (name,))
+        else:
+            raise Exception("Unsupported value type: %s" % valtype)
+        row = c.fetchone()
+        if row is None:
+            return None
+
+        return valtype(row[0])
+
+    def setCustomValue(self, name, val):
+        c = self.conn.cursor()
+        if isinstance(val, str):
+            c.execute(
+                '''INSERT OR REPLACE INTO info (name, str_val)
+                   VALUES (?, ?)''',
+                (name, val))
+        elif isinstance(val, float):
+            c.execute(
+                '''INSERT OR REPLACE INTO info (name, real_val)
+                   VALUES (?, ?)''',
+                (name, str(val)))
+        elif isinstance(val, (int, bool)):
+            c.execute(
+                '''INSERT OR REPLACE INTO info (name, int_val)
+                   VALUES (?, ?)''',
+                (name, str(int(val))))
+        else:
+            raise Exception("Unsupported value type: %s" % type(val))
+
+        self.conn.commit()
+        c.close()
+
+    def wasPosted(self, silo_name, entry_uri):
+        c = self.conn.cursor()
+        c.execute(
+            '''SELECT id, silo, uri
+               FROM posted
+               WHERE (silo = ? AND uri = ?)''',
+            (silo_name, entry_uri))
+        if c.fetchone():
+            return True
+        return False
+
+    def addPost(self, silo_name, entry_uri):
+        c = self.conn.cursor()
+        c.execute(
+            '''INSERT INTO posted (silo, uri)
+               VALUES (?, ?)''',
+            (silo_name, entry_uri))
+        self.conn.commit()
+        c.close()
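One subtlety of the info table above: setCustomValue() uses INSERT OR REPLACE, which swaps out the whole row, so writing a value of a different type implicitly clears the previously used column. A standalone sketch with just the standard library:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    c = conn.cursor()
    c.execute('CREATE TABLE info (name text PRIMARY KEY NOT NULL, '
              'str_val text, real_val real, int_val int)')
    c.execute("INSERT OR REPLACE INTO info (name, int_val) VALUES ('v', 1)")
    c.execute("INSERT OR REPLACE INTO info (name, str_val) VALUES ('v', 'x')")
    c.execute("SELECT str_val, int_val FROM info WHERE name = 'v'")
    assert c.fetchone() == ('x', None)  # the old int_val is gone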
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/commands/auth.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,19 @@
+import logging
+from ..silos.base import SiloAuthenticationContext
+
+
+logger = logging.getLogger(__name__)
+
+
+def auth_silo(ctx):
+    silo_names = ctx.args.silo
+    if 'all' in silo_names:
+        silo_names = [s.name for s in ctx.silos]
+
+    for silo in ctx.silos:
+        if silo.name not in silo_names:
+            continue
+
+        logger.debug("Authenticating silo: %s" % silo.name)
+        authctx = SiloAuthenticationContext(ctx)
+        silo.authenticate(authctx)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/commands/process.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,94 @@
+import logging
+from .utils import get_named_silos
+from ..silos.base import SiloPostingContext
+from ..parse import parse_url
+
+
+logger = logging.getLogger(__name__)
+
+
+def process_url(url, ctx):
+    p = Processor(ctx, url)
+    p.process()
+
+
+class Processor:
+    def __init__(self, ctx, url):
+        self.ctx = ctx
+        self.url = url
+        self._silos = get_named_silos(ctx.silos, ctx.args.silo)
+
+    @property
+    def config(self):
+        return self.ctx.config
+
+    @property
+    def silos(self):
+        return self._silos
+
+    def process(self):
+        self.preProcess()
+
+        feed = parse_url(self.url)
+        for entry in feed.entries:
+            self.processEntry(entry)
+
+        self.postProcess()
+
+    def preProcess(self):
+        for silo in self.silos:
+            silo.onPostStart()
+
+    def postProcess(self):
+        for silo in self.silos:
+            silo.onPostEnd()
+
+    def processEntry(self, entry):
+        if self.isEntryFiltered(entry):
+            logger.debug("Entry is filtered out: %s" % entry.best_name)
+            return
+
+        entry_url = entry.get('url')
+        if not entry_url:
+            logger.warning("Found entry without a URL.")
+            return
+
+        postctx = SiloPostingContext(self.ctx)
+        no_cache = self.ctx.args.no_cache
+        logger.debug("Processing entry: %s" % entry.best_name)
+        for silo in self.silos:
+            if no_cache or not self.ctx.cache.wasPosted(silo.name, entry_url):
+                if not self.ctx.args.dry_run:
+                    silo.postEntry(entry, postctx)
+                    self.ctx.cache.addPost(silo.name, entry_url)
+                else:
+                    logger.info("Would post entry on %s: %s" %
+                                (silo.name, entry.best_name))
+            else:
+                logger.debug("Skipping already posted entry on %s: %s" %
+                             (silo.name, entry.best_name))
+
+    def isEntryFiltered(self, entry):
+        if not self.config.has_section('filter'):
+            return False
+
+        items = self.config.items('filter')
+
+        for name, value in items:
+            if name.startswith('include_'):
+                propname = name[8:]
+                propvalue = entry.get(propname)
+                for inc_val in value.split(','):
+                    if inc_val in propvalue:
+                        break
+                else:
+                    return True
+
+            elif name.startswith('exclude_'):
+                propname = name[8:]
+                propvalue = entry.get(propname)
+                for excl_val in value.split(','):
+                    if excl_val in propvalue:
+                        return True
+
+        return False
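The [filter] section drives isEntryFiltered() above: an include_<prop> option keeps only entries whose property contains one of the comma-separated values, while exclude_<prop> drops entries that match. A simplified, standalone restatement of that rule (it uses a plain dict instead of an Entry and guards against missing properties, which the committed code does not):

    def is_filtered(props, filter_items):
        # Mirrors Processor.isEntryFiltered() for illustration only.
        for name, value in filter_items:
            if name.startswith('include_'):
                propvalue = props.get(name[8:]) or ''
                if not any(v in propvalue for v in value.split(',')):
                    return True
            elif name.startswith('exclude_'):
                propvalue = props.get(name[8:]) or ''
                if any(v in propvalue for v in value.split(',')):
                    return True
        return False

    # Only entries whose 'category' contains 'blog' survive:
    assert not is_filtered({'category': ['blog']}, [('include_category', 'blog')])
    assert is_filtered({'category': ['notes']}, [('include_category', 'blog')])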
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/commands/utils.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,67 @@
+import logging
+from ..parse import parse_mf2
+
+
+logger = logging.getLogger(__name__)
+
+
+def get_named_silos(silos, names):
+    if not names:
+        return silos
+
+    valid_names = set([s.name for s in silos])
+    for n in names:
+        if n not in valid_names:
+            raise Exception("No such silo: %s" % n)
+
+    res = []
+    for s in silos:
+        if s.name in names:
+            res.append(s)
+    return res
+
+
+def populate_cache(url, ctx):
+    import mf2util
+    import dateutil.parser
+
+    silos = get_named_silos(ctx.silos, ctx.args.silo)
+
+    until_dt = None
+    if ctx.args.until:
+        until_dt = dateutil.parser.parse(ctx.args.until).date()
+        logger.debug("Populating cache until: %s" % until_dt)
+
+    mf_obj = parse_mf2(url)
+    mf_dict = mf_obj.to_dict()
+    for entry in mf_dict.get('items', []):
+        entry_props = entry.get('properties')
+        if not entry_props:
+            logger.warning("Found entry without any properties.")
+            continue
+
+        entry_url = entry_props.get('url')
+        if not entry_url:
+            logger.warning("Found entry without any URL.")
+            continue
+
+        if isinstance(entry_url, list):
+            entry_url = entry_url[0]
+
+        if until_dt:
+            entry_published = entry_props.get('published')
+            if not entry_published:
+                logger.warning("Entry '%s' has no published date." %
+                               entry_url)
+                continue
+
+            if isinstance(entry_published, list):
+                entry_published = entry_published[0]
+
+            entry_published_dt = mf2util.parse_datetime(entry_published)
+            if entry_published_dt and entry_published_dt.date() > until_dt:
+                continue
+
+        logger.debug("Adding entry to cache: %s" % entry_url)
+        for silo in silos:
+            ctx.cache.addPost(silo.name, entry_url)
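The --until cutoff above compares dates, not datetimes: an entry is skipped when its published date falls after the cutoff day. A small sketch of that comparison (using dateutil, one of the project's dependencies; the committed code parses the entry date with mf2util.parse_datetime instead):

    import dateutil.parser

    until_dt = dateutil.parser.parse('2018-01-08').date()
    published = dateutil.parser.parse('2018-01-09T09:30:00-00:00')
    # Published after the cutoff day, so this entry would be skipped.
    assert published.date() > until_dt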
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/default.cfg Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,7 @@
+[cache]
+uri=sqlite://silorider.db
+
+
+[filter]
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/format.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,48 @@
+import re
+import textwrap
+
+
+def format_entry(entry, limit=None, add_url='auto'):
+    url = entry.url
+    name = entry.best_name
+
+    do_add_url = ((add_url is True) or
+                  (add_url == 'auto' and not entry.is_micropost))
+    if limit:
+        if do_add_url and url:
+            limit -= 1 + len(url)
+
+        shortened = len(name) > limit
+        if shortened:
+            # If we have to shorten the text, but we haven't taken the
+            # URL into account yet, let's see if we have to include it now!
+            # (this happens when we only want to include the URL when the
+            # text is shortened)
+            if not do_add_url and add_url == 'auto' and url:
+                do_add_url = True
+                limit -= 1 + len(url)
+
+        if limit <= 0:
+            raise Exception("Can't shorten post name.")
+
+        name = textwrap.shorten(name, width=limit, placeholder="...")
+
+    if do_add_url and url:
+        name += ' ' + url
+    return name
+
+
+re_sentence_end = re.compile(r'[\w\]\)\"\'\.]\.\s|[\?\!]\s')
+
+
+def shorten_text(txt, limit):
+    if len(txt) <= limit:
+        return (txt, False)
+
+    m = re_sentence_end.search(txt)
+    if m and m.end() <= (limit + 1):
+        return (txt[:m.end() - 1], True)
+
+    shorter = textwrap.shorten(
+        txt, width=limit, placeholder="...")
+    return (shorter, True)
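A worked example of the limit accounting in format_entry(): when a URL gets appended, the text budget shrinks by len(url) plus one for the separating space. This sketch uses a stub entry object in the style of tests/test_format.py:

    from silorider.format import format_entry

    class StubEntry:
        url = 'https://example.org/article'   # 27 characters
        best_name = ' '.join(['word'] * 20)   # 99 characters
        is_micropost = False

    # With limit=80 and the URL appended, 80 - 28 = 52 characters remain
    # for the text, which then gets shortened with a '...' placeholder.
    out = format_entry(StubEntry(), limit=80, add_url=True)
    assert out.endswith(' ' + StubEntry.url)
    assert len(out) <= 80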
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/main.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,194 @@
+import os.path
+import logging
+import argparse
+import configparser
+import coloredlogs
+
+
+logger = logging.getLogger(__name__)
+
+
+class ExecutionContext:
+    def __init__(self, args, config, cache, silos):
+        self.args = args
+        self.config = config
+        self.cache = cache
+        self.silos = silos
+
+
+def _setup_auth(parser):
+    def _run(ctx):
+        from .commands.auth import auth_silo
+        auth_silo(ctx)
+
+    parser.add_argument(
+        'silo',
+        nargs='+',
+        help=("The name of the silo to authenticate. "
+              "Use 'all' to authenticate all declared silos."))
+    parser.add_argument(
+        '-f', '--force',
+        action='store_true',
+        help="Force re-authentication even for silos with valid access.")
+    parser.add_argument(
+        '--console',
+        action='store_true',
+        help=("Use the current terminal to enter credentials. This is "
+              "useful if you're not in an environment where silorider can "
+              "launch a browser."))
+    parser.set_defaults(func=_run)
+
+
+def _setup_process(parser):
+    def _run(ctx):
+        from .commands.process import process_url
+        for url in ctx.args.site_url:
+            process_url(url, ctx)
+
+    parser.add_argument(
+        'site_url',
+        nargs='+',
+        help="URL of the website to read from.")
+    parser.add_argument(
+        '-s', '--silo',
+        nargs='*',
+        help="Only use the given silo(s).")
+    parser.add_argument(
+        '--no-cache',
+        action='store_true',
+        help="Ignore the cache, post all entries that qualify.")
+    parser.add_argument(
+        '--dry-run',
+        action='store_true',
+        help="Only report what would be posted, but don't post anything.")
+    parser.set_defaults(func=_run)
+
+
+def _setup_populate(parser):
+    def _run(ctx):
+        from .commands.utils import populate_cache
+        for url in ctx.args.site_url:
+            populate_cache(url, ctx)
+
+    parser.add_argument(
+        'site_url',
+        nargs='+',
+        help="URL of the website to read from.")
+    parser.add_argument(
+        '-s', '--silo',
+        nargs='*',
+        help="Which silo to populate.")
+    parser.add_argument(
+        '--until',
+        help="The date until which to populate the cache (inclusive).")
+    parser.set_defaults(func=_run)
+
+
+commands = {
+    'auth': {
+        'help': "Authenticate with a silo service.",
+        'setup': _setup_auth,
+    },
+    'process': {
+        'help': "Post a website's latest articles to silo services.",
+        'setup': _setup_process,
+    },
+    'populate': {
+        'help': "Populate the cache with the latest entries from a feed.",
+        'setup': _setup_populate,
+    }
+}
+
+
+has_debug_logging = False
+pre_exec_hook = None
+post_exec_hook = None
+
+
+def _unsafe_main(args=None):
+    parser = argparse.ArgumentParser('SiloRider')
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help="Print debug messages.")
+    parser.add_argument(
+        '--no-color',
+        action='store_true',
+        help="Don't use pretty colors.")
+    parser.add_argument(
+        '-c', '--config',
+        help="Configuration file to load.")
+
+    subparsers = parser.add_subparsers()
+    for cn, cd in commands.items():
+        cp = subparsers.add_parser(cn, help=cd.get('help'))
+        cd['setup'](cp)
+
+    args = parser.parse_args(args)
+
+    global has_debug_logging
+    has_debug_logging = args.verbose
+
+    if not args.no_color:
+        coloredlogs.install()
+
+    loglvl = logging.DEBUG if args.verbose else logging.INFO
+    root_logger = logging.getLogger()
+    root_logger.setLevel(loglvl)
+    for handler in root_logger.handlers:
+        handler.setLevel(loglvl)
+
+    if not getattr(args, 'func', None):
+        parser.print_help()
+        return
+
+    logger.debug("Loading configuration.")
+    config = configparser.ConfigParser(interpolation=None)
+    config_paths = [os.path.join(os.path.dirname(__file__), 'default.cfg')]
+    if args.config:
+        config_paths.append(args.config)
+    config.read(config_paths)
+
+    logger.debug("Initializing cache.")
+    from .cache.base import load_cache
+    cfg_dir = os.path.dirname(args.config) if args.config else None
+    cache = load_cache(config, cfg_dir)
+
+    logger.debug("Initializing silo riders.")
+    from .silos.base import load_silos
+    silos = load_silos(config, cache)
+    if not silos:
+        logger.warning("No silos defined in the configuration file. "
+                       "Nothing to do!")
+        return
+
+    ctx = ExecutionContext(args, config, cache, silos)
+
+    if pre_exec_hook:
+        pre_exec_hook(ctx)
+
+    res = args.func(ctx)
+
+    if post_exec_hook:
+        post_exec_hook(ctx, res)
+
+    if isinstance(res, int):
+        return res
+    return 0
+
+
+def main():
+    try:
+        res = _unsafe_main()
+    except Exception as ex:
+        if has_debug_logging:
+            raise
+        logger.error(ex)
+        res = 1
+
+    import sys
+    sys.exit(res)
+
+
+if __name__ == '__main__':
+    main()
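A sketch of the configuration layering done in _unsafe_main() above: default.cfg is read first, then the user's file, and later files override earlier ones option by option:

    import configparser

    config = configparser.ConfigParser(interpolation=None)
    config.read_string("[cache]\nuri=sqlite://silorider.db\n")  # default.cfg
    config.read_string("[cache]\nuri=memory://for_test\n")      # user file
    assert config.get('cache', 'uri') == 'memory://for_test'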
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/parse.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,180 @@
+import os.path
+import logging
+import datetime
+
+
+logger = logging.getLogger(__name__)
+
+
+def parse_url(url_or_path):
+    mf_obj = parse_mf2(url_or_path)
+    matcher = EntryMatcher(mf_obj)
+
+    feed = Feed(url_or_path, matcher.mf_dict)
+
+    entries = []
+    for pair in matcher.entries:
+        mf_entry, bs_el = pair
+        try:
+            entry = Entry(feed, mf_entry, bs_el)
+            entry.interpret()
+        except InvalidEntryException:
+            logger.debug("Found invalid entry... skipping.")
+            continue
+
+        entries.append(entry)
+
+    sorted_entries = sorted(
+        entries,
+        key=lambda e: e.get(
+            'published', datetime.datetime.fromtimestamp(
+                0,
+                tz=datetime.timezone(datetime.timedelta(0)))),
+        reverse=False)
+
+    feed.entries = sorted_entries
+    return feed
+
+
+def parse_mf2(url_or_path):
+    import mf2py
+    logger.debug("Fetching %s..." % url_or_path)
+    if os.path.exists(url_or_path):
+        obj = open(url_or_path, 'r', encoding='utf8')
+        params = {'doc': obj}
+    else:
+        params = {'url': url_or_path}
+    return mf2py.Parser(html_parser='html5lib', **params)
+
+
+class InvalidEntryException(Exception):
+    pass
+
+
+class Feed:
+    def __init__(self, url, mf_dict):
+        self.url = url
+        self._mf_dict = mf_dict
+        self.entries = []
+
+
+class Entry:
+    def __init__(self, owner_feed, mf_entry, bs_obj):
+        self._owner_feed = owner_feed
+        self._mf_entry = mf_entry
+        self._bs_obj = bs_obj
+
+        self._type = None
+        self._props = None
+
+    @property
+    def entry_type(self):
+        return self._type
+
+    @property
+    def html_element(self):
+        return self._bs_obj
+
+    @property
+    def best_name(self):
+        self.interpret()
+
+        for pn in ['title', 'name', 'content-plain', 'content']:
+            pv = self._props.get(pn)
+            if pv:
+                return pv
+        return None
+
+    def __getattr__(self, name):
+        try:
+            return self._doGet(name)
+        except KeyError:
+            raise AttributeError("Entry does not have property '%s'." % name)
+
+    def get(self, name, default=None, *, force_list=False):
+        try:
+            return self._doGet(name, force_list=force_list)
+        except KeyError:
+            return default
+
+    def _doGet(self, name, force_list=False):
+        self.interpret()
+
+        values = self._props[name]
+        if not force_list and isinstance(values, list) and len(values) == 1:
+            return values[0]
+        return values
+
+    def htmlFind(self, *args, **kwargs):
+        if self._bs_obj is None:
+            raise Exception("No HTML object is available for this entry.")
+
+        return self._bs_obj.find(*args, **kwargs)
+
+    def interpret(self):
+        if self._type is not None or self._props is not None:
+            return
+
+        import mf2util
+
+        self._type = mf2util.post_type_discovery(self._mf_entry)
+        self._props = mf2util.interpret_entry(
+            self._owner_feed._mf_dict, self._owner_feed.url,
+            hentry=self._mf_entry)
+
+        # Adds an `is_micropost` property.
+        self._detect_micropost()
+
+        # mf2util only detects the first photo for a "photo"-type post,
+        # but there might be several, so we need to fix that.
+        #
+        # mf2util also apparently doesn't always bring over the "category"
+        # info.
+        self._fix_interpreted_props('photo', 'category')
+
+    def _detect_micropost(self):
+        is_micro = False
+        name = self.get('name')
+        content = self.get('content-plain')
+        if content and not name:
+            is_micro = True
+        elif name and not content:
+            is_micro = True
+        elif name and content:
+            shortest = min(len(name), len(content))
+            is_micro = (name[:shortest] == content[:shortest])
+        self._props['is_micropost'] = is_micro
+
+    def _fix_interpreted_props(self, *names):
+        for name in names:
+            values = self._mf_entry['properties'].get(name, [])
+            if isinstance(values, str):
+                values = [values]
+            self._props[name] = values
+
+
+class EntryMatcher:
+    """ A class that matches `mf2util` results with the original
+        BeautifulSoup document, so we have HTML objects on hand if needed.
+    """
+    def __init__(self, mf_obj):
+        self.mf_dict = mf_obj.to_dict()
+        self.entries = []
+
+        els_by_type = {}
+        next_el = {}
+        bf = mf_obj.__doc__
+        for e in self.mf_dict.get('items', []):
+            types = e.get('type')
+            if not types:
+                continue
+
+            entry_type = types[0]
+            if entry_type not in els_by_type:
+                ebt = list(bf.find_all(class_=entry_type))
+                els_by_type[entry_type] = ebt
+                next_el[entry_type] = 0
+
+            els = els_by_type[entry_type]
+            e_and_el = (e, els[next_el[entry_type]])
+            self.entries.append(e_and_el)
+            next_el[entry_type] += 1
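The _detect_micropost() rule above boils down to: a post is a micropost when it has only a name, only content, or when the name is just a copy or prefix of the content. A standalone restatement of that rule, for illustration only:

    def is_micropost(name, content):
        # Mirrors Entry._detect_micropost() above.
        if content and not name:
            return True
        if name and not content:
            return True
        if name and content:
            shortest = min(len(name), len(content))
            return name[:shortest] == content[:shortest]
        return False

    assert is_micropost('Quick note', None)
    assert is_micropost('Same text', 'Same text, but longer')
    assert not is_micropost('A title', 'A completely different body.')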
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/silos/base.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,104 @@
+import logging
+from ..format import format_entry
+
+
+logger = logging.getLogger(__name__)
+
+
+class SiloCreationContext:
+    def __init__(self, config, cache, silo_name):
+        self.config = config
+        self.cache = cache
+        self.silo_name = silo_name
+
+
+class SiloContextBase:
+    def __init__(self, exec_ctx):
+        self.exec_ctx = exec_ctx
+
+    @property
+    def args(self):
+        return self.exec_ctx.args
+
+    @property
+    def config(self):
+        return self.exec_ctx.config
+
+    @property
+    def cache(self):
+        return self.exec_ctx.cache
+
+
+class SiloAuthenticationContext(SiloContextBase):
+    pass
+
+
+class SiloPostingContext(SiloContextBase):
+    pass
+
+
+class Silo:
+    SILO_TYPE = 'unknown'
+
+    def __init__(self, ctx):
+        self.ctx = ctx
+        self._silo_cfg = dict(ctx.config.items('silo:%s' % self.name))
+
+    @property
+    def name(self):
+        return self.ctx.silo_name
+
+    def getConfigItem(self, name, fallback=None):
+        return self._silo_cfg.get(name, fallback)
+
+    def getConfigItems(self):
+        return self._silo_cfg.copy()
+
+    def getCacheItem(self, name, valtype=str):
+        full_name = '%s_%s' % (self.name, name)
+        return self.ctx.cache.getCustomValue(full_name, valtype=valtype)
+
+    def setCacheItem(self, name, val):
+        full_name = '%s_%s' % (self.name, name)
+        return self.ctx.cache.setCustomValue(full_name, val)
+
+    def formatEntry(self, entry, *args, **kwargs):
+        return format_entry(entry, *args, **kwargs)
+
+    def authenticate(self, ctx):
+        raise NotImplementedError()
+
+    def onPostStart(self):
+        pass
+
+    def postEntry(self, entry, ctx):
+        raise NotImplementedError()
+
+    def onPostEnd(self):
+        pass
+
+
+def load_silos(config, cache):
+    from .print import PrintSilo
+    from .mastodon import MastodonSilo
+    silo_types = [PrintSilo, MastodonSilo]
+    silo_dict = dict([(s.SILO_TYPE, s) for s in silo_types])
+
+    silos = []
+    sec_names = [sn for sn in config.sections() if sn.startswith('silo:')]
+    for sec_name in sec_names:
+        silo_name = sec_name[5:]
+        sec_items = dict(config.items(sec_name))
+        silo_type = sec_items.get('type')
+        if not silo_type:
+            raise Exception("No silo type specified for: %s" % silo_name)
+
+        silo_class = silo_dict.get(silo_type)
+        if not silo_class:
+            raise Exception("Unknown silo type: %s" % silo_type)
+
+        logger.debug("Creating silo '%s' for '%s'." % (silo_type, silo_name))
+        cctx = SiloCreationContext(config, cache, silo_name)
+        silo = silo_class(cctx)
+        silos.append(silo)
+    return silos
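Writing a new silo means subclassing Silo and implementing authenticate() and postEntry(). A hypothetical sketch (the 'logfile' type is not part of this commit, and load_silos() above only knows the built-in types, so wiring this in would also require extending silo_types):

    from silorider.silos.base import Silo

    class LogFileSilo(Silo):
        SILO_TYPE = 'logfile'  # hypothetical silo type

        def authenticate(self, ctx):
            pass  # nothing to authenticate for a local file

        def postEntry(self, entry, ctx):
            # 'path' would come from the [silo:...] config section.
            path = self.getConfigItem('path', fallback='posts.log')
            with open(path, 'a', encoding='utf8') as fp:
                fp.write(self.formatEntry(entry, limit=280) + '\n')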
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/silos/mastodon.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,147 @@
+import urllib.request
+import getpass
+import logging
+import mimetypes
+import mastodon
+from .base import Silo
+
+
+logger = logging.getLogger(__name__)
+
+
+class MastodonSilo(Silo):
+    SILO_TYPE = 'mastodon'
+    _CLIENT_CLASS = mastodon.Mastodon
+
+    def __init__(self, ctx):
+        super().__init__(ctx)
+
+        self.base_url = self.getConfigItem('url')
+        if not self.base_url:
+            raise Exception("No Mastodon instance URL defined for: %s" %
+                            self.ctx.silo_name)
+
+        self.client = None
+
+    def authenticate(self, ctx):
+        force = ctx.exec_ctx.args.force
+
+        client_token = self.getCacheItem('clienttoken')
+        if not client_token or force:
+            logger.info("Authenticating client app with Mastodon for %s" %
+                        self.ctx.silo_name)
+            pair = self._CLIENT_CLASS.create_app(
+                'SiloRider',
+                scopes=['read', 'write'],
+                api_base_url=self.base_url
+            )
+            client_token = '%s,%s' % pair
+            self.setCacheItem('clienttoken', client_token)
+
+        client_id, client_secret = client_token.split(',')
+
+        access_token = self.getCacheItem('accesstoken')
+        if not access_token or force:
+            m = self._CLIENT_CLASS(
+                client_id=client_id,
+                client_secret=client_secret,
+                api_base_url=self.base_url)
+
+            if ctx.exec_ctx.args.console:
+                logger.info("Authenticating user with Mastodon for %s" %
+                            self.ctx.silo_name)
+                logger.info("Only access tokens will be stored -- your "
+                            "username and password will be forgotten in "
+                            "a second.")
+
+                username = input("Username: ")
+                if not username:
+                    raise Exception("You must enter a username.")
+
+                password = getpass.getpass(prompt="Password: ")
+
+                try:
+                    access_token = m.log_in(
+                        username, password,
+                        scopes=['read', 'write'])
+                except mastodon.MastodonIllegalArgumentError as err:
+                    raise Exception("Incorrect credentials") from err
+                except mastodon.MastodonAPIError as err:
+                    raise Exception("Authentication error") from err
+
+                username = password = None
+
+            else:
+                logger.info("Once you've authorized silorider to access "
+                            "your Mastodon account, paste the authentication "
+                            "code back here:")
+
+                import webbrowser
+                req_url = m.auth_request_url(scopes=['write'])
+                webbrowser.open(req_url)
+
+                access_token = input("Authentication code: ")
+
+            self.setCacheItem('accesstoken', access_token)
+
+    def onPostStart(self):
+        self._ensureApp()
+
+    def _ensureApp(self):
+        if self.client is not None:
+            return
+
+        logger.debug("Creating Mastodon app.")
+        client_token = self.getCacheItem('clienttoken')
+        if not client_token:
+            raise Exception("Mastodon silo '%s' isn't authenticated." %
+                            self.name)
+
+        client_id, client_secret = client_token.split(',')
+
+        access_token = self.getCacheItem('accesstoken')
+        if not access_token:
+            raise Exception("Mastodon silo '%s' isn't authenticated." %
+                            self.name)
+
+        self.client = self._CLIENT_CLASS(
+            client_id=client_id,
+            client_secret=client_secret,
+            access_token=access_token,
+            api_base_url=self.base_url)
+
+    def postEntry(self, entry, ctx):
+        toottxt = self.formatEntry(entry, limit=400)
+        if not toottxt:
+            raise Exception("Can't find any content to use for the toot!")
+
+        visibility = self.getConfigItem('toot_visibility', fallback='public')
+
+        media_ids = None
+        photo_urls = entry.get('photo', [], force_list=True)
+        if photo_urls:
+            media_ids = []
+            for pu in photo_urls:
+                media_ids.append(self._mediaPostFromUrl(pu))
+
+        logger.debug("Posting toot: %s" % toottxt)
+        self.client.status_post(toottxt, media_ids=media_ids,
+                                visibility=visibility)
+
+    def _mediaPostFromUrl(self, url):
+        logger.debug("Downloading %s for upload to Mastodon..." % url)
+        mt, enc = mimetypes.guess_type(url)
+        if not mt:
+            mt = mimetypes.common_types['.jpg']
+
+        ext = mimetypes.guess_extension(mt) or '.jpg'
+        logger.debug("Got MIME type and extension: %s %s" % (mt, ext))
+
+        try:
+            tmpfile, headers = urllib.request.urlretrieve(url)
+            logger.debug("Using temporary file: %s" % tmpfile)
+
+            return self.client.media_post(tmpfile, mt)
+        finally:
+            logger.debug("Cleaning up.")
+            urllib.request.urlcleanup()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/silos/print.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,35 @@
+import datetime
+import textwrap
+from .base import Silo
+
+
+class PrintSilo(Silo):
+    SILO_TYPE = 'print'
+
+    def authenticate(self, ctx):
+        pass
+
+    def postEntry(self, entry, ctx):
+        import pprint
+
+        tokens = {}
+        shorten = (self.getConfigItem('shorten', '').lower() in
+                   ['true', 'yes', 'on', '1'])
+        names = self.getConfigItem('items', 'best_name,category,published')
+        names = names.split(',')
+        for n in names:
+            if n == 'type':
+                tokens['type'] = entry.entry_type
+
+            elif n == 'best_name':
+                tokens['best_name'] = entry.best_name
+
+            else:
+                v = entry.get(n)
+                # Convert datetimes to text before shortening, since
+                # textwrap.shorten only works on strings.
+                if isinstance(v, datetime.datetime):
+                    v = v.strftime('%c')
+                if shorten:
+                    v = textwrap.shorten(v, width=400, placeholder='...')
+                tokens[n] = v
+
+        pprint.pprint(tokens)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/version.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,4 @@
+# coding: utf-8
+# file generated by setuptools_scm
+# don't change, don't track in version control
+version = '0.1.dev0+d20180718'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/conftest.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,145 @@
+import io
+import os
+import os.path
+import re
+import logging
+import tempfile
+import pytest
+import silorider.main
+
+
+# def pytest_collect_file(parent, path):
+#     if path.ext == ".html" and path.basename.startswith("feeds"):
+#         return FeedFile(path, parent)
+
+
+re_feed_test_sep = re.compile(r'^---$')
+
+
+class FeedFile(pytest.File):
+    def collect(self):
+        with self.fspath.open() as fp:
+            markup = fp.read()
+
+        name = self.fspath.basename
+        html_markup, yaml_markup = re_feed_test_sep.split(markup, 1)
+        yield FeedItem(name, self, html_markup, yaml_markup)
+
+
+class FeedItem(pytest.Item):
+    def __init__(self, name, parent, in_spec, out_spec):
+        super().__init__(name, parent)
+        self.in_spec = in_spec
+        self.out_spec = out_spec
+
+    def runtest(self):
+        pass
+
+
+@pytest.fixture
+def cli():
+    return CliRunner()
+
+
+class CliRunner:
+    def __init__(self):
+        self._cfgtxt = """
+[cache]
+uri=memory://for_test
+"""
+        self._pre_hooks = []
+        self._cleanup = []
+
+    def getFeedPath(self, name):
+        return os.path.join(os.path.dirname(__file__),
+                            'feeds',
+                            '%s.html' % name)
+
+    def createTempFeed(self, contents):
+        tmpfd, tmpname = tempfile.mkstemp()
+        with os.fdopen(tmpfd, 'w', encoding='utf8') as tmpfp:
+            tmpfp.write(contents)
+        self._cleanup.append(tmpname)
+        return tmpname
+
+    def setConfig(self, cfgtxt):
+        self._cfgtxt = cfgtxt
+        return self
+
+    def appendConfig(self, cfgtxt):
+        self._cfgtxt += cfgtxt
+        return self
+
+    def appendSiloConfig(self, silo_name, silo_type, **options):
+        cfgtxt = '[silo:%s]\n' % silo_name
+        cfgtxt += 'type=%s\n' % silo_type
+        if options is not None:
+            for n, v in options.items():
+                cfgtxt += '%s=%s\n' % (n, v)
+        return self.appendConfig(cfgtxt)
+
+    def preExecHook(self, hook):
+        self._pre_hooks.append(hook)
+
+    def run(self, *args):
+        pre_args = []
+        if self._cfgtxt:
+            tmpfd, tmpcfg = tempfile.mkstemp()
+            print("Creating temporary configuration file: %s" % tmpcfg)
+            with os.fdopen(tmpfd, 'w') as tmpfp:
+                tmpfp.write(self._cfgtxt)
+            self._cleanup.append(tmpcfg)
+            pre_args = ['-c', tmpcfg]
+
+        captured = io.StringIO()
+        handler = logging.StreamHandler(captured)
+        handler.setLevel(logging.INFO)
+        silorider_logger = logging.getLogger('silorider')
+        silorider_logger.addHandler(handler)
+
+        main_ctx = None
+        main_res = None
+
+        def pre_exec_hook(ctx):
+            for h in self._pre_hooks:
+                h(ctx)
+
+        def post_exec_hook(ctx, res):
+            nonlocal main_ctx, main_res
+            main_ctx = ctx
+            main_res = res
+
+        silorider.main.pre_exec_hook = pre_exec_hook
+        silorider.main.post_exec_hook = post_exec_hook
+
+        args = pre_args + list(args)
+        print("Running command: %s" % list(args))
+        try:
+            silorider.main._unsafe_main(args)
+        finally:
+            silorider.main.pre_exec_hook = None
+            silorider.main.post_exec_hook = None
+
+        silorider_logger.removeHandler(handler)
+
+        print("Cleaning %d temporary files." % len(self._cleanup))
+        for tmpname in self._cleanup:
+            os.remove(tmpname)
+
+        return main_ctx, main_res
+
+
+@pytest.fixture
+def feedutil():
+    return FeedUtil()
+
+
+class FeedUtil:
+    def makeFeed(self, *entries):
+        feed = '<html><body>\n'
+        for e in entries:
+            feed += '<article class="h-entry">\n'
+            feed += e
+            feed += '</article>\n'
+        feed += '</body></html>'
+        return feed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_commands_populate.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,48 @@
+
+feed1 = """
+<html><body>
+    <article class="h-entry">
+        <h1 class="p-name">A new article</h1>
+        <div class="e-content">
+            <p>This is the text of the article.</p>
+            <p>It has 2 paragraphs.</p>
+        </div>
+        <a class="u-url" href="https://example.org/a-new-article">permalink</a>
+    </article>
+</body></html>"""
+
+
+def test_populate(cli):
+    cli.appendSiloConfig('test', 'print', items='name')
+    feed = cli.createTempFeed(feed1)
+    ctx, _ = cli.run('populate', feed, '-s', 'test')
+    assert ctx.cache.wasPosted('test', 'https://example.org/a-new-article')
+
+
+feed2 = """
+<html><body>
+    <article class="h-entry">
+        <h1 class="p-name">First article</h1>
+        <div><time class="dt-published" datetime="2018-01-07T09:30:00-00:00"></time></div>
+        <a class="u-url" href="https://example.org/first-article">permalink</a>
+    </article>
+    <article class="h-entry">
+        <h1 class="p-name">Second article</h1>
+        <div><time class="dt-published" datetime="2018-01-08T09:30:00-00:00"></time></div>
+        <a class="u-url" href="https://example.org/second-article">permalink</a>
+    </article>
+    <article class="h-entry">
+        <h1 class="p-name">Third article</h1>
+        <div><time class="dt-published" datetime="2018-01-09T09:30:00-00:00"></time></div>
+        <a class="u-url" href="https://example.org/third-article">permalink</a>
+    </article>
+</body></html>"""  # NOQA
+
+
+def test_populate_until(cli):
+    cli.appendSiloConfig('test', 'print', items='name')
+    feed = cli.createTempFeed(feed2)
+    ctx, _ = cli.run('populate', feed, '-s', 'test', '--until', '2018-01-08')
+    assert ctx.cache.wasPosted('test', 'https://example.org/first-article')
+    assert ctx.cache.wasPosted('test', 'https://example.org/second-article')
+    assert not ctx.cache.wasPosted('test', 'https://example.org/third-article')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_format.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,69 @@
+import pytest
+from silorider.format import format_entry
+
+
+test_url = 'https://example.org/article'
+
+
+class TestEntry:
+    pass
+
+
+def _make_test_entry(best_name, is_micropost):
+    entry = TestEntry()
+    entry.best_name = best_name
+    entry.is_micropost = is_micropost
+    entry.url = test_url
+    return entry
+
+
+@pytest.mark.parametrize("title, limit, add_url, expected", [
+    ('A test entry', None, False, 'A test entry'),
+    ('A test entry', None, 'auto', 'A test entry ' + test_url),
+    ('A test entry', None, True, 'A test entry ' + test_url),
+
+    ('A test entry', 80, False, 'A test entry'),
+    ('A test entry', 80, 'auto', 'A test entry ' + test_url),
+    ('A test entry', 80, True, 'A test entry ' + test_url),
+
+    ('A test entry that is very very long because its title has many many '
+     'words in it for no good reason', 80, False,
+     'A test entry that is very very long because its title has many many '
+     'words in...'),
+    ('A test entry that is very very long because its title has many many '
+     'words in it for no good reason', 80, 'auto',
+     'A test entry that is very very long because its... ' + test_url),
+    ('A test entry that is very very long because its title has many many '
+     'words in it for no good reason', 80, True,
+     'A test entry that is very very long because its... ' + test_url)
+])
+def test_format_longform_entry(title, limit, add_url, expected):
+    entry = _make_test_entry(title, False)
+    actual = format_entry(entry, limit, add_url)
+    assert actual == expected
+
+
+@pytest.mark.parametrize("text, limit, add_url, expected", [
+    ('A test entry', None, False, 'A test entry'),
+    ('A test entry', None, 'auto', 'A test entry'),
+    ('A test entry', None, True, 'A test entry ' + test_url),
+
+    ('A test entry', 80, False, 'A test entry'),
+    ('A test entry', 80, 'auto', 'A test entry'),
+    ('A test entry', 80, True, 'A test entry ' + test_url),
+
+    ('A test entry that is very very long because its title has many many '
+     'words in it for no good reason', 80, False,
+     'A test entry that is very very long because its title has many many '
+     'words in...'),
+    ('A test entry that is very very long because its title has many many '
+     'words in it for no good reason', 80, 'auto',
+     'A test entry that is very very long because its... ' + test_url),
+    ('A test entry that is very very long because its title has many many '
+     'words in it for no good reason', 80, True,
+     'A test entry that is very very long because its... ' + test_url)
+])
+def test_format_micropost_entry(text, limit, add_url, expected):
+    entry = _make_test_entry(text, True)
+    actual = format_entry(entry, limit, add_url)
+    assert actual == expected
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_silos_mastodon.py Wed Jul 18 20:46:04 2018 -0700
@@ -0,0 +1,141 @@
+import pytest
+
+
+def test_one_article(cli, feedutil, mastmock):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<h1 class="p-name">A new article</h1>
+<div class="e-content">
+<p>This is the text of the article.</p>
+<p>It has 2 paragraphs.</p>
+</div>
+<a class="u-url" href="https://example.org/a-new-article">permalink</a>"""
+    ))
+
+    cli.appendSiloConfig('test', 'mastodon', url='/blah')
+    mastmock.installTokens(cli, 'test')
+
+    ctx, _ = cli.run('process', feed)
+    assert ctx.cache.wasPosted('test', 'https://example.org/a-new-article')
+    toot = ctx.silos[0].client.toots[0]
+    assert toot == ('A new article https://example.org/a-new-article',
+                    None, 'public')
+
+
+def test_one_micropost(cli, feedutil, mastmock):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="p-name">This is a quick update.</p>
+<a class="u-url" href="/01234.html">permalink</a>"""
+    ))
+
+    cli.appendSiloConfig('test', 'mastodon', url='/blah')
+    mastmock.installTokens(cli, 'test')
+
+    ctx, _ = cli.run('process', feed)
+    assert ctx.cache.wasPosted('test', '/01234.html')
+    toot = ctx.silos[0].client.toots[0]
+    assert toot == ("This is a quick update.", None, 'public')
+
+
+def test_one_micropost_with_one_photo(cli, feedutil, mastmock, monkeypatch):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="p-name">This is a quick photo update.</p>
+<div>
+    <a class="u-photo" href="/fullimg.jpg"><img src="/thumbimg.jpg"/></a>
+</div>
+<a class="u-url" href="/01234.html">permalink</a>"""
+    ))
+
+    cli.appendSiloConfig('test', 'mastodon', url='/blah')
+    mastmock.installTokens(cli, 'test')
+
+    with monkeypatch.context() as m:
+        import urllib.request
+        m.setattr(urllib.request, 'urlretrieve', _patched_urlretrieve)
+        m.setattr(urllib.request, 'urlcleanup', _patched_urlcleanup)
+        ctx, _ = cli.run('process', feed)
+    assert ctx.cache.wasPosted('test', '/01234.html')
+    media = ctx.silos[0].client.media[0]
+    assert media == ('/retrieved/fullimg.jpg', 'image/jpeg', 1)
+    toot = ctx.silos[0].client.toots[0]
+    assert toot == ("This is a quick photo update.", [1], 'public')
+
+
+def test_one_micropost_with_two_photos(cli, feedutil, mastmock, monkeypatch):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="p-name">This is a photo update with 2 photos.</p>
+<div>
+    <a class="u-photo" href="/fullimg1.jpg"><img src="/thumbimg1.jpg"/></a>
+    <a class="u-photo" href="/fullimg2.jpg"><img src="/thumbimg2.jpg"/></a>
+</div>
+<a class="u-url" href="/01234.html">permalink</a>"""
+    ))
+
+    cli.appendSiloConfig('test', 'mastodon', url='/blah')
+    mastmock.installTokens(cli, 'test')
+
+    with monkeypatch.context() as m:
+        import urllib.request
+        m.setattr(urllib.request, 'urlretrieve', _patched_urlretrieve)
+        m.setattr(urllib.request, 'urlcleanup', _patched_urlcleanup)
+        ctx, _ = cli.run('process', feed)
+    assert ctx.cache.wasPosted('test', '/01234.html')
+    media = ctx.silos[0].client.media[0]
+    assert media == ('/retrieved/fullimg1.jpg', 'image/jpeg', 1)
+    media = ctx.silos[0].client.media[1]
+    assert media == ('/retrieved/fullimg2.jpg', 'image/jpeg', 2)
+    toot = ctx.silos[0].client.toots[0]
+    assert toot == ("This is a photo update with 2 photos.", [1, 2], 'public')
+
+
+def _patched_urlretrieve(url):
+    return ('/retrieved/' + url.lstrip('/'), None)
+
+
+def _patched_urlcleanup():
+    pass
+
+
+@pytest.fixture(scope='session')
+def mastmock():
+    from silorider.silos.mastodon import MastodonSilo
+    MastodonSilo._CLIENT_CLASS = MastodonMock
+    return MastodonMockUtil()
+
+
+class MastodonMock:
+    @staticmethod
+    def create_app(app_name, scopes, api_base_url):
+        return ('TEST_CLIENT_ID', 'TEST_CLIENT_SECRET')
+
+    def __init__(self, client_id, client_secret, access_token, api_base_url):
+        self.toots = []
+        self.media = []
+        self.next_mid = 1
+
+    def log_in(self, username, password, scopes):
+        return 'TEST_ACCESS_TOKEN'
+
+    def auth_request_url(self, scopes):
+        return 'https://example.org/auth'
+
+    def status_post(self, toot, media_ids=None, visibility=None):
+        self.toots.append((toot, media_ids, visibility))
+
+    def media_post(self, filename, mimetype):
+        mid = self.next_mid
+        self.next_mid += 1
+        self.media.append((filename, mimetype, mid))
+        return mid
+
+
+class MastodonMockUtil:
+    def installTokens(self, cli, silo_name):
+        def do_install_tokens(ctx):
+            ctx.cache.setCustomValue(
+                '%s_clienttoken' % silo_name,
+                'TEST_CLIENT_ID,TEST_CLIENT_SECRET')
+            ctx.cache.setCustomValue(
+                '%s_accesstoken' % silo_name,
+                'TEST_ACCESS_TOKEN')
+
+        cli.preExecHook(do_install_tokens)