- commit
- 6867c649998a0e92b2a98539d3b65d8e2e8ef2f5
- parent
- d4d2e0e0afd49a75fcc89e35d0ba1bae8ca121af
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2023-07-02 09:50
Basic support for ActivityPub
Diffstat
| M | plutopluto/__init__.py | 79 | +++++++++++++++++++++++++++++++++++++++++++++++++++---------- |
1 file changed, 67 insertions, 12 deletions
diff --git a/plutopluto/__init__.py b/plutopluto/__init__.py
@@ -1,14 +1,17 @@ 1 1 #!/usr/bin/env python 2 2 3 3 import argparse -1 4 import datetime 4 5 import functools 5 6 import os 6 7 import sys 7 8 from time import mktime 8 9 from time import time -1 10 from xml.sax.saxutils import escape 9 11 10 12 import feedparser11 -1 from bs4 import BeautifulSoup-1 13 import requests -1 14 from feedparser.sanitizer import _sanitize_html 12 15 from flask import Flask 13 16 from flask import request 14 17 from flask import jsonify @@ -29,7 +32,7 @@ def linebreaks(text): 29 32 30 33 31 34 @functools.lru_cache32 -1 def parse(url):-1 35 def parse_feed(url): 33 36 """Get feed and convert to JSON.""" 34 37 35 38 feed = feedparser.parse(url) @@ -64,20 +67,72 @@ def parse(url): 64 67 } 65 68 66 69 -1 70 @functools.lru_cache -1 71 def parse_activity_stream(url): -1 72 r = requests.get(url, headers={'Accept': 'application/activity+json'}) -1 73 r.raise_for_status() -1 74 data = r.json() -1 75 entries = [] -1 76 -1 77 def _parse_item(obj): -1 78 source = obj.get('audience', obj['attributedTo']) -1 79 -1 80 content = _sanitize_html(obj.get('content', ''), 'utf-8', 'text/html') -1 81 for attachment in obj.get('attachment', []): -1 82 href = escape(attachment.get('href', attachment.get('url', ''))) -1 83 if href: -1 84 ext = href.rsplit('.', 1)[-1] -1 85 if ext in ['jpg', 'jpeg', 'png', 'gif', 'webp']: -1 86 content += f'<img src="{href}" alt="">' -1 87 else: -1 88 content += f'<p><a href="{href}">{href}</a></p>' -1 89 else: -1 90 print(attachment) -1 91 -1 92 return { -1 93 'id': obj['id'], -1 94 'title': obj.get('name', ''), -1 95 'link': obj.get('url', obj['id']), -1 96 'source': source.split('/')[-1], -1 97 'source_link': source, -1 98 'content': content, -1 99 'dt': datetime.datetime.fromisoformat(obj['published']).timestamp(), -1 100 # attachments -1 101 } -1 102 -1 103 def _process_activity(activity): -1 104 if activity['type'] == 'Create': -1 105 entries.append(_parse_item(activity['object'])) -1 106 elif activity['type'] == 'Announce': 
-1 107 _process_activity(activity['object']) -1 108 -1 109 for activity in data['orderedItems']: -1 110 _process_activity(activity) -1 111 -1 112 return { -1 113 'url': url, -1 114 'next': data.get('next'), -1 115 'entries': entries, -1 116 } -1 117 -1 118 67 119 @app.route('/parse', methods=['GET']) 68 120 def _parse():69 -1 if 'url' in request.values:70 -1 url = request.values['url']-1 121 if 'url' not in request.values: -1 122 abort(400) 71 12372 -1 try:73 -1 data = parse(url)74 -1 except Exception as err:75 -1 app.logger.warning('%s: %s' % (url, err))76 -1 abort(500)-1 124 url = request.values['url'] 77 12578 -1 return jsonify(data)79 -1 else:80 -1 abort(400)-1 126 try: -1 127 if 'outbox' in url: -1 128 data = parse_activity_stream(url) -1 129 else: -1 130 data = parse_feed(url) -1 131 except Exception as err: -1 132 app.logger.warning('%s: %s' % (url, err)) -1 133 abort(500) -1 134 -1 135 return jsonify(data) 81 136 82 137 83 138 @app.route('/', methods=['GET'])