plutopluto

git clone https://git.ce9e.org/plutopluto.git

commit
6867c649998a0e92b2a98539d3b65d8e2e8ef2f5
parent
d4d2e0e0afd49a75fcc89e35d0ba1bae8ca121af
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2023-07-02 09:50
basic support for activity pub

Diffstat

M plutopluto/__init__.py 79 +++++++++++++++++++++++++++++++++++++++++++++++++++----------

1 file changed, 67 insertions, 12 deletions


diff --git a/plutopluto/__init__.py b/plutopluto/__init__.py

@@ -1,14 +1,17 @@
    1     1 #!/usr/bin/env python
    2     2 
    3     3 import argparse
   -1     4 import datetime
    4     5 import functools
    5     6 import os
    6     7 import sys
    7     8 from time import mktime
    8     9 from time import time
   -1    10 from xml.sax.saxutils import escape
    9    11 
   10    12 import feedparser
   11    -1 from bs4 import BeautifulSoup
   -1    13 import requests
   -1    14 from feedparser.sanitizer import _sanitize_html
   12    15 from flask import Flask
   13    16 from flask import request
   14    17 from flask import jsonify
@@ -29,7 +32,7 @@ def linebreaks(text):
   29    32 
   30    33 
   31    34 @functools.lru_cache
   32    -1 def parse(url):
   -1    35 def parse_feed(url):
   33    36     """Get feed and convert to JSON."""
   34    37 
   35    38     feed = feedparser.parse(url)
@@ -64,20 +67,72 @@ def parse(url):
   64    67     }
   65    68 
   66    69 
   -1    70 @functools.lru_cache
   -1    71 def parse_activity_stream(url):
   -1    72     r = requests.get(url, headers={'Accept': 'application/activity+json'})
   -1    73     r.raise_for_status()
   -1    74     data = r.json()
   -1    75     entries = []
   -1    76 
   -1    77     def _parse_item(obj):
   -1    78         source = obj.get('audience', obj['attributedTo'])
   -1    79 
   -1    80         content = _sanitize_html(obj.get('content', ''), 'utf-8', 'text/html')
   -1    81         for attachment in obj.get('attachment', []):
   -1    82             href = escape(attachment.get('href', attachment.get('url', '')))
   -1    83             if href:
   -1    84                 ext = href.rsplit('.', 1)[-1]
   -1    85                 if ext in ['jpg', 'jpeg', 'png', 'gif', 'webp']:
   -1    86                     content += f'<img src="{href}" alt="">'
   -1    87                 else:
   -1    88                     content += f'<p><a href="{href}">{href}</a></p>'
   -1    89             else:
   -1    90                 print(attachment)
   -1    91 
   -1    92         return {
   -1    93             'id': obj['id'],
   -1    94             'title': obj.get('name', ''),
   -1    95             'link': obj.get('url', obj['id']),
   -1    96             'source': source.split('/')[-1],
   -1    97             'source_link': source,
   -1    98             'content': content,
   -1    99             'dt': datetime.datetime.fromisoformat(obj['published']).timestamp(),
   -1   100             # attachments
   -1   101         }
   -1   102 
   -1   103     def _process_activity(activity):
   -1   104         if activity['type'] == 'Create':
   -1   105             entries.append(_parse_item(activity['object']))
   -1   106         elif activity['type'] == 'Announce':
   -1   107             _process_activity(activity['object'])
   -1   108 
   -1   109     for activity in data['orderedItems']:
   -1   110         _process_activity(activity)
   -1   111 
   -1   112     return {
   -1   113         'url': url,
   -1   114         'next': data.get('next'),
   -1   115         'entries': entries,
   -1   116     }
   -1   117 
   -1   118 
   67   119 @app.route('/parse', methods=['GET'])
   68   120 def _parse():
   69    -1     if 'url' in request.values:
   70    -1         url = request.values['url']
   -1   121     if 'url' not in request.values:
   -1   122         abort(400)
   71   123 
   72    -1         try:
   73    -1             data = parse(url)
   74    -1         except Exception as err:
   75    -1             app.logger.warning('%s: %s' % (url, err))
   76    -1             abort(500)
   -1   124     url = request.values['url']
   77   125 
   78    -1         return jsonify(data)
   79    -1     else:
   80    -1         abort(400)
   -1   126     try:
   -1   127         if 'outbox' in url:
   -1   128             data = parse_activity_stream(url)
   -1   129         else:
   -1   130             data = parse_feed(url)
   -1   131     except Exception as err:
   -1   132         app.logger.warning('%s: %s' % (url, err))
   -1   133         abort(500)
   -1   134 
   -1   135     return jsonify(data)
   81   136 
   82   137 
   83   138 @app.route('/', methods=['GET'])