plutopluto

git clone https://git.ce9e.org/plutopluto.git

commit
36bf62a5173651729a23b9a7359aed8d64dd818d
parent
ba6a83b5e41f3144642580a6e8fd0409c4ee1e78
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2023-07-02 07:40
rm clean_html

feedparser already does sanitization

Diffstat

M plutopluto/__init__.py 56 +++++---------------------------------------------------

1 file changed, 5 insertions, 51 deletions


diff --git a/plutopluto/__init__.py b/plutopluto/__init__.py

@@ -19,53 +19,6 @@ __version__ = '1.2.0'
   19    19 app = Flask(__name__)
   20    20 
   21    21 
   22    -1 def clean_html(s):
   23    -1     """Strip possibly dangerous HTML."""
   24    -1 
   25    -1     allowed_tags = [
   26    -1         'p',
   27    -1         'a',
   28    -1         'ul',
   29    -1         'ol',
   30    -1         'li',
   31    -1         'blockquote',
   32    -1         'em',
   33    -1         'strong',
   34    -1         'img',
   35    -1         'video',
   36    -1         'h1',
   37    -1         'h2',
   38    -1         'h3',
   39    -1         'h4',
   40    -1         'h5',
   41    -1         'h6',
   42    -1         'pre',
   43    -1         'code',
   44    -1         'hr',
   45    -1         'table',
   46    -1         'tr',
   47    -1         'td',
   48    -1         'th',
   49    -1         'details',
   50    -1         'summary',
   51    -1     ]
   52    -1     allowed_attrs = ['href', 'src', 'alt', 'title']
   53    -1 
   54    -1     tree = BeautifulSoup(s)
   55    -1 
   56    -1     for tag in tree.find_all():
   57    -1         if tag.name not in allowed_tags:
   58    -1             if tag.name in ['script']:
   59    -1                 tag.extract()
   60    -1             else:
   61    -1                 tag.hidden = True
   62    -1         else:
   63    -1             for attr in set(tag.attrs) - set(allowed_attrs):
   64    -1                 del tag.attrs[attr]
   65    -1 
   66    -1     return str(tree)
   67    -1 
   68    -1 
   69    22 @functools.lru_cache
   70    23 def parse(url):
   71    24     """Get feed and convert to JSON."""
@@ -86,14 +39,15 @@ def parse(url):
   86    39         d['source'] = feed.feed.get('title')
   87    40         if 'youtube' in url:
   88    41             template = u'<img alt="%s" src="%s" />\n<div>%s</div>'
   89    -1             d['content'] = clean_html(template % (
   -1    42             d['content'] = template % (
   90    43                 item['media_content'][0]['url'],
   91    44                 item['media_thumbnail'][0]['url'],
   92    -1                 item['media_description']))
   -1    45                 item['media_description'],
   -1    46             )
   93    47         elif 'content' in item:
   94    -1             d['content'] = clean_html(item['content'][0]['value'])
   -1    48             d['content'] = item['content'][0]['value']
   95    49         else:
   96    -1             d['content'] = clean_html(item.get('description'))
   -1    50             d['content'] = item.get('description')
   97    51         return d
   98    52 
   99    53     return {