plutopluto

git clone https://git.ce9e.org/plutopluto.git

commit
826efdda4ce234295bb778a0d454b003133bdf86
parent
758bdecb7484554a7155263390c3226c28df99dd
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2023-07-01 17:41
indent with spaces in py files

Diffstat

M plutopluto/__init__.py 182 ++++++++++++++++++++++++++++++------------------------------
M setup.py 60 ++++++++++++++++++++++++++++++------------------------------

2 files changed, 121 insertions, 121 deletions


diff --git a/plutopluto/__init__.py b/plutopluto/__init__.py

@@ -19,122 +19,122 @@ cache = SimpleCache()
   19    19 
   20    20 
   21    21 def strip_atts(s):
   22    -1 	"""Strip possibly dangerous HTML attributes."""
   -1    22     """Strip possibly dangerous HTML attributes."""
   23    23 
   24    -1 	whitelist = ['href', 'src', 'alt', 'title', 'datetime']
   25    -1 	tree = BeautifulSoup(s)
   -1    24     whitelist = ['href', 'src', 'alt', 'title', 'datetime']
   -1    25     tree = BeautifulSoup(s)
   26    26 
   27    -1 	for tag in tree.find_all():
   28    -1 		l = []
   29    -1 		for attr in tag.attrs:
   30    -1 			if attr not in whitelist:
   31    -1 				l.append(attr)
   32    -1 		for attr in l:
   33    -1 			del tag.attrs[attr]
   34    -1 	return str(tree)
   -1    27     for tag in tree.find_all():
   -1    28         l = []
   -1    29         for attr in tag.attrs:
   -1    30             if attr not in whitelist:
   -1    31                 l.append(attr)
   -1    32         for attr in l:
   -1    33             del tag.attrs[attr]
   -1    34     return str(tree)
   35    35 
   36    36 
   37    37 def parse(url):
   38    -1 	"""Get feed and convert to JSON."""
   39    -1 
   40    -1 	feed = feedparser.parse(url)
   41    -1 
   42    -1 	def _parse_item(i, item):
   43    -1 		d = dict()
   44    -1 		if 'published_parsed' in item:
   45    -1 			d['dt'] = mktime(item['published_parsed'])
   46    -1 		elif 'updated_parsed' in item:
   47    -1 			d['dt'] = mktime(item['updated_parsed'])
   48    -1 		else:
   49    -1 			d['dt'] = int(time()) - i  # - i to preserve sort order
   50    -1 		d['id'] = item.get('id')
   51    -1 		d['title'] = item.get('title')
   52    -1 		d['link'] = item.get('link')
   53    -1 		d['source'] = feed.feed.get('title')
   54    -1 		if 'youtube' in url:
   55    -1 			template = u'<img alt="%s" src="%s" />\n<div>%s</div>'
   56    -1 			d['content'] = strip_atts(template % (
   57    -1 				item['media_content'][0]['url'],
   58    -1 				item['media_thumbnail'][0]['url'],
   59    -1 				item['media_description']))
   60    -1 		elif 'content' in item:
   61    -1 			d['content'] = strip_atts(item['content'][0]['value'])
   62    -1 		else:
   63    -1 			d['content'] = strip_atts(item.get('description'))
   64    -1 		return d
   65    -1 
   66    -1 	return {
   67    -1 		'url': url,
   68    -1 		'entries': [_parse_item(i, item) for i, item in enumerate(feed.entries)],
   69    -1 	}
   -1    38     """Get feed and convert to JSON."""
   -1    39 
   -1    40     feed = feedparser.parse(url)
   -1    41 
   -1    42     def _parse_item(i, item):
   -1    43         d = dict()
   -1    44         if 'published_parsed' in item:
   -1    45             d['dt'] = mktime(item['published_parsed'])
   -1    46         elif 'updated_parsed' in item:
   -1    47             d['dt'] = mktime(item['updated_parsed'])
   -1    48         else:
   -1    49             d['dt'] = int(time()) - i  # - i to preserve sort order
   -1    50         d['id'] = item.get('id')
   -1    51         d['title'] = item.get('title')
   -1    52         d['link'] = item.get('link')
   -1    53         d['source'] = feed.feed.get('title')
   -1    54         if 'youtube' in url:
   -1    55             template = u'<img alt="%s" src="%s" />\n<div>%s</div>'
   -1    56             d['content'] = strip_atts(template % (
   -1    57                 item['media_content'][0]['url'],
   -1    58                 item['media_thumbnail'][0]['url'],
   -1    59                 item['media_description']))
   -1    60         elif 'content' in item:
   -1    61             d['content'] = strip_atts(item['content'][0]['value'])
   -1    62         else:
   -1    63             d['content'] = strip_atts(item.get('description'))
   -1    64         return d
   -1    65 
   -1    66     return {
   -1    67         'url': url,
   -1    68         'entries': [_parse_item(i, item) for i, item in enumerate(feed.entries)],
   -1    69     }
   70    70 
   71    71 
   72    72 def cachedParse(url, timeout=5 * 60):
   73    -1 	data = cache.get(url)
   74    -1 	if data is None:
   75    -1 		data = parse(url)
   76    -1 		cache.set(url, data, timeout=timeout)
   77    -1 	return data
   -1    73     data = cache.get(url)
   -1    74     if data is None:
   -1    75         data = parse(url)
   -1    76         cache.set(url, data, timeout=timeout)
   -1    77     return data
   78    78 
   79    79 
   80    80 @app.route('/parse', methods=['GET'])
   81    81 def _parse():
   82    -1 	if 'url' in request.values:
   83    -1 		url = request.values['url']
   -1    82     if 'url' in request.values:
   -1    83         url = request.values['url']
   84    84 
   85    -1 		try:
   86    -1 			data = cachedParse(url)
   87    -1 		except Exception as err:
   88    -1 			app.logger.warning('%s: %s' % (url, err))
   89    -1 			abort(500)
   -1    85         try:
   -1    86             data = cachedParse(url)
   -1    87         except Exception as err:
   -1    88             app.logger.warning('%s: %s' % (url, err))
   -1    89             abort(500)
   90    90 
   91    -1 		return jsonify(data)
   92    -1 	else:
   93    -1 		abort(400)
   -1    91         return jsonify(data)
   -1    92     else:
   -1    93         abort(400)
   94    94 
   95    95 
   96    96 @app.route('/', methods=['GET'])
   97    97 def index():
   98    -1 	with open(os.path.join(app.root_path, 'index.html')) as fh:
   99    -1 		return fh.read()
   -1    98     with open(os.path.join(app.root_path, 'index.html')) as fh:
   -1    99         return fh.read()
  100   100 
  101   101 
  102   102 @app.route('/config', methods=['GET'])
  103   103 def config():
  104    -1 	return jsonify({
  105    -1 		'urls': app.config['URLS']
  106    -1 	})
   -1   104     return jsonify({
   -1   105         'urls': app.config['URLS']
   -1   106     })
  107   107 
  108   108 
  109   109 def main():
  110    -1 	parser = argparse.ArgumentParser(description='simple feed aggregator')
  111    -1 	parser.add_argument('--version', '-V', action='version', version=__version__)
  112    -1 	parser.add_argument('-d', '--debug', action='store_true')
  113    -1 	parser.add_argument('-c', '--config', metavar='FILE')
  114    -1 	parser.add_argument('urls', metavar='URL', nargs='*',
  115    -1 		help='full feed url, optionally with a {page} placeholder')
  116    -1 	args = parser.parse_args()
  117    -1 
  118    -1 	config_name = '.plutopluto.cfg'
  119    -1 	local_config = os.path.abspath(config_name)
  120    -1 	home_config = os.path.expanduser('~/' + config_name)
  121    -1 
  122    -1 	if args.config:
  123    -1 		app.config.from_pyfile(os.path.abspath(args.config))
  124    -1 	elif os.path.exists(local_config):
  125    -1 		app.config.from_pyfile(local_config)
  126    -1 	elif os.path.exists(home_config):
  127    -1 		app.config.from_pyfile(home_config)
  128    -1 	app.debug = args.debug
  129    -1 	app.config['URLS'] = args.urls + app.config.get('URLS', [])
  130    -1 
  131    -1 	if not app.config['URLS']:
  132    -1 		print("Error: No urls provided")
  133    -1 		parser.print_usage()
  134    -1 		sys.exit(1)
  135    -1 
  136    -1 	app.run(app.config.get('HOST'), app.config.get('PORT'))
   -1   110     parser = argparse.ArgumentParser(description='simple feed aggregator')
   -1   111     parser.add_argument('--version', '-V', action='version', version=__version__)
   -1   112     parser.add_argument('-d', '--debug', action='store_true')
   -1   113     parser.add_argument('-c', '--config', metavar='FILE')
   -1   114     parser.add_argument('urls', metavar='URL', nargs='*',
   -1   115         help='full feed url, optionally with a {page} placeholder')
   -1   116     args = parser.parse_args()
   -1   117 
   -1   118     config_name = '.plutopluto.cfg'
   -1   119     local_config = os.path.abspath(config_name)
   -1   120     home_config = os.path.expanduser('~/' + config_name)
   -1   121 
   -1   122     if args.config:
   -1   123         app.config.from_pyfile(os.path.abspath(args.config))
   -1   124     elif os.path.exists(local_config):
   -1   125         app.config.from_pyfile(local_config)
   -1   126     elif os.path.exists(home_config):
   -1   127         app.config.from_pyfile(home_config)
   -1   128     app.debug = args.debug
   -1   129     app.config['URLS'] = args.urls + app.config.get('URLS', [])
   -1   130 
   -1   131     if not app.config['URLS']:
   -1   132         print("Error: No urls provided")
   -1   133         parser.print_usage()
   -1   134         sys.exit(1)
   -1   135 
   -1   136     app.run(app.config.get('HOST'), app.config.get('PORT'))
  137   137 
  138   138 
  139   139 if __name__ == '__main__':
  140    -1 	main()
   -1   140     main()

diff --git a/setup.py b/setup.py

@@ -13,33 +13,33 @@ VERSION = re.search("__version__ = '([^']+)'", INIT).group(1)
   13    13 
   14    14 
   15    15 setup(
   16    -1 	name='plutopluto',
   17    -1 	version=VERSION,
   18    -1 	description="simple feed aggregator",
   19    -1 	long_description=README,
   20    -1 	url='https://github.com/xi/plutopluto',
   21    -1 	author='Tobias Bengfort',
   22    -1 	author_email='tobias.bengfort@posteo.de',
   23    -1 	packages=['plutopluto'],
   24    -1 	include_package_data=True,
   25    -1 	install_requires=[
   26    -1 		'argparse',
   27    -1 		'flask',
   28    -1 		'werkzeug',
   29    -1 		'feedparser',
   30    -1 		'beautifulsoup4',
   31    -1 	],
   32    -1 	entry_points={'console_scripts': [
   33    -1 		'plutopluto=plutopluto:main',
   34    -1 	]},
   35    -1 	license='GPLv2+',
   36    -1 	classifiers=[
   37    -1 		'Environment :: Web Environment',
   38    -1 		'Intended Audience :: End Users/Desktop',
   39    -1 		'Operating System :: OS Independent',
   40    -1 		'Programming Language :: Python',
   41    -1 		'Programming Language :: JavaScript',
   42    -1 		'License :: OSI Approved :: GNU General Public License v2 or later '
   43    -1 			'(GPLv2+)',
   44    -1 		'Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary',
   45    -1 	])
   -1    16     name='plutopluto',
   -1    17     version=VERSION,
   -1    18     description="simple feed aggregator",
   -1    19     long_description=README,
   -1    20     url='https://github.com/xi/plutopluto',
   -1    21     author='Tobias Bengfort',
   -1    22     author_email='tobias.bengfort@posteo.de',
   -1    23     packages=['plutopluto'],
   -1    24     include_package_data=True,
   -1    25     install_requires=[
   -1    26         'argparse',
   -1    27         'flask',
   -1    28         'werkzeug',
   -1    29         'feedparser',
   -1    30         'beautifulsoup4',
   -1    31     ],
   -1    32     entry_points={'console_scripts': [
   -1    33         'plutopluto=plutopluto:main',
   -1    34     ]},
   -1    35     license='GPLv2+',
   -1    36     classifiers=[
   -1    37         'Environment :: Web Environment',
   -1    38         'Intended Audience :: End Users/Desktop',
   -1    39         'Operating System :: OS Independent',
   -1    40         'Programming Language :: Python',
   -1    41         'Programming Language :: JavaScript',
   -1    42         'License :: OSI Approved :: GNU General Public License v2 or later '
   -1    43             '(GPLv2+)',
   -1    44         'Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary',
   -1    45     ])