PyJSONProxy

simple proxy and scraper
git clone https://git.ce9e.org/PyJSONProxy.git

commit
e3131e9760bdd74b8d038a096c3b65bccd8412d5
parent
050daa3039c9970652be8e18c2ad7c04af0bb7b0
Author
Tobias Bengfort <tobias.bengfort@gmx.net>
Date
2015-12-06 22:31
Merge branch 'feature-asyncio'

Diffstat

M jsonproxy/__init__.py 101 +++++++++++++++++++++++++++++++++----------------------------
A jsonproxy/web.py 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M setup.py 6 +++---

3 files changed, 152 insertions, 49 deletions


diff --git a/jsonproxy/__init__.py b/jsonproxy/__init__.py

@@ -1,23 +1,14 @@
    1    -1 from __future__ import absolute_import
    2    -1 
   -1     1 import asyncio
    3     2 import os
    4     3 import sys
    5     4 
    6    -1 try:
    7    -1 	from urllib.request import urlopen as _urlopen
    8    -1 	from urllib.error import HTTPError
    9    -1 except ImportError:
   10    -1 	from urllib2 import urlopen as _urlopen
   11    -1 	from urllib2 import HTTPError
   12    -1 
   13    -1 from flask import abort
   14    -1 from flask import current_app
   15    -1 from flask import Flask
   16    -1 from flask import jsonify
   17    -1 from flask import make_response
   18    -1 from flask import render_template
   19    -1 from flask import request
   20    -1 import cachetools
   -1     5 import aiohttp
   -1     6 
   -1     7 from .web import Application
   -1     8 from .web import jsonify
   -1     9 from .web import render_template
   -1    10 from .web import make_response
   -1    11 from .web import abort
   21    12 
   22    13 from .lib import check_config
   23    14 from .lib import _doc
@@ -25,66 +16,84 @@ from .lib import ENDPOINTS
   25    16 from .lib import parse_args
   26    17 from .lib import scrape
   27    18 
   28    -1 app = Flask(__name__)
   -1    19 app = Application(__name__)
   29    20 
   30    21 
   31    22 def get_config(endpoint):
   32    23 	try:
   33    -1 		return current_app.config[ENDPOINTS][endpoint]
   -1    24 		return app.config[ENDPOINTS][endpoint]
   34    25 	except KeyError:
   35    26 		abort(404)
   36    27 
   37    28 
   38    -1 @cachetools.ttl_cache()
   39    -1 def urlopen(url):
   40    -1 	try:
   41    -1 		current_app.logger.info('fetching %s' % url)
   42    -1 		original = _urlopen(url)
   -1    29 def async_cache(maxsize=128):
   -1    30 	cache = {}
   -1    31 
   -1    32 	def decorator(fn):
   -1    33 		def wrapper(*args):
   -1    34 			key = ':'.join(args)
   -1    35 			if key not in cache:
   -1    36 				if len(cache) >= maxsize:
   -1    37 					del cache[cache.keys().next()]
   -1    38 				cache[key] = yield from fn(*args)
   -1    39 			return cache[key]
   -1    40 		return wrapper
   -1    41 	return decorator
   43    42 
   44    -1 		body = original.read()
   45    -1 		code = original.getcode()
   46    -1 		headers = original.headers.items()
   47    43 
   48    -1 		return body, code, headers
   49    -1 	except HTTPError as error:
   50    -1 		abort(error.code)
   -1    44 @async_cache()
   -1    45 def _request(method, url):
   -1    46 	app.logger.info('{}:{}'.format(method, url))
   -1    47 	response = yield from aiohttp.request(method, url)
   -1    48 	if response.status != 200:
   -1    49 		abort(response.status)
   -1    50 	else:
   -1    51 		return response
   -1    52 
   51    53 
   -1    54 @app.route('/{endpoint}/{path:.+}', methods=['GET', 'HEAD', 'OPTIONS'])
   -1    55 @asyncio.coroutine
   -1    56 def handle(request):
   -1    57 	endpoint = request.match_info['endpoint']
   52    58 
   53    -1 @app.route('/<endpoint>/<path:path>', methods=['GET'])
   54    -1 def handle(endpoint, path):
   55    59 	config = get_config(endpoint)
   56    -1 	url = request.url.replace(request.host_url + endpoint + '/', config['host'])
   -1    60 	url = config['host'] + request.match_info['path']
   -1    61 	if request.query_string:
   -1    62 		url += '?' + request.query_string
   57    63 
   58    -1 	body, code, headers = urlopen(url)
   -1    64 	remote = yield from _request(request.method, url)
   -1    65 	body = yield from remote.read()
   59    66 
   60    -1 	if 'fields' in config:
   61    -1 		response = jsonify(scrape(url, body, config))
   -1    67 	if 'fields' in config and request.method == 'GET':
   -1    68 		response = jsonify(scrape(url, body, config), status=remote.status)
   62    69 	else:
   63    -1 		response = make_response(body, code)
   -1    70 		response = make_response(body, status=remote.status)
   64    71 
   65    -1 	if current_app.config.get('ALLOW_CORS', False):
   -1    72 	if app.config.get('ALLOW_CORS', False):
   66    73 		response.headers['Access-Control-Allow-Origin'] = '*'
   67    74 
   68    75 	return response
   69    76 
   70    77 
   71    -1 @app.route('/', methods=['GET'])
   72    -1 def index():
   73    -1 	config = current_app.config[ENDPOINTS]
   -1    78 @app.route('/')
   -1    79 def index(request):
   -1    80 	config = app.config[ENDPOINTS]
   74    81 	data = [_doc(config[endpoint], endpoint) for endpoint in config]
   75    82 	return render_template('index.html', endpoints=data)
   76    83 
   77    84 
   78    -1 @app.route('/<endpoint>/', methods=['GET'])
   79    -1 def doc(endpoint):
   80    -1 	config = get_config(endpoint)
   81    -1 	return render_template('index.html', endpoints=[_doc(config, endpoint)])
   -1    85 @app.route('/{endpoint}/')
   -1    86 def doc(request):
   -1    87 	endpoint = request.match_info['endpoint']
   -1    88 	config = app.get_config(endpoint)
   -1    89 	data = [_doc(config, endpoint)]
   -1    90 	return render_template('index.html', endpoints=data)
   82    91 
   83    92 
   84    93 def main():
   85    94 	args = parse_args()
   86    95 
   87    -1 	app.config.from_pyfile(os.path.abspath(args.config))
   -1    96 	app.config_from_file(os.path.abspath(args.config))
   88    97 	app.debug = args.debug
   89    98 
   90    99 	errors = check_config(app.config)

diff --git a/jsonproxy/web.py b/jsonproxy/web.py

@@ -0,0 +1,94 @@
   -1     1 """Flask inspired wrapper around aiohttp."""
   -1     2 
   -1     3 from functools import lru_cache
   -1     4 from pkg_resources import resource_filename
   -1     5 import asyncio
   -1     6 import logging
   -1     7 import os
   -1     8 
   -1     9 from aiohttp import web
   -1    10 import jinja2
   -1    11 
   -1    12 
   -1    13 @lru_cache()
   -1    14 def get_template(name):
   -1    15 	path = resource_filename(__name__, os.path.join('templates', name))
   -1    16 	with open(path) as fh:
   -1    17 		return jinja2.Template(fh.read())
   -1    18 
   -1    19 
   -1    20 def render_template(name, **kwargs):
   -1    21 	template = get_template(name)
   -1    22 	text = template.render(**kwargs)
   -1    23 	return web.Response(body=text.encode('utf8'))
   -1    24 
   -1    25 
   -1    26 def jsonify(data, **kwargs):
   -1    27 	return web.json_response(data, **kwargs)
   -1    28 
   -1    29 
   -1    30 def abort(code):
   -1    31 	if code == 404:
   -1    32 		raise web.HTTPNotFound
   -1    33 	elif code >= 500:
   -1    34 		raise web.HTTPInternalServerError
   -1    35 	else:
   -1    36 		raise web.HTTPBadRequest
   -1    37 
   -1    38 
   -1    39 def make_response(data, **kwargs):
   -1    40 	if isinstance(data, web.StreamResponse):
   -1    41 		return data
   -1    42 	elif isinstance(data, str):
   -1    43 		return web.Response(body=data.encode('utf8'), **kwargs)
   -1    44 	elif isinstance(data, bytes):
   -1    45 		return web.Response(body=data, **kwargs)
   -1    46 	else:
   -1    47 		raise TypeError('cannot make response from {}'.format(data))
   -1    48 
   -1    49 
   -1    50 class Application:
   -1    51 	def __init__(self, name):
   -1    52 		self.name = name
   -1    53 		self.loop = asyncio.get_event_loop()
   -1    54 		self.app = web.Application(loop=self.loop)
   -1    55 		self.config = {}
   -1    56 		self.debug = False
   -1    57 
   -1    58 		self.logger = logging.getLogger(self.name)
   -1    59 		self.logger.setLevel(logging.INFO)
   -1    60 
   -1    61 		consoleHandler = logging.StreamHandler()
   -1    62 		formatter = logging.Formatter('%(asctime)s - %(message)s')
   -1    63 		consoleHandler.setFormatter(formatter)
   -1    64 		self.logger.addHandler(consoleHandler)
   -1    65 
   -1    66 	def config_from_file(self, path):
   -1    67 		with open(path) as fh:
   -1    68 			exec(compile(fh.read(), path, 'exec'), self.config)
   -1    69 
   -1    70 	def add_route(self, path, fn, methods=('GET',)):
   -1    71 		@asyncio.coroutine
   -1    72 		def wrapped(*args, **kwargs):
   -1    73 			data = yield from asyncio.async(fn(*args, **kwargs))
   -1    74 			return make_response(data)
   -1    75 
   -1    76 		for method in methods:
   -1    77 			self.app.router.add_route(method, path, wrapped)
   -1    78 
   -1    79 	def route(self, path, methods=('GET',)):
   -1    80 		def decorator(fn):
   -1    81 			self.add_route(path, fn, methods=methods)
   -1    82 			return fn
   -1    83 		return decorator
   -1    84 
   -1    85 	def run(self, host='localhost', port=5000):
   -1    86 		if self.debug:
   -1    87 			self.logger.setLevel(logging.DEBUG)
   -1    88 		server = self.loop.create_server(self.app.make_handler(), host, port)
   -1    89 		self.loop.run_until_complete(server)
   -1    90 		self.logger.info("Server started at http://{}:{}".format(host, port))
   -1    91 		try:
   -1    92 			self.loop.run_forever()
   -1    93 		except KeyboardInterrupt:
   -1    94 			pass

diff --git a/setup.py b/setup.py

@@ -13,9 +13,9 @@ setup(
   13    13     author_email='tobias.bengfort@posteo.de',
   14    14     packages=['jsonproxy'],
   15    15     install_requires=[
   16    -1         'flask',
   -1    16         'aiohttp',
   17    17         'beautifulsoup4',
   18    -1         'cachetools',
   -1    18         'jinja2',
   19    19     ],
   20    20     entry_points={'console_scripts': [
   21    21         'jsonproxy=jsonproxy:main',
@@ -26,7 +26,7 @@ setup(
   26    26         'Environment :: Console',
   27    27         'Intended Audience :: Information Technology',
   28    28         'Operating System :: OS Independent',
   29    -1         'Programming Language :: Python',
   -1    29         'Programming Language :: Python :: 3.3',
   30    30         'License :: OSI Approved :: GNU Affero General Public License v3 '
   31    31             'or later (AGPLv3+)',
   32    32         'Topic :: Internet :: Proxy Servers',