PyJSONProxy

simple proxy and scraper
git clone https://git.ce9e.org/PyJSONProxy.git

commit
c09849ba7d8acba625a85bdbbca6e33fd58d655e
parent
8d29fede8e07a093ca028d82d7b2a5a6b7ae8b9a
Author
Tobias Bengfort <tobias.bengfort@gmx.net>
Date
2015-02-05 17:24
add to ENDPOINTS instead of overwriting

works better with multiple config files

Diffstat

M README.rst 64 ++++++++++++++++++++++++++++---------------------------------
M jsonproxy/__init__.py 1 +

2 files changed, 30 insertions, 35 deletions


diff --git a/README.rst b/README.rst

@@ -16,10 +16,8 @@ implements the third variant. So you can do something like this::
   16    16 
   17    17 With a configuration like this::
   18    18 
   19    -1     ENDPOINTS = {
   20    -1         'github': {
   21    -1             'host': 'https://api.github.com/users/'
   22    -1         }
   -1    19     ENDPOINTS['github'] = {
   -1    20         'host': 'https://api.github.com/users/'
   23    21     }
   24    22 
   25    23 
@@ -47,23 +45,21 @@ HTML pages, PyJSONProxy can extract information from there::
   47    45 
   48    46 ::
   49    47 
   50    -1     ENDPOINTS = {
   51    -1         'github': {
   52    -1             'host': 'https://github.com/',
   53    -1             'type': 'scrape_item',
   54    -1             'fields': {
   55    -1               'login': '.vcard-username',
   56    -1               'fullname': '.vcard-fullname',
   57    -1               'email': '.vcard-details .email',
   58    -1               'join-date': '.vcard-details .join-date@datetime'
   59    -1             }
   60    -1         },
   61    -1         'repos': {
   62    -1             'host': 'https://github.com/',
   63    -1             'type': 'scrape_list',
   64    -1             'selector': '.popular-repos a.mini-repo-list-item@href'
   -1    48     ENDPOINTS['github'] = {
   -1    49         'host': 'https://github.com/',
   -1    50         'type': 'scrape_item',
   -1    51         'fields': {
   -1    52           'login': '.vcard-username',
   -1    53           'fullname': '.vcard-fullname',
   -1    54           'email': '.vcard-details .email',
   -1    55           'join-date': '.vcard-details .join-date@datetime'
   65    56         }
   66    57     }
   -1    58     ENDPOINTS['repos'] = {
   -1    59         'host': 'https://github.com/',
   -1    60         'type': 'scrape_list',
   -1    61         'selector': '.popular-repos a.mini-repo-list-item@href'
   -1    62     }
   67    63 
   68    64 There are two options here: ``scrape_item`` and ``scrape_list``. The first
   69    65 one will take a list of fields and selectors and return only the first
@@ -91,22 +87,20 @@ Some simple documentation is automatically generated and available under
   91    87 endpoint). To provide some input for this documentation, you can add a
   92    88 description to both endpoints and fields::
   93    89 
   94    -1     ENDPOINTS = {
   95    -1         'github': {
   96    -1             'host': 'https://github.com/',
   97    -1             'type': 'scrape_item',
   98    -1             'doc': 'Access data about GitHub users',
   99    -1             'fields': {
  100    -1               'login': '.vcard-username',
  101    -1               'fullname': '.vcard-fullname',
  102    -1               'email': '.vcard-details .email'
  103    -1               'join-date': '.vcard-details .join-date@datetime'
  104    -1             },
  105    -1             'fields_doc': {
  106    -1               'login': 'github username',
  107    -1               'fullname': 'the user\'s full name',
  108    -1               'join-date': 'date when the user joined github in ISO-xx format'
  109    -1             }
   -1    90     ENDPOINTS['github'] = {
   -1    91         'host': 'https://github.com/',
   -1    92         'type': 'scrape_item',
   -1    93         'doc': 'Access data about GitHub users',
   -1    94         'fields': {
   -1    95           'login': '.vcard-username',
   -1    96           'fullname': '.vcard-fullname',
   -1    97           'email': '.vcard-details .email',
   -1    98           'join-date': '.vcard-details .join-date@datetime'
   -1    99         },
   -1   100         'fields_doc': {
   -1   101           'login': 'github username',
   -1   102           'fullname': 'the user\'s full name',
   -1   103           'join-date': 'date when the user joined github in ISO-xx format'
  110   104         }
  111   105     }
  112   106 

diff --git a/jsonproxy/__init__.py b/jsonproxy/__init__.py

@@ -46,6 +46,7 @@ def main():
   46    46 	app = Flask(__name__)
   47    47 
   48    48 	# load config
   -1    49 	app.config[ENDPOINTS] = {}
   49    50 	config_files = [
   50    51 		os.path.expanduser('~/.config/pyjsonproxy.cfg'),
   51    52 		os.path.abspath('.pyjsonproxy.cfg'),