PyJSONProxy

simple proxy and scraper
git clone https://git.ce9e.org/PyJSONProxy.git

commit
8f5204b37f625769cb7256a5503fc1f2e17b8b0e
parent
7883fbe7bb81be831936a435551ecbbb3a769815
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2017-06-03 09:51
allow to mark fields as optional

Diffstat

M jsonproxy/lib.py 11 +++++++++--

1 file changed, 9 insertions, 2 deletions


diff --git a/jsonproxy/lib.py b/jsonproxy/lib.py

@@ -11,6 +11,10 @@ ENDPOINTS = 'ENDPOINTS'
   11    11 
   12    12 
   13    13 def get_attribute_list(html, selector):
   -1    14 	optional = selector.endswith('?')
   -1    15 	if optional:
   -1    16 		selector = selector[:-1]
   -1    17 
   14    18 	s = selector.rsplit('@', 1)[0]
   15    19 	if s:
   16    20 		elements = html.select(s)
@@ -19,14 +23,17 @@ def get_attribute_list(html, selector):
   19    23 
   20    24 	if '@' in selector:
   21    25 		attr = selector.rsplit('@', 1)[1]
   22    -1 		return [element[attr] for element in elements]
   -1    26 		if optional:
   -1    27 			return [element.get(attr) for element in elements]
   -1    28 		else:
   -1    29 			return [element[attr] for element in elements]
   23    30 	else:
   24    31 		return [element.text.strip() for element in elements]
   25    32 
   26    33 
   27    34 def get_attribute(html, selector):
   28    35 	l = get_attribute_list(html, selector)
   29    -1 	if len(l) > 0:
   -1    36 	if len(l) > 0 or not selector.endswith('?'):
   30    37 		return l[0]
   31    38 
   32    39