PyJSONProxy

simple proxy and scraper
git clone https://git.ce9e.org/PyJSONProxy.git

commit
6b7f723983e007cf7174328cbbf552463086d92d
parent
8f5204b37f625769cb7256a5503fc1f2e17b8b0e
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2017-06-03 10:01
lazy attribute parsing

Diffstat

M jsonproxy/lib.py 19 +++++++++++--------

1 file changed, 11 insertions, 8 deletions


diff --git a/jsonproxy/lib.py b/jsonproxy/lib.py

@@ -10,7 +10,7 @@ except ImportError:
   10    10 ENDPOINTS = 'ENDPOINTS'
   11    11 
   12    12 
   13    -1 def get_attribute_list(html, selector):
   -1    13 def iter_attribute(html, selector):
   14    14 	optional = selector.endswith('?')
   15    15 	if optional:
   16    16 		selector = selector[:-1]
@@ -24,17 +24,20 @@ def get_attribute_list(html, selector):
   24    24 	if '@' in selector:
   25    25 		attr = selector.rsplit('@', 1)[1]
   26    26 		if optional:
   27    -1 			return [element.get(attr) for element in elements]
   -1    27 			return (element.get(attr) for element in elements)
   28    28 		else:
   29    -1 			return [element[attr] for element in elements]
   -1    29 			return (element[attr] for element in elements)
   30    30 	else:
   31    -1 		return [element.text.strip() for element in elements]
   -1    31 		return (element.text.strip() for element in elements)
   32    32 
   33    33 
   34    34 def get_attribute(html, selector):
   35    -1 	l = get_attribute_list(html, selector)
   36    -1 	if len(l) > 0 or not selector.endswith('?'):
   37    -1 		return l[0]
   -1    35 	l = iter_attribute(html, selector)
   -1    36 	try:
   -1    37 		return next(l)
   -1    38 	except StopIteration:
   -1    39 		if not selector.endswith('?'):
   -1    40 			raise
   38    41 
   39    42 
   40    43 def get_fields(html, config):
@@ -46,7 +49,7 @@ def get_fields(html, config):
   46    49 			elements = html.select(value['selector'])
   47    50 			data[key] = [get_fields(e, value) for e in elements]
   48    51 		else:
   49    -1 			data[key] = get_attribute_list(html, value['selector'])
   -1    52 			data[key] = list(iter_attribute(html, value['selector']))
   50    53 	return data
   51    54 
   52    55