- commit
- 6b7f723983e007cf7174328cbbf552463086d92d
- parent
- 8f5204b37f625769cb7256a5503fc1f2e17b8b0e
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2017-06-03 10:01
lazy attribute parsing
Diffstat
| M | jsonproxy/lib.py | 19 | +++++++++++-------- |
1 files changed, 11 insertions, 8 deletions
diff --git a/jsonproxy/lib.py b/jsonproxy/lib.py
@@ -10,7 +10,7 @@ except ImportError:
 ENDPOINTS = 'ENDPOINTS'
 
 
-def get_attribute_list(html, selector):
+def iter_attribute(html, selector):
     optional = selector.endswith('?')
     if optional:
         selector = selector[:-1]
@@ -24,17 +24,20 @@ def get_attribute_list(html, selector):
     if '@' in selector:
         attr = selector.rsplit('@', 1)[1]
         if optional:
-            return [element.get(attr) for element in elements]
+            return (element.get(attr) for element in elements)
         else:
-            return [element[attr] for element in elements]
+            return (element[attr] for element in elements)
     else:
-        return [element.text.strip() for element in elements]
+        return (element.text.strip() for element in elements)
 
 
 def get_attribute(html, selector):
-    l = get_attribute_list(html, selector)
-    if len(l) > 0 or not selector.endswith('?'):
-        return l[0]
+    l = iter_attribute(html, selector)
+    try:
+        return next(l)
+    except StopIteration:
+        if not selector.endswith('?'):
+            raise
 
 
 def get_fields(html, config):
@@ -46,7 +49,7 @@ def get_fields(html, config):
             elements = html.select(value['selector'])
             data[key] = [get_fields(e, value) for e in elements]
         else:
-            data[key] = get_attribute_list(html, value['selector'])
+            data[key] = list(iter_attribute(html, value['selector']))
     return data
 
 