- commit
- 40434181556a588fe168cd74607d806889319d07
- parent
- 032910abd1093ffe084e9bbecd106e818258a899
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2025-12-07 20:55
refactor _search
Diffstat
| M | handelsregister.py | 80 | ++++++++++++++++++++++++++++++++++++------------------------ |
1 files changed, 48 insertions, 32 deletions
diff --git a/handelsregister.py b/handelsregister.py
@@ -24,13 +24,32 @@ def parse_id(s):
             if 'früher' in tail:
                 tail = tail[:tail.index('früher')]
             return {
-                'court': ' '.join(parts[:i]),
+                'court': ' '.join(parts[1:i]),
                 'reg': reg,
                 'id': ' '.join(tail),
             }
     raise ValueError(s)
 
 
+def parse_si_field(item):
+    si_element = item.select_one('[onclick*="Dokumentart.SI"]')
+    if si_element:
+        m = re.search(
+            r"ergebnissForm:selectedSuchErgebnisFormTable:[^']*",
+            si_element['onclick'],
+        )
+        if m:
+            return m[0]
+
+
+def parse_item(item):
+    return {
+        'title': item.select_one('.marginLeft20').text,
+        'si_field': parse_si_field(item),
+        **parse_id(item.select_one('.fontWeightBold').text)
+    }
+
+
 class Session(requests.Session):
     def request(self, *args, **kwargs):
         retries = 2
@@ -47,64 +66,61 @@ class Session(requests.Session):
                     raise
 
 
-def fetch_view_state(session):
+def get_context(session):
     r = session.get('https://www.handelsregister.de/rp_web/erweitertesuche/welcome.xhtml')
     soup = BeautifulSoup(r.content, 'html.parser')
-    return soup.find('input', {'name': 'javax.faces.ViewState'})['value']
+
+    return {
+        'view_state': soup.select_one('input[name="javax.faces.ViewState"]')['value'],
+    }
 
 
-def _search(session, data):
-    view_state = fetch_view_state(session)
+def _search(session, query):
+    ctx = get_context(session)
     r = session.post(
         'https://www.handelsregister.de/rp_web/erweitertesuche/welcome.xhtml',
         data={
             'form': 'form',
             'form:btnSuche': '',
-            'javax.faces.ViewState': view_state,
+            'javax.faces.ViewState': ctx['view_state'],
             'form:schlagwortOptionen': 1,
+            'form:aenlichLautendeSchlagwoerterBoolChkbox_input': 'on',
             'form:ergebnisseProSeite_input': 100,
-            **data,
+            **query,
         },
     )
-    return BeautifulSoup(r.content, features='html.parser')
+    soup = BeautifulSoup(r.content, features='html.parser')
+    return {
+        'action': soup.select_one('[action]')['action'],
+        'view_state': soup.select_one('input[name="javax.faces.ViewState"]')['value'],
+        'truncated': bool(soup.select_one(r'#ergebnissForm\:ergebnisseAnzahl_label')),
+        'items': [parse_item(item) for item in soup.select('[data-ri]')],
+    }
 
 
 def search(terms, register=''):
+    query = {
+        'form:schlagwoerter': terms,
+        'form:registerArt_input': register,
+    }
     with Session() as session:
-        soup = _search(session, {
-            'form:schlagwoerter': terms,
-            'form:aenlichLautendeSchlagwoerterBoolChkbox_input': 'on',
-            'form:registerArt_input': register,
-        })
-
-        for item in soup.select('[data-ri]'):
-            yield {
-                'title': item.select_one('.marginLeft20').text,
-                **parse_id(item.select_one('.fontWeightBold').text),
-            }
+        data = _search(session, query)
+        return data['items']
 
 
 def get_xml(register, id):
     with Session() as session:
-        soup = _search(session, {
-            'form:registerNummer': id,
+        data = _search(session, {
             'form:registerArt_input': register,
+            'form:registerNummer': id,
         })
-
-        link = soup.select_one('[onclick*="Dokumentart.SI"]')
-        field = re.search(
-            r"ergebnissForm:selectedSuchErgebnisFormTable:[^']*",
-            link['onclick'],
-        )[0]
-
-        view_state = soup.select_one('input[name="javax.faces.ViewState"]')['value']
-        action = soup.select_one('[action]')['action']
+        field = data['items'][0]['si_field']
 
         r = session.post(
-            f'https://www.handelsregister.de{action}',
+            f'https://www.handelsregister.de{data["action"]}',
             data={
                 'ergebnissForm': 'ergebnissForm',
-                'javax.faces.ViewState': view_state,
+                'javax.faces.ViewState': data['view_state'],
                 'property': 'Global.Dokumentart.SI',
                 field: field,
             },