- commit
- 605d40329b9ff80e0cd12e6ed180500e7cdba18e
- parent
- 5d807e53fd3f9d7089515e13af01f2ecc9e9fe69
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2025-12-07 14:32
add search helper
Diffstat
| M | handelsregister.py | 74 | +++++++++++++++++++++++++++---------------------------------- |
1 files changed, 33 insertions, 41 deletions
diff --git a/handelsregister.py b/handelsregister.py
@@ -4,15 +4,6 @@ import re
 import requests
 from bs4 import BeautifulSoup
 
-SEARCH_DEFAULTS = {
-    'suchTyp': 'n',
-    'form': 'form',
-    'form:btnSuche': 'form:btnSuche',
-    'javax.faces.partial.ajax': 'true',
-    'javax.faces.partial.execute': '@all',
-    'javax.faces.ViewState': 'stateless',
-}
-
 REGISTERS = {
     'HRA': 'Handelsregister Abteilung A',
     'HRB': 'Handelsregister Abteilung B',
@@ -23,21 +14,29 @@ REGISTERS = {
 }
 
 
+def _search(session, data):
+    r = session.post(
+        'https://www.handelsregister.de/rp_web/erweitertesuche/welcome.xhtml',
+        data={
+            'form': 'form',
+            'form:btnSuche': '',
+            'javax.faces.ViewState': 'stateless',
+            'form:schlagwortOptionen': 1,
+            **data,
+        },
+    )
+    r.raise_for_status()
+    return BeautifulSoup(r.content, features='html.parser')
+
+
 def search(terms, register=''):
     with requests.Session() as session:
-        r = session.post(
-            'https://www.handelsregister.de/rp_web/erweitertesuche.xhtml',
-            data={
-                **SEARCH_DEFAULTS,
-                'form:registerArt_input': register,
-                'form:schlagwoerter': terms,
-                'form:schlagwortOptionen': 1,
-                'form:aenlichLautendeSchlagwoerterBoolChkbox_input': 'on',
-            }
-        )
-        r.raise_for_status()
+        soup = _search(session, {
+            'form:schlagwoerter': terms,
+            'form:aenlichLautendeSchlagwoerterBoolChkbox_input': 'on',
+            'form:registerArt_input': register,
+        })
 
-        soup = BeautifulSoup(r.content, features='html.parser')
         for item in soup.select('[data-ri]'):
             yield {
                 'title': item.find(class_='marginLeft20').text,
@@ -47,26 +46,19 @@ def search(terms, register=''):
 
 def get_xml(register, id):
     with requests.Session() as session:
-        r = session.post(
-            'https://www.handelsregister.de/rp_web/erweitertesuche.xhtml',
-            data={
-                **SEARCH_DEFAULTS,
-                'form:registerArt_input': register,
-                'form:registerNummer': id,
-            },
-        )
-        r.raise_for_status()
-
-        field = None
-        for x in re.findall(r'PrimeFaces.addSubmitParam\([^)]*', r.text):
-            if 'Global.Dokumentart.SI' in x:
-                field = re.search(r"ergebnissForm:selectedSuchErgebnisFormTable:[^']*", x)[0]
-                break
-        if not field:
-            raise ValueError
-
-        view_state = re.search(r'<update id="j_id1:javax.faces.ViewState:0"><!\[CDATA\[([-0-9]*:[-0-9]*)\]\]></update>', r.text)[1]
-        action = re.search('action="([^"]*)"', r.text)[1]
+        soup = _search(session, {
+            'form:registerNummer': id,
+            'form:registerArt_input': register,
+        })
+
+        link = soup.select_one('[onclick*="Dokumentart.SI"]')
+        field = re.search(
+            r"ergebnissForm:selectedSuchErgebnisFormTable:[^']*",
+            link['onclick'],
+        )[0]
+
+        view_state = soup.select_one('input[name="javax.faces.ViewState"]')['value']
+        action = soup.select_one('[action]')['action']
 
         r2 = session.post(
             f'https://www.handelsregister.de{action}',