handelsregister

command line interface for handelsregister.de
git clone https://git.ce9e.org/handelsregister.git

commit
605d40329b9ff80e0cd12e6ed180500e7cdba18e
parent
5d807e53fd3f9d7089515e13af01f2ecc9e9fe69
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2025-12-07 14:32
add search helper

Diffstat

M handelsregister.py 74 +++++++++++++++++++++++++++----------------------------------

1 file changed, 33 insertions, 41 deletions


diff --git a/handelsregister.py b/handelsregister.py

@@ -4,15 +4,6 @@ import re
    4     4 import requests
    5     5 from bs4 import BeautifulSoup
    6     6 
    7    -1 SEARCH_DEFAULTS = {
    8    -1     'suchTyp': 'n',
    9    -1     'form': 'form',
   10    -1     'form:btnSuche': 'form:btnSuche',
   11    -1     'javax.faces.partial.ajax': 'true',
   12    -1     'javax.faces.partial.execute': '@all',
   13    -1     'javax.faces.ViewState': 'stateless',
   14    -1 }
   15    -1 
   16     7 REGISTERS = {
   17     8     'HRA': 'Handelsregister Abteilung A',
   18     9     'HRB': 'Handelsregister Abteilung B',
@@ -23,21 +14,29 @@ REGISTERS = {
   23    14 }
   24    15 
   25    16 
   -1    17 def _search(session, data):
   -1    18     r = session.post(
   -1    19         'https://www.handelsregister.de/rp_web/erweitertesuche/welcome.xhtml',
   -1    20         data={
   -1    21             'form': 'form',
   -1    22             'form:btnSuche': '',
   -1    23             'javax.faces.ViewState': 'stateless',
   -1    24             'form:schlagwortOptionen': 1,
   -1    25             **data,
   -1    26         },
   -1    27     )
   -1    28     r.raise_for_status()
   -1    29     return BeautifulSoup(r.content, features='html.parser')
   -1    30 
   -1    31 
   26    32 def search(terms, register=''):
   27    33     with requests.Session() as session:
   28    -1         r = session.post(
   29    -1             'https://www.handelsregister.de/rp_web/erweitertesuche.xhtml',
   30    -1             data={
   31    -1                 **SEARCH_DEFAULTS,
   32    -1                 'form:registerArt_input': register,
   33    -1                 'form:schlagwoerter': terms,
   34    -1                 'form:schlagwortOptionen': 1,
   35    -1                 'form:aenlichLautendeSchlagwoerterBoolChkbox_input': 'on',
   36    -1             }
   37    -1         )
   38    -1         r.raise_for_status()
   -1    34         soup = _search(session, {
   -1    35             'form:schlagwoerter': terms,
   -1    36             'form:aenlichLautendeSchlagwoerterBoolChkbox_input': 'on',
   -1    37             'form:registerArt_input': register,
   -1    38         })
   39    39 
   40    -1     soup = BeautifulSoup(r.content, features='html.parser')
   41    40     for item in soup.select('[data-ri]'):
   42    41         yield {
   43    42             'title': item.find(class_='marginLeft20').text,
@@ -47,26 +46,19 @@ def search(terms, register=''):
   47    46 
   48    47 def get_xml(register, id):
   49    48     with requests.Session() as session:
   50    -1         r = session.post(
   51    -1             'https://www.handelsregister.de/rp_web/erweitertesuche.xhtml',
   52    -1             data={
   53    -1                 **SEARCH_DEFAULTS,
   54    -1                 'form:registerArt_input': register,
   55    -1                 'form:registerNummer': id,
   56    -1             },
   57    -1         )
   58    -1         r.raise_for_status()
   59    -1 
   60    -1         field = None
   61    -1         for x in re.findall(r'PrimeFaces.addSubmitParam\([^)]*', r.text):
   62    -1             if 'Global.Dokumentart.SI' in x:
   63    -1                 field = re.search(r"ergebnissForm:selectedSuchErgebnisFormTable:[^']*", x)[0]
   64    -1                 break
   65    -1         if not field:
   66    -1             raise ValueError
   67    -1 
   68    -1         view_state = re.search(r'<update id="j_id1:javax.faces.ViewState:0"><!\[CDATA\[([-0-9]*:[-0-9]*)\]\]></update>', r.text)[1]
   69    -1         action = re.search('action="([^"]*)"', r.text)[1]
   -1    49         soup = _search(session, {
   -1    50             'form:registerNummer': id,
   -1    51             'form:registerArt_input': register,
   -1    52         })
   -1    53 
   -1    54         link = soup.select_one('[onclick*="Dokumentart.SI"]')
   -1    55         field = re.search(
   -1    56             r"ergebnissForm:selectedSuchErgebnisFormTable:[^']*",
   -1    57             link['onclick'],
   -1    58         )[0]
   -1    59 
   -1    60         view_state = soup.select_one('input[name="javax.faces.ViewState"]')['value']
   -1    61         action = soup.select_one('[action]')['action']
   70    62 
   71    63         r2 = session.post(
   72    64             f'https://www.handelsregister.de{action}',