handelsregister

command line interface for handelsregister.de
git clone https://git.ce9e.org/handelsregister.git

commit
40434181556a588fe168cd74607d806889319d07
parent
032910abd1093ffe084e9bbecd106e818258a899
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2025-12-07 20:55
refactor _search

Diffstat

M handelsregister.py 80 ++++++++++++++++++++++++++++++++++++------------------------

1 file changed, 48 insertions, 32 deletions


diff --git a/handelsregister.py b/handelsregister.py

@@ -24,13 +24,32 @@ def parse_id(s):
   24    24             if 'früher' in tail:
   25    25                 tail = tail[:tail.index('früher')]
   26    26             return {
   27    -1                 'court': ' '.join(parts[:i]),
   -1    27                 'court': ' '.join(parts[1:i]),
   28    28                 'reg': reg,
   29    29                 'id': ' '.join(tail),
   30    30             }
   31    31     raise ValueError(s)
   32    32 
   33    33 
   -1    34 def parse_si_field(item):
   -1    35     si_element = item.select_one('[onclick*="Dokumentart.SI"]')
   -1    36     if si_element:
   -1    37         m = re.search(
   -1    38             r"ergebnissForm:selectedSuchErgebnisFormTable:[^']*",
   -1    39             si_element['onclick'],
   -1    40         )
   -1    41         if m:
   -1    42             return m[0]
   -1    43 
   -1    44 
   -1    45 def parse_item(item):
   -1    46     return {
   -1    47         'title': item.select_one('.marginLeft20').text,
   -1    48         'si_field': parse_si_field(item),
   -1    49         **parse_id(item.select_one('.fontWeightBold').text)
   -1    50     }
   -1    51 
   -1    52 
   34    53 class Session(requests.Session):
   35    54     def request(self, *args, **kwargs):
   36    55         retries = 2
@@ -47,64 +66,61 @@ class Session(requests.Session):
   47    66                     raise
   48    67 
   49    68 
   50    -1 def fetch_view_state(session):
   -1    69 def get_context(session):
   51    70     r = session.get('https://www.handelsregister.de/rp_web/erweitertesuche/welcome.xhtml')
   52    71     soup = BeautifulSoup(r.content, 'html.parser')
   53    -1     return soup.find('input', {'name': 'javax.faces.ViewState'})['value']
   -1    72 
   -1    73     return {
   -1    74         'view_state': soup.select_one('input[name="javax.faces.ViewState"]')['value'],
   -1    75     }
   54    76 
   55    77 
   56    -1 def _search(session, data):
   57    -1     view_state = fetch_view_state(session)
   -1    78 def _search(session, query):
   -1    79     ctx = get_context(session)
   58    80     r = session.post(
   59    81         'https://www.handelsregister.de/rp_web/erweitertesuche/welcome.xhtml',
   60    82         data={
   61    83             'form': 'form',
   62    84             'form:btnSuche': '',
   63    -1             'javax.faces.ViewState': view_state,
   -1    85             'javax.faces.ViewState': ctx['view_state'],
   64    86             'form:schlagwortOptionen': 1,
   -1    87             'form:aenlichLautendeSchlagwoerterBoolChkbox_input': 'on',
   65    88             'form:ergebnisseProSeite_input': 100,
   66    -1             **data,
   -1    89             **query,
   67    90         },
   68    91     )
   69    -1     return BeautifulSoup(r.content, features='html.parser')
   -1    92     soup = BeautifulSoup(r.content, features='html.parser')
   -1    93     return {
   -1    94         'action': soup.select_one('[action]')['action'],
   -1    95         'view_state': soup.select_one('input[name="javax.faces.ViewState"]')['value'],
   -1    96         'truncated': bool(soup.select_one(r'#ergebnissForm\:ergebnisseAnzahl_label')),
   -1    97         'items': [parse_item(item) for item in soup.select('[data-ri]')],
   -1    98     }
   70    99 
   71   100 
   72   101 def search(terms, register=''):
   -1   102     query = {
   -1   103         'form:schlagwoerter': terms,
   -1   104         'form:registerArt_input': register,
   -1   105     }
   73   106     with Session() as session:
   74    -1         soup = _search(session, {
   75    -1             'form:schlagwoerter': terms,
   76    -1             'form:aenlichLautendeSchlagwoerterBoolChkbox_input': 'on',
   77    -1             'form:registerArt_input': register,
   78    -1         })
   79    -1 
   80    -1     for item in soup.select('[data-ri]'):
   81    -1         yield {
   82    -1             'title': item.select_one('.marginLeft20').text,
   83    -1             **parse_id(item.select_one('.fontWeightBold').text),
   84    -1         }
   -1   107         data = _search(session, query)
   -1   108     return data['items']
   85   109 
   86   110 
   87   111 def get_xml(register, id):
   88   112     with Session() as session:
   89    -1         soup = _search(session, {
   90    -1             'form:registerNummer': id,
   -1   113         data = _search(session, {
   91   114             'form:registerArt_input': register,
   -1   115             'form:registerNummer': id,
   92   116         })
   93    -1 
   94    -1         link = soup.select_one('[onclick*="Dokumentart.SI"]')
   95    -1         field = re.search(
   96    -1             r"ergebnissForm:selectedSuchErgebnisFormTable:[^']*",
   97    -1             link['onclick'],
   98    -1         )[0]
   99    -1 
  100    -1         view_state = soup.select_one('input[name="javax.faces.ViewState"]')['value']
  101    -1         action = soup.select_one('[action]')['action']
   -1   117         field = data['items'][0]['si_field']
  102   118 
  103   119         r = session.post(
  104    -1             f'https://www.handelsregister.de{action}',
   -1   120             f'https://www.handelsregister.de{data["action"]}',
  105   121             data={
  106   122                 'ergebnissForm': 'ergebnissForm',
  107    -1                 'javax.faces.ViewState': view_state,
   -1   123                 'javax.faces.ViewState': data['view_state'],
  108   124                 'property': 'Global.Dokumentart.SI',
  109   125                 field: field,
  110   126             },