db-pkpass

Convert Deutsche Bahn PDF tickets to PKPass
git clone https://git.ce9e.org/db-pkpass.git

commit
0e13df517856dadd45812f9db0c10a1fd241f8f3
parent
c590c7c41b37f45dd16d1ab24ee14ee64d523274
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2025-05-30 16:49
tweak extract_legs()

Diffstat

M db_pkpass.py 43 ++++++++++++++++++++++++++++---------------

1 file changed, 28 insertions, 15 deletions


diff --git a/db_pkpass.py b/db_pkpass.py

@@ -93,14 +93,17 @@ def extract_legs(pdf):
   93    93     last_x = 0
   94    94     for page in pdf:
   95    95         for x, _, _, _, text, _, _ in page.get_text('blocks'):
   -1    96             text = text.rstrip('\n').replace(',\n', ', ')
   96    97             if text.startswith('Halt\nDatum\nZeit\nGleis'):
   97    98                 state = 1
   98    -1             elif text.startswith('Wichtige Nutzungshinweise'):
   99    -1                 break
  100    99             elif state == 0:
  101   100                 pass
  102    -1             elif state == 1 or (state > 0 and x < last_x):
  103    -1                 v1, v2 = text.rstrip('\n').split('\n')
   -1   101             elif text.startswith('Ihre Reiseverbindung und Reservierung'):
   -1   102                 pass
   -1   103             elif text.startswith('Wichtige Nutzungshinweise') or not text.strip():
   -1   104                 break
   -1   105             elif state == 1 or x < last_x:
   -1   106                 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
  104   107                 legs.append({
  105   108                     'start': {
  106   109                         'station': v1,
@@ -111,31 +114,39 @@ def extract_legs(pdf):
  111   114                 })
  112   115                 state = 2
  113   116             elif state == 2:
  114    -1                 v1, v2 = text.rstrip('\n').split('\n')
   -1   117                 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
  115   118                 legs[-1]['start']['date'] = v1
  116   119                 legs[-1]['destination']['date'] = v2
  117   120                 state = 3
  118   121             elif state == 3:
  119    -1                 v1, v2 = text.rstrip('\n').split('\n')
  120    -1                 legs[-1]['start']['datetime'] = parse_leg_dt(legs[-1]['start'].pop('date'), v1, 'ab')
  121    -1                 legs[-1]['destination']['datetime'] = parse_leg_dt(legs[-1]['destination'].pop('date'), v2, 'an')
   -1   122                 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
   -1   123                 date1 = legs[-1]['start'].pop('date')
   -1   124                 date2 = legs[-1]['destination'].pop('date')
   -1   125                 legs[-1]['start']['datetime'] = parse_leg_dt(date1, v1, 'ab')
   -1   126                 legs[-1]['destination']['datetime'] = parse_leg_dt(date2, v2, 'an')
  122   127                 state = 4
  123   128             elif state == 4:
  124    -1                 v1, v2 = text.rstrip('\n').split('\n')
  125    -1                 legs[-1]['start']['platform'] = v1
  126    -1                 legs[-1]['destination']['platform'] = v2
   -1   129                 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
   -1   130                 if v1:
   -1   131                     legs[-1]['start']['platform'] = v1
   -1   132                 if v2:
   -1   133                     legs[-1]['destination']['platform'] = v2
  127   134                 state = 5
  128   135             elif state == 5:
  129    -1                 legs[-1]['train'] = text.strip()
   -1   136                 legs[-1]['train'] = text.strip().replace('\n', ' ')
  130   137                 state = 6
  131   138             elif state == 6:
  132    -1                 legs[-1]['comment'] = text.strip()
   -1   139                 legs[-1]['comment'] = text.strip().replace('\n', ' ')
  133   140                 state = 7
  134   141             else:
  135    -1                 raise ValueError((text, state))
   -1   142                 raise ValueError
  136   143 
  137   144             last_x = x
  138   145 
   -1   146     for leg in legs:
   -1   147         if 'train' not in leg:
   -1   148             leg['train'] = leg['destination'].pop('platform')
   -1   149 
  139   150     return legs
  140   151 
  141   152 
@@ -149,7 +160,9 @@ def extract_order_id(pdf):
  149   160 
  150   161 def format_stop(stop, train=None):
  151   162     t = stop['datetime'].strftime('%H:%M')
  152    -1     s = f'{t} {stop["station"]} #{stop["platform"]}'
   -1   163     s = f'{t} {stop["station"]}'
   -1   164     if stop.get('platform'):
   -1   165         s += f' #{stop["platform"]}'
  153   166     if train:
  154   167         s = f'{s} - {train}'
  155   168     return s