db-pkpass: Convert Deutsche Bahn PDF tickets to PKPass

commit: 0438ecc7c751dd7a28cf4d0167ff5af2e59e83d2
parent: f874077d0344a8b1390ecf089f5b07ef2621de0e
Author: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: 2025-05-30 18:21

refactor extract_legs()

Diffstat

db_pkpass.py | 112 ++++++++++++++++++++++++++++++-------------------------------

1 file changed, 55 insertions, 57 deletions

diff --git a/db_pkpass.py b/db_pkpass.py

@@ -87,68 +87,66 @@ def parse_leg_dt(datestr, timestr, prefix, start):
   87    87     return dt
   88    88 
   89    89 
   90    -1 def extract_legs(pdf):
   91    -1     legs = []
   92    -1     state = 0
   -1    90 def iter_lines(pdf):
   93    91     last_x = 0
   94    -1     validity = extract_validity(pdf)
   -1    92     last_y = 0
   -1    93     line = []
   95    94     for page in pdf:
   96    -1         for x, _, _, _, text, _, _ in page.get_text('blocks'):
   -1    95         for x, y, _, _, text, _, _ in page.get_text('blocks'):
   97    96             text = text.rstrip('\n').replace(',\n', ', ')
   98    -1             if text.startswith('Halt\nDatum\nZeit\nGleis'):
   99    -1                 state = 1
  100    -1             elif state == 0 or text.startswith('Ihre Reiseverbindung '):
  101    -1                 pass
  102    -1             elif text.startswith('Wichtige Nutzungshinweise') or not text.strip():
  103    -1                 break
  104    -1             elif state == 1 or x < last_x:
  105    -1                 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
  106    -1                 legs.append({
  107    -1                     'start': {
  108    -1                         'station': v1,
  109    -1                     },
  110    -1                     'destination': {
  111    -1                         'station': v2,
  112    -1                     },
  113    -1                 })
  114    -1                 state = 2
  115    -1             elif state == 2:
  116    -1                 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
  117    -1                 legs[-1]['start']['date'] = v1
  118    -1                 legs[-1]['destination']['date'] = v2
  119    -1                 state = 3
  120    -1             elif state == 3:
  121    -1                 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
  122    -1                 date1 = legs[-1]['start'].pop('date')
  123    -1                 date2 = legs[-1]['destination'].pop('date')
  124    -1                 legs[-1]['start']['datetime'] = parse_leg_dt(
  125    -1                     date1, v1, 'ab', validity[0]
  126    -1                 )
  127    -1                 legs[-1]['destination']['datetime'] = parse_leg_dt(
  128    -1                     date2, v2, 'an', validity[0]
  129    -1                 )
  130    -1                 state = 4
  131    -1             elif state == 4:
  132    -1                 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
  133    -1                 if v1:
  134    -1                     legs[-1]['start']['platform'] = v1
  135    -1                 if v2:
  136    -1                     legs[-1]['destination']['platform'] = v2
  137    -1                 state = 5
  138    -1             elif state == 5:
  139    -1                 legs[-1]['train'] = text.strip().replace('\n', ' ')
  140    -1                 state = 6
  141    -1             elif state == 6:
  142    -1                 legs[-1]['comment'] = text.strip().replace('\n', ' ')
  143    -1                 state = 7
   -1    97             if x <= last_x or y > last_y:
   -1    98                 if line:
   -1    99                     yield line
   -1   100                 line = [text]
  144   101             else:
  145    -1                 raise ValueError
  146    -1 
   -1   102                 line.append(text)
  147   103             last_x = x
   -1   104             last_y = y
   -1   105     if line:
   -1   106         yield line
  148   107 
  149    -1     for leg in legs:
  150    -1         if 'train' not in leg:
  151    -1             leg['train'] = leg['destination'].pop('platform')
   -1   108 
   -1   109 def extract_legs(pdf):
   -1   110     legs = []
   -1   111     started = False
   -1   112     validity = extract_validity(pdf)
   -1   113     for line in iter_lines(pdf):
   -1   114         text = ' '.join(line)
   -1   115         if text.startswith('Halt\nDatum\nZeit\nGleis'):
   -1   116             started = True
   -1   117         elif not started or text.startswith('Ihre Reiseverbindung '):
   -1   118             pass
   -1   119         elif text.startswith('Wichtige Nutzungshinweise') or not text.strip():
   -1   120             break
   -1   121         else:
   -1   122             station1, station2 = (v.strip() for v in line[0].split('\n'))
   -1   123             date1, date2 = (v.strip() for v in line[1].split('\n'))
   -1   124             time1, time2 = (v.strip() for v in line[2].split('\n'))
   -1   125             legs.append({
   -1   126                 'start': {
   -1   127                     'station': station1,
   -1   128                     'datetime': parse_leg_dt(date1, time1, 'ab', validity[0])
   -1   129                 },
   -1   130                 'destination': {
   -1   131                     'station': station2,
   -1   132                     'datetime': parse_leg_dt(date2, time2, 'an', validity[0])
   -1   133                 },
   -1   134             })
   -1   135 
   -1   136             if len(line) > 3:
   -1   137                 platform1, platform2 = (v.strip() for v in line[3].split('\n'))
   -1   138                 if platform1:
   -1   139                     legs[-1]['start']['platform'] = platform1
   -1   140                 if platform2:
   -1   141                     legs[-1]['destination']['platform'] = platform2
   -1   142 
   -1   143             if len(line) > 4:
   -1   144                 legs[-1]['train'] = line[4].strip().replace('\n', ' ')
   -1   145             else:
   -1   146                 legs[-1]['train'] = legs[-1]['destination'].pop('platform')
   -1   147 
   -1   148             if len(line) > 5:
   -1   149                 legs[-1]['comment'] = line[5].strip().replace('\n', ' ')
  152   150 
  153   151     return legs
  154   152 
@@ -160,7 +158,7 @@ def extract_id(pdf):
  160   158                 key = f'{label}: '
  161   159                 if text.startswith(key):
  162   160                     return label, text[len(key):]
  163    -1     raise ValueError('No order ID found')
   -1   161     raise ValueError('No ID found')
  164   162 
  165   163 
  166   164 def extract_title(pdf):