- commit
- 0e13df517856dadd45812f9db0c10a1fd241f8f3
- parent
- c590c7c41b37f45dd16d1ab24ee14ee64d523274
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2025-05-30 16:49
tweak extract_legs()
Diffstat
| M | db_pkpass.py | 43 | ++++++++++++++++++++++++++++--------------- |
1 file changed, 28 insertions, 15 deletions
diff --git a/db_pkpass.py b/db_pkpass.py
@@ -93,14 +93,17 @@ def extract_legs(pdf): 93 93 last_x = 0 94 94 for page in pdf: 95 95 for x, _, _, _, text, _, _ in page.get_text('blocks'): -1 96 text = text.rstrip('\n').replace(',\n', ', ') 96 97 if text.startswith('Halt\nDatum\nZeit\nGleis'): 97 98 state = 198 -1 elif text.startswith('Wichtige Nutzungshinweise'):99 -1 break100 99 elif state == 0: 101 100 pass102 -1 elif state == 1 or (state > 0 and x < last_x):103 -1 v1, v2 = text.rstrip('\n').split('\n')-1 101 elif text.startswith('Ihre Reiseverbindung und Reservierung'): -1 102 pass -1 103 elif text.startswith('Wichtige Nutzungshinweise') or not text.strip(): -1 104 break -1 105 elif state == 1 or x < last_x: -1 106 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n')) 104 107 legs.append({ 105 108 'start': { 106 109 'station': v1, @@ -111,31 +114,39 @@ def extract_legs(pdf): 111 114 }) 112 115 state = 2 113 116 elif state == 2:114 -1 v1, v2 = text.rstrip('\n').split('\n')-1 117 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n')) 115 118 legs[-1]['start']['date'] = v1 116 119 legs[-1]['destination']['date'] = v2 117 120 state = 3 118 121 elif state == 3:119 -1 v1, v2 = text.rstrip('\n').split('\n')120 -1 legs[-1]['start']['datetime'] = parse_leg_dt(legs[-1]['start'].pop('date'), v1, 'ab')121 -1 legs[-1]['destination']['datetime'] = parse_leg_dt(legs[-1]['destination'].pop('date'), v2, 'an')-1 122 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n')) -1 123 date1 = legs[-1]['start'].pop('date') -1 124 date2 = legs[-1]['destination'].pop('date') -1 125 legs[-1]['start']['datetime'] = parse_leg_dt(date1, v1, 'ab') -1 126 legs[-1]['destination']['datetime'] = parse_leg_dt(date2, v2, 'an') 122 127 state = 4 123 128 elif state == 4:124 -1 v1, v2 = text.rstrip('\n').split('\n')125 -1 legs[-1]['start']['platform'] = v1126 -1 legs[-1]['destination']['platform'] = v2-1 129 v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n')) -1 130 if v1: -1 131 legs[-1]['start']['platform'] = v1 -1 132 if 
v2: -1 133 legs[-1]['destination']['platform'] = v2 127 134 state = 5 128 135 elif state == 5:129 -1 legs[-1]['train'] = text.strip()-1 136 legs[-1]['train'] = text.strip().replace('\n', ' ') 130 137 state = 6 131 138 elif state == 6:132 -1 legs[-1]['comment'] = text.strip()-1 139 legs[-1]['comment'] = text.strip().replace('\n', ' ') 133 140 state = 7 134 141 else:135 -1 raise ValueError((text, state))-1 142 raise ValueError 136 143 137 144 last_x = x 138 145 -1 146 for leg in legs: -1 147 if 'train' not in leg: -1 148 leg['train'] = leg['destination'].pop('platform') -1 149 139 150 return legs 140 151 141 152 @@ -149,7 +160,9 @@ def extract_order_id(pdf): 149 160 150 161 def format_stop(stop, train=None): 151 162 t = stop['datetime'].strftime('%H:%M')152 -1 s = f'{t} {stop["station"]} #{stop["platform"]}'-1 163 s = f'{t} {stop["station"]}' -1 164 if stop.get('platform'): -1 165 s += f' #{stop["platform"]}' 153 166 if train: 154 167 s = f'{s} - {train}' 155 168 return s