- commit
- 0438ecc7c751dd7a28cf4d0167ff5af2e59e83d2
- parent
- f874077d0344a8b1390ecf089f5b07ef2621de0e
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2025-05-30 18:21
refactor extract_legs()
Diffstat
| M | db_pkpass.py | 112 | ++++++++++++++++++++++++++++++------------------------------- |
1 files changed, 55 insertions, 57 deletions
diff --git a/db_pkpass.py b/db_pkpass.py
@@ -87,68 +87,66 @@ def parse_leg_dt(datestr, timestr, prefix, start):
     return dt
 
 
-def extract_legs(pdf):
-    legs = []
-    state = 0
+def iter_lines(pdf):
     last_x = 0
-    validity = extract_validity(pdf)
+    last_y = 0
+    line = []
     for page in pdf:
-        for x, _, _, _, text, _, _ in page.get_text('blocks'):
+        for x, y, _, _, text, _, _ in page.get_text('blocks'):
             text = text.rstrip('\n').replace(',\n', ', ')
-            if text.startswith('Halt\nDatum\nZeit\nGleis'):
-                state = 1
-            elif state == 0 or text.startswith('Ihre Reiseverbindung '):
-                pass
-            elif text.startswith('Wichtige Nutzungshinweise') or not text.strip():
-                break
-            elif state == 1 or x < last_x:
-                v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
-                legs.append({
-                    'start': {
-                        'station': v1,
-                    },
-                    'destination': {
-                        'station': v2,
-                    },
-                })
-                state = 2
-            elif state == 2:
-                v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
-                legs[-1]['start']['date'] = v1
-                legs[-1]['destination']['date'] = v2
-                state = 3
-            elif state == 3:
-                v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
-                date1 = legs[-1]['start'].pop('date')
-                date2 = legs[-1]['destination'].pop('date')
-                legs[-1]['start']['datetime'] = parse_leg_dt(
-                    date1, v1, 'ab', validity[0]
-                )
-                legs[-1]['destination']['datetime'] = parse_leg_dt(
-                    date2, v2, 'an', validity[0]
-                )
-                state = 4
-            elif state == 4:
-                v1, v2 = (v.strip() for v in text.rstrip('\n').split('\n'))
-                if v1:
-                    legs[-1]['start']['platform'] = v1
-                if v2:
-                    legs[-1]['destination']['platform'] = v2
-                state = 5
-            elif state == 5:
-                legs[-1]['train'] = text.strip().replace('\n', ' ')
-                state = 6
-            elif state == 6:
-                legs[-1]['comment'] = text.strip().replace('\n', ' ')
-                state = 7
+            if x <= last_x or y > last_y:
+                if line:
+                    yield line
+                line = [text]
             else:
-                raise ValueError
-
+                line.append(text)
             last_x = x
+            last_y = y
+    if line:
+        yield line
 
-    for leg in legs:
-        if 'train' not in leg:
-            leg['train'] = leg['destination'].pop('platform')
+
+def extract_legs(pdf):
+    legs = []
+    started = False
+    validity = extract_validity(pdf)
+    for line in iter_lines(pdf):
+        text = ' '.join(line)
+        if text.startswith('Halt\nDatum\nZeit\nGleis'):
+            started = True
+        elif not started or text.startswith('Ihre Reiseverbindung '):
+            pass
+        elif text.startswith('Wichtige Nutzungshinweise') or not text.strip():
+            break
+        else:
+            station1, station2 = (v.strip() for v in line[0].split('\n'))
+            date1, date2 = (v.strip() for v in line[1].split('\n'))
+            time1, time2 = (v.strip() for v in line[2].split('\n'))
+            legs.append({
+                'start': {
+                    'station': station1,
+                    'datetime': parse_leg_dt(date1, time1, 'ab', validity[0])
+                },
+                'destination': {
+                    'station': station2,
+                    'datetime': parse_leg_dt(date2, time2, 'an', validity[0])
+                },
+            })
+
+            if len(line) > 3:
+                platform1, platform2 = (v.strip() for v in line[3].split('\n'))
+                if platform1:
+                    legs[-1]['start']['platform'] = platform1
+                if platform2:
+                    legs[-1]['destination']['platform'] = platform2
+
+            if len(line) > 4:
+                legs[-1]['train'] = line[4].strip().replace('\n', ' ')
+            else:
+                legs[-1]['train'] = legs[-1]['destination'].pop('platform')
+
+            if len(line) > 5:
+                legs[-1]['comment'] = line[5].strip().replace('\n', ' ')
 
     return legs
 
@@ -160,7 +158,7 @@ def extract_id(pdf):
             key = f'{label}: '
             if text.startswith(key):
                 return label, text[len(key):]
-    raise ValueError('No order ID found')
+    raise ValueError('No ID found')
 
 
 def extract_title(pdf):