db-pkpass

Convert Deutsche Bahn PDF tickets to PKPass
git clone https://git.ce9e.org/db-pkpass.git

commit
6a52c4eb3311c99169ed0f855e11fc5cd8c5f907
parent
f66e54cb7885387decc2f77381418f429dd3957e
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2025-05-30 13:52
add code

Diffstat

A db_pkpass.py 233 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 files changed, 233 insertions, 0 deletions


diff --git a/db_pkpass.py b/db_pkpass.py

@@ -0,0 +1,233 @@
import argparse
import base64
import datetime
import hashlib
import io
import json
import pathlib
import zipfile
from zoneinfo import ZoneInfo

import cv2
import numpy
import pymupdf
import zxingcpp
   -1    14 
   -1    15 BARCODES = {
   -1    16     zxingcpp.BarcodeFormat.Aztec: 'PKBarcodeFormatAztec',
   -1    17     zxingcpp.BarcodeFormat.Code128: 'PKBarcodeFormatCode128',
   -1    18     zxingcpp.BarcodeFormat.PDF417: 'PKBarcodeFormatPDF417',
   -1    19     zxingcpp.BarcodeFormat.QRCode: 'PKBarcodeFormatQR',
   -1    20 }
   -1    21 BARCODE_FORMATS = zxingcpp.BarcodeFormats(
   -1    22     zxingcpp.BarcodeFormat.Aztec
   -1    23     or zxingcpp.BarcodeFormat.Code128
   -1    24     or zxingcpp.BarcodeFormat.PDF417
   -1    25     or zxingcpp.BarcodeFormat.QRCode
   -1    26 )
   -1    27 
# Base64-encoded image data, decoded once at import time. The script stores
# these bytes in the generated pass as both ``icon.png`` and ``logo.png``.
ICON = base64.b64decode("""
iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAAAXNSR0IArs4c6QAAAARnQU1BAACx
jwv8YQUAAAAgY0hSTQAAeiYAAICEAAD6AAAAgOgAAHUwAADqYAAAOpgAABdwnLpRPAAAADNQTFRF
AAAA/gAD/xAT/x8h/y4v/01O/2Rk/319/5OU/5ub/6+w/8LC/9HQ/9zd/ubm//Pz////JXIqAwAA
AAF0Uk5TAEDm2GYAAAABYktHRACIBR1IAAAACXBIWXMAAA7EAAAOxAGVKw4bAAAAB3RJTUUH6QUe
DTAymvC7FgAAAWZJREFUWMPtls2ahSAIho+l+Q/d/9XOaJnlhDm6OYu+XQnvowjI5/Pq1bdoZp2a
N/+JdWsa9I8EzoY0xw1wBx1yPG4hYPTaJR18I8D0AcwFACpLGwfZzqrrEt4DXBEd6ZKdKiOvsAXA
WLJTf1YENAGYRALAFiQBE+c5s/QZwIXIOWNJgFxXBJMs/QngYqTZYUYDguFy+joBjg/xAFhhPweU
gHRSfAAkL1sCYAc87SB9KmIHyyMA81kvANlwC5v2NoX5NIjo5dmqDhAl4G+G1QHLcQ0lgJtaMR0A
SQIUDAKOMusHMOE7gqjtr9SeoQL/f41bHvj9v3kCQM64C2DrpFuGVQGWSOW0ELZWBUiimFoBqcEB
dYSpDgBBNRTgOTj3gFAyty3NXlZsS1eOIbxNpEoetLV17gcfFj/0tAmD/Y+rNhaI13noeR8aMEJ9
zrZnxLFzHHGGh6yxMY+FQXHUf3zUffXqC/QDnptJNAYwk4oAAAAASUVORK5CYII=
""".strip())
   -1    41 
   -1    42 
   -1    43 def dump_pkpass(files: dict[str, bytes]) -> bytes:
   -1    44     # https://developer.apple.com/documentation/walletpasses
   -1    45     # https://file-extensions.com/docs/pkpass
   -1    46 
   -1    47     buf = io.BytesIO()
   -1    48     manifest = {}
   -1    49 
   -1    50     with zipfile.ZipFile(buf, 'w') as zfh:
   -1    51         for path, content in files.items():
   -1    52             with zfh.open(path, 'w') as fh:
   -1    53                 fh.write(content)
   -1    54             manifest[path] = hashlib.sha1(content).hexdigest()
   -1    55 
   -1    56         manifest_bytes = json.dumps(manifest).encode('utf-8')
   -1    57         with zfh.open('manifest.json', 'w') as fh:
   -1    58             fh.write(manifest_bytes)
   -1    59 
   -1    60     return buf.getvalue()
   -1    61 
   -1    62 
   -1    63 def pdf_iter_text_lines(pdf):
   -1    64     for i in range(len(pdf)):
   -1    65         text = pdf.get_page_text(i)
   -1    66         yield from text.split('\n')
   -1    67 
   -1    68 
   -1    69 def extract_barcodes(pdf):
   -1    70     barcodes = []
   -1    71     for i in range(len(pdf)):
   -1    72         for xref in pdf.get_page_images(i):
   -1    73             img_data = pdf.extract_image(xref[0])
   -1    74             arr = numpy.frombuffer(img_data['image'], numpy.uint8)
   -1    75             img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
   -1    76             results = zxingcpp.read_barcodes(img, formats=BARCODE_FORMATS)
   -1    77             for result in results:
   -1    78                 barcodes.append((result.bytes, BARCODES[result.format]))
   -1    79     return barcodes
   -1    80 
   -1    81 
   -1    82 def parse_leg_dt(datestr, timestr, prefix):
   -1    83     tz = ZoneInfo('Europe/Berlin')
   -1    84     now = datetime.datetime.now(tz=tz)
   -1    85     year = now.year
   -1    86 
   -1    87     f = f'%Y %d.%m. {prefix} %H:%M'
   -1    88     s = f'{year} {datestr} {timestr}'
   -1    89     dt = datetime.datetime.strptime(s, f).astimezone(tz)
   -1    90     if dt < now:
   -1    91         s = f'{year + 1} {datestr} {timestr}'
   -1    92         dt = datetime.datetime.strptime(s, f).astimezone(tz)
   -1    93     return dt
   -1    94 
   -1    95 
   -1    96 def extract_legs(pdf):
   -1    97     raw = []
   -1    98     started = False
   -1    99     lines = pdf_iter_text_lines(pdf)
   -1   100     for line in lines:
   -1   101         line = line.strip()
   -1   102         if line.startswith('Ihre Reiseverbindung und Reservierung'):
   -1   103             assert next(lines) == 'Halt'
   -1   104             assert next(lines) == 'Datum'
   -1   105             assert next(lines) == 'Zeit'
   -1   106             assert next(lines) == 'Gleis'
   -1   107             assert next(lines) == 'Produkte'
   -1   108             assert next(lines) == 'Reservierung / Hinweise'
   -1   109             started = True
   -1   110         elif started and not line:
   -1   111             break
   -1   112         elif started:
   -1   113             raw.append(line)
   -1   114 
   -1   115     i = 0
   -1   116     legs = []
   -1   117     while True:
   -1   118         legs.append({
   -1   119             'train': raw[i + 8],
   -1   120             'start': {
   -1   121                 'station': raw[i],
   -1   122                 'datetime': parse_leg_dt(raw[i + 2], raw[i + 4], 'ab'),
   -1   123                 'platform': raw[i + 6],
   -1   124             },
   -1   125             'destination': {
   -1   126                 'station': raw[i + 1],
   -1   127                 'datetime': parse_leg_dt(raw[i + 3], raw[i + 5], 'an'),
   -1   128                 'platform': raw[i + 7],
   -1   129             },
   -1   130         })
   -1   131 
   -1   132         if i + 13 >= len(raw):
   -1   133             break
   -1   134         elif raw[i + 13].startswith('ab '):
   -1   135             i += 9
   -1   136         else:
   -1   137             legs[-1]['comment'] = raw[i + 9]
   -1   138             i += 10
   -1   139 
   -1   140     return legs
   -1   141 
   -1   142 
   -1   143 def extract_order_id(pdf):
   -1   144     key = 'Auftragsnummer: '
   -1   145     for line in pdf_iter_text_lines(pdf):
   -1   146         if line.startswith(key):
   -1   147             return line[len(key):]
   -1   148 
   -1   149 
   -1   150 def format_stop(stop, train=None):
   -1   151     t = stop['datetime'].strftime('%H:%M')
   -1   152     s = f'{t} {stop["station"]} #{stop["platform"]}'
   -1   153     if train:
   -1   154         s = f'{s} - {train}'
   -1   155     return s
   -1   156 
   -1   157 
   -1   158 def format_legs(legs):
   -1   159     s = ''
   -1   160     for leg in legs:
   -1   161         s += format_stop(leg['start'], train=leg['train']) + '\n'
   -1   162         s += format_stop(leg['destination']) + '\n'
   -1   163     return s
   -1   164 
   -1   165 
   -1   166 def extract_content(pdf):
   -1   167     # https://developer.apple.com/documentation/walletpasses/pass
   -1   168 
   -1   169     order_id = extract_order_id(pdf)
   -1   170 
   -1   171     legs = extract_legs(pdf)
   -1   172     start = legs[0]['start']['station']
   -1   173     destination = legs[-1]['destination']['station']
   -1   174     date = legs[0]['start']['datetime']
   -1   175 
   -1   176     return {
   -1   177         'formatVersion': 1,
   -1   178         'organizationName': 'Deutsche Bahn AG',
   -1   179         'passTypeIdentifier': 'ticket.ce9e.org',
   -1   180         'teamIdentifier': 'XXXXXXXXXX',
   -1   181         'serialNumber': order_id,
   -1   182         'description': f'{start} → {destination} ({date.date().isoformat()})',
   -1   183         'barcodes': [
   -1   184             {
   -1   185                 'format': _format,
   -1   186                 'message': message.decode('iso-8859-1'),
   -1   187                 'messageEncoding': 'iso-8859-1',
   -1   188             }
   -1   189             for message, _format in extract_barcodes(pdf)
   -1   190         ],
   -1   191         'boardingPass': {
   -1   192             'transitType': 'PKTransitTypeTrain',
   -1   193             'secondaryFields': [
   -1   194                 {
   -1   195                     'key': 'date',
   -1   196                     'label': 'Datum',
   -1   197                     'dateStyle': 'PKDateStyleFull',
   -1   198                     'timeStyle': 'PKDateStyleNone',
   -1   199                     'value': date.isoformat(),
   -1   200                 },
   -1   201                 {
   -1   202                     'key': 'legs',
   -1   203                     'label': 'Reiseplan',
   -1   204                     'value': format_legs(legs),
   -1   205                 },
   -1   206                 {
   -1   207                     'key': 'order-id',
   -1   208                     'label': 'Auftragsnummer',
   -1   209                     'value': order_id,
   -1   210                 },
   -1   211             ],
   -1   212         },
   -1   213     }
   -1   214 
   -1   215 
   -1   216 if __name__ == '__main__':
   -1   217     parser = argparse.ArgumentParser()
   -1   218     parser.add_argument('path')
   -1   219     args = parser.parse_args()
   -1   220 
   -1   221     with open(args.path, 'rb') as fh:
   -1   222         pdf = pymupdf.open(stream=fh.read())
   -1   223     content = extract_content(pdf)
   -1   224 
   -1   225     output_path = args.path.replace('.pdf', '.pkpass')
   -1   226     with open(output_path, 'wb') as fh:
   -1   227         fh.write(dump_pkpass({
   -1   228             'pass.json': json.dumps(content).encode('utf-8'),
   -1   229             'icon.png': ICON,
   -1   230             'logo.png': ICON,
   -1   231         }))
   -1   232 
   -1   233     print(f'written to {output_path}')