oreilly-downloader: download epub books from O'Reilly

commit: 8c20833f9db46fdfc5237adbef70ecb80ab36f5f
parent: 4b074fb4243c69ff8b037dbe2da2092d59a21e30
Author: Tobias Bengfort <tobias.bengfort@posteo.de>
Date: 2025-05-17 16:09

init

Diffstat

A	README.md	31	+++++++++++++++++++++++++++++++
A	oreilly_downloader.py	81	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 112 insertions, 0 deletions

diff --git a/README.md b/README.md

@@ -0,0 +1,31 @@
   -1     1 # O'Reilly epub downloader
   -1     2 
   -1     3 O'Reilly provides all of their books in epub format, but only through their own
   -1     4 reader.
   -1     5 
   -1     6 This script allows you to download all the individual files and assemble them
   -1     7 back into a full epub. This allows you to use other readers, e.g. for
   -1     8 accessibility reasons.
   -1     9 
   -1    10 You need a valid JWT to download the full content. If you do not provide one,
   -1    11 each chapter will be cut short. You can get one by logging in with your browser
   -1    12 and extracting the `orm-jwt` cookie using the developer tools.
   -1    13 
   -1    14 Before any usage, please read the [O'Reilly Terms of
   -1    15 Service](https://learning.oreilly.com/terms/).
   -1    16 
   -1    17 # Usage
   -1    18 
   -1    19 ```
   -1    20 $ pip install aiohttp
   -1    21 $ python3 oreilly_downloader.py 9781491958698 --jwt 'XYZ'
   -1    22 …
   -1    23 created 9781491958698.epub
   -1    24 ```
   -1    25 
   -1    26 # Similar Projects
   -1    27 
   -1    28 -   <https://github.com/lorenzodifuccia/safaribooks> (python)
   -1    29 -   <https://github.com/hurlenko/orly> (rust)
   -1    30 -   <https://github.com/jenni/obooks> (javascript)
   -1    31 -   <https://github.com/rahulvramesh/oreilly-books-grabber> (go)

diff --git a/oreilly_downloader.py b/oreilly_downloader.py

@@ -0,0 +1,81 @@
   -1     1 import argparse
   -1     2 import asyncio
   -1     3 import zipfile
   -1     4 
   -1     5 import aiohttp
   -1     6 
# Origin of the O'Reilly learning platform; request URLs are built by
# appending an absolute API path to it.
BASE_URL = 'https://learning.oreilly.com'

# Contents of META-INF/container.xml: declares EPUB/content.opf as the
# package document (root file) of the generated epub.
CONTAINER = b"""<?xml version="1.0"?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
    <rootfiles>
        <rootfile full-path="EPUB/content.opf" media-type="application/oebps-package+xml"/>
    </rootfiles>
</container>
"""  # noqa
   -1    16 
   -1    17 
   -1    18 async def check_auth(session):
   -1    19     url = BASE_URL + '/api/v1/user-preferences/'
   -1    20     async with session.get(url, raise_for_status=False) as r:
   -1    21         return r.ok
   -1    22 
   -1    23 
   -1    24 async def fetch_book(book_id, zfh, session):
   -1    25     root_path = f'/api/v2/epubs/urn:orm:book:{book_id}/files/'
   -1    26     b_root_path = root_path.encode('utf-8')
   -1    27 
   -1    28     async def download(url, path):
   -1    29         async with session.get(url) as r:
   -1    30             content = await r.read()
   -1    31             content = content.replace(b_root_path, b'/EPUB/')
   -1    32             with zfh.open(path, 'w') as fh:
   -1    33                 fh.write(content)
   -1    34 
   -1    35     with zfh.open('mimetype', 'w') as fh:
   -1    36         fh.write(b'application/epub+zip\n')
   -1    37 
   -1    38     with zfh.open('META-INF/container.xml', 'w') as fh:
   -1    39         fh.write(CONTAINER)
   -1    40 
   -1    41     url = BASE_URL + root_path
   -1    42     while url:
   -1    43         print(f'fetching {url}')
   -1    44         async with session.get(url) as r:
   -1    45             data = await r.json()
   -1    46 
   -1    47         await asyncio.gather(*[
   -1    48             download(result['url'], f'EPUB/{result["full_path"]}')
   -1    49             for result in data.get('results', [])
   -1    50         ])
   -1    51 
   -1    52         url = data.get('next')
   -1    53 
   -1    54 
   -1    55 async def amain():
   -1    56     parser = argparse.ArgumentParser()
   -1    57     parser.add_argument('book_id')
   -1    58     parser.add_argument('--jwt')
   -1    59     args = parser.parse_args()
   -1    60 
   -1    61     filename = f'{args.book_id}.epub'
   -1    62 
   -1    63     with zipfile.ZipFile(filename, 'w') as zfh:
   -1    64         async with aiohttp.ClientSession(
   -1    65             raise_for_status=True,
   -1    66             cookies={'orm-jwt': args.jwt},
   -1    67         ) as session:
   -1    68             if not args.jwt:
   -1    69                 print('No JWT provided. Continuing without…')
   -1    70             elif await check_auth(session):
   -1    71                 print('Authentication successful.')
   -1    72             else:
   -1    73                 print('Authentication failed. Continuing without…')
   -1    74 
   -1    75             await fetch_book(args.book_id, zfh, session)
   -1    76 
   -1    77     print(f'created {filename}')
   -1    78 
   -1    79 
# Run the async entry point only when executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(amain())