- commit
- 8c20833f9db46fdfc5237adbef70ecb80ab36f5f
- parent
- 4b074fb4243c69ff8b037dbe2da2092d59a21e30
- Author
- Tobias Bengfort <tobias.bengfort@posteo.de>
- Date
- 2025-05-17 16:09
init
Diffstat
| A | README.md | 31 | +++++++++++++++++++++++++++++++ |
| A | oreilly_downloader.py | 81 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 112 insertions, 0 deletions
diff --git a/README.md b/README.md
@@ -0,0 +1,31 @@ -1 1 # O'Reilly epub downloader -1 2 -1 3 O'Reilly provides all of their books in epub format, but only through their own -1 4 reader. -1 5 -1 6 This script allows you to download all the individual files and assemble them -1 7 back into a full epub. This allows you to use other readers, e.g. for -1 8 accessibility reasons. -1 9 -1 10 You need to have a valid JWT to download content. If you do not provide one, -1 11 each chapter will be cut short. You can get it by logging in with your browser -1 12 and extracting the `orm-jwt` cookie using the developer tools. -1 13 -1 14 Before any usage, please read the [O'Reilly Terms of -1 15 Service](https://learning.oreilly.com/terms/). -1 16 -1 17 # Usage -1 18 -1 19 ``` -1 20 $ pip install aiohttp -1 21 $ python3 oreilly_downloader.py 9781491958698 --jwt 'XYZ' -1 22 … -1 23 created 9781491958698.epub -1 24 ``` -1 25 -1 26 # Similar Projects -1 27 -1 28 - <https://github.com/lorenzodifuccia/safaribooks> (python) -1 29 - <https://github.com/hurlenko/orly> (rust) -1 30 - <https://github.com/jenni/obooks> (javascript) -1 31 - <https://github.com/rahulvramesh/oreilly-books-grabber> (go)
diff --git a/oreilly_downloader.py b/oreilly_downloader.py
@@ -0,0 +1,81 @@
-1 1 import argparse
-1 2 import asyncio
-1 3 import zipfile
-1 4
-1 5 import aiohttp
-1 6
# Scheme and host that every API path used in this script is appended to.
BASE_URL = 'https://learning.oreilly.com'

# Payload written to META-INF/container.xml; it names EPUB/content.opf as the
# package document of the generated archive.
CONTAINER = (
    b'<?xml version="1.0"?>\n'
    b'<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">\n'  # noqa
    b'<rootfiles>\n'
    b'<rootfile full-path="EPUB/content.opf" media-type="application/oebps-package+xml"/>\n'  # noqa
    b'</rootfiles>\n'
    b'</container>\n'
)
-1 16
-1 17
async def check_auth(session):
    """Probe the user-preferences endpoint and report whether it answered OK.

    ``raise_for_status`` is switched off for this single request so that an
    error status yields ``False`` instead of raising.

    Args:
        session: HTTP client session used to issue the GET request.

    Returns:
        The ``ok`` flag of the response.
    """
    endpoint = f'{BASE_URL}/api/v1/user-preferences/'
    async with session.get(endpoint, raise_for_status=False) as response:
        is_ok = response.ok
    return is_ok
-1 22
-1 23
async def fetch_book(book_id, zfh, session):
    """Download all files of a book and write them into an open EPUB archive.

    The archive receives, in this order: the ``mimetype`` entry, the
    ``META-INF/container.xml`` entry, and then every file listed by the
    paginated file index of the book, stored below ``EPUB/``.

    Args:
        book_id: Identifier inserted into the ``urn:orm:book:`` API path.
        zfh: A ``zipfile.ZipFile`` opened for writing.
        session: HTTP client session used for all requests.
    """
    root_path = f'/api/v2/epubs/urn:orm:book:{book_id}/files/'
    b_root_path = root_path.encode('utf-8')

    async def download(url, path):
        """Fetch one file, rewrite API paths, and store it at *path*."""
        async with session.get(url) as r:
            content = await r.read()
        # References to the API file root are rewritten to the archive's
        # EPUB/ directory so links between the downloaded files still match.
        content = content.replace(b_root_path, b'/EPUB/')
        # No await between open and close: only one member may be open for
        # writing at a time, and downloads run concurrently via gather().
        with zfh.open(path, 'w') as fh:
            fh.write(content)

    # The EPUB container format requires the mimetype entry to come first,
    # to be stored uncompressed, and to contain exactly the media type with
    # no trailing newline or other padding.
    zfh.writestr(
        'mimetype',
        b'application/epub+zip',
        compress_type=zipfile.ZIP_STORED,
    )

    with zfh.open('META-INF/container.xml', 'w') as fh:
        fh.write(CONTAINER)

    # Walk the paginated index; each page links to the next one (if any).
    url = BASE_URL + root_path
    while url:
        print(f'fetching {url}')
        async with session.get(url) as r:
            data = await r.json()

        await asyncio.gather(*[
            download(result['url'], f'EPUB/{result["full_path"]}')
            for result in data.get('results', [])
        ])

        url = data.get('next')
-1 53
-1 54
async def amain():
    """Parse the command line and download the requested book as an EPUB.

    Positional argument ``book_id`` selects the book; the optional ``--jwt``
    value is sent as the ``orm-jwt`` cookie. The result is written to
    ``<book_id>.epub`` in the current directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('book_id')
    parser.add_argument('--jwt')
    args = parser.parse_args()

    filename = f'{args.book_id}.epub'

    # Only attach the cookie when a token was actually supplied. Passing
    # {'orm-jwt': None} hands a None value to the cookie jar, which may end
    # up being sent to the server as a bogus cookie value.
    cookies = {'orm-jwt': args.jwt} if args.jwt else None

    with zipfile.ZipFile(filename, 'w') as zfh:
        async with aiohttp.ClientSession(
            raise_for_status=True,
            cookies=cookies,
        ) as session:
            if not args.jwt:
                print('No JWT provided. Continuing without…')
            elif await check_auth(session):
                print('Authentication successful.')
            else:
                print('Authentication failed. Continuing without…')

            await fetch_book(args.book_id, zfh, session)

    print(f'created {filename}')


if __name__ == '__main__':
    asyncio.run(amain())