#!/usr/bin/env python3
"""Download full-resolution page images from the Bodleian IIIF service.

The IIIF Image API limits the size of a single request, so each page is
fetched as a grid of tiles (<= 4000x4000 px each) which are then stitched
back together with Pillow and saved as a near-lossless JPEG.
"""
import os
import re
import tempfile
from argparse import ArgumentParser

import requests
from PIL import Image

# Standard 8-4-4-4-12 hex UUID, compiled once at import time instead of
# on every extract_uuid() call.
_UUID_RE = re.compile(
    r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-'
    r'[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'
)


def extract_uuid(url):
    """Return the first UUID found in *url*, or None if none is present."""
    match = _UUID_RE.search(url)
    return match.group(0) if match else None


def fetch_manifest(uuid):
    """Fetch and decode the IIIF manifest for *uuid*.

    Returns the parsed JSON dict, or None after printing the error if the
    request fails (network error or non-2xx status).
    """
    url = f'https://iiif.bodleian.ox.ac.uk/iiif/manifest/{uuid}.json'
    try:
        # timeout so a stalled server cannot hang the script forever
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching JSON data: {e}")
        return None


def calculate_chunks(image_width, image_height, chunk_size=4000):
    """Tile a (width, height) image into (x, y, w, h) rectangles.

    Edge tiles are clipped with min() so the grid covers the image exactly
    without overshooting its borders.
    """
    chunks = []
    for y in range(0, image_height, chunk_size):
        for x in range(0, image_width, chunk_size):
            width = min(chunk_size, image_width - x)
            height = min(chunk_size, image_height - y)
            chunks.append((x, y, width, height))
    return chunks


def generate_url(uuid, x, y, width, height):
    """Build the IIIF Image API URL for one full-resolution tile."""
    return (f'https://iiif.bodleian.ox.ac.uk/iiif/image/{uuid}/'
            f'{x},{y},{width},{height}/{width},{height}/0/default.jpg')


def download_chunk(url, save_path):
    """Stream one tile to *save_path*; raise HTTPError on a bad response."""
    response = requests.get(url, stream=True, timeout=60)
    response.raise_for_status()
    with open(save_path, 'wb') as f:
        for chunk in response.iter_content(1024):
            f.write(chunk)


def merge_chunks(image_width, image_height, chunk_paths):
    """Paste downloaded tiles back into a single RGB image and return it."""
    full_image = Image.new('RGB', (image_width, image_height))
    for (x, y, width, height), path in chunk_paths:
        # Context manager so each tile's file handle is closed promptly
        # instead of leaking until garbage collection.
        with Image.open(path) as tile:
            full_image.paste(tile, (x, y))
    return full_image


def download_and_merge_image(image_width, image_height, image_uuid, output_file):
    """Download every tile of one page and save the stitched JPEG.

    Tiles are written to a temporary directory that is removed automatically
    once the merged image has been saved.
    """
    chunks = calculate_chunks(image_width, image_height)
    chunk_paths = []
    with tempfile.TemporaryDirectory() as temp_dir:
        for i, (x, y, width, height) in enumerate(chunks):
            url = generate_url(image_uuid, x, y, width, height)
            chunk_path = os.path.join(temp_dir, f"chunk_{i}.jpg")
            download_chunk(url, chunk_path)
            chunk_paths.append(((x, y, width, height), chunk_path))
        full_image = merge_chunks(image_width, image_height, chunk_paths)
        # subsampling=0 + quality=99: near-lossless JPEG output.
        full_image.save(output_file, subsampling=0, quality=99)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--uuid', type=str, required=True)
    parser.add_argument('--output-dir', type=str, required=True)
    parser.add_argument('--skip-pages', type=int, default=0,
                        help='number of pages to skip')
    args = parser.parse_args()

    manifest = fetch_manifest(args.uuid)
    if manifest is None:
        # fetch_manifest already printed the error; exit instead of
        # crashing with a TypeError on manifest['sequences'].
        raise SystemExit(1)

    # Create the output directory up front so saves don't fail.
    os.makedirs(args.output_dir, exist_ok=True)

    for page, canvas in enumerate(manifest['sequences'][0]['canvases']):
        if page < args.skip_pages:
            continue
        download_and_merge_image(
            canvas['width'], canvas['height'],
            extract_uuid(canvas['@id']),
            os.path.join(args.output_dir, f'{page + 1}.jpg'))
        # Report the 1-based page number to match the saved filename
        # (the original printed the 0-based index, off by one).
        print(f'{page + 1} done')