#!/usr/bin/env python3
# Provenance: main.py as added by commit d36ecb0266db ("initial",
# Evgeny Sorokin, 2024-06-26).
"""Download full-size page images from the Bodleian IIIF server.

For every canvas listed in an object's manifest the image is fetched as a
grid of rectangular tiles (the requests are capped at ``chunk_size`` pixels
per side), the tiles are pasted together and the result is saved as
``<output-dir>/<page>.jpg``.

Usage:
    main.py --uuid <object-uuid> --output-dir <directory>
"""
import os
import re
import tempfile
from argparse import ArgumentParser
from typing import List, Optional, Tuple

# NOTE: ``requests`` and Pillow are imported inside the functions that use
# them so that the pure helpers (UUID parsing, tile geometry, URL building)
# stay importable on machines that do not have those packages installed.

IIIF_BASE_URL = 'https://iiif.bodleian.ox.ac.uk/iiif'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 60

# Number of bytes written to disk per iteration while streaming a tile.
DOWNLOAD_BLOCK_SIZE = 64 * 1024

# Compiled once at import time instead of on every extract_uuid() call.
_UUID_PATTERN = re.compile(
    r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'
)

# (x, y, width, height) of one tile, in pixels of the full image.
Chunk = Tuple[int, int, int, int]


def extract_uuid(url: str) -> Optional[str]:
    """Return the first UUID-shaped substring of *url*, or None if absent."""
    match = _UUID_PATTERN.search(url)
    return match.group(0) if match else None


def fetch_manifest(uuid: str) -> Optional[dict]:
    """Fetch and decode the JSON manifest for *uuid*.

    Returns the decoded JSON on success. On any network/HTTP failure, or if
    the body is not valid JSON, an error is printed and None is returned.
    """
    import requests

    url = f'{IIIF_BASE_URL}/manifest/{uuid}.json'
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding errors on requests versions where the
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching JSON data: {e}")
        return None


def calculate_chunks(image_width: int, image_height: int,
                     chunk_size: int = 4000) -> List[Chunk]:
    """Split an image into a row-major grid of tiles.

    Each tile is ``(x, y, width, height)``; tiles on the right and bottom
    edges are clipped to the image bounds. An image with a zero dimension
    yields an empty list.

    Raises:
        ValueError: if *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f'chunk_size must be positive, got {chunk_size}')
    return [
        (x, y, min(chunk_size, image_width - x), min(chunk_size, image_height - y))
        for y in range(0, image_height, chunk_size)
        for x in range(0, image_width, chunk_size)
    ]


def generate_url(uuid: str, x: int, y: int, width: int, height: int) -> str:
    """Build the image URL for one tile.

    The path segments are region (``x,y,w,h``), size (``w,h``, i.e. the
    region is requested unscaled), rotation ``0`` and ``default.jpg``.
    """
    return f'{IIIF_BASE_URL}/image/{uuid}/{x},{y},{width},{height}/{width},{height}/0/default.jpg'


def download_chunk(url: str, save_path: str) -> None:
    """Stream *url* to the file *save_path*.

    Raises:
        RuntimeError: if the server answers with a status other than 200.
        requests.exceptions.RequestException: on connection errors/timeouts.
    """
    import requests

    # The context manager releases the streamed connection even when the
    # status check or the file write fails.
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        if response.status_code != 200:
            raise RuntimeError(f"Failed to download chunk: {url}")
        with open(save_path, 'wb') as f:
            for block in response.iter_content(DOWNLOAD_BLOCK_SIZE):
                f.write(block)


def merge_chunks(image_width: int, image_height: int, chunk_paths):
    """Paste downloaded tiles into a single RGB image and return it.

    *chunk_paths* is an iterable of ``((x, y, width, height), path)`` pairs;
    each tile file is pasted with its top-left corner at ``(x, y)``.
    """
    from PIL import Image

    full_image = Image.new('RGB', (image_width, image_height))
    for (x, y, _width, _height), path in chunk_paths:
        # Close every tile file once pasted so no handles stay open.
        with Image.open(path) as chunk:
            full_image.paste(chunk, (x, y))
    return full_image


def download_and_merge_image(image_width: int, image_height: int,
                             image_uuid: str, output_file: str) -> None:
    """Download one image tile by tile and save the merged result.

    Tiles are stored in a temporary directory that is removed afterwards.

    Raises:
        ValueError: if *image_uuid* is empty or None (it would otherwise be
            formatted into the request URL as literal text).
    """
    if not image_uuid:
        raise ValueError('image_uuid is required to build the image URLs')

    chunks = calculate_chunks(image_width, image_height)
    with tempfile.TemporaryDirectory() as temp_dir:
        chunk_paths = []
        for i, (x, y, width, height) in enumerate(chunks):
            url = generate_url(image_uuid, x, y, width, height)
            chunk_path = os.path.join(temp_dir, f"chunk_{i}.jpg")
            download_chunk(url, chunk_path)
            chunk_paths.append(((x, y, width, height), chunk_path))

        # Must run while the temporary tile files still exist.
        full_image = merge_chunks(image_width, image_height, chunk_paths)

    full_image.save(output_file)


def main() -> int:
    """Command-line entry point; returns the process exit status."""
    parser = ArgumentParser(
        description='Download all page images of an object from the Bodleian IIIF server.')
    parser.add_argument('--uuid', type=str, required=True)
    parser.add_argument('--output-dir', type=str, required=True)
    args = parser.parse_args()

    manifest = fetch_manifest(args.uuid)
    if manifest is None:
        # fetch_manifest() has already printed the reason.
        return 1

    try:
        canvases = manifest['sequences'][0]['canvases']
    except (KeyError, IndexError, TypeError):
        print('Manifest does not contain sequences[0].canvases')
        return 1

    os.makedirs(args.output_dir, exist_ok=True)
    for page, canvas in enumerate(canvases):
        download_and_merge_image(
            canvas['width'],
            canvas['height'],
            extract_uuid(canvas['@id']),
            os.path.join(args.output_dir, f'{page}.jpg'),
        )
        print(f'{page} done')
    return 0


if __name__ == '__main__':
    raise SystemExit(main())