summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x main.py 86
1 files changed, 86 insertions, 0 deletions
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..db0c80e
--- /dev/null
+++ b/main.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+import os
+import requests
+import tempfile
+import re
+
+from argparse import ArgumentParser
+from PIL import Image
+
+
def extract_uuid(url):
    """Return the first UUID-shaped substring found in *url*.

    The match is a run of hexadecimal groups laid out 8-4-4-4-12 and
    separated by hyphens; upper- and lower-case digits are both accepted.

    Args:
        url: Text to scan (in this script, a canvas ``@id`` URL).

    Returns:
        The matched UUID text exactly as it appears in *url*, or ``None``
        when *url* contains no such substring.
    """
    found = re.search(
        r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}',
        url,
    )
    if found is None:
        return None
    return found.group(0)
+
+
def fetch_manifest(uuid, timeout=30):
    """Download and parse the manifest JSON for *uuid*.

    Args:
        uuid: Identifier inserted into the manifest URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON document, or ``None`` if the request failed, the
        server answered with an error status, or the body was not valid
        JSON. The failure reason is printed before returning ``None``.
    """
    url = f'https://iiif.bodleian.ox.ac.uk/iiif/manifest/{uuid}.json'
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests releases whose
        # decode error is not a RequestException subclass.
        print(f"Error fetching JSON data: {e}")
        return None
+
+
def calculate_chunks(image_width, image_height, chunk_size=4000):
    """Split an image area into a row-major grid of rectangles.

    Rectangles are at most ``chunk_size`` on each side; those on the right
    and bottom edges are clipped so that none extends past the image.

    Args:
        image_width: Total width of the area to cover.
        image_height: Total height of the area to cover.
        chunk_size: Maximum side length of a rectangle. Must be positive.

    Returns:
        A list of ``(x, y, width, height)`` tuples ordered top-to-bottom,
        then left-to-right. Empty when either dimension is not positive.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative. A negative step
            would otherwise silently yield no rectangles at all.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    chunks = []
    for y in range(0, image_height, chunk_size):
        # The row height depends only on y, so compute it once per row.
        height = min(chunk_size, image_height - y)
        for x in range(0, image_width, chunk_size):
            width = min(chunk_size, image_width - x)
            chunks.append((x, y, width, height))
    return chunks
+
+
def generate_url(uuid, x, y, width, height):
    """Build the image-server URL for one rectangular region of an image.

    The path after the image identifier is ``x,y,width,height`` (the region
    requested), then ``width,height`` again (the output size, so the region
    is requested at its own pixel dimensions), then ``0/default.jpg``.
    NOTE(review): this looks like the IIIF Image API
    region/size/rotation/quality.format layout - confirm against the spec.

    Args:
        uuid: Image identifier placed in the URL path.
        x: Left edge of the region.
        y: Top edge of the region.
        width: Region width.
        height: Region height.

    Returns:
        The URL as a string.
    """
    tile_url = f'https://iiif.bodleian.ox.ac.uk/iiif/image/{uuid}/{x},{y},{width},{height}/{width},{height}/0/default.jpg'
    return tile_url
+
+
def download_chunk(url, save_path, timeout=60):
    """Stream the resource at *url* into the file *save_path*.

    Args:
        url: Address to download.
        save_path: Destination file; created or overwritten in binary mode.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Raises:
        Exception: If the server answers with any status other than 200.
            Connection-level failures propagate as ``requests`` exceptions.
    """
    # Using the response as a context manager releases the connection on
    # every path, including the error branch below.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception(f"Failed to download chunk: {url}")
        with open(save_path, 'wb') as f:
            # 64 KiB blocks instead of 1 KiB: far fewer tiny writes.
            for chunk in response.iter_content(64 * 1024):
                f.write(chunk)
+
+
def merge_chunks(image_width, image_height, chunk_paths):
    """Assemble downloaded tiles into one RGB image.

    Args:
        image_width: Width of the resulting image.
        image_height: Height of the resulting image.
        chunk_paths: Iterable of ``((x, y, width, height), path)`` pairs.
            Each image file at ``path`` is pasted with its top-left corner
            at ``(x, y)``; the width and height entries are not used here.

    Returns:
        A new ``PIL.Image.Image`` of size ``(image_width, image_height)``.
        Areas not covered by any tile keep the canvas's initial fill.
    """
    full_image = Image.new('RGB', (image_width, image_height))
    for (x, y, _width, _height), path in chunk_paths:
        # Close each tile explicitly so its file handle is released before
        # the caller removes the temporary files.
        with Image.open(path) as chunk:
            full_image.paste(chunk, (x, y))
    return full_image
+
+
def download_and_merge_image(image_width, image_height, image_uuid, output_file):
    """Download an image tile by tile and save the stitched result.

    The image area is split with ``calculate_chunks``. Each tile is fetched
    into a temporary directory, the tiles are combined with ``merge_chunks``,
    and the result is written to *output_file*. The temporary directory is
    removed when the function finishes.

    Args:
        image_width: Full width of the image.
        image_height: Full height of the image.
        image_uuid: Identifier used to build each tile URL.
        output_file: Path the merged image is saved to.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        downloaded = []
        for index, region in enumerate(calculate_chunks(image_width, image_height)):
            # region is the (x, y, width, height) rectangle of this tile.
            tile_path = os.path.join(scratch_dir, f"chunk_{index}.jpg")
            download_chunk(generate_url(image_uuid, *region), tile_path)
            downloaded.append((region, tile_path))

        # Merge and save while the temporary tile files still exist.
        merged = merge_chunks(image_width, image_height, downloaded)
        merged.save(output_file)
+
+
if __name__ == '__main__':
    # Command-line entry point: fetch the manifest for --uuid and save every
    # canvas as <index>.jpg inside --output-dir.
    parser = ArgumentParser()
    parser.add_argument('--uuid', type=str, required=True)
    parser.add_argument('--output-dir', type=str, required=True)
    args = parser.parse_args()

    manifest = fetch_manifest(args.uuid)
    if manifest is None:
        # fetch_manifest has already printed the reason. Stop with a failure
        # status instead of crashing on a None subscript below.
        raise SystemExit(1)

    # Create the destination up front so saving the first page cannot fail
    # merely because the directory does not exist yet.
    os.makedirs(args.output_dir, exist_ok=True)

    for page, canvas in enumerate(manifest['sequences'][0]['canvases']):
        output_file = os.path.join(args.output_dir, f'{page}.jpg')
        download_and_merge_image(
            canvas['width'],
            canvas['height'],
            extract_uuid(canvas['@id']),
            output_file,
        )
        print(f'{page} done')