summaryrefslogtreecommitdiff
path: root/retronews/retronews.py
blob: 9e80c58416a4bac458fbcec4ab5b4922781175da (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import re
import requests

# Maps month-name slugs, as they appear in date strings handled by this
# module, to their month number. Contains three-letter abbreviations plus a
# few longer French spellings (juillet, aout, novembre).
MONTHS = {
    'jan': 1,
    'feb': 2,
    'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'juillet': 7,
    'aout': 8,
    'aug': 8,
    'sep': 9,
    'oct': 10,
    'nov': 11,
    'novembre': 11,  # https://www.retronews.fr/journal/mercure-de-france/15-novembre-1905/118/2617647/1
    'dec': 12,
}


def convert_date(s: str) -> tuple[str, str, str]:
    """Split a ``DD-month-YYYY`` date string into ``(year, month, day)``.

    The month name is looked up in ``MONTHS`` (ignoring letter case) and
    returned as a zero-padded two-digit string. The year and the day are
    returned exactly as they appear in *s*.

    Args:
        s: date string such as ``'15-novembre-1905'``: a two-digit day, a
            month name and a four-digit year, separated by hyphens.

    Returns:
        A ``(year, month, day)`` tuple of strings.

    Raises:
        ValueError: if *s* does not have the ``DD-month-YYYY`` shape.
        KeyError: if the month name is not a key of ``MONTHS``.
    """
    match = re.match(r'^(\d{2})-(.*?)-(\d{4})$', s)
    if match is None:
        # Fail with a message naming the offending input instead of an
        # opaque AttributeError from calling .groups() on None.
        raise ValueError(f'unrecognised date format: {s!r}')
    day, month_name, year = match.groups()
    # Every MONTHS key is lower-case, so folding the input only widens the
    # set of accepted spellings (e.g. 'Novembre').
    month = '%02d' % MONTHS[month_name.lower()]
    return year, month, day


def parse_url(url: str) -> tuple:
    """Extract the date segment and the two numeric ids from a document URL.

    Searches *url* for the first run of path segments shaped like
    ``/<name>/<date>/<digits>/<digits>/`` and returns the last three of them.
    The second number must be followed by a ``/`` for the URL to match.

    Example:
        ``.../journal/mercure-de-france/15-novembre-1905/118/2617647/1``
        yields ``('15-novembre-1905', '118', '2617647')``.

    Args:
        url: the document URL to parse.

    Returns:
        A 3-tuple of strings ``(date, first_number, second_number)``.
        NOTE(review): the two numbers are presumably the collection id and
        the document id used by the API helpers - confirm against callers.

    Raises:
        ValueError: if *url* does not contain the expected path structure.
    """
    match = re.search(r'/(?:[\-\d\w]+)/([^/]+)/(\d+)/(\d+)/', url)
    if match is None:
        # Report the offending URL rather than failing with an opaque
        # AttributeError from calling .groups() on None.
        raise ValueError(f'unrecognised document URL: {url!r}')
    return match.groups()


def doc_info(collection_id, doc_id, *, timeout=30):
    """Request a document's API endpoint and return the decoded JSON body.

    Sends a GET to
    ``https://pv5web.retronews.fr/api/document/{collection_id}/{doc_id}``.

    Args:
        collection_id: value interpolated as the collection part of the URL.
        doc_id: value interpolated as the document part of the URL.
        timeout: timeout in seconds handed to ``requests.get``. Pass ``None``
            to wait indefinitely, which is what happened before this
            parameter existed.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        Any exception raised by ``requests.get`` (including timeouts) or by
        ``Response.json()``. The HTTP status code is not checked.
    """
    url = f'https://pv5web.retronews.fr/api/document/{collection_id}/{doc_id}'
    # Without a timeout an unresponsive server would block the caller forever.
    response = requests.get(url, timeout=timeout)
    return response.json()


def page_info(collection_id, doc_id, page, *, timeout=30):
    """Request a page's API endpoint and return the decoded JSON body.

    Sends a GET to
    ``https://pv5web.retronews.fr/api/document/{collection_id}/{doc_id}/page/{page}/``.

    Args:
        collection_id: value interpolated as the collection part of the URL.
        doc_id: value interpolated as the document part of the URL.
        page: value interpolated as the page part of the URL.
        timeout: timeout in seconds handed to ``requests.get``. Pass ``None``
            to wait indefinitely, which is what happened before this
            parameter existed.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        Any exception raised by ``requests.get`` (including timeouts) or by
        ``Response.json()``. The HTTP status code is not checked.
    """
    url = f'https://pv5web.retronews.fr/api/document/{collection_id}/{doc_id}/page/{page}/'
    # Without a timeout an unresponsive server would block the caller forever.
    response = requests.get(url, timeout=timeout)
    return response.json()


def thumbnail_url(collection_id, doc_id, page) -> str:
    """Build the API URL of the thumbnail for one page of a document.

    No request is made; the three arguments are simply formatted into the
    URL template.
    """
    template = 'https://pv5web.retronews.fr/api/document/{collection_id}/{doc_id}/page/{page}/thumbnail'
    return template.format(
        collection_id=collection_id,
        doc_id=doc_id,
        page=page,
    )


def tile_url(collection_id, doc_id, page, v_tile, h_tile) -> str:
    """Build the API URL of a single tile of one page of a document.

    No request is made. Note that the URL path carries ``h_tile`` before
    ``v_tile``, i.e. in the opposite order to this function's parameters.
    The URL always ends in a literal ``0`` segment.
    """
    page_base = f'https://pv5web.retronews.fr/api/document/{collection_id}/{doc_id}/page/{page}'
    tile_path = f'tile/{h_tile}/{v_tile}/0'
    return f'{page_base}/{tile_path}'