summaryrefslogtreecommitdiff
path: root/mdf/database/database.py
blob: fd08e38cfbebe5a24ed2c27aead34bdc4bc88c20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import sqlite3
import logging
import os.path
from ..retronews import retronews
import threading

from typing import Optional


class Database:
    SCHEMA = 6

    def __init__(self):
        self.logger = logging.getLogger(self.__class__.__name__)

        file = os.path.join(os.path.dirname(__file__), '..', '..', 'mdf-retrobase.sqlite3')
        self.sqlite = sqlite3.connect(file, check_same_thread=False)
        self.lock = threading.Lock()

        sqlite_version = self._get_sqlite_version()
        self.logger.debug(f'SQLite version: {sqlite_version}')

        schema_version = self.schema_get_version()
        self.logger.debug(f'Schema version: {schema_version}')

        self.schema_init(schema_version)
        self.schema_set_version(self.SCHEMA)

    def __del__(self):
        if self.sqlite:
            self.sqlite.commit()
            self.sqlite.close()

    def _get_sqlite_version(self) -> str:
        cursor = self.sqlite.cursor()
        cursor.execute("SELECT sqlite_version()")
        return cursor.fetchone()[0]

    def schema_get_version(self) -> int:
        cursor = self.sqlite.execute('PRAGMA user_version')
        return int(cursor.fetchone()[0])

    def schema_set_version(self, v) -> None:
        self.sqlite.execute('PRAGMA user_version={:d}'.format(v))
        self.logger.info(f'Schema set to {v}')

    def cursor(self) -> sqlite3.Cursor:
        return self.sqlite.cursor()

    def commit(self) -> None:
        return self.sqlite.commit()

    def schema_init(self, version: int) -> None:
        cursor = self.cursor()

        if version < 1:
            # timestamps
            cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_links (
                    issue_date TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    pages INTEGER NOT NULL
                )""")

        if version < 2:
            cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_pages (
                collection_id INTEGER NOT NULL,
                doc_id INTEGER NOT NULL,
                page INTEGER NOT NULL,
                height INTEGER NOT NULL,
                width INTEGER NOT NULL,
                dpi INTEGER NOT NULL
            )""")
            cursor.execute("""CREATE UNIQUE INDEX mdf_pages_idx ON mdf_pages (collection_id, doc_id, page)""")

        if version < 3:
            cursor.execute("ALTER TABLE mdf_pages ADD fail INTEGER NOT NULL")

        if version < 4:
            cursor.execute("""CREATE INDEX mdf_pages_fail_idx ON mdf_pages (fail)""")

        if version < 5:
            for col in ('collection_id', 'doc_id'):
                cursor.execute(f"ALTER TABLE mdf_links ADD {col} INTEGER NOT NULL DEFAULT '0'")
            cursor.execute("CREATE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")

        if version < 6:
            cursor.execute("DROP INDEX mdf_links_col_doc_idx")
            cursor.execute("CREATE UNIQUE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")

        self.commit()

    def add_link(self, issue_date: str, url: str, pages: int):
        with self.lock:
            self.cursor().execute("REPLACE INTO mdf_links (issue_date, url, pages) VALUES (?, ?, ?)",
                                  (issue_date, url, str(pages)))
            self.commit()

    def add_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int):
        with self.lock:
            self.cursor().execute("INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, ?, ?, ?, 0)",
                                  (collection_id, doc_id, page, width, height, dpi))
            self.commit()

    def update_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int):
        with self.lock:
            self.cursor().execute("UPDATE mdf_pages SET width=?, height=?, dpi=?, fail=0 WHERE collection_id=? AND doc_id=? AND page=?",
                                  (width, height, dpi, collection_id, doc_id, page))
            self.commit()

    def add_page_failed(self, collection_id, doc_id, page):
        with self.lock:
            self.cursor().execute("INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, 0, 0, 0, 1)",
                (collection_id, doc_id, page))
            self.commit()

    def get_existing_pages(self, fail=0):
        cur = self.cursor()
        cur.execute("SELECT collection_id, doc_id, page FROM mdf_pages WHERE fail=?", (fail,))
        return cur.fetchall()

    def get_documents(self, range: Optional[tuple[str, str]] = None):
        cur = self.cursor()
        docs = []

        sql = "SELECT issue_date, url, pages FROM mdf_links"
        if range:
            sql += f" WHERE issue_date BETWEEN '{range[0]}' AND '{range[1]}'"
        sql += " ORDER BY issue_date"
        cur.execute(sql)
        for issue_date, url, pages in cur.fetchall():
            pub_date, collection_id, doc_id = retronews.parse_url(url)
            docs.append(dict(
                url=url,
                collection_id=collection_id,
                doc_id=doc_id,
                pages=pages
            ))

        return docs

    def get_doc_pages(self, collection_id, doc_id):
        cur = self.cursor()
        cur.execute("SELECT page, width, height, dpi FROM mdf_pages WHERE collection_id=? AND doc_id=?",
                    (collection_id, doc_id))
        return cur.fetchall()

    def fix_documents(self):
        cur = self.cursor()
        cur.execute("SELECT issue_date, url FROM mdf_links")
        for issue_date, url in cur.fetchall():
            pub_date, cid, did = retronews.parse_url(url)
            cur.execute("UPDATE mdf_links SET collection_id=?, doc_id=? WHERE issue_date=?",
                        (cid, did, issue_date))
        self.commit()