summaryrefslogtreecommitdiff
path: root/database
diff options
context:
space:
mode:
Diffstat (limited to 'database')
-rw-r--r--database/__init__.py1
-rw-r--r--database/database.py152
2 files changed, 153 insertions, 0 deletions
diff --git a/database/__init__.py b/database/__init__.py
new file mode 100644
index 0000000..ef3f969
--- /dev/null
+++ b/database/__init__.py
@@ -0,0 +1 @@
+from .database import Database
diff --git a/database/database.py b/database/database.py
new file mode 100644
index 0000000..04902f1
--- /dev/null
+++ b/database/database.py
@@ -0,0 +1,152 @@
+import sqlite3
+import logging
+import os.path
+import retronews
+import threading
+
+from typing import Optional
+
+
+class Database:
+ SCHEMA = 6
+
+ def __init__(self):
+ self.logger = logging.getLogger(self.__class__.__name__)
+
+ file = os.path.join(os.path.dirname(__file__), '..', 'mdf-retrobase.sqlite3')
+ self.sqlite = sqlite3.connect(file, check_same_thread=False)
+ self.lock = threading.Lock()
+
+ sqlite_version = self._get_sqlite_version()
+ self.logger.debug(f'SQLite version: {sqlite_version}')
+
+ schema_version = self.schema_get_version()
+ self.logger.debug(f'Schema version: {schema_version}')
+
+ self.schema_init(schema_version)
+ self.schema_set_version(self.SCHEMA)
+
+ def __del__(self):
+ if self.sqlite:
+ self.sqlite.commit()
+ self.sqlite.close()
+
+ def _get_sqlite_version(self) -> str:
+ cursor = self.sqlite.cursor()
+ cursor.execute("SELECT sqlite_version()")
+ return cursor.fetchone()[0]
+
+ def schema_get_version(self) -> int:
+ cursor = self.sqlite.execute('PRAGMA user_version')
+ return int(cursor.fetchone()[0])
+
+ def schema_set_version(self, v) -> None:
+ self.sqlite.execute('PRAGMA user_version={:d}'.format(v))
+ self.logger.info(f'Schema set to {v}')
+
+ def cursor(self) -> sqlite3.Cursor:
+ return self.sqlite.cursor()
+
+ def commit(self) -> None:
+ return self.sqlite.commit()
+
+ def schema_init(self, version: int) -> None:
+ cursor = self.cursor()
+
+ if version < 1:
+ # timestamps
+ cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_links (
+ issue_date TEXT PRIMARY KEY,
+ url TEXT NOT NULL,
+ pages INTEGER NOT NULL
+ )""")
+
+ if version < 2:
+ cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_pages (
+ collection_id INTEGER NOT NULL,
+ doc_id INTEGER NOT NULL,
+ page INTEGER NOT NULL,
+ height INTEGER NOT NULL,
+ width INTEGER NOT NULL,
+ dpi INTEGER NOT NULL
+ )""")
+ cursor.execute("""CREATE UNIQUE INDEX mdf_pages_idx ON mdf_pages (collection_id, doc_id, page)""")
+
+ if version < 3:
+ cursor.execute("ALTER TABLE mdf_pages ADD fail INTEGER NOT NULL")
+
+ if version < 4:
+ cursor.execute("""CREATE INDEX mdf_pages_fail_idx ON mdf_pages (fail)""")
+
+ if version < 5:
+ for col in ('collection_id', 'doc_id'):
+ cursor.execute(f"ALTER TABLE mdf_links ADD {col} INTEGER NOT NULL DEFAULT '0'")
+ cursor.execute("CREATE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")
+
+ if version < 6:
+ cursor.execute("DROP INDEX mdf_links_col_doc_idx")
+ cursor.execute("CREATE UNIQUE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")
+
+ self.commit()
+
+ def add_link(self, issue_date: str, url: str, pages: int):
+ with self.lock:
+ self.cursor().execute("REPLACE INTO mdf_links (issue_date, url, pages) VALUES (?, ?, ?)",
+ (issue_date, url, str(pages)))
+ self.commit()
+
+ def add_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int):
+ with self.lock:
+ self.cursor().execute("INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, ?, ?, ?, 0)",
+ (collection_id, doc_id, page, width, height, dpi))
+ self.commit()
+
+ def update_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int):
+ with self.lock:
+ self.cursor().execute("UPDATE mdf_pages SET width=?, height=?, dpi=?, fail=0 WHERE collection_id=? AND doc_id=? AND page=?",
+ (width, height, dpi, collection_id, doc_id, page))
+ self.commit()
+
+ def add_page_failed(self, collection_id, doc_id, page):
+ with self.lock:
+ self.cursor().execute("INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, 0, 0, 0, 1)",
+ (collection_id, doc_id, page))
+ self.commit()
+
+ def get_existing_pages(self, fail=0):
+ cur = self.cursor()
+ cur.execute("SELECT collection_id, doc_id, page FROM mdf_pages WHERE fail=?", (fail,))
+ return cur.fetchall()
+
+ def get_documents(self, range: Optional[tuple[str, str]] = None):
+ cur = self.cursor()
+ docs = []
+
+ sql = "SELECT issue_date, url, pages FROM mdf_links"
+ if range:
+ sql += f" WHERE issue_date BETWEEN '{range[0]}' AND '{range[1]}'"
+ cur.execute(sql)
+ for issue_date, url, pages in cur.fetchall():
+ pub_date, collection_id, doc_id = retronews.parse_url(url)
+ docs.append(dict(
+ collection_id=collection_id,
+ doc_id=doc_id,
+ pages=pages
+ ))
+
+ return docs
+
+ def get_doc_pages(self, collection_id, doc_id):
+ cur = self.cursor()
+ cur.execute("SELECT page, width, height, dpi FROM mdf_pages WHERE collection_id=? AND doc_id=?",
+ (collection_id, doc_id))
+ return cur.fetchall()
+
+ def fix_documents(self):
+ cur = self.cursor()
+ cur.execute("SELECT issue_date, url FROM mdf_links")
+ for issue_date, url in cur.fetchall():
+ pub_date, cid, did = retronews.parse_url(url)
+ cur.execute("UPDATE mdf_links SET collection_id=?, doc_id=? WHERE issue_date=?",
+ (cid, did, issue_date))
+ self.commit()