diff options
Diffstat (limited to 'database')
-rw-r--r-- | database/__init__.py | 1 | ||||
-rw-r--r-- | database/database.py | 152 |
2 files changed, 0 insertions, 153 deletions
diff --git a/database/__init__.py b/database/__init__.py deleted file mode 100644 index ef3f969..0000000 --- a/database/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .database import Database diff --git a/database/database.py b/database/database.py deleted file mode 100644 index 04902f1..0000000 --- a/database/database.py +++ /dev/null @@ -1,152 +0,0 @@ -import sqlite3 -import logging -import os.path -import retronews -import threading - -from typing import Optional - - -class Database: - SCHEMA = 6 - - def __init__(self): - self.logger = logging.getLogger(self.__class__.__name__) - - file = os.path.join(os.path.dirname(__file__), '..', 'mdf-retrobase.sqlite3') - self.sqlite = sqlite3.connect(file, check_same_thread=False) - self.lock = threading.Lock() - - sqlite_version = self._get_sqlite_version() - self.logger.debug(f'SQLite version: {sqlite_version}') - - schema_version = self.schema_get_version() - self.logger.debug(f'Schema version: {schema_version}') - - self.schema_init(schema_version) - self.schema_set_version(self.SCHEMA) - - def __del__(self): - if self.sqlite: - self.sqlite.commit() - self.sqlite.close() - - def _get_sqlite_version(self) -> str: - cursor = self.sqlite.cursor() - cursor.execute("SELECT sqlite_version()") - return cursor.fetchone()[0] - - def schema_get_version(self) -> int: - cursor = self.sqlite.execute('PRAGMA user_version') - return int(cursor.fetchone()[0]) - - def schema_set_version(self, v) -> None: - self.sqlite.execute('PRAGMA user_version={:d}'.format(v)) - self.logger.info(f'Schema set to {v}') - - def cursor(self) -> sqlite3.Cursor: - return self.sqlite.cursor() - - def commit(self) -> None: - return self.sqlite.commit() - - def schema_init(self, version: int) -> None: - cursor = self.cursor() - - if version < 1: - # timestamps - cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_links ( - issue_date TEXT PRIMARY KEY, - url TEXT NOT NULL, - pages INTEGER NOT NULL - )""") - - if version < 2: - cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_pages ( - collection_id INTEGER NOT NULL, - doc_id INTEGER NOT NULL, - page INTEGER NOT NULL, - height INTEGER NOT NULL, - width INTEGER NOT NULL, - dpi INTEGER NOT NULL - )""") - cursor.execute("""CREATE UNIQUE INDEX mdf_pages_idx ON mdf_pages (collection_id, doc_id, page)""") - - if version < 3: - cursor.execute("ALTER TABLE mdf_pages ADD fail INTEGER NOT NULL") - - if version < 4: - cursor.execute("""CREATE INDEX mdf_pages_fail_idx ON mdf_pages (fail)""") - - if version < 5: - for col in ('collection_id', 'doc_id'): - cursor.execute(f"ALTER TABLE mdf_links ADD {col} INTEGER NOT NULL DEFAULT '0'") - cursor.execute("CREATE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)") - - if version < 6: - cursor.execute("DROP INDEX mdf_links_col_doc_idx") - cursor.execute("CREATE UNIQUE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)") - - self.commit() - - def add_link(self, issue_date: str, url: str, pages: int): - with self.lock: - self.cursor().execute("REPLACE INTO mdf_links (issue_date, url, pages) VALUES (?, ?, ?)", - (issue_date, url, str(pages))) - self.commit() - - def add_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int): - with self.lock: - self.cursor().execute("INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, ?, ?, ?, 0)", - (collection_id, doc_id, page, width, height, dpi)) - self.commit() - - def update_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int): - with self.lock: - self.cursor().execute("UPDATE mdf_pages SET width=?, height=?, dpi=?, fail=0 WHERE collection_id=? AND doc_id=? AND page=?", - (width, height, dpi, collection_id, doc_id, page)) - self.commit() - - def add_page_failed(self, collection_id, doc_id, page): - with self.lock: - self.cursor().execute("INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, 0, 0, 0, 1)", - (collection_id, doc_id, page)) - self.commit() - - def get_existing_pages(self, fail=0): - cur = self.cursor() - cur.execute("SELECT collection_id, doc_id, page FROM mdf_pages WHERE fail=?", (fail,)) - return cur.fetchall() - - def get_documents(self, range: Optional[tuple[str, str]] = None): - cur = self.cursor() - docs = [] - - sql = "SELECT issue_date, url, pages FROM mdf_links" - if range: - sql += f" WHERE issue_date BETWEEN '{range[0]}' AND '{range[1]}'" - cur.execute(sql) - for issue_date, url, pages in cur.fetchall(): - pub_date, collection_id, doc_id = retronews.parse_url(url) - docs.append(dict( - collection_id=collection_id, - doc_id=doc_id, - pages=pages - )) - - return docs - - def get_doc_pages(self, collection_id, doc_id): - cur = self.cursor() - cur.execute("SELECT page, width, height, dpi FROM mdf_pages WHERE collection_id=? AND doc_id=?", - (collection_id, doc_id)) - return cur.fetchall() - - def fix_documents(self): - cur = self.cursor() - cur.execute("SELECT issue_date, url FROM mdf_links") - for issue_date, url in cur.fetchall(): - pub_date, cid, did = retronews.parse_url(url) - cur.execute("UPDATE mdf_links SET collection_id=?, doc_id=? WHERE issue_date=?", - (cid, did, issue_date)) - self.commit() |