summaryrefslogtreecommitdiff
path: root/database
diff options
context:
space:
mode:
Diffstat (limited to 'database')
-rw-r--r--database/__init__.py1
-rw-r--r--database/database.py152
2 files changed, 0 insertions, 153 deletions
diff --git a/database/__init__.py b/database/__init__.py
deleted file mode 100644
index ef3f969..0000000
--- a/database/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .database import Database
diff --git a/database/database.py b/database/database.py
deleted file mode 100644
index 04902f1..0000000
--- a/database/database.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import sqlite3
-import logging
-import os.path
-import retronews
-import threading
-
-from typing import Optional
-
-
-class Database:
- SCHEMA = 6
-
- def __init__(self):
- self.logger = logging.getLogger(self.__class__.__name__)
-
- file = os.path.join(os.path.dirname(__file__), '..', 'mdf-retrobase.sqlite3')
- self.sqlite = sqlite3.connect(file, check_same_thread=False)
- self.lock = threading.Lock()
-
- sqlite_version = self._get_sqlite_version()
- self.logger.debug(f'SQLite version: {sqlite_version}')
-
- schema_version = self.schema_get_version()
- self.logger.debug(f'Schema version: {schema_version}')
-
- self.schema_init(schema_version)
- self.schema_set_version(self.SCHEMA)
-
- def __del__(self):
- if self.sqlite:
- self.sqlite.commit()
- self.sqlite.close()
-
- def _get_sqlite_version(self) -> str:
- cursor = self.sqlite.cursor()
- cursor.execute("SELECT sqlite_version()")
- return cursor.fetchone()[0]
-
- def schema_get_version(self) -> int:
- cursor = self.sqlite.execute('PRAGMA user_version')
- return int(cursor.fetchone()[0])
-
- def schema_set_version(self, v) -> None:
- self.sqlite.execute('PRAGMA user_version={:d}'.format(v))
- self.logger.info(f'Schema set to {v}')
-
- def cursor(self) -> sqlite3.Cursor:
- return self.sqlite.cursor()
-
- def commit(self) -> None:
- return self.sqlite.commit()
-
- def schema_init(self, version: int) -> None:
- cursor = self.cursor()
-
- if version < 1:
- # timestamps
- cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_links (
- issue_date TEXT PRIMARY KEY,
- url TEXT NOT NULL,
- pages INTEGER NOT NULL
- )""")
-
- if version < 2:
- cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_pages (
- collection_id INTEGER NOT NULL,
- doc_id INTEGER NOT NULL,
- page INTEGER NOT NULL,
- height INTEGER NOT NULL,
- width INTEGER NOT NULL,
- dpi INTEGER NOT NULL
- )""")
- cursor.execute("""CREATE UNIQUE INDEX mdf_pages_idx ON mdf_pages (collection_id, doc_id, page)""")
-
- if version < 3:
- cursor.execute("ALTER TABLE mdf_pages ADD fail INTEGER NOT NULL")
-
- if version < 4:
- cursor.execute("""CREATE INDEX mdf_pages_fail_idx ON mdf_pages (fail)""")
-
- if version < 5:
- for col in ('collection_id', 'doc_id'):
- cursor.execute(f"ALTER TABLE mdf_links ADD {col} INTEGER NOT NULL DEFAULT '0'")
- cursor.execute("CREATE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")
-
- if version < 6:
- cursor.execute("DROP INDEX mdf_links_col_doc_idx")
- cursor.execute("CREATE UNIQUE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")
-
- self.commit()
-
- def add_link(self, issue_date: str, url: str, pages: int):
- with self.lock:
- self.cursor().execute("REPLACE INTO mdf_links (issue_date, url, pages) VALUES (?, ?, ?)",
- (issue_date, url, str(pages)))
- self.commit()
-
- def add_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int):
- with self.lock:
- self.cursor().execute("INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, ?, ?, ?, 0)",
- (collection_id, doc_id, page, width, height, dpi))
- self.commit()
-
- def update_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int):
- with self.lock:
- self.cursor().execute("UPDATE mdf_pages SET width=?, height=?, dpi=?, fail=0 WHERE collection_id=? AND doc_id=? AND page=?",
- (width, height, dpi, collection_id, doc_id, page))
- self.commit()
-
- def add_page_failed(self, collection_id, doc_id, page):
- with self.lock:
- self.cursor().execute("INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, 0, 0, 0, 1)",
- (collection_id, doc_id, page))
- self.commit()
-
- def get_existing_pages(self, fail=0):
- cur = self.cursor()
- cur.execute("SELECT collection_id, doc_id, page FROM mdf_pages WHERE fail=?", (fail,))
- return cur.fetchall()
-
- def get_documents(self, range: Optional[tuple[str, str]] = None):
- cur = self.cursor()
- docs = []
-
- sql = "SELECT issue_date, url, pages FROM mdf_links"
- if range:
- sql += f" WHERE issue_date BETWEEN '{range[0]}' AND '{range[1]}'"
- cur.execute(sql)
- for issue_date, url, pages in cur.fetchall():
- pub_date, collection_id, doc_id = retronews.parse_url(url)
- docs.append(dict(
- collection_id=collection_id,
- doc_id=doc_id,
- pages=pages
- ))
-
- return docs
-
- def get_doc_pages(self, collection_id, doc_id):
- cur = self.cursor()
- cur.execute("SELECT page, width, height, dpi FROM mdf_pages WHERE collection_id=? AND doc_id=?",
- (collection_id, doc_id))
- return cur.fetchall()
-
- def fix_documents(self):
- cur = self.cursor()
- cur.execute("SELECT issue_date, url FROM mdf_links")
- for issue_date, url in cur.fetchall():
- pub_date, cid, did = retronews.parse_url(url)
- cur.execute("UPDATE mdf_links SET collection_id=?, doc_id=? WHERE issue_date=?",
- (cid, did, issue_date))
- self.commit()