From 215abbc3e440a7aa6842dd3c558ad68e49b4cbd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Fri, 10 May 2024 01:15:44 +0200
Subject: [PATCH] [archive] implement DownloadArchiveMemory class (#5255)

keeps archive IDs in memory and only writes them to disk in a 'finalize' step
---
 gallery_dl/archive.py | 57 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/gallery_dl/archive.py b/gallery_dl/archive.py
index 302dc5aa..5f05bbfd 100644
--- a/gallery_dl/archive.py
+++ b/gallery_dl/archive.py
@@ -54,3 +54,60 @@ class DownloadArchive():
         self.cursor.execute(
             "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
         return self.cursor.fetchone()
+
+    def finalize(self):
+        """No-op hook; subclasses override it to flush buffered entries"""
+        pass
+
+
+class DownloadArchiveMemory(DownloadArchive):
+    """Download archive that buffers new entries in memory
+
+    add() only records keys in a set; they are written to the
+    database in a single transaction when finalize() is called.
+    """
+
+    def __init__(self, path, format_string, pragma=None,
+                 cache_key="_archive_key"):
+        DownloadArchive.__init__(self, path, format_string, pragma, cache_key)
+        # archive keys added since the last successful finalize()
+        self.keys = set()
+
+    def add(self, kwdict):
+        """Buffer the archive key of 'kwdict' in memory"""
+        self.keys.add(
+            kwdict.get(self._cache_key) or
+            self.keygen(kwdict))
+
+    def check(self, kwdict):
+        """Return a true value if 'kwdict' is pending or already stored"""
+        key = kwdict[self._cache_key] = self.keygen(kwdict)
+        if key in self.keys:
+            return True
+        self.cursor.execute(
+            "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
+        return self.cursor.fetchone()
+
+    def finalize(self):
+        """Write all buffered archive keys to the database"""
+        if not self.keys:
+            return
+
+        cursor = self.cursor
+        with self.connection:
+            try:
+                cursor.execute("BEGIN")
+            except sqlite3.OperationalError:
+                # most likely a transaction is already active; use it
+                pass
+
+            stmt = "INSERT OR IGNORE INTO archive (entry) VALUES (?)"
+            if len(self.keys) < 100:
+                for key in self.keys:
+                    cursor.execute(stmt, (key,))
+            else:
+                cursor.executemany(stmt, ((key,) for key in self.keys))
+
+        # the with block finished without raising, so drop the buffer;
+        # otherwise another finalize() would re-submit the same keys
+        self.keys.clear()