diff options
| author | Vosjedev <vosje@vosjedev.net> | 2026-01-24 15:16:18 +0100 |
|---|---|---|
| committer | Vosjedev <vosje@vosjedev.net> | 2026-01-24 15:16:18 +0100 |
| commit | 919a9e7e35f509afb2c499ab05e4fea91fe4fca5 (patch) | |
| tree | aca7e259cf0a6f168ee7a4c70d3847ba3a4cf1e6 /src/discord_image_bridge | |
| parent | a843f0d86d528d65a4581e4f5a84c3d082e5b8ca (diff) | |
| download | discord_image_bridge-919a9e7e35f509afb2c499ab05e4fea91fe4fca5.tar.gz discord_image_bridge-919a9e7e35f509afb2c499ab05e4fea91fe4fca5.tar.bz2 discord_image_bridge-919a9e7e35f509afb2c499ab05e4fea91fe4fca5.tar.xz | |
utils: make registering attachment ids faster by pooling download threads
and create a function for clearing the cache
Signed-off-by: Vosjedev <vosje@vosjedev.net>
Diffstat (limited to 'src/discord_image_bridge')
| -rw-r--r-- | src/discord_image_bridge/downloadpool.py | 62 | ||||
| -rw-r--r-- | src/discord_image_bridge/utils.py | 43 |
2 files changed, 99 insertions, 6 deletions
diff --git a/src/discord_image_bridge/downloadpool.py b/src/discord_image_bridge/downloadpool.py new file mode 100644 index 0000000..a8654dd --- /dev/null +++ b/src/discord_image_bridge/downloadpool.py @@ -0,0 +1,62 @@ +from threading import Lock, Thread, Event +from queue import Queue, Empty + +import cherrypy + +class DownloadPool(): + def __init__(self, download_fn, max_size=4): + self.download_fn=download_fn + self.max_size=4 + + self.lock=Lock() + self.queue=Queue() + self.threads:list[Thread]=[] + self.stopping=Event() + + self.fill() + + def fill(self): + for i in range(self.max_size): + th=Thread(target=self._wrap) + th.start() + self.threads.append(th) + + def exec(self, args=[], kwargs={}, callback=None): + done=Event() + data={ + "args":args, + "kwargs":kwargs, + "done":done, + "callback":callback + } + self.queue.put(data) + if not callback: + done.wait() + if "exception" in data: + raise data["exception"] + return data["result"] + + def _wrap(self): + while not self.stopping.is_set(): + with self.lock: + try: + data=self.queue.get(block=True,timeout=5) + except Empty: + continue + try: + data["result"]=self.download_fn(*data["args"], **data["kwargs"]) + if data["callback"]: + data["callback"](*data["result"]) + except Exception as e: + data["exception"]=e + data["done"].set() + + def stop(self): + cherrypy.log("Waiting for download thread pool to exit") + self.stopping.set() + for thread in self.threads: + thread.join() + cherrypy.log("Done") + + + diff --git a/src/discord_image_bridge/utils.py b/src/discord_image_bridge/utils.py index e614e50..838e9c6 100644 --- a/src/discord_image_bridge/utils.py +++ b/src/discord_image_bridge/utils.py @@ -1,13 +1,17 @@ import os +from time import time import cherrypy import requests from hashlib import sha256 as do_hash -from .dbpool import DBPoolManager +from .dbpool import DBPoolManager from . import discord +from . 
import _values +from .downloadpool import DownloadPool def download_and_cache(url, filename): + cherrypy.log("Downloading attachment %s"%filename) resp=requests.get(url) if resp.status_code==200: hash=do_hash(resp.content).hexdigest() @@ -17,6 +21,8 @@ def download_and_cache(url, filename): with open(fname,'wb') as fd: fd.write(resp.content) return hash, fname + except FileExistsError: + return hash, fname except OSError as e: cherrypy.log("Error writing "+filename+" to disk: "+repr(e)) return None, None @@ -28,6 +34,19 @@ def download_uncached(channel, msgid, attachmentid): return download_and_cache(attachment["url"],attachment["filename"]) return (None, None) +def clear_cache(): + ttl=os.getenv("BRIDGE_CACHE_TIME",3600*24*7) + cherrypy.log("Clearing cache") + c=0 + with _values.dbpool.get_connection() as conn, conn.cursor() as cur: + cur.execute("SELECT hash FROM cache WHERE fetched<?",(int(time())-ttl)) + for (hash) in cur: + path=os.path.join(hash[:2],hash) + if os.path.exists(path): + c+=1 + os.remove(path) + cherrypy.log("%d files removed"%c) + def on_ready(plugin:discord.DiscordWsManager, client:discord.DiscordWsClient): dbpool:DBPoolManager=plugin.dbpool @@ -42,7 +61,17 @@ def on_ready(plugin:discord.DiscordWsManager, client:discord.DiscordWsClient): "hash CHAR(64)" ")" ) + cur.execute( + "CREATE TABLE IF NOT EXISTS cache (" + "hash CHAR(64)," + "size BIGINT," + "fetched BIGINT" + ")" + ) conn.commit() + + download_pool=DownloadPool(download_and_cache) + cherrypy.engine.subscribe("stop",download_pool.stop) @client.event() def on_message_create(payload, data): @@ -58,9 +87,12 @@ def on_ready(plugin:discord.DiscordWsManager, client:discord.DiscordWsClient): cherrypy.log("Attachment with id %s and fname `%s' already downloaded"%(id,fname)) continue - cherrypy.log("Downloading attachment %s/%s %s"%(channel,id,fname)) + def callback(hash, disk_fname): + with dbpool.get_connection() as conn, conn.cursor() as cur: + cur.execute("UPDATE attachments SET hash=? 
WHERE id=?",(hash,id)) + conn.commit() - hash, disk_fname=download_and_cache(attachment["url"], fname) + download_pool.exec(args=(attachment["url"], fname),callback=callback) cur.execute("SELECT seq FROM attachments WHERE filename=? ORDER BY seq DESC LIMIT 1",(fname,)) nr=cur.fetchone() @@ -70,10 +102,9 @@ def on_ready(plugin:discord.DiscordWsManager, client:discord.DiscordWsClient): nr=0 cherrypy.log("Caching attachment %s/%s %s under %d"%(channel,id,fname,nr)) - cur.execute("INSERT INTO attachments (seq, filename, channel, message, id, hash) VALUES (?,?,?,?,?,?)",( + cur.execute("INSERT INTO attachments (seq, filename, channel, message, id) VALUES (?,?,?,?,?)",( nr,fname[:255], - channel,msgid,id, - hash + channel,msgid,id )) conn.commit() |
