diff options
| author | Vosjedev <vosje@vosjedev.net> | 2026-01-25 11:39:36 +0100 |
|---|---|---|
| committer | Vosjedev <vosje@vosjedev.net> | 2026-01-25 11:39:36 +0100 |
| commit | b032cbf4c404752b3fc1e1ad106d317d5c09c98e (patch) | |
| tree | d5691ea324cff22830c1aa96f2405b827c9bdac1 /src/discord_image_bridge | |
| parent | a2de5d820c052e317820a907d78e73d8dc0e0ecd (diff) | |
| download | discord_image_bridge-b032cbf4c404752b3fc1e1ad106d317d5c09c98e.tar.gz discord_image_bridge-b032cbf4c404752b3fc1e1ad106d317d5c09c98e.tar.bz2 discord_image_bridge-b032cbf4c404752b3fc1e1ad106d317d5c09c98e.tar.xz | |
fuck databases when not needed
move everything from mariadb to a filesystem-based store of metadata
Signed-off-by: Vosjedev <vosje@vosjedev.net>
Diffstat (limited to 'src/discord_image_bridge')
| -rw-r--r-- | src/discord_image_bridge/__init__.py | 86 | ||||
| -rw-r--r-- | src/discord_image_bridge/discord.py | 5 | ||||
| -rw-r--r-- | src/discord_image_bridge/downloadpool.py | 2 | ||||
| -rw-r--r-- | src/discord_image_bridge/fsmanager.py | 62 | ||||
| -rw-r--r-- | src/discord_image_bridge/utils.py | 129 |
5 files changed, 154 insertions, 130 deletions
diff --git a/src/discord_image_bridge/__init__.py b/src/discord_image_bridge/__init__.py index 7570d70..e0a658a 100644 --- a/src/discord_image_bridge/__init__.py +++ b/src/discord_image_bridge/__init__.py @@ -6,69 +6,52 @@ if not os.path.isdir(dir): os.chdir(dir) print(os.getcwd()) -from logging import WARNING +from typing import Literal + import magic from urllib.parse import quote import cherrypy -from .dbpool import DBPoolManager from .discord import DiscordWsManager -from . import _values from . import utils +from . import fsmanager + +fsmanager.makedirs() class Root(object): - def __init__(self,dbpool:DBPoolManager): - self.dbpool=dbpool - _values.dbpool=dbpool - @cherrypy.expose def default(self, *args, **kwargs): - if len(args)<2: - cherrypy.response.status=400 - return "Not enough arguments. Please provide seq and filename." - - seq=args[0] - if not seq.isdigit(): + if len(args)<1: cherrypy.response.status=400 - return "Seq is not a valid int." - seq=int(seq) - - fname=args[1] + return "Not enough arguments. Please provide filehash." + hash=args[0] - with self.dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("SELECT hash, channel, message, id FROM attachments WHERE seq=? AND filename=?",(seq,fname)) - match=cur.fetchone() - if not match: - cherrypy.response.status=404 - return "Seq/filename combination not found." + osname=fsmanager.hash2fname(hash) + if not os.path.isfile(osname): + ohash, osname=utils.download_uncached(hash) + if not ohash.startswith(hash): + cherrypy.log("Wrong hash! Expected %s, got %s"%(hash,ohash)) + cherrypy.response.status=500 + return "Got wrong content hash, not serving for the sake of security" - hash=match[0] - osname=os.path.join(hash[:2],hash) - if not os.path.isfile(osname): - ohash, osname=utils.download_uncached(*match[1:4]) - if not hash==ohash: - cherrypy.log("Wrong hash! Expected %s, got %s"%(hash,ohash)) - cherrypy.log("Fname=%s"%fname) - cherrypy.response.status=500 - return "Got wrong content hash, not serving for the sake of security" - - fd=open(osname, 'rb') - cherrypy.response.headers["Content-Type"]=magic.from_descriptor(fd.fileno(), mime=True) - fd.seek(0) - return fd + fd=open(osname, 'rb') + cherrypy.response.headers["Content-Type"]=magic.from_descriptor(fd.fileno(), mime=True) + fd.seek(0) + return fd - @cherrypy.expose - def get_url(self, attachment_id:int): - with self.dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("SELECT seq, filename FROM attachments WHERE id=?",(attachment_id,)) - data=cur.fetchone() - if not data: - cherrypy.response.status=404 - return - domain=cherrypy.request.base - url=domain+"/"+str(data[0])+'/'+quote(data[1],safe='') - return url + # TODO: reimplement + #@cherrypy.expose + #def get_url(self, platform:Literal["discord","telegram"], attachment_id:int): + # with self.dbpool.get_connection() as conn, conn.cursor() as cur: + # cur.execute("SELECT seq, filename FROM attachments WHERE id=?",(attachment_id,)) + # data=cur.fetchone() + # if not data: + # cherrypy.response.status=404 + # return + # domain=cherrypy.request.base + # url=domain+"/"+str(data[0])+'/'+quote(data[1],safe='') + # return url @cherrypy.expose def run_cron(self, token=None): @@ -84,13 +67,10 @@ class Root(object): return "Hello World!" def run(): - dbpool=DBPoolManager(cherrypy.engine) - dbpool.subscribe() - - manager=DiscordWsManager(cherrypy.engine, dbpool) + manager=DiscordWsManager(cherrypy.engine) manager.subscribe() - root=Root(dbpool=dbpool) + root=Root() cherrypy.quickstart(root) diff --git a/src/discord_image_bridge/discord.py b/src/discord_image_bridge/discord.py index e4854f2..2bfaca5 100644 --- a/src/discord_image_bridge/discord.py +++ b/src/discord_image_bridge/discord.py @@ -15,7 +15,6 @@ import cherrypy from requests import Request from . import sendqueue as rl -from .dbpool import DBPoolManager from . import _values TOKEN=_values.TOKEN @@ -220,13 +219,11 @@ class DiscordWsClient(WebSocketClient, DiscordWsClientMandatoryAttrs): class DiscordWsManager(cherrypy.process.plugins.SimplePlugin): - def __init__(self, bus, dbpool:DBPoolManager): + def __init__(self, bus): cherrypy.process.plugins.SimplePlugin.__init__(self, bus) self.client:DiscordWsClient=DiscordWsClientMandatoryAttrs() self.closing=Event() - self.dbpool=dbpool - def start(self): self.manager_thread=Thread(target=self.manager) self.manager_thread.start() diff --git a/src/discord_image_bridge/downloadpool.py b/src/discord_image_bridge/downloadpool.py index d86513e..fcf11cd 100644 --- a/src/discord_image_bridge/downloadpool.py +++ b/src/discord_image_bridge/downloadpool.py @@ -50,6 +50,8 @@ class DownloadPool(): data["callback"](*data["result"]) except Exception as e: data["exception"]=e + import traceback + traceback.print_exc() data["done"].set() def stop(self): diff --git a/src/discord_image_bridge/fsmanager.py b/src/discord_image_bridge/fsmanager.py new file mode 100644 index 0000000..7acaa3b --- /dev/null +++ b/src/discord_image_bridge/fsmanager.py @@ -0,0 +1,62 @@ +import os, fcntl, select +import json +from threading import Event + +def makedirs(): + for dir in ["cache", "meta", "links"]: + if not os.path.isdir(dir): + os.mkdir(dir) + +def hash2fname(hash): + return os.path.join("cache",hash[:2],hash[:10]) + +def hash2meta_fname(hash): + return os.path.join("meta",hash[:10]+".json") + +class MetaFile: + def __init__(self, hash): + self.hash=hash + print("init") + + def __enter__(self): + fname=hash2meta_fname(self.hash) + print("Opening "+fname) + self.fd=open( fname, "r+" if os.path.isfile(fname) else "x+" ) + print("Opened metafile "+self.fd.name) + fcntl.lockf(self.fd, fcntl.LOCK_EX) + return self + + def read(self): + print("Read") + self.fd.seek(0) + if self.fd.read(1): + self.fd.seek(0) + return json.load(self.fd) + else: # file is empty + return {} + + def write(self, data): + print("Write") + self.fd.seek(0) + self.fd.truncate() + json.dump(data, self.fd) + + def __exit__(self, type, value, traceback): + fcntl.lockf(self.fd, fcntl.LOCK_UN) + +hashlocks:dict[str,Event]={} + +class DataFile: + def __init__(self, fd): + self.fd=fd + + def __enter__(self): + print("Lock") + fcntl.lockf(self.fd, fcntl.LOCK_EX) + return self.fd + + def __exit__(self, type, value, exception): + print("Unlock") + fcntl.lockf(self.fd, fcntl.LOCK_UN) + + diff --git a/src/discord_image_bridge/utils.py b/src/discord_image_bridge/utils.py index d24ce58..1e013c1 100644 --- a/src/discord_image_bridge/utils.py +++ b/src/discord_image_bridge/utils.py @@ -4,10 +4,10 @@ import cherrypy import requests from hashlib import sha256 as do_hash -from .dbpool import DBPoolManager from . import discord from . import _values from .downloadpool import DownloadPool +from . import fsmanager def download_and_cache(url, filename): @@ -16,16 +16,12 @@ def download_and_cache(url, filename): if resp.status_code==200: hash=do_hash(resp.content).hexdigest() try: - if not os.path.isdir(hash[:2]): - os.mkdir(hash[:2]) - fname=os.path.join(hash[:2],hash) - with open(fname,'wb') as fd: + fname=fsmanager.hash2fname(hash) + dirname=os.path.dirname(fname) + if not os.path.isdir(dirname): + os.mkdir(dirname) + with fsmanager.DataFile(open(fname,'wb')) as fd: fd.write(resp.content) - size=fd.tell() - - with _values.dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("INSERT INTO cache VALUES (?,?,?)",(hash, size, int(time()))) - conn.commit() return hash, fname @@ -35,8 +31,27 @@ def download_and_cache(url, filename): cherrypy.log("Error writing "+filename+" to disk: "+repr(e)) return None, None +def download_uncached(hash): + with fsmanager.MetaFile(hash) as metafd: + data=metafd.read() + if not "sources" in data: + cherrypy.log("No sources available for "+hash) + return None, None + for source in data["sources"]: + match source["type"]: + case "discord": + return download_uncached_discord( + channel=source["channel"], + msgid=source["message"], + attachmentid=source["attachment"] + ) + case _: + # NOTE: maybe log here? + return None, None + cherrypy.log("No sources available for "+hash) + return None, None -def download_uncached(channel, msgid, attachmentid): +def download_uncached_discord(channel, msgid, attachmentid): status,data=discord.channel_message_get(channel_id=channel, message_id=msgid) for attachment in data["attachments"]: if attachment["id"]==str(attachmentid): @@ -48,41 +63,20 @@ def clear_cache(): cherrypy.log("Clearing cache") c=0 before=int(time())-ttl - with _values.dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("SELECT hash FROM cache WHERE fetched<?",(before,)) - for (hash,) in cur: - print(hash) - path=os.path.join(hash[:2],hash) - if os.path.exists(path): - c+=1 - os.remove(path) - cur.execute("DELETE FROM cache WHERE fetched<?",(before,)) - conn.commit() - cherrypy.log("%d files removed"%c) + # TODO: Rewrite + #with _values.dbpool.get_connection() as conn, conn.cursor() as cur: + # cur.execute("SELECT hash FROM cache WHERE fetched<?",(before,)) + # for (hash,) in cur: + # print(hash) + # path=os.path.join(hash[:2],hash) + # if os.path.exists(path): + # c+=1 + # os.remove(path) + # cur.execute("DELETE FROM cache WHERE fetched<?",(before,)) + # conn.commit() + #cherrypy.log("%d files removed"%c) def on_ready(plugin:discord.DiscordWsManager, client:discord.DiscordWsClient): - dbpool:DBPoolManager=plugin.dbpool - - with dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute( - "CREATE TABLE IF NOT EXISTS attachments (" - "seq INT NOT NULL," - "filename VARCHAR(255) NOT NULL," - "channel BIGINT UNSIGNED," - "message BIGINT UNSIGNED," - "id BIGINT UNSIGNED," - "hash CHAR(64)" - ")" - ) - cur.execute( - "CREATE TABLE IF NOT EXISTS cache (" - "hash CHAR(64)," - "size BIGINT," - "fetched BIGINT" - ")" - ) - conn.commit() - download_pool=DownloadPool(download_and_cache) cherrypy.engine.subscribe("stop",download_pool.stop) @@ -90,36 +84,25 @@ def on_ready(plugin:discord.DiscordWsManager, client:discord.DiscordWsClient): def on_message_create(payload, data): msgid=data["id"] channel=data["channel_id"] - with dbpool.get_connection() as conn, conn.cursor() as cur: - for attachment in data["attachments"]: - id=attachment["id"] - fname=attachment["filename"] - - cur.execute("SELECT id FROM attachments WHERE id=? AND filename=?",(id,fname)) - if cur.fetchone(): - cherrypy.log("Attachment with id %s and fname `%s' already downloaded"%(id,fname)) - continue - - def callback(hash, disk_fname): - with dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("UPDATE attachments SET hash=? WHERE id=?",(hash,id)) - conn.commit() - - download_pool.exec(args=(attachment["url"], fname),callback=callback) + for attachment in data["attachments"]: + id=attachment["id"] + fname=attachment["filename"] - cur.execute("SELECT seq FROM attachments WHERE filename=? ORDER BY seq DESC LIMIT 1",(fname,)) - nr=cur.fetchone() - if nr: - nr=nr[0]+1 - else: - nr=0 - cherrypy.log("Caching attachment %s/%s %s under %d"%(channel,id,fname,nr)) + def callback(hash, disk_fname): + cherrypy.log("Writing meta") + with fsmanager.MetaFile(hash) as metafd: + data=metafd.read() + if not "sources" in data: + data["sources"]=[] + data["sources"].append({ + "type":"discord", + "message":msgid, + "channel":channel, + "attachment":id + }) + metafd.write(data) + cherrypy.log("Done") - cur.execute("INSERT INTO attachments (seq, filename, channel, message, id) VALUES (?,?,?,?,?)",( - nr,fname[:255], - channel,msgid,id - )) - conn.commit() - + download_pool.exec(args=(attachment["url"], fname),callback=callback) |
