From b032cbf4c404752b3fc1e1ad106d317d5c09c98e Mon Sep 17 00:00:00 2001 From: Vosjedev Date: Sun, 25 Jan 2026 11:39:36 +0100 Subject: fuck databases when not needed move everything from mariadb to a filesystem-based store of metadata Signed-off-by: Vosjedev --- src/discord_image_bridge/__init__.py | 90 ++++++++------------- src/discord_image_bridge/discord.py | 5 +- src/discord_image_bridge/downloadpool.py | 2 + src/discord_image_bridge/fsmanager.py | 62 ++++++++++++++ src/discord_image_bridge/utils.py | 135 ++++++++++++++----------------- 5 files changed, 159 insertions(+), 135 deletions(-) create mode 100644 src/discord_image_bridge/fsmanager.py (limited to 'src/discord_image_bridge') diff --git a/src/discord_image_bridge/__init__.py b/src/discord_image_bridge/__init__.py index 7570d70..e0a658a 100644 --- a/src/discord_image_bridge/__init__.py +++ b/src/discord_image_bridge/__init__.py @@ -6,69 +6,52 @@ if not os.path.isdir(dir): os.chdir(dir) print(os.getcwd()) -from logging import WARNING +from typing import Literal + import magic from urllib.parse import quote import cherrypy -from .dbpool import DBPoolManager from .discord import DiscordWsManager -from . import _values from . import utils +from . import fsmanager + +fsmanager.makedirs() class Root(object): - def __init__(self,dbpool:DBPoolManager): - self.dbpool=dbpool - _values.dbpool=dbpool - @cherrypy.expose def default(self, *args, **kwargs): - if len(args)<2: - cherrypy.response.status=400 - return "Not enough arguments. Please provide seq and filename." - - seq=args[0] - if not seq.isdigit(): + if len(args)<1: cherrypy.response.status=400 - return "Seq is not a valid int." - seq=int(seq) - - fname=args[1] - - with self.dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("SELECT hash, channel, message, id FROM attachments WHERE seq=? AND filename=?",(seq,fname)) - match=cur.fetchone() - if not match: - cherrypy.response.status=404 - return "Seq/filename combination not found." + return "Not enough arguments. Please provide filehash." + hash=args[0] + + osname=fsmanager.hash2fname(hash) + if not os.path.isfile(osname): + ohash, osname=utils.download_uncached(hash) + if not ohash.startswith(hash): + cherrypy.log("Wrong hash! Expected %s, got %s"%(hash,ohash)) + cherrypy.response.status=500 + return "Got wrong content hash, not serving for the sake of security" - hash=match[0] - osname=os.path.join(hash[:2],hash) - if not os.path.isfile(osname): - ohash, osname=utils.download_uncached(*match[1:4]) - if not hash==ohash: - cherrypy.log("Wrong hash! Expected %s, got %s"%(hash,ohash)) - cherrypy.log("Fname=%s"%fname) - cherrypy.response.status=500 - return "Got wrong content hash, not serving for the sake of security" - - fd=open(osname, 'rb') - cherrypy.response.headers["Content-Type"]=magic.from_descriptor(fd.fileno(), mime=True) - fd.seek(0) - return fd - - @cherrypy.expose - def get_url(self, attachment_id:int): - with self.dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("SELECT seq, filename FROM attachments WHERE id=?",(attachment_id,)) - data=cur.fetchone() - if not data: - cherrypy.response.status=404 - return - domain=cherrypy.request.base - url=domain+"/"+str(data[0])+'/'+quote(data[1],safe='') - return url + fd=open(osname, 'rb') + cherrypy.response.headers["Content-Type"]=magic.from_descriptor(fd.fileno(), mime=True) + fd.seek(0) + return fd + + # TODO: reimplement + #@cherrypy.expose + #def get_url(self, platform:Literal["discord","telegram"], attachment_id:int): + # with self.dbpool.get_connection() as conn, conn.cursor() as cur: + # cur.execute("SELECT seq, filename FROM attachments WHERE id=?",(attachment_id,)) + # data=cur.fetchone() + # if not data: + # cherrypy.response.status=404 + # return + # domain=cherrypy.request.base + # url=domain+"/"+str(data[0])+'/'+quote(data[1],safe='') + # return url @cherrypy.expose def run_cron(self, token=None): @@ -84,13 +67,10 @@ class Root(object): return "Hello World!" def run(): - dbpool=DBPoolManager(cherrypy.engine) - dbpool.subscribe() - - manager=DiscordWsManager(cherrypy.engine, dbpool) + manager=DiscordWsManager(cherrypy.engine) manager.subscribe() - root=Root(dbpool=dbpool) + root=Root() cherrypy.quickstart(root) diff --git a/src/discord_image_bridge/discord.py b/src/discord_image_bridge/discord.py index e4854f2..2bfaca5 100644 --- a/src/discord_image_bridge/discord.py +++ b/src/discord_image_bridge/discord.py @@ -15,7 +15,6 @@ import cherrypy from requests import Request from . import sendqueue as rl -from .dbpool import DBPoolManager from . import _values TOKEN=_values.TOKEN @@ -220,13 +219,11 @@ class DiscordWsClient(WebSocketClient, DiscordWsClientMandatoryAttrs): class DiscordWsManager(cherrypy.process.plugins.SimplePlugin): - def __init__(self, bus, dbpool:DBPoolManager): + def __init__(self, bus): cherrypy.process.plugins.SimplePlugin.__init__(self, bus) self.client:DiscordWsClient=DiscordWsClientMandatoryAttrs() self.closing=Event() - self.dbpool=dbpool - def start(self): self.manager_thread=Thread(target=self.manager) self.manager_thread.start() diff --git a/src/discord_image_bridge/downloadpool.py b/src/discord_image_bridge/downloadpool.py index d86513e..fcf11cd 100644 --- a/src/discord_image_bridge/downloadpool.py +++ b/src/discord_image_bridge/downloadpool.py @@ -50,6 +50,8 @@ class DownloadPool(): data["callback"](*data["result"]) except Exception as e: data["exception"]=e + import traceback + traceback.print_exc() data["done"].set() def stop(self): diff --git a/src/discord_image_bridge/fsmanager.py b/src/discord_image_bridge/fsmanager.py new file mode 100644 index 0000000..7acaa3b --- /dev/null +++ b/src/discord_image_bridge/fsmanager.py @@ -0,0 +1,62 @@ +import os, fcntl, select +import json +from threading import Event + +def makedirs(): + for dir in ["cache", "meta", "links"]: + if not os.path.isdir(dir): + os.mkdir(dir) + +def hash2fname(hash): + return os.path.join("cache",hash[:2],hash[:10]) + +def hash2meta_fname(hash): + return os.path.join("meta",hash[:10]+".json") + +class MetaFile: + def __init__(self, hash): + self.hash=hash + print("init") + + def __enter__(self): + fname=hash2meta_fname(self.hash) + print("Opening "+fname) + self.fd=open( fname, "r+" if os.path.isfile(fname) else "x+" ) + print("Opened metafile "+self.fd.name) + fcntl.lockf(self.fd, fcntl.LOCK_EX) + return self + + def read(self): + print("Read") + self.fd.seek(0) + if self.fd.read(1): + self.fd.seek(0) + return json.load(self.fd) + else: # file is empty + return {} + + def write(self, data): + print("Write") + self.fd.seek(0) + self.fd.truncate() + json.dump(data, self.fd) + + def __exit__(self, type, value, traceback): + fcntl.lockf(self.fd, fcntl.LOCK_UN) + +hashlocks:dict[str,Event]={} + +class DataFile: + def __init__(self, fd): + self.fd=fd + + def __enter__(self): + print("Lock") + fcntl.lockf(self.fd, fcntl.LOCK_EX) + return self.fd + + def __exit__(self, type, value, exception): + print("Unlock") + fcntl.lockf(self.fd, fcntl.LOCK_UN) + + diff --git a/src/discord_image_bridge/utils.py b/src/discord_image_bridge/utils.py index d24ce58..1e013c1 100644 --- a/src/discord_image_bridge/utils.py +++ b/src/discord_image_bridge/utils.py @@ -4,10 +4,10 @@ import cherrypy import requests from hashlib import sha256 as do_hash -from .dbpool import DBPoolManager from . import discord from . import _values from .downloadpool import DownloadPool +from . import fsmanager def download_and_cache(url, filename): @@ -16,16 +16,12 @@ def download_and_cache(url, filename): if resp.status_code==200: hash=do_hash(resp.content).hexdigest() try: - if not os.path.isdir(hash[:2]): - os.mkdir(hash[:2]) - fname=os.path.join(hash[:2],hash) - with open(fname,'wb') as fd: + fname=fsmanager.hash2fname(hash) + dirname=os.path.dirname(fname) + if not os.path.isdir(dirname): + os.mkdir(dirname) + with fsmanager.DataFile(open(fname,'wb')) as fd: fd.write(resp.content) - size=fd.tell() - - with _values.dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("INSERT INTO cache VALUES (?,?,?)",(hash, size, int(time()))) - conn.commit() return hash, fname @@ -35,8 +31,27 @@ def download_and_cache(url, filename): cherrypy.log("Error writing "+filename+" to disk: "+repr(e)) return None, None +def download_uncached(hash): + with fsmanager.MetaFile(hash) as metafd: + data=metafd.read() + if not "sources" in data: + cherrypy.log("No sources available for "+hash) + return None, None + for source in data["sources"]: + match source["type"]: + case "discord": + return download_uncached_discord( + channel=source["channel"], + msgid=source["message"], + attachmentid=source["attachment"] + ) + case _: + # NOTE: maybe log here? + return None, None + cherrypy.log("No sources available for "+hash) + return None, None -def download_uncached(channel, msgid, attachmentid): +def download_uncached_discord(channel, msgid, attachmentid): status,data=discord.channel_message_get(channel_id=channel, message_id=msgid) for attachment in data["attachments"]: if attachment["id"]==str(attachmentid): @@ -48,41 +63,20 @@ def clear_cache(): cherrypy.log("Clearing cache") c=0 before=int(time())-ttl - with _values.dbpool.get_connection() as conn, conn.cursor() as cur: - cur.execute("SELECT hash FROM cache WHERE fetched