about | summary | bugs & patches | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vosjedev <vosje@vosjedev.net> 2026-01-25 11:39:36 +0100
committer: Vosjedev <vosje@vosjedev.net> 2026-01-25 11:39:36 +0100
commit: b032cbf4c404752b3fc1e1ad106d317d5c09c98e (patch)
tree: d5691ea324cff22830c1aa96f2405b827c9bdac1 /src
parent: a2de5d820c052e317820a907d78e73d8dc0e0ecd (diff)
download: discord_image_bridge-b032cbf4c404752b3fc1e1ad106d317d5c09c98e.tar.gz
discord_image_bridge-b032cbf4c404752b3fc1e1ad106d317d5c09c98e.tar.bz2
discord_image_bridge-b032cbf4c404752b3fc1e1ad106d317d5c09c98e.tar.xz
fuck databases when not needed
move everything from mariadb to a filesystem-based store of metadata

Signed-off-by: Vosjedev <vosje@vosjedev.net>
Diffstat (limited to 'src')
-rw-r--r-- src/discord_image_bridge/__init__.py | 86
-rw-r--r-- src/discord_image_bridge/discord.py | 5
-rw-r--r-- src/discord_image_bridge/downloadpool.py | 2
-rw-r--r-- src/discord_image_bridge/fsmanager.py | 62
-rw-r--r-- src/discord_image_bridge/utils.py | 129
5 files changed, 154 insertions, 130 deletions
diff --git a/src/discord_image_bridge/__init__.py b/src/discord_image_bridge/__init__.py
index 7570d70..e0a658a 100644
--- a/src/discord_image_bridge/__init__.py
+++ b/src/discord_image_bridge/__init__.py
@@ -6,69 +6,52 @@ if not os.path.isdir(dir):
os.chdir(dir)
print(os.getcwd())
-from logging import WARNING
+from typing import Literal
+
import magic
from urllib.parse import quote
import cherrypy
-from .dbpool import DBPoolManager
from .discord import DiscordWsManager
-from . import _values
from . import utils
+from . import fsmanager
+
+fsmanager.makedirs()
class Root(object):
- def __init__(self,dbpool:DBPoolManager):
- self.dbpool=dbpool
- _values.dbpool=dbpool
-
@cherrypy.expose
def default(self, *args, **kwargs):
- if len(args)<2:
- cherrypy.response.status=400
- return "Not enough arguments. Please provide seq and filename."
-
- seq=args[0]
- if not seq.isdigit():
+ if len(args)<1:
cherrypy.response.status=400
- return "Seq is not a valid int."
- seq=int(seq)
-
- fname=args[1]
+ return "Not enough arguments. Please provide filehash."
+ hash=args[0]
- with self.dbpool.get_connection() as conn, conn.cursor() as cur:
- cur.execute("SELECT hash, channel, message, id FROM attachments WHERE seq=? AND filename=?",(seq,fname))
- match=cur.fetchone()
- if not match:
- cherrypy.response.status=404
- return "Seq/filename combination not found."
+ osname=fsmanager.hash2fname(hash)
+ if not os.path.isfile(osname):
+ ohash, osname=utils.download_uncached(hash)
+ if not ohash.startswith(hash):
+ cherrypy.log("Wrong hash! Expected %s, got %s"%(hash,ohash))
+ cherrypy.response.status=500
+ return "Got wrong content hash, not serving for the sake of security"
- hash=match[0]
- osname=os.path.join(hash[:2],hash)
- if not os.path.isfile(osname):
- ohash, osname=utils.download_uncached(*match[1:4])
- if not hash==ohash:
- cherrypy.log("Wrong hash! Expected %s, got %s"%(hash,ohash))
- cherrypy.log("Fname=%s"%fname)
- cherrypy.response.status=500
- return "Got wrong content hash, not serving for the sake of security"
-
- fd=open(osname, 'rb')
- cherrypy.response.headers["Content-Type"]=magic.from_descriptor(fd.fileno(), mime=True)
- fd.seek(0)
- return fd
+ fd=open(osname, 'rb')
+ cherrypy.response.headers["Content-Type"]=magic.from_descriptor(fd.fileno(), mime=True)
+ fd.seek(0)
+ return fd
- @cherrypy.expose
- def get_url(self, attachment_id:int):
- with self.dbpool.get_connection() as conn, conn.cursor() as cur:
- cur.execute("SELECT seq, filename FROM attachments WHERE id=?",(attachment_id,))
- data=cur.fetchone()
- if not data:
- cherrypy.response.status=404
- return
- domain=cherrypy.request.base
- url=domain+"/"+str(data[0])+'/'+quote(data[1],safe='')
- return url
+ # TODO: reimplement
+ #@cherrypy.expose
+ #def get_url(self, platform:Literal["discord","telegram"], attachment_id:int):
+ # with self.dbpool.get_connection() as conn, conn.cursor() as cur:
+ # cur.execute("SELECT seq, filename FROM attachments WHERE id=?",(attachment_id,))
+ # data=cur.fetchone()
+ # if not data:
+ # cherrypy.response.status=404
+ # return
+ # domain=cherrypy.request.base
+ # url=domain+"/"+str(data[0])+'/'+quote(data[1],safe='')
+ # return url
@cherrypy.expose
def run_cron(self, token=None):
@@ -84,13 +67,10 @@ class Root(object):
return "Hello World!"
def run():
- dbpool=DBPoolManager(cherrypy.engine)
- dbpool.subscribe()
-
- manager=DiscordWsManager(cherrypy.engine, dbpool)
+ manager=DiscordWsManager(cherrypy.engine)
manager.subscribe()
- root=Root(dbpool=dbpool)
+ root=Root()
cherrypy.quickstart(root)
diff --git a/src/discord_image_bridge/discord.py b/src/discord_image_bridge/discord.py
index e4854f2..2bfaca5 100644
--- a/src/discord_image_bridge/discord.py
+++ b/src/discord_image_bridge/discord.py
@@ -15,7 +15,6 @@ import cherrypy
from requests import Request
from . import sendqueue as rl
-from .dbpool import DBPoolManager
from . import _values
TOKEN=_values.TOKEN
@@ -220,13 +219,11 @@ class DiscordWsClient(WebSocketClient, DiscordWsClientMandatoryAttrs):
class DiscordWsManager(cherrypy.process.plugins.SimplePlugin):
- def __init__(self, bus, dbpool:DBPoolManager):
+ def __init__(self, bus):
cherrypy.process.plugins.SimplePlugin.__init__(self, bus)
self.client:DiscordWsClient=DiscordWsClientMandatoryAttrs()
self.closing=Event()
- self.dbpool=dbpool
-
def start(self):
self.manager_thread=Thread(target=self.manager)
self.manager_thread.start()
diff --git a/src/discord_image_bridge/downloadpool.py b/src/discord_image_bridge/downloadpool.py
index d86513e..fcf11cd 100644
--- a/src/discord_image_bridge/downloadpool.py
+++ b/src/discord_image_bridge/downloadpool.py
@@ -50,6 +50,8 @@ class DownloadPool():
data["callback"](*data["result"])
except Exception as e:
data["exception"]=e
+ import traceback
+ traceback.print_exc()
data["done"].set()
def stop(self):
diff --git a/src/discord_image_bridge/fsmanager.py b/src/discord_image_bridge/fsmanager.py
new file mode 100644
index 0000000..7acaa3b
--- /dev/null
+++ b/src/discord_image_bridge/fsmanager.py
@@ -0,0 +1,62 @@
+import os, fcntl, select
+import json
+from threading import Event
+
+def makedirs():
+ for dir in ["cache", "meta", "links"]:
+ if not os.path.isdir(dir):
+ os.mkdir(dir)
+
+def hash2fname(hash):
+ return os.path.join("cache",hash[:2],hash[:10])
+
+def hash2meta_fname(hash):
+ return os.path.join("meta",hash[:10]+".json")
+
+class MetaFile:
+ def __init__(self, hash):
+ self.hash=hash
+ print("init")
+
+ def __enter__(self):
+ fname=hash2meta_fname(self.hash)
+ print("Opening "+fname)
+ self.fd=open( fname, "r+" if os.path.isfile(fname) else "x+" )
+ print("Opened metafile "+self.fd.name)
+ fcntl.lockf(self.fd, fcntl.LOCK_EX)
+ return self
+
+ def read(self):
+ print("Read")
+ self.fd.seek(0)
+ if self.fd.read(1):
+ self.fd.seek(0)
+ return json.load(self.fd)
+ else: # file is empty
+ return {}
+
+ def write(self, data):
+ print("Write")
+ self.fd.seek(0)
+ self.fd.truncate()
+ json.dump(data, self.fd)
+
+ def __exit__(self, type, value, traceback):
+ fcntl.lockf(self.fd, fcntl.LOCK_UN)
+
+hashlocks:dict[str,Event]={}
+
+class DataFile:
+ def __init__(self, fd):
+ self.fd=fd
+
+ def __enter__(self):
+ print("Lock")
+ fcntl.lockf(self.fd, fcntl.LOCK_EX)
+ return self.fd
+
+ def __exit__(self, type, value, exception):
+ print("Unlock")
+ fcntl.lockf(self.fd, fcntl.LOCK_UN)
+
+
diff --git a/src/discord_image_bridge/utils.py b/src/discord_image_bridge/utils.py
index d24ce58..1e013c1 100644
--- a/src/discord_image_bridge/utils.py
+++ b/src/discord_image_bridge/utils.py
@@ -4,10 +4,10 @@ import cherrypy
import requests
from hashlib import sha256 as do_hash
-from .dbpool import DBPoolManager
from . import discord
from . import _values
from .downloadpool import DownloadPool
+from . import fsmanager
def download_and_cache(url, filename):
@@ -16,16 +16,12 @@ def download_and_cache(url, filename):
if resp.status_code==200:
hash=do_hash(resp.content).hexdigest()
try:
- if not os.path.isdir(hash[:2]):
- os.mkdir(hash[:2])
- fname=os.path.join(hash[:2],hash)
- with open(fname,'wb') as fd:
+ fname=fsmanager.hash2fname(hash)
+ dirname=os.path.dirname(fname)
+ if not os.path.isdir(dirname):
+ os.mkdir(dirname)
+ with fsmanager.DataFile(open(fname,'wb')) as fd:
fd.write(resp.content)
- size=fd.tell()
-
- with _values.dbpool.get_connection() as conn, conn.cursor() as cur:
- cur.execute("INSERT INTO cache VALUES (?,?,?)",(hash, size, int(time())))
- conn.commit()
return hash, fname
@@ -35,8 +31,27 @@ def download_and_cache(url, filename):
cherrypy.log("Error writing "+filename+" to disk: "+repr(e))
return None, None
+def download_uncached(hash):
+ with fsmanager.MetaFile(hash) as metafd:
+ data=metafd.read()
+ if not "sources" in data:
+ cherrypy.log("No sources available for "+hash)
+ return None, None
+ for source in data["sources"]:
+ match source["type"]:
+ case "discord":
+ return download_uncached_discord(
+ channel=source["channel"],
+ msgid=source["message"],
+ attachmentid=source["attachment"]
+ )
+ case _:
+ # NOTE: maybe log here?
+ return None, None
+ cherrypy.log("No sources available for "+hash)
+ return None, None
-def download_uncached(channel, msgid, attachmentid):
+def download_uncached_discord(channel, msgid, attachmentid):
status,data=discord.channel_message_get(channel_id=channel, message_id=msgid)
for attachment in data["attachments"]:
if attachment["id"]==str(attachmentid):
@@ -48,41 +63,20 @@ def clear_cache():
cherrypy.log("Clearing cache")
c=0
before=int(time())-ttl
- with _values.dbpool.get_connection() as conn, conn.cursor() as cur:
- cur.execute("SELECT hash FROM cache WHERE fetched<?",(before,))
- for (hash,) in cur:
- print(hash)
- path=os.path.join(hash[:2],hash)
- if os.path.exists(path):
- c+=1
- os.remove(path)
- cur.execute("DELETE FROM cache WHERE fetched<?",(before,))
- conn.commit()
- cherrypy.log("%d files removed"%c)
+ # TODO: Rewrite
+ #with _values.dbpool.get_connection() as conn, conn.cursor() as cur:
+ # cur.execute("SELECT hash FROM cache WHERE fetched<?",(before,))
+ # for (hash,) in cur:
+ # print(hash)
+ # path=os.path.join(hash[:2],hash)
+ # if os.path.exists(path):
+ # c+=1
+ # os.remove(path)
+ # cur.execute("DELETE FROM cache WHERE fetched<?",(before,))
+ # conn.commit()
+ #cherrypy.log("%d files removed"%c)
def on_ready(plugin:discord.DiscordWsManager, client:discord.DiscordWsClient):
- dbpool:DBPoolManager=plugin.dbpool
-
- with dbpool.get_connection() as conn, conn.cursor() as cur:
- cur.execute(
- "CREATE TABLE IF NOT EXISTS attachments ("
- "seq INT NOT NULL,"
- "filename VARCHAR(255) NOT NULL,"
- "channel BIGINT UNSIGNED,"
- "message BIGINT UNSIGNED,"
- "id BIGINT UNSIGNED,"
- "hash CHAR(64)"
- ")"
- )
- cur.execute(
- "CREATE TABLE IF NOT EXISTS cache ("
- "hash CHAR(64),"
- "size BIGINT,"
- "fetched BIGINT"
- ")"
- )
- conn.commit()
-
download_pool=DownloadPool(download_and_cache)
cherrypy.engine.subscribe("stop",download_pool.stop)
@@ -90,36 +84,25 @@ def on_ready(plugin:discord.DiscordWsManager, client:discord.DiscordWsClient):
def on_message_create(payload, data):
msgid=data["id"]
channel=data["channel_id"]
- with dbpool.get_connection() as conn, conn.cursor() as cur:
- for attachment in data["attachments"]:
- id=attachment["id"]
- fname=attachment["filename"]
-
- cur.execute("SELECT id FROM attachments WHERE id=? AND filename=?",(id,fname))
- if cur.fetchone():
- cherrypy.log("Attachment with id %s and fname `%s' already downloaded"%(id,fname))
- continue
-
- def callback(hash, disk_fname):
- with dbpool.get_connection() as conn, conn.cursor() as cur:
- cur.execute("UPDATE attachments SET hash=? WHERE id=?",(hash,id))
- conn.commit()
-
- download_pool.exec(args=(attachment["url"], fname),callback=callback)
+ for attachment in data["attachments"]:
+ id=attachment["id"]
+ fname=attachment["filename"]
- cur.execute("SELECT seq FROM attachments WHERE filename=? ORDER BY seq DESC LIMIT 1",(fname,))
- nr=cur.fetchone()
- if nr:
- nr=nr[0]+1
- else:
- nr=0
- cherrypy.log("Caching attachment %s/%s %s under %d"%(channel,id,fname,nr))
+ def callback(hash, disk_fname):
+ cherrypy.log("Writing meta")
+ with fsmanager.MetaFile(hash) as metafd:
+ data=metafd.read()
+ if not "sources" in data:
+ data["sources"]=[]
+ data["sources"].append({
+ "type":"discord",
+ "message":msgid,
+ "channel":channel,
+ "attachment":id
+ })
+ metafd.write(data)
+ cherrypy.log("Done")
- cur.execute("INSERT INTO attachments (seq, filename, channel, message, id) VALUES (?,?,?,?,?)",(
- nr,fname[:255],
- channel,msgid,id
- ))
- conn.commit()
-
+ download_pool.exec(args=(attachment["url"], fname),callback=callback)