Don't index text since it interferes with other results

This commit is contained in:
Anthony Wang 2024-07-28 02:58:02 +00:00
parent 945c2da5d0
commit 7c6dad605f
Signed by: a
SSH key fingerprint: SHA256:B5ADfMCqd2M7d/jtXDoihAV/yfXOAbWWri9+GdCN4hQ

View file

@ -98,18 +98,16 @@ def index(path, parent):
# Modified or not in emb
emb = None
type = mimetypes.guess_type(path)[0]
try:
if type is None and os.path.getsize(path) < 2**16:
with open(path) as f:
emb = model.embed_text(f.read())
elif type.startswith("audio"):
emb = model.embed_audio(path)
elif type.startswith("image"):
emb = model.embed_image(path)
elif type.startswith("video") and os.path.getsize(path) < 2**25:
emb = model.embed_video(path)
except:
print(traceback.format_exc())
if isinstance(type, str):
try:
if type.startswith("audio"):
emb = model.embed_audio(path)
elif type.startswith("image"):
emb = model.embed_image(path)
elif type.startswith("video") and os.path.getsize(path) < 2**25:
emb = model.embed_video(path)
except:
print(traceback.format_exc())
if emb is None:
# Might be in index but no longer valid