textgen/main.py

50 lines
1.1 KiB
Python

import os
import subprocess
import time
import threading
from flask import Flask, Response
# Flask application object; the route below is registered on it and it is
# started at the bottom of the file via app.run().
app = Flask(__name__)
@app.route("/<prompt>")
def llama(prompt):
    """Stream a llama.cpp completion for *prompt* as a plain-text response.

    The URL path segment is used as the prompt. It is passed to the
    llama.cpp ``main`` binary as a single argv element (no shell involved),
    followed by a ``### Response:`` marker, and the binary's stdout is
    relayed to the client as it is produced.

    Args:
        prompt: The text captured from the request path.

    Returns:
        An empty 204 response for ``favicon.ico``; otherwise a streaming
        ``text/plain`` response carrying the subprocess output.
    """
    if prompt == "favicon.ico":
        # Presumably guards against browsers' automatic favicon requests
        # launching the model; answer "no content" instead.
        return Response(status=204)

    def generate():
        """Yield raw stdout chunks from the llama.cpp process until EOF."""
        # Spawn outside the try block: if the launch itself fails there is
        # no process to clean up, and the original error must propagate
        # instead of being masked by an unbound-variable error in `finally`.
        process = subprocess.Popen(
            [
                "/opt/llama.cpp/main",
                "-ngl",
                "32",
                "-m",
                "/opt/llama.cpp/models/wizardLM-7B.ggmlv3.q4_0.bin",
                "-n",
                "1024",
                "-p",
                f"{prompt}\n\n### Response:",
            ],
            stdout=subprocess.PIPE,
        )
        try:
            # read1() hands back whatever bytes are currently available
            # (blocking only until there is some data), so output is still
            # streamed promptly without paying for one chunk per byte.
            for chunk in iter(lambda: process.stdout.read1(4096), b""):
                yield chunk
        finally:
            # Runs on normal EOF and when the generator is closed early;
            # stop the child, release the pipe, and reap the process so it
            # does not linger as a zombie.
            process.kill()
            process.stdout.close()
            process.wait()

    return Response(generate(), mimetype="text/plain")
# Filesystem path that is handed to app.run() as a "unix://" host and whose
# permissions are adjusted by fixperms().
path = "/srv/http/pages/textgen"
def fixperms(target=None, mode=0o660, delay=0.1):
    """Set the permission bits on the server's socket path after a delay.

    Intended to run in a background thread started just before the server,
    so the sleep presumably gives the server time to create the file.

    Args:
        target: Path to chmod. Defaults to the module-level ``path``.
        mode: Permission bits to apply. Defaults to ``0o660`` (read/write
            for owner and group). This must be an octal literal: the
            decimal number 660 equals ``0o1224``, which is a different and
            unintended set of bits.
        delay: Seconds to sleep before changing the permissions.

    Raises:
        OSError: If ``target`` does not exist or cannot be modified.
    """
    if target is None:
        # Resolved at call time so the module-level default is honoured.
        target = path
    time.sleep(delay)
    os.chmod(target, mode)
# Launch the permission fixer first: app.run() blocks, so fixperms() has to
# run concurrently in its own thread.
threading.Thread(target=fixperms).start()

# Serve the app with the path given as a "unix://" host.
app.run(host=f"unix://{path}")