textgen/main.py

import os
import subprocess
import time
import threading
from flask import Flask, Response

app = Flask(__name__)


@app.route("/<prompt>")
def llama(prompt):
    # Browsers also request /favicon.ico; return "No Content" instead of
    # feeding it to the model.
    if prompt == "favicon.ico":
        return Response(status=204)

    def generate():
        # Run llama.cpp and stream its stdout back to the client as it is
        # produced.
        process = subprocess.Popen(
            [
                "/opt/llama.cpp/main",
                "-ngl",
                "32",
                "-m",
                "/opt/llama.cpp/models/ggml-vicuna-7b-1.1-q4_0.bin",
                "-n",
                "1024",
                "-p",
                f"### Human: {prompt}\n### Assistant:",
            ],
            stdout=subprocess.PIPE,
        )
        try:
            # Read one byte at a time so tokens reach the client immediately.
            for c in iter(lambda: process.stdout.read(1), b""):
                yield c
        finally:
            # Kill the model process if the client disconnects mid-generation.
            process.kill()

    return Response(generate(), mimetype="text/plain")


path = "/srv/http/pages/textgen"
def fixperms():
time.sleep(0.1)
os.chmod(path, 660)
threading.Thread(target=fixperms).start()
app.run(host="unix://" + path)
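
# A quick way to exercise the service, assuming curl is available on the host
# (the prompt is taken from the URL path, so it should be URL-encoded):
#
#   curl --no-buffer --unix-socket /srv/http/pages/textgen \
#       http://localhost/Tell%20me%20a%20joke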