Add llama streaming script

Anthony Wang 2023-04-10 03:40:19 +00:00
parent 6d7c618a19
commit 5b92e1661e
Signed by: a
GPG key ID: 42A5B952E6DD8D38

llama.py (new file, 48 lines)

@@ -0,0 +1,48 @@
import os
import subprocess
import time
import threading

from flask import Flask, Response

app = Flask(__name__)


@app.route("/<prompt>")
def llama(prompt):
    # Browsers also request /favicon.ico; return an empty response instead of
    # running the model (a bare `return` would make Flask raise an error).
    if prompt == "favicon.ico":
        return "", 204

    def generate():
        try:
            # Run llama.cpp on the prompt; -n -1 removes the token limit so
            # generation continues until the model stops on its own.
            process = subprocess.Popen(
                [
                    "/opt/llama.cpp/main",
                    "-m",
                    "/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin",
                    "-n",
                    "-1",
                    "-p",
                    prompt,
                ],
                stderr=subprocess.STDOUT,
                stdout=subprocess.PIPE,
            )
            # Stream output to the client byte by byte as it is produced.
            for c in iter(lambda: process.stdout.read(1), b""):
                yield c
        except GeneratorExit:
            # The client disconnected: stop the inference process.
            process.terminate()

    return Response(generate(), mimetype="text/plain")


path = "/srv/http/pages/textgen"


def fixperms():
    # app.run() below blocks, so wait briefly in a background thread for the
    # Unix socket to be created, then open it up to the front-end web server.
    time.sleep(0.1)
    os.chmod(path, 0o666)  # mode must be octal; the decimal literal 666 was a bug


threading.Thread(target=fixperms).start()
app.run(host="unix://" + path)
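With the server running, one quick way to try it (a sketch: the socket path is the one hard-coded above, "Hello" is an arbitrary prompt, and a prompt containing spaces would need URL-encoding) is curl's Unix-socket support:

    curl -N --unix-socket /srv/http/pages/textgen http://localhost/Hello

The -N flag turns off curl's output buffering so tokens appear as they stream.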