Add llama streaming script
This commit is contained in:
parent
6d7c618a19
commit
5b92e1661e
48
llama.py
Normal file
48
llama.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
import os
|
||||
import subprocess
|
||||
import time
|
||||
import threading
|
||||
from flask import Flask, Response
|
||||
|
||||
# WSGI application object; the single route below is registered on it.
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route("/<prompt>")
def llama(prompt):
    """Stream llama.cpp output for *prompt* as plain text.

    The prompt is taken directly from the URL path.  llama.cpp's stdout
    (with stderr merged in) is streamed to the client one byte at a time
    so tokens appear as soon as they are generated.

    Returns:
        A streaming ``text/plain`` Response, or an empty 204 response
        for favicon requests.
    """
    # Browsers request /favicon.ico automatically; don't feed it to the
    # model.  Returning bare None from a Flask view raises a TypeError
    # (500 to the client), so send an explicit empty 204 instead.
    if prompt == "favicon.ico":
        return "", 204

    def generate():
        process = None
        try:
            process = subprocess.Popen(
                [
                    "/opt/llama.cpp/main",
                    "-m",
                    "/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin",
                    "-n",
                    "-1",  # -n -1: generate until the model stops on its own
                    "-p",
                    prompt,
                ],
                stderr=subprocess.STDOUT,  # interleave diagnostics with output
                stdout=subprocess.PIPE,
            )
            # Read one byte at a time so partial tokens reach the client
            # immediately instead of sitting in a buffer.
            for byte in iter(lambda: process.stdout.read(1), b""):
                yield byte
        except GeneratorExit:
            # Client disconnected mid-stream: stop the inference process.
            process.terminate()
        finally:
            # Close the pipe and reap the child so terminated or finished
            # processes don't linger as zombies.
            if process is not None:
                process.stdout.close()
                process.wait()

    return Response(generate(), mimetype="text/plain")
|
||||
|
||||
|
||||
# Filesystem location of the unix domain socket the app listens on.
path = "/srv/http/pages/textgen"
|
||||
|
||||
|
||||
def fixperms(target=None):
    """Make the server's unix socket readable/writable by everyone.

    ``app.run`` below blocks the main thread, so this runs in a
    background thread: it waits briefly for the socket file to be
    created, then chmods it.

    Args:
        target: path to chmod; defaults to the module-level socket path.
    """
    # Give app.run() a moment to create the socket file first.
    time.sleep(0.1)
    # BUG FIX: the original passed the decimal literal 666, which is
    # mode 0o1232 (sticky bit + odd perms).  The intended permission is
    # octal 0o666 (rw-rw-rw-).
    os.chmod(path if target is None else target, 0o666)
# Fix the socket's permissions shortly after app.run() creates it; the
# chmod must happen from a background thread because app.run() blocks.
perm_thread = threading.Thread(target=fixperms)
perm_thread.start()

# Serve over a unix domain socket at the shared web-root path.
app.run(host="unix://" + path)
|
Loading…
Reference in a new issue