Add llama streaming script

Anthony Wang 2023-04-10 03:40:19 +00:00
parent 6d7c618a19
commit 5b92e1661e
Signed by: a
GPG key ID: 42A5B952E6DD8D38

llama.py (new file, 48 lines)

@@ -0,0 +1,48 @@
import os
import subprocess
import time
import threading

from flask import Flask, Response

app = Flask(__name__)


@app.route("/<prompt>")
def llama(prompt):
    # Browsers also request /favicon.ico; return an empty response instead of
    # running the model (a bare `return` would make Flask raise an error).
    if prompt == "favicon.ico":
        return "", 204

    def generate():
        try:
            # Run llama.cpp on the prompt; -n -1 removes the token limit so
            # generation continues until the model stops on its own.
            process = subprocess.Popen(
                [
                    "/opt/llama.cpp/main",
                    "-m",
                    "/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin",
                    "-n",
                    "-1",
                    "-p",
                    prompt,
                ],
                stderr=subprocess.STDOUT,
                stdout=subprocess.PIPE,
            )
            # Stream output to the client byte by byte as it is produced.
            for c in iter(lambda: process.stdout.read(1), b""):
                yield c
        except GeneratorExit:
            # The client disconnected: stop the inference process.
            process.terminate()

    return Response(generate(), mimetype="text/plain")


path = "/srv/http/pages/textgen"


def fixperms():
    # app.run() below blocks, so wait briefly in a background thread for the
    # Unix socket to be created, then open it up to the front-end web server.
    time.sleep(0.1)
    os.chmod(path, 0o666)  # mode must be octal; the decimal literal 666 was a bug


threading.Thread(target=fixperms).start()
app.run(host="unix://" + path)
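With the server running, one quick way to try it (a sketch: the socket path is the one hard-coded above, "Hello" is an arbitrary prompt, and a prompt containing spaces would need URL-encoding) is curl's Unix-socket support:

    curl -N --unix-socket /srv/http/pages/textgen http://localhost/Hello

The -N flag turns off curl's output buffering so tokens appear as they stream.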