Stop after 1024 tokens

This commit is contained in:
Anthony Wang 2023-04-10 14:33:41 +00:00
parent 3432b3be76
commit 9a4bcec9cc
Signed by: a
GPG key ID: 42A5B952E6DD8D38

View file

@@ -20,12 +20,10 @@ def llama(prompt):
"-m",
"/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin",
"-n",
"4096",
"1024",
"-c",
"2048",
"1024",
"-r",
"### Human:",
"-p",
f"### Human: {prompt}\n### Assistant:",
],
stderr=subprocess.STDOUT,
@@ -34,7 +32,7 @@ def llama(prompt):
for c in iter(lambda: process.stdout.read(1), b""):
yield c
finally:
process.terminate()
process.kill()
return Response(generate(), mimetype="text/plain")