Stop after 1024 tokens

This commit is contained in:
Anthony Wang 2023-04-10 14:33:41 +00:00
parent 3432b3be76
commit 9a4bcec9cc
Signed by: a
GPG key ID: 42A5B952E6DD8D38

View file

@@ -20,12 +20,10 @@ def llama(prompt):
"-m", "-m",
"/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin", "/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin",
"-n", "-n",
"4096", "1024",
"-c", "-c",
"2048", "1024",
"-r", "-r",
"### Human:",
"-p",
f"### Human: {prompt}\n### Assistant:", f"### Human: {prompt}\n### Assistant:",
], ],
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
@@ -34,7 +32,7 @@ def llama(prompt):
for c in iter(lambda: process.stdout.read(1), b""): for c in iter(lambda: process.stdout.read(1), b""):
yield c yield c
finally: finally:
process.terminate() process.kill()
return Response(generate(), mimetype="text/plain") return Response(generate(), mimetype="text/plain")