Stop after 1024 tokens

This commit is contained in:
Anthony Wang 2023-04-10 14:33:41 +00:00
parent 3432b3be76
commit 9a4bcec9cc
Signed by: a
GPG key ID: 42A5B952E6DD8D38

View file

@@ -20,12 +20,10 @@ def llama(prompt):
"-m",
"/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin",
"-n",
"4096",
"1024",
"-c",
"2048",
"1024",
"-r",
"### Human:",
"-p",
f"### Human: {prompt}\n### Assistant:",
],
stderr=subprocess.STDOUT,
@@ -34,7 +32,7 @@ def llama(prompt):
for c in iter(lambda: process.stdout.read(1), b""):
yield c
finally:
process.terminate()
process.kill()
return Response(generate(), mimetype="text/plain")