Stop after 1024 tokens
commit 9a4bcec9cc
parent 3432b3be76
1 changed file with 3 additions and 5 deletions
llama.py | 8 +++-----
--- a/llama.py
+++ b/llama.py
@@ -20,12 +20,10 @@ def llama(prompt):
             "-m",
             "/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin",
             "-n",
-            "4096",
+            "1024",
             "-c",
-            "2048",
+            "1024",
             "-r",
-            "### Human:",
-            "-p",
             f"### Human: {prompt}\n### Assistant:",
         ],
         stderr=subprocess.STDOUT,
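
For context, a minimal sketch of how the Popen call likely reads after this hunk. Only the argument list is taken from the diff; the binary path (/opt/llama.cpp/main), the stdout=subprocess.PIPE keyword, and the surrounding structure are assumptions. Note that with the "-p" flag and the old "### Human:" stop string removed, the formatted prompt string now follows "-r" (llama.cpp's reverse-prompt flag) rather than "-p" (its prompt flag):

import subprocess

def llama(prompt):
    process = subprocess.Popen(
        [
            "/opt/llama.cpp/main",  # assumed path to the llama.cpp binary
            "-m",  # model file
            "/opt/llama.cpp/models/ggml-vicuna-13b-4bit.bin",
            "-n",  # max tokens to generate, cut from 4096 to 1024 by this commit
            "1024",
            "-c",  # context size, cut from 2048 to 1024 by this commit
            "1024",
            "-r",  # reverse prompt: generation stops when this string is emitted
            f"### Human: {prompt}\n### Assistant:",
        ],
        stdout=subprocess.PIPE,  # assumed; the streaming loop below reads it
        stderr=subprocess.STDOUT,
    )
    ...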
@@ -34,7 +32,7 @@ def llama(prompt):
             for c in iter(lambda: process.stdout.read(1), b""):
                 yield c
         finally:
-            process.terminate()
+            process.kill()
 
     return Response(generate(), mimetype="text/plain")
 
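
On the second hunk: on POSIX, process.terminate() sends SIGTERM, which a process can catch or ignore, while process.kill() sends SIGKILL, which cannot be blocked, so the llama.cpp child is reliably stopped once the generator is closed, e.g. when the HTTP client disconnects mid-stream. A sketch of the streaming tail of llama(), assuming generate() is a nested function as the context lines suggest (Response here is flask.Response):

    def generate():
        try:
            # Stream byte-by-byte so output reaches the client as soon as
            # llama.cpp emits it, rather than only after the process exits.
            for c in iter(lambda: process.stdout.read(1), b""):
                yield c
        finally:
            # Runs when the client disconnects and Flask closes the generator;
            # SIGKILL (kill) cannot be ignored, unlike SIGTERM (terminate).
            process.kill()

    return Response(generate(), mimetype="text/plain")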