# https://github.com/nsarrazin/serge/blob/main/api/utils/generate.py

import subprocess, os
import asyncio
import logging

logger = logging.getLogger(__name__)


async def generate(
    prompt: str,
):
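    """Run the alpaca.cpp chat binary on `prompt` and stream its decoded stdout."""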
    CHUNK_SIZE = 4

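    # Full command line for the alpaca.cpp chat binary, including fixed sampling parameters.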
    args = (
        "/home/hendrik/Projects/AI/alpaca.cpp/chat",
        "--model",
        "/home/hendrik/Projects/AI/alpaca.cpp/" + "ggml-alpaca-7b-q4.bin",
        "--prompt",
        prompt,
        "--n_predict",
        str(256),
        "--temp",
        str(0.1),
        "--top_k",
        str(50),
        "--top_p",
        str(0.95),
        "--repeat_last_n",
        str(64),
        "--repeat_penalty",
        str(1.3),
        "--ctx_size",
        str(512),
        "--threads",
        str(4),
    )

    logger.debug("Calling LLaMa with arguments: %s", args)
    print(prompt)

    procLlama = await asyncio.create_subprocess_exec(
        *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

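    # Stream stdout in CHUNK_SIZE-byte reads; on EOF, check the exit code and raise on failure.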
    while True:
        chunk = await procLlama.stdout.read(CHUNK_SIZE)

        if not chunk:
            return_code = await procLlama.wait()

            if return_code != 0:
                error_output = await procLlama.stderr.read()
                logger.error(error_output.decode("utf-8"))
                raise ValueError(f"RETURN CODE {return_code}\n\n" + error_output.decode("utf-8"))
            else:
                return

        try:
            chunk = chunk.decode("utf-8")
        except UnicodeDecodeError:
            # A small read can split a multi-byte UTF-8 character; stop streaming in that case.
            return

        yield chunk


async def get_full_prompt(simple_prompt: str, chat_history=None):
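    """Build an Alpaca-style Instruction/Response prompt from recent chat history and the new message."""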
    prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request." + "\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_with_input(simple_prompt: str, additional_input: str, chat_history=None):
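    """Same Alpaca-style prompt as get_full_prompt, but with an additional Input section."""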
    prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request." + "\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Input:\n" + additional_input + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_chat_style(simple_prompt: str, chat_history=None):
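    """Build a dialog-transcript prompt in which the assistant persona is named Julia."""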
    prompt = "Transcript of a dialog, where the User interacts with an Assistant named Julia. Julia is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision." + "\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "User: " + message["message"] + "\n"
            else:
                prompt += "Julia: " + message["message"] + "\n"

    prompt += "User: " + simple_prompt + "\n"
    prompt += "Julia: "

    return prompt