# https://github.com/nsarrazin/serge/blob/main/api/utils/generate.py
import subprocess, os
import asyncio
import codecs
import logging

logger = logging.getLogger(__name__)

# Default location of the alpaca.cpp binary and its model weights; callers can
# override them via the keyword arguments of `generate`.
DEFAULT_EXECUTABLE = "/home/hendrik/Projects/AI/alpaca.cpp/chat"
DEFAULT_MODEL = "/home/hendrik/Projects/AI/alpaca.cpp/" + "ggml-alpaca-7b-q4.bin"


async def generate(
    prompt: str,
    *,
    executable: str = DEFAULT_EXECUTABLE,
    model: str = DEFAULT_MODEL,
    n_predict: int = 256,
    temp: float = 0.1,
    top_k: int = 50,
    top_p: float = 0.95,
    repeat_last_n: int = 64,
    repeat_penalty: float = 1.3,
    ctx_size: int = 512,
    threads: int = 4,
):
    """Stream text produced by the alpaca.cpp ``chat`` binary for *prompt*.

    Spawns the binary as an asyncio subprocess and yields decoded UTF-8 text
    chunks as they arrive on its stdout.

    Args:
        prompt: The full prompt passed to the model via ``--prompt``.
        executable: Path to the alpaca.cpp ``chat`` binary.
        model: Path to the ggml model weights file.
        n_predict, temp, top_k, top_p, repeat_last_n, repeat_penalty,
        ctx_size, threads: Sampling / runtime parameters forwarded to the
            binary; defaults match the original hard-coded values.

    Yields:
        str: decoded output chunks.

    Raises:
        ValueError: if the subprocess exits with a non-zero return code; the
            message contains the return code and the captured stderr.
    """
    CHUNK_SIZE = 4
    args = (
        executable,
        "--model", model,
        "--prompt", prompt,
        "--n_predict", str(n_predict),
        "--temp", str(temp),
        "--top_k", str(top_k),
        "--top_p", str(top_p),
        "--repeat_last_n", str(repeat_last_n),
        "--repeat_penalty", str(repeat_penalty),
        "--ctx_size", str(ctx_size),
        "--threads", str(threads),
    )

    # Lazy %-style args: formatting only happens if DEBUG is enabled.
    logger.debug("Calling LLaMa with arguments: %s", args)
    print(prompt)  # kept from the original: echoes the prompt to stdout

    procLlama = await asyncio.create_subprocess_exec(
        *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

    # A 4-byte read can split a multi-byte UTF-8 sequence across chunks, so
    # decoding each chunk independently would raise UnicodeDecodeError on
    # perfectly valid output. An incremental decoder buffers partial
    # sequences across reads.
    decoder = codecs.getincrementaldecoder("utf-8")()

    while True:
        chunk = await procLlama.stdout.read(CHUNK_SIZE)

        if not chunk:
            # EOF on stdout: reap the process and surface failures.
            return_code = await procLlama.wait()
            if return_code != 0:
                error_output = await procLlama.stderr.read()
                logger.error(error_output.decode("utf-8"))
                raise ValueError(
                    f"RETURN CODE {return_code}\n\n" + error_output.decode("utf-8")
                )
            return

        try:
            text = decoder.decode(chunk)
        except UnicodeDecodeError:
            # Genuinely invalid bytes (not just a split sequence) — stop
            # streaming, mirroring the original behaviour.
            return
        if text:
            yield text


async def get_full_prompt(simple_prompt: str, chat_history=None):
    """Build an instruction-style prompt, optionally with recent history.

    Args:
        simple_prompt: The user's latest instruction.
        chat_history: Optional list of dicts with ``is_own_message`` (bool,
            True for the model's own replies) and ``message`` (str) keys;
            only the last ``HISTORY_LEN`` entries are included.

    Returns:
        str: the assembled prompt, ending with an open "### Response:" tag.
    """
    prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request." + "\n\n"
    )

    HISTORY_LEN = 5  # only the most recent exchanges are kept

    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_with_input(
    simple_prompt: str, additional_input: str, chat_history=None
):
    """Build an instruction+input prompt, optionally with recent history.

    Fixes the original implementation, which assigned the preamble to
    ``prompt_with_input`` but then appended to an undefined ``prompt``
    variable, raising NameError on every call.

    Args:
        simple_prompt: The user's latest instruction.
        additional_input: Context inserted under the "### Input:" tag.
        chat_history: Optional list of dicts with ``is_own_message`` and
            ``message`` keys; only the last ``HISTORY_LEN`` entries are used.

    Returns:
        str: the assembled prompt, ending with an open "### Response:" tag.
    """
    prompt = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately "
        "completes the request." + "\n\n"
    )

    HISTORY_LEN = 5  # only the most recent exchanges are kept

    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Input:\n" + additional_input + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_chat_style(simple_prompt: str, chat_history=None):
    """Build a dialog-transcript prompt ("User:" / "Julia:" turns).

    Args:
        simple_prompt: The user's latest message.
        chat_history: Optional list of dicts with ``is_own_message`` (True for
            Julia's replies) and ``message`` keys; only the last
            ``HISTORY_LEN`` entries are included.

    Returns:
        str: the assembled transcript, ending with an open "Julia: " turn.
    """
    prompt = (
        "Transcript of a dialog, where the User interacts with an Assistant "
        "named Julia. Julia is helpful, kind, honest, good at writing, and "
        "never fails to answer the User's requests immediately and with "
        "precision." + "\n\n"
    )

    HISTORY_LEN = 5  # only the most recent exchanges are kept

    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "User: " + message["message"] + "\n"
            else:
                prompt += "Julia: " + message["message"] + "\n"

    prompt += "User: " + simple_prompt + "\n"
    prompt += "Julia: "

    return prompt