# https://github.com/nsarrazin/serge/blob/main/api/utils/generate.py

import subprocess, os
import asyncio
import logging

logger = logging.getLogger(__name__)


async def generate(
    prompt: str,
):
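    """Run the alpaca.cpp chat binary on `prompt` and stream its decoded stdout."""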
    CHUNK_SIZE = 4

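    # Full command line for the alpaca.cpp chat binary, including fixed sampling parameters.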
    args = (
        "/home/hendrik/Projects/AI/alpaca.cpp/chat",
        "--model",
        "/home/hendrik/Projects/AI/alpaca.cpp/" + "ggml-alpaca-7b-q4.bin",
        "--prompt",
        prompt,
        "--n_predict",
        str(256),
        "--temp",
        str(0.1),
        "--top_k",
        str(50),
        "--top_p",
        str(0.95),
        "--repeat_last_n",
        str(64),
        "--repeat_penalty",
        str(1.3),
        "--ctx_size",
        str(512),
        "--threads",
        str(4),
    )

    logger.debug("Calling LLaMa with arguments: %s", args)
    print(prompt)

    procLlama = await asyncio.create_subprocess_exec(
        *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

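    # Stream stdout in CHUNK_SIZE-byte reads; on EOF, check the exit code and raise on failure.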
    while True:
        chunk = await procLlama.stdout.read(CHUNK_SIZE)

        if not chunk:
            return_code = await procLlama.wait()

            if return_code != 0:
                error_output = await procLlama.stderr.read()
                logger.error(error_output.decode("utf-8"))
                raise ValueError(f"RETURN CODE {return_code}\n\n" + error_output.decode("utf-8"))
            else:
                return

        try:
            chunk = chunk.decode("utf-8")
        except UnicodeDecodeError:
            # A small read can split a multi-byte UTF-8 character; stop streaming in that case.
            return

        yield chunk


async def get_full_prompt(simple_prompt: str, chat_history=None):
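    """Build an Alpaca-style Instruction/Response prompt from recent chat history and the new message."""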
    prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request." + "\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_with_input(simple_prompt: str, additional_input: str, chat_history=None):
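    """Same Alpaca-style prompt as get_full_prompt, but with an additional Input section."""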
    prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request." + "\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Input:\n" + additional_input + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_chat_style(simple_prompt: str, chat_history=None):
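    """Build a dialog-transcript prompt in which the assistant persona is named Julia."""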
    prompt = "Transcript of a dialog, where the User interacts with an Assistant named Julia. Julia is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision." + "\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "User: " + message["message"] + "\n"
            else:
                prompt += "Julia: " + message["message"] + "\n"

    prompt += "User: " + simple_prompt + "\n"
    prompt += "Julia: "

    return prompt