# https://github.com/nsarrazin/serge/blob/main/api/utils/generate.py

import asyncio
import logging
import subprocess

logger = logging.getLogger(__name__)


async def generate(prompt: str):
    CHUNK_SIZE = 4

    # Command line for the alpaca.cpp "chat" binary; the paths are
    # machine-specific and the sampling parameters are hard-coded.
    args = (
        "/home/hendrik/Projects/AI/alpaca.cpp/chat",
        "--model",
        "/home/hendrik/Projects/AI/alpaca.cpp/ggml-alpaca-7b-q4.bin",
        "--prompt",
        prompt,
        "--n_predict",
        str(256),
        "--temp",
        str(0.1),
        "--top_k",
        str(50),
        "--top_p",
        str(0.95),
        "--repeat_last_n",
        str(64),
        "--repeat_penalty",
        str(1.3),
        "--ctx_size",
        str(512),
        "--threads",
        str(4),
    )

    logger.debug("Calling LLaMa with arguments: %s", args)
    print(prompt)
    procLlama = await asyncio.create_subprocess_exec(
        *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

    while True:
        chunk = await procLlama.stdout.read(CHUNK_SIZE)

        if not chunk:
            # stdout is exhausted: wait for the process to exit and
            # surface any failure together with its stderr output.
            return_code = await procLlama.wait()

            if return_code != 0:
                error_output = await procLlama.stderr.read()
                logger.error(error_output.decode("utf-8"))
                raise ValueError(
                    f"RETURN CODE {return_code}\n\n" + error_output.decode("utf-8")
                )
            else:
                return

        try:
            chunk = chunk.decode("utf-8")
        except UnicodeDecodeError:
            # A 4-byte read can split a multi-byte UTF-8 character;
            # stop streaming rather than yield mangled text.
            return

        yield chunk

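# Usage sketch (an illustration, not part of the upstream file): generate()
# is an async generator, so callers stream tokens with "async for", e.g.
#
#     async for token in generate(full_prompt):
#         print(token, end="", flush=True)
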
async def get_full_prompt(simple_prompt: str, chat_history=None):
    # Alpaca-style instruction prompt: replay up to the last HISTORY_LEN
    # chat messages as Instruction/Response pairs, then append the new
    # instruction and leave the Response section open for the model.
    prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_with_input(simple_prompt: str, additional_input: str, chat_history=None):
    # Same Alpaca format as get_full_prompt(), plus an Input section
    # that provides further context for the instruction.
    prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Input:\n" + additional_input + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_chat_style(simple_prompt: str, chat_history=None):
    # Dialog-style prompt for chat mode: the model continues the
    # transcript in the voice of the assistant "Julia".
    prompt = "Transcript of a dialog, where the User interacts with an Assistant named Julia. Julia is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.\n\n"

    HISTORY_LEN = 5
    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "User: " + message["message"] + "\n"
            else:
                prompt += "Julia: " + message["message"] + "\n"

    prompt += "User: " + simple_prompt + "\n"
    prompt += "Julia: "

    return prompt
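

# Minimal manual test (an illustration, not part of the upstream file):
# build a chat-style prompt and stream the reply to stdout. Assumes the
# hard-coded binary and model paths in generate() exist on this machine.
if __name__ == "__main__":
    async def _demo():
        full_prompt = await get_full_prompt_chat_style("Hello, who are you?")
        async for token in generate(full_prompt):
            print(token, end="", flush=True)
        print()

    asyncio.run(_demo())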