# https://github.com/nsarrazin/serge/blob/main/api/utils/generate.py
import subprocess, os
import asyncio
import codecs
import logging

logger = logging.getLogger(__name__)

# Default location of the alpaca.cpp binary and its model weights; callers can
# override them via the keyword arguments of `generate`.
DEFAULT_EXECUTABLE = "/home/hendrik/Projects/AI/alpaca.cpp/chat"
DEFAULT_MODEL = "/home/hendrik/Projects/AI/alpaca.cpp/" + "ggml-alpaca-7b-q4.bin"


async def generate(
    prompt: str,
    *,
    executable: str = DEFAULT_EXECUTABLE,
    model: str = DEFAULT_MODEL,
    n_predict: int = 256,
    temp: float = 0.1,
    top_k: int = 50,
    top_p: float = 0.95,
    repeat_last_n: int = 64,
    repeat_penalty: float = 1.3,
    ctx_size: int = 512,
    threads: int = 4,
):
    """Stream text produced by the alpaca.cpp ``chat`` binary for *prompt*.

    Spawns the binary as an asyncio subprocess and yields decoded UTF-8 text
    chunks as they arrive on its stdout.

    Args:
        prompt: The full prompt passed to the model via ``--prompt``.
        executable: Path to the alpaca.cpp ``chat`` binary.
        model: Path to the ggml model weights file.
        n_predict, temp, top_k, top_p, repeat_last_n, repeat_penalty,
        ctx_size, threads: Sampling / runtime parameters forwarded to the
            binary; defaults match the original hard-coded values.

    Yields:
        str: decoded output chunks.

    Raises:
        ValueError: if the subprocess exits with a non-zero return code; the
            message contains the return code and the captured stderr.
    """
    CHUNK_SIZE = 4
    args = (
        executable,
        "--model", model,
        "--prompt", prompt,
        "--n_predict", str(n_predict),
        "--temp", str(temp),
        "--top_k", str(top_k),
        "--top_p", str(top_p),
        "--repeat_last_n", str(repeat_last_n),
        "--repeat_penalty", str(repeat_penalty),
        "--ctx_size", str(ctx_size),
        "--threads", str(threads),
    )

    # Lazy %-style args: formatting only happens if DEBUG is enabled.
    logger.debug("Calling LLaMa with arguments: %s", args)
    print(prompt)  # kept from the original: echoes the prompt to stdout

    procLlama = await asyncio.create_subprocess_exec(
        *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

    # A 4-byte read can split a multi-byte UTF-8 sequence across chunks, so
    # decoding each chunk independently would raise UnicodeDecodeError on
    # perfectly valid output. An incremental decoder buffers partial
    # sequences across reads.
    decoder = codecs.getincrementaldecoder("utf-8")()

    while True:
        chunk = await procLlama.stdout.read(CHUNK_SIZE)

        if not chunk:
            # EOF on stdout: reap the process and surface failures.
            return_code = await procLlama.wait()
            if return_code != 0:
                error_output = await procLlama.stderr.read()
                logger.error(error_output.decode("utf-8"))
                raise ValueError(
                    f"RETURN CODE {return_code}\n\n" + error_output.decode("utf-8")
                )
            return

        try:
            text = decoder.decode(chunk)
        except UnicodeDecodeError:
            # Genuinely invalid bytes (not just a split sequence) — stop
            # streaming, mirroring the original behaviour.
            return
        if text:
            yield text


async def get_full_prompt(simple_prompt: str, chat_history=None):
    """Build an instruction-style prompt, optionally with recent history.

    Args:
        simple_prompt: The user's latest instruction.
        chat_history: Optional list of dicts with ``is_own_message`` (bool,
            True for the model's own replies) and ``message`` (str) keys;
            only the last ``HISTORY_LEN`` entries are included.

    Returns:
        str: the assembled prompt, ending with an open "### Response:" tag.
    """
    prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request." + "\n\n"
    )

    HISTORY_LEN = 5  # only the most recent exchanges are kept

    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_with_input(
    simple_prompt: str, additional_input: str, chat_history=None
):
    """Build an instruction+input prompt, optionally with recent history.

    Fixes the original implementation, which assigned the preamble to
    ``prompt_with_input`` but then appended to an undefined ``prompt``
    variable, raising NameError on every call.

    Args:
        simple_prompt: The user's latest instruction.
        additional_input: Context inserted under the "### Input:" tag.
        chat_history: Optional list of dicts with ``is_own_message`` and
            ``message`` keys; only the last ``HISTORY_LEN`` entries are used.

    Returns:
        str: the assembled prompt, ending with an open "### Response:" tag.
    """
    prompt = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately "
        "completes the request." + "\n\n"
    )

    HISTORY_LEN = 5  # only the most recent exchanges are kept

    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "### Instruction:\n" + message["message"] + "\n"
            else:
                prompt += "### Response:\n" + message["message"] + "\n"

    prompt += "### Instruction:\n" + simple_prompt + "\n"
    prompt += "### Input:\n" + additional_input + "\n"
    prompt += "### Response:\n"

    return prompt


async def get_full_prompt_chat_style(simple_prompt: str, chat_history=None):
    """Build a dialog-transcript prompt ("User:" / "Julia:" turns).

    Args:
        simple_prompt: The user's latest message.
        chat_history: Optional list of dicts with ``is_own_message`` (True for
            Julia's replies) and ``message`` keys; only the last
            ``HISTORY_LEN`` entries are included.

    Returns:
        str: the assembled transcript, ending with an open "Julia: " turn.
    """
    prompt = (
        "Transcript of a dialog, where the User interacts with an Assistant "
        "named Julia. Julia is helpful, kind, honest, good at writing, and "
        "never fails to answer the User's requests immediately and with "
        "precision." + "\n\n"
    )

    HISTORY_LEN = 5  # only the most recent exchanges are kept

    if chat_history:
        for message in chat_history[-HISTORY_LEN:]:
            if not message["is_own_message"]:
                prompt += "User: " + message["message"] + "\n"
            else:
                prompt += "Julia: " + message["message"] + "\n"

    prompt += "User: " + simple_prompt + "\n"
    prompt += "Julia: "

    return prompt