matrix-pygmalion-bot/matrix_pygmalion_bot/ai/llama_helpers.py

import asyncio
import os, tempfile
import logging

import json
import requests

from transformers import AutoTokenizer, AutoConfig
from huggingface_hub import hf_hub_download

import io
import base64
from PIL import Image, PngImagePlugin

logger = logging.getLogger(__name__)


async def get_full_prompt(simple_prompt: str, bot, chat_history):
    """Build the complete LLaMA-style chat prompt for the next bot reply.

    The prompt is made of a fixed persona preamble, as many recent chat
    history entries as fit into the token budget, the user's current
    message, and finally the ``<bot name>:`` cue the model continues from.

    Args:
        simple_prompt: Text of the user's current message.
        bot: Object providing the ``name`` and ``user_name`` attributes.
        chat_history: Object whose ``chat_history`` attribute is a mapping
            that supports ``reversed(....items())``. Each value is read for
            ``message["en"]``, ``num_tokens``, ``user_name`` and
            ``is_own_message``; a missing ``num_tokens`` (``None``) is
            filled in on the item as a side effect.

    Returns:
        The assembled prompt string.
    """
    # Prompt style follows the llama.cpp examples:
    # https://github.com/ggerganov/llama.cpp/tree/master/examples
    prompt = f"This is a transcript of a 1000 page, never ending conversation between {bot.user_name} and the cute and helpful AI assistant {bot.name}. {bot.name} is a girl who is an AI running on the users computer.\n"
    prompt += f"{bot.name} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next.\n"
    prompt += f"{bot.name} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help.\n"
    prompt += f"{bot.name} is a very helpful AI and will help the user with anything they need, she is also very friendly and will try to make the user feel better if they are sad.\n"
    prompt += f"{bot.name} is also very curious and will ask the user a lot of questions about themselves and their life, she will also try to make the user like her.\n"
    prompt += "\n"

    MAX_TOKENS = 2048
    max_new_tokens = 200
    # TODO: is it MAX_TOKENS or MAX_TOKENS - max_new_tokens??
    token_budget = MAX_TOKENS - max_new_tokens

    # Everything that follows the history. It has to be one f-string: the
    # trailing "\n{bot.name}:" used to be a plain literal when counting
    # tokens, so the placeholder text was measured instead of the real name.
    prompt_tail = f"{bot.user_name}: {simple_prompt}\n{bot.name}:"

    total_num_tokens = await num_tokens(prompt)
    total_num_tokens += await num_tokens(prompt_tail)

    # Walk the history from newest to oldest, collecting what fits.
    newest_first = reversed(chat_history.chat_history.items())
    # The first (newest) entry is always skipped -- presumably it is the
    # current message, which is already covered by simple_prompt.
    next(newest_first, None)

    visible_history = []
    for _key, chat_item in newest_first:
        text = chat_item.message["en"]
        if text.startswith('!begin'):
            # Nothing older than a '!begin' entry is included.
            break
        if text.startswith(('!', '<ERROR>')):
            # Other '!'-prefixed entries and error markers are left out.
            continue
        if chat_item.num_tokens is None:
            chat_item.num_tokens = await num_tokens(
                "{}: {}".format(chat_item.user_name, text)
            )
        logger.debug("History: %s [%s]", chat_item, chat_item.num_tokens)
        if total_num_tokens + chat_item.num_tokens >= token_budget:
            break
        visible_history.append(chat_item)
        total_num_tokens += chat_item.num_tokens

    # Emit the collected entries in chronological order.
    for chat_item in reversed(visible_history):
        if chat_item.is_own_message:
            prompt += bot.name + ": " + chat_item.message["en"] + "\n"
        else:
            prompt += f"{bot.user_name}: " + chat_item.message["en"] + "\n"

    prompt += prompt_tail
    return prompt


async def num_tokens(input_text: str):
    """Return the token count for *input_text*.

    Delegates to ``estimate_num_tokens`` and returns its result unchanged.
    """
    token_count = await estimate_num_tokens(input_text)
    return token_count


async def estimate_num_tokens(input_text: str):
    """Estimate the token count of *input_text* from its length.

    The estimate is one token per four characters (rounded down), plus one.
    """
    full_groups, _remainder = divmod(len(input_text), 4)
    return full_groups + 1
llama style prompts 2 years ago			`import asyncio`
			`import os, tempfile`
			`import logging`

			`import json`
			`import requests`

			`from transformers import AutoTokenizer, AutoConfig`
			`from huggingface_hub import hf_hub_download`

			`import io`
			`import base64`
			`from PIL import Image, PngImagePlugin`

			`logger = logging.getLogger(__name__)`


			`async def get_full_prompt(simple_prompt: str, bot, chat_history):`

			`# https://github.com/ggerganov/llama.cpp/tree/master/examples`
prompts and reply postprocessing 2 years ago			`## prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n"`
			`# prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n"`
			`# #"BEGINNING OF CONVERSATION:"`
			`# prompt += "### Human: " + simple_prompt + "\n"`
			`# prompt += "### Assistant:"`
llama style prompts 2 years ago
			`prompt = f"This is a transcript of a 1000 page, never ending conversation between {bot.user_name} and the cute and helpful AI assistant {bot.name}. {bot.name} is a girl who is an AI running on the users computer.\n"`
			`prompt += f"{bot.name} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next.\n"`
			`prompt += f"{bot.name} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help.\n"`
			`prompt += f"{bot.name} is a very helpful AI and will help the user with anything they need, she is also very friendly and will try to make the user feel better if they are sad.\n"`
			`prompt += f"{bot.name} is also very curious and will ask the user a lot of questions about themselves and their life, she will also try to make the user like her.\n"`
			`prompt += f"\n"`
prompts and reply postprocessing 2 years ago			`#prompt += f"{bot.user_name}: " + simple_prompt + "\n"`
			`#prompt += f"{bot.name}:"`

			`MAX_TOKENS = 2048`
			`max_new_tokens = 200`
			`total_num_tokens = await num_tokens(prompt)`
			`total_num_tokens += await num_tokens(f"{bot.user_name}: " + simple_prompt + "\n{bot.name}:")`
			`visible_history = []`
			`current_message = True`
			`for key, chat_item in reversed(chat_history.chat_history.items()):`
			`if current_message:`
			`current_message = False`
			`continue`
			`if chat_item.message["en"].startswith('!begin'):`
			`break`
			`if chat_item.message["en"].startswith('!'):`
			`continue`
			`if chat_item.message["en"].startswith('<ERROR>'):`
			`continue`
			`#if chat_item.message["en"] == bot.greeting:`
			`# continue`
			`if chat_item.num_tokens == None:`
			`chat_item.num_tokens = await num_tokens("{}: {}".format(chat_item.user_name, chat_item.message["en"]))`
			`# TODO: is it MAX_TOKENS or MAX_TOKENS - max_new_tokens??`
			`logger.debug(f"History: " + str(chat_item) + " [" + str(chat_item.num_tokens) + "]")`
			`if total_num_tokens + chat_item.num_tokens < MAX_TOKENS - max_new_tokens:`
			`visible_history.append(chat_item)`
			`total_num_tokens += chat_item.num_tokens`
			`else:`
			`break`
			`visible_history = reversed(visible_history)`

			`#prompt += bot.name + ": " + bot.greeting + "\n"`
			`for chat_item in visible_history:`
			`if chat_item.is_own_message:`
			`prompt += bot.name + ": " + chat_item.message["en"] + "\n"`
			`else:`
			`prompt += f"{bot.user_name}: " + chat_item.message["en"] + "\n"`
llama style prompts 2 years ago			`prompt += f"{bot.user_name}: " + simple_prompt + "\n"`
			`prompt += f"{bot.name}:"`

			`return prompt`


			`async def num_tokens(input_text: str):`
prompts and reply postprocessing 2 years ago			`return await estimate_num_tokens(input_text)`
llama style prompts 2 years ago

			`async def estimate_num_tokens(input_text: str):`
			`return len(input_text)//4+1`