From 84bc0fac90361bfa7684b1bd6b2dedbf694043d7 Mon Sep 17 00:00:00 2001
From: Hendrik Langer
Date: Thu, 13 Apr 2023 00:24:11 +0200
Subject: [PATCH] prompts and reply postprocessing

---
 matrix_pygmalion_bot/ai/koboldcpp.py     | 35 +++++++++-------
 matrix_pygmalion_bot/ai/llama_helpers.py | 51 ++++++++++++++++++++----
 matrix_pygmalion_bot/core.py             |  6 +--
 3 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/matrix_pygmalion_bot/ai/koboldcpp.py b/matrix_pygmalion_bot/ai/koboldcpp.py
index 1172819..0e849f2 100644
--- a/matrix_pygmalion_bot/ai/koboldcpp.py
+++ b/matrix_pygmalion_bot/ai/koboldcpp.py
@@ -59,6 +59,7 @@ async def generate_sync(
     TIMEOUT = 360
     DELAY = 5
     tokens = 0
+    complete = False
     complete_reply = ""
     for i in range(TIMEOUT//DELAY):
         input_data["max_length"] = 16 # pseudo streaming
@@ -75,27 +76,33 @@
             complete_reply += partial_reply
             tokens += input_data["max_length"]
             await typing_fn()
-            if not partial_reply or partial_reply.find('<|endoftext|>') != -1 or partial_reply.find("\nYou:") != -1 or partial_reply.find("\n### Human:") != -1 or tokens >= max_new_tokens:
-                idx = complete_reply.find(f"\nYou:")
-                if idx == -1:
-                    idx = complete_reply.find(f"\n### Human:")
+            if not partial_reply or tokens >= max_new_tokens:
+                complete = True
+                break
+            for t in [f"\nYou:", f"\n### Human:", f"\n{bot.user_name}:", '<|endoftext|>']:
+                idx = complete_reply.find(t)
                 if idx != -1:
                     complete_reply = complete_reply[:idx].strip()
-                else:
-                    complete_reply = complete_reply.removesuffix('<|endoftext|>').strip()
-                complete_reply = complete_reply.replace(f"\n{bot.name}: ", " ")
-                complete_reply = complete_reply.replace(f"\n<BOT>: ", " ")
-                complete_reply = complete_reply.replace(f"<BOT>", f"{bot.name}")
-                complete_reply = complete_reply.replace(f"<USER>", f"You")
-                return complete_reply.strip()
-            else:
-                continue
+                    complete = True
+                    break
+            if complete:
+                break
         elif r.status_code == 503:
             #model busy
             await asyncio.sleep(DELAY)
         else:
             raise ValueError(f"<ERROR>")
-    raise ValueError(f"<ERROR> Timeout")
+
+    if complete_reply:
+        complete_reply = complete_reply.removesuffix('<|endoftext|>')
+        complete_reply = complete_reply.replace(f"\n{bot.name}: ", " ")
+        complete_reply = complete_reply.replace(f"\n<BOT>: ", " ")
+        complete_reply = complete_reply.replace(f"<BOT>", f"{bot.name}")
+        complete_reply = complete_reply.replace(f"<USER>", f"You")
+        complete_reply = complete_reply.replace(f"### Assistant", f"{bot.name}")
+        return complete_reply.strip()
+    else:
+        raise ValueError(f"<ERROR> Timeout")
 
 
 async def generate_image(input_prompt: str, negative_prompt: str, api_url: str, api_key: str, typing_fn):

diff --git a/matrix_pygmalion_bot/ai/llama_helpers.py b/matrix_pygmalion_bot/ai/llama_helpers.py
index 93294cb..d799134 100644
--- a/matrix_pygmalion_bot/ai/llama_helpers.py
+++ b/matrix_pygmalion_bot/ai/llama_helpers.py
@@ -18,11 +18,11 @@ logger = logging.getLogger(__name__)
 
 async def get_full_prompt(simple_prompt: str, bot, chat_history):
     # https://github.com/ggerganov/llama.cpp/tree/master/examples
-#    prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n"
-    prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n"
-    #"BEGINNING OF CONVERSATION:"
-    prompt += "### Human: " + simple_prompt + "\n"
-    prompt += "### Assistant:"
+##    prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n"
+#    prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n"
+#    #"BEGINNING OF CONVERSATION:"
+#    prompt += "### Human: " + simple_prompt + "\n"
+#    prompt += "### Assistant:"
 
     prompt = f"This is a transcript of a 1000 page, never ending conversation between {bot.user_name} and the cute and helpful AI assistant {bot.name}. {bot.name} is a girl who is an AI running on the users computer.\n"
     prompt += f"{bot.name} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next.\n"
@@ -30,15 +30,52 @@ async def get_full_prompt(simple_prompt: str, bot, chat_history):
     prompt += f"{bot.name} is a very helpful AI and will help the user with anything they need, she is also very friendly and will try to make the user feel better if they are sad.\n"
     prompt += f"{bot.name} is also very curious and will ask the user a lot of questions about themselves and their life, she will also try to make the user like her.\n"
     prompt += f"\n"
+    #prompt += f"{bot.user_name}: " + simple_prompt + "\n"
+    #prompt += f"{bot.name}:"
+
+    MAX_TOKENS = 2048
+    max_new_tokens = 200
+    total_num_tokens = await num_tokens(prompt)
+    total_num_tokens += await num_tokens(f"{bot.user_name}: " + simple_prompt + f"\n{bot.name}:")
+    visible_history = []
+    current_message = True
+    for key, chat_item in reversed(chat_history.chat_history.items()):
+        if current_message:
+            current_message = False
+            continue
+        if chat_item.message["en"].startswith('!begin'):
+            break
+        if chat_item.message["en"].startswith('!'):
+            continue
+        if chat_item.message["en"].startswith('<ERROR>'):
+            continue
+        #if chat_item.message["en"] == bot.greeting:
+        #    continue
+        if chat_item.num_tokens is None:
+            chat_item.num_tokens = await num_tokens("{}: {}".format(chat_item.user_name, chat_item.message["en"]))
+        # TODO: is it MAX_TOKENS or MAX_TOKENS - max_new_tokens??
+ logger.debug(f"History: " + str(chat_item) + " [" + str(chat_item.num_tokens) + "]") + if total_num_tokens + chat_item.num_tokens < MAX_TOKENS - max_new_tokens: + visible_history.append(chat_item) + total_num_tokens += chat_item.num_tokens + else: + break + visible_history = reversed(visible_history) + + #prompt += bot.name + ": " + bot.greeting + "\n" + for chat_item in visible_history: + if chat_item.is_own_message: + prompt += bot.name + ": " + chat_item.message["en"] + "\n" + else: + prompt += f"{bot.user_name}: " + chat_item.message["en"] + "\n" prompt += f"{bot.user_name}: " + simple_prompt + "\n" prompt += f"{bot.name}:" - return prompt async def num_tokens(input_text: str): - return estimate_num_tokens(input_text) + return await estimate_num_tokens(input_text) async def estimate_num_tokens(input_text: str): diff --git a/matrix_pygmalion_bot/core.py b/matrix_pygmalion_bot/core.py index 1621cba..8c6f436 100644 --- a/matrix_pygmalion_bot/core.py +++ b/matrix_pygmalion_bot/core.py @@ -87,8 +87,8 @@ class Callbacks(object): print(event) await self.bot.send_message(self.client, room.room_id, "Hello World!") return - elif re.search("^!image(?P[0-9])?(\s(?P.*))?$", event.body): - m = re.search("^!image(?P[0-9])?(\s(?P.*))?$", event.body) + elif re.search("(?s)^!image(?P[0-9])?(\s(?P.*))?$", event.body): + m = re.search("(?s)^!image(?P[0-9])?(\s(?P.*))?$", event.body) if m['num']: num = int(m['num']) else: @@ -221,7 +221,7 @@ class Callbacks(object): full_prompt = await text_ai.get_full_prompt(chat_message.getTranslation("en"), self.bot, self.bot.chat_history.room(room.display_name)) num_tokens = await text_ai.num_tokens(full_prompt) logger.debug(full_prompt) - logger.debug(f"Prompt has " + str(num_tokens) + " tokens") + logger.info(f"Prompt has " + str(num_tokens) + " tokens") # answer = "" # time = 0 # error = None