From 06230a7e1e55606238817c9f684eeac91ee6cf9c Mon Sep 17 00:00:00 2001
From: Hendrik Langer
Date: Sat, 6 May 2023 00:31:12 +0200
Subject: [PATCH] skip old messages, redo summary

---
 matrix_pygmalion_bot/bot/ai/langchain.py   | 61 +++++++++++--------
 .../bot/ai/langchain_memory.py             |  5 ++
 matrix_pygmalion_bot/bot/memory/chatlog.py |  1 +
 3 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/matrix_pygmalion_bot/bot/ai/langchain.py b/matrix_pygmalion_bot/bot/ai/langchain.py
index 9c0755b..cb9ec6d 100644
--- a/matrix_pygmalion_bot/bot/ai/langchain.py
+++ b/matrix_pygmalion_bot/bot/ai/langchain.py
@@ -89,8 +89,8 @@ class AI(object):
         self.rooms = {}
 
         from ..wrappers.langchain_koboldcpp import KoboldCpp
-        self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'])
-        self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5002/api/latest/generate", stop=['<|endoftext|>'], max_tokens=512)
+        self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'], verbose=True)
+        self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5002/api/latest/generate", stop=['<|endoftext|>'], max_tokens=512, verbose=True)
         self.llm_chat_model = "pygmalion-7b"
         self.llm_summary_model = "vicuna-13b"
         self.text_wrapper = text_wrapper
@@ -108,9 +108,13 @@ class AI(object):
                 moving_summary = self.bot.rooms[room_id]['moving_summary']
             else:
                 moving_summary = "No previous events."
+            if "last_message_ids_summarized" in self.bot.rooms[room_id]:
+                last_message_ids_summarized = self.bot.rooms[room_id]['last_message_ids_summarized']
+            else:
+                last_message_ids_summarized = []
             if not human_prefix:
                 human_prefix = "Human"
-            memory = CustomMemory(memory_key="chat_history", input_key="input", human_prefix=human_prefix, ai_prefix=self.bot.name, llm=self.llm_summary, summary_prompt=prompt_progressive_summary, moving_summary_buffer=moving_summary, max_len=1200, min_len=200)
+            memory = CustomMemory(memory_key="chat_history", input_key="input", human_prefix=human_prefix, ai_prefix=self.bot.name, llm=self.llm_summary, summary_prompt=prompt_progressive_summary, moving_summary_buffer=moving_summary, max_len=1200, min_len=200, last_message_ids_summarized=last_message_ids_summarized)
             self.rooms[room_id]["memory"] = memory
             #memory.chat_memory.add_ai_message(self.bot.greeting)
         else:
@@ -122,6 +126,9 @@ class AI(object):
     async def add_chat_message(self, message):
         room_id = message.additional_kwargs['room_id']
         conversation_memory = self.get_memory(room_id)
+        if 'event_id' in message.additional_kwargs and message.additional_kwargs['event_id'] in conversation_memory.last_message_ids_summarized:
+            #don't add already summarized messages
+            return
         conversation_memory.chat_memory.messages.append(message)
         conversation_memory.chat_memory_day.messages.append(message)
 
@@ -218,28 +225,33 @@ class AI(object):
         # the resulting template text to feed it into the instruct prompt's instruction
         # or do this with the prompt.partial()
 
-        prompt = prompt_chat.partial(
-            ai_name=self.bot.name,
-            persona=self.bot.persona,
-            scenario=self.bot.scenario,
-            human_name=chat_human_name,
-            ai_name_chat=chat_ai_name,
-        )
-        if "summary" in prompt_chat.input_variables:
-            prompt = prompt.partial(summary=conversation_memory.moving_summary_buffer)
-        if "example_dialogue" in prompt_chat.input_variables:
-            prompt = prompt.partial(
-                example_dialogue=self.bot.example_dialogue.replace("{{user}}", chat_human_name)
+        for i in range(1):
+            prompt = prompt_chat.partial(
+                ai_name=self.bot.name,
+                persona=self.bot.persona,
+                scenario=self.bot.scenario,
+                human_name=chat_human_name,
+                ai_name_chat=chat_ai_name,
             )
-
-        tmp_prompt_text = prompt.format(chat_history=conversation_memory.buffer, input=message.content)
-        prompt_len = self.llm_chat.get_num_tokens(tmp_prompt_text)
-
-        if prompt_len+256 > 2000:
-            logger.warning(f"Prompt too large. Estimated {prompt_len} tokens")
-            await reply_fn(f" Prompt too large. Estimated {prompt_len} tokens")
-            await conversation_memory.prune_memory(conversation_memory.min_len)
-
+            if "summary" in prompt.input_variables:
+                prompt = prompt.partial(summary=conversation_memory.moving_summary_buffer)
+            if "example_dialogue" in prompt.input_variables:
+                prompt = prompt.partial(
+                    example_dialogue=self.bot.example_dialogue.replace("{{user}}", chat_human_name)
+                )
+
+            tmp_prompt_text = prompt.format(chat_history=conversation_memory.buffer, input=message.content)
+            prompt_len = self.llm_chat.get_num_tokens(tmp_prompt_text)
+
+            if prompt_len+256 > 2000:
+                logger.warning(f"Prompt too large. Estimated {prompt_len} tokens")
+                await reply_fn(f" Prompt too large. Estimated {prompt_len} tokens")
+                if i == 0:
+                    await conversation_memory.prune_memory(conversation_memory.min_len)
+                elif i == 1:
+                    conversation_memory.moving_summary_buffer = await self.summarize(conversation_memory.moving_summary_buffer)
+            else:
+                break
 
 
         #roleplay_chain = RoleplayChain(llm_chain=chain, character_name=self.bot.name, persona=self.bot.persona, scenario=self.bot.scenario, ai_name_chat=chat_ai_name, human_name_chat=chat_human_name)
@@ -288,6 +300,7 @@ class AI(object):
         new_summary_len = self.llm_chat.get_num_tokens(conversation_memory.moving_summary_buffer)
         logger.info(f"Refined summary from {summary_len} tokens to {new_summary_len} tokens ({new_summary_len-summary_len} tokens)")
         self.bot.rooms[room_id]['moving_summary'] = conversation_memory.moving_summary_buffer
+        self.bot.rooms[room_id]['last_message_ids_summarized'] = conversation_memory.last_message_ids_summarized
 
         return output
 
diff --git a/matrix_pygmalion_bot/bot/ai/langchain_memory.py b/matrix_pygmalion_bot/bot/ai/langchain_memory.py
index 525a2e7..c016784 100644
--- a/matrix_pygmalion_bot/bot/ai/langchain_memory.py
+++ b/matrix_pygmalion_bot/bot/ai/langchain_memory.py
@@ -52,6 +52,7 @@ class CustomMemory(BaseMemory):
     #length_function: Callable[[str], int] = self.llm.get_num_tokens_from_messages,
 
     moving_summary_buffer: str = ""
+    last_message_ids_summarized = []
     llm: BaseLanguageModel
     summary_prompt: BasePromptTemplate = SUMMARY_PROMPT
 
@@ -102,6 +103,10 @@ class CustomMemory(BaseMemory):
             pruned_memory.append(buffer.pop(0))
             curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer)
         self.moving_summary_buffer = await self.apredict_new_summary(pruned_memory, self.moving_summary_buffer)
+        for m in pruned_memory:
+            if "event_id" in m.additional_kwargs:
+                self.last_message_ids_summarized.append(m.additional_kwargs['event_id'])
+        self.last_message_ids_summarized = self.last_message_ids_summarized[-100 :]
 
     async def asave_context(self, input_msg: BaseMessage, output_msg: BaseMessage) -> None:
         """Save context from this conversation to buffer."""
diff --git a/matrix_pygmalion_bot/bot/memory/chatlog.py b/matrix_pygmalion_bot/bot/memory/chatlog.py
index 1aa295b..d9ac7a2 100644
--- a/matrix_pygmalion_bot/bot/memory/chatlog.py
+++ b/matrix_pygmalion_bot/bot/memory/chatlog.py
@@ -15,6 +15,7 @@ class ChatLog(object):
         if not message.room_id in self.chat_history:
             self.chat_history[message.room_id] = {}
         self.chat_history[message.room_id][message.event_id] = message
+        self.chat_history[message.room_id] = dict(list(self.chat_history[message.room_id].items())[-100 :])
 
         if hasattr(self, 'directory') and is_new:
             keepcharacters = (' ','.','_','-')
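
Reviewer note (not part of the patch): in the new prompt-size loop in langchain.py, "for i in range(1)" only ever yields i == 0, so the "elif i == 1" branch that re-summarizes the moving summary is unreachable in this revision; when the prompt is still too large, only prune_memory() runs and the loop ends. A self-contained sketch of a fallback chain in which the second stage is reachable; estimate_tokens(), MAX_PROMPT_TOKENS and shrink_to_fit() are stand-ins invented for the illustration, not the bot's API:

    # Two fallbacks tried in order, re-checking the size before each one:
    # first drop old history (like prune_memory), then shorten the summary
    # (like the "redo summary" idea in the commit subject).
    MAX_PROMPT_TOKENS = 2000 - 256   # same budget as the check in the patch

    def estimate_tokens(text: str) -> int:
        return len(text) // 4        # rough stand-in for llm_chat.get_num_tokens()

    def shrink_to_fit(history, summary):
        for attempt in range(2):
            prompt = summary + "\n" + "\n".join(history)
            if estimate_tokens(prompt) <= MAX_PROMPT_TOKENS:
                break                                    # prompt fits, stop
            if attempt == 0:
                history = history[len(history) // 2:]    # drop the oldest half
            else:
                summary = summary[:len(summary) // 2]    # compress the summary
        return history, summary

    # Example: an oversized history is pruned before the summary is touched.
    history, summary = shrink_to_fit([f"message number {i}" for i in range(600)], "No previous events.")
    assert len(history) == 300 and summary == "No previous events."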
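
Reviewer note (not part of the patch): the new cap in chatlog.py,

    self.chat_history[message.room_id] = dict(list(self.chat_history[message.room_id].items())[-100 :])

relies on Python 3.7+ dicts preserving insertion order, so slicing the item list from the end keeps the 100 most recent messages per room. The matching "[-100 :]" cap on last_message_ids_summarized in langchain_memory.py presumably mirrors it so the skip list still covers everything the trimmed chat log can replay after a restart. A minimal, runnable illustration of the slice (hypothetical event ids, not bot code):

    # Dicts keep insertion order (Python 3.7+), so this drops the oldest entries.
    chat_history = {f"$event{i}": f"message {i}" for i in range(150)}
    chat_history = dict(list(chat_history.items())[-100:])
    assert len(chat_history) == 100
    assert "$event49" not in chat_history   # oldest 50 gone
    assert "$event149" in chat_history      # newest kept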