import asyncio
import os
import time
from .prompts import *
from .langchain_memory import BotConversationSummerBufferWindowMemory
from langchain import PromptTemplate
from langchain import LLMChain, ConversationChain
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory, CombinedMemory, ConversationSummaryMemory
from langchain.chains.base import Chain

from typing import Dict, List

from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

import humanize
import datetime as dt

import logging

logger = logging.getLogger(__name__)


class RoleplayChain(Chain):
    llm_chain: LLMChain

    character_name: str
    persona: str
    scenario: str
    ai_name_chat: str
    human_name_chat: str

    output_key: str = "output_text"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        return ["character_name", "persona", "scenario", "ai_name_chat", "human_name_chat", "llm_chain"]

    @property
    def output_keys(self) -> List[str]:
        return [self.output_key]

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        other_keys = {k: v for k, v in inputs.items() if k not in self.input_keys}
        result = self.llm_chain.predict(**other_keys)
        return {self.output_key: result}


class AI(object):

    def __init__(self, bot, text_wrapper, image_wrapper, memory_path: str):
        self.name = bot.name
        self.bot = bot
        self.memory_path = memory_path
        self.rooms = {}

        from ..wrappers.langchain_koboldcpp import KoboldCpp
        self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'])
        self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'])
        self.text_wrapper = text_wrapper
        self.image_wrapper = image_wrapper

        #self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)

    def get_memory(self, message):
        # Return the per-room conversation memory, creating it on first use.
        if message.room_id not in self.rooms:
            self.rooms[message.room_id] = {}
            memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", human_prefix=message.user_name, ai_prefix=self.bot.name)
            self.rooms[message.room_id]["memory"] = memory
            self.rooms[message.room_id]["summary"] = "No previous events."
            memory.chat_memory.add_ai_message(self.bot.greeting)
            #memory.save_context({"input": None, "output": self.bot.greeting})
            memory.load_memory_variables({})
        else:
            memory = self.rooms[message.room_id]["memory"]
        #print(f"memory: {memory.load_memory_variables({})}")
        #print(f"memory has an estimated {self.llm_chat.get_num_tokens(memory.buffer)} number of tokens")
        return memory

    async def generate(self, message, reply_fn, typing_fn):
        # Plain generation: index a sample document into Chroma, run a similarity
        # search for a test query, then answer the message with a bare prompt.
        embeddings = SentenceTransformerEmbeddings()
        #embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")

        loader = TextLoader('./germany.txt')
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            # Set a really small chunk size, just to show.
            chunk_size = 600,
            chunk_overlap = 100,
            length_function = len,
        )

        docs = text_splitter.split_documents(documents)

        db = Chroma(persist_directory=os.path.join(self.memory_path, 'chroma-db'), embedding_function=embeddings)

        print(f"Indexing {len(docs)} documents")
        texts = [doc.page_content for doc in docs]
        metadatas = [doc.metadata for doc in docs]
        #db.add_texts(texts=texts, metadatas=metadatas, ids=None)
        #db.persist()

        query = "How is climate in Germany?"
        output_docs = db.similarity_search_with_score(query)

        print(query)
        print('###')
        for doc, score in output_docs:
            print("-" * 80)
            print("Score: ", score)
            print(doc.page_content)
            print("-" * 80)

        prompt_template = "{input}"
        chain = LLMChain(
            llm=self.llm_chat,
            prompt=PromptTemplate.from_template(prompt_template),
        )
        output = await chain.arun(message.message)
        return output.strip()

    async def generate_roleplay(self, message, reply_fn, typing_fn):
        # Roleplay generation: build a vicuna-style prompt from the bot's persona,
        # scenario and a rolling summary, then continue the conversation.
        chat_ai_name = self.bot.name
        chat_human_name = message.user_name
        if False:  # model is vicuna
            chat_ai_name = "### Assistant"
            chat_human_name = "### Human"

        conversation_memory = self.get_memory(message)
        readonlymemory = ReadOnlySharedMemory(memory=conversation_memory)
        summary_memory = ConversationSummaryMemory(llm=self.llm_summary, memory_key="summary", input_key="input")
        #combined_memory = CombinedMemory(memories=[conversation_memory, summary_memory])

        k = 5  #5
        max_k = 12  #10

        if len(conversation_memory.chat_memory.messages) > max_k * 2:
            async def make_progressive_summary(previous_summary, chat_history_text_string):
                await asyncio.sleep(0)  # yield for matrix-nio
                #self.rooms[message.room_id]["summary"] = summary_memory.predict_new_summary(conversation_memory.chat_memory.messages, previous_summary).strip()
                summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_progressive_summary)
                self.rooms[message.room_id]["summary"] = await summary_chain.apredict(summary=previous_summary, chat_history=chat_history_text_string)
                # ToDo: maybe add an add_task_done callback and don't access the variable directly from here?
                logger.info(f"New summary is: \"{self.rooms[message.room_id]['summary']}\"")
                conversation_memory.chat_memory.messages = conversation_memory.chat_memory.messages[-k * 2:]
                conversation_memory.load_memory_variables({})

                #summary = summarize(conversation_memory.buffer)
                #print(summary)
                #return summary

            logger.info("memory progressive summary scheduled...")
            await self.bot.schedule(self.bot.queue, make_progressive_summary, self.rooms[message.room_id]["summary"], conversation_memory.buffer)  #.add_done_callback(

        #t = dt.datetime.fromtimestamp(message.timestamp)
        #when = humanize.naturaltime(t)
        #print(when)

        # ToDo: either use prompt.format() to fill out the pygmalion prompt and use
        # the resulting template text to feed it into the instruct prompt's instruction,
        # or do this with prompt.partial()

        prompt = prompt_vicuna.partial(
            ai_name=self.bot.name,
            persona=self.bot.persona,
            scenario=self.bot.scenario,
            summary=self.rooms[message.room_id]["summary"],
            human_name=message.user_name,
            #example_dialogue=replace_all(self.bot.example_dialogue, {"{{user}}": chat_human_name, "{{char}}": chat_ai_name})
            ai_name_chat=chat_ai_name,
        )

        chain = ConversationChain(
            llm=self.llm_chat,
            prompt=prompt,
            verbose=True,
            memory=readonlymemory,
            #stop=['<|endoftext|>', '\nYou:', f"\n{message.user_name}:"],
        )

        # output = llm_chain(inputs={"ai_name": self.bot.name, "persona": self.bot.persona, "scenario": self.bot.scenario, "human_name": message.user_name, "ai_name_chat": self.bot.name, "chat_history": "", "input": message.message})['results'][0]['text']

        #roleplay_chain = RoleplayChain(llm_chain=chain, character_name=self.bot.name, persona=self.bot.persona, scenario=self.bot.scenario, ai_name_chat=chat_ai_name, human_name_chat=chat_human_name)

        stop = ['<|endoftext|>', f"\n{chat_human_name}"]
        #print(f"Message is: \"{message.message}\"")
        output = await chain.arun({"input": message.message, "stop": stop})
        # Replace character placeholder tokens in the model output (assumed to be "<BOT>"/"<USER>").
        output = output.replace("<BOT>", self.bot.name).replace("<USER>", message.user_name)
output.replace("### Assistant", self.bot.name) output = output.replace(f"\n{self.bot.name}: ", " ") output = output.strip() if "*activates the neural uplink*" in output: pass # call agent conversation_memory.chat_memory.add_user_message(message.message) conversation_memory.chat_memory.add_ai_message(output) conversation_memory.load_memory_variables({}) return output.strip() async def summarize(self, text): summary_chain = LLMChain(llm=llm_summary, prompt=prompt_summary, verbose=True) return await summary_chain.arun(text=text) #ToDo: We can summarize the whole dialogue here, let half of it in the buffer but skip doing a summary until this is flushed, too? async def prime_llm(self, text): self.llm_chat(text, max_tokens=1) def replace_all(text, dic): for i, j in dic.items(): text = text.replace(i, j) return text