# matrix-pygmalion-bot/matrix_pygmalion_bot/bot/ai/langchain.py

import asyncio
import time
from .prompts import *
from .langchain_memory import BotConversationSummerBufferWindowMemory

from langchain import PromptTemplate
from langchain import LLMChain, ConversationChain
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory, CombinedMemory, ConversationSummaryMemory

from langchain.chains.base import Chain
from typing import Dict, List

from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

import humanize
import datetime as dt

import logging

logger = logging.getLogger(__name__)


class RoleplayChain(Chain):
    """Chain that forwards its non-character inputs to a wrapped ``LLMChain``.

    The character fields are declared both as attributes and as the chain's
    input keys. ``_call`` removes those keys from the incoming inputs and
    passes whatever remains to ``llm_chain.predict``.
    """

    # Wrapped chain that actually produces the text.
    llm_chain: LLMChain

    # Character description fields.
    character_name: str
    persona: str
    scenario: str
    ai_name_chat: str
    human_name_chat: str

    output_key: str = "output_text"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Return the names this chain declares as its inputs."""
        declared = [
            "character_name",
            "persona",
            "scenario",
            "ai_name_chat",
            "human_name_chat",
            "llm_chain",
        ]
        return declared

    @property
    def output_keys(self) -> List[str]:
        """Return a single-element list holding ``output_key``."""
        only_key = self.output_key
        return [only_key]

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Run the wrapped chain on every input that is not a declared key.

        Args:
            inputs: Mapping of input names to values.

        Returns:
            A dict with the prediction stored under ``output_key``.
        """
        declared = self.input_keys
        forwarded = {}
        for name, value in inputs.items():
            if name in declared:
                # Declared keys are filtered out, not passed to the LLM chain.
                continue
            forwarded[name] = value
        generated = self.llm_chain.predict(**forwarded)
        return {self.output_key: generated}
    

class AI(object):

    def __init__(self, bot, text_wrapper, image_wrapper, memory_path: str,
                 endpoint_url: str = "http://172.16.85.10:5001/api/latest/generate"):
        """Store the bot references and create the two KoboldCpp LLM clients.

        Args:
            bot: Bot object. ``bot.name`` and ``bot.temperature`` are read
                here; the object itself is kept as ``self.bot``.
            text_wrapper: Stored unchanged as ``self.text_wrapper``.
            image_wrapper: Stored unchanged as ``self.image_wrapper``.
            memory_path: Stored as ``self.memory_path``; ``generate`` joins it
                with ``chroma-db`` for the vector store directory.
            endpoint_url: KoboldCpp generate endpoint used by both the chat
                and the summary client. Defaults to the address that used to
                be hard-coded, so existing callers are unaffected.
        """
        self.name = bot.name
        self.bot = bot
        self.memory_path = memory_path
        # Per-room state, keyed by room id (filled in by get_memory).
        self.rooms = {}

        from ..wrappers.langchain_koboldcpp import KoboldCpp
        # Chat client uses the bot's configured temperature; the summary
        # client uses a fixed low temperature of 0.2.
        self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url=endpoint_url, stop=['<|endoftext|>'])
        self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url=endpoint_url, stop=['<|endoftext|>'])
        self.text_wrapper = text_wrapper
        self.image_wrapper = image_wrapper

        #self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)

    def get_memory(self, message):
        if not message.room_id in self.rooms:
            self.rooms[message.room_id] = {}
            memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", human_prefix=message.user_name, ai_prefix=self.bot.name)
            self.rooms[message.room_id]["memory"] = memory
            self.rooms[message.room_id]["summary"] = "No previous events."
            memory.chat_memory.add_ai_message(self.bot.greeting)
            #memory.save_context({"input": None, "output": self.bot.greeting})
            memory.load_memory_variables({})
        else:
            memory = self.rooms[message.room_id]["memory"]
        #print(f"memory: {memory.load_memory_variables({})}")
        #print(f"memory has an estimated {self.llm_chat.get_num_tokens(memory.buffer)} number of tokens")
        return memory


    async def generate(self, message, reply_fn, typing_fn):
        """Run a fixed retrieval query for debugging, then answer ``message``.

        The method loads ``./germany.txt``, splits it into chunks, opens the
        Chroma store under ``<memory_path>/chroma-db`` and prints the
        similarity-search hits for a hard-coded query. The retrieval results
        are only printed; they are not fed into the prompt. Finally the raw
        message text is sent to the chat LLM through a pass-through
        ``"{input}"`` prompt.

        Args:
            message: Object exposing ``message`` (the text to answer).
            reply_fn: Unused here.
            typing_fn: Unused here.

        Returns:
            The LLM output with surrounding whitespace stripped.
        """
        # ``os`` is not explicitly imported by the module-level imports, so
        # import it locally to keep the path join below from raising NameError.
        import os

        embeddings = SentenceTransformerEmbeddings()
        #embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")

        documents = TextLoader('./germany.txt').load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=600,
            chunk_overlap=100,
            length_function=len,
        )
        docs = text_splitter.split_documents(documents)

        db = Chroma(persist_directory=os.path.join(self.memory_path, 'chroma-db'), embedding_function=embeddings)

        print(f"Indexing {len(docs)} documents")
        # Indexing itself is currently disabled; the chunks are only counted.
        # To (re)build the store, re-enable:
        #db.add_texts(texts=[doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs], ids=None)
        #db.persist()

        query = "How is climate in Germany?"
        output_docs = db.similarity_search_with_score(query)
        print(query)
        print('###')
        for doc, score in output_docs:
            print("-" * 80)
            print("Score: ", score)
            print(doc.page_content)
            print("-" * 80)

        prompt_template = "{input}"
        chain = LLMChain(
            llm=self.llm_chat,
            prompt=PromptTemplate.from_template(prompt_template),
        )
        output = await chain.arun(message.message)
        return output.strip()


    async def generate_roleplay(self, message, reply_fn, typing_fn):
        """Produce the bot's in-character reply to ``message``.

        Steps, in order:

        1. Fetch the room's conversation memory via ``get_memory``.
        2. If more than ``max_exchanges * 2`` messages are stored, pass a
           summary job to ``self.bot.schedule``. When that job runs it
           replaces the room's summary and trims the stored messages to the
           last ``keep_exchanges * 2``.
        3. Fill ``prompt_vicuna`` with the bot's persona, scenario, the
           current room summary and the participant names, and run a
           ``ConversationChain`` over a read-only view of the memory.
        4. Clean up the output and append the user message and the reply to
           the conversation memory.

        Args:
            message: Object exposing ``room_id``, ``user_name`` and
                ``message`` (the user's text).
            reply_fn: Unused here.
            typing_fn: Unused here.

        Returns:
            The cleaned-up reply text.
        """
        # Names used inside the chat transcript. (A Vicuna-style model would
        # use "### Assistant" / "### Human" here; that switch was hard-wired
        # off, so the plain names are always used.)
        chat_ai_name = self.bot.name
        chat_human_name = message.user_name

        conversation_memory = self.get_memory(message)
        # get_memory guarantees the room entry exists from here on.
        room = self.rooms[message.room_id]
        readonly_memory = ReadOnlySharedMemory(memory=conversation_memory)

        keep_exchanges = 5   # exchanges kept after a summary (x2 messages)
        max_exchanges = 12   # exchanges allowed before a summary is scheduled
        if len(conversation_memory.chat_memory.messages) > max_exchanges * 2:

            async def make_progressive_summary(previous_summary, chat_history_text_string):
                await asyncio.sleep(0)  # yield for matrix-nio
                summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_progressive_summary)
                room["summary"] = await summary_chain.apredict(summary=previous_summary, chat_history=chat_history_text_string)
                # ToDo: maybe add an add_task_done callback and don't access the variable directly from here?
                logger.info('New summary is: "%s"', room["summary"])
                # Keep only the most recent messages once they are summarised.
                conversation_memory.chat_memory.messages = conversation_memory.chat_memory.messages[-keep_exchanges * 2:]

            logger.info("memory progressive summary scheduled...")
            await self.bot.schedule(self.bot.queue, make_progressive_summary, room["summary"], conversation_memory.buffer)

        # ToDo: either use prompt.format() to fill out the pygmalion prompt and use
        # the resulting template text to feed it into the instruct prompt's instruction
        # or do this with the prompt.partial()

        prompt = prompt_vicuna.partial(
            ai_name=self.bot.name,
            persona=self.bot.persona,
            scenario=self.bot.scenario,
            summary=room["summary"],
            human_name=message.user_name,
            #example_dialogue=replace_all(self.bot.example_dialogue, {"{{user}}": chat_human_name, "{{char}}": chat_ai_name})
            ai_name_chat=chat_ai_name,
        )

        # The chain only reads the history; new turns are appended manually
        # below, after the output has been cleaned.
        chain = ConversationChain(
            llm=self.llm_chat,
            prompt=prompt,
            verbose=True,
            memory=readonly_memory,
        )

        stop = ['<|endoftext|>', f"\n{chat_human_name}"]
        output = await chain.arun({"input": message.message, "stop": stop})
        output = output.replace("<BOT>", self.bot.name).replace("<USER>", message.user_name)
        output = output.replace("### Assistant", self.bot.name)
        output = output.replace(f"\n{self.bot.name}: ", " ")
        output = output.strip()

        # TODO: when the output contains "*activates the neural uplink*",
        # hand the request over to an agent.

        conversation_memory.chat_memory.add_user_message(message.message)
        conversation_memory.chat_memory.add_ai_message(output)

        return output


    async def summarize(self, text):
        """Summarize ``text`` with the summary LLM and ``prompt_summary``.

        Args:
            text: Text passed to the chain as its ``text`` variable.

        Returns:
            The result of ``LLMChain.arun`` for the summary prompt.
        """
        #ToDo: We can summarize the whole dialogue here, let half of it in the buffer but skip doing a summary until this is flushed, too?
        # Use the instance's summary client: the bare name ``llm_summary`` is
        # not defined in this scope and raised NameError.
        summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_summary, verbose=True)
        return await summary_chain.arun(text=text)

    async def prime_llm(self, text):
        self.llm_chat(text, max_tokens=1)


def replace_all(text, dic):
    """Apply every ``old -> new`` replacement in ``dic`` to ``text``.

    Replacements are applied one after another in the mapping's iteration
    order, so a later entry can rewrite text produced by an earlier one.

    Args:
        text: String to transform.
        dic: Mapping of substrings to their replacements.

    Returns:
        The string after all replacements have been applied.
    """
    result = text
    for needle in dic:
        result = result.replace(needle, dic[needle])
    return result
complete rewrite 2 years ago			`import asyncio`
			`import time`
			`from .prompts import *`
			`from .langchain_memory import BotConversationSummerBufferWindowMemory`

			`from langchain import PromptTemplate`
further rewrite 2 years ago			`from langchain import LLMChain, ConversationChain`
bot summary memory 2 years ago			`from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory, CombinedMemory, ConversationSummaryMemory`
complete rewrite 2 years ago
further rewrite 2 years ago			`from langchain.chains.base import Chain`
			`from typing import Dict, List`

			`from langchain.document_loaders import TextLoader`
			`from langchain.text_splitter import RecursiveCharacterTextSplitter`
			`from langchain.embeddings import SentenceTransformerEmbeddings`
			`from langchain.vectorstores import Chroma`
complete rewrite 2 years ago
bot summary memory 2 years ago			`import humanize`
			`import datetime as dt`

complete rewrite 2 years ago			`import logging`

			`logger = logging.getLogger(__name__)`


further rewrite 2 years ago			`class RoleplayChain(Chain):`
			`llm_chain: LLMChain`

			`character_name: str`
			`persona: str`
			`scenario: str`
			`ai_name_chat: str`
			`human_name_chat: str`

			`output_key: str = "output_text" #: :meta private:`

			`@property`
			`def input_keys(self) -> List[str]:`
			`return ["character_name", "persona", "scenario", "ai_name_chat", "human_name_chat", "llm_chain"]`

			`@property`
			`def output_keys(self) -> List[str]:`
			`return [self.output_key]`

			`def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:`
			`other_keys = {k: v for k, v in inputs.items() if k not in self.input_keys}`
			`result = self.llm_chain.predict(**other_keys)`
			`return {self.output_key: result}`



complete rewrite 2 years ago			`class AI(object):`

further rewrite 2 years ago			`def __init__(self, bot, text_wrapper, image_wrapper, memory_path: str):`
complete rewrite 2 years ago			`self.name = bot.name`
			`self.bot = bot`
further rewrite 2 years ago			`self.memory_path = memory_path`
langchain test 2 years ago			`self.rooms = {}`
complete rewrite 2 years ago
			`from ..wrappers.langchain_koboldcpp import KoboldCpp`
			`self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<\|endoftext\|>'])`
			`self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<\|endoftext\|>'])`
			`self.text_wrapper = text_wrapper`
			`self.image_wrapper = image_wrapper`

further rewrite 2 years ago			`#self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)`

langchain test 2 years ago			`def get_memory(self, message):`
			`if not message.room_id in self.rooms:`
			`self.rooms[message.room_id] = {}`
bot summary memory 2 years ago			`memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", human_prefix=message.user_name, ai_prefix=self.bot.name)`
langchain test 2 years ago			`self.rooms[message.room_id]["memory"] = memory`
bot summary memory 2 years ago			`self.rooms[message.room_id]["summary"] = "No previous events."`
langchain test 2 years ago			`memory.chat_memory.add_ai_message(self.bot.greeting)`
			`#memory.save_context({"input": None, "output": self.bot.greeting})`
			`memory.load_memory_variables({})`
			`else:`
			`memory = self.rooms[message.room_id]["memory"]`
bot summary memory 2 years ago			`#print(f"memory: {memory.load_memory_variables({})}")`
			`#print(f"memory has an estimated {self.llm_chat.get_num_tokens(memory.buffer)} number of tokens")`
langchain test 2 years ago			`return memory`
further rewrite 2 years ago

			`async def generate(self, message, reply_fn, typing_fn):`
complete rewrite 2 years ago
further rewrite 2 years ago			`embeddings = SentenceTransformerEmbeddings()`
			`#embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")`

			`loader = TextLoader('./germany.txt')`
			`documents = loader.load()`

			`text_splitter = RecursiveCharacterTextSplitter(`
			`# Set a really small chunk size, just to show.`
			`chunk_size = 600,`
			`chunk_overlap = 100,`
			`length_function = len,`
			`)`
complete rewrite 2 years ago
further rewrite 2 years ago			`docs = text_splitter.split_documents(documents)`
complete rewrite 2 years ago
config file handling 2 years ago			`db = Chroma(persist_directory=os.path.join(self.memory_path, f'chroma-db'), embedding_function=embeddings)`
further rewrite 2 years ago
			`print(f"Indexing {len(docs)} documents")`
			`texts = [doc.page_content for doc in docs]`
			`metadatas = [doc.metadata for doc in docs]`
			`#db.add_texts(texts=texts, metadatas=metadatas, ids=None)`
			`#db.persist()`

			`query = "How is climate in Germany?"`
			`output_docs = db.similarity_search_with_score(query)`
			`print(query)`
			`print('###')`
			`for doc, score in output_docs:`
			`print("-" * 80)`
			`print("Score: ", score)`
			`print(doc.page_content)`
			`print("-" * 80)`
complete rewrite 2 years ago
			`prompt_template = "{input}"`
			`chain = LLMChain(`
			`llm=self.llm_chat,`
			`prompt=PromptTemplate.from_template(prompt_template),`
			`)`
langchain async 2 years ago			`output = await chain.arun(message.message)`
complete rewrite 2 years ago			`return output.strip()`

bot summary memory 2 years ago
complete rewrite 2 years ago			`async def generate_roleplay(self, message, reply_fn, typing_fn):`
further rewrite 2 years ago
bot summary memory 2 years ago			`chat_ai_name = self.bot.name`
			`chat_human_name = message.user_name`
			`if False: # model is vicuna`
			`chat_ai_name = "### Assistant"`
			`chat_human_name = "### Human"`

			`conversation_memory = self.get_memory(message)`
			`readonlymemory = ReadOnlySharedMemory(memory=conversation_memory)`
			`summary_memory = ConversationSummaryMemory(llm=self.llm_summary, memory_key="summary", input_key="input")`
			`#combined_memory = CombinedMemory(memories=[conversation_memory, summary_memory])`

			`k = 5 #5`
			`max_k = 12 #10`
			`if len(conversation_memory.chat_memory.messages) > max_k*2:`

			`async def make_progressive_summary(previous_summary, chat_history_text_string):`
summary 2 years ago			`await asyncio.sleep(0) # yield for matrix-nio`
bot summary memory 2 years ago			`#self.rooms[message.room_id]["summary"] = summary_memory.predict_new_summary(conversation_memory.chat_memory.messages, previous_summary).strip()`
			`summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_progressive_summary)`
			`self.rooms[message.room_id]["summary"] = await summary_chain.apredict(summary=previous_summary, chat_history=chat_history_text_string)`
			`# ToDo: maybe add an add_task_done callback and don't access the variable directly from here?`
			`logger.info(f"New summary is: \"{self.rooms[message.room_id]['summary']}\"")`
			`conversation_memory.chat_memory.messages = conversation_memory.chat_memory.messages[-k * 2 :]`
			`conversation_memory.load_memory_variables({})`
			`#summary = summarize(conversation_memory.buffer)`
			`#print(summary)`
			`#return summary`

			`logger.info("memory progressive summary scheduled...")`
summary 2 years ago			`await self.bot.schedule(self.bot.queue, make_progressive_summary, self.rooms[message.room_id]["summary"], conversation_memory.buffer) #.add_done_callback(`
bot summary memory 2 years ago


			`#t = dt.datetime.fromtimestamp(message.timestamp)`
			`#when = humanize.naturaltime(t)`
			`#print(when)`


			`# ToDo: either use prompt.format() to fill out the pygmalion prompt and use`
			`# the resulting template text to feed it into the instruct prompt's instruction`
			`# or do this with the prompt.partial()`
further rewrite 2 years ago
			`prompt = prompt_vicuna.partial(`
			`ai_name=self.bot.name,`
			`persona=self.bot.persona,`
			`scenario=self.bot.scenario,`
bot summary memory 2 years ago			`summary=self.rooms[message.room_id]["summary"],`
further rewrite 2 years ago			`human_name=message.user_name,`
bot summary memory 2 years ago			`#example_dialogue=replace_all(self.bot.example_dialogue, {"{{user}}": chat_human_name, "{{char}}": chat_ai_name})`
			`ai_name_chat=chat_ai_name,`
complete rewrite 2 years ago			`)`
further rewrite 2 years ago
			`chain = ConversationChain(`
complete rewrite 2 years ago			`llm=self.llm_chat,`
further rewrite 2 years ago			`prompt=prompt,`
complete rewrite 2 years ago			`verbose=True,`
config file handling 2 years ago			`memory=readonlymemory,`
complete rewrite 2 years ago			`#stop=['<\|endoftext\|>', '\nYou:', f"\n{message.user_name}:"],`
			`)`
further rewrite 2 years ago
			`# output = llm_chain(inputs={"ai_name": self.bot.name, "persona": self.bot.persona, "scenario": self.bot.scenario, "human_name": message.user_name, "ai_name_chat": self.bot.name, "chat_history": "", "input": message.message})['results'][0]['text']`

bot summary memory 2 years ago			`#roleplay_chain = RoleplayChain(llm_chain=chain, character_name=self.bot.name, persona=self.bot.persona, scenario=self.bot.scenario, ai_name_chat=chat_ai_name, human_name_chat=chat_human_name)`
further rewrite 2 years ago
summary 2 years ago			`stop = ['<\|endoftext\|>', f"\n{chat_human_name}"]`
bot summary memory 2 years ago			`#print(f"Message is: \"{message.message}\"")`
langchain async 2 years ago			`output = await chain.arun({"input":message.message, "stop": stop})`
bot summary memory 2 years ago			`output = output.replace("<BOT>", self.bot.name).replace("<USER>", message.user_name)`
			`output = output.replace("### Assistant", self.bot.name)`
summary 2 years ago			`output = output.replace(f"\n{self.bot.name}: ", " ")`
config file handling 2 years ago			`output = output.strip()`
bot summary memory 2 years ago
			`if "activates the neural uplink" in output:`
			`pass # call agent`

			`conversation_memory.chat_memory.add_user_message(message.message)`
			`conversation_memory.chat_memory.add_ai_message(output)`
			`conversation_memory.load_memory_variables({})`
langchain test 2 years ago
complete rewrite 2 years ago			`return output.strip()`
further rewrite 2 years ago

bot summary memory 2 years ago			`async def summarize(self, text):`
			`summary_chain = LLMChain(llm=llm_summary, prompt=prompt_summary, verbose=True)`
			`return await summary_chain.arun(text=text)`
			`#ToDo: We can summarize the whole dialogue here, let half of it in the buffer but skip doing a summary until this is flushed, too?`

			`async def prime_llm(self, text):`
			`self.llm_chat(text, max_tokens=1)`


langchain test 2 years ago			`def replace_all(text, dic):`
			`for i, j in dic.items():`
			`text = text.replace(i, j)`
			`return text`