matrix-pygmalion-bot/matrix_pygmalion_bot/bot/ai/langchain.py


								import asyncio

								import os, time

								from .prompts import *

								from .langchain_memory import CustomMemory, ChangeNamesMemory # BotConversationSummaryBufferWindowMemory, TestMemory

								from ..utilities.messages import Message


								from langchain import PromptTemplate

								from langchain import LLMChain, ConversationChain

								from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory, CombinedMemory, ConversationSummaryMemory


								from langchain.chains.base import Chain

								from typing import Dict, List, Union


								from langchain.document_loaders import TextLoader

								from langchain.text_splitter import RecursiveCharacterTextSplitter

								from langchain.embeddings import SentenceTransformerEmbeddings

								from langchain.vectorstores import Chroma


								from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser, ZeroShotAgent

								from langchain.schema import AgentAction, AgentFinish

								from langchain.schema import AIMessage, HumanMessage, SystemMessage, ChatMessage

								from langchain.utilities import OpenWeatherMapAPIWrapper, SearxSearchWrapper, PythonREPL

								from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper


								import humanize

								from datetime import datetime, timedelta


								import logging


								logger = logging.getLogger(__name__)


								class RoleplayChain(Chain):

								    llm_chain: LLMChain


								    character_name: str

								    persona: str

								    scenario: str

								    ai_name_chat: str

								    human_name_chat: str


								    output_key: str = "output_text"  #: :meta private:


								    @property

								    def input_keys(self) -> List[str]:

								        return ["character_name", "persona", "scenario", "ai_name_chat", "human_name_chat", "llm_chain"]


								    @property

								    def output_keys(self) -> List[str]:

								        return [self.output_key]


								    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:

								        other_keys = {k: v for k, v in inputs.items() if k not in self.input_keys}

								        result = self.llm_chain.predict(**other_keys)

								        return {self.output_key: result}


								class CustomOutputParser(AgentOutputParser):


								    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:

								        # Check if agent should finish

								        if "Final Answer:" in llm_output:

								            return AgentFinish(

								                # Return values is generally always a dictionary with a single `output` key

								                # It is not recommended to try anything else at the moment :)

								                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},

								                log=llm_output,

								            )

								        # Parse out the action and action input

								        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"

								        match = re.search(regex, llm_output, re.DOTALL)

								        if not match:

								            regex = r"Action\s*\d*\s*:(.*?)[\s]*[\"\'](.*)[\"\']"

								            match = re.search(regex, llm_output, re.DOTALL)

								            if not match:

								                raise ValueError(f"Could not parse LLM output: `{llm_output}`")

								        action = match.group(1).strip()

								        action_input = match.group(2)

								        # Return the action and action input

								        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)


								class AI(object):


								    def __init__(self, bot, text_wrapper, image_wrapper, memory_path: str):

								        self.name = bot.name

								        self.bot = bot

								        self.memory_path = memory_path

								        self.rooms = {}


								        from ..wrappers.langchain_koboldcpp import KoboldCpp

								        self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'])

								        self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5002/api/latest/generate", stop=['<|endoftext|>'], max_tokens=512)

								        self.llm_chat_model = "pygmalion-7b"

								        self.llm_summary_model = "vicuna-13b"

								        self.text_wrapper = text_wrapper

								        self.image_wrapper = image_wrapper

								        self.embeddings = SentenceTransformerEmbeddings()

								        #embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")

								        self.db = Chroma(persist_directory=os.path.join(self.memory_path, f'chroma-db'), embedding_function=self.embeddings)


								        #self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)


								    def get_memory(self, room_id, human_prefix=None):

								        if not room_id in self.rooms:

								            self.rooms[room_id] = {}

								            if "moving_summary" in self.bot.rooms[room_id]:

								                moving_summary = self.bot.rooms[room_id]['moving_summary']

								            else:

								                moving_summary = "No previous events."

								            if not human_prefix:

								                human_prefix = "Human"

								            memory = CustomMemory(memory_key="chat_history", input_key="input", human_prefix=human_prefix, ai_prefix=self.bot.name, llm=self.llm_summary, summary_prompt=prompt_progressive_summary, moving_summary_buffer=moving_summary, max_len=1200, min_len=200)

								            self.rooms[room_id]["memory"] = memory

								            #memory.chat_memory.add_ai_message(self.bot.greeting)

								        else:

								            memory = self.rooms[room_id]["memory"]

								            if human_prefix:

								                memory.human_prefix = human_prefix

								        return memory


								    async def add_chat_message(self, message):

								        room_id = message.additional_kwargs['room_id']

								        conversation_memory = self.get_memory(room_id)

								        conversation_memory.chat_memory.messages.append(message)

								        conversation_memory.chat_memory_day.messages.append(message)


								    async def clear(self, room_id):

								        conversation_memory = self.get_memory(room_id)

								        conversation_memory.clear()


								    async def ingest_textfile(self, filename, category):

								        loader = TextLoader(filename)

								        documents = loader.load()

								        documents[0].metadata['indexed'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

								        documents[0].metadata['category'] = category


								        text_splitter = RecursiveCharacterTextSplitter(

								            # Set a really small chunk size, just to show.

								            chunk_size = 1024,

								            chunk_overlap  = 80,

								            length_function = len,

								            #length_function = self.llm_chat.get_num_tokens,  # The Embeddings are generated with SsentenceTransformers, not this model

								        )


								        docs = text_splitter.split_documents(documents)


								        for i in range(len(docs)):

								            docs[i].metadata['part'] = f"{i}/{len(docs)}"


								        print(f"Indexing {len(docs)} documents")

								        texts = [doc.page_content for doc in docs]

								        metadatas = [doc.metadata for doc in docs]

								        self.db.add_texts(texts=texts, metadatas=metadatas, ids=None)

								        self.db.persist()


								    async def search_vectordb(self, query, category):

								        #query = "How is climate in Germany?"

								        #retreiver = db.as_retreiver()

								        #docs = retreiver.get_relevant_documents(query)

								        if category:

								            #https://github.com/chroma-core/chroma/blob/main/examples/where_filtering.ipynb

								            output_docs = self.db.similarity_search_with_score(query, filter={"category": category})

								        else:

								            output_docs = self.db.similarity_search_with_score(query)

								        print(query)

								        print('###')

								        for doc, score in output_docs:

								            print("-" * 80)

								            print("Score: ", score)

								            #print(doc.page_content)

								            print(doc)

								            print("-" * 80)


								    async def generate(self, message, reply_fn, typing_fn):


								        prompt_template = "{input}"

								        chain = LLMChain(

								            llm=self.llm_chat,

								            prompt=PromptTemplate.from_template(prompt_template),

								        )

								        output = await chain.arun(message.content)

								        return output.strip()


								    async def generate_roleplay(self, message, reply_fn, typing_fn):


								        chat_ai_name = self.bot.name

								        chat_human_name = message.additional_kwargs['user_name']

								        room_id = message.additional_kwargs['room_id']


								        if self.llm_chat_model.startswith('vicuna'):

								            prompt_chat = prompt_vicuna

								            chat_ai_name = "### Assistant"

								            chat_human_name = "### Human"

								        elif self.llm_chat_model.startswith('pygmalion'):

								            prompt_chat = prompt_pygmalion

								            chat_human_name = "You"

								        elif self.llm_chat_model.startswith('koboldai'):

								            prompt_chat = prompt_koboldai

								        else:

								            prompt_chat = prompt_alpaca


								        conversation_memory = self.get_memory(room_id, chat_human_name)

								        readonlymemory = ReadOnlySharedMemory(memory=conversation_memory)

								        custom_memory = ChangeNamesMemory(memory=conversation_memory, replace_ai_chat_names={self.bot.name: chat_ai_name}, replace_human_chat_names={message.additional_kwargs['user_name']: chat_human_name})

								        #summary_memory = ConversationSummaryMemory(llm=self.llm_summary, memory_key="summary", input_key="input")

								        #combined_memory = CombinedMemory(memories=[conversation_memory, summary_memory])


								        #await self.bot.schedule(self.bot.queue, make_progressive_summary, self.rooms[room_id]["summary"], conversation_memory.buffer) #.add_done_callback(


								        #t = datetime.fromtimestamp(message.additional_kwargs['timestamp'])

								        #when = humanize.naturaltime(t)

								        #print(when)


								        # ToDo: either use prompt.format() to fill out the pygmalion prompt and use

								        # the resulting template text to feed it into the instruct prompt's instruction

								        # or do this with the prompt.partial()


								        prompt = prompt_chat.partial(

								            ai_name=self.bot.name,

								            persona=self.bot.persona,

								            scenario=self.bot.scenario,

								            human_name=chat_human_name,

								            ai_name_chat=chat_ai_name,

								        )

								        if "summary" in prompt_chat.input_variables:

								            prompt = prompt.partial(summary=conversation_memory.moving_summary_buffer)

								        if "example_dialogue" in prompt_chat.input_variables:

								            prompt = prompt.partial(

								                example_dialogue=self.bot.example_dialogue.replace("{{user}}", chat_human_name)

								            )


								        tmp_prompt_text = prompt.format(chat_history=conversation_memory.buffer, input=message.content)

								        prompt_len = self.llm_chat.get_num_tokens(tmp_prompt_text)


								        if prompt_len+256 > 2000:

								            logger.warning(f"Prompt too large. Estimated {prompt_len} tokens")

								            await reply_fn(f"<WARNING> Prompt too large. Estimated {prompt_len} tokens")

								            await conversation_memory.prune_memory(conversation_memory.min_len)


								        #roleplay_chain = RoleplayChain(llm_chain=chain, character_name=self.bot.name, persona=self.bot.persona, scenario=self.bot.scenario, ai_name_chat=chat_ai_name, human_name_chat=chat_human_name)


								        chain = ConversationChain(

								            llm=self.llm_chat,

								            prompt=prompt,

								            verbose=True,

								            memory=custom_memory,

								            #stop=['<|endoftext|>', '\nYou:', f"\n{chat_human_name}:"],

								        )


								#        output = llm_chain(inputs={"ai_name": self.bot.name, "persona": self.bot.persona, "scenario": self.bot.scenario, "human_name": chat_human_name, "ai_name_chat": self.bot.name, "chat_history": "", "input": message.content})['results'][0]['text']


								        stop = ['<|endoftext|>', f"\n{chat_human_name}"]

								        #print(f"Message is: \"{message.content}\"")

								        await asyncio.sleep(0)

								        output = await chain.arun({"input":message.content, "stop": stop})

								        output = output.replace("<BOT>", self.bot.name).replace("<USER>", chat_human_name)

								        output = output.replace("### Assistant", self.bot.name)

								        output = output.replace(f"\n{self.bot.name}: ", " ")

								        output = output.strip()


								        if "*activates the neural uplink*" in output.casefold():

								            pass # call agent


								        own_message_resp = await reply_fn(output)


								        output_message = AIMessage(

								            content=output,

								            additional_kwargs={

								                "timestamp": datetime.now().timestamp(),

								                "user_name": self.bot.name,

								                "event_id": own_message_resp.event_id,

								                "user_id": self.bot.connection.user_id,

								                "room_name": message.additional_kwargs['room_name'],

								                "room_id": own_message_resp.room_id,

								            }

								        )


								        await conversation_memory.asave_context(message, output_message)

								        summary_len = self.llm_chat.get_num_tokens(conversation_memory.moving_summary_buffer)

								        if summary_len > 400:

								            logger.warning("Summary is getting too long. Refining...")

								            conversation_memory.moving_summary_buffer = await self.summarize(conversation_memory.moving_summary_buffer)

								            new_summary_len = self.llm_chat.get_num_tokens(conversation_memory.moving_summary_buffer)

								            logger.info(f"Refined summary from {summary_len} tokens to {new_summary_len} tokens ({new_summary_len-summary_len} tokens)")

								        self.bot.rooms[room_id]['moving_summary'] = conversation_memory.moving_summary_buffer


								        return output


								    async def summarize(self, text):

								        await asyncio.sleep(0) # yield for matrix-nio

								        summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_summary, verbose=True)

								        return await summary_chain.arun(text=text)

								        #ToDo: We can summarize the whole dialogue here, let half of it in the buffer but skip doing a summary until this is flushed, too?

								        #ToDo: max_tokens and stop


								    async def diary(self, room_id):

								        await asyncio.sleep(0) # yield for matrix-nio

								        diary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_outline, verbose=True)

								        conversation_memory = self.get_memory(room_id)


								        if self.llm_summary.get_num_tokens(conversation_memory.buffer_day) < 1600:

								            input_text = conversation_memory.buffer_day

								        else:

								            input_text = conversation_memory.moving_summary_buffer


								        return await diary_chain.apredict(text=input_text, ai_name=self.bot.name)


								    async def agent(self):


								        os.environ["OPENWEATHERMAP_API_KEY"] = "82452fdb0d1e0e805ac096db87914342"

								        # Tools

								        search = DuckDuckGoSearchAPIWrapper()

								        weather = OpenWeatherMapAPIWrapper()

								        search2 = SearxSearchWrapper(searx_host="https://search.mdosch.de")

								        python_repl = PythonREPL()


								        tools = [

								            Tool(

								                name = "Search",

								                func=search.run,

								                description="useful for when you need to answer questions about current events"

								            ),

								            Tool(

								                name = "Searx Search",

								                func=search.run,

								                description="useful for when you need to answer questions about current events"

								            ),

								            Tool(

								                name = "Weather",

								                func=weather.run,

								                description="Useful for fetching current weather information for a specified location. Input should be a location string (e.g. 'London,GB')."

								            ),

								            Tool(

								                name = "Summary",

								                func=summry_chain.run,

								                description="useful for when you summarize a conversation. The input to this tool should be a string, representing who will read this summary."

								            )

								        ]


								        prompt = ZeroShotAgent.create_prompt(

								            tools=tools,

								            prefix=prefix,

								            suffix=suffix,

								            input_variables=["input", "chat_history", "agent_scratchpad"]

								        )


								        output_parser = CustomOutputParser()


								        # LLM chain consisting of the LLM and a prompt

								        llm_chain = LLMChain(llm=llm, prompt=prompt_agent)


								        agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)

								        #agent = initialize_agent(tools, llm, agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, return_intermediate_steps=True, memory=memory)


								        #tool_names = [tool.name for tool in tools]

								        #agent = LLMSingleActionAgent(

								        #    llm_chain=llm_chain,

								        #    output_parser=output_parser,

								        #    stop=["\nObservation:"],

								        #    allowed_tools=tool_names,

								        #    verbose=True,

								        #)


								        agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)


								        await agent_executor.arun(input="How many people live in canada as of 2023?")


								    async def sleep(self):

								        logger.info(f"{self.bot.name} sleeping now... running background tasks...")

								        # Write Date into chat history

								        for room_id in self.rooms.keys():

								            #fake_message = Message(datetime.now().timestamp(), self.bot.name, "", event_id=None, user_id=None, room_name=None, room_id=room_id)

								            conversation_memory = self.get_memory(room_id)

								            message = SystemMessage(

								                content=f"~~~~ {datetime.now().strftime('%A, %B %d, %Y')} ~~~~",

								                additional_kwargs={

								                    "timestamp": datetime.now().timestamp(),

								                    "user_name": None,

								                    "event_id": None,

								                    "user_id": None,

								                    "room_name": None,

								                    "room_id": room_id,

								                }

								            )

								            if conversation_memory.chat_memory.messages[-1].content.startswith('~~~~ '):

								                conversation_memory.chat_memory.messages.pop()

								            conversation_memory.chat_memory.messages.append(message)

								            #conversation_memory.chat_memory.add_system_message(message)


								        # Summarize the last day and save a diary entry

								        yesterday = ( datetime.now() - timedelta(days=1) ).strftime('%Y-%m-%d')

								        for room_id in self.rooms.keys():

								            if len(conversation_memory.chat_memory_day.messages) > 0:

								                if not "diary" in self.bot.rooms[room_id]:

								                    self.bot.rooms[room_id]['diary'] = {}

								                self.bot.rooms[room_id]["diary"][yesterday] = await self.diary(room_id)

								        # Calculate new goals for the character

								        # Update stats

								        # Let background tasks run

								        conversation_memory.chat_memory_day.clear()

								        await conversation_memory.prune_memory(conversation_memory.min_len)

								        await self.bot.write_conf2(self.bot.rooms)

								        logger.info(f"{self.bot.name} done sleeping and ready for the next day...")


								    async def prime_llm(self, text):

								        self.llm_chat(text, max_tokens=1)


								def replace_all(text, dic):

								    #example_dialogue=replace_all(self.bot.example_dialogue, {"{{user}}": chat_human_name, "{{char}}": chat_ai_name})

								    for i, j in dic.items():

								        text = text.replace(i, j)

								    return text