@@ -1,22 +1,29 @@
import asyncio
import time
import os, time
import re  # used by CustomOutputParser.parse below
from .prompts import *
from .langchain_memory import BotConversationSummerBufferWindowMemory
#from .langchain_memory import BotConversationSummaryBufferWindowMemory, TestMemory
from ..utilities.messages import Message
from langchain import PromptTemplate
from langchain import LLMChain, ConversationChain
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory, CombinedMemory, ConversationSummaryMemory
from langchain.chains.base import Chain
from typing import Dict, List
from typing import Dict, List, Union
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser, ZeroShotAgent
from langchain.schema import AgentAction, AgentFinish
from langchain.schema import AIMessage, HumanMessage, SystemMessage, ChatMessage
from langchain.utilities import OpenWeatherMapAPIWrapper, SearxSearchWrapper, PythonREPL
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
import humanize
import datetime as dt
from datetime import datetime, timedelta
import logging
@@ -46,7 +53,31 @@ class RoleplayChain(Chain):
        other_keys = {k: v for k, v in inputs.items() if k not in self.input_keys}
        result = self.llm_chain.predict(**other_keys)
        return {self.output_key: result}
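# The parser below turns raw ReAct-style completions into agent steps: a
# "Final Answer:" marker ends the run (AgentFinish); otherwise the
# "Action:" / "Action Input:" pair is extracted and returned as an AgentAction.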
class CustomOutputParser(AgentOutputParser):
    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            regex = r"Action\s*\d*\s*:(.*?)[\s]*[\"\'](.*)[\"\']"
            match = re.search(regex, llm_output, re.DOTALL)
            if not match:
                raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
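# Illustrative shape of the completions this parser handles (made up, not from a real run):
#   Thought: I need current data
#   Action: Search
#   Action Input: population of Canada 2023
# ...and, to end a run:
#   Final Answer: <text returned to the user>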
class AI(object):
@@ -59,63 +90,87 @@ class AI(object):
        from ..wrappers.langchain_koboldcpp import KoboldCpp
        self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'])
        self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'])
        self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'], max_tokens=512)
        self.text_wrapper = text_wrapper
        self.image_wrapper = image_wrapper
        self.embeddings = SentenceTransformerEmbeddings()
        #embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")
        self.db = Chroma(persist_directory=os.path.join(self.memory_path, f'chroma-db'), embedding_function=self.embeddings)
        #self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)
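        # Note: ingest_textfile() and search_vectordb() below reuse self.db, so the
        # persist_directory and embedding function stay consistent between indexing
        # and querying.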
    def get_memory(self, message):
        if not message.room_id in self.rooms:
            self.rooms[message.room_id] = {}
            memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", human_prefix=message.user_name, ai_prefix=self.bot.name)
            self.rooms[message.room_id]["memory"] = memory
            self.rooms[message.room_id]["summary"] = "No previous events."
    def get_memory(self, room_id, human_prefix="Human"):
        if not room_id in self.rooms:
            self.rooms[room_id] = {}
            memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", human_prefix=human_prefix, ai_prefix=self.bot.name)
            self.rooms[room_id]["memory"] = memory
            self.rooms[room_id]["summary"] = "No previous events."
            memory.chat_memory.add_ai_message(self.bot.greeting)
            #memory.save_context({"input": None, "output": self.bot.greeting})
            memory.load_memory_variables({})
        else:
            memory = self.rooms[message.room_id]["memory"]
            memory = self.rooms[room_id]["memory"]
        #print(f"memory: {memory.load_memory_variables({})}")
        #print(f"memory has an estimated {self.llm_chat.get_num_tokens(memory.buffer)} number of tokens")
        return memory
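    # Hypothetical example of the per-room cache (room id made up):
    #   memory = ai.get_memory("!abc:example.org", human_prefix="Alice")
    #   assert memory is ai.get_memory("!abc:example.org")  # same object is reused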
    async def add_chat_message(self, message):
        conversation_memory = self.get_memory(message.room_id)
        langchain_message = message.to_langchain()
        if message.user_id == self.bot.connection.user_id:
            langchain_message.role = self.bot.name
        conversation_memory.chat_memory.messages.append(langchain_message)
    async def generate(self, message, reply_fn, typing_fn):
        embeddings = SentenceTransformerEmbeddings()
        #embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")
    async def clear(self, room_id):
        conversation_memory = self.get_memory(room_id)
        conversation_memory.clear()
        loader = TextLoader('./germany.txt')
    async def ingest_textfile(self, filename, category):
        loader = TextLoader(filename)
        documents = loader.load()
        documents[0].metadata['indexed'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        documents[0].metadata['category'] = category
        text_splitter = RecursiveCharacterTextSplitter(
            # Set a really small chunk size, just to show.
            chunk_size=600,
            chunk_overlap=100,
            chunk_size=1024,
            chunk_overlap=80,
            length_function=len,
            #length_function = self.llm_chat.get_num_tokens, # The embeddings are generated with SentenceTransformers, not this model
        )
        docs = text_splitter.split_documents(documents)
        db = Chroma(persist_directory=os.path.join(self.memory_path, f'chroma-db'), embedding_function=embeddings)
        for i in range(len(docs)):
            docs[i].metadata['part'] = f"{i}/{len(docs)}"
        print(f"Indexing {len(docs)} documents")
        texts = [doc.page_content for doc in docs]
        metadatas = [doc.metadata for doc in docs]
        #db.add_texts(texts=texts, metadatas=metadatas, ids=None)
        #db.persist()
        query = "How is climate in Germany?"
        output_docs = db.similarity_search_with_score(query)
        self.db.add_texts(texts=texts, metadatas=metadatas, ids=None)
        self.db.persist()
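    # Hypothetical call, mirroring the previously hard-coded loader:
    #   await ai.ingest_textfile('./germany.txt', 'wiki')  # 'wiki' category is made up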
    async def search_vectordb(self, query, category):
        #query = "How is climate in Germany?"
        #retriever = db.as_retriever()
        #docs = retriever.get_relevant_documents(query)
        if category:
            # https://github.com/chroma-core/chroma/blob/main/examples/where_filtering.ipynb
            output_docs = self.db.similarity_search_with_score(query, filter={"category": category})
        else:
            output_docs = self.db.similarity_search_with_score(query)
        print(query)
        print('###')
        for doc, score in output_docs:
            print("-" * 80)
            print("Score: ", score)
            print(doc.page_content)
            #print(doc.page_content)
            print(doc)
            print("-" * 80)
    async def generate(self, message, reply_fn, typing_fn):
        prompt_template = "{input}"
        chain = LLMChain(
            llm=self.llm_chat,
@@ -126,6 +181,17 @@ class AI(object):
    async def generate_roleplay(self, message, reply_fn, typing_fn):
        langchain_human_message = HumanMessage(
            content=message.message,
            additional_kwargs={
                "timestamp": message.timestamp,
                "user_name": message.user_name,
                "event_id": message.event_id,
                "user_id": message.user_id,
                "room_name": message.room_name,
                "room_id": message.room_id,
            }
        )
        chat_ai_name = self.bot.name
        chat_human_name = message.user_name
@@ -133,19 +199,20 @@ class AI(object):
            chat_ai_name = "### Assistant"
            chat_human_name = "### Human"
        conversation_memory = self.get_memory(message)
        conversation_memory = self.get_memory(message.room_id, message.user_name)
        conversation_memory.human_prefix = chat_human_name
        readonlymemory = ReadOnlySharedMemory(memory=conversation_memory)
        summary_memory = ConversationSummaryMemory(llm=self.llm_summary, memory_key="summary", input_key="input")
        #combined_memory = CombinedMemory(memories=[conversation_memory, summary_memory])
        k = 5  # 5
        max_k = 12  # 10
        k = 1  # 5
        max_k = 3  # 12
        if len(conversation_memory.chat_memory.messages) > max_k * 2:
            async def make_progressive_summary(previous_summary, chat_history_text_string):
                await asyncio.sleep(0)  # yield for matrix-nio
                #self.rooms[message.room_id]["summary"] = summary_memory.predict_new_summary(conversation_memory.chat_memory.messages, previous_summary).strip()
                summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_progressive_summary)
                summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_progressive_summary, verbose=True)
                self.rooms[message.room_id]["summary"] = await summary_chain.apredict(summary=previous_summary, chat_history=chat_history_text_string)
                # ToDo: maybe add an add_task_done callback and don't access the variable directly from here?
                logger.info(f"New summary is: \"{self.rooms[message.room_id]['summary']}\"")
@@ -155,12 +222,13 @@ class AI(object):
                #print(summary)
                #return summary
            logger.info("memory progressive summary scheduled...")
            await self.bot.schedule(self.bot.queue, make_progressive_summary, self.rooms[message.room_id]["summary"], conversation_memory.buffer)  #.add_done_callback(
        #t = dt.datetime.fromtimestamp(message.timestamp)
        #t = datetime.fromtimestamp(message.timestamp)
        #when = humanize.naturaltime(t)
        #print(when)
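        # In short: once the buffer exceeds max_k * 2 messages, the previous summary
        # plus the buffered history is condensed into a new room summary on the bot's
        # task queue, so generating the reply below is not blocked by summarization.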
@@ -193,26 +261,150 @@ class AI(object):
        stop = ['<|endoftext|>', f"\n{chat_human_name}"]
        #print(f"Message is: \"{message.message}\"")
        await asyncio.sleep(0)
        output = await chain.arun({"input": message.message, "stop": stop})
        output = output.replace("<BOT>", self.bot.name).replace("<USER>", message.user_name)
        output = output.replace("### Assistant", self.bot.name)
        output = output.replace(f"\n{self.bot.name}:", "")
        output = output.strip()
        if "*activates the neural uplink*" in output:
        langchain_ai_message = AIMessage(
            content=output,
            additional_kwargs={
                "timestamp": datetime.now().timestamp(),
                "user_name": self.bot.name,
                "event_id": None,
                "user_id": None,
                "room_name": message.room_name,
                "room_id": message.room_id,
            }
        )
        if "*activates the neural uplink*" in output.casefold():
            pass  # call agent
        #conversation_memory.chat_memory.messages.append(ChatMessage(content=message, role=message.user_name))
        conversation_memory.chat_memory.add_user_message(message.message)
        conversation_memory.chat_memory.add_ai_message(output)
        conversation_memory.load_memory_variables({})
        if not "messages_today" in self.rooms[message.room_id]:
            self.rooms[message.room_id]["messages_today"] = []
        self.rooms[message.room_id]["messages_today"].append(langchain_human_message)
        self.rooms[message.room_id]["messages_today"].append(langchain_ai_message)
        return output.strip()
    async def summarize(self, text):
        summary_chain = LLMChain(llm=llm_summary, prompt=prompt_summary, verbose=True)
        return await summary_chain.arun(text=text)
        #ToDo: We can summarize the whole dialogue here, leave half of it in the buffer, but skip doing a summary until that is flushed, too?
        await asyncio.sleep(0)  # yield for matrix-nio
        summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_summary, verbose=True)
        return await summary_chain.arun(text=text)
        #ToDo: We can summarize the whole dialogue here, leave half of it in the buffer, but skip doing a summary until that is flushed, too?
        #ToDo: max_tokens and stop
    async def diary(self, room_id):
        await asyncio.sleep(0)  # yield for matrix-nio
        diary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_outline, verbose=True)
        #self.rooms[message.room_id]["summary"]
        string_messages = []
        for m in self.rooms[room_id]["messages_today"]:
            # messages_today holds langchain messages: the author name sits in
            # additional_kwargs, the text in content
            string_messages.append(f"{m.additional_kwargs['user_name']}: {m.content}")
        return await diary_chain.apredict(text="\n".join(string_messages))
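    # Hypothetical call (sleep() below does this once per room):
    #   entry = await ai.diary("!abc:example.org")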
    async def agent(self):
        os.environ["OPENWEATHERMAP_API_KEY"] = "82452fdb0d1e0e805ac096db87914342"
        # Tools
        search = DuckDuckGoSearchAPIWrapper()
        weather = OpenWeatherMapAPIWrapper()
        search2 = SearxSearchWrapper(searx_host="https://search.mdosch.de")
        python_repl = PythonREPL()
        tools = [
            Tool(
                name="Search",
                func=search.run,
                description="useful for when you need to answer questions about current events"
            ),
            Tool(
                name="Searx Search",
                func=search2.run,
                description="useful for when you need to answer questions about current events"
            ),
            Tool(
                name="Weather",
                func=weather.run,
                description="Useful for fetching current weather information for a specified location. Input should be a location string (e.g. 'London,GB')."
            ),
            Tool(
                name="Summary",
                func=summry_chain.run,
                description="useful for when you summarize a conversation. The input to this tool should be a string, representing who will read this summary."
            )
        ]
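        # The executor picks a tool by matching the parsed "Action:" value against
        # these Tool names, so the names above must appear verbatim in the prompt.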
        prompt = ZeroShotAgent.create_prompt(
            tools=tools,
            prefix=prefix,
            suffix=suffix,
            input_variables=["input", "chat_history", "agent_scratchpad"]
        )
        output_parser = CustomOutputParser()
        # LLM chain consisting of the LLM and a prompt
        llm_chain = LLMChain(llm=llm, prompt=prompt_agent)
        agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
        #agent = initialize_agent(tools, llm, agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, return_intermediate_steps=True, memory=memory)
        #tool_names = [tool.name for tool in tools]
        #agent = LLMSingleActionAgent(
        #    llm_chain=llm_chain,
        #    output_parser=output_parser,
        #    stop=["\nObservation:"],
        #    allowed_tools=tool_names,
        #    verbose=True,
        #)
        agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)
        await agent_executor.arun(input="How many people live in canada as of 2023?")
    async def sleep(self):
        # Write Date into chat history
        for room_id in self.rooms.keys():
            #fake_message = Message(datetime.now().timestamp(), self.bot.name, "", event_id=None, user_id=None, room_name=None, room_id=room_id)
            conversation_memory = self.get_memory(room_id)
            message = SystemMessage(
                content=f"~~~~ {datetime.now().strftime('%A, %B %d, %Y')} ~~~~",
                additional_kwargs={
                    "timestamp": datetime.now().timestamp(),
                    "user_name": self.bot.name,
                    "event_id": None,
                    "user_id": None,
                    "room_name": None,
                    "room_id": room_id,
                }
            )
            conversation_memory.chat_memory.messages.append(message)
            #conversation_memory.chat_memory.add_system_message(message)
        # Summarize the last day and save a diary entry
        yesterday = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        for room_id in self.rooms.keys():
            if "messages_today" in self.rooms[room_id]:
                self.bot.rooms[room_id]["diary"][yesterday] = await self.diary(room_id)
                # Calculate new goals for the character
                # Update stats
                # Let background tasks run
                self.rooms[room_id]["messages_today"] = []
        await self.bot.write_conf2(self.bot.rooms)
    async def prime_llm(self, text):
        self.llm_chat(text, max_tokens=1)
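    # Presumably a warm-up: generating a single token forces the backend to load
    # the model and fill its prompt cache before the first real request arrives.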