Chatbot
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

395 lines
16 KiB

import asyncio
import os, time
import re
from .prompts import *
from .langchain_memory import CustomMemory # BotConversationSummaryBufferWindowMemory, TestMemory
from ..utilities.messages import Message
from langchain import PromptTemplate
from langchain import LLMChain, ConversationChain
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory, CombinedMemory, ConversationSummaryMemory
from langchain.chains.base import Chain
from typing import Dict, List, Union
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser, ZeroShotAgent
from langchain.schema import AgentAction, AgentFinish
from langchain.schema import AIMessage, HumanMessage, SystemMessage, ChatMessage
from langchain.utilities import OpenWeatherMapAPIWrapper, SearxSearchWrapper, PythonREPL
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
import humanize
from datetime import datetime, timedelta
import logging
logger = logging.getLogger(__name__)
class RoleplayChain(Chain):
    """Chain that hands its extra inputs to a wrapped ``LLMChain``.

    The character fields below are declared as this chain's input keys.
    ``_call`` strips exactly those keys from the incoming mapping and
    forwards whatever remains to ``llm_chain.predict``.
    """

    llm_chain: LLMChain
    character_name: str
    persona: str
    scenario: str
    ai_name_chat: str
    human_name_chat: str
    output_key: str = "output_text"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Return the names this chain declares as its inputs."""
        return [
            "character_name",
            "persona",
            "scenario",
            "ai_name_chat",
            "human_name_chat",
            "llm_chain",
        ]

    @property
    def output_keys(self) -> List[str]:
        """Return the single output key of this chain."""
        return [self.output_key]

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Run the wrapped LLM chain on every input that is not a declared key.

        Args:
            inputs: Mapping of input names to values.

        Returns:
            A one-entry dict mapping ``output_key`` to the prediction.
        """
        declared = self.input_keys
        forwarded = {}
        for name, value in inputs.items():
            # Declared keys describe the character, they are not passed on.
            if name in declared:
                continue
            forwarded[name] = value
        prediction = self.llm_chain.predict(**forwarded)
        return {self.output_key: prediction}
# Patterns are compiled once at import time instead of on every parse call.
# Primary form: an "Action:" line followed by an "Action Input:" line, each
# optionally carrying a step number.
_ACTION_WITH_INPUT_RE = re.compile(
    r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)",
    re.DOTALL,
)
# Fallback form: "Action: <tool>" followed by the input wrapped in quotes.
_ACTION_QUOTED_INPUT_RE = re.compile(
    r"Action\s*\d*\s*:(.*?)[\s]*[\"\'](.*)[\"\']",
    re.DOTALL,
)


class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into an ``AgentAction`` or an ``AgentFinish``."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse one LLM completion.

        Args:
            llm_output: The raw text produced by the LLM.

        Returns:
            ``AgentFinish`` when the text contains ``"Final Answer:"`` (the
            text after its last occurrence becomes the ``output`` value);
            otherwise an ``AgentAction`` naming the tool and its input.

        Raises:
            ValueError: If neither action pattern matches the text.
        """
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            final_answer = llm_output.split("Final Answer:")[-1].strip()
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": final_answer},
                log=llm_output,
            )

        # Try the two-line form first, then the quoted-input fallback.
        match = _ACTION_WITH_INPUT_RE.search(llm_output)
        if match is None:
            match = _ACTION_QUOTED_INPUT_RE.search(llm_output)
        if match is None:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")

        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input
        return AgentAction(
            tool=action,
            tool_input=action_input.strip(" ").strip('"'),
            log=llm_output,
        )
class AI(object):
    """Text-generation backend of the bot.

    Holds two ``KoboldCpp`` LLM clients (one for chat, one for summaries), a
    Chroma vector store persisted below ``memory_path`` and one
    ``CustomMemory`` per room in ``self.rooms``.
    """

    # Thresholds used by ``generate_roleplay`` (values taken from the
    # original inline literals).
    _PROMPT_TOKEN_LIMIT = 2000     # warn when prompt + headroom exceeds this
    _PROMPT_TOKEN_HEADROOM = 256   # NOTE(review): presumably room for the reply - confirm
    _SUMMARY_TOKEN_LIMIT = 400     # refine the moving summary above this

    def __init__(
        self,
        bot,
        text_wrapper,
        image_wrapper,
        memory_path: str,
        *,
        chat_endpoint_url: str = "http://172.16.85.10:5001/api/latest/generate",
        summary_endpoint_url: str = "http://172.16.85.10:5002/api/latest/generate",
    ):
        """Create the LLM clients, the embeddings and the vector store.

        Args:
            bot: Bot object; ``name`` and ``temperature`` are read here and
                the object is kept as ``self.bot``.
            text_wrapper: Stored as ``self.text_wrapper``.
            image_wrapper: Stored as ``self.image_wrapper``.
            memory_path: Directory under which ``chroma-db`` is persisted.
            chat_endpoint_url: Endpoint of the chat model. Defaults to the
                previously hard-coded address.
            summary_endpoint_url: Endpoint of the summary model. Defaults to
                the previously hard-coded address.
        """
        # Function-scope import, as in the original code.
        from ..wrappers.langchain_koboldcpp import KoboldCpp

        self.name = bot.name
        self.bot = bot
        self.memory_path = memory_path
        self.rooms = {}
        self.llm_chat = KoboldCpp(
            temperature=self.bot.temperature,
            endpoint_url=chat_endpoint_url,
            stop=['<|endoftext|>'],
        )
        self.llm_summary = KoboldCpp(
            temperature=0.2,
            endpoint_url=summary_endpoint_url,
            stop=['<|endoftext|>'],
            max_tokens=512,
        )
        self.text_wrapper = text_wrapper
        self.image_wrapper = image_wrapper
        self.embeddings = SentenceTransformerEmbeddings()
        #embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")
        self.db = Chroma(
            persist_directory=os.path.join(self.memory_path, 'chroma-db'),
            embedding_function=self.embeddings,
        )

    def get_memory(self, room_id, human_prefix="Human"):
        """Return the conversation memory of a room, creating it on first use.

        A new memory is seeded with the room's stored ``moving_summary`` from
        ``self.bot.rooms`` or, if there is none, with "No previous events.".

        NOTE(review): for an existing memory ``human_prefix`` is overwritten
        with the argument, so callers relying on the default reset it to
        "Human" - confirm this is intended.

        Args:
            room_id: Key of the room in ``self.rooms`` / ``self.bot.rooms``.
            human_prefix: Prefix used for the human side of the dialogue.

        Returns:
            The ``CustomMemory`` instance of the room.

        Raises:
            KeyError: If ``room_id`` is unknown to ``self.bot.rooms`` when the
                memory has to be created.
        """
        if room_id not in self.rooms:
            bot_room = self.bot.rooms[room_id]
            if "moving_summary" in bot_room:
                moving_summary = bot_room['moving_summary']
            else:
                moving_summary = "No previous events."
            memory = CustomMemory(
                memory_key="chat_history",
                input_key="input",
                human_prefix=human_prefix,
                ai_prefix=self.bot.name,
                llm=self.llm_summary,
                summary_prompt=prompt_progressive_summary,
                moving_summary_buffer=moving_summary,
                max_len=1200,
                min_len=200,
            )
            # Register the room only once the memory exists, so a failure
            # above cannot leave a room entry without a "memory" key.
            self.rooms[room_id] = {"memory": memory}
        else:
            memory = self.rooms[room_id]["memory"]
        if human_prefix != memory.human_prefix:
            memory.human_prefix = human_prefix
        return memory

    async def add_chat_message(self, message):
        """Append ``message`` to the chat memory of the room it belongs to.

        The room is taken from ``message.additional_kwargs['room_id']``.
        """
        room_id = message.additional_kwargs['room_id']
        conversation_memory = self.get_memory(room_id)
        conversation_memory.chat_memory.messages.append(message)

    async def clear(self, room_id):
        """Clear the conversation memory of ``room_id``."""
        conversation_memory = self.get_memory(room_id)
        conversation_memory.clear()

    async def ingest_textfile(self, filename, category):
        """Split a text file into chunks and add them to the vector store.

        The document metadata gets an ``indexed`` timestamp and the given
        ``category``; every chunk additionally gets a ``part`` entry of the
        form ``"<index>/<total>"`` with a zero-based index.

        Args:
            filename: Path of the text file to load.
            category: Value stored in the ``category`` metadata field.
        """
        loader = TextLoader(filename)
        documents = loader.load()
        documents[0].metadata['indexed'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        documents[0].metadata['category'] = category
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1024,
            chunk_overlap=80,
            # Length is measured in characters: the embeddings come from
            # SentenceTransformers, not from the chat model's tokenizer.
            length_function=len,
        )
        docs = text_splitter.split_documents(documents)
        total = len(docs)
        for index, doc in enumerate(docs):
            doc.metadata['part'] = f"{index}/{total}"
        print(f"Indexing {total} documents")
        if not docs:
            # Nothing to add; skip the vector-store calls.
            return
        texts = [doc.page_content for doc in docs]
        metadatas = [doc.metadata for doc in docs]
        self.db.add_texts(texts=texts, metadatas=metadatas, ids=None)
        self.db.persist()

    async def search_vectordb(self, query, category):
        """Run a similarity search and print the scored hits.

        Args:
            query: Text to search for.
            category: If truthy, only documents whose ``category`` metadata
                equals this value are searched.

        Returns:
            The list of ``(document, score)`` pairs that was printed.
        """
        if category:
            #https://github.com/chroma-core/chroma/blob/main/examples/where_filtering.ipynb
            output_docs = self.db.similarity_search_with_score(query, filter={"category": category})
        else:
            output_docs = self.db.similarity_search_with_score(query)
        print(query)
        print('###')
        for doc, score in output_docs:
            print("-" * 80)
            print("Score: ", score)
            print(doc)
            print("-" * 80)
        return output_docs

    async def generate(self, message, reply_fn, typing_fn):
        """Send ``message.content`` to the chat model as the whole prompt.

        ``reply_fn`` and ``typing_fn`` are accepted but not used here.

        Returns:
            The stripped model output.
        """
        prompt_template = "{input}"
        chain = LLMChain(
            llm=self.llm_chat,
            prompt=PromptTemplate.from_template(prompt_template),
        )
        output = await chain.arun(message.content)
        return output.strip()

    async def generate_roleplay(self, message, reply_fn, typing_fn):
        """Generate an in-character reply, send it and record it in memory.

        The reply is passed to ``reply_fn``, saved to the room's memory
        together with ``message``, and the room's moving summary is stored in
        ``self.bot.rooms``. ``typing_fn`` is accepted but not used here.

        Args:
            message: Incoming message; ``content`` and the ``user_name``,
                ``room_id`` and ``room_name`` entries of ``additional_kwargs``
                are read.
            reply_fn: Awaitable callable taking the reply text; its result
                must expose ``event_id`` and ``room_id``.
            typing_fn: Unused.

        Returns:
            The cleaned-up reply text.
        """
        chat_ai_name = self.bot.name
        chat_human_name = message.additional_kwargs['user_name']
        room_id = message.additional_kwargs['room_id']
        # The original carried a hard-disabled (`if False`) switch that would
        # set the chat names to "### Assistant" / "### Human" for vicuna.

        conversation_memory = self.get_memory(room_id, chat_human_name)
        readonlymemory = ReadOnlySharedMemory(memory=conversation_memory)

        # ToDo: either use prompt.format() to fill out the pygmalion prompt and use
        # the resulting template text to feed it into the instruct prompt's instruction
        # or do this with the prompt.partial()
        prompt = prompt_vicuna.partial(
            ai_name=self.bot.name,
            persona=self.bot.persona,
            scenario=self.bot.scenario,
            summary=conversation_memory.moving_summary_buffer,
            human_name=chat_human_name,
            ai_name_chat=chat_ai_name,
        )

        # Only used to estimate the prompt size; the chain formats its own.
        tmp_prompt_text = prompt.format(chat_history=conversation_memory.buffer, input=message.content)
        prompt_len = self.llm_chat.get_num_tokens(tmp_prompt_text)
        if prompt_len + self._PROMPT_TOKEN_HEADROOM > self._PROMPT_TOKEN_LIMIT:
            logger.warning(f"Prompt too large. Estimated {prompt_len} tokens")

        chain = ConversationChain(
            llm=self.llm_chat,
            prompt=prompt,
            verbose=True,
            memory=readonlymemory,
        )
        stop = ['<|endoftext|>', f"\n{chat_human_name}"]

        await asyncio.sleep(0)
        output = await chain.arun({"input": message.content, "stop": stop})

        # Replace placeholders and speaker prefixes in the raw output.
        output = output.replace("<BOT>", self.bot.name).replace("<USER>", chat_human_name)
        output = output.replace("### Assistant", self.bot.name)
        output = output.replace(f"\n{self.bot.name}: ", " ")
        output = output.strip()

        # TODO: call the agent when "*activates the neural uplink*" appears in
        # output.casefold(); the original branch was a no-op placeholder.

        own_message_resp = await reply_fn(output)

        output_message = AIMessage(
            content=output,
            additional_kwargs={
                "timestamp": datetime.now().timestamp(),
                "user_name": self.bot.name,
                "event_id": own_message_resp.event_id,
                "user_id": self.bot.connection.user_id,
                "room_name": message.additional_kwargs['room_name'],
                "room_id": own_message_resp.room_id,
            }
        )
        await conversation_memory.asave_context(message, output_message)

        summary_len = self.llm_chat.get_num_tokens(conversation_memory.moving_summary_buffer)
        if summary_len > self._SUMMARY_TOKEN_LIMIT:
            logger.warning("Summary is getting too long. Refining...")
            conversation_memory.moving_summary_buffer = await self.summarize(conversation_memory.moving_summary_buffer)
            new_summary_len = self.llm_chat.get_num_tokens(conversation_memory.moving_summary_buffer)
            logger.info(f"Refined summary from {summary_len} tokens to {new_summary_len} tokens ({new_summary_len-summary_len} tokens)")
        # Stored on every call; get_memory() reads this value when it creates
        # a room's memory.
        self.bot.rooms[room_id]['moving_summary'] = conversation_memory.moving_summary_buffer

        return output

    async def summarize(self, text):
        """Return a summary of ``text`` produced by the summary model."""
        await asyncio.sleep(0)  # yield for matrix-nio
        summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_summary, verbose=True)
        return await summary_chain.arun(text=text)
        #ToDo: We can summarize the whole dialogue here, let half of it in the buffer but skip doing a summary until this is flushed, too?
        #ToDo: max_tokens and stop

    async def diary(self, room_id):
        """Return a diary text for ``room_id`` produced by the summary model.

        The day's buffer is used as input when it is shorter than 1600
        tokens; otherwise the room's moving summary is used instead.
        """
        await asyncio.sleep(0)  # yield for matrix-nio
        diary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_outline, verbose=True)
        conversation_memory = self.get_memory(room_id)
        if self.llm_summary.get_num_tokens(conversation_memory.buffer_day) < 1600:
            input_text = conversation_memory.buffer_day
        else:
            input_text = conversation_memory.moving_summary_buffer
        return await diary_chain.apredict(text=input_text)

    async def agent(self, query="How many people live in canada as of 2023?"):
        """Run a tool-using ZeroShot agent on ``query`` and return its answer.

        NOTE(review): the original referenced ``summry_chain``, ``llm`` and
        ``memory``, none of which are defined in this file. They are replaced
        below with a local summary chain, ``self.llm_chat`` and a fresh
        ``ConversationBufferMemory`` - confirm this matches the intent.
        ``prefix``, ``suffix`` and ``prompt_agent`` are not defined here
        either and are presumably supplied by the ``.prompts`` module.

        Args:
            query: Question handed to the agent. Defaults to the question
                that was previously hard-coded.

        Returns:
            The result of ``AgentExecutor.arun``.
        """
        # SECURITY NOTE(review): an API key is committed in source. It is now
        # only a fallback, a key already present in the environment wins.
        # Move it to configuration and rotate it.
        os.environ.setdefault("OPENWEATHERMAP_API_KEY", "82452fdb0d1e0e805ac096db87914342")

        # Tools
        search = DuckDuckGoSearchAPIWrapper()
        weather = OpenWeatherMapAPIWrapper()
        search2 = SearxSearchWrapper(searx_host="https://search.mdosch.de")
        # Same construction as in summarize().
        summary_chain = LLMChain(llm=self.llm_summary, prompt=prompt_summary, verbose=True)

        tools = [
            Tool(
                name="Search",
                func=search.run,
                description="useful for when you need to answer questions about current events"
            ),
            Tool(
                name="Searx Search",
                # Was search.run, which left the Searx wrapper unused.
                func=search2.run,
                description="useful for when you need to answer questions about current events"
            ),
            Tool(
                name="Weather",
                func=weather.run,
                description="Useful for fetching current weather information for a specified location. Input should be a location string (e.g. 'London,GB')."
            ),
            Tool(
                name="Summary",
                func=summary_chain.run,
                description="useful for when you summarize a conversation. The input to this tool should be a string, representing who will read this summary."
            )
        ]

        # NOTE(review): as in the original, this prompt is built but the
        # chain below uses prompt_agent - confirm which one is intended.
        prompt = ZeroShotAgent.create_prompt(
            tools=tools,
            prefix=prefix,
            suffix=suffix,
            input_variables=["input", "chat_history", "agent_scratchpad"]
        )

        # LLM chain consisting of the LLM and a prompt
        llm_chain = LLMChain(llm=self.llm_chat, prompt=prompt_agent)
        agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
        # Alternative kept from the original notes: LLMSingleActionAgent with
        # CustomOutputParser(), stop=["\nObservation:"] and
        # allowed_tools=[tool.name for tool in tools].

        # The key matches the "chat_history" input variable declared above.
        memory = ConversationBufferMemory(memory_key="chat_history")
        agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)
        return await agent_executor.arun(input=query)

    async def sleep(self):
        """Insert a date marker into every room and write diary entries.

        For each known room a ``SystemMessage`` carrying the current date is
        appended to the chat memory. Then, for each room whose day memory is
        not empty, a diary entry is generated and stored under yesterday's
        date in ``self.bot.rooms[room_id]["diary"]``, the day memory is
        cleared and the configuration is written via ``bot.write_conf2``.
        """
        now = datetime.now()

        # Write Date into chat history
        for room_id in list(self.rooms):
            conversation_memory = self.get_memory(room_id)
            message = SystemMessage(
                content=f"~~~~ {now.strftime('%A, %B %d, %Y')} ~~~~",
                additional_kwargs={
                    "timestamp": now.timestamp(),
                    "user_name": None,
                    "event_id": None,
                    "user_id": None,
                    "room_name": None,
                    "room_id": room_id,
                }
            )
            conversation_memory.chat_memory.messages.append(message)

        # Summarize the last day and save a diary entry
        yesterday = (now - timedelta(days=1)).strftime('%Y-%m-%d')
        # Iterate over a snapshot: the awaits below yield to other tasks,
        # which may register new rooms in self.rooms meanwhile.
        for room_id in list(self.rooms):
            # Look up this room's own memory; the original reused the memory
            # left over from the last iteration of the loop above.
            conversation_memory = self.get_memory(room_id)
            if conversation_memory.chat_memory_day.messages:
                self.bot.rooms[room_id]["diary"][yesterday] = await self.diary(room_id)
                # Calculate new goals for the character
                # Update stats
                # Let background tasks run
                conversation_memory.chat_memory_day.clear()
                await self.bot.write_conf2(self.bot.rooms)

    async def prime_llm(self, text):
        """Call the chat model on ``text`` with ``max_tokens=1``.

        The result is discarded.

        NOTE(review): the model call is not awaited, so it presumably runs
        synchronously inside this coroutine - confirm whether that is
        acceptable for the event loop.
        """
        self.llm_chat(text, max_tokens=1)
def replace_all(text, dic):
    """Apply every ``old -> new`` replacement from ``dic`` to ``text``.

    Replacements run one after another in the mapping's iteration order, so
    a later entry also sees text produced by an earlier one.

    Args:
        text: The string to process.
        dic: Mapping of substrings to their replacements.

    Returns:
        The string after all replacements.
    """
    result = text
    for old in dic:
        result = result.replace(old, dic[old])
    return result