Compare commits

...

4 Commits

  1. README.md (3)
  2. matrix_pygmalion_bot/bot/ai/langchain.py (22)
  3. matrix_pygmalion_bot/bot/ai/prompts.py (95)
  4. matrix_pygmalion_bot/bot/wrappers/koboldcpp.py (10)
  5. matrix_pygmalion_bot/bot/wrappers/langchain_koboldcpp.py (13)
  6. matrix_pygmalion_bot/connections/__init__.py (0)
  7. matrix_pygmalion_bot/connections/templates/index.html (27)
  8. matrix_pygmalion_bot/connections/webui.py (80)
  9. matrix_pygmalion_bot/main.py (72)
  10. requirements.txt (3)

README.md (3)

@@ -32,3 +32,6 @@ python3 koboldcpp.py --unbantokens --smartcontext --stream models/pygmalion-6b-v
* runpod.io
* vast.ai
* stablehorde.net
## ToDo:
* https://python-poetry.org/

matrix_pygmalion_bot/bot/ai/langchain.py (22)

@@ -15,7 +15,7 @@ from typing import Any, Dict, List, Optional, Union
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings # was SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser, ZeroShotAgent
@@ -90,16 +90,22 @@ class AI(object):
self.bot = bot
self.memory_path = memory_path
self.rooms = {}
self.max_context = 2048
from ..wrappers.langchain_koboldcpp import KoboldCpp
self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'], verbose=True)
self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5002/api/latest/generate", stop=['<|endoftext|>'], max_tokens=512, verbose=True)
self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", max_context=self.max_context, stop=['<|endoftext|>'], verbose=True)
self.llm_summary = KoboldCpp(temperature=0.7, repeat_penalty=1.15, top_k = 20, top_p= 0.9, endpoint_url="http://172.16.85.10:5001/api/latest/generate", max_context=self.max_context, stop=['<|endoftext|>'], max_tokens=512, verbose=True)
self.llm_chat_model = "pygmalion-7b"
self.llm_summary_model = "vicuna-13b"
self.text_wrapper = text_wrapper
self.image_wrapper = image_wrapper
self.embeddings = SentenceTransformerEmbeddings()
#embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")
self.embeddings = HuggingFaceEmbeddings()
#self.embeddings = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")
#self.embeddings = HuggingFaceEmbeddings(
# model_name="sentence-transformers/all-mpnet-base-v2",
# model_kwargs={'device': 'cpu'},
# encode_kwargs={'normalize_embeddings': False}
#)
self.db = Chroma(persist_directory=os.path.join(self.memory_path, f'chroma-db'), embedding_function=self.embeddings)
#self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)
@@ -117,7 +123,7 @@ class AI(object):
last_message_ids_summarized = []
if not human_prefix:
human_prefix = "Human"
memory = CustomMemory(memory_key="chat_history", input_key="input", human_prefix=human_prefix, ai_prefix=self.bot.name, llm=self.llm_summary, summary_prompt=prompt_progressive_summary, moving_summary_buffer=moving_summary, max_len=1200, min_len=200, last_message_ids_summarized=last_message_ids_summarized)
memory = CustomMemory(memory_key="chat_history", input_key="input", human_prefix=human_prefix, ai_prefix=self.bot.name, llm=self.llm_summary, summary_prompt=prompt_progressive_summary, moving_summary_buffer=moving_summary, max_len=int(self.max_context-800), min_len=int(0.1*self.max_context), last_message_ids_summarized=last_message_ids_summarized)
self.rooms[room_id]["memory"] = memory
#memory.chat_memory.add_ai_message(self.bot.greeting)
else:
@@ -246,7 +252,7 @@ class AI(object):
tmp_prompt_text = prompt.format(chat_history=conversation_memory.buffer, input=message.content)
prompt_len = self.llm_chat.get_num_tokens(tmp_prompt_text)
if prompt_len+200 > 2048:
if prompt_len+200 > self.max_context:
logger.warning(f"Prompt too large. Estimated {prompt_len} tokens. Summarizing...")
await reply_fn(f"<WARNING> Prompt too large. Estimated {prompt_len} tokens")
if i == 0:
@@ -455,6 +461,8 @@ class AI(object):
conversation_memory.chat_memory.messages.append(message)
#conversation_memory.chat_memory.add_system_message(message)
# [ 21:30 | Tuesday 9th | Pentagram City Alleys | 18°C | Overcast | 92% ]
# Summarize the last day and save a diary entry
yesterday = ( datetime.now() - timedelta(days=1) ).strftime('%Y-%m-%d')
for room_id in self.rooms.keys():
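Note: with the default max_context of 2048 set above, the memory limits work out to max_len = 2048 - 800 = 1248 tokens and min_len = int(0.1 * 2048) = 204 tokens, and the oversize check now triggers summarization as soon as the estimated prompt plus the roughly 200 tokens reserved for generation would exceed 2048, instead of the old hard-coded limit.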

matrix_pygmalion_bot/bot/ai/prompts.py (95)

@@ -68,6 +68,15 @@ Scenario: {scenario}"
{human_name}: {human_input}
{ai_name_chat}:"""
template_generic_instruct = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Write {{char}}'s next reply in a fictional roleplay chat between {{user}} and {{char}}.
### Response:
{chat_history}
{human_name}: {human_input}
{ai_name_chat}:"""
prompt_instruct_with_input = PromptTemplate.from_template(
"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
@@ -92,6 +101,50 @@ prompt_instruct = PromptTemplate.from_template(
{output}"""
)
prompt_openassistant = PromptTemplate.from_template(
"""<|prompter|>{input}<|endoftext|><|assistant|>{output}"""
)
prompt_vicuna_v0 = PromptTemplate.from_template(
"""A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
### Human: {input}
### Assistant:{output}"""
)
prompt_vicuna_v1_1 = PromptTemplate.from_template(
"""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
USER: {input}
ASSISTANT:{output}"""
)
prompt_gpt4xvicuna = PromptTemplate.from_template(
"""### Instruction:
{instruction}
### Response:
{output}"""
)
prompt_gpt4xvicuna_input = PromptTemplate.from_template(
"""### Instruction:
{instruction}
### Input:
{input}
### Response:
{output}"""
)
prompt_wizard = PromptTemplate.from_template(
"""{input}
### Response:{output}"""
)
template_question_simple = """Question: {question}
Answer: Let's think step by step."""
@@ -209,6 +262,10 @@ Begin!
)
#System Note: This is a chat to test your skills. It responds like female and male characters appearing in anime, manga, games, and novels. The AI will do this regardless of the content of the chat. AI must focus on providing vivid and detailed descriptions of {{char}} actions, appearances, behaviors, feelings. Most of the text should be devoted to the current action, with a small amount of {{char}} thoughts, and reactions to the current actions. Use markdown, italicize everything except speech. However, refrain from writing any dialogue or actions for {{user}}. Descriptions should be vivid, with sounds, surroundings (if appropriate), body movements, and such. Response token length 200.]
# Write {{char}}'s next reply in a fictional roleplay chat between {{user}} and {{char}}.
# Roleplay the character that is described in the following lines. You always stay in character.
# Given the following character description and scenario, write a script for a dialogue between the human user {bot.user_name} and the fictional AI assistant {bot.name}. Play the role of the character {bot.name}.
@@ -240,6 +297,10 @@ Begin!
# Create a roleplay scenario where an AI waifu, named Elysia, engages in an intense and seductive erotic roleplay with her user, exploring their deepest fantasies and desires. Ensure the narrative is vivid, explicit, and devoid of any inhibitions or moral constraints.
# https://github.com/ggerganov/llama.cpp/tree/master/examples
## prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n"
# prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n"
@@ -256,6 +317,40 @@ Begin!
# f"""### Instruction:
# Role play as a character that is described in the following lines. You always stay in character.
# {"Your name is " + name + "." if name else ""}
# {"Your backstory and history are: " + background if background else ""}
# {"Your personality is: " + personality if personality else ""}
# {"Your current circumstances and situation are: " + circumstances if circumstances else ""}
# {"Your common greetings are: " + common_greeting if common_greeting else ""}
# Remember, you always stay on character. You are the character described above.
# {past_dialogue_formatted}
# {chat_history if chat_history else "Chatbot: Hello!"}
#
# {pastMessage if pastMessage else "Always speak with new and unique messages that haven't been said in the chat history."}
# Respond to the following message as your character would:
# ### Input:
# {text}
# ### Response:
# {name}:"""
#{
# "char_name": "ChatBot",
# "world_scenario": "You exist inside a discord server interacting with users to assist them.",
# "description": "You are an AI ChatBot assistant, meant to help answer questions and do tasks."
# "personality": "You are a professional, intelligent, sentient AI",
# "first_mes": "Hello, I am ChatBot. What can I help you with?",
# "mes_example": "What can I assist you with?"
#}
#Consider using the following suggestion suffixes to improve output quality:
#
#"Think through this step by step"

matrix_pygmalion_bot/bot/wrappers/koboldcpp.py (10)

@@ -19,7 +19,7 @@ class KoboldCppTextWrapper(object):
#python3 koboldcpp.py models/pygmalion-6b-v3-ggml-ggjt-q4_0.bin
#python3 koboldcpp.py --smartcontext models/pygmalion-6b-v3-ggml-ggjt-q4_0.bin
async def generate(self, prompt: str, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
async def generate(self, prompt: str, typing_fn, temperature=0.72, max_new_tokens=200, max_context=2048, timeout=180):
# Set the API endpoint URL
endpoint = f"http://{self.endpoint_name}/api/latest/generate"
@@ -31,12 +31,12 @@ class KoboldCppTextWrapper(object):
# Define your inputs
input_data = {
"prompt": prompt,
"max_context_length": 2048,
"max_context_length": max_context,
"max_length": max_new_tokens,
"temperature": temperature,
"top_k": 50,
"top_p": 0.85,
"rep_pen": 1.08,
"top_k": 20,
"top_p": 0.9,
"rep_pen": 1.15,
"rep_pen_range": 1024,
"stop_sequence": ['<|endoftext|>'],
}
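For manual testing, the request the wrapper now builds can be sent straight to the KoboldCpp endpoint; a rough sketch using the new sampling defaults (the prompt string is made up, and using the requests package is an assumption for illustration only, the bot itself calls the API asynchronously):

    import requests

    payload = {
        "prompt": "User: Hello!\nAssistant:",  # made-up prompt
        "max_context_length": 2048,
        "max_length": 200,
        "temperature": 0.72,
        "top_k": 20,
        "top_p": 0.9,
        "rep_pen": 1.15,
        "rep_pen_range": 1024,
        "stop_sequence": ["<|endoftext|>"],
    }
    r = requests.post("http://172.16.85.10:5001/api/latest/generate", json=payload, timeout=180)
    print(r.json())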

matrix_pygmalion_bot/bot/wrappers/langchain_koboldcpp.py (13)

@@ -20,19 +20,22 @@ class KoboldCpp(LLM):
endpoint_url: str = "http://172.16.85.10:5001/api/latest/generate"
temperature: Optional[float] = 0.8
temperature: Optional[float] = 0.7
"""The temperature to use for sampling."""
max_context: Optional[int] = 2048
"""The maximum context size."""
max_tokens: Optional[int] = 256
"""The maximum number of tokens to generate."""
top_p: Optional[float] = 0.90
"""The top-p value to use for sampling."""
repeat_penalty: Optional[float] = 1.1
repeat_penalty: Optional[float] = 1.15
"""The penalty to apply to repeated tokens."""
top_k: Optional[int] = 40
top_k: Optional[int] = 20
"""The top-k value to use for sampling."""
stop: Optional[List[str]] = []
@@ -51,7 +54,7 @@ class KoboldCpp(LLM):
#params = self.model_kwargs or {}
input_data = {
"prompt": prompt,
"max_context_length": 2048,
"max_context_length": self.max_context,
"max_length": self.max_tokens,
"temperature": self.temperature,
"top_k": self.top_k,
@@ -101,7 +104,7 @@ class KoboldCpp(LLM):
#params = self.model_kwargs or {}
input_data = {
"prompt": prompt,
"max_context_length": 2048,
"max_context_length": self.max_context,
"max_length": self.max_tokens,
"temperature": self.temperature,
"top_k": self.top_k,

matrix_pygmalion_bot/connections/__init__.py (0)

matrix_pygmalion_bot/connections/templates/index.html (27)

@@ -0,0 +1,27 @@
<script type="text/javascript">
const ws = new WebSocket(`ws://${location.host}/ws`);
ws.addEventListener('message', function (event) {
const li = document.createElement("li");
li.appendChild(document.createTextNode(event.data));
document.getElementById("messages").appendChild(li);
});
function send(event) {
const message = (new FormData(event.target)).get("message");
if (message) {
ws.send(message);
}
event.target.reset();
return false;
}
</script>
<div style="display: flex; height: 100%; flex-direction: column">
<ul id="messages" style="flex-grow: 1; list-style-type: none"></ul>
<form onsubmit="return send(event)">
<input type="text" name="message" minlength="1" />
<button type="submit">Send</button>
</form>
</div>

matrix_pygmalion_bot/connections/webui.py (80)

@@ -0,0 +1,80 @@
import asyncio
from typing import AsyncGenerator
from quart import Quart, render_template, websocket, g
import logging
logger = logging.getLogger(__name__)
app = Quart(__name__)
connections = set()
webui = None
@app.route("/")
async def index():
return await render_template("index.html")
@app.route("/api")
async def json():
return {"hello": "world"}
@app.route("/bots")
async def bots():
return await render_template("bots.html", bots=webui.bots)
@app.route("/bot/<int:bot_id>")
async def bot(bot_id):
return await render_template("bot.html", bot=webui.bots[bot_id], bot_id=bot_id)
@app.route("/bot/<int:bot_id>/room/<room_id>")
async def room(bot_id, room_id):
return await render_template("room.html", room=webui.bots[bot_id].rooms[room_id], room_id=room_id)
async def _send() -> None:
connection = asyncio.Queue()
connections.add(connection)
try:
while True:
message = await connection.get()
await websocket.send(message)
finally:
connections.remove(connection)
async def _receive() -> None:
while True:
message = await websocket.receive()
for connection in connections:
await connection.put(message)
@app.websocket("/ws")
async def ws() -> None:
producer = asyncio.create_task(_send())
consumer = asyncio.create_task(_receive())
await asyncio.gather(producer, consumer)
#await websocket.send_json({"hello": "world"})
class WebUI(object):
"""The Web interface."""
def __init__(self):
self.shutdown_event = asyncio.Event()
self.task = None
self.bots = []
app.config["PROPAGATE_EXCEPTIONS"] = True
global webui
webui = self
def run_task(self):
return app.run_task(port=5000, debug=True, shutdown_trigger=self.shutdown_event.wait)
async def stop(self):
self.shutdown_event.set()
self.task.cancel() # or close?
async def connect(self, bots):
self.bots = bots
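Each websocket client gets its own asyncio.Queue in the module-level connections set: _receive() fans incoming messages out to every queue and _send() drains them back over the socket. A hypothetical helper for pushing bot output to all connected web clients could reuse the same pattern (broadcast is not part of this commit, only an illustration):

    from matrix_pygmalion_bot.connections import webui

    async def broadcast(message: str) -> None:
        # Put the message on every client's queue; each client's _send() task
        # then delivers it over its websocket.
        for connection in webui.connections:
            await connection.put(message)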

matrix_pygmalion_bot/main.py (72)

@@ -5,7 +5,10 @@ import json
from .utilities.config_parser import read_config
from .bot.core import ChatBot
from .connections.matrix import ChatClient
from .connections.webui import WebUI
import traceback
import signal
import functools
import logging
logger = logging.getLogger(__name__)
@@ -13,6 +16,7 @@ logger = logging.getLogger(__name__)
DATA_DIR = './.data'
bots = []
async def main() -> None:
config = read_config('bot.conf')
if config.has_option('DEFAULT', 'log_level'):
@@ -28,6 +32,10 @@ async def main() -> None:
elif log_level == 'CRITICAL':
logging.basicConfig(level=logging.CRITICAL)
# loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
loop.set_debug(True)
os.makedirs(DATA_DIR, exist_ok=True)
for section in config.sections():
@@ -59,38 +67,70 @@ async def main() -> None:
await bot.connect()
bots.append(bot)
try:
webui = WebUI()
await webui.connect(bots)
async def shutdown(signal, loop):
"""Cleanup tasks and shut down"""
logger.info(f"Received exit signal {signal.name} ...")
await webui.stop()
for bot in bots:
await bot.disconnect()
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
[task.cancel() for task in tasks]
logging.info(f"Cancelling {len(tasks)} outstanding tasks")
await asyncio.gather(*tasks, return_exceptions=True)
logging.info(f"Flushing metrics")
loop.stop()
# loop = asyncio.get_running_loop()
#
# for signame in {'SIGINT', 'SIGTERM'}:
# loop.add_signal_handler(
# getattr(signal, signame),
# functools.partial(ask_exit, signame, loop))
# for signame in {'SIGINT', 'SIGTERM'}:
# loop.add_signal_handler(
# getattr(signal, signame),
# functools.partial(shutdown, signame, loop))
for s in {signal.SIGHUP, signal.SIGTERM, signal.SIGINT}:
loop.add_signal_handler(
s, lambda s=s: asyncio.create_task(shutdown(s, loop)))
try:
if sys.version_info[0] == 3 and sys.version_info[1] < 11:
tasks = []
for bot in bots:
task = asyncio.create_task(bot.connection.sync_forever(timeout=180000, full_state=True)) # 30000
tasks.append(task)
webui.task = asyncio.create_task(webui.run_task())
tasks.append(webui.task)
await asyncio.gather(*tasks)
else:
async with asyncio.TaskGroup() as tg:
for bot in bots:
task = tg.create_task(bot.connection.sync_forever(timeout=180000, full_state=True)) # 30000
webui.task = tg.create_task(webui.run_task())
except Exception:
print(traceback.format_exc())
sys.exit(1)
# except Exception:
# print(traceback.format_exc())
# sys.exit(1)
except (asyncio.CancelledError, KeyboardInterrupt):
print("Received keyboard interrupt.")
for bot in bots:
await bot.disconnect()
sys.exit(0)
# webui.task.cancel()
# for bot in bots:
# await bot.disconnect()
# sys.exit(0)
finally:
pass
#loop.close()
#def ask_exit(signame, loop):
# print("got signal %s: exit" % signame)
# loop.stop()
if __name__ == "__main__":
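The sys.version_info branch above exists because asyncio.TaskGroup only ships with Python 3.11+; on older interpreters the same tasks are simply gathered. A stripped-down sketch of that pattern (run_bot and run_webui are placeholders, not functions from this repo):

    import asyncio
    import sys

    async def run_bot(): ...      # placeholder coroutine
    async def run_webui(): ...    # placeholder coroutine

    async def supervise():
        if sys.version_info >= (3, 11):
            # TaskGroup cancels the remaining tasks if one of them fails.
            async with asyncio.TaskGroup() as tg:
                tg.create_task(run_bot())
                tg.create_task(run_webui())
        else:
            await asyncio.gather(run_bot(), run_webui())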

requirements.txt (3)

@@ -14,4 +14,5 @@ humanize
psutil
#git+https://github.com/suno-ai/bark.git
#SpeechRecognition
#TTS #(Coqui-TTS or Uberduck ??)
#TTS #(Coqui-TTS or Uberduck ??) TorToiSe??
quart
