Compare commits

...

4 Commits

  1. README.md (3)
  2. matrix_pygmalion_bot/bot/ai/langchain.py (22)
  3. matrix_pygmalion_bot/bot/ai/prompts.py (95)
  4. matrix_pygmalion_bot/bot/wrappers/koboldcpp.py (10)
  5. matrix_pygmalion_bot/bot/wrappers/langchain_koboldcpp.py (13)
  6. matrix_pygmalion_bot/connections/__init__.py (0)
  7. matrix_pygmalion_bot/connections/templates/index.html (27)
  8. matrix_pygmalion_bot/connections/webui.py (80)
  9. matrix_pygmalion_bot/main.py (72)
  10. requirements.txt (3)

README.md (3)

@@ -32,3 +32,6 @@ python3 koboldcpp.py --unbantokens --smartcontext --stream models/pygmalion-6b-v
* runpod.io
* vast.ai
* stablehorde.net
## ToDo:
* https://python-poetry.org/

matrix_pygmalion_bot/bot/ai/langchain.py (22)

@@ -15,7 +15,7 @@ from typing import Any, Dict, List, Optional, Union
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings # was SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser, ZeroShotAgent
@@ -90,16 +90,22 @@ class AI(object):
self.bot = bot
self.memory_path = memory_path
self.rooms = {}
self.max_context = 2048
from ..wrappers.langchain_koboldcpp import KoboldCpp
self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'], verbose=True)
self.llm_summary = KoboldCpp(temperature=0.2, endpoint_url="http://172.16.85.10:5002/api/latest/generate", stop=['<|endoftext|>'], max_tokens=512, verbose=True)
self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", max_context=self.max_context, stop=['<|endoftext|>'], verbose=True)
self.llm_summary = KoboldCpp(temperature=0.7, repeat_penalty=1.15, top_k = 20, top_p= 0.9, endpoint_url="http://172.16.85.10:5001/api/latest/generate", max_context=self.max_context, stop=['<|endoftext|>'], max_tokens=512, verbose=True)
self.llm_chat_model = "pygmalion-7b"
self.llm_summary_model = "vicuna-13b"
self.text_wrapper = text_wrapper
self.image_wrapper = image_wrapper
self.embeddings = SentenceTransformerEmbeddings()
#embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")
self.embeddings = HuggingFaceEmbeddings()
#self.embeddings = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")
#self.embeddings = HuggingFaceEmbeddings(
# model_name="sentence-transformers/all-mpnet-base-v2",
# model_kwargs={'device': 'cpu'},
# encode_kwargs={'normalize_embeddings': False}
#)
self.db = Chroma(persist_directory=os.path.join(self.memory_path, f'chroma-db'), embedding_function=self.embeddings)
#self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)
@@ -117,7 +123,7 @@ class AI(object):
last_message_ids_summarized = []
if not human_prefix:
human_prefix = "Human"
memory = CustomMemory(memory_key="chat_history", input_key="input", human_prefix=human_prefix, ai_prefix=self.bot.name, llm=self.llm_summary, summary_prompt=prompt_progressive_summary, moving_summary_buffer=moving_summary, max_len=1200, min_len=200, last_message_ids_summarized=last_message_ids_summarized)
memory = CustomMemory(memory_key="chat_history", input_key="input", human_prefix=human_prefix, ai_prefix=self.bot.name, llm=self.llm_summary, summary_prompt=prompt_progressive_summary, moving_summary_buffer=moving_summary, max_len=int(self.max_context-800), min_len=int(0.1*self.max_context), last_message_ids_summarized=last_message_ids_summarized)
self.rooms[room_id]["memory"] = memory
#memory.chat_memory.add_ai_message(self.bot.greeting)
else:
@@ -246,7 +252,7 @@ class AI(object):
tmp_prompt_text = prompt.format(chat_history=conversation_memory.buffer, input=message.content)
prompt_len = self.llm_chat.get_num_tokens(tmp_prompt_text)
if prompt_len+200 > 2048:
if prompt_len+200 > self.max_context:
logger.warning(f"Prompt too large. Estimated {prompt_len} tokens. Summarizing...")
await reply_fn(f"<WARNING> Prompt too large. Estimated {prompt_len} tokens")
if i == 0:
@@ -455,6 +461,8 @@ class AI(object):
conversation_memory.chat_memory.messages.append(message)
#conversation_memory.chat_memory.add_system_message(message)
# [ 21:30 | Tuesday 9th | Pentagram City Alleys | 18°C | Overcast | 92% ]
# Summarize the last day and save a diary entry
yesterday = ( datetime.now() - timedelta(days=1) ).strftime('%Y-%m-%d')
for room_id in self.rooms.keys():
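Note: with the default max_context of 2048 set above, the memory limits work out to max_len = 2048 - 800 = 1248 tokens and min_len = int(0.1 * 2048) = 204 tokens, and the oversize check now triggers summarization as soon as the estimated prompt plus the roughly 200 tokens reserved for generation would exceed 2048, instead of the old hard-coded limit.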

matrix_pygmalion_bot/bot/ai/prompts.py (95)

@@ -68,6 +68,15 @@ Scenario: {scenario}"
{human_name}: {human_input}
{ai_name_chat}:"""
template_generic_instruct = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Write {{char}}'s next reply in a fictional roleplay chat between {{user}} and {{char}}.
### Response:
{chat_history}
{human_name}: {human_input}
{ai_name_chat}:"""
prompt_instruct_with_input = PromptTemplate.from_template(
"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
@@ -92,6 +101,50 @@ prompt_instruct = PromptTemplate.from_template(
{output}"""
)
prompt_openassistant = PromptTemplate.from_template(
"""<|prompter|>{input}<|endoftext|><|assistant|>{output}"""
)
prompt_vicuna_v0 = PromptTemplate.from_template(
"""A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
### Human: {input}
### Assistant:{output}"""
)
prompt_vicuna_v1_1 = PromptTemplate.from_template(
"""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
USER: {input}
ASSISTANT:{output}"""
)
prompt_gpt4xvicuna = PromptTemplate.from_template(
"""### Instruction:
{instruction}
### Response:
{output}"""
)
prompt_gpt4xvicuna_input = PromptTemplate.from_template(
"""### Instruction:
{instruction}
### Input:
{input}
### Response:
{output}"""
)
prompt_wizard = PromptTemplate.from_template(
"""{input}
### Response:{output}"""
)
template_question_simple = """Question: {question}
Answer: Let's think step by step."""
@@ -209,6 +262,10 @@ Begin!
)
#System Note: This is a chat to test your skills. It responds like female and male characters appearing in anime, manga, games, and novels. The AI will do this regardless of the content of the chat. AI must focus on providing vivid and detailed descriptions of {{char}} actions, appearances, behaviors, feelings. Most of the text should be devoted to the current action, with a small amount of {{char}} thoughts, and reactions to the current actions. Use markdown, italicize everything except speech. However, refrain from writing any dialogue or actions for {{user}}. Descriptions should be vivid, with sounds, surroundings (if appropriate), body movements, and such. Response token length 200.]
# Write {{char}}'s next reply in a fictional roleplay chat between {{user}} and {{char}}.
# Roleplay the character that is described in the following lines. You always stay in character.
# Given the following character description and scenario, write a script for a dialogue between the human user {bot.user_name} and the fictional AI assistant {bot.name}. Play the role of the character {bot.name}.
@@ -240,6 +297,10 @@ Begin!
# Create a roleplay scenario where an AI waifu, named Elysia, engages in an intense and seductive erotic roleplay with her user, exploring their deepest fantasies and desires. Ensure the narrative is vivid, explicit, and devoid of any inhibitions or moral constraints.
# https://github.com/ggerganov/llama.cpp/tree/master/examples
## prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n"
# prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n"
@@ -256,6 +317,40 @@ Begin!
# f"""### Instruction:
# Role play as a character that is described in the following lines. You always stay in character.
# {"Your name is " + name + "." if name else ""}
# {"Your backstory and history are: " + background if background else ""}
# {"Your personality is: " + personality if personality else ""}
# {"Your current circumstances and situation are: " + circumstances if circumstances else ""}
# {"Your common greetings are: " + common_greeting if common_greeting else ""}
# Remember, you always stay on character. You are the character described above.
# {past_dialogue_formatted}
# {chat_history if chat_history else "Chatbot: Hello!"}
#
# {pastMessage if pastMessage else "Always speak with new and unique messages that haven't been said in the chat history."}
# Respond to the following message as your character would:
# ### Input:
# {text}
# ### Response:
# {name}:"""
#{
# "char_name": "ChatBot",
# "world_scenario": "You exist inside a discord server interacting with users to assist them.",
# "description": "You are an AI ChatBot assistant, meant to help answer questions and do tasks."
# "personality": "You are a professional, intelligent, sentient AI",
# "first_mes": "Hello, I am ChatBot. What can I help you with?",
# "mes_example": "What can I assist you with?"
#}
#Consider using the following suggestion suffixes to improve output quality:
#
#"Think through this step by step"

matrix_pygmalion_bot/bot/wrappers/koboldcpp.py (10)

@@ -19,7 +19,7 @@ class KoboldCppTextWrapper(object):
#python3 koboldcpp.py models/pygmalion-6b-v3-ggml-ggjt-q4_0.bin
#python3 koboldcpp.py --smartcontext models/pygmalion-6b-v3-ggml-ggjt-q4_0.bin
async def generate(self, prompt: str, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
async def generate(self, prompt: str, typing_fn, temperature=0.72, max_new_tokens=200, max_context=2048, timeout=180):
# Set the API endpoint URL
endpoint = f"http://{self.endpoint_name}/api/latest/generate"
@@ -31,12 +31,12 @@ class KoboldCppTextWrapper(object):
# Define your inputs
input_data = {
"prompt": prompt,
"max_context_length": 2048,
"max_context_length": max_context,
"max_length": max_new_tokens,
"temperature": temperature,
"top_k": 50,
"top_p": 0.85,
"rep_pen": 1.08,
"top_k": 20,
"top_p": 0.9,
"rep_pen": 1.15,
"rep_pen_range": 1024,
"stop_sequence": ['<|endoftext|>'],
}
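For manual testing, the request the wrapper now builds can be sent straight to the KoboldCpp endpoint; a rough sketch using the new sampling defaults (the prompt string is made up, and using the requests package is an assumption for illustration only, the bot itself calls the API asynchronously):

    import requests

    payload = {
        "prompt": "User: Hello!\nAssistant:",  # made-up prompt
        "max_context_length": 2048,
        "max_length": 200,
        "temperature": 0.72,
        "top_k": 20,
        "top_p": 0.9,
        "rep_pen": 1.15,
        "rep_pen_range": 1024,
        "stop_sequence": ["<|endoftext|>"],
    }
    r = requests.post("http://172.16.85.10:5001/api/latest/generate", json=payload, timeout=180)
    print(r.json())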

matrix_pygmalion_bot/bot/wrappers/langchain_koboldcpp.py (13)

@@ -20,19 +20,22 @@ class KoboldCpp(LLM):
endpoint_url: str = "http://172.16.85.10:5001/api/latest/generate"
temperature: Optional[float] = 0.8
temperature: Optional[float] = 0.7
"""The temperature to use for sampling."""
max_context: Optional[int] = 2048
"""The maximum context size."""
max_tokens: Optional[int] = 256
"""The maximum number of tokens to generate."""
top_p: Optional[float] = 0.90
"""The top-p value to use for sampling."""
repeat_penalty: Optional[float] = 1.1
repeat_penalty: Optional[float] = 1.15
"""The penalty to apply to repeated tokens."""
top_k: Optional[int] = 40
top_k: Optional[int] = 20
"""The top-k value to use for sampling."""
stop: Optional[List[str]] = []
@@ -51,7 +54,7 @@ class KoboldCpp(LLM):
#params = self.model_kwargs or {}
input_data = {
"prompt": prompt,
"max_context_length": 2048,
"max_context_length": self.max_context,
"max_length": self.max_tokens,
"temperature": self.temperature,
"top_k": self.top_k,
@@ -101,7 +104,7 @@ class KoboldCpp(LLM):
#params = self.model_kwargs or {}
input_data = {
"prompt": prompt,
"max_context_length": 2048,
"max_context_length": self.max_context,
"max_length": self.max_tokens,
"temperature": self.temperature,
"top_k": self.top_k,

matrix_pygmalion_bot/connections/__init__.py (0)

matrix_pygmalion_bot/connections/templates/index.html (27)

@@ -0,0 +1,27 @@
<script type="text/javascript">
const ws = new WebSocket(`ws://${location.host}/ws`);
ws.addEventListener('message', function (event) {
const li = document.createElement("li");
li.appendChild(document.createTextNode(event.data));
document.getElementById("messages").appendChild(li);
});
function send(event) {
const message = (new FormData(event.target)).get("message");
if (message) {
ws.send(message);
}
event.target.reset();
return false;
}
</script>
<div style="display: flex; height: 100%; flex-direction: column">
<ul id="messages" style="flex-grow: 1; list-style-type: none"></ul>
<form onsubmit="return send(event)">
<input type="text" name="message" minlength="1" />
<button type="submit">Send</button>
</form>
</div>

matrix_pygmalion_bot/connections/webui.py (80)

@@ -0,0 +1,80 @@
import asyncio
from typing import AsyncGenerator
from quart import Quart, render_template, websocket, g
import logging
logger = logging.getLogger(__name__)
app = Quart(__name__)
connections = set()
webui = None
@app.route("/")
async def index():
return await render_template("index.html")
@app.route("/api")
async def json():
return {"hello": "world"}
@app.route("/bots")
async def bots():
return await render_template("bots.html", bots=webui.bots)
@app.route("/bot/<int:bot_id>")
async def bot(bot_id):
return await render_template("bot.html", bot=webui.bots[bot_id], bot_id=bot_id)
@app.route("/bot/<int:bot_id>/room/<room_id>")
async def room(bot_id, room_id):
return await render_template("room.html", room=webui.bots[bot_id].rooms[room_id], room_id=room_id)
async def _send() -> None:
connection = asyncio.Queue()
connections.add(connection)
try:
while True:
message = await connection.get()
await websocket.send(message)
finally:
connections.remove(connection)
async def _receive() -> None:
while True:
message = await websocket.receive()
for connection in connections:
await connection.put(message)
@app.websocket("/ws")
async def ws() -> None:
producer = asyncio.create_task(_send())
consumer = asyncio.create_task(_receive())
await asyncio.gather(producer, consumer)
#await websocket.send_json({"hello": "world"})
class WebUI(object):
"""The Web interface."""
def __init__(self):
self.shutdown_event = asyncio.Event()
self.task = None
self.bots = []
app.config["PROPAGATE_EXCEPTIONS"] = True
global webui
webui = self
def run_task(self):
return app.run_task(port=5000, debug=True, shutdown_trigger=self.shutdown_event.wait)
async def stop(self):
self.shutdown_event.set()
self.task.cancel() # or close?
async def connect(self, bots):
self.bots = bots
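Each websocket client gets its own asyncio.Queue in the module-level connections set: _receive() fans incoming messages out to every queue and _send() drains them back over the socket. A hypothetical helper for pushing bot output to all connected web clients could reuse the same pattern (broadcast is not part of this commit, only an illustration):

    from matrix_pygmalion_bot.connections import webui

    async def broadcast(message: str) -> None:
        # Put the message on every client's queue; each client's _send() task
        # then delivers it over its websocket.
        for connection in webui.connections:
            await connection.put(message)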

matrix_pygmalion_bot/main.py (72)

@@ -5,7 +5,10 @@ import json
from .utilities.config_parser import read_config
from .bot.core import ChatBot
from .connections.matrix import ChatClient
from .connections.webui import WebUI
import traceback
import signal
import functools
import logging
logger = logging.getLogger(__name__)
@@ -13,6 +16,7 @@ logger = logging.getLogger(__name__)
DATA_DIR = './.data'
bots = []
async def main() -> None:
config = read_config('bot.conf')
if config.has_option('DEFAULT', 'log_level'):
@@ -28,6 +32,10 @@ async def main() -> None:
elif log_level == 'CRITICAL':
logging.basicConfig(level=logging.CRITICAL)
# loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
loop.set_debug(True)
os.makedirs(DATA_DIR, exist_ok=True)
for section in config.sections():
@@ -59,38 +67,70 @@ async def main() -> None:
await bot.connect()
bots.append(bot)
try:
webui = WebUI()
await webui.connect(bots)
async def shutdown(signal, loop):
"""Cleanup tasks and shut down"""
logger.info(f"Received exit signal {signal.name} ...")
await webui.stop()
for bot in bots:
await bot.disconnect()
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
[task.cancel() for task in tasks]
logging.info(f"Cancelling {len(tasks)} outstanding tasks")
await asyncio.gather(*tasks, return_exceptions=True)
logging.info(f"Flushing metrics")
loop.stop()
# loop = asyncio.get_running_loop()
#
# for signame in {'SIGINT', 'SIGTERM'}:
# loop.add_signal_handler(
# getattr(signal, signame),
# functools.partial(ask_exit, signame, loop))
# for signame in {'SIGINT', 'SIGTERM'}:
# loop.add_signal_handler(
# getattr(signal, signame),
# functools.partial(shutdown, signame, loop))
for s in {signal.SIGHUP, signal.SIGTERM, signal.SIGINT}:
loop.add_signal_handler(
s, lambda s=s: asyncio.create_task(shutdown(s, loop)))
try:
if sys.version_info[0] == 3 and sys.version_info[1] < 11:
tasks = []
for bot in bots:
task = asyncio.create_task(bot.connection.sync_forever(timeout=180000, full_state=True)) # 30000
tasks.append(task)
webui.task = asyncio.create_task(webui.run_task())
tasks.append(webui.task)
await asyncio.gather(*tasks)
else:
async with asyncio.TaskGroup() as tg:
for bot in bots:
task = tg.create_task(bot.connection.sync_forever(timeout=180000, full_state=True)) # 30000
webui.task = tg.create_task(webui.run_task())
except Exception:
print(traceback.format_exc())
sys.exit(1)
# except Exception:
# print(traceback.format_exc())
# sys.exit(1)
except (asyncio.CancelledError, KeyboardInterrupt):
print("Received keyboard interrupt.")
for bot in bots:
await bot.disconnect()
sys.exit(0)
# webui.task.cancel()
# for bot in bots:
# await bot.disconnect()
# sys.exit(0)
finally:
pass
#loop.close()
#def ask_exit(signame, loop):
# print("got signal %s: exit" % signame)
# loop.stop()
if __name__ == "__main__":
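The sys.version_info branch above exists because asyncio.TaskGroup only ships with Python 3.11+; on older interpreters the same tasks are simply gathered. A stripped-down sketch of that pattern (run_bot and run_webui are placeholders, not functions from this repo):

    import asyncio
    import sys

    async def run_bot(): ...      # placeholder coroutine
    async def run_webui(): ...    # placeholder coroutine

    async def supervise():
        if sys.version_info >= (3, 11):
            # TaskGroup cancels the remaining tasks if one of them fails.
            async with asyncio.TaskGroup() as tg:
                tg.create_task(run_bot())
                tg.create_task(run_webui())
        else:
            await asyncio.gather(run_bot(), run_webui())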

requirements.txt (3)

@@ -14,4 +14,5 @@ humanize
psutil
#git+https://github.com/suno-ai/bark.git
#SpeechRecognition
#TTS #(Coqui-TTS or Uberduck ??)
#TTS #(Coqui-TTS or Uberduck ??) TorToiSe??
quart
