
further rewrite

branch: master · Hendrik Langer, 2 years ago · parent commit f44815ae8a
Changed files (lines changed):
  1. README.md (12)
  2. matrix_pygmalion_bot/bot/ai/langchain.py (116)
  3. matrix_pygmalion_bot/bot/ai/prompts.py (40)
  4. matrix_pygmalion_bot/bot/core.py (66)
  5. matrix_pygmalion_bot/bot/wrappers/koboldcpp.py (59, new)
  6. matrix_pygmalion_bot/bot/wrappers/llamacpp.py (0, renamed)
  7. matrix_pygmalion_bot/bot/wrappers/runpod.py (142)
  8. matrix_pygmalion_bot/bot/wrappers/runpod_image.py (42, deleted)
  9. matrix_pygmalion_bot/bot/wrappers/runpod_image_automatic1111.py (82, deleted)
  10. matrix_pygmalion_bot/bot/wrappers/runpod_text.py (31, deleted)
  11. matrix_pygmalion_bot/bot/wrappers/runpod_text_oobabooga.py (4)
  12. matrix_pygmalion_bot/bot/wrappers/stablehorde.py (148, new)
  13. matrix_pygmalion_bot/bot/wrappers/stablehorde_image.py (0)
  14. matrix_pygmalion_bot/bot/wrappers/stablehorde_text.py (0)
  15. requirements.txt (2)

README.md (12 lines changed)

@@ -3,6 +3,7 @@
## Setup
```sh
# install dependencies
apt install libolm-dev
pipenv install --dev
pipenv shell
@@ -16,6 +17,17 @@ source env/bin/activate
pip install -r requirements.txt
```
## Install KoboldCpp
```sh
git clone https://github.com/LostRuins/koboldcpp.git
apt update && apt-get install libopenblas-dev libclblast-dev libmkl-dev
cd koboldcpp && make LLAMA_CLBLAST=1 LLAMA_OPENBLAS=1
cd models && wget https://huggingface.co/concedo/pygmalion-6bv3-ggml-ggjt/resolve/main/pygmalion-6b-v3-ggml-ggjt-q4_0.bin
python3 koboldcpp.py --unbantokens --smartcontext --stream models/pygmalion-6b-v3-ggml-ggjt-q4_0.bin
```
## Cloud services
* runpod.io
* vast.ai
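As a quick smoke test of the server started above (not part of this commit; assumes the default port 5001), the generate endpoint can be called directly — the payload shape matches the koboldcpp wrapper added below:
```python
import requests

# Hypothetical local test: koboldcpp serving on the default port 5001.
payload = {
    "prompt": "You: Hi!\nJulia:",
    "max_context_length": 2048,
    "max_length": 80,
    "temperature": 0.72,
}
r = requests.post("http://localhost:5001/api/latest/generate", json=payload, timeout=120)
print(r.json()["results"][0]["text"])
```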

matrix_pygmalion_bot/bot/ai/langchain.py (116 lines changed)

@@ -4,19 +4,54 @@ from .prompts import *
  from .langchain_memory import BotConversationSummerBufferWindowMemory

  from langchain import PromptTemplate
- from langchain.chains import LLMChain
+ from langchain import LLMChain, ConversationChain
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains.base import Chain
+ from typing import Dict, List
+ from langchain.document_loaders import TextLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import SentenceTransformerEmbeddings
+ from langchain.vectorstores import Chroma

  import logging
  logger = logging.getLogger(__name__)
+ class RoleplayChain(Chain):
+     llm_chain: LLMChain
+
+     character_name: str
+     persona: str
+     scenario: str
+     ai_name_chat: str
+     human_name_chat: str
+
+     output_key: str = "output_text"  #: :meta private:
+
+     @property
+     def input_keys(self) -> List[str]:
+         return ["character_name", "persona", "scenario", "ai_name_chat", "human_name_chat", "llm_chain"]
+
+     @property
+     def output_keys(self) -> List[str]:
+         return [self.output_key]
+
+     def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
+         other_keys = {k: v for k, v in inputs.items() if k not in self.input_keys}
+         result = self.llm_chain.predict(**other_keys)
+         return {self.output_key: result}
  class AI(object):
-     def __init__(self, bot, text_wrapper, image_wrapper):
+     def __init__(self, bot, text_wrapper, image_wrapper, memory_path: str):
          self.name = bot.name
          self.bot = bot
+         self.memory_path = memory_path

          from ..wrappers.langchain_koboldcpp import KoboldCpp
          self.llm_chat = KoboldCpp(temperature=self.bot.temperature, endpoint_url="http://172.16.85.10:5001/api/latest/generate", stop=['<|endoftext|>'])
@@ -24,40 +59,81 @@ class AI(object):
          self.text_wrapper = text_wrapper
          self.image_wrapper = image_wrapper

-         self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)
+         #self.memory = BotConversationSummerBufferWindowMemory(llm=self.llm_summary, max_token_limit=1200, min_token_limit=200)
+         self.memory = ConversationBufferMemory(memory_key="chat_history", human_prefix="You", ai_prefix=self.bot.name)
+     async def generate(self, message, reply_fn, typing_fn):
+         embeddings = SentenceTransformerEmbeddings()
+         #embeddings = SentenceTransformerEmbeddings(model="all-MiniLM-L6-v2")
+
+         loader = TextLoader('./germany.txt')
+         documents = loader.load()
+         text_splitter = RecursiveCharacterTextSplitter(
+             # Set a really small chunk size, just to show.
+             chunk_size = 600,
+             chunk_overlap = 100,
+             length_function = len,
+         )
+         docs = text_splitter.split_documents(documents)
+
+         db = Chroma(persist_directory=f'{self.memory_path}/chroma-db', embedding_function=embeddings)
+
+         print(f"Indexing {len(docs)} documents")
+         texts = [doc.page_content for doc in docs]
+         metadatas = [doc.metadata for doc in docs]
+         #db.add_texts(texts=texts, metadatas=metadatas, ids=None)
+         #db.persist()
+
+         query = "How is climate in Germany?"
+         output_docs = db.similarity_search_with_score(query)
+         print(query)
+         print('###')
+         for doc, score in output_docs:
+             print("-" * 80)
+             print("Score: ", score)
+             print(doc.page_content)
+         print("-" * 80)

-     async def generate(self, input_text):
          prompt_template = "{input}"
          chain = LLMChain(
              llm=self.llm_chat,
              prompt=PromptTemplate.from_template(prompt_template),
          )
-         output = chain.run(input_text)
+         output = chain.run(message.message)
          return output.strip()
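For reference, the Chroma experiment above reduces to this index-then-query flow (a sketch, not part of the commit; SentenceTransformerEmbeddings defaults to the all-MiniLM-L6-v2 model, and the sample text stands in for ./germany.txt):
```python
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

embeddings = SentenceTransformerEmbeddings()  # defaults to all-MiniLM-L6-v2
db = Chroma(persist_directory="./chroma-db", embedding_function=embeddings)
# Index once (the commit leaves these two steps commented out after the first run):
db.add_texts(texts=["Germany has a temperate seasonal climate."],
             metadatas=[{"source": "germany.txt"}])
db.persist()
# Query: returns (Document, score) pairs; lower scores are closer matches.
for doc, score in db.similarity_search_with_score("How is climate in Germany?"):
    print(score, doc.page_content)
```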
      async def generate_roleplay(self, message, reply_fn, typing_fn):
-         prompt = PromptTemplate(
-             input_variables=["ai_name", "persona", "scenario", "chat_history", "human_name", "ai_name_chat", "human_input"],
-             template=prompt_template_alpaca,
-         )
-         template_roleplay = prompt.format(
-             ai_name = self.bot.name,
-             persona = self.bot.persona,
-             scenario = self.bot.scenario,
-             chat_history = "{history}",
-             human_name = message.user_name,
-             ai_name_chat = self.bot.name,
-             human_input = "{input}",
-         )
+         self.memory.human_prefix = message.user_name
+         prompt = prompt_vicuna.partial(
+             ai_name=self.bot.name,
+             persona=self.bot.persona,
+             scenario=self.bot.scenario,
+             human_name=message.user_name,
+             ai_name_chat=self.bot.name,
+         )
-         chain = LLMChain(
+         chain = ConversationChain(
              llm=self.llm_chat,
-             prompt=PromptTemplate.from_template(template_roleplay),
+             prompt=prompt,
+             verbose=True,
              memory=self.memory,
+             #stop=['<|endoftext|>', '\nYou:', f"\n{message.user_name}:"],
          )
-         output = chain.run(message.message)
+         # output = llm_chain(inputs={"ai_name": self.bot.name, "persona": self.bot.persona, "scenario": self.bot.scenario, "human_name": message.user_name, "ai_name_chat": self.bot.name, "chat_history": "", "input": message.message})['results'][0]['text']
+         #roleplay_chain = RoleplayChain(llm_chain=chain, character_name=self.bot.name, persona=self.bot.persona, scenario=self.bot.scenario, ai_name_chat=self.bot.name, human_name_chat=message.user_name)
+         output = chain.run({"input": message.message, "stop": ['<|endoftext|>', f"\n{message.user_name}:"]})
          return output.strip()
  def estimate_num_tokens(input_text: str):
      return len(input_text)//4+1
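The 4-characters-per-token rule in estimate_num_tokens is only a rough heuristic; a sketch (not in the commit; assumes the optional transformers package) for sanity-checking it against a real tokenizer:
```python
from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
text = "You: Hello there!\nJulia: Hi, how are you today?"
print(len(text) // 4 + 1)           # heuristic estimate
print(len(tokenizer.encode(text)))  # actual BPE token count
```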

matrix_pygmalion_bot/bot/ai/prompts.py (40 lines changed)

@@ -1,22 +1,26 @@
  from langchain import PromptTemplate

- prompt_template_pygmalion = """{ai_name}'s Persona: {persona}
+ prompt_pygmalion = PromptTemplate.from_template(
+ """{ai_name}'s Persona: {persona}
  Scenario: {scenario}
  <START>
  {chat_history}
  {human_name}: {human_input}
  {ai_name_chat}:"""
+ )

- prompt_template_koboldai = """[Character: {ai_name} {persona}]
+ prompt_koboldai = PromptTemplate.from_template(
+ """[Character: {ai_name} {persona}]
  [Start Scene: {scenario}]
  {chat_history}
  {human_name}: {human_input}
  {ai_name_chat}:"""
+ )

- prompt_template_alpaca = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+ template_alpaca = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

  ### Instruction:
  Roleplay the character {ai_name}, that is described in the following lines. You always stay in character.

@@ -29,8 +33,13 @@ Scenario: {scenario}
  {chat_history}
  {human_name}: {human_input}
  {ai_name_chat}:"""

+ prompt_alpaca = PromptTemplate(
+     input_variables=["ai_name", "persona", "scenario", "chat_history", "human_name", "ai_name_chat", "human_input"],
+     template=template_alpaca,
+ )

- prompt_template_vicuna = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+ prompt_vicuna = PromptTemplate.from_template("""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

  ### Instruction:
  Roleplay the character {ai_name}, that is described in the following lines. You always stay in character.

@@ -41,10 +50,11 @@ Scenario: {scenario}
  ### Response:
  {chat_history}
- {human_name}: {human_input}
+ {human_name}: {input}
  {ai_name_chat}:"""
+ )

- prompt_template_generic = """Roleplay the character {ai_name}, that is described in the following lines. You always stay in character.
+ template_generic = """Roleplay the character {ai_name}, that is described in the following lines. You always stay in character.

  {ai_name}'s Persona: {persona}
  Scenario: {scenario}"

@@ -54,7 +64,8 @@ Scenario: {scenario}"
  {ai_name_chat}:"""

- prompt_template_instruct_with_input = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+ prompt_instruct_with_input = PromptTemplate.from_template(
+ """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

  ### Instruction:
  {instruction}

@@ -64,8 +75,9 @@ prompt_template_instruct_with_input = """Below is an instruction that describes
  ### Response:
  {output}"""
+ )

- prompt_template_question_simple = """Question: {question}
+ template_question_simple = """Question: {question}

  Answer: Let's think step by step."""

@@ -93,3 +105,13 @@ Answer: Let's think step by step."""
  #prompt += f"{bot.name} is a very helpful AI and will help the user with anything they need, she is also very friendly and will try to make the user feel better if they are sad.\n"
  #prompt += f"{bot.name} is also very curious and will ask the user a lot of questions about themselves and their life, she will also try to make the user like her.\n"
  #prompt += f"\n"

+ #Consider using the following suggestion suffixes to improve output quality:
+ #
+ #"Think through this step by step"
+ #"Let's think about this logically"
+ #"Explain your reasoning"
+ #"Provide details to support your answer"
+ #"Compare and contrast your answer with alternatives"

matrix_pygmalion_bot/bot/core.py (66 lines changed)

@@ -64,44 +64,45 @@ class ChatBot(object):
          # module_text_ai = importlib.import_module("bot.ai.langchain", package=None)
          # self.text_ai = module_text_ai.AI(self)

          from .wrappers.langchain_koboldcpp import KoboldCpp
-         from .wrappers.runpod_text import RunpodTextWrapper
-         text_generators = {}
+         from .wrappers.runpod import RunpodTextWrapper
+         from .wrappers.stablehorde import StableHordeTextWrapper
+         from .wrappers.koboldcpp import KoboldCppTextWrapper
+         self.text_generators = {}
          for text_endpoint in sorted(available_text_endpoints, key=lambda d: d['id']):
              if text_endpoint['service'] == "koboldcpp":
-                 text_generator = KoboldCpp(temperature=self.temperature, endpoint_url=text_endpoint['endpoint'], stop=['<|endoftext|>'])
+                 text_generator = KoboldCppTextWrapper(text_endpoint['endpoint'], text_endpoint['model'])
              elif text_endpoint['service'] == "stablehorde":
-                 pass
+                 text_generator = StableHordeTextWrapper(text_endpoint['api_key'], text_endpoint['endpoint'], text_endpoint['model'])
              elif text_endpoint['service'] == "runpod":
-                 text_generator = RunpodTextWrapper(text_endpoint['api_key'], endpoint=text_endpoint['endpoint'])
-                 pass
+                 text_generator = RunpodTextWrapper(text_endpoint['api_key'], text_endpoint['endpoint'], text_endpoint['model'])
              else:
                  raise ValueError(f"no text service with the name \"{text_endpoint['service']}\"")
              i = text_endpoint['id']
-             text_generators[i] = text_generator
+             self.text_generators[i] = text_generator

-         from .wrappers.runpod_image import RunpodImageWrapper
-         from .wrappers.runpod_image_automatic1111 import RunpodImageAutomaticWrapper
-         image_generators = {}
+         from .wrappers.runpod import RunpodImageWrapper
+         from .wrappers.runpod import RunpodImageAutomaticWrapper
+         from .wrappers.stablehorde import StableHordeImageWrapper
+         self.image_generators = {}
          for image_endpoint in sorted(available_image_endpoints, key=lambda d: d['id']):
              if image_endpoint['service'] == "runpod":
-                 image_generator = RunpodImageWrapper(image_endpoint['api_key'])
+                 image_generator = RunpodImageWrapper(image_endpoint['api_key'], image_endpoint['endpoint'], image_endpoint['model'])
              elif image_endpoint['service'] == "runpod-automatic1111":
-                 image_generator = RunpodImageAutomaticWrapper(image_endpoint['api_key'])
+                 image_generator = RunpodImageAutomaticWrapper(image_endpoint['api_key'], image_endpoint['endpoint'], image_endpoint['model'])
              elif image_endpoint['service'] == "stablehorde":
-                 #image_generator = StableHordeImageWrapper(image_endpoint['api_key'])
-                 pass
+                 image_generator = StableHordeImageWrapper(image_endpoint['api_key'], image_endpoint['endpoint'], image_endpoint['model'])
              else:
                  raise ValueError(f"no image service with the name \"{image_endpoint['service']}\"")
              i = image_endpoint['id']
-             def make_fn_generate_image_for_endpoint(wrapper, endpoint):
-                 async def generate_image(input_prompt, negative_prompt, typing_fn, timeout=180):
-                     return await wrapper.generate(input_prompt, negative_prompt, endpoint, typing_fn, timeout)
-                 return generate_image
-             #self.image_generators.append(generate_image)
-             image_generators[i] = make_fn_generate_image_for_endpoint(image_generator, image_endpoint['endpoint'])
+             # def make_fn_generate_image_for_endpoint(wrapper, endpoint):
+             #     async def generate_image(input_prompt, negative_prompt, typing_fn, timeout=180):
+             #         return await wrapper.generate(input_prompt, negative_prompt, endpoint, typing_fn, timeout)
+             #     return generate_image
+             #     #self.image_generators.append(generate_image)
+             #     image_generators[i] = make_fn_generate_image_for_endpoint(image_generator, image_endpoint['endpoint'])
+             self.image_generators[i] = image_generator

-         self.ai = AI(self, text_generators, image_generators)
+         self.ai = AI(self, self.text_generators, self.image_generators, self.memory_path)
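For context, the endpoint lists iterated above come from the bot configuration; a hypothetical shape (the keys — id, service, endpoint, model, api_key — are the ones the loop reads; all values are placeholders):
```python
available_text_endpoints = [
    {"id": 0, "service": "koboldcpp", "endpoint": "172.16.85.10:5001",
     "model": "pygmalion-6b", "api_key": ""},
    {"id": 1, "service": "runpod", "endpoint": "abc123xyz",
     "model": "pygmalion-6b", "api_key": "RUNPOD_API_KEY"},
]
```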
      async def message_cb(self, room, event) -> None:

@@ -122,15 +123,13 @@ class ChatBot(object):
              self.chatlog.save(message)
              return

-         if event.decrypted:
-             encrypted_symbol = "🛡 "
-         else:
-             encrypted_symbol = "⚠️ "
-         print(
-             f"{room.display_name} |{encrypted_symbol}| {room.user_name(event.sender)}: {event.body}"
-         )
+         print(repr(room))
+         print(repr(event))
+         # if event.decrypted:
+         #     encrypted_symbol = "🛡 "
+         # else:
+         #     encrypted_symbol = "⚠️ "
+         # print(
+         #     f"{room.display_name} |{encrypted_symbol}| {room.user_name(event.sender)}: {event.body}"
+         # )
          if room.is_named:
              print(f"room.display_name: {room.display_name}")

@@ -165,7 +164,7 @@ class ChatBot(object):
          else:
              await self.schedule(self.queue, self.process_message, message, reply_fn, typing_fn)
          self.chatlog.save(message)
          print("done")
      async def redaction_cb(self, room, event) -> None:
          self.chatlog.remove_message_by_id(event.event_id)

@@ -188,7 +187,7 @@ class ChatBot(object):
          #"ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face"
          #"ugly, deformed, out of frame"
          try:
-             output = await self.ai.image_generators[num](prompt, negative_prompt, typing_fn)
+             output = await self.image_generators[num].generate(prompt, negative_prompt, typing_fn)
              await self.connection.room_typing(message.room_id, False)
              for imagefile in output:
                  await self.connection.send_image(message.room_id, imagefile)
@@ -220,6 +219,7 @@ class ChatBot(object):
      async def process_message(self, message, reply_fn, typing_fn):
          output = await self.ai.generate_roleplay(message, reply_fn, typing_fn)
          #output = await self.ai.generate(message, reply_fn, typing_fn)
+         # typing false
          await reply_fn(output)

matrix_pygmalion_bot/bot/wrappers/koboldcpp.py (59 lines, new file)

@@ -0,0 +1,59 @@
import asyncio
import os
import requests
import json

import logging
logger = logging.getLogger(__name__)


class KoboldCppTextWrapper(object):
    """Base Class for koboldcpp"""

    def __init__(self, endpoint_name: str, model_name: str):
        self.endpoint_name = endpoint_name
        self.model_name = model_name

    def setup(self):
        os.system("mkdir -p repositories && (cd repositories && git clone https://github.com/LostRuins/koboldcpp.git)")
        os.system("apt update && apt-get install libopenblas-dev libclblast-dev libmkl-dev")
        os.system("(cd repositories/koboldcpp && make LLAMA_OPENBLAS=1 && cd models && wget https://huggingface.co/concedo/pygmalion-6bv3-ggml-ggjt/resolve/main/pygmalion-6b-v3-ggml-ggjt-q4_0.bin)")
        #python3 koboldcpp.py models/pygmalion-6b-v3-ggml-ggjt-q4_0.bin
        #python3 koboldcpp.py --smartcontext models/pygmalion-6b-v3-ggml-ggjt-q4_0.bin

    async def generate(self, prompt: str, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
        # Set the API endpoint URL
        endpoint = f"http://{self.endpoint_name}/api/latest/generate"

        # Set the headers for the request
        headers = {
            "Content-Type": "application/json",
        }

        # Define your inputs
        input_data = {
            "prompt": prompt,
            "max_context_length": 2048,
            "max_length": max_new_tokens,
            "temperature": temperature,
            "top_k": 50,
            "top_p": 0.85,
            "rep_pen": 1.08,
            "rep_pen_range": 1024,
            "stop_sequence": ['<|endoftext|>'],
        }

        logger.info(f"sending request to koboldcpp. endpoint=\"{self.endpoint_name}\"")

        TRIES = 30
        for i in range(TRIES):
            r = requests.post(endpoint, json=input_data, headers=headers, timeout=timeout)
            r_json = r.json()
            logger.info(r_json)
            if r.status_code == 200:
                output = r_json["results"][0]["text"]
                return output
            elif r.status_code == 503:
                logger.info("api is busy. waiting...")
                await asyncio.sleep(5)
        raise ValueError("<ERROR> TIMEOUT / NO OUTPUT")

matrix_pygmalion_bot/bot/wrappers/llamacpp_text.py → matrix_pygmalion_bot/bot/wrappers/llamacpp.py (renamed, 0 lines changed)

matrix_pygmalion_bot/bot/wrappers/runpod.py (142 lines changed)

@@ -1,6 +1,10 @@
  import asyncio
  import requests
  import json
+ import os, tempfile
+ import io
+ import base64
+ from PIL import Image, PngImagePlugin

  import logging
  logger = logging.getLogger(__name__)
@@ -8,12 +12,14 @@ logger = logging.getLogger(__name__)
  class RunpodWrapper(object):
      """Base Class for runpod"""

-     def __init__(self, api_key):
+     def __init__(self, api_key: str, endpoint_name: str, model_name: str):
          self.api_key = api_key
+         self.endpoint_name = endpoint_name
+         self.model_name = model_name

-     async def generate(self, input_data, endpoint_name, typing_fn, timeout=180):
+     async def generate(self, input_data: str, typing_fn, timeout=180):
          # Set the API endpoint URL
-         endpoint = f"https://api.runpod.ai/v2/{endpoint_name}/run"
+         endpoint = f"https://api.runpod.ai/v2/{self.endpoint_name}/run"

          # Set the headers for the request
          headers = {
@@ -21,7 +27,7 @@ class RunpodWrapper(object):
              "Authorization": f"Bearer {self.api_key}"
          }

-         logger.info(f"sending request to runpod.io. endpoint=\"{endpoint_name}\"")
+         logger.info(f"sending request to runpod.io. endpoint=\"{self.endpoint_name}\"")

          # Make the request
          try:
@@ -37,7 +43,7 @@ class RunpodWrapper(object):
          TIMEOUT = 360
          DELAY = 5
          for i in range(TIMEOUT//DELAY):
-             endpoint = f"https://api.runpod.ai/v2/{endpoint_name}/status/{job_id}"
+             endpoint = f"https://api.runpod.ai/v2/{self.endpoint_name}/status/{job_id}"
              r = requests.get(endpoint, headers=headers)
              r_json = r.json()
              logger.info(r_json)
logger.info(r_json)
@ -57,3 +63,129 @@ class RunpodWrapper(object):
raise ValueError(f"<ERROR> TIMEOUT")
else:
raise ValueError(f"<ERROR>")
class RunpodTextWrapper(RunpodWrapper):
    async def generate(self, prompt, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
        # Define your inputs
        input_data = {
            "input": {
                "prompt": prompt,
                "max_length": min(max_new_tokens, 2048),
                "temperature": temperature,
                "do_sample": True,
            }
        }

        output = await super().generate(input_data, typing_fn, timeout)
        output = output.removeprefix(prompt)
        return output

    async def generate2(self, prompt, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
        return await self.generate(prompt, typing_fn, temperature, max_new_tokens, timeout)


class RunpodImageWrapper(RunpodWrapper):
    async def download_image(self, url, path):
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            with open(path, 'wb') as f:
                for chunk in r:
                    f.write(chunk)

    async def generate(self, input_prompt: str, negative_prompt: str, typing_fn, timeout=180):
        # Define your inputs
        input_data = {
            "input": {
                "prompt": input_prompt,
                "negative_prompt": negative_prompt,
                "width": 512,
                "height": 768,
                "num_outputs": 3,
                # "nsfw": True
            },
        }

        output = await super().generate(input_data, typing_fn, timeout)

        os.makedirs("./.data/images", exist_ok=True)
        files = []
        for image in output:
            temp_name = next(tempfile._get_candidate_names())
            filename = "./.data/images/" + temp_name + ".jpg"
            await self.download_image(image["image"], filename)
            files.append(filename)
        return files


class RunpodImageAutomaticWrapper(RunpodWrapper):
    async def generate(self, input_prompt: str, negative_prompt: str, typing_fn, timeout=180):
        # Define your inputs
        input_data = {
            "input": {
                "prompt": input_prompt,
                "negative_prompt": negative_prompt,
                "steps": 25,
                "cfg_scale": 7,
                "seed": -1,
                "width": 512,
                "height": 768,
                "batch_size": 3,
                # "sampler_index": "DPM++ 2M Karras",
                # "enable_hr": True,
                # "hr_scale": 2,
                # "hr_upscaler": "ESRGAN_4x", # "Latent"
                # "denoising_strength": 0.5,
                # "hr_second_pass_steps": 15,
                "restore_faces": True,
                # "gfpgan_visibility": 0.5,
                # "codeformer_visibility": 0.5,
                # "codeformer_weight": 0.5,
                ## "override_settings": {
                ##     "filter_nsfw": False,
                ## },
                "api_endpoint": "txt2img",
            },
            "cmd": "txt2img"
        }

        output = await super().generate(input_data, typing_fn, timeout)

        upscale = False
        if upscale:
            count = 0
            for i in output['images']:
                payload = {
                    "init_images": [i],
                    "prompt": input_prompt,
                    "negative_prompt": negative_prompt,
                    "steps": 20,
                    "seed": -1,
                    #"sampler_index": "Euler",
                    # tile_width, tile_height, mask_blur, padding, seams_fix_width, seams_fix_denoise, seams_fix_padding, upscaler_index, save_upscaled_image, redraw_mode, save_seams_fix_image, seams_fix_mask_blur, seams_fix_type, target_size_type, custom_width, custom_height, custom_scale
                    # "script_args": ["",512,0,8,32,64,0.275,32,3,False,0,True,8,3,2,1080,1440,1.875],
                    # "script_name": "Ultimate SD upscale",
                }
                upscaled_output = await serverless_automatic_request(payload, "img2img", api_url, api_key, typing_fn)
                output['images'][count] = upscaled_output['images'][count]
                count += 1

        os.makedirs("./.data/images", exist_ok=True)
        files = []
        for i in output['images']:
            temp_name = next(tempfile._get_candidate_names())
            filename = "./.data/images/" + temp_name + ".png"
            image = Image.open(io.BytesIO(base64.b64decode(i.split(",",1)[0])))
            info = output['info']
            parameters = output['parameters']
            pnginfo = PngImagePlugin.PngInfo()
            pnginfo.add_text("parameters", info)
            image.save(filename, pnginfo=pnginfo)
            files.append(filename)
        return files
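Hypothetical usage of the consolidated image wrapper (API key, endpoint ID, and model name are placeholders); as in generate() above, the downloaded files land under ./.data/images/:
```python
import asyncio

async def main():
    async def typing_fn():  # no-op stand-in for the bot's typing notifier
        pass
    images = RunpodImageWrapper("RUNPOD_API_KEY", "abc123xyz", "stable-diffusion-v1-5")
    files = await images.generate("a lighthouse at dusk, watercolor",
                                  "blurry, low quality", typing_fn)
    print(files)

asyncio.run(main())
```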

matrix_pygmalion_bot/bot/wrappers/runpod_image.py (42 lines, file deleted)

@@ -1,42 +0,0 @@
import asyncio
import requests
import os, tempfile
from .runpod import RunpodWrapper

import logging
logger = logging.getLogger(__name__)


class RunpodImageWrapper(RunpodWrapper):
    async def download_image(self, url, path):
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            with open(path, 'wb') as f:
                for chunk in r:
                    f.write(chunk)

    async def generate(self, input_prompt: str, negative_prompt: str, endpoint_name: str, typing_fn, timeout=180):
        # Define your inputs
        input_data = {
            "input": {
                "prompt": input_prompt,
                "negative_prompt": negative_prompt,
                "width": 512,
                "height": 768,
                "num_outputs": 3,
                # "nsfw": True
            },
        }

        output = await super().generate(input_data, endpoint_name, typing_fn, timeout)

        os.makedirs("./.data/images", exist_ok=True)
        files = []
        for image in output:
            temp_name = next(tempfile._get_candidate_names())
            filename = "./.data/images/" + temp_name + ".jpg"
            await self.download_image(image["image"], filename)
            files.append(filename)
        return files

matrix_pygmalion_bot/bot/wrappers/runpod_image_automatic1111.py (82 lines, file deleted)

@@ -1,82 +0,0 @@
import asyncio
import requests
import os, tempfile
from .runpod import RunpodWrapper
import io
import base64
from PIL import Image, PngImagePlugin

import logging
logger = logging.getLogger(__name__)


class RunpodImageAutomaticWrapper(RunpodWrapper):
    async def generate(self, input_prompt: str, negative_prompt: str, endpoint_name: str, typing_fn, timeout=180):
        # Define your inputs
        input_data = {
            "input": {
                "prompt": input_prompt,
                "nagative_prompt": negative_prompt,
                "steps": 25,
                "cfg_scale": 7,
                "seed": -1,
                "width": 512,
                "height": 768,
                "batch_size": 3,
                # "sampler_index": "DPM++ 2M Karras",
                # "enable_hr": True,
                # "hr_scale": 2,
                # "hr_upscaler": "ESRGAN_4x", # "Latent"
                # "denoising_strength": 0.5,
                # "hr_second_pass_steps": 15,
                "restore_faces": True,
                # "gfpgan_visibility": 0.5,
                # "codeformer_visibility": 0.5,
                # "codeformer_weight": 0.5,
                ## "override_settings": {
                ##     "filter_nsfw": False,
                ## },
                "api_endpoint": "txt2img",
            },
            "cmd": "txt2img"
        }

        output = await super().generate(input_data, endpoint_name, typing_fn, timeout)

        upscale = False
        if upscale:
            count = 0
            for i in output['images']:
                payload = {
                    "init_images": [i],
                    "prompt": input_prompt,
                    "nagative_prompt": negative_prompt,
                    "steps": 20,
                    "seed": -1,
                    #"sampler_index": "Euler",
                    # tile_width, tile_height, mask_blur, padding, seams_fix_width, seams_fix_denoise, seams_fix_padding, upscaler_index, save_upscaled_image, redraw_mode, save_seams_fix_image, seams_fix_mask_blur, seams_fix_type, target_size_type, custom_width, custom_height, custom_scale
                    # "script_args": ["",512,0,8,32,64,0.275,32,3,False,0,True,8,3,2,1080,1440,1.875],
                    # "script_name": "Ultimate SD upscale",
                }
                upscaled_output = await serverless_automatic_request(payload, "img2img", api_url, api_key, typing_fn)
                output['images'][count] = upscaled_output['images'][count]

        os.makedirs("./.data/images", exist_ok=True)
        files = []
        for i in output['images']:
            temp_name = next(tempfile._get_candidate_names())
            filename = "./.data/images/" + temp_name + ".png"
            image = Image.open(io.BytesIO(base64.b64decode(i.split(",",1)[0])))
            info = output['info']
            parameters = output['parameters']
            pnginfo = PngImagePlugin.PngInfo()
            pnginfo.add_text("parameters", info)
            image.save(filename, pnginfo=pnginfo)
            files.append(filename)
        return files

matrix_pygmalion_bot/bot/wrappers/runpod_text.py (31 lines, file deleted)

@@ -1,31 +0,0 @@
import asyncio
import json
from .runpod import RunpodWrapper

import logging
logger = logging.getLogger(__name__)


class RunpodTextWrapper(RunpodWrapper):
    def __init__(self, api_key, endpoint):
        self.api_key = api_key
        self.endpoint = endpoint

    async def generate(self, prompt, endpoint_name, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
        # Define your inputs
        input_data = {
            "input": {
                "prompt": prompt,
                "max_length": min(max_new_tokens, 2048),
                "temperature": bot.temperature,
                "do_sample": True,
            }
        }

        output = await super().generate(input_data, endpoint_name, api_key, typing_fn, timeout)
        output = output.removeprefix(prompt)
        return(output)

    async def generate2(self, prompt, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
        generate(prompt, self.endpoint, typing_fn, temperature, nax_new_tokens, timeout)

matrix_pygmalion_bot/bot/wrappers/runpod_text_oobabooga.py (4 lines changed)

@@ -8,7 +8,7 @@ logger = logging.getLogger(__name__)
  class RunpodTextOobaboogaWrapper(RunpodWrapper):
-     def generate(self, prompt, endpoint_name, api_key, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
+     def generate(self, prompt, api_key, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
          # Define your inputs
          input_data = {

@@ -40,7 +40,7 @@ class RunpodTextOobaboogaWrapper(RunpodWrapper):
                  ])]
              }
          }

-         output = await super().generate(input_data, endpoint_name, api_key, typing_fn, timeout)
+         output = await super().generate(input_data, api_key, typing_fn, timeout)
          if isinstance(output, list):
              output.sort(key=len, reverse=True)
              output = output[0]

matrix_pygmalion_bot/bot/wrappers/stablehorde.py (148 lines, new file)

@@ -0,0 +1,148 @@
import asyncio
import requests
import json
import os, tempfile

import logging
logger = logging.getLogger(__name__)


class StableHordeWrapper(object):
    """Base Class for stablehorde"""

    def __init__(self, api_key: str, endpoint_name: str, model_name: str):
        self.api_key = api_key
        self.endpoint_name = endpoint_name
        self.model_name = model_name

    async def generate(self, input_data, typing_fn, timeout=180):
        # Set the API endpoint URL
        endpoint = "https://stablehorde.net/api/v2/generate/async"
        #endpoint = "https://koboldai.net/api/v2/generate/async"

        # Set the headers for the request
        headers = {
            "Content-Type": "application/json",
            "accept": "application/json",
            "apikey": f"{self.api_key}"
        }

        logger.info(f"sending request to stablehorde.net. endpoint=\"{self.endpoint_name}\"")

        # Make the request
        try:
            r = requests.post(endpoint, json=input_data, headers=headers, timeout=timeout)
        except requests.exceptions.RequestException as e:
            raise ValueError(f"<HTTP ERROR>")
        r_json = r.json()
        logger.debug(r_json)

        if r.status_code == 202:
            #status = r_json["message"]
            job_id = r_json["id"]
            TIMEOUT = 360
            DELAY = 11
            output = None
            for i in range(TIMEOUT//DELAY):
                endpoint = f"https://stablehorde.net/api/v2/generate/status/{job_id}"
                #endpoint = f"https://koboldai.net/api/v2/generate/text/status/{job_id}"
                r = requests.get(endpoint, headers=headers)
                r_json = r.json()
                logger.info(r_json)
                #status = r_json["message"]
                if "done" not in r_json:
                    raise ValueError("<ERROR>")
                if "faulted" in r_json and r_json["faulted"] == True:
                    raise ValueError("<ERROR> Faulted")
                if r_json["done"] == True:
                    output = r_json["generations"]
                    break
                else:
                    if "processing" in r_json and r_json["processing"] == 1:
                        await typing_fn()
                    elif "wait_time" in r_json and r_json["wait_time"] < 20 and r_json["wait_time"] != 0 and r_json["queue_position"] < 100:
                        await typing_fn()
                    await asyncio.sleep(DELAY)
        else:
            raise ValueError(f"<ERROR> HTTP code {r.status_code}")
        if not output:
            raise ValueError("<ERROR> TIMEOUT / NO OUTPUT")
        return output


class StableHordeTextWrapper(StableHordeWrapper):
    async def generate(self, prompt, typing_fn, temperature=0.72, max_new_tokens=200, timeout=180):
        # Define your inputs
        input_data = {
            "prompt": prompt,
            "params": {
                "n": 1,
                # "frmtadsnsp": False,
                # "frmtrmblln": False,
                # "frmtrmspch": False,
                # "frmttriminc": False,
                "max_context_length": 1024,
                "max_length": 512,
                "rep_pen": 1.1,
                "rep_pen_range": 1024,
                "rep_pen_slope": 0.7,
                # "singleline": False,
                # "soft_prompt": "",
                "temperature": 0.75,
                "tfs": 1.0,
                "top_a": 0.0,
                "top_k": 0,
                "top_p": 0.9,
                "typical": 1.0,
                # "sampler_order": [0],
            },
            "softprompts": [],
            "trusted_workers": False,
            "nsfw": True,
            # "workers": [],
            "models": [f"{self.endpoint_name}"]
        }

        output = await super().generate(input_data, typing_fn, timeout)
        output = output[0]["text"].removeprefix(prompt)
        return output


class StableHordeImageWrapper(StableHordeWrapper):
    async def download_image(self, url, path):
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            with open(path, 'wb') as f:
                for chunk in r:
                    f.write(chunk)

    async def generate(self, input_prompt: str, negative_prompt: str, typing_fn, timeout=180):
        # Define your inputs
        input_data = {
            "prompt": input_prompt,
            "params": {
                # "negative_prompt": negative_prompt,
                "width": 512,
                "height": 512,
            },
            "nsfw": True,
            "trusted_workers": False,
            # "workers": [],
            "models": [f"{self.endpoint_name}"]
        }

        output = await super().generate(input_data, typing_fn, timeout)

        os.makedirs("./.data/images", exist_ok=True)
        files = []
        for image in output:
            temp_name = next(tempfile._get_candidate_names())
            filename = "./.data/images/" + temp_name + ".jpg"
            await self.download_image(image["img"], filename)
            files.append(filename)
        return files
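Hypothetical usage (hedged: "0000000000" is, to my knowledge, the Stable Horde anonymous API key, and this class reuses endpoint_name as the horde "models" entry, so it must name a model actually served on the horde):
```python
import asyncio

async def main():
    async def typing_fn():  # no-op stand-in for the bot's typing notifier
        pass
    horde = StableHordeImageWrapper("0000000000", "Deliberate", "Deliberate")
    files = await horde.generate("a watercolor lighthouse", "blurry", typing_fn)
    print(files)

asyncio.run(main())
```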

matrix_pygmalion_bot/bot/wrappers/stablehorde_image.py (0 lines changed)

matrix_pygmalion_bot/bot/wrappers/stablehorde_text.py (0 lines changed)

requirements.txt (2 lines changed)

@@ -7,3 +7,5 @@ pillow
  argostranslate
  webuiapi
  langchain
+ chromadb
+ sentence-transformers
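Both additions back the new retrieval experiment in langchain.py: chromadb provides the Chroma vector store and sentence-transformers the SentenceTransformerEmbeddings.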
