diff --git a/matrix_pygmalion_bot/ai/koboldcpp.py b/matrix_pygmalion_bot/ai/koboldcpp.py index f92afbf..eebb385 100644 --- a/matrix_pygmalion_bot/ai/koboldcpp.py +++ b/matrix_pygmalion_bot/ai/koboldcpp.py @@ -28,8 +28,7 @@ async def generate_sync( prompt: str, api_key: str, bot, - typing_fn, - api_endpoint = "pygmalion-6b" + typing_fn ): # Set the API endpoint URL endpoint = f"http://172.16.85.10:5001/api/latest/generate" diff --git a/matrix_pygmalion_bot/ai/runpod.py b/matrix_pygmalion_bot/ai/runpod.py index 045746f..b906bdd 100644 --- a/matrix_pygmalion_bot/ai/runpod.py +++ b/matrix_pygmalion_bot/ai/runpod.py @@ -21,11 +21,10 @@ async def generate_sync( prompt: str, api_key: str, bot, - typing_fn, - api_endpoint = "pygmalion-6b" + typing_fn ): # Set the API endpoint URL - endpoint = f"https://api.runpod.ai/v2/{api_endpoint}/run" + endpoint = f"https://api.runpod.ai/v2/{bot.runpod_text_endpoint}/run" # Set the headers for the request headers = { @@ -46,11 +45,49 @@ async def generate_sync( } } + input_data_oobabooga = { + "input": { + "data": [json.dumps([ + prompt, + { + 'max_new_tokens': min(max_new_tokens, 2048), + 'do_sample': True, + 'temperature': bot.temperature, + 'top_p': 0.73, + 'typical_p': 1, + 'repetition_penalty': 1.1, + 'encoder_repetition_penalty': 1.0, + 'top_k': 0, + 'min_length': 0, + 'no_repeat_ngram_size': 0, + 'num_beams': 1, + 'penalty_alpha': 0, + 'length_penalty': 1, + 'early_stopping': False, + 'seed': -1, + 'add_bos_token': True, + 'custom_stopping_strings': [], + 'truncation_length': 2048, + 'ban_eos_token': False, + 'skip_special_tokens': True, + } + ])] + } + } + + if bot.runpod_text_endpoint in ['pygmalion-6b', 'gpt-neo-2_7b', 'gpt-neo-1_3b']: + api_mode = "runpod" + else: + api_mode = "oobabooga" + logger.info(f"sending request to runpod.io") # Make the request try: - r = requests.post(endpoint, json=input_data, headers=headers, timeout=180) + if api_mode == "runpod": + r = requests.post(endpoint, json=input_data, headers=headers, timeout=180) + else: + r = requests.post(endpoint, json=input_data_oobabooga, headers=headers, timeout=180) except requests.exceptions.RequestException as e: raise ValueError(f"") r_json = r.json() @@ -62,7 +99,7 @@ async def generate_sync( TIMEOUT = 360 DELAY = 5 for i in range(TIMEOUT//DELAY): - endpoint = f"https://api.runpod.ai/v2/{api_endpoint}/status/{job_id}" + endpoint = f"https://api.runpod.ai/v2/{bot.runpod_text_endpoint}/status/{job_id}" r = requests.get(endpoint, headers=headers) r_json = r.json() logger.info(r_json) @@ -79,7 +116,10 @@ async def generate_sync( text = output[0] else: text = r_json["output"] - answer = text.removeprefix(prompt) + if api_mode == "runpod": + answer = text.removeprefix(prompt) + else: + answer = text["data"][0].removeprefix(prompt) # lines = reply.split('\n') # reply = lines[0].strip() idx = answer.find(f"\nYou:") diff --git a/matrix_pygmalion_bot/core.py b/matrix_pygmalion_bot/core.py index 08edb6d..d8a5604 100644 --- a/matrix_pygmalion_bot/core.py +++ b/matrix_pygmalion_bot/core.py @@ -86,7 +86,6 @@ class Callbacks(object): ) ) - api_endpoint = "pygmalion-6b" await self.client.room_read_markers(room.room_id, event.event_id, event.event_id) # Ignore messages when disabled if "disabled" in self.bot.room_config[room.room_id] and self.bot.room_config[room.room_id]["disabled"] == True and not event.body.startswith('!start'): @@ -225,9 +224,6 @@ class Callbacks(object): new_answer = event.body.removeprefix('!replace').strip() await self.bot.send_message(self.client, room.room_id, new_answer, reply_to=chat_history_item.relates_to_event) return - elif event.body.startswith('!2'): - chat_message.updateText( event.body.removeprefix('!2').strip() ) - api_endpoint = "ynznznpn6qz6yh" elif event.body.startswith('!'): await self.bot.send_message(self.client, room.room_id, " UNKNOWN COMMAND") return @@ -258,7 +254,7 @@ class Callbacks(object): # print("") try: typing = lambda : self.client.room_typing(room.room_id, True, 15000) - answer = await self.bot.text_ai.generate_sync(full_prompt, self.bot.runpod_api_key, self.bot, typing, api_endpoint) + answer = await self.bot.text_ai.generate_sync(full_prompt, self.bot.runpod_api_key, self.bot, typing) answer = answer.strip() await self.client.room_typing(room.room_id, False) if not (self.bot.translate is None): @@ -308,6 +304,7 @@ class ChatBot(object): self.service_image = "other" self.model = "other" self.runpod_api_key = None + self.runpod_text_endpoint = "pygmalion-6b" self.text_ai = None self.image_ai = None @@ -552,6 +549,8 @@ async def main() -> None: bot.model = config[section]['model'] if config.has_option('DEFAULT', 'runpod_api_key'): bot.runpod_api_key = config['DEFAULT']['runpod_api_key'] + if config.has_option('DEFAULT', 'runpod_text_endpoint'): + bot.runpod_text_endpoint = config['DEFAULT']['runpod_text_endpoint'] if config.has_option('DEFAULT', 'stablehorde_api_key'): bot.stablehorde_api_key = config['DEFAULT']['stablehorde_api_key'] await bot.read_conf2(section) diff --git a/runpod/runpod-worker-oobabooga-api/Dockerfile b/runpod/runpod-worker-oobabooga-api/Dockerfile index 552da5d..a024019 100644 --- a/runpod/runpod-worker-oobabooga-api/Dockerfile +++ b/runpod/runpod-worker-oobabooga-api/Dockerfile @@ -7,8 +7,8 @@ FROM ${DEV_IMAGE} as builder # https://github.com/runpod/containers/blob/main/oobabooga/Dockerfile -# run; DOCKER_BUILDKIT=1 docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" -t magn418/runpod-oobabooga-pygmalion:test . - +# DOCKER_BUILDKIT=1 docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" -t magn418/runpod-oobabooga-pygmalion:test . +# docker builder prune ARG MODEL_NAME="PygmalionAI/pygmalion-350m" ENV MODEL_NAME=${MODEL_NAME} @@ -83,10 +83,14 @@ RUN cd /workspace/text-generation-webui/ && mkdir repositories && cd repositorie git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \ (cd GPTQ-for-LLaMa && python3 setup_cuda.py bdist_wheel -d .) # && python3 setup_cuda.py install + + +FROM builder AS modeldownloader RUN cd /workspace/text-generation-webui && python3 download-model.py ${MODEL_NAME} #RUN git lfs install && \ # git clone --depth 1 https://huggingface.co/${MODEL_NAME} + FROM ${BASE_IMAGE} #ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX" RUN mkdir -p /workspace @@ -116,6 +120,7 @@ RUN --mount=type=cache,target=/root/.cache,sharing=locked pip3 install --upgrade #RUN mkdir -p /workspace/text-generation-webui/repositories COPY --from=builder /workspace/text-generation-webui /workspace/text-generation-webui +COPY --from=modeldownloader /workspace/text-generation-webui/models /workspace/text-generation-webui/models RUN --mount=type=cache,target=/root/.cache,sharing=locked \ cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \ diff --git a/runpod/runpod-worker-sd/Dockerfile b/runpod/test/runpod-worker-sd/Dockerfile similarity index 100% rename from runpod/runpod-worker-sd/Dockerfile rename to runpod/test/runpod-worker-sd/Dockerfile diff --git a/runpod/runpod-worker-sd/README.md b/runpod/test/runpod-worker-sd/README.md similarity index 100% rename from runpod/runpod-worker-sd/README.md rename to runpod/test/runpod-worker-sd/README.md diff --git a/runpod/runpod-worker-sd/model_fetcher.py b/runpod/test/runpod-worker-sd/model_fetcher.py similarity index 100% rename from runpod/runpod-worker-sd/model_fetcher.py rename to runpod/test/runpod-worker-sd/model_fetcher.py diff --git a/runpod/runpod-worker-sd/predict.py b/runpod/test/runpod-worker-sd/predict.py similarity index 100% rename from runpod/runpod-worker-sd/predict.py rename to runpod/test/runpod-worker-sd/predict.py diff --git a/runpod/runpod-worker-sd/runpod_infer.py b/runpod/test/runpod-worker-sd/runpod_infer.py similarity index 100% rename from runpod/runpod-worker-sd/runpod_infer.py rename to runpod/test/runpod-worker-sd/runpod_infer.py diff --git a/runpod/runpod-worker-transformers/Dockerfile b/runpod/test/runpod-worker-transformers/Dockerfile similarity index 100% rename from runpod/runpod-worker-transformers/Dockerfile rename to runpod/test/runpod-worker-transformers/Dockerfile diff --git a/runpod/runpod-worker-transformers/README.md b/runpod/test/runpod-worker-transformers/README.md similarity index 100% rename from runpod/runpod-worker-transformers/README.md rename to runpod/test/runpod-worker-transformers/README.md diff --git a/runpod/runpod-worker-transformers/RWKV.py b/runpod/test/runpod-worker-transformers/RWKV.py similarity index 100% rename from runpod/runpod-worker-transformers/RWKV.py rename to runpod/test/runpod-worker-transformers/RWKV.py diff --git a/runpod/runpod-worker-transformers/model_fetcher.py b/runpod/test/runpod-worker-transformers/model_fetcher.py similarity index 100% rename from runpod/runpod-worker-transformers/model_fetcher.py rename to runpod/test/runpod-worker-transformers/model_fetcher.py diff --git a/runpod/runpod-worker-transformers/runpod_infer.py b/runpod/test/runpod-worker-transformers/runpod_infer.py similarity index 100% rename from runpod/runpod-worker-transformers/runpod_infer.py rename to runpod/test/runpod-worker-transformers/runpod_infer.py diff --git a/runpod/runpod-worker-transformers/test_input.json b/runpod/test/runpod-worker-transformers/test_input.json similarity index 100% rename from runpod/runpod-worker-transformers/test_input.json rename to runpod/test/runpod-worker-transformers/test_input.json