Browse Source

implement pseudo-streaming

master
Hendrik Langer 2 years ago
parent
commit
9d5f2de7a5
  1. 61
      matrix_pygmalion_bot/ai/koboldcpp.py
  2. 32
      matrix_pygmalion_bot/core.py

61
matrix_pygmalion_bot/ai/koboldcpp.py

@ -37,7 +37,7 @@ async def generate_sync(
"Content-Type": "application/json", "Content-Type": "application/json",
} }
max_new_tokens = 120 max_new_tokens = 200
prompt_num_tokens = await num_tokens(prompt) prompt_num_tokens = await num_tokens(prompt)
# Define your inputs # Define your inputs
@ -55,28 +55,45 @@ async def generate_sync(
logger.info(f"sending request to koboldcpp") logger.info(f"sending request to koboldcpp")
# Make the request
try:
r = requests.post(endpoint, json=input_data, headers=headers, timeout=360) TIMEOUT = 360
except requests.exceptions.RequestException as e: DELAY = 5
raise ValueError(f"<HTTP ERROR> {e}") tokens = 0
r_json = r.json() complete_reply = ""
logger.info(r_json) for i in range(TIMEOUT//DELAY):
input_data["max_length"] = 16 # pseudo streaming
if r.status_code == 200: # Make the request
reply = r_json["results"][0]["text"] try:
idx = reply.find(f"\nYou:") r = requests.post(endpoint, json=input_data, headers=headers, timeout=360)
if idx != -1: except requests.exceptions.RequestException as e:
reply = reply[:idx].strip() raise ValueError(f"<HTTP ERROR> {e}")
r_json = r.json()
logger.info(r_json)
if r.status_code == 200:
partial_reply = r_json["results"][0]["text"]
input_data["prompt"] += partial_reply
complete_reply += partial_reply
tokens += input_data["max_length"]
await typing_fn()
if not partial_reply or partial_reply.find('<|endoftext|>') != -1 or partial_reply.find("\nYou:") != -1 or tokens >= max_new_tokens:
idx = complete_reply.find(f"\nYou:")
if idx != -1:
complete_reply = complete_reply[:idx].strip()
else:
complete_reply = complete_reply.removesuffix('<|endoftext|>').strip()
complete_reply = complete_reply.replace(f"\n{bot.name}: ", " ")
complete_reply = complete_reply.replace(f"\n<BOT>: ", " ")
complete_reply = complete_reply.replace(f"<BOT>", "{bot.name}")
complete_reply = complete_reply.replace(f"<USER>", "You")
return complete_reply.strip()
else:
continue
elif r.status_code == 503:
#model busy
await asyncio.sleep(DELAY)
else: else:
reply = reply.removesuffix('<|endoftext|>').strip() raise ValueError(f"<ERROR>")
reply = reply.replace(f"\n{bot.name}: ", " ")
reply = reply.replace(f"\n<BOT>: ", " ")
reply = reply.replace(f"<BOT>", "{bot.name}")
reply = reply.replace(f"<USER>", "You")
return reply.strip()
else:
raise ValueError(f"<ERROR>")
async def generate_image(input_prompt: str, negative_prompt: str, api_url: str, api_key: str, typing_fn): async def generate_image(input_prompt: str, negative_prompt: str, api_url: str, api_key: str, typing_fn):

32
matrix_pygmalion_bot/core.py

@ -16,8 +16,8 @@ import json
from .helpers import Event from .helpers import Event
from .chatlog import BotChatHistory from .chatlog import BotChatHistory
ai = importlib.import_module("matrix_pygmalion_bot.ai.runpod_pygmalion") image_ai = importlib.import_module("matrix_pygmalion_bot.ai.runpod_pygmalion")
ai = importlib.import_module("matrix_pygmalion_bot.ai.koboldcpp") text_ai = importlib.import_module("matrix_pygmalion_bot.ai.koboldcpp")
#ai = importlib.import_module("matrix_pygmalion_bot.ai.stablehorde") #ai = importlib.import_module("matrix_pygmalion_bot.ai.stablehorde")
#from .llama_cpp import generate, get_full_prompt, get_full_prompt_chat_style #from .llama_cpp import generate, get_full_prompt, get_full_prompt_chat_style
#from .runpod_pygmalion import generate_sync, get_full_prompt #from .runpod_pygmalion import generate_sync, get_full_prompt
@ -114,30 +114,30 @@ class Callbacks(object):
typing = lambda : self.client.room_typing(room.room_id, True, 15000) typing = lambda : self.client.room_typing(room.room_id, True, 15000)
if self.bot.service == "runpod": if self.bot.service == "runpod":
if num == 1: if num == 1:
output = await ai.generate_image1(prompt, negative_prompt, self.bot.runpod_api_key, typing) output = await image_ai.generate_image1(prompt, negative_prompt, self.bot.runpod_api_key, typing)
elif num == 2: elif num == 2:
output = await ai.generate_image2(prompt, negative_prompt, self.bot.runpod_api_key, typing) output = await image_ai.generate_image2(prompt, negative_prompt, self.bot.runpod_api_key, typing)
elif num == 3: elif num == 3:
output = await ai.generate_image3(prompt, negative_prompt, self.bot.runpod_api_key, typing) output = await image_ai.generate_image3(prompt, negative_prompt, self.bot.runpod_api_key, typing)
elif num == 4: elif num == 4:
output = await ai.generate_image4(prompt, negative_prompt, self.bot.runpod_api_key, typing) output = await image_ai.generate_image4(prompt, negative_prompt, self.bot.runpod_api_key, typing)
elif num == 5: elif num == 5:
output = await ai.generate_image5(prompt, negative_prompt, self.bot.runpod_api_key, typing) output = await image_ai.generate_image5(prompt, negative_prompt, self.bot.runpod_api_key, typing)
elif num == 6: elif num == 6:
output = await ai.generate_image6(prompt, negative_prompt, self.bot.runpod_api_key, typing) output = await image_ai.generate_image6(prompt, negative_prompt, self.bot.runpod_api_key, typing)
elif num == 7: elif num == 7:
output = await ai.generate_image7(prompt, negative_prompt, self.bot.runpod_api_key, typing) output = await image_ai.generate_image7(prompt, negative_prompt, self.bot.runpod_api_key, typing)
elif num == 8: elif num == 8:
output = await ai.generate_image8(prompt, negative_prompt, self.bot.runpod_api_key, typing) output = await image_ai.generate_image8(prompt, negative_prompt, self.bot.runpod_api_key, typing)
else: else:
raise ValueError('no image generator with that number') raise ValueError('no image generator with that number')
elif self.bot.service == "stablehorde": elif self.bot.service == "stablehorde":
if num == 1: if num == 1:
output = await ai.generate_image1(prompt, negative_prompt, self.bot.stablehorde_api_key, typing) output = await image_ai.generate_image1(prompt, negative_prompt, self.bot.stablehorde_api_key, typing)
elif num == 2: elif num == 2:
output = await ai.generate_image2(prompt, negative_prompt, self.bot.stablehorde_api_key, typing) output = await image_ai.generate_image2(prompt, negative_prompt, self.bot.stablehorde_api_key, typing)
elif num == 3: elif num == 3:
output = await ai.generate_image3(prompt, negative_prompt, self.bot.stablehorde_api_key, typing) output = await image_ai.generate_image3(prompt, negative_prompt, self.bot.stablehorde_api_key, typing)
else: else:
raise ValueError('no image generator with that number') raise ValueError('no image generator with that number')
else: else:
@ -216,8 +216,8 @@ class Callbacks(object):
# send, mail, drop, snap picture, photo, image, portrait # send, mail, drop, snap picture, photo, image, portrait
pass pass
full_prompt = await ai.get_full_prompt(chat_message.getTranslation("en"), self.bot, self.bot.chat_history.room(room.display_name)) full_prompt = await text_ai.get_full_prompt(chat_message.getTranslation("en"), self.bot, self.bot.chat_history.room(room.display_name))
num_tokens = await ai.num_tokens(full_prompt) num_tokens = await text_ai.num_tokens(full_prompt)
logger.debug(full_prompt) logger.debug(full_prompt)
logger.debug(f"Prompt has " + str(num_tokens) + " tokens") logger.debug(f"Prompt has " + str(num_tokens) + " tokens")
# answer = "" # answer = ""
@ -237,7 +237,7 @@ class Callbacks(object):
# print("") # print("")
try: try:
typing = lambda : self.client.room_typing(room.room_id, True, 15000) typing = lambda : self.client.room_typing(room.room_id, True, 15000)
answer = await ai.generate_sync(full_prompt, self.bot.runpod_api_key, self.bot, typing, api_endpoint) answer = await text_ai.generate_sync(full_prompt, self.bot.runpod_api_key, self.bot, typing, api_endpoint)
answer = answer.strip() answer = answer.strip()
await self.client.room_typing(room.room_id, False) await self.client.room_typing(room.room_id, False)
if not (self.bot.translate is None): if not (self.bot.translate is None):

Loading…
Cancel
Save