From 97eb29190e063897196dd8793724ff190d2632e9 Mon Sep 17 00:00:00 2001 From: Hendrik Langer Date: Sun, 16 Apr 2023 14:08:06 +0200 Subject: [PATCH] double the pseudo-streaming chunk size. nearly every reply takes more than 16 tokens. --- matrix_pygmalion_bot/ai/koboldcpp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matrix_pygmalion_bot/ai/koboldcpp.py b/matrix_pygmalion_bot/ai/koboldcpp.py index 986667e..61e0665 100644 --- a/matrix_pygmalion_bot/ai/koboldcpp.py +++ b/matrix_pygmalion_bot/ai/koboldcpp.py @@ -63,7 +63,7 @@ async def generate_sync( complete = False complete_reply = "" for i in range(TIMEOUT//DELAY): - input_data["max_length"] = 16 # pseudo streaming + input_data["max_length"] = 32 # pseudo streaming # Make the request try: r = requests.post(endpoint, json=input_data, headers=headers, timeout=600)