diff --git a/matrix_pygmalion_bot/bot/wrappers/langchain_koboldcpp.py b/matrix_pygmalion_bot/bot/wrappers/langchain_koboldcpp.py index 2b79731..609ce1f 100644 --- a/matrix_pygmalion_bot/bot/wrappers/langchain_koboldcpp.py +++ b/matrix_pygmalion_bot/bot/wrappers/langchain_koboldcpp.py @@ -60,7 +60,7 @@ class KoboldCpp(LLM): "top_k": self.top_k, "top_p": self.top_p, "rep_pen": self.repeat_penalty, - "rep_pen_range": 256, + "rep_pen_range": 1024, "stop_sequence": self.stop, } @@ -110,7 +110,7 @@ class KoboldCpp(LLM): "top_k": self.top_k, "top_p": self.top_p, "rep_pen": self.repeat_penalty, - "rep_pen_range": 256, + "rep_pen_range": 1024, "stop_sequence": self.stop, } @@ -123,13 +123,28 @@ class KoboldCpp(LLM): logger.info(f"sending request to koboldcpp.") - TRIES = 30 + TRIES = 60 + request_timeout=20*60 for i in range(TRIES): try: loop = asyncio.get_running_loop() #r = requests.post(self.endpoint_url, json=input_data, headers=headers, timeout=600) - r = await loop.run_in_executor(None, functools.partial(requests.post, self.endpoint_url, json=input_data, headers=headers, timeout=600)) + r = await loop.run_in_executor(None, functools.partial(requests.post, self.endpoint_url, json=input_data, headers=headers, timeout=request_timeout)) + #r.raise_for_status() r_json = r.json() + except requests.exceptions.HTTPError as errh: + print ("Http Error:",errh) + await asyncio.sleep(5) + continue + except requests.exceptions.ConnectionError as errc: + print ("Error Connecting:",errc) + await asyncio.sleep(5) + continue + except requests.exceptions.Timeout as errt: + raise ValueError(f"http timeout error.") + #print ("Timeout Error:",errt) + #await asyncio.sleep(5) + #continue except requests.exceptions.RequestException as e: raise ValueError(f"http connection error.") logger.info(r_json) @@ -142,6 +157,7 @@ class KoboldCpp(LLM): elif r.status_code == 503: logger.info(f"api is busy. waiting...") await asyncio.sleep(5) + continue else: raise ValueError(f"http error. unknown response code") for s in input_data["stop_sequence"]: