|
@ -60,7 +60,7 @@ class KoboldCpp(LLM): |
|
|
"top_k": self.top_k, |
|
|
"top_k": self.top_k, |
|
|
"top_p": self.top_p, |
|
|
"top_p": self.top_p, |
|
|
"rep_pen": self.repeat_penalty, |
|
|
"rep_pen": self.repeat_penalty, |
|
|
"rep_pen_range": 256, |
|
|
"rep_pen_range": 1024, |
|
|
"stop_sequence": self.stop, |
|
|
"stop_sequence": self.stop, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@ -110,7 +110,7 @@ class KoboldCpp(LLM): |
|
|
"top_k": self.top_k, |
|
|
"top_k": self.top_k, |
|
|
"top_p": self.top_p, |
|
|
"top_p": self.top_p, |
|
|
"rep_pen": self.repeat_penalty, |
|
|
"rep_pen": self.repeat_penalty, |
|
|
"rep_pen_range": 256, |
|
|
"rep_pen_range": 1024, |
|
|
"stop_sequence": self.stop, |
|
|
"stop_sequence": self.stop, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@ -123,13 +123,28 @@ class KoboldCpp(LLM): |
|
|
|
|
|
|
|
|
logger.info(f"sending request to koboldcpp.") |
|
|
logger.info(f"sending request to koboldcpp.") |
|
|
|
|
|
|
|
|
TRIES = 30 |
|
|
TRIES = 60 |
|
|
|
|
|
request_timeout=20*60 |
|
|
for i in range(TRIES): |
|
|
for i in range(TRIES): |
|
|
try: |
|
|
try: |
|
|
loop = asyncio.get_running_loop() |
|
|
loop = asyncio.get_running_loop() |
|
|
#r = requests.post(self.endpoint_url, json=input_data, headers=headers, timeout=600) |
|
|
#r = requests.post(self.endpoint_url, json=input_data, headers=headers, timeout=600) |
|
|
r = await loop.run_in_executor(None, functools.partial(requests.post, self.endpoint_url, json=input_data, headers=headers, timeout=600)) |
|
|
r = await loop.run_in_executor(None, functools.partial(requests.post, self.endpoint_url, json=input_data, headers=headers, timeout=request_timeout)) |
|
|
|
|
|
#r.raise_for_status() |
|
|
r_json = r.json() |
|
|
r_json = r.json() |
|
|
|
|
|
except requests.exceptions.HTTPError as errh: |
|
|
|
|
|
print ("Http Error:",errh) |
|
|
|
|
|
await asyncio.sleep(5) |
|
|
|
|
|
continue |
|
|
|
|
|
except requests.exceptions.ConnectionError as errc: |
|
|
|
|
|
print ("Error Connecting:",errc) |
|
|
|
|
|
await asyncio.sleep(5) |
|
|
|
|
|
continue |
|
|
|
|
|
except requests.exceptions.Timeout as errt: |
|
|
|
|
|
raise ValueError(f"http timeout error.") |
|
|
|
|
|
#print ("Timeout Error:",errt) |
|
|
|
|
|
#await asyncio.sleep(5) |
|
|
|
|
|
#continue |
|
|
except requests.exceptions.RequestException as e: |
|
|
except requests.exceptions.RequestException as e: |
|
|
raise ValueError(f"http connection error.") |
|
|
raise ValueError(f"http connection error.") |
|
|
logger.info(r_json) |
|
|
logger.info(r_json) |
|
@ -142,6 +157,7 @@ class KoboldCpp(LLM): |
|
|
elif r.status_code == 503: |
|
|
elif r.status_code == 503: |
|
|
logger.info(f"api is busy. waiting...") |
|
|
logger.info(f"api is busy. waiting...") |
|
|
await asyncio.sleep(5) |
|
|
await asyncio.sleep(5) |
|
|
|
|
|
continue |
|
|
else: |
|
|
else: |
|
|
raise ValueError(f"http error. unknown response code") |
|
|
raise ValueError(f"http error. unknown response code") |
|
|
for s in input_data["stop_sequence"]: |
|
|
for s in input_data["stop_sequence"]: |
|
|