@ -60,7 +60,7 @@ class KoboldCpp(LLM):
" top_k " : self . top_k ,
" top_p " : self . top_p ,
" rep_pen " : self . repeat_penalty ,
" rep_pen_range " : 256 ,
" rep_pen_range " : 1024 ,
" stop_sequence " : self . stop ,
}
@ -110,7 +110,7 @@ class KoboldCpp(LLM):
" top_k " : self . top_k ,
" top_p " : self . top_p ,
" rep_pen " : self . repeat_penalty ,
" rep_pen_range " : 256 ,
" rep_pen_range " : 1024 ,
" stop_sequence " : self . stop ,
}
@ -123,13 +123,28 @@ class KoboldCpp(LLM):
logger . info ( f " sending request to koboldcpp. " )
TRIES = 30
TRIES = 60
request_timeout = 20 * 60
for i in range ( TRIES ) :
try :
loop = asyncio . get_running_loop ( )
#r = requests.post(self.endpoint_url, json=input_data, headers=headers, timeout=600)
r = await loop . run_in_executor ( None , functools . partial ( requests . post , self . endpoint_url , json = input_data , headers = headers , timeout = 600 ) )
r = await loop . run_in_executor ( None , functools . partial ( requests . post , self . endpoint_url , json = input_data , headers = headers , timeout = request_timeout ) )
#r.raise_for_status()
r_json = r . json ( )
except requests . exceptions . HTTPError as errh :
print ( " Http Error: " , errh )
await asyncio . sleep ( 5 )
continue
except requests . exceptions . ConnectionError as errc :
print ( " Error Connecting: " , errc )
await asyncio . sleep ( 5 )
continue
except requests . exceptions . Timeout as errt :
raise ValueError ( f " http timeout error. " )
#print ("Timeout Error:",errt)
#await asyncio.sleep(5)
#continue
except requests . exceptions . RequestException as e :
raise ValueError ( f " http connection error. " )
logger . info ( r_json )
@ -142,6 +157,7 @@ class KoboldCpp(LLM):
elif r . status_code == 503 :
logger . info ( f " api is busy. waiting... " )
await asyncio . sleep ( 5 )
continue
else :
raise ValueError ( f " http error. unknown response code " )
for s in input_data [ " stop_sequence " ] :