"""KoboldCpp LLM wrapper for testing purposes.""" import logging import time from typing import Any, List, Mapping, Optional import json import requests from langchain.llms.base import LLM logger = logging.getLogger(__name__) class KoboldCpp(LLM): """KoboldCpp LLM wrapper for testing purposes.""" endpoint_url: str = "http://172.16.85.10:5001/api/latest/generate" temperature: Optional[float] = 0.8 """The temperature to use for sampling.""" max_tokens: Optional[int] = 256 """The maximum number of tokens to generate.""" top_p: Optional[float] = 0.90 """The top-p value to use for sampling.""" repeat_penalty: Optional[float] = 1.1 """The penalty to apply to repeated tokens.""" top_k: Optional[int] = 40 """The top-k value to use for sampling.""" stop: Optional[List[str]] = [] """A list of strings to stop generation when encountered.""" # model_kwargs: Dict[str, Any] = Field(default_factory=dict) @property def _llm_type(self) -> str: """Return type of llm.""" return "KoboldCpp" def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: """First try to lookup in queries, else return 'foo' or 'bar'.""" #params = self.model_kwargs or {} input_data = { "prompt": prompt, "max_context_length": 2048, "max_length": self.max_tokens, "temperature": self.temperature, "top_k": self.top_k, "top_p": self.top_p, "rep_pen": self.repeat_penalty, "rep_pen_range": 256, "stop_sequence": self.stop, } if stop: input_data["stop_sequence"] = stop headers = { "Content-Type": "application/json", } logger.info(f"sending request to koboldcpp.") TRIES = 30 for i in range(TRIES): try: r = requests.post(self.endpoint_url, json=input_data, headers=headers, timeout=600) r_json = r.json() except requests.exceptions.RequestException as e: raise ValueError(f"http connection error.") logger.info(r_json) if r.status_code == 200: try: response = r_json["results"][0]["text"] except KeyError: raise ValueError(f"LangChain requires 'results' key in response.") break elif r.status_code == 503: logger.info(f"api is busy. waiting...") time.sleep(5) else: raise ValueError(f"http error. unknown response code") for s in input_data["stop_sequence"]: response = response.removesuffix(s).rstrip() return response.lstrip() @property def _identifying_params(self) -> Mapping[str, Any]: return {}