diff --git a/runpod/runpod-worker-sd/Dockerfile b/runpod/runpod-worker-sd/Dockerfile
index 0c16637..ecec185 100644
--- a/runpod/runpod-worker-sd/Dockerfile
+++ b/runpod/runpod-worker-sd/Dockerfile
@@ -31,11 +31,12 @@ RUN add-apt-repository ppa:deadsnakes/ppa && \
     apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
-RUN pip install --upgrade pip
+RUN pip install --upgrade pip && \
     pip install huggingface-hub && \
     pip install diffusers && \
+    pip install safetensors && \
     pip install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu116 && \
-    pip install bitsandbytes
+    pip install bitsandbytes && \
     pip install transformers accelerate xformers triton && \
     pip install runpod
 
diff --git a/runpod/runpod-worker-sd/README.md b/runpod/runpod-worker-sd/README.md
index dd19aee..df84981 100644
--- a/runpod/runpod-worker-sd/README.md
+++ b/runpod/runpod-worker-sd/README.md
@@ -1,3 +1,5 @@
+# NOT (YET) WORKING
+
 git clone https://github.com/runpod/serverless-ckpt-template.git
 cd serverless-ckpt-template
 
@@ -8,5 +10,5 @@ docker push magn418/runpod-hassan:1.5
 
 Models: PFG https://civitai.com/models/1227/pfg
 hassanblend https://civitai.com/models/1173/hassanblend-15-and-previous-versions
-Deliberate
+Deliberate https://civitai.com/models/4823
 Anything v3 ?
diff --git a/runpod/runpod-worker-sd/model_fetcher.py b/runpod/runpod-worker-sd/model_fetcher.py
index c0daf32..06adc03 100644
--- a/runpod/runpod-worker-sd/model_fetcher.py
+++ b/runpod/runpod-worker-sd/model_fetcher.py
@@ -37,10 +37,13 @@ def download_model(model_url: str):
         for chunk in downloaded_model.iter_content(chunk_size=1024):
             if chunk:
                 f.write(chunk)
-    os.system("wget -q https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py")
-    os.system(f"python3 convert_original_stable_diffusion_to_diffusers.py --from_safetensors --checkpoint_path model.safetensors --dump_path {MODEL_CACHE_DIR}/pt")
+    # Pin the conversion script to a tagged diffusers release so upstream changes cannot break the build.
+    os.system("wget -q https://raw.githubusercontent.com/huggingface/diffusers/v0.14.0/scripts/convert_original_stable_diffusion_to_diffusers.py")
+    os.system("pip install omegaconf")  # required by the conversion script
+    os.system(f"python3 convert_original_stable_diffusion_to_diffusers.py --from_safetensors --checkpoint_path model.safetensors --dump_path {MODEL_CACHE_DIR}/model")
+    os.system("rm model.safetensors")  # only the converted diffusers folder is needed from here on
     #os.system(f"python3 convert_original_stable_diffusion_to_diffusers.py --checkpoint_path model.ckpt --dump_path pt")
-    model_id = "pt"
+    model_id = f"./{MODEL_CACHE_DIR}/model"
 
     saftey_checker = StableDiffusionSafetyChecker.from_pretrained(
         SAFETY_MODEL_ID,
diff --git a/runpod/runpod-worker-sd/runpod_infer.py b/runpod/runpod-worker-sd/runpod_infer.py
index abef5d9..9d3dcb2 100644
--- a/runpod/runpod-worker-sd/runpod_infer.py
+++ b/runpod/runpod-worker-sd/runpod_infer.py
@@ -149,7 +149,7 @@ if __name__ == "__main__":
         url_parts = args.model_url.split("/")
         model_id = f"{url_parts[-2]}/{url_parts[-1]}"
     else:
-        model_id = f"model.safetensors"
+        model_id = f"./{MODEL_CACHE_DIR}/model"
 
     MODEL = predict.Predictor(model_id)
     MODEL.setup()
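Side note on the conversion flow above: the script dumps a diffusers-format folder, and both workers now point at the same `./{MODEL_CACHE_DIR}/model` path. A minimal sketch of how such a folder is consumed at inference time (assumptions: `MODEL_CACHE_DIR` matches the constant in model_fetcher.py, fp16 is used to fit a small GPU, and the prompt is illustrative):

    # Load the locally converted checkpoint; no Hub access is needed at runtime.
    import torch
    from diffusers import StableDiffusionPipeline

    MODEL_CACHE_DIR = "diffusers-cache"  # hypothetical; use the value from model_fetcher.py

    pipe = StableDiffusionPipeline.from_pretrained(
        f"./{MODEL_CACHE_DIR}/model",    # folder written by --dump_path above
        torch_dtype=torch.float16,       # assumption: fp16 for consumer GPUs
    ).to("cuda")

    image = pipe("a lighthouse at dusk, oil painting").images[0]
    image.save("out.png")
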
diff --git a/runpod/runpod-worker-transformers/Dockerfile b/runpod/runpod-worker-transformers/Dockerfile
new file mode 100644
index 0000000..fd903fc
--- /dev/null
+++ b/runpod/runpod-worker-transformers/Dockerfile
@@ -0,0 +1,46 @@
+ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+FROM ${BASE_IMAGE} as dev-base
+
+ARG MODEL_NAME
+ENV MODEL_NAME=${MODEL_NAME}
+
+WORKDIR /src
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+ENV DEBIAN_FRONTEND=noninteractive \
+    SHELL=/bin/bash
+RUN apt-key del 7fa2af80
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+RUN apt-get update --yes && \
+    apt-get upgrade --yes && \
+    apt-get install --yes --no-install-recommends \
+    wget \
+    bash \
+    openssh-server && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    echo "en_US.UTF-8 UTF-8" > /etc/locale.gen
+
+RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt-get install python3.8 -y
+RUN apt-get install python3-pip -y
+RUN apt-get install python3.8-distutils -y
+
+RUN apt-get install python3.8-dev -y
+RUN apt-get install python3.8-venv -y
+RUN python3.8 -m venv /venv
+ENV PATH=/venv/bin:$PATH
+
+RUN python3.8 -m pip install --upgrade pip==23.0.1
+RUN python3.8 -m pip install runpod==0.9.1
+RUN python3.8 -m pip install torch==2.0.0
+RUN python3.8 -m pip install transformers==4.27.2
+
+# Bake the model weights into the image at build time so cold starts skip the download.
+COPY model_fetcher.py /src/model_fetcher.py
+RUN python3.8 model_fetcher.py --model_name=${MODEL_NAME}
+
+COPY runpod_infer.py /src/runpod_infer.py
+COPY test_input.json /src/test_input.json
+
+CMD python3.8 -u runpod_infer.py --model_name=${MODEL_NAME}
diff --git a/runpod/runpod-worker-transformers/README.md b/runpod/runpod-worker-transformers/README.md
new file mode 100644
index 0000000..181ac37
--- /dev/null
+++ b/runpod/runpod-worker-transformers/README.md
@@ -0,0 +1,24 @@
+## Building
+
+Select one of the following models to build:
+
+- `gpt-neo-1.3B`
+- `gpt-neo-2.7B`
+- `gpt-neox-20b`
+- `pygmalion-6b`
+- `gpt-j-6b`
+- `shygmalion-6b`
+- `erebus-13b`
+
+```BASH
+docker build --build-arg MODEL_NAME={model name} -t repo/image_name:tag .
+```
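+
+## Testing
+
+A quick local smoke test (a sketch: it assumes the runpod SDK's local test mode,
+which feeds `test_input.json` to the handler when no job queue is configured):
+
+```BASH
+docker run --gpus all repo/image_name:tag
+```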
diff --git a/runpod/runpod-worker-transformers/model_fetcher.py b/runpod/runpod-worker-transformers/model_fetcher.py
new file mode 100644
index 0000000..e5a7a7d
--- /dev/null
+++ b/runpod/runpod-worker-transformers/model_fetcher.py
@@ -0,0 +1,61 @@
+'''
+RunPod | Transformer | Model Fetcher
+'''
+
+import argparse
+
+import torch
+from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
+                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)
+
+
+def download_model(model_name):
+
+    # --------------------------------- Neo 1.3B --------------------------------- #
+    if model_name == 'gpt-neo-1.3B':
+        GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+        GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+
+    # --------------------------------- Neo 2.7B --------------------------------- #
+    elif model_name == 'gpt-neo-2.7B':
+        GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B", torch_dtype=torch.float16)
+        GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
+
+    # ----------------------------------- NeoX ----------------------------------- #
+    elif model_name == 'gpt-neox-20b':
+        GPTNeoXForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b").half()
+        GPTNeoXTokenizerFast.from_pretrained("EleutherAI/gpt-neox-20b")
+
+    # --------------------------------- Pygmalion -------------------------------- #
+    elif model_name == 'pygmalion-6b':
+        AutoModelForCausalLM.from_pretrained("PygmalionAI/pygmalion-6b")
+        AutoTokenizer.from_pretrained("PygmalionAI/pygmalion-6b")
+
+    # ----------------------------------- GPT-J ----------------------------------- #
+    elif model_name == 'gpt-j-6b':
+        GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16",
+                                        torch_dtype=torch.float16)
+        AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
+
+    # ------------------------------ PPO Shygmalion 6B ----------------------------- #
+    elif model_name == 'shygmalion-6b':
+        AutoModelForCausalLM.from_pretrained("TehVenom/PPO_Shygmalion-6b")
+        AutoTokenizer.from_pretrained("TehVenom/PPO_Shygmalion-6b")
+
+    # ------------------------------ Erebus 13B (NSFW) ----------------------------- #
+    elif model_name == 'erebus-13b':
+        AutoModelForCausalLM.from_pretrained("KoboldAI/OPT-13B-Erebus")
+        AutoTokenizer.from_pretrained("KoboldAI/OPT-13B-Erebus")
+
+
+# ---------------------------------------------------------------------------- #
+#                                Parse Arguments                                #
+# ---------------------------------------------------------------------------- #
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument("--model_name", type=str,
+                    default="gpt-neo-1.3B", help="Name of the model to download.")
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    download_model(args.model_name)
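Why the build-time fetch works: `from_pretrained` downloads and caches the weights under the Hugging Face cache directory inside the image, so `runpod_infer.py` can later load the same identifiers with `local_files_only=True` and never touch the network at request time. A minimal sketch of the pattern:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Build time: downloads the weights and leaves them in the HF cache.
    AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

    # Run time: resolves the same weights purely from the local cache.
    model = AutoModelForCausalLM.from_pretrained(
        "EleutherAI/gpt-neo-1.3B", local_files_only=True)
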
diff --git a/runpod/runpod-worker-transformers/runpod_infer.py b/runpod/runpod-worker-transformers/runpod_infer.py
new file mode 100644
index 0000000..169763a
--- /dev/null
+++ b/runpod/runpod-worker-transformers/runpod_infer.py
@@ -0,0 +1,193 @@
+'''
+RunPod | Transformer | Handler
+'''
+import argparse
+
+import torch
+import runpod
+from runpod.serverless.utils.rp_validator import validate
+from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
+                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)
+
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+INPUT_SCHEMA = {
+    'prompt': {
+        'type': str,
+        'required': True
+    },
+    'do_sample': {
+        'type': bool,
+        'required': False,
+        'default': True,
+        'description': '''
+            Enables decoding strategies such as multinomial sampling,
+            beam-search multinomial sampling, Top-K sampling and Top-p sampling.
+            All these strategies select the next token from the probability distribution
+            over the entire vocabulary with various strategy-specific adjustments.
+        '''
+    },
+    'max_length': {
+        'type': int,
+        'required': False,
+        'default': 100
+    },
+    'temperature': {
+        'type': float,
+        'required': False,
+        'default': 0.9
+    },
+    'repetition_penalty': {
+        'type': float,
+        'required': False,
+        'default': 1.1
+    },
+    'top_p': {
+        'type': float,
+        'required': False,
+        'default': 0.5
+    },
+    'top_k': {
+        'type': int,
+        'required': False,
+        'default': 40
+    },
+    'typical_p': {
+        'type': float,
+        'required': False,
+        'default': 1.0
+    },
+    'encoder_repetition_penalty': {
+        'type': float,
+        'required': False,
+        'default': 1.0
+    },
+    'min_length': {
+        'type': int,
+        'required': False,
+        'default': 0
+    },
+    'num_beams': {
+        'type': int,
+        'required': False,
+        'default': 1
+    },
+    'early_stopping': {
+        'type': bool,
+        'required': False,
+        'default': False
+    },
+    'penalty_alpha': {
+        'type': float,
+        'required': False,
+        'default': 0.0
+    },
+    'length_penalty': {
+        'type': float,
+        'required': False,
+        'default': 1.0
+    },
+    'no_repeat_ngram_size': {
+        'type': int,
+        'required': False,
+        'default': 0
+    },
+
+}
+
+
+def generator(job):
+    '''
+    Run the job input to generate text output.
+    '''
+    # Validate the input
+    val_input = validate(job['input'], INPUT_SCHEMA)
+    if 'errors' in val_input:
+        return {"error": val_input['errors']}
+    val_input = val_input['validated_input']
+
+    input_ids = tokenizer(val_input['prompt'], return_tensors="pt").input_ids.to(device)
+
+    # generate() returns token IDs on the model's device; no extra .to() is needed.
+    gen_tokens = model.generate(
+        input_ids,
+        do_sample=val_input['do_sample'],
+        temperature=val_input['temperature'],
+        max_length=val_input['max_length'],
+        repetition_penalty=val_input['repetition_penalty'],
+        top_p=val_input['top_p'],
+        top_k=val_input['top_k'],
+        typical_p=val_input['typical_p'],
+        encoder_repetition_penalty=val_input['encoder_repetition_penalty'],
+        min_length=val_input['min_length'],
+        num_beams=val_input['num_beams'],
+        early_stopping=val_input['early_stopping'],
+        penalty_alpha=val_input['penalty_alpha'],
+        length_penalty=val_input['length_penalty'],
+        no_repeat_ngram_size=val_input['no_repeat_ngram_size'],
+    )
+
+    gen_text = tokenizer.batch_decode(gen_tokens)[0]
+
+    return gen_text
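+
+# Local sanity check (a sketch; assumes the model/tokenizer set up in __main__ below):
+#     job = {"input": {"prompt": "Hello", "max_length": 32}}
+#     print(generator(job))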
+
+
+# ---------------------------------------------------------------------------- #
+#                                Parse Arguments                                #
+# ---------------------------------------------------------------------------- #
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument("--model_name", type=str,
+                    default="gpt-neo-1.3B", help="Name of the model to load.")
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    # --------------------------------- Neo 1.3B --------------------------------- #
+    if args.model_name == 'gpt-neo-1.3B':
+        model = GPTNeoForCausalLM.from_pretrained(
+            "EleutherAI/gpt-neo-1.3B", local_files_only=True).to(device)
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B", local_files_only=True)
+
+    elif args.model_name == 'gpt-neo-2.7B':
+        model = GPTNeoForCausalLM.from_pretrained(
+            "EleutherAI/gpt-neo-2.7B", local_files_only=True, torch_dtype=torch.float16).to(device)
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", local_files_only=True)
+
+    elif args.model_name == 'gpt-neox-20b':
+        model = GPTNeoXForCausalLM.from_pretrained(
+            "EleutherAI/gpt-neox-20b", local_files_only=True).half().to(device)
+        tokenizer = GPTNeoXTokenizerFast.from_pretrained(
+            "EleutherAI/gpt-neox-20b", local_files_only=True)
+
+    elif args.model_name == 'pygmalion-6b':
+        model = AutoModelForCausalLM.from_pretrained(
+            "PygmalionAI/pygmalion-6b", local_files_only=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "PygmalionAI/pygmalion-6b", local_files_only=True)
+
+    elif args.model_name == 'gpt-j-6b':
+        model = GPTJForCausalLM.from_pretrained(
+            "EleutherAI/gpt-j-6B", local_files_only=True, revision="float16",
+            torch_dtype=torch.float16).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "EleutherAI/gpt-j-6B", local_files_only=True)
+
+    elif args.model_name == 'shygmalion-6b':
+        model = AutoModelForCausalLM.from_pretrained(
+            "TehVenom/PPO_Shygmalion-6b", local_files_only=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "TehVenom/PPO_Shygmalion-6b", local_files_only=True)
+
+    elif args.model_name == 'erebus-13b':
+        model = AutoModelForCausalLM.from_pretrained(
+            "KoboldAI/OPT-13B-Erebus", local_files_only=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "KoboldAI/OPT-13B-Erebus", local_files_only=True)
+
+    runpod.serverless.start({"handler": generator})
diff --git a/runpod/runpod-worker-transformers/test_input.json b/runpod/runpod-worker-transformers/test_input.json
new file mode 100644
index 0000000..87542a6
--- /dev/null
+++ b/runpod/runpod-worker-transformers/test_input.json
@@ -0,0 +1,5 @@
+{
+    "input": {
+        "prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."
+    }
+}
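Once deployed, the worker accepts the same payload shape as `test_input.json`. A sketch of calling it through RunPod's serverless HTTP API (ENDPOINT_ID and the API key are placeholders; endpoint URL shape as documented by RunPod):

    import requests

    resp = requests.post(
        "https://api.runpod.ai/v2/ENDPOINT_ID/runsync",   # synchronous run
        headers={"Authorization": "Bearer YOUR_API_KEY"},
        json={"input": {"prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model."}},
        timeout=600,
    )
    print(resp.json())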