diff --git a/runpod/runpod-worker-sd/README.md b/runpod/runpod-worker-sd/README.md
index df84981..8619375 100644
--- a/runpod/runpod-worker-sd/README.md
+++ b/runpod/runpod-worker-sd/README.md
@@ -1,11 +1,16 @@
 # NOT (YET) WORKING
+Uses (containers/)serverless-automatic instead.
+
+## Old info
 serverless-ckpt-template
 
+```
 git clone https://github.com/runpod/serverless-ckpt-template.git
 cd serverless-ckpt-template
 docker build --build-arg MODEL_URL=https://huggingface.co/hassanblend/HassanBlend1.5.1.2 -t magn418/runpod-hassan:1.5 .
 docker login
 docker push magn418/runpod-hassan:1.5
+```
 
 Models:
 PFG https://civitai.com/models/1227/pfg
diff --git a/runpod/runpod-worker-transformers/Dockerfile b/runpod/runpod-worker-transformers/Dockerfile
index fd903fc..c352b9f 100644
--- a/runpod/runpod-worker-transformers/Dockerfile
+++ b/runpod/runpod-worker-transformers/Dockerfile
@@ -1,46 +1,57 @@
 ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
 FROM ${BASE_IMAGE} as dev-base
 
 ARG MODEL_NAME
 ENV MODEL_NAME=${MODEL_NAME}
 
-WORKDIR /src
-
+WORKDIR /
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 ENV DEBIAN_FRONTEND noninteractive\
     SHELL=/bin/bash
-RUN apt-key del 7fa2af80
-RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+
 RUN apt-get update --yes && \
+    # - apt-get upgrade is run to patch known vulnerabilities in apt-get packages as
+    #   the ubuntu base image is sometimes rebuilt too seldom (less than once a month)
     apt-get upgrade --yes && \
-    apt install --yes --no-install-recommends\
-    wget\
-    bash\
-    openssh-server &&\
+    apt install --yes --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    bash \
+    libgl1 \
+    software-properties-common \
+    openssh-server && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     echo "en_US.UTF-8 UTF-8" > /etc/locale.gen
 
-RUN apt-get update && apt-get install -y --no-install-recommends
-RUN apt-get install software-properties-common -y
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt-get install python3.8 -y
-RUN apt-get install python3-pip -y
-RUN apt-get install python3.8-distutils -y
+RUN apt-key del 7fa2af80 && \
+    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+
+RUN add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get install python3.10 python3.10-dev python3.10-venv python3.10-distutils python3-pip -y --no-install-recommends && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
 
-RUN apt-get install python3.8-dev -y
-RUN apt-get install python3.8-venv -y
-RUN python3.8 -m venv /venv
-ENV PATH=/venv/bin:$PATH
+RUN python3.10 -m pip install --upgrade pip && \
+    pip install huggingface-hub && \
+    pip install diffusers && \
+    pip install safetensors && \
+    pip install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu116 && \
+    pip install bitsandbytes && \
+    pip install transformers accelerate xformers triton && \
+    pip install runpod
 
-RUN python3.8 -m pip install --upgrade pip==23.0.1
-RUN python3.8 -m pip install runpod==0.9.1
-RUN python3.8 -m pip install torch==2.0.0
-RUN python3.8 -m pip install transformers==4.27.2
+RUN mkdir /workspace
+WORKDIR /workspace
 
-COPY model_fetcher.py /src/model_fetcher.py
-RUN python3.8 model_fetcher.py --model_name=${MODEL_NAME}
+COPY model_fetcher.py /workspace/
+RUN python3.10 model_fetcher.py --model_name=${MODEL_NAME}
+#RUN git lfs install && \
+#    git clone --depth 1 https://huggingface.co/${MODEL_NAME}
 
-COPY runpod_infer.py /src/runpod_infer.py
-COPY test_input.json /src/test_input.json
+COPY runpod_infer.py /workspace/
+COPY test_input.json /workspace/
 
-CMD python3.8 -u runpod_infer.py --model_name=${MODEL_NAME}
+CMD python3.10 -u runpod_infer.py --model_name=${MODEL_NAME}
diff --git a/runpod/runpod-worker-transformers/README.md b/runpod/runpod-worker-transformers/README.md
index 181ac37..a076ea0 100644
--- a/runpod/runpod-worker-transformers/README.md
+++ b/runpod/runpod-worker-transformers/README.md
@@ -11,3 +11,13 @@ Select one of the following models to build:
 ```BASH
 docker build --build-arg MODEL_NAME={model name} -t repo/image_name:tag .
 ```
+
+```BASH
+pip install --upgrade pip
+git clone https://github.com/AlpinDale/gptq-gptj.git
+cd gptq-gptj
+pip install -r requirements.txt
+CUDA_VISIBLE_DEVICES=0 python gptj.py PygmalionAI/pygmalion-6b c4 --wbits 4 --groupsize 128 --save pygmalion-6b-8bit-128g.pt
+CUDA_VISIBLE_DEVICES=0 python gptj.py PygmalionAI/pygmalion-6b c4 --wbits 4 --groupsize 128 --save_safetensors pygmalion-6b-8bit-128g.safetensors
+scp -P 22023 pygmalion-6b-8bit-128g.safetensors will@xd0.de:/home/will/
+```
diff --git a/runpod/runpod-worker-transformers/model_fetcher.py b/runpod/runpod-worker-transformers/model_fetcher.py
index e5a7a7d..3758a9e 100644
--- a/runpod/runpod-worker-transformers/model_fetcher.py
+++ b/runpod/runpod-worker-transformers/model_fetcher.py
@@ -38,15 +38,30 @@ def download_model(model_name):
         AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
 
     # ------------------------------ PPO Shygmalion 6B ----------------------------- #
-    elif model_name == 'shygmalion-6b':
+    elif model_name == 'ppo-shygmalion-6b':
         AutoModelForCausalLM.from_pretrained("TehVenom/PPO_Shygmalion-6b")
         AutoTokenizer.from_pretrained("TehVenom/PPO_Shygmalion-6b")
 
+    # ----------------------------- Dolly Shygmalion 6B ---------------------------- #
+    elif model_name == 'dolly-shygmalion-6b':
+        AutoModelForCausalLM.from_pretrained("TehVenom/Dolly_Shygmalion-6b")
+        AutoTokenizer.from_pretrained("TehVenom/Dolly_Shygmalion-6b")
+
     # ------------------------------ Erebus 13B (NSFW) ----------------------------- #
     elif model_name == 'erebus-13b':
         AutoModelForCausalLM.from_pretrained("KoboldAI/OPT-13B-Erebus")
         AutoTokenizer.from_pretrained("KoboldAI/OPT-13B-Erebus")
 
+    # --------------------------- Alpaca 13B (Quantized) -------------------------- #
+    elif model_name == 'gpt4-x-alpaca-13b-native-4bit-128g':
+        AutoModelForCausalLM.from_pretrained("anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g")
+        AutoTokenizer.from_pretrained("anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g")
+
+    # --------------------------------- Alpaca 13B -------------------------------- #
+    elif model_name == 'gpt4-x-alpaca':
+        AutoModelForCausalLM.from_pretrained("chavinlo/gpt4-x-alpaca")
+        AutoTokenizer.from_pretrained("chavinlo/gpt4-x-alpaca")
+
 
 # ---------------------------------------------------------------------------- #
 #                                Parse Arguments                                #
diff --git a/runpod/runpod-worker-transformers/runpod_infer.py b/runpod/runpod-worker-transformers/runpod_infer.py
index 169763a..7974528 100644
--- a/runpod/runpod-worker-transformers/runpod_infer.py
+++ b/runpod/runpod-worker-transformers/runpod_infer.py
@@ -7,7 +7,8 @@
 import torch
 import runpod
 from runpod.serverless.utils.rp_validator import validate
 from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
-                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)
+                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,
+                          LlamaForCausalLM, LlamaTokenizer)
 
 torch.cuda.is_available()
@@ -174,16 +175,34 @@ if __name__ == "__main__":
         tokenizer = AutoTokenizer.from_pretrained(
             "EleutherAI/gpt-j-6B", local_files_only=True)
 
-    elif args.model_name == 'shygmalion-6b':
+    elif args.model_name == 'ppo-shygmalion-6b':
         model = AutoModelForCausalLM.from_pretrained(
             "TehVenom/PPO_Shygmalion-6b", local_files_only=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "TehVenom/PPO_Shygmalion-6b", local_files_only=True)
 
+    elif args.model_name == 'dolly-shygmalion-6b':
+        model = AutoModelForCausalLM.from_pretrained(
+            "TehVenom/Dolly_Shygmalion-6b", local_files_only=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "TehVenom/Dolly_Shygmalion-6b", local_files_only=True)
+
     elif args.model_name == 'erebus-13b':
         model = AutoModelForCausalLM.from_pretrained(
             "KoboldAI/OPT-13B-Erebus", local_files_only=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "KoboldAI/OPT-13B-Erebus", local_files_only=True)
 
+    elif args.model_name == 'gpt4-x-alpaca-13b-native-4bit-128g':
+        model = LlamaForCausalLM.from_pretrained(
+            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
+        tokenizer = LlamaTokenizer.from_pretrained(
+            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
+
+    elif args.model_name == 'gpt4-x-alpaca':
+        model = LlamaForCausalLM.from_pretrained(
+            "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
+        tokenizer = LlamaTokenizer.from_pretrained(
+            "chavinlo/gpt4-x-alpaca", local_files_only=True)
+
     runpod.serverless.start({"handler": generator})