From 2df11c2ea741739283f00b9fae709c4024fd4ad8 Mon Sep 17 00:00:00 2001
From: Hendrik Langer
Date: Thu, 6 Apr 2023 15:41:47 +0200
Subject: [PATCH] test

---
 runpod/runpod-worker-transformers/Dockerfile | 71 +++++++++++--------
 .../model_fetcher.py                         | 16 ++++-
 .../runpod_infer.py                          | 39 +++++++---
 3 files changed, 85 insertions(+), 41 deletions(-)

diff --git a/runpod/runpod-worker-transformers/Dockerfile b/runpod/runpod-worker-transformers/Dockerfile
index 3b8369a..00b6556 100644
--- a/runpod/runpod-worker-transformers/Dockerfile
+++ b/runpod/runpod-worker-transformers/Dockerfile
@@ -1,4 +1,7 @@
-ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+#ARG BASE_IMAGE=nvidia/cuda:12.0.1-cudnn8-runtime-ubuntu22.04
+#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
+#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3
 FROM ${BASE_IMAGE} as dev-base
 
 ARG MODEL_NAME
@@ -12,38 +15,48 @@ ENV DEBIAN_FRONTEND noninteractive\
 RUN apt-get update --yes && \
     # - apt-get upgrade is run to patch known vulnerabilities in apt-get packages as
     #   the ubuntu base image is rebuilt too seldom sometimes (less than once a month)
-    apt-get upgrade --yes && \
+    #apt-get upgrade --yes && \
     apt install --yes --no-install-recommends \
-    build-essential \
-    ca-certificates \
-    git \
-    git-lfs \
-    wget \
-    curl \
-    bash \
-    libgl1 \
-    software-properties-common \
-    openssh-server && \
-    apt-get clean && rm -rf /var/lib/apt/lists/* && \
-    echo "en_US.UTF-8 UTF-8" > /etc/locale.gen
-
-RUN apt-key del 7fa2af80 && \
-    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
-
-RUN add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
+#    build-essential \
+    ca-certificates \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    bash \
+    libgl1 \
+    software-properties-common \
+    openssh-server && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    echo "en_US.UTF-8 UTF-8" > /etc/locale.gen
+
+#RUN apt-key del 7fa2af80 && \
+#    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+#
+#RUN add-apt-repository ppa:deadsnakes/ppa && \
+#    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
+#    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
+#    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
+##    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
+#    update-alternatives --set python3 /usr/bin/python3.10 && \
+#    update-alternatives --set python /usr/bin/python3 && \
+#    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN apt-get update --yes && \
+    apt install --yes --no-install-recommends \
+    python3 python3-dev python3-venv python3-pip && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
-RUN pip install --upgrade pip && \
-    pip install huggingface-hub && \
-    pip install diffusers && \
-    pip install safetensors && \
-    pip install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu116 && \
-    pip install bitsandbytes && \
-    pip install transformers accelerate xformers triton && \
-    pip install runpod
+RUN pip3 install --upgrade pip && \
+#    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install bitsandbytes && \
+    pip3 install safetensors && \
+    pip3 install diffusers && \
+    pip3 install transformers accelerate xformers triton && \
+    pip3 install huggingface-hub && \
+    pip3 install runpod
 
-RUN mkdir /workspace
+RUN mkdir -p /workspace
 WORKDIR /workspace
 
 COPY model_fetcher.py /workspace/
diff --git a/runpod/runpod-worker-transformers/model_fetcher.py b/runpod/runpod-worker-transformers/model_fetcher.py
index 62c502e..2be448c 100644
--- a/runpod/runpod-worker-transformers/model_fetcher.py
+++ b/runpod/runpod-worker-transformers/model_fetcher.py
@@ -7,7 +7,7 @@ import argparse
 import torch
 from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                           GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download, hf_hub_download
 
 
 def download_model(model_name):
@@ -32,6 +32,16 @@ def download_model(model_name):
 #        AutoTokenizer.from_pretrained("PygmalionAI/pygmalion-6b")
         snapshot_download(repo_id="PygmalionAI/pygmalion-6b", revision="main")
 
+    # --------------------------------- Pygmalion -------------------------------- #
+    elif model_name == 'pygmalion-6b-4bit-128g':
+        snapshot_download(repo_id="mayaeary/pygmalion-6b-4bit-128g", revision="main")
+
+    # --------------------------------- Pygmalion -------------------------------- #
+    elif model_name == 'pygmalion-6b-gptq-4bit':
+#        AutoModelForCausalLM.from_pretrained("OccamRazor/pygmalion-6b-gptq-4bit", from_pt=True)
+#        AutoTokenizer.from_pretrained("OccamRazor/pygmalion-6b-gptq-4bit")
+        snapshot_download(repo_id="OccamRazor/pygmalion-6b-gptq-4bit", revision="main")
+
     # ----------------------------------- GPT-J ----------------------------------- #
     elif model_name == 'gpt-j-6b':
         GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16",
@@ -63,6 +73,10 @@ def download_model(model_name):
         AutoModelForCausalLM.from_pretrained("chavinlo/gpt4-x-alpaca", load_in_8bit=True)
         AutoTokenizer.from_pretrained("chavinlo/gpt4-x-alpaca")
 
+    # --------------------------------- RWKV Raven 7B -------------------------------- #
+    elif model_name == 'rwkv-4-raven-7b':
+        hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename="RWKV-4-Raven-7B-v7-EngAndMore-20230404-ctx4096.pth")
+        #https://huggingface.co/yahma/RWKV-14b_quant/resolve/main/RWKV-4-Pile-14B-20230213-8019.pqth
 
 # ---------------------------------------------------------------------------- #
 #                                Parse Arguments                               #
diff --git a/runpod/runpod-worker-transformers/runpod_infer.py b/runpod/runpod-worker-transformers/runpod_infer.py
index 7974528..7054ec0 100644
--- a/runpod/runpod-worker-transformers/runpod_infer.py
+++ b/runpod/runpod-worker-transformers/runpod_infer.py
@@ -7,8 +7,8 @@ import torch
 import runpod
 from runpod.serverless.utils.rp_validator import validate
 from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
-                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,
-                          LlamaForCausalLM, LlamaTokenizer)
+                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,)
+#                          LlamaForCausalLM, LlamaTokenizer)
 
 torch.cuda.is_available()
 
@@ -164,10 +164,22 @@ if __name__ == "__main__":
 
     elif args.model_name == 'pygmalion-6b':
         model = AutoModelForCausalLM.from_pretrained(
-            "PygmalionAI/pygmalion-6b", local_files_only=True).to(device)
+            "PygmalionAI/pygmalion-6b", local_files_only=True, low_cpu_mem_usage=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "PygmalionAI/pygmalion-6b", local_files_only=True)
 
+    elif args.model_name == 'pygmalion-6b-4bit-128g':
+        model = AutoModelForCausalLM.from_pretrained(
+            "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True)
+
+    elif args.model_name == 'pygmalion-6b-gptq-4bit':
+        model = AutoModelForCausalLM.from_pretrained(
+            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True)
+
     elif args.model_name == 'gpt-j-6b':
         model = GPTJForCausalLM.from_pretrained(
             "EleutherAI/gpt-j-6B", local_files_only=True, revision="float16",
@@ -194,15 +206,20 @@ if __name__ == "__main__":
             "KoboldAI/OPT-13B-Erebus", local_files_only=True)
 
     elif args.model_name == 'gpt4-x-alpaca-13b-native-4bit-128g':
-        model = LlamaForCausalLM.from_pretrained(
-            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
-        tokenizer = LlamaTokenizer.from_pretrained(
-            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
+        pass
+#        model = LlamaForCausalLM.from_pretrained(
+#            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
+#        tokenizer = LlamaTokenizer.from_pretrained(
+#            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
 
     elif args.model_name == 'gpt4-x-alpaca':
-        model = LlamaForCausalLM.from_pretrained(
-            "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
-        tokenizer = LlamaTokenizer.from_pretrained(
-            "chavinlo/gpt4-x-alpaca", local_files_only=True)
+        pass
+#        model = LlamaForCausalLM.from_pretrained(
+#            "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
+#        tokenizer = LlamaTokenizer.from_pretrained(
+#            "chavinlo/gpt4-x-alpaca", local_files_only=True)
+
+    elif args.model_name == 'rwkv-4-raven-7b':
+        pass
 
 runpod.serverless.start({"handler": generator})
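
Note on the new 'rwkv-4-raven-7b' path: model_fetcher.py now downloads the Raven
checkpoint at image build time, but the matching branch in runpod_infer.py is
still a `pass` stub, so the worker cannot serve that model yet. Below is a
minimal sketch of what the branch could look like. It assumes the standalone
`rwkv` package (ChatRWKV), which this Dockerfile does not install, and the
strategy string and tokenizer file are assumptions, not part of this patch:

    from huggingface_hub import hf_hub_download
    # Hypothetical: would require adding `pip3 install rwkv` to the Dockerfile.
    from rwkv.model import RWKV
    from rwkv.utils import PIPELINE

    # hf_hub_download() is cache-backed, so at runtime this resolves to the
    # .pth file already fetched by model_fetcher.py during the image build.
    model_path = hf_hub_download(
        repo_id="BlinkDL/rwkv-4-raven",
        filename="RWKV-4-Raven-7B-v7-EngAndMore-20230404-ctx4096.pth")

    # Some versions of the rwkv loader expect the path without the ".pth"
    # suffix and append it themselves.
    model = RWKV(model=model_path.replace(".pth", ""), strategy="cuda fp16")
    # The tokenizer JSON ships with ChatRWKV and would have to be copied
    # into the image alongside runpod_infer.py.
    pipeline = PIPELINE(model, "20B_tokenizer.json")

Since MODEL_NAME is a build argument, the image is presumably built once per
model, e.g. `docker build --build-arg MODEL_NAME=pygmalion-6b-4bit-128g .`.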