
commit 2df11c2ea7 (branch: master)
Author: Hendrik Langer, 2 years ago

    test
3 changed files:

  1. runpod/runpod-worker-transformers/Dockerfile       (45 lines changed)
  2. runpod/runpod-worker-transformers/model_fetcher.py (16 lines changed)
  3. runpod/runpod-worker-transformers/runpod_infer.py  (39 lines changed)

--- a/runpod/runpod-worker-transformers/Dockerfile
+++ b/runpod/runpod-worker-transformers/Dockerfile

@@ -1,4 +1,7 @@
-ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+#ARG BASE_IMAGE=nvidia/cuda:12.0.1-cudnn8-runtime-ubuntu22.04
+#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
+#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3
 
 FROM ${BASE_IMAGE} as dev-base
 ARG MODEL_NAME
@@ -12,9 +15,9 @@ ENV DEBIAN_FRONTEND noninteractive
 RUN apt-get update --yes && \
     # - apt-get upgrade is run to patch known vulnerabilities in apt-get packages as
     # the ubuntu base image is rebuilt too seldom sometimes (less than once a month)
-    apt-get upgrade --yes && \
+    #apt-get upgrade --yes && \
     apt install --yes --no-install-recommends \
-    build-essential \
+    # build-essential \
     ca-certificates \
     git \
     git-lfs \
@@ -27,23 +30,33 @@ RUN apt-get update --yes && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     echo "en_US.UTF-8 UTF-8" > /etc/locale.gen
 
-RUN apt-key del 7fa2af80 && \
-    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+#RUN apt-key del 7fa2af80 && \
+#    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+
+#RUN add-apt-repository ppa:deadsnakes/ppa && \
+#    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
+#    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
+#    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
+##    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
+#    update-alternatives --set python3 /usr/bin/python3.10 && \
+#    update-alternatives --set python /usr/bin/python3 && \
+#    apt-get clean && rm -rf /var/lib/apt/lists/*
 
-RUN add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
+RUN apt-get update --yes && \
+    apt install --yes --no-install-recommends \
+    python3 python3-dev python3-venv python3-pip && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
-RUN pip install --upgrade pip && \
-    pip install huggingface-hub && \
-    pip install diffusers && \
-    pip install safetensors && \
-    pip install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu116 && \
-    pip install bitsandbytes && \
-    pip install transformers accelerate xformers triton && \
-    pip install runpod
+RUN pip3 install --upgrade pip && \
+#    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install bitsandbytes && \
+    pip3 install safetensors && \
+    pip3 install diffusers && \
+    pip3 install transformers accelerate xformers triton && \
+    pip3 install huggingface-hub && \
+    pip3 install runpod
 
-RUN mkdir /workspace
+RUN mkdir -p /workspace
 WORKDIR /workspace
 COPY model_fetcher.py /workspace/
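
Note on the dependency change: the new runpod/pytorch base image already ships Python and torch, so the pip3 layer only adds the inference stack (torch is now commented out rather than pinned to cu116). A minimal sanity-check sketch (a hypothetical helper script, not part of this commit) that could be run inside the built container to confirm the pip3-installed packages import and CUDA is visible:

    # sanity_check.py -- hypothetical, not part of this commit.
    # Importing bitsandbytes alone verifies its CUDA build loads.
    import torch
    import bitsandbytes  # noqa: F401
    import transformers
    import diffusers

    print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
    print("transformers:", transformers.__version__)
    print("diffusers:", diffusers.__version__)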

--- a/runpod/runpod-worker-transformers/model_fetcher.py
+++ b/runpod/runpod-worker-transformers/model_fetcher.py

@@ -7,7 +7,7 @@ import argparse
 
 import torch
 from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                           GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download, hf_hub_download
 
 def download_model(model_name):
@@ -32,6 +32,16 @@ def download_model(model_name):
         # AutoTokenizer.from_pretrained("PygmalionAI/pygmalion-6b")
         snapshot_download(repo_id="PygmalionAI/pygmalion-6b", revision="main")
 
+    # --------------------------------- Pygmalion -------------------------------- #
+    elif model_name == 'pygmalion-6b-4bit-128g':
+        snapshot_download(repo_id="mayaeary/pygmalion-6b-4bit-128g", revision="main")
+
+    # --------------------------------- Pygmalion -------------------------------- #
+    elif model_name == 'pygmalion-6b-gptq-4bit':
+        # AutoModelForCausalLM.from_pretrained("OccamRazor/pygmalion-6b-gptq-4bit", from_pt=True)
+        # AutoTokenizer.from_pretrained("OccamRazor/pygmalion-6b-gptq-4bit")
+        snapshot_download(repo_id="OccamRazor/pygmalion-6b-gptq-4bit", revision="main")
+
     # ----------------------------------- GPT-J ----------------------------------- #
     elif model_name == 'gpt-j-6b':
         GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16",
@@ -63,6 +73,10 @@ def download_model(model_name):
         AutoModelForCausalLM.from_pretrained("chavinlo/gpt4-x-alpaca", load_in_8bit=True)
         AutoTokenizer.from_pretrained("chavinlo/gpt4-x-alpaca")
 
+    # --------------------------------- RWKV Raven 7B -------------------------------- #
+    elif model_name == 'rwkv-4-raven-7b':
+        hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename="RWKV-4-Raven-7B-v7-EngAndMore-20230404-ctx4096.pth")
+        # https://huggingface.co/yahma/RWKV-14b_quant/resolve/main/RWKV-4-Pile-14B-20230213-8019.pqth
 
 # ---------------------------------------------------------------------------- #
 # Parse Arguments #
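
The two huggingface_hub helpers used above differ in granularity: snapshot_download mirrors a whole repository into the local Hugging Face cache (so the worker can later load it with local_files_only=True), while hf_hub_download fetches a single file, here the RWKV .pth checkpoint. Both return the resulting local path. A minimal sketch using the repo IDs from this diff (running it downloads several gigabytes):

    from huggingface_hub import snapshot_download, hf_hub_download

    # Whole-repo mirror: returns the cached snapshot folder.
    repo_path = snapshot_download(
        repo_id="mayaeary/pygmalion-6b-4bit-128g", revision="main")

    # Single-file fetch: returns the cached file path.
    ckpt_path = hf_hub_download(
        repo_id="BlinkDL/rwkv-4-raven",
        filename="RWKV-4-Raven-7B-v7-EngAndMore-20230404-ctx4096.pth")

    print(repo_path)
    print(ckpt_path)

The argparse section that follows this hunk is not shown, so the exact CLI is an assumption, but an invocation like python3 model_fetcher.py --model_name=rwkv-4-raven-7b would match the model_name switch above.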

--- a/runpod/runpod-worker-transformers/runpod_infer.py
+++ b/runpod/runpod-worker-transformers/runpod_infer.py

@@ -7,8 +7,8 @@ import torch
 
 import runpod
 from runpod.serverless.utils.rp_validator import validate
 from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
-                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,
-                          LlamaForCausalLM, LlamaTokenizer)
+                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,)
+                          # LlamaForCausalLM, LlamaTokenizer)
 
 torch.cuda.is_available()
@@ -164,10 +164,22 @@ if __name__ == "__main__":
     elif args.model_name == 'pygmalion-6b':
         model = AutoModelForCausalLM.from_pretrained(
-            "PygmalionAI/pygmalion-6b", local_files_only=True).to(device)
+            "PygmalionAI/pygmalion-6b", local_files_only=True, low_cpu_mem_usage=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "PygmalionAI/pygmalion-6b", local_files_only=True)
 
+    elif args.model_name == 'pygmalion-6b-4bit-128g':
+        model = AutoModelForCausalLM.from_pretrained(
+            "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True)
+
+    elif args.model_name == 'pygmalion-6b-gptq-4bit':
+        model = AutoModelForCausalLM.from_pretrained(
+            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True)
+
     elif args.model_name == 'gpt-j-6b':
         model = GPTJForCausalLM.from_pretrained(
             "EleutherAI/gpt-j-6B", local_files_only=True, revision="float16",
@@ -194,15 +206,20 @@ if __name__ == "__main__":
             "KoboldAI/OPT-13B-Erebus", local_files_only=True)
 
     elif args.model_name == 'gpt4-x-alpaca-13b-native-4bit-128g':
-        model = LlamaForCausalLM.from_pretrained(
-            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
-        tokenizer = LlamaTokenizer.from_pretrained(
-            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
+        pass
+        # model = LlamaForCausalLM.from_pretrained(
+        #     "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
+        # tokenizer = LlamaTokenizer.from_pretrained(
+        #     "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
 
     elif args.model_name == 'gpt4-x-alpaca':
-        model = LlamaForCausalLM.from_pretrained(
-            "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
-        tokenizer = LlamaTokenizer.from_pretrained(
-            "chavinlo/gpt4-x-alpaca", local_files_only=True)
+        pass
+        # model = LlamaForCausalLM.from_pretrained(
+        #     "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
+        # tokenizer = LlamaTokenizer.from_pretrained(
+        #     "chavinlo/gpt4-x-alpaca", local_files_only=True)
+
+    elif args.model_name == 'rwkv-4-raven-7b':
+        pass
 
 runpod.serverless.start({"handler": generator})
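
With the Llama branches and the new rwkv-4-raven-7b branch stubbed out to pass, those model names now fall through without defining model or tokenizer; only the branches that still load weights remain usable. For local testing, a RunPod serverless handler receives an event dict with an "input" payload. A hypothetical smoke test for the generator handler (the real schema is enforced by the validate call earlier in the file and is not shown in this hunk, so the keys below are assumptions):

    # Hypothetical local smoke test; the real INPUT_SCHEMA may differ.
    event = {
        "input": {
            "prompt": "Hello, how are you?",
            "max_length": 100,
        }
    }
    print(generator(event))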
