test

Hendrik Langer committed 2 years ago · branch: master · commit 2df11c2ea7
1. runpod/runpod-worker-transformers/Dockerfile (45 changed lines)
2. runpod/runpod-worker-transformers/model_fetcher.py (16 changed lines)
3. runpod/runpod-worker-transformers/runpod_infer.py (39 changed lines)

runpod/runpod-worker-transformers/Dockerfile (45 changed lines)

@@ -1,4 +1,7 @@
-ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
 #ARG BASE_IMAGE=nvidia/cuda:12.0.1-cudnn8-runtime-ubuntu22.04
+#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
+#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3
 FROM ${BASE_IMAGE} as dev-base
 ARG MODEL_NAME
@@ -12,9 +15,9 @@ ENV DEBIAN_FRONTEND noninteractive\
 RUN apt-get update --yes && \
     # - apt-get upgrade is run to patch known vulnerabilities in apt-get packages as
     # the ubuntu base image is rebuilt too seldom sometimes (less than once a month)
-    apt-get upgrade --yes && \
+    #apt-get upgrade --yes && \
     apt install --yes --no-install-recommends \
-    build-essential \
+    # build-essential \
     ca-certificates \
     git \
     git-lfs \
@@ -27,23 +30,33 @@ RUN apt-get update --yes && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     echo "en_US.UTF-8 UTF-8" > /etc/locale.gen
-RUN apt-key del 7fa2af80 && \
-    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+#RUN apt-key del 7fa2af80 && \
+#    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+#
+#RUN add-apt-repository ppa:deadsnakes/ppa && \
+#    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
+#    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
+#    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
+##    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
+#    update-alternatives --set python3 /usr/bin/python3.10 && \
+#    update-alternatives --set python /usr/bin/python3 && \
+#    apt-get clean && rm -rf /var/lib/apt/lists/*
-RUN add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
+RUN apt-get update --yes && \
+    apt install --yes --no-install-recommends \
+    python3 python3-dev python3-venv python3-pip && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
-RUN pip install --upgrade pip && \
-    pip install huggingface-hub && \
-    pip install diffusers && \
-    pip install safetensors && \
-    pip install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu116 && \
-    pip install bitsandbytes && \
-    pip install transformers accelerate xformers triton && \
-    pip install runpod
+RUN pip3 install --upgrade pip && \
+#    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install bitsandbytes && \
+    pip3 install safetensors && \
+    pip3 install diffusers && \
+    pip3 install transformers accelerate xformers triton && \
+    pip3 install huggingface-hub && \
+    pip3 install runpod
-RUN mkdir /workspace
+RUN mkdir -p /workspace
 WORKDIR /workspace
 COPY model_fetcher.py /workspace/
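Note: with the base image switched to runpod/pytorch:3.10-2.0.0-117 and the explicit torch/torchvision/torchaudio wheel install commented out, the image now relies on the PyTorch build that ships with the base image. A minimal smoke-test sketch (hypothetical, not part of this commit) to confirm the remaining Python stack still imports and sees the GPU inside the built container:

# check_stack.py (hypothetical helper, not in this commit)
import torch
import transformers

print("torch", torch.__version__, "CUDA available:", torch.cuda.is_available())
print("transformers", transformers.__version__)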

runpod/runpod-worker-transformers/model_fetcher.py (16 changed lines)

@@ -7,7 +7,7 @@ import argparse
 import torch
 from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                           GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download, hf_hub_download

 def download_model(model_name):
@@ -32,6 +32,16 @@ def download_model(model_name):
         # AutoTokenizer.from_pretrained("PygmalionAI/pygmalion-6b")
         snapshot_download(repo_id="PygmalionAI/pygmalion-6b", revision="main")
+    # --------------------------------- Pygmalion -------------------------------- #
+    elif model_name == 'pygmalion-6b-4bit-128g':
+        snapshot_download(repo_id="mayaeary/pygmalion-6b-4bit-128g", revision="main")
+    # --------------------------------- Pygmalion -------------------------------- #
+    elif model_name == 'pygmalion-6b-gptq-4bit':
+        # AutoModelForCausalLM.from_pretrained("OccamRazor/pygmalion-6b-gptq-4bit", from_pt=True)
+        # AutoTokenizer.from_pretrained("OccamRazor/pygmalion-6b-gptq-4bit")
+        snapshot_download(repo_id="OccamRazor/pygmalion-6b-gptq-4bit", revision="main")
     # ----------------------------------- GPT-J ----------------------------------- #
     elif model_name == 'gpt-j-6b':
         GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16",
@@ -63,6 +73,10 @@ def download_model(model_name):
         AutoModelForCausalLM.from_pretrained("chavinlo/gpt4-x-alpaca", load_in_8bit=True)
         AutoTokenizer.from_pretrained("chavinlo/gpt4-x-alpaca")
+    # --------------------------------- RWKV Raven 7B -------------------------------- #
+    elif model_name == 'rwkv-4-raven-7b':
+        hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename="RWKV-4-Raven-7B-v7-EngAndMore-20230404-ctx4096.pth")
+        #https://huggingface.co/yahma/RWKV-14b_quant/resolve/main/RWKV-4-Pile-14B-20230213-8019.pqth

 # ---------------------------------------------------------------------------- #
 #                                Parse Arguments                               #
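For readers skimming the diff: the script is a single download_model(model_name) dispatcher driven by a parsed command-line argument, so the chosen model's weights get cached into the image at build time. A condensed sketch of that pattern, showing only the two branches this commit adds (the exact argument name is an assumption inferred from the surrounding code):

import argparse
from huggingface_hub import snapshot_download, hf_hub_download

def download_model(model_name):
    if model_name == 'pygmalion-6b-4bit-128g':
        # transformers-layout repo: mirror the whole snapshot
        snapshot_download(repo_id="mayaeary/pygmalion-6b-4bit-128g", revision="main")
    elif model_name == 'rwkv-4-raven-7b':
        # RWKV is published as a single .pth checkpoint, so fetch one file
        hf_hub_download(repo_id="BlinkDL/rwkv-4-raven",
                        filename="RWKV-4-Raven-7B-v7-EngAndMore-20230404-ctx4096.pth")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Cache model weights at build time.")
    parser.add_argument("--model_name", type=str, required=True)
    download_model(parser.parse_args().model_name)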

runpod/runpod-worker-transformers/runpod_infer.py (39 changed lines)

@@ -7,8 +7,8 @@ import torch
 import runpod
 from runpod.serverless.utils.rp_validator import validate
 from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
-                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,
-                          LlamaForCausalLM, LlamaTokenizer)
+                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,)
+#                          LlamaForCausalLM, LlamaTokenizer)

 torch.cuda.is_available()
@@ -164,10 +164,22 @@ if __name__ == "__main__":
     elif args.model_name == 'pygmalion-6b':
         model = AutoModelForCausalLM.from_pretrained(
-            "PygmalionAI/pygmalion-6b", local_files_only=True).to(device)
+            "PygmalionAI/pygmalion-6b", local_files_only=True, low_cpu_mem_usage=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "PygmalionAI/pygmalion-6b", local_files_only=True)
+    elif args.model_name == 'pygmalion-6b-4bit-128g':
+        model = AutoModelForCausalLM.from_pretrained(
+            "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True)
+    elif args.model_name == 'pygmalion-6b-gptq-4bit':
+        model = AutoModelForCausalLM.from_pretrained(
+            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True)
     elif args.model_name == 'gpt-j-6b':
         model = GPTJForCausalLM.from_pretrained(
             "EleutherAI/gpt-j-6B", local_files_only=True, revision="float16",
@@ -194,15 +206,20 @@ if __name__ == "__main__":
             "KoboldAI/OPT-13B-Erebus", local_files_only=True)
     elif args.model_name == 'gpt4-x-alpaca-13b-native-4bit-128g':
-        model = LlamaForCausalLM.from_pretrained(
-            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
-        tokenizer = LlamaTokenizer.from_pretrained(
-            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
+        pass
+#        model = LlamaForCausalLM.from_pretrained(
+#            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
+#        tokenizer = LlamaTokenizer.from_pretrained(
+#            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
     elif args.model_name == 'gpt4-x-alpaca':
-        model = LlamaForCausalLM.from_pretrained(
-            "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
-        tokenizer = LlamaTokenizer.from_pretrained(
-            "chavinlo/gpt4-x-alpaca", local_files_only=True)
+        pass
+#        model = LlamaForCausalLM.from_pretrained(
+#            "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
+#        tokenizer = LlamaTokenizer.from_pretrained(
+#            "chavinlo/gpt4-x-alpaca", local_files_only=True)
+    elif args.model_name == 'rwkv-4-raven-7b':
+        pass

     runpod.serverless.start({"handler": generator})
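The final context line shows that once a model and tokenizer are selected, the script hands control to the RunPod serverless runtime. Roughly, the handler it registers has the shape sketched below; this is a simplified assumption, since the real generator() also validates the payload with rp_validator and applies sampling options from the job input:

import runpod

def generator(job):
    # Simplified sketch: 'model', 'tokenizer' and 'device' are the globals
    # set by the elif chain above; validation and sampling options omitted.
    prompt = job["input"]["prompt"]
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    output_ids = model.generate(input_ids, max_new_tokens=100)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

runpod.serverless.start({"handler": generator})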
