
more tests on remote worker container

master
Hendrik Langer, 2 years ago
commit ca068cf28c
1. runpod/runpod-worker-transformers/Dockerfile (46 lines changed)
2. runpod/runpod-worker-transformers/model_fetcher.py (1 line changed)
3. runpod/runpod-worker-transformers/runpod_infer.py (8 lines changed)

runpod/runpod-worker-transformers/Dockerfile (46 lines changed)

@@ -1,8 +1,9 @@
+ARG DEV_IMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
 ARG BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
 #ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
 #ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
 #ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3
-FROM ${BASE_IMAGE} as dev-base
+FROM ${DEV_IMAGE} as builder
 ARG MODEL_NAME
 ENV MODEL_NAME=${MODEL_NAME}
@@ -15,16 +16,17 @@ ENV DEBIAN_FRONTEND noninteractive\
 RUN apt-get update --yes && \
     # - apt-get upgrade is run to patch known vulnerabilities in apt-get packages as
     #   the ubuntu base image is rebuilt too seldom sometimes (less than once a month)
-    #apt-get upgrade --yes && \
+    apt-get upgrade --yes && \
     apt install --yes --no-install-recommends \
-    # build-essential \
+    build-essential \
+    cmake \
     ca-certificates \
     git \
     git-lfs \
     wget \
     curl \
     bash \
-    libgl1 \
+    # libgl1 \
     software-properties-common \
     openssh-server && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
@@ -51,6 +53,7 @@ RUN pip3 install --upgrade pip && \
     pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
     pip3 install bitsandbytes && \
     pip3 install safetensors && \
+    pip3 install sentencepiece triton && \
     pip3 install diffusers && \
     pip3 install transformers accelerate xformers triton && \
     pip3 install huggingface-hub && \
@@ -60,13 +63,44 @@ RUN pip3 install --upgrade pip && \
 RUN mkdir -p /workspace
 WORKDIR /workspace
-RUN mkdir repositories && git clone --branch cuda --single-branch --depth 1 https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa
-#RUN mkdir repositories && git clone https://github.com/AlpinDale/gptq-gptj.git repositories/GPTQ-for-LLaMa && (cd repositories/GPTQ-for-LLaMa && python3 setup_cuda.py install)
+ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+RUN mkdir repositories && git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa && \
+    (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
+#RUN mkdir repositories && git clone --depth 1 https://github.com/AlpinDale/gptq-gptj.git repositories/GPTQ-for-LLaMa && \
+    (cd repositories/GPTQ-for-LLaMa && python3 setup_cuda.py install)
 COPY model_fetcher.py /workspace/
 RUN python3 model_fetcher.py --model_name=${MODEL_NAME}
+FROM ${BASE_IMAGE}
+RUN mkdir -p /workspace
+WORKDIR /workspace
+RUN apt-get update --yes && \
+    apt install --yes --no-install-recommends \
+    python3 python3-dev python3-venv python3-pip && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN pip3 install --upgrade pip && \
+    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install bitsandbytes && \
+    pip3 install safetensors && \
+    pip3 install sentencepiece triton && \
+    pip3 install diffusers && \
+    pip3 install transformers accelerate xformers triton && \
+    pip3 install huggingface-hub && \
+    pip3 install runpod && \
+    pip3 cache purge
+RUN mkdir /workspace/repositories
+COPY --from=builder /workspace/repositories /workspace/repositories/
+COPY --from=builder /root/.cache/huggingface /root/.cache/huggingface
 #RUN git lfs install && \
 #    git clone --depth 1 https://huggingface.co/${MODEL_NAME}
+COPY model_fetcher.py /workspace/
 COPY runpod_infer.py /workspace/
 COPY test_input.json /workspace/
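
The Dockerfile is now a two-stage build: the builder stage (based on DEV_IMAGE) compiles the GPTQ-for-LLaMa CUDA extension and pre-downloads the model via model_fetcher.py, while the slimmer runtime stage (based on BASE_IMAGE) copies in /workspace/repositories and the Hugging Face cache. Below is a minimal sketch, assuming model_fetcher.py populated the default cache under /root/.cache/huggingface, of how the runtime container can resolve that pre-downloaded snapshot without network access; the model name is simply the one referenced later in runpod_infer.py.

# Sketch: resolve a model already downloaded into the copied HF cache.
from huggingface_hub import snapshot_download

def resolve_cached_model(model_name: str) -> str:
    # local_files_only=True fails fast instead of contacting the Hub
    # if the snapshot is missing from /root/.cache/huggingface.
    return snapshot_download(model_name, local_files_only=True)

if __name__ == "__main__":
    path = resolve_cached_model("mayaeary/pygmalion-6b-4bit-128g")
    print(f"cached snapshot at {path}")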

runpod/runpod-worker-transformers/model_fetcher.py (1 line changed)

@@ -80,6 +80,7 @@ def download_model(model_name):
     #https://huggingface.co/yahma/RWKV-14b_quant/resolve/main/RWKV-4-Pile-14B-20230213-8019.pqth
     if snapshot_path:
+        print("model downloaded to \"{snapshot_path}\"")
         os.system("ln -s \"{snapshot_path}\" /workdir/model")
 # ---------------------------------------------------------------------------- #
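
Note that the newly added print() call, like the existing os.system() line below it, uses "{snapshot_path}" inside a plain string literal, so the literal placeholder is emitted rather than the actual path. A small sketch of what is presumably intended, using f-strings:

import os

def link_snapshot(snapshot_path: str) -> None:
    # f-strings interpolate the real snapshot path into both commands
    if snapshot_path:
        print(f'model downloaded to "{snapshot_path}"')
        os.system(f'ln -s "{snapshot_path}" /workdir/model')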

runpod/runpod-worker-transformers/runpod_infer.py (8 lines changed)

@@ -101,7 +101,7 @@ INPUT_SCHEMA = {
 }
-def load_quantized(model_name, wbits, groupsize):
+def load_quantized(model_name, wbits, groupsize, device):
     """https://github.com/oobabooga/text-generation-webui/blob/main/modules/GPTQ_loader.py"""
     from pathlib import Path
     import os, sys
@@ -167,7 +167,9 @@ def load_quantized(model_name, wbits, groupsize):
     print('Loading model ...')
     if str(pt_path).endswith('.safetensors'):
         from safetensors.torch import load_file as safe_load
-        model.load_state_dict(safe_load(str(pt_path)))
+        if device == -1:
+            device = "cpu"
+        model.load_state_dict(safe_load(str(pt_path), device))
     else:
         model.load_state_dict(torch.load(str(pt_path)))
     model.seqlen = 2048
@@ -248,7 +250,7 @@ if __name__ == "__main__":
     elif args.model_name == 'pygmalion-6b-4bit-128g':
         # model = AutoModelForCausalLM.from_pretrained(
         #     "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True).to(device)
-        model = load_quantized("pygmalion-6b-4bit-128g", 4, 128).to(device)
+        model = load_quantized("pygmalion-6b-4bit-128g", 4, 128, device).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True)
