Browse Source

remote worker

master
Hendrik Langer 2 years ago
parent
commit
77e091fddb
  1. 11
      runpod/runpod-worker-transformers/Dockerfile
  2. 2
      runpod/runpod-worker-transformers/runpod_infer.py

11
runpod/runpod-worker-transformers/Dockerfile

@@ -49,8 +49,10 @@ RUN apt-get update --yes && \
python3 python3-dev python3-venv python3-pip && \
apt-get clean && rm -rf /var/lib/apt/lists/*
ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
RUN pip3 install --upgrade pip && \
pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
pip3 install cuda-python==11.8.0 && \
pip3 install --default-timeout=100 torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
pip3 install bitsandbytes && \
pip3 install safetensors && \
pip3 install sentencepiece && \
@@ -64,7 +66,6 @@ RUN pip3 install --upgrade pip && \
RUN mkdir -p /workspace
WORKDIR /workspace
ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
RUN mkdir repositories && git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa && \
# (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
#RUN mkdir repositories && git clone --depth 1 https://github.com/AlpinDale/gptq-gptj.git repositories/GPTQ-for-LLaMa && \
@@ -75,6 +76,9 @@ RUN python3 model_fetcher.py --model_name=${MODEL_NAME}
FROM ${BASE_IMAGE}
ARG MODEL_NAME
ENV MODEL_NAME=${MODEL_NAME}
ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
RUN mkdir -p /workspace
WORKDIR /workspace
@@ -85,7 +89,8 @@ RUN apt-get update --yes && \
apt-get clean && rm -rf /var/lib/apt/lists/*
RUN pip3 install --upgrade pip && \
pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
pip3 install cuda-python==11.8.0 && \
pip3 install --default-timeout=100 torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
pip3 install bitsandbytes && \
pip3 install safetensors && \
pip3 install sentencepiece && \

2
runpod/runpod-worker-transformers/runpod_infer.py

@@ -170,7 +170,7 @@ def load_quantized(model_name, wbits, groupsize, device):
from safetensors.torch import load_file as safe_load
if device == -1:
device = "cpu"
model.load_state_dict(safe_load(str(pt_path), map_location=device), strict = False)
model.load_state_dict(safe_load(str(pt_path)), strict = False)
else:
model.load_state_dict(torch.load(str(pt_path)), strict = False)
model.seqlen = 2048

Loading…
Cancel
Save