diff --git a/runpod/runpod-worker-transformers/Dockerfile b/runpod/runpod-worker-transformers/Dockerfile
index 2a371c6..26cd367 100644
--- a/runpod/runpod-worker-transformers/Dockerfile
+++ b/runpod/runpod-worker-transformers/Dockerfile
@@ -49,8 +49,10 @@ RUN apt-get update --yes && \
     python3 python3-dev python3-venv python3-pip && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
+ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN pip3 install --upgrade pip && \
-    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install cuda-python==11.8.0 && \
+    pip3 install --default-timeout=100 torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
     pip3 install bitsandbytes && \
     pip3 install safetensors && \
     pip3 install sentencepiece && \
@@ -64,7 +66,6 @@ RUN pip3 install --upgrade pip && \
 RUN mkdir -p /workspace
 WORKDIR /workspace
 
-ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN mkdir repositories && git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa && \
 # (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
 #RUN mkdir repositories && git clone --depth 1 https://github.com/AlpinDale/gptq-gptj.git repositories/GPTQ-for-LLaMa && \
@@ -75,6 +76,9 @@ RUN python3 model_fetcher.py --model_name=${MODEL_NAME}
 
 FROM ${BASE_IMAGE}
 
+ARG MODEL_NAME
+ENV MODEL_NAME=${MODEL_NAME}
+ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN mkdir -p /workspace
 WORKDIR /workspace
 
@@ -85,7 +89,8 @@ RUN apt-get update --yes && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
 RUN pip3 install --upgrade pip && \
-    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install cuda-python==11.8.0 && \
+    pip3 install --default-timeout=100 torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
     pip3 install bitsandbytes && \
     pip3 install safetensors && \
     pip3 install sentencepiece && \
diff --git a/runpod/runpod-worker-transformers/runpod_infer.py b/runpod/runpod-worker-transformers/runpod_infer.py
index 1ef780a..c59567e 100644
--- a/runpod/runpod-worker-transformers/runpod_infer.py
+++ b/runpod/runpod-worker-transformers/runpod_infer.py
@@ -170,7 +170,7 @@ def load_quantized(model_name, wbits, groupsize, device):
         from safetensors.torch import load_file as safe_load
         if device == -1:
             device = "cpu"
-        model.load_state_dict(safe_load(str(pt_path), map_location=device), strict = False)
+        model.load_state_dict(safe_load(str(pt_path)), strict = False)
     else:
         model.load_state_dict(torch.load(str(pt_path)), strict = False)
     model.seqlen = 2048