remote worker

master
Hendrik Langer, 2 years ago
commit 77e091fddb

2 changed files:
  runpod/runpod-worker-transformers/Dockerfile        (11 lines changed)
  runpod/runpod-worker-transformers/runpod_infer.py   (2 lines changed)

runpod/runpod-worker-transformers/Dockerfile

@@ -49,8 +49,10 @@ RUN apt-get update --yes && \
     python3 python3-dev python3-venv python3-pip && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
+ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN pip3 install --upgrade pip && \
-    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install cuda-python==11.8.0 && \
+    pip3 install --default-timeout=100 torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
     pip3 install bitsandbytes && \
     pip3 install safetensors && \
     pip3 install sentencepiece && \
@@ -64,7 +66,6 @@ RUN pip3 install --upgrade pip && \
 RUN mkdir -p /workspace
 WORKDIR /workspace
-ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN mkdir repositories && git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa && \
 # (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
 #RUN mkdir repositories && git clone --depth 1 https://github.com/AlpinDale/gptq-gptj.git repositories/GPTQ-for-LLaMa && \
@@ -75,6 +76,9 @@ RUN python3 model_fetcher.py --model_name=${MODEL_NAME}
 FROM ${BASE_IMAGE}
+ARG MODEL_NAME
+ENV MODEL_NAME=${MODEL_NAME}
+ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN mkdir -p /workspace
 WORKDIR /workspace
@@ -85,7 +89,8 @@ RUN apt-get update --yes && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 RUN pip3 install --upgrade pip && \
-    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install cuda-python==11.8.0 && \
+    pip3 install --default-timeout=100 torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
     pip3 install bitsandbytes && \
     pip3 install safetensors && \
     pip3 install sentencepiece && \
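For context, the install layer in both affected stages reads roughly as below after this commit (a sketch assembled from the hunks above; the remaining pip3 install lines and surrounding instructions are omitted). cuda-python is pinned to 11.8.0, presumably to match the cu118 PyTorch wheels; --default-timeout=100 raises pip's socket timeout from the 15-second default so the large torch wheel can finish downloading; and TORCH_CUDA_ARCH_LIST is now set before the GPTQ-for-LLaMa checkout and again after FROM ${BASE_IMAGE}, so the CUDA kernels target the same architectures in whichever stage compiles them.

ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"

RUN pip3 install --upgrade pip && \
    pip3 install cuda-python==11.8.0 && \
    pip3 install --default-timeout=100 torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
    pip3 install bitsandbytes && \
    pip3 install safetensors && \
    pip3 install sentencepiece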

runpod/runpod-worker-transformers/runpod_infer.py

@@ -170,7 +170,7 @@ def load_quantized(model_name, wbits, groupsize, device):
         from safetensors.torch import load_file as safe_load
         if device == -1:
             device = "cpu"
-        model.load_state_dict(safe_load(str(pt_path), map_location=device), strict = False)
+        model.load_state_dict(safe_load(str(pt_path)), strict = False)
     else:
         model.load_state_dict(torch.load(str(pt_path)), strict = False)
     model.seqlen = 2048
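The one-line change above drops the map_location keyword from the safetensors path. safetensors.torch.load_file does not take torch.load's map_location argument (it has an optional device parameter and loads onto CPU by default), so the old call would raise a TypeError for .safetensors checkpoints. A minimal sketch of that branch after the change, wrapped in a hypothetical load_checkpoint() helper for illustration; the condition on the file suffix is assumed, since the enclosing line is not shown in the hunk:

import torch
from safetensors.torch import load_file as safe_load

def load_checkpoint(model, pt_path, device):
    # Hypothetical wrapper around the branch shown in the hunk above.
    if str(pt_path).endswith(".safetensors"):  # assumed enclosing condition
        if device == -1:
            device = "cpu"  # normalise the CPU sentinel, as in the original code
        # load_file() returns a plain state dict and has no map_location
        # parameter, so it is called without one and loads tensors onto CPU.
        model.load_state_dict(safe_load(str(pt_path)), strict=False)
    else:
        model.load_state_dict(torch.load(str(pt_path)), strict=False)
    model.seqlen = 2048
    return model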
