From 8b11a0e04eeded94c32c24fd49027f01602c3e51 Mon Sep 17 00:00:00 2001
From: Hendrik Langer <hendrik+dev@xd0.de>
Date: Thu, 20 Apr 2023 18:04:31 +0200
Subject: [PATCH] work on remote worker dockerfile

---
 runpod/runpod-worker-oobabooga-api/Dockerfile | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/runpod/runpod-worker-oobabooga-api/Dockerfile b/runpod/runpod-worker-oobabooga-api/Dockerfile
index 75e9d37..eb9102c 100644
--- a/runpod/runpod-worker-oobabooga-api/Dockerfile
+++ b/runpod/runpod-worker-oobabooga-api/Dockerfile
@@ -51,7 +51,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes
       python3 python3-dev python3-venv python3-pip
 #    apt-get clean && rm -rf /var/lib/apt/lists/*
 
-ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+#ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"
 ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
 RUN --mount=type=cache,target=/root/.cache,sharing=locked pip3 install --upgrade pip setuptools && \
     pip3 install cuda-python==11.8.0 && \
@@ -79,12 +80,16 @@ RUN --mount=type=cache,target=/root/.cache,sharing=locked \
 
 RUN --mount=type=cache,target=/root/.cache,sharing=locked \
     cd /workspace/text-generation-webui/ && mkdir repositories && cd repositories && \
-# https://github.com/oobabooga/GPTQ-for-LLaMa
+    # Triton: clone, pin to a fixed commit from inside the checkout, install deps, drop .git
+    git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
+    (cd GPTQ-for-LLaMa && git reset --hard 508de42ff45ec560a4504e12b0d42114d599cf38 && \
+     pip3 install -r requirements.txt && rm -rf .git)
+    # Cuda
 #    git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
+#    (cd GPTQ-for-LLaMa && python3 setup_cuda.py install && python3 setup_cuda.py bdist_wheel -d .)
+    # Cuda (Oobabooga's fork)
+#    git clone --branch cuda --single-branch --depth 1 https://github.com/oobabooga/GPTQ-for-LLaMa.git && \
 #    (cd GPTQ-for-LLaMa && python3 setup_cuda.py bdist_wheel -d .)
-    git clone --depth 1 https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
-    (cd GPTQ-for-LLaMa && pip3 install -r requirements.txt)
-# && python3 setup_cuda.py install
 
 
 FROM builder AS modeldownloader
@@ -102,7 +107,7 @@ RUN cd /workspace/text-generation-webui && python3 download-model.py ${MODEL_NAM
     #https://huggingface.co/BlinkDL/rwkv-4-pile-7b/resolve/main/RWKV-4-Pile-7B-20230406-ctx8192-test949.pth
 
 #RUN cd /workspace/text-generation-webui && python3 download-model.py MetaIX/GPT4-X-Alpaca-30B-Int4 --text-only && \
-#    wget -P /workspace/text-generation-webui/models/GPT4-X-Alpaca-30B-Int4/ https://huggingface.co/MetaIX/GPT4-X-Alpaca-30B-Int4/resolve/main/gpt4-x-alpaca-30b-4bit.safetensors
+#    wget -P /workspace/text-generation-webui/models/MetaIX_GPT4-X-Alpaca-30B-Int4/ https://huggingface.co/MetaIX/GPT4-X-Alpaca-30B-Int4/resolve/main/gpt4-x-alpaca-30b-4bit.safetensors
 
 FROM ${BASE_IMAGE}
 #ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"