Browse Source

add runpod oobabooga worker

master
Hendrik Langer 2 years ago
parent
commit
1b4922a68e
  1. 131
      runpod/runpod-worker-oobabooga-api/Dockerfile
  2. 24
      runpod/runpod-worker-oobabooga-api/README.md
  3. 39
      runpod/runpod-worker-oobabooga-api/runpod_infer.py
  4. 10
      runpod/runpod-worker-oobabooga-api/start.sh
  5. 5
      runpod/runpod-worker-oobabooga-api/test_input.json

131
runpod/runpod-worker-oobabooga-api/Dockerfile

@ -0,0 +1,131 @@
# syntax=docker/dockerfile:1
# Multi-stage build: a CUDA *devel* image compiles the GPTQ CUDA extension and
# pre-downloads the model weights; the slimmer CUDA *runtime* image only
# receives the results.
ARG DEV_IMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
ARG BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
#ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3

# ---------------------------------------------------------------------------
# Stage 1: builder
# Based on https://github.com/runpod/containers/blob/main/oobabooga/Dockerfile
# Build with:
#   DOCKER_BUILDKIT=1 docker build \
#     --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" \
#     -t magn418/runpod-oobabooga-pygmalion:test .
# ---------------------------------------------------------------------------
FROM ${DEV_IMAGE} AS builder

ARG MODEL_NAME="PygmalionAI/pygmalion-350m"
ENV MODEL_NAME=${MODEL_NAME}

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# DEBIAN_FRONTEND is a build-time-only setting: use ARG (not ENV) so it does
# not leak into the runtime environment of derived containers.
ARG DEBIAN_FRONTEND=noninteractive
ENV SHELL=/bin/bash

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    # apt-get upgrade is run to patch known vulnerabilities in apt-get packages as
    # the ubuntu base image is rebuilt too seldom sometimes (less than once a month)
    apt-get upgrade --yes && \
    apt-get install --yes --no-install-recommends \
        bash \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        git-lfs \
        openssh-server \
        software-properties-common \
        wget && \
    # clean the package lists in the same layer that created them
    rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
        python3 python3-dev python3-venv python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Compile the CUDA extension for every GPU architecture we may be scheduled on.
ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}

RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    pip3 install --upgrade pip setuptools && \
    pip3 install cuda-python==11.8.0 && \
    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
    pip3 install bitsandbytes safetensors sentencepiece diffusers && \
    pip3 install accelerate xformers triton && \
    pip3 install git+https://github.com/huggingface/transformers.git && \
    pip3 install huggingface-hub runpod

WORKDIR /workspace

RUN git clone https://github.com/oobabooga/text-generation-webui.git

WORKDIR /workspace/text-generation-webui

RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    pip3 install -r requirements.txt && \
    pip3 install -r extensions/api/requirements.txt

# Build GPTQ-for-LLaMa (https://github.com/oobabooga/GPTQ-for-LLaMa) as a
# wheel so the runtime stage can install it without a compiler toolchain.
RUN mkdir -p repositories && cd repositories && \
    git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
    cd GPTQ-for-LLaMa && python3 setup_cuda.py bdist_wheel -d .

# Pre-download the model weights into the image so cold starts need no network.
RUN python3 download-model.py ${MODEL_NAME}

# ---------------------------------------------------------------------------
# Stage 2: runtime image — python deps plus the prebuilt webui tree (including
# the GPTQ wheel and model weights) copied from the builder stage.
# ---------------------------------------------------------------------------
FROM ${BASE_IMAGE}
#ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"

ARG DEBIAN_FRONTEND=noninteractive
WORKDIR /workspace

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
        g++ \
        git \
        make \
        python3 python3-dev python3-venv python3-pip && \
    rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    pip3 install --upgrade pip setuptools && \
    pip3 install cuda-python==11.8.0 && \
    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
    pip3 install bitsandbytes && \
    # NOTE(review): overwrites the bitsandbytes CPU fallback library with the
    # CUDA 11.8 build — presumably to force the GPU kernels on the runtime
    # image, which lacks the full CUDA toolkit. Confirm on bitsandbytes update.
    cp /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so \
       /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so && \
    pip3 install safetensors sentencepiece diffusers && \
    pip3 install accelerate xformers triton && \
    pip3 install git+https://github.com/huggingface/transformers.git && \
    pip3 install rwkv huggingface-hub runpod

COPY --from=builder /workspace/text-generation-webui /workspace/text-generation-webui

RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \
    pip3 install -r extensions/api/requirements.txt && \
    pip3 install -r repositories/GPTQ-for-LLaMa/requirements.txt && \
    pip3 install repositories/GPTQ-for-LLaMa/*.whl

COPY runpod_infer.py /workspace/
COPY --chmod=755 start.sh /
CMD ["/start.sh"]

24
runpod/runpod-worker-oobabooga-api/README.md

@ -0,0 +1,24 @@
## Building
Select one of the following models to build:
- `gpt-neo-1.3B`
- `gpt-neo-2.7B`
- `gpt-neox-20b`
- `pygmalion-6b`
- `gpt-j-6b`
```BASH
docker build --build-arg MODEL_NAME={model name} -t repo/image_name:tag .
```
```BASH
pip install --upgrade pip
git clone https://github.com/AlpinDale/gptq-gptj.git
cd gptq-gptj
pip install -r requirements.txt
CUDA_VISIBLE_DEVICES=0 python gptj.py PygmalionAI/pygmalion-6b c4 --wbits 4 --groupsize 128 --save pygmalion-6b-8bit-128g.pt
CUDA_VISIBLE_DEVICES=0 python gptj.py PygmalionAI/pygmalion-6b c4 --wbits 4 --groupsize 128 --save_safetensors pygmalion-6b-8bit-128g.safetensors
scp -P 22023 pygmalion-6b-8bit-128g.safetensors will@xd0.de:/home/will/
```

39
runpod/runpod-worker-oobabooga-api/runpod_infer.py

@ -0,0 +1,39 @@
import runpod
import subprocess
import requests
import time
def check_api_availability(host):
    """Block until the local text-generation-webui API at ``host`` answers a GET.

    Retries forever with a 200 ms delay between attempts, so the serverless
    worker only registers once the API server (launched by start.sh) is up.

    :param host: full URL of the API endpoint to probe.
    """
    while True:
        try:
            # A timeout is required: without one a wedged server would hang
            # this GET forever instead of letting the loop retry.
            requests.get(host, timeout=5)
            return
        except requests.exceptions.RequestException as e:
            print(f"API is not available, retrying in 200ms... ({e})")
        except Exception as e:
            # Catch-all so an unexpected error does not kill the wait loop;
            # include the exception so the failure is diagnosable from logs.
            print(f"something went wrong ({e})")
        time.sleep(200 / 1000)
# Wait for the local oobabooga API (started in the background by start.sh)
# before registering the serverless handler below.
check_api_availability("http://127.0.0.1:7860/run/textgen")
print('run handler')
def handler(event):
    '''
    This is the handler function that will be called by the serverless.

    Forwards event["input"] unchanged to the local text-generation-webui API
    and returns the API's decoded JSON response as the job output.
    '''
    print('got event')
    print(event)
    response = requests.post(url='http://127.0.0.1:7860/run/textgen', json=event["input"])
    # Name the result `result` (not `json`) to avoid shadowing the json module name.
    result = response.json()
    # do the things
    print(result)
    # return the output that you want to be returned like pre-signed URLs to output artifacts
    return result


runpod.serverless.start({"handler": handler})

10
runpod/runpod-worker-oobabooga-api/start.sh

@ -0,0 +1,10 @@
#!/bin/bash
# Container entrypoint: launch the text-generation-webui API server in the
# background, then run the RunPod serverless worker in the foreground.
echo "Container Started"
# Unbuffered stdout/stderr so both processes' logs appear immediately.
export PYTHONUNBUFFERED=1
cd /workspace/text-generation-webui
echo "starting api"
# Backgrounded (&): the API must be up before the worker's readiness probe
# in runpod_infer.py can succeed.
python3 server.py --listen --no-stream --extensions api &
cd /workspace
echo "starting worker"
# Foreground process: the container lives as long as the worker does.
python3 -u runpod_infer.py

5
runpod/runpod-worker-oobabooga-api/test_input.json

@ -0,0 +1,5 @@
{
"input": {
"prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."
}
}
Loading…
Cancel
Save