diff --git a/runpod/runpod-worker-oobabooga-api/Dockerfile b/runpod/runpod-worker-oobabooga-api/Dockerfile
new file mode 100644
index 0000000..552da5d
--- /dev/null
+++ b/runpod/runpod-worker-oobabooga-api/Dockerfile
@@ -0,0 +1,131 @@
+ARG DEV_IMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+ARG BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
+#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+#ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
+#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3
+FROM ${DEV_IMAGE} AS builder
+
+# based on https://github.com/runpod/containers/blob/main/oobabooga/Dockerfile
+
+# build with: DOCKER_BUILDKIT=1 docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" -t magn418/runpod-oobabooga-pygmalion:test .
+
+ARG MODEL_NAME="PygmalionAI/pygmalion-350m"
+ENV MODEL_NAME=${MODEL_NAME}
+
+WORKDIR /
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+ENV DEBIAN_FRONTEND=noninteractive \
+    SHELL=/bin/bash
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
+    # apt-get upgrade patches known vulnerabilities in the base packages, since
+    # the ubuntu base image is rebuilt only infrequently (sometimes less than once a month)
+    apt-get upgrade --yes && \
+    apt-get install --yes --no-install-recommends \
+    build-essential \
+    cmake \
+    ca-certificates \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    bash \
+#    libgl1 \
+    software-properties-common \
+    openssh-server
+#    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+#    echo "en_US.UTF-8 UTF-8" > /etc/locale.gen
+
+#RUN apt-key del 7fa2af80 && \
+#    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+#
+#RUN add-apt-repository ppa:deadsnakes/ppa && \
+#    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
+#    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
+#    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
+##    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
+#    update-alternatives --set python3 /usr/bin/python3.10 && \
+#    update-alternatives --set python /usr/bin/python3 && \
+#    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
+    apt-get install --yes --no-install-recommends \
+    python3 python3-dev python3-venv python3-pip
+#    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
+RUN --mount=type=cache,target=/root/.cache,sharing=locked pip3 install --upgrade pip setuptools && \
+    pip3 install cuda-python==11.8.0 && \
+    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install bitsandbytes && \
+    pip3 install safetensors && \
+    pip3 install sentencepiece && \
+    pip3 install diffusers && \
+    pip3 install accelerate xformers triton && \
+    pip3 install git+https://github.com/huggingface/transformers.git && \
+    pip3 install huggingface-hub && \
+    pip3 install runpod
+#    pip3 cache purge
+
+RUN mkdir -p /workspace
+WORKDIR /workspace
+
+RUN cd /workspace && git clone https://github.com/oobabooga/text-generation-webui.git && \
+    cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \
+    cd extensions/api && pip3 install -r requirements.txt
+#    pip3 cache purge
+
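+# GPTQ-for-LLaMa ships a CUDA extension that needs nvcc, which only exists in
+# the -devel image. Build it into a wheel here so the runtime stage below can
+# install the prebuilt kernel without carrying the full CUDA toolchain.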
+# https://github.com/oobabooga/GPTQ-for-LLaMa
+RUN cd /workspace/text-generation-webui && mkdir repositories && cd repositories && \
+    git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
+    (cd GPTQ-for-LLaMa && python3 setup_cuda.py bdist_wheel -d .)
+# && python3 setup_cuda.py install
+
+RUN cd /workspace/text-generation-webui && python3 download-model.py ${MODEL_NAME}
+#RUN git lfs install && \
+#    git clone --depth 1 https://huggingface.co/${MODEL_NAME}
+
+FROM ${BASE_IMAGE}
+#ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+RUN mkdir -p /workspace
+WORKDIR /workspace
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
+    apt-get install --yes --no-install-recommends \
+    python3 python3-dev python3-venv python3-pip \
+    make g++ \
+    git
+#    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache,sharing=locked pip3 install --upgrade pip setuptools && \
+    pip3 install cuda-python==11.8.0 && \
+    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
+    pip3 install bitsandbytes && \
+    # work around bitsandbytes falling back to its CPU library on this image:
+    # overwrite it with the CUDA 11.8 build
+    cp /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so && \
+    pip3 install safetensors && \
+    pip3 install sentencepiece && \
+    pip3 install diffusers && \
+    pip3 install accelerate xformers triton && \
+    pip3 install git+https://github.com/huggingface/transformers.git && \
+    pip3 install rwkv && \
+    pip3 install huggingface-hub && \
+    pip3 install runpod
+#    pip3 cache purge
+
+#RUN mkdir -p /workspace/text-generation-webui/repositories
+COPY --from=builder /workspace/text-generation-webui /workspace/text-generation-webui
+
+RUN --mount=type=cache,target=/root/.cache,sharing=locked \
+    cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \
+    (cd extensions/api && pip3 install -r requirements.txt) && \
+    (cd repositories/GPTQ-for-LLaMa && pip3 install -r requirements.txt) && \
+    pip3 install /workspace/text-generation-webui/repositories/GPTQ-for-LLaMa/*.whl
+#    pip3 cache purge
+
+COPY runpod_infer.py /workspace/
+COPY start.sh /
+RUN chmod +x /start.sh
+
+CMD [ "/start.sh" ]
diff --git a/runpod/runpod-worker-oobabooga-api/README.md b/runpod/runpod-worker-oobabooga-api/README.md
new file mode 100644
index 0000000..a076ea0
--- /dev/null
+++ b/runpod/runpod-worker-oobabooga-api/README.md
@@ -0,0 +1,24 @@
+## Building
+
+Select one of the following models to build (`MODEL_NAME` is handed to `download-model.py`, so pass the full Hugging Face ID, e.g. `PygmalionAI/pygmalion-6b`):
+
+- `gpt-neo-1.3B`
+- `gpt-neo-2.7B`
+- `gpt-neox-20b`
+- `pygmalion-6b`
+- `gpt-j-6b`
+
+```BASH
+docker build --build-arg MODEL_NAME={model name} -t repo/image_name:tag .
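+# BuildKit is required because the Dockerfile uses cache mounts, e.g.:
+DOCKER_BUILDKIT=1 docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" -t magn418/runpod-oobabooga-pygmalion:test .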
+```
+
+## Quantizing
+
+To quantize `pygmalion-6b` to 4-bit GPTQ yourself and copy the result to a host of your choice:
+
+```BASH
+pip install --upgrade pip
+git clone https://github.com/AlpinDale/gptq-gptj.git
+cd gptq-gptj
+pip install -r requirements.txt
+CUDA_VISIBLE_DEVICES=0 python gptj.py PygmalionAI/pygmalion-6b c4 --wbits 4 --groupsize 128 --save pygmalion-6b-4bit-128g.pt
+CUDA_VISIBLE_DEVICES=0 python gptj.py PygmalionAI/pygmalion-6b c4 --wbits 4 --groupsize 128 --save_safetensors pygmalion-6b-4bit-128g.safetensors
+scp -P 22023 pygmalion-6b-4bit-128g.safetensors will@xd0.de:/home/will/
+```
diff --git a/runpod/runpod-worker-oobabooga-api/runpod_infer.py b/runpod/runpod-worker-oobabooga-api/runpod_infer.py
new file mode 100644
index 0000000..59d148b
--- /dev/null
+++ b/runpod/runpod-worker-oobabooga-api/runpod_infer.py
@@ -0,0 +1,39 @@
+import runpod
+import requests
+import time
+
+
+def check_api_availability(host):
+    '''Block until the text-generation-webui API answers.'''
+    while True:
+        try:
+            requests.get(host)
+            return
+        except requests.exceptions.RequestException as e:
+            print(f"API is not available, retrying in 200ms... ({e})")
+        except Exception as e:
+            print(f"Unexpected error while waiting for the API: {e}")
+        time.sleep(200 / 1000)
+
+
+check_api_availability("http://127.0.0.1:7860/run/textgen")
+
+print('API is available, starting handler')
+
+
+def handler(event):
+    '''
+    Handler called by the RunPod serverless worker for each job: it forwards
+    the job input to the local text-generation-webui API and returns the
+    API's JSON response as the job output.
+    '''
+    print('got event')
+    print(event)
+
+    response = requests.post(url='http://127.0.0.1:7860/run/textgen', json=event["input"])
+    output = response.json()
+
+    print(output)
+
+    return output
+
+
+runpod.serverless.start({"handler": handler})
diff --git a/runpod/runpod-worker-oobabooga-api/start.sh b/runpod/runpod-worker-oobabooga-api/start.sh
new file mode 100644
index 0000000..a13cb5e
--- /dev/null
+++ b/runpod/runpod-worker-oobabooga-api/start.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+echo "Container Started"
+export PYTHONUNBUFFERED=1
+
+cd /workspace/text-generation-webui
+echo "starting api"
+python3 server.py --listen --no-stream --extensions api &
+cd /workspace
+echo "starting worker"
+python3 -u runpod_infer.py
diff --git a/runpod/runpod-worker-oobabooga-api/test_input.json b/runpod/runpod-worker-oobabooga-api/test_input.json
new file mode 100644
index 0000000..87542a6
--- /dev/null
+++ b/runpod/runpod-worker-oobabooga-api/test_input.json
@@ -0,0 +1,5 @@
+{
+    "input": {
+        "prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."
+    }
+}
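For a quick smoke test of the endpoint the worker talks to, something like the following works; the URL and payload simply mirror what `runpod_infer.py` sends, and it assumes the container is running with port 7860 published:

```BASH
# POST the "input" object from test_input.json straight to the webui API,
# exactly as handler() does
curl -s -X POST http://127.0.0.1:7860/run/textgen \
  -H 'Content-Type: application/json' \
  -d '{"prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."}'
```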