# matrix-pygmalion-bot/runpod/runpod-worker-oobabooga-api/Dockerfile


								# Base images, overridable with --build-arg. They are declared before the
								# first FROM so that FROM lines can reference them:
								#   DEV_IMAGE  - CUDA 11.8 "devel" image used by the builder stage below
								#   BASE_IMAGE - CUDA 11.8 "runtime" image used by the final stage
								ARG DEV_IMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
								ARG BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
								# Alternative base images (disabled):
								#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
								#ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
								#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3

								# "AS" is upper-case to match "FROM"; mixed casing triggers a build-check warning.
								FROM ${DEV_IMAGE} AS builder


								# Reference Dockerfile:
								#   https://github.com/runpod/containers/blob/main/oobabooga/Dockerfile
								#
								# Example build command:
								#   DOCKER_BUILDKIT=1 docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" -t magn418/runpod-oobabooga-pygmalion:test .
								#
								# Remove the build cache:
								#   docker builder prune


								WORKDIR /

								# Run the RUN instructions of this stage under bash with pipefail, so a failing
								# command inside a pipeline fails the build.
								SHELL ["/bin/bash", "-o", "pipefail", "-c"]

								# key=value form is required when setting several variables in one ENV.
								# With the legacy "ENV key value" form the rest of the joined line, including
								# "SHELL=/bin/bash", became the value of DEBIAN_FRONTEND.
								ENV DEBIAN_FRONTEND=noninteractive \
								    SHELL=/bin/bash


								# OS packages for the builder stage (toolchain, git, download tools, ssh server).
								# apt-get is used instead of apt: apt's command-line interface is aimed at
								# interactive use and warns when it is scripted.
								# /var/cache/apt is a BuildKit cache mount shared between builds.
								# NOTE(review): Ubuntu base images usually ship an apt "docker-clean" hook that
								# empties this cache after each install - confirm the mount is actually effective.
								RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
								    apt-get update --yes && \
								    # apt-get upgrade is run to patch known vulnerabilities in apt packages, as
								    # the ubuntu base image is sometimes rebuilt too seldom (less than once a month)
								    apt-get upgrade --yes && \
								    apt-get install --yes --no-install-recommends \
								      bash \
								      build-essential \
								      ca-certificates \
								      cmake \
								      curl \
								      git \
								      git-lfs \
								#      libgl1 \
								      openssh-server \
								      software-properties-common \
								      wget

								#      apt-get clean && rm -rf /var/lib/apt/lists/* && \

								#      echo "en_US.UTF-8 UTF-8" > /etc/locale.gen


								#RUN apt-key del 7fa2af80 && \

								#    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub

								#

								#RUN add-apt-repository ppa:deadsnakes/ppa && \

								#    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \

								#    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \

								#    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \

								##    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \

								#    update-alternatives --set python3 /usr/bin/python3.10 && \

								#    update-alternatives --set python /usr/bin/python3 && \

								#    apt-get clean && rm -rf /var/lib/apt/lists/*


								# Distribution Python 3 with headers, venv support and pip for the builder stage.
								# apt-get replaces apt, whose CLI is not intended for scripted use.
								RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
								    apt-get update --yes && \
								    apt-get install --yes --no-install-recommends \
								      python3 \
								      python3-dev \
								      python3-pip \
								      python3-venv

								#    apt-get clean && rm -rf /var/lib/apt/lists/*


								# CUDA compute capabilities exposed to the build environment through
								# TORCH_CUDA_ARCH_LIST; overridable with --build-arg. A wider list is kept
								# below, disabled.
								#ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
								ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"
								ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}

								# Python packages for the builder stage. Each package set is installed by its
								# own pip invocation, in a fixed order; /root/.cache is a BuildKit cache mount,
								# so pip's download cache is reused across builds without entering a layer.
								RUN --mount=type=cache,target=/root/.cache,sharing=locked \
								    pip3 install --upgrade pip setuptools \
								 && pip3 install cuda-python==11.8.0 \
								 && pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 \
								 && pip3 install bitsandbytes \
								 && pip3 install safetensors \
								 && pip3 install sentencepiece \
								 && pip3 install diffusers \
								 && pip3 install deepspeed \
								 && pip3 install accelerate xformers triton \
								 && pip3 install git+https://github.com/huggingface/transformers.git \
								 && pip3 install huggingface-hub \
								 && pip3 install runpod

								#    pip3 cache purge


								# WORKDIR creates the directory when it does not exist, so no separate
								# "RUN mkdir -p /workspace" layer is needed.
								WORKDIR /workspace

								# Shallow-clone text-generation-webui into /workspace (the current WORKDIR),
								# then install its requirements and those of its extensions/api directory.
								RUN --mount=type=cache,target=/root/.cache,sharing=locked \
								    git clone --depth 1 https://github.com/oobabooga/text-generation-webui.git && \
								    cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \
								    cd extensions/api && pip3 install -r requirements.txt

								#  pip3 cache purge


								# Place GPTQ-for-LLaMa under text-generation-webui/repositories/, pinned to a
								# fixed commit, and install its requirements.
								RUN --mount=type=cache,target=/root/.cache,sharing=locked \
								    cd /workspace/text-generation-webui/ && mkdir repositories && cd repositories && \
								    # Triton
								    git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
								    # The reset has to run inside the fresh clone. Issued from repositories/,
								    # git would act on the enclosing text-generation-webui checkout instead,
								    # where this commit does not exist.
								    (cd GPTQ-for-LLaMa && \
								     git reset --hard 508de42ff45ec560a4504e12b0d42114d599cf38 && \
								     pip3 install -r requirements.txt && \
								     # Drop the git metadata once the pinned tree is checked out.
								     rm -rf .git)

								    # Cuda

								#    git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \

								#    (cd GPTQ-for-LLaMa && python3 setup_cuda.py install && python3 setup_cuda.py bdist_wheel -d .)

								    # Cuda (Oobabooga's fork)

								#    git clone --branch cuda --single-branch --depth 1 https://github.com/oobabooga/GPTQ-for-LLaMa.git && \

								#    (cd GPTQ-for-LLaMa && python3 setup_cuda.py bdist_wheel -d .)


								# Stage that extends builder and downloads the model named by MODEL_NAME
								# (overridable with --build-arg) using the webui's download-model.py.
								FROM builder AS modeldownloader

								ARG MODEL_NAME="PygmalionAI/pygmalion-350m"
								ENV MODEL_NAME=${MODEL_NAME}

								# The expansion is quoted so the value always reaches the script as exactly
								# one argument, without word splitting or globbing.
								RUN cd /workspace/text-generation-webui && python3 download-model.py "${MODEL_NAME}"

								#RUN git lfs install && \

								#    git clone --depth 1 https://huggingface.co/${MODEL_NAME}


								#RUN wget -P /workspace/text-generation-webui/models/ https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/v2/20B_tokenizer.json && \

								#    wget -P /workspace/text-generation-webui/models/ https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-7B-v9-Eng99%25-Other1%25-20230412-ctx8192.pth


								    #https://huggingface.co/BlinkDL/rwkv-4-pile-7b/resolve/main/RWKV-4-Pile-7B-20230406-ctx8192-test949.pth


								#RUN cd /workspace/text-generation-webui && python3 download-model.py MetaIX/GPT4-X-Alpaca-30B-Int4 --text-only && \

								#    wget -P /workspace/text-generation-webui/models/MetaIX_GPT4-X-Alpaca-30B-Int4/ https://huggingface.co/MetaIX/GPT4-X-Alpaca-30B-Int4/resolve/main/gpt4-x-alpaca-30b-4bit.safetensors


								# Final stage, built from BASE_IMAGE (declared before the first FROM).
								FROM ${BASE_IMAGE}
								#ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"

								# WORKDIR creates /workspace when it is missing; no separate mkdir is needed.
								WORKDIR /workspace

								# OS packages for the final image: Python 3 with pip, plus make, g++ and git.
								# DEBIAN_FRONTEND is exported for this RUN only, so package configuration cannot
								# prompt during the build and the variable does not persist into the image
								# environment. apt-get replaces apt, whose CLI is not intended for scripts.
								RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
								    export DEBIAN_FRONTEND=noninteractive && \
								    apt-get update --yes && \
								    apt-get install --yes --no-install-recommends \
								      g++ \
								      git \
								      make \
								      python3 \
								      python3-dev \
								      python3-pip \
								      python3-venv

								#    apt-get clean && rm -rf /var/lib/apt/lists/*


								# Python packages for the final image, one pip invocation per package set in a
								# fixed order. /root/.cache is a BuildKit cache mount, so pip's download cache
								# is reused across builds without being stored in the image.
								RUN --mount=type=cache,target=/root/.cache,sharing=locked \
								    pip3 install --upgrade pip setuptools \
								 && pip3 install cuda-python==11.8.0 \
								 && pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 \
								 && pip3 install bitsandbytes \
								    # Overwrite the CPU library of bitsandbytes with its CUDA 11.8 build.
								    # NOTE(review): presumably a workaround for bitsandbytes picking the CPU
								    # library at import time - confirm it is still required. The path assumes
								    # the image's python3 is 3.10.
								 && cp /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so \
								 && pip3 install safetensors \
								 && pip3 install sentencepiece \
								 && pip3 install diffusers \
								 && pip3 install deepspeed \
								 && pip3 install accelerate xformers triton \
								 && pip3 install git+https://github.com/huggingface/transformers.git \
								 && pip3 install rwkv \
								 && pip3 install huggingface-hub \
								 && pip3 install runpod

								#    pip3 cache purge


								#RUN mkdir -p /workspace/text-generation-webui/repositories

								# Bring the text-generation-webui tree from the builder stage and the models
								# directory from the modeldownloader stage into the final image.
								COPY --from=builder /workspace/text-generation-webui /workspace/text-generation-webui
								COPY --from=modeldownloader /workspace/text-generation-webui/models /workspace/text-generation-webui/models

								# Install the requirements of the webui, of extensions/api and of
								# repositories/GPTQ-for-LLaMa into this stage's Python environment. The two
								# subshells keep the outer working directory at the webui root.
								RUN --mount=type=cache,target=/root/.cache,sharing=locked \
								    cd /workspace/text-generation-webui \
								 && pip3 install -r requirements.txt \
								 && (cd extensions/api && pip3 install -r requirements.txt) \
								 && (cd repositories/GPTQ-for-LLaMa && pip3 install -r requirements.txt)

								#    pip3 install /workspace/text-generation-webui/repositories/GPTQ-for-LLaMa/*.whl

								#    pip3 cache purge


								# Application files from the build context: runpod_infer.py goes to /workspace,
								# start.sh to the filesystem root.
								COPY runpod_infer.py /workspace/
								# --chmod makes start.sh executable as part of the copy, so no additional
								# "RUN chmod +x" layer is needed.
								COPY --chmod=0755 start.sh /

								# Default command of the container (exec form, no shell wrapper).
								CMD [ "/start.sh" ]