# matrix-pygmalion-bot/runpod/runpod-worker-oobabooga-api/Dockerfile

# Base images, overridable with --build-arg:
#   DEV_IMAGE  - CUDA "devel" image the builder stage starts from.
#   BASE_IMAGE - CUDA "runtime" image the final stage starts from.
ARG DEV_IMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
ARG BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
# Alternative BASE_IMAGE candidates, kept for reference:
#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
#ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3
# Keyword casing matches the other stage declaration ("FROM builder AS ...").
FROM ${DEV_IMAGE} AS builder

# https://github.com/runpod/containers/blob/main/oobabooga/Dockerfile

# DOCKER_BUILDKIT=1 docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" -t magn418/runpod-oobabooga-pygmalion:test .
# docker builder prune

WORKDIR /
# Shell-form RUN instructions in this stage execute through bash with
# pipefail, so a failure anywhere in a pipeline fails the build step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Use key=value pairs: with the legacy "ENV key value" form the continued
# line is folded into the value, which would set DEBIAN_FRONTEND to
# "noninteractive    SHELL=/bin/bash" and never define SHELL at all.
ENV DEBIAN_FRONTEND=noninteractive \
    SHELL=/bin/bash

# Refresh the package index, apply pending upgrades and install the build
# toolchain plus the fetch/VCS utilities used by this stage.
# apt-get is used throughout: the `apt` front end does not offer a stable
# command-line interface for scripted use.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    # - apt-get upgrade is run to patch known vulnerabilities in apt-get packages as
    #   the ubuntu base image is rebuilt too seldom sometimes (less than once a month)
    apt-get upgrade --yes && \
    apt-get install --yes --no-install-recommends \
      build-essential \
      cmake \
      ca-certificates \
      git \
      git-lfs \
      wget \
      curl \
      bash \
#      libgl1 \
      software-properties-common \
      openssh-server
#      apt-get clean && rm -rf /var/lib/apt/lists/* && \
#      echo "en_US.UTF-8 UTF-8" > /etc/locale.gen

#RUN apt-key del 7fa2af80 && \
#    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
#
#RUN add-apt-repository ppa:deadsnakes/ppa && \
#    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
#    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
#    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
##    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
#    update-alternatives --set python3 /usr/bin/python3.10 && \
#    update-alternatives --set python /usr/bin/python3 && \
#    apt-get clean && rm -rf /var/lib/apt/lists/*

# Install the distribution's Python 3 interpreter, headers, venv support and
# pip for the builder stage. apt-get replaces `apt`, whose command-line
# interface is not stable for scripted use.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
      python3 python3-dev python3-venv python3-pip
#    apt-get clean && rm -rf /var/lib/apt/lists/*

# CUDA architecture list exported to every later step of this stage
# (overridable with --build-arg). A wider list is kept for reference:
#ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}

# Python stack for the builder: upgrade pip/setuptools first, then install
# each dependency in its own pip invocation, in the order listed.
# torch/torchvision/torchaudio additionally consult the cu118 wheel index.
# /root/.cache is a BuildKit cache mount shared between builds.
RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    pip3 install --upgrade pip setuptools \
 && pip3 install cuda-python==11.8.0 \
 && pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 \
 && pip3 install bitsandbytes \
 && pip3 install safetensors \
 && pip3 install sentencepiece \
 && pip3 install diffusers \
 && pip3 install deepspeed \
 && pip3 install accelerate xformers triton \
 && pip3 install git+https://github.com/huggingface/transformers.git \
 && pip3 install huggingface-hub \
 && pip3 install runpod
#    pip3 cache purge

# WORKDIR creates the directory when it does not exist, so no separate
# `RUN mkdir -p` layer is needed.
WORKDIR /workspace

#RUN mkdir /workspace && 
# Shallow-clone text-generation-webui into /workspace, then install its
# top-level requirements followed by those of the bundled `api` extension.
RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    git clone --depth 1 https://github.com/oobabooga/text-generation-webui.git /workspace/text-generation-webui \
 && cd /workspace/text-generation-webui \
 && pip3 install -r requirements.txt \
 && cd extensions/api \
 && pip3 install -r requirements.txt
#  pip3 cache purge

# Fetch GPTQ-for-LLaMa into text-generation-webui/repositories, pin it to a
# fixed commit and install its Python requirements.
# `git reset` has to run inside the GPTQ-for-LLaMa checkout: one directory up
# it would act on the enclosing text-generation-webui repository instead of
# the repository that was just cloned.
# NOTE(review): the pinned hash is presumably a GPTQ-for-LLaMa commit, since it
# directly follows that clone; confirm it is reachable in that repository.
RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    mkdir -p /workspace/text-generation-webui/repositories && \
    cd /workspace/text-generation-webui/repositories && \
    # Triton
    git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
    cd GPTQ-for-LLaMa && \
    git reset --hard 508de42ff45ec560a4504e12b0d42114d599cf38 && \
    pip3 install -r requirements.txt && \
    # Drop the git metadata so it is not carried along when this tree is copied.
    rm -rf .git
    # Cuda
#    git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
#    (cd GPTQ-for-LLaMa && python3 setup_cuda.py install && python3 setup_cuda.py bdist_wheel -d .)
    # Cuda (Oobabooga's fork)
#    git clone --branch cuda --single-branch --depth 1 https://github.com/oobabooga/GPTQ-for-LLaMa.git && \
#    (cd GPTQ-for-LLaMa && python3 setup_cuda.py bdist_wheel -d .)


# Stage that extends the builder and fetches the model weights, so the final
# stage can copy the populated models directory from it.
FROM builder AS modeldownloader

# Model to download; override with --build-arg MODEL_NAME=...
ARG MODEL_NAME="PygmalionAI/pygmalion-350m"
ENV MODEL_NAME=${MODEL_NAME}

# Run the download script that ships with text-generation-webui from its
# checkout directory.
RUN cd /workspace/text-generation-webui \
 && python3 download-model.py ${MODEL_NAME}
#RUN git lfs install && \
#    git clone --depth 1 https://huggingface.co/${MODEL_NAME}

#RUN wget -P /workspace/text-generation-webui/models/ https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/v2/20B_tokenizer.json && \
#    wget -P /workspace/text-generation-webui/models/ https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-7B-v9-Eng99%25-Other1%25-20230412-ctx8192.pth

    #https://huggingface.co/BlinkDL/rwkv-4-pile-7b/resolve/main/RWKV-4-Pile-7B-20230406-ctx8192-test949.pth

#RUN cd /workspace/text-generation-webui && python3 download-model.py MetaIX/GPT4-X-Alpaca-30B-Int4 --text-only && \
#    wget -P /workspace/text-generation-webui/models/MetaIX_GPT4-X-Alpaca-30B-Int4/ https://huggingface.co/MetaIX/GPT4-X-Alpaca-30B-Int4/resolve/main/gpt4-x-alpaca-30b-4bit.safetensors

# Final image: starts from the CUDA runtime base selected by BASE_IMAGE.
FROM ${BASE_IMAGE}
#ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
# WORKDIR creates /workspace itself, so no separate `RUN mkdir -p` layer is needed.
WORKDIR /workspace

# Runtime OS packages: Python 3 with headers/venv/pip, a minimal C++ build
# toolchain (make, g++) and git.
# - apt-get replaces `apt`, whose command-line interface is not stable for
#   scripted use.
# - DEBIAN_FRONTEND is set inline because this stage does not inherit the
#   builder's environment; it keeps debconf from prompting during the build
#   without leaving the variable in the runtime environment.
# - The cache mount only covers /var/cache/apt, so the package index under
#   /var/lib/apt/lists is removed in the same layer to keep it out of the
#   shipped image.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
      python3 python3-dev python3-venv python3-pip \
      make g++ \
      git && \
    rm -rf /var/lib/apt/lists/*

# Python stack for the runtime image: same package sequence as the builder
# stage, plus `rwkv`. Each dependency is installed by its own pip invocation,
# in the order listed; /root/.cache is a BuildKit cache mount.
RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    pip3 install --upgrade pip setuptools \
 && pip3 install cuda-python==11.8.0 \
 && pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 \
 && pip3 install bitsandbytes \
    # Overwrite the CPU library of bitsandbytes with its CUDA 11.8 build.
    # NOTE(review): presumably a workaround so the CUDA kernels are loaded even
    # when bitsandbytes selects the CPU library; path assumes Python 3.10 - confirm.
 && cp /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so \
 && pip3 install safetensors \
 && pip3 install sentencepiece \
 && pip3 install diffusers \
 && pip3 install deepspeed \
 && pip3 install accelerate xformers triton \
 && pip3 install git+https://github.com/huggingface/transformers.git \
 && pip3 install rwkv \
 && pip3 install huggingface-hub \
 && pip3 install runpod
#    pip3 cache purge

#RUN mkdir -p /workspace/text-generation-webui/repositories
# Bring in the prepared text-generation-webui tree (including
# repositories/GPTQ-for-LLaMa) from the builder stage.
COPY --from=builder /workspace/text-generation-webui /workspace/text-generation-webui
# Overlay the models directory populated by the modeldownloader stage. Kept as
# a separate COPY so the downloaded model lands in its own layer.
COPY --from=modeldownloader /workspace/text-generation-webui/models /workspace/text-generation-webui/models

# Install, into the runtime image's Python environment, the requirements of
# the copied text-generation-webui tree, its `api` extension and the bundled
# GPTQ-for-LLaMa checkout, in that order.
RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    cd /workspace/text-generation-webui \
 && pip3 install -r requirements.txt \
 && cd /workspace/text-generation-webui/extensions/api \
 && pip3 install -r requirements.txt \
 && cd /workspace/text-generation-webui/repositories/GPTQ-for-LLaMa \
 && pip3 install -r requirements.txt
#    pip3 install /workspace/text-generation-webui/repositories/GPTQ-for-LLaMa/*.whl
#    pip3 cache purge

# Worker handler script from the build context.
COPY runpod_infer.py /workspace/
# Copy the start script already marked executable. A follow-up `RUN chmod`
# would store the file a second time in an extra layer; --chmod needs
# BuildKit, which this Dockerfile already requires for `RUN --mount`.
COPY --chmod=755 start.sh /

# Exec form: /start.sh is launched directly as the container's main process.
CMD [ "/start.sh" ]
add runpod oobabooga worker 2 years ago			`ARG DEV_IMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04`
			`ARG BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04`
			`#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04`
			`#ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117`
			`#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3`
			`FROM ${DEV_IMAGE} as builder`

			`# https://github.com/runpod/containers/blob/main/oobabooga/Dockerfile`

oobabooga runpod worker 2 years ago			`# DOCKER_BUILDKIT=1 docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" -t magn418/runpod-oobabooga-pygmalion:test .`
			`# docker builder prune`
add runpod oobabooga worker 2 years ago
			`WORKDIR /`
			`SHELL ["/bin/bash", "-o", "pipefail", "-c"]`
			`ENV DEBIAN_FRONTEND noninteractive\`
			`SHELL=/bin/bash`

			`RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \`
			`# - apt-get upgrade is run to patch known vulnerabilities in apt-get packages as`
			`# the ubuntu base image is rebuilt too seldom sometimes (less than once a month)`
			`apt-get upgrade --yes && \`
			`apt install --yes --no-install-recommends \`
			`build-essential \`
			`cmake \`
			`ca-certificates \`
			`git \`
			`git-lfs \`
			`wget \`
			`curl \`
			`bash \`
			`# libgl1 \`
			`software-properties-common \`
			`openssh-server`
			`# apt-get clean && rm -rf /var/lib/apt/lists/* && \`
			`# echo "en_US.UTF-8 UTF-8" > /etc/locale.gen`

			`#RUN apt-key del 7fa2af80 && \`
			`# apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub`
			`#`
			`#RUN add-apt-repository ppa:deadsnakes/ppa && \`
			`# apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \`
			`# update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \`
			`# update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \`
			`## update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \`
			`# update-alternatives --set python3 /usr/bin/python3.10 && \`
			`# update-alternatives --set python /usr/bin/python3 && \`
			`# apt-get clean && rm -rf /var/lib/apt/lists/*`

			`RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \`
			`apt install --yes --no-install-recommends \`
			`python3 python3-dev python3-venv python3-pip`
			`# apt-get clean && rm -rf /var/lib/apt/lists/*`

work on remote worker dockerfile 2 years ago			`#ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"`
			`ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"`
add runpod oobabooga worker 2 years ago			`ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}`
			`RUN --mount=type=cache,target=/root/.cache,sharing=locked pip3 install --upgrade pip setuptools && \`
			`pip3 install cuda-python==11.8.0 && \`
			`pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \`
			`pip3 install bitsandbytes && \`
			`pip3 install safetensors && \`
			`pip3 install sentencepiece && \`
			`pip3 install diffusers && \`
add deepspeed 2 years ago			`pip3 install deepspeed && \`
add runpod oobabooga worker 2 years ago			`pip3 install accelerate xformers triton && \`
			`pip3 install git+https://github.com/huggingface/transformers.git && \`
			`pip3 install huggingface-hub && \`
			`pip3 install runpod`
			`# pip3 cache purge`

			`RUN mkdir -p /workspace`
			`WORKDIR /workspace`

			`#RUN mkdir /workspace &&`
fix Dockerfile (again) 2 years ago			`RUN --mount=type=cache,target=/root/.cache,sharing=locked \`
fix Dockerfile 2 years ago			`cd /workspace && git clone --depth 1 https://github.com/oobabooga/text-generation-webui.git && \`
			`cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \`
			`cd extensions/api && pip3 install -r requirements.txt`
add runpod oobabooga worker 2 years ago			`# pip3 cache purge`

fix Dockerfile (again) 2 years ago			`RUN --mount=type=cache,target=/root/.cache,sharing=locked \`
fix Dockerfile 2 years ago			`cd /workspace/text-generation-webui/ && mkdir repositories && cd repositories && \`
work on remote worker dockerfile 2 years ago			`# Triton`
			`git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \`
			`git reset --hard 508de42ff45ec560a4504e12b0d42114d599cf38 && \`
			`(cd GPTQ-for-LLaMa && pip3 install -r requirements.txt && rm -rf .git)`
			`# Cuda`
fix Dockerfile 2 years ago			`# git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \`
work on remote worker dockerfile 2 years ago			`# (cd GPTQ-for-LLaMa && python3 setup_cuda.py install && python3 setup_cuda.py bdist_wheel -d .)`
			`# Cuda (Oobabooga's fork)`
			`# git clone --branch cuda --single-branch --depth 1 https://github.com/oobabooga/GPTQ-for-LLaMa.git && \`
fix Dockerfile 2 years ago			`# (cd GPTQ-for-LLaMa && python3 setup_cuda.py bdist_wheel -d .)`
oobabooga runpod worker 2 years ago

			`FROM builder AS modeldownloader`
fix Dockerfile 2 years ago
			`ARG MODEL_NAME="PygmalionAI/pygmalion-350m"`
			`ENV MODEL_NAME=${MODEL_NAME}`

fix Dockerfile (again) 2 years ago			`RUN cd /workspace/text-generation-webui && python3 download-model.py ${MODEL_NAME}`
add runpod oobabooga worker 2 years ago			`#RUN git lfs install && \`
			`# git clone --depth 1 https://huggingface.co/${MODEL_NAME}`
fix Dockerfile (again) 2 years ago
			`#RUN wget -P /workspace/text-generation-webui/models/ https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/v2/20B_tokenizer.json && \`
			`# wget -P /workspace/text-generation-webui/models/ https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-7B-v9-Eng99%25-Other1%25-20230412-ctx8192.pth`

fix Dockerfile 2 years ago			`#https://huggingface.co/BlinkDL/rwkv-4-pile-7b/resolve/main/RWKV-4-Pile-7B-20230406-ctx8192-test949.pth`
fix Dockerfile (again) 2 years ago
fix Dockerfile 2 years ago			`#RUN cd /workspace/text-generation-webui && python3 download-model.py MetaIX/GPT4-X-Alpaca-30B-Int4 --text-only && \`
work on remote worker dockerfile 2 years ago			`# wget -P /workspace/text-generation-webui/models/MetaIX_GPT4-X-Alpaca-30B-Int4/ https://huggingface.co/MetaIX/GPT4-X-Alpaca-30B-Int4/resolve/main/gpt4-x-alpaca-30b-4bit.safetensors`
oobabooga runpod worker 2 years ago
add runpod oobabooga worker 2 years ago			`FROM ${BASE_IMAGE}`
			`#ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"`
			`RUN mkdir -p /workspace`
			`WORKDIR /workspace`

			`RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \`
			`apt install --yes --no-install-recommends \`
			`python3 python3-dev python3-venv python3-pip \`
			`make g++ \`
			`git`
			`# apt-get clean && rm -rf /var/lib/apt/lists/*`

			`RUN --mount=type=cache,target=/root/.cache,sharing=locked pip3 install --upgrade pip setuptools && \`
			`pip3 install cuda-python==11.8.0 && \`
			`pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \`
			`pip3 install bitsandbytes && \`
			`cp /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so && \`
			`pip3 install safetensors && \`
			`pip3 install sentencepiece && \`
			`pip3 install diffusers && \`
add deepspeed 2 years ago			`pip3 install deepspeed && \`
add runpod oobabooga worker 2 years ago			`pip3 install accelerate xformers triton && \`
			`pip3 install git+https://github.com/huggingface/transformers.git && \`
			`pip3 install rwkv && \`
			`pip3 install huggingface-hub && \`
			`pip3 install runpod`
			`# pip3 cache purge`

			`#RUN mkdir -p /workspace/text-generation-webui/repositories`
			`COPY --from=builder /workspace/text-generation-webui /workspace/text-generation-webui`
oobabooga runpod worker 2 years ago			`COPY --from=modeldownloader /workspace/text-generation-webui/models /workspace/text-generation-webui/models`
add runpod oobabooga worker 2 years ago
			`RUN --mount=type=cache,target=/root/.cache,sharing=locked \`
			`cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \`
			`(cd extensions/api && pip3 install -r requirements.txt) && \`
fix Dockerfile (again) 2 years ago			`(cd repositories/GPTQ-for-LLaMa && pip3 install -r requirements.txt)`
			`# pip3 install /workspace/text-generation-webui/repositories/GPTQ-for-LLaMa/*.whl`
add runpod oobabooga worker 2 years ago			`# pip3 cache purge`

			`COPY runpod_infer.py /workspace/`
			`COPY start.sh /`
			`RUN chmod +x /start.sh`

			`CMD [ "/start.sh" ]`