Hendrik Langer
2 years ago
5 changed files with 209 additions and 0 deletions
@@ -0,0 +1,131 @@
ARG DEV_IMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
ARG BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
#ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
#ARG BASE_IMAGE=runpod/pytorch:3.10-2.0.0-117
#ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.03-py3

FROM ${DEV_IMAGE} as builder

# https://github.com/runpod/containers/blob/main/oobabooga/Dockerfile

# run: DOCKER_BUILDKIT=1 docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-350m" -t magn418/runpod-oobabooga-pygmalion:test .


ARG MODEL_NAME="PygmalionAI/pygmalion-350m"
ENV MODEL_NAME=${MODEL_NAME}

WORKDIR /
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
    SHELL=/bin/bash

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    # apt-get upgrade is run to patch known vulnerabilities in apt packages, as the
    # Ubuntu base image is rebuilt infrequently (sometimes less than once a month)
    apt-get upgrade --yes && \
    apt-get install --yes --no-install-recommends \
        build-essential \
        cmake \
        ca-certificates \
        git \
        git-lfs \
        wget \
        curl \
        bash \
        # libgl1 \
        software-properties-common \
        openssh-server
#   apt-get clean && rm -rf /var/lib/apt/lists/* && \
#   echo "en_US.UTF-8 UTF-8" > /etc/locale.gen

#RUN apt-key del 7fa2af80 && \
#    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
#
#RUN add-apt-repository ppa:deadsnakes/ppa && \
#    apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
#    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
#    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
##   update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
#    update-alternatives --set python3 /usr/bin/python3.10 && \
#    update-alternatives --set python /usr/bin/python3 && \
#    apt-get clean && rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
        python3 python3-dev python3-venv python3-pip
#   apt-get clean && rm -rf /var/lib/apt/lists/*

ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
RUN --mount=type=cache,target=/root/.cache,sharing=locked pip3 install --upgrade pip setuptools && \
    pip3 install cuda-python==11.8.0 && \
    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
    pip3 install bitsandbytes && \
    pip3 install safetensors && \
    pip3 install sentencepiece && \
    pip3 install diffusers && \
    pip3 install accelerate xformers triton && \
    pip3 install git+https://github.com/huggingface/transformers.git && \
    pip3 install huggingface-hub && \
    pip3 install runpod
#   pip3 cache purge

RUN mkdir -p /workspace
WORKDIR /workspace

#RUN mkdir /workspace &&
RUN cd /workspace && git clone https://github.com/oobabooga/text-generation-webui.git && \
    cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \
    cd extensions/api && pip3 install -r requirements.txt
#   pip3 cache purge

# Build the GPTQ-for-LLaMa CUDA kernel as a wheel so the runtime stage can
# install it without the full CUDA toolchain.
RUN cd /workspace/text-generation-webui/ && mkdir repositories && cd repositories && \
    # https://github.com/oobabooga/GPTQ-for-LLaMa
    git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git && \
    (cd GPTQ-for-LLaMa && python3 setup_cuda.py bdist_wheel -d .)
#   && python3 setup_cuda.py install

RUN cd /workspace/text-generation-webui && python3 download-model.py ${MODEL_NAME}
#RUN git lfs install && \
#    git clone --depth 1 https://huggingface.co/${MODEL_NAME}

FROM ${BASE_IMAGE}
#ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
RUN mkdir -p /workspace
WORKDIR /workspace

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
        python3 python3-dev python3-venv python3-pip \
        make g++ \
        git
#   apt-get clean && rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,target=/root/.cache,sharing=locked pip3 install --upgrade pip setuptools && \
    pip3 install cuda-python==11.8.0 && \
    pip3 install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu118 && \
    pip3 install bitsandbytes && \
    # work around bitsandbytes selecting the CPU library on the runtime image
    cp /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so && \
    pip3 install safetensors && \
    pip3 install sentencepiece && \
    pip3 install diffusers && \
    pip3 install accelerate xformers triton && \
    pip3 install git+https://github.com/huggingface/transformers.git && \
    pip3 install rwkv && \
    pip3 install huggingface-hub && \
    pip3 install runpod
#   pip3 cache purge

#RUN mkdir -p /workspace/text-generation-webui/repositories
COPY --from=builder /workspace/text-generation-webui /workspace/text-generation-webui

RUN --mount=type=cache,target=/root/.cache,sharing=locked \
    cd /workspace/text-generation-webui && pip3 install -r requirements.txt && \
    (cd extensions/api && pip3 install -r requirements.txt) && \
    (cd repositories/GPTQ-for-LLaMa && pip3 install -r requirements.txt) && \
    pip3 install /workspace/text-generation-webui/repositories/GPTQ-for-LLaMa/*.whl
#   pip3 cache purge

COPY runpod_infer.py /workspace/
COPY start.sh /
RUN chmod +x /start.sh

CMD [ "/start.sh" ]
@@ -0,0 +1,24 @@
## Building

Select one of the following models to build:

- `gpt-neo-1.3B`
- `gpt-neo-2.7B`
- `gpt-neox-20b`
- `pygmalion-6b`
- `gpt-j-6b`

```BASH
docker build --build-arg MODEL_NAME={model name} -t repo/image_name:tag .
```
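For example, to build the `pygmalion-6b` variant (note that `download-model.py` expects the full Hugging Face id, as in the Dockerfile's default; the image name here is illustrative):

```BASH
docker build --build-arg MODEL_NAME="PygmalionAI/pygmalion-6b" -t magn418/runpod-oobabooga-pygmalion:6b .
```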
## Quantizing

The 4-bit GPTQ weights for `pygmalion-6b` can be produced with AlpinDale's gptq-gptj fork (quantized against the c4 calibration set) and the resulting file copied to the serving host:

```BASH
pip install --upgrade pip
git clone https://github.com/AlpinDale/gptq-gptj.git
cd gptq-gptj
pip install -r requirements.txt
CUDA_VISIBLE_DEVICES=0 python gptj.py PygmalionAI/pygmalion-6b c4 --wbits 4 --groupsize 128 --save pygmalion-6b-8bit-128g.pt
CUDA_VISIBLE_DEVICES=0 python gptj.py PygmalionAI/pygmalion-6b c4 --wbits 4 --groupsize 128 --save_safetensors pygmalion-6b-8bit-128g.safetensors
scp -P 22023 pygmalion-6b-8bit-128g.safetensors will@xd0.de:/home/will/
```
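The resulting file would then presumably be placed under the webui's `models/` directory and loaded with the GPTQ flags that text-generation-webui supported at the time, roughly:

```BASH
# illustrative: serve the 4-bit quantized weights
python3 server.py --model pygmalion-6b --wbits 4 --groupsize 128 --listen
```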
@@ -0,0 +1,39 @@
import runpod
import requests
import time


def check_api_availability(host):
    # block until the webui's API endpoint answers
    while True:
        try:
            requests.get(host)
            return
        except requests.exceptions.RequestException as e:
            print(f"API is not available, retrying in 200ms... ({e})")
        except Exception as e:
            print(f"something went wrong: {e}")
        time.sleep(200 / 1000)


check_api_availability("http://127.0.0.1:7860/run/textgen")

print('run handler')


def handler(event):
    '''
    This is the handler function that will be called by the serverless worker.
    '''
    print('got event')
    print(event)

    # forward the job input to the local text-generation-webui API
    response = requests.post(url='http://127.0.0.1:7860/run/textgen', json=event["input"])

    output = response.json()
    # post-process the response here if needed

    print(output)

    # return the output that you want to be returned, like pre-signed URLs to output artifacts
    return output


runpod.serverless.start({"handler": handler})
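Newer versions of the runpod SDK allow exercising the handler without the RunPod queue by passing a test job on the command line; since the script blocks until the webui API answers, the webui must already be listening on 127.0.0.1:7860:

```BASH
# hypothetical local invocation, mirroring the shape of the test input below
python3 runpod_infer.py --test_input '{"input": {"prompt": "Hello"}}'
```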
@@ -0,0 +1,10 @@
#!/bin/bash
echo "Container Started"
export PYTHONUNBUFFERED=1

cd /workspace/text-generation-webui
echo "starting api"
python3 server.py --listen --no-stream --extensions api &
cd /workspace
echo "starting worker"
python3 -u runpod_infer.py
@@ -0,0 +1,5 @@
{
    "input": {
        "prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."
    }
}
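Once the worker is deployed as a RunPod serverless endpoint, a payload of this shape would be submitted through the RunPod API, along the lines of the following (endpoint id and API key are placeholders):

```BASH
curl -X POST "https://api.runpod.ai/v2/<endpoint_id>/runsync" \
    -H "Authorization: Bearer <api_key>" \
    -H "Content-Type: application/json" \
    -d '{"input": {"prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."}}'
```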