
work on remote workers

Hendrik Langer committed 2 years ago to branch master
commit 81703cf10c
9 changed files, 333 changed lines:

  1. runpod/runpod-worker-sd/Dockerfile (5)
  2. runpod/runpod-worker-sd/README.md (4)
  3. runpod/runpod-worker-sd/model_fetcher.py (8)
  4. runpod/runpod-worker-sd/runpod_infer.py (2)
  5. runpod/runpod-worker-transformers/Dockerfile (46)
  6. runpod/runpod-worker-transformers/README.md (13)
  7. runpod/runpod-worker-transformers/model_fetcher.py (61)
  8. runpod/runpod-worker-transformers/runpod_infer.py (189)
  9. runpod/runpod-worker-transformers/test_input.json (5)

runpod/runpod-worker-sd/Dockerfile (5 changed lines)

@@ -31,11 +31,12 @@ RUN add-apt-repository ppa:deadsnakes/ppa && \
     apt-get install python3.10 python3.10-dev python3.10-venv python3-pip -y --no-install-recommends && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
-RUN pip install --upgrade pip
 RUN pip install --upgrade pip && \
     pip install huggingface-hub && \
     pip install diffusers && \
     pip install safetensors && \
     pip install torch torchvision torchaudio --extra-index-url=https://download.pytorch.org/whl/cu116 && \
-    pip install bitsandbytes
+    pip install bitsandbytes && \
+    pip install transformers accelerate xformers triton && \
+    pip install runpod

runpod/runpod-worker-sd/README.md (4 changed lines)

@@ -1,3 +1,5 @@
+# NOT (YET) WORKING
 git clone https://github.com/runpod/serverless-ckpt-template.git
 cd serverless-ckpt-template
@@ -8,5 +10,5 @@ docker push magn418/runpod-hassan:1.5
 Models:
 PFG https://civitai.com/models/1227/pfg
 hassanblend https://civitai.com/models/1173/hassanblend-15-and-previous-versions
-Deliberate
+Deliberate https://civitai.com/models/4823
 Anything v3 ?
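Note: the checkpoints listed above are distributed as plain `.safetensors` files. A minimal sketch of fetching one the way `model_fetcher.py` does (a streamed `requests` download); the URL below is a placeholder, not a real download link:

```python
import requests

# Placeholder: substitute the direct .safetensors download link of the chosen
# checkpoint (e.g. one of the civitai models listed above).
MODEL_URL = "https://example.com/path/to/model.safetensors"

# Stream the file to disk in 1 KiB chunks, mirroring download_model() in
# model_fetcher.py.
downloaded_model = requests.get(MODEL_URL, stream=True, timeout=600)
with open("model.safetensors", "wb") as f:
    for chunk in downloaded_model.iter_content(chunk_size=1024):
        if chunk:
            f.write(chunk)
```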

runpod/runpod-worker-sd/model_fetcher.py (8 changed lines)

@@ -37,10 +37,12 @@ def download_model(model_url: str):
         for chunk in downloaded_model.iter_content(chunk_size=1024):
             if chunk:
                 f.write(chunk)
-    os.system("wget -q https://raw.githubusercontent.com/huggingface/diffusers/main/scripts/convert_original_stable_diffusion_to_diffusers.py")
-    os.system(f"python3 convert_original_stable_diffusion_to_diffusers.py --from_safetensors --checkpoint_path model.safetensors --dump_path {MODEL_CACHE_DIR}/pt")
+    os.system("wget -q https://raw.githubusercontent.com/huggingface/diffusers/v0.14.0/scripts/convert_original_stable_diffusion_to_diffusers.py")
+    os.system("pip install omegaconf")
+    os.system(f"python3 convert_original_stable_diffusion_to_diffusers.py --from_safetensors --checkpoint_path model.safetensors --dump_path {MODEL_CACHE_DIR}/model")
+    os.system("rm model.safetensors")
     #os.system(f"python3 convert_original_stable_diffusion_to_diffusers.py --checkpoint_path model.ckpt --dump_path pt")
-    model_id = "pt"
+    model_id = f"./{MODEL_CACHE_DIR}/model"
     saftey_checker = StableDiffusionSafetyChecker.from_pretrained(
         SAFETY_MODEL_ID,
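Note: the conversion above dumps a diffusers-format directory at `{MODEL_CACHE_DIR}/model`. A minimal sketch of loading that directory with `diffusers`; `MODEL_CACHE_DIR` is defined elsewhere in `model_fetcher.py`, and the value used here is only an assumed placeholder:

```python
import torch
from diffusers import StableDiffusionPipeline

# Assumed placeholder; use the MODEL_CACHE_DIR defined in model_fetcher.py.
MODEL_CACHE_DIR = "diffusers-cache"

# Load the converted checkpoint from the dump_path written above.
pipe = StableDiffusionPipeline.from_pretrained(
    f"./{MODEL_CACHE_DIR}/model", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

image = pipe("a photo of an astronaut riding a horse").images[0]
image.save("out.png")
```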

runpod/runpod-worker-sd/runpod_infer.py (2 changed lines)

@@ -149,7 +149,7 @@ if __name__ == "__main__":
         url_parts = args.model_url.split("/")
         model_id = f"{url_parts[-2]}/{url_parts[-1]}"
     else:
-        model_id = f"model.safetensors"
+        model_id = f"./{MODEL_CACHE_DIR}/model"
     MODEL = predict.Predictor(model_id)
     MODEL.setup()

runpod/runpod-worker-transformers/Dockerfile (new file, 46 lines)

@@ -0,0 +1,46 @@
ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
FROM ${BASE_IMAGE} as dev-base
ARG MODEL_NAME
ENV MODEL_NAME=${MODEL_NAME}
WORKDIR /src
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND noninteractive\
    SHELL=/bin/bash

RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub

RUN apt-get update --yes && \
    apt-get upgrade --yes && \
    apt install --yes --no-install-recommends\
    wget\
    bash\
    openssh-server &&\
    apt-get clean && rm -rf /var/lib/apt/lists/* && \
    echo "en_US.UTF-8 UTF-8" > /etc/locale.gen
RUN apt-get update && apt-get install -y --no-install-recommends
RUN apt-get install software-properties-common -y
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get install python3.8 -y
RUN apt-get install python3-pip -y
RUN apt-get install python3.8-distutils -y
RUN apt-get install python3.8-dev -y
RUN apt-get install python3.8-venv -y
RUN python3.8 -m venv /venv
ENV PATH=/venv/bin:$PATH
RUN python3.8 -m pip install --upgrade pip==23.0.1
RUN python3.8 -m pip install runpod==0.9.1
RUN python3.8 -m pip install torch==2.0.0
RUN python3.8 -m pip install transformers==4.27.2
COPY model_fetcher.py /src/model_fetcher.py
RUN python3.8 model_fetcher.py --model_name=${MODEL_NAME}
COPY runpod_infer.py /src/runpod_infer.py
COPY test_input.json /src/test_input.json
CMD python3.8 -u runpod_infer.py --model_name=${MODEL_NAME}

runpod/runpod-worker-transformers/README.md (new file, 13 lines)

@@ -0,0 +1,13 @@
## Building

Select one of the following models to build:

- `gpt-neo-1.3B`
- `gpt-neo-2.7B`
- `gpt-neox-20b`
- `pygmalion-6b`
- `gpt-j-6b`

```BASH
docker build --build-arg MODEL_NAME={model name} -t repo/image_name:tag .
```
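For example, `docker build --build-arg MODEL_NAME=gpt-neo-1.3B -t repo/image_name:tag .` bakes the 1.3B model into the image: `model_fetcher.py` downloads the weights at build time, and `runpod_infer.py` serves them when the container starts.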

runpod/runpod-worker-transformers/model_fetcher.py (new file, 61 lines)

@@ -0,0 +1,61 @@
'''
RunPod | Transformer | Model Fetcher
'''

import argparse

import torch
from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)


def download_model(model_name):
    # --------------------------------- Neo 1.3B --------------------------------- #
    if model_name == 'gpt-neo-1.3B':
        GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
        GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

    # --------------------------------- Neo 2.7B --------------------------------- #
    elif model_name == 'gpt-neo-2.7B':
        GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B", torch_dtype=torch.float16)
        GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")

    # ----------------------------------- NeoX ----------------------------------- #
    elif model_name == 'gpt-neox-20b':
        GPTNeoXForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b").half()
        GPTNeoXTokenizerFast.from_pretrained("EleutherAI/gpt-neox-20b")

    # --------------------------------- Pygmalion -------------------------------- #
    elif model_name == 'pygmalion-6b':
        AutoModelForCausalLM.from_pretrained("PygmalionAI/pygmalion-6b")
        AutoTokenizer.from_pretrained("PygmalionAI/pygmalion-6b")

    # ----------------------------------- GPT-J ----------------------------------- #
    elif model_name == 'gpt-j-6b':
        GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16",
                                        torch_dtype=torch.float16)
        AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

    # ------------------------------ PPO Shygmalion 6B ----------------------------- #
    elif model_name == 'shygmalion-6b':
        AutoModelForCausalLM.from_pretrained("TehVenom/PPO_Shygmalion-6b")
        AutoTokenizer.from_pretrained("TehVenom/PPO_Shygmalion-6b")

    # ------------------------------ Erebus 13B (NSFW) ----------------------------- #
    elif model_name == 'erebus-13b':
        AutoModelForCausalLM.from_pretrained("KoboldAI/OPT-13B-Erebus")
        AutoTokenizer.from_pretrained("KoboldAI/OPT-13B-Erebus")


# ---------------------------------------------------------------------------- #
#                                Parse Arguments                                #
# ---------------------------------------------------------------------------- #
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--model_name", type=str,
                    default="gpt-neo-1.3B", help="URL of the model to download.")

if __name__ == "__main__":
    args = parser.parse_args()
    download_model(args.model_name)
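Note: the fetcher populates the default Hugging Face cache inside the image at build time, which is why the handler below loads everything with `local_files_only=True`. A minimal sketch of reloading one of the fetched models offline, using only calls that already appear in this commit:

```python
from transformers import GPTNeoForCausalLM, GPT2Tokenizer

# After `python3.8 model_fetcher.py --model_name=gpt-neo-1.3B` has run (as the
# Dockerfile does during the build), the weights sit in the local cache and can
# be loaded without network access.
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", local_files_only=True)
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B", local_files_only=True)

input_ids = tokenizer("GPT-Neo is", return_tensors="pt").input_ids
print(tokenizer.batch_decode(model.generate(input_ids, max_length=20))[0])
```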

runpod/runpod-worker-transformers/runpod_infer.py (new file, 189 lines)

@@ -0,0 +1,189 @@
'''
RunPod | Transformer | Handler
'''
import argparse
import torch
import runpod
from runpod.serverless.utils.rp_validator import validate
from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)
torch.cuda.is_available()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INPUT_SCHEMA = {
    'prompt': {
        'type': str,
        'required': True
    },
    'do_sample': {
        'type': bool,
        'required': False,
        'default': True,
        'description': '''
            Enables decoding strategies such as multinomial sampling,
            beam-search multinomial sampling, Top-K sampling and Top-p sampling.
            All these strategies select the next token from the probability distribution
            over the entire vocabulary with various strategy-specific adjustments.
        '''
    },
    'max_length': {
        'type': int,
        'required': False,
        'default': 100
    },
    'temperature': {
        'type': float,
        'required': False,
        'default': 0.9
    },
    'repetition_penalty': {
        'type': float,
        'required': False,
        'default': 1.1
    },
    'top_p': {
        'type': float,
        'required': False,
        'default': 0.5
    },
    'top_k': {
        'type': int,
        'required': False,
        'default': 40
    },
    'typical_p': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'encoder_repetition_penalty': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'min_length': {
        'type': int,
        'required': False,
        'default': 0
    },
    'num_beams': {
        'type': int,
        'required': False,
        'default': 1
    },
    'early_stopping': {
        'type': bool,
        'required': False,
        'default': False
    },
    'penalty_alpha': {
        'type': float,
        'required': False,
        'default': 0.0
    },
    'length_penalty': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'no_repeat_ngram_size': {
        'type': int,
        'required': False,
        'default': 0
    },
}


def generator(job):
    '''
    Run the job input to generate text output.
    '''
    # Validate the input
    val_input = validate(job['input'], INPUT_SCHEMA)
    if 'errors' in val_input:
        return {"error": val_input['errors']}
    val_input = val_input['validated_input']

    input_ids = tokenizer(val_input['prompt'], return_tensors="pt").input_ids.to(device)

    gen_tokens = model.generate(
        input_ids,
        do_sample=val_input['do_sample'],
        temperature=val_input['temperature'],
        max_length=val_input['max_length'],
        repetition_penalty=val_input['repetition_penalty'],
        top_p=val_input['top_p'],
        top_k=val_input['top_k'],
        typical_p=val_input['typical_p'],
        encoder_repetition_penalty=val_input['encoder_repetition_penalty'],
        min_length=val_input['min_length'],
        num_beams=val_input['num_beams'],
        early_stopping=val_input['early_stopping'],
        penalty_alpha=val_input['penalty_alpha'],
        length_penalty=val_input['length_penalty'],
        no_repeat_ngram_size=val_input['no_repeat_ngram_size'],
    ).to(device)

    gen_text = tokenizer.batch_decode(gen_tokens)[0]

    return gen_text


# ---------------------------------------------------------------------------- #
#                                Parse Arguments                                #
# ---------------------------------------------------------------------------- #
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--model_name", type=str,
                    default="gpt-neo-1.3B", help="URL of the model to download.")

if __name__ == "__main__":
    args = parser.parse_args()

    # --------------------------------- Neo 1.3B --------------------------------- #
    if args.model_name == 'gpt-neo-1.3B':
        model = GPTNeoForCausalLM.from_pretrained(
            "EleutherAI/gpt-neo-1.3B", local_files_only=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B", local_files_only=True)

    elif args.model_name == 'gpt-neo-2.7B':
        model = GPTNeoForCausalLM.from_pretrained(
            "EleutherAI/gpt-neo-2.7B", local_files_only=True, torch_dtype=torch.float16).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", local_files_only=True)

    elif args.model_name == 'gpt-neox-20b':
        model = GPTNeoXForCausalLM.from_pretrained(
            "EleutherAI/gpt-neox-20b", local_files_only=True).half().to(device)
        tokenizer = GPTNeoXTokenizerFast.from_pretrained(
            "EleutherAI/gpt-neox-20b", local_files_only=True)

    elif args.model_name == 'pygmalion-6b':
        model = AutoModelForCausalLM.from_pretrained(
            "PygmalionAI/pygmalion-6b", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "PygmalionAI/pygmalion-6b", local_files_only=True)

    elif args.model_name == 'gpt-j-6b':
        model = GPTJForCausalLM.from_pretrained(
            "EleutherAI/gpt-j-6B", local_files_only=True, revision="float16",
            torch_dtype=torch.float16).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "EleutherAI/gpt-j-6B", local_files_only=True)

    elif args.model_name == 'shygmalion-6b':
        model = AutoModelForCausalLM.from_pretrained(
            "TehVenom/PPO_Shygmalion-6b", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "TehVenom/PPO_Shygmalion-6b", local_files_only=True)

    elif args.model_name == 'erebus-13b':
        model = AutoModelForCausalLM.from_pretrained(
            "KoboldAI/OPT-13B-Erebus", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "KoboldAI/OPT-13B-Erebus", local_files_only=True)

    runpod.serverless.start({"handler": generator})
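Note: `generator()` receives a job dict whose `input` member is validated against `INPUT_SCHEMA`; only `prompt` is required and every other key falls back to its default. An illustrative payload in that shape (the values are examples, not fixed requirements):

```python
# Same shape as test_input.json, with a few optional INPUT_SCHEMA keys set.
job = {
    "input": {
        "prompt": "Hello, my name is",
        "max_length": 80,
        "temperature": 0.9,
        "top_p": 0.5,
        "top_k": 40
    }
}

# With model and tokenizer loaded as in the __main__ block above:
# print(generator(job))
```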

runpod/runpod-worker-transformers/test_input.json (new file, 5 lines)

@@ -0,0 +1,5 @@
{
    "input": {
        "prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."
    }
}