Hendrik Langer
2 years ago
9 changed files with 326 additions and 7 deletions
Dockerfile
@@ -0,0 +1,46 @@

ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04

FROM ${BASE_IMAGE} as dev-base

ARG MODEL_NAME
ENV MODEL_NAME=${MODEL_NAME}

WORKDIR /src

SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
    SHELL=/bin/bash

# Rotate the NVIDIA repository signing key (the old CUDA repo key was revoked upstream).
RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub

RUN apt-get update --yes && \
    apt-get upgrade --yes && \
    apt-get install --yes --no-install-recommends \
        wget \
        bash \
        openssh-server && \
    apt-get clean && rm -rf /var/lib/apt/lists/* && \
    echo "en_US.UTF-8 UTF-8" > /etc/locale.gen

# Python 3.8 from the deadsnakes PPA, plus a dedicated virtual environment.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa && \
    apt-get install --yes --no-install-recommends \
        python3.8 \
        python3-pip \
        python3.8-distutils \
        python3.8-dev \
        python3.8-venv

RUN python3.8 -m venv /venv
ENV PATH=/venv/bin:$PATH

# Pin tool and library versions for reproducible builds.
RUN python3.8 -m pip install --upgrade pip==23.0.1
RUN python3.8 -m pip install runpod==0.9.1
RUN python3.8 -m pip install torch==2.0.0
RUN python3.8 -m pip install transformers==4.27.2

# Bake the selected model into the image so workers start without downloading.
COPY model_fetcher.py /src/model_fetcher.py
RUN python3.8 model_fetcher.py --model_name=${MODEL_NAME}

COPY runpod_infer.py /src/runpod_infer.py
COPY test_input.json /src/test_input.json

CMD python3.8 -u runpod_infer.py --model_name=${MODEL_NAME}
README.md
@@ -0,0 +1,13 @@

## Building

Select one of the following models to build:

- `gpt-neo-1.3B`
- `gpt-neo-2.7B`
- `gpt-neox-20b`
- `pygmalion-6b`
- `gpt-j-6b`
- `shygmalion-6b`
- `erebus-13b`

```bash
docker build --build-arg MODEL_NAME={model name} -t repo/image_name:tag .
```
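
For example, to bake in GPT-Neo 1.3B and try the image locally (the repository and tag below are placeholders, and `--gpus all` assumes the NVIDIA Container Toolkit is installed on the host):

```bash
# Build the worker with GPT-Neo 1.3B baked into the image.
docker build --build-arg MODEL_NAME=gpt-neo-1.3B -t myrepo/runpod-worker:gpt-neo-1.3B .

# Start it locally on a GPU host (assumes the NVIDIA Container Toolkit).
docker run --gpus all myrepo/runpod-worker:gpt-neo-1.3B
```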
model_fetcher.py
@@ -0,0 +1,61 @@

'''
RunPod | Transformer | Model Fetcher
'''

import argparse

import torch
from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)


def download_model(model_name):
    '''
    Download the selected model and tokenizer into the Hugging Face cache.
    The return values are deliberately discarded; this only warms the cache
    so runpod_infer.py can later load with local_files_only=True.
    '''
    # --------------------------------- Neo 1.3B --------------------------------- #
    if model_name == 'gpt-neo-1.3B':
        GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
        GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

    # --------------------------------- Neo 2.7B --------------------------------- #
    elif model_name == 'gpt-neo-2.7B':
        GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B", torch_dtype=torch.float16)
        GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")

    # ----------------------------------- NeoX ----------------------------------- #
    elif model_name == 'gpt-neox-20b':
        GPTNeoXForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b").half()
        GPTNeoXTokenizerFast.from_pretrained("EleutherAI/gpt-neox-20b")

    # --------------------------------- Pygmalion -------------------------------- #
    elif model_name == 'pygmalion-6b':
        AutoModelForCausalLM.from_pretrained("PygmalionAI/pygmalion-6b")
        AutoTokenizer.from_pretrained("PygmalionAI/pygmalion-6b")

    # ----------------------------------- GPT-J ----------------------------------- #
    elif model_name == 'gpt-j-6b':
        GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16",
                                        torch_dtype=torch.float16)
        AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

    # ------------------------------ PPO Shygmalion 6B ----------------------------- #
    elif model_name == 'shygmalion-6b':
        AutoModelForCausalLM.from_pretrained("TehVenom/PPO_Shygmalion-6b")
        AutoTokenizer.from_pretrained("TehVenom/PPO_Shygmalion-6b")

    # ------------------------------ Erebus 13B (NSFW) ----------------------------- #
    elif model_name == 'erebus-13b':
        AutoModelForCausalLM.from_pretrained("KoboldAI/OPT-13B-Erebus")
        AutoTokenizer.from_pretrained("KoboldAI/OPT-13B-Erebus")


# ---------------------------------------------------------------------------- #
#                                Parse Arguments                                #
# ---------------------------------------------------------------------------- #
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--model_name", type=str,
                    default="gpt-neo-1.3B", help="Name of the model to download.")


if __name__ == "__main__":
    args = parser.parse_args()
    download_model(args.model_name)
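
The fetcher normally runs once at image-build time (the `RUN python3.8 model_fetcher.py ...` step in the Dockerfile), but it can also be invoked by hand to warm a local cache. A minimal sketch, assuming the default Hugging Face cache location (a library default, not something this repo configures):

```bash
# Download the weights ahead of time so later loads with
# local_files_only=True succeed without network access.
python3.8 model_fetcher.py --model_name=gpt-neo-2.7B

# The files land in the default Hugging Face cache directory.
ls ~/.cache/huggingface/
```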
runpod_infer.py
@@ -0,0 +1,189 @@

'''
RunPod | Transformer | Handler
'''
import argparse

import torch
import runpod
from runpod.serverless.utils.rp_validator import validate
from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM)


# Use the GPU when one is visible, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

INPUT_SCHEMA = {
    'prompt': {
        'type': str,
        'required': True
    },
    'do_sample': {
        'type': bool,
        'required': False,
        'default': True,
        'description': '''
            Enables decoding strategies such as multinomial sampling,
            beam-search multinomial sampling, Top-K sampling and Top-p sampling.
            All these strategies select the next token from the probability distribution
            over the entire vocabulary with various strategy-specific adjustments.
        '''
    },
    'max_length': {
        'type': int,
        'required': False,
        'default': 100
    },
    'temperature': {
        'type': float,
        'required': False,
        'default': 0.9
    },
    'repetition_penalty': {
        'type': float,
        'required': False,
        'default': 1.1
    },
    'top_p': {
        'type': float,
        'required': False,
        'default': 0.5
    },
    'top_k': {
        'type': int,
        'required': False,
        'default': 40
    },
    'typical_p': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'encoder_repetition_penalty': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'min_length': {
        'type': int,
        'required': False,
        'default': 0
    },
    'num_beams': {
        'type': int,
        'required': False,
        'default': 1
    },
    'early_stopping': {
        'type': bool,
        'required': False,
        'default': False
    },
    'penalty_alpha': {
        'type': float,
        'required': False,
        'default': 0.0
    },
    'length_penalty': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'no_repeat_ngram_size': {
        'type': int,
        'required': False,
        'default': 0
    },
}
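Only `prompt` is required; every other field falls back to its schema default, so callers override just what they need. As a hedged illustration of what a request might look like once this worker sits behind a RunPod serverless endpoint (the `/v2/{endpoint_id}/run` URL pattern and the placeholder `ENDPOINT_ID`/`RUNPOD_API_KEY` values are assumptions based on RunPod's public API, not part of this commit):

```bash
# Hypothetical request against a deployed endpoint; substitute real values.
curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/run" \
  -H "Authorization: Bearer ${RUNPOD_API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
        "input": {
          "prompt": "Once upon a time",
          "max_length": 200,
          "temperature": 0.8,
          "top_k": 50
        }
      }'
```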

def generator(job):
    '''
    Run the job input to generate text output.
    '''
    # Validate the input against INPUT_SCHEMA before touching the model.
    val_input = validate(job['input'], INPUT_SCHEMA)
    if 'errors' in val_input:
        return {"error": val_input['errors']}
    val_input = val_input['validated_input']

    # Tokenize the prompt and move it to the same device as the model.
    input_ids = tokenizer(val_input['prompt'], return_tensors="pt").input_ids.to(device)

    # model and tokenizer are module-level globals set in the __main__ block below.
    gen_tokens = model.generate(
        input_ids,
        do_sample=val_input['do_sample'],
        temperature=val_input['temperature'],
        max_length=val_input['max_length'],
        repetition_penalty=val_input['repetition_penalty'],
        top_p=val_input['top_p'],
        top_k=val_input['top_k'],
        typical_p=val_input['typical_p'],
        encoder_repetition_penalty=val_input['encoder_repetition_penalty'],
        min_length=val_input['min_length'],
        num_beams=val_input['num_beams'],
        early_stopping=val_input['early_stopping'],
        penalty_alpha=val_input['penalty_alpha'],
        length_penalty=val_input['length_penalty'],
        no_repeat_ngram_size=val_input['no_repeat_ngram_size'],
    )

    gen_text = tokenizer.batch_decode(gen_tokens)[0]

    return gen_text


# ---------------------------------------------------------------------------- #
#                                Parse Arguments                                #
# ---------------------------------------------------------------------------- #
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--model_name", type=str,
                    default="gpt-neo-1.3B", help="Name of the model to load.")


if __name__ == "__main__":
    args = parser.parse_args()

    # The weights were baked into the image by model_fetcher.py, so loading
    # with local_files_only=True avoids any network access at startup.

    # --------------------------------- Neo 1.3B --------------------------------- #
    if args.model_name == 'gpt-neo-1.3B':
        model = GPTNeoForCausalLM.from_pretrained(
            "EleutherAI/gpt-neo-1.3B", local_files_only=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B", local_files_only=True)

    elif args.model_name == 'gpt-neo-2.7B':
        model = GPTNeoForCausalLM.from_pretrained(
            "EleutherAI/gpt-neo-2.7B", local_files_only=True, torch_dtype=torch.float16).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", local_files_only=True)

    elif args.model_name == 'gpt-neox-20b':
        model = GPTNeoXForCausalLM.from_pretrained(
            "EleutherAI/gpt-neox-20b", local_files_only=True).half().to(device)
        tokenizer = GPTNeoXTokenizerFast.from_pretrained(
            "EleutherAI/gpt-neox-20b", local_files_only=True)

    elif args.model_name == 'pygmalion-6b':
        model = AutoModelForCausalLM.from_pretrained(
            "PygmalionAI/pygmalion-6b", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "PygmalionAI/pygmalion-6b", local_files_only=True)

    elif args.model_name == 'gpt-j-6b':
        model = GPTJForCausalLM.from_pretrained(
            "EleutherAI/gpt-j-6B", local_files_only=True, revision="float16",
            torch_dtype=torch.float16).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "EleutherAI/gpt-j-6B", local_files_only=True)

    elif args.model_name == 'shygmalion-6b':
        model = AutoModelForCausalLM.from_pretrained(
            "TehVenom/PPO_Shygmalion-6b", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "TehVenom/PPO_Shygmalion-6b", local_files_only=True)

    elif args.model_name == 'erebus-13b':
        model = AutoModelForCausalLM.from_pretrained(
            "KoboldAI/OPT-13B-Erebus", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "KoboldAI/OPT-13B-Erebus", local_files_only=True)

    runpod.serverless.start({"handler": generator})
test_input.json
@@ -0,0 +1,5 @@

{
    "input": {
        "prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."
    }
}
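
This file gives the worker a canned job for local testing. Assuming the RunPod SDK picks up `test_input.json` from the working directory when no job queue is configured (behavior may differ in runpod 0.9.1; treat this as a sketch, not a guarantee), the handler can be exercised without deploying:

```bash
# Hedged sketch: run the handler once against test_input.json locally.
# Whether runpod==0.9.1 auto-loads the file or needs a flag is an assumption;
# check the SDK docs for your version.
python3.8 runpod_infer.py --model_name=gpt-neo-1.3B
```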