Hendrik Langer
2 years ago
9 changed files with 326 additions and 7 deletions
@@ -0,0 +1,46 @@
ARG BASE_IMAGE=nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
FROM ${BASE_IMAGE} as dev-base

ARG MODEL_NAME
ENV MODEL_NAME=${MODEL_NAME}

WORKDIR /src

SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
    SHELL=/bin/bash

# Rotate to NVIDIA's current repository signing key (the old key was revoked).
RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub

RUN apt-get update --yes && \
    apt-get upgrade --yes && \
    apt-get install --yes --no-install-recommends \
        wget \
        bash \
        openssh-server && \
    apt-get clean && rm -rf /var/lib/apt/lists/* && \
    echo "en_US.UTF-8 UTF-8" > /etc/locale.gen

# Python 3.8 from the deadsnakes PPA, then an isolated virtual environment.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends software-properties-common && \
    add-apt-repository --yes ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install --yes --no-install-recommends \
        python3.8 python3-pip python3.8-distutils python3.8-dev python3.8-venv && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

RUN python3.8 -m venv /venv
ENV PATH=/venv/bin:$PATH

RUN python3.8 -m pip install --upgrade pip==23.0.1
RUN python3.8 -m pip install runpod==0.9.1
RUN python3.8 -m pip install torch==2.0.0
RUN python3.8 -m pip install transformers==4.27.2

# Bake the model weights into the image at build time.
COPY model_fetcher.py /src/model_fetcher.py
RUN python3.8 model_fetcher.py --model_name=${MODEL_NAME}

COPY runpod_infer.py /src/runpod_infer.py
COPY test_input.json /src/test_input.json

CMD python3.8 -u runpod_infer.py --model_name=${MODEL_NAME}
@@ -0,0 +1,13 @@
## Building

Select one of the following models to build:

- `gpt-neo-1.3B`
- `gpt-neo-2.7B`
- `gpt-neox-20b`
- `pygmalion-6b`
- `gpt-j-6b`
- `shygmalion-6b`
- `erebus-13b`

```bash
docker build --build-arg MODEL_NAME={model name} -t repo/image_name:tag .
```
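Once built and pushed, the image is meant to run as a RunPod serverless worker. Below is a minimal sketch of calling such an endpoint over HTTP; the `runsync` URL format follows RunPod's public REST convention, and `ENDPOINT_ID` and `API_KEY` are placeholders, none of which is part of this change:

```python
# Hypothetical client call; fill in ENDPOINT_ID and API_KEY after deploying
# the image as a RunPod serverless endpoint.
import requests

ENDPOINT_ID = "your-endpoint-id"
API_KEY = "your-runpod-api-key"

response = requests.post(
    f"https://api.runpod.ai/v2/{ENDPOINT_ID}/runsync",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"input": {"prompt": "GPT-NeoX-20B is"}},
    timeout=600,
)
print(response.json())
```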
@@ -0,0 +1,61 @@
'''
RunPod | Transformer | Model Fetcher
'''

import argparse

import torch
from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer,
                          AutoModelForCausalLM)


def download_model(model_name):
    '''
    Download the selected model and tokenizer into the local cache.
    '''
    # --------------------------------- Neo 1.3B --------------------------------- #
    if model_name == 'gpt-neo-1.3B':
        GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
        GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

    # --------------------------------- Neo 2.7B --------------------------------- #
    elif model_name == 'gpt-neo-2.7B':
        GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B", torch_dtype=torch.float16)
        GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")

    # ----------------------------------- NeoX ----------------------------------- #
    elif model_name == 'gpt-neox-20b':
        GPTNeoXForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b").half()
        GPTNeoXTokenizerFast.from_pretrained("EleutherAI/gpt-neox-20b")

    # --------------------------------- Pygmalion -------------------------------- #
    elif model_name == 'pygmalion-6b':
        AutoModelForCausalLM.from_pretrained("PygmalionAI/pygmalion-6b")
        AutoTokenizer.from_pretrained("PygmalionAI/pygmalion-6b")

    # ----------------------------------- GPT-J ---------------------------------- #
    elif model_name == 'gpt-j-6b':
        GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16",
                                        torch_dtype=torch.float16)
        AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

    # ----------------------------- PPO Shygmalion 6B ----------------------------- #
    elif model_name == 'shygmalion-6b':
        AutoModelForCausalLM.from_pretrained("TehVenom/PPO_Shygmalion-6b")
        AutoTokenizer.from_pretrained("TehVenom/PPO_Shygmalion-6b")

    # ----------------------------- Erebus 13B (NSFW) ----------------------------- #
    elif model_name == 'erebus-13b':
        AutoModelForCausalLM.from_pretrained("KoboldAI/OPT-13B-Erebus")
        AutoTokenizer.from_pretrained("KoboldAI/OPT-13B-Erebus")

    # Fail the build early instead of silently producing an image with no model.
    else:
        raise ValueError(f"Unknown model name: {model_name}")


# ---------------------------------------------------------------------------- #
#                                Parse Arguments                                #
# ---------------------------------------------------------------------------- #
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--model_name", type=str,
                    default="gpt-neo-1.3B", help="Name of the model to download.")


if __name__ == "__main__":
    args = parser.parse_args()
    download_model(args.model_name)
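The fetcher keeps no reference to the loaded objects; its only job is to populate the Hugging Face cache inside the image so the handler can later start without network access. A minimal sketch of that contract, assuming the default cache location (`~/.cache/huggingface`) rather than anything this change configures:

```python
# Sketch of the build-time / run-time caching contract (assumed HF defaults).
from transformers import AutoModelForCausalLM

# Build time (model_fetcher.py): downloads weights into the image's cache.
AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")

# Run time (runpod_infer.py): resolves entirely from that cache, no network.
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-neo-1.3B", local_files_only=True)
```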
@@ -0,0 +1,189 @@
'''
RunPod | Transformer | Handler
'''
import argparse

import torch
import runpod
from runpod.serverless.utils.rp_validator import validate
from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer,
                          AutoModelForCausalLM)


# Run on the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

INPUT_SCHEMA = {
    'prompt': {
        'type': str,
        'required': True
    },
    'do_sample': {
        'type': bool,
        'required': False,
        'default': True,
        'description': '''
            Enables decoding strategies such as multinomial sampling,
            beam-search multinomial sampling, Top-K sampling and Top-p sampling.
            All these strategies select the next token from the probability
            distribution over the entire vocabulary, with various
            strategy-specific adjustments.
        '''
    },
    'max_length': {
        'type': int,
        'required': False,
        'default': 100
    },
    'temperature': {
        'type': float,
        'required': False,
        'default': 0.9
    },
    'repetition_penalty': {
        'type': float,
        'required': False,
        'default': 1.1
    },
    'top_p': {
        'type': float,
        'required': False,
        'default': 0.5
    },
    'top_k': {
        'type': int,
        'required': False,
        'default': 40
    },
    'typical_p': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'encoder_repetition_penalty': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'min_length': {
        'type': int,
        'required': False,
        'default': 0
    },
    'num_beams': {
        'type': int,
        'required': False,
        'default': 1
    },
    'early_stopping': {
        'type': bool,
        'required': False,
        'default': False
    },
    'penalty_alpha': {
        'type': float,
        'required': False,
        'default': 0.0
    },
    'length_penalty': {
        'type': float,
        'required': False,
        'default': 1.0
    },
    'no_repeat_ngram_size': {
        'type': int,
        'required': False,
        'default': 0
    }
}
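The schema is consumed by `rp_validator.validate`, which the handler below relies on both to reject malformed requests and to fill in defaults for omitted optional keys. A small illustration of that behavior, inferred from how `validate` is used in this file (the concrete payload is hypothetical):

```python
# Hypothetical payload: only 'prompt' is required; 'top_k' overrides a default.
result = validate({"prompt": "Hello", "top_k": 10}, INPUT_SCHEMA)
validated = result["validated_input"]
# validated["top_k"] == 10, validated["temperature"] == 0.9 (schema default)
```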


def generator(job):
    '''
    Run the job input to generate text output.
    '''
    # Validate the input against INPUT_SCHEMA; defaults fill any omitted keys.
    val_input = validate(job['input'], INPUT_SCHEMA)
    if 'errors' in val_input:
        return {"error": val_input['errors']}
    val_input = val_input['validated_input']

    input_ids = tokenizer(val_input['prompt'], return_tensors="pt").input_ids.to(device)

    # generate() already returns the tokens on the model's device.
    gen_tokens = model.generate(
        input_ids,
        do_sample=val_input['do_sample'],
        temperature=val_input['temperature'],
        max_length=val_input['max_length'],
        repetition_penalty=val_input['repetition_penalty'],
        top_p=val_input['top_p'],
        top_k=val_input['top_k'],
        typical_p=val_input['typical_p'],
        encoder_repetition_penalty=val_input['encoder_repetition_penalty'],
        min_length=val_input['min_length'],
        num_beams=val_input['num_beams'],
        early_stopping=val_input['early_stopping'],
        penalty_alpha=val_input['penalty_alpha'],
        length_penalty=val_input['length_penalty'],
        no_repeat_ngram_size=val_input['no_repeat_ngram_size'],
    )

    gen_text = tokenizer.batch_decode(gen_tokens)[0]

    return gen_text
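For a quick smoke test outside the RunPod platform, the handler can be fed a job-shaped dict directly. This is a hypothetical snippet: it only runs once `model` and `tokenizer` exist as globals, which the `__main__` block below arranges.

```python
# Hypothetical local smoke test; requires model/tokenizer to be loaded first.
sample_job = {
    "input": {
        "prompt": "Hello, my name is",
        "max_length": 32,
    }
}
print(generator(sample_job))
```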


# ---------------------------------------------------------------------------- #
#                                Parse Arguments                                #
# ---------------------------------------------------------------------------- #
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--model_name", type=str,
                    default="gpt-neo-1.3B", help="Name of the model to load.")


if __name__ == "__main__":
    args = parser.parse_args()

    # All weights were fetched at build time, so loading is strictly offline.
    # --------------------------------- Neo 1.3B --------------------------------- #
    if args.model_name == 'gpt-neo-1.3B':
        model = GPTNeoForCausalLM.from_pretrained(
            "EleutherAI/gpt-neo-1.3B", local_files_only=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B", local_files_only=True)

    elif args.model_name == 'gpt-neo-2.7B':
        model = GPTNeoForCausalLM.from_pretrained(
            "EleutherAI/gpt-neo-2.7B", local_files_only=True, torch_dtype=torch.float16).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", local_files_only=True)

    elif args.model_name == 'gpt-neox-20b':
        model = GPTNeoXForCausalLM.from_pretrained(
            "EleutherAI/gpt-neox-20b", local_files_only=True).half().to(device)
        tokenizer = GPTNeoXTokenizerFast.from_pretrained(
            "EleutherAI/gpt-neox-20b", local_files_only=True)

    elif args.model_name == 'pygmalion-6b':
        model = AutoModelForCausalLM.from_pretrained(
            "PygmalionAI/pygmalion-6b", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "PygmalionAI/pygmalion-6b", local_files_only=True)

    elif args.model_name == 'gpt-j-6b':
        model = GPTJForCausalLM.from_pretrained(
            "EleutherAI/gpt-j-6B", local_files_only=True, revision="float16",
            torch_dtype=torch.float16).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "EleutherAI/gpt-j-6B", local_files_only=True)

    elif args.model_name == 'shygmalion-6b':
        model = AutoModelForCausalLM.from_pretrained(
            "TehVenom/PPO_Shygmalion-6b", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "TehVenom/PPO_Shygmalion-6b", local_files_only=True)

    elif args.model_name == 'erebus-13b':
        model = AutoModelForCausalLM.from_pretrained(
            "KoboldAI/OPT-13B-Erebus", local_files_only=True).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            "KoboldAI/OPT-13B-Erebus", local_files_only=True)

    else:
        raise ValueError(f"Unknown model name: {args.model_name}")

    runpod.serverless.start({"handler": generator})
@@ -0,0 +1,5 @@
{
    "input": {
        "prompt": "GPTNeoX20B is a 20B-parameter autoregressive Transformer model developed by EleutherAI."
    }
}