@@ -7,8 +7,8 @@ import torch
 import runpod
 from runpod.serverless.utils.rp_validator import validate
 from transformers import (GPTNeoForCausalLM, GPT2Tokenizer, GPTNeoXForCausalLM,
-                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,
-                          LlamaForCausalLM, LlamaTokenizer)
+                          GPTNeoXTokenizerFast, GPTJForCausalLM, AutoTokenizer, AutoModelForCausalLM,)
+                          # LlamaForCausalLM, LlamaTokenizer)
 
 
 torch.cuda.is_available()
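A note on this hunk: the LlamaForCausalLM / LlamaTokenizer imports are only commented out because the branches that used them are stubbed out in the last hunk below, and the bare torch.cuda.is_available() call is pre-existing context whose return value is discarded. The device that the later .to(device) calls refer to is defined outside the lines shown here; a typical selection looks like the following sketch (an assumption about the surrounding file, not part of this change):

import torch

# Assumed device selection elsewhere in the file; illustrative only, not part of this PR.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")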
@@ -164,10 +164,22 @@ if __name__ == "__main__":
 
     elif args.model_name == 'pygmalion-6b':
         model = AutoModelForCausalLM.from_pretrained(
-            "PygmalionAI/pygmalion-6b", local_files_only=True).to(device)
+            "PygmalionAI/pygmalion-6b", local_files_only=True, low_cpu_mem_usage=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "PygmalionAI/pygmalion-6b", local_files_only=True)
 
+    elif args.model_name == 'pygmalion-6b-4bit-128g':
+        model = AutoModelForCausalLM.from_pretrained(
+            "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True)
+
+    elif args.model_name == 'pygmalion-6b-gptq-4bit':
+        model = AutoModelForCausalLM.from_pretrained(
+            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(
+            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True)
+
     elif args.model_name == 'gpt-j-6b':
         model = GPTJForCausalLM.from_pretrained(
             "EleutherAI/gpt-j-6B", local_files_only=True, revision="float16",
@@ -194,15 +206,20 @@ if __name__ == "__main__":
             "KoboldAI/OPT-13B-Erebus", local_files_only=True)
 
     elif args.model_name == 'gpt4-x-alpaca-13b-native-4bit-128g':
-        model = LlamaForCausalLM.from_pretrained(
-            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
-        tokenizer = LlamaTokenizer.from_pretrained(
-            "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
+        pass
+        # model = LlamaForCausalLM.from_pretrained(
+        #     "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True).to(device)
+        # tokenizer = LlamaTokenizer.from_pretrained(
+        #     "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g", local_files_only=True)
 
     elif args.model_name == 'gpt4-x-alpaca':
-        model = LlamaForCausalLM.from_pretrained(
-            "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
-        tokenizer = LlamaTokenizer.from_pretrained(
-            "chavinlo/gpt4-x-alpaca", local_files_only=True)
+        pass
+        # model = LlamaForCausalLM.from_pretrained(
+        #     "chavinlo/gpt4-x-alpaca", local_files_only=True).to(device)
+        # tokenizer = LlamaTokenizer.from_pretrained(
+        #     "chavinlo/gpt4-x-alpaca", local_files_only=True)
+
+    elif args.model_name == 'rwkv-4-raven-7b':
+        pass
 
     runpod.serverless.start({"handler": generator})
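For reviewers unfamiliar with the RunPod entry point: runpod.serverless.start({"handler": generator}) hands each incoming job to the generator function defined earlier in this file, which the diff does not show. The sketch below only illustrates the general shape such a handler takes (the payload arrives under job["input"]); it is not the project's actual generator, and it assumes the module-level model, tokenizer, and device loaded by the args.model_name branches above.

def generator(job):
    # RunPod delivers the request payload under job["input"]; the "prompt" key is assumed for illustration.
    prompt = job["input"].get("prompt", "")
    # model, tokenizer and device are the module-level objects loaded by the branches above.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs, max_new_tokens=128)
    return {"text": tokenizer.decode(output_ids[0], skip_special_tokens=True)}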