From bd4fe4bb634c3fb0e0e10a6fead7bae7f6ced188 Mon Sep 17 00:00:00 2001
From: Hendrik Langer
Date: Fri, 7 Apr 2023 00:41:05 +0200
Subject: [PATCH] try other quantized model

---
 runpod/runpod-worker-transformers/Dockerfile      | 2 +-
 runpod/runpod-worker-transformers/runpod_infer.py | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/runpod/runpod-worker-transformers/Dockerfile b/runpod/runpod-worker-transformers/Dockerfile
index c2b7018..8d294cd 100644
--- a/runpod/runpod-worker-transformers/Dockerfile
+++ b/runpod/runpod-worker-transformers/Dockerfile
@@ -65,7 +65,7 @@ WORKDIR /workspace
 ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 
 RUN mkdir repositories && git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa && \
-    (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
+#    (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
 #RUN mkdir repositories && git clone --depth 1 https://github.com/AlpinDale/gptq-gptj.git repositories/GPTQ-for-LLaMa && \
     (cd repositories/GPTQ-for-LLaMa && python3 setup_cuda.py install)
 
diff --git a/runpod/runpod-worker-transformers/runpod_infer.py b/runpod/runpod-worker-transformers/runpod_infer.py
index b947436..51c934c 100644
--- a/runpod/runpod-worker-transformers/runpod_infer.py
+++ b/runpod/runpod-worker-transformers/runpod_infer.py
@@ -255,8 +255,9 @@ if __name__ == "__main__":
             "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True)
 
     elif args.model_name == 'pygmalion-6b-gptq-4bit':
-        model = AutoModelForCausalLM.from_pretrained(
-            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
+        model = load_quantized("pygmalion-6b-gptq-4bit", 4, 128, device).to(device)
+#        model = AutoModelForCausalLM.from_pretrained(
+#            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True)
 
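
The patch replaces the plain AutoModelForCausalLM.from_pretrained() call with load_quantized("pygmalion-6b-gptq-4bit", 4, 128, device), a helper whose definition is not part of this diff. Below is a minimal, hypothetical sketch of how such a helper could be written on top of the GPTQ-for-LLaMa checkout that the Dockerfile builds; the module name (gptj), the load_quant(model, checkpoint, wbits, groupsize) signature, and the models/<name> directory layout are all assumptions, not the actual implementation in runpod_infer.py.

# Hypothetical sketch only -- not part of the patch above and not the real
# load_quantized() in runpod_infer.py.  Assumes the GPTQ-for-LLaMa checkout
# from the Dockerfile lives at repositories/GPTQ-for-LLaMa and exposes a
# GPT-J-capable load_quant() (Pygmalion-6B is GPT-J based, so a fork such as
# the commented-out AlpinDale/gptq-gptj would be needed for this to work).
import sys
from pathlib import Path


def load_quantized(model_name, wbits, groupsize, device):
    repo_dir = Path("repositories/GPTQ-for-LLaMa")
    sys.path.insert(0, str(repo_dir))  # make the cloned repo importable

    # Assumed entry point: load_quant(model, checkpoint, wbits, groupsize)
    from gptj import load_quant

    # Assumed local layout: quantized weights stored under models/<model_name>
    model_dir = Path("models") / model_name
    checkpoint = next(model_dir.glob("*.safetensors"), None) or next(model_dir.glob("*.pt"))

    model = load_quant(str(model_dir), str(checkpoint), wbits, groupsize)
    return model.to(device)  # idempotent if the caller also calls .to(device)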