diff --git a/runpod/runpod-worker-transformers/Dockerfile b/runpod/runpod-worker-transformers/Dockerfile
index c2b7018..8d294cd 100644
--- a/runpod/runpod-worker-transformers/Dockerfile
+++ b/runpod/runpod-worker-transformers/Dockerfile
@@ -65,7 +65,7 @@ WORKDIR /workspace
 ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN mkdir repositories && git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa && \
-    (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
+#   (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
 #RUN mkdir repositories && git clone --depth 1 https://github.com/AlpinDale/gptq-gptj.git repositories/GPTQ-for-LLaMa && \
     (cd repositories/GPTQ-for-LLaMa && python3 setup_cuda.py install)
diff --git a/runpod/runpod-worker-transformers/runpod_infer.py b/runpod/runpod-worker-transformers/runpod_infer.py
index b947436..51c934c 100644
--- a/runpod/runpod-worker-transformers/runpod_infer.py
+++ b/runpod/runpod-worker-transformers/runpod_infer.py
@@ -255,8 +255,9 @@ if __name__ == "__main__":
             "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True)
     elif args.model_name == 'pygmalion-6b-gptq-4bit':
-        model = AutoModelForCausalLM.from_pretrained(
-            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
+        model = load_quantized("pygmalion-6b-gptq-4bit", 4, 128, device).to(device)
+#       model = AutoModelForCausalLM.from_pretrained(
+#           "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True)
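
Note: load_quantized is not defined anywhere in this diff, so it must already exist elsewhere in runpod_infer.py for the new branch to run. Below is a minimal sketch of what such a helper might look like, loosely following the load_quant pattern from GPTQ-for-LLaMa-style inference scripts; the module path, checkpoint directory layout, and the exact load_quant signature are assumptions for illustration, not taken from this repository.

    # Hypothetical sketch only -- the real load_quantized in runpod_infer.py is
    # not shown in this diff. Assumes GPTQ-for-LLaMa was cloned into
    # repositories/ (see the Dockerfile hunk above) and exposes a
    # load_quant(model, checkpoint, wbits, groupsize) entry point; for a GPT-J
    # model such as pygmalion-6b, a GPT-J fork (e.g. the commented-out
    # AlpinDale/gptq-gptj clone) would have to supply the equivalent loader.
    import sys
    from pathlib import Path

    sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa")))


    def load_quantized(model_name, wbits, groupsize, device):
        from llama import load_quant  # assumed import; a GPT-J fork would differ

        model_dir = Path("models") / model_name  # assumed checkpoint layout
        checkpoints = sorted(model_dir.glob("*.safetensors")) + sorted(model_dir.glob("*.pt"))
        if not checkpoints:
            raise FileNotFoundError(f"no quantized checkpoint found in {model_dir}")

        # wbits=4, groupsize=128 match the call site in the diff above.
        model = load_quant(str(model_dir), str(checkpoints[0]), wbits, groupsize)
        # The call site applies .to(device) itself, so moving the model here is
        # technically redundant but harmless.
        return model.to(device)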