
try other quantized model

Branch: master
Author: Hendrik Langer, 2 years ago
Commit: bd4fe4bb63
  1. runpod/runpod-worker-transformers/Dockerfile (2 changes)
  2. runpod/runpod-worker-transformers/runpod_infer.py (5 changes)

runpod/runpod-worker-transformers/Dockerfile (2 changes)

@@ -65,7 +65,7 @@ WORKDIR /workspace
 ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN mkdir repositories && git clone --branch cuda --single-branch https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa && \
-    (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
+#   (cd repositories/GPTQ-for-LLaMa && git reset --hard 437154dd434c3f9d5c9c4e6f401d6d71116ac248) && \
 #RUN mkdir repositories && git clone --depth 1 https://github.com/AlpinDale/gptq-gptj.git repositories/GPTQ-for-LLaMa && \
     (cd repositories/GPTQ-for-LLaMa && python3 setup_cuda.py install)
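
Note: the `python3 setup_cuda.py install` step compiles GPTQ-for-LLaMa's CUDA kernels for the architectures listed in `TORCH_CUDA_ARCH_LIST`. For orientation only, a rough sketch of what a GPTQ-for-LLaMa-style `setup_cuda.py` contains (extension and source file names are recalled from upstream, not taken from the pinned commit):

# Sketch of a GPTQ-for-LLaMa style setup_cuda.py (illustrative; file names assumed).
from setuptools import setup
from torch.utils import cpp_extension

setup(
    name="quant_cuda",
    ext_modules=[
        # The torch extension builder reads TORCH_CUDA_ARCH_LIST, so the ENV
        # line above controls which GPU architectures the kernels target.
        cpp_extension.CUDAExtension(
            "quant_cuda", ["quant_cuda.cpp", "quant_cuda_kernel.cu"]
        )
    ],
    cmdclass={"build_ext": cpp_extension.BuildExtension},
)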

runpod/runpod-worker-transformers/runpod_infer.py (5 changes)

@@ -255,8 +255,9 @@ if __name__ == "__main__":
             "mayaeary/pygmalion-6b-4bit-128g", local_files_only=True)
     elif args.model_name == 'pygmalion-6b-gptq-4bit':
-        model = AutoModelForCausalLM.from_pretrained(
-            "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
+        model = load_quantized("pygmalion-6b-gptq-4bit", 4, 128, device).to(device)
+        # model = AutoModelForCausalLM.from_pretrained(
+        #     "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True, from_pt=True).to(device)
         tokenizer = AutoTokenizer.from_pretrained(
             "OccamRazor/pygmalion-6b-gptq-4bit", local_files_only=True)
