From 0004ec8be3ca150ce6d8b79f2991bfe3a9dc65ad Mon Sep 17 00:00:00 2001
From: "Sebastian.W"
Date: Wed, 10 Apr 2024 18:36:10 +0800
Subject: [PATCH] fix(autogptq): do not use_triton with qwen-vl (#1985)

* Enhance autogptq backend to support VL models

* update dependencies for autogptq

* remove redundant auto-gptq dependency

* Convert base64 to image_url for Qwen-VL model

* implemented model inference for qwen-vl

* remove user prompt from generated answer

* fixed write image error

* fixed use_triton issue when loading Qwen-VL model

---------

Co-authored-by: Binghua Wu
---
 backend/python/autogptq/autogptq.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py
index bbafdd92..c7c35028 100755
--- a/backend/python/autogptq/autogptq.py
+++ b/backend/python/autogptq/autogptq.py
@@ -39,7 +39,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 self.model_name = "Qwen-VL-Chat"
                 model = AutoModelForCausalLM.from_pretrained(model_path,
                                                              trust_remote_code=request.TrustRemoteCode,
-                                                             use_triton=request.UseTriton,
                                                              device_map="auto").eval()
             else:
                 model = AutoGPTQForCausalLM.from_quantized(model_path,
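
The change removes `use_triton=request.UseTriton` from the Qwen-VL branch, which loads the model through transformers' `AutoModelForCausalLM.from_pretrained`; `use_triton` is a keyword of auto-gptq's `from_quantized`, not of transformers, which is why passing it broke Qwen-VL loading. Below is a minimal sketch of the two loading paths after the patch. The `load_model` helper is illustrative, and the full argument list on the `from_quantized` branch (which the diff truncates) is an assumption, not the backend's exact code.

    # Sketch of the post-patch behavior, assuming the request object exposes
    # Model, TrustRemoteCode, and UseTriton fields as in the surrounding file.
    from auto_gptq import AutoGPTQForCausalLM
    from transformers import AutoModelForCausalLM


    def load_model(model_path: str, request):
        if "qwen-vl" in request.Model.lower():
            # Qwen-VL goes through plain transformers; from_pretrained does not
            # understand use_triton, so the kwarg is no longer passed here.
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                trust_remote_code=request.TrustRemoteCode,
                device_map="auto",
            ).eval()
        else:
            # Quantized models still load via auto-gptq, where use_triton
            # remains a supported keyword argument of from_quantized.
            model = AutoGPTQForCausalLM.from_quantized(
                model_path,
                use_triton=request.UseTriton,
                trust_remote_code=request.TrustRemoteCode,
                device_map="auto",
            )
        return model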