broadfield-dev committed
Commit 0c9364d · verified · 1 Parent(s): c2ef06d

Update app.py

Files changed (1)
  1. app.py +57 -57
app.py CHANGED
@@ -1,85 +1,86 @@
  import gradio as gr
  import torch
  from PIL import Image
  import requests
  from io import BytesIO
- from sglang import Engine
- from qwen_vl_utils import process_vision_info
- from transformers import AutoProcessor

  # --- Configuration ---
- CHECKPOINT_PATH = "Qwen/Qwen3-VL-2B-Instruct-FP8"

- # --- Model and Processor Loading ---
- # Note: This is a heavy operation and will be done once when the Space starts.
- processor = AutoProcessor.from_pretrained(CHECKPOINT_PATH, trust_remote_code=True)

- # SGLang Engine setup for GPU
- # For a CPU space, this will be extremely slow. A GPU is strongly recommended.
- llm_engine = Engine(
-     model_path=CHECKPOINT_PATH,
-     enable_multimodal=True,
-     mem_fraction_static=0.8,
-     tp_size=1,  # Set to 1 for a single GPU
-     attention_backend="fa3"
- )

  # --- Inference Function ---
  def process_and_generate(image_input, text_prompt):
      """
-     Processes the image and text prompt, and generates a response from the model.
      """
      if image_input is None or text_prompt.strip() == "":
          return "Please provide both an image and a text prompt."

-     # Convert Gradio's image input (numpy array) to a PIL Image
      pil_image = Image.fromarray(image_input)

-     # Prepare the messages payload for the model
-     messages = [
-         {
-             "role": "user",
-             "content": [
-                 {"type": "image", "image": pil_image},
-                 {"type": "text", "text": text_prompt},
-             ],
-         }
-     ]
-
-     # Apply the chat template and process vision info
-     text = processor.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
-
-     image_inputs, _ = process_vision_info(
-         messages,
-         image_patch_size=processor.image_processor.patch_size
-     )
-
-     # Define sampling parameters
-     sampling_params = {"max_new_tokens": 1024, "temperature": 0.7}

-     # Generate the response
      try:
-         response = llm_engine.generate(
-             prompt=text,
-             image_data=image_inputs,
-             sampling_params=sampling_params
-         )
-         return response['text']
      except Exception as e:
          return f"An error occurred during generation: {str(e)}"

  # --- Gradio Interface ---
  with gr.Blocks() as demo:
      gr.Markdown(
          """
-         # Qwen3-VL-2B-Instruct-FP8 Demo
-         This Space demonstrates the capabilities of the Qwen3-VL-2B-Instruct-FP8 model.
-         Upload an image, type a question or a command, and see the model's response.
-         **Note:** This demo is running on a CPU and may be slow. For better performance, consider upgrading to a GPU Space.
          """
      )
@@ -99,9 +100,8 @@ with gr.Blocks() as demo:

      gr.Examples(
          examples=[
-             ["https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-VL/receipt.png", "Read all the text in the image."],
-             ["https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-VL/what_is_in_the_box.jpg", "What is in the red box?"],
-             ["https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-VL/chart.png", "What is the value for 'Training & Other'?"],
          ],
          inputs=[image_input, text_prompt]
      )
  import gradio as gr
  import torch
  from PIL import Image
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from transformers.generation import GenerationConfig
  import requests
  from io import BytesIO
+ import os

  # --- Configuration ---
+ # Using a CPU-compatible model from the Qwen family
+ MODEL_PATH = "Qwen/Qwen-VL-Chat"
+ CPU_DEVICE = "cpu"  # Explicitly use CPU

+ # --- Model and Tokenizer Loading ---
+ # This will be done once when the Space starts. It will be slow on a CPU.
+ print("Loading model and tokenizer... This may take a while on a CPU.")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

+ # For CPU, we load the model in bfloat16 if supported, otherwise float32.
+ # Note: This will consume a significant amount of RAM.
+ try:
+     model = AutoModelForCausalLM.from_pretrained(
+         MODEL_PATH,
+         device_map=CPU_DEVICE,
+         trust_remote_code=True,
+         bf16=torch.cuda.is_bf16_supported(),  # bf16 on CPU can be slow, but uses less memory
+     ).eval()
+ except RuntimeError:
+     # Fall back to float32 if bf16 is not supported or causes issues
+     model = AutoModelForCausalLM.from_pretrained(
+         MODEL_PATH,
+         device_map=CPU_DEVICE,
+         trust_remote_code=True
+     ).eval()
+
+ # Specify generation configuration
+ model.generation_config = GenerationConfig.from_pretrained(MODEL_PATH, trust_remote_code=True)
+ print("Model and tokenizer loaded successfully.")

  # --- Inference Function ---
  def process_and_generate(image_input, text_prompt):
      """
+     Processes the image and text prompt, and generates a response from the model on the CPU.
      """
      if image_input is None or text_prompt.strip() == "":
          return "Please provide both an image and a text prompt."

+     # Convert Gradio's numpy array to a PIL Image
      pil_image = Image.fromarray(image_input)
+     # Create a temporary path to save the image
+     temp_image_path = "temp_image.png"
+     pil_image.save(temp_image_path)

+     # The model's tokenizer can directly handle an image path.
+     # We construct the query according to the model's required format.
+     query = tokenizer.from_list_format([
+         {'image': temp_image_path},
+         {'text': text_prompt},
+     ])

+     print("Generating response... This will be slow.")
      try:
+         # Generate the response
+         response, history = model.chat(tokenizer, query=query, history=None)
+
+         # Clean up the temporary image file
+         os.remove(temp_image_path)
+
+         return response
      except Exception as e:
+         # Clean up even if there's an error
+         if os.path.exists(temp_image_path):
+             os.remove(temp_image_path)
          return f"An error occurred during generation: {str(e)}"

  # --- Gradio Interface ---
  with gr.Blocks() as demo:
      gr.Markdown(
          """
+         # Qwen-VL-Chat CPU Demo
+         This Space demonstrates the `Qwen/Qwen-VL-Chat` model, a CPU-compatible alternative to Qwen3-VL.
+         **Warning:** Running this vision-language model on a CPU is very slow. Please be patient after clicking generate.
          """
      )
@@ -99,9 +100,8 @@ with gr.Blocks() as demo:

      gr.Examples(
          examples=[
+             ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/QWEN-VL/assets/demo.jpeg", "这是什么?"],  # "What is this?"
+             ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/QWEN-VL/assets/demo.jpeg", "框出图中礼服和帽子"],  # "Draw boxes around the dress and the hat in the image"
          ],
          inputs=[image_input, text_prompt]
      )
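
For context on the API the updated app.py relies on: below is a minimal, standalone sketch of the `tokenizer.from_list_format` / `model.chat` pattern as documented on the Qwen/Qwen-VL-Chat model card, using the demo image URL from the new examples. It mirrors the calls in the diff but is not part of the commit.

```python
# Minimal sketch of the Qwen-VL-Chat inference pattern used by the new app.py.
# Assumes the trust_remote_code interface documented on the Qwen/Qwen-VL-Chat model card.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "Qwen/Qwen-VL-Chat"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH, device_map="cpu", trust_remote_code=True
).eval()

# Build a multimodal query: the tokenizer accepts image paths or URLs alongside text.
query = tokenizer.from_list_format([
    {"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/QWEN-VL/assets/demo.jpeg"},
    {"text": "What is this?"},
])

# model.chat returns the response text plus the updated conversation history.
response, history = model.chat(tokenizer, query=query, history=None)
print(response)
```

Accepting an image path in the list format is what lets the Space save the uploaded image to a temporary file instead of wiring up a separate image processor, which is the approach the new `process_and_generate` takes.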
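And a hypothetical smoke test for the updated `process_and_generate` (not part of the commit; the function name and URL are taken from the diff, and the numpy conversion matches how Gradio's image component delivers uploads):

```python
# Hypothetical smoke test: call process_and_generate outside the Gradio UI,
# in the same process where app.py has been loaded.
from io import BytesIO

import numpy as np
import requests
from PIL import Image

url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/QWEN-VL/assets/demo.jpeg"
img = Image.open(BytesIO(requests.get(url, timeout=30).content)).convert("RGB")

# Gradio passes images as numpy arrays, so mimic that here.
print(process_and_generate(np.array(img), "What is this?"))
```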