# khang119966's picture
# Update app.py
# commit 029d2da (verified)
import gradio as gr
import torch
import spaces
from threading import Thread
from transformers import TextIteratorStreamer
# Module-level model state shared across requests: only one model is kept
# resident on the GPU at a time; it is (re)loaded lazily inside process_chat
# when the user picks a different checkpoint from the dropdown.
current_model_name = None  # name of the currently loaded checkpoint, or None before first load
model = None               # the loaded vision-language model (unsloth FastVisionModel)
tokenizer = None           # processor/tokenizer paired with `model`

# Checkpoints selectable in the UI dropdown, ordered by size.
MODELS = [
    "Qwen/Qwen3.5-0.8B",
    "Qwen/Qwen3.5-2B",
    "Qwen/Qwen3.5-4B",
    "Qwen/Qwen3.5-9B",
]
@spaces.GPU(duration=120)  # HF Spaces: run on GPU, hard limit of 120 s per call
def process_chat(image, prompt_text, model_name, history):
    """Generator that streams an assistant reply about `image` into the chat.

    Args:
        image: uploaded image (PIL image from gr.Image(type="pil")), or None.
        prompt_text: user's question; falls back to a default caption prompt
            when blank.
        model_name: checkpoint id selected in the dropdown (one of MODELS).
        history: current chat history as a list of {"role", "content"} dicts.

    Yields:
        Updated history lists — first transient status messages (model
        loading / missing image), then the growing assistant reply as
        tokens stream in.
    """
    global current_model_name, model, tokenizer
    # Lazily (re)load the model only when the dropdown selection changed.
    if current_model_name != model_name:
        # Imported here so the heavy unsloth import happens inside the GPU worker.
        from unsloth import FastVisionModel
        # Transient status bubble; it is not kept in the returned history.
        yield history + [{"role": "assistant", "content": f"⏳ Loading model `{model_name}`... please wait."}]
        # Free the previous model before loading the new one to avoid
        # holding two checkpoints in GPU memory at once.
        if model is not None:
            del model, tokenizer
            torch.cuda.empty_cache()
        model, tokenizer = FastVisionModel.from_pretrained(
            model_name,
            load_in_4bit=False,
            use_gradient_checkpointing="unsloth",
        )
        FastVisionModel.for_inference(model)  # switch to inference mode
        current_model_name = model_name
    # An image is required; bail out with a hint instead of crashing.
    if image is None:
        yield history + [{"role": "assistant", "content": "⚠️ Please upload an image first!"}]
        return
    # Empty prompt -> generic captioning request.
    if not prompt_text.strip():
        prompt_text = "Describe this image in detail."
    # Single-turn message in the multimodal chat format expected by the
    # processor (image placeholder + text).
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt_text}
        ]}
    ]
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")
    # Stream decoded tokens as they are generated; generation runs in a
    # background thread so this generator can yield partial output.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        use_cache=True,
        temperature=1.5,  # NOTE(review): unusually high temperature — presumably intentional for this model family
        min_p=0.1,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    # Append the user turn plus an empty assistant turn, then grow the
    # assistant message as text streams in.
    history = history + [
        {"role": "user", "content": prompt_text},
        {"role": "assistant", "content": ""},
    ]
    for new_text in streamer:
        history[-1]["content"] += new_text
        yield history
# UI layout: image/model controls on the left, streaming chat on the right.
with gr.Blocks(title="Qwen3.5 Vision Chat") as demo:
    gr.Markdown(
        """
# 🦁 Qwen3.5 Vision Chat
A simple demo to chat with **Qwen3.5 Vision models** using an image.
### 📌 How to use
1️⃣ Upload an image (or paste from clipboard).
2️⃣ Select the model size you want.
3️⃣ Ask a question about the image.
4️⃣ Click **Send 🚀** or press **Enter**.
❤️ If this demo is useful, please consider giving it a **like / heart**.
"""
    )
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="🖼️ Upload Image", sources=["upload", "clipboard"])
            model_dropdown = gr.Dropdown(
                choices=MODELS,
                value="Qwen/Qwen3.5-2B",
                label="⚙️ Select Model"
            )
            clear_btn = gr.Button("🗑️ Clear History", variant="stop")
        with gr.Column(scale=2):
            # FIX: process_chat yields openai-style {"role", "content"} dicts,
            # which requires the "messages" chatbot format — the default
            # tuple format would raise/mis-render in current Gradio.
            chatbot = gr.Chatbot(label="💬 Chat", height=500, type="messages")
            with gr.Row():
                text_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask something about the image...",
                    scale=8
                )
                submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    # Send on button click, then clear the textbox.
    submit_btn.click(
        fn=process_chat,
        inputs=[image_input, text_input, model_dropdown, chatbot],
        outputs=[chatbot]
    ).then(lambda: "", None, text_input)
    # Same behavior when pressing Enter in the textbox.
    text_input.submit(
        fn=process_chat,
        inputs=[image_input, text_input, model_dropdown, chatbot],
        outputs=[chatbot]
    ).then(lambda: "", None, text_input)
    # Reset the conversation (the loaded model is kept).
    clear_btn.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    # Queue requests so concurrent users share the single GPU worker fairly.
    demo.queue(max_size=20).launch()