# Hugging Face Spaces page chrome (extraction residue): this demo runs on ZeroGPU ("Running on Zero").
| import gradio as gr | |
| import torch | |
| import spaces | |
| from threading import Thread | |
| from transformers import TextIteratorStreamer | |
# Lazily-initialized model state. A checkpoint is loaded on first use and
# swapped out only when the user picks a different entry in the dropdown.
current_model_name = None
model = None
tokenizer = None

# Hugging Face repo ids selectable in the UI.
# NOTE(review): assumes these are unsloth-compatible vision checkpoints — confirm the repo ids exist.
MODELS = [
    "Qwen/Qwen3.5-0.8B",
    "Qwen/Qwen3.5-2B",
    "Qwen/Qwen3.5-4B",
    "Qwen/Qwen3.5-9B",
]
# NOTE(review): `spaces` is imported at file top but unused — on a ZeroGPU Space
# this function presumably needs an @spaces.GPU decorator; confirm before deploying.
def process_chat(image, prompt_text, model_name, history):
    """Stream an assistant reply about *image* from the selected Qwen vision model.

    Generator: yields progressively longer ``history`` lists (openai-style
    ``{"role", "content"}`` dicts) so Gradio renders the streaming output.

    Args:
        image: PIL image from the UI, or ``None`` if nothing was uploaded.
        prompt_text: user question; ``None``/blank falls back to a default prompt.
        model_name: HF repo id; changing it triggers a model (re)load.
        history: current chat history (list of message dicts).
    """
    global current_model_name, model, tokenizer

    # Validate the request BEFORE any expensive model (re)loading —
    # the original loaded a multi-GB checkpoint even when no image was given.
    if image is None:
        yield history + [{"role": "assistant", "content": "⚠️ Please upload an image first!"}]
        return
    # Guard against None as well as blank/whitespace-only prompts.
    if not prompt_text or not prompt_text.strip():
        prompt_text = "Describe this image in detail."

    if current_model_name != model_name:
        # Imported lazily: unsloth is heavy and only needed when (re)loading.
        from unsloth import FastVisionModel
        yield history + [{"role": "assistant", "content": f"⏳ Loading model `{model_name}`... please wait."}]
        if model is not None:
            # Release the previous checkpoint's VRAM before loading the new one.
            del model, tokenizer
            torch.cuda.empty_cache()
        model, tokenizer = FastVisionModel.from_pretrained(
            model_name,
            load_in_4bit=False,
            use_gradient_checkpointing="unsloth",
        )
        FastVisionModel.for_inference(model)
        current_model_name = model_name

    # Multimodal prompt: image placeholder followed by the user's text.
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt_text},
        ]}
    ]
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    # Run generation on a background thread so tokens can be streamed as they arrive.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        use_cache=True,
        temperature=1.5,
        min_p=0.1,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    history = history + [
        {"role": "user", "content": prompt_text},
        {"role": "assistant", "content": ""},
    ]
    for new_text in streamer:
        history[-1]["content"] += new_text
        yield history
    # Don't leak the worker: the streamer is exhausted only when generate() ends,
    # so this join is immediate but guarantees clean thread shutdown.
    thread.join()
# Build the Gradio UI: image + model picker on the left, streaming chat on the right.
with gr.Blocks(title="Qwen3.5 Vision Chat") as demo:
    gr.Markdown(
        """
# 🦁 Qwen3.5 Vision Chat
A simple demo to chat with **Qwen3.5 Vision models** using an image.
### 📌 How to use
1️⃣ Upload an image (or paste from clipboard).
2️⃣ Select the model size you want.
3️⃣ Ask a question about the image.
4️⃣ Click **Send 🚀** or press **Enter**.
❤️ If this demo is useful, please consider giving it a **like / heart**.
"""
    )
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="🖼️ Upload Image", sources=["upload", "clipboard"])
            model_dropdown = gr.Dropdown(
                choices=MODELS,
                value="Qwen/Qwen3.5-2B",
                label="⚙️ Select Model",
            )
            clear_btn = gr.Button("🗑️ Clear History", variant="stop")
        with gr.Column(scale=2):
            # type="messages" matches the openai-style {"role", "content"} dicts
            # that process_chat yields; the legacy tuple format does not.
            chatbot = gr.Chatbot(label="💬 Chat", height=500, type="messages")
            with gr.Row():
                text_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask something about the image...",
                    scale=8,
                )
                submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    # Button click and textbox Enter share identical wiring; register both from
    # one loop instead of duplicating the event definition. The .then() clears
    # the textbox once the streaming handler finishes.
    for _trigger in (submit_btn.click, text_input.submit):
        _trigger(
            fn=process_chat,
            inputs=[image_input, text_input, model_dropdown, chatbot],
            outputs=[chatbot],
        ).then(lambda: "", None, text_input)
    clear_btn.click(lambda: [], None, chatbot)
if __name__ == "__main__":
    # queue() is required for generator (streaming) handlers; cap pending requests at 20.
    demo.queue(max_size=20).launch()