# pip install -U transformers gradio pillow torchvision
"""Gradio chat demo for a multimodal (image + text) Gemma model.

The model and processor are loaded once at import time. The UI keeps the
conversation as a list of ``[user, assistant]`` text pairs in ``gr.State`` and
converts it to role/content dictionaries whenever the chatbot is updated.
"""
import torch
import gradio as gr
from transformers import AutoProcessor, AutoModelForImageTextToText

# ── Model loading ────────────────────────────────────────────────────────────
MODEL_ID = "google/gemma-4-12B"

print(f"載入模型:{MODEL_ID} ...")
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    dtype=torch.float16,  # `torch_dtype` is deprecated; use `dtype` instead
    device_map="auto",
)
model.eval()
print("模型載入完成!")


# ── Inference helpers ────────────────────────────────────────────────────────
def build_messages(history: list, user_text: str, image=None) -> list:
    """Convert the pair history plus the new turn into chat-template messages.

    Args:
        history: Previous turns as ``[user_text, assistant_text]`` pairs.
        user_text: Text of the new user turn; ``None`` is treated as ``""``.
        image: Optional image for the new turn. It is placed before the text
            in the new user message. Images from earlier turns are not kept
            in ``history`` and therefore are not replayed.

    Returns:
        A list of ``{"role": ..., "content": [...]}`` dictionaries ending with
        the new user message.
    """
    messages = []
    for user_turn, assistant_turn in history:
        messages.append(
            {"role": "user", "content": [{"type": "text", "text": user_turn or ""}]}
        )
        messages.append(
            {"role": "assistant", "content": [{"type": "text", "text": assistant_turn or ""}]}
        )

    user_content = []
    if image is not None:
        user_content.append({"type": "image", "image": image})
    user_content.append({"type": "text", "text": user_text or ""})
    messages.append({"role": "user", "content": user_content})
    return messages


def to_chatbot_messages(history: list) -> list:
    """Convert ``[user, assistant]`` pairs into role/content dicts for ``gr.Chatbot``.

    Gradio 6 removed the tuple format, so the chatbot component must receive
    messages-format dictionaries rather than the raw pair list.
    """
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn or ""})
        messages.append({"role": "assistant", "content": assistant_turn or ""})
    return messages


def chat(user_message, image, history, max_new_tokens, temperature, top_p):
    """Run one conversation turn and return the updated UI values.

    Args:
        user_message: Text typed by the user (may be empty when only an image
            is sent).
        image: Optional uploaded image, or ``None``.
        history: Conversation state as a list of ``[user, assistant]`` pairs.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        top_p: Nucleus-sampling threshold, used only when sampling.

    Returns:
        A 4-tuple ``(chatbot_messages, history, "", None)``: the chatbot value
        in messages format, the new pair history for ``gr.State``, and the
        values that clear the textbox and the image input.
    """
    history = history or []
    user_message = user_message or ""

    # Nothing to send: leave the conversation as is and reset the inputs.
    if not user_message.strip() and image is None:
        return to_chatbot_messages(history), history, "", None

    messages = build_messages(history, user_message, image)

    # The image travels inside `messages`; passing it again through an
    # `images=` keyword would hand the processor the same argument twice.
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device, dtype=torch.float16)

    # Only forward sampling parameters when sampling is actually enabled, so
    # greedy decoding does not receive flags it ignores.
    sampling = float(temperature) > 0
    generation_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "do_sample": sampling,
    }
    if sampling:
        generation_kwargs["temperature"] = float(temperature)
        generation_kwargs["top_p"] = float(top_p)

    with torch.inference_mode():
        output_ids = model.generate(**inputs, **generation_kwargs)

    # Decode only the tokens generated after the prompt.
    input_len = inputs["input_ids"].shape[-1]
    response = processor.decode(
        output_ids[0, input_len:], skip_special_tokens=True
    ).strip()

    # Build a new list instead of mutating the state object in place.
    history = history + [[user_message, response]]
    return to_chatbot_messages(history), history, "", None


def clear_history():
    """Return the values that reset the chatbot, state, textbox and image."""
    return [], [], "", None


# ── Gradio UI (Gradio 6 compatible) ──────────────────────────────────────────
CSS = """
#chatbot { height: 550px; }
.send-btn { background: #10b981 !important; color: white !important; }
footer { display: none !important; }
"""

with gr.Blocks(title="Gemma-4 Chat") as demo:
    gr.Markdown(
        """
        # 🤖 Gemma-4 多模態對話助理
        支援純文字對話,亦可上傳圖片進行圖文問答。
        """
    )

    # Conversation kept as [user, assistant] pairs; converted for display.
    state = gr.State([])

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                elem_id="chatbot",
                label="對話視窗",
                # Gradio 6 has no `type` argument: the chatbot always uses the
                # messages format, so callbacks return role/content dicts.
            )
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="輸入訊息,按 Enter 或點擊送出…",
                    show_label=False,
                    lines=2,
                    scale=5,
                )
                send_btn = gr.Button("送出 ▶", elem_classes="send-btn", scale=1)

        with gr.Column(scale=1):
            image_input = gr.Image(
                label="上傳圖片(選填)",
                type="pil",
                height=220,
            )
            gr.Markdown("### ⚙️ 生成參數")
            max_new_tokens = gr.Slider(64, 2048, value=512, step=64, label="最大生成長度")
            temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature(0 = 確定性)")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            clear_btn = gr.Button("🗑️ 清除對話", variant="secondary")

    send_inputs = [user_input, image_input, state, max_new_tokens, temperature, top_p]
    send_outputs = [chatbot, state, user_input, image_input]

    send_btn.click(chat, inputs=send_inputs, outputs=send_outputs)
    user_input.submit(chat, inputs=send_inputs, outputs=send_outputs)
    clear_btn.click(clear_history, outputs=[chatbot, state, user_input, image_input])


# ── Launch ───────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        share=False,  # set to True when running on Colab
        inbrowser=True,
        theme=gr.themes.Soft(primary_hue="emerald"),  # Gradio 6: theme moved to launch()
        css=CSS,
    )