import json
import os
import time

import gradio as gr
from dotenv import load_dotenv
from google import genai
from google.genai import types

# Import our new DINO processing function
from dino_processor import process_video_with_dino

# --- Configuration and Client Initialization ---
load_dotenv()
try:
    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
except KeyError:
    raise gr.Error("FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets.")

# Maximum seconds to wait for Gemini's server-side file processing, and how
# often to poll. Without a deadline a stuck upload would hang the UI forever.
_UPLOAD_TIMEOUT_S = 300
_POLL_INTERVAL_S = 5


# --- Main Gradio Function ---
def full_analysis(video_file_path):
    """Run the full two-stage analysis on an uploaded video.

    Stage 1 (DINO): produce heatmap overlays on representative frames and the
    raw self-attention maps via ``process_video_with_dino``.
    Stage 2 (Gemini): upload the video, wait for server-side processing, and
    request a JSON condition report.

    Args:
        video_file_path: Local path to the uploaded video, or None/"" when
            nothing was provided.

    Returns:
        A 5-tuple ``(device_type, condition, reason, overlay_images,
        attention_maps)`` matching the five Gradio output components. On a
        stage failure the textual slots carry an error message and the
        galleries may be ``None``.
    """
    if not video_file_path:
        # Return empty values for all components
        return "Please upload a video first.", "", "", None, None

    # --- DINO Analysis ---
    print("--- Starting DINO Analysis ---")
    try:
        # process_video_with_dino returns a list of tuples:
        # [(overlay1, attn1), (overlay2, attn2), ...]
        dino_results = process_video_with_dino(video_file_path)
        overlay_images = [res[0] for res in dino_results]
        attention_maps = [res[1] for res in dino_results]
        print("--- DINO Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during DINO processing: {e}")
        # Return an error message and empty galleries
        return f"Error in DINO processing: {e}", "", "", None, None

    # --- Gemini Analysis ---
    print("--- Starting Gemini Analysis ---")
    uploaded_file = None
    try:
        # Wait for DINO to finish before starting the Gemini upload
        uploaded_file = client.files.upload(file=video_file_path)

        # Poll until the file leaves the PROCESSING state, bounded by a
        # deadline so a hung upload cannot block the request indefinitely.
        deadline = time.monotonic() + _UPLOAD_TIMEOUT_S
        while uploaded_file.state.name == "PROCESSING":
            if time.monotonic() > deadline:
                raise TimeoutError("Timed out waiting for Gemini to process the uploaded video.")
            time.sleep(_POLL_INTERVAL_S)
            uploaded_file = client.files.get(name=uploaded_file.name)

        if uploaded_file.state.name != "ACTIVE":
            raise Exception(f"File processing failed for Gemini. State: {uploaded_file.state.name}")

        prompt = """
        Analyze the provided video. Respond ONLY with a valid JSON object with three keys:
        1. "device_type": A short string identifying the device.
        2. "condition": A single word: "Mint", "Excellent", "Good", "Fair", or "Poor".
        3. "reason": A brief string explaining the condition.
        """

        model_name = "gemini-1.5-flash-latest"  # Using a reliable public model
        config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
        contents = [uploaded_file, prompt]
        response = client.models.generate_content(
            model=f"models/{model_name}", contents=contents, config=config
        )

        parsed_json = json.loads(response.text)
        device_type = parsed_json.get("device_type", "N/A")
        condition = parsed_json.get("condition", "N/A")
        reason = parsed_json.get("reason", "N/A")
        print("--- Gemini Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during Gemini processing: {e}")
        device_type, condition, reason = f"Error in Gemini processing: {e}", "", ""
    finally:
        if uploaded_file:
            try:
                # Best-effort cleanup: a failed delete must not mask the
                # analysis result (or the original error) from above.
                client.files.delete(name=uploaded_file.name)
            except Exception as cleanup_err:
                print(f"WARNING: could not delete uploaded Gemini file: {cleanup_err}")

    # Return all the results to the Gradio UI
    return device_type, condition, reason, overlay_images, attention_maps


# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📱 Advanced Device Condition Analyzer")
    gr.Markdown("Upload a video to get a condition analysis from Gemini and attention maps from DINO.")

    video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
    submit_button = gr.Button("Run Full Analysis", variant="primary")

    gr.Markdown("## Gemini Condition Analysis")
    with gr.Row():
        device_type_output = gr.Textbox(label="Device Type")
        condition_output = gr.Textbox(label="Condition")
        reason_output = gr.Textbox(label="Reason / Details")

    gr.Markdown("## DINO: Overlayed Heatmaps on Representative Frames")
    # Use gr.Gallery to display multiple images. elem_ids must be unique per
    # component; the two galleries previously shared the id "gallery".
    overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="overlay_gallery")

    gr.Markdown("## DINO: Self-Attention Maps")
    attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="attention_gallery")

    submit_button.click(
        fn=full_analysis,
        inputs=video_input,
        outputs=[
            device_type_output,
            condition_output,
            reason_output,
            overlay_gallery,
            attention_gallery,
        ],
        show_progress="full",
    )

demo.launch(debug=True)