Spaces:
Sleeping
Sleeping
import html
from typing import List, Dict, Any, Tuple, Optional

import gradio as gr
import numpy as np
import pandas as pd
def create_cluster_browser_app():
    """Build the Gradio app for browsing clustered prompts from an uploaded CSV.

    The CSV must provide a ``prompt`` column and a numeric ``cluster`` column.
    After an upload the sidebar shows a status line, per-cluster counts and a
    cluster dropdown; the main area renders every matching prompt as a card.

    Returns:
        The assembled ``gr.Blocks`` application (not launched).
    """
    no_data_label = "(No data loaded)"
    all_clusters_label = "(All Clusters)"

    def cluster_label(cluster_value) -> str:
        """Return the dropdown label for one cluster value.

        Used both to build the dropdown choices and to filter rows, so the
        two can never disagree on how a value (int, float, NaN) is rendered.
        """
        return f"Cluster {cluster_value}"

    def load_and_validate_csv(file) -> Tuple[Optional[pd.DataFrame], str, List[str], str]:
        """Load and validate the uploaded CSV file.

        Args:
            file: Upload handed over by Gradio; either an object exposing the
                path as ``.name`` or a plain path string. ``None`` means no
                file is selected.

        Returns:
            Tuple of (dataframe, status_message, cluster_options,
            cluster_stats). On any failure the dataframe is ``None``, the
            options contain only the "(No data loaded)" placeholder and the
            stats are empty.
        """
        if file is None:
            return None, "Please upload a CSV file with 'prompt' and 'cluster' columns.", [no_data_label], ""

        # Broad catch is deliberate: this is the UI boundary, and any failure
        # should surface as a status message rather than an unhandled error.
        try:
            df = pd.read_csv(getattr(file, "name", file))

            # Validate required columns
            required_cols = ['prompt', 'cluster']
            missing_cols = [col for col in required_cols if col not in df.columns]
            if missing_cols:
                return None, f"Missing required columns: {missing_cols}. Please ensure your CSV has 'prompt' and 'cluster' columns.", [no_data_label], ""

            # Validate data types
            if not pd.api.types.is_numeric_dtype(df['cluster']):
                return None, "The 'cluster' column must contain numeric values.", [no_data_label], ""

            # One pass yields both the sorted distinct clusters and their
            # sizes. dropna=False keeps rows with a missing cluster visible
            # (sorted last) instead of silently reporting them as 0.
            counts = df['cluster'].value_counts(dropna=False).sort_index()
            cluster_options = [all_clusters_label] + [cluster_label(c) for c in counts.index]

            # Bullet list: single newlines are not line breaks in Markdown,
            # so plain joined lines would collapse into one paragraph.
            stats = [f"- {cluster_label(c)}: {n} prompts" for c, n in counts.items()]
            stats_text = (
                f"**Total Prompts:** {len(df)}\n\n**Cluster Distribution:**\n\n"
                + "\n".join(stats)
            )
            return df, f"✅ Successfully loaded {len(df)} prompts with {len(counts)} clusters.", cluster_options, stats_text
        except Exception as e:
            return None, f"Error loading CSV file: {str(e)}", [no_data_label], ""

    def filter_by_cluster(df: pd.DataFrame, cluster_sel: str) -> pd.DataFrame:
        """Return the rows of ``df`` belonging to the selected cluster.

        ``None``, "(All Clusters)" and "(No data loaded)" select everything.
        A missing dataframe yields an empty one. Rows are matched on the
        rendered cluster label, so float-valued and NaN clusters work and an
        unknown label simply yields an empty result.
        """
        if df is None:
            return pd.DataFrame()
        if cluster_sel is None or cluster_sel in (all_clusters_label, no_data_label):
            return df
        row_labels = df['cluster'].map(cluster_label)
        return df[row_labels == cluster_sel].reset_index(drop=True)

    def format_prompt_cell(prompt_text: str) -> str:
        """Format a single prompt as its own HTML card.

        The text comes from an uploaded file, so it is HTML-escaped before
        being embedded in the markup.
        """
        safe_text = html.escape(prompt_text)
        return f"""
        <div style="
            background: #f8f9fa;
            border: 1px solid #e9ecef;
            border-radius: 8px;
            padding: 16px;
            margin: 8px 0;
            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
        ">
            <div style="font-size: 14px; line-height: 1.5; color: #333;">
                {safe_text}
            </div>
        </div>
        """

    def format_prompts(df: pd.DataFrame) -> str:
        """Format all prompts in the dataframe as individual cards."""
        if df is None or df.empty:
            return "No prompts to display."
        return "\n".join(
            format_prompt_cell(str(prompt).strip()) for prompt in df['prompt']
        )

    def on_file_upload(file):
        """Handle a file upload: validate it and refresh every dependent widget.

        Returns values for (df_state, status_md, cluster_dropdown,
        prompts_html, stats_md), in that order.
        """
        df, status_msg, cluster_options, cluster_stats = load_and_validate_csv(file)
        if df is None:
            return None, status_msg, gr.Dropdown(choices=cluster_options, value=no_data_label, interactive=False), "No data loaded.", ""
        # Show all prompts initially
        return df, status_msg, gr.Dropdown(choices=cluster_options, value=all_clusters_label, interactive=True), format_prompts(df), cluster_stats

    def on_cluster_change(df, cluster_sel):
        """Handle a cluster selection change by re-rendering the prompt list."""
        if df is None:
            return "No data loaded."
        return format_prompts(filter_by_cluster(df, cluster_sel))

    # Create the Gradio interface
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
        gr.Markdown("# Prompt Cluster Browser")

        # Store the loaded dataframe
        df_state = gr.State(None)

        with gr.Row():
            # Sidebar
            with gr.Column(scale=1):
                # File upload section
                file_upload = gr.File(
                    label="Upload Clustered Prompts CSV",
                    file_types=[".csv"],
                    file_count="single"
                )
                # Status
                status_md = gr.Markdown("Please upload a CSV file to get started.")
                # Cluster statistics
                stats_md = gr.Markdown("")
                # Cluster selection
                cluster_dropdown = gr.Dropdown(
                    [no_data_label],
                    label="Select Cluster",
                    value=no_data_label,
                    interactive=False
                )

            # Main content area
            with gr.Column(scale=3):
                prompts_html = gr.HTML("Upload a CSV file to browse clusters")

        # Connect event handlers
        file_upload.change(
            on_file_upload,
            [file_upload],
            [df_state, status_md, cluster_dropdown, prompts_html, stats_md]
        )
        cluster_dropdown.change(
            on_cluster_change,
            [df_state, cluster_dropdown],
            [prompts_html]
        )

    return demo
def launch_cluster_browser():
    """Create the cluster browser app and call its ``launch()`` method."""
    create_cluster_browser_app().launch()
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    launch_cluster_browser()