import json
from datetime import datetime, timezone

import gradio as gr
import numpy as np
import pandas as pd

from constants import (
    BANNER,
    INTRODUCTION_TEXT,
    CITATION_TEXT,
    METRICS_TAB_TEXT,
    DIR_OUTPUT_REQUESTS,
    LEADERBOARD_CSS,
    EU_LANGUAGES,
    MULTILINGUAL_TAB_TEXT,
)
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
from utils_display import (
    AutoEvalColumn,
    MultilingualColumn,
    fields,
    make_clickable_model,
    styled_error,
    styled_message,
)

LAST_UPDATED = "Aug 15th 2025"

# Global state: per-model, per-language benchmark details (for the detailed
# view) and the set of languages whose columns are currently expanded.
benchmark_details = {}
expanded_languages = set()

column_names = {
    "MODEL": "Model",
    "Avg. WER": "Average WER ⬇️",
    "RTFx": "RTFx ⬆️️",
    "AMI WER": "AMI",
    "Earnings22 WER": "Earnings22",
    "Gigaspeech WER": "Gigaspeech",
    "LS Clean WER": "LS Clean",
    "LS Other WER": "LS Other",
    "SPGISpeech WER": "SPGISpeech",
    "Tedlium WER": "Tedlium",
    "Voxpopuli WER": "Voxpopuli",
}

eval_queue_repo, requested_models, csv_results, multilingual_csv_path = load_all_info_from_dataset_hub()

if not csv_results.exists():
    raise Exception(f"CSV file {csv_results} does not exist locally")

# Get the CSV with the English results and parse the columns
original_df = pd.read_csv(csv_results)

def formatter(x):
    """Format a cell for display: keep strings as-is, map -1 to "NA", round numbers."""
    if isinstance(x, str):
        return x
    if x == -1:
        return "NA"
    return round(x, 2)

for col in original_df.columns:
    if col == "model":
        original_df[col] = original_df[col].apply(make_clickable_model)
    else:
        original_df[col] = original_df[col].apply(formatter)  # numerical values

original_df.rename(columns=column_names, inplace=True)
original_df.sort_values(by="Average WER ⬇️", inplace=True)

COLS = [c.name for c in fields(AutoEvalColumn)]
TYPES = [c.type for c in fields(AutoEvalColumn)]

# Multilingual columns (dynamic based on the expansion state)
MULTILINGUAL_COLS = [c.name for c in fields(MultilingualColumn)]
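# Note on the multilingual table below (a summary of the code, not an enforced
# contract): the dataframe is rebuilt from the CSV on every language toggle,
# and `expanded_languages` decides whether a language is rendered as a single
# averaged column or as separate Avg/CoVoST/MLS/FLEURS columns.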
def _clean_score(score):
    """Map missing, zero, empty, or NaN benchmark values to None."""
    if score is None or pd.isna(score):
        return None
    if score == 0.0 or str(score).strip() in ("", "0"):
        return None
    return score

def create_multilingual_dataframe():
    """Create the multilingual dataframe with CoVoST, MLS, and FLEURS benchmark data."""
    global benchmark_details, expanded_languages

    if multilingual_csv_path is None or not multilingual_csv_path.exists():
        raise Exception("Multilingual CSV file not found")

    multilingual_raw_df = pd.read_csv(multilingual_csv_path)

    # Store detailed benchmark data for the click functionality
    benchmark_details = {}
    multilingual_data = []

    for _, row_data in multilingual_raw_df.iterrows():
        model_name = row_data["model"]
        model_details = {}
        row = {"Model": make_clickable_model(model_name)}

        # All individual dataset scores across all languages for this model
        all_datapoints = []

        for lang_code, lang_info in EU_LANGUAGES.items():
            # Read the individual benchmark scores from the CSV, using None for
            # missing values. Special cases: de has no MLS, pt has no CoVoST.
            covost_score = None if lang_code == "pt" else _clean_score(row_data.get(f"{lang_code}_covost", None))
            mls_score = None if lang_code == "de" else _clean_score(row_data.get(f"{lang_code}_mls", None))
            fleurs_score = _clean_score(row_data.get(f"{lang_code}_fleurs", None))

            # Per-language average over the available scores only (for display)
            available_scores = [s for s in (covost_score, mls_score, fleurs_score) if s is not None and s > 0]
            all_datapoints.extend(available_scores)
            avg_score = round(sum(available_scores) / len(available_scores), 2) if available_scores else None

            # Store individual scores for the detailed view, keeping only the
            # datasets that actually exist for this language
            lang_data = {"average": avg_score if avg_score is not None else "NA"}
            if covost_score is not None:
                lang_data["CoVoST"] = covost_score
            if mls_score is not None:
                lang_data["MLS"] = mls_score
            if fleurs_score is not None:
                lang_data["FLEURS"] = fleurs_score
            model_details[lang_code] = lang_data

        # Overall multilingual average over all individual datapoints
        row["Average WER ⬇️"] = round(np.mean(all_datapoints), 2) if all_datapoints else 0.0

        # RTFx is a single value per model; treat 0 and -1 as "NA", matching
        # the English leaderboard
        rtfx_value = row_data.get("rtfx", row_data.get("RTFx", 0.0))
        row["RTFx ⬆️️"] = "NA" if rtfx_value in (0, -1, "0", "0.0") else rtfx_value

        # Add the language columns based on the expansion state
        for lang_code, lang_info in EU_LANGUAGES.items():
            lang_col_name = f"{lang_info['flag']} {lang_info['name']}"
            model_data = model_details[lang_code]
            if lang_code in expanded_languages:
                # Show the average column AND one column per available dataset
                row[f"{lang_col_name} Avg"] = model_data["average"]
                for dataset in ("CoVoST", "MLS", "FLEURS"):
                    if dataset in model_data:
                        row[f"{lang_col_name} {dataset}"] = model_data[dataset]
            else:
                # Show only the average column
                row[lang_col_name] = model_data["average"]

        # Store the model details for the click functionality
        benchmark_details[model_name] = model_details
        multilingual_data.append(row)

    multilingual_df = pd.DataFrame(multilingual_data)
    return multilingual_df.sort_values(by="Average WER ⬇️")

def get_multilingual_datatypes(df):
    """Generate the datatypes for the multilingual dataframe columns."""
    # "markdown" lets the Model column render its clickable HTML link
    return ["markdown" if col == "Model" else "number" for col in df.columns]
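# Shape of the `benchmark_details` cache populated above (an illustrative
# sketch with dummy numbers, derived from the code rather than a schema
# defined elsewhere):
#   benchmark_details["org/model"] = {
#       "de": {"average": 7.12, "CoVoST": 6.9, "FLEURS": 7.3},  # de: no MLS
#       "pt": {"average": 9.40, "MLS": 9.1, "FLEURS": 9.7},     # pt: no CoVoST
#       ...
#   }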
def get_language_details(model, language_code):
    """Get the detailed breakdown for a specific model and language."""
    global benchmark_details

    if model not in benchmark_details or language_code not in benchmark_details[model]:
        return None

    language_info = EU_LANGUAGES.get(language_code, {})
    language_name = language_info.get("name", "Unknown")
    model_data = benchmark_details[model][language_code]

    # Use .get() here: the CoVoST and MLS keys are absent for pt and de
    return {
        "Language": f"{language_info.get('flag', '')} {language_name}",
        "Model": model,
        "CoVoST WER": model_data.get("CoVoST", "NA"),
        "MLS WER": model_data.get("MLS", "NA"),
        "FLEURS WER": model_data.get("FLEURS", "NA"),
        "Average WER": model_data["average"],
    }

def toggle_language_expansion(language_code):
    """Toggle the expansion of a language's columns when its button is clicked."""
    global expanded_languages

    if language_code in expanded_languages:
        expanded_languages.remove(language_code)
    else:
        expanded_languages.add(language_code)

    # Recreate the dataframe with the new expansion state
    updated_df = create_multilingual_dataframe()
    updated_datatypes = get_multilingual_datatypes(updated_df)
    return gr.update(value=updated_df, datatype=updated_datatypes)

# Initialize the multilingual dataframe
multilingual_df = create_multilingual_dataframe()
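# Request filename convention used by request_model below (derived from the
# code; the model id is a made-up example): "/" becomes "@" and "@@" separates
# the model id from the dataset list, e.g.
#   "org/some-model" + ["ESB Datasets tests only"]
#   -> "org@some-model@@ESB Datasets tests only.txt"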
def request_model(model_text, chbcoco2017):
    # Determine the selected checkboxes
    dataset_selection = []
    if chbcoco2017:
        dataset_selection.append("ESB Datasets tests only")

    if len(dataset_selection) == 0:
        return styled_error("You need to select at least one dataset")

    base_model_on_hub, error_msg = is_model_on_hub(model_text)
    if not base_model_on_hub:
        return styled_error(f"Base model '{model_text}' {error_msg}")

    # Construct the output dictionary
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    required_datasets = ", ".join(dataset_selection)
    eval_entry = {
        "date": current_time,
        "model": model_text,
        "datasets_selected": required_datasets,
    }

    # Prepare the file path
    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)
    fn_datasets = "@ ".join(dataset_selection)
    filename = model_text.replace("/", "@") + "@@" + fn_datasets
    if filename in requested_models:
        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")

    try:
        filename_ext = filename + ".txt"
        out_filepath = DIR_OUTPUT_REQUESTS / filename_ext

        # Write the request to a text file
        with open(out_filepath, "w") as f:
            f.write(json.dumps(eval_entry))

        upload_file(filename, out_filepath)

        # Include the file in the list of uploaded files
        requested_models.append(filename)

        # Remove the local file
        out_filepath.unlink()

        return styled_message("🤗 Your request has been submitted and will be evaluated soon!")
    except Exception:
        return styled_error("Error submitting request!")

def filter_main_table(show_proprietary=True):
    filtered_df = original_df.copy()

    # When proprietary models are hidden, keep only models with an "Open" license
    if not show_proprietary and "License" in filtered_df.columns:
        filtered_df = filtered_df[filtered_df["License"] == "Open"]

    return filtered_df

with gr.Blocks(css=LEADERBOARD_CSS) as demo:
    gr.HTML(BANNER, elem_id="banner")
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
            leaderboard_table = gr.components.Dataframe(
                value=original_df,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )
            with gr.Row():
                show_proprietary_checkbox = gr.Checkbox(
                    label="Show proprietary models",
                    value=True,
                    elem_id="show-proprietary-checkbox",
                )
            # Connect the checkbox to the filtering function
            show_proprietary_checkbox.change(
                filter_main_table,
                inputs=[show_proprietary_checkbox],
                outputs=leaderboard_table,
            )

        with gr.TabItem("🌍 Multilingual", elem_id="multilingual-benchmark-tab-table", id=1):
            gr.Markdown(MULTILINGUAL_TAB_TEXT, elem_classes="markdown-text")

            gr.Markdown("Click on a language button to show/hide detailed benchmark scores (CoVoST, MLS, FLEURS):")
            language_buttons = {}
            lang_codes = list(EU_LANGUAGES.keys())

            # Language toggle buttons: two rows of five
            for row_codes in (lang_codes[:5], lang_codes[5:]):
                with gr.Row():
                    for lang_code in row_codes:
                        lang_info = EU_LANGUAGES[lang_code]
                        button_label = f"{lang_info['flag']} {lang_info['name']}"
                        language_buttons[lang_code] = gr.Button(
                            button_label, variant="secondary", size="sm"
                        )

            multilingual_table = gr.components.Dataframe(
                value=multilingual_df,
                datatype=get_multilingual_datatypes(multilingual_df),
                elem_id="multilingual-table",
                interactive=False,
                visible=True,
            )

            # Connect the buttons to the expansion toggle. The factory function
            # pins `lang_code` per button; a bare lambda in the loop would close
            # over the loop variable and always toggle the last language.
            for lang_code, button in language_buttons.items():
                def create_toggle_func(code):
                    return lambda: toggle_language_expansion(code)
                button.click(create_toggle_func(lang_code), outputs=[multilingual_table])

        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=3):
            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=4):
            with gr.Column():
                gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
            with gr.Column():
                gr.Markdown("Select a dataset:", elem_classes="markdown-text")
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
                chb_coco2017 = gr.Checkbox(label="COCO validation 2017 dataset", visible=False, value=True, interactive=False)
            with gr.Column():
                mdw_submission_result = gr.Markdown()
                btn_submit = gr.Button(value="🚀 Request")
                btn_submit.click(request_model, [model_name_textbox, chb_coco2017], mdw_submission_result)

        # Add an About section
        with gr.TabItem("🤗 About", elem_id="od-benchmark-tab-table", id=5):
            gr.Markdown("## About", elem_classes="markdown-text")
            gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            gr.Textbox(
                value=CITATION_TEXT,
                lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True,
            )

demo.launch(ssr_mode=False)
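# `get_language_details` is defined above but not wired into the UI. A minimal
# sketch of how it could be attached, assuming Gradio's Dataframe `.select`
# event and `gr.SelectData` (the column-to-language mapping is left as a
# placeholder and would need to account for the expansion state):
#
#     def on_table_select(evt: gr.SelectData):
#         row_idx, col_idx = evt.index
#         ...  # map row_idx/col_idx back to a model name and language code
#         return get_language_details(model_name, lang_code)
#
#     multilingual_table.select(on_table_select, outputs=[...])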