"""Streamlit page for browsing Arena-Lite results: Elo ratings, tournament
brackets, per-match details, and LLM Judge bias diagnostics."""

import pandas as pd
import streamlit as st

import analysis_utils as au
from analysis_utils import number_breakdown_from_df
from app import load_and_cache_data

# from app import VA_ROOT
from query_comp import QueryWrapper, get_base_url
from varco_arena.varco_arena_core.prompts import load_prompt
from view_utils import (
    default_page_setting,
    escape_markdown,
    set_nav_bar,
    show_linebreak_in_md,
)

# Shared Plotly layout options applied to every chart on this page
DEFAULT_LAYOUT_DICT = {
    "title": {"font": {"size": 20, "family": "Gothic A1"}},
    "font": {"size": 16, "family": "Gothic A1"},
    "xaxis": {"tickfont": {"size": 12, "family": "Gothic A1"}},
    "yaxis": {"tickfont": {"size": 12, "family": "Gothic A1"}},
    "legend": {"font": {"size": 12, "family": "Gothic A1"}},
}


def navigate(t, source, key, val):
    # Step the selection stored at st.session_state[key] by `val` positions
    # within the option list `t`, then rerun the app to apply it.
    if source is None:
        return
    target_index = t.index(source) + val
    if 0 <= target_index < len(t):
        st.session_state[key] = t[target_index]
        st.rerun()


def main():
    sidebar_placeholder = default_page_setting(layout="wide")
    set_nav_bar(
        False,
        sidebar_placeholder=sidebar_placeholder,
        toggle_hashstr="see_results_init",
    )

    # Load cached results, preferring the most recent run if one is recorded
    most_recent_run = st.session_state.get("result_file_path", None)
    most_recent_run = str(most_recent_run) if most_recent_run is not None else None
    (
        st.session_state["all_result_dict"],
        st.session_state["df_dict"],
    ) = load_and_cache_data(result_file_path=most_recent_run)

    # Sidebar: pick which experiment's results to inspect
    st.sidebar.title("Select Result:")
    result_select = QueryWrapper("expname")(
        st.sidebar.selectbox,
        list(st.session_state["all_result_dict"].keys()),
    )

    if result_select is None:
        if st.session_state.korean:
            st.markdown("결과를 확인하려면 먼저 **🔥Arena-Lite를 구동**하셔야 합니다")
        else:
            st.markdown("You should **🔥Run Arena-Lite** first to see results")
        st.image("streamlit_app_local/page_result_1.png")
        st.image("streamlit_app_local/page_result_2.png")
        st.image("streamlit_app_local/page_result_3.png")
        st.stop()

    eval_prompt_name = result_select.split("/")[-1].strip()

    if st.sidebar.button("Clear Cache"):
        st.cache_data.clear()
        st.cache_resource.clear()
        st.rerun()

    if result_select:
        if "alpha2names" in st.session_state:
            del st.session_state["alpha2names"]

        fig_dict_per_task = st.session_state["all_result_dict"][result_select]
        task_list = list(fig_dict_per_task.keys())
        elo_rating_by_task = fig_dict_per_task["Overall"]["elo_rating_by_task"]
        # tabs = st.tabs(task_list)

        df_dict_per_task = st.session_state["df_dict"][result_select]
        default_layout_dict = DEFAULT_LAYOUT_DICT

        task = QueryWrapper("task", "Select Task")(st.selectbox, task_list)
        if task is None:
            st.stop()

        figure_dict = fig_dict_per_task[task]
        judgename = figure_dict["judgename"]
        df = df_dict_per_task[task]
        interpretation, n_models, size_testset = number_breakdown_from_df(df)

        if st.session_state.korean:
            st.markdown(f"## 결과 ({task})")
            st.markdown(f"##### Judge 모델: {judgename} / 평가프롬: {eval_prompt_name}")
            st.markdown(f"##### 테스트셋 사이즈: {int(size_testset)} 행")
        else:
            st.markdown(f"## Results ({task})")
            st.markdown(f"##### Judge Model: {judgename} / prompt: {eval_prompt_name}")
            st.markdown(f"##### Size of Testset: {int(size_testset)} rows")

        # Ratings table (left) and per-task Elo chart (right)
        col1, col2 = st.columns(2)
        with col1:
            with st.container(border=True):
                st.markdown(f"#### Ratings ({task})")
                st.table(figure_dict["elo_rating"])
                st.write(show_linebreak_in_md(escape_markdown(interpretation)))
        with col2:
            with st.container(border=True):
                st.plotly_chart(
                    elo_rating_by_task.update_layout(**default_layout_dict),
                    use_container_width=True,
                    key=f"{task}_elo_rating_by_task",
                )
        st.divider()

        # Tournament (test scenario) browser
        if st.session_state.korean:
            st.markdown("### 토너먼트 (테스트 시나리오) 별로 보기")
        else:
            st.markdown("### Tournament Results by Test Scenario")

        d = list(df.idx_inst_src.unique())
        default_idx = st.session_state.get("selected_tournament", None)
        cols = st.columns((1, 18, 1))
        with cols[0]:
            if st.button("◀", key="prev_tournament"):
                navigate(d, default_idx, "selected_tournament", -1)
        with cols[1]:
            tournament_prm_select = QueryWrapper("tournament", "Select Tournament")(
                st.selectbox,
                d,
                default_idx,
                key=f"{task}_tournament_select",
                on_change=lambda: st.session_state.update(
                    selected_tournament=st.session_state.get(
                        f"{task}_tournament_select"
                    ),
                    selected_match=None,
                ),
                label_visibility="collapsed",
            )
        with cols[2]:
            if st.button("▶", key="next_tournament"):
                navigate(d, default_idx, "selected_tournament", 1)

        st.session_state["selected_tournament"] = tournament_prm_select

        df_now_processed = None
        if tournament_prm_select:
            df_now = df[df.idx_inst_src == tournament_prm_select]
            df_now_processed, _alpha2names = au.init_tournament_dataframe(
                df_now,
                alpha2names=st.session_state["alpha2names"]
                if "alpha2names" in st.session_state.keys()
                else None,
            )
            if "alpha2names" not in st.session_state:
                st.session_state["alpha2names"] = _alpha2names

            try:
                # Tournament bracket drawing plus a legend mapping letters to models
                bracket_drawing = au.draw(
                    df_now_processed,
                    alpha2names=st.session_state["alpha2names"],
                )
                legend = au.make_legend_str(
                    df_now_processed, st.session_state["alpha2names"]
                )
                st.code(bracket_drawing + legend)

                # Match browser within the selected tournament
                m = list(df_now_processed.human_readable_idx)
                default_idx = st.session_state.get("selected_match", None)
                cols = st.columns((1, 18, 1))
                with cols[0]:
                    if st.button("◀", key="prev_match"):
                        navigate(m, default_idx, "selected_match", -1)
                with cols[1]:
                    match_idx_human = QueryWrapper("match", "Select Match")(
                        st.selectbox,
                        m,
                        default_idx,
                        key=f"{task}_match_select",
                        label_visibility="collapsed",
                    )
                with cols[2]:
                    if st.button("▶", key="next_match"):
                        navigate(m, default_idx, "selected_match", 1)

                st.session_state["selected_match"] = match_idx_human

                if match_idx_human:
                    # Per-match detail: evaluation prompt, test scenario, and both generations
                    match_idx = int(match_idx_human.split(": ")[0])
                    row = df_now_processed.loc[match_idx]

                    st.markdown("#### Current Test Scenario:")

                    with st.expander(
                        f"### Evaluation Prompt (evalprompt: {eval_prompt_name}--{task})"
                    ):
                        prompt = load_prompt(eval_prompt_name, task=task)
                        kwargs = dict(
                            inst="{inst}",
                            src="{src}",
                            out_a="{out_a}",
                            out_b="{out_b}",
                            task=task,
                        )
                        if eval_prompt_name == "translation_pair":
                            kwargs["source_lang"] = "{source_lang}"
                            kwargs["target_lang"] = "{target_lang}"
                        prompt_cmpl = prompt.complete_prompt(**kwargs)
                        for msg in prompt_cmpl:
                            st.markdown(f"**{msg['role']}**")
                            st.info(
                                show_linebreak_in_md(escape_markdown(msg["content"]))
                            )

                    st.info(show_linebreak_in_md(tournament_prm_select))

                    winner = row.winner
                    col1, col2 = st.columns(2)

                    winnerbox = st.success
                    loserbox = st.error
                    with col1:
                        iswinner = winner == "model_a"
                        writemsg = winnerbox if iswinner else loserbox
                        st.markdown(
                            f"#### ({row.model_a}) {row.human_readable_model_a}"
                        )
                        writemsg(
                            show_linebreak_in_md(row.generated_a),
                            icon="✅" if iswinner else "❌",
                        )
                    with col2:
                        iswinner = winner == "model_b"
                        writemsg = winnerbox if iswinner else loserbox
                        st.markdown(
                            f"#### ({row.model_b}) {row.human_readable_model_b}"
                        )
                        writemsg(
                            show_linebreak_in_md(row.generated_b),
                            icon="✅" if iswinner else "❌",
                        )
            except Exception as e:
                import traceback

                traceback.print_exc()
                st.markdown(
                    "**Bug: 아래 표를 복사해서 이슈로 남겨주시면 개선에 도움이 됩니다. 감사합니다🙏**"
                    if st.session_state.korean
                    else "Bug: Please open an issue and attach the table output below to help me out. Thanks in advance. 🙏"
                )
                st.error(e)
                st.info(tournament_prm_select)
                st.table(
                    df_now_processed[
                        [
                            "depth",
                            "round",
                            "winner_nodes",
                            "winner_resolved",
                            "winner",
                            "model_a",
                            "model_b",
                        ]
                    ]
                )

        st.write("Sharable link")
        st.code(f"{get_base_url()}/see_results?{QueryWrapper.get_sharable_link()}")

        st.divider()
        if st.session_state.korean:
            st.markdown("### 매치 통계")
        else:
            st.markdown("### Match Stats.")

        # Match statistics: win fractions and match counts per model pair / per model
        col1, col2 = st.columns(2)
        with col1:
            with st.container(border=True):
                st.plotly_chart(
                    figure_dict[
                        "fraction_of_model_a_wins_for_all_a_vs_b_matches"
                    ].update_layout(autosize=True, **default_layout_dict),
                    use_container_width=True,
                    key=f"{task}_fraction_of_model_a_wins_for_all_a_vs_b_matches",
                )
        with col2:
            with st.container(border=True):
                st.plotly_chart(
                    figure_dict[
                        "match_count_of_each_combination_of_models"
                    ].update_layout(autosize=True, **default_layout_dict),
                    use_container_width=True,
                    key=f"{task}_match_count_of_each_combination_of_models",
                )
        with col1:
            with st.container(border=True):
                st.plotly_chart(
                    figure_dict["match_count_for_each_model"].update_layout(
                        **default_layout_dict
                    ),
                    use_container_width=True,
                    key=f"{task}_match_count_for_each_model",
                )
        with col2:
            pass

        # Reference material on known LLM Judge biases
        if st.session_state.korean:
            st.markdown("### 참고용 LLM Judge 편향 정보")
        else:
            st.markdown("### FYI: How biased is your LLM Judge?")
        with st.expander("펼쳐서 보기" if st.session_state.korean else "Expand to show"):
            st.info(
                """
Arena-Lite에서는 position bias의 영향을 최소화하기 위해 모든 모델이 A나 B 위치에 번갈아 위치하도록 하였습니다. 그러나 LLM Judge 혹은 Prompt의 성능이 부족하다고 느껴진다면, 아래의 알려진 LLM Judge bias가 참고가 될 겁니다.
* position bias (왼쪽)
* length bias (오른쪽)

결과의 왜곡이 LLM Judge의 부족함 때문이었다는 점을 규명하려면 사용하신 LLM Judge와 Prompt의 binary classification 정확도를 측정해보시길 바랍니다 (Arena-Lite를 활용하여 이를 수행해볼 수 있습니다!).""".strip()
                if st.session_state.korean
                else """
In Arena-Lite, to minimize the effect of position bias, every model alternates between position A and position B. However, if you feel the LLM Judge or prompt is underperforming, the known LLM Judge biases below may be a useful reference:
* position bias (left)
* length bias (right)

To determine whether result distortion was due to LLM Judge limitations, measure the binary classification accuracy of your LLM Judge and prompt (you can use Arena-Lite for this!).""".strip()
            )
            st.markdown(f"#### {judgename} + prompt = {eval_prompt_name}")
            col1, col2 = st.columns(2)
            with col1:
                with st.container(border=True):
                    st.plotly_chart(
                        figure_dict["counts_of_match_winners"].update_layout(
                            **default_layout_dict
                        ),
                        use_container_width=True,
                        key=f"{task}_counts_of_match_winners",
                    )
            with col2:
                with st.container(border=True):
                    st.plotly_chart(
                        figure_dict["length_bias"].update_layout(**default_layout_dict),
                        use_container_width=True,
                        key=f"{task}_length_bias",
                    )
                    st.table(
                        figure_dict["length_bias_df"].groupby("category").describe().T
                    )


if __name__ == "__main__":
    main()