Spaces:
Sleeping
Sleeping
Improved result.json to include API call details; added a new prompt
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +10 -0
- .vscode/launch.json +15 -0
- __pycache__/analysis_utils.cpython-311.pyc +0 -0
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/query_comp.cpython-311.pyc +0 -0
- __pycache__/view_utils.cpython-311.pyc +0 -0
- eval_models_list.txt +2 -0
- eval_prompt_list.txt +1 -0
- modules/__pycache__/nav.cpython-311.pyc +0 -0
- pages/__pycache__/see_results.cpython-311.pyc +0 -0
- streamlit_app_local/__pycache__/analysis_utils.cpython-311.pyc +0 -0
- streamlit_app_local/__pycache__/analysis_utils.cpython-38.pyc +0 -0
- streamlit_app_local/__pycache__/app.cpython-311.pyc +0 -0
- streamlit_app_local/__pycache__/app.cpython-38.pyc +0 -0
- streamlit_app_local/__pycache__/query_comp.cpython-311.pyc +0 -0
- streamlit_app_local/__pycache__/view_utils.cpython-311.pyc +0 -0
- streamlit_app_local/__pycache__/view_utils.cpython-38.pyc +0 -0
- streamlit_app_local/eval_models_list.txt +0 -3
- streamlit_app_local/eval_models_list.txt +1 -0
- streamlit_app_local/eval_prompt_list.txt +0 -5
- streamlit_app_local/eval_prompt_list.txt +1 -0
- streamlit_app_local/modules/__pycache__/nav.cpython-311.pyc +0 -0
- streamlit_app_local/modules/__pycache__/nav.cpython-38.pyc +0 -0
- streamlit_app_local/pages/see_results.py +3 -1
- varco_arena/__pycache__/calc_cost.cpython-311.pyc +0 -0
- varco_arena/__pycache__/calc_cost.cpython-38.pyc +0 -0
- varco_arena/main.py +0 -7
- varco_arena/varco_arena_core/__pycache__/__init__.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/custom_input_utils.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/data_utils.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/elo.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/eval_utils.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/league.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/manager.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/match.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/tournament.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/tracking_utils.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/__pycache__/visualization.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/eval_utils.py +1 -0
- varco_arena/varco_arena_core/league.py +1 -0
- varco_arena/varco_arena_core/match.py +0 -1
- varco_arena/varco_arena_core/prompts/__init__.py +3 -1
- varco_arena/varco_arena_core/prompts/__pycache__/__init__.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/prompts/__pycache__/base_prompt.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/prompts/__pycache__/llmbar.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/prompts/__pycache__/llmbar_brief.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/prompts/__pycache__/naive_ab.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/prompts/__pycache__/prompt_utils.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/prompts/__pycache__/rag_pair.cpython-311.pyc +0 -0
- varco_arena/varco_arena_core/prompts/__pycache__/rag_pair_kr.cpython-311.pyc +0 -0
.gitignore
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ignore all subdirectories in user_submit
|
2 |
+
**/user_submit/*/
|
3 |
+
**/__pycache__/
|
4 |
+
**/*.pyc
|
5 |
+
|
6 |
+
# But re-include these four
|
7 |
+
!**/user_submit/llm/
|
8 |
+
!**/user_submit/rag/
|
9 |
+
!**/user_submit/mt/
|
10 |
+
!**/user_submit/12-02-14:29:30/
|
.vscode/launch.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
// Use IntelliSense to learn about possible attributes.
|
3 |
+
// Hover to view descriptions of existing attributes.
|
4 |
+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
5 |
+
"version": "0.2.0",
|
6 |
+
"configurations": [
|
7 |
+
{
|
8 |
+
"name": "Python Debugger: Current File",
|
9 |
+
"type": "debugpy",
|
10 |
+
"request": "launch",
|
11 |
+
"program": "${file}",
|
12 |
+
"console": "integratedTerminal"
|
13 |
+
}
|
14 |
+
]
|
15 |
+
}
|
__pycache__/analysis_utils.cpython-311.pyc
DELETED
Binary file (17.7 kB)
|
|
__pycache__/app.cpython-311.pyc
DELETED
Binary file (22.3 kB)
|
|
__pycache__/query_comp.cpython-311.pyc
DELETED
Binary file (7.99 kB)
|
|
__pycache__/view_utils.cpython-311.pyc
DELETED
Binary file (18.3 kB)
|
|
eval_models_list.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
gpt-4o-mini
|
2 |
gpt-4o-2024-05-13
|
3 |
gpt-4o-2024-08-06
|
|
|
|
|
|
1 |
gpt-4o-mini
|
2 |
gpt-4o-2024-05-13
|
3 |
gpt-4o-2024-08-06
|
4 |
+
gpt-4.1
|
5 |
+
gpt-4.1-mini
|
eval_prompt_list.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
llmbar
|
2 |
translation_pair
|
3 |
rag_pair_kr
|
|
|
|
1 |
llmbar
|
2 |
translation_pair
|
3 |
rag_pair_kr
|
4 |
+
translation_new
|
modules/__pycache__/nav.cpython-311.pyc
DELETED
Binary file (3.8 kB)
|
|
pages/__pycache__/see_results.cpython-311.pyc
DELETED
Binary file (26.3 kB)
|
|
streamlit_app_local/__pycache__/analysis_utils.cpython-311.pyc
DELETED
Binary file (17.7 kB)
|
|
streamlit_app_local/__pycache__/analysis_utils.cpython-38.pyc
DELETED
Binary file (9.12 kB)
|
|
streamlit_app_local/__pycache__/app.cpython-311.pyc
DELETED
Binary file (15.9 kB)
|
|
streamlit_app_local/__pycache__/app.cpython-38.pyc
DELETED
Binary file (6.32 kB)
|
|
streamlit_app_local/__pycache__/query_comp.cpython-311.pyc
DELETED
Binary file (8 kB)
|
|
streamlit_app_local/__pycache__/view_utils.cpython-311.pyc
DELETED
Binary file (18.3 kB)
|
|
streamlit_app_local/__pycache__/view_utils.cpython-38.pyc
DELETED
Binary file (9.91 kB)
|
|
streamlit_app_local/eval_models_list.txt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
gpt-4o-mini
|
2 |
-
gpt-4o-2024-05-13
|
3 |
-
gpt-4o-2024-08-06
|
|
|
|
|
|
|
|
streamlit_app_local/eval_models_list.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../eval_models_list.txt
|
streamlit_app_local/eval_prompt_list.txt
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
llmbar
|
2 |
-
llmbar_brief
|
3 |
-
translation_pair
|
4 |
-
rag_pair_kr
|
5 |
-
contextual (WIP)
|
|
|
|
|
|
|
|
|
|
|
|
streamlit_app_local/eval_prompt_list.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../eval_prompt_list.txt
|
streamlit_app_local/modules/__pycache__/nav.cpython-311.pyc
DELETED
Binary file (2.85 kB)
|
|
streamlit_app_local/modules/__pycache__/nav.cpython-38.pyc
DELETED
Binary file (889 Bytes)
|
|
streamlit_app_local/pages/see_results.py
CHANGED
@@ -2,6 +2,7 @@ import pandas as pd
|
|
2 |
import streamlit as st
|
3 |
from varco_arena_core.prompts import load_prompt
|
4 |
|
|
|
5 |
import analysis_utils as au
|
6 |
from analysis_utils import number_breakdown_from_df
|
7 |
from app import VA_ROOT
|
@@ -248,11 +249,12 @@ def main():
|
|
248 |
out_b="{out_b}",
|
249 |
task=task,
|
250 |
)
|
251 |
-
if eval_prompt_name
|
252 |
kwargs["source_lang"] = "{source_lang}"
|
253 |
kwargs["target_lang"] = "{target_lang}"
|
254 |
prompt_cmpl = prompt.complete_prompt(**kwargs)
|
255 |
for msg in prompt_cmpl:
|
|
|
256 |
st.markdown(f"**{msg['role']}**")
|
257 |
st.info(show_linebreak_in_md(escape_markdown(msg["content"])))
|
258 |
|
|
|
2 |
import streamlit as st
|
3 |
from varco_arena_core.prompts import load_prompt
|
4 |
|
5 |
+
from pprint import pprint
|
6 |
import analysis_utils as au
|
7 |
from analysis_utils import number_breakdown_from_df
|
8 |
from app import VA_ROOT
|
|
|
249 |
out_b="{out_b}",
|
250 |
task=task,
|
251 |
)
|
252 |
+
if eval_prompt_name in ["translation_pair", "translation_new"]:
|
253 |
kwargs["source_lang"] = "{source_lang}"
|
254 |
kwargs["target_lang"] = "{target_lang}"
|
255 |
prompt_cmpl = prompt.complete_prompt(**kwargs)
|
256 |
for msg in prompt_cmpl:
|
257 |
+
pprint(msg)
|
258 |
st.markdown(f"**{msg['role']}**")
|
259 |
st.info(show_linebreak_in_md(escape_markdown(msg["content"])))
|
260 |
|
varco_arena/__pycache__/calc_cost.cpython-311.pyc
DELETED
Binary file (5.11 kB)
|
|
varco_arena/__pycache__/calc_cost.cpython-38.pyc
DELETED
Binary file (2.88 kB)
|
|
varco_arena/main.py
CHANGED
@@ -134,13 +134,6 @@ if __name__ == "__main__":
|
|
134 |
"-p",
|
135 |
"--evalprompt",
|
136 |
default="llmbar_brief",
|
137 |
-
choices=[
|
138 |
-
"llmbar_brief",
|
139 |
-
"llmbar",
|
140 |
-
"translation_pair",
|
141 |
-
"rag_pair_kr",
|
142 |
-
# "contextual_pair",
|
143 |
-
],
|
144 |
)
|
145 |
|
146 |
parser.add_argument(
|
|
|
134 |
"-p",
|
135 |
"--evalprompt",
|
136 |
default="llmbar_brief",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
)
|
138 |
|
139 |
parser.add_argument(
|
varco_arena/varco_arena_core/__pycache__/__init__.cpython-311.pyc
DELETED
Binary file (219 Bytes)
|
|
varco_arena/varco_arena_core/__pycache__/custom_input_utils.cpython-311.pyc
DELETED
Binary file (331 Bytes)
|
|
varco_arena/varco_arena_core/__pycache__/data_utils.cpython-311.pyc
DELETED
Binary file (8.07 kB)
|
|
varco_arena/varco_arena_core/__pycache__/elo.cpython-311.pyc
DELETED
Binary file (4.78 kB)
|
|
varco_arena/varco_arena_core/__pycache__/eval_utils.cpython-311.pyc
DELETED
Binary file (7.32 kB)
|
|
varco_arena/varco_arena_core/__pycache__/league.cpython-311.pyc
DELETED
Binary file (4.12 kB)
|
|
varco_arena/varco_arena_core/__pycache__/manager.cpython-311.pyc
DELETED
Binary file (9.54 kB)
|
|
varco_arena/varco_arena_core/__pycache__/match.cpython-311.pyc
DELETED
Binary file (9.29 kB)
|
|
varco_arena/varco_arena_core/__pycache__/tournament.cpython-311.pyc
DELETED
Binary file (7.19 kB)
|
|
varco_arena/varco_arena_core/__pycache__/tracking_utils.cpython-311.pyc
DELETED
Binary file (9.42 kB)
|
|
varco_arena/varco_arena_core/__pycache__/visualization.cpython-311.pyc
DELETED
Binary file (8.91 kB)
|
|
varco_arena/varco_arena_core/eval_utils.py
CHANGED
@@ -138,6 +138,7 @@ async def async_query_openai(
|
|
138 |
|
139 |
increase_match_count() # you're hacky Jumin...
|
140 |
|
|
|
141 |
return normalized_result, resp
|
142 |
|
143 |
|
|
|
138 |
|
139 |
increase_match_count() # you're hacky Jumin...
|
140 |
|
141 |
+
normalized_result["api_call_kwargs"] = kwargs
|
142 |
return normalized_result, resp
|
143 |
|
144 |
|
varco_arena/varco_arena_core/league.py
CHANGED
@@ -59,6 +59,7 @@ class League:
|
|
59 |
"round": "league",
|
60 |
"match_order_in_round": "league",
|
61 |
"tstamp": now_time,
|
|
|
62 |
# "logs": match.match_metainfo_log[0],
|
63 |
},
|
64 |
]
|
|
|
59 |
"round": "league",
|
60 |
"match_order_in_round": "league",
|
61 |
"tstamp": now_time,
|
62 |
+
"api_call_kwargs": match_result[0]["api_call_kwargs"],
|
63 |
# "logs": match.match_metainfo_log[0],
|
64 |
},
|
65 |
]
|
varco_arena/varco_arena_core/match.py
CHANGED
@@ -6,7 +6,6 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
|
6 |
|
7 |
from .eval_utils import async_eval_w_prompt
|
8 |
|
9 |
-
|
10 |
class Match:
|
11 |
def __init__(
|
12 |
self,
|
|
|
6 |
|
7 |
from .eval_utils import async_eval_w_prompt
|
8 |
|
|
|
9 |
class Match:
|
10 |
def __init__(
|
11 |
self,
|
varco_arena/varco_arena_core/prompts/__init__.py
CHANGED
@@ -8,12 +8,14 @@ from .llmbar import LLMBarPrompt
|
|
8 |
from .llmbar_brief import LLMBarBriefPrompt
|
9 |
from .rag_pair_kr import RagPairKRPrompt
|
10 |
from .translation_pair import TranslationPairPrompt
|
|
|
11 |
|
12 |
NAME2PROMPT_CLS = dict(
|
13 |
llmbar_brief=LLMBarBriefPrompt(),
|
14 |
llmbar=LLMBarPrompt(),
|
15 |
translation_pair=TranslationPairPrompt(),
|
16 |
rag_pair_kr=RagPairKRPrompt(),
|
|
|
17 |
# contextual_vqa = Contextual_VQA(),
|
18 |
# contextual_ocr = Contextual_OCR(),
|
19 |
)
|
@@ -24,8 +26,8 @@ def load_prompt(
|
|
24 |
"llmbar_brief",
|
25 |
"llmbar",
|
26 |
"translation_pair",
|
|
|
27 |
"rag_pair_kr",
|
28 |
-
# "contextual_pair"
|
29 |
],
|
30 |
task: str = "", # used for further prompt variation (eval prompt might depend on task.)
|
31 |
):
|
|
|
8 |
from .llmbar_brief import LLMBarBriefPrompt
|
9 |
from .rag_pair_kr import RagPairKRPrompt
|
10 |
from .translation_pair import TranslationPairPrompt
|
11 |
+
from .translation_new import TranslationNewPrompt
|
12 |
|
13 |
NAME2PROMPT_CLS = dict(
|
14 |
llmbar_brief=LLMBarBriefPrompt(),
|
15 |
llmbar=LLMBarPrompt(),
|
16 |
translation_pair=TranslationPairPrompt(),
|
17 |
rag_pair_kr=RagPairKRPrompt(),
|
18 |
+
translation_new=TranslationNewPrompt(),
|
19 |
# contextual_vqa = Contextual_VQA(),
|
20 |
# contextual_ocr = Contextual_OCR(),
|
21 |
)
|
|
|
26 |
"llmbar_brief",
|
27 |
"llmbar",
|
28 |
"translation_pair",
|
29 |
+
"translation_new",
|
30 |
"rag_pair_kr",
|
|
|
31 |
],
|
32 |
task: str = "", # used for further prompt variation (eval prompt might depend on task.)
|
33 |
):
|
varco_arena/varco_arena_core/prompts/__pycache__/__init__.cpython-311.pyc
DELETED
Binary file (1.44 kB)
|
|
varco_arena/varco_arena_core/prompts/__pycache__/base_prompt.cpython-311.pyc
DELETED
Binary file (6.07 kB)
|
|
varco_arena/varco_arena_core/prompts/__pycache__/llmbar.cpython-311.pyc
DELETED
Binary file (7.29 kB)
|
|
varco_arena/varco_arena_core/prompts/__pycache__/llmbar_brief.cpython-311.pyc
DELETED
Binary file (1.48 kB)
|
|
varco_arena/varco_arena_core/prompts/__pycache__/naive_ab.cpython-311.pyc
DELETED
Binary file (1.47 kB)
|
|
varco_arena/varco_arena_core/prompts/__pycache__/prompt_utils.cpython-311.pyc
DELETED
Binary file (7.55 kB)
|
|
varco_arena/varco_arena_core/prompts/__pycache__/rag_pair.cpython-311.pyc
DELETED
Binary file (2.28 kB)
|
|
varco_arena/varco_arena_core/prompts/__pycache__/rag_pair_kr.cpython-311.pyc
DELETED
Binary file (2.29 kB)
|
|