sonsus committed on
Commit
1409f18
·
1 Parent(s): f6154c5

Improved result.json to include API call details; added a new prompt

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +10 -0
  2. .vscode/launch.json +15 -0
  3. __pycache__/analysis_utils.cpython-311.pyc +0 -0
  4. __pycache__/app.cpython-311.pyc +0 -0
  5. __pycache__/query_comp.cpython-311.pyc +0 -0
  6. __pycache__/view_utils.cpython-311.pyc +0 -0
  7. eval_models_list.txt +2 -0
  8. eval_prompt_list.txt +1 -0
  9. modules/__pycache__/nav.cpython-311.pyc +0 -0
  10. pages/__pycache__/see_results.cpython-311.pyc +0 -0
  11. streamlit_app_local/__pycache__/analysis_utils.cpython-311.pyc +0 -0
  12. streamlit_app_local/__pycache__/analysis_utils.cpython-38.pyc +0 -0
  13. streamlit_app_local/__pycache__/app.cpython-311.pyc +0 -0
  14. streamlit_app_local/__pycache__/app.cpython-38.pyc +0 -0
  15. streamlit_app_local/__pycache__/query_comp.cpython-311.pyc +0 -0
  16. streamlit_app_local/__pycache__/view_utils.cpython-311.pyc +0 -0
  17. streamlit_app_local/__pycache__/view_utils.cpython-38.pyc +0 -0
  18. streamlit_app_local/eval_models_list.txt +0 -3
  19. streamlit_app_local/eval_models_list.txt +1 -0
  20. streamlit_app_local/eval_prompt_list.txt +0 -5
  21. streamlit_app_local/eval_prompt_list.txt +1 -0
  22. streamlit_app_local/modules/__pycache__/nav.cpython-311.pyc +0 -0
  23. streamlit_app_local/modules/__pycache__/nav.cpython-38.pyc +0 -0
  24. streamlit_app_local/pages/see_results.py +3 -1
  25. varco_arena/__pycache__/calc_cost.cpython-311.pyc +0 -0
  26. varco_arena/__pycache__/calc_cost.cpython-38.pyc +0 -0
  27. varco_arena/main.py +0 -7
  28. varco_arena/varco_arena_core/__pycache__/__init__.cpython-311.pyc +0 -0
  29. varco_arena/varco_arena_core/__pycache__/custom_input_utils.cpython-311.pyc +0 -0
  30. varco_arena/varco_arena_core/__pycache__/data_utils.cpython-311.pyc +0 -0
  31. varco_arena/varco_arena_core/__pycache__/elo.cpython-311.pyc +0 -0
  32. varco_arena/varco_arena_core/__pycache__/eval_utils.cpython-311.pyc +0 -0
  33. varco_arena/varco_arena_core/__pycache__/league.cpython-311.pyc +0 -0
  34. varco_arena/varco_arena_core/__pycache__/manager.cpython-311.pyc +0 -0
  35. varco_arena/varco_arena_core/__pycache__/match.cpython-311.pyc +0 -0
  36. varco_arena/varco_arena_core/__pycache__/tournament.cpython-311.pyc +0 -0
  37. varco_arena/varco_arena_core/__pycache__/tracking_utils.cpython-311.pyc +0 -0
  38. varco_arena/varco_arena_core/__pycache__/visualization.cpython-311.pyc +0 -0
  39. varco_arena/varco_arena_core/eval_utils.py +1 -0
  40. varco_arena/varco_arena_core/league.py +1 -0
  41. varco_arena/varco_arena_core/match.py +0 -1
  42. varco_arena/varco_arena_core/prompts/__init__.py +3 -1
  43. varco_arena/varco_arena_core/prompts/__pycache__/__init__.cpython-311.pyc +0 -0
  44. varco_arena/varco_arena_core/prompts/__pycache__/base_prompt.cpython-311.pyc +0 -0
  45. varco_arena/varco_arena_core/prompts/__pycache__/llmbar.cpython-311.pyc +0 -0
  46. varco_arena/varco_arena_core/prompts/__pycache__/llmbar_brief.cpython-311.pyc +0 -0
  47. varco_arena/varco_arena_core/prompts/__pycache__/naive_ab.cpython-311.pyc +0 -0
  48. varco_arena/varco_arena_core/prompts/__pycache__/prompt_utils.cpython-311.pyc +0 -0
  49. varco_arena/varco_arena_core/prompts/__pycache__/rag_pair.cpython-311.pyc +0 -0
  50. varco_arena/varco_arena_core/prompts/__pycache__/rag_pair_kr.cpython-311.pyc +0 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore all subdirectories in user_submit
2
+ **/user_submit/*/
3
+ **/__pycache__/
4
+ **/*.pyc
5
+
6
+ # But re-include these four
7
+ !**/user_submit/llm/
8
+ !**/user_submit/rag/
9
+ !**/user_submit/mt/
10
+ !**/user_submit/12-02-14:29:30/
.vscode/launch.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+ {
8
+ "name": "Python Debugger: Current File",
9
+ "type": "debugpy",
10
+ "request": "launch",
11
+ "program": "${file}",
12
+ "console": "integratedTerminal"
13
+ }
14
+ ]
15
+ }
__pycache__/analysis_utils.cpython-311.pyc DELETED
Binary file (17.7 kB)
 
__pycache__/app.cpython-311.pyc DELETED
Binary file (22.3 kB)
 
__pycache__/query_comp.cpython-311.pyc DELETED
Binary file (7.99 kB)
 
__pycache__/view_utils.cpython-311.pyc DELETED
Binary file (18.3 kB)
 
eval_models_list.txt CHANGED
@@ -1,3 +1,5 @@
1
  gpt-4o-mini
2
  gpt-4o-2024-05-13
3
  gpt-4o-2024-08-06
 
 
 
1
  gpt-4o-mini
2
  gpt-4o-2024-05-13
3
  gpt-4o-2024-08-06
4
+ gpt-4.1
5
+ gpt-4.1-mini
eval_prompt_list.txt CHANGED
@@ -1,3 +1,4 @@
1
  llmbar
2
  translation_pair
3
  rag_pair_kr
 
 
1
  llmbar
2
  translation_pair
3
  rag_pair_kr
4
+ translation_new
modules/__pycache__/nav.cpython-311.pyc DELETED
Binary file (3.8 kB)
 
pages/__pycache__/see_results.cpython-311.pyc DELETED
Binary file (26.3 kB)
 
streamlit_app_local/__pycache__/analysis_utils.cpython-311.pyc DELETED
Binary file (17.7 kB)
 
streamlit_app_local/__pycache__/analysis_utils.cpython-38.pyc DELETED
Binary file (9.12 kB)
 
streamlit_app_local/__pycache__/app.cpython-311.pyc DELETED
Binary file (15.9 kB)
 
streamlit_app_local/__pycache__/app.cpython-38.pyc DELETED
Binary file (6.32 kB)
 
streamlit_app_local/__pycache__/query_comp.cpython-311.pyc DELETED
Binary file (8 kB)
 
streamlit_app_local/__pycache__/view_utils.cpython-311.pyc DELETED
Binary file (18.3 kB)
 
streamlit_app_local/__pycache__/view_utils.cpython-38.pyc DELETED
Binary file (9.91 kB)
 
streamlit_app_local/eval_models_list.txt DELETED
@@ -1,3 +0,0 @@
1
- gpt-4o-mini
2
- gpt-4o-2024-05-13
3
- gpt-4o-2024-08-06
 
 
 
 
streamlit_app_local/eval_models_list.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ../eval_models_list.txt
streamlit_app_local/eval_prompt_list.txt DELETED
@@ -1,5 +0,0 @@
1
- llmbar
2
- llmbar_brief
3
- translation_pair
4
- rag_pair_kr
5
- contextual (WIP)
 
 
 
 
 
 
streamlit_app_local/eval_prompt_list.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ../eval_prompt_list.txt
streamlit_app_local/modules/__pycache__/nav.cpython-311.pyc DELETED
Binary file (2.85 kB)
 
streamlit_app_local/modules/__pycache__/nav.cpython-38.pyc DELETED
Binary file (889 Bytes)
 
streamlit_app_local/pages/see_results.py CHANGED
@@ -2,6 +2,7 @@ import pandas as pd
2
  import streamlit as st
3
  from varco_arena_core.prompts import load_prompt
4
 
 
5
  import analysis_utils as au
6
  from analysis_utils import number_breakdown_from_df
7
  from app import VA_ROOT
@@ -248,11 +249,12 @@ def main():
248
  out_b="{out_b}",
249
  task=task,
250
  )
251
- if eval_prompt_name == "translation_pair":
252
  kwargs["source_lang"] = "{source_lang}"
253
  kwargs["target_lang"] = "{target_lang}"
254
  prompt_cmpl = prompt.complete_prompt(**kwargs)
255
  for msg in prompt_cmpl:
 
256
  st.markdown(f"**{msg['role']}**")
257
  st.info(show_linebreak_in_md(escape_markdown(msg["content"])))
258
 
 
2
  import streamlit as st
3
  from varco_arena_core.prompts import load_prompt
4
 
5
+ from pprint import pprint
6
  import analysis_utils as au
7
  from analysis_utils import number_breakdown_from_df
8
  from app import VA_ROOT
 
249
  out_b="{out_b}",
250
  task=task,
251
  )
252
+ if eval_prompt_name in ["translation_pair", "translation_new"]:
253
  kwargs["source_lang"] = "{source_lang}"
254
  kwargs["target_lang"] = "{target_lang}"
255
  prompt_cmpl = prompt.complete_prompt(**kwargs)
256
  for msg in prompt_cmpl:
257
+ pprint(msg)
258
  st.markdown(f"**{msg['role']}**")
259
  st.info(show_linebreak_in_md(escape_markdown(msg["content"])))
260
 
varco_arena/__pycache__/calc_cost.cpython-311.pyc DELETED
Binary file (5.11 kB)
 
varco_arena/__pycache__/calc_cost.cpython-38.pyc DELETED
Binary file (2.88 kB)
 
varco_arena/main.py CHANGED
@@ -134,13 +134,6 @@ if __name__ == "__main__":
134
  "-p",
135
  "--evalprompt",
136
  default="llmbar_brief",
137
- choices=[
138
- "llmbar_brief",
139
- "llmbar",
140
- "translation_pair",
141
- "rag_pair_kr",
142
- # "contextual_pair",
143
- ],
144
  )
145
 
146
  parser.add_argument(
 
134
  "-p",
135
  "--evalprompt",
136
  default="llmbar_brief",
 
 
 
 
 
 
 
137
  )
138
 
139
  parser.add_argument(
varco_arena/varco_arena_core/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (219 Bytes)
 
varco_arena/varco_arena_core/__pycache__/custom_input_utils.cpython-311.pyc DELETED
Binary file (331 Bytes)
 
varco_arena/varco_arena_core/__pycache__/data_utils.cpython-311.pyc DELETED
Binary file (8.07 kB)
 
varco_arena/varco_arena_core/__pycache__/elo.cpython-311.pyc DELETED
Binary file (4.78 kB)
 
varco_arena/varco_arena_core/__pycache__/eval_utils.cpython-311.pyc DELETED
Binary file (7.32 kB)
 
varco_arena/varco_arena_core/__pycache__/league.cpython-311.pyc DELETED
Binary file (4.12 kB)
 
varco_arena/varco_arena_core/__pycache__/manager.cpython-311.pyc DELETED
Binary file (9.54 kB)
 
varco_arena/varco_arena_core/__pycache__/match.cpython-311.pyc DELETED
Binary file (9.29 kB)
 
varco_arena/varco_arena_core/__pycache__/tournament.cpython-311.pyc DELETED
Binary file (7.19 kB)
 
varco_arena/varco_arena_core/__pycache__/tracking_utils.cpython-311.pyc DELETED
Binary file (9.42 kB)
 
varco_arena/varco_arena_core/__pycache__/visualization.cpython-311.pyc DELETED
Binary file (8.91 kB)
 
varco_arena/varco_arena_core/eval_utils.py CHANGED
@@ -138,6 +138,7 @@ async def async_query_openai(
138
 
139
  increase_match_count() # you're hacky Jumin...
140
 
 
141
  return normalized_result, resp
142
 
143
 
 
138
 
139
  increase_match_count() # you're hacky Jumin...
140
 
141
+ normalized_result["api_call_kwargs"] = kwargs
142
  return normalized_result, resp
143
 
144
 
varco_arena/varco_arena_core/league.py CHANGED
@@ -59,6 +59,7 @@ class League:
59
  "round": "league",
60
  "match_order_in_round": "league",
61
  "tstamp": now_time,
 
62
  # "logs": match.match_metainfo_log[0],
63
  },
64
  ]
 
59
  "round": "league",
60
  "match_order_in_round": "league",
61
  "tstamp": now_time,
62
+ "api_call_kwargs": match_result[0]["api_call_kwargs"],
63
  # "logs": match.match_metainfo_log[0],
64
  },
65
  ]
varco_arena/varco_arena_core/match.py CHANGED
@@ -6,7 +6,6 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
6
 
7
  from .eval_utils import async_eval_w_prompt
8
 
9
-
10
  class Match:
11
  def __init__(
12
  self,
 
6
 
7
  from .eval_utils import async_eval_w_prompt
8
 
 
9
  class Match:
10
  def __init__(
11
  self,
varco_arena/varco_arena_core/prompts/__init__.py CHANGED
@@ -8,12 +8,14 @@ from .llmbar import LLMBarPrompt
8
  from .llmbar_brief import LLMBarBriefPrompt
9
  from .rag_pair_kr import RagPairKRPrompt
10
  from .translation_pair import TranslationPairPrompt
 
11
 
12
  NAME2PROMPT_CLS = dict(
13
  llmbar_brief=LLMBarBriefPrompt(),
14
  llmbar=LLMBarPrompt(),
15
  translation_pair=TranslationPairPrompt(),
16
  rag_pair_kr=RagPairKRPrompt(),
 
17
  # contextual_vqa = Contextual_VQA(),
18
  # contextual_ocr = Contextual_OCR(),
19
  )
@@ -24,8 +26,8 @@ def load_prompt(
24
  "llmbar_brief",
25
  "llmbar",
26
  "translation_pair",
 
27
  "rag_pair_kr",
28
- # "contextual_pair"
29
  ],
30
  task: str = "", # used for further prompt variation (eval prompt might depend on task.)
31
  ):
 
8
  from .llmbar_brief import LLMBarBriefPrompt
9
  from .rag_pair_kr import RagPairKRPrompt
10
  from .translation_pair import TranslationPairPrompt
11
+ from .translation_new import TranslationNewPrompt
12
 
13
  NAME2PROMPT_CLS = dict(
14
  llmbar_brief=LLMBarBriefPrompt(),
15
  llmbar=LLMBarPrompt(),
16
  translation_pair=TranslationPairPrompt(),
17
  rag_pair_kr=RagPairKRPrompt(),
18
+ translation_new=TranslationNewPrompt(),
19
  # contextual_vqa = Contextual_VQA(),
20
  # contextual_ocr = Contextual_OCR(),
21
  )
 
26
  "llmbar_brief",
27
  "llmbar",
28
  "translation_pair",
29
+ "translation_new",
30
  "rag_pair_kr",
 
31
  ],
32
  task: str = "", # used for further prompt variation (eval prompt might depend on task.)
33
  ):
varco_arena/varco_arena_core/prompts/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (1.44 kB)
 
varco_arena/varco_arena_core/prompts/__pycache__/base_prompt.cpython-311.pyc DELETED
Binary file (6.07 kB)
 
varco_arena/varco_arena_core/prompts/__pycache__/llmbar.cpython-311.pyc DELETED
Binary file (7.29 kB)
 
varco_arena/varco_arena_core/prompts/__pycache__/llmbar_brief.cpython-311.pyc DELETED
Binary file (1.48 kB)
 
varco_arena/varco_arena_core/prompts/__pycache__/naive_ab.cpython-311.pyc DELETED
Binary file (1.47 kB)
 
varco_arena/varco_arena_core/prompts/__pycache__/prompt_utils.cpython-311.pyc DELETED
Binary file (7.55 kB)
 
varco_arena/varco_arena_core/prompts/__pycache__/rag_pair.cpython-311.pyc DELETED
Binary file (2.28 kB)
 
varco_arena/varco_arena_core/prompts/__pycache__/rag_pair_kr.cpython-311.pyc DELETED
Binary file (2.29 kB)