eEg. &ddlZddlZddlZddlZddlZddlmZmZmZddlm Z ddl Tddl Z ddl mZejejejejedddlmZddlmZmZmZmZe ejd d Zejd Z d e fd Z! dde"de#de#de$e#de#de%f dZ&dZ'e(dkr e'dSdS)N)datetime timedeltatimezone)Path)*) set_trace..) load_prompt)default_page_settingescape_markdown set_nav_barshow_linebreak_in_mdVARCO_ARENA_RESULT_PATHz ./user_submitreturnct}|r'|st|dn|dt t d}tj|}| d}dt||z z}t|}|dd|stj d dS|D][}||jz }t|d 5} | |dddn #1swxYwY\t%|d } t)| d krtjd dStjdt)| d|S)Nz: file exists and is not a directory. Consider renaming it.T)parents )hoursz%m-%d_%H:%M:%Sz./)rexist_okz?No files to upload. Please drag/drop or browse files to upload.wbz*.jsonlz7You need at least 2 jsonlines files to properly run VA.u✅ Successfully uploaded z jsonl files.)VA_ROOTexistsis_dir ValueErrormkdirrrrnowstrftimestrrstwarningnameopenwrite getbufferlistgloblenerrorsuccessresolve) uploaded_files user_sub_rootKSTtstamptstr files_dir_str files_dirfile file_pathfjslfiless L/home/deftson/nfs-deftson/2024/public_varco_arena/streamlit_app_local/app.py upload_filesr9!sM*##%%  \\\   D))) 91%%% & &C \#  F ??+ , ,D3}t3444M]##I OOD4O000 ' TUUUUU# * *D!DI-Ii&& *!(())) * * * * * * * * * * * * * * * y1122 x==1   HN O O O4 JPCMMPPP Q Q Q$$&& &s(EE E F gpt-4o-mini?price_estimation promptnameexp_nameapi_keyevaluation_modelupdate_intervalc ttjj}t |}|r||z }d|d|d|d|d|d }|r|d}n|dd }t |d}tj|tj tj tj d d d } tj | j d tj} tj} |d} t#j| j gggddr| j } | r| | z } |r| }| |dntj}|| z |kre| d}t+|dkr| }n!ddg|ddz}| |d|} t | tjd| | j }|rx|d}t+|dkr%|ddg|ddzz }n||z }| |dt |n| }||fS)Nz!python ../varco_arena/main.py -i z -o z -k z -p z -e z -j 64z -cpythonz yes | python T)stdoutstderrstdintextbufsizeshellF rbash)languagez ... .. . g? z ... .. . i)r r! session_state upfiles_dirrreplaceprint subprocessPopenPIPESTDOUTos set_blockingrEfilenotimeemptyselectreadlinecodesplitr)joinsleeppollread)r<r=r>r?r@rAptnoutdircommandprocesslast_update_timeterminal_output full_outputoutputto_show current_timelinesremaining_output return_codes r8run_varco_arenarsHs!- . . 0C #YYF#("|#||6||w||T^||dt|||G=/////(O<< 'NNNG o GOGN))++U333y{{hjjO...K! ='.)2r1 5 5a 8 ^,,..F v% # 8)G#((6(BBBB#'9;;L#&66HH + 1 1$ 7 7u::>>&1GG&*ii0@5:0M&N&NG',,Wv,FFF+7(f JsOOO <<>> %&~2244  ((..t44u::??tyy*:);eCDDk)IJJJGG//G$$Wv$>>>&''' C!F,,..K ; c tjddtjd<t}t d|dtjdtjjrtjdntjdtjtjjrtj dntj d tj d d }d tjvrdtj_ tj drt|tj_ tjjrbtjd5tjt!dddddn #1swxYwYnatjd5tjt!dddddn #1swxYwYtjd5tjjrtjdntjdtjdt!ddd}tjdt!ddd}|t*kr t-dt*dt.dtjd }tjtjjrd!nd"5t3|d#$}t5d%d&d'd(d#)}|d*kr d+|d,<d-|d.<|jdPi|}tj d/||D]T}tj d0|d1d0tjt9t;|d2U dddn #1swxYwY|rtjd tjd3nktj d4d5} d5} t?|| | d |6\} } | rtjd7n%tj d8d tj_!dddn #1swxYwYtjd95tjjrtjd:ntjd;tj"d} tj"d?}|#d@dA}|#t*d#t*d#}|#dBd#}|#dCdA}|#dDdA}tjjrtjdEntjdFtjdGtjdHd pdItj$vJ}|rt d |dKtjd tjd3nj| stjdLnSt?|| |d|6\} } | rtjdMn%tj dN| tj_%dddn #1swxYwYt d|dOdS)QNkoreanFapp_init)sidebar_placeholdertoggle_hashstru⚔️ VARCO ARENA ⚔️u**VARCO Arena는 각 모델의 생성된 결과를 비교 평가하여 모델의 성능 순위를 제공하는 시스템입니다. 커스텀 테스트셋에 유용하게 활용할 수 있으며, 모범답안을 필요로 하지 않습니다.**z**VARCO Arena is an LLM benchmarking system that compares model responses across customized test scenarios without requiring reference answers.**u모델 출력파일 업로드z### 1. Upload LLM responsesz&Drag and Drop jsonlines files (.jsonl)T)accept_multiple_filesrRz Upload Filesu*❓❔ 무엇을 업로드 하나요❓❔zguide_mds/input_jsonls_kr.mdUTF8)encodingu#❓❔ What should I upload ❓❔zguide_mds/input_jsonls_en.mdcost_estimation_formu### 2. 가격 산정z### 2. Cost Estimationz Select Judgezeval_models_list.txtrKzSelect Evalutaion Promptzeval_prompt_list.txtzUSR_SUB=zg is preserved name for the system. Consider another naming for the prompt or consider changing VA_ROOT=z (USR_SUB == VA_ROOT.parts[-1]).zCalculate Cost!uKLLM Judge에 활용되는 프롬프트 (`Calculate Cost!` 클릭시 갱신)zR**Evaluation Prompt for LLM Judge (will refresh after `Calculate Cost!` clicked)**-)taskz{inst}z{src}z{out_a}z{out_b})instsrcout_aout_brtranslation_pairz {source_lang} source_langz {target_lang} target_langz### Evaluation Prompt: z**rolecontentuE❌ Requirements: You have to upload jsonlines files first to proceedz##### Estimated Costdummy)r=r?r>r<r@u:❌ RuntimeError: An error occurred during cost estimationu*✅ Cost estimation completed successfullyrun_arena_formu### 3. Varco Arena 구동하기z### 3. Run Varco ArenazEnter your OpenAI API Keypassword)typez(Optional) Enter Exp. namer _/ ~uZ**주의**:`Ctrl+C` 버튼은 구현되지 않았습니다. 구동 전 숙고해주세요.z5**Caution: `Ctrl+C` button hasn't been implemented.**u🔥 Run Arena!cost_estimatedresult_file_path)disabledapp_during_runu0❌ Requirements: OpenAI key required to run VA.u:❌ RuntimeError: An error occurred during Varco Arena runu*✅ Varco Arena run completed successfully app_run_done)&r!rQgetr r titlervr%dividermarkdown file_uploaderrRbuttonr9expanderinfor$reform selectboxraUSR_SUBrrform_submit_buttonr dictcomplete_promptrr r*rsr+r text_inputrSkeysr)rxr- eval_modelr=estimate_buttonpromptkwargs prompt_cmplmsg dummy_api_keydummy_exp_namerrrr?r> run_buttons r8mainrs5 !#!1!5!5h!F!FBX.00 #6zH ()))    B      `   JLLL 3 34444 1222%0N B,,,'+$ y  D'3N'C'C$ R [E F F R R GD7&IIINNPP Q Q Q R R R R R R R R R R R R R R R[> ? ? R R GD7&IIINNPP Q Q Q R R R R R R R R R R R R R R R ' ( (>;>;  " / H+ , , , , H- . . .\  '& 9 9 9 > > @ @ F Ft L L  \ & '& 9 9 9 > > @ @ F Ft L L    g7gg}Dggg /0ABB [& f Y Ye   O O !#666F F///(7}%(7}%0&0::6::K K>*>> ? ? ?" O O 0V000111,_S^-L-LMMNNNN O' O O O O O O O O O O O O O O O.  ;##M22:[ 2333 ' !(0?))+%)%/ 111- +;HYZZZZJKLLL6:B$3}>;>;>;>;>;>;>;>;>;>;>;>;>;>;>;B ! " "5I5I  " / H6 7 7 7 7 H- . . .- ;*MMM=!=>>## #  ## ^^^^  ##C--##C--##C--  " N Hq r r r r HL M M M* *../?GGG'!$$&&'     I $7/     ##M22:[ IKLLLL0?)#%%*%/ 111- +IHYZZZZJKLLL8HB$5k5I5I5I5I5I5I5I5I5I5I5I5I5I5I5Il #6~sp56F77F;>F;6HH H8D3S!+B1P) S!)P- -S!0P- 1B$S!!S%(S%H"\..\25\2__main__)FNNNr:r;))rYr^rUsysr\rrrpathlibrtyping streamlitr!ipdbrpathappendabspathrbdirname__file__varco_arena_core.promptsr view_utilsr r r renvironrrpartsrr9boolr Optionalfloatrsr__name__rrtr8rs  2222222222 RW__X-F-F M MNNOOO000000 $rz~~7II J J - $'D$'$'$'$'P#!) LLLL L c] L  LLLLL^fffR zDFFFFFrt