from __future__ import annotations

import ast
import concurrent.futures
import difflib
import os
import time
import traceback
import urllib.parse
import uuid
from concurrent.futures import Future
from datetime import timedelta
from enum import Enum
from pathlib import Path
from typing import Callable, Generator, Any, Union, List

from packaging import version

os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"

from huggingface_hub import SpaceStage
from huggingface_hub.utils import build_hf_headers
from gradio_client import utils
from importlib.metadata import distribution, PackageNotFoundError

try:
    assert distribution("gradio_client") is not None
    have_gradio_client = True
    is_gradio_client_version7plus = distribution("gradio_client").version.startswith("0.7.")
except (PackageNotFoundError, AssertionError):
    have_gradio_client = False
    is_gradio_client_version7plus = False

from gradio_client.client import Job, DEFAULT_TEMP_DIR, Endpoint
from gradio_client import Client


def check_job(job, timeout=0.0, raise_exception=True, verbose=False):
    """Check a submitted Job for failure, waiting at most timeout seconds."""
    if timeout == 0:
        # peek at any exception already raised, without blocking
        e = job.future._exception
    else:
        try:
            e = job.future.exception(timeout=timeout)
        except concurrent.futures.TimeoutError:
            if verbose:
                print("not enough time to determine job status: %s" % timeout)
            e = None
    if e:
        if raise_exception:
            raise RuntimeError(e)
        return e
    return None


class LangChainAction(Enum):
    """LangChain action"""

    QUERY = "Query"
    SUMMARIZE_MAP = "Summarize"
    EXTRACT = "Extract"


pre_prompt_query0 = "Pay attention and remember the information below, which will help to answer the question or imperative after the context ends."
prompt_query0 = "According to only the information in the document sources provided within the context above: "

pre_prompt_summary0 = ""
prompt_summary0 = "Using only the information in the document sources above, write a condensed and concise summary of key results (preferably as bullet points)."

pre_prompt_extraction0 = "In order to extract information, pay attention to the following text."
prompt_extraction0 = "Using only the information in the document sources above, extract "

hyde_llm_prompt0 = "Answer this question with vibrant details in order for some NLP embedding model to use that answer as better query than original question: "
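
# Illustrative sketch (an assumption, not recovered from the original source):
# how check_job() pairs with a Job returned by submit().  With timeout=0 it only
# peeks at an already-raised exception; with a positive timeout it waits up to
# that long for a verdict.  "/system_hash" is the endpoint get_server_hash() uses.
#
#     job = client.submit(api_name="/system_hash")
#     err = check_job(job, timeout=5, raise_exception=False, verbose=True)
#     if err is None:
#         print("server git hash:", job.result())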


class GradioClient(Client):
    """
    Parent class of gradio client
    To handle automatically refreshing client if detect gradio server changed
    """

    def __init__(
        self,
        src: str,
        hf_token: str | None = None,
        max_workers: int = 40,
        serialize: bool = None,
        output_dir: str | Path | None = DEFAULT_TEMP_DIR,
        verbose: bool = True,
        auth: tuple[str, str] | None = None,
        h2ogpt_key: str = None,
        persist: bool = False,
        check_hash: bool = True,
        check_model_name: bool = False,
    ):
        """
        Parameters:
        src: Either the name of the Hugging Face Space to load (e.g.
            "abidlabs/whisper-large-v2") or the full URL (including "http" or "https")
            of the hosted Gradio app to load (e.g. "http://mydomain.com/app" or
            "https://bec81a83-5b5c-471e.gradio.live/").
        hf_token: The Hugging Face token to use to access private Spaces. Automatically
            fetched if you are logged in via the Hugging Face Hub CLI. Obtain from:
            https://huggingface.co/settings/token
        max_workers: The maximum number of thread workers that can be used to make
            requests to the remote Gradio app simultaneously.
        serialize: Whether the client should serialize the inputs and deserialize the
            outputs of the remote API. If set to False, the client will pass the inputs
            and outputs as-is, without serializing/deserializing them. E.g. if you set
            this to False, you'd submit an image in base64 format instead of a filepath,
            and you'd get back an image in base64 format from the remote API instead of
            a filepath.
        output_dir: The directory to save files that are downloaded from the remote API.
            If None, reads from the GRADIO_TEMP_DIR environment variable. Defaults to a
            temporary directory on your machine.
        verbose: Whether the client should print statements to the console.
        h2ogpt_key: h2oGPT key to gain access to the server.
        persist: Whether to persist the state, so repeated calls are aware of the prior
            user session.  This allows the scratch MyData to be reused, etc.  This also
            maintains the chat_conversation history.
        check_hash: Whether to check the git hash for consistency between server and
            client to ensure the API is always up to date.
        check_model_name: Whether to check the model name here (adds delays), or just
            let the server fail (faster).
        """
        if serialize is None:
            serialize = False
        self.args = tuple([src])
        self.kwargs = dict(
            hf_token=hf_token,
            max_workers=max_workers,
            serialize=serialize,
            output_dir=output_dir,
            verbose=verbose,
            h2ogpt_key=h2ogpt_key,
            persist=persist,
            check_hash=check_hash,
            check_model_name=check_model_name,
        )
        if is_gradio_client_version7plus:
            self.kwargs.update(dict(auth=auth))

        self.verbose = verbose
        self.hf_token = hf_token
        self.serialize = serialize
        self.space_id = None
        self.cookies: dict[str, str] = {}
        self.output_dir = str(output_dir) if isinstance(output_dir, Path) else output_dir
        self.max_workers = max_workers
        self.src = src
        self.auth = auth
        self.config = None
        self.h2ogpt_key = h2ogpt_key
        self.persist = persist
        self.check_hash = check_hash
        self.check_model_name = check_model_name
        self.chat_conversation = []
        self.server_hash = None

    def __repr__(self):
        if self.config:
            return self.view_api(print_info=False, return_format="str")
        return "Not setup for %s" % self.src

    def __str__(self):
        if self.config:
            return self.view_api(print_info=False, return_format="str")
        return "Not setup for %s" % self.src

    def setup(self):
        src = self.src

        self.headers = build_hf_headers(
            token=self.hf_token,
            library_name="gradio_client",
            library_version=utils.__version__,
        )
        if src.startswith("http://") or src.startswith("https://"):
            _src = src if src.endswith("/") else src + "/"
        else:
            _src = self._space_name_to_src(src)
            if _src is None:
                raise ValueError(
                    f"Could not find Space: {src}. If it is a private Space, please provide an hf_token."
                )
            self.space_id = src
        self.src = _src
        state = self._get_space_state()
        while state == SpaceStage.BUILDING:
            if self.verbose:
                print("Space is still building. Please wait...")
            time.sleep(2)
            state = self._get_space_state()
        if state in utils.INVALID_RUNTIME:
            raise ValueError(
                f"The current space is in the invalid state: {state}. Please contact the owner to fix this."
            )
        if self.verbose:
            print(f"Loaded as API: {self.src} ✔")

        if is_gradio_client_version7plus and self.auth is not None:
            self._login(self.auth)

        self.api_url = urllib.parse.urljoin(self.src, utils.API_URL)
        if is_gradio_client_version7plus:
            self.sse_url = urllib.parse.urljoin(self.src, utils.SSE_URL)
            self.sse_data_url = urllib.parse.urljoin(self.src, utils.SSE_DATA_URL)
        self.ws_url = urllib.parse.urljoin(self.src.replace("http", "ws", 1), utils.WS_URL)
        self.upload_url = urllib.parse.urljoin(self.src, utils.UPLOAD_URL)
        self.reset_url = urllib.parse.urljoin(self.src, utils.RESET_URL)
        self.config = self._get_config()
        if is_gradio_client_version7plus:
            self.protocol: str = self.config.get("protocol", "ws")
            self.app_version = version.parse(self.config.get("version", "2.0"))
            self._info = self._get_api_info()
        self.session_hash = str(uuid.uuid4())

        if is_gradio_client_version7plus:
            from gradio_client.client import EndpointV3Compatibility

            endpoint_class = Endpoint if self.protocol.startswith("sse") else EndpointV3Compatibility
            self.endpoints = [
                endpoint_class(self, fn_index, dependency)
                for fn_index, dependency in enumerate(self.config["dependencies"])
            ]
        else:
            self.endpoints = [
                Endpoint(self, fn_index, dependency)
                for fn_index, dependency in enumerate(self.config["dependencies"])
            ]

        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers)

        self.server_hash = self.get_server_hash()

        if is_gradio_client_version7plus:
            self.stream_open = False
            self.streaming_future: Future | None = None
            from gradio_client.utils import Message

            self.pending_messages_per_event: dict[str, list[Message | None]] = {}
            self.pending_event_ids: set[str] = set()

        return self

    def get_server_hash(self):
        """
        Get server hash using super without any refresh action triggered.
        Returns: git hash of gradio server
        """
        if self.config is None:
            self.setup()
        if self.check_hash:
            return super().submit(api_name="/system_hash").result()
        return ""

    def refresh_client_if_should(self):
        if self.config is None:
            self.setup()
        # get current hash in order to update api_name -> fn_index map in case gradio server changed
        server_hash = self.get_server_hash()
        if self.server_hash != server_hash:
            if self.verbose:
                print("server hash changed: %s %s" % (self.server_hash, server_hash), flush=True)
            if self.server_hash is not None and self.persist and self.verbose:
                print(
                    "Failed to persist due to server hash change,"
                    " only kept chat_conversation not user session hash",
                    flush=True,
                )
            self.refresh_client()
            self.server_hash = server_hash

    def refresh_client(self):
        """
        Ensure every client call is independent.
        Also ensure map between api_name and fn_index is updated in case server changed
        (e.g. restarted with new code).
        Returns:
        """
        if self.config is None:
            self.setup()
        # need a fresh client, else can get stale state from old server
        kwargs = self.kwargs.copy()
        kwargs.pop("h2ogpt_key", None)
        kwargs.pop("persist", None)
        kwargs.pop("check_hash", None)
        kwargs.pop("check_model_name", None)
        ntrials = 3
        client = None
        for trial in range(ntrials + 1):
            try:
                client = Client(*self.args, **kwargs)
                break
            except ValueError as e:
                if trial >= ntrials:
                    raise
                if self.verbose:
                    print("Trying refresh %d/%d %s" % (trial, ntrials, str(e)), flush=True)
                time.sleep(1)
        if client is None:
            raise RuntimeError("Failed to get new client")
        session_hash0 = self.session_hash if self.persist else None
        for k, v in client.__dict__.items():
            setattr(self, k, v)
        if session_hash0 is not None:
            self.session_hash = session_hash0
        if self.verbose:
            print("Hit refresh_client(): %s %s" % (session_hash0, self.session_hash), flush=True)

    def clone(self):
        if self.config is None:
            self.setup()
        client = GradioClient("")
        for k, v in self.__dict__.items():
            setattr(client, k, v)
        client.reset_session()
        client.executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers)
        client.endpoints = [
            Endpoint(client, fn_index, dependency)
            for fn_index, dependency in enumerate(client.config["dependencies"])
        ]
        client.server_hash = self.server_hash
        return client

    def submit(
        self,
        *args,
        api_name: str | None = None,
        fn_index: int | None = None,
        result_callbacks: Callable | list[Callable] | None = None,
    ) -> Job:
        if self.config is None:
            self.setup()
        try:
            self.refresh_client_if_should()
            job = super().submit(*args, api_name=api_name, fn_index=fn_index)
        except Exception as e:
            print("Hit e=%s %s" % (str(e), traceback.format_exc()), flush=True)
            # force reconfig in case that is all that is needed
            self.refresh_client()
            job = super().submit(*args, api_name=api_name, fn_index=fn_index)

        # see if the job immediately failed
        e = check_job(job, timeout=0.01, raise_exception=False)
        if e is not None:
            print(
                "GR job failed: %s %s" % (str(e), "".join(traceback.format_tb(e.__traceback__))),
                flush=True,
            )
            # force reconfig in case that is all that is needed
            self.refresh_client()
            job = super().submit(*args, api_name=api_name, fn_index=fn_index)
            e2 = check_job(job, timeout=0.1, raise_exception=False)
            if e2 is not None:
                print(
                    "GR job failed again: %s %s"
                    % (str(e2), "".join(traceback.format_tb(e2.__traceback__))),
                    flush=True,
                )

        return job

    def question(self, instruction: str, *args, **kwargs) -> str:
        """
        Prompt LLM (direct to LLM with instruct prompting required for instruct models) and get response.
        """
        kwargs["instruction"] = kwargs.get("instruction", instruction)
        kwargs["langchain_action"] = LangChainAction.QUERY.value
        kwargs["langchain_mode"] = "LLM"
        ret = ""
        for response, texts_out in self.query_or_summarize_or_extract(*args, **kwargs):
            ret = response
        return ret

    def question_stream(
        self, instruction: str, *args, **kwargs
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Prompt LLM (direct to LLM with instruct prompting required for instruct models) and stream response.
        """
        kwargs["instruction"] = kwargs.get("instruction", instruction)
        kwargs["langchain_action"] = LangChainAction.QUERY.value
        kwargs["langchain_mode"] = "LLM"
        ret = yield from self.query_or_summarize_or_extract(*args, **kwargs)
        return ret

    def query(self, query: str, *args, **kwargs) -> str:
        """
        Search for documents matching a query, then ask that query to LLM with those documents.
        """
        kwargs["instruction"] = kwargs.get("instruction", query)
        kwargs["langchain_action"] = LangChainAction.QUERY.value
        ret = ""
        for response, texts_out in self.query_or_summarize_or_extract(*args, **kwargs):
            ret = response
        return ret

    def query_stream(
        self, query: str, *args, **kwargs
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Search for documents matching a query, then ask that query to LLM with those documents.
        """
        kwargs["instruction"] = kwargs.get("instruction", query)
        kwargs["langchain_action"] = LangChainAction.QUERY.value
        ret = yield from self.query_or_summarize_or_extract(*args, **kwargs)
        return ret

    def summarize(self, *args, query: str = None, focus: str = None, **kwargs) -> str:
        """
        Search for documents matching a focus, then ask a query to LLM with those documents.
        If focus is "" or None, no similarity search is done and all documents
        (up to top_k_docs) are used.
        """
        kwargs["prompt_summary"] = kwargs.get("prompt_summary", query) or prompt_summary0
        kwargs["instruction"] = kwargs.get("instruction", focus)
        kwargs["langchain_action"] = LangChainAction.SUMMARIZE_MAP.value
        ret = ""
        for response, texts_out in self.query_or_summarize_or_extract(*args, **kwargs):
            ret = response
        return ret

    def summarize_stream(
        self, *args, query: str = None, focus: str = None, **kwargs
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Search for documents matching a focus, then ask a query to LLM with those documents.
        If focus is "" or None, no similarity search is done and all documents
        (up to top_k_docs) are used.
        """
        kwargs["prompt_summary"] = kwargs.get("prompt_summary", query) or prompt_summary0
        kwargs["instruction"] = kwargs.get("instruction", focus)
        kwargs["langchain_action"] = LangChainAction.SUMMARIZE_MAP.value
        ret = yield from self.query_or_summarize_or_extract(*args, **kwargs)
        return ret

    def extract(self, *args, query: str = None, focus: str = None, **kwargs) -> list[str]:
        """
        Search for documents matching a focus, then ask the LLM to extract from those documents.
        If focus is "" or None, no similarity search is done and all documents
        (up to top_k_docs) are used.
        """
        kwargs["prompt_extraction"] = kwargs.get("prompt_extraction", query) or prompt_extraction0
        kwargs["instruction"] = kwargs.get("instruction", focus)
        kwargs["langchain_action"] = LangChainAction.EXTRACT.value
        ret = []
        for response, texts_out in self.query_or_summarize_or_extract(*args, **kwargs):
            ret = response
        return ret

    def extract_stream(
        self, *args, query: str = None, focus: str = None, **kwargs
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Search for documents matching a focus, then ask the LLM to extract from those documents.
        If focus is "" or None, no similarity search is done and all documents
        (up to top_k_docs) are used.
        """
        kwargs["prompt_extraction"] = kwargs.get("prompt_extraction", query) or prompt_extraction0
        kwargs["instruction"] = kwargs.get("instruction", focus)
        kwargs["langchain_action"] = LangChainAction.EXTRACT.value
        ret = yield from self.query_or_summarize_or_extract(*args, **kwargs)
        return ret
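
    # Illustrative sketch (an assumption, not recovered from the original
    # source): every helper above funnels into query_or_summarize_or_extract()
    # below, so typical calls look like:
    #
    #     client = GradioClient("http://localhost:7860")
    #     answer = client.query("What is in the document?", url="https://h2o.ai")
    #     summary = client.summarize(query="Summarize key points.", file="paper.pdf")
    #     for chunk, _texts in client.query_stream("Explain the results."):
    #         print(chunk, end="", flush=True)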

    def query_or_summarize_or_extract(
        self,
        h2ogpt_key: str = None,
        instruction: str = "",
        text: list[str] | str | None = None,
        file: list[str] | str | None = None,
        url: list[str] | str | None = None,
        embed: bool = True,
        chunk: bool = True,
        chunk_size: int = 512,
        langchain_mode: str = None,
        langchain_action: str = LangChainAction.QUERY.value,
        langchain_agents: List[str] = [],
        top_k_docs: int = 10,
        document_choice: Union[str, List[str]] = "All",
        document_subset: str = "Relevant",
        document_source_substrings: List[str] = [],
        document_source_substrings_op: str = "and",
        document_content_substrings: List[str] = [],
        document_content_substrings_op: str = "and",
        system_prompt: str | None = "",
        pre_prompt_query: str | None = pre_prompt_query0,
        prompt_query: str | None = prompt_query0,
        pre_prompt_summary: str | None = pre_prompt_summary0,
        prompt_summary: str | None = prompt_summary0,
        pre_prompt_extraction: str | None = pre_prompt_extraction0,
        prompt_extraction: str | None = prompt_extraction0,
        hyde_llm_prompt: str | None = hyde_llm_prompt0,
        model: str | int | None = None,
        stream_output: bool = False,
        do_sample: bool = False,
        temperature: float = 0.0,
        top_p: float = 0.75,
        top_k: int = 40,
        repetition_penalty: float = 1.07,
        penalty_alpha: float = 0.0,
        max_time: int = 360,
        max_new_tokens: int = 1024,
        add_search_to_context: bool = False,
        chat_conversation: list[tuple[str, str]] | None = None,
        text_context_list: list[str] | None = None,
        docs_ordering_type: str | None = None,
        min_max_new_tokens: int = 512,
        max_input_tokens: int = -1,
        max_total_input_tokens: int = -1,
        docs_token_handling: str = "split_or_merge",
        docs_joiner: str = "\n\n",
        hyde_level: int = 0,
        hyde_template: str | None = None,
        hyde_show_only_final: bool = False,
        doc_json_mode: bool = False,
        asserts: bool = False,
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Query, summarize, or extract (depending upon langchain_action) against documents
        uploaded here via text, file, or url, or documents already on the server.

        Args:
        h2ogpt_key: h2oGPT key to gain access to the server (overrides the constructor's key)
        instruction: query for Query action, or focusing guidance for Summarize/Extract actions
        text: textual content uploaded to the server before the action
        file: one or more local files uploaded to the server before the action
        url: one or more URLs the server ingests before the action
        embed: whether to embed uploaded content for similarity search
        chunk: whether to chunk uploaded content
        chunk_size: chunk size to use when chunking
        langchain_mode: document collection to use; None means the scratch collection "MyData"
        langchain_action: one of the LangChainAction values: "Query", "Summarize", "Extract"
        langchain_agents: which agents to use, if any
        top_k_docs: number of document chunks to use for the context
        document_choice: which documents to use ("All" means all in the collection)
        document_subset: which subset of documents to consider (e.g. "Relevant")
        document_source_substrings, document_source_substrings_op: filter documents by
            source substrings, combined with 'and'/'or'
        document_content_substrings, document_content_substrings_op: filter documents by
            content substrings, combined with 'and'/'or'
        system_prompt: system prompt passed to the LLM
        pre_prompt_query, prompt_query: prompts wrapped around the context for Query action
        pre_prompt_summary, prompt_summary: prompts wrapped around the context for Summarize action
        pre_prompt_extraction, prompt_extraction: prompts wrapped around the context for Extract action
        hyde_llm_prompt: prompt used to generate the HYDE answer
        model: base_model name, or integer index into the server's model list; None means first model
        stream_output: whether to stream the response
        do_sample, temperature, top_p, top_k, repetition_penalty, penalty_alpha: generation parameters
        max_time: maximum time in seconds for generation
        add_search_to_context: whether to add web search results to the context
        chat_conversation: list of (human, bot) tuples; None means use self.chat_conversation
        text_context_list: list of texts to use directly as context, without upload
        docs_ordering_type: how to order document chunks in the context
        max_input_tokens: >0 means use that to limit context filling to that many tokens
        max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across
                                all LLM calls for single summarization/extraction action
        max_new_tokens: Maximum new tokens
        min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of
                            prompt, docs, etc.
        docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens
                             or model_max_len) chunks for query, or top_k_docs original document
                             chunks for summarization.
                             None or 'split_or_merge' means same as 'chunk' for query, while for
                             summarization it merges documents to fill up to max_input_tokens or
                             model_max_len tokens
        docs_joiner: string to join lists of text when doing split_or_merge.  None means '\n\n'
        hyde_level: 0-3 for HYDE.
                    0 uses just query to find similarity with docs
                    1 uses query + pure LLM response to find similarity with docs
                    2 uses query + LLM response using docs to find similarity with docs
                    3+: etc.
        hyde_template: see src/gen.py
        hyde_show_only_final: see src/gen.py
        doc_json_mode: see src/gen.py
        asserts: whether to do asserts to ensure handling is correct

        Returns: summary/answer: str, or extraction: List[str]
        """
        if self.config is None:
            self.setup()
        client = self if self.persist else self.clone()
        h2ogpt_key = h2ogpt_key or self.h2ogpt_key
        client.h2ogpt_key = h2ogpt_key

        langchain_mode = langchain_mode or "MyData"
        loaders = tuple([None, None, None, None, None, None])
        doc_options = tuple([langchain_mode, chunk, chunk_size, embed])
        asserts |= bool(os.getenv("HARD_ASSERTS", False))
        if text and isinstance(text, str) and not file and not url and not text_context_list:
            # then can pass through directly as context, avoiding a separate upload
            text_context_list = [text]

        res = None
        if text:
            t0 = time.time()
            res = client.predict(text, *doc_options, *loaders, api_name="/add_text")
            t1 = time.time()
            print("upload text: %s" % str(timedelta(seconds=t1 - t0)), flush=True)
            if asserts:
                assert res[0] is None
                assert res[1] == langchain_mode
                assert "user_paste" in res[2]
                assert res[3] == ""
        if file:
            # upload file(s).  Can be list or single file
            res = client.predict(file, api_name="/upload_api")
            file = res
            res = client.predict(file, *doc_options, *loaders, api_name="/add_file_api")
            if asserts:
                assert res[0] is None
                assert res[1] == langchain_mode
                assert os.path.basename(file) in res[2]
                assert res[3] == ""
        if url:
            res = client.predict(url, *doc_options, *loaders, api_name="/add_url")
            if asserts:
                assert res[0] is None
                assert res[1] == langchain_mode
                assert url in res[2]
                assert res[3] == ""
                assert res[4]
        if res and not res[3] and "Exception" in res[2]:
            print("Exception: %s" % res[2], flush=True)

        # Summarize and Extract share the "summary" slots in the server API
        pre_prompt_summary = (
            pre_prompt_summary
            if langchain_action == LangChainAction.SUMMARIZE_MAP.value
            else pre_prompt_extraction
        )
        prompt_summary = (
            prompt_summary
            if langchain_action == LangChainAction.SUMMARIZE_MAP.value
            else prompt_extraction
        )

        kwargs = dict(
            h2ogpt_key=h2ogpt_key,
            instruction=instruction,
            langchain_mode=langchain_mode,
            langchain_action=langchain_action,
            langchain_agents=langchain_agents,
            top_k_docs=top_k_docs,
            document_choice=document_choice,
            document_subset=document_subset,
            document_source_substrings=document_source_substrings,
            document_source_substrings_op=document_source_substrings_op,
            document_content_substrings=document_content_substrings,
            document_content_substrings_op=document_content_substrings_op,
            system_prompt=system_prompt,
            pre_prompt_query=pre_prompt_query,
            prompt_query=prompt_query,
            pre_prompt_summary=pre_prompt_summary,
            prompt_summary=prompt_summary,
            hyde_llm_prompt=hyde_llm_prompt,
            visible_models=model,
            stream_output=stream_output,
            do_sample=do_sample,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
            penalty_alpha=penalty_alpha,
            max_time=max_time,
            max_new_tokens=max_new_tokens,
            add_search_to_context=add_search_to_context,
            chat_conversation=chat_conversation or self.chat_conversation,
            text_context_list=text_context_list,
            docs_ordering_type=docs_ordering_type,
            min_max_new_tokens=min_max_new_tokens,
            max_input_tokens=max_input_tokens,
            max_total_input_tokens=max_total_input_tokens,
            docs_token_handling=docs_token_handling,
            docs_joiner=docs_joiner,
            hyde_level=hyde_level,
            hyde_template=hyde_template,
            hyde_show_only_final=hyde_show_only_final,
            doc_json_mode=doc_json_mode,
        )
        self.check_model(model)

        trials = 3
        trial = 0
        while True:
            try:
                if not stream_output:
                    res = client.predict(str(dict(kwargs)), api_name="/submit_nochat_api")
                    res_dict = ast.literal_eval(res)
                    response = res_dict["response"]
                    if langchain_action != LangChainAction.EXTRACT.value:
                        response = response.strip()
                    else:
                        response = [r.strip() for r in ast.literal_eval(response)]
                    sources = res_dict["sources"]
                    scores_out = [x["score"] for x in sources]
                    texts_out = [x["content"] for x in sources]
                    if asserts and text and not file and not url:
                        assert any(
                            text[:cutoff] == texts_out[0][:cutoff] for cutoff in range(len(text) + 1)
                        )
                        assert len(texts_out) == len(scores_out)
                    yield response, texts_out
                    self.chat_conversation.append((instruction, response))
                else:
                    job = client.submit(str(dict(kwargs)), api_name="/submit_nochat_api")
                    text0 = ""
                    response = ""
                    while not job.done():
                        if job.communicator.job.latest_status.code.name == "FINISHED":
                            break
                        e = check_job(job, timeout=0.0, raise_exception=False)
                        if e is not None:
                            break
                        outputs_list = job.communicator.job.outputs
                        if outputs_list:
                            res = outputs_list[-1]
                            res_dict = ast.literal_eval(res)
                            response = res_dict["response"]
                            text_chunk = response[len(text0):]
                            if not text_chunk:
                                time.sleep(0.001)
                                continue
                            text0 = response
                            assert text_chunk, "must yield non-empty string"
                            yield text_chunk, []
                        time.sleep(0.01)

                    # done waiting, get final result(s)
                    res_all = job.outputs()
                    if len(res_all) > 0:
                        # raise if failure even at the end
                        check_job(job, timeout=0.1, raise_exception=True)
                        res = res_all[-1]
                        res_dict = ast.literal_eval(res)
                        response = res_dict["response"]
                        sources = res_dict["sources"]
                        texts_out = [x["content"] for x in sources]
                        yield response[len(text0):], texts_out
                        self.chat_conversation.append((instruction, response[len(text0):]))
                    else:
                        check_job(job, timeout=0.3, raise_exception=True)
                        yield response[len(text0):], []
                        self.chat_conversation.append((instruction, response[len(text0):]))
                break
            except Exception as e:
                print(
                    "h2oGPT predict failed: %s %s"
                    % (str(e), "".join(traceback.format_tb(e.__traceback__))),
                    flush=True,
                )
                trial += 1
                if trial >= trials:
                    raise
                print("trying again: %s" % trial, flush=True)
                time.sleep(1 * trial)
            finally:
                # in case server changed, keep our hash in sync with the (possibly cloned) client
                self.server_hash = client.server_hash

    def check_model(self, model):
        if model != 0 and self.check_model_name:
            valid_llms = self.list_models()
            if (
                isinstance(model, int)
                and model >= len(valid_llms)
                or isinstance(model, str)
                and model not in valid_llms
            ):
                did_you_mean = ""
                if isinstance(model, str):
                    alt = difflib.get_close_matches(model, valid_llms, 1)
                    if alt:
                        did_you_mean = f"\nDid you mean {repr(alt[0])}?"
                raise RuntimeError(
                    f"Invalid llm: {repr(model)}, must be either an integer between "
                    f"0 and {len(valid_llms) - 1} or one of the following values: "
                    f"{valid_llms}.{did_you_mean}"
                )

    def get_models_full(self) -> list[dict[str, Any]]:
        """
        Full model info in list of dict
        """
        if self.config is None:
            self.setup()
        return ast.literal_eval(self.predict(api_name="/model_names"))

    def list_models(self) -> list[str]:
        """
        Model names available from endpoint
        """
        if self.config is None:
            self.setup()
        return [x["base_model"] for x in ast.literal_eval(self.predict(api_name="/model_names"))]
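

# Minimal end-to-end usage sketch (an assumption, not part of the original
# module): the URL, key, and questions are placeholders for your own server.
if __name__ == "__main__":
    client = GradioClient("http://localhost:7860", h2ogpt_key=os.getenv("H2OGPT_KEY"))
    # direct LLM question; question() forces langchain_mode="LLM" (no documents)
    print(client.question("Who are you?"))
    # retrieval-augmented query over a freshly ingested URL, streamed
    for chunk, _texts in client.query_stream("What does h2oGPT do?", url="https://h2o.ai"):
        print(chunk, end="", flush=True)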