from __future__ import annotations

import ast
import concurrent.futures
import difflib
import os
import time
import traceback
import urllib.parse
import uuid
from concurrent.futures import Future
from datetime import timedelta
from enum import Enum
from pathlib import Path
from typing import Callable, Generator, Any, Union, List

from packaging import version

os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"

from huggingface_hub import SpaceStage
from huggingface_hub.utils import build_hf_headers
from gradio_client import utils
from importlib.metadata import distribution, PackageNotFoundError

try:
    assert distribution("gradio_client") is not None
    have_gradio_client = True
    is_gradio_client_version7plus = distribution("gradio_client").version.startswith("0.7.")
except (PackageNotFoundError, AssertionError):
    have_gradio_client = False
    is_gradio_client_version7plus = False

from gradio_client.client import Job, DEFAULT_TEMP_DIR, Endpoint
from gradio_client import Client


def check_job(job, timeout=0.0, raise_exception=True, verbose=False):
    """Check a submitted Job for failure, waiting at most timeout seconds."""
    if timeout == 0:
        # peek at any exception already raised, without blocking
        e = job.future._exception
    else:
        try:
            e = job.future.exception(timeout=timeout)
        except concurrent.futures.TimeoutError:
            if verbose:
                print("not enough time to determine job status: %s" % timeout)
            e = None
    if e:
        if raise_exception:
            raise RuntimeError(e)
        return e
    return None


class LangChainAction(Enum):
    """LangChain action"""

    QUERY = "Query"
    SUMMARIZE_MAP = "Summarize"
    EXTRACT = "Extract"


pre_prompt_query0 = "Pay attention and remember the information below, which will help to answer the question or imperative after the context ends."
prompt_query0 = "According to only the information in the document sources provided within the context above: "

pre_prompt_summary0 = ""
prompt_summary0 = "Using only the information in the document sources above, write a condensed and concise summary of key results (preferably as bullet points)."

pre_prompt_extraction0 = "In order to extract information, pay attention to the following text."
prompt_extraction0 = "Using only the information in the document sources above, extract "

hyde_llm_prompt0 = "Answer this question with vibrant details in order for some NLP embedding model to use that answer as better query than original question: "
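
# Illustrative sketch (an assumption, not recovered from the original source):
# how check_job() pairs with a Job returned by submit().  With timeout=0 it only
# peeks at an already-raised exception; with a positive timeout it waits up to
# that long for a verdict.  "/system_hash" is the endpoint get_server_hash() uses.
#
#     job = client.submit(api_name="/system_hash")
#     err = check_job(job, timeout=5, raise_exception=False, verbose=True)
#     if err is None:
#         print("server git hash:", job.result())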


class GradioClient(Client):
    """
    Parent class of gradio client
    To handle automatically refreshing client if detect gradio server changed
    """

    def __init__(
        self,
        src: str,
        hf_token: str | None = None,
        max_workers: int = 40,
        serialize: bool = None,
        output_dir: str | Path | None = DEFAULT_TEMP_DIR,
        verbose: bool = True,
        auth: tuple[str, str] | None = None,
        h2ogpt_key: str = None,
        persist: bool = False,
        check_hash: bool = True,
        check_model_name: bool = False,
    ):
        """
        Parameters:
        src: Either the name of the Hugging Face Space to load (e.g.
            "abidlabs/whisper-large-v2") or the full URL (including "http" or "https")
            of the hosted Gradio app to load (e.g. "http://mydomain.com/app" or
            "https://bec81a83-5b5c-471e.gradio.live/").
        hf_token: The Hugging Face token to use to access private Spaces. Automatically
            fetched if you are logged in via the Hugging Face Hub CLI. Obtain from:
            https://huggingface.co/settings/token
        max_workers: The maximum number of thread workers that can be used to make
            requests to the remote Gradio app simultaneously.
        serialize: Whether the client should serialize the inputs and deserialize the
            outputs of the remote API. If set to False, the client will pass the inputs
            and outputs as-is, without serializing/deserializing them. E.g. if you set
            this to False, you'd submit an image in base64 format instead of a filepath,
            and you'd get back an image in base64 format from the remote API instead of
            a filepath.
        output_dir: The directory to save files that are downloaded from the remote API.
            If None, reads from the GRADIO_TEMP_DIR environment variable. Defaults to a
            temporary directory on your machine.
        verbose: Whether the client should print statements to the console.
        h2ogpt_key: h2oGPT key to gain access to the server.
        persist: Whether to persist the state, so repeated calls are aware of the prior
            user session.  This allows the scratch MyData to be reused, etc.  This also
            maintains the chat_conversation history.
        check_hash: Whether to check the git hash for consistency between server and
            client to ensure the API is always up to date.
        check_model_name: Whether to check the model name here (adds delays), or just
            let the server fail (faster).
        """
        if serialize is None:
            serialize = False
        self.args = tuple([src])
        self.kwargs = dict(
            hf_token=hf_token,
            max_workers=max_workers,
            serialize=serialize,
            output_dir=output_dir,
            verbose=verbose,
            h2ogpt_key=h2ogpt_key,
            persist=persist,
            check_hash=check_hash,
            check_model_name=check_model_name,
        )
        if is_gradio_client_version7plus:
            self.kwargs.update(dict(auth=auth))

        self.verbose = verbose
        self.hf_token = hf_token
        self.serialize = serialize
        self.space_id = None
        self.cookies: dict[str, str] = {}
        self.output_dir = str(output_dir) if isinstance(output_dir, Path) else output_dir
        self.max_workers = max_workers
        self.src = src
        self.auth = auth
        self.config = None
        self.h2ogpt_key = h2ogpt_key
        self.persist = persist
        self.check_hash = check_hash
        self.check_model_name = check_model_name
        self.chat_conversation = []
        self.server_hash = None

    def __repr__(self):
        if self.config:
            return self.view_api(print_info=False, return_format="str")
        return "Not setup for %s" % self.src

    def __str__(self):
        if self.config:
            return self.view_api(print_info=False, return_format="str")
        return "Not setup for %s" % self.src

    def setup(self):
        src = self.src

        self.headers = build_hf_headers(
            token=self.hf_token,
            library_name="gradio_client",
            library_version=utils.__version__,
        )
        if src.startswith("http://") or src.startswith("https://"):
            _src = src if src.endswith("/") else src + "/"
        else:
            _src = self._space_name_to_src(src)
            if _src is None:
                raise ValueError(
                    f"Could not find Space: {src}. If it is a private Space, please provide an hf_token."
                )
            self.space_id = src
        self.src = _src
        state = self._get_space_state()
        while state == SpaceStage.BUILDING:
            if self.verbose:
                print("Space is still building. Please wait...")
            time.sleep(2)
            state = self._get_space_state()
        if state in utils.INVALID_RUNTIME:
            raise ValueError(
                f"The current space is in the invalid state: {state}. Please contact the owner to fix this."
            )
        if self.verbose:
            print(f"Loaded as API: {self.src} ✔")

        if is_gradio_client_version7plus and self.auth is not None:
            self._login(self.auth)

        self.api_url = urllib.parse.urljoin(self.src, utils.API_URL)
        if is_gradio_client_version7plus:
            self.sse_url = urllib.parse.urljoin(self.src, utils.SSE_URL)
            self.sse_data_url = urllib.parse.urljoin(self.src, utils.SSE_DATA_URL)
        self.ws_url = urllib.parse.urljoin(self.src.replace("http", "ws", 1), utils.WS_URL)
        self.upload_url = urllib.parse.urljoin(self.src, utils.UPLOAD_URL)
        self.reset_url = urllib.parse.urljoin(self.src, utils.RESET_URL)
        self.config = self._get_config()
        if is_gradio_client_version7plus:
            self.protocol: str = self.config.get("protocol", "ws")
            self.app_version = version.parse(self.config.get("version", "2.0"))
            self._info = self._get_api_info()
        self.session_hash = str(uuid.uuid4())

        if is_gradio_client_version7plus:
            from gradio_client.client import EndpointV3Compatibility

            endpoint_class = Endpoint if self.protocol.startswith("sse") else EndpointV3Compatibility
            self.endpoints = [
                endpoint_class(self, fn_index, dependency)
                for fn_index, dependency in enumerate(self.config["dependencies"])
            ]
        else:
            self.endpoints = [
                Endpoint(self, fn_index, dependency)
                for fn_index, dependency in enumerate(self.config["dependencies"])
            ]

        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers)

        self.server_hash = self.get_server_hash()

        if is_gradio_client_version7plus:
            self.stream_open = False
            self.streaming_future: Future | None = None
            from gradio_client.utils import Message

            self.pending_messages_per_event: dict[str, list[Message | None]] = {}
            self.pending_event_ids: set[str] = set()

        return self

    def get_server_hash(self):
        """
        Get server hash using super without any refresh action triggered.
        Returns: git hash of gradio server
        """
        if self.config is None:
            self.setup()
        if self.check_hash:
            return super().submit(api_name="/system_hash").result()
        return ""

    def refresh_client_if_should(self):
        if self.config is None:
            self.setup()
        # get current hash in order to update api_name -> fn_index map in case gradio server changed
        server_hash = self.get_server_hash()
        if self.server_hash != server_hash:
            if self.verbose:
                print("server hash changed: %s %s" % (self.server_hash, server_hash), flush=True)
            if self.server_hash is not None and self.persist and self.verbose:
                print(
                    "Failed to persist due to server hash change,"
                    " only kept chat_conversation not user session hash",
                    flush=True,
                )
            self.refresh_client()
            self.server_hash = server_hash

    def refresh_client(self):
        """
        Ensure every client call is independent.
        Also ensure map between api_name and fn_index is updated in case server changed
        (e.g. restarted with new code).
        Returns:
        """
        if self.config is None:
            self.setup()
        # need a fresh client, else can get stale state from old server
        kwargs = self.kwargs.copy()
        kwargs.pop("h2ogpt_key", None)
        kwargs.pop("persist", None)
        kwargs.pop("check_hash", None)
        kwargs.pop("check_model_name", None)
        ntrials = 3
        client = None
        for trial in range(ntrials + 1):
            try:
                client = Client(*self.args, **kwargs)
                break
            except ValueError as e:
                if trial >= ntrials:
                    raise
                if self.verbose:
                    print("Trying refresh %d/%d %s" % (trial, ntrials, str(e)), flush=True)
                time.sleep(1)
        if client is None:
            raise RuntimeError("Failed to get new client")
        session_hash0 = self.session_hash if self.persist else None
        for k, v in client.__dict__.items():
            setattr(self, k, v)
        if session_hash0 is not None:
            self.session_hash = session_hash0
        if self.verbose:
            print("Hit refresh_client(): %s %s" % (session_hash0, self.session_hash), flush=True)

    def clone(self):
        if self.config is None:
            self.setup()
        client = GradioClient("")
        for k, v in self.__dict__.items():
            setattr(client, k, v)
        client.reset_session()
        client.executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers)
        client.endpoints = [
            Endpoint(client, fn_index, dependency)
            for fn_index, dependency in enumerate(client.config["dependencies"])
        ]
        client.server_hash = self.server_hash
        return client

    def submit(
        self,
        *args,
        api_name: str | None = None,
        fn_index: int | None = None,
        result_callbacks: Callable | list[Callable] | None = None,
    ) -> Job:
        if self.config is None:
            self.setup()
        try:
            self.refresh_client_if_should()
            job = super().submit(*args, api_name=api_name, fn_index=fn_index)
        except Exception as e:
            print("Hit e=%s %s" % (str(e), traceback.format_exc()), flush=True)
            # force reconfig in case that is all that is needed
            self.refresh_client()
            job = super().submit(*args, api_name=api_name, fn_index=fn_index)

        # see if the job immediately failed
        e = check_job(job, timeout=0.01, raise_exception=False)
        if e is not None:
            print(
                "GR job failed: %s %s" % (str(e), "".join(traceback.format_tb(e.__traceback__))),
                flush=True,
            )
            # force reconfig in case that is all that is needed
            self.refresh_client()
            job = super().submit(*args, api_name=api_name, fn_index=fn_index)
            e2 = check_job(job, timeout=0.1, raise_exception=False)
            if e2 is not None:
                print(
                    "GR job failed again: %s %s"
                    % (str(e2), "".join(traceback.format_tb(e2.__traceback__))),
                    flush=True,
                )

        return job

    def question(self, instruction: str, *args, **kwargs) -> str:
        """
        Prompt LLM (direct to LLM with instruct prompting required for instruct models) and get response.
        """
        kwargs["instruction"] = kwargs.get("instruction", instruction)
        kwargs["langchain_action"] = LangChainAction.QUERY.value
        kwargs["langchain_mode"] = "LLM"
        ret = ""
        for response, texts_out in self.query_or_summarize_or_extract(*args, **kwargs):
            ret = response
        return ret

    def question_stream(
        self, instruction: str, *args, **kwargs
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Prompt LLM (direct to LLM with instruct prompting required for instruct models) and stream response.
        """
        kwargs["instruction"] = kwargs.get("instruction", instruction)
        kwargs["langchain_action"] = LangChainAction.QUERY.value
        kwargs["langchain_mode"] = "LLM"
        ret = yield from self.query_or_summarize_or_extract(*args, **kwargs)
        return ret

    def query(self, query: str, *args, **kwargs) -> str:
        """
        Search for documents matching a query, then ask that query to LLM with those documents.
        """
        kwargs["instruction"] = kwargs.get("instruction", query)
        kwargs["langchain_action"] = LangChainAction.QUERY.value
        ret = ""
        for response, texts_out in self.query_or_summarize_or_extract(*args, **kwargs):
            ret = response
        return ret

    def query_stream(
        self, query: str, *args, **kwargs
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Search for documents matching a query, then ask that query to LLM with those documents.
        """
        kwargs["instruction"] = kwargs.get("instruction", query)
        kwargs["langchain_action"] = LangChainAction.QUERY.value
        ret = yield from self.query_or_summarize_or_extract(*args, **kwargs)
        return ret

    def summarize(self, *args, query: str = None, focus: str = None, **kwargs) -> str:
        """
        Search for documents matching a focus, then ask a query to LLM with those documents.
        If focus is "" or None, no similarity search is done and all documents
        (up to top_k_docs) are used.
        """
        kwargs["prompt_summary"] = kwargs.get("prompt_summary", query) or prompt_summary0
        kwargs["instruction"] = kwargs.get("instruction", focus)
        kwargs["langchain_action"] = LangChainAction.SUMMARIZE_MAP.value
        ret = ""
        for response, texts_out in self.query_or_summarize_or_extract(*args, **kwargs):
            ret = response
        return ret

    def summarize_stream(
        self, *args, query: str = None, focus: str = None, **kwargs
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Search for documents matching a focus, then ask a query to LLM with those documents.
        If focus is "" or None, no similarity search is done and all documents
        (up to top_k_docs) are used.
        """
        kwargs["prompt_summary"] = kwargs.get("prompt_summary", query) or prompt_summary0
        kwargs["instruction"] = kwargs.get("instruction", focus)
        kwargs["langchain_action"] = LangChainAction.SUMMARIZE_MAP.value
        ret = yield from self.query_or_summarize_or_extract(*args, **kwargs)
        return ret

    def extract(self, *args, query: str = None, focus: str = None, **kwargs) -> list[str]:
        """
        Search for documents matching a focus, then ask the LLM to extract from those documents.
        If focus is "" or None, no similarity search is done and all documents
        (up to top_k_docs) are used.
        """
        kwargs["prompt_extraction"] = kwargs.get("prompt_extraction", query) or prompt_extraction0
        kwargs["instruction"] = kwargs.get("instruction", focus)
        kwargs["langchain_action"] = LangChainAction.EXTRACT.value
        ret = []
        for response, texts_out in self.query_or_summarize_or_extract(*args, **kwargs):
            ret = response
        return ret

    def extract_stream(
        self, *args, query: str = None, focus: str = None, **kwargs
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Search for documents matching a focus, then ask the LLM to extract from those documents.
        If focus is "" or None, no similarity search is done and all documents
        (up to top_k_docs) are used.
        """
        kwargs["prompt_extraction"] = kwargs.get("prompt_extraction", query) or prompt_extraction0
        kwargs["instruction"] = kwargs.get("instruction", focus)
        kwargs["langchain_action"] = LangChainAction.EXTRACT.value
        ret = yield from self.query_or_summarize_or_extract(*args, **kwargs)
        return ret
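
    # Illustrative sketch (an assumption, not recovered from the original
    # source): every helper above funnels into query_or_summarize_or_extract()
    # below, so typical calls look like:
    #
    #     client = GradioClient("http://localhost:7860")
    #     answer = client.query("What is in the document?", url="https://h2o.ai")
    #     summary = client.summarize(query="Summarize key points.", file="paper.pdf")
    #     for chunk, _texts in client.query_stream("Explain the results."):
    #         print(chunk, end="", flush=True)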

    def query_or_summarize_or_extract(
        self,
        h2ogpt_key: str = None,
        instruction: str = "",
        text: list[str] | str | None = None,
        file: list[str] | str | None = None,
        url: list[str] | str | None = None,
        embed: bool = True,
        chunk: bool = True,
        chunk_size: int = 512,
        langchain_mode: str = None,
        langchain_action: str = LangChainAction.QUERY.value,
        langchain_agents: List[str] = [],
        top_k_docs: int = 10,
        document_choice: Union[str, List[str]] = "All",
        document_subset: str = "Relevant",
        document_source_substrings: List[str] = [],
        document_source_substrings_op: str = "and",
        document_content_substrings: List[str] = [],
        document_content_substrings_op: str = "and",
        system_prompt: str | None = "",
        pre_prompt_query: str | None = pre_prompt_query0,
        prompt_query: str | None = prompt_query0,
        pre_prompt_summary: str | None = pre_prompt_summary0,
        prompt_summary: str | None = prompt_summary0,
        pre_prompt_extraction: str | None = pre_prompt_extraction0,
        prompt_extraction: str | None = prompt_extraction0,
        hyde_llm_prompt: str | None = hyde_llm_prompt0,
        model: str | int | None = None,
        stream_output: bool = False,
        do_sample: bool = False,
        temperature: float = 0.0,
        top_p: float = 0.75,
        top_k: int = 40,
        repetition_penalty: float = 1.07,
        penalty_alpha: float = 0.0,
        max_time: int = 360,
        max_new_tokens: int = 1024,
        add_search_to_context: bool = False,
        chat_conversation: list[tuple[str, str]] | None = None,
        text_context_list: list[str] | None = None,
        docs_ordering_type: str | None = None,
        min_max_new_tokens: int = 512,
        max_input_tokens: int = -1,
        max_total_input_tokens: int = -1,
        docs_token_handling: str = "split_or_merge",
        docs_joiner: str = "\n\n",
        hyde_level: int = 0,
        hyde_template: str | None = None,
        hyde_show_only_final: bool = False,
        doc_json_mode: bool = False,
        asserts: bool = False,
    ) -> Generator[tuple[str | list[str], list[str]], None, None]:
        """
        Query, summarize, or extract (depending upon langchain_action) against documents
        uploaded here via text, file, or url, or documents already on the server.

        Args:
        h2ogpt_key: h2oGPT key to gain access to the server (overrides the constructor's key)
        instruction: query for Query action, or focusing guidance for Summarize/Extract actions
        text: textual content uploaded to the server before the action
        file: one or more local files uploaded to the server before the action
        url: one or more URLs the server ingests before the action
        embed: whether to embed uploaded content for similarity search
        chunk: whether to chunk uploaded content
        chunk_size: chunk size to use when chunking
        langchain_mode: document collection to use; None means the scratch collection "MyData"
        langchain_action: one of the LangChainAction values: "Query", "Summarize", "Extract"
        langchain_agents: which agents to use, if any
        top_k_docs: number of document chunks to use for the context
        document_choice: which documents to use ("All" means all in the collection)
        document_subset: which subset of documents to consider (e.g. "Relevant")
        document_source_substrings, document_source_substrings_op: filter documents by
            source substrings, combined with 'and'/'or'
        document_content_substrings, document_content_substrings_op: filter documents by
            content substrings, combined with 'and'/'or'
        system_prompt: system prompt passed to the LLM
        pre_prompt_query, prompt_query: prompts wrapped around the context for Query action
        pre_prompt_summary, prompt_summary: prompts wrapped around the context for Summarize action
        pre_prompt_extraction, prompt_extraction: prompts wrapped around the context for Extract action
        hyde_llm_prompt: prompt used to generate the HYDE answer
        model: base_model name, or integer index into the server's model list; None means first model
        stream_output: whether to stream the response
        do_sample, temperature, top_p, top_k, repetition_penalty, penalty_alpha: generation parameters
        max_time: maximum time in seconds for generation
        add_search_to_context: whether to add web search results to the context
        chat_conversation: list of (human, bot) tuples; None means use self.chat_conversation
        text_context_list: list of texts to use directly as context, without upload
        docs_ordering_type: how to order document chunks in the context
        max_input_tokens: >0 means use that to limit context filling to that many tokens
        max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across
                                all LLM calls for single summarization/extraction action
        max_new_tokens: Maximum new tokens
        min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of
                            prompt, docs, etc.
        docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens
                             or model_max_len) chunks for query, or top_k_docs original document
                             chunks for summarization.
                             None or 'split_or_merge' means same as 'chunk' for query, while for
                             summarization it merges documents to fill up to max_input_tokens or
                             model_max_len tokens
        docs_joiner: string to join lists of text when doing split_or_merge.  None means '\n\n'
        hyde_level: 0-3 for HYDE.
                    0 uses just query to find similarity with docs
                    1 uses query + pure LLM response to find similarity with docs
                    2 uses query + LLM response using docs to find similarity with docs
                    3+: etc.
        hyde_template: see src/gen.py
        hyde_show_only_final: see src/gen.py
        doc_json_mode: see src/gen.py
        asserts: whether to do asserts to ensure handling is correct

        Returns: summary/answer: str, or extraction: List[str]
        """
        if self.config is None:
            self.setup()
        client = self if self.persist else self.clone()
        h2ogpt_key = h2ogpt_key or self.h2ogpt_key
        client.h2ogpt_key = h2ogpt_key

        langchain_mode = langchain_mode or "MyData"
        loaders = tuple([None, None, None, None, None, None])
        doc_options = tuple([langchain_mode, chunk, chunk_size, embed])
        asserts |= bool(os.getenv("HARD_ASSERTS", False))
        if text and isinstance(text, str) and not file and not url and not text_context_list:
            # then can pass through directly as context, avoiding a separate upload
            text_context_list = [text]

        res = None
        if text:
            t0 = time.time()
            res = client.predict(text, *doc_options, *loaders, api_name="/add_text")
            t1 = time.time()
            print("upload text: %s" % str(timedelta(seconds=t1 - t0)), flush=True)
            if asserts:
                assert res[0] is None
                assert res[1] == langchain_mode
                assert "user_paste" in res[2]
                assert res[3] == ""
        if file:
            # upload file(s).  Can be list or single file
            res = client.predict(file, api_name="/upload_api")
            file = res
            res = client.predict(file, *doc_options, *loaders, api_name="/add_file_api")
            if asserts:
                assert res[0] is None
                assert res[1] == langchain_mode
                assert os.path.basename(file) in res[2]
                assert res[3] == ""
        if url:
            res = client.predict(url, *doc_options, *loaders, api_name="/add_url")
            if asserts:
                assert res[0] is None
                assert res[1] == langchain_mode
                assert url in res[2]
                assert res[3] == ""
                assert res[4]
        if res and not res[3] and "Exception" in res[2]:
            print("Exception: %s" % res[2], flush=True)

        # Summarize and Extract share the "summary" slots in the server API
        pre_prompt_summary = (
            pre_prompt_summary
            if langchain_action == LangChainAction.SUMMARIZE_MAP.value
            else pre_prompt_extraction
        )
        prompt_summary = (
            prompt_summary
            if langchain_action == LangChainAction.SUMMARIZE_MAP.value
            else prompt_extraction
        )

        kwargs = dict(
            h2ogpt_key=h2ogpt_key,
            instruction=instruction,
            langchain_mode=langchain_mode,
            langchain_action=langchain_action,
            langchain_agents=langchain_agents,
            top_k_docs=top_k_docs,
            document_choice=document_choice,
            document_subset=document_subset,
            document_source_substrings=document_source_substrings,
            document_source_substrings_op=document_source_substrings_op,
            document_content_substrings=document_content_substrings,
            document_content_substrings_op=document_content_substrings_op,
            system_prompt=system_prompt,
            pre_prompt_query=pre_prompt_query,
            prompt_query=prompt_query,
            pre_prompt_summary=pre_prompt_summary,
            prompt_summary=prompt_summary,
            hyde_llm_prompt=hyde_llm_prompt,
            visible_models=model,
            stream_output=stream_output,
            do_sample=do_sample,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
            penalty_alpha=penalty_alpha,
            max_time=max_time,
            max_new_tokens=max_new_tokens,
            add_search_to_context=add_search_to_context,
            chat_conversation=chat_conversation or self.chat_conversation,
            text_context_list=text_context_list,
            docs_ordering_type=docs_ordering_type,
            min_max_new_tokens=min_max_new_tokens,
            max_input_tokens=max_input_tokens,
            max_total_input_tokens=max_total_input_tokens,
            docs_token_handling=docs_token_handling,
            docs_joiner=docs_joiner,
            hyde_level=hyde_level,
            hyde_template=hyde_template,
            hyde_show_only_final=hyde_show_only_final,
            doc_json_mode=doc_json_mode,
        )
        self.check_model(model)

        trials = 3
        trial = 0
        while True:
            try:
                if not stream_output:
                    res = client.predict(str(dict(kwargs)), api_name="/submit_nochat_api")
                    res_dict = ast.literal_eval(res)
                    response = res_dict["response"]
                    if langchain_action != LangChainAction.EXTRACT.value:
                        response = response.strip()
                    else:
                        response = [r.strip() for r in ast.literal_eval(response)]
                    sources = res_dict["sources"]
                    scores_out = [x["score"] for x in sources]
                    texts_out = [x["content"] for x in sources]
                    if asserts and text and not file and not url:
                        assert any(
                            text[:cutoff] == texts_out[0][:cutoff] for cutoff in range(len(text) + 1)
                        )
                        assert len(texts_out) == len(scores_out)
                    yield response, texts_out
                    self.chat_conversation.append((instruction, response))
                else:
                    job = client.submit(str(dict(kwargs)), api_name="/submit_nochat_api")
                    text0 = ""
                    response = ""
                    while not job.done():
                        if job.communicator.job.latest_status.code.name == "FINISHED":
                            break
                        e = check_job(job, timeout=0.0, raise_exception=False)
                        if e is not None:
                            break
                        outputs_list = job.communicator.job.outputs
                        if outputs_list:
                            res = outputs_list[-1]
                            res_dict = ast.literal_eval(res)
                            response = res_dict["response"]
                            text_chunk = response[len(text0):]
                            if not text_chunk:
                                time.sleep(0.001)
                                continue
                            text0 = response
                            assert text_chunk, "must yield non-empty string"
                            yield text_chunk, []
                        time.sleep(0.01)

                    # done waiting, get final result(s)
                    res_all = job.outputs()
                    if len(res_all) > 0:
                        # raise if failure even at the end
                        check_job(job, timeout=0.1, raise_exception=True)
                        res = res_all[-1]
                        res_dict = ast.literal_eval(res)
                        response = res_dict["response"]
                        sources = res_dict["sources"]
                        texts_out = [x["content"] for x in sources]
                        yield response[len(text0):], texts_out
                        self.chat_conversation.append((instruction, response[len(text0):]))
                    else:
                        check_job(job, timeout=0.3, raise_exception=True)
                        yield response[len(text0):], []
                        self.chat_conversation.append((instruction, response[len(text0):]))
                break
            except Exception as e:
                print(
                    "h2oGPT predict failed: %s %s"
                    % (str(e), "".join(traceback.format_tb(e.__traceback__))),
                    flush=True,
                )
                trial += 1
                if trial >= trials:
                    raise
                print("trying again: %s" % trial, flush=True)
                time.sleep(1 * trial)
            finally:
                # in case server changed, keep our hash in sync with the (possibly cloned) client
                self.server_hash = client.server_hash

    def check_model(self, model):
        if model != 0 and self.check_model_name:
            valid_llms = self.list_models()
            if (
                isinstance(model, int)
                and model >= len(valid_llms)
                or isinstance(model, str)
                and model not in valid_llms
            ):
                did_you_mean = ""
                if isinstance(model, str):
                    alt = difflib.get_close_matches(model, valid_llms, 1)
                    if alt:
                        did_you_mean = f"\nDid you mean {repr(alt[0])}?"
                raise RuntimeError(
                    f"Invalid llm: {repr(model)}, must be either an integer between "
                    f"0 and {len(valid_llms) - 1} or one of the following values: "
                    f"{valid_llms}.{did_you_mean}"
                )

    def get_models_full(self) -> list[dict[str, Any]]:
        """
        Full model info in list of dict
        """
        if self.config is None:
            self.setup()
        return ast.literal_eval(self.predict(api_name="/model_names"))

    def list_models(self) -> list[str]:
        """
        Model names available from endpoint
        """
        if self.config is None:
            self.setup()
        return [x["base_model"] for x in ast.literal_eval(self.predict(api_name="/model_names"))]
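

# Minimal end-to-end usage sketch (an assumption, not part of the original
# module): the URL, key, and questions are placeholders for your own server.
if __name__ == "__main__":
    client = GradioClient("http://localhost:7860", h2ogpt_key=os.getenv("H2OGPT_KEY"))
    # direct LLM question; question() forces langchain_mode="LLM" (no documents)
    print(client.question("Who are you?"))
    # retrieval-augmented query over a freshly ingested URL, streamed
    for chunk, _texts in client.query_stream("What does h2oGPT do?", url="https://h2o.ai"):
        print(chunk, end="", flush=True)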