a Of@s\ UddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl Z ddl Z ddlZddlZddlmZmZmZddlmZmZddlZddlZddlZddlZddlZddlZddlmZddlZgZee d<ej!"dej!#ddZ$de$Z%z,ddl&Z&ddl'Z'ddl(Z(ddl)Z)ddl*Z*Wn6e+ytZ,ze+d e,d WYdZ,[,n dZ,[,00ddl-Z-dd l.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4dd l5m6Z6ddl7Z7dd l8Tddl9m:Z:ddl;mZ>m?Z?ddl@mAZAmBZBddl-ZCde-_Dddl&mEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLddlMmNZNddlOmPZPddlQmRZRddlSmTZTmUZUmVZVddlWmXZXddlYmZZZddl[Z[ddl*Z*ddlm\Z\eEdde%dZ]eNZ^eNZ_dgZ`e]jaeXe`ddgdgddd lmbZbeZd!d"d#Zcdaddaed"afdagdahdaidajdZkdald$emed%andZod"apdaqee-jre d&<daseee d'<iateue d(<d)ZvdZwdaxd"aydazee/e d*<da{ee0e d+<e:a|da}da~d"adaiagZee d,<e2t|d-adZdZdZed.d/d0Zed1d2d3Zeeud4d5d6Ze&ecfeFeed7d8d9Zeed:d;d<Zded=d>d?Zd@dAZde-jdBdCdDZddEdFZdGdHZdIdJZGdKdLdLZeadddddifeeeeueueeeeeeeeeeemeeudM dNdOZedPdQdRZdSdTZddVdWZdXdYZdZd[Zifeud\d]d^Zd_d`Ze^dadbdcZe^jddeIegdegdfe^jdgeIegdegdfdhdiZe^jdjeIegdkgdfe^jdleIegdkgdfe^jdmeIegdkgdfdeIeeJfeFeLeeeeJdndodpZdqdrZedskredtduZedveedwD]ZeeeqeeZeee_jdxeIegdyeFdzd{dpZe_jd|eIegd}gdfd~dZe^jdeIegd}gdfe^jdeIegd}gdfdeIeeJfeFeLeeeeJdnddZe^jdeIegeVdgde^jdeIegeVdgdeIeeJfeFeeJdddZe^jdeIegeVdgde^jdeIegeVdgdeIeeJfeFeeJdddZe^jddgeIegedeKdfeFeeedddZe^jddgeIegdeFedddZe^jddgeIegdeFedddZe^jddgeIegde&jdddfedddZe^jddgeIegededddZe^jddgeIegdeFdzddZe^jddgeIegde&jdddfedddZe^jddgeIegdeFdzddZe^jdddegeIegdedddZe^jdddegeIegde^jdddegeIegdeFdzddZe^jdddegeIegdedddÄZddńZe^jddgeIegddeIeeJfeFeeeeJdȜddʄZe^jdeIegdgdfeFdzdd̈́Ze^jddgeIegdedМdd҄Ze^jddgdԍedМddքZe^jddgdԍedМddքZe^jddgdԍedМddքZe^jddgdԍedМddքZe^jddgdԍeFdzddZe^jddgeIegde&jdddfeFeedddZe^jddgdԍddZe^jddgdԍddZe^deFdzddZe^dddZe^dddZddZejegdddZeje]edddZ]e]e^e]e_dS(N)datetime timedeltatimezone)OptionalList)Groqmessagesz../..a from openai import OpenAI import json base_url = "https://ka1kuk-litellm.hf.space" api_key = "hf_xxxx" client = OpenAI(base_url=base_url, api_key=api_key) messages = [{"role": "user", "content": "What's the capital of France?"}] response = client.chat.completions.create( model="huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1", response_format={ "type": "json_object" }, messages=messages, stream=False, ) print(response.choices[0].message.content) zRProxy Server to call 100+ LLMs in the OpenAI format Sample with openai library: zMissing dependency z$. Run `pip install 'litellm[proxy]'`) PrismaClientDBClientget_instance_fn ProxyLogging_cache_user_row send_email)load_google_kms)*) DualCache)perform_health_checkverbose_router_loggerverbose_proxy_logger)getenvset_env_variablesT)FastAPIRequest HTTPExceptionstatusDependsBackgroundTasksHeaderResponse) APIRouter)OAuth2PasswordBearer)jsonable_encoder)StreamingResponse FileResponseORJSONResponse)CORSMiddleware) APIKeyHeader)Union/z LiteLLM API)Zdocs_urltitle descriptionr)Z allow_originsZallow_credentialsZ allow_methodsZ allow_headers)Dict AuthorizationF)nameZ auto_errorZconfig_.yaml llm_routerllm_model_listgeneral_settingsz api_log.json prisma_clientcustom_db_clientqueue)user_api_key_cachefeaturecCs*tr&d|i}tjtjj|fdddS)Nr8T)targetargsdaemon)user_telemetry threadingThreadlitellmutilsZlitellm_telemetrystart)r8datarC/home/user/app/proxy_server.pyusage_telemetrys  rEapi_keycCs|dsJ|dd}|S)NBearer ) startswithreplacerFrCrCrD_get_bearer_tokens rL) pydantic_objreturncCs$z |WS|YS0dSN)Z model_dumpdict)rMrCrCrD_get_pydantic_json_dicts rQ)requestrGrNc szbt|trt|d}tdur>t||dIdH}t|WStdurdt|tr\t|dWStWS|jj}|dkrt dddkrtWSt t j dd|durtd t|t}|rttdWS|d r|std |d s|d s|dr |s t dddkr tdtdur._update_user_dbcstdurptjdIdH}td||dur8d}n|j}|}td|tjd|idIdHnrtdurtjddIdH}td||durd}n|j}|}td|tjd|idd IdHdS) NrZzexisting spend: rrrr[rBrYr]r)r3rsrrrrrr4r)rr[rCrD_update_key_dbs,  z'update_database.._update_key_dbz Error updating Prisma database: )rrrrrgatherror format_exc)r[rrdrrtasksrrC)rr[rdrDrs"   rc CszHddg}ttjd }tj|||d}Wdn1s<0YWn6ty~}ztd|dWYd}~n d}~00dS)Nollamaservew)stdoutstderrzd LiteLLM Warning: proxy started with `ollama` model `ollama serve` failed with Exceptionz). Ensure you run `ollama serve` )openrdevnull subprocessPopenrorrr)commandrprocessrrCrCrDrun_ollama_serves2rcsRttdIdH\}}|td<|td<t|td<t|td<ttIdHqdS)z} Periodically run health checks in the background on the endpoints. Update health_check_results, based on this. )rbNhealthy_endpointsunhealthy_endpoints healthy_countunhealthy_count)rr1health_check_resultsrzrsleephealth_check_interval)rrrCrCrD_run_background_health_checks  rc@sheZdZdZddddZeedddZdeee dd d Z e d d d Z ee j edddZdS) ProxyConfigz} Abstraction class on top of config loading/updating logic. Gives us one place to control all config updating logic. N)rNcCsdSrOrC)selfrCrCrD__init__,szProxyConfig.__init__)config_file_pathrNcCs8tj|sdStj|\}}|dkp6|dkS)NFr/z.yml)rrmisfilesplitextlower)rr_Zfile_extensionrCrCrDis_yaml/s zProxyConfig.is_yamlc s|pt}|dur|atj|rZt|d}t|}Wdqh1sN0Yngiiid}tdurt dddkrt ddg}gd}|D]}tj d|dd }| |qt j|IdH} |S) Nr)rbr2router_settingslitellm_settingsSAVE_CONFIG_TO_DBFTr param_nameryr)user_config_file_pathrrmexistsryaml safe_loadr3r? get_secretrget_generic_datarrr) rrZ file_path config_filery_taskskeyskrZ responsesrCrCrD get_config6s2*   zProxyConfig.get_config new_configc s~|IdH}ttd }tj||ddWdn1s@0YztjttdIdH\aaa Wntt y}z\t ttd }tj||ddWdn1s0Yt dddWYd}~n d}~00tdurztjddd d krz|d g}|D]X}|d id ddur |d d }dt}|tj|<d||d d <q tj|ddIdHdS)NrF)Zdefault_flow_stylerouterrzInvalid config passed inrTr) default_valueTrbrrGZLITELLM_MODEL_KEY_ os.environ/ry)rBr^)rrrrdump proxy_config load_configr0r1r2rorrrr3r?rrnr~uuiduuid4renviron insert_data) rr backup_configrrr{mrGZkey_namerCrCrD save_config`s:. ."  zProxyConfig.save_configrc%sd|j|dIdH}t|}|ddtdtj|dd|dd}|rn| D]\}}|t j |<qZ|dd}|duri}|r`d} d } | D]\}}|d krt | d d d l m} i} d|vr|d} | | | dd}td||dkrtdd}tdd}tdd}| ||||dt | d| d|t | d| d|t | d| d|t | d| d|t | D]2\}}t|tur|drt|| |<q| fi| t_t | dttjj| q|dkrPt||dgt_t| dtjd| q|d kr~t||dgt_td!tjq|d"krgt_|D]0}d#|vrtjt|d$n tj|qt| d%tjd| q|d&krFgt_|D]0}d#|vrtjt|d$n tj|qt| d%tjd| q|dkrRqtt||q|d'i}|durzi}|r|d(d}|dur|tjj krt!d)d*n"|tj"j krt#d)d+nt$d,|d-d.}t#|d+|d/d.}t!|d*t%j&|d0d|d1d2d3|d4d}|r^|dr^td5t|}td6||d7td8da't'rt'drtt'a'|d9d}|durt||da(|d:d}|dur|d;ks|da*t+|d?d.a,|d@dAa-dBtjdkdC}|dDd}|r||dD<t dE|D]}|dF D]4\}}t.|trV|drVt||dF|<qVt dG|dHdId |dFdJ} |dFdKd}!dL| vrF|!durFt/qF|dMd}"|"rJt.|"t0rJt12tj3}#dNdDhfdOdP|#j4D}$|" D]\}}||$vr,|||<q,tj3fi|}|||fS)Qz< Load config values into proxy global state rNenvironment_variableszFLoaded config YAML (api_key and environment_variables are not shown): )indentrzzrez Setting Cache on Proxyr)Cache cache_paramstypeZrediszpassed cache type=Z REDIS_HOSTZ REDIS_PORTZREDIS_PASSWORD)rhostportpasswordz Cache Type: z Cache Host:z Cache Port:zCache Password:rzSet Cache on LiteLLM Proxy: callbacks)r`rz Initialized Callbacks - post_call_ruleszlitellm.post_call_rules: r.)r`z! Initialized Success Callbacks - failure_callbackr2key_management_systemTr)use_google_kmsz&Invalid Key Management System selectedrFralertingalerting_thresholdX)rrrzGOING INTO LITELLM.GET_SECRET!zRETRIEVED DB URL: rkLITELLM_MASTER_KEY custom_auth database_typeZ dynamo_dbZdynamodb database_args)Zcustom_db_argsZcustom_db_typeZbackground_health_checksri,)Z num_retriesZcache_responsesrbz<LiteLLM: Proxy initialized with Config, Set models:rz  model_namerIrcapi_baserrrcsg|]}|vr|qSrCrC).0xZ exclude_argsrCrD {z+ProxyConfig.load_config..)5rcopydeepcopyr~rrrr|dumpsrnitemsrrprintlitellm.cachingrupdater?rrrhrJrevarsr rrrrrsetattrrrr`rZ GOOGLE_KMSr ValueErrorrZ update_valuesrkrir r4ruse_background_health_checksrrgrrPinspectgetfullargspecRouterr:)%rrrryZ printed_yamlrrYr`rZblue_color_codeZreset_color_coderrZcache_params_in_configZ cache_typeZ cache_hostZ cache_portZcache_passwordcallbackr2rrrrr r r Z router_paramsrbrcrvZlitellm_model_nameZlitellm_model_api_baserZarg_specZavailable_argsrCrrDrs^                                                       zProxyConfig.load_config)N)__name__ __module__ __qualname____doc__rrhboolrrrPrrr?r!rrCrCrCrDr's*1 r) durationr{rxryr max_budgetr[rd user_emailmax_parallel_requestsrc  sttdurtdurtd|dur0dtd}tddd} |durLd} n| |d} tt| d} t |}t |}t | }|ptt }z||||d}|| |||||| |d }tdurt |}||td tj|d IdHn2tdur(tj|d d IdHtj|dd IdHWn:tyd}z tttjdWYd}~n d}~00|| ||dS)Nz]Connect Proxy to database to generate keys - https://docs.litellm.ai/docs/proxy/virtual_keys zsk-)r)cSsttd|}|std|\}}t|}|dkr8|S|dkrH|dS|dkrX|dS|dkrh|d Std dS) Nz(\d+)([smhd]?)zInvalid duration formatsrr_hidiQzUnsupported duration unit)rematchrgroupsint)r)r2r`ZunitrCrCrD_duration_in_secondss  z4generate_key_helper_fn.._duration_in_seconds)seconds)r*r+rdr) r[rvr{rxryrrdr,rz PrismaClient: Before Insert DatarBr)r`r^rYrU)r[rvrdr*)r3r4rorp token_urlsaferhrrtrr|rrrrPrrrrrrrrrHTTP_500_INTERNAL_SERVER_ERROR)r)r{rxryrr*r[rdr+r,rr5rvZ duration_sZ aliases_jsonZ config_jsonZ metadata_jsonZ user_dataZkey_dataZverification_token_datarrCrCrDgenerate_key_helper_fns^        "r;tokensc s^z trtj|dIdH}ntWn8tyX}z tttjdWYd}~n d}~00|S)Nr<r8)r3Z delete_datarorrrrr:)r=Zdeleted_tokensrrCrCrDdelete_verification_tokens"r>cKsddl}||tjd<dS)Nr WORKER_CONFIG)r|rrr)rBr|rCrCrDsave_worker_configsr@rcsb|a|a|dkrDddlm}m}ddl}|j|jd|j|jd|dkrddlm}m}ddl}|j|jd|j|jddt _ n|dkr>|dkr>t j dd}|dkr>|dkrddlm}m}ddl}|j|jd|j|jdnH|d kr>ddlm}m}ddl}|j|jd|j|jddt _ d itii}|rjtjt|d IdH\aaa| r| a| |td <|r|a||td <|r|t j d<|r|}||td<|r|a||td<|r|a||td<|r||td<| dkrdt _d|d d<| dkr2dt _d|d d<| rJ| t _| |d d<trP| atdddS)NTrr)levelFZ LITELLM_LOGrIINFODEBUGZgeneralrheadersrZAZURE_API_VERSION max_tokens temperaturerequest_timeoutalias drop_paramsadd_function_to_promptr*Zlocal_proxy_serverr7) user_model user_debuglitellm._loggingrrloggingsetLevelrBrCr?Z set_verboserrrnupperrrr0r1r2 user_headers user_api_baseuser_temperatureuser_request_timeoutrIrJr* experimentalr<rE)rcrHr api_versionrrdetailed_debugrFrErGr* telemetryrIrJrDsave use_queueryrrrNZlitellm_log_settingZdynamic_configuser_max_tokensrCrCrD initializes              r\ccsftd|D]R}td|zdt|dVWqdt|dVYq0qdS)Ninside generatorreturned chunk: data:  )rrrr|rrP)rchunkrCrCrDdata_generatores rbc Cstdzt}|2zp3dHW}td|zdt|dVWqty}zdt|dVWYd}~qd}~00q6t}t t j ||ddd}d|dVWn6ty}zdt|dVWYd}~n d}~00dS)Nr]r^r_r` slow_responserrrz[DONE]) rrrtimer|rrProrhrrrresponse_taking_too_long)ruser_api_key_dictrrarrZ done_messagerCrCrDasync_data_generatoros& ,rhrccCsZ|di}|didd}z$d|vr6|dd}t|}|WSiYS0dS)N model_inforrcZazureZ base_model)rnr?Zget_model_info)rcrjZmodel_to_lookuplitellm_model_inforCrCrDget_litellm_model_infos   rlcCsDi}|d}|D],}d|vr6|d\}}|||<qd||<q|S)Nz, =T)split)Z cache_control cache_dictZ directivesZ directiverYr`rCrCrDparse_cache_controls   rpstartupcsjddl}tddatdur,ttddtd}t d|tj |rt j |drzt jt|dIdH\aaaqtfi|IdHn$|td}tfi|IdHttrttt d ttdurtIdHtdur tIdHtdur8tdur8tdgiidtd IdHtdurftdurftdgiidtd IdHdS) NrrZ DATABASE_URLrr?zworker_config: rrzprisma client - )r)r{rxryrr[)r|r?rrkr3rrrrrrrmrrrrr0r1r2r\loadsrZ_init_litellm_callbacksrrrrconnectr4r;)r| worker_configrCrCrD startup_eventsJ        ruz /v1/modelszmodel management) dependenciestagsz/modelsc Csg}tddrtj}tr8tt|ddtD}tdurJ|tg7}t d|z2t d}| d}dd|D}| |Wn$ty}z WYd}~n d}~00td d|Dd d S) NZinfer_model_from_keysFcSsg|] }|dqSr rCrrrCrCrDrrzmodel_list.. all_models: zhttp://0.0.0.0:11434/api/tagsr{cSs g|]}d|dddqS)zollama/r.z:latestrI)rKryrCrCrDrrcSsg|]}|ddddqS)rcijNcZopenai)idobjectcreatedZowned_byrC)rrcrCrCrDrs r)rBr|)r2rnr?r@Zget_valid_modelsr1rsetrKrrrrequestsr|extendrorP) all_modelsrr{Z ollama_modelsrrCrCrDrbs*      rbz/v1/completions completionsz /completionsz!/engines/{model:path}/completions)rRfastapi_responsercrgbackground_tasksc sRz|IdH}|}zt|}Wnt|}Yn0|d|j|d<tddplt pl|pl|d|d<t r~t |d<d|vr|j |dd<|j|dd<t |j |dd<n$|j |jd|d<t |j |dd<t rt |d <trt|d <trt|d <tr t|d <tj||d dIdH}t}tdurBddtDng} d|vrhtjfi|IdH} ntdur|d| vrtjfi|IdH} ntdurtjdur|dtjvrtjfi|IdH} nPtdur|dtjvrtjfi|ddiIdH} ntjfi|IdH} t| drH| jddpDd} nd} td| d|vr|ddkrd| i} tt|| dd| dWSt} t !tj"|| dd| |j d<| WSt#yL}zrtdtd |d!t$%t$&}t'|d"|}z |j(}Wnd#}Yn0t)||d$WYd}~n d}~00dS)%Nrcompletion_modelrcrrrrD)rrrFrGrEr completionrgrBZ call_typecSsg|] }|dqSrxrCryrCrCrDrGrzcompletion..rGspecific_deploymentT_hidden_paramsmodel_idrIfinal response: rx-litellm-model-idrgrtext/event-stream media_typerDrcrdz!EXCEPTION RAISED IN PROXY MAIN.PYAn error occurred: P Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`r`rT)*bodydecodeast literal_evalr|rrrnrdr2rKrGrPrDrSrTr[rRr pre_call_hookrer1r?Zatext_completionr0model_group_aliasdeployment_nameshasattrrrrrr#rhrrrfrorrrrhrUr)rRrrcrgrrbody_strrBrrouter_model_namesrrcustom_headersrrerror_traceback error_msgrrCrCrDrs               rcCs8tjtddd|dddddgd d }|S) N/chat/completionszapplication/jsonrH)z Content-Typer- gpt-3.5-turboz what is YC?r)contentrole)rcr)rDr|)rpostZFASTREPL_PROXY_URL_BASEr|)rZresprCrCrDcompletion_requestsr__main__drGznew keyz/v1/chat/completions)rvrRcs|jddd}|IdH}td|||d<t|d<t|d<t|d|vrt|dt kr|d d krd |d<t j fi|}d|vr|dd krt t|d d S|S) Nr-rHrIzreceived request data: Zuser_keybudget_managerrkrtrueTrr)rDrnrKr|rrrkrrrhrllmrr#rb)rRrYrBrrCrCrDrsz/v2/chat/completionszchat/completionscCsdS)NtestrCrCrCrCrDrsrrz1/openai/deployments/{model:path}/chat/completionsc s4zi}|IdH}|}zt|}Wnt|}Yn0t|j|jt |j t |d|d<|j }t d||dd} | rt| } | d|d<t d|tddptp|p|d |d <|d ddur|jdur|j|d <d |vrHt d |d |j|d d <|j|d d<t |j |d d<n.d |ji|d <t |j |d d<|j|d d<trt|d<trt|d<trt|d<trt|d<tj||ddIdH}t} tdurddtDng} d|vr tjfi|IdH} nd|vrF|d}tjfi|}|jfi|IdH} nt durv|d | vrvt jfi|IdH} nt durt j!dur|d t j!vrt jfi|IdH} nPt dur|d t j"vrt jfi|ddiIdH} ntjfi|IdH} t#| dr&| j$ddp"d}nd}d|vrb|ddkrbd |i}t%t&|| d!d"|d#WSt}t'(tj)| |d$d%||j d <| WSt*y.}zt+,tj-||d&IdHt d'|d(tdurd)dtDng} t dur|d d| vrt d*t d+t d,t j./D]\}}t |d-|q0t d.t j0/D]\}}t |d-|qdt d/t j1/D]\}}t |d-|qt2rt+,t3|t4r|nBt+5}t|d0|}z |j6}Wnd1}Yn0t4||d2WYd}~n d}~00dS)3NrlmethodrDrproxy_server_requestzRequest Headers: z Cache-Controlzs-maxagerareceiving data: rrcrrreceived metadata: rrrDrFrGrErrrcSsg|] }|dqSrxrCryrCrCrDr$rz#chat_completion..rG user_configrTrrrIrrrrrrcrdrgZoriginal_exceptionrrcSsg|] }|dqSrxrCryrCrCrDrfrzResults from routerz Router statsz Total Calls madez: z Success Calls madez Fail Calls mader`rrT)7rrrrr|rrrhrlrrPrDrrrrrnrpr2rKrdrGrSrTr[rRrrrer1r? acompletionr~r!r0rrrrr#rhrrrfrorrpost_call_failure_hookZ total_callsrZ success_callsZ fail_callsrLrgrrrU)rRrrcrgrrBrrrDZcache_control_headerrorrr router_config user_routerrrrrrYr`rrrrCrCrDchat_completions                           rz/v1/embeddings embeddings)rvZresponse_classrwz /embeddings)rRrgrc szF|IdH}t|}t|j|jt|jt|d|d<| dddurf|j durf|j |d<t ddp|t p||d|d<t rt |d<d|vr|j |dd<t|j|dd<|j |dd <n.d|j i|d<t|j|dd<|j |dd <tdur d d tDng}d |vrt|d trt|d d trt|d d d trtdur|d|vrtD]x}|d|dkr|ddtjvs|dddrn6g}|d D]}|tjd|dq||d <qqrtj||ddIdH}t} d|vr,tjfi|IdH} nd|vrh|d} tjfi| } | jfi|IdH} ntdur|d|vrtjfi|IdH} ntdurtjdur|dtjvrtjfi|IdH} nPtdur|dtjvrtjfi|ddiIdH} ntjfi|IdH} t} t !tj"| | dd| WSt#y}z|tj$||dIdHt%&t|t'r|nBt%(}t|d|}z |j)}Wnd}Yn0t'||dWYd}~n d}~00dS)NrrrZembedding_modelrcrrrDrcSsg|] }|dqSrxrCryrCrCrDrrzembeddings..inputrr rzazure/r)rcr=rrrGrrTrcrdrr`rrT)*rorjsonrrrhrlrrPrDrrnrdr2rKrGr1rgrr4r?Zopen_ai_embedding_modelsrJrrrrreZ aembeddingr~r!r0rrrrrfrorrrrrrU)rRrgrrrBrrZ input_listirrrrrrrrrrCrCrDrs                    z/v1/images/generationszimage generationz/images/generationsc sz.|IdH}t|}t|j|jt|jt|d|d<| dddurf|j durf|j |d<t ddp|t p||d|d<t rt |d<d|vr|j |dd<t|j|dd<|j |dd <n.d|j i|d<t|j|dd<|j |dd <tdur d d tDng}tj||d d IdH}t}d|vrPtjfi|IdH}ntdur|d|vrtjfi|IdH}ntdur|dtjvrtjfi|ddiIdH}nTtdurtjdur|dtjvrtjfi|IdH}ntjfi|IdH}t}ttj||dd|WSty} z|tj|| dIdHtt| trp| nBt } t| d| } z | j!} Wnd} Yn0t| | dWYd} ~ n d} ~ 00dS)NrrrZimage_generation_modelrcrrrDrcSsg|] }|dqSrxrCryrCrCrDrDrz$image_generation..rrrGrTrcrdrr`rrT)"rrrrrhrlrrPrDrrnrdr2rKrGr1rrrer?Zaimage_generationr0rrrrrfrorrrrgrrrU) rRrgrrrBrrrrrrrrrCrCrDimage_generations             rz /key/generatezkey management)rwrvZresponse_model)rRrBr-cs@td|}tfi|IdH}t|d|d|ddS)a Generate an API key based on the provided data. Docs: https://docs.litellm.ai/docs/proxy/virtual_keys Parameters: - duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). **(Default is set to 1 hour.)** - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) - aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models - config: Optional[dict] - any key-specific configs, overrides config in config.yaml - spend: Optional[int] - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } Returns: - key: (str) The generated api key - expires: (datetime) Datetime object for when key expires. - user_id: (str) Unique user id - used for tracking spend across multiple keys for same user id. zentered /key/generateNr[rvrd)rYrvrd)rrrr|r;GenerateKeyResponse)rRrBr- data_jsonrrCrCrDgenerate_key_fns  rz /key/update)rwrv)rRrBc szb|}|d}tdur$tddd|D}tj|i|d|idIdH}d|i|WSty}z"ttjdt |id WYd}~n d}~00dS) z Update an existing key rYNzNot connected to DB!cSsi|]\}}|dur||qSrOrCrrr#rCrCrD rz!update_key_fn..r[rerrorrT) r|r~r3rorrrrHTTP_400_BAD_REQUESTrh)rRrBrrYZnon_default_valuesrrrCrCrD update_key_fns   rz /key/deletec spz0|j}t|dIdH}t||ks(Jd|iWStyj}z"ttjdt|idWYd}~n d}~00dS)Nr< deleted_keysrrT)rr>rzrorrrrh)rRrBrrrrCrCrD delete_key_fns  rz /key/info.zKey in the request parameters)r+rXc snz.tdurtdtj|dIdH}||dWStyh}z"ttjdt|idWYd}~n d}~00dS)NDatabase not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keysrZ)rYinforrTr3rorsrrrrh)rYZkey_inforrCrCrD info_key_fns  rz /user/newzuser managementr7cs<|}tfi|IdH}t|d|d|d|ddS)a Use this to create a new user with a budget. Returns user id, budget + new key. Parameters: - user_id: Optional[str] - Specify a user id. If not set, a unique id will be generated. - max_budget: Optional[float] - Specify max budget for a given user. - duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). **(Default is set to 1 hour.)** - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) - aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models - config: Optional[dict] - any key-specific configs, overrides config in config.yaml - spend: Optional[int] - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } Returns: - key: (str) The generated api key - expires: (datetime) Datetime object for when key expires. - user_id: (str) Unique user id - used for tracking spend across multiple keys for same user id. - max_budget: (float|None) Max budget for given user. Nr[rvrdr*)rYrvrdr*)r|r;NewUserResponse)rBrrrCrCrDnew_usersrrSc s|IdH}|d}|d}|dur2tdddtdurBtdtjd|dd IdH}|dur|j}tfid giid |d IdH}n"tfid giid |d IdH}tdd}dtd|dd|d|dd|dd|d d}t fi|IdHdS)a Allows UI ("https://dashboard.litellm.ai/", or self-hosted - os.getenv("LITELLM_HOSTED_UI")) to request a magic link to be sent to user email, for auth to proxy. Only allows emails from accepted email subdomains. Rate limit: 1 request every 60s. Only works, if you enable 'allow_user_auth' in general settings: e.g.: ```yaml general_settings: allow_user_auth: true ``` Requirements: SMTP server details saved in .env: - os.environ["SMTP_HOST"] - os.environ["SMTP_PORT"] - os.environ["SMTP_USERNAME"] - os.environ["SMTP_PASSWORD"] - os.environ["SMTP_SENDER_EMAIL"] Nr+ZpagerzUser email is nonerTrWZusersrZ24hrr)r)r{rxryrrd)r)r{rxryrr+ZLITELLM_HOSTED_UIzhttps://dashboard.litellm.ai/z LiteLLM ProxyZSMTP_SENDER_EMAILzYour Magic Linkz' Follow this link, to login: z user/?token=r[z &user_id=rdz&page=z )Z sender_nameZ sender_emailZreceiver_emailsubjecthtmlz Email sent!) r|rr3rorrdr;rrr)rRrBr+Z page_paramsrrdZbase_urlparamsrCrCrD user_auths6    $rz /user/infoz!User ID in the request parametersrc szFtdurtdtj|dIdH}tj|dddIdH}|||dWSty}z"ttjdt|id WYd}~n d}~00dS) zJ Use this to get user information. (user row + all user key info) NrrrYZfind_all)rdr^Z query_type)rd user_inforrrTr)rdrrrrCrCrDrXs   rz /user/updatecsdS)z0 [TODO]: Use this to update user budget NrCrrCrCrD user_updateusrz /model/newz=Allows adding new models to the model list in the config.yaml)r+rwrv) model_paramsc sztIdH}tdttd||j}dd|D}|d|j |j |dtd|dtj |dIdHd d iWSt y}z:t t|tr|ntd d t|d WYd}~n d}~00dS)NzUser config path: Loaded config: cSsi|]\}}|dur||qSrOrCrrCrCrDrrz!add_new_model..rb)r rrjzupdated model list: rmessagezModel added successfullyrInternal Server Error: rT)rrrrrrrjr|rrr rrrorrrgrrh)rryrjrrCrCrD add_new_models,   rz /model/infozpProvides more info about each model in /models, including config.yaml descriptions (except api key and api base)z/v1/model/infocstIdH}|d}|D]T}|di}t|d}|D]\}}||vr<|||<q<||d<|dddqtd|d|iS)NrbrjrirrGrzrB)rrrnrlrr~rrr)rRryrrcrjrkrr#rCrCrD model_info_v1s   rz /model/deletez;Allows deleting models in the model list in the config.yaml)rjc sztjtstdddtIdH}t|dgdkrHtdddd}|dD]&}|did d|j krT|}q|qT|durtdd d|d |tj |d IdH}d d iWSty}zWYd}~nDd}~0t y}z"tddt |dWYd}~n d}~00dS)NizConfig file does not exist.rTrbrrz&No model list available in the config.rjr{z$Model with given model_id not found.rrzModel deleted successfullyrr)rrmrrrrrrzrnr{removerrorh)rjryZmodel_to_deletercrrCrCrD delete_models0    rc sztj||ddIdH}tdtdur:ddtDng}tdurj|d|vrjtjfi|IdH}ntdur|dtjvrtjfi|dd iIdH}nNtdurtjdur|dtjvrtjfi|IdH}nt jfi|IdH}td ||WSt y}zntd |j d |j |j d krrd|j vrrtdt j dddd}t|IdHn|WYd}~qd}~00qdS)z2 worker to make litellm completions calls rrNz(_litellm_chat_completions_worker startedcSsg|] }|dqSrxrCryrCrCrDr rz4_litellm_chat_completions_worker..rcrTrz7EXCEPTION RAISED IN _litellm_chat_completions_worker - z; iz"Max parallel request limit reachedz#Max parallel request limit reached!r )Zremaining_retriesZ max_retriesZ min_timeout)rrrrrr1r0rrrr?rrUrVZ_calculate_retry_afterrr)rBrgrrrtimeoutrCrCrD _litellm_chat_completions_worker s^      rz/queue/chat/completionsrU)rRrcrgrc szi}|IdH}t|j|jt|jt|d|d<td|t ddpft pf|pf|d|d<| dddur|j dur|j |d<d|vrtd|d|j |dd <t|j|dd <|j |dd <n.d |j i|d<t|j|dd <|j |dd <trt|d <tr&t|d <tr4t|d<trBt|d<tjt||dtjdIdH}d|vr|ddkrtt||dddWS|WSty}z6tj||dIdHttjdt|idWYd}~n d}~00dS)NrrrrrcrrrrrDrrFrGrEr)rBrg)rrTrrrrrrT)r|rhrlrrPrDrrrrr2rnrKrdrGrSrTr[rRrwait_forrr?rGr#rhrorrrrr)rRrcrgrrBrrrCrCrDasync_queue_request= sv       rz /ollama_logscstjd}t|S)Nz~/.ollama/logs/server.log)rrm expanduserr$)rRfilepathrCrCrDretrieve_server_log s rz/config/updatez config.yaml config_infoc s~z tIdH}t|}td||jdurd|di|jjdd}i||d|d<|j dur|di|j }i||d|d<|j dur|di|j }i||d|d<tj |dIdHd | di d gvrt jd d d IdHddiWSty6}z|WYd}~nLd}~0tyx}z*ttddt|dWYd}~n d}~00dS)z For Admin UI - allows admin to update config via UI Currently supports modifying General Settings + LiteLLM settings Nrr2T)Z exclude_nonerrrZslackrzThis is a testZLow)rrArzConfig updated successfullyrzAn error occurred - rT)rrrrrrrr2 setdefaultrPrrrrnrZalerting_handlerrrorrrh)rryrZupdated_general_settingsZupdated_environment_variablesZupdated_litellm_settingsrrCrCrD update_config sR          rz /config/yaml)rwcsddiS)a This is a mock endpoint, to show what you can set in config.yaml details in the Swagger UI. Parameters: The config.yaml object has the following attributes: - **model_list**: *Optional[List[ModelParams]]* - A list of supported models on the server, along with model-specific configurations. ModelParams includes "model_name" (name of the model), "litellm_params" (litellm-specific parameters for the model), and "model_info" (additional info about the model such as id, mode, cost per token, etc). - **litellm_settings**: *Optional[dict]*: Settings for the litellm module. You can specify multiple properties like "drop_params", "set_verbose", "api_base", "cache". - **general_settings**: *Optional[ConfigGeneralSettings]*: General settings for the server like "completion_model" (default model for chat completion calls), "use_azure_key_vault" (option to load keys from azure key vault), "master_key" (key required for all calls to proxy), and others. Please, refer to each class's description for a better understanding of the specific attributes within them. Note: This is a mock endpoint primarily meant for demonstration purposes, and does not actually provide or change any configurations. helloZworldrCrrCrCrDconfig_yaml_endpoint srz /groq/chatgroqcsBttjdd}|jjjdddgdd}t|jdj j dS NZ GROQ_API_KEYrFrz.Explain the importance of fast language models)rrzllama3-8b-8192)rrcr rrrrnZchatrcreaterchoicesrrrrrrCrCrDr s  z/openinterpreter/chatZopeninterpretercsBttjdd}|jjjdddgdd}t|jdj j dSrrrrCrCrDr s  csBttjdd}|jjjdddgdd}t|jdj j dSrrrrCrCrDr s  z/testhealthcs d|jjiS)z A test endpoint that pings the proxy server to check if it's healthy. Parameters: request (Request): The incoming request. Returns: dict: A dictionary containing the route of the request URL. r)rlrmrrCrCrD test_endpoint. s rz/healthz!Specify the model name (optional))rRrccstdurNtdur Unprotected endpoint for checking if worker is alive z I'm alive!rCrCrCrCrDhealth_liveliness srcsdS)NzLiteLLM: RUNNINGrCrrCrCrDhome srz/routescsDg}tjD]0}|j|j|j|jr(|jjndd}||q d|iS)zD Get a list of available routes in the FastAPI application. N)rmmethodsr.endpointroutes)apprrmrr.rr$r)rrZ route_inforCrCrD get_routes s  rshutdowncs&trtdtIdHtdS)NzDisconnecting from Prisma)r3rrrZ disconnectcleanup_router_config_variablesrCrCrCrDshutdown_event s rcCs dadadadadadadadSrO)rkr otel_loggingriZuser_custom_auth_pathrrrCrCrCrDr sr)rZholaZmerhabazEcho Bot)fnZexamplesr*zhttp://localhost:7860/)Zgradio_api_url)F)NN)N)NNNNFFNNrNFTTNFFN)ZgradioZgrsysrplatformrerr1rrr=rshutilrandomrrrrrtypingrrrprhashlibrwarnings importlibrrrr__annotations__rminsertabspathsampler+ZfastapiZbackoffrrrN ImportErrorrr?Zlitellm.proxy.utilsr r r r r rZ(litellm.proxy.secret_managers.google_kmsrZpydanticZlitellm.proxy._typesrrZlitellm.proxy.health_checkrrMrrr@rrrZsuppress_debug_inforrrrrrrrZfastapi.routingr Zfastapi.securityr!Zfastapi.encodersr"Zfastapi.responsesr#r$r%Zfastapi.middleware.corsr&Zfastapi.security.api_keyr'r|r(rrZroutersoriginsZadd_middlewarer,Zapi_key_headerrRrKrLr[rTrSr<rrQr4rZ local_loggingrUr0r!r1r2rPZlog_filertrkrr3r4r6rirrZrrr5rZ async_resultZcelery_app_connZ celery_fnrhrErLZ BaseModelrQZSecurityrjrrr(rrZ ModelResponserrrrrrfloatr;r>r@r\rbrhrlrpZon_eventrurnrbrrrr$rrYrrangerZ cost_currentrrrrrrZGenerateKeyRequestrZUpdateKeyRequestrZDeleteKeyRequestrZQueryrrZNewUserRequestrrrZ ModelParamsrrZModelInfoDeleterrrrZ ConfigYAMLrrrrrrrrrrZ ChatInterfaceechoZdemoZmount_gradio_appZinclude_routerrCrCrCrDs& @      &    (               *  1 Q`  \ m  : |     .i   !?  %+8X;3