YdddlZddlZddlmZddlmZmZmZmZmZddl Z ddl Z ddl m Z ddl Tddl TddlmZdZdZd Z ddZdZdS)N)download_loader)Document LLMPredictor PromptHelperQuestionAnswerPrompt RefinePrompt)tqdm)*)local_embeddingcd|D}|dt}|D]m}t|d5}|dx}r,|||dx},dddn #1swxYwYn|S)Ncg|] }|j S)name).0xs ;/Users/treediagram/dev/ChuanhuChatGPT/modules/llama_func.py z"get_index_name..s+++Q!&+++c@tj|S)N)ospathbasename)rs rz get_index_name..s"'"2"21"5"5r)keyrbi )sorthashlibmd5openreadupdate hexdigest)file_src file_pathsmd5_hash file_pathfchunks rget_index_namer)s++(+++JOO55O666{{}}H'' )T " " 'a66$<<'% '&&&66$<<'% ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '     sAB!!B% (B% cg}t|dkrG|t|dd|dd}t|dkG|S)Nri)lenappendr)textblockss r block_splitr/"s] F d))a-- htETE{++,,,DEE{ d))a-- Mrc4g}tjdtjd||D]}|j}tj|}tj|d}tjd| |dkrtjd ddlm }dd l m }|d  d d }|||j } nt#d } t|d5} tj| } t#| jD]} | | z }  dddn #1swxYwYYnxYw| } nN|dkrOtjdt)d}|}||dj } n|dkrOtjdt)d}|}||dj } n|dkrLtjdt-|}|D]$}|t1|%/tjdt|dd5}|} dddn #1swxYwYn.#t4$r!}tjd|Yd}~nd}~wwxYwt9| }|t1|gz }tjd|S)NzLoading documents...z file_src: zloading file: z.pdfzLoading PDF...r) parse_pdf) advance_docspdf two_columnFrz.docxzLoading Word... DocxReader)filez.epubzLoading EPUB... EpubReaderz.xlsxzLoading Excel...zLoading text file...rzutf-8)encodingzError loading file: zDocuments loaded.)loggingdebugrrrrsplitextinfomodules.pdf_funcr2modules.configr3getr-rPyPDF2 PdfReaderr pages extract_textr load_dataexcel_to_stringr,rr Exceptionerror add_space)r# documentsr8filepathfilename file_typer2r3r5pdftext pdfFileObj pdfReaderpagetext_rawr7loaderr9 text_listelemr'er-s r get_documentsrY*sI M())) M)x))***/&/&97##H--G$$X..q1  0h00111& F"" ./// ;::::::;;;;;;!-e!4!8!8u!M!MJ'i*==BGG; Gh--;$*$4Z$@$@ $($9$9;;D#t'8'8':'::GG;;;;;;;;;;;;;;;;#g%% /000,\:: #!+++::1=Bg%% /000,\:: #!+++::1=Bg%% 0111+H55 %55D$$Xd^^4444 4555(C':::(a vvxxH(((((((((((((((    M;;; < < < DDDD "" htnn%%  M%&&& sJ219C+*J2+E?AE EE EE EDJ2&J2J& J2&J* *J2-J* .J22 K<KKX cddlm}ddlm} ddlm} m} m} m} |r|tj d<ndtj d<|dkrdn|}|dkrdn|}|dkrdn|}t||||d | }t|}tj d |d r-tjd | d |d S t%|}t&r| | d}n | }tjdt)5| |||}| ||}dddn #1swxYwYtjdtjdd|d |d tjd|S#t4$r/}tjd|t9|Yd}~dSd}~wwxYw)Nr) ChatOpenAI)HuggingFaceEmbeddings)GPTSimpleVectorIndexServiceContextLangchainEmbeddingOpenAIEmbeddingOPENAI_API_KEYz sk-xxxxxxxr6r^r])max_input_size num_outputmax_chunk_overlapembedding_limitchunk_size_limit separatorz./index/z.jsonu0找到了缓存的索引文件,加载中……z:sentence-transformers/distiluse-base-multilingual-cased-v2) model_nameu构建索引中……) prompt_helperrk embed_model)service_contextu索引构建完成!z./indexT)exist_oku索引已保存至本地!u索引构建失败!)langchain.chat_modelsr` langchain.embeddings.huggingfacera llama_indexrbrcrdrerenvironrr)rexistsr<r?load_from_diskrYr retrieve_proxy from_defaultsfrom_documentsr=makedirs save_to_diskrIrJprint)api_keyr#rg num_outputsrirkrjrlr`rarbrcrdrern index_namerLrorpindexrXs rconstruct_indexrbs100000FFFFFFeeeeeeeeeeee4'. #$$(4 #$/144tt:J-22ddO BII %+' M ))J w~~222233 GHHH#223Oj3O3O3OPPP %h//I 0001F1FUQ2R2R2RSS -o// L0 1 1 1!!  "0">">"/%5 +#?## -;;<                 M1 2 2 2 K D 1 1 1 1   ;*;;; < < < M5 6 6 6L    M11 5 5 5 !HHH44444 s> AG%0E! G!E%%G(E%)AG G>$G99G>cxddddddd}|D]\}}|||}|S)Nu, u。 u? u! u: u; )u,u。u?u!u:u;)itemsreplace)r- punctuationscn_puncen_puncs rrKrKsS!&W]flmmL(..00..||GW-- Kr)rZr[r\r]Nr^)rr<rtrrrrrrcoloramarCr modules.presets modules.utilsrAr r)r/rYrrKrrrrs/ '''''' ******    555v====@r