import logging
import random

import torch
from torch.cuda.amp import autocast as autocast
import torch.nn as nn

from minigpt4.common.registry import registry
from minigpt4.models.base_model import disabled_train
from minigpt4.models.minigpt_base import MiniGPTBase
from minigpt4.models.Qformer import BertConfig, BertLMHeadModel


@registry.register_model("minigpt4")
class MiniGPT4(MiniGPTBase):
    """
    MiniGPT-4 model
    """

    PRETRAINED_MODEL_CONFIG_DICT = {
        "pretrain_vicuna0": "configs/models/minigpt4_vicuna0.yaml",
        "pretrain_llama2": "configs/models/minigpt4_llama2.yaml",
    }

    def __init__(
            self,
            vit_model="eva_clip_g",
            q_former_model="https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_flant5xxl.pth",
            img_size=224,
            drop_path_rate=0,
            use_grad_checkpoint=False,
            vit_precision="fp16",
            freeze_vit=True,
            has_qformer=True,
            freeze_qformer=True,
            num_query_token=32,
            llama_model="",
            prompt_path="",
            prompt_template="",
            max_txt_len=32,
            end_sym='\n',
            low_resource=False,  # use 8 bit and put vit in cpu
            device_8bit=0,  # the device of 8bit model should be set when loading and cannot be changed anymore.
    ):
        super().__init__(
            vit_model=vit_model,
            img_size=img_size,
            drop_path_rate=drop_path_rate,
            use_grad_checkpoint=use_grad_checkpoint,
            vit_precision=vit_precision,
            freeze_vit=freeze_vit,
            llama_model=llama_model,
            max_txt_len=max_txt_len,
            end_sym=end_sym,
            low_resource=low_resource,
            device_8bit=device_8bit,
        )

        self.has_qformer = has_qformer
        if self.has_qformer:
            print('Loading Q-Former')
            self.Qformer, self.query_tokens = self.init_Qformer(
                num_query_token, self.visual_encoder.num_features, freeze_qformer
            )
            self.load_from_pretrained(url_or_filename=q_former_model)  # load q-former weights here

            img_f_dim = self.Qformer.config.hidden_size
            print('Loading Q-Former Done')
        else:
            # Without the Q-Former, four neighboring ViT patch tokens are
            # concatenated in encode_img, so the feature dim grows by 4x.
            img_f_dim = self.visual_encoder.num_features * 4
            print('Do not use Q-Former here.')

        # Linear projection from visual features into the LLaMA embedding space.
        self.llama_proj = nn.Linear(
            img_f_dim, self.llama_model.config.hidden_size
        )

        if prompt_path:
            with open(prompt_path, 'r') as f:
                raw_prompts = f.read().splitlines()
            filted_prompts = [raw_prompt for raw_prompt in raw_prompts if "<ImageHere>" in raw_prompt]
            self.prompt_list = [prompt_template.format(p) for p in filted_prompts]
            print('Load {} training prompts'.format(len(self.prompt_list)))
            print('Prompt Example \n{}'.format(random.choice(self.prompt_list)))
        else:
            self.prompt_list = []

    @classmethod
    def init_Qformer(cls, num_query_token, vision_width, freeze):
        encoder_config = BertConfig.from_pretrained("bert-base-uncased")
        encoder_config.encoder_width = vision_width
        # insert cross-attention layer every other block
        encoder_config.add_cross_attention = True
        encoder_config.cross_attention_freq = 2
        encoder_config.query_length = num_query_token
        Qformer = BertLMHeadModel(config=encoder_config)
        query_tokens = nn.Parameter(
            torch.zeros(1, num_query_token, encoder_config.hidden_size)
        )
        query_tokens.data.normal_(mean=0.0, std=encoder_config.initializer_range)

        # Strip the parts of the BERT LM head that are not used here: the
        # queries only attend to image features through cross-attention.
        Qformer.cls = None
        Qformer.bert.embeddings.word_embeddings = None
        Qformer.bert.embeddings.position_embeddings = None
        for layer in Qformer.bert.encoder.layer:
            layer.output = None
            layer.intermediate = None

        if freeze:
            for name, param in Qformer.named_parameters():
                param.requires_grad = False
            Qformer = Qformer.eval()
            Qformer.train = disabled_train
            query_tokens.requires_grad = False
            logging.info("freeze Qformer")

        return Qformer, query_tokens

    def encode_img(self, image):
        device = image.device

        if len(image.shape) > 4:
            # Flatten any extra leading dimensions into the batch dimension.
            image = image.reshape(-1, *image.shape[-3:])

        with self.maybe_autocast():
            image_embeds = self.ln_vision(self.visual_encoder(image)).to(device)
            if self.has_qformer:
                image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(device)

                query_tokens = self.query_tokens.expand(image_embeds.shape[0], -1, -1)
                query_output = self.Qformer.bert(
                    query_embeds=query_tokens,
                    encoder_hidden_states=image_embeds,
                    encoder_attention_mask=image_atts,
                    return_dict=True,
                )

                inputs_llama = self.llama_proj(query_output.last_hidden_state)
            else:
                # Drop the [CLS] token and merge every 4 patch tokens into one.
                image_embeds = image_embeds[:, 1:, :]
                bs, pn, hs = image_embeds.shape
                image_embeds = image_embeds.view(bs, int(pn / 4), int(hs * 4))

                inputs_llama = self.llama_proj(image_embeds)
            atts_llama = torch.ones(inputs_llama.size()[:-1], dtype=torch.long).to(image.device)
        return inputs_llama, atts_llama

    @classmethod
    def from_config(cls, cfg):
        vit_model = cfg.get("vit_model", "eva_clip_g")
        q_former_model = cfg.get("q_former_model", "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_flant5xxl.pth")
        img_size = cfg.get("image_size")
        num_query_token = cfg.get("num_query_token")
        llama_model = cfg.get("llama_model")

        drop_path_rate = cfg.get("drop_path_rate", 0)
        use_grad_checkpoint = cfg.get("use_grad_checkpoint", False)
        vit_precision = cfg.get("vit_precision", "fp16")
        freeze_vit = cfg.get("freeze_vit", True)
        has_qformer = cfg.get("has_qformer", True)
        freeze_qformer = cfg.get("freeze_qformer", True)
        low_resource = cfg.get("low_resource", False)
        device_8bit = cfg.get("device_8bit", 0)

        prompt_path = cfg.get("prompt_path", "")
        prompt_template = cfg.get("prompt_template", "")
        max_txt_len = cfg.get("max_txt_len", 32)
        end_sym = cfg.get("end_sym", '\n')

        model = cls(
            vit_model=vit_model,
            q_former_model=q_former_model,
            img_size=img_size,
            drop_path_rate=drop_path_rate,
            use_grad_checkpoint=use_grad_checkpoint,
            vit_precision=vit_precision,
            freeze_vit=freeze_vit,
            has_qformer=has_qformer,
            freeze_qformer=freeze_qformer,
            num_query_token=num_query_token,
            llama_model=llama_model,
            prompt_path=prompt_path,
            prompt_template=prompt_template,
            max_txt_len=max_txt_len,
            end_sym=end_sym,
            low_resource=low_resource,
            device_8bit=device_8bit,
        )

        ckpt_path = cfg.get("ckpt", "")  # load weights of MiniGPT-4
        if ckpt_path:
            print("Load MiniGPT-4 Checkpoint: {}".format(ckpt_path))
            ckpt = torch.load(ckpt_path, map_location="cpu")
            msg = model.load_state_dict(ckpt['model'], strict=False)

        return model
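

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal example of how
# MiniGPT4 is typically instantiated from a YAML config through the registry,
# mirroring the pattern in the MiniGPT-4 demo/eval scripts. The config path
# below is an assumption; point it at the eval config your setup actually uses.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from argparse import Namespace

    from minigpt4.common.config import Config

    # Config merges the runtime (eval) config with the registered default model
    # config for the chosen arch (e.g. configs/models/minigpt4_vicuna0.yaml).
    cfg = Config(Namespace(cfg_path="eval_configs/minigpt4_eval.yaml", options=None))

    model_cls = registry.get_model_class(cfg.model_cfg.arch)  # resolves to MiniGPT4
    model = model_cls.from_config(cfg.model_cfg).to("cuda").eval()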