cMg#xddlZddlmZddlmZddlmZddlm Z ddl m Z m Z ddl mZmZGdd ZdS) N) ToPILImage)Image)VQVAEHF)FrozenCLIPEmbedder)SwittiHFget_crop_condition)sample_with_top_k_top_p_gumbel_softmax_with_rngcTeZdZdZdZdZdZed"dZe dZ de e e zfd Z d#de e e zd e d efdZej d$de e e zd e dedzdedededededededeeefd eje ezfd!ZdS)%SwittiPipelinezyresearch/VQVAE-Swittizopenai/clip-vit-large-patch14z(laion/CLIP-ViT-bigG-14-laion2B-39B-b160kc||_||_||_||_|j|j||_dS)N)swittivae text_encodertext_encoder_2evaldevice)selfrrrrrs 2/home/notantonvoron/switti_demo/models/pipeline.py__init__zSwittiPipeline.__init__sN (,    cudactj||}tj|j|}t |j|}t |j|}||||||S)N)r)rfrom_pretrainedtorvae_pathrtext_encoder_pathtext_encoder_2_path)clspretrained_model_name_or_pathrrrrrs rrzSwittiPipeline.from_pretraineds)*GHHKKFSS%cl3366v>>)#*?OOO +C,CFSSSs63 nfEEErcd|DS)Ncg|]]}td|ztj^S))rcpudetachrtorchuint8).0imgs r z+SwittiPipeline.to_image..&sd  37799##%% % ) )%+ 6 688r)tensors rto_imagezSwittiPipeline.to_image$s# rpromptct|tr|gn|}|j||j|g}t jd|Dd}|dj}|dj}|||fS)Ncg|] }|j Sr+)last_hidden_state)r(encodings rr*z1SwittiPipeline._encode_prompt..1s B B BHX ' B B Brdim) isinstancestrrencoderr&concat pooler_output attn_bias)rr. encodings prompt_embedspooled_prompt_embedsr;s r_encode_promptzSwittiPipeline._encode_prompt*s'44@&&   $ $V , ,   & &v . .  B B B B B    )}:bM+ 2I==rT null_prompt encode_nullc|||\}}}|r|j\}}} |jd} ||\} } } | ddd|f||| |j} | || |j} | ddd|f|||j} t j|| gd}t j|| gd}t j|| gd}|||fS)Nrr4)r?shapeexpandrrr&cat)rr.rArBr=r>r;BL hidden_dim pooled_dim null_embedsnull_pooled_embedsnull_attn_biass r encode_promptzSwittiPipeline.encode_prompt8sU :>9L9LV9T9T6 +Y  F,2 Aq*-3A6J>B>Q>QR]>^>^ ;K+^%aaa!e,33Aq*EEHHI]^^K!3!:!:1j!I!I!L!LMaMh!i!i +AAArrE299!Q??BB9CSTTN!I}k&BJJJM#(9.BDV-W]^#_#_#_  9n"=1EEEI2I==rN@ffffff?Fr rUseedcfgtop_ktop_p more_smooth return_pilsmooth_start_siturn_off_cfg_start_si image_sizereturnc , |jjrJ|j} |j} |jj}|d}n!| j|| j}|||\}}}|jddz}| |}| j rtd|z| dgzd|z| dgz |j }| |dd|z| j}| |}nd}|x}}| | j}| js || jz }|d| jd|z| jdz|ddd| jfz}d}||| j| jd| jd}| jD];}|j| j |j!d))* +   af.?.CVEVWYEZ[[ * *A F  fm , , , L # #D ) ) ) ) 122H @H @FB22E"E{ -",QQQR0G-GH ", ** Jbqb !"1"+$5bqb$9!%bqbM !"1"+. )"1" IJJAv~>*+&/"1"*=*+&/"1"*=|+J01 0Ebqb0I -01 0Ebqb0I -  nel333&77702C2JKKK#"#&7'' R"W E**5'::J)) K%K!ez"1"~5JqrrN8JJ  5r_4 5DA $45u==0NN1u9--5u"RU )"5"<"F"Fq"I"II2CuEq!!QQQ'#,,V44&&q!,,44Q RLLF$-$K$KF-..v%% !E>V.. @!/!4!4Q R!H!H!R!RSTVW!X!X%%n55aaa1B261Ja1O)O!OOPQ "0!6!6q!Q!?!? + +A F  e $ $ $ L # #E * * * *ooe$$((++//44  %--$$C r)r)r@T) r@NrPrQrRFTrrSrT)__name__ __module__ __qualname__rrrr classmethodr staticmethodr-r7listr?boolrOr&inference_modeintfloattupleTensorPILImagerr+rrr r s'H7D   FFF[F\ >S49_ > > > >" >>d3i>> >>>>.U! %'&0VVd3iVVDj V  V  VVVVV #V#s(OV X &VVVVVVrr )r&torchvision.transformsr PIL.Imagerr models.vqvaer models.clipr models.swittirrmodels.helpersr r r r+rrrs ------'''''' ******66666666LLLLLLLL[[[[[[[[[[r