# Text-to-image pipeline for Latent Consistency Models (LCM). The scheduler is the
# LCMScheduler defined alongside this file in lcm_scheduler.py.
import torch
from typing import Any, Dict, List, Optional, Union

from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel
from diffusers.image_processor import VaeImageProcessor
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.utils import logging
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

logger = logging.get_logger(__name__)


class LatentConsistencyModelPipeline(DiffusionPipeline):
    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: CLIPTextModel,
        tokenizer: CLIPTokenizer,
        unet: UNet2DConditionModel,
        scheduler: "LCMScheduler",
        safety_checker: StableDiffusionSafetyChecker,
        feature_extractor: CLIPImageProcessor,
        requires_safety_checker: bool = True,
    ):
        super().__init__()
        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            scheduler=scheduler,
            safety_checker=safety_checker,
            feature_extractor=feature_extractor,
        )
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

    def _encode_prompt(self, prompt, device, num_images_per_prompt, prompt_embeds: Optional[torch.FloatTensor] = None):
        r"""
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            device: (`torch.device`):
                torch device
            num_images_per_prompt (`int`):
                number of images that should be generated per prompt
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If
                not provided, text embeddings will be generated from `prompt` input argument.
        """
        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        if prompt_embeds is None:
            text_inputs = self.tokenizer(
                prompt,
                padding="max_length",
                max_length=self.tokenizer.model_max_length,
                truncation=True,
                return_tensors="pt",
            )
            text_input_ids = text_inputs.input_ids
            untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

            if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
                text_input_ids, untruncated_ids
            ):
                removed_text = self.tokenizer.batch_decode(
                    untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
                )
                logger.warning(
                    "The following part of your input was truncated because CLIP can only handle sequences up to"
                    f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                )

            if hasattr(self.text_encoder.config, "use_attention_mask") and self.text_encoder.config.use_attention_mask:
                attention_mask = text_inputs.attention_mask.to(device)
            else:
                attention_mask = None

            prompt_embeds = self.text_encoder(text_input_ids.to(device), attention_mask=attention_mask)
            prompt_embeds = prompt_embeds[0]

        if self.text_encoder is not None:
            prompt_embeds_dtype = self.text_encoder.dtype
        elif self.unet is not None:
            prompt_embeds_dtype = self.unet.dtype
        else:
            prompt_embeds_dtype = prompt_embeds.dtype

        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)

        # duplicate text embeddings for each generation per prompt
        bs_embed, seq_len, _ = prompt_embeds.shape
        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
        prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)

        return prompt_embeds

    def run_safety_checker(self, image, device, dtype):
        if self.safety_checker is None:
            has_nsfw_concept = None
        else:
            if torch.is_tensor(image):
                feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
            else:
                feature_extractor_input = self.image_processor.numpy_to_pil(image)
            safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
            image, has_nsfw_concept = self.safety_checker(
                images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
            )
        return image, has_nsfw_concept

    def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, latents=None):
        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
        if latents is None:
            latents = torch.randn(shape, dtype=dtype).to(device)
        else:
            latents = latents.to(device)
        # scale the initial noise by the standard deviation required by the scheduler
        latents = latents * self.scheduler.init_noise_sigma
        return latents

    def get_w_embedding(self, w, embedding_dim=512, dtype=torch.float32):
        """
        see https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

        Args:
            timesteps: torch.Tensor: generate embedding vectors at these timesteps
            embedding_dim: int: dimension of the embeddings to generate
            dtype: data type of the generated embeddings

        Returns:
            embedding vectors with shape `(len(timesteps), embedding_dim)`
        """
        assert len(w.shape) == 1
        w = w * 1000.0

        half_dim = embedding_dim // 2
        emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
        emb = w.to(dtype)[:, None] * emb[None, :]
        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
        if embedding_dim % 2 == 1:  # zero pad odd dimensions
            emb = torch.nn.functional.pad(emb, (0, 1))
        assert emb.shape == (w.shape[0], embedding_dim)
        return emb

    @torch.no_grad()
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
        height: Optional[int] = 768,
        width: Optional[int] = 768,
        guidance_scale: float = 7.5,
        num_images_per_prompt: Optional[int] = 1,
        latents: Optional[torch.FloatTensor] = None,
        num_inference_steps: int = 4,
        lcm_origin_steps: int = 50,
        prompt_embeds: Optional[torch.FloatTensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
    ):
        # 0. Default height and width to the UNet sample size
        height = height or self.unet.config.sample_size * self.vae_scale_factor
        width = width or self.unet.config.sample_size * self.vae_scale_factor

        # 1. Define call parameters
        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        device = self._execution_device

        # 2. Encode input prompt
        prompt_embeds = self._encode_prompt(prompt, device, num_images_per_prompt, prompt_embeds=prompt_embeds)

        # 3. Prepare timesteps
        self.scheduler.set_timesteps(num_inference_steps, lcm_origin_steps)
        timesteps = self.scheduler.timesteps

        # 4. Prepare latent variables
        num_channels_latents = self.unet.config.in_channels
        latents = self.prepare_latents(
            batch_size * num_images_per_prompt,
            num_channels_latents,
            height,
            width,
            prompt_embeds.dtype,
            device,
            latents,
        )
        bs = batch_size * num_images_per_prompt

        # 5. Get the guidance scale embedding (guidance is distilled into the model
        # via this conditioning instead of classifier-free guidance)
        w = torch.tensor(guidance_scale).repeat(bs)
        w_embedding = self.get_w_embedding(w, embedding_dim=256).to(device=device, dtype=latents.dtype)

        # 6. LCM multi-step sampling loop
        with self.progress_bar(total=num_inference_steps) as progress_bar:
            for i, t in enumerate(timesteps):
                ts = torch.full((bs,), t, device=device, dtype=torch.long)
                latents = latents.to(prompt_embeds.dtype)

                # model prediction (v-prediction, eps, or x, depending on the scheduler config)
                model_pred = self.unet(
                    latents,
                    ts,
                    timestep_cond=w_embedding,
                    encoder_hidden_states=prompt_embeds,
                    cross_attention_kwargs=cross_attention_kwargs,
                    return_dict=False,
                )[0]

                # compute the previous noisy sample x_t -> x_t-1 and the denoised estimate
                latents, denoised = self.scheduler.step(model_pred, i, t, latents, return_dict=False)
                progress_bar.update()

        denoised = denoised.to(prompt_embeds.dtype)
        if not output_type == "latent":
            image = self.vae.decode(denoised / self.vae.config.scaling_factor, return_dict=False)[0]
            image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
        else:
            image = denoised
            has_nsfw_concept = None

        if has_nsfw_concept is None:
            do_denormalize = [True] * image.shape[0]
        else:
            do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]

        image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

        if not return_dict:
            return (image, has_nsfw_concept)

        return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
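

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module. It assumes an LCM
    # checkpoint such as the "SimianLuo/LCM_Dreamshaper_v7" Hugging Face repo and
    # that the matching LCMScheduler lives next to this file in lcm_scheduler.py;
    # adjust the model id, scheduler import, and device to your setup.
    from lcm_scheduler import LCMScheduler

    model_id = "SimianLuo/LCM_Dreamshaper_v7"  # assumed checkpoint id
    scheduler = LCMScheduler.from_pretrained(model_id, subfolder="scheduler")
    pipe = LatentConsistencyModelPipeline.from_pretrained(
        model_id, scheduler=scheduler, torch_dtype=torch.float16
    )
    pipe.to("cuda")

    # LCM needs only a handful of steps; guidance enters through the w-embedding
    # that conditions the UNet, so there is no negative prompt or CFG pass.
    result = pipe(
        prompt="a photo of an astronaut riding a horse on mars",
        num_inference_steps=4,
        guidance_scale=8.0,
        lcm_origin_steps=50,
    )
    result.images[0].save("lcm_sample.png")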