1g%U ddlmZmZmZmZddlZddlZddlm Z ddl m cm Z ddl mZmZddlmZddlmZGdde jZGdd e jZGd d e jZGd d e jZGdde jZGdde jZGdde jZGdde jZGdde jZGdde jZGdde jZ d(dejdejde d e!d!e f d"Z"d#Z#Gd$d%Z$Gd&d'e jZ%dS)))TupleListUnionOptionalN) rearrangerepeat)measure)tqdmc reZdZdZ ddedededed ed d f fd Zd Zdej d ej fdZ xZ S)FourierEmbedderaThe sin/cosine positional embedding. Given an input tensor `x` of shape [n_batch, ..., c_dim], it converts each feature dimension of `x[..., i]` into: [ sin(x[..., i]), sin(f_1*x[..., i]), sin(f_2*x[..., i]), ... sin(f_N * x[..., i]), cos(x[..., i]), cos(f_1*x[..., i]), cos(f_2*x[..., i]), ... cos(f_N * x[..., i]), x[..., i] # only present if include_input is True. ], here f_i is the frequency. Denote the space is [0 / num_freqs, 1 / num_freqs, 2 / num_freqs, 3 / num_freqs, ..., (num_freqs - 1) / num_freqs]. If logspace is True, then the frequency f_i is [2^(0 / num_freqs), ..., 2^(i / num_freqs), ...]; Otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1)]. Args: num_freqs (int): the number of frequencies, default is 6; logspace (bool): If logspace is True, then the frequency f_i is [..., 2^(i / num_freqs), ...], otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1)]; input_dim (int): the input dimension, default is 3; include_input (bool): include the input tensor or not, default is True. Attributes: frequencies (torch.Tensor): If logspace is True, then the frequency f_i is [..., 2^(i / num_freqs), ...], otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1); out_dim (int): the embedding size, if include_input is True, it is input_dim * (num_freqs * 2 + 1), otherwise, it is input_dim * num_freqs * 2. T num_freqslogspace input_dim include_input include_pireturnNct|r$dtj|tjz}n(tjdd|dz z|tj}|r|tjz}|d|d||_||_ | ||_ dS) zThe initializationg@dtype? frequenciesF) persistentN) super__init__torcharangefloat32linspacepiregister_bufferrrget_dimsout_dim)selfrrrrrr __class__s ^/apdcephfs_cq8/share_1367250/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/shapegen/models/vae.pyrzFourierEmbedder.__init__Is   m"""KK  . A &m K  $ 58 #K ]KEJJJ*"}}Y// cR|js |jdkrdnd}||jdz|zz}|S)Nrr)rr)r&rtempr%s r(r$zFourierEmbedder.get_dimsjs:&D$.A*=*=qq1t~1D89r)xc|jdkr|d|jzjg|jdddR}|jr=t j||| fdSt j|| fdS|S)a Forward process. Args: x: tensor of shape [..., dim] Returns: embedding: an embedding of `x` of shape [..., dim * (num_freqs * 2 + temp)] where temp is 1 if include_input is True and 0 otherwise. r).NNdim) r contiguousrviewshaperrcatsincos)r&r-embeds r(forwardzFourierEmbedder.forwardps >A  GQy\,,..1AAGZQTRTQTZWYZZZE! Ey!UYY[[%))++!>BGGGGy%))++uyy{{!;DDDDHr))r TrTT) __name__ __module__ __qualname____doc__intboolrr$rTensorr9 __classcell__r's@r(r r $s""J#$"&"#'+$( 0000 0!% 0 " 0 .2 000000B %,r)r c:eZdZdZd dedeffd ZdZdZxZ S) DropPathz^Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). T drop_prob scale_by_keepcrtt|||_||_dSN)rrDrrFrG)r&rFrGr's r(rzDropPath.__init__s4 h&&((("*r)c|jdks|js|Sd|jz }|jdfd|jdz zz}|||}|dkr|jr||||zS)a@Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the argument. rErr)r)rFtrainingr4ndim new_empty bernoulli_rGdiv_)r&r- keep_probr4 random_tensors r(r9zDropPath.forwards >R  t} H&   33 E**55i@@ s??t1?   y ) ) )=  r)c4dt|jddS)Nz drop_prob=rz0.3f)roundrF)r&s r( extra_reprzDropPath.extra_reprs;E$.!44;;;;r))rET) r:r;r<r=floatr?rr9rTrArBs@r(rDrDsx++%+T++++++ !!!&<<<<<<rUrr9rArBs@r(rWrWs! # \ \ \ \ \ \ \ \ \ \ \DDDDDDDr)rWcPeZdZdddejddedeeffdZdZxZ S)QKVMultiheadCrossAttentionNF)n_datarZqk_norm norm_layerheadsrict||_||_|r|||zddnt j|_|r|||zddnt j|_dSNTư>elementwise_affineeps)rrrlrir]rcq_normk_norm)r&rlrirZrjrkr's r(rz#QKVMultiheadCrossAttention.__init__s   W^qjj%DdSSSSdfdodqdq W^qjj%DdSSSSdfdodqdq r)c|j\}}}|j\}}}|jzdz}|||jd}|||jd}tj||d\} } |}| } tfd|| | f\}} } tj || |  dd ||d} | S)Nr+r/r0c2t|djSNzb n h d -> b h n d)hrrltr&s r(z4QKVMultiheadCrossAttention.forward.. !-ATZ P P Pr)r r4rlr3rsplitrsrtmapFscaled_dot_product_attention transposereshape) r&qkv_n_ctxbsrirZattn_chkvouts ` r(r9z"QKVMultiheadCrossAttention.forwardsg 5!HFE4:%* FF2udj" - - WWRR 0 0{2wB///1 KKNN KKNNPPPPSTVWYZR[\\1a,Q155??1EEMMbRWY[\\ r)) r:r;r<r] LayerNormr>rrr9rArBs@r(rhrhs !%< r r r r r r r r r rr)rhc neZdZdddejdddedededeed eed ef fd Zd Z xZ S) MultiheadCrossAttentionTNF)qkv_biasri data_widthrkrjrZrlrrirrjcnt||_||_||_||n||_t j||||_t j|j|dz||_ t j|||_ t||||||_ dS)Nbiasr+)rlrirZrkrj) rrrirZrlrr]r^c_qc_kvr`rh attention) r&rZrlrrirrkrjr's r(rz MultiheadCrossAttention.__init__s    #-#5%%:9UE999IdouqyxHHH iu-- 3!    r)c||}||}|||}||}|SrI)rrrr`r&r-datas r(r9zMultiheadCrossAttention.forwardsG HHQKKyy NN1d # # KKNNr)) r:r;r<r]rr>r?rrr9rArBs@r(rrs  $$(<          SM       6r)rc eZdZdddejdddeedededeed ed ef fd Zd e j d e j fdZ xZ S)ResidualCrossAttentionBlockNTF)rirrrkrjrirZrlrrrjc "t||}t||||||||_||dd|_||dd|_||dd|_t||_dS)N)rirZrlrrrkrjTrorp)rZ) rrrattnln_1ln_2ln_3rWmlp) r&rirZrlrrrkrjr's r(rz$ResidualCrossAttentionBlock.__init__s   J+!!    Ju4HHH JzdMMM Ju4HHH U###r)r-rc||||||z}||||z}|SrI)rrrrrrs r(r9z#ResidualCrossAttentionBlock.forwardsS  $))A,, $88 8 1&& &r)) r:r;r<r]rrr>r?rrr@r9rArBs@r(rrs!%%)<$$$ $ $  $ SM $$$$$$$$<U\r)rcBeZdZddejddedeffdZdZxZS)QKVMultiheadAttentionNF)rZrjrkrlrct||_||_|r|||zddnt j|_|r|||zddnt j|_dSrn)rrrlrr]rcrsrt)r&rlrrZrjrkr's r(rzQKVMultiheadAttention.__init__"s   W^qjj%DdSSSSdfdodqdq W^qjj%DdSSSSdfdodqdq r)c|j\}}}|jzdz}|||jd}tj||d\}}}|}|}tfd|||f\}}}tj ||| dd ||d} | S)Nrr/r0c2t|djSrwryrzs r(r|z/QKVMultiheadAttention.forward..:r}r)rr+r~) r&qkvrrrZrrrrrs ` r(r9zQKVMultiheadAttention.forward1s9E54:%*hhr5$*b11+c73331a KKNN KKNNPPPPSTVWYZR[\\1a,Q155??1EEMMbRWY[\\ r)) r:r;r<r]rr>rr9rArBs@r(rr!s < r r r r r r r r r r       r)rc ReZdZejddddededededed ef fd Zd Z xZ S) MultiheadAttentionFrE)rkrjrYrrZrlrrjrYcpt||_||_||_t j||dz||_t j|||_t||||||_ |dkrt|nt j |_ dS)Nrr)rlrrZrkrjrE)rrrrZrlr]r^c_qkvr`rrrDrcrd r&rrZrlrrkrjrYr's r(rzMultiheadAttention.__init__@s    Yueaih??? iu-- .!    6Db5H5H.111bkmmr)c||}||}|||}|SrI)rrrdr`rfs r(r9zMultiheadAttention.forwardZsB JJqMM NN1   NN4;;q>> * *r)) r:r;r<r]rr>r?rUrr9rArBs@r(rr?s< #\\\\ \  \  \\\\\\\\4r)rc deZdZdejddddedededed ed ef fd Zd e j fd Z xZ S)ResidualAttentionBlockTFrErrkrjrYrrZrlrrjrYc tt||||||||_||dd|_t |||_||dd|_dS)NrrZrlrrkrjrYTrorp)rZrY)rrrrrrWrrrs r(rzResidualAttentionBlock.__init__bs &!)    Ju4HHH U>BBBJu4HHH r)r-c||||z}||||z}|SrI)rrrrrfs r(r9zResidualAttentionBlock.forward{sG  $))A,,'' ' 1&& &r) r:r;r<r]rr>r?rUrrr@r9rArBs@r(rras< #IIII I  I  IIIIIIII2r)rcheZdZdejddddedededed ed ed effd Zd e j fdZ xZ S) TransformerTFrErrrZlayersrlrrjrYc t|_|_||_t jfdt|D|_dS)Nc <g|]}tS)r)r) .0rrYrlrrkrjrrZs r( z(Transformer.__init__..sO   '%)##1   r)) rrrrZrr] ModuleListrange resblocks) r&rrZrrlrrkrjrYr's `` `````r(rzTransformer.__init__s              v      r)r-c0|jD] }||}|SrI)r)r&r-blocks r(r9zTransformer.forwards'^  EaAAr)rrBs@r(rrs< #                  >r)rcneZdZdddddedededed ed ed ed effd Zdej dej fdZ xZ S)CrossAttentionDecoderTFbinary)rrj label_type num_latents out_channelsfourier_embedderrZrlrrjrcDt||_tj|jj||_t||||||_tj ||_ tj|||_ ||_ dS)N)rirZrlrrj) rrrr]r^r% query_projrcross_attn_decoderrln_post output_projr) r&rrrrZrlrrjrr's r(rzCrossAttentionDecoder.__init__s  0)D$9$A5II"= # # # |E** 9UL99$r)querieslatentsc||||j}|||}||}||}|SrI)rrtorrrr)r&rrr-occs r(r9zCrossAttentionDecoder.forwardsm//$"7"7"@"@"C"CGM"R"RSS  # #GW 5 5 LLOOq!! r)) r:r;r<r>r r?strrr FloatTensorr9rArBs@r(rrs"%%%% % * %  %%%%%%%%%%<u05;Lr)rijbbox_minbbox_max octree_depthindexingoctree_resolutionc||z }tj|}||}tj|d|dt|dztj}tj|d|dt|dztj}tj|d|dt|dztj} tj||| |\} } } tj| | | fd} | dd} t|dzt|dzt|dzg}| ||fS) Nrrrr+)rr/)axisr)npexp2r!r>r meshgridstackr)rrrrrlength num_cellsr-yzxsyszsxyz grid_sizes r(generate_dense_grid_pointsrs4  F %%I$%  HQK!c)nnq.@ SSSA HQK!c)nnq.@ SSSA HQK!c)nnq.@ SSSA;q!Q:::LRR (BB b p c)br/r+rzMTraining with soft labels, inference with sigmoid and marching cubes level 0.r0mclewiner)methodr )DiffDMCzBPlease install diso via `pip install diso`, or set mc_algo to 'mc'rFT)deform return_quads normalizezmc_algo z not supported.r)&device isinstancerUrarrayrrrr4r rrhalfrrrsigmoidprintappendr5r3r marching_cubescpunumpyhasattrdisor ImportErrorr r rdetach ValueErrorrastypeascontiguousarray RuntimeError)r&rr rrrrrrrr bbox_size xyz_samplesrr batch_logits batch_sizestartr batch_querieslogits grid_logitsoutputsirfacesnormalsrrsdfvertss r( latents2meshzShapeVAE.latents2mesh's fe $ $ IgwHF8F1Q3K((8F1Q3K((x' )C%/ * * * & Y' 44  ]1% %;#4Q#7DDH8HHHJJJ ( (E!%);";QQQ">?BB6JJGllnnG"7NjIIIM%%m&6&6w}&E&EwOOF2~~v..2Q6fggg    ' ' ' 'i !444 !&& IaL)A,PYZ[P\']^^ddff z""# %# %A" %d??292H#A**,,2244 (333/HeWa ()3i?(JHH%%"4//Kt4444444t"-.r"s"ss#*7#?#?#?#B#B6#J#J=N=V\(9(9\m%&q>/,==C#'88C5\`8#a#aLE5+E22E$||~~113399;;H!LLNN..006688DDbDAEE$%H%H%H%HIII%'rz::!3E:: % % %t$$$$$ % % %t$$$$$ %s28A0O)I0/O0JEOP:PP)rr r r Nr )r:r;r<r>r?rrUrr9rno_gradrrrrr;rArBs@r(rrs" #!+5+5+5+5 +5  +5  +5 +5+5+5+5+5+5+5+5+5+5+5+5+5Z U]__;>"!%TT"TeElDK67T T  T  TTTTT_TTTTTr)r)rN)&typingrrrrr$rrtorch.nnr]torch.nn.functional functionalreinopsrrskimager r Moduler rDrWrhrrrrrrrndarrayr>rrrrrrr)r(rEsm20/////////// $$$$$$$$^^^^^bi^^^B<<<<