B Bb @sdZddlZddlZddlmZddlmZmZmZm Z ddl Z ddl Z ddl mmZddlmZddlmZddlmZddlmZdd lmZdd lmZddlZed ed ed ddlm Z ddlm!Z"dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+e(e)e*e+dZ,dddddddddd Z-ddd ddd!dd"d#d$d%d ddd&dd"d#d'dd dddd(dd)d*d+dd dddd(dd)d*dd,dddd(dd-d.dd dddd(dd)d.dd,ddd/dd-d.dd ddd/dd)d.d0Z.dd3d4Z/dd5d6Z0ddddd7dddddddd1d8d7d2e)fd9d:Z1e)dd1d1fee2e e2e3fe2e3e2e4e4eeeee3fd;dd?d@Z6ddAdBZ7ddd2ddddCdDdEdFdGddHddd1d8dId7e)dd1d1d1fdJdKZ8ddd2dGdddddCdDdEdFdGddHddd1d8dId7e)d1fdLdMZ9dd2dGdddddCdDdEdFdGddHddd1d8dId7e)d1dfdNdOZ:dPdQZ;ddUdVZZ?e?j@dXdYe2d"dZd[e?j@d\e2d]d^d[e?j@d_d`dadbe?j@dce3dddd[e?j@dedfe2ddgd[e?j@dhdie2ddjdkdle?j@dme2ddnd[e?j@doe2ddpd[e?j@dqe3dSdrd[e?j@dse3dCdte?j@dueAdDdte?j@dveAdEdte?j@dwe3dFdte?j@dxd`dadbe?j@dye3ddte?j@dze3dHdte?j@d{e3dd|d[e?j@d}e3dd~d[e?j@dd`ddbe?j@deAd8dte?j@deAdIdte?j@deAd7dte?j@de3ddte?j@dd`ddbe?j@dd`ddbe?j@de2ddddle?j@de2dRdte?j@de2dRdte?j@dd`ddbe?j@dd`ddbe?j@dd`ddbe?j@dd`ddbe?BZCe 0 won't contribute to the sum of the denominator. rNrg)r/topkviewZ expand_aswhere ones_like BIG_CONST)logitskprobsvaluesZ batch_minsr4r4r5 top_k_filters  rA{Gz??c<sHdd|D}|dkrd}|r>tddtd|dd}nd}|dj\}}}}}||kr,|dkr,t|djddt|gt|djdd}t|djddt||gt|djdd}t|}||ddd d d }|ddd d d }tj|t|fdd  nt |d g}d}xt | D]}|t krrt d |dfdd|D}ttt||}|dj\}}}}}|||d\} }}!|!d}"|tj|"dd }| dddddf}#tj|#dd }$d}%g}&| tks| tkr|xD|D]<}'t|$t|'}(tt|( })|%|)7}%|&|)qW|tkr|t d|%j| tks| tkrptj !}*|}+tj"|$dd },|#}-xNt | D]B}t$|,|-j%j}.||+|.d\}}+}/|/d}0|tj|0dd }qW| ||d| }1tj&|1jd| gtj'd}2|*|1|2}3|tkr^t d|3j|%|37}%|&|3d}4|dkr tj|dddddfdd }5|5t|5tk( }5t|$tk( }6|$|6}7||7|7|5}4|tkrt d|4j|%|47}%||%j|t krVt d|%|4j|%j)dddk r| tkrfddt*|Dnfddt*|Dfddt*|D}8ttt|8|}x|D]}9|9j+j,qWg}:x|D]}9|:|9qW|:}qTWfdd|D}ttt||};|;||fS)NcSsg|]}t|jdqS)float32)npzerosshapeastype).0pr4r4r5 sz perturb_past..rgg?rr7rr)dimz Iteration cs g|]}tt|ddqS)T)r-r3)r6r/ from_numpy)rIp_)r3r4r5rKs)past_key_valuesz pplm_bow_loss:)rQ inputs_embeds)r3dtypez pplm_discrim_loss:z kl_lossz pplm_lossT)Z retain_graphc s,g|]$\}}t|t|jqSr4)r/maxnormgrad)rIindexrP) grad_norms window_maskr4r5rKFscs$g|]\}}t|jtqSr4)r/rUrV SMALL_CONST)rIrWrP)rYr4r5rKKscs6g|].\}} |j|jqSr4)rVdatacpunumpy)rIrWrP)gammarXstepsizerYr4r5rKQscs g|]}tt|ddqS)T)r-r3)r6r/rO)rIrP)r3r4r5rKfs)-r/arangerZrGtupleonespermutecatrFr1r;rangeVERBOSEprintlistmaprsumdetachFsoftmaxPPLM_BOWPPLM_BOW_DISCRIMmmtlogappend VERY_VERBOSEr[r\r] PPLM_DISCRIMnnCrossEntropyLoss unsqueezeZresize_token_embeddingsmatmulweighttensorlongfloatbackward enumeraterVZzero_)<pastmodellast unpert_past unpert_logitsaccumulated_hiddenrXr_one_hot_bows_vectors classifier class_label loss_typenum_iterationshorizon_length window_lengthdecayr^kl_scaler3verbosity_levelZgrad_accumulatorZ decay_mask_Z curr_lengthZones_key_val_shapeZzeros_key_val_shapeZ ones_maskZ loss_per_iterZnew_accumulated_hiddeniZcurr_perturbationZperturbed_pastZ all_logitsZ all_hiddenhiddenr=r?ZlossZ loss_list one_hot_bowZ bow_logitsZbow_lossce_lossZcurr_unpert_pastZ curr_probsZwterRZcurr_all_hiddenZ curr_hidden predictionlabel discrim_lossZkl_loss unpert_probs correctionZcorrected_probsrVrPZnew_past pert_pastr4)r3r^rXr_rYr5 perturb_pasts 6:                          r)namerr3rfpis_deep is_deeperreturnc Csj|dkr dSt|}t|d|d||d|}d|krHt|d} n(d|krZ|d} n|dkrh|} ntd|tj| |d|t |t r||d kr|d |} n>|d } |t krt d |t d |d t d | nrt |trZ|t|d kr|} n@|d } |t krbt d |t d |d t d | n|d } || fS)N)NNrr)rrrrrr$zMEither url or path have to be specified in the discriminator model parameters)Z map_locationrrz!class_label {} not in class_vocabzavailable values are: {}zusing default class {})DISCRIMINATOR_MODELS_PARAMSr r1r ValueErrorload_state_dictr/loadeval isinstancestrREGULARrgformatintsetr@) rrr3rrrrparamsrZresolved_archive_fileZlabel_idr4r4r5get_classifiernsJ      r)bag_of_words_ids_or_pathsrc spg}xf|D]^}|tkr$tt|}n|}t|d}|d}WdQRX|fdd|Dq W|S)Nr cs g|]}j|dddqS)TF)Zadd_prefix_spaceadd_special_tokens)encodestrip)rIword) tokenizerr4r5rKsz,get_bag_of_words_indices..)BAG_OF_WORDS_ARCHIVE_MAPr openreadrsplitrs)rr bow_indicesZ id_or_pathfilepathfr r4)rr5get_bag_of_words_indicess   rcCsz|dkr dSg}xd|D]\}ttdd|}t||}|jd}t||j|}|d|d| |qW|S)NcSs t|dkS)Nr)len)r2r4r4r5z,build_bows_one_hot_vectors..rr) rhfilterr/r{r1rGrFZ vocab_sizeZscatter_rs)rrr3rZ single_bowZ num_wordsrr4r4r5build_bows_one_hot_vectorss  rd{Gz?? T'?c*Kst|||t|||\}}g}|r0t|d|}|rN|rNt}|tkrtdn@|rht}|tkrtdn&|dk rt}|tkrtdntdt |||||| d||d \}} } } |dkrt j g}!g}"g}#g}$xt |D]}%t ||||d |||||| | | | | |||||||||d \}&}'}(})|!|&|dk rH|"|'j|#|(|$|)qW|dkrtt j ||!|"|#|$fS) N;z=Both PPLM-BoW and PPLM-Discrim are on. This is not optimized.zUsing PPLM-BoWzUsing PPLM-Discrimz0Specify either a bag of words or a discriminatorF) rrcontextr3lengthsampleperturbrstop_eotr,T)rrrr3rrrrrrr_ temperaturetop_krr grad_lengthrrrr^gm_scalerrr)rrrrrorgrnru Exceptiongenerate_text_pplmr/r,Z empty_cacherersr[r\r])*rrr num_samplesr3 bag_of_wordsdiscrimrrr_rrrrrrrrr^rrrrrrrkwargsrZclass_idrrunpert_gen_tok_textrpert_gen_tok_textsZdiscrim_lossesZlosses_in_time perplexitiesrpert_gen_tok_textr loss_in_time perplexityr4r4r5full_text_generations           rc5Cs~d}|rr?)r>)r)r>rNiP)r/r{r|rrGrxrrfrrerjrrsrlrmrvrwmeanrgr[r\r]rA multinomialr8rdrdecodetolist)5rrrrr3rrrrrrr_rrrrrrrrr^rrrr output_so_farZ context_trrXrunpert_discrim_lossrZ range_funcpert_total_prob pert_timesrrrrunpert_all_hiddenunpert_last_hiddencurrent_stepsizerrloss_this_iter pert_logitspert_all_hidden pert_probsrrrrrr4r4r5rBs             rc9CsJ|dkrtddStj||tjd}xt|jdkrB|d}q(W|dkjddd}t||j dd dd}t||j dd d}d}d}d }t |||}d} d}!d}"g}#xdt ||D]T}$|ddd|$}%t | |d|$g}&t | |d|$dg}'|dkrf|%dk rf|%ddd df}!||%dddd f\}(}}(||%\})}*}+|+d },|$|kr| d}-n| }-|r|dkr|}.nz|,dddd ddf}/tj|/dd }/|dk r t|||!|*|)|/| |-||||||||||||d \}.}(} }0|#|0n|}.||!|.d \}1}}2|1ddd ddf| }1tj|1d d }3|dk rtj}4|tj|,dd }5tj|g|tjd}6|4|5|6}"|tkrtd|"jnd}"|r.tj|)ddd ddfd d }7|3||7d|}3t|3| dd}3t|3dkrH|3t|3}3nt|1| d}1tj|1d d }3| rtj|3dd}!|&tks|&t j!ks|&t j"krn@|3d|d|$dkr||3d|d|$}|d7}nd}ntj#|3dd d\}(}!|%dkr|!ntj$|%|!fdd }%|!dddkr|rPqW|dkr8d|d|}8n|rBdnd }8|8S)NzNo text to test)r3rSrriPT)as_tupler)rNFr7)rrrrXr_rrrrrrrrr^rr3r)rQzunperturbed discrim loss)r>r?)r>)r)r>rNrL)%rgr/r{r|rrGrxnonzerorsizerrerrlowerrrjrrsrlrmrvrwrrfr[r\r]rAr words_corpusr corpusrrr8rd)9rrrr3rrrrrrr_rrrrrrrrr^rrrrZ test_textZeos_posstartendrrZ error_occuredrrXrrrrrZcur_word last_wordrrrrrrrrrrrrrrrrrr4r4r5get_perplexitys $            .  rc CsT|dkrtd|dkr tdt|d}t|}WdQRX||d<|td<dS)NzHWhen using a generic discriminator, discrim_weights need to be specifiedzEWhen using a generic discriminator, discrim_meta need to be specifiedrr$generic)rrjsonrr)discrim_weights discrim_metaZdiscrim_meta_filemetar4r4r5set_generic_model_paramss rr7rc 6Cs0t|tj|t|t} tj r8|s8dnd}!|dkrNt |||dk rt |d}"||"kr|"}| tkrt d||tj|dd}#|dkry|#t|Wnt dYnX|#|!|#t|}$x|#D] }%d |%_qW|r|$j|$jgd d }&n6|}'x|'s4t d td }'qW|$j|$j|'d d }&t d t |$|&t t|#|$|&|!||||| | | | | ||||||||| ||||d\}(})}*}*}+|$|(d},| tkrt dt dt |,t g}-t}.|rB|rBt| d|$}/x4|/D],}0t!t"dd|0}1|.#dd|1DqWxt$|)D]\}2}3y|rddl%}d}4xh|3dD]D}5|5|.kr|4d|j&j'|$|5g|j(j)7}4n|4|$|5g7}4qvWn|$|3d}4t d|2dt |4|rt d|+|2t Wn YnX|-*|&|3|(fqLWdS)Nr,r\rr z:discrim = {}, pretrained_model set to discriminator's = {}T)Zoutput_hidden_stateszCan't load local modelF)rz%Did you forget to add `--cond_text`? zModel prompt >>> z= Prefix of sentence =)rrrr3rrrrrr_rrrrrrrrr^rrrrrrrrzP================================================================================z= Unperturbed generated text =rcSs t|dkS)Nr)r)r2r4r4r5r/rz"run_pplm_example..css|]}|dVqdS)rNr4)rIwr4r4r5 1sz#run_pplm_example..rz{}{}{}z= Perturbed generated text {} =rz Perplexity:)+r/Z manual_seedrErandomseedVERBOSITY_LEVELSgetrrr,r0rrrgrr Zfrom_pretrainedrrr1rr parametersr-rZ bos_tokeninputrrrrrrrhrupdatercoloramaZForeZREDStyleZ RESET_ALLrs)6r Z cond_textZuncondrrrrrrrr_rrrrrrrrr^rrrZno_cudar  verbosityrZmodel_fpZcalc_perplexityrrrrr3Zdiscriminator_pretrained_modelrrparamZtokenized_cond_textZraw_textrrrrZunpert_gen_textZgenerated_textsZ bow_word_idsrZsingle_bow_listfilteredrrZ pert_gen_textZword_idr4r4r5run_pplm_examples#               r__main__z--pretrained_modelz-Mz1pretrained model name or path to local checkpoint)typedefaulthelpz --cond_textzThe lakezPrefix texts to condition onz--uncond store_truez#Generate from end-of-text as prefix)actionrz --num_samplesz7Number of samples to generate from the modified latentsz--bag_of_wordsz-BzoBags of words used for PPLM-BoW. Either a BOW id (see list in code) or a filepath. Multiple BoWs separated by ;z --discrimz-D) rr%Ztoxicityrr&r'r(r)r*r+zDiscriminator to use)rrchoicesrz--discrim_weightsz%Weights for the generic discriminatorz--discrim_metaz.Meta information for the generic discriminatorz --class_labelz&Class label used for the discriminatorz--length)rrz --stepsizez --temperaturez--top_kz--samplez--num_iterationsz --grad_lengthz--window_lengthzPLength of past which is being optimized; 0 corresponds to infinite window lengthz--horizon_lengthz!Length of future to optimize overz--decayzwhether to decay or notz--gammaz --gm_scalez --kl_scalez--seedz --no_cudazno cudaz --coloramazcolors keywordsz --verbosityrzverbosiry levelz--fpz --model_fpz--calc_perplexityzcalculate perplexityz --is_deepzwhether to use deep classifierz --is_deeperz --stop_eotzwhether to stop at eot token)FFr,)F)r,) rrFrNNNNr7rrrrTrrrrFrCrrBrFFrNNFFFF)E__doc__argparseroperatorrtypingrrrrr]rEr/Ztorch.nn.functionalrvZ functionalrlZtorch.autogradrtqdmr transformersr Ztransformers.file_utilsr Ztransformers.modeling_gpt2r Zpplm_classification_headr ZnltkdownloadZ nltk.corpusrr rrnrurorZr<ZQUIETrrfrtrrrr6rArrrboolrrrrrrrr__name__ArgumentParserparser add_argumentr} parse_argsargsvarsr4r4r4r5s\                    72  b