%1gA5ddlZddlZddlmZddlmZddlmZddlZddl m Z m Z ddl m Z ddlmZddlmZd d lmZmZdd lmZejeZGd d eZGddeZdZedkrAddlmZedd5Z e !Z"dddn #1swxYwYe e"Z#e#$ej%dZ&ee"de&Z'dZ(e&)e(Z*e'+e*e'j,j-dZ-e-r e$dn e$ddZ.e&)e.Z* e'+e*e'j,j-dZ-e-r e$ddSe$ddS#e/$rZ0e$dYdZ0[0dSdZ0[0wwxYwdS) N)ABC) lru_cache)List)StringRecognizer AcceptState) parse_ebnf)ByteTrie) PartialUTF8)LEAF TokenTrie) get_mappingceZdZddZdededefdZdededefdZd Z de j fd Z de j fd Z de j fd Zed dZde j de j fdZdS)AbsTokenRecognizerrootFct|}|j}|j||_||_|r@|jj dstd|j |_ t||_ ||_t||j|_t#j|||_t)|||_t-|jt-|j ks3Jt-|jdt-|j dS)Ngpt2zConstrained decoding with unicode is only supported for GPT2 model. Support for other models is coming soon.Or you can use the constraints with only ascii characters.)unicodez, )rgrammar_encoding symbol_tableget start_rule_id byte_encoding __class____name__lower startswith ValueError eos_token_idr token_trie tokenizerrstring_recognizerr from_tokenizer unicode_triermappinglen)self grammar_strr!start_rule_namerparsed_grammarrs e/mnt/d/dev/semgus/TinyLlama_v1.1_GAD/../transformers-GAD/transformers_gad/token_grammar_recognizer.py__init__zAbsTokenRecognizer.__init__sW#K00):+8<<_MM$  9.7==??JJ    M  &2#I.."!12BDDV!W!W$3IwOOO"9g>>> 4<  C O% %    $, 9 93t#7#7 9 9     token_id accept_statereturnc|j|jr>||jkr|jSt d|jd|||jkrO|j|jr|jSt d|j|j|}|j ||}|S)Nz8All stacks are empty, so the only token accepted is EOS(z ), but got zWAt least one of the stack should be empty when EOS is reached. However, the stacks are ) r" _must_stopstacksrget_termination_accept_stater _can_stopr%map_consume_bytes)r'r.r/bytes_or_codepointss r+_consume_token_idz$AbsTokenRecognizer._consume_token_id,s  ! , ,\-@ A A 4,,,-JJLLL wtO`wwmuww t( ( (%// 0CDD -JJLLL <&2&9<< #l..x88-<<   r-cV|j}|j|r||jkrdSdS||jkr|j|rdSdS|j|d}|j||d}t|jdkS)NTF)verboser) r3r"r2rr5r%r6r7r&)r'r.r/r3r8 new_acc_states r+probe_token_idz!AbsTokenRecognizer.probe_token_idGs$  ! , ,V 4 4 4,,,tu t( ( (%//77 tu#l..x.GG.== u>  ='((1,,r-ct)z8Process a list of tokens according to the grammar rules.)NotImplementedError)r'argskwargss r+advance_token_idsz$AbsTokenRecognizer.advance_token_ids]s!!r-cg}|D]+}||||,tj|SN)append filter_vocabtorchstack)r'batch_accept_statesdevicebatch_acceptancer/s r+batch_filter_vocabz%AbsTokenRecognizer.batch_filter_vocabasQ/ M ML  # #D$5$5lF$K$K L L L L{+,,,r-c|jsbt|j}tdddg|z}d||j<t j|t j|S| ||}|S)Nz Empty stack, sum of acceptance: rFTdtyperJ) r3r&r%loggerdebugrrGtensorboolget_token_acceptance)r'r/rJ vocab_sizeaccepts acceptances r+rFzAbsTokenRecognizer.filter_vocabgs" JT\**J LL?A?? @ @ @g *G)-GD% &<uz&III I..|VDD r-ctjfdjD}|t jdd}|S)Ncbg|]+}t|j,S)$get_token_acceptance_array_for_stacktuple partial_utf8).0rHr/rJr's r+ z;AbsTokenRecognizer.get_token_acceptance..xsK   99%LL,";V   r-r)dim)rGcatr3reshaper&any)r'r/rJacceptance_matrixrWs``` r+rTz'AbsTokenRecognizer.get_token_acceptancevs!I      *0      '..s<3F/G/GLLPPUVPWW r-i)maxsizecttsJtjr*fd}j|dj}n@dgtjz}tj j gj j|}tj|tj|}|}|S)Nc@j|gS)N)r])r" _probe_bytes)xr]r'rHs r+zIAbsTokenRecognizer.get_token_acceptance_array_for_stack..s)!7!D!DE7"E""r-F)accept accept_eosrrN) isinstancer\listrr$rTrr&r%check_token_acceptance_in_trier trier"rGrRrSvalidate_and_set_eos_acceptance) r'rHr]rJaccept_ftoken_acceptancerVrjx_eoss ``` r+r[z7AbsTokenRecognizer.get_token_acceptance_array_for_stacks%'''''U    H $0EEE@Q F    gDL 1 11G=$&!     L)F K K K44Q77 r-rWctj|dkr d||j<n%||jrtd||j<|S)NrTF)rGrdrr)r'rWs r+rrz2AbsTokenRecognizer.validate_and_set_eos_acceptancesT 9Z A % %,0Jt( ) )$+, # ll",1Jt( )r-N)rF)r __module__ __qualname__r,intrr9rSr=rBrGTensorrLrFrTrr[rrrZr-r+rrs2::::0+6 6-s-+-$----,"""----- EL     EL    Yu4%,5<r-rcNeZdZd fd Zd dZ d deedefdZd Z xZ S) IncrementalTokenRecognizerFclt||||d|_d|_dSNT)superr, last_sizeis_incremental)r'r(r)r!rrs r+r,z#IncrementalTokenRecognizer.__init__s6 i'JJJ"r-Nc<j+fd|D}fdt||D}nMt|djdzkrfdt||D}ntdt|d_|S)Nc,g|]} |dngSrDrZ)r^single_input_idsparse_start_indexs r+r_z@IncrementalTokenRecognizer.advance_token_ids..sE%%0!!2!3!344r-cBg|]\}}||SrZ)_consume_token_ids)r^prefixr/r's r+r_z@IncrementalTokenRecognizer.advance_token_ids..s=###(FL'' ==###r-rr cNg|]!\}}|d|"S)r`)r9)r^rr/r's r+r_z@IncrementalTokenRecognizer.advance_token_ids..sD###2$l&&'7';\JJ###r-aInput ID's length is inconsistent with the current state of the GrammarConstrainedLogitsProcessor. If you want to process another input sequence, please instantiate a new GrammarConstrainedLogitsProcessor or call reset_parser method of GrammarAlignedOracleLogitsProcessor)rzipr& RuntimeError)r' input_idsrIrprefix_to_parses` ` r+rBz,IncrementalTokenRecognizer.advance_token_idss > !)2 O####,/AT,U,U###   1  $.1"4 4 4####69277###  $U Yq\**""r-T token_idsr/c||j}|r6|j|}|j||}nt |D]\}}|||}t|jdkrU|d|dz}tj |d|j|}tj d||S)Nrr z is acceptedzThe decoded string is ) r"get_initial_accept_stater!decode_consume_string enumerater9r&r3loggingrQ) r'rr/ as_stringstringir. cur_token_idsdecoded_strings r+rz-IncrementalTokenRecognizer._consume_token_idss  1JJLLL  M^**955F1AA&,WWLL(33 M M 8#55h MM |*++a//$-gAg$6MM]"@"@"@AAA%)^%:%:=%I%INM"K>"K"KLLLr-cd|_dSrD)r)r's r+resetz IncrementalTokenRecognizer.resets r-)FrDr~) rrwrxr,rBrryrrr __classcell__)rs@r+r|r|s######-#-#-#-#`QUc2=$r-r|c |D]\}}|tkr|}||krt|||<+g}|D]} | s| d} |j| } || |dsD| | dzz } | dd} |j| r| | ||t| |rt||||||S)Nr`Fr ) itemsr rSrchar_acceptance_at_elementrrEextend advance_stackr\rp) rqr3grammarrrVbyte next_trier. new_stacksstknext_element_offset num_chars new_stacks r+rprpsQ::<<  i 4<< H<''%)LL!   G GC "%b' 01DEI556IJJNNe  9q= 0 CRCI'(;< 6  !4555   g33E)4D4DEE F F F F   *:w g    Nr-__main__) AutoTokenizerzexamples/grammars/japanese.ebnfrrr)r(r)r!u トリームF)rzThe Japanese input is acceptedz"The Japanese input is not acceptedu안녕하세요zThe Korean input is acceptedz The Korean input is not accepted)1copyrabcr functoolsrtypingrrGtransformers_gad.recognizerrrtransformers_gad.parserrtransformers_gad.trier transformers_gad.utf8_utilsr vocab_structr r transformers_gad.mappingr getLoggerrrPrr|rp transformersropenfileread input_textr*printfrom_pretrainedr!tokenRecognizerjapaneseencoderrr"r3koreanrerZr-r+rsp  EEEEEEEE......******333333))))))))000000  8 $ $RRRRRRRRjKKKKK!3KKKZ$$$N z****** / 5 5!YY[[ !!!!!!!!!!!!!!!Z ++N- -f55I00)OH  **I  / /?4;u0F4 .//// 2333 F  ((I 2 33 8?54    6 E0 1 1 1 1 1 E4 5 5 5 5 5 222 01111111112Ks0B00B47B4"/F  F F;% F66F;