a yd @spddlZddlZddlZddlZddlmZddlmZmZmZddl Z ddl m Z ddZddZdd ZdS) N)Counter)corporamodels similaritiescstjjdj}tj|}dgg}|D]&}fdd||D}||q*dd|D}t |fdd|D}||fS)Nzbert-base-chinesecsg|]}|vr|qSr.0word) stopwordsr6/mnt/chromeos/MyFiles/work/nlp_tsa/tsa/TSA/abstract.py z build_corpus..cSsg|]}|D]}|q qSrr)r sentencer rrr r rcsg|]}|qSr)Zdoc2bow)r text) dictionaryrr r r) paddlenlpZ transformersZ BertTokenizerZfrom_pretrainedvocabdataZJiebaTokenizercutappendrZ Dictionary) sentencesrZ tokenizer words_listrwordscorpusr)rr r build_corpuss   rcCstjjj|||d}|j|dd}g}|D]}|\} } dd| dD} d} g} t||D]L\} }d}|D]}|| vrl|d7}ql|| kr|} | g} q\|| kr\| | q\| D]}||vr|}||q*qq*|S) N)rZid2word num_topics )rZ num_wordscSs g|]}|dddqS)*")splitstriprrrr r .rzlda..+rr)gensimrZldamodelZLdaModelZ print_topicsr!zipr)rrrrnumldatopicscentral_sentencestopicZtopic_idZ topic_wordsZ max_scoreZ candidatesrrZscorer candidateZcentral_sentencerrr r'$s0   r'cCs"t|\}}}t|||||}|S)N)rr')rr&rrrr)rrr abstruct_mainDsr,)jsonrr$Zsklearn collectionsrrrrZnumpynpZmatplotlib.pyplotZpyplotZpltrr'r,rrrr s