a }Gc @srddlZddlZddlmZddlmZddlmZm Z ddl m Z ddl m Z mZddlmZGdddZdS) N)Run) ColBERTConfig RunConfig)Launcher)create_directory print_message)encodec@s@eZdZdddZddZddZdd Zdd d Zd dZdS)IndexerNcCs>d|_||_t||_t|j|tj|_|j|ddS)zq Use Run().context() to choose the run's configuration. They are NOT extracted from `config`. N) checkpoint) index_pathr rload_from_checkpointZcheckpoint_config from_existingrconfig configure)selfr rr'/home/gupo/~/ColBERT/colbert/indexer.py__init__s  zIndexer.__init__cKs|jjfi|dSN)rr)rkw_argsrrrrszIndexer.configurecCs|jSr)r )rrrr get_indexszIndexer.get_indexcCs|jdusJ|j}g}tt|D]T}tj||}|d}|o\d|vp\d|vp\d|v}|pj|d}|r&||q&t|rt dt|d|dt d |D]}t |q|S) Nz.jsonmetadataZdoclenZplanz.ptz#> Will delete z files already at z in 20 seconds...) r sortedoslistdirpathjoinendswithappendlenrtimesleepremove)r directoryZdeletedfilenamedeleterrrerase"s     z Indexer.eraseFcCs|dvs J|j|||dkd|jddd|jj|_tj|jj }|dvsb|sbJ|jjt|jj|dur~||s|dkr| ||jS) N)TFreuseresumer)) collection index_namer)@)bsize partitions)Tr(r)Tr() rr index_path_r rrexistsrr'_Indexer__launch)rnamer* overwriteZindex_does_not_existrrrindex:s     z Indexer.indexcsZtfddt|jjD}fddt|jjD}tt}||j|||dS)Ncsg|] }qSr)list.0_managerrr Pz$Indexer.__launch..csg|]}jddqS))maxsize)Queuer6r9rrr;Qr<)mpManagerrangernranksrrlaunch)rr*Z shared_listsZ shared_queueslauncherrr9rZ__launchNs zIndexer.__launch)N)F) __name__ __module__ __qualname__rrrr'r4r1rrrrr s   r )rr!torch.multiprocessingmultiprocessingr@Zcolbert.infra.runrcolbert.infra.configrrcolbert.infra.launcherrcolbert.utils.utilsrrZ#colbert.indexing.collection_indexerrr rrrrs