h(ddlZddlZddlZddlZddlmZddlmZddlmZddl m Z ddl m Z  ddl mZddlmZdd lmZGd d Zy#e$rddlmZddlmZdd lmZY$wxYw) N)datetime)Path)Lock)hf_hub_download)EntryNotFoundError)CommitScheduler)DummyCommitScheduler) TRACKIO_DIRc eZdZUdZdZeezdzed<eZ e de de jfdZe dede fdZe dede fd Ze dede fd Ze d Ze ded ed efdZe dded edeedeedzdeedzf dZe ded edeefdZe deefdZe dedeefdZe dedeedeeeffdZdZy) SQLiteStorageFN_current_schedulerdb_pathreturncltjt|}tj|_|S)N)sqlite3connectstrRow row_factory)rconns 2/fsx/qgallouedec/trackio/trackio/sqlite_storage.py_get_connectionzSQLiteStorage._get_connections%s7|,";; projectc`djd|Dj}|sd}|dS)z1Get the database filename for a specific project.c3JK|]}|js|dvs|yw))-_N)isalnum).0cs r z8SQLiteStorage.get_project_db_filename..$s"$ !!))+jAws##defaultz.db)joinrstrip)rsafe_project_names rget_project_db_filenamez%SQLiteStorage.get_project_db_filename!sBGG$ $  &( ! ) #$C((rc>tj|}t|z S)z-Get the database path for a specific project.)r r(r )rfilenames rget_project_db_pathz!SQLiteStorage.get_project_db_path+s!88AX%%rctj|}|jjddtj j 5t jjd}|UtjsEtj|} t||d}tj||dt_ tj |5}|j#}|j%d|j%d|j'dddddd|S#t$rYwxYw#1swY"xYw#1swY|SxYw) z Initialize the SQLite database with required tables. If there is a dataset ID provided, copies from that dataset instead. Returns the database path. T)parentsexist_okTRACKIO_DATASET_IDNdataset) repo_typea\ CREATE TABLE IF NOT EXISTS metrics ( id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp TEXT NOT NULL, run_name TEXT NOT NULL, step INTEGER NOT NULL, metrics TEXT NOT NULL ) z CREATE INDEX IF NOT EXISTS idx_metrics_run_step ON metrics(run_name, step) )r r+parentmkdir get_schedulerlockosenvironget_dataset_import_attemptedr(rshutilcopyrrrcursorexecutecommit)rr dataset_idr*downloaded_pathrr<s rinit_dbzSQLiteStorage.init_db1s! 33G<TD9  ( ( * / /(<=J%m.U.U(@@I&5"H 'OKK9;? 7)T   #*0>-**)0>sJAE $D2= E AE E 2 D>;E =D>>E E E  Ec tj5tjtjcdddStjj d}tjj d}tjj d}|| t }nt|dtdd|}|t_|cdddS#1swYyxYw)z Get the scheduler for the database based on the environment variables. This applies to both local and Spaces. NHF_TOKENr/SPACE_REPO_NAMEr0T)repo_idr1 folder_pathprivatesquash_historytoken) r _scheduler_lockr r6r7r8r rr )hf_tokenr?space_repo_name schedulers rr4zSQLiteStorage.get_scheduler[s  * *//;$77+ *zz~~j1H(<=J jjnn->?O!_%<02 +&' + #'"  09M ,%+ * *s CB CCrunmetricsc tj|}tjj5tj |5}|j }|j d|f|jd}|dn|dz}tjj}|j d|||tj|f|jddddddy#1swYxYw#1swYyxYw)a/ Safely log metrics to the database. Before logging, this method will ensure the database exists and is set up with the correct tables. It also uses the scheduler to lock the database so that there is no race condition when logging / syncing to the Hugging Face Dataset. z SELECT MAX(step) FROM metrics WHERE run_name = ? rN INSERT INTO metrics (timestamp, run_name, step, metrics) VALUES (?, ?, ?, ?) )r rAr4r5rr<r=fetchonernow isoformatjsondumpsr>) rrNrOrrr< last_step current_stepcurrent_timestamps rlogzSQLiteStorage.logus ''0  ( ( * / /..w74 F #OO-a0 $-$5q9q= $,LLN$<$<$>! *$ 7+   ;80 /770 /s$D BC6%D6C? ;DD  metrics_liststeps timestampsc |sy|ttt|}|/tjj gt|z}t|t|k7st|t|k7r t dtj|}tjj5tj|5}|j}g}t|D]3\} } |j|| ||| tj | f5|j#d||j%ddddddy#1swYxYw#1swYyxYw)zEBulk log metrics to the database with specified steps and timestamps.Nz=metrics_list, steps, and timestamps must have the same lengthrR)listrangelenrrTrU ValueErrorr rAr4r5rr< enumerateappendrVrW executemanyr>) rrNr\r]r^rrr<datairOs rbulk_logzSQLiteStorage.bulk_logsB  =s<012E  ",,.2245L8IIJ | E *c,.?3z?.RO  ''0  ( ( * / /..w74"+L"9JAwKK&qM!!H JJw/ #:""   -80 /770 /s%E)A6E E)E& "E))E2ctj|}|jsgStj|5}|j }|j d|f|j }g}|D];}tj|d}|d|d<|d|d<|j|=|cdddS#1swYyxYw)zwRetrieve metrics for a specific run. The metrics also include the step count (int) and the timestamp (datetime object).z SELECT timestamp, step, metrics FROM metrics WHERE run_name = ? ORDER BY timestamp rO timestampstepN) r r+existsrr<r=fetchallrVloadsre) rrNrrr<rowsresultsrowrOs r get_metricszSQLiteStorage.get_metricss 33G<~~I  * *7 3t[[]F NN  ??$DG**S^4'*;'7 $"%f+w'  )4 3 3s A7B>>Cct}tjsgStjdD]}|j}|j |!t |S)zc Get list of all projects by scanning the database files in the trackio directory. z*.db)setr rmglobstemaddsorted)projectsdb_file project_names r get_projectszSQLiteStorage.get_projectssS !U!!#I"''/G"<2  *..#sAvG.(+.z?J()#4 3,/4 3s$ C  AC C#CCCcy)zCleanup when run is finished.N)selfs rfinishzSQLiteStorage.finish"s r)NN)__name__ __module__ __qualname__r9r rr __annotations__rrJ staticmethodrr Connectionrrr(r+rAr4dictr[r`intrirsr}rrrrrrr r s %HL*>>ELfO'*<*< ))))&S&T&& ''''R2&S&s&T&&P #''+ .. .4j.Cy4 . I$ ..`SstDz8  $s)     9# 9$s) 9 949c3h2 rr )rVr6r:rrpathlibr threadingrhuggingface_hubrhuggingface_hub.errorsrtrackio.commit_schedulerrtrackio.dummy_commit_schedulerr trackio.utilsr Exceptioncommit_schedulerdummy_commit_schedulerutilsr rrrrsT +5"8C)N N "0;!"sA A'&A'