o @Yh @sddlZddlZddlZddlZddlmZddlmZzddlmZddl m Z ddl m Z Wnddl mZddlm Z ddlm Z YGdddZdS) N)datetime)CommitScheduler)current_scheduler)DummyCommitScheduler) TRACKIO_DIRc @seZdZededefddZededefddZeddZeded ed efd d Z e dded ede ede e d Bde ed Bf ddZ eded ede efddZ ede efddZedede efddZddZd S) SQLiteStorageprojectreturncCs4ddd|D}|sd}tjt|dS)z-Get the database path for a specific project.css$|] }|s |dvr|VqdS))-_N)isalnum).0cr>/Users/nouamanetazi/projects/trackio/trackio/sqlite_storage.py s z4SQLiteStorage.get_project_db_path..defaultz.db)joinrstripospathr)rZsafe_project_namerrrget_project_db_paths z!SQLiteStorage.get_project_db_pathc Cst|}tjtj|ddtj3t |}| }| d| Wdn1s3wYWd|SWd|S1sKwY|S)zi Initialize the SQLite database with required tables. Returns the database path. T)exist_oka CREATE TABLE IF NOT EXISTS metrics ( id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp TEXT NOT NULL, project_name TEXT NOT NULL, run_name TEXT NOT NULL, step INTEGER NOT NULL, metrics TEXT NOT NULL ) N) rrrmakedirsrdirname get_schedulerlocksqlite3connectcursorexecutecommitrdb_pathconnr rrrinit_dbs       zSQLiteStorage.init_dbcCs^tdur tStjd}tjd}|durt}n t|dtdd|d}t||S)z Get the scheduler for the database based on the environment variables. This applies to both local and Spaces. NHF_TOKENTRACKIO_DATASET_IDdatasetT)repo_id repo_type folder_pathprivateZsquash_historytoken)rgetrenvironrrrset)hf_token dataset_id schedulerrrrr6s    zSQLiteStorage.get_schedulerrunmetricsc Cst|}tjZt|<}|}|d||f|d}|dur)dn|d}t }|d||||t |f|Wdn1sOwYWddSWddS1sgwYdS)a/ Safely log metrics to the database. Before logging, this method will ensure the database exists and is set up with the correct tables. It also uses the scheduler to lock the database so that there is no race condition when logging / syncing to the Hugging Face Dataset. z SELECT MAX(step) FROM metrics WHERE project_name = ? AND run_name = ? rN INSERT INTO metrics (timestamp, project_name, run_name, step, metrics) VALUES (?, ?, ?, ?, ?) )rr&rrrrr r!fetchonernow isoformatjsondumpsr") rr5r6r$r%r Z last_stepZ current_stepZcurrent_timestamprrrlogNs2      "zSQLiteStorage.logN metrics_liststeps timestampsc Cs.|sdS|durttt|}|durtgt|}t|t|ks/t|t|kr3tdt|}t j Pt |2}| }g}t|D]\} } ||| |||| t| fqN|d||Wdn1sxwYWddSWddS1swYdS)zEBulk log metrics to the database with specified steps and timestamps.Nz=metrics_list, steps, and timestamps must have the same lengthr8)listrangelenrr:r; ValueErrorrr&rrrrr enumerateappendr<r= executemanyr") rr5r?r@rAr$r%r datair6rrrbulk_logxsB       "zSQLiteStorage.bulk_logc Cst|}tj|s gSt|8}|}|d||f| }g}|D]}|\}} } t | } || d<| | d<| | q'|WdS1sMwYdS)zwRetrieve metrics for a specific run. The metrics also include the step count (int) and the timestamp (datetime object).z SELECT timestamp, step, metrics FROM metrics WHERE project_name = ? AND run_name = ? ORDER BY timestamp timestampstepN) rrrrexistsrrr r!fetchallr<loadsrG) rr5r$r%r rowsresultsrowrLrMZ metrics_jsonr6rrr get_metricss&       $zSQLiteStorage.get_metricsc Csg}tjts |Sttjtd}|D]C}z7t|(}|}| d| r?| ddd| D}| |Wdn1sIwYWqtj yYYqwtt|S)z4Get list of all projects by scanning database files.z*.dbzDSELECT name FROM sqlite_master WHERE type='table' AND name='metrics'z)SELECT DISTINCT project_name FROM metricscSg|]}|dqSrrrrSrrr z.SQLiteStorage.get_projects..N)rrrNrglobrrrr r!r9rOextendErrorrBr1)projectsZdb_filesZdb_filer%r Z project_namesrrr get_projectss,      zSQLiteStorage.get_projectscCspt|}tj|s gSt|}|}|d|fdd| DWdS1s1wYdS)z#Get list of all runs for a project.z