willsh1997's picture
:wrench: remove codecarbon install, add slightly modified codecarbon pkg to repo
deb7c43
"""
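Implements tracking of CPU power consumption through three back-ends:
- Intel Power Gadget on Windows and (non-Apple Silicon) macOS (IntelPowerGadget),
- Intel RAPL files on Linux (IntelRAPL),
- a constant estimate based on the CPU's Thermal Design Power (TDP).
Intel Power Gadget:
https://software.intel.com/content/www/us/en/develop/articles/intel-power-gadget.html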
"""
import os
import re
import shutil
import subprocess
import sys
from typing import Dict, Optional, Tuple
import pandas as pd
import psutil
from rapidfuzz import fuzz, process, utils
from codecarbon.core.rapl import RAPLFile
from codecarbon.core.units import Time
from codecarbon.core.util import detect_cpu_model
from codecarbon.external.logger import logger
from codecarbon.input import DataSource
# Default power in watts assumed for a CPU when its model is not found in the
# reference csv. Despite the name, TDP._main multiplies this value by the number
# of logical CPUs (threads) returned by psutil.cpu_count(logical=True).
DEFAULT_POWER_PER_CORE = 4
def is_powergadget_available() -> bool:
    """
    Tell whether Intel Power Gadget can be used on this machine.

    Availability is probed by constructing an `IntelPowerGadget`; any exception
    raised while doing so is logged at debug level and reported as "unavailable".

    Returns:
        bool: `True` if the instantiation succeeded, `False` otherwise.
    """
    try:
        IntelPowerGadget()
    except Exception as error:
        logger.debug(
            "Not using PowerGadget, an exception occurred while instantiating "
            "IntelPowerGadget : %s",
            error,
        )
        return False
    return True
def is_rapl_available() -> bool:
    """
    Tell whether the Intel RAPL interface can be used on this machine.

    Availability is probed by constructing an `IntelRAPL`; any exception raised
    while doing so is logged at debug level and reported as "unavailable".

    Returns:
        bool: `True` if the instantiation succeeded, `False` otherwise.
    """
    try:
        IntelRAPL()
    except Exception as error:
        logger.debug(
            "Not using the RAPL interface, an exception occurred while instantiating "
            "IntelRAPL : %s",
            error,
        )
        return False
    return True
def is_psutil_available():
    """
    Tell whether psutil reports usable CPU times on this machine.

    The check reads `psutil.cpu_times().nice` and requires it to be greater than
    0.0001. A smaller value, or any exception raised along the way (for example
    when the `nice` field does not exist), is logged at debug level and makes
    the function return `False`.

    Returns:
        bool: `True` if the `nice` CPU time is above the threshold, else `False`.
    """
    try:
        nice_time = psutil.cpu_times().nice
        if not nice_time > 0.0001:
            logger.debug(
                f"is_psutil_available() : psutil.cpu_times().nice is too small : {nice_time} !"
            )
            return False
        return True
    except Exception as error:
        logger.debug(
            "Not using the psutil interface, an exception occurred while instantiating "
            + f"psutil.cpu_times : {error}",
        )
        return False
class IntelPowerGadget:
    """
    A class to interface with Intel Power Gadget for monitoring CPU power
    consumption on Windows and (non-Apple Silicon) macOS.

    The PowerLog command-line executable is located when the object is built.
    Each call to `get_cpu_details()` runs it once, lets it write a CSV log file,
    then reads that file back and summarises its columns.

    Args:
        output_dir (str): Directory in which the CSV log file is written.
        duration: Value passed to PowerLog's `-duration` option.
        resolution: Value passed to PowerLog's `-resolution` option.
        log_file_name (str): Name of the CSV log file inside `output_dir`.

    Raises:
        FileNotFoundError: If the PowerLog executable cannot be found.
        SystemError: If the platform is neither Windows nor macOS.

    Methods:
        start():
            Placeholder method for starting the Intel Power Gadget monitoring.
        get_cpu_details() -> Dict:
            Fetches the CPU power details by reading the values from the logged CSV file.
    """

    _osx_exec = "PowerLog"
    _osx_exec_backup = "/Applications/Intel Power Gadget/PowerLog"
    _windows_exec = "PowerLog3.0.exe"

    def __init__(
        self,
        output_dir: str = ".",
        duration=1,
        resolution=100,
        log_file_name="intel_power_gadget_log.csv",
    ):
        self._log_file_path = os.path.join(output_dir, log_file_name)
        self._system = sys.platform.lower()
        self._duration = duration
        self._resolution = resolution
        self._windows_exec_backup = None
        self._setup_cli()

    def _setup_cli(self) -> None:
        """
        Setup cli command to run Intel Power Gadget

        Raises:
            FileNotFoundError: If no PowerLog executable is found on PATH or at
                the backup location.
            SystemError: If the platform is not supported.
        """
        if self._system.startswith("win"):
            self._get_windows_exec_backup()
            # Windows exec is a relative path: resolve it through PATH first.
            exec_on_path = shutil.which(self._windows_exec)
            if exec_on_path:
                self._cli = exec_on_path
            elif self._windows_exec_backup and shutil.which(
                self._windows_exec_backup
            ):
                # The backup may be None; shutil.which(None) would raise a
                # TypeError instead of the FileNotFoundError below.
                self._cli = self._windows_exec_backup
            else:
                raise FileNotFoundError(
                    f"Intel Power Gadget executable not found on {self._system}"
                )
        elif self._system.startswith("darwin"):
            if shutil.which(self._osx_exec):
                self._cli = self._osx_exec
            elif shutil.which(self._osx_exec_backup):
                self._cli = self._osx_exec_backup
            else:
                raise FileNotFoundError(
                    f"Intel Power Gadget executable not found on {self._system}"
                )
        else:
            raise SystemError("Platform not supported by Intel Power Gadget")

    def _get_windows_exec_backup(self) -> None:
        """
        Find the windows executable for the current version of intel power gadget.
        Example: "C:\\Program Files\\Intel\\Power Gadget 3.5\\PowerLog3.0.exe"

        Sets `self._windows_exec_backup` to the candidate path, or to `None` when
        the Intel folder is missing/unreadable or holds no "Power Gadget" folder.
        """
        parent_folder = "C:\\Program Files\\Intel\\"
        try:
            # Get a list of all subdirectories in the parent folder
            with os.scandir(parent_folder) as entries:
                subfolders = [entry.name for entry in entries if entry.is_dir()]
        except OSError:
            # A missing Intel folder must not prevent using an executable
            # that is available on PATH.
            subfolders = []
        # Look for a folder that contains "Power Gadget" in its name
        desired_folder = next(
            (folder for folder in subfolders if "Power Gadget" in folder), None
        )
        if desired_folder:
            self._windows_exec_backup = os.path.join(
                parent_folder, desired_folder, self._windows_exec
            )
        else:
            self._windows_exec_backup = None

    def _log_values(self) -> None:
        """
        Logs output from Intel Power Gadget command line to a file

        Does nothing on platforms other than Windows and macOS. A non-zero
        return code, or a failure to launch the executable, is logged as a
        warning and not raised.
        """
        if self._system.startswith("win"):
            # Both streams were discarded before; DEVNULL does it without the
            # risk of blocking on a full pipe that nobody reads.
            stderr_target = subprocess.DEVNULL
        elif self._system.startswith("darwin"):
            # Only stdout is silenced on macOS, stderr stays attached.
            stderr_target = None
        else:
            return None
        # An argument list (no shell) keeps paths containing spaces intact.
        command = [
            self._cli,
            "-duration",
            str(self._duration),
            "-resolution",
            str(self._resolution),
            "-file",
            self._log_file_path,
        ]
        try:
            returncode = subprocess.call(
                command,
                stdout=subprocess.DEVNULL,
                stderr=stderr_target,
            )
        except OSError as e:
            logger.warning(
                "Unable to run Intel Power Gadget command line: %s",
                e,
            )
            return None
        if returncode != 0:
            logger.warning(
                "Returncode while logging power values using "
                + "Intel Power Gadget: %s",
                returncode,
            )

    def get_cpu_details(self) -> Dict:
        """
        Fetches the CPU Power Details by fetching values from a logged csv file
        in _log_values function

        Returns:
            Dict: One entry per CSV column except "System Time",
            "Elapsed Time (sec)" and "RDTSC". Columns whose name contains
            "Cumulative" map to their last value, every other column to its
            mean. Empty (or partial) if the file could not be read.
        """
        self._log_values()
        cpu_details = {}
        try:
            cpu_data = pd.read_csv(self._log_file_path).dropna()
            for col_name in cpu_data.columns:
                if col_name in ["System Time", "Elapsed Time (sec)", "RDTSC"]:
                    continue
                if "Cumulative" in col_name:
                    cpu_details[col_name] = cpu_data[col_name].iloc[-1]
                else:
                    cpu_details[col_name] = cpu_data[col_name].mean()
        except Exception as e:
            # Best effort: a missing or malformed log must not stop the caller.
            logger.info(
                "Unable to read Intel Power Gadget logged file at %s\n Exception occurred %s",
                self._log_file_path,
                e,
                exc_info=True,
            )
        return cpu_details

    def start(self) -> None:
        """
        Placeholder method for starting the Intel Power Gadget monitoring.
        """
        # TODO: Read energy
class IntelRAPL:
    """
    A class to interface Intel's Running Average Power Limit (RAPL) for monitoring CPU power consumption.

    This class provides methods to set up and read energy consumption data from Intel RAPL files,
    which are available on Linux systems.
    It enables the measurement of CPU energy usage over time and provides methods to fetch
    both dynamic and static CPU energy details.

    Args:
        rapl_dir (str): Directory containing the `intel-rapl:*` sub-directories.

    Raises:
        SystemError: If the platform is not Linux.
        FileNotFoundError: If `rapl_dir` does not exist.
        PermissionError: If an `energy_uj` file cannot be read.

    Attributes:
        _lin_rapl_dir (str): The directory path where Intel RAPL files are located.
        _system (str): The platform of the running system, typically used to ensure compatibility.
        _rapl_files (List[RAPLFile]): A list of RAPLFile objects representing the files to read energy data from.
        _cpu_details (Dict): A dictionary storing the latest CPU energy details.
        _last_mesure (int): Initialised to 0 and not otherwise used in this class.

    Methods:
        start():
            Starts monitoring CPU energy consumption.
        get_cpu_details(duration: Time) -> Dict:
            Fetches the CPU energy deltas over a specified duration by reading values from RAPL files.
        get_static_cpu_details() -> Dict:
            Returns the CPU details without recalculating them.
    """

    def __init__(self, rapl_dir="/sys/class/powercap/intel-rapl/subsystem"):
        self._lin_rapl_dir = rapl_dir
        self._system = sys.platform.lower()
        self._rapl_files = []
        self._cpu_details: Dict = {}
        self._last_mesure = 0
        self._setup_rapl()

    def _is_platform_supported(self) -> bool:
        """Return `True` when the platform name starts with "lin" (Linux)."""
        return self._system.startswith("lin")

    def _setup_rapl(self) -> None:
        """
        Check platform and directory, then register the RAPL files.

        Raises:
            SystemError: If the platform is not supported.
            FileNotFoundError: If the RAPL directory does not exist.
        """
        if not self._is_platform_supported():
            raise SystemError("Platform not supported by Intel RAPL Interface")
        if not os.path.exists(self._lin_rapl_dir):
            raise FileNotFoundError(
                f"Intel RAPL files not found at {self._lin_rapl_dir} "
                + f"on {self._system}"
            )
        self._fetch_rapl_files()

    def _fetch_rapl_files(self) -> None:
        """
        Fetches RAPL files from the RAPL directory

        Every entry whose name contains ":" is registered in `self._rapl_files`.
        Entries whose `name` file contains "package" are renamed to
        "Processor Energy Delta_<i>(kWh)"; the others keep their raw name.

        Raises:
            PermissionError: If an `energy_uj` file is not readable.
        """
        # consider files like `intel-rapl:$i`
        files = [entry for entry in os.listdir(self._lin_rapl_dir) if ":" in entry]
        i = 0
        for file in files:
            path = os.path.join(self._lin_rapl_dir, file, "name")
            with open(path) as f:
                name = f.read().strip()
            # Fake the name used by Power Gadget
            # We ignore "core" in name as it seems to be included in "package" for Intel CPU.
            # TODO: Use "dram" for memory power
            if "package" in name:
                name = f"Processor Energy Delta_{i}(kWh)"
                i += 1
            # RAPL file to take measurement from
            rapl_file = os.path.join(self._lin_rapl_dir, file, "energy_uj")
            # RAPL file containing maximum possible value of energy_uj above which it wraps
            rapl_file_max = os.path.join(
                self._lin_rapl_dir, file, "max_energy_range_uj"
            )
            try:
                # Try to read the file to be sure we can
                with open(rapl_file, "r") as f:
                    _ = float(f.read())
                self._rapl_files.append(
                    RAPLFile(name=name, path=rapl_file, max_path=rapl_file_max)
                )
                logger.debug("We will read Intel RAPL files at %s", rapl_file)
            except PermissionError as e:
                # Build the message explicitly: exceptions do not apply
                # %-formatting to their arguments.
                raise PermissionError(
                    "PermissionError : Unable to read Intel RAPL files for CPU power, we will use a constant for your CPU power."
                    + " Please view https://github.com/mlco2/codecarbon/issues/244"
                    + f" for workarounds : {e}"
                ) from e

    def get_cpu_details(self, duration: Time) -> Dict:
        """
        Fetches the CPU Energy Deltas by fetching values from RAPL files

        Args:
            duration (Time): Forwarded to each RAPL file's `delta()`.

        Returns:
            Dict: For each RAPL file, its name mapped to the energy delta in kWh.
            Names containing "Energy" also get a twin entry, with "Energy"
            replaced by "Power", holding the power in W. Empty (or partial) if
            reading failed. The result is also kept for
            `get_static_cpu_details()`.
        """
        cpu_details = {}
        try:
            # Update every file first so that all deltas are taken together,
            # before any value is read back.
            for rapl_file in self._rapl_files:
                rapl_file.delta(duration)
            for rapl_file in self._rapl_files:
                logger.debug(rapl_file)
                cpu_details[rapl_file.name] = rapl_file.energy_delta.kWh
                # We fake the name used by Power Gadget when using RAPL
                if "Energy" in rapl_file.name:
                    cpu_details[rapl_file.name.replace("Energy", "Power")] = (
                        rapl_file.power.W
                    )
        except Exception as e:
            # Best effort: a failed read must not stop the caller.
            logger.info(
                "Unable to read Intel RAPL files at %s\n Exception occurred %s",
                self._rapl_files,
                e,
                exc_info=True,
            )
        self._cpu_details = cpu_details
        logger.debug("get_cpu_details %s", self._cpu_details)
        return cpu_details

    def get_static_cpu_details(self) -> Dict:
        """
        Return CPU details without computing them.
        """
        return self._cpu_details

    def start(self) -> None:
        """
        Starts monitoring CPU energy consumption.
        """
        for rapl_file in self._rapl_files:
            rapl_file.start()
class TDP:
    """
    Represents Thermal Design Power (TDP) for detecting and estimating
    the power consumption of the CPU on a machine.

    The class provides methods to identify the CPU model, match it with known TDP
    values from a dataset, and return the corresponding power consumption in watts.

    Attributes:
        model (str): The detected CPU model name, or "Unknown".
        tdp (Optional[float]): The TDP value of the detected CPU in watts, an
            estimate based on the number of threads, or `None` if neither
            could be determined.

    Methods:
        start():
            Placeholder method to initiate TDP analysis.
    """

    def __init__(self):
        self.model, self.tdp = self._main()

    @staticmethod
    def _get_cpu_constant_power(match: str, cpu_power_df: pd.DataFrame) -> float:
        """Extract constant power from matched CPU"""
        return float(cpu_power_df[cpu_power_df["Name"] == match]["TDP"].values[0])

    def _get_cpu_power_from_registry(self, cpu_model_raw: str) -> Optional[float]:
        """
        Look up the TDP of a CPU model in the reference data.

        :return: the TDP of the matching model, or None when no model matches
        """
        cpu_power_df = DataSource().get_cpu_power_data()
        cpu_matching = self._get_matching_cpu(cpu_model_raw, cpu_power_df)
        if cpu_matching:
            power = self._get_cpu_constant_power(cpu_matching, cpu_power_df)
            return power
        return None

    def _get_matching_cpu(
        self, model_raw: str, cpu_df: pd.DataFrame, greedy=False
    ) -> Optional[str]:
        """
        Get matching cpu name

        :args:
            model_raw (str): raw name of the cpu model detected on the machine

            cpu_df (DataFrame): table containing cpu models along their tdp

            greedy (default False): if multiple cpu models match with an equal
            ratio of similarity, greedy (True) selects the first model,
            following the order of the cpu list provided, while non-greedy
            returns None.

        :return: name of the matching cpu model, or None when nothing matches

        :notes:
            Thanks to the greedy mode, even though the match could be a model
            with a tdp very different from the actual tdp of current cpu, it
            still enables the relative comparison of models emissions running
            on the same machine.

            THRESHOLD_DIRECT defines the similarity ratio value to consider
            almost-exact matches.

            THRESHOLD_TOKEN_SET defines the similarity ratio value to consider
            token_set matches (for more detail see fuzz.token_set_ratio).
        """
        THRESHOLD_DIRECT: int = 100
        THRESHOLD_TOKEN_SET: int = 100

        # First attempt: the raw model name as detected.
        direct_match = process.extractOne(
            model_raw,
            cpu_df["Name"],
            processor=lambda s: s.lower(),
            scorer=fuzz.ratio,
            score_cutoff=THRESHOLD_DIRECT,
        )
        if direct_match:
            return direct_match[0]

        # Second attempt: strip "(R)", the " CPU" marker and the frequency
        # suffix before comparing again.
        model_raw = model_raw.replace("(R)", "")
        start_cpu = model_raw.find(" CPU @ ")
        if start_cpu > 0:
            model_raw = model_raw[0:start_cpu]
        model_raw = model_raw.replace(" CPU", "")
        model_raw = re.sub(r" @\s*\d+\.\d+GHz", "", model_raw)
        direct_match = process.extractOne(
            model_raw,
            cpu_df["Name"],
            processor=lambda s: s.lower(),
            scorer=fuzz.ratio,
            score_cutoff=THRESHOLD_DIRECT,
        )
        if direct_match:
            return direct_match[0]

        # Last attempt: token-set comparison on the cleaned name.
        indirect_matches = process.extract(
            model_raw,
            cpu_df["Name"],
            processor=utils.default_process,
            scorer=fuzz.token_set_ratio,
            score_cutoff=THRESHOLD_TOKEN_SET,
        )
        if indirect_matches:
            # Accept the best candidate unless it ties with the runner-up
            # (and greedy mode is off).
            if (
                greedy
                or len(indirect_matches) == 1
                or indirect_matches[0][1] != indirect_matches[1][1]
            ):
                return indirect_matches[0][0]
        return None

    def _main(self) -> Tuple[str, Optional[float]]:
        """
        Get CPU power from constant mode

        :return: model name (str), power in Watt (None when it cannot be
            determined)
        """
        cpu_model_detected = detect_cpu_model()
        if not cpu_model_detected:
            logger.warning(
                "We were unable to detect your CPU using the `cpuinfo` package."
                + " Resorting to a default power consumption."
            )
            return "Unknown", None

        power = self._get_cpu_power_from_registry(cpu_model_detected)
        if power:
            logger.debug(
                "CPU : We detect a %s with a TDP of %s W",
                cpu_model_detected,
                power,
            )
            return cpu_model_detected, power

        logger.warning(
            "We saw that you have a %s but we don't know it."
            + " Please contact us.",
            cpu_model_detected,
        )
        if is_psutil_available():
            # Count thread of the CPU
            threads = psutil.cpu_count(logical=True)
            # cpu_count() returns None when the count cannot be determined;
            # multiplying it would raise a TypeError.
            if threads:
                estimated_tdp = threads * DEFAULT_POWER_PER_CORE
                logger.warning(
                    f"We will use the default power consumption of {DEFAULT_POWER_PER_CORE} W per thread for your {threads} CPU, so {estimated_tdp}W."
                )
                return cpu_model_detected, estimated_tdp
            logger.debug(
                "psutil could not determine the number of logical CPUs, "
                + "no TDP estimate is available."
            )
        return cpu_model_detected, None

    def start(self):
        """Placeholder: there is nothing to start for a constant TDP value."""
        pass