From 658e6733ee1cfd08a2ac7d17170f9c362d55fbe7 Mon Sep 17 00:00:00 2001 From: Dan Goldberg Date: Wed, 8 Apr 2026 20:29:43 -0300 Subject: [PATCH 1/7] PPCC collector --- .../collectors/collector_manager.py | 6 + .../collectors/ppcc_collector.py | 350 ++++++++++++++++++ .../tools/MFT/mlxreg.py | 9 + .../tools/MSTFlint/mstreg.py | 9 + .../tools/base_tool.py | 8 +- 5 files changed, 379 insertions(+), 3 deletions(-) create mode 100644 sos/report/mellanox_firmware_suite/collectors/ppcc_collector.py diff --git a/sos/report/mellanox_firmware_suite/collectors/collector_manager.py b/sos/report/mellanox_firmware_suite/collectors/collector_manager.py index 2d5dda5b9c..32c246a958 100644 --- a/sos/report/mellanox_firmware_suite/collectors/collector_manager.py +++ b/sos/report/mellanox_firmware_suite/collectors/collector_manager.py @@ -1,6 +1,7 @@ from .system_collector import SystemCollector from .firmware_collector import FirmwareCollector from .cable_collector import CableCollector +from .ppcc_collector import PPCCCollector class CollectorManager(object): @@ -12,6 +13,7 @@ def collect_all(self): self.collect_system_info() self.collect_firmware_info() self.collect_cable_info() + self.collect_ppcc_info() def collect_system_info(self): for ctx in self.device_contexts: @@ -26,3 +28,7 @@ def collect_firmware_info(self): def collect_cable_info(self): for ctx in self.device_contexts: CableCollector().run(self.plugin, ctx) + + def collect_ppcc_info(self): + for ctx in self.device_contexts: + PPCCCollector().run(self.plugin, ctx) diff --git a/sos/report/mellanox_firmware_suite/collectors/ppcc_collector.py b/sos/report/mellanox_firmware_suite/collectors/ppcc_collector.py new file mode 100644 index 0000000000..12a7138282 --- /dev/null +++ b/sos/report/mellanox_firmware_suite/collectors/ppcc_collector.py @@ -0,0 +1,350 @@ +import re +from enum import Enum +from typing import Dict, List, Optional, Tuple + +from .base_collector import Collector +from ..tools import ( + MftTools, + 
MstFlintTools, + get_tool, +) + +# mlxreg/mstreg --op uses string cmd_type values from the PPCC PRM. +PpccCommandOptions = Dict[str, str] + + +class PpccCommand(str, Enum): + GET_ALGO_STATUS = "0x3" + GET_NUM_PARAMS = "0x4" + GET_PARAM_INFO = "0x5" + GET_PARAM = "0x6" + BULK_GET_PARAMS = "0xA" + BULK_GET_COUNTERS = "0xC" + GET_NUM_COUNTERS = "0xE" + GET_COUNTER_INFO = "0xF" + ALGO_INFO_ARRAY = "0x10" + + +class PPCCCollector(Collector): + _BASE_REGISTER_INDEXES = "local_port=1,pnat=0,lp_msb=0" + _ALGO_SLOT_TEXT_INDEX_COUNT = 16 + _COMMAND_OUTPUT_LOG_MAX_CHARS = 4000 + + _TEXT_TABLE_LINE_PATTERN = re.compile( + r"^\s*text\[(\d+)\]\s*\|\s*0x([0-9a-fA-F]+)", + re.MULTILINE | re.IGNORECASE, + ) + _VALUE_FIELD_PATTERN = re.compile( + r"^\s*value\s*\|\s*0x([0-9a-fA-F]+)", + re.MULTILINE | re.IGNORECASE, + ) + + @staticmethod + def _op_for_cmd_type(command: PpccCommand) -> PpccCommandOptions: + return {"cmd_type": command.value} + + @staticmethod + def _register_indexes_for_algo_slot(algo_slot_index: int) -> str: + return ( + f"{PPCCCollector._BASE_REGISTER_INDEXES}," + f"algo_slot={algo_slot_index}" + ) + + @staticmethod + def _make_filename_for_ppcc_get( + collection_file_prefix: str, + command_options: PpccCommandOptions, + register_indexes: str, + ) -> str: + op_part = "_".join( + f"{key}_{value}" for key, value in command_options.items() + ) + index_part = register_indexes.replace("=", "_").replace(",", "_") + return ( + f"{collection_file_prefix}--reg_name_PPCC_--get_" + f"--op_{op_part}_--indexes_{index_part}" + ) + + @classmethod + def _get_algo_slot_indices(cls, mlxreg_output: str) -> List[int]: + slot_count = cls._ALGO_SLOT_TEXT_INDEX_COUNT + values_per_slot = [0] * slot_count + for match in cls._TEXT_TABLE_LINE_PATTERN.finditer(mlxreg_output): + text_index = int(match.group(1)) + if text_index >= slot_count: + continue + values_per_slot[text_index] = int(match.group(2), 16) + return [ + text_index + for text_index, value in enumerate(values_per_slot) + if value 
!= 0 + ] + + @classmethod + def _extract_value_field(cls, mlxreg_output: str) -> Optional[int]: + match = cls._VALUE_FIELD_PATTERN.search(mlxreg_output) + if not match: + return None + return int(match.group(1), 16) + + @classmethod + def _clip_command_output(cls, text: str) -> str: + raw = (text or "").strip() + if not raw: + return "(empty)" + limit = cls._COMMAND_OUTPUT_LOG_MAX_CHARS + if len(raw) <= limit: + return raw + return raw[:limit] + + def _ppcc_get( + self, + plugin, + device_label: str, + tool, + collection_file_prefix: str, + output_subdir: str, + command_options: PpccCommandOptions, + register_indexes: str, + ) -> Tuple[int, str]: + return_code, output = tool.ppcc_get( + command_options, + register_indexes, + filename=self._make_filename_for_ppcc_get( + collection_file_prefix, + command_options, + register_indexes, + ), + subdir=output_subdir, + ) + if return_code != 0: + op = command_options.get("cmd_type", "?") + plugin._log_info( + "PPCC command failed " + f"device={device_label} cmd_type={op} " + f"indexes={register_indexes!r} rc={return_code} " + f"output:\n{self._clip_command_output(output)}" + ) + return return_code, output + + def _collect_counters_for_algo_slot( + self, + plugin, + tool, + collection_file_prefix: str, + output_subdir: str, + ctx, + algo_slot_index: int, + register_indexes: str, + ) -> None: + device_label = ctx.device + + return_code, output = self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + self._op_for_cmd_type(PpccCommand.GET_NUM_COUNTERS), + register_indexes, + ) + if return_code != 0: + return + + counter_count = self._extract_value_field(output) + if counter_count is None: + return + + counter_info_op = self._op_for_cmd_type( + PpccCommand.GET_COUNTER_INFO + ) + for counter_index in range(counter_count): + counter_indexes = ( + f"{register_indexes},algo_counter_index={counter_index}" + ) + self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + 
output_subdir, + counter_info_op, + counter_indexes, + ) + + if counter_count == 0: + return + + self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + self._op_for_cmd_type(PpccCommand.BULK_GET_COUNTERS), + register_indexes, + ) + + def _collect_params_for_algo_slot( + self, + plugin, + tool, + collection_file_prefix: str, + output_subdir: str, + ctx, + algo_slot_index: int, + register_indexes: str, + ) -> None: + device_label = ctx.device + + return_code, output = self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + self._op_for_cmd_type(PpccCommand.GET_NUM_PARAMS), + register_indexes, + ) + if return_code != 0: + return + + param_count = self._extract_value_field(output) + if param_count is None: + return + + param_info_op = self._op_for_cmd_type( + PpccCommand.GET_PARAM_INFO + ) + for param_index in range(param_count): + param_indexes = ( + f"{register_indexes},algo_param_index={param_index}" + ) + self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + param_info_op, + param_indexes, + ) + + if param_count == 0: + return + + return_code, _ = self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + self._op_for_cmd_type(PpccCommand.BULK_GET_PARAMS), + register_indexes, + ) + + if return_code == 0: + return + + get_param_op = self._op_for_cmd_type(PpccCommand.GET_PARAM) + for param_index in range(param_count): + param_indexes = ( + f"{register_indexes},algo_param_index={param_index}" + ) + self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + get_param_op, + param_indexes, + ) + + def _collect_single_algo_slot( + self, + plugin, + tool, + collection_file_prefix: str, + output_subdir: str, + ctx, + algo_slot_index: int, + ) -> None: + register_indexes = self._register_indexes_for_algo_slot( + algo_slot_index + ) + device_label = ctx.device + + return_code, output = 
self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + self._op_for_cmd_type(PpccCommand.GET_ALGO_STATUS), + register_indexes, + ) + if return_code != 0: + return + + algo_status = self._extract_value_field(output) + if algo_status is not None and algo_status != 1: + return + + self._collect_counters_for_algo_slot( + plugin, + tool, + collection_file_prefix, + output_subdir, + ctx, + algo_slot_index, + register_indexes, + ) + self._collect_params_for_algo_slot( + plugin, + tool, + collection_file_prefix, + output_subdir, + ctx, + algo_slot_index, + register_indexes, + ) + + def _collect_ppcc_data(self, plugin, tool, tool_name: str, ctx) -> None: + collection_file_prefix = f"{tool_name}_{ctx.bdf}_" + output_subdir = f"{tool_name}_pcc" + device_label = ctx.device + + return_code, output = self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + self._op_for_cmd_type(PpccCommand.ALGO_INFO_ARRAY), + self._BASE_REGISTER_INDEXES, + ) + if return_code != 0: + return + + present_algo_slots = self._get_algo_slot_indices(output) + if not present_algo_slots: + return + + for algo_slot_index in present_algo_slots: + self._collect_single_algo_slot( + plugin, + tool, + collection_file_prefix, + output_subdir, + ctx, + algo_slot_index, + ) + + def _collect_with_mft(self, plugin, ctx): + mlxreg_tool = get_tool(MftTools.MLXREG, plugin, ctx) + self._collect_ppcc_data(plugin, mlxreg_tool, "mlxreg", ctx) + + def _collect_with_mstflint(self, plugin, ctx): + mstreg_tool = get_tool(MstFlintTools.MSTREG, plugin, ctx) + self._collect_ppcc_data(plugin, mstreg_tool, "mstreg", ctx) diff --git a/sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py b/sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py index 45da706f22..85ef01af2f 100644 --- a/sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py +++ b/sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py @@ -9,3 +9,12 @@ def mlxreg_roce_accl_query(self, 
filename=None): "--get", filename=filename ) + + def ppcc_get(self, op, indexes, filename=None, subdir=None): + op_str = ",".join(f"{k}={v}" for k, v in op.items()) + return self.execute_cmd( + f'mlxreg -d {self.ctx.device} --reg_name PPCC --get ' + f'--op "{op_str}" --indexes "{indexes}"', + filename=filename, + subdir=subdir, + ) diff --git a/sos/report/mellanox_firmware_suite/tools/MSTFlint/mstreg.py b/sos/report/mellanox_firmware_suite/tools/MSTFlint/mstreg.py index 09e87b3caa..8e6a1e87a0 100644 --- a/sos/report/mellanox_firmware_suite/tools/MSTFlint/mstreg.py +++ b/sos/report/mellanox_firmware_suite/tools/MSTFlint/mstreg.py @@ -9,3 +9,12 @@ def mstreg_roce_accl_query(self, filename=None): "--get", filename=filename ) + + def ppcc_get(self, op, indexes, filename=None, subdir=None): + op_str = ",".join(f"{k}={v}" for k, v in op.items()) + return self.execute_cmd( + f'mstreg -d {self.ctx.device} --reg_name PPCC --get ' + f'--op "{op_str}" --indexes "{indexes}"', + filename=filename, + subdir=subdir, + ) diff --git a/sos/report/mellanox_firmware_suite/tools/base_tool.py b/sos/report/mellanox_firmware_suite/tools/base_tool.py index e6bf3adfc8..d2accd8cc6 100644 --- a/sos/report/mellanox_firmware_suite/tools/base_tool.py +++ b/sos/report/mellanox_firmware_suite/tools/base_tool.py @@ -41,14 +41,15 @@ def execute_cmd( cache=True, get_cached=True, key=None, - filename=None + filename=None, + subdir=None, ): cache_key = key or cmd if get_cached and cache_key in self.ctx.cache: return self.ctx.cache[cache_key] - rc, output = self._run_command(cmd, timeout, filename) + rc, output = self._run_command(cmd, timeout, filename, subdir=subdir) if rc != 0: self.plugin._log_info( @@ -60,7 +61,7 @@ def execute_cmd( return (rc, output) - def _run_command(self, cmd, timeout, filename): + def _run_command(self, cmd, timeout, filename, subdir=None): if filename is None: res = self.plugin.exec_cmd(cmd=cmd, timeout=timeout) @@ -70,6 +71,7 @@ def _run_command(self, cmd, timeout, 
filename): suggest_filename=filename, timeout=timeout, stderr=True, + subdir=subdir, ) return res.get("status", 1), res.get("output", "") From 5481945727e3901de9a964a45e8ea998ac9d650e Mon Sep 17 00:00:00 2001 From: Dan Goldberg Date: Thu, 9 Apr 2026 23:03:13 -0300 Subject: [PATCH 2/7] Renamed collector and target subdir for PCC --- .../collectors/collector_manager.py | 8 ++++---- .../collectors/{ppcc_collector.py => pcc_collector.py} | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) rename sos/report/mellanox_firmware_suite/collectors/{ppcc_collector.py => pcc_collector.py} (98%) diff --git a/sos/report/mellanox_firmware_suite/collectors/collector_manager.py b/sos/report/mellanox_firmware_suite/collectors/collector_manager.py index 32c246a958..542377c549 100644 --- a/sos/report/mellanox_firmware_suite/collectors/collector_manager.py +++ b/sos/report/mellanox_firmware_suite/collectors/collector_manager.py @@ -1,7 +1,7 @@ from .system_collector import SystemCollector from .firmware_collector import FirmwareCollector from .cable_collector import CableCollector -from .ppcc_collector import PPCCCollector +from .pcc_collector import PccCollector class CollectorManager(object): @@ -13,7 +13,7 @@ def collect_all(self): self.collect_system_info() self.collect_firmware_info() self.collect_cable_info() - self.collect_ppcc_info() + self.collect_pcc_info() def collect_system_info(self): for ctx in self.device_contexts: @@ -29,6 +29,6 @@ def collect_cable_info(self): for ctx in self.device_contexts: CableCollector().run(self.plugin, ctx) - def collect_ppcc_info(self): + def collect_pcc_info(self): for ctx in self.device_contexts: - PPCCCollector().run(self.plugin, ctx) + PccCollector().run(self.plugin, ctx) diff --git a/sos/report/mellanox_firmware_suite/collectors/ppcc_collector.py b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py similarity index 98% rename from sos/report/mellanox_firmware_suite/collectors/ppcc_collector.py rename to 
sos/report/mellanox_firmware_suite/collectors/pcc_collector.py index 12a7138282..aab258f33a 100644 --- a/sos/report/mellanox_firmware_suite/collectors/ppcc_collector.py +++ b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py @@ -9,7 +9,7 @@ get_tool, ) -# mlxreg/mstreg --op uses string cmd_type values from the PPCC PRM. +# mlxreg/mstreg --op uses string cmd_type values from the PPCC register. PpccCommandOptions = Dict[str, str] @@ -25,7 +25,7 @@ class PpccCommand(str, Enum): ALGO_INFO_ARRAY = "0x10" -class PPCCCollector(Collector): +class PccCollector(Collector): _BASE_REGISTER_INDEXES = "local_port=1,pnat=0,lp_msb=0" _ALGO_SLOT_TEXT_INDEX_COUNT = 16 _COMMAND_OUTPUT_LOG_MAX_CHARS = 4000 @@ -46,7 +46,7 @@ def _op_for_cmd_type(command: PpccCommand) -> PpccCommandOptions: @staticmethod def _register_indexes_for_algo_slot(algo_slot_index: int) -> str: return ( - f"{PPCCCollector._BASE_REGISTER_INDEXES}," + f"{PccCollector._BASE_REGISTER_INDEXES}," f"algo_slot={algo_slot_index}" ) @@ -312,7 +312,7 @@ def _collect_single_algo_slot( def _collect_ppcc_data(self, plugin, tool, tool_name: str, ctx) -> None: collection_file_prefix = f"{tool_name}_{ctx.bdf}_" - output_subdir = f"{tool_name}_pcc" + output_subdir = f"pcc_info" device_label = ctx.device return_code, output = self._ppcc_get( From 197f390a77f0a7f67207373e5b4700dc475d76a2 Mon Sep 17 00:00:00 2001 From: Dan Goldberg Date: Thu, 9 Apr 2026 23:36:48 -0300 Subject: [PATCH 3/7] Optional execution for pcc (false by default) --- .../collectors/collector_manager.py | 2 ++ sos/report/plugins/mellanox_firmware.py | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sos/report/mellanox_firmware_suite/collectors/collector_manager.py b/sos/report/mellanox_firmware_suite/collectors/collector_manager.py index 542377c549..88279a1b2b 100644 --- a/sos/report/mellanox_firmware_suite/collectors/collector_manager.py +++ b/sos/report/mellanox_firmware_suite/collectors/collector_manager.py @@ -30,5 
+30,7 @@ def collect_cable_info(self): CableCollector().run(self.plugin, ctx) def collect_pcc_info(self): + if not self.plugin.get_option("pcc", default=False): + return for ctx in self.device_contexts: PccCollector().run(self.plugin, ctx) diff --git a/sos/report/plugins/mellanox_firmware.py b/sos/report/plugins/mellanox_firmware.py index 96a2b3dbd9..7cb732b833 100644 --- a/sos/report/plugins/mellanox_firmware.py +++ b/sos/report/plugins/mellanox_firmware.py @@ -9,7 +9,7 @@ import re import shutil -from sos.report.plugins import Plugin, IndependentPlugin +from sos.report.plugins import Plugin, IndependentPlugin, PluginOpt from sos.report.mellanox_firmware_suite.tools import FirmwareTools from sos.report.mellanox_firmware_suite.device_context import DeviceContext from sos.report.mellanox_firmware_suite.collectors.collector_manager import ( @@ -40,6 +40,17 @@ class MellanoxFirmware(Plugin, IndependentPlugin): packages = ("mst", "mstflint") profiles = ("hardware", "system") + option_list = [ + PluginOpt( + "pcc", + default=False, + desc=( + "Collect PPCC (mlxreg/mstreg) register dumps; can be slow on " + "large systems" + ), + ), + ] + def __init__(self, commons): super().__init__(commons=commons) From 5fb385be43b6b83eb2133aa4d6756cb709e383b5 Mon Sep 17 00:00:00 2001 From: Dan Goldberg Date: Fri, 10 Apr 2026 22:40:55 -0300 Subject: [PATCH 4/7] Fixes and small additions --- .../collectors/pcc_collector.py | 64 +++++++++++++++---- sos/report/plugins/mellanox_firmware.py | 4 +- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py index aab258f33a..09e2370375 100644 --- a/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py +++ b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py @@ -14,6 +14,7 @@ class PpccCommand(str, Enum): + GET_ALGO_INFO = "0x0" GET_ALGO_STATUS = "0x3" GET_NUM_PARAMS = "0x4" GET_PARAM_INFO = 
"0x5" @@ -28,6 +29,7 @@ class PpccCommand(str, Enum): class PccCollector(Collector): _BASE_REGISTER_INDEXES = "local_port=1,pnat=0,lp_msb=0" _ALGO_SLOT_TEXT_INDEX_COUNT = 16 + _ALGO_SLOTS_COLLECT_STATUS_CMD_ONLY = frozenset({15}) _COMMAND_OUTPUT_LOG_MAX_CHARS = 4000 _TEXT_TABLE_LINE_PATTERN = re.compile( @@ -38,6 +40,10 @@ class PccCollector(Collector): r"^\s*value\s*\|\s*0x([0-9a-fA-F]+)", re.MULTILINE | re.IGNORECASE, ) + _COUNTER_EN_FIELD_PATTERN = re.compile( + r"^\s*counter_en\s*\|\s*0x([0-9a-fA-F]+)", + re.MULTILINE | re.IGNORECASE, + ) @staticmethod def _op_for_cmd_type(command: PpccCommand) -> PpccCommandOptions: @@ -87,6 +93,15 @@ def _extract_value_field(cls, mlxreg_output: str) -> Optional[int]: return None return int(match.group(1), 16) + @classmethod + def _counter_en_enabled(cls, mlxreg_output: str) -> Optional[bool]: + """LSB of counter_en from algo status dump; None if field missing.""" + match = cls._COUNTER_EN_FIELD_PATTERN.search(mlxreg_output) + if not match: + return None + v = int(match.group(1), 16) + return (v & 1) != 0 + @classmethod def _clip_command_output(cls, text: str) -> str: raw = (text or "").strip() @@ -160,7 +175,7 @@ def _collect_counters_for_algo_slot( ) for counter_index in range(counter_count): counter_indexes = ( - f"{register_indexes},algo_counter_index={counter_index}" + f"{register_indexes},algo_param_index={counter_index}" ) self._ppcc_get( plugin, @@ -275,6 +290,28 @@ def _collect_single_algo_slot( ) device_label = ctx.device + if algo_slot_index in self._ALGO_SLOTS_COLLECT_STATUS_CMD_ONLY: + self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + self._op_for_cmd_type(PpccCommand.GET_ALGO_STATUS), + register_indexes, + ) + return + + self._ppcc_get( + plugin, + device_label, + tool, + collection_file_prefix, + output_subdir, + self._op_for_cmd_type(PpccCommand.GET_ALGO_INFO), + register_indexes, + ) + return_code, output = self._ppcc_get( plugin, device_label, @@ -291,15 +328,17 @@ 
def _collect_single_algo_slot( if algo_status is not None and algo_status != 1: return - self._collect_counters_for_algo_slot( - plugin, - tool, - collection_file_prefix, - output_subdir, - ctx, - algo_slot_index, - register_indexes, - ) + counter_en_on = self._counter_en_enabled(output) + if counter_en_on is not False: + self._collect_counters_for_algo_slot( + plugin, + tool, + collection_file_prefix, + output_subdir, + ctx, + algo_slot_index, + register_indexes, + ) self._collect_params_for_algo_slot( plugin, tool, @@ -327,7 +366,10 @@ def _collect_ppcc_data(self, plugin, tool, tool_name: str, ctx) -> None: if return_code != 0: return - present_algo_slots = self._get_algo_slot_indices(output) + present_algo_slots = sorted( + frozenset(self._get_algo_slot_indices(output)) + | self._ALGO_SLOTS_COLLECT_STATUS_CMD_ONLY, + ) if not present_algo_slots: return diff --git a/sos/report/plugins/mellanox_firmware.py b/sos/report/plugins/mellanox_firmware.py index 7cb732b833..0f00684bf6 100644 --- a/sos/report/plugins/mellanox_firmware.py +++ b/sos/report/plugins/mellanox_firmware.py @@ -45,8 +45,8 @@ class MellanoxFirmware(Plugin, IndependentPlugin): "pcc", default=False, desc=( - "Collect PPCC (mlxreg/mstreg) register dumps; can be slow on " - "large systems" + "Collect PCC-related PPCC register dumps via mlxreg/mstreg; " + "can be slow on large systems" ), ), ] From 1269e9704be9ce3428e98215534e4093119e5434 Mon Sep 17 00:00:00 2001 From: Dan Goldberg Date: Tue, 28 Apr 2026 12:09:13 -0300 Subject: [PATCH 5/7] Added fwctl support to pcc collector --- .../mellanox_firmware_suite/collectors/pcc_collector.py | 8 ++++---- sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py | 3 ++- .../mellanox_firmware_suite/tools/MSTFlint/mstreg.py | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py index 09e2370375..495c88d226 100644 ---
a/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py +++ b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py @@ -152,7 +152,7 @@ def _collect_counters_for_algo_slot( algo_slot_index: int, register_indexes: str, ) -> None: - device_label = ctx.device + device_label = ctx.pci return_code, output = self._ppcc_get( plugin, @@ -210,7 +210,7 @@ def _collect_params_for_algo_slot( algo_slot_index: int, register_indexes: str, ) -> None: - device_label = ctx.device + device_label = ctx.pci return_code, output = self._ppcc_get( plugin, @@ -288,7 +288,7 @@ def _collect_single_algo_slot( register_indexes = self._register_indexes_for_algo_slot( algo_slot_index ) - device_label = ctx.device + device_label = ctx.pci if algo_slot_index in self._ALGO_SLOTS_COLLECT_STATUS_CMD_ONLY: self._ppcc_get( @@ -352,7 +352,7 @@ def _collect_single_algo_slot( def _collect_ppcc_data(self, plugin, tool, tool_name: str, ctx) -> None: collection_file_prefix = f"{tool_name}_{ctx.bdf}_" output_subdir = f"pcc_info" - device_label = ctx.device + device_label = ctx.pci return_code, output = self._ppcc_get( plugin, diff --git a/sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py b/sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py index 85ef01af2f..ad814a90c7 100644 --- a/sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py +++ b/sos/report/mellanox_firmware_suite/tools/MFT/mlxreg.py @@ -10,10 +10,11 @@ def mlxreg_roce_accl_query(self, filename=None): filename=filename ) + @supports_fwctl def ppcc_get(self, op, indexes, filename=None, subdir=None): op_str = ",".join(f"{k}={v}" for k, v in op.items()) return self.execute_cmd( - f'mlxreg -d {self.ctx.device} --reg_name PPCC --get ' + f'mlxreg -d {self.ctx.effective_device} --reg_name PPCC --get ' f'--op "{op_str}" --indexes "{indexes}"', filename=filename, subdir=subdir, diff --git a/sos/report/mellanox_firmware_suite/tools/MSTFlint/mstreg.py b/sos/report/mellanox_firmware_suite/tools/MSTFlint/mstreg.py index 
8e6a1e87a0..bfb78270df 100644 --- a/sos/report/mellanox_firmware_suite/tools/MSTFlint/mstreg.py +++ b/sos/report/mellanox_firmware_suite/tools/MSTFlint/mstreg.py @@ -10,10 +10,11 @@ def mstreg_roce_accl_query(self, filename=None): filename=filename ) + @supports_fwctl def ppcc_get(self, op, indexes, filename=None, subdir=None): op_str = ",".join(f"{k}={v}" for k, v in op.items()) return self.execute_cmd( - f'mstreg -d {self.ctx.device} --reg_name PPCC --get ' + f'mstreg -d {self.ctx.effective_device} --reg_name PPCC --get ' f'--op "{op_str}" --indexes "{indexes}"', filename=filename, subdir=subdir, From 7ef3cc697abc1663b3007e255a085750a6eeee76 Mon Sep 17 00:00:00 2001 From: Dan Goldberg Date: Tue, 28 Apr 2026 14:13:15 -0300 Subject: [PATCH 6/7] Cleanup --- .../collectors/pcc_collector.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py index 495c88d226..61b300a41b 100644 --- a/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py +++ b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py @@ -149,7 +149,6 @@ def _collect_counters_for_algo_slot( collection_file_prefix: str, output_subdir: str, ctx, - algo_slot_index: int, register_indexes: str, ) -> None: device_label = ctx.pci @@ -207,7 +206,6 @@ def _collect_params_for_algo_slot( collection_file_prefix: str, output_subdir: str, ctx, - algo_slot_index: int, register_indexes: str, ) -> None: device_label = ctx.pci @@ -329,29 +327,28 @@ def _collect_single_algo_slot( return counter_en_on = self._counter_en_enabled(output) - if counter_en_on is not False: + if counter_en_on: self._collect_counters_for_algo_slot( plugin, tool, collection_file_prefix, output_subdir, ctx, - algo_slot_index, register_indexes, ) self._collect_params_for_algo_slot( + plugin, tool, collection_file_prefix, output_subdir, ctx, - algo_slot_index, register_indexes, ) def 
_collect_ppcc_data(self, plugin, tool, tool_name: str, ctx) -> None: collection_file_prefix = f"{tool_name}_{ctx.bdf}_" - output_subdir = f"pcc_info" + output_subdir = "pcc_info" device_label = ctx.pci return_code, output = self._ppcc_get( @@ -370,8 +367,6 @@ def _collect_ppcc_data(self, plugin, tool, tool_name: str, ctx) -> None: frozenset(self._get_algo_slot_indices(output)) | self._ALGO_SLOTS_COLLECT_STATUS_CMD_ONLY, ) - if not present_algo_slots: - return for algo_slot_index in present_algo_slots: self._collect_single_algo_slot( From 7006fb3fb19b685fdd8d75acd68452a6a32c03fb Mon Sep 17 00:00:00 2001 From: Dan Goldberg Date: Thu, 30 Apr 2026 10:34:08 -0300 Subject: [PATCH 7/7] Updated plugin description and fixed flake8 error --- sos/report/mellanox_firmware_suite/collectors/pcc_collector.py | 1 - sos/report/plugins/mellanox_firmware.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py index 61b300a41b..08005d981a 100644 --- a/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py +++ b/sos/report/mellanox_firmware_suite/collectors/pcc_collector.py @@ -337,7 +337,6 @@ def _collect_single_algo_slot( register_indexes, ) self._collect_params_for_algo_slot( - plugin, tool, collection_file_prefix, diff --git a/sos/report/plugins/mellanox_firmware.py b/sos/report/plugins/mellanox_firmware.py index 0f00684bf6..0ad6d2c412 100644 --- a/sos/report/plugins/mellanox_firmware.py +++ b/sos/report/plugins/mellanox_firmware.py @@ -45,8 +45,7 @@ class MellanoxFirmware(Plugin, IndependentPlugin): "pcc", default=False, desc=( - "Collect PCC-related PPCC register dumps via mlxreg/mstreg; " - "can be slow on large systems" + "Collect PCC-related information" ), ), ]