From 549fa270eb870be52b06ed68467c3ac43b031882 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Thu, 16 Apr 2026 01:08:43 +0300 Subject: [PATCH 1/8] feat: add DataTransformers and adapt benchmarking to use it --- examples/noreset_shewhart.py | 7 +- pysatl_cpd/benchmark/arl_benchmark_runner.py | 27 +++---- .../benchmark/core/benchmark_executor.py | 29 ++++---- .../noreset/noreset_benchmark_runner.py | 34 +++++---- .../benchmark/online_benchmark_runner.py | 54 +++++++------- .../benchmark/reset_benchmark_runner.py | 30 ++++---- pysatl_cpd/core/algorithm_entry.py | 72 +++++++++++++++++++ .../data_transformers/idata_transformer.py | 71 ++++++++++++++++++ 8 files changed, 246 insertions(+), 78 deletions(-) create mode 100644 pysatl_cpd/core/algorithm_entry.py create mode 100644 pysatl_cpd/core/data_transformers/idata_transformer.py diff --git a/examples/noreset_shewhart.py b/examples/noreset_shewhart.py index 1dc95de..da682e6 100644 --- a/examples/noreset_shewhart.py +++ b/examples/noreset_shewhart.py @@ -13,6 +13,7 @@ from pysatl_cpd.benchmark.metrics.online.delay_metric import MeanDelayMetric, MedianDelayMetric from pysatl_cpd.benchmark.noreset.noreset_benchmark_runner import NoResetBenchmarkRunner from pysatl_cpd.benchmark.noreset.threshold_policy import EventBasedPolicy +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver # --------------------------------------------------------------------------- @@ -169,7 +170,7 @@ def main() -> None: print(f"Algorithm: ShewhartControlChart(learning_period={LEARNING_PERIOD}, window={WINDOW_SIZE})") print( f"Dataset (NoReset): {N_SERIES} series, length={SERIES_LENGTH}, change_point={CHANGE_POINT}," - "shift={MU_AFTER - MU_BEFORE:.1f}*sigma" + f"shift={MU_AFTER - MU_BEFORE:.1f}*sigma" ) print(f"Dataset (ARL): {N_SERIES} series, length={SERIES_LENGTH}, no change points") print(f"Error margin: {ERROR_MARGIN}") @@ -192,7 +193,7 @@ def main() -> None: policy = 
EventBasedPolicy(ERROR_MARGIN[1], strict_edge=False) runner = NoResetBenchmarkRunner( - algorithms=[(algorithm, THRESHOLDS)], + entries=[AlgorithmEntry(algorithm, THRESHOLDS)], providers=providers, metrics=metrics, solver=solver, @@ -206,7 +207,7 @@ def main() -> None: # RUN 2: Average Run Length (ARL) # ========================================== arl_runner = ARLBenchmarkRunner( - algorithms=[(algorithm, THRESHOLDS)], + entries=[AlgorithmEntry(algorithm, THRESHOLDS)], providers=arl_providers, solver=solver, mode="noreset", # uses rapid point-based extraction behind the scenes diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py index 5c7be93..dea9799 100644 --- a/pysatl_cpd/benchmark/arl_benchmark_runner.py +++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py @@ -22,7 +22,7 @@ from pysatl_cpd.benchmark.noreset.threshold_policy import PointBasedPolicy from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner from pysatl_cpd.benchmark.reset_benchmark_runner import ResetBenchmarkRunner -from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace @@ -44,8 +44,9 @@ class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledDa Parameters ---------- - algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] - Sequence of (algorithm, thresholds) pairs to evaluate. + entries : Sequence[AlgorithmEntry] + Sequence of AlgorithmEntry objects containing algorithm, thresholds, + and an optional data transformer. providers : list[ProviderT] Labeled data providers to run against. Must have `change_points == []`. 
solver : OnlineCpdSolver @@ -55,6 +56,8 @@ class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledDa dump_dir : Path | str | None, optional Directory for caching results via BenchmarkExecutor. If None, caching is disabled. Default is None. + verbose : bool, default=False + If True, displays progress bars during execution. Raises ------ @@ -66,7 +69,7 @@ class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledDa def __init__( self, - algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], providers: list[ProviderT], solver: OnlineCpdSolver, mode: Literal["reset", "noreset"], @@ -83,7 +86,7 @@ def __init__( metrics = {"arl": ARLMetric[TraceT, ProviderT]()} super().__init__( - algorithms=algorithms, + entries=entries, providers=providers, metrics=metrics, # type: ignore[arg-type] solver=solver, @@ -95,7 +98,7 @@ def __init__( if mode == "reset": # Delegate to standard ResetBenchmarkRunner self._inner_runner: OnlineBenchmarkRunner[Any, ProviderT] = ResetBenchmarkRunner( - algorithms=algorithms, + entries=entries, providers=providers, metrics=cast(Any, metrics), solver=solver, @@ -104,7 +107,7 @@ def __init__( elif mode == "noreset": # Delegate to optimized NoResetBenchmarkRunner with PointBased policy self._inner_runner = NoResetBenchmarkRunner( - algorithms=algorithms, + entries=entries, providers=providers, metrics=cast(Any, metrics), solver=solver, @@ -116,20 +119,20 @@ def __init__( def _collect_runs( self, - algorithm: OnlineAlgorithm[Any, Any, Any], + entry: AlgorithmEntry[Any, Any, Any], threshold: float, providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: """ - Collect runs for a given algorithm and threshold using the configured mode. + Collect runs for a given algorithm entry and threshold using the configured mode. 
Delegates the collection to either ResetBenchmarkRunner or NoResetBenchmarkRunner depending on the initialized mode. Parameters ---------- - algorithm : OnlineAlgorithm[Any, Any, Any] - The algorithm to evaluate. + entry : AlgorithmEntry + The algorithm configuration entry to evaluate. threshold : float The detection threshold. providers : Sequence[ProviderT] @@ -140,5 +143,5 @@ def _collect_runs( list[tuple[TraceT, ProviderT]] Batch of (trace, provider) pairs. """ - runs = self._inner_runner._collect_runs(algorithm, threshold, providers) + runs = self._inner_runner._collect_runs(entry, threshold, providers) return cast(list[tuple[TraceT, ProviderT]], runs) diff --git a/pysatl_cpd/benchmark/core/benchmark_executor.py b/pysatl_cpd/benchmark/core/benchmark_executor.py index fe7bee0..ff5c979 100644 --- a/pysatl_cpd/benchmark/core/benchmark_executor.py +++ b/pysatl_cpd/benchmark/core/benchmark_executor.py @@ -20,8 +20,8 @@ from pathlib import Path from typing import Any +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.data_providers.idata_provider import DataProvider -from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace @@ -37,7 +37,7 @@ class BenchmarkRecord: Parameters ---------- algorithm : str - The string identifier or name of the online algorithm. + The string identifier or name of the online algorithm (and transformer). configuration_hash : str A hash string representing the algorithm's configuration. data : str @@ -79,9 +79,9 @@ class BenchmarkExecutor[DataT]: Parameters ---------- - algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] - A sequence of tuples, where each tuple contains an instantiated online - algorithm and a sequence of thresholds to test it against. 
+ entries : Sequence[AlgorithmEntry] + A sequence of AlgorithmEntry objects, each grouping an algorithm, + its thresholds, and an optional data transformer. providers : Sequence[DataProvider[DataT]] A sequence of data providers to be fed into the algorithms. solver : OnlineCpdSolver @@ -94,12 +94,12 @@ class BenchmarkExecutor[DataT]: def __init__( self, - algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], providers: Sequence[DataProvider[DataT]], solver: OnlineCpdSolver, dump_dir: str | Path | None = None, ) -> None: - self.__algorithms = algorithms + self.__entries = entries self.__providers = providers self.__solver = solver self.__dump_dir = Path(dump_dir) if dump_dir is not None else None @@ -141,12 +141,17 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: ) registry[record.key] = record - for (algorithm, thresholds), provider in itertools.product(self.__algorithms, self.__providers): - algo_name = str(algorithm) - config_hash = hash(algorithm.configuration) + for entry, provider in itertools.product(self.__entries, self.__providers): + algo_name = entry.full_name + config_hash = entry.full_hash data_name = provider.name - for threshold in thresholds: + # Apply data transformer if specified in the entry + active_provider = provider + if entry.transformer is not None: + active_provider = entry.transformer.transform(provider) + + for threshold in entry.thresholds: key = (algo_name, config_hash, data_name, float(threshold)) if key in registry: @@ -159,7 +164,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: results.append((registry[key], trace)) continue - steps = list(self.__solver.run(algorithm, provider, threshold)) + steps = list(self.__solver.run(entry.algorithm, active_provider, threshold)) trace = OnlineDetectionTrace.from_run(steps, algo_name, config_hash) record = BenchmarkRecord(algo_name, config_hash, data_name, 
threshold, None) diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py index c903b51..c714dee 100644 --- a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py +++ b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py @@ -13,6 +13,7 @@ __copyright__ = "Copyright (c) 2026 PySATL project" __license__ = "SPDX-License-Identifier: MIT" +import dataclasses from collections.abc import Sequence from pathlib import Path from typing import Any @@ -23,7 +24,7 @@ from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace from pysatl_cpd.benchmark.noreset.threshold_policy import ThresholdPolicy from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner -from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace @@ -32,7 +33,7 @@ class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[ """ Optimised benchmark runner for series with a single change point. - For each (algorithm, provider) pair the solver is executed exactly + For each (algorithm entry, provider) pair the solver is executed exactly once with threshold=inf, producing a full detection function trace. All threshold evaluations are then simulated by applying a ThresholdPolicy to that cached trace, avoiding redundant solver runs. @@ -40,8 +41,9 @@ class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[ Parameters ---------- - algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] - Sequence of (algorithm, thresholds) pairs to evaluate. + entries : Sequence[AlgorithmEntry] + Sequence of AlgorithmEntry objects containing algorithm, thresholds, + and an optional data transformer. 
providers : Sequence[ProviderT] Labeled data providers to run against. metrics : dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]] @@ -54,11 +56,13 @@ class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[ dump_dir : Path | str | None, optional Directory for caching inf traces via BenchmarkExecutor. If None, caching is disabled. Default is None. + verbose : bool, default=False + If True, displays progress bars during execution. """ def __init__( self, - algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], providers: Sequence[ProviderT], metrics: dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]], solver: OnlineCpdSolver, @@ -67,7 +71,7 @@ def __init__( verbose: bool = False, ) -> None: super().__init__( - algorithms=algorithms, + entries=entries, providers=providers, metrics=metrics, solver=solver, @@ -76,8 +80,11 @@ def __init__( ) self._policy = policy + # Replace all thresholds with inf for initial pre-caching run + inf_entries = [dataclasses.replace(entry, thresholds=[float("inf")]) for entry in entries] + executor: BenchmarkExecutor[Any] = BenchmarkExecutor( - algorithms=[(algorithm, [float("inf")]) for algorithm, _ in algorithms], + entries=inf_entries, providers=list(providers), solver=self._solver, dump_dir=self._dump_dir, @@ -86,17 +93,18 @@ def __init__( self._inf_trace_cache: dict[tuple[str, int, str], OnlineDetectionTrace[Any]] = {} for record, trace in executor.execute(): + # record.algorithm maps to entry.full_name, hash maps to entry.full_hash key = (record.algorithm, record.configuration_hash, record.data) self._inf_trace_cache[key] = trace def _collect_runs( self, - algorithm: OnlineAlgorithm[Any, Any, Any], + entry: AlgorithmEntry[Any, Any, Any], threshold: float, providers: Sequence[ProviderT], ) -> list[tuple[NoResetDetectionTrace[Any], ProviderT]]: """ - Collect NoReset runs for a given algorithm 
and threshold. + Collect NoReset runs for a given algorithm entry and threshold. For each provider, retrieves the inf trace via BenchmarkExecutor and applies the ThresholdPolicy to produce a lightweight @@ -104,8 +112,8 @@ def _collect_runs( Parameters ---------- - algorithm : OnlineAlgorithm[Any, Any, Any] - The algorithm to evaluate. + entry : AlgorithmEntry + The algorithm configuration entry to evaluate. threshold : float The detection threshold to simulate. providers : Sequence[ProviderT] @@ -119,8 +127,8 @@ def _collect_runs( if not providers: return [] - algo_name = str(algorithm) - config_hash = hash(algorithm.configuration) + algo_name = entry.full_name + config_hash = entry.full_hash runs: list[tuple[NoResetDetectionTrace[Any], ProviderT]] = [] for provider in providers: diff --git a/pysatl_cpd/benchmark/online_benchmark_runner.py b/pysatl_cpd/benchmark/online_benchmark_runner.py index f3509ad..6a0a15e 100644 --- a/pysatl_cpd/benchmark/online_benchmark_runner.py +++ b/pysatl_cpd/benchmark/online_benchmark_runner.py @@ -19,7 +19,8 @@ from pysatl_cpd.analysis.labeled_data import LabeledData from pysatl_cpd.benchmark.core.benchmark_logger import BenchmarkLogger from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric -from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm, OnlineAlgorithmConfiguration +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmConfiguration from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace @@ -34,10 +35,11 @@ class OnlineBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: Labele Parameters ---------- - algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] - Sequence of (algorithm, thresholds) pairs to evaluate. 
+ entries : Sequence[AlgorithmEntry] + Sequence of AlgorithmEntry objects containing algorithm, thresholds, + and an optional data transformer. providers : Sequence[ProviderT] - Sequence of labeled data providers. + Sequence of labeled data providers to run against. metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]] Named metrics to evaluate for each (algorithm, threshold) batch. solver : OnlineCpdSolver @@ -45,18 +47,20 @@ class OnlineBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: Labele dump_dir : Path | str | None, optional Directory for caching results via BenchmarkExecutor. If None, caching is disabled. Default is None. + verbose : bool, default=False + If True, displays progress bars during execution. """ def __init__( self, - algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], providers: Sequence[ProviderT], metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], solver: OnlineCpdSolver, dump_dir: Path | str | None = None, verbose: bool = False, ) -> None: - self._algorithms = algorithms + self._entries = entries self._providers = providers self._metrics = metrics self._solver = solver @@ -67,17 +71,17 @@ def __init__( @abstractmethod def _collect_runs( self, - algorithm: OnlineAlgorithm[Any, Any, Any], + entry: AlgorithmEntry[Any, Any, Any], threshold: float, providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: """ - Collect (trace, provider) pairs for a given algorithm and threshold. + Collect (trace, provider) pairs for a given algorithm entry and threshold. Parameters ---------- - algorithm : OnlineAlgorithm[Any, Any, Any] - The algorithm to evaluate. + entry : AlgorithmEntry + The algorithm configuration entry to evaluate. threshold : float The detection threshold. providers : Sequence[ProviderT] @@ -88,29 +92,27 @@ def _collect_runs( list[tuple[TraceT, ProviderT]] Batch of (trace, provider) pairs for metric evaluation. 
""" - raise NotImplementedError("Method `_collect_runs` is not implemented yet.") def run( self, ) -> dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]]: """ - Execute the benchmark over all algorithms and thresholds. + Execute the benchmark over all entries and thresholds. - For each (algorithm, threshold) pair, collects runs via + For each (entry, threshold) pair, collects runs via _collect_runs() and evaluates all registered metrics. Returns ------- dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]] - Mapping of (algorithm_name, configuration) to a list of + Mapping of (algorithm_full_name, configuration) to a list of (threshold, {metric_name: metric_value}) entries, one per threshold. """ - benchmark_start = time.time() - total_runs = sum(len(thresholds) for _, thresholds in self._algorithms) - n_algorithms = len(self._algorithms) + total_runs = sum(len(entry.thresholds) for entry in self._entries) + n_algorithms = len(self._entries) n_providers = len(self._providers) if not self._metrics: @@ -127,26 +129,26 @@ def run( list[tuple[float, dict[str, Any]]], ] = {} - algo_iterator = tqdm( - self._algorithms, + entries_iterator = tqdm( + self._entries, disable=not self._verbose, desc="Processing algorithms", unit="algo", ) - for algorithm, thresholds in algo_iterator: - algo_name = str(algorithm) + for entry in entries_iterator: + algo_name = entry.full_name - self._logger.algorithm_start(algo_name, len(thresholds)) + self._logger.algorithm_start(algo_name, len(entry.thresholds)) key: tuple[str, OnlineAlgorithmConfiguration] = ( - str(algorithm), - algorithm.configuration, + entry.full_name, + entry.algorithm.configuration, ) results[key] = [] threshold_iterator = tqdm( - thresholds, + entry.thresholds, desc=f" Thresholds ({algo_name})", disable=not self._verbose, leave=False, @@ -161,7 +163,7 @@ def run( threshold=f"{threshold:.4f}", ) - runs = self._collect_runs(algorithm, threshold, self._providers) 
+ runs = self._collect_runs(entry, threshold, self._providers) self._logger.metrics_computed( algo_name=algo_name, diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py index ecfa203..8ca9df8 100644 --- a/pysatl_cpd/benchmark/reset_benchmark_runner.py +++ b/pysatl_cpd/benchmark/reset_benchmark_runner.py @@ -12,6 +12,7 @@ __copyright__ = "Copyright (c) 2026 PySATL project" __license__ = "SPDX-License-Identifier: MIT" +import dataclasses from collections.abc import Sequence from pathlib import Path from typing import Any, cast @@ -20,7 +21,7 @@ from pysatl_cpd.benchmark.core.benchmark_executor import BenchmarkExecutor from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner -from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace @@ -31,15 +32,16 @@ class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: Labeled """ Benchmark runner that uses standard reset behaviour. - For each (algorithm, threshold) pair, runs the solver over all + For each (algorithm entry, threshold) pair, runs the solver over all providers via BenchmarkExecutor. The algorithm is reset on every detected change point (standard solver behaviour). Results are cached to disk when dump_dir is provided. Parameters ---------- - algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] - Sequence of (algorithm, thresholds) pairs to evaluate. + entries : Sequence[AlgorithmEntry] + Sequence of AlgorithmEntry objects containing algorithm, thresholds, + and an optional data transformer. providers : Sequence[ProviderT] Labeled data providers to run against. 
metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]] @@ -49,11 +51,13 @@ class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: Labeled dump_dir : Path | str | None, optional Directory for caching results via BenchmarkExecutor. If None, caching is disabled. Default is None. + verbose : bool, default=False + If True, displays progress bars during execution. """ def __init__( self, - algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], providers: Sequence[ProviderT], metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], solver: OnlineCpdSolver, @@ -61,7 +65,7 @@ def __init__( verbose: bool = False, ) -> None: super().__init__( - algorithms=algorithms, + entries=entries, providers=providers, metrics=metrics, solver=solver, @@ -71,20 +75,20 @@ def __init__( def _collect_runs( self, - algorithm: OnlineAlgorithm[Any, Any, Any], + entry: AlgorithmEntry[Any, Any, Any], threshold: float, providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: """ - Collect runs for a given algorithm and threshold via BenchmarkExecutor. + Collect runs for a given algorithm entry and threshold via BenchmarkExecutor. Creates a BenchmarkExecutor with a single threshold and all providers, executes it, and pairs each resulting trace with its provider. Parameters ---------- - algorithm : OnlineAlgorithm[Any, Any, Any] - The algorithm to evaluate. + entry : AlgorithmEntry + The algorithm configuration entry to evaluate. threshold : float The detection threshold. 
providers : Sequence[ProviderT] @@ -98,8 +102,11 @@ def _collect_runs( if not providers: return [] + # Create a temporary entry with only the current threshold + single_threshold_entry = dataclasses.replace(entry, thresholds=[threshold]) + executor: BenchmarkExecutor[Any] = BenchmarkExecutor( - algorithms=[(algorithm, [threshold])], + entries=[single_threshold_entry], providers=list(providers), solver=self._solver, dump_dir=self._dump_dir, @@ -109,7 +116,6 @@ def _collect_runs( # BenchmarkExecutor returns (BenchmarkRecord, OnlineDetectionTrace) pairs. # We need to pair each trace with the correct provider. - # Executor iterates providers in the same order as input. provider_by_name: dict[str, ProviderT] = {provider.name: provider for provider in providers} runs: list[tuple[TraceT, ProviderT]] = [] diff --git a/pysatl_cpd/core/algorithm_entry.py b/pysatl_cpd/core/algorithm_entry.py new file mode 100644 index 0000000..5fa5268 --- /dev/null +++ b/pysatl_cpd/core/algorithm_entry.py @@ -0,0 +1,72 @@ +# -*- coding: ascii -*- +""" +Container for benchmark algorithm execution entries. +""" + +__author__ = "PySATL contributors" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from collections.abc import Sequence +from dataclasses import dataclass + +from pysatl_cpd.core.data_transformers.idata_transformer import IDataTransformer +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm, OnlineAlgorithmConfiguration, OnlineAlgorithmState + + +@dataclass +class AlgorithmEntry[DataT, ConfigT: OnlineAlgorithmConfiguration, StateT: OnlineAlgorithmState]: + """ + Groups an algorithm, its target thresholds, and an optional data transformer. + + This container simplifies benchmark configuration by coupling the detection + algorithm with the specific preprocessing steps (transformer) required for + the target datasets. 
+ + Parameters + ---------- + algorithm : OnlineAlgorithm + The instantiated online change-point detection algorithm. + thresholds : Sequence[float] + A sequence of detection thresholds to evaluate. + transformer : IDataTransformer | None, optional + Data transformer to apply to the dataset before feeding it to the algorithm. + If None, data is passed as-is. Default is None. + """ + + algorithm: OnlineAlgorithm[DataT, ConfigT, StateT] + thresholds: Sequence[float] + transformer: IDataTransformer | None = None + + @property + def full_name(self) -> str: + """ + Combined name of the algorithm and transformer. + + Returns + ------- + str + Name formatted as '{AlgorithmName}_{TransformerName}' or just + '{AlgorithmName}' if no transformer is used. + """ + algo_name = self.algorithm.name + if self.transformer is not None: + return f"{algo_name}_{self.transformer.name}" + return algo_name + + @property + def full_hash(self) -> int: + """ + Combined hash of the algorithm configuration and transformer. + + Used to uniquely identify this execution pipeline in the cache registry. + + Returns + ------- + int + Hash value representing the execution configuration. + """ + base_hash = hash(self.algorithm.configuration) + if self.transformer is not None: + return hash((base_hash, hash(self.transformer))) + return base_hash diff --git a/pysatl_cpd/core/data_transformers/idata_transformer.py b/pysatl_cpd/core/data_transformers/idata_transformer.py new file mode 100644 index 0000000..344ed1d --- /dev/null +++ b/pysatl_cpd/core/data_transformers/idata_transformer.py @@ -0,0 +1,71 @@ +# -*- coding: ascii -*- +""" +Interface for data transformers. + +This module provides the abstract base class for data transformers, which +are used to adapt data dimensionality or extract specific features before +feeding data into change-point detection algorithms. 
+""" + +__author__ = "PySATL contributors" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from abc import ABC, abstractmethod +from typing import Any + +from pysatl_cpd.core.data_providers.idata_provider import DataProvider + + +class IDataTransformer(ABC): + """ + Abstract base class for data transformers. + + Transformers act as adapters between raw data providers and algorithms, + allowing, for example, multivariate data to be processed by univariate + algorithms (e.g., via column selection or norm calculation). + """ + + @abstractmethod + def transform(self, provider: DataProvider[Any]) -> DataProvider[Any]: + """ + Apply transformation to the given data provider. + + Parameters + ---------- + provider : DataProvider[Any] + The source data provider. + + Returns + ------- + DataProvider[Any] + A new data provider yielding transformed observations. + """ + raise NotImplementedError + + @property + @abstractmethod + def name(self) -> str: + """ + Return the human-readable name of the transformer. + + Returns + ------- + str + Transformer identifier used for logging and caching. + """ + raise NotImplementedError + + def __hash__(self) -> int: + """ + Return the hash of the transformer. + + Used by BenchmarkExecutor to ensure that changes in the transformation + pipeline correctly invalidate or separate cached traces. + + Returns + ------- + int + Hash value based on the transformer's properties. 
+ """ + return hash(self.name) From e8fb0070961285e75b804b77b0f45ebfa5c51e97 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Thu, 16 Apr 2026 01:24:35 +0300 Subject: [PATCH 2/8] tests: adapt to new changes --- .../mocks/benchmark/mock_benchmark_runner.py | 21 +- .../benchmark/core/test_benchmark_executor.py | 156 +++++----- .../noreset/test_noreset_benchmark_runner.py | 96 ++++--- .../benchmark/test_arl_benchmark_runner.py | 270 ++++++------------ .../benchmark/test_online_benchmark_runner.py | 67 +++-- .../benchmark/test_reset_benchmark_runner.py | 96 ++++--- 6 files changed, 322 insertions(+), 384 deletions(-) diff --git a/tests/mocks/benchmark/mock_benchmark_runner.py b/tests/mocks/benchmark/mock_benchmark_runner.py index f69db6a..bbe55ce 100644 --- a/tests/mocks/benchmark/mock_benchmark_runner.py +++ b/tests/mocks/benchmark/mock_benchmark_runner.py @@ -15,7 +15,7 @@ from pysatl_cpd.analysis.labeled_data import LabeledData from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner -from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace @@ -31,8 +31,9 @@ class MockBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledD Parameters ---------- - algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] - Sequence of (algorithm, thresholds) pairs. + entries : Sequence[AlgorithmEntry] + Sequence of AlgorithmEntry objects containing algorithm, thresholds, + and an optional data transformer. providers : Sequence[ProviderT] Sequence of data providers. 
metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]] @@ -48,7 +49,7 @@ class MockBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledD def __init__( self, - algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], providers: Sequence[ProviderT], metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], solver: OnlineCpdSolver, @@ -56,18 +57,18 @@ def __init__( runs_to_return: list[tuple[TraceT, ProviderT]] | None = None, ) -> None: super().__init__( - algorithms=algorithms, + entries=entries, providers=providers, metrics=metrics, solver=solver, dump_dir=dump_dir, ) self._runs_to_return: list[tuple[TraceT, ProviderT]] = runs_to_return or [] - self.collect_runs_calls: list[tuple[OnlineAlgorithm[Any, Any, Any], float, Sequence[ProviderT]]] = [] + self.collect_runs_calls: list[tuple[AlgorithmEntry[Any, Any, Any], float, Sequence[ProviderT]]] = [] def _collect_runs( self, - algorithm: OnlineAlgorithm[Any, Any, Any], + entry: AlgorithmEntry[Any, Any, Any], threshold: float, providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: @@ -76,8 +77,8 @@ def _collect_runs( Parameters ---------- - algorithm : OnlineAlgorithm[Any, Any, Any] - The algorithm being evaluated. + entry : AlgorithmEntry + The algorithm configuration entry being evaluated. threshold : float The detection threshold. providers : Sequence[ProviderT] @@ -88,5 +89,5 @@ def _collect_runs( list[tuple[TraceT, ProviderT]] Pre-configured runs set at construction time. 
""" - self.collect_runs_calls.append((algorithm, threshold, providers)) + self.collect_runs_calls.append((entry, threshold, providers)) return self._runs_to_return diff --git a/tests/unit/benchmark/core/test_benchmark_executor.py b/tests/unit/benchmark/core/test_benchmark_executor.py index 9261b1e..d5e380f 100644 --- a/tests/unit/benchmark/core/test_benchmark_executor.py +++ b/tests/unit/benchmark/core/test_benchmark_executor.py @@ -21,6 +21,7 @@ BenchmarkExecutor, BenchmarkRecord, ) +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace from tests.mocks.algorithms.online.simple import MockOnlineAlgorithm @@ -84,12 +85,13 @@ class TestBenchmarkExecutorBasic: def test_single_combination(self) -> None: """1 algorithm x 1 threshold x 1 provider -> 1 result.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(5) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=[provider], solver=solver, ) @@ -98,12 +100,13 @@ def test_single_combination(self) -> None: def test_multiple_thresholds(self) -> None: """1 algorithm x 3 thresholds x 1 provider -> 3 results.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0, 2.0, 3.0]) provider: LabeledData[float] = _make_provider(5) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0, 2.0, 3.0])], + entries=[entry], 
providers=[provider], solver=solver, ) @@ -112,7 +115,8 @@ def test_multiple_thresholds(self) -> None: def test_multiple_providers(self) -> None: """1 algorithm x 1 threshold x 3 providers -> 3 results.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0]) providers: list[LabeledData[float]] = [ _make_provider(5, name="p1"), _make_provider(5, name="p2"), @@ -121,7 +125,7 @@ def test_multiple_providers(self) -> None: solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=providers, solver=solver, ) @@ -130,13 +134,17 @@ def test_multiple_providers(self) -> None: def test_multiple_algorithms(self) -> None: """2 algorithms x 1 threshold each x 1 provider -> 2 results.""" - algo1: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A1", return_sequence=[0.0]) - algo2: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A2", return_sequence=[1.0]) + algo1 = MockOnlineAlgorithm[float](name="A1", return_sequence=[0.0]) + algo2 = MockOnlineAlgorithm[float](name="A2", return_sequence=[1.0]) + entries = [ + AlgorithmEntry(algorithm=algo1, thresholds=[1.0]), + AlgorithmEntry(algorithm=algo2, thresholds=[2.0]), + ] provider: LabeledData[float] = _make_provider(5) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo1, [1.0]), (algo2, [2.0])], + entries=entries, providers=[provider], solver=solver, ) @@ -145,8 +153,12 @@ def test_multiple_algorithms(self) -> None: def test_cartesian_product(self) -> None: """2 algorithms x 2 thresholds x 2 providers -> 8 results.""" - algo1: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A1", return_sequence=[0.0]) - algo2: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A2", 
return_sequence=[0.0]) + algo1 = MockOnlineAlgorithm[float](name="A1", return_sequence=[0.0]) + algo2 = MockOnlineAlgorithm[float](name="A2", return_sequence=[0.0]) + entries = [ + AlgorithmEntry(algorithm=algo1, thresholds=[1.0, 2.0]), + AlgorithmEntry(algorithm=algo2, thresholds=[3.0, 4.0]), + ] providers: list[LabeledData[float]] = [ _make_provider(5, name="p1"), _make_provider(5, name="p2"), @@ -154,7 +166,7 @@ def test_cartesian_product(self) -> None: solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo1, [1.0, 2.0]), (algo2, [3.0, 4.0])], + entries=entries, providers=providers, solver=solver, ) @@ -167,7 +179,7 @@ def test_empty_algorithms(self) -> None: solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[], + entries=[], providers=[provider], solver=solver, ) @@ -176,11 +188,12 @@ def test_empty_algorithms(self) -> None: def test_empty_providers(self) -> None: """No providers -> empty results.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0]) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=[], solver=solver, ) @@ -189,12 +202,13 @@ def test_empty_providers(self) -> None: def test_empty_thresholds(self) -> None: """Algorithm with no thresholds -> no results for that algorithm.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[]) provider: LabeledData[float] = _make_provider(5) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - 
algorithms=[(algo, [])], + entries=[entry], providers=[provider], solver=solver, ) @@ -209,19 +223,14 @@ class TestBenchmarkExecutorTraceContent: """Tests for detection trace correctness.""" def test_detections_at_correct_steps(self) -> None: - """Verify detected change points match expected steps. - - Algorithm [0.0, 0.0, 5.0], threshold=3.0, 6 observations. - Step 0: 0.0 (no), Step 1: 0.0 (no), Step 2: 5.0 (yes, reset). - Step 3: 0.0 (no), Step 4: 0.0 (no), Step 5: 5.0 (yes, reset). - Detections at steps 2 and 5 (0-based). - """ - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0, 0.0, 5.0]) + """Verify detected change points match expected steps.""" + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0, 0.0, 5.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(6) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [3.0])], + entries=[entry], providers=[provider], solver=solver, ) @@ -232,12 +241,13 @@ def test_detections_at_correct_steps(self) -> None: def test_no_detections_with_high_threshold(self) -> None: """No detections when threshold is unreachable.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[5.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[5.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[100.0]) provider: LabeledData[float] = _make_provider(10) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [100.0])], + entries=[entry], providers=[provider], solver=solver, ) @@ -247,33 +257,31 @@ def test_no_detections_with_high_threshold(self) -> None: assert list(trace.detected_change_points) == [] def test_trace_algorithm_name(self) -> None: - """Trace should carry the str(algorithm) as algorithm_name.""" - algo: MockOnlineAlgorithm[float] = 
MockOnlineAlgorithm(name="NamedAlgo", return_sequence=[0.0]) + """Trace should carry the entry.full_name as algorithm_name.""" + algo = MockOnlineAlgorithm[float](name="NamedAlgo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(5) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=[provider], solver=solver, ) results = executor.execute() trace: OnlineDetectionTrace[Any] = results[0][1] - assert trace.algorithm_name == str(algo) + assert trace.algorithm_name == entry.full_name def test_detection_function_values(self) -> None: - """Detection function array should contain correct statistic values. - - Algorithm [1.0, 2.0, 3.0], threshold=inf (no detections/resets). - 6 observations -> values cycle: [1, 2, 3, 1, 2, 3]. - """ - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[1.0, 2.0, 3.0]) + """Detection function array should contain correct statistic values.""" + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[1.0, 2.0, 3.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[float("inf")]) provider: LabeledData[float] = _make_provider(6) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [float("inf")])], + entries=[entry], providers=[provider], solver=solver, ) @@ -292,31 +300,33 @@ class TestBenchmarkExecutorRecordContent: def test_record_fields_match_input(self) -> None: """Record fields should match the algorithm, provider, and threshold.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="RecAlgo", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="RecAlgo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[7.5]) provider: LabeledData[float] = _make_provider(5, name="my_data") solver: OnlineCpdSolver = 
OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [7.5])], + entries=[entry], providers=[provider], solver=solver, ) results = executor.execute() record: BenchmarkRecord = results[0][0] - assert record.algorithm == str(algo) - assert record.configuration_hash == hash(algo.configuration) + assert record.algorithm == entry.full_name + assert record.configuration_hash == entry.full_hash assert record.data == "my_data" assert record.threshold == 7.5 def test_record_trace_path_none_without_dump_dir(self) -> None: """trace_path should be None when dump_dir is not set.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(5) solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=[provider], solver=solver, dump_dir=None, @@ -328,12 +338,13 @@ def test_record_trace_path_none_without_dump_dir(self) -> None: def test_record_trace_path_set_with_dump_dir(self, tmp_path: Path) -> None: """trace_path should point to an existing pickle file when dump_dir is set.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(5, name="data") solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, @@ -354,12 +365,13 @@ class TestBenchmarkExecutorCaching: def test_creates_registry_and_pickle_files(self, tmp_path: Path) -> None: """Execute with dump_dir should create registry CSV and 
pickle file(s).""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(5, name="data") solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, @@ -373,19 +385,14 @@ def test_creates_registry_and_pickle_files(self, tmp_path: Path) -> None: assert len(pkl_files) == 1 def test_cache_prevents_reprocessing(self, tmp_path: Path) -> None: - """Second execute should load from cache without calling solver. - - MockOnlineAlgorithm._call_history accumulates across reset() calls - and is never cleared. If caching works, second execute adds no - new entries. - """ - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + """Second execute should load from cache without calling solver.""" + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(5, name="data") solver: OnlineCpdSolver = OnlineCpdSolver() - # First run - solver executes, algorithm processes observations executor1: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, @@ -394,9 +401,8 @@ def test_cache_prevents_reprocessing(self, tmp_path: Path) -> None: history_after_first: int = len(algo.get_call_history()) assert history_after_first == 5 - # Second run - should load from cache executor2: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, @@ -407,17 +413,14 @@ def test_cache_prevents_reprocessing(self, tmp_path: Path) 
-> None: assert history_after_second == history_after_first def test_cached_trace_matches_original(self, tmp_path: Path) -> None: - """Trace loaded from cache should have identical detected_change_points. - - Algorithm [0.0, 0.0, 5.0], threshold=3.0, 6 observations. - Detections at steps 2 and 5. - """ - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0, 0.0, 5.0]) + """Trace loaded from cache should have identical detected_change_points.""" + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0, 0.0, 5.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(6, name="data") solver: OnlineCpdSolver = OnlineCpdSolver() executor1: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [3.0])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, @@ -425,7 +428,7 @@ def test_cached_trace_matches_original(self, tmp_path: Path) -> None: results1 = executor1.execute() executor2: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [3.0])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, @@ -441,12 +444,13 @@ def test_cached_trace_matches_original(self, tmp_path: Path) -> None: def test_registry_csv_has_correct_structure(self, tmp_path: Path) -> None: """Registry CSV should have expected columns and matching row data.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="CsvAlgo", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="CsvAlgo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[2.5]) provider: LabeledData[float] = _make_provider(5, name="csv_data") solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [2.5])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, @@ -469,19 +473,20 @@ def test_registry_csv_has_correct_structure(self, tmp_path: Path) 
-> None: "trace_path", } assert set(row.keys()) == expected_columns - assert row["algorithm"] == str(algo) + assert row["algorithm"] == entry.full_name assert row["data"] == "csv_data" assert float(row["threshold"]) == 2.5 assert row["trace_path"] != "" def test_inf_threshold_in_pickle_filename(self, tmp_path: Path) -> None: """Pickle filename for infinite threshold should contain 'inf'.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[float("inf")]) provider: LabeledData[float] = _make_provider(5, name="data") solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [float("inf")])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, @@ -494,12 +499,13 @@ def test_inf_threshold_in_pickle_filename(self, tmp_path: Path) -> None: def test_multiple_thresholds_create_separate_pickle_files(self, tmp_path: Path) -> None: """Each threshold should produce its own pickle file.""" - algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0, 2.0, 3.0]) provider: LabeledData[float] = _make_provider(5, name="data") solver: OnlineCpdSolver = OnlineCpdSolver() executor: BenchmarkExecutor[float] = BenchmarkExecutor( - algorithms=[(algo, [1.0, 2.0, 3.0])], + entries=[entry], providers=[provider], solver=solver, dump_dir=tmp_path, diff --git a/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py b/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py index 5b86c90..1a98cbc 100644 --- a/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py +++ b/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py @@ -17,6 +17,7 @@ from 
pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace from pysatl_cpd.benchmark.noreset.threshold_policy import EventBasedPolicy, PointBasedPolicy from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.typedefs import Number from tests.mocks.algorithms.online import MockOnlineAlgorithm @@ -84,7 +85,7 @@ def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeled def make_noreset_runner( - algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]], + entries: Sequence[AlgorithmEntry], providers: Sequence[MockLabeledDataWithPadding], metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]], solver: OnlineCpdSolver, @@ -93,7 +94,7 @@ def make_noreset_runner( ) -> NoResetBenchmarkRunner[MockLabeledDataWithPadding]: """Helper to construct NoResetBenchmarkRunner with given parameters.""" return NoResetBenchmarkRunner( - algorithms=algorithms, + entries=entries, providers=providers, metrics=metrics, # type: ignore[arg-type] solver=solver, @@ -120,7 +121,7 @@ def test_is_instance_of_online_benchmark_runner( ) -> None: """NoResetBenchmarkRunner is an instance of OnlineBenchmarkRunner.""" runner = make_noreset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -137,15 +138,16 @@ def test_collect_runs_is_implemented( point_policy: PointBasedPolicy, ) -> None: """_collect_runs does not raise NotImplementedError.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_noreset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) try: - runner._collect_runs(algorithm, 1.0, [single_provider]) + runner._collect_runs(entry, 1.0, 
[single_provider]) except NotImplementedError: pytest.fail("_collect_runs raised NotImplementedError") @@ -162,17 +164,17 @@ def test_inf_trace_cache_populated_on_init( point_policy: PointBasedPolicy, ) -> None: """Cache is populated during __init__ via BenchmarkExecutor.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_noreset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) - key = (str(algorithm), hash(algorithm.configuration), single_provider.name) + key = (entry.full_name, entry.full_hash, single_provider.name) assert key in runner._inf_trace_cache - # Inf trace produced with threshold=inf has no detected change points inf_trace = runner._inf_trace_cache[key] assert len(inf_trace.detected_change_points) == 0 @@ -185,14 +187,15 @@ def test_cached_trace_detection_function_has_correct_length( point_policy: PointBasedPolicy, ) -> None: """Detection function length equals the number of observations in provider.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_noreset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) - key = (str(algorithm), hash(algorithm.configuration), single_provider.name) + key = (entry.full_name, entry.full_hash, single_provider.name) inf_trace = runner._inf_trace_cache[key] assert len(inf_trace.detection_function) == len(single_provider) @@ -204,17 +207,18 @@ def test_cached_trace_algorithm_name_matches( solver: OnlineCpdSolver, point_policy: PointBasedPolicy, ) -> None: - """algorithm_name in inf trace matches str(algorithm).""" + """algorithm_name in inf trace matches entry.full_name.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_noreset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) - key = (str(algorithm), hash(algorithm.configuration), single_provider.name) + key = 
(entry.full_name, entry.full_hash, single_provider.name) inf_trace = runner._inf_trace_cache[key] - assert inf_trace.algorithm_name == str(algorithm) + assert inf_trace.algorithm_name == entry.full_name class TestNoResetBenchmarkRunnerCollectRuns: @@ -229,14 +233,15 @@ def test_returns_one_run_per_provider( point_policy: PointBasedPolicy, ) -> None: """_collect_runs returns exactly len(providers) (trace, provider) pairs.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_noreset_runner( - [(algorithm, [1.0])], + [entry], two_providers, {"m": mock_metric}, solver, point_policy, ) - runs = runner._collect_runs(algorithm, 1.0, two_providers) + runs = runner._collect_runs(entry, 1.0, two_providers) assert len(runs) == len(two_providers) def test_empty_providers_returns_empty_list( @@ -247,14 +252,15 @@ def test_empty_providers_returns_empty_list( point_policy: PointBasedPolicy, ) -> None: """_collect_runs with empty providers returns empty list.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_noreset_runner( - [(algorithm, [1.0])], + [entry], [], {"m": mock_metric}, solver, point_policy, ) - runs = runner._collect_runs(algorithm, 1.0, []) + runs = runner._collect_runs(entry, 1.0, []) assert runs == [] def test_each_run_is_noreset_detection_trace( @@ -266,14 +272,15 @@ def test_each_run_is_noreset_detection_trace( point_policy: PointBasedPolicy, ) -> None: """Each trace in collected runs is a NoResetDetectionTrace.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_noreset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) - runs = runner._collect_runs(algorithm, 1.0, [single_provider]) + runs = runner._collect_runs(entry, 1.0, [single_provider]) for trace, _ in runs: assert isinstance(trace, NoResetDetectionTrace) @@ -286,14 +293,15 @@ def test_each_run_paired_with_correct_provider( point_policy: PointBasedPolicy, ) -> 
None: """Each trace is paired with its corresponding provider.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_noreset_runner( - [(algorithm, [1.0])], + [entry], two_providers, {"m": mock_metric}, solver, point_policy, ) - runs = runner._collect_runs(algorithm, 1.0, two_providers) + runs = runner._collect_runs(entry, 1.0, two_providers) for (_, provider), expected in zip(runs, two_providers, strict=False): assert provider is expected @@ -306,14 +314,15 @@ def test_high_threshold_produces_no_detections( point_policy: PointBasedPolicy, ) -> None: """High threshold (inf) produces no detected change points.""" + entry = AlgorithmEntry(algorithm=algorithm_with_signal, thresholds=[float("inf")]) runner = make_noreset_runner( - [(algorithm_with_signal, [float("inf")])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) - runs = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider]) + runs = runner._collect_runs(entry, float("inf"), [single_provider]) trace, _ = runs[0] assert len(trace.detected_change_points) == 0 @@ -326,14 +335,15 @@ def test_low_threshold_produces_detections( point_policy: PointBasedPolicy, ) -> None: """Low threshold (0.0) with signal algorithm produces detections.""" + entry = AlgorithmEntry(algorithm=algorithm_with_signal, thresholds=[0.0]) runner = make_noreset_runner( - [(algorithm_with_signal, [0.0])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) - runs = runner._collect_runs(algorithm_with_signal, 0.0, [single_provider]) + runs = runner._collect_runs(entry, 0.0, [single_provider]) trace, _ = runs[0] assert len(trace.detected_change_points) > 0 @@ -346,16 +356,16 @@ def test_policy_is_applied_to_inf_trace( point_policy: PointBasedPolicy, ) -> None: """Detected change points match what policy.apply() would return.""" + entry = AlgorithmEntry(algorithm=algorithm_with_signal, thresholds=[1.0]) runner = make_noreset_runner( - 
[(algorithm_with_signal, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) - # Get the cached inf trace - key = (str(algorithm_with_signal), hash(algorithm_with_signal.configuration), single_provider.name) + key = (entry.full_name, entry.full_hash, single_provider.name) inf_trace = runner._inf_trace_cache[key] expected_cps = point_policy.apply( @@ -363,7 +373,7 @@ def test_policy_is_applied_to_inf_trace( 1.0, single_provider.change_points, ) - runs = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider]) + runs = runner._collect_runs(entry, 1.0, [single_provider]) trace, _ = runs[0] assert list(trace.detected_change_points) == expected_cps @@ -381,7 +391,7 @@ def test_run_with_single_algorithm_single_threshold_single_provider( ) -> None: """Basic happy path - one algorithm, one threshold, one provider.""" runner = make_noreset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -406,18 +416,16 @@ def test_run_with_multiple_thresholds_single_solver_execution( ) -> None: """Multiple thresholds - solver runs only once per provider (checked via caching behaviour).""" runner = make_noreset_runner( - [(algorithm_with_signal, [0.5, 1.0, 2.0])], + [AlgorithmEntry(algorithm=algorithm_with_signal, thresholds=[0.5, 1.0, 2.0])], [single_provider], {"m": mock_metric}, solver, point_policy, dump_dir=tmp_path, ) - # Because execution happens in __init__, we already have our files pkl_files = list(tmp_path.glob("*.pkl")) - assert len(pkl_files) == 1 # 1 trace per provider, NOT 3 traces + assert len(pkl_files) == 1 - # Ensure run completes successfully using the cached inf trace result = runner.run() entries = next(iter(result.values())) assert len(entries) == 3 @@ -433,7 +441,7 @@ def test_run_returns_correct_structure( """run() result has correct nested structure.""" thresholds = [0.5, 1.0] runner = make_noreset_runner( - [(algorithm, 
thresholds)], + [AlgorithmEntry(algorithm=algorithm, thresholds=thresholds)], two_providers, {"m": mock_metric}, solver, @@ -456,7 +464,7 @@ def test_run_with_empty_providers( ) -> None: """Empty providers list - metric is called with empty batch.""" runner = make_noreset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [], {"m": mock_metric}, solver, @@ -475,15 +483,16 @@ def test_different_policies_produce_different_detections( event_policy: EventBasedPolicy, ) -> None: """PointBasedPolicy and EventBasedPolicy may produce different detections.""" + entry = AlgorithmEntry(algorithm=algorithm_with_signal, thresholds=[1.0]) runner_point = make_noreset_runner( - [(algorithm_with_signal, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, point_policy, ) runner_event = make_noreset_runner( - [(algorithm_with_signal, [1.0])], + [entry], [single_provider], { "m": MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]( @@ -493,11 +502,10 @@ def test_different_policies_produce_different_detections( solver, event_policy, ) - runs_point = runner_point._collect_runs(algorithm_with_signal, 1.0, [single_provider]) - runs_event = runner_event._collect_runs(algorithm_with_signal, 1.0, [single_provider]) + runs_point = runner_point._collect_runs(entry, 1.0, [single_provider]) + runs_event = runner_event._collect_runs(entry, 1.0, [single_provider]) trace_point, _ = runs_point[0] trace_event, _ = runs_event[0] - # Results may differ - we just verify both are valid NoResetDetectionTrace assert isinstance(trace_point, NoResetDetectionTrace) assert isinstance(trace_event, NoResetDetectionTrace) @@ -516,7 +524,7 @@ def test_no_files_created_without_dump_dir( ) -> None: """Without dump_dir no files are created during init.""" _ = make_noreset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -536,7 +544,7 @@ def 
test_inf_trace_cached_to_disk_when_dump_dir_provided( ) -> None: """With dump_dir, inf trace registry and pickle are created synchronously during init.""" _ = make_noreset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, diff --git a/tests/unit/benchmark/test_arl_benchmark_runner.py b/tests/unit/benchmark/test_arl_benchmark_runner.py index 8a21ba8..1c86ede 100644 --- a/tests/unit/benchmark/test_arl_benchmark_runner.py +++ b/tests/unit/benchmark/test_arl_benchmark_runner.py @@ -19,6 +19,7 @@ from pysatl_cpd.analysis.labeled_data import LabeledData from pysatl_cpd.benchmark.arl_benchmark_runner import ARLBenchmarkRunner from pysatl_cpd.benchmark.metrics.online.arl_metric import ARLMetric +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmConfiguration from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace @@ -59,12 +60,13 @@ class TestARLBenchmarkRunnerInit: def test_raises_if_provider_has_change_points(self) -> None: """Should raise ValueError when a single provider has non-empty change_points.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(10, change_points=[5], name="bad") solver: OnlineCpdSolver = OnlineCpdSolver() with pytest.raises(ValueError): ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -73,13 +75,14 @@ def test_raises_if_provider_has_change_points(self) -> None: def test_raises_if_any_provider_has_change_points(self) -> None: """Should raise ValueError when at least one of several providers has change_points.""" algorithm: MockOnlineAlgorithm[float] = 
MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) ok_provider: LabeledData[float] = _make_provider(10, name="ok") bad_provider: LabeledData[float] = _make_provider(10, change_points=[3], name="bad") solver: OnlineCpdSolver = OnlineCpdSolver() with pytest.raises(ValueError): ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=[ok_provider, bad_provider], solver=solver, mode="reset", @@ -88,12 +91,13 @@ def test_raises_if_any_provider_has_change_points(self) -> None: def test_raises_if_any_provider_has_change_points_noreset_mode(self) -> None: """Validation should apply in noreset mode as well.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) bad_provider: LabeledData[float] = _make_provider(10, change_points=[3], name="bad") solver: OnlineCpdSolver = OnlineCpdSolver() with pytest.raises(ValueError): ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=[bad_provider], solver=solver, mode="noreset", @@ -102,11 +106,12 @@ def test_raises_if_any_provider_has_change_points_noreset_mode(self) -> None: def test_valid_init_with_empty_change_points(self) -> None: """Should succeed when all providers have empty change_points.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(10, name="clean") solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -116,11 +121,12 @@ def test_valid_init_with_empty_change_points(self) -> None: def test_metrics_contain_arl_metric(self) -> None: """Internal 
_metrics dict should contain 'arl' key with ARLMetric instance.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(10, name="data") solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -132,11 +138,12 @@ def test_metrics_contain_arl_metric(self) -> None: def test_accepts_both_modes(self, mode: Literal["reset", "noreset"]) -> None: """Constructor should accept both 'reset' and 'noreset' mode values.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(5, name="d") solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=[provider], solver=solver, mode=mode, @@ -153,6 +160,7 @@ class TestARLBenchmarkRunnerCollectRuns: def test_returns_correct_number_of_pairs_reset(self) -> None: """Should return one (trace, provider) pair per provider in reset mode.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) providers: list[LabeledData[float]] = [ _make_provider(10, name="d1"), _make_provider(10, name="d2"), @@ -161,19 +169,18 @@ def test_returns_correct_number_of_pairs_reset(self) -> None: solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=providers, solver=solver, 
mode="reset", ) - runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs( - algorithm, 1.0, providers - ) + runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(entry, 1.0, providers) assert len(runs) == 3 def test_returns_correct_number_of_pairs_noreset(self) -> None: """Should return one (trace, provider) pair per provider in noreset mode.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) providers: list[LabeledData[float]] = [ _make_provider(10, name="d1"), _make_provider(10, name="d2"), @@ -181,19 +188,18 @@ def test_returns_correct_number_of_pairs_noreset(self) -> None: solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=providers, solver=solver, mode="noreset", ) - runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs( - algorithm, 1.0, providers - ) + runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(entry, 1.0, providers) assert len(runs) == 2 def test_pairs_traces_with_correct_providers(self) -> None: """Each trace should be paired with its corresponding provider by name.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) providers: list[LabeledData[float]] = [ _make_provider(10, name="alpha"), _make_provider(15, name="beta"), @@ -201,29 +207,28 @@ def test_pairs_traces_with_correct_providers(self) -> None: solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=providers, solver=solver, mode="reset", ) 
- runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs( - algorithm, 1.0, providers - ) + runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(entry, 1.0, providers) names: list[str] = [prov.name for _, prov in runs] assert names == ["alpha", "beta"] def test_empty_providers_returns_empty_list(self) -> None: """Empty providers sequence should return empty list.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=[_make_provider(10)], solver=solver, mode="reset", ) - runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(algorithm, 1.0, []) + runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(entry, 1.0, []) assert runs == [] @@ -234,13 +239,14 @@ class TestARLBenchmarkRunnerRun: """Tests for run() output structure and ARL values.""" def test_run_returns_correct_key_structure(self) -> None: - """Result key should be (str(algorithm), algorithm.configuration).""" + """Result key should be (entry.full_name, algorithm.configuration).""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="KeyAlgo", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) provider: LabeledData[float] = _make_provider(10) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -252,17 +258,18 @@ def test_run_returns_correct_key_structure(self) -> None: assert len(results) == 1 key: tuple[str, 
OnlineAlgorithmConfiguration] = next(iter(results)) - assert key[0] == str(algorithm) + assert key[0] == entry.full_name assert key[1] == algorithm.configuration def test_run_arl_infinity_when_no_detections(self) -> None: """ARL should be inf when the detection function never exceeds the threshold.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="QuietAlgo", return_sequence=[0.5]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(20) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -277,11 +284,12 @@ def test_run_arl_infinity_when_no_detections(self) -> None: def test_run_arl_infinity_noreset_when_no_detections(self) -> None: """ARL should be inf in noreset mode when no threshold crossing occurs.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="QuietAlgo", return_sequence=[0.5]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(20) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="noreset", @@ -296,45 +304,31 @@ def test_run_arl_infinity_noreset_when_no_detections(self) -> None: def test_run_multiple_thresholds(self) -> None: """Each threshold should produce its own entry with 'arl' metric.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Multi", return_sequence=[0.0, 2.0, 5.0]) + thresholds: list[float] = [1.0, 3.0, 10.0] + entry = AlgorithmEntry(algorithm=algorithm, thresholds=thresholds) provider: LabeledData[float] = _make_provider(20) solver: OnlineCpdSolver = OnlineCpdSolver() - thresholds: list[float] = 
[1.0, 3.0, 10.0] runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, thresholds)], + entries=[entry], providers=[provider], solver=solver, mode="reset", ) results = runner.run() key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) - entries: list[tuple[float, dict[str, Any]]] = results[key] + entries_res: list[tuple[float, dict[str, Any]]] = results[key] - assert len(entries) == 3 - recorded: list[float] = [t for t, _ in entries] + assert len(entries_res) == 3 + recorded: list[float] = [t for t, _ in entries_res] assert recorded == thresholds - for _, m in entries: + for _, m in entries_res: assert "arl" in m def test_run_arl_aggregated_across_providers(self) -> None: - """ARL should aggregate run lengths from all providers. - - Algorithm [0.0, 5.0], threshold=3.0, reset mode. - After each detection the algorithm resets so the sequence - restarts: 0, 5, 0, 5, ... - - For each provider detections happen at steps where value=5.0. - Step 0 -> 0.0 (no), step 1 -> 5.0 (yes, reset). - After reset: step 2 -> 0.0 (no), step 3 -> 5.0 (yes, reset). Etc. - - p1 (4 obs): detections at steps 1, 3. - Run lengths from 0: [1, 2]. (0->1 = 1, 1->3 = 2) - p2 (6 obs): detections at steps 1, 3, 5. - Run lengths from 0: [1, 2, 2]. - - Flat run lengths: [1, 2, 1, 2, 2]. ARL = 8 / 5 = 1.6. 
- """ + """ARL should aggregate run lengths from all providers.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Agg", return_sequence=[0.0, 5.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) providers: list[LabeledData[float]] = [ _make_provider(4, name="p1"), _make_provider(6, name="p2"), @@ -342,7 +336,7 @@ def test_run_arl_aggregated_across_providers(self) -> None: solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=providers, solver=solver, mode="reset", @@ -363,30 +357,17 @@ class TestARLBenchmarkRunnerModeSemantics: """Tests verifying different ARL behavior between reset and noreset modes.""" def test_reset_vs_noreset_produce_different_arl(self) -> None: - """Reset and noreset modes should produce different ARL values. - - Algorithm return_sequence=[0.0, 5.0, 0.0, 0.0], threshold=3.0. - - Reset mode: - Step 0 -> 0.0 (no). Step 1 -> 5.0 (yes, reset). - After reset: Step 2 -> 0.0 (no). Step 3 -> 5.0 (yes, reset). Etc. - Detections at steps 1, 3, 5, 7, ..., 19. - Run lengths from 0: [1, 2, 2, 2, ...]. ARL < 2. - - NoReset mode: - Sequence cycles without reset: 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, ... - Detections only where value=5.0. - ARL > arl_reset. 
- """ + """Reset and noreset modes should produce different ARL values.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( name="ModeTest", return_sequence=[0.0, 5.0, 0.0, 0.0], ) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(20) solver: OnlineCpdSolver = OnlineCpdSolver() runner_reset: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -396,7 +377,7 @@ def test_reset_vs_noreset_produce_different_arl(self) -> None: arl_reset: float = res_reset[key_reset][0][1]["arl"] runner_noreset: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="noreset", @@ -410,32 +391,17 @@ def test_reset_vs_noreset_produce_different_arl(self) -> None: assert arl_reset < arl_noreset def test_reset_mode_exact_arl_with_immediate_signal(self) -> None: - """Verify exact ARL in reset mode. - - Algorithm return_sequence=[0.0, 5.0, 0.0, 0.0], threshold=3.0. - Reset mode: sequence restarts after every signal. - - 12 observations: - Step 0 -> 0.0 (no). Step 1 -> 5.0 (yes, reset). - Step 2 -> 0.0 (no). Step 3 -> 5.0 (yes, reset). - Step 4 -> 0.0 (no). Step 5 -> 5.0 (yes, reset). - Step 6 -> 0.0 (no). Step 7 -> 5.0 (yes, reset). - Step 8 -> 0.0 (no). Step 9 -> 5.0 (yes, reset). - Step 10 -> 0.0 (no). Step 11 -> 5.0 (yes, reset). - - Detections at steps 1, 3, 5, 7, 9, 11. - Run lengths from 0: [1, 2, 2, 2, 2, 2]. - ARL = 11 / 6. 
- """ + """Verify exact ARL in reset mode.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( name="Immediate", return_sequence=[0.0, 5.0, 0.0, 0.0], ) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(12) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -449,24 +415,17 @@ def test_reset_mode_exact_arl_with_immediate_signal(self) -> None: assert abs(arl_value - expected_arl) < 1e-10 def test_noreset_mode_exact_arl_with_periodic_signal(self) -> None: - """Verify exact ARL in noreset mode with periodic signal. - - Algorithm return_sequence=[5.0, 0.0, 0.0, 0.0], threshold=3.0. - NoReset mode: sequence cycles without restart. - 12 observations -> values: 5,0,0,0,5,0,0,0,5,0,0,0. - Detections at 1-based indices: 1, 5, 9 (where value=5.0). - Run lengths from 0: [1, 4, 4]. - ARL = 9 / 3 = 3.0. - """ + """Verify exact ARL in noreset mode with periodic signal.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( name="Periodic", return_sequence=[5.0, 0.0, 0.0, 0.0], ) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(12) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="noreset", @@ -480,56 +439,50 @@ def test_noreset_mode_exact_arl_with_periodic_signal(self) -> None: assert abs(arl_value - expected_arl) < 1e-10 def test_noreset_lower_threshold_shorter_arl(self) -> None: - """Lower threshold in noreset mode should detect more, producing shorter ARL. - - Algorithm [0.0, 1.0, 2.0, 3.0, 4.0, 5.0], 24 observations. 
- threshold=1.5: detections where value > 1.5 -> indices with 2,3,4,5. - threshold=4.5: detections where value > 4.5 -> indices with 5 only. - """ + """Lower threshold in noreset mode should detect more, producing shorter ARL.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( name="Gradual", return_sequence=[0.0, 1.0, 2.0, 3.0, 4.0, 5.0], ) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.5, 4.5]) provider: LabeledData[float] = _make_provider(24) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.5, 4.5])], + entries=[entry], providers=[provider], solver=solver, mode="noreset", ) results = runner.run() key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) - entries: list[tuple[float, dict[str, Any]]] = results[key] + entries_res: list[tuple[float, dict[str, Any]]] = results[key] - arl_low: float = entries[0][1]["arl"] - arl_high: float = entries[1][1]["arl"] + arl_low: float = entries_res[0][1]["arl"] + arl_high: float = entries_res[1][1]["arl"] assert math.isfinite(arl_low) assert math.isfinite(arl_high) assert arl_low < arl_high def test_noreset_same_arl_for_same_threshold_different_runs(self) -> None: - """In noreset mode, same algorithm+provider+threshold should give same ARL. - - This validates determinism and that the inf-trace is reused correctly. 
- """ + """In noreset mode, same algorithm+provider+threshold should give same ARL.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( name="Stable", return_sequence=[0.0, 0.0, 5.0], ) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(15) solver: OnlineCpdSolver = OnlineCpdSolver() runner1: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="noreset", ) runner2: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="noreset", @@ -555,11 +508,12 @@ class TestARLBenchmarkRunnerMaxRunlength: def test_forced_detections_produce_finite_arl(self) -> None: """Forced detections via max_runlength give finite ARL with unreachable threshold.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[100.0]) provider: LabeledData[float] = _make_provider(18) solver: OnlineCpdSolver = OnlineCpdSolver(max_runlength=5) runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [100.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -573,20 +527,14 @@ def test_forced_detections_produce_finite_arl(self) -> None: assert arl_value > 0 def test_exact_arl_with_max_runlength(self) -> None: - """Verify exact ARL with max_runlength=5 on 18 observations. - - max_runlength=5 forces detection when run_length > 5, i.e. at step 5 - (0-based, run_length becomes 6). - After reset: next forced at step 11, then step 17. - Detections at steps 5, 11, 17. - Run lengths from 0: [5, 6, 6]. ARL = 17 / 3. 
- """ + """Verify exact ARL with max_runlength=5 on 18 observations.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[100.0]) provider: LabeledData[float] = _make_provider(18) solver: OnlineCpdSolver = OnlineCpdSolver(max_runlength=5) runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [100.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -600,21 +548,14 @@ def test_exact_arl_with_max_runlength(self) -> None: assert abs(arl_value - expected_arl) < 1e-10 def test_signal_before_forced_prevents_forced(self) -> None: - """Signal detections happening before max_runlength prevent forced detections. - - Algorithm [0.0, 0.0, 5.0], threshold=3.0, max_runlength=10. - Signal every 3 steps (well before forced at 11). - After each signal, reset -> sequence restarts. - - 18 obs -> detections at steps 2, 5, 8, 11, 14, 17. - Run lengths: [2, 3, 3, 3, 3, 3]. ARL = 17 / 6. - """ + """Signal detections happening before max_runlength prevent forced detections.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Fast", return_sequence=[0.0, 0.0, 5.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(18) solver: OnlineCpdSolver = OnlineCpdSolver(max_runlength=10) runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -628,33 +569,21 @@ def test_signal_before_forced_prevents_forced(self) -> None: assert abs(arl_value - expected_arl) < 1e-10 def test_max_runlength_noreset_inf_trace_still_forces(self) -> None: - """In noreset mode, max_runlength affects the inf-trace run. - - Algorithm returns 0.0 always, max_runlength=4, threshold=100.0. 
- The inf-trace is computed with threshold=inf, but max_runlength - still forces detections every 5 steps (run_length > 4). - - Those forced detections appear in the inf-trace and should be - detected via point-based policy as the detection function will - show NaN/reset artifacts. But actually since the sequence always - returns 0.0 which is <= any threshold, noreset mode with - threshold=100.0 should detect nothing from the detection function. - However the forced detections in the inf-trace should still - produce finite ARL through detected_change_points in the trace. - """ + """In noreset mode, max_runlength affects the inf-trace run.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[100.0]) provider: LabeledData[float] = _make_provider(15) solver_forced: OnlineCpdSolver = OnlineCpdSolver(max_runlength=4) solver_no_forced: OnlineCpdSolver = OnlineCpdSolver() runner_forced: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [100.0])], + entries=[entry], providers=[provider], solver=solver_forced, mode="reset", ) runner_no_forced: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [100.0])], + entries=[entry], providers=[provider], solver=solver_no_forced, mode="reset", @@ -680,20 +609,14 @@ class TestARLBenchmarkRunnerResetBehavior: """Tests verifying that algorithm reset after each detection affects ARL.""" def test_reset_restarts_return_sequence(self) -> None: - """After reset, return_sequence restarts producing periodic detections. - - Algorithm [0.0, 5.0], threshold=3.0, reset mode, 8 observations. - Step 0: 0.0 (no). Step 1: 5.0 (yes, reset). - Step 2: 0.0 (no). Step 3: 5.0 (yes, reset). Etc. - Detections at steps 1, 3, 5, 7. - Run lengths: [1, 2, 2, 2]. ARL = 7 / 4 = 1.75. 
- """ + """After reset, return_sequence restarts producing periodic detections.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Reset", return_sequence=[0.0, 5.0]) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(8) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -707,33 +630,18 @@ def test_reset_restarts_return_sequence(self) -> None: assert abs(arl_value - expected_arl) < 1e-10 def test_reset_restarts_learning_period(self) -> None: - """Reset re-enters learning period, creating longer gaps between detections. - - Algorithm return_sequence=[5.0], learning_period_size=2, threshold=3.0. - With reset: after each detection, algorithm resets and needs 2 - observations for learning (returning 0.0), then next returns 5.0. - - 9 obs: - Step 0: learning (0.0). Step 1: learning (0.0). - Step 2: 5.0 (yes, reset). - Step 3: learning (0.0). Step 4: learning (0.0). - Step 5: 5.0 (yes, reset). - Step 6: learning (0.0). Step 7: learning (0.0). - Step 8: 5.0 (yes, reset). - - Detections at steps 2, 5, 8. - Run lengths: [2, 3, 3]. ARL = 8 / 3. 
- """ + """Reset re-enters learning period, creating longer gaps between detections.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( name="Learn", return_sequence=[5.0], learning_period_size=2, ) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[3.0]) provider: LabeledData[float] = _make_provider(9) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [3.0])], + entries=[entry], providers=[provider], solver=solver, mode="reset", @@ -747,31 +655,27 @@ def test_reset_restarts_learning_period(self) -> None: assert abs(arl_value - expected_arl) < 1e-10 def test_lower_threshold_produces_shorter_arl_reset(self) -> None: - """Lower threshold detects more often, resulting in shorter ARL in reset mode. - - Algorithm [0.0, 1.0, 2.0, 3.0, 4.0, 5.0], 30 observations. - threshold=1.5: signal when value > 1.5, detections sooner after reset. - threshold=4.5: signal when value > 4.5, detections later after reset. 
- """ + """Lower threshold detects more often, resulting in shorter ARL in reset mode.""" algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( name="Gradual", return_sequence=[0.0, 1.0, 2.0, 3.0, 4.0, 5.0], ) + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.5, 4.5]) provider: LabeledData[float] = _make_provider(30) solver: OnlineCpdSolver = OnlineCpdSolver() runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( - algorithms=[(algorithm, [1.5, 4.5])], + entries=[entry], providers=[provider], solver=solver, mode="reset", ) results = runner.run() key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) - entries: list[tuple[float, dict[str, Any]]] = results[key] + entries_res: list[tuple[float, dict[str, Any]]] = results[key] - arl_low: float = entries[0][1]["arl"] - arl_high: float = entries[1][1]["arl"] + arl_low: float = entries_res[0][1]["arl"] + arl_high: float = entries_res[1][1]["arl"] assert math.isfinite(arl_low) assert math.isfinite(arl_high) diff --git a/tests/unit/benchmark/test_online_benchmark_runner.py b/tests/unit/benchmark/test_online_benchmark_runner.py index 84fe318..64206f3 100644 --- a/tests/unit/benchmark/test_online_benchmark_runner.py +++ b/tests/unit/benchmark/test_online_benchmark_runner.py @@ -10,10 +10,12 @@ from collections.abc import Sequence from pathlib import Path +from typing import Any import pytest from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmConfiguration from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.typedefs import Number @@ -96,7 +98,7 @@ def single_run() -> list[tuple[MockOnlineDetectionTrace, MockLabeledData]]: def make_runner( - algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], 
providers: Sequence[MockLabeledData], metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]], solver: OnlineCpdSolver, @@ -105,7 +107,7 @@ def make_runner( ) -> MockBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledData]: """Helper to construct MockBenchmarkRunner with given parameters.""" return MockBenchmarkRunner( - algorithms=algorithms, + entries=entries, providers=providers, metrics=metrics, # type: ignore[arg-type] solver=solver, @@ -130,13 +132,13 @@ def test_stores_algorithms_providers_metrics_solver( solver: OnlineCpdSolver, ) -> None: """All constructor parameters are stored as private attributes.""" - algorithms = [(single_algorithm, [1.0])] + entries = [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])] providers = [single_provider] metrics = {"m": mock_metric} - runner = make_runner(algorithms, providers, metrics, solver) + runner = make_runner(entries, providers, metrics, solver) - assert runner._algorithms == algorithms + assert runner._entries == entries assert runner._providers == providers assert runner._metrics == metrics assert runner._solver is solver @@ -150,7 +152,7 @@ def test_dump_dir_defaults_to_none( ) -> None: """dump_dir is None when not provided.""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -167,7 +169,7 @@ def test_dump_dir_as_string_is_converted_to_path( ) -> None: """dump_dir passed as str is stored as Path.""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -186,7 +188,7 @@ def test_dump_dir_as_path_is_stored_as_path( ) -> None: """dump_dir passed as Path is stored as Path.""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ 
-209,7 +211,7 @@ def test_cannot_instantiate_directly( """OnlineBenchmarkRunner cannot be instantiated directly.""" with pytest.raises(TypeError): OnlineBenchmarkRunner( # type: ignore[abstract] - algorithms=[(single_algorithm, [1.0])], + entries=[AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], providers=[single_provider], metrics={"m": mock_metric}, solver=solver, @@ -229,7 +231,7 @@ class IncompleteRunner(OnlineBenchmarkRunner): # type: ignore[type-arg] with pytest.raises(TypeError): IncompleteRunner( # type: ignore[abstract] - algorithms=[(single_algorithm, [1.0])], + entries=[AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], providers=[single_provider], metrics={"m": mock_metric}, solver=solver, @@ -248,7 +250,7 @@ def test_run_returns_dict( ) -> None: """run() returns a dict.""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -264,8 +266,9 @@ def test_result_key_is_tuple_of_name_and_configuration( solver: OnlineCpdSolver, ) -> None: """Keys of result dict are (str, OnlineAlgorithmConfiguration) tuples.""" + entry = AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0]) runner = make_runner( - [(single_algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, @@ -275,6 +278,7 @@ def test_result_key_is_tuple_of_name_and_configuration( assert isinstance(key, tuple) assert len(key) == 2 assert isinstance(key[0], str) + assert key[0] == entry.full_name assert isinstance(key[1], OnlineAlgorithmConfiguration) def test_result_value_is_list_of_threshold_metric_tuples( @@ -286,7 +290,7 @@ def test_result_value_is_list_of_threshold_metric_tuples( ) -> None: """Values of result dict are list[tuple[float, dict[str, Any]]].""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -307,14 
+311,15 @@ def test_one_entry_per_threshold_in_result( ) -> None: """Each threshold produces exactly one entry in the result list.""" thresholds = [0.5, 1.0, 1.5] + entry = AlgorithmEntry(algorithm=single_algorithm, thresholds=thresholds) runner = make_runner( - [(single_algorithm, thresholds)], + [entry], [single_provider], {"m": mock_metric}, solver, ) result = runner.run() - key = (str(single_algorithm), single_algorithm.configuration) + key = (entry.full_name, single_algorithm.configuration) assert len(result[key]) == len(thresholds) def test_metric_names_match_input_dict_keys( @@ -326,7 +331,7 @@ def test_metric_names_match_input_dict_keys( ) -> None: """Metric names in result match the keys from the metrics dict.""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], [single_provider], two_metrics, solver, @@ -350,7 +355,7 @@ def test_collect_runs_called_once_per_algorithm_threshold_pair( """_collect_runs is called exactly once per (algorithm, threshold) pair.""" thresholds = [0.5, 1.0, 1.5] runner = make_runner( - [(single_algorithm, thresholds)], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=thresholds)], [single_provider], {"m": mock_metric}, solver, @@ -368,7 +373,7 @@ def test_metric_evaluate_called_once_per_threshold( """metric.evaluate() is called once per (algorithm, threshold) pair.""" thresholds = [0.5, 1.0] runner = make_runner( - [(single_algorithm, thresholds)], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=thresholds)], [single_provider], {"m": mock_metric}, solver, @@ -384,8 +389,9 @@ def test_multiple_algorithms_produce_multiple_keys( solver: OnlineCpdSolver, ) -> None: """Two algorithms produce two distinct keys in result dict.""" + entries = [AlgorithmEntry(algorithm=algo, thresholds=[1.0]) for algo in two_algorithms] runner = make_runner( - [(algo, [1.0]) for algo in two_algorithms], + entries, [single_provider], {"m": mock_metric}, solver, @@ -402,14 
+408,15 @@ def test_multiple_thresholds_produce_multiple_entries( ) -> None: """Two thresholds produce two entries in the result list for one algorithm.""" thresholds = [0.5, 1.5] + entry = AlgorithmEntry(algorithm=single_algorithm, thresholds=thresholds) runner = make_runner( - [(single_algorithm, thresholds)], + [entry], [single_provider], {"m": mock_metric}, solver, ) result = runner.run() - key = (str(single_algorithm), single_algorithm.configuration) + key = (entry.full_name, single_algorithm.configuration) assert len(result[key]) == 2 def test_multiple_metrics_all_appear_in_result( @@ -421,7 +428,7 @@ def test_multiple_metrics_all_appear_in_result( ) -> None: """All metrics from input dict appear in every result entry.""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], [single_provider], two_metrics, solver, @@ -442,7 +449,7 @@ def test_correct_threshold_passed_to_collect_runs( """_collect_runs receives exactly the threshold from the input list.""" thresholds = [0.5, 1.0, 2.0] runner = make_runner( - [(single_algorithm, thresholds)], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=thresholds)], [single_provider], {"m": mock_metric}, solver, @@ -460,7 +467,7 @@ def test_collect_runs_receives_all_providers( ) -> None: """_collect_runs receives the full list of providers.""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], two_providers, {"m": mock_metric}, solver, @@ -476,7 +483,7 @@ def test_empty_providers_produces_empty_batch( ) -> None: """Empty providers list results in metric being called with empty runs.""" runner = make_runner( - [(single_algorithm, [1.0])], + [AlgorithmEntry(algorithm=single_algorithm, thresholds=[1.0])], [], {"m": mock_metric}, solver, @@ -492,14 +499,15 @@ def test_empty_thresholds_produces_no_entries( solver: OnlineCpdSolver, ) -> None: """Empty thresholds list produces empty entries 
list for the algorithm.""" + entry = AlgorithmEntry(algorithm=single_algorithm, thresholds=[]) runner = make_runner( - [(single_algorithm, [])], + [entry], [single_provider], {"m": mock_metric}, solver, ) result = runner.run() - key = (str(single_algorithm), single_algorithm.configuration) + key = (entry.full_name, single_algorithm.configuration) assert result[key] == [] def test_result_preserves_threshold_order( @@ -511,13 +519,14 @@ def test_result_preserves_threshold_order( ) -> None: """Thresholds in result appear in the same order as in input list.""" thresholds = [2.0, 0.5, 1.0] + entry = AlgorithmEntry(algorithm=single_algorithm, thresholds=thresholds) runner = make_runner( - [(single_algorithm, thresholds)], + [entry], [single_provider], {"m": mock_metric}, solver, ) result = runner.run() - key = (str(single_algorithm), single_algorithm.configuration) + key = (entry.full_name, single_algorithm.configuration) result_thresholds = [t for t, _ in result[key]] assert result_thresholds == thresholds diff --git a/tests/unit/benchmark/test_reset_benchmark_runner.py b/tests/unit/benchmark/test_reset_benchmark_runner.py index 7cd6a61..e1fccd0 100644 --- a/tests/unit/benchmark/test_reset_benchmark_runner.py +++ b/tests/unit/benchmark/test_reset_benchmark_runner.py @@ -11,11 +11,13 @@ import csv from collections.abc import Sequence from pathlib import Path +from typing import Any import pytest from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner from pysatl_cpd.benchmark.reset_benchmark_runner import ResetBenchmarkRunner +from pysatl_cpd.core.algorithm_entry import AlgorithmEntry from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace from pysatl_cpd.core.typedefs import Number @@ -72,7 +74,7 @@ def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeled def make_reset_runner( - algorithms: 
Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], providers: Sequence[MockLabeledDataWithPadding], metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]], solver: OnlineCpdSolver, @@ -80,7 +82,7 @@ def make_reset_runner( ) -> ResetBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledDataWithPadding]: """Helper to construct ResetBenchmarkRunner with given parameters.""" return ResetBenchmarkRunner( - algorithms=algorithms, + entries=entries, providers=providers, metrics=metrics, # type: ignore[arg-type] solver=solver, @@ -105,7 +107,7 @@ def test_is_instance_of_online_benchmark_runner( ) -> None: """ResetBenchmarkRunner is an instance of OnlineBenchmarkRunner.""" runner = make_reset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -120,14 +122,15 @@ def test_collect_runs_is_implemented( solver: OnlineCpdSolver, ) -> None: """_collect_runs does not raise NotImplementedError.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_reset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, ) try: - runner._collect_runs(algorithm, 1.0, [single_provider]) + runner._collect_runs(entry, 1.0, [single_provider]) except NotImplementedError: pytest.fail("_collect_runs raised NotImplementedError") @@ -143,13 +146,14 @@ def test_returns_one_run_per_provider( solver: OnlineCpdSolver, ) -> None: """_collect_runs returns exactly len(providers) (trace, provider) pairs.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_reset_runner( - [(algorithm, [1.0])], + [entry], providers, {"m": mock_metric}, solver, ) - runs = runner._collect_runs(algorithm, 1.0, providers) + runs = runner._collect_runs(entry, 1.0, providers) assert len(runs) == len(providers) def test_empty_providers_returns_empty_list( @@ 
-159,13 +163,14 @@ def test_empty_providers_returns_empty_list( solver: OnlineCpdSolver, ) -> None: """_collect_runs with empty providers returns empty list.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_reset_runner( - [(algorithm, [1.0])], + [entry], [], {"m": mock_metric}, solver, ) - runs = runner._collect_runs(algorithm, 1.0, []) + runs = runner._collect_runs(entry, 1.0, []) assert runs == [] def test_single_provider_returns_single_run( @@ -176,13 +181,14 @@ def test_single_provider_returns_single_run( solver: OnlineCpdSolver, ) -> None: """_collect_runs with one provider returns exactly one pair.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_reset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, ) - runs = runner._collect_runs(algorithm, 1.0, [single_provider]) + runs = runner._collect_runs(entry, 1.0, [single_provider]) assert len(runs) == 1 def test_each_run_paired_with_correct_provider( @@ -193,13 +199,14 @@ def test_each_run_paired_with_correct_provider( solver: OnlineCpdSolver, ) -> None: """Each trace is paired with its corresponding provider.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_reset_runner( - [(algorithm, [1.0])], + [entry], providers, {"m": mock_metric}, solver, ) - runs = runner._collect_runs(algorithm, 1.0, providers) + runs = runner._collect_runs(entry, 1.0, providers) for (_, provider), expected_provider in zip(runs, providers, strict=False): assert provider is expected_provider @@ -211,13 +218,14 @@ def test_trace_is_online_detection_trace( solver: OnlineCpdSolver, ) -> None: """Each trace in collected runs is an OnlineDetectionTrace.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_reset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, ) - runs = runner._collect_runs(algorithm, 1.0, [single_provider]) + runs = 
runner._collect_runs(entry, 1.0, [single_provider]) for trace, _ in runs: assert isinstance(trace, OnlineDetectionTrace) @@ -228,17 +236,18 @@ def test_trace_algorithm_name_and_configuration_hash_match_algorithm( mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: - """algorithm_name and configuration_hash in trace match the algorithm.""" + """algorithm_name and configuration_hash in trace match the algorithm full name and hash.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner = make_reset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, ) - runs = runner._collect_runs(algorithm, 1.0, [single_provider]) + runs = runner._collect_runs(entry, 1.0, [single_provider]) trace, _ = runs[0] - assert trace.algorithm_name == str(algorithm) - assert trace.configuration_hash == hash(algorithm.configuration) + assert trace.algorithm_name == entry.full_name + assert trace.configuration_hash == entry.full_hash def test_detected_change_points_respect_threshold( self, @@ -248,14 +257,15 @@ def test_detected_change_points_respect_threshold( solver: OnlineCpdSolver, ) -> None: """High threshold produces no detections, low threshold produces detections.""" + entry = AlgorithmEntry(algorithm=algorithm_with_signal, thresholds=[float("inf"), 1.0]) runner = make_reset_runner( - [(algorithm_with_signal, [float("inf"), 1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, ) - runs_no_signal = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider]) - runs_with_signal = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider]) + runs_no_signal = runner._collect_runs(entry, float("inf"), [single_provider]) + runs_with_signal = runner._collect_runs(entry, 1.0, [single_provider]) trace_no_signal, _ = runs_no_signal[0] trace_with_signal, _ = runs_with_signal[0] assert len(trace_no_signal.detected_change_points) == 0 
@@ -269,14 +279,15 @@ def test_different_thresholds_produce_different_detections( solver: OnlineCpdSolver, ) -> None: """Lower threshold produces more detections than higher threshold.""" + entry = AlgorithmEntry(algorithm=algorithm_with_signal, thresholds=[1.0, float("inf")]) runner = make_reset_runner( - [(algorithm_with_signal, [1.0, float("inf")])], + [entry], [single_provider], {"m": mock_metric}, solver, ) - runs_low = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider]) - runs_high = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider]) + runs_low = runner._collect_runs(entry, 1.0, [single_provider]) + runs_high = runner._collect_runs(entry, float("inf"), [single_provider]) trace_low, _ = runs_low[0] trace_high, _ = runs_high[0] assert len(trace_low.detected_change_points) > len(trace_high.detected_change_points) @@ -289,22 +300,21 @@ def test_algorithm_is_reset_between_providers( solver: OnlineCpdSolver, ) -> None: """Algorithm state is reset between providers by the solver.""" + entry = AlgorithmEntry(algorithm=algorithm_with_signal, thresholds=[1.0]) runner = make_reset_runner( - [(algorithm_with_signal, [1.0])], + [entry], providers, {"m": mock_metric}, solver, ) - runs = runner._collect_runs(algorithm_with_signal, 1.0, providers) - # Each provider run starts fresh - detection functions start from 0 + runs = runner._collect_runs(entry, 1.0, providers) for trace, _ in runs: assert isinstance(trace, OnlineDetectionTrace) - # detection_function should start from index 0 for each provider assert ( len(trace.detection_function) == len(list(providers[0].raw_data) if hasattr(providers[0], "raw_data") else []) or True - ) # solver resets - no cross-provider state leak + ) class TestResetBenchmarkRunnerCaching: @@ -320,7 +330,7 @@ def test_no_files_created_without_dump_dir( ) -> None: """Without dump_dir no files are created.""" runner = make_reset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, 
thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -339,7 +349,7 @@ def test_results_cached_to_disk_when_dump_dir_provided( ) -> None: """With dump_dir a registry CSV file is created.""" runner = make_reset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -359,8 +369,9 @@ def test_registry_contains_correct_metadata( ) -> None: """Registry CSV contains correct algorithm, threshold, data entries.""" threshold: float = 1.0 + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[threshold]) runner = make_reset_runner( - [(algorithm, [threshold])], + [entry], [single_provider], {"m": mock_metric}, solver, @@ -371,7 +382,7 @@ def test_registry_contains_correct_metadata( with open(registry, encoding="utf-8") as f: rows = list(csv.DictReader(f)) assert len(rows) == 1 - assert rows[0]["algorithm"] == str(algorithm) + assert rows[0]["algorithm"] == entry.full_name assert float(rows[0]["threshold"]) == threshold assert rows[0]["data"] == single_provider.name @@ -384,25 +395,24 @@ def test_cached_results_reused_on_second_run( tmp_path: Path, ) -> None: """Second run() with same dump_dir reuses cached traces.""" + entry = AlgorithmEntry(algorithm=algorithm, thresholds=[1.0]) runner_first = make_reset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, dump_dir=tmp_path, ) runner_first.run() - tmp_path / "benchmark_registry.csv" runner_second = make_reset_runner( - [(algorithm, [1.0])], + [entry], [single_provider], {"m": mock_metric}, solver, dump_dir=tmp_path, ) runner_second.run() - # Registry is rewritten but pickle files should not be recreated pkl_files = list(tmp_path.glob("*.pkl")) assert len(pkl_files) == 1 @@ -419,7 +429,7 @@ def test_run_with_single_algorithm_single_threshold_single_provider( ) -> None: """Basic happy path - one algorithm, one threshold, one provider.""" runner = make_reset_runner( - [(algorithm, 
[1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [single_provider], {"m": mock_metric}, solver, @@ -442,7 +452,7 @@ def test_run_returns_correct_structure( """run() result has correct nested structure.""" thresholds = [0.5, 1.0] runner = make_reset_runner( - [(algorithm, thresholds)], + [AlgorithmEntry(algorithm=algorithm, thresholds=thresholds)], providers, {"m": mock_metric}, solver, @@ -465,7 +475,7 @@ def test_run_with_multiple_thresholds( """Multiple thresholds produce multiple entries in result.""" thresholds = [0.5, 1.0, 2.0] runner = make_reset_runner( - [(algorithm, thresholds)], + [AlgorithmEntry(algorithm=algorithm, thresholds=thresholds)], [single_provider], {"m": mock_metric}, solver, @@ -484,7 +494,7 @@ def test_run_with_empty_providers( ) -> None: """Empty providers list - metric is called with empty batch.""" runner = make_reset_runner( - [(algorithm, [1.0])], + [AlgorithmEntry(algorithm=algorithm, thresholds=[1.0])], [], {"m": mock_metric}, solver, From 5bb45896d567ab10f9dc656ce27983195a610e01 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Thu, 16 Apr 2026 01:42:44 +0300 Subject: [PATCH 3/8] feat: add ColumnsSelectorTransformer --- pysatl_cpd/benchmark/core/benchmark_logger.py | 2 +- pysatl_cpd/core/algorithm_entry.py | 5 +- .../columns_selector_transformer.py | 91 +++++++++++++++++++ .../data_transformers/idata_transformer.py | 14 ++- 4 files changed, 101 insertions(+), 11 deletions(-) create mode 100644 pysatl_cpd/core/data_transformers/columns_selector_transformer.py diff --git a/pysatl_cpd/benchmark/core/benchmark_logger.py b/pysatl_cpd/benchmark/core/benchmark_logger.py index 7c7e7f6..6b2cbeb 100644 --- a/pysatl_cpd/benchmark/core/benchmark_logger.py +++ b/pysatl_cpd/benchmark/core/benchmark_logger.py @@ -6,7 +6,7 @@ import logging from typing import Any -__author__ = "PySATL contributors" +__author__ = "Danil Totmyanin" __copyright__ = "Copyright (c) 2026 PySATL project" __license__ = "SPDX-License-Identifier: MIT" diff 
--git a/pysatl_cpd/core/algorithm_entry.py b/pysatl_cpd/core/algorithm_entry.py index 5fa5268..18221a5 100644 --- a/pysatl_cpd/core/algorithm_entry.py +++ b/pysatl_cpd/core/algorithm_entry.py @@ -3,12 +3,13 @@ Container for benchmark algorithm execution entries. """ -__author__ = "PySATL contributors" +__author__ = "Danil Totmyanin" __copyright__ = "Copyright (c) 2026 PySATL project" __license__ = "SPDX-License-Identifier: MIT" from collections.abc import Sequence from dataclasses import dataclass +from typing import Any from pysatl_cpd.core.data_transformers.idata_transformer import IDataTransformer from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm, OnlineAlgorithmConfiguration, OnlineAlgorithmState @@ -36,7 +37,7 @@ class AlgorithmEntry[DataT, ConfigT: OnlineAlgorithmConfiguration, StateT: Onlin algorithm: OnlineAlgorithm[DataT, ConfigT, StateT] thresholds: Sequence[float] - transformer: IDataTransformer | None = None + transformer: IDataTransformer[Any, Any] | None = None @property def full_name(self) -> str: diff --git a/pysatl_cpd/core/data_transformers/columns_selector_transformer.py b/pysatl_cpd/core/data_transformers/columns_selector_transformer.py new file mode 100644 index 0000000..b9433cf --- /dev/null +++ b/pysatl_cpd/core/data_transformers/columns_selector_transformer.py @@ -0,0 +1,91 @@ +# -*- coding: ascii -*- + +""" +Columns Selector Transformer Implementation. + +This module provides a transformer that allows selecting specific columns +from multivariate time series data. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +import numpy as np + +from pysatl_cpd.core.data_providers.idata_provider import DataProvider +from pysatl_cpd.core.data_providers.numpy_data_provider import ( + NDArrayMultivariateProvider, + NDArrayUnivariateProvider, +) +from pysatl_cpd.core.data_transformers.idata_transformer import IDataTransformer + + +class ColumnsSelectorTransformer(IDataTransformer[np.ndarray, np.ndarray | float]): + """ + Transformer for selecting specific columns from multivariate data. + + If a single integer index is provided, it transforms multivariate data + into univariate data. If a list of indices is provided, it returns + multivariate data containing only the specified columns. + + Parameters + ---------- + columns : list[int] or int + Indices of columns to select from the input multivariate array. + """ + + def __init__(self, columns: list[int] | int) -> None: + self.cols = columns + + @property + def name(self) -> str: + """ + Return a unique name including selected column indices. + + Returns + ------- + str + Formatted name like 'Col_0' or 'Cols_0_2_3'. + """ + if isinstance(self.cols, int): + return f"Col_{self.cols}" + cols_str = "_".join(map(str, self.cols)) + return f"Cols_{cols_str}" + + def transform(self, provider: DataProvider[np.ndarray]) -> DataProvider[np.ndarray | float]: + """ + Extract selected columns and wrap into a new NumPy data provider. + + Parameters + ---------- + provider : DataProvider[np.ndarray] + Multivariate data provider yielding 1-D NumPy arrays. + + Returns + ------- + DataProvider[Any] + NDArrayUnivariateProvider if `columns` is int, + NDArrayMultivariateProvider if `columns` is list[int]. + + Raises + ------ + ValueError + If the data provided by the source is not 2-dimensional. 
+ """ + raw_nd_data = np.array(list(provider)) + + if raw_nd_data.ndim < 2: + raise ValueError( + f"ColumnsSelectorTransformer expects 2D data, " + f"got {raw_nd_data.ndim}D data from provider '{provider.name}'." + ) + + cols_data = raw_nd_data[:, self.cols] + + new_provider_name = f"{provider.name}_{self.name}" + + if isinstance(self.cols, int): + return NDArrayUnivariateProvider(data=cols_data, name=new_provider_name) + + return NDArrayMultivariateProvider(data=cols_data, name=new_provider_name) diff --git a/pysatl_cpd/core/data_transformers/idata_transformer.py b/pysatl_cpd/core/data_transformers/idata_transformer.py index 344ed1d..f4eb90c 100644 --- a/pysatl_cpd/core/data_transformers/idata_transformer.py +++ b/pysatl_cpd/core/data_transformers/idata_transformer.py @@ -7,17 +7,16 @@ feeding data into change-point detection algorithms. """ -__author__ = "PySATL contributors" +__author__ = "Danil Totmyanin" __copyright__ = "Copyright (c) 2026 PySATL project" __license__ = "SPDX-License-Identifier: MIT" from abc import ABC, abstractmethod -from typing import Any from pysatl_cpd.core.data_providers.idata_provider import DataProvider -class IDataTransformer(ABC): +class IDataTransformer[DataInT, DataOutT](ABC): """ Abstract base class for data transformers. @@ -27,24 +26,23 @@ class IDataTransformer(ABC): """ @abstractmethod - def transform(self, provider: DataProvider[Any]) -> DataProvider[Any]: + def transform(self, provider: DataProvider[DataInT]) -> DataProvider[DataOutT]: """ Apply transformation to the given data provider. Parameters ---------- - provider : DataProvider[Any] + provider : DataProvider[DataInT] The source data provider. Returns ------- - DataProvider[Any] + DataProvider[DataOut] A new data provider yielding transformed observations. """ raise NotImplementedError @property - @abstractmethod def name(self) -> str: """ Return the human-readable name of the transformer. 
@@ -54,7 +52,7 @@ def name(self) -> str: str Transformer identifier used for logging and caching. """ - raise NotImplementedError + return type(self).__name__ def __hash__(self) -> int: """ From cb51df2c486cf85a717f7c313fc03d2b2f40d196 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Thu, 16 Apr 2026 16:34:52 +0300 Subject: [PATCH 4/8] refactor: file names in mocks --- .../analysis/metrics/{mock_run_metric.py => run_metric.py} | 0 .../{mock_benchmark_runner.py => benchmark_runner.py} | 0 .../{mock_aggregation_metric.py => aggregation_metric.py} | 2 +- tests/unit/benchmark/core/test_benchmark_analyzer.py | 4 ++-- tests/unit/benchmark/metrics/test_aggregation_metric.py | 4 ++-- .../benchmark/noreset/test_noreset_benchmark_runner.py | 7 ++++--- tests/unit/benchmark/test_online_benchmark_runner.py | 6 +++--- tests/unit/benchmark/test_reset_benchmark_runner.py | 4 ++-- 8 files changed, 14 insertions(+), 13 deletions(-) rename tests/mocks/analysis/metrics/{mock_run_metric.py => run_metric.py} (100%) rename tests/mocks/benchmark/{mock_benchmark_runner.py => benchmark_runner.py} (100%) rename tests/mocks/benchmark/metrics/{mock_aggregation_metric.py => aggregation_metric.py} (92%) diff --git a/tests/mocks/analysis/metrics/mock_run_metric.py b/tests/mocks/analysis/metrics/run_metric.py similarity index 100% rename from tests/mocks/analysis/metrics/mock_run_metric.py rename to tests/mocks/analysis/metrics/run_metric.py diff --git a/tests/mocks/benchmark/mock_benchmark_runner.py b/tests/mocks/benchmark/benchmark_runner.py similarity index 100% rename from tests/mocks/benchmark/mock_benchmark_runner.py rename to tests/mocks/benchmark/benchmark_runner.py diff --git a/tests/mocks/benchmark/metrics/mock_aggregation_metric.py b/tests/mocks/benchmark/metrics/aggregation_metric.py similarity index 92% rename from tests/mocks/benchmark/metrics/mock_aggregation_metric.py rename to tests/mocks/benchmark/metrics/aggregation_metric.py index e6e6eb3..cb2b814 100644 --- 
a/tests/mocks/benchmark/metrics/mock_aggregation_metric.py +++ b/tests/mocks/benchmark/metrics/aggregation_metric.py @@ -4,7 +4,7 @@ from pysatl_cpd.analysis.labeled_data import LabeledData from pysatl_cpd.benchmark.metrics.aggregation_metric import AggregationMetric from pysatl_cpd.core.detection_trace import DetectionTrace -from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric +from tests.mocks.analysis.metrics.run_metric import MockRunMetric class MockAggregationMetric[TraceT: DetectionTrace, ProviderT: LabeledData[Any]]( diff --git a/tests/unit/benchmark/core/test_benchmark_analyzer.py b/tests/unit/benchmark/core/test_benchmark_analyzer.py index 175fc81..077f816 100644 --- a/tests/unit/benchmark/core/test_benchmark_analyzer.py +++ b/tests/unit/benchmark/core/test_benchmark_analyzer.py @@ -17,8 +17,8 @@ from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace from tests.mocks.analysis.labeled_data import MockLabeledData -from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric -from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric +from tests.mocks.analysis.metrics.run_metric import MockRunMetric +from tests.mocks.benchmark.metrics.aggregation_metric import MockAggregationMetric from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace diff --git a/tests/unit/benchmark/metrics/test_aggregation_metric.py b/tests/unit/benchmark/metrics/test_aggregation_metric.py index 43252df..9100f82 100644 --- a/tests/unit/benchmark/metrics/test_aggregation_metric.py +++ b/tests/unit/benchmark/metrics/test_aggregation_metric.py @@ -7,8 +7,8 @@ from collections.abc import Sequence from tests.mocks.analysis.labeled_data import MockLabeledData -from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric -from tests.mocks.benchmark.metrics.mock_aggregation_metric import 
MockAggregationMetric +from tests.mocks.analysis.metrics.run_metric import MockRunMetric +from tests.mocks.benchmark.metrics.aggregation_metric import MockAggregationMetric from tests.mocks.core.detection_trace import MockDetectionTrace diff --git a/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py b/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py index 1a98cbc..5694fa1 100644 --- a/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py +++ b/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py @@ -10,6 +10,7 @@ from collections.abc import Sequence from pathlib import Path +from typing import Any import pytest @@ -22,8 +23,8 @@ from pysatl_cpd.core.typedefs import Number from tests.mocks.algorithms.online import MockOnlineAlgorithm from tests.mocks.analysis.labeled_data import MockLabeledDataWithPadding -from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric -from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric +from tests.mocks.analysis.metrics.run_metric import MockRunMetric +from tests.mocks.benchmark.metrics.aggregation_metric import MockAggregationMetric from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace # --------------------------------------------------------------------------- @@ -85,7 +86,7 @@ def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeled def make_noreset_runner( - entries: Sequence[AlgorithmEntry], + entries: Sequence[AlgorithmEntry[Any, Any, Any]], providers: Sequence[MockLabeledDataWithPadding], metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]], solver: OnlineCpdSolver, diff --git a/tests/unit/benchmark/test_online_benchmark_runner.py b/tests/unit/benchmark/test_online_benchmark_runner.py index 64206f3..893c7a0 100644 --- a/tests/unit/benchmark/test_online_benchmark_runner.py +++ b/tests/unit/benchmark/test_online_benchmark_runner.py @@ -21,9 +21,9 
@@ from pysatl_cpd.core.typedefs import Number from tests.mocks.algorithms.online import MockOnlineAlgorithm from tests.mocks.analysis.labeled_data import MockLabeledData -from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric -from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric -from tests.mocks.benchmark.mock_benchmark_runner import MockBenchmarkRunner +from tests.mocks.analysis.metrics.run_metric import MockRunMetric +from tests.mocks.benchmark.benchmark_runner import MockBenchmarkRunner +from tests.mocks.benchmark.metrics.aggregation_metric import MockAggregationMetric from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace # --------------------------------------------------------------------------- diff --git a/tests/unit/benchmark/test_reset_benchmark_runner.py b/tests/unit/benchmark/test_reset_benchmark_runner.py index e1fccd0..0b67c61 100644 --- a/tests/unit/benchmark/test_reset_benchmark_runner.py +++ b/tests/unit/benchmark/test_reset_benchmark_runner.py @@ -23,8 +23,8 @@ from pysatl_cpd.core.typedefs import Number from tests.mocks.algorithms.online import MockOnlineAlgorithm from tests.mocks.analysis.labeled_data import MockLabeledDataWithPadding -from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric -from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric +from tests.mocks.analysis.metrics.run_metric import MockRunMetric +from tests.mocks.benchmark.metrics.aggregation_metric import MockAggregationMetric from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace # --------------------------------------------------------------------------- From a8ee9f792a7f79de6ab3db08e3b114530680b83d Mon Sep 17 00:00:00 2001 From: iraedeus Date: Thu, 16 Apr 2026 16:44:32 +0300 Subject: [PATCH 5/8] tests: data transformers logic in BenchmarkExecutor --- .../data_transformers/data_transformer.py | 89 ++++++++++++ 
.../benchmark/core/test_benchmark_executor.py | 137 ++++++++++++++++++ 2 files changed, 226 insertions(+) create mode 100644 tests/mocks/core/data_transformers/data_transformer.py diff --git a/tests/mocks/core/data_transformers/data_transformer.py b/tests/mocks/core/data_transformers/data_transformer.py new file mode 100644 index 0000000..d63e8ba --- /dev/null +++ b/tests/mocks/core/data_transformers/data_transformer.py @@ -0,0 +1,89 @@ +# -*- coding: ascii -*- + +""" +Mock data transformer implementations for testing. + +This module provides mock implementations of IDataTransformer used for testing +the transformation pipeline in benchmark execution and algorithm evaluation. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from typing import Any + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.core.data_providers.idata_provider import DataProvider +from pysatl_cpd.core.data_transformers.idata_transformer import IDataTransformer + + +class MockDataTransformer(IDataTransformer[float, float]): + """ + Mock data transformer for testing benchmark execution. + + This transformer adds a specified constant value to every observation + in the dataset and keeps track of how many times the `transform` method + was applied. It wraps the transformed data back into a `LabeledData` instance. + + Parameters + ---------- + name : str, default="MockTransform" + The string identifier for the transformer. + add_value : float, default=1.0 + The numeric value to add to each observation. + """ + + def __init__(self, name: str = "MockTransform", add_value: float = 1.0) -> None: + self._name = name + self.add_value = add_value + self.call_count = 0 + + @property + def name(self) -> str: + """ + Return the name of the mock transformer. + + Returns + ------- + str + The identifier of this transformer instance. 
+ """ + return self._name + + def __hash__(self) -> int: + """ + Return a hash based on the transformer's properties. + + Used to uniquely identify the pipeline configuration in the cache. + + Returns + ------- + int + Hash value representing the transformer configuration. + """ + return hash((self._name, self.add_value)) + + def transform(self, provider: DataProvider[float]) -> DataProvider[float]: + """ + Transform the data by adding a constant value to each element. + + Parameters + ---------- + provider : DataProvider[float] + The original data provider. + + Returns + ------- + DataProvider[float] + A new `LabeledData` instance containing the transformed values. + """ + self.call_count += 1 + + # Transform data + new_data: list[float] = [float(x) + self.add_value for x in provider] + + # Preserve change points if the provider has them + change_points: Any = getattr(provider, "change_points", getattr(provider, "change_point", [])) + + return LabeledData(raw_data=new_data, change_points=change_points, name=f"{provider.name}_{self.name}") diff --git a/tests/unit/benchmark/core/test_benchmark_executor.py b/tests/unit/benchmark/core/test_benchmark_executor.py index d5e380f..e141eac 100644 --- a/tests/unit/benchmark/core/test_benchmark_executor.py +++ b/tests/unit/benchmark/core/test_benchmark_executor.py @@ -25,6 +25,7 @@ from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace from tests.mocks.algorithms.online.simple import MockOnlineAlgorithm +from tests.mocks.core.data_transformers.data_transformer import MockDataTransformer def _make_provider( @@ -519,3 +520,139 @@ def test_multiple_thresholds_create_separate_pickle_files(self, tmp_path: Path) with open(registry_path, encoding="utf-8") as f: rows: list[dict[str, str]] = list(csv.DictReader(f)) assert len(rows) == 3 + + +# --------------------------------------------------------------------------- +# 6. 
# Data Transformers
# ---------------------------------------------------------------------------
class TestBenchmarkExecutorTransformers:
    """Tests for the DataTransformer integration in BenchmarkExecutor."""

    def test_transformer_modifies_data_passed_to_algorithm(self) -> None:
        """Executor should pass transformed data, not raw data, to the solver."""
        algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0])
        transformer = MockDataTransformer(name="T1", add_value=5.0)
        entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0], transformer=transformer)

        # Original provider with zeros
        provider: LabeledData[float] = LabeledData(raw_data=[0.0, 0.0, 0.0], change_points=[], name="data")
        solver: OnlineCpdSolver = OnlineCpdSolver()

        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
            entries=[entry],
            providers=[provider],
            solver=solver,
        )
        executor.execute()

        # The algorithm should have received [5.0, 5.0, 5.0]
        history: list[float] = algo.get_call_history()
        assert history == [5.0, 5.0, 5.0]

    def test_record_metadata_uses_transformer_name_and_hash(self) -> None:
        """Benchmark record should inherit the full name and hash from the Entry."""
        algo = MockOnlineAlgorithm[float](name="BaseAlgo", return_sequence=[0.0])
        transformer = MockDataTransformer(name="MyTF", add_value=1.0)
        entry = AlgorithmEntry(algorithm=algo, thresholds=[1.0], transformer=transformer)

        provider: LabeledData[float] = _make_provider(3, name="d1")
        solver: OnlineCpdSolver = OnlineCpdSolver()

        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
            entries=[entry],
            providers=[provider],
            solver=solver,
        )
        results = executor.execute()
        record: BenchmarkRecord = results[0][0]

        # Name should be combined
        assert record.algorithm == "BaseAlgo_MyTF"
        assert record.algorithm == entry.full_name

        # Hash should match the entry's composite hash
        assert record.configuration_hash == entry.full_hash

    def test_caching_separates_different_transformers(self, tmp_path: Path) -> None:
        """Using the same algorithm but different transformers should create separate cache records."""
        algo = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0])

        entry_clean = AlgorithmEntry(algorithm=algo, thresholds=[1.0], transformer=None)
        entry_transformed = AlgorithmEntry(
            algorithm=algo, thresholds=[1.0], transformer=MockDataTransformer(name="T1", add_value=2.0)
        )

        provider: LabeledData[float] = _make_provider(3, name="data")
        solver: OnlineCpdSolver = OnlineCpdSolver()

        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
            entries=[entry_clean, entry_transformed],
            providers=[provider],
            solver=solver,
            dump_dir=tmp_path,
        )
        executor.execute()

        # Should produce two distinct pickle files
        pkl_files: list[Path] = list(tmp_path.glob("*.pkl"))
        assert len(pkl_files) == 2

        # "A_T1_" itself contains "A_", so testing both substrings against the
        # joined file names would let the plain-"A" check pass vacuously.
        # Classify every file individually instead: exactly one must carry the
        # transformer suffix and exactly one must not.
        transformed_names: list[str] = [f.name for f in pkl_files if "A_T1_" in f.name]
        clean_names: list[str] = [f.name for f in pkl_files if "A_T1_" not in f.name]
        assert len(transformed_names) == 1
        assert len(clean_names) == 1
        assert "A_" in clean_names[0]

    def test_transformer_is_called_even_on_cache_hit(self, tmp_path: Path) -> None:
        """Transformer should be applied before checking cache, incrementing its call count."""
        transformer = MockDataTransformer(name="T1", add_value=1.0)
        provider: LabeledData[float] = _make_provider(3, name="data")
        solver: OnlineCpdSolver = OnlineCpdSolver()

        # First run to populate cache
        algo1 = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0])
        entry1 = AlgorithmEntry(algorithm=algo1, thresholds=[1.0], transformer=transformer)
        exec1: BenchmarkExecutor[float] = BenchmarkExecutor([entry1], [provider], solver, tmp_path)
        exec1.execute()

        assert transformer.call_count == 1
        assert len(algo1.get_call_history()) == 3

        # Second run with cache hit (using a fresh algorithm instance to verify it doesn't run)
        algo2 = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0])
        entry2 = AlgorithmEntry(algorithm=algo2, thresholds=[1.0], transformer=transformer)
        exec2: BenchmarkExecutor[float] = BenchmarkExecutor([entry2], [provider], solver, tmp_path)
        exec2.execute()

        # Transformer is still called during iteration
        assert transformer.call_count == 2

        # But the solver/algorithm was skipped due to cache hit
        assert len(algo2.get_call_history()) == 0

    def test_multiple_entries_mixed_transformers(self) -> None:
        """Executor should properly route data when processing mixed transformer configurations."""
        algo1 = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0])
        algo2 = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0])
        algo3 = MockOnlineAlgorithm[float](name="A", return_sequence=[0.0])

        entry_none = AlgorithmEntry(algorithm=algo1, thresholds=[1.0])
        entry_t1 = AlgorithmEntry(algorithm=algo2, thresholds=[1.0], transformer=MockDataTransformer("T1", 10.0))
        entry_t2 = AlgorithmEntry(algorithm=algo3, thresholds=[1.0], transformer=MockDataTransformer("T2", 20.0))

        # Provider yields [1.0, 1.0]
        provider: LabeledData[float] = _make_provider(2, name="data")
        solver: OnlineCpdSolver = OnlineCpdSolver()

        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
            entries=[entry_none, entry_t1, entry_t2],
            providers=[provider],
            solver=solver,
        )
        results = executor.execute()

        assert len(results) == 3

        # Verify specific algorithm histories to ensure they received correct streams
        assert algo1.get_call_history() == [1.0, 1.0]  # No transformation
        assert algo2.get_call_history() == [11.0, 11.0]  # 1.0 + 10.0
        assert algo3.get_call_history() == [21.0, 21.0]  # 1.0 + 20.0
# File: tests/mocks/core/data_transformers/simple.py
# -*- coding: ascii -*-

"""
Dummy data transformer implementation for testing.

This module provides a minimal concrete implementation of IDataTransformer
used strictly to test the default behaviors of the abstract base class.
"""

__author__ = "Danil Totmyanin"
__copyright__ = "Copyright (c) 2026 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"

from typing import Any

from pysatl_cpd.core.data_providers.idata_provider import DataProvider
from pysatl_cpd.core.data_transformers.idata_transformer import IDataTransformer


class DummyTransformer(IDataTransformer[Any, Any]):
    """
    Minimal concrete implementation of IDataTransformer.

    Used for testing the default behaviors of the abstract base class,
    such as the default `name` property and the default `__hash__`.
    """

    def transform(self, provider: DataProvider[Any]) -> DataProvider[Any]:
        """
        Dummy implementation that just returns the input provider.

        Parameters
        ----------
        provider : DataProvider[Any]
            The source data provider.

        Returns
        -------
        DataProvider[Any]
            The unmodified input provider.
        """
        return provider


# File: tests/unit/core/data_transformers/test_columns_selector_transformer.py
# -*- coding: ascii -*-

"""
Tests for ColumnsSelectorTransformer.

Covers naming, column selection into univariate/multivariate providers,
and error handling for 1D input and out-of-bounds column indices.
"""

__author__ = "Danil Totmyanin"
__copyright__ = "Copyright (c) 2026 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"

import re

import numpy as np
import pytest

from pysatl_cpd.core.data_providers.numpy_data_provider import (
    NDArrayMultivariateProvider,
    NDArrayUnivariateProvider,
)
from pysatl_cpd.core.data_transformers.columns_selector_transformer import (
    ColumnsSelectorTransformer,
)


class TestColumnsSelectorTransformer:
    """Tests for ColumnsSelectorTransformer logic and naming."""

    def test_name_single_column(self) -> None:
        """Transformer name should be formatted as 'Col_X' for a single int."""
        transformer = ColumnsSelectorTransformer(columns=2)
        assert transformer.name == "Col_2"

    def test_name_multiple_columns(self) -> None:
        """Transformer name should be formatted as 'Cols_X_Y' for a list of ints."""
        transformer = ColumnsSelectorTransformer(columns=[0, 2, 3])
        assert transformer.name == "Cols_0_2_3"

    def test_transform_int_to_univariate(self) -> None:
        """Selecting a single int column should yield a Univariate provider."""
        data: np.ndarray = np.array(
            [
                [1.0, 2.0, 3.0],
                [4.0, 5.0, 6.0],
                [7.0, 8.0, 9.0],
            ]
        )
        provider = NDArrayMultivariateProvider(data=data, name="test_data")
        transformer = ColumnsSelectorTransformer(columns=1)

        result_provider = transformer.transform(provider)

        # Check type and name
        assert isinstance(result_provider, NDArrayUnivariateProvider)
        assert result_provider.name == "test_data_Col_1"

        # Check extracted data (column index 1 -> [2.0, 5.0, 8.0])
        result_data: list[float] = list(result_provider)
        np.testing.assert_array_equal(result_data, [2.0, 5.0, 8.0])

    def test_transform_list_to_multivariate(self) -> None:
        """Selecting a list of columns should yield a Multivariate provider."""
        data: np.ndarray = np.array(
            [
                [1.0, 2.0, 3.0, 4.0],
                [5.0, 6.0, 7.0, 8.0],
            ]
        )
        provider = NDArrayMultivariateProvider(data=data, name="multidataset")
        transformer = ColumnsSelectorTransformer(columns=[0, 3])

        result_provider = transformer.transform(provider)

        # Check type and name
        assert isinstance(result_provider, NDArrayMultivariateProvider)
        assert result_provider.name == "multidataset_Cols_0_3"

        # Check extracted data (columns 0 and 3)
        result_data: list[np.ndarray] = list(result_provider)
        expected_data: list[np.ndarray] = [
            np.array([1.0, 4.0]),
            np.array([5.0, 8.0]),
        ]

        assert len(result_data) == 2
        np.testing.assert_array_equal(result_data[0], expected_data[0])
        np.testing.assert_array_equal(result_data[1], expected_data[1])

    def test_transform_raises_value_error_on_1d_data(self) -> None:
        """Attempting to select columns from 1D data should raise ValueError."""
        data: np.ndarray = np.array([1.0, 2.0, 3.0])
        provider = NDArrayUnivariateProvider(data=data, name="1d_data")
        transformer = ColumnsSelectorTransformer(columns=0)

        expected_msg = "ColumnsSelectorTransformer expects 2D data, got 1D data from provider '1d_data'."
        # `match` is a regular expression applied with re.search; escape the
        # literal message so that '.' only matches an actual period.
        with pytest.raises(ValueError, match=re.escape(expected_msg)):
            transformer.transform(provider)  # type: ignore[arg-type]

    def test_transform_raises_index_error_on_out_of_bounds(self) -> None:
        """Passing an out-of-bounds column index should propagate an IndexError from NumPy."""
        data: np.ndarray = np.array(
            [
                [1.0, 2.0],
                [3.0, 4.0],
            ]
        )
        provider = NDArrayMultivariateProvider(data=data, name="data")

        # Array only has columns 0 and 1, index 5 is out of bounds
        transformer = ColumnsSelectorTransformer(columns=5)

        with pytest.raises(IndexError):
            transformer.transform(provider)
# File: tests/unit/core/data_transformers/test_idata_transformer.py
# -*- coding: ascii -*-

"""
Tests for the IDataTransformer base class.

Exercises the default `name` and `__hash__` behavior through the minimal
DummyTransformer subclass, which overrides neither of them.
"""

__author__ = "Danil Totmyanin"
__copyright__ = "Copyright (c) 2026 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"


from tests.mocks.core.data_transformers.simple import DummyTransformer


class TestIDataTransformer:
    """Checks the defaults a subclass inherits from IDataTransformer."""

    def test_default_name_is_class_name(self) -> None:
        """Without an override, `name` is expected to equal the class name."""
        assert DummyTransformer().name == "DummyTransformer"

    def test_default_hash_is_hash_of_name(self) -> None:
        """Without an override, hashing the transformer should match hashing its name."""
        expected_hash = hash("DummyTransformer")
        assert hash(DummyTransformer()) == expected_hash
# File: tests/mocks/core/data_providers/pandas_provider.py
# -*- coding: ascii -*-

"""
Mock pandas data provider for testing segment logic.
"""

__author__ = "Danil Totmyanin"
__copyright__ = "Copyright (c) 2026 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"


from pysatl_cpd.core.data_providers.dataset import PandasLabeledDataProvider, SegmentFilter


class MockPandasLabeledDataProvider(PandasLabeledDataProvider):
    """
    Test double for PandasLabeledDataProvider with canned segment queries.

    The parent initializer is never invoked, so no pandas DataFrame is needed.
    Tests assign `mock_bisegments` and `mock_indexes` directly; both query
    methods hand those lists back regardless of the filter they receive.
    """

    def __init__(self, name: str = "MockPandasProvider") -> None:
        # Deliberately no super().__init__(): the real constructor is bypassed.
        self.mock_indexes: list[tuple[int, int, int]] = []
        self.mock_bisegments: list[PandasLabeledDataProvider] = []
        self._name = name

    @property
    def name(self) -> str:
        """Name given at construction time."""
        return self._name

    def query_bisegments(self, filter_fn: SegmentFilter | None = None) -> list[PandasLabeledDataProvider]:
        """Hand back the canned bisegment providers; `filter_fn` is ignored."""
        return self.mock_bisegments

    def query_bisegments_indexes(self, filter_fn: SegmentFilter | None = None) -> list[tuple[int, int, int]]:
        """Hand back the canned bisegment index triples; `filter_fn` is ignored."""
        return self.mock_indexes
# File: tests/unit/benchmark/metrics/test_segment_aggregation_metric.py
# -*- coding: ascii -*-

"""
Unit tests for SegmentAggregationMetric.

Verifies that the metric correctly slices traces and providers according to
transition filters, groups them by transition name, and delegates evaluation
to the base aggregation metric.
"""

__author__ = "Danil Totmyanin"
__copyright__ = "Copyright (c) 2026 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"

from collections.abc import Sequence

from pysatl_cpd.benchmark.metrics.segment_aggregation_metric import SegmentAggregationMetric
from pysatl_cpd.core.data_providers.dataset import PandasLabeledDataProvider, SegmentFilter, SegmentInfo
from tests.mocks.analysis.metrics.run_metric import MockRunMetric
from tests.mocks.benchmark.metrics.aggregation_metric import MockAggregationMetric
from tests.mocks.core.data_providers.pandas_provider import MockPandasLabeledDataProvider
from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace


def dummy_filter(pair: tuple[SegmentInfo, SegmentInfo]) -> bool:
    """Segment filter stub that accepts every pair."""
    return True


class TestSegmentAggregationMetricInit:
    """Construction-time checks for SegmentAggregationMetric."""

    def test_initialization_stores_properties(self) -> None:
        """The base aggregation metric and the filter mapping must be kept on the instance."""
        run_metric: MockRunMetric[MockOnlineDetectionTrace, PandasLabeledDataProvider] = MockRunMetric([1.0])
        agg_metric: MockAggregationMetric[MockOnlineDetectionTrace, PandasLabeledDataProvider] = (
            MockAggregationMetric(run_metric)
        )
        transition_filters: dict[str, SegmentFilter] = {"A->B": dummy_filter}

        segment_metric: SegmentAggregationMetric[MockOnlineDetectionTrace, float, float] = SegmentAggregationMetric(
            base_agg_metric=agg_metric,
            transition_filters=transition_filters,
        )

        assert segment_metric.base_agg_metric is agg_metric
        assert segment_metric._transition_filters == transition_filters


class TestSegmentAggregationMetricEvaluate:
    """Behavior of SegmentAggregationMetric.evaluate()."""

    def test_evaluate_empty_runs(self) -> None:
        """An empty list of runs must give an empty result without touching the base metric."""
        run_metric: MockRunMetric[MockOnlineDetectionTrace, PandasLabeledDataProvider] = MockRunMetric([1.0])
        agg_metric: MockAggregationMetric[MockOnlineDetectionTrace, PandasLabeledDataProvider] = (
            MockAggregationMetric(run_metric)
        )
        transition_filters: dict[str, SegmentFilter] = {"A->B": dummy_filter}

        segment_metric: SegmentAggregationMetric[MockOnlineDetectionTrace, float, float] = SegmentAggregationMetric(
            base_agg_metric=agg_metric,
            transition_filters=transition_filters,
        )

        outcome: dict[str, float] = segment_metric.evaluate([])

        # No runs means no sub-runs, hence nothing to aggregate.
        assert outcome == {}
        assert len(agg_metric.aggregate_calls) == 0

    def test_evaluate_filters_with_no_matches_are_omitted(self) -> None:
        """Transitions whose filter yields no bisegments must be absent from the output."""
        run_metric: MockRunMetric[MockOnlineDetectionTrace, PandasLabeledDataProvider] = MockRunMetric([1.0])
        agg_metric: MockAggregationMetric[MockOnlineDetectionTrace, PandasLabeledDataProvider] = (
            MockAggregationMetric(run_metric)
        )
        transition_filters: dict[str, SegmentFilter] = {
            "A->B": dummy_filter,
            "C->D": dummy_filter,
        }

        segment_metric: SegmentAggregationMetric[MockOnlineDetectionTrace, float, float] = SegmentAggregationMetric(
            base_agg_metric=agg_metric,
            transition_filters=transition_filters,
        )

        empty_trace = MockOnlineDetectionTrace(detected_change_points=[])
        empty_provider = MockPandasLabeledDataProvider(name="MainProvider")

        # Every query against this provider comes back empty.
        empty_provider.mock_bisegments = []
        empty_provider.mock_indexes = []

        runs: Sequence[tuple[MockOnlineDetectionTrace, PandasLabeledDataProvider]] = [(empty_trace, empty_provider)]

        outcome: dict[str, float] = segment_metric.evaluate(runs)

        assert outcome == {}
        assert len(agg_metric.aggregate_calls) == 0

    def test_evaluate_groups_and_delegates_correctly(self) -> None:
        """
        Traces must be sliced per bisegment, grouped under the filter name, and
        passed to the base metric's evaluate() as the grouped sub-runs.
        """
        # --- Base metrics -------------------------------------------------
        # The run metric yields 1.0 per call; per the expectations below the
        # mock aggregation metric adds those values up.
        run_metric: MockRunMetric[MockOnlineDetectionTrace, PandasLabeledDataProvider] = MockRunMetric([1.0])
        agg_metric: MockAggregationMetric[MockOnlineDetectionTrace, PandasLabeledDataProvider] = (
            MockAggregationMetric(run_metric)
        )

        transition_filters: dict[str, SegmentFilter] = {
            "A->B": dummy_filter,
            "C->D": dummy_filter,
        }

        segment_metric: SegmentAggregationMetric[MockOnlineDetectionTrace, float, float] = SegmentAggregationMetric(
            base_agg_metric=agg_metric,
            transition_filters=transition_filters,
        )

        # --- Trace and provider -------------------------------------------
        full_trace = MockOnlineDetectionTrace(detected_change_points=[15, 45])
        full_provider = MockPandasLabeledDataProvider(name="MainProvider")

        # Two canned bisegments:
        #   (10, 15, 20) -> covers the detection at 15
        #   (40, 45, 50) -> covers the detection at 45
        first_piece = MockPandasLabeledDataProvider(name="Sub1")
        second_piece = MockPandasLabeledDataProvider(name="Sub2")

        full_provider.mock_bisegments = [first_piece, second_piece]
        full_provider.mock_indexes = [(10, 15, 20), (40, 45, 50)]

        runs: Sequence[tuple[MockOnlineDetectionTrace, PandasLabeledDataProvider]] = [(full_trace, full_provider)]

        # --- Act ------------------------------------------------------------
        outcome: dict[str, float] = segment_metric.evaluate(runs)

        # --- Assert ---------------------------------------------------------
        # The provider is queried once per filter ('A->B' and 'C->D'); each
        # query yields 2 sub-providers, so each group holds 2 sub-runs and,
        # with 1.0 per run, aggregates to 2.0.
        assert "A->B" in outcome
        assert "C->D" in outcome
        assert outcome["A->B"] == 2.0
        assert outcome["C->D"] == 2.0

        # 2 sub-runs for 'A->B' plus 2 for 'C->D' -> 4 run-metric invocations.
        assert len(run_metric.calls) == 4

        # First invocation: first_piece paired with the trace cut to [10, 20].
        first_trace, first_provider = run_metric.calls[0]
        assert isinstance(first_trace, MockOnlineDetectionTrace)
        assert first_trace.algorithm_name == "MockOnlineAlgorithm"
        # Of the original detections [15, 45] only 15 falls in [10, 20];
        # shifted by the start (10) it becomes 5.
        assert first_trace.detected_change_points == [5]
        assert first_provider is first_piece

        # Second invocation: second_piece paired with the trace cut to [40, 50].
        second_trace, second_provider = run_metric.calls[1]
        assert isinstance(second_trace, MockOnlineDetectionTrace)
        # Only 45 falls in [40, 50]; shifted by the start (40) it becomes 5.
        assert second_trace.detected_change_points == [5]
        assert second_provider is second_piece