elastic · edsavage · Mar 12, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/bin/pytorch_inference/Main.cc b/bin/pytorch_inference/Main.cc
@@ -294,7 +294,7 @@ int main(int argc, char** argv) {
     // allocations rather than per allocation. But macOS is not supported for
     // production, but just as a convenience for developers. So the most
     // important thing is that the threading works as intended on Linux.
-    at::set_num_threads(threadSettings.numThreadsPerAllocation());
+    at::set_num_threads(static_cast<int>(threadSettings.numThreadsPerAllocation()));
 
     // This is not used as we don't call at::launch anywhere.
     // Setting it to 1 to ensure there is no thread pool sitting around.

diff --git a/cmake/compiler/clang.cmake b/cmake/compiler/clang.cmake
@@ -32,6 +32,9 @@ list(APPEND ML_C_FLAGS
   "-Wno-padded"
   "-Wno-poison-system-directories"
   "-Wno-sign-conversion"
+  "-Wno-missing-noreturn"
+  "-Wno-nrvo"
+  "-Wno-switch-default"
   "-Wno-unknown-warning-option"
   "-Wno-unreachable-code"
   "-Wno-used-but-marked-unused"

diff --git a/cmake/compiler/msvc.cmake b/cmake/compiler/msvc.cmake
@@ -27,10 +27,14 @@ list(APPEND ML_COMPILE_DEFINITIONS
   _WIN32_WINNT=0x0601
   Windows)
 
+# Treat SYSTEM include directories as external — suppress warnings from
+# third-party headers (Boost, Eigen, PyTorch, etc.).  Requires VS 2022 (17.0)+.
+set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "/external:I ")
 list(APPEND ML_C_FLAGS
   "/X"
   "/nologo"
   "/W4"
+  "/external:W0"
   "/EHsc"
   "/Gw"
   "/Zc:inline"

diff --git a/cmake/functions.cmake b/cmake/functions.cmake
@@ -382,6 +382,12 @@ function(ml_add_test_executable _target)
 
   set_property(TARGET ml_test_${_target} PROPERTY POSITION_INDEPENDENT_CODE TRUE)
 
+  # Boost.Test's BOOST_TEST_MODULE / BOOST_TEST_NO_MAIN macros are consumed
+  # by the subsequent #include <boost/test/unit_test.hpp>, but Clang still
+  # reports them via -Wunused-macros.  Suppress it for test targets only.
+  target_compile_options(ml_test_${_target} PRIVATE
+    $<$<CXX_COMPILER_ID:AppleClang,Clang>:-Wno-unused-macros>)
+
   if(ML_PCH)
     target_precompile_headers(ml_test_${_target} PRIVATE
       <string>

diff --git a/include/core/CConcurrentWrapper.h b/include/core/CConcurrentWrapper.h
@@ -58,7 +58,7 @@ class CConcurrentWrapper final : private CNonCopyable {
     //! The code inside of this lambda is guaranteed to be executed in an atomic fashion.
     template<typename F>
     void operator()(F f) const {
-        m_Queue.push([=] { f(m_Resource); });
+        m_Queue.push([this, f] { f(m_Resource); });
     }
 
     //! Debug the memory used by this component.

diff --git a/include/maths/common/CBasicStatistics.h b/include/maths/common/CBasicStatistics.h
@@ -1420,7 +1420,7 @@ struct SCentralMomentsCustomAdd {
     static inline void add(const U& x,
                            typename SCoordinate<T>::Type n,
                            CBasicStatistics::SSampleCentralMoments<T, ORDER>& moments) {
-        moments.add(x, n, 0);
+        moments.add(x, static_cast<double>(n), 0);
     }
 };
 }

diff --git a/include/maths/common/CBootstrapClusterer.h b/include/maths/common/CBootstrapClusterer.h
@@ -677,8 +677,6 @@ class CBootstrapClusterer {
         this->visit(next, graph, parities, state);
 
         double lowestCost = state.cost();
-        double bestCut = state.s_Cut;
-        std::size_t bestA = state.s_A;
         TBoolVec best = parities;
 
         while (state.s_A + 1 < V) {
@@ -725,16 +723,19 @@ class CBootstrapClusterer {
             double cutCost = state.cost();
             if (cutCost < lowestCost) {
                 lowestCost = cutCost;
-                bestCut = state.s_Cut;
-                bestA = state.s_A;
                 best = parities;
             }
         }
 
         cost = lowestCost;
         parities.swap(best);
 
-        LOG_TRACE(<< "Best cut = " << bestCut << ", |A| = " << bestA << ", |B| = " << V - bestA
+        LOG_TRACE(<< "Best cut |A| = "
+                  << static_cast<std::size_t>(
+                         std::count(parities.begin(), parities.end(), true))
+                  << ", |B| = "
+                  << V - static_cast<std::size_t>(
+                             std::count(parities.begin(), parities.end(), true))
                   << ", cost = " << cost << ", threshold = " << threshold);
 
         return cost < threshold;

diff --git a/include/model/CMetricModelFactory.h b/include/model/CMetricModelFactory.h
@@ -135,7 +135,7 @@ class MODEL_EXPORT CMetricModelFactory final : public CModelFactory {
     void features(const TFeatureVec& features) override;
 
     //! Set the modeled bucket length.
-    virtual void bucketLength(core_t::TTime bucketLength);
+    void bucketLength(core_t::TTime bucketLength);
     //@}
 
     //! Get the minimum seasonal variance scale

diff --git a/lib/api/CDataFrameAnalysisInstrumentation.cc b/lib/api/CDataFrameAnalysisInstrumentation.cc
@@ -56,14 +56,11 @@ const std::string MEMORY_TYPE_TAG{"analytics_memory_usage"};
 const std::string OUTLIER_DETECTION_STATS{"outlier_detection_stats"};
 const std::string PARAMETERS_TAG{"parameters"};
 const std::string PEAK_MEMORY_USAGE_TAG{"peak_usage_bytes"};
-const std::string PROGRESS_TAG{"progress"};
 const std::string REGRESSION_STATS_TAG{"regression_stats"};
-const std::string STEP_TAG{"step"};
 const std::string TIMESTAMP_TAG{"timestamp"};
 const std::string TIMING_ELAPSED_TIME_TAG{"elapsed_time"};
 const std::string TIMING_ITERATION_TIME_TAG{"iteration_time"};
 const std::string TIMING_STATS_TAG{"timing_stats"};
-const std::string TYPE_TAG{"type"};
 const std::string VALIDATION_FOLD_TAG{"fold"};
 const std::string VALIDATION_FOLD_VALUES_TAG{"fold_values"};
 const std::string VALIDATION_LOSS_TAG{"validation_loss"};

diff --git a/lib/api/CDataFrameTrainBoostedTreeRunner.cc b/lib/api/CDataFrameTrainBoostedTreeRunner.cc
@@ -340,6 +340,7 @@ std::size_t CDataFrameTrainBoostedTreeRunner::numberExtraColumns() const {
         return maths::analytics::CBoostedTreeFactory::estimateExtraColumnsForPredict(
             m_DimensionPrediction);
     }
+    LOG_ABORT(<< "Unexpected task type");
 }
 
 std::size_t CDataFrameTrainBoostedTreeRunner::dataFrameSliceCapacity() const {
@@ -372,6 +373,7 @@ CDataFrameTrainBoostedTreeRunner::rowsToWriteMask(const core::CDataFrame& frame)
     case api_t::E_Update:
         return m_BoostedTree->newTrainingRowMask();
     }
+    LOG_ABORT(<< "Unexpected task type");
 }
 
 const std::string& CDataFrameTrainBoostedTreeRunner::dependentVariableFieldName() const {
@@ -595,6 +597,7 @@ std::size_t CDataFrameTrainBoostedTreeRunner::estimateBookkeepingMemoryUsage(
         return m_TrainedModelMemoryUsage + m_BoostedTreeFactory->estimateMemoryUsageForPredict(
                                                numberTrainingRows, numberColumns);
     }
+    LOG_ABORT(<< "Unexpected task type");
 }
 
 const CDataFrameAnalysisInstrumentation&

diff --git a/lib/api/CDetectionRulesJsonParser.cc b/lib/api/CDetectionRulesJsonParser.cc
@@ -22,7 +22,6 @@ namespace {
 const std::string ACTIONS("actions");
 const std::string ACTUAL("actual");
 const std::string APPLIES_TO("applies_to");
-const std::string CONDITION("condition");
 const std::string CONDITIONS("conditions");
 const std::string DIFF_FROM_TYPICAL("diff_from_typical");
 const std::string EXCLUDE("exclude");

diff --git a/lib/api/CFieldDataCategorizer.cc b/lib/api/CFieldDataCategorizer.cc
@@ -603,11 +603,11 @@ bool CFieldDataCategorizer::periodicPersistStateInBackground() {
     // Do NOT pass the captures by reference - they
     // MUST be copied for thread safety
     if (m_PersistenceManager->addPersistFunc([
-            this, partitionFieldValues = std::move(partitionFieldValues),
-            dataCategorizerPersistFuncs = std::move(dataCategorizerPersistFuncs),
+            this, partitionFieldValuesInner = std::move(partitionFieldValues),
+            dataCategorizerPersistFuncsInner = std::move(dataCategorizerPersistFuncs),
             categorizerAllocationFailures = m_CategorizerAllocationFailures
         ](core::CDataAdder & persister) {
-            return this->doPersistState(partitionFieldValues, dataCategorizerPersistFuncs,
+            return this->doPersistState(partitionFieldValuesInner, dataCategorizerPersistFuncsInner,
                                         categorizerAllocationFailures, persister);
         }) == false) {
         LOG_ERROR(<< "Failed to add categorizer background persistence function");

diff --git a/lib/api/CForecastRunner.cc b/lib/api/CForecastRunner.cc
@@ -442,7 +442,7 @@ bool CForecastRunner::parseAndValidateForecastRequest(const std::string& control
     if (forecastJob.s_MaxForecastModelMemory != DEFAULT_MAX_FORECAST_MODEL_MEMORY &&
         (forecastJob.s_MaxForecastModelMemory >= MAX_FORECAST_MODEL_PERSISTANCE_MEMORY ||
          forecastJob.s_MaxForecastModelMemory >=
-             static_cast<std::size_t>(jobBytesSizeLimit * 0.40))) {
+             static_cast<std::size_t>(static_cast<double>(jobBytesSizeLimit) * 0.40))) {
         errorFunction(forecastJob, ERROR_BAD_MODEL_MEMORY_LIMIT);
         return false;
     }

diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc
@@ -25,7 +25,6 @@ const std::string JOB_ID{"job_id"};
 const std::string MODEL_SIZE_STATS{"model_size_stats"};
 const std::string MODEL_BYTES{"model_bytes"};
 const std::string PEAK_MODEL_BYTES{"peak_model_bytes"};
-const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"};
 const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"};
 const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"};
 const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"};

diff --git a/lib/api/CSingleFieldDataCategorizer.cc b/lib/api/CSingleFieldDataCategorizer.cc
@@ -104,10 +104,11 @@ CSingleFieldDataCategorizer::makeForegroundPersistFunc() const {
     model::CDataCategorizer::TPersistFunc categorizerPersistFunc{
         m_DataCategorizer->makeForegroundPersistFunc()};
 
-    return [ categorizerPersistFunc = std::move(categorizerPersistFunc),
-             this ](core::CStatePersistInserter & inserter) {
+    return [
+        categorizerPersistFuncInner = std::move(categorizerPersistFunc), this
+    ](core::CStatePersistInserter & inserter) {
         CSingleFieldDataCategorizer::acceptPersistInserter(
-            categorizerPersistFunc, m_DataCategorizer->examplesCollector(),
+            categorizerPersistFuncInner, m_DataCategorizer->examplesCollector(),
             *m_CategoryIdMapper, inserter);
     };
 }
@@ -126,12 +127,13 @@ CSingleFieldDataCategorizer::makeBackgroundPersistFunc() const {
     // function must be able to operate in a different thread on a snapshot of
     // the data at the time it was created.
     return [
-        categorizerPersistFunc = std::move(categorizerPersistFunc),
-        examplesCollector = std::move(examplesCollector),
-        categoryIdMapperClone = std::move(categoryIdMapperClone)
+        categorizerPersistFuncInner = std::move(categorizerPersistFunc),
+        examplesCollectorInner = std::move(examplesCollector),
+        categoryIdMapperCloneInner = std::move(categoryIdMapperClone)
     ](core::CStatePersistInserter & inserter) {
         CSingleFieldDataCategorizer::acceptPersistInserter(
-            categorizerPersistFunc, examplesCollector, *categoryIdMapperClone, inserter);
+            categorizerPersistFuncInner, examplesCollectorInner,
+            *categoryIdMapperCloneInner, inserter);
     };
 }
 

diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc
@@ -306,8 +306,8 @@ BOOST_AUTO_TEST_CASE(testOutputBucketResultsUntilGivenIncompleteInitialBucket) {
         "testfiles/testLogErrors.boost.log.ini"));
 
     // Start by creating a detector with non-trivial state
-    static const core_t::TTime BUCKET_SIZE{900};
-    static const std::string JOB_ID{"pop_sum_bytes_by_status_over_clientip"};
+    static const core_t::TTime testBucketSize{900};
+    static const std::string testJobId{"pop_sum_bytes_by_status_over_clientip"};
 
     // Open the input and output files
     std::ifstream inputStrm{inputFileName.c_str()};
@@ -321,15 +321,15 @@ BOOST_AUTO_TEST_CASE(testOutputBucketResultsUntilGivenIncompleteInitialBucket) {
     BOOST_TEST_REQUIRE(jobConfig.initFromFile(configFileName));
 
     model::CAnomalyDetectorModelConfig modelConfig =
-        model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE, model_t::E_None,
-                                                          "", 0, false);
+        model::CAnomalyDetectorModelConfig::defaultConfig(
+            testBucketSize, model_t::E_None, "", 0, false);
 
     core::CJsonOutputStreamWrapper wrappedOutputStream{outputStrm};
 
     std::string origSnapshotId;
     std::size_t numOrigDocs{0};
 
-    CTestAnomalyJob origJob{JOB_ID,
+    CTestAnomalyJob origJob{testJobId,
                             limits,
                             jobConfig,
                             modelConfig,
@@ -367,7 +367,7 @@ BOOST_AUTO_TEST_CASE(testOutputBucketResultsUntilGivenIncompleteInitialBucket) {
     std::size_t numRestoredDocs{0};
 
     CTestAnomalyJob restoredJob{
-        JOB_ID,
+        testJobId,
         limits,
         jobConfig,
         modelConfig,
@@ -879,7 +879,7 @@ BOOST_AUTO_TEST_CASE(testConfigUpdate) {
     auto generateRandomAlpha = [](int strLen) {
         std::random_device rd;
         std::mt19937 gen(rd());
-        std::uniform_int_distribution dis(0, 25);
+        std::uniform_int_distribution<int> dis(0, 25);
 
         std::string str;
         for (int i = 0; i < strLen; ++i) {

diff --git a/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc b/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc
@@ -896,7 +896,8 @@ BOOST_AUTO_TEST_CASE(testRegressionPredictionNumericalOnly, *utf::tolerance(0.00
         }};
 
         std::size_t numberExamples{
-            static_cast<std::size_t>(trainExamples * dataSummarizationFraction) + predictExamples};
+            static_cast<std::size_t>(static_cast<double>(trainExamples) * dataSummarizationFraction) +
+            predictExamples};
         runAnalyzer(numberExamples, predictExamples, TTask::E_Predict, &restorerSupplier);
         readPredictions(outputStream.str(), "target_prediction", actualPredictions);
     }
@@ -992,7 +993,8 @@ BOOST_AUTO_TEST_CASE(testRegressionPredictionNumericalCategoricalMix,
         }};
 
         std::size_t numberExamples{
-            static_cast<std::size_t>(trainExamples * dataSummarizationFraction) + predictExamples};
+            static_cast<std::size_t>(static_cast<double>(trainExamples) * dataSummarizationFraction) +
+            predictExamples};
         runAnalyzer(numberExamples, predictExamples, TTask::E_Predict, &restorerSupplier);
         readPredictions(outputStream.str(), "target_prediction", actualPredictions);
     }
@@ -2232,8 +2234,7 @@ BOOST_AUTO_TEST_CASE(testProgressMonitoringFromRestart) {
         TLossFunctionType::E_MseRegression, fieldNames, fieldValues, analyzer, 400);
     analyzer.handleRecord(fieldNames, {"", "", "", "", "", "", "", "$"});
 
-    TStrVec persistedStates{
-        splitOnNull(std::stringstream{std::move(persistenceStream->str())})};
+    TStrVec persistedStates{splitOnNull(std::stringstream{persistenceStream->str()})};
 
     LOG_DEBUG(<< "# states = " << persistedStates.size());
 

diff --git a/lib/api/unittest/CInferenceModelMetadataTest.cc b/lib/api/unittest/CInferenceModelMetadataTest.cc
@@ -306,7 +306,7 @@ BOOST_AUTO_TEST_CASE(testDataSummarization) {
 
     // check correct number of rows up to a rounding error
     BOOST_REQUIRE_CLOSE_ABSOLUTE(static_cast<double>(dataSummarizationNumRows),
-                                 numRows * summarizationFraction, 1.0);
+                                 static_cast<double>(numRows) * summarizationFraction, 1.0);
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/lib/api/unittest/CTestAnomalyJob.cc b/lib/api/unittest/CTestAnomalyJob.cc
@@ -71,4 +71,4 @@ ml::api::CAnomalyJobConfig CTestAnomalyJob::makeJobConfig(const std::string& det
     ml::api::CAnomalyJobConfig jobConfig;
     jobConfig.analysisConfig().parseDetectorsConfig(obj);
     return jobConfig;
-}
+}
diff --git a/lib/core/CDataFrame.cc b/lib/core/CDataFrame.cc
@@ -508,8 +508,9 @@ std::size_t CDataFrame::estimateMemoryUsage(bool inMainMemory,
     // We use an "uncertainty percentage factor" to account for this.
     static constexpr double containerMemoryEstimateUncertaintyPercentage{2.5};
 
-    std::size_t additionalMemory{static_cast<std::size_t>(
-        estimatedMemoryUsage * containerMemoryEstimateUncertaintyPercentage / 100)};
+    std::size_t additionalMemory{
+        static_cast<std::size_t>(static_cast<double>(estimatedMemoryUsage) *
+                                 containerMemoryEstimateUncertaintyPercentage / 100)};
 
     return estimatedMemoryUsage + additionalMemory;
 }
@@ -550,7 +551,7 @@ bool CDataFrame::parallelApplyToAllRows(std::size_t beginRows,
     sliceFuncs.reserve(funcs.size());
 
     for (auto& func : funcs) {
-        sliceFuncs.push_back([=, &func, &successful](const TRowSlicePtr& slice) mutable {
+        sliceFuncs.push_back([=, this, &func, &successful](const TRowSlicePtr& slice) mutable {
             if (successful.load() == false) {
                 return;
             }
@@ -637,23 +638,23 @@ bool CDataFrame::sequentialApplyToAllRows(std::size_t beginRows,
             // We wait here so at most one slice is copied into memory.
             wait_for_valid(backgroundApply);
 
-            backgroundApply = async(
-                defaultAsyncExecutor(),
-                [ =, &func, readSlice_ = std::move(readSlice) ]() mutable {
+            backgroundApply = async(defaultAsyncExecutor(), [
+                =, this, &func, readSlice_ = std::move(readSlice)
+            ]() mutable {
 
-                    TOptionalPopMaskedRow popMaskedRow;
-                    if (rowMask != nullptr) {
-                        beginSliceRows = *maskedRow;
-                        popMaskedRow = CPopMaskedRow{endSliceRows, maskedRow, endMaskedRows};
-                    }
+                TOptionalPopMaskedRow popMaskedRow;
+                if (rowMask != nullptr) {
+                    beginSliceRows = *maskedRow;
+                    popMaskedRow = CPopMaskedRow{endSliceRows, maskedRow, endMaskedRows};
+                }
 
-                    this->applyToRowsOfOneSlice(func[0], beginSliceRows, endSliceRows,
-                                                popMaskedRow, readSlice_);
+                this->applyToRowsOfOneSlice(func[0], beginSliceRows, endSliceRows,
+                                            popMaskedRow, readSlice_);
 
-                    if (commitResult) {
-                        (*slice)->write(readSlice_.rows(), readSlice_.docHashes());
-                    }
-                });
+                if (commitResult) {
+                    (*slice)->write(readSlice_.rows(), readSlice_.docHashes());
+                }
+            });
         }
         break;
     }

diff --git a/lib/core/CJsonLogLayout.cc b/lib/core/CJsonLogLayout.cc
@@ -68,9 +68,9 @@ void CJsonLogLayout::operator()(const boost::log::record_view& rec,
     json::object writer;
     writer[LOGGER_NAME] = LOGGER;
 
-    const auto& timeStamp = boost::log::extract<boost::posix_time::ptime>(
-                                boost::log::aux::default_attribute_names::timestamp(), rec)
-                                .get();
+    const auto timeStamp = boost::log::extract<boost::posix_time::ptime>(
+                               boost::log::aux::default_attribute_names::timestamp(), rec)
+                               .get();
     writer[TIMESTAMP_NAME] = (timeStamp - EPOCH).total_milliseconds();
 
     auto level = boost::log::extract<CLogger::ELevel>(
@@ -102,9 +102,9 @@ void CJsonLogLayout::operator()(const boost::log::record_view& rec,
 
         writer[METHOD_NAME] = methodName;
 
-        const auto& fullFileName = boost::log::extract<std::string>(
-                                       CLogger::instance().fileAttributeName(), rec)
-                                       .get();
+        const auto fullFileName = boost::log::extract<std::string>(
+                                      CLogger::instance().fileAttributeName(), rec)
+                                      .get();
         writer[FILE_NAME] = CJsonLogLayout::cropPath(fullFileName);
 
         writer[LINE_NAME] =