Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/pytorch_inference/Main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ int main(int argc, char** argv) {
// allocations rather than per allocation. However, macOS is not supported for
// production; it is supported only as a convenience for developers. So the most
// important thing is that the threading works as intended on Linux.
at::set_num_threads(threadSettings.numThreadsPerAllocation());
at::set_num_threads(static_cast<int>(threadSettings.numThreadsPerAllocation()));

// This is not used as we don't call at::launch anywhere.
// Setting it to 1 to ensure there is no thread pool sitting around.
Expand Down
3 changes: 3 additions & 0 deletions cmake/compiler/clang.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ list(APPEND ML_C_FLAGS
"-Wno-padded"
"-Wno-poison-system-directories"
"-Wno-sign-conversion"
"-Wno-missing-noreturn"
"-Wno-nrvo"
"-Wno-switch-default"
"-Wno-unknown-warning-option"
"-Wno-unreachable-code"
"-Wno-used-but-marked-unused"
Expand Down
4 changes: 4 additions & 0 deletions cmake/compiler/msvc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,14 @@ list(APPEND ML_COMPILE_DEFINITIONS
_WIN32_WINNT=0x0601
Windows)

# Treat SYSTEM include directories as external — suppress warnings from
# third-party headers (Boost, Eigen, PyTorch, etc.). Requires MSVC 17.0+.
set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "/external:I ")
list(APPEND ML_C_FLAGS
"/X"
"/nologo"
"/W4"
"/external:W0"
"/EHsc"
"/Gw"
"/Zc:inline"
Expand Down
6 changes: 6 additions & 0 deletions cmake/functions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,12 @@ function(ml_add_test_executable _target)

set_property(TARGET ml_test_${_target} PROPERTY POSITION_INDEPENDENT_CODE TRUE)

# Boost.Test's BOOST_TEST_MODULE / BOOST_TEST_NO_MAIN macros are consumed
# by the subsequent #include <boost/test/unit_test.hpp> but Clang flags
# them as unused. Suppress for test targets only.
target_compile_options(ml_test_${_target} PRIVATE
$<$<CXX_COMPILER_ID:AppleClang,Clang>:-Wno-unused-macros>)

if(ML_PCH)
target_precompile_headers(ml_test_${_target} PRIVATE
<string>
Expand Down
2 changes: 1 addition & 1 deletion include/core/CConcurrentWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class CConcurrentWrapper final : private CNonCopyable {
//! The code inside of this lambda is guaranteed to be executed in an atomic fashion.
template<typename F>
void operator()(F f) const {
m_Queue.push([=] { f(m_Resource); });
m_Queue.push([this, f] { f(m_Resource); });
}

//! Debug the memory used by this component.
Expand Down
2 changes: 1 addition & 1 deletion include/maths/common/CBasicStatistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -1420,7 +1420,7 @@ struct SCentralMomentsCustomAdd {
static inline void add(const U& x,
typename SCoordinate<T>::Type n,
CBasicStatistics::SSampleCentralMoments<T, ORDER>& moments) {
moments.add(x, n, 0);
moments.add(x, static_cast<double>(n), 0);
}
};
}
Expand Down
11 changes: 6 additions & 5 deletions include/maths/common/CBootstrapClusterer.h
Original file line number Diff line number Diff line change
Expand Up @@ -677,8 +677,6 @@ class CBootstrapClusterer {
this->visit(next, graph, parities, state);

double lowestCost = state.cost();
double bestCut = state.s_Cut;
std::size_t bestA = state.s_A;
TBoolVec best = parities;

while (state.s_A + 1 < V) {
Expand Down Expand Up @@ -725,16 +723,19 @@ class CBootstrapClusterer {
double cutCost = state.cost();
if (cutCost < lowestCost) {
lowestCost = cutCost;
bestCut = state.s_Cut;
bestA = state.s_A;
best = parities;
}
}

cost = lowestCost;
parities.swap(best);

LOG_TRACE(<< "Best cut = " << bestCut << ", |A| = " << bestA << ", |B| = " << V - bestA
LOG_TRACE(<< "Best cut |A| = "
<< static_cast<std::size_t>(
std::count(parities.begin(), parities.end(), true))
<< ", |B| = "
<< V - static_cast<std::size_t>(
std::count(parities.begin(), parities.end(), true))
<< ", cost = " << cost << ", threshold = " << threshold);

return cost < threshold;
Expand Down
2 changes: 1 addition & 1 deletion include/model/CMetricModelFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class MODEL_EXPORT CMetricModelFactory final : public CModelFactory {
void features(const TFeatureVec& features) override;

//! Set the modeled bucket length.
virtual void bucketLength(core_t::TTime bucketLength);
void bucketLength(core_t::TTime bucketLength);
//@}

//! Get the minimum seasonal variance scale
Expand Down
3 changes: 0 additions & 3 deletions lib/api/CDataFrameAnalysisInstrumentation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,11 @@ const std::string MEMORY_TYPE_TAG{"analytics_memory_usage"};
const std::string OUTLIER_DETECTION_STATS{"outlier_detection_stats"};
const std::string PARAMETERS_TAG{"parameters"};
const std::string PEAK_MEMORY_USAGE_TAG{"peak_usage_bytes"};
const std::string PROGRESS_TAG{"progress"};
const std::string REGRESSION_STATS_TAG{"regression_stats"};
const std::string STEP_TAG{"step"};
const std::string TIMESTAMP_TAG{"timestamp"};
const std::string TIMING_ELAPSED_TIME_TAG{"elapsed_time"};
const std::string TIMING_ITERATION_TIME_TAG{"iteration_time"};
const std::string TIMING_STATS_TAG{"timing_stats"};
const std::string TYPE_TAG{"type"};
const std::string VALIDATION_FOLD_TAG{"fold"};
const std::string VALIDATION_FOLD_VALUES_TAG{"fold_values"};
const std::string VALIDATION_LOSS_TAG{"validation_loss"};
Expand Down
3 changes: 3 additions & 0 deletions lib/api/CDataFrameTrainBoostedTreeRunner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ std::size_t CDataFrameTrainBoostedTreeRunner::numberExtraColumns() const {
return maths::analytics::CBoostedTreeFactory::estimateExtraColumnsForPredict(
m_DimensionPrediction);
}
LOG_ABORT(<< "Unexpected task type");
}

std::size_t CDataFrameTrainBoostedTreeRunner::dataFrameSliceCapacity() const {
Expand Down Expand Up @@ -372,6 +373,7 @@ CDataFrameTrainBoostedTreeRunner::rowsToWriteMask(const core::CDataFrame& frame)
case api_t::E_Update:
return m_BoostedTree->newTrainingRowMask();
}
LOG_ABORT(<< "Unexpected task type");
}

const std::string& CDataFrameTrainBoostedTreeRunner::dependentVariableFieldName() const {
Expand Down Expand Up @@ -595,6 +597,7 @@ std::size_t CDataFrameTrainBoostedTreeRunner::estimateBookkeepingMemoryUsage(
return m_TrainedModelMemoryUsage + m_BoostedTreeFactory->estimateMemoryUsageForPredict(
numberTrainingRows, numberColumns);
}
LOG_ABORT(<< "Unexpected task type");
}

const CDataFrameAnalysisInstrumentation&
Expand Down
1 change: 0 additions & 1 deletion lib/api/CDetectionRulesJsonParser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ namespace {
const std::string ACTIONS("actions");
const std::string ACTUAL("actual");
const std::string APPLIES_TO("applies_to");
const std::string CONDITION("condition");
const std::string CONDITIONS("conditions");
const std::string DIFF_FROM_TYPICAL("diff_from_typical");
const std::string EXCLUDE("exclude");
Expand Down
6 changes: 3 additions & 3 deletions lib/api/CFieldDataCategorizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -603,11 +603,11 @@ bool CFieldDataCategorizer::periodicPersistStateInBackground() {
// Do NOT pass the captures by reference - they
// MUST be copied for thread safety
if (m_PersistenceManager->addPersistFunc([
this, partitionFieldValues = std::move(partitionFieldValues),
dataCategorizerPersistFuncs = std::move(dataCategorizerPersistFuncs),
this, partitionFieldValuesInner = std::move(partitionFieldValues),
dataCategorizerPersistFuncsInner = std::move(dataCategorizerPersistFuncs),
categorizerAllocationFailures = m_CategorizerAllocationFailures
](core::CDataAdder & persister) {
return this->doPersistState(partitionFieldValues, dataCategorizerPersistFuncs,
return this->doPersistState(partitionFieldValuesInner, dataCategorizerPersistFuncsInner,
categorizerAllocationFailures, persister);
}) == false) {
LOG_ERROR(<< "Failed to add categorizer background persistence function");
Expand Down
2 changes: 1 addition & 1 deletion lib/api/CForecastRunner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ bool CForecastRunner::parseAndValidateForecastRequest(const std::string& control
if (forecastJob.s_MaxForecastModelMemory != DEFAULT_MAX_FORECAST_MODEL_MEMORY &&
(forecastJob.s_MaxForecastModelMemory >= MAX_FORECAST_MODEL_PERSISTANCE_MEMORY ||
forecastJob.s_MaxForecastModelMemory >=
static_cast<std::size_t>(jobBytesSizeLimit * 0.40))) {
static_cast<std::size_t>(static_cast<double>(jobBytesSizeLimit) * 0.40))) {
errorFunction(forecastJob, ERROR_BAD_MODEL_MEMORY_LIMIT);
return false;
}
Expand Down
1 change: 0 additions & 1 deletion lib/api/CModelSizeStatsJsonWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ const std::string JOB_ID{"job_id"};
const std::string MODEL_SIZE_STATS{"model_size_stats"};
const std::string MODEL_BYTES{"model_bytes"};
const std::string PEAK_MODEL_BYTES{"peak_model_bytes"};
const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"};
const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"};
const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"};
const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"};
Expand Down
16 changes: 9 additions & 7 deletions lib/api/CSingleFieldDataCategorizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,11 @@ CSingleFieldDataCategorizer::makeForegroundPersistFunc() const {
model::CDataCategorizer::TPersistFunc categorizerPersistFunc{
m_DataCategorizer->makeForegroundPersistFunc()};

return [ categorizerPersistFunc = std::move(categorizerPersistFunc),
this ](core::CStatePersistInserter & inserter) {
return [
categorizerPersistFuncInner = std::move(categorizerPersistFunc), this
](core::CStatePersistInserter & inserter) {
CSingleFieldDataCategorizer::acceptPersistInserter(
categorizerPersistFunc, m_DataCategorizer->examplesCollector(),
categorizerPersistFuncInner, m_DataCategorizer->examplesCollector(),
*m_CategoryIdMapper, inserter);
};
}
Expand All @@ -126,12 +127,13 @@ CSingleFieldDataCategorizer::makeBackgroundPersistFunc() const {
// function must be able to operate in a different thread on a snapshot of
// the data at the time it was created.
return [
categorizerPersistFunc = std::move(categorizerPersistFunc),
examplesCollector = std::move(examplesCollector),
categoryIdMapperClone = std::move(categoryIdMapperClone)
categorizerPersistFuncInner = std::move(categorizerPersistFunc),
examplesCollectorInner = std::move(examplesCollector),
categoryIdMapperCloneInner = std::move(categoryIdMapperClone)
](core::CStatePersistInserter & inserter) {
CSingleFieldDataCategorizer::acceptPersistInserter(
categorizerPersistFunc, examplesCollector, *categoryIdMapperClone, inserter);
categorizerPersistFuncInner, examplesCollectorInner,
*categoryIdMapperCloneInner, inserter);
};
}

Expand Down
14 changes: 7 additions & 7 deletions lib/api/unittest/CAnomalyJobTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,8 @@ BOOST_AUTO_TEST_CASE(testOutputBucketResultsUntilGivenIncompleteInitialBucket) {
"testfiles/testLogErrors.boost.log.ini"));

// Start by creating a detector with non-trivial state
static const core_t::TTime BUCKET_SIZE{900};
static const std::string JOB_ID{"pop_sum_bytes_by_status_over_clientip"};
static const core_t::TTime testBucketSize{900};
static const std::string testJobId{"pop_sum_bytes_by_status_over_clientip"};

// Open the input and output files
std::ifstream inputStrm{inputFileName.c_str()};
Expand All @@ -321,15 +321,15 @@ BOOST_AUTO_TEST_CASE(testOutputBucketResultsUntilGivenIncompleteInitialBucket) {
BOOST_TEST_REQUIRE(jobConfig.initFromFile(configFileName));

model::CAnomalyDetectorModelConfig modelConfig =
model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE, model_t::E_None,
"", 0, false);
model::CAnomalyDetectorModelConfig::defaultConfig(
testBucketSize, model_t::E_None, "", 0, false);

core::CJsonOutputStreamWrapper wrappedOutputStream{outputStrm};

std::string origSnapshotId;
std::size_t numOrigDocs{0};

CTestAnomalyJob origJob{JOB_ID,
CTestAnomalyJob origJob{testJobId,
limits,
jobConfig,
modelConfig,
Expand Down Expand Up @@ -367,7 +367,7 @@ BOOST_AUTO_TEST_CASE(testOutputBucketResultsUntilGivenIncompleteInitialBucket) {
std::size_t numRestoredDocs{0};

CTestAnomalyJob restoredJob{
JOB_ID,
testJobId,
limits,
jobConfig,
modelConfig,
Expand Down Expand Up @@ -879,7 +879,7 @@ BOOST_AUTO_TEST_CASE(testConfigUpdate) {
auto generateRandomAlpha = [](int strLen) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution dis(0, 25);
std::uniform_int_distribution<int> dis(0, 25);

std::string str;
for (int i = 0; i < strLen; ++i) {
Expand Down
9 changes: 5 additions & 4 deletions lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -896,7 +896,8 @@ BOOST_AUTO_TEST_CASE(testRegressionPredictionNumericalOnly, *utf::tolerance(0.00
}};

std::size_t numberExamples{
static_cast<std::size_t>(trainExamples * dataSummarizationFraction) + predictExamples};
static_cast<std::size_t>(static_cast<double>(trainExamples) * dataSummarizationFraction) +
predictExamples};
runAnalyzer(numberExamples, predictExamples, TTask::E_Predict, &restorerSupplier);
readPredictions(outputStream.str(), "target_prediction", actualPredictions);
}
Expand Down Expand Up @@ -992,7 +993,8 @@ BOOST_AUTO_TEST_CASE(testRegressionPredictionNumericalCategoricalMix,
}};

std::size_t numberExamples{
static_cast<std::size_t>(trainExamples * dataSummarizationFraction) + predictExamples};
static_cast<std::size_t>(static_cast<double>(trainExamples) * dataSummarizationFraction) +
predictExamples};
runAnalyzer(numberExamples, predictExamples, TTask::E_Predict, &restorerSupplier);
readPredictions(outputStream.str(), "target_prediction", actualPredictions);
}
Expand Down Expand Up @@ -2232,8 +2234,7 @@ BOOST_AUTO_TEST_CASE(testProgressMonitoringFromRestart) {
TLossFunctionType::E_MseRegression, fieldNames, fieldValues, analyzer, 400);
analyzer.handleRecord(fieldNames, {"", "", "", "", "", "", "", "$"});

TStrVec persistedStates{
splitOnNull(std::stringstream{std::move(persistenceStream->str())})};
TStrVec persistedStates{splitOnNull(std::stringstream{persistenceStream->str()})};

LOG_DEBUG(<< "# states = " << persistedStates.size());

Expand Down
2 changes: 1 addition & 1 deletion lib/api/unittest/CInferenceModelMetadataTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ BOOST_AUTO_TEST_CASE(testDataSummarization) {

// check correct number of rows up to a rounding error
BOOST_REQUIRE_CLOSE_ABSOLUTE(static_cast<double>(dataSummarizationNumRows),
numRows * summarizationFraction, 1.0);
static_cast<double>(numRows) * summarizationFraction, 1.0);
}

BOOST_AUTO_TEST_SUITE_END()
2 changes: 1 addition & 1 deletion lib/api/unittest/CTestAnomalyJob.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ ml::api::CAnomalyJobConfig CTestAnomalyJob::makeJobConfig(const std::string& det
ml::api::CAnomalyJobConfig jobConfig;
jobConfig.analysisConfig().parseDetectorsConfig(obj);
return jobConfig;
}
}
35 changes: 18 additions & 17 deletions lib/core/CDataFrame.cc
Original file line number Diff line number Diff line change
Expand Up @@ -508,8 +508,9 @@ std::size_t CDataFrame::estimateMemoryUsage(bool inMainMemory,
// We use an "uncertainty percentage factor" to account for this.
static constexpr double containerMemoryEstimateUncertaintyPercentage{2.5};

std::size_t additionalMemory{static_cast<std::size_t>(
estimatedMemoryUsage * containerMemoryEstimateUncertaintyPercentage / 100)};
std::size_t additionalMemory{
static_cast<std::size_t>(static_cast<double>(estimatedMemoryUsage) *
containerMemoryEstimateUncertaintyPercentage / 100)};

return estimatedMemoryUsage + additionalMemory;
}
Expand Down Expand Up @@ -550,7 +551,7 @@ bool CDataFrame::parallelApplyToAllRows(std::size_t beginRows,
sliceFuncs.reserve(funcs.size());

for (auto& func : funcs) {
sliceFuncs.push_back([=, &func, &successful](const TRowSlicePtr& slice) mutable {
sliceFuncs.push_back([=, this, &func, &successful](const TRowSlicePtr& slice) mutable {
if (successful.load() == false) {
return;
}
Expand Down Expand Up @@ -637,23 +638,23 @@ bool CDataFrame::sequentialApplyToAllRows(std::size_t beginRows,
// We wait here so at most one slice is copied into memory.
wait_for_valid(backgroundApply);

backgroundApply = async(
defaultAsyncExecutor(),
[ =, &func, readSlice_ = std::move(readSlice) ]() mutable {
backgroundApply = async(defaultAsyncExecutor(), [
=, this, &func, readSlice_ = std::move(readSlice)
]() mutable {

TOptionalPopMaskedRow popMaskedRow;
if (rowMask != nullptr) {
beginSliceRows = *maskedRow;
popMaskedRow = CPopMaskedRow{endSliceRows, maskedRow, endMaskedRows};
}
TOptionalPopMaskedRow popMaskedRow;
if (rowMask != nullptr) {
beginSliceRows = *maskedRow;
popMaskedRow = CPopMaskedRow{endSliceRows, maskedRow, endMaskedRows};
}

this->applyToRowsOfOneSlice(func[0], beginSliceRows, endSliceRows,
popMaskedRow, readSlice_);
this->applyToRowsOfOneSlice(func[0], beginSliceRows, endSliceRows,
popMaskedRow, readSlice_);

if (commitResult) {
(*slice)->write(readSlice_.rows(), readSlice_.docHashes());
}
});
if (commitResult) {
(*slice)->write(readSlice_.rows(), readSlice_.docHashes());
}
});
}
break;
}
Expand Down
12 changes: 6 additions & 6 deletions lib/core/CJsonLogLayout.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ void CJsonLogLayout::operator()(const boost::log::record_view& rec,
json::object writer;
writer[LOGGER_NAME] = LOGGER;

const auto& timeStamp = boost::log::extract<boost::posix_time::ptime>(
boost::log::aux::default_attribute_names::timestamp(), rec)
.get();
const auto timeStamp = boost::log::extract<boost::posix_time::ptime>(
boost::log::aux::default_attribute_names::timestamp(), rec)
.get();
writer[TIMESTAMP_NAME] = (timeStamp - EPOCH).total_milliseconds();

auto level = boost::log::extract<CLogger::ELevel>(
Expand Down Expand Up @@ -102,9 +102,9 @@ void CJsonLogLayout::operator()(const boost::log::record_view& rec,

writer[METHOD_NAME] = methodName;

const auto& fullFileName = boost::log::extract<std::string>(
CLogger::instance().fileAttributeName(), rec)
.get();
const auto fullFileName = boost::log::extract<std::string>(
CLogger::instance().fileAttributeName(), rec)
.get();
writer[FILE_NAME] = CJsonLogLayout::cropPath(fullFileName);

writer[LINE_NAME] =
Expand Down
Loading