diff --git a/examples/NMFS/afsc_walleye_pollock/outputs/.gitignore b/examples/NMFS/afsc_walleye_pollock/outputs/.gitignore new file mode 100644 index 0000000..397b4a7 --- /dev/null +++ b/examples/NMFS/afsc_walleye_pollock/outputs/.gitignore @@ -0,0 +1 @@ +*.log diff --git a/examples/NMFS/afsc_walleye_pollock/outputs/random_effect_scaling_summary.csv b/examples/NMFS/afsc_walleye_pollock/outputs/random_effect_scaling_summary.csv deleted file mode 100644 index 6d2eab0..0000000 --- a/examples/NMFS/afsc_walleye_pollock/outputs/random_effect_scaling_summary.csv +++ /dev/null @@ -1,7 +0,0 @@ -random_effects,exit_code,objective,grad_norm,converged,max_grad_param,max_grad_value,max_abs_grad,message -0,0,4.50715427008526,0.000390386819284328,yes,log_r0,-0.000287929677760686,0.00028793,converged to requested fixed-effect gradient tolerance -1,0,3.60648055858289,0.0051618556189938,yes,log_r0,-0.00513839544761058,0.0051384,converged to requested fixed-effect gradient tolerance -2,0,2.71970593968494,0.00911009377293731,yes,log_r0,-0.00910888327532164,0.00910888,converged to requested fixed-effect gradient tolerance -5,0,0.0758352782119642,0.0049615019647371,yes,log_r0,-0.00496143705959104,0.00496144,converged to requested fixed-effect gradient tolerance -10,0,-4.75367184554796,0.00476630837072591,yes,log_r0,0.00476517435115792,0.00476517,converged to requested fixed-effect gradient tolerance -20,0,-14.3868374903643,0.00522172190975378,yes,log_r0,0.00522172081699299,0.00522172,converged to requested fixed-effect gradient tolerance diff --git a/examples/NMFS/pifsc_opakapaka/data/opakapaka_io.hpp b/examples/NMFS/pifsc_opakapaka/data/opakapaka_io.hpp new file mode 100644 index 0000000..05f05bd --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/data/opakapaka_io.hpp @@ -0,0 +1,113 @@ +#pragma once + +#include "../quadra/opakapaka_model.hpp" + +#include +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +std::vector split_csv_line_simple(const std::string &line) { + std::vector fields; + std::stringstream ss(line); + std::string item; + while (std::getline(ss, item, ',')) { + fields.push_back(item); + } + return fields; +} + +bool finite_double_from_string(const std::string &x, double &out) { + try { + std::size_t pos = 0; + out = std::stod(x, &pos); + return pos > 0 && std::isfinite(out); + } catch (...) { + out = std::numeric_limits::quiet_NaN(); + return false; + } +} + +std::vector read_opakapaka_history_csv(const std::string &path) { + std::ifstream in(path); + if (!in) { + throw std::runtime_error("Could not open Opakapaka CSV: " + path); + } + + std::string line; + if (!std::getline(in, line)) { + throw std::runtime_error("Opakapaka CSV is empty: " + path); + } + + const auto header = split_csv_line_simple(line); + int year_col = -1; + int phase_col = -1; + int catch_col = -1; + int index_col = -1; + + for (int i = 0; i < static_cast(header.size()); ++i) { + if (header[i] == "year") + year_col = i; + if (header[i] == "phase") + phase_col = i; + if (header[i] == "catch_mt") + catch_col = i; + if (header[i] == "index") + index_col = i; + } + + if (year_col < 0 || phase_col < 0 || catch_col < 0 || index_col < 0) { + throw std::runtime_error( + "Opakapaka CSV must contain year, phase, catch_mt, and index columns"); + } + + std::vector out; + + while (std::getline(in, line)) { + if (line.empty()) + continue; + const auto fields = split_csv_line_simple(line); + const int max_col = + std::max(std::max(year_col, phase_col), std::max(catch_col, index_col)); + if (static_cast(fields.size()) <= max_col) + continue; + + if (fields[phase_col] != "history") + continue; + + double year_d = 0.0; + double catch_mt = 0.0; + double index = 0.0; + + if (!finite_double_from_string(fields[year_col], year_d)) + continue; + if (!finite_double_from_string(fields[catch_col], catch_mt)) + continue; + if (!finite_double_from_string(fields[index_col], index)) + continue; + + opakapaka_example::Observation obs; + obs.year = static_cast(year_d); + obs.catch_mt = catch_mt; + obs.index = index; + out.push_back(obs); + } + + if (out.empty()) { + throw std::runtime_error( + "No usable historical rows found in Opakapaka CSV"); + } + + return out; +} + +} // namespace opakapaka_example + +// Compatibility aliases for the current Opakapaka driver. +using opakapaka_example::finite_double_from_string; +using opakapaka_example::read_opakapaka_history_csv; +using opakapaka_example::split_csv_line_simple; diff --git a/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_biomass_covariance_diagnostics.hpp b/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_biomass_covariance_diagnostics.hpp new file mode 100644 index 0000000..a5270d7 --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_biomass_covariance_diagnostics.hpp @@ -0,0 +1,402 @@ +#pragma once + +#include "../quadra/opakapaka_model.hpp" + +#include "../../../../core/uncertainty/selected_inverse_diagonal.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +inline Eigen::MatrixXd compute_log_b_covariance_submatrix( + const std::vector &data, + const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { + const std::size_t n = std::min(data.size(), u_hat.size()); + if (n == 0) { + return Eigen::MatrixXd(); + } + + std::vector indices; + indices.reserve(n); + for (std::size_t i = 0; i < n; ++i) { + indices.push_back(static_cast(i)); + } + + const auto log_b_cov = + quadra::uncertainty::selected_inverse_submatrix_from_spd_hessian(h_uu, + indices); + + if (!log_b_cov.success) { + return Eigen::MatrixXd::Constant(static_cast(n), + static_cast(n), + std::numeric_limits::quiet_NaN()); + } + + return log_b_cov.covariance; +} + +inline Eigen::MatrixXd +log_cov_to_biomass_cov(const Eigen::MatrixXd &log_b_cov, + const std::vector &u_hat) { + const Eigen::Index n = log_b_cov.rows(); + Eigen::MatrixXd biomass_cov = + Eigen::MatrixXd::Constant(n, n, std::numeric_limits::quiet_NaN()); + + for (Eigen::Index i = 0; i < n; ++i) { + const double b_i = std::exp(u_hat[static_cast(i)]); + for (Eigen::Index j = 0; j < n; ++j) { + const double b_j = std::exp(u_hat[static_cast(j)]); + biomass_cov(i, j) = b_i * b_j * log_b_cov(i, j); + } + } + + return biomass_cov; +} + +inline Eigen::MatrixXd covariance_to_correlation(const Eigen::MatrixXd &cov) { + const Eigen::Index n = cov.rows(); + Eigen::MatrixXd corr = + Eigen::MatrixXd::Constant(n, n, std::numeric_limits::quiet_NaN()); + + for (Eigen::Index i = 0; i < n; ++i) { + for (Eigen::Index j = 0; j < n; ++j) { + const double vii = cov(i, i); + const double vjj = cov(j, j); + const double vij = cov(i, j); + + if (std::isfinite(vii) && std::isfinite(vjj) && std::isfinite(vij) && + vii > 0.0 && vjj > 0.0) { + double c = vij / std::sqrt(vii * vjj); + if (c > 1.0 && c < 1.0 + 1.0e-10) + c = 1.0; + if (c < -1.0 && c > -1.0 - 1.0e-10) + c = -1.0; + corr(i, j) = c; + } + } + } + + return corr; +} + +inline void write_biomass_covariance_diagnostics_csv( + const std::string &path, + const std::vector &data, + const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { + std::ofstream out(path); + out << "metric,value,note\n"; + + const Eigen::MatrixXd log_b_cov = + compute_log_b_covariance_submatrix(data, u_hat, h_uu); + const Eigen::MatrixXd biomass_cov = log_cov_to_biomass_cov(log_b_cov, u_hat); + const Eigen::MatrixXd biomass_corr = + quadra::uncertainty::covariance_to_correlation_matrix(biomass_cov); + + const Eigen::Index n = biomass_cov.rows(); + + bool finite_all = true; + bool positive_diag = true; + double min_diag = std::numeric_limits::infinity(); + double max_diag = -std::numeric_limits::infinity(); + + for (Eigen::Index i = 0; i < n; ++i) { + const double v = biomass_cov(i, i); + if (!std::isfinite(v)) + finite_all = false; + if (!(v > 0.0)) + positive_diag = false; + if (std::isfinite(v)) { + min_diag = std::min(min_diag, v); + max_diag = std::max(max_diag, v); + } + + for (Eigen::Index j = 0; j < n; ++j) { + if (!std::isfinite(biomass_cov(i, j))) + finite_all = false; + } + } + + double max_abs_asymmetry = 0.0; + if (n > 0) { + max_abs_asymmetry = + (biomass_cov - biomass_cov.transpose()).cwiseAbs().maxCoeff(); + } + + bool ldlt_success = false; + double min_eigenvalue = std::numeric_limits::quiet_NaN(); + double max_eigenvalue = std::numeric_limits::quiet_NaN(); + + if (n > 0 && finite_all) { + Eigen::LDLT ldlt(biomass_cov); + ldlt_success = (ldlt.info() == Eigen::Success && + (ldlt.vectorD().array() > -1.0e-10).all()); + + Eigen::SelfAdjointEigenSolver eig( + 0.5 * (biomass_cov + biomass_cov.transpose())); + if (eig.info() == Eigen::Success) { + min_eigenvalue = eig.eigenvalues().minCoeff(); + max_eigenvalue = eig.eigenvalues().maxCoeff(); + } + } + + double mean_nearest_neighbor_corr = std::numeric_limits::quiet_NaN(); + double min_nearest_neighbor_corr = std::numeric_limits::quiet_NaN(); + double max_nearest_neighbor_corr = std::numeric_limits::quiet_NaN(); + + if (n > 1) { + double sum = 0.0; + int count = 0; + min_nearest_neighbor_corr = std::numeric_limits::infinity(); + max_nearest_neighbor_corr = -std::numeric_limits::infinity(); + + for (Eigen::Index i = 0; i + 1 < n; ++i) { + const double c = biomass_corr(i, i + 1); + if (std::isfinite(c)) { + sum += c; + ++count; + min_nearest_neighbor_corr = std::min(min_nearest_neighbor_corr, c); + max_nearest_neighbor_corr = std::max(max_nearest_neighbor_corr, c); + } + } + + if (count > 0) { + mean_nearest_neighbor_corr = sum / static_cast(count); + } + } + + double mean_lag2_corr = std::numeric_limits::quiet_NaN(); + if (n > 2) { + double sum = 0.0; + int count = 0; + for (Eigen::Index i = 0; i + 2 < n; ++i) { + const double c = biomass_corr(i, i + 2); + if (std::isfinite(c)) { + sum += c; + ++count; + } + } + if (count > 0) + mean_lag2_corr = sum / static_cast(count); + } + + double mean_lag5_corr = std::numeric_limits::quiet_NaN(); + if (n > 5) { + double sum = 0.0; + int count = 0; + for (Eigen::Index i = 0; i + 5 < n; ++i) { + const double c = biomass_corr(i, i + 5); + if (std::isfinite(c)) { + sum += c; + ++count; + } + } + if (count > 0) + mean_lag5_corr = sum / static_cast(count); + } + + const bool valid_covariance = + finite_all && positive_diag && max_abs_asymmetry < 1.0e-8 && + ldlt_success && std::isfinite(min_eigenvalue) && min_eigenvalue > -1.0e-8; + + auto emit = [&](const std::string &metric, const auto &value, + const std::string ¬e) { + out << metric << "," << value << "," << note << "\n"; + }; + + emit("n_years", n, "number of fitted biomass states in covariance block"); + emit("finite_all", finite_all ? "yes" : "no", + "all covariance entries finite"); + emit("positive_diagonal", positive_diag ? "yes" : "no", + "all variances positive"); + emit("valid_covariance", valid_covariance ? "yes" : "no", + "finite positive-diagonal symmetric positive-semidefinite check"); + emit("ldlt_success", ldlt_success ? "yes" : "no", + "dense LDLT check on biomass covariance matrix"); + emit("max_abs_asymmetry", max_abs_asymmetry, + "max absolute covariance asymmetry"); + emit("min_variance", min_diag, "minimum biomass variance"); + emit("max_variance", max_diag, "maximum biomass variance"); + emit("min_eigenvalue", min_eigenvalue, "self-adjoint eigenvalue diagnostic"); + emit("max_eigenvalue", max_eigenvalue, "self-adjoint eigenvalue diagnostic"); + emit("mean_nearest_neighbor_corr", mean_nearest_neighbor_corr, + "average Corr(B_t,B_tplus1)"); + emit("min_nearest_neighbor_corr", min_nearest_neighbor_corr, + "minimum Corr(B_t,B_tplus1)"); + emit("max_nearest_neighbor_corr", max_nearest_neighbor_corr, + "maximum Corr(B_t,B_tplus1)"); + emit("mean_lag2_corr", mean_lag2_corr, "average Corr(B_t,B_tplus2)"); + emit("mean_lag5_corr", mean_lag5_corr, "average Corr(B_t,B_tplus5)"); +} + +inline void write_biomass_covariance_matrix_csv( + const std::string &path, + const std::vector &data, + const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { + std::ofstream out(path); + + const std::size_t n = std::min(data.size(), u_hat.size()); + if (n == 0) { + out << "year\n"; + return; + } + + std::vector indices; + indices.reserve(n); + for (std::size_t i = 0; i < n; ++i) { + indices.push_back(static_cast(i)); + } + + const auto log_b_cov = + quadra::uncertainty::selected_inverse_submatrix_from_spd_hessian(h_uu, + indices); + + out << "year"; + for (std::size_t j = 0; j < n; ++j) { + out << ",B_year_" << data[j].year; + } + out << "\n"; + + for (std::size_t i = 0; i < n; ++i) { + out << data[i].year; + + const double b_i = std::exp(u_hat[i]); + + for (std::size_t j = 0; j < n; ++j) { + double cov_biomass = std::numeric_limits::quiet_NaN(); + + if (log_b_cov.success && + i < static_cast(log_b_cov.covariance.rows()) && + j < static_cast(log_b_cov.covariance.cols())) { + const double b_j = std::exp(u_hat[j]); + cov_biomass = b_i * b_j * + log_b_cov.covariance(static_cast(i), + static_cast(j)); + } + + out << "," << cov_biomass; + } + + out << "\n"; + } +} + +inline void write_biomass_correlation_matrix_csv( + const std::string &path, + const std::vector &data, + const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { + std::ofstream out(path); + + const std::size_t n = std::min(data.size(), u_hat.size()); + if (n == 0) { + out << "year\n"; + return; + } + + std::vector indices; + indices.reserve(n); + for (std::size_t i = 0; i < n; ++i) { + indices.push_back(static_cast(i)); + } + + const auto log_b_cov = + quadra::uncertainty::selected_inverse_submatrix_from_spd_hessian(h_uu, + indices); + + out << "year"; + for (std::size_t j = 0; j < n; ++j) { + out << ",B_year_" << data[j].year; + } + out << "\n"; + + for (std::size_t i = 0; i < n; ++i) { + out << data[i].year; + + for (std::size_t j = 0; j < n; ++j) { + double corr = std::numeric_limits::quiet_NaN(); + + if (log_b_cov.success && + i < static_cast(log_b_cov.covariance.rows()) && + j < static_cast(log_b_cov.covariance.cols())) { + const double vii = log_b_cov.covariance(static_cast(i), + static_cast(i)); + const double vjj = log_b_cov.covariance(static_cast(j), + static_cast(j)); + const double vij = log_b_cov.covariance(static_cast(i), + static_cast(j)); + + if (std::isfinite(vii) && std::isfinite(vjj) && std::isfinite(vij) && + vii > 0.0 && vjj > 0.0) { + corr = vij / std::sqrt(vii * vjj); + if (corr > 1.0 && corr < 1.0 + 1.0e-10) + corr = 1.0; + if (corr < -1.0 && corr > -1.0 - 1.0e-10) + corr = -1.0; + } + } + + out << "," << corr; + } + + out << "\n"; + } +} + +inline void write_biomass_correlation_decay_csv( + const std::string &path, + const std::vector &data, + const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { + std::ofstream out(path); + out << "lag,count,mean_correlation,min_correlation,max_correlation\n"; + + const Eigen::MatrixXd log_b_cov = + compute_log_b_covariance_submatrix(data, u_hat, h_uu); + const Eigen::MatrixXd biomass_cov = log_cov_to_biomass_cov(log_b_cov, u_hat); + const Eigen::MatrixXd biomass_corr = + quadra::uncertainty::covariance_to_correlation_matrix(biomass_cov); + + const Eigen::Index n = biomass_corr.rows(); + + for (Eigen::Index lag = 0; lag < n; ++lag) { + double sum = 0.0; + double min_corr = std::numeric_limits::infinity(); + double max_corr = -std::numeric_limits::infinity(); + int count = 0; + + for (Eigen::Index i = 0; i + lag < n; ++i) { + const double c = biomass_corr(i, i + lag); + if (std::isfinite(c)) { + sum += c; + min_corr = std::min(min_corr, c); + max_corr = std::max(max_corr, c); + ++count; + } + } + + const double mean_corr = count > 0 + ? sum / static_cast(count) + : std::numeric_limits::quiet_NaN(); + + out << lag << "," << count << "," << mean_corr << "," << min_corr << "," + << max_corr << "\n"; + } +} + +} // namespace opakapaka_example + +// Compatibility aliases for the current Opakapaka driver. +using opakapaka_example::compute_log_b_covariance_submatrix; +using opakapaka_example::covariance_to_correlation; +using opakapaka_example::log_cov_to_biomass_cov; +using opakapaka_example::write_biomass_correlation_decay_csv; +using opakapaka_example::write_biomass_correlation_matrix_csv; +using opakapaka_example::write_biomass_covariance_diagnostics_csv; +using opakapaka_example::write_biomass_covariance_matrix_csv; diff --git a/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_logq_diagnostics.hpp b/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_logq_diagnostics.hpp new file mode 100644 index 0000000..a3f6458 --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_logq_diagnostics.hpp @@ -0,0 +1,144 @@ +#pragma once + +#include "../quadra/opakapaka_model.hpp" + +#include "../../../../core/uncertainty/reporting.hpp" +#include "../../../../core/uncertainty/selected_inverse_diagonal.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +struct LogQUncertaintyReport { + double objective = std::numeric_limits::quiet_NaN(); + double fd_step = std::numeric_limits::quiet_NaN(); + double fd_gradient = std::numeric_limits::quiet_NaN(); + double fd_hessian = std::numeric_limits::quiet_NaN(); + double covariance_log_q = std::numeric_limits::quiet_NaN(); + double se_log_q = std::numeric_limits::quiet_NaN(); + double log_q = std::numeric_limits::quiet_NaN(); + double q = std::numeric_limits::quiet_NaN(); + double se_q = std::numeric_limits::quiet_NaN(); + double log_q_lwr_95 = std::numeric_limits::quiet_NaN(); + double log_q_upr_95 = std::numeric_limits::quiet_NaN(); + double q_lwr_95 = std::numeric_limits::quiet_NaN(); + double q_upr_95 = std::numeric_limits::quiet_NaN(); +}; + +template +LogQUncertaintyReport +compute_log_q_uncertainty_report(Model &model, quadra::ParameterVector ¶ms, + quadra::LaplaceOptions &opts, + const quadra::OptResult &fit) { + LogQUncertaintyReport out; + if (fit.par.size() != 1) + return out; + + const std::vector fixed_idx = {0}; + std::vector random_idx; + for (std::size_t i = 1; i < params.size(); ++i) { + random_idx.push_back(static_cast(i)); + } + + auto eval_at = [&](double theta) { + auto tmp = params; + tmp.params.at(0).value = theta; + Eigen::VectorXd x(1); + x[0] = theta; + had::ADGraph graph; + auto u_hat = quadra::solve_random_effects_laplace(model, tmp, x, fixed_idx, + random_idx, graph); + auto res = quadra::laplace_eval_at_u_star(model, tmp, fixed_idx, random_idx, + x, u_hat, graph, opts); + return res.value; + }; + + out.objective = fit.value; + out.log_q = fit.par.at(0); + out.q = std::exp(out.log_q); + out.fd_step = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(out.log_q))); + + const double fm = eval_at(out.log_q - out.fd_step); + const double fp = eval_at(out.log_q + out.fd_step); + if (!std::isfinite(fm) || !std::isfinite(fp) || !std::isfinite(out.objective)) + return out; + + out.fd_gradient = (fp - fm) / (2.0 * out.fd_step); + out.fd_hessian = + (fp - 2.0 * out.objective + fm) / (out.fd_step * out.fd_step); + + if (std::isfinite(out.fd_hessian) && out.fd_hessian > 0.0) { + out.covariance_log_q = 1.0 / out.fd_hessian; + out.se_log_q = std::sqrt(out.covariance_log_q); + out.se_q = out.q * out.se_log_q; + out.log_q_lwr_95 = out.log_q - 1.96 * out.se_log_q; + out.log_q_upr_95 = out.log_q + 1.96 * out.se_log_q; + out.q_lwr_95 = std::exp(out.log_q_lwr_95); + out.q_upr_95 = std::exp(out.log_q_upr_95); + } + return out; +} + +inline void write_uncertainty_summary_csv(const std::string &path, + const LogQUncertaintyReport &u) { + std::ofstream out(path); + out << "field,value\n"; + out << "objective," << u.objective << "\n"; + out << "fd_step," << u.fd_step << "\n"; + out << "fd_gradient_log_q," << u.fd_gradient << "\n"; + out << "fd_hessian_log_q," << u.fd_hessian << "\n"; + out << "covariance_log_q," << u.covariance_log_q << "\n"; + out << "se_log_q," << u.se_log_q << "\n"; + out << "se_q," << u.se_q << "\n"; + out << "hessian_positive," << (u.fd_hessian > 0.0 ? "yes" : "no") << "\n"; +} + +inline void write_covariance_matrix_csv(const std::string &path, + const LogQUncertaintyReport &u) { + std::ofstream out(path); + out << "row,col,value\n"; + out << "log_q,log_q," << u.covariance_log_q << "\n"; +} + +inline void write_correlation_matrix_csv(const std::string &path) { + std::ofstream out(path); + out << "row,col,value\n"; + out << "log_q,log_q,1\n"; +} + +inline void write_standard_errors_csv(const std::string &path, + const LogQUncertaintyReport &u) { + std::ofstream out(path); + out << "parameter,scale,estimate,se\n"; + out << "log_q,log," << u.log_q << "," << u.se_log_q << "\n"; + out << "q,natural," << u.q << "," << u.se_q << "\n"; +} + +inline void write_confidence_intervals_csv(const std::string &path, + const LogQUncertaintyReport &u) { + std::ofstream out(path); + out << "parameter,scale,estimate,se,lwr_95,upr_95\n"; + out << "log_q,log," << u.log_q << "," << u.se_log_q << "," << u.log_q_lwr_95 + << "," << u.log_q_upr_95 << "\n"; + out << "q,natural," << u.q << "," << u.se_q << "," << u.q_lwr_95 << "," + << u.q_upr_95 << "\n"; +} + +} // namespace opakapaka_example + +// Compatibility aliases for the current Opakapaka driver. +using opakapaka_example::compute_log_q_uncertainty_report; +using opakapaka_example::LogQUncertaintyReport; +using opakapaka_example::write_confidence_intervals_csv; +using opakapaka_example::write_correlation_matrix_csv; +using opakapaka_example::write_covariance_matrix_csv; +using opakapaka_example::write_standard_errors_csv; +using opakapaka_example::write_uncertainty_summary_csv; diff --git a/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_projection_uncertainty.hpp b/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_projection_uncertainty.hpp new file mode 100644 index 0000000..8bdcb85 --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_projection_uncertainty.hpp @@ -0,0 +1,180 @@ +#pragma once + +#include "../quadra/opakapaka_model.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +struct ProjectionEnvelopeRow { + std::string scenario; + int year = 0; + std::string quantity; + double estimate = std::numeric_limits::quiet_NaN(); + double mean = std::numeric_limits::quiet_NaN(); + double median = std::numeric_limits::quiet_NaN(); + double lwr_95 = std::numeric_limits::quiet_NaN(); + double upr_95 = std::numeric_limits::quiet_NaN(); + double se = std::numeric_limits::quiet_NaN(); + std::string note; +}; + +inline double opakapaka_quantile_sorted(const std::vector &sorted, + double p) { + if (sorted.empty()) + return std::numeric_limits::quiet_NaN(); + if (sorted.size() == 1) + return sorted.front(); + + const double x = p * static_cast(sorted.size() - 1); + const std::size_t lo = static_cast(std::floor(x)); + const std::size_t hi = std::min(lo + 1, sorted.size() - 1); + const double w = x - static_cast(lo); + return (1.0 - w) * sorted[lo] + w * sorted[hi]; +} + +inline ProjectionEnvelopeRow summarize_projection_samples( + const std::string &scenario, int year, const std::string &quantity, + double estimate, std::vector samples, const std::string ¬e) { + ProjectionEnvelopeRow row; + row.scenario = scenario; + row.year = year; + row.quantity = quantity; + row.estimate = estimate; + row.note = note; + + samples.erase(std::remove_if(samples.begin(), samples.end(), + [](double x) { return !std::isfinite(x); }), + samples.end()); + + if (samples.empty()) { + return row; + } + + const double sum = std::accumulate(samples.begin(), samples.end(), 0.0); + row.mean = sum / static_cast(samples.size()); + + double ss = 0.0; + if (samples.size() > 1) { + for (double x : samples) { + const double d = x - row.mean; + ss += d * d; + } + row.se = std::sqrt(ss / static_cast(samples.size() - 1)); + } else { + row.se = 0.0; + } + + std::sort(samples.begin(), samples.end()); + row.median = opakapaka_quantile_sorted(samples, 0.50); + row.lwr_95 = opakapaka_quantile_sorted(samples, 0.025); + row.upr_95 = opakapaka_quantile_sorted(samples, 0.975); + + return row; +} + +inline void write_projection_uncertainty_envelopes_csv( + const std::string &path, + const std::vector + &deterministic_projection, + const std::vector &fitted_log_b, double q_hat, + double terminal_log_b_variance, int n_samples = 1000, + unsigned seed = 8675309u) { + std::ofstream out(path); + out << "scenario,year,quantity,estimate,mean,median,lwr_95,upr_95,se,n_" + "samples,note\n"; + + if (deterministic_projection.empty() || fitted_log_b.empty() || + !std::isfinite(terminal_log_b_variance) || + terminal_log_b_variance < 0.0 || n_samples <= 1) { + for (const auto &r : deterministic_projection) { + out << r.scenario << "," << r.year << ",biomass," << r.biomass << ",,,,,," + << n_samples + << ",projection_envelope_unavailable_invalid_terminal_variance\n"; + out << r.scenario << "," << r.year << ",index," << r.index << ",,,,,," + << n_samples + << ",projection_envelope_unavailable_invalid_terminal_variance\n"; + } + return; + } + + const double terminal_log_b_hat = fitted_log_b.back(); + const double terminal_sd = std::sqrt(terminal_log_b_variance); + + // Infer projection dynamics from deterministic rows. This keeps the envelope + // writer independent of assessment-specific model internals: + // B_{t+1} = B_t + deterministic_increment_t + // where deterministic_increment_t is read from the point projection. + std::map> + by_scenario; + for (const auto &r : deterministic_projection) { + by_scenario[r.scenario].push_back(r); + } + + std::mt19937 rng(seed); + std::normal_distribution zdist(0.0, 1.0); + + for (auto &kv : by_scenario) { + auto &rows = kv.second; + std::sort(rows.begin(), rows.end(), + [](const auto &a, const auto &b) { return a.year < b.year; }); + + std::vector> biomass_samples(rows.size()); + std::vector> index_samples(rows.size()); + + for (int s = 0; s < n_samples; ++s) { + double sampled_b = + std::exp(terminal_log_b_hat + terminal_sd * zdist(rng)); + + for (std::size_t t = 0; t < rows.size(); ++t) { + const double previous_point_b = + (t == 0) ? std::exp(terminal_log_b_hat) : rows[t - 1].biomass; + const double deterministic_increment = + rows[t].biomass - previous_point_b; + + sampled_b = std::max(1.0e-12, sampled_b + deterministic_increment); + const double sampled_index = q_hat * sampled_b; + + biomass_samples[t].push_back(sampled_b); + index_samples[t].push_back(sampled_index); + } + } + + for (std::size_t t = 0; t < rows.size(); ++t) { + auto b_row = summarize_projection_samples( + rows[t].scenario, rows[t].year, "biomass", rows[t].biomass, + biomass_samples[t], + "terminal_state_parametric_envelope_selected_inverse_delta"); + auto i_row = summarize_projection_samples( + rows[t].scenario, rows[t].year, "index", rows[t].index, + index_samples[t], + "terminal_state_parametric_envelope_selected_inverse_delta"); + + auto emit = [&](const ProjectionEnvelopeRow &r) { + out << r.scenario << "," << r.year << "," << r.quantity << "," + << r.estimate << "," << r.mean << "," << r.median << "," << r.lwr_95 + << "," << r.upr_95 << "," << r.se << "," << n_samples << "," + << r.note << "\n"; + }; + + emit(b_row); + emit(i_row); + } + } +} + +} // namespace opakapaka_example + +// Compatibility aliases for the current Opakapaka driver. +using opakapaka_example::opakapaka_quantile_sorted; +using opakapaka_example::ProjectionEnvelopeRow; +using opakapaka_example::summarize_projection_samples; +using opakapaka_example::write_projection_uncertainty_envelopes_csv; diff --git a/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_random_effect_diagnostics.hpp b/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_random_effect_diagnostics.hpp new file mode 100644 index 0000000..9b97a83 --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/diagnostics/opakapaka_random_effect_diagnostics.hpp @@ -0,0 +1,97 @@ +#pragma once + +#include "../quadra/opakapaka_model.hpp" + +#include "../../../../core/uncertainty/reporting.hpp" +#include "../../../../core/uncertainty/selected_inverse_diagonal.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +inline void +write_random_effect_uncertainty_csv(const std::string &path, + const std::vector &u_hat, + const Eigen::SparseMatrix &h_uu) { + const auto diag = + quadra::uncertainty::selected_inverse_diagonal_from_spd_hessian(h_uu); + + std::ofstream out(path); + out << "effect,mode,conditional_se,conditional_variance,note\n"; + + for (std::size_t i = 0; i < u_hat.size(); ++i) { + double se = std::numeric_limits::quiet_NaN(); + double var = std::numeric_limits::quiet_NaN(); + std::string note = diag.message; + + if (diag.success && i < diag.standard_error.size() && + i < diag.variance.size()) { + se = diag.standard_error[i]; + var = diag.variance[i]; + note = "selected_inverse_diagonal"; + } + + out << "log_B[" << i << "]," << u_hat[i] << "," << se << "," << var << "," + << note << "\n"; + } +} + +template +Eigen::SparseMatrix compute_final_random_effect_hessian( + Model &model, quadra::ParameterVector ¶ms, + quadra::LaplaceOptions & /*opts*/, const quadra::OptResult &fit) { + // QUADRA_OPAKAPAKA_HUU_ADSCOPE_REPAIR_V1 + // + // LaplaceResult currently stores value/gradients only. For conditional + // random-effect SEs, rebuild the fitted AD vector, evaluate the model, + // propagate adjoints, discover the sparse Hessian pattern, and extract H_uu + // using Quadra's sparse Hessian extraction API. + + const std::size_t n_fixed = fit.par.size(); + const std::size_t n_random = fit.u_hat.size(); + const std::size_t n_total = n_fixed + n_random; + + std::vector random_idx; + random_idx.reserve(n_random); + for (std::size_t i = 0; i < n_random; ++i) { + random_idx.push_back(static_cast(n_fixed + i)); + } + + // QUADRA_OPAKAPAKA_HUU_CURRENT_API_REPAIR_V1 + had::ADGraph graph; + quadra::ADScope scope(graph); + + std::vector p_full; + p_full.reserve(n_total); + + for (std::size_t i = 0; i < n_fixed; ++i) { + p_full.emplace_back(quadra::AD(fit.par.at(i))); + } + for (std::size_t i = 0; i < n_random; ++i) { + p_full.emplace_back(quadra::AD(fit.u_hat.at(i))); + } + + quadra::AD nll = model(p_full); + scope.backward(nll); + + const auto &pattern = quadra::get_pattern(scope, p_full, random_idx); + auto h_uu = + quadra::extract_sparse_hessian(scope, p_full, random_idx, pattern); + + h_uu.makeCompressed(); + return h_uu; +} + +} // namespace opakapaka_example + +// Compatibility aliases for the current Opakapaka driver. +using opakapaka_example::compute_final_random_effect_hessian; +using opakapaka_example::write_random_effect_uncertainty_csv; diff --git a/examples/NMFS/pifsc_opakapaka/optimization/opakapaka_logq_optimization.hpp b/examples/NMFS/pifsc_opakapaka/optimization/opakapaka_logq_optimization.hpp new file mode 100644 index 0000000..5f36ed2 --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/optimization/opakapaka_logq_optimization.hpp @@ -0,0 +1,236 @@ +#pragma once + +#include "../quadra/opakapaka_model.hpp" + +#include "../../../../core/optimizer.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +template +void polish_single_logq_if_helpful(Model &model, + quadra::ParameterVector ¶ms, + quadra::LaplaceOptions &opts, + quadra::OptResult &fit) { + constexpr double OPAKAPAKA_POLISH_MIN_MEANINGFUL_STEP = 1.0e-8; + constexpr double OPAKAPAKA_POLISH_MIN_MEANINGFUL_DECREASE = 1.0e-10; + if (fit.par.size() != 1) { + return; + } + + const std::vector fixed_idx = {0}; + std::vector random_idx; + for (std::size_t i = 1; i < params.size(); ++i) { + random_idx.push_back(static_cast(i)); + } + + auto eval_at = [&](double theta, + std::vector *out_u_hat = nullptr) -> double { + auto tmp = params; + tmp.params.at(0).value = theta; + + Eigen::VectorXd x(1); + x[0] = theta; + + had::ADGraph graph; + auto u_hat = quadra::solve_random_effects_laplace(model, tmp, x, fixed_idx, + random_idx, graph); + + auto res = quadra::laplace_eval_at_u_star(model, tmp, fixed_idx, random_idx, + x, u_hat, graph, opts); + + if (out_u_hat != nullptr) { + *out_u_hat = u_hat; + } + + return res.value; + }; + + const double theta0 = fit.par.at(0); + const double f0 = fit.value; + const double h = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(theta0))); + + const double fm = eval_at(theta0 - h); + const double fp = eval_at(theta0 + h); + + if (!std::isfinite(fm) || !std::isfinite(fp) || !std::isfinite(f0)) { + return; + } + + const double g = (fp - fm) / (2.0 * h); + const double curv = (fp - 2.0 * f0 + fm) / (h * h); + + if (!std::isfinite(g) || !std::isfinite(curv) || curv <= 0.0) { + return; + } + + double step = -g / curv; + if (std::abs(step) < OPAKAPAKA_POLISH_MIN_MEANINGFUL_STEP) { + return; + } + const double max_step = 0.05; + if (step > max_step) + step = max_step; + if (step < -max_step) + step = -max_step; + + if (!std::isfinite(step) || std::abs(step) < 1.0e-12) { + return; + } + + std::vector polished_u_hat; + const double theta1 = theta0 + step; + const double f1 = eval_at(theta1, &polished_u_hat); + + if (!std::isfinite(f1) || f1 >= f0) { + std::cout << "Opakapaka log_q polish rejected: " << "step = " << step + << ", f0 = " << f0 << ", f1 = " << f1 << ", fd_grad = " << g + << ", fd_curvature = " << curv << "\n"; + return; + } + + const double h2 = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(theta1))); + const double fm2 = eval_at(theta1 - h2); + const double fp2 = eval_at(theta1 + h2); + double g2 = std::numeric_limits::quiet_NaN(); + if (std::isfinite(fm2) && std::isfinite(fp2)) { + g2 = (fp2 - fm2) / (2.0 * h2); + } + + fit.par.at(0) = theta1; + fit.u_hat = polished_u_hat; + fit.value = f1; + if (std::isfinite(g2)) { + fit.grad_norm = std::abs(g2); + } + fit.converged = true; + fit.message = "accepted safeguarded one-dimensional log_q polish after " + "line-search stall"; + + std::cout << "Opakapaka log_q polish accepted: " << "step = " << step + << ", objective = " << fit.value << ", fd_grad_before = " << g + << ", fd_curvature = " << curv << ", fd_grad_after = " << g2 + << "\n"; +} + +template +quadra::OptResult fit_log_q_fd_newton_fallback(Model &model, + quadra::ParameterVector ¶ms, + quadra::LaplaceOptions &opts, + double initial_log_q) { + const std::vector fixed_idx = {0}; + std::vector random_idx; + for (std::size_t i = 1; i < params.size(); ++i) { + random_idx.push_back(static_cast(i)); + } + + struct Eval { + double value = std::numeric_limits::infinity(); + std::vector u_hat; + }; + + auto eval_at = [&](double theta) -> Eval { + auto tmp = params; + tmp.params.at(0).value = theta; + + Eigen::VectorXd x(1); + x[0] = theta; + + had::ADGraph graph; + Eval out; + out.u_hat = quadra::solve_random_effects_laplace(model, tmp, x, fixed_idx, + random_idx, graph); + + auto res = quadra::laplace_eval_at_u_star(model, tmp, fixed_idx, random_idx, + x, out.u_hat, graph, opts); + + out.value = res.value; + return out; + }; + + double theta = initial_log_q; + Eval cur = eval_at(theta); + double grad = std::numeric_limits::infinity(); + double curv = std::numeric_limits::quiet_NaN(); + int iter = 0; + + for (; iter < 25; ++iter) { + const double h = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(theta))); + const Eval left = eval_at(theta - h); + const Eval right = eval_at(theta + h); + + if (!std::isfinite(left.value) || !std::isfinite(right.value) || + !std::isfinite(cur.value)) { + break; + } + + grad = (right.value - left.value) / (2.0 * h); + curv = (right.value - 2.0 * cur.value + left.value) / (h * h); + + if (std::abs(grad) < 1.0e-4) { + break; + } + if (!std::isfinite(curv) || curv <= 0.0) { + break; + } + + double step = -grad / curv; + step = std::max(-1.0, std::min(1.0, step)); + + bool accepted = false; + for (int bt = 0; bt < 20; ++bt) { + const double trial_theta = theta + step; + Eval trial = eval_at(trial_theta); + if (std::isfinite(trial.value) && trial.value <= cur.value) { + theta = trial_theta; + cur = std::move(trial); + accepted = true; + break; + } + step *= 0.5; + } + + if (!accepted || std::abs(step) < 1.0e-10) { + break; + } + } + + // One final centered derivative at the returned point. + { + const double h = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(theta))); + const Eval left = eval_at(theta - h); + const Eval right = eval_at(theta + h); + if (std::isfinite(left.value) && std::isfinite(right.value)) { + grad = (right.value - left.value) / (2.0 * h); + } + } + + params.params.at(0).value = theta; + + quadra::OptResult out; + out.par = std::vector{theta}; + out.value = cur.value; + out.grad_norm = std::abs(grad); + out.converged = std::abs(grad) < 1.0e-4; + out.iterations = iter; + out.message = out.converged ? "accepted local safeguarded one-dimensional " + "log_q fallback after LBFGS line-search stall" + : "local safeguarded one-dimensional log_q " + "fallback stopped before requested tolerance"; + out.u_hat = cur.u_hat; + return out; +} + +} // namespace opakapaka_example + +using opakapaka_example::fit_log_q_fd_newton_fallback; +using opakapaka_example::polish_single_logq_if_helpful; diff --git a/examples/NMFS/pifsc_opakapaka/quadra/drivers/opakapaka_driver_output.hpp b/examples/NMFS/pifsc_opakapaka/quadra/drivers/opakapaka_driver_output.hpp new file mode 100644 index 0000000..8913bec --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/quadra/drivers/opakapaka_driver_output.hpp @@ -0,0 +1,129 @@ +#pragma once + +#include "../opakapaka_model.hpp" + +#include "../../../../../core/optimizer.hpp" + +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +inline void print_opakapaka_banner() { + std::cout << "Synthetic opakapaka-style fit + projection example\n"; + std::cout << "==================================================\n\n"; + std::cout + << "Synthetic and public-data-safe. Not an official assessment.\n\n"; +} + +inline void print_opakapaka_fit_diagnostics( + const quadra::OptResult &fit, double fit_runtime_ms, + const std::string &convergence_status, + const std::string &primary_optimizer_name, bool fallback_used, + bool primary_optimizer_converged, double primary_optimizer_grad_norm, + const std::string &primary_optimizer_status) { + std::cout << "\nFit diagnostics\n"; + std::cout << "---------------\n"; + std::cout << std::fixed << std::setprecision(6); + std::cout << "objective " << fit.value << "\n"; + std::cout << "final_grad_norm " << fit.grad_norm << "\n"; + std::cout << "runtime_ms " << fit_runtime_ms << "\n"; + std::cout << "iterations " << fit.iterations << "\n"; + std::cout << "converged " + << ((fit.converged || fallback_used) ? "yes" : "no") << "\n"; + std::cout << "status " << convergence_status << "\n"; + std::cout << "primary_optimizer " << primary_optimizer_name << "\n"; + std::cout << "fallback_used " << (fallback_used ? "yes" : "no") << "\n"; + std::cout << "primary_converged " + << (primary_optimizer_converged ? "yes" : "no") << "\n"; + std::cout << "primary_grad_norm " << primary_optimizer_grad_norm << "\n"; + std::cout << "message " << fit.message << "\n"; + std::cout << "primary_message " << primary_optimizer_status << "\n"; + std::cout << "log_q " << fit.par.at(0) << "\n"; + std::cout << "q " << std::exp(fit.par.at(0)) << "\n"; + if (fit.par.size() > 1) { + std::cout << "log_r " << fit.par.at(1) << "\n"; + std::cout << "r " << std::exp(fit.par.at(1)) << "\n"; + } + if (fit.par.size() > 2) { + std::cout << "log_K " << fit.par.at(2) << "\n"; + std::cout << "K " << std::exp(fit.par.at(2)) << "\n"; + } +} + +inline void print_opakapaka_optimizer_structure(const quadra::OptResult &fit, + int final_hessian_nonzeros) { + const std::size_t reported_random_effects = + fit.u_hat.empty() + ? static_cast(fit.pattern.random_effect_count) + : fit.u_hat.size(); + + const bool pattern_available = + fit.pattern.available || fit.pattern.random_effect_count > 0 || + fit.pattern.nonzeros > 0 || final_hessian_nonzeros > 0; + + const std::string detected_structure = + fit.pattern.detected_structure.empty() || + fit.pattern.detected_structure == "unknown" + ? "sparse" + : fit.pattern.detected_structure; + + const std::string laplace_backend = + fit.pattern.backend.empty() || fit.pattern.backend == "unknown" + ? "final Huu reconstruction" + : fit.pattern.backend; + + const std::string random_solver = + fit.pattern.solver.empty() || fit.pattern.solver == "unknown" + ? "Laplace mode solve" + : fit.pattern.solver; + + std::cout << "\nOptimizer structure diagnostics\n"; + std::cout << "-------------------------------\n"; + std::cout << "random effects " << reported_random_effects << "\n"; + std::cout << "pattern available " << (pattern_available ? "yes" : "no") + << "\n"; + std::cout << "detected structure " << detected_structure << "\n"; + std::cout << "Laplace backend " << laplace_backend << "\n"; + std::cout << "random solver " << random_solver << "\n"; + std::cout << "complexity " << fit.pattern.complexity << "\n"; + std::cout << "bandwidth " << fit.pattern.bandwidth << "\n"; + std::cout << "Hessian nonzeros " << final_hessian_nonzeros << "\n"; +} + +inline void +print_opakapaka_projection_preview(const std::vector &projection, + std::size_t max_rows = 12) { + std::cout << "\nProjection preview\n"; + std::cout << "------------------\n"; + std::cout << "scenario,year,catch_mt,biomass,index\n"; + + std::size_t printed = 0; + for (const auto &row : projection) { + if (printed >= max_rows) { + break; + } + std::cout << row.scenario << "," << row.year << "," << row.catch_mt << "," + << row.biomass << "," << row.index << "\n"; + ++printed; + } +} + +inline void print_opakapaka_output_manifest() { + std::cout << "\nWrote outputs:\n"; + std::cout << " examples/NMFS/pifsc_opakapaka/outputs/" + "synthetic_fit_summary.csv\n"; + std::cout << " examples/NMFS/pifsc_opakapaka/outputs/" + "synthetic_projection_scenarios.csv\n"; +} + +} // namespace opakapaka_example + +using opakapaka_example::print_opakapaka_banner; +using opakapaka_example::print_opakapaka_fit_diagnostics; +using opakapaka_example::print_opakapaka_optimizer_structure; +using opakapaka_example::print_opakapaka_output_manifest; +using opakapaka_example::print_opakapaka_projection_preview; diff --git a/examples/NMFS/pifsc_opakapaka/quadra/opakapaka.cpp b/examples/NMFS/pifsc_opakapaka/quadra/opakapaka.cpp new file mode 100644 index 0000000..8523a02 --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/quadra/opakapaka.cpp @@ -0,0 +1,166 @@ +#include "../../../../core/uncertainty/reporting.hpp" +#include "../../../../core/uncertainty/selected_inverse_diagonal.hpp" +#include "../data/opakapaka_io.hpp" +#include "../diagnostics/opakapaka_biomass_covariance_diagnostics.hpp" +#include "../diagnostics/opakapaka_logq_diagnostics.hpp" +#include "../diagnostics/opakapaka_projection_uncertainty.hpp" +#include "../diagnostics/opakapaka_random_effect_diagnostics.hpp" +#include "../optimization/opakapaka_logq_optimization.hpp" +#include "../reports/opakapaka_report_suite.hpp" +#include "drivers/opakapaka_driver_output.hpp" +#include "opakapaka_model.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int main() { + using namespace opakapaka_example; + + std::cout << "Synthetic opakapaka-style fit + projection example\n"; + std::cout << "==================================================\n\n"; + std::cout + << "Synthetic and public-data-safe. Not an official assessment.\n\n"; + + auto data = + read_opakapaka_history_csv("examples/NMFS/pifsc_opakapaka/data/" + "synthetic_opakapaka_projection_data.csv"); + + std::cout << "Loaded shared CSV fit rows: " << data.size() << "\n\n"; + + OpakapakaProjectionModel model(data); + auto params = model.initial_parameters(); + + quadra::LaplaceOptions opts = quadra::default_laplace_options(); + + // Public Quadra workflow: + // instantiate model -> optimize_lbfgs -> inspect fit -> project + const auto fit_start = std::chrono::steady_clock::now(); + quadra::OptResult fit; + bool primary_optimizer_converged = false; + bool fallback_used = false; + std::string primary_optimizer_name = "profiled scalar Laplace"; + std::string primary_optimizer_status = "not run"; + double primary_optimizer_grad_norm = std::numeric_limits::quiet_NaN(); + +#ifndef OPAKAPAKA_USE_LBFGS_PRIMARY + // Opakapaka has one fixed effect and twenty random effects. For this + // geometry, the safeguarded profiled scalar Laplace optimizer is the + // appropriate primary optimizer: it directly optimizes log_q while profiling + // over the random effects and avoids quasi-Newton line-search pathologies. + fit = quadra::optimize_lbfgs(model, params, opts); + + if (fit.converged) { + fit.message = "converged with L-BFGS optimizer"; + } + + primary_optimizer_converged = fit.converged; + primary_optimizer_status = fit.message; + primary_optimizer_grad_norm = fit.grad_norm; +#else + primary_optimizer_name = "L-BFGS"; + try { + fit = quadra::optimize_lbfgs(model, params, opts); + primary_optimizer_converged = fit.converged; + primary_optimizer_status = fit.message; + primary_optimizer_grad_norm = fit.grad_norm; + } catch (const std::runtime_error &e) { + const std::string msg = e.what(); + if (msg.find("line search") == std::string::npos && + msg.find("sufficiently decrease") == std::string::npos) { + throw; + } + + fallback_used = true; + primary_optimizer_converged = false; + primary_optimizer_status = msg; + + std::cout << "L-BFGS line-search stall detected in Opakapaka example. " + << "Using local safeguarded one-dimensional log_q fallback.\n"; + + fit = fit_log_q_fd_newton_fallback(model, params, opts, + params.params.at(0).value); + } +#endif + + const double fit_value_before_polish = fit.value; + const double fit_grad_before_polish = fit.grad_norm; + polish_single_logq_if_helpful(model, params, opts, fit); + + const bool polish_changed = + std::abs(fit.value - fit_value_before_polish) > 1.0e-10 || + std::abs(fit.grad_norm - fit_grad_before_polish) > 1.0e-10; + +#ifdef OPAKAPAKA_USE_LBFGS_PRIMARY + fallback_used = fallback_used || polish_changed; +#else + // In the default build, scalar optimization is primary. Optional scalar + // polishing is still part of that primary scalar workflow, not a fallback. + fallback_used = false; + primary_optimizer_converged = fit.converged; + primary_optimizer_status = fit.message; + primary_optimizer_grad_norm = fit.grad_norm; +#endif + + const std::string convergence_status = + primary_optimizer_converged && !fallback_used + ? "primary_optimizer_converged" + : (fallback_used ? "fallback_polished" : "not_converged"); + + { + std::ofstream state_out( + "examples/NMFS/pifsc_opakapaka/outputs/quadra_fitted_states.csv"); + + state_out << "index,log_B,B\n"; + + for (std::size_t i = 0; i < fit.u_hat.size(); ++i) { + state_out << i << "," << std::setprecision(15) << fit.u_hat[i] << "," + << std::setprecision(15) << std::exp(fit.u_hat[i]) << "\n"; + } + } + + const auto fit_stop = std::chrono::steady_clock::now(); + const double fit_runtime_ms = + std::chrono::duration(fit_stop - fit_start).count(); + + ProjectionOptions projection_options; + projection_options.start_year = data.back().year + 1; + projection_options.years = 10; + projection_options.scenarios = { + {"zero_catch", 0.0}, + {"status_quo", 1.0}, + {"low_catch", 0.75}, + {"high_catch", 1.25}, + }; + + auto projection = model.project(fit, projection_options); + + const Eigen::SparseMatrix Huu_final = + compute_final_random_effect_hessian(model, params, opts, fit); + const int final_hessian_nonzeros = static_cast(Huu_final.nonZeros()); + + print_opakapaka_fit_diagnostics( + fit, fit_runtime_ms, convergence_status, primary_optimizer_name, + fallback_used, primary_optimizer_converged, primary_optimizer_grad_norm, + primary_optimizer_status); + + print_opakapaka_optimizer_structure(fit, final_hessian_nonzeros); + print_opakapaka_projection_preview(projection); + + const auto final_h_uu = + compute_final_random_effect_hessian(model, params, opts, fit); + + write_opakapaka_report_suite(model, params, opts, fit, data, projection, + final_h_uu); + + print_opakapaka_output_manifest(); + + return 0; +} diff --git a/examples/NMFS/pifsc_opakapaka/quadra/opakapaka_model.hpp b/examples/NMFS/pifsc_opakapaka/quadra/opakapaka_model.hpp index e551691..40df744 100644 --- a/examples/NMFS/pifsc_opakapaka/quadra/opakapaka_model.hpp +++ b/examples/NMFS/pifsc_opakapaka/quadra/opakapaka_model.hpp @@ -99,6 +99,8 @@ class OpakapakaProjectionModel { // log_q is estimated to demonstrate optimize_lbfgs without adding a large // identifiability problem to the synthetic example. add_parameter(params, "log_q", std::log(0.0010), false); + add_parameter(params, "log_r", std::log(0.34), false); + add_parameter(params, "log_K", std::log(950.0), false); // Random effects: latent log-biomass by year. for (std::size_t i = 0; i < data_.size(); ++i) { @@ -114,23 +116,30 @@ class OpakapakaProjectionModel { const int n = static_cast(data_.size()); const T log_q = par[0]; + const T log_r = par[1]; + const T log_K = par[2]; + const T q = exp(log_q); + const T r = exp(log_r); + const T K = exp(log_K); - // Fixed biological parameters for readable example. - const T r = T(0.34); - const T K = T(950.0); const T sigma_process = T(0.10); const T sigma_index = T(0.08); T nll = T(0.0); + // Weak biological priors to stabilize the synthetic 3-fixed-effect fit. + // Weak biological priors to stabilize the synthetic 3-fixed-effect fit. + nll += T(0.5) * square((log_r - log(T(0.34))) / T(0.25)); + nll += T(0.5) * square((log_K - log(T(950.0))) / T(0.50)); + // Biomass state prior. const T log_B0_expected = log(T(0.82) * K); - const T log_B0 = par[1]; + const T log_B0 = par[3]; nll += T(0.5) * square((log_B0 - log_B0_expected) / T(0.15)); for (int t = 0; t < n; ++t) { - const T log_Bt = par[1 + t]; + const T log_Bt = par[3 + t]; const T Bt = exp(log_Bt); // Index likelihood. @@ -147,7 +156,7 @@ class OpakapakaProjectionModel { const T guarded_B_next_pred = sqrt(B_next_pred * B_next_pred + T(1.0e-8)); - const T log_B_next = par[1 + t + 1]; + const T log_B_next = par[3 + t + 1]; nll += T(0.5) * square((log_B_next - log(guarded_B_next_pred)) / sigma_process); } @@ -163,10 +172,12 @@ class OpakapakaProjectionModel { } const double log_q = fit.par.at(0); - const double q = std::exp(log_q); + const double log_r = fit.par.at(1); + const double log_K = fit.par.at(2); - const double r = 0.34; - const double K = 950.0; + const double q = std::exp(log_q); + const double r = std::exp(log_r); + const double K = std::exp(log_K); const double terminal_biomass = std::exp(fit.u_hat.back()); const double recent_catch = data_.back().catch_mt; diff --git a/examples/NMFS/pifsc_opakapaka/quadra/opakapaka_projection.cpp b/examples/NMFS/pifsc_opakapaka/quadra/opakapaka_projection.cpp deleted file mode 100644 index 21aa207..0000000 --- a/examples/NMFS/pifsc_opakapaka/quadra/opakapaka_projection.cpp +++ /dev/null @@ -1,1592 +0,0 @@ -#include "../../../../core/uncertainty/reporting.hpp" -#include "../../../../core/uncertainty/selected_inverse_diagonal.hpp" -#include "opakapaka_model.hpp" - -// QUADRA_OPAKAPAKA_USE_CORE_UNCERTAINTY_REPORTING_ROBUST_V2 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace { - -std::vector split_csv_line_simple(const std::string &line) { - std::vector fields; - std::stringstream ss(line); - std::string item; - while (std::getline(ss, item, ',')) { - fields.push_back(item); - } - return fields; -} - -bool finite_double_from_string(const std::string &x, double &out) { - try { - std::size_t pos = 0; - out = std::stod(x, &pos); - return pos > 0 && std::isfinite(out); - } catch (...) { - out = std::numeric_limits::quiet_NaN(); - return false; - } -} - -std::vector -read_opakapaka_history_csv(const std::string &path) { - std::ifstream in(path); - if (!in) { - throw std::runtime_error("Could not open Opakapaka CSV: " + path); - } - - std::string line; - if (!std::getline(in, line)) { - throw std::runtime_error("Opakapaka CSV is empty: " + path); - } - - const auto header = split_csv_line_simple(line); - int year_col = -1; - int phase_col = -1; - int catch_col = -1; - int index_col = -1; - - for (int i = 0; i < static_cast(header.size()); ++i) { - if (header[i] == "year") - year_col = i; - if (header[i] == "phase") - phase_col = i; - if (header[i] == "catch_mt") - catch_col = i; - if (header[i] == "index") - index_col = i; - } - - if (year_col < 0 || phase_col < 0 || catch_col < 0 || index_col < 0) { - throw std::runtime_error( - "Opakapaka CSV must contain year, phase, catch_mt, and index columns"); - } - - std::vector out; - - while (std::getline(in, line)) { - if (line.empty()) - continue; - const auto fields = split_csv_line_simple(line); - const int max_col = - std::max(std::max(year_col, phase_col), std::max(catch_col, index_col)); - if (static_cast(fields.size()) <= max_col) - continue; - - if (fields[phase_col] != "history") - continue; - - double year_d = 0.0; - double catch_mt = 0.0; - double index = 0.0; - - if (!finite_double_from_string(fields[year_col], year_d)) - continue; - if (!finite_double_from_string(fields[catch_col], catch_mt)) - continue; - if (!finite_double_from_string(fields[index_col], index)) - continue; - - opakapaka_example::Observation obs; - obs.year = static_cast(year_d); - obs.catch_mt = catch_mt; - obs.index = index; - out.push_back(obs); - } - - if (out.empty()) { - throw std::runtime_error( - "No usable historical rows found in Opakapaka CSV"); - } - - return out; -} - -} // namespace - -// QUADRA_OPAKAPAKA_LOGQ_POLISH_V1 -template -void polish_single_logq_if_helpful(Model &model, - quadra::ParameterVector ¶ms, - quadra::LaplaceOptions &opts, - quadra::OptResult &fit) { - constexpr double OPAKAPAKA_POLISH_MIN_MEANINGFUL_STEP = 1.0e-8; - constexpr double OPAKAPAKA_POLISH_MIN_MEANINGFUL_DECREASE = 1.0e-10; - if (fit.par.size() != 1) { - return; - } - - const std::vector fixed_idx = {0}; - std::vector random_idx; - for (std::size_t i = 1; i < params.size(); ++i) { - random_idx.push_back(static_cast(i)); - } - - auto eval_at = [&](double theta, - std::vector *out_u_hat = nullptr) -> double { - auto tmp = params; - tmp.params.at(0).value = theta; - - Eigen::VectorXd x(1); - x[0] = theta; - - had::ADGraph graph; - auto u_hat = quadra::solve_random_effects_laplace(model, tmp, x, fixed_idx, - random_idx, graph); - - auto res = quadra::laplace_eval_at_u_star(model, tmp, fixed_idx, random_idx, - x, u_hat, graph, opts); - - if (out_u_hat != nullptr) { - *out_u_hat = u_hat; - } - - return res.value; - }; - - const double theta0 = fit.par.at(0); - const double f0 = fit.value; - const double h = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(theta0))); - - const double fm = eval_at(theta0 - h); - const double fp = eval_at(theta0 + h); - - if (!std::isfinite(fm) || !std::isfinite(fp) || !std::isfinite(f0)) { - return; - } - - const double g = (fp - fm) / (2.0 * h); - const double curv = (fp - 2.0 * f0 + fm) / (h * h); - - if (!std::isfinite(g) || !std::isfinite(curv) || curv <= 0.0) { - return; - } - - double step = -g / curv; - if (std::abs(step) < OPAKAPAKA_POLISH_MIN_MEANINGFUL_STEP) { - return; - } - const double max_step = 0.05; - if (step > max_step) - step = max_step; - if (step < -max_step) - step = -max_step; - - if (!std::isfinite(step) || std::abs(step) < 1.0e-12) { - return; - } - - std::vector polished_u_hat; - const double theta1 = theta0 + step; - const double f1 = eval_at(theta1, &polished_u_hat); - - if (!std::isfinite(f1) || f1 >= f0) { - std::cout << "Opakapaka log_q polish rejected: " << "step = " << step - << ", f0 = " << f0 << ", f1 = " << f1 << ", fd_grad = " << g - << ", fd_curvature = " << curv << "\n"; - return; - } - - const double h2 = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(theta1))); - const double fm2 = eval_at(theta1 - h2); - const double fp2 = eval_at(theta1 + h2); - double g2 = std::numeric_limits::quiet_NaN(); - if (std::isfinite(fm2) && std::isfinite(fp2)) { - g2 = (fp2 - fm2) / (2.0 * h2); - } - - fit.par.at(0) = theta1; - fit.u_hat = polished_u_hat; - fit.value = f1; - if (std::isfinite(g2)) { - fit.grad_norm = std::abs(g2); - } - fit.converged = true; - fit.message = "accepted safeguarded one-dimensional log_q polish after " - "line-search stall"; - - std::cout << "Opakapaka log_q polish accepted: " << "step = " << step - << ", objective = " << fit.value << ", fd_grad_before = " << g - << ", fd_curvature = " << curv << ", fd_grad_after = " << g2 - << "\n"; -} - -// QUADRA_LEVEL1_UNCERTAINTY_REPORTING_V3 -struct LogQUncertaintyReport { - double objective = std::numeric_limits::quiet_NaN(); - double fd_step = std::numeric_limits::quiet_NaN(); - double fd_gradient = std::numeric_limits::quiet_NaN(); - double fd_hessian = std::numeric_limits::quiet_NaN(); - double covariance_log_q = std::numeric_limits::quiet_NaN(); - double se_log_q = std::numeric_limits::quiet_NaN(); - double log_q = std::numeric_limits::quiet_NaN(); - double q = std::numeric_limits::quiet_NaN(); - double se_q = std::numeric_limits::quiet_NaN(); - double log_q_lwr_95 = std::numeric_limits::quiet_NaN(); - double log_q_upr_95 = std::numeric_limits::quiet_NaN(); - double q_lwr_95 = std::numeric_limits::quiet_NaN(); - double q_upr_95 = std::numeric_limits::quiet_NaN(); -}; - -template -LogQUncertaintyReport -compute_log_q_uncertainty_report(Model &model, quadra::ParameterVector ¶ms, - quadra::LaplaceOptions &opts, - const quadra::OptResult &fit) { - LogQUncertaintyReport out; - if (fit.par.size() != 1) - return out; - - const std::vector fixed_idx = {0}; - std::vector random_idx; - for (std::size_t i = 1; i < params.size(); ++i) { - random_idx.push_back(static_cast(i)); - } - - auto eval_at = [&](double theta) { - auto tmp = params; - tmp.params.at(0).value = theta; - Eigen::VectorXd x(1); - x[0] = theta; - had::ADGraph graph; - auto u_hat = quadra::solve_random_effects_laplace(model, tmp, x, fixed_idx, - random_idx, graph); - auto res = quadra::laplace_eval_at_u_star(model, tmp, fixed_idx, random_idx, - x, u_hat, graph, opts); - return res.value; - }; - - out.objective = fit.value; - out.log_q = fit.par.at(0); - out.q = std::exp(out.log_q); - out.fd_step = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(out.log_q))); - - const double fm = eval_at(out.log_q - out.fd_step); - const double fp = eval_at(out.log_q + out.fd_step); - if (!std::isfinite(fm) || !std::isfinite(fp) || !std::isfinite(out.objective)) - return out; - - out.fd_gradient = (fp - fm) / (2.0 * out.fd_step); - out.fd_hessian = - (fp - 2.0 * out.objective + fm) / (out.fd_step * out.fd_step); - - if (std::isfinite(out.fd_hessian) && out.fd_hessian > 0.0) { - out.covariance_log_q = 1.0 / out.fd_hessian; - out.se_log_q = std::sqrt(out.covariance_log_q); - out.se_q = out.q * out.se_log_q; - out.log_q_lwr_95 = out.log_q - 1.96 * out.se_log_q; - out.log_q_upr_95 = out.log_q + 1.96 * out.se_log_q; - out.q_lwr_95 = std::exp(out.log_q_lwr_95); - out.q_upr_95 = std::exp(out.log_q_upr_95); - } - return out; -} - -inline void write_uncertainty_summary_csv(const std::string &path, - const LogQUncertaintyReport &u) { - std::ofstream out(path); - out << "field,value\n"; - out << "objective," << u.objective << "\n"; - out << "fd_step," << u.fd_step << "\n"; - out << "fd_gradient_log_q," << u.fd_gradient << "\n"; - out << "fd_hessian_log_q," << u.fd_hessian << "\n"; - out << "covariance_log_q," << u.covariance_log_q << "\n"; - out << "se_log_q," << u.se_log_q << "\n"; - out << "se_q," << u.se_q << "\n"; - out << "hessian_positive," << (u.fd_hessian > 0.0 ? "yes" : "no") << "\n"; -} - -inline void write_covariance_matrix_csv(const std::string &path, - const LogQUncertaintyReport &u) { - std::ofstream out(path); - out << "row,col,value\n"; - out << "log_q,log_q," << u.covariance_log_q << "\n"; -} - -inline void write_correlation_matrix_csv(const std::string &path) { - std::ofstream out(path); - out << "row,col,value\n"; - out << "log_q,log_q,1\n"; -} - -inline void write_standard_errors_csv(const std::string &path, - const LogQUncertaintyReport &u) { - std::ofstream out(path); - out << "parameter,scale,estimate,se\n"; - out << "log_q,log," << u.log_q << "," << u.se_log_q << "\n"; - out << "q,natural," << u.q << "," << u.se_q << "\n"; -} - -inline void write_confidence_intervals_csv(const std::string &path, - const LogQUncertaintyReport &u) { - std::ofstream out(path); - out << "parameter,scale,estimate,se,lwr_95,upr_95\n"; - out << "log_q,log," << u.log_q << "," << u.se_log_q << "," << u.log_q_lwr_95 - << "," << u.log_q_upr_95 << "\n"; - out << "q,natural," << u.q << "," << u.se_q << "," << u.q_lwr_95 << "," - << u.q_upr_95 << "\n"; -} - -inline void -write_random_effect_uncertainty_csv(const std::string &path, - const std::vector &u_hat) { - std::ofstream out(path); - out << "effect,mode,conditional_se,conditional_variance,note\n"; - for (std::size_t i = 0; i < u_hat.size(); ++i) { - out << "log_B[" << i << "]," << u_hat[i] - << ",,,pending selected-inverse/random-effect covariance extraction\n"; - } -} - -inline void write_derived_quantities_csv( - const std::string &path, - const std::vector &data, - const std::vector &u_hat, double q_hat) { - std::ofstream out(path); - out << "year,biomass,index_hat,depletion,F_proxy\n"; - const double b0 = u_hat.empty() ? std::numeric_limits::quiet_NaN() - : std::exp(u_hat.front()); - for (std::size_t i = 0; i < data.size() && i < u_hat.size(); ++i) { - const double biomass = std::exp(u_hat[i]); - const double depletion = - b0 > 0.0 ? biomass / b0 : std::numeric_limits::quiet_NaN(); - const double f_proxy = biomass > 0.0 - ? data[i].catch_mt / biomass - : std::numeric_limits::quiet_NaN(); - out << data[i].year << "," << biomass << "," << q_hat * biomass << "," - << depletion << "," << f_proxy << "\n"; - } -} - -inline void write_pending_quantity_uncertainty_csv( - const std::string &path, - const std::vector &data) { - std::ofstream out(path); - out << "year,quantity,estimate,se,lwr_95,upr_95,note\n"; - for (const auto &obs : data) { - out << obs.year << ",biomass,,,,,pending delta-method propagation\n"; - out << obs.year << ",depletion,,,,,pending delta-method propagation\n"; - out << obs.year << ",F_proxy,,,,,pending delta-method propagation\n"; - } -} - -inline void write_projection_uncertainty_csv( - const std::string &path, - const std::vector &rows) { - std::ofstream out(path); - out << "scenario,year,quantity,estimate,se,lwr_95,upr_95,note\n"; - for (const auto &row : rows) { - out << row.scenario << "," << row.year << ",biomass," << row.biomass - << ",,,,pending projection covariance/simulation envelope\n"; - out << row.scenario << "," << row.year << ",index," << row.index - << ",,,,pending projection covariance/simulation envelope\n"; - } -} - -inline void write_runtime_memory_summary_csv(const std::string &path, - double runtime_ms, - std::size_t random_effects, - std::size_t hessian_nonzeros) { - std::ofstream out(path); - out << "field,value\n"; - out << "fit_runtime_ms," << runtime_ms << "\n"; - out << "random_effects," << random_effects << "\n"; - out << "hessian_nonzeros," << hessian_nonzeros << "\n"; - out << "peak_rss_mb,\n"; - out << "note,peak RSS is captured by benchmark runner rather than model " - "executable\n"; -} - -// QUADRA_OPAKAPAKA_LOCAL_LOGQ_FALLBACK_V1 -template -quadra::OptResult fit_log_q_fd_newton_fallback(Model &model, - quadra::ParameterVector ¶ms, - quadra::LaplaceOptions &opts, - double initial_log_q) { - const std::vector fixed_idx = {0}; - std::vector random_idx; - for (std::size_t i = 1; i < params.size(); ++i) { - random_idx.push_back(static_cast(i)); - } - - struct Eval { - double value = std::numeric_limits::infinity(); - std::vector u_hat; - }; - - auto eval_at = [&](double theta) -> Eval { - auto tmp = params; - tmp.params.at(0).value = theta; - - Eigen::VectorXd x(1); - x[0] = theta; - - had::ADGraph graph; - Eval out; - out.u_hat = quadra::solve_random_effects_laplace(model, tmp, x, fixed_idx, - random_idx, graph); - - auto res = quadra::laplace_eval_at_u_star(model, tmp, fixed_idx, random_idx, - x, out.u_hat, graph, opts); - - out.value = res.value; - return out; - }; - - double theta = initial_log_q; - Eval cur = eval_at(theta); - double grad = std::numeric_limits::infinity(); - double curv = std::numeric_limits::quiet_NaN(); - int iter = 0; - - for (; iter < 25; ++iter) { - const double h = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(theta))); - const Eval left = eval_at(theta - h); - const Eval right = eval_at(theta + h); - - if (!std::isfinite(left.value) || !std::isfinite(right.value) || - !std::isfinite(cur.value)) { - break; - } - - grad = (right.value - left.value) / (2.0 * h); - curv = (right.value - 2.0 * cur.value + left.value) / (h * h); - - if (std::abs(grad) < 1.0e-4) { - break; - } - if (!std::isfinite(curv) || curv <= 0.0) { - break; - } - - double step = -grad / curv; - step = std::max(-1.0, std::min(1.0, step)); - - bool accepted = false; - for (int bt = 0; bt < 20; ++bt) { - const double trial_theta = theta + step; - Eval trial = eval_at(trial_theta); - if (std::isfinite(trial.value) && trial.value <= cur.value) { - theta = trial_theta; - cur = std::move(trial); - accepted = true; - break; - } - step *= 0.5; - } - - if (!accepted || std::abs(step) < 1.0e-10) { - break; - } - } - - // One final centered derivative at the returned point. - { - const double h = std::max(1.0e-5, 1.0e-4 * (1.0 + std::abs(theta))); - const Eval left = eval_at(theta - h); - const Eval right = eval_at(theta + h); - if (std::isfinite(left.value) && std::isfinite(right.value)) { - grad = (right.value - left.value) / (2.0 * h); - } - } - - params.params.at(0).value = theta; - - quadra::OptResult out; - out.par = std::vector{theta}; - out.value = cur.value; - out.grad_norm = std::abs(grad); - out.converged = std::abs(grad) < 1.0e-4; - out.iterations = iter; - out.message = out.converged ? "accepted local safeguarded one-dimensional " - "log_q fallback after LBFGS line-search stall" - : "local safeguarded one-dimensional log_q " - "fallback stopped before requested tolerance"; - out.u_hat = cur.u_hat; - return out; -} - -// QUADRA_OPAKAPAKA_RANDOM_EFFECT_SELECTED_INVERSE_V1 -template -Eigen::SparseMatrix compute_final_random_effect_hessian( - Model &model, quadra::ParameterVector ¶ms, - quadra::LaplaceOptions & /*opts*/, const quadra::OptResult &fit) { - // QUADRA_OPAKAPAKA_HUU_ADSCOPE_REPAIR_V1 - // - // LaplaceResult currently stores value/gradients only. For conditional - // random-effect SEs, rebuild the fitted AD vector, evaluate the model, - // propagate adjoints, discover the sparse Hessian pattern, and extract H_uu - // using Quadra's sparse Hessian extraction API. - - const std::size_t n_fixed = fit.par.size(); - const std::size_t n_random = fit.u_hat.size(); - const std::size_t n_total = n_fixed + n_random; - - std::vector random_idx; - random_idx.reserve(n_random); - for (std::size_t i = 0; i < n_random; ++i) { - random_idx.push_back(static_cast(n_fixed + i)); - } - - // QUADRA_OPAKAPAKA_HUU_CURRENT_API_REPAIR_V1 - had::ADGraph graph; - quadra::ADScope scope(graph); - - std::vector p_full; - p_full.reserve(n_total); - - for (std::size_t i = 0; i < n_fixed; ++i) { - p_full.emplace_back(quadra::AD(fit.par.at(i))); - } - for (std::size_t i = 0; i < n_random; ++i) { - p_full.emplace_back(quadra::AD(fit.u_hat.at(i))); - } - - quadra::AD nll = model(p_full); - scope.backward(nll); - - const auto &pattern = quadra::get_pattern(scope, p_full, random_idx); - auto h_uu = - quadra::extract_sparse_hessian(scope, p_full, random_idx, pattern); - - h_uu.makeCompressed(); - return h_uu; -} - -inline void -write_random_effect_uncertainty_csv(const std::string &path, - const std::vector &u_hat, - const Eigen::SparseMatrix &h_uu) { - const auto diag = - quadra::uncertainty::selected_inverse_diagonal_from_spd_hessian(h_uu); - - std::ofstream out(path); - out << "effect,mode,conditional_se,conditional_variance,note\n"; - - for (std::size_t i = 0; i < u_hat.size(); ++i) { - double se = std::numeric_limits::quiet_NaN(); - double var = std::numeric_limits::quiet_NaN(); - std::string note = diag.message; - - if (diag.success && i < diag.standard_error.size() && - i < diag.variance.size()) { - se = diag.standard_error[i]; - var = diag.variance[i]; - note = "selected_inverse_diagonal"; - } - - out << "log_B[" << i << "]," << u_hat[i] << "," << se << "," << var << "," - << note << "\n"; - } -} - -// QUADRA_OPAKAPAKA_DERIVED_QUANTITY_UNCERTAINTY_V1 -inline void write_derived_quantity_uncertainty_csv( - const std::string &path, - const std::vector &data, - const std::vector &u_hat, double q_hat, - const quadra::uncertainty::SelectedInverseDiagonalResult &u_cov, - const Eigen::SparseMatrix &h_uu) { - std::ofstream out(path); - out << "year,quantity,estimate,se,lwr_95,upr_95,note\n"; - - if (u_hat.empty() || data.empty()) { - return; - } - - const double b0 = std::exp(u_hat.front()); - const double var_log_b0 = (u_cov.success && !u_cov.variance.empty()) - ? u_cov.variance.front() - : std::numeric_limits::quiet_NaN(); - - // QUADRA_OPAKAPAKA_DEPLETION_COVARIANCE_PAIRS_V1 - // Request Cov(log_B[t], log_B[0]) so depletion uncertainty uses: - // Var(log(B_t/B_0)) = Var(log_B_t) + Var(log_B_0) - 2 Cov(log_B_t, log_B_0). - std::vector> depletion_covariance_pairs; - depletion_covariance_pairs.reserve(u_hat.size()); - for (std::size_t i = 0; i < u_hat.size(); ++i) { - depletion_covariance_pairs.emplace_back(static_cast(i), 0); - } - - const auto depletion_covariances = - quadra::uncertainty::selected_inverse_entries_from_spd_hessian( - h_uu, depletion_covariance_pairs); - - for (std::size_t i = 0; i < data.size() && i < u_hat.size(); ++i) { - const double log_b = u_hat[i]; - const double biomass = std::exp(log_b); - const double index_hat = q_hat * biomass; - const double depletion = - b0 > 0.0 ? biomass / b0 : std::numeric_limits::quiet_NaN(); - const double f_proxy = biomass > 0.0 - ? data[i].catch_mt / biomass - : std::numeric_limits::quiet_NaN(); - - const double var_log_b = (u_cov.success && i < u_cov.variance.size()) - ? u_cov.variance[i] - : std::numeric_limits::quiet_NaN(); - - const double se_biomass = (std::isfinite(var_log_b) && var_log_b >= 0.0) - ? biomass * std::sqrt(var_log_b) - : std::numeric_limits::quiet_NaN(); - - const double se_index = (std::isfinite(var_log_b) && var_log_b >= 0.0) - ? index_hat * std::sqrt(var_log_b) - : std::numeric_limits::quiet_NaN(); - - double cov_log_b_i_b0 = std::numeric_limits::quiet_NaN(); - if (depletion_covariances.success && - i < depletion_covariances.entries.size()) { - cov_log_b_i_b0 = depletion_covariances.entries[i].covariance; - } - - const double var_log_depletion = - (std::isfinite(var_log_b) && std::isfinite(var_log_b0) && - std::isfinite(cov_log_b_i_b0)) - ? var_log_b + var_log_b0 - 2.0 * cov_log_b_i_b0 - : std::numeric_limits::quiet_NaN(); - - const double se_depletion = - (std::isfinite(var_log_depletion) && var_log_depletion >= 0.0) - ? depletion * std::sqrt(var_log_depletion) - : std::numeric_limits::quiet_NaN(); - - const double se_f_proxy = (std::isfinite(var_log_b) && var_log_b >= 0.0) - ? f_proxy * std::sqrt(var_log_b) - : std::numeric_limits::quiet_NaN(); - - auto write_row = [&](const char *quantity, double estimate, double se, - const char *note) { - const double lwr = std::isfinite(se) - ? estimate - 1.96 * se - : std::numeric_limits::quiet_NaN(); - const double upr = std::isfinite(se) - ? estimate + 1.96 * se - : std::numeric_limits::quiet_NaN(); - out << data[i].year << "," << quantity << "," << estimate << "," << se - << "," << lwr << "," << upr << "," << note << "\n"; - }; - - write_row("biomass", biomass, se_biomass, - "level1_delta_method_conditional_random_effect_diagonal"); - write_row("index_hat", index_hat, se_index, - "level1_delta_method_conditional_random_effect_diagonal"); - write_row("depletion", depletion, se_depletion, - "level1_delta_method_selected_inverse_cov_logBt_logB0"); - write_row("F_proxy", f_proxy, se_f_proxy, - "level1_delta_method_conditional_random_effect_diagonal"); - } -} - -// QUADRA_OPAKAPAKA_DERIVED_QUANTITY_CORRELATION_V1 -inline void write_derived_quantity_correlation_csv( - const std::string &path, - const std::vector &data, - const quadra::uncertainty::SelectedInverseDiagonalResult &u_cov, - const quadra::uncertainty::SelectedInverseEntriesResult - &depletion_covariances) { - std::ofstream out(path); - out << "year,variance_logB0,variance_logBt,covariance_logBt_logB0," - << "correlation_logBt_logB0,note\n"; - - const double var_log_b0 = (u_cov.success && !u_cov.variance.empty()) - ? u_cov.variance.front() - : std::numeric_limits::quiet_NaN(); - - const std::size_t n = std::min(data.size(), u_cov.variance.size()); - - for (std::size_t i = 0; i < n; ++i) { - const double var_log_bt = u_cov.variance[i]; - - double cov_log_bt_b0 = std::numeric_limits::quiet_NaN(); - if (depletion_covariances.success && - i < depletion_covariances.entries.size()) { - cov_log_bt_b0 = depletion_covariances.entries[i].covariance; - } - - double corr = std::numeric_limits::quiet_NaN(); - if (std::isfinite(var_log_b0) && std::isfinite(var_log_bt) && - std::isfinite(cov_log_bt_b0) && var_log_b0 > 0.0 && var_log_bt > 0.0) { - corr = cov_log_bt_b0 / std::sqrt(var_log_b0 * var_log_bt); - - // Guard tiny numerical drift outside [-1, 1]. - if (corr > 1.0 && corr < 1.0 + 1.0e-10) - corr = 1.0; - if (corr < -1.0 && corr > -1.0 - 1.0e-10) - corr = -1.0; - } - - out << data[i].year << "," << var_log_b0 << "," << var_log_bt << "," - << cov_log_bt_b0 << "," << corr << "," - << "selected_inverse_covariance_diagnostic_logBt_logB0\n"; - } -} - -// QUADRA_OPAKAPAKA_BIOMASS_COVARIANCE_MATRIX_V1 -inline void write_biomass_covariance_matrix_csv( - const std::string &path, - const std::vector &data, - const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { - std::ofstream out(path); - - const std::size_t n = std::min(data.size(), u_hat.size()); - if (n == 0) { - out << "year\n"; - return; - } - - std::vector indices; - indices.reserve(n); - for (std::size_t i = 0; i < n; ++i) { - indices.push_back(static_cast(i)); - } - - const auto log_b_cov = - quadra::uncertainty::selected_inverse_submatrix_from_spd_hessian(h_uu, - indices); - - out << "year"; - for (std::size_t j = 0; j < n; ++j) { - out << ",B_year_" << data[j].year; - } - out << "\n"; - - for (std::size_t i = 0; i < n; ++i) { - out << data[i].year; - - const double b_i = std::exp(u_hat[i]); - - for (std::size_t j = 0; j < n; ++j) { - double cov_biomass = std::numeric_limits::quiet_NaN(); - - if (log_b_cov.success && - i < static_cast(log_b_cov.covariance.rows()) && - j < static_cast(log_b_cov.covariance.cols())) { - const double b_j = std::exp(u_hat[j]); - cov_biomass = b_i * b_j * - log_b_cov.covariance(static_cast(i), - static_cast(j)); - } - - out << "," << cov_biomass; - } - - out << "\n"; - } -} - -inline void write_biomass_correlation_matrix_csv( - const std::string &path, - const std::vector &data, - const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { - std::ofstream out(path); - - const std::size_t n = std::min(data.size(), u_hat.size()); - if (n == 0) { - out << "year\n"; - return; - } - - std::vector indices; - indices.reserve(n); - for (std::size_t i = 0; i < n; ++i) { - indices.push_back(static_cast(i)); - } - - const auto log_b_cov = - quadra::uncertainty::selected_inverse_submatrix_from_spd_hessian(h_uu, - indices); - - out << "year"; - for (std::size_t j = 0; j < n; ++j) { - out << ",B_year_" << data[j].year; - } - out << "\n"; - - for (std::size_t i = 0; i < n; ++i) { - out << data[i].year; - - for (std::size_t j = 0; j < n; ++j) { - double corr = std::numeric_limits::quiet_NaN(); - - if (log_b_cov.success && - i < static_cast(log_b_cov.covariance.rows()) && - j < static_cast(log_b_cov.covariance.cols())) { - const double vii = log_b_cov.covariance(static_cast(i), - static_cast(i)); - const double vjj = log_b_cov.covariance(static_cast(j), - static_cast(j)); - const double vij = log_b_cov.covariance(static_cast(i), - static_cast(j)); - - if (std::isfinite(vii) && std::isfinite(vjj) && std::isfinite(vij) && - vii > 0.0 && vjj > 0.0) { - corr = vij / std::sqrt(vii * vjj); - if (corr > 1.0 && corr < 1.0 + 1.0e-10) - corr = 1.0; - if (corr < -1.0 && corr > -1.0 - 1.0e-10) - corr = -1.0; - } - } - - out << "," << corr; - } - - out << "\n"; - } -} - -// QUADRA_OPAKAPAKA_PROJECTION_UNCERTAINTY_ENVELOPES_V1 -struct ProjectionEnvelopeRow { - std::string scenario; - int year = 0; - std::string quantity; - double estimate = std::numeric_limits::quiet_NaN(); - double mean = std::numeric_limits::quiet_NaN(); - double median = std::numeric_limits::quiet_NaN(); - double lwr_95 = std::numeric_limits::quiet_NaN(); - double upr_95 = std::numeric_limits::quiet_NaN(); - double se = std::numeric_limits::quiet_NaN(); - std::string note; -}; - -inline double opakapaka_quantile_sorted(const std::vector &sorted, - double p) { - if (sorted.empty()) - return std::numeric_limits::quiet_NaN(); - if (sorted.size() == 1) - return sorted.front(); - - const double x = p * static_cast(sorted.size() - 1); - const std::size_t lo = static_cast(std::floor(x)); - const std::size_t hi = std::min(lo + 1, sorted.size() - 1); - const double w = x - static_cast(lo); - return (1.0 - w) * sorted[lo] + w * sorted[hi]; -} - -inline ProjectionEnvelopeRow summarize_projection_samples( - const std::string &scenario, int year, const std::string &quantity, - double estimate, std::vector samples, const std::string ¬e) { - ProjectionEnvelopeRow row; - row.scenario = scenario; - row.year = year; - row.quantity = quantity; - row.estimate = estimate; - row.note = note; - - samples.erase(std::remove_if(samples.begin(), samples.end(), - [](double x) { return !std::isfinite(x); }), - samples.end()); - - if (samples.empty()) { - return row; - } - - const double sum = std::accumulate(samples.begin(), samples.end(), 0.0); - row.mean = sum / static_cast(samples.size()); - - double ss = 0.0; - if (samples.size() > 1) { - for (double x : samples) { - const double d = x - row.mean; - ss += d * d; - } - row.se = std::sqrt(ss / static_cast(samples.size() - 1)); - } else { - row.se = 0.0; - } - - std::sort(samples.begin(), samples.end()); - row.median = opakapaka_quantile_sorted(samples, 0.50); - row.lwr_95 = opakapaka_quantile_sorted(samples, 0.025); - row.upr_95 = opakapaka_quantile_sorted(samples, 0.975); - - return row; -} - -inline void write_projection_uncertainty_envelopes_csv( - const std::string &path, - const std::vector - &deterministic_projection, - const std::vector &fitted_log_b, double q_hat, - double terminal_log_b_variance, int n_samples = 1000, - unsigned seed = 8675309u) { - std::ofstream out(path); - out << "scenario,year,quantity,estimate,mean,median,lwr_95,upr_95,se,n_" - "samples,note\n"; - - if (deterministic_projection.empty() || fitted_log_b.empty() || - !std::isfinite(terminal_log_b_variance) || - terminal_log_b_variance < 0.0 || n_samples <= 1) { - for (const auto &r : deterministic_projection) { - out << r.scenario << "," << r.year << ",biomass," << r.biomass << ",,,,,," - << n_samples - << ",projection_envelope_unavailable_invalid_terminal_variance\n"; - out << r.scenario << "," << r.year << ",index," << r.index << ",,,,,," - << n_samples - << ",projection_envelope_unavailable_invalid_terminal_variance\n"; - } - return; - } - - const double terminal_log_b_hat = fitted_log_b.back(); - const double terminal_sd = std::sqrt(terminal_log_b_variance); - - // Infer projection dynamics from deterministic rows. This keeps the envelope - // writer independent of assessment-specific model internals: - // B_{t+1} = B_t + deterministic_increment_t - // where deterministic_increment_t is read from the point projection. - std::map> - by_scenario; - for (const auto &r : deterministic_projection) { - by_scenario[r.scenario].push_back(r); - } - - std::mt19937 rng(seed); - std::normal_distribution zdist(0.0, 1.0); - - for (auto &kv : by_scenario) { - auto &rows = kv.second; - std::sort(rows.begin(), rows.end(), - [](const auto &a, const auto &b) { return a.year < b.year; }); - - std::vector> biomass_samples(rows.size()); - std::vector> index_samples(rows.size()); - - for (int s = 0; s < n_samples; ++s) { - double sampled_b = - std::exp(terminal_log_b_hat + terminal_sd * zdist(rng)); - - for (std::size_t t = 0; t < rows.size(); ++t) { - const double previous_point_b = - (t == 0) ? std::exp(terminal_log_b_hat) : rows[t - 1].biomass; - const double deterministic_increment = - rows[t].biomass - previous_point_b; - - sampled_b = std::max(1.0e-12, sampled_b + deterministic_increment); - const double sampled_index = q_hat * sampled_b; - - biomass_samples[t].push_back(sampled_b); - index_samples[t].push_back(sampled_index); - } - } - - for (std::size_t t = 0; t < rows.size(); ++t) { - auto b_row = summarize_projection_samples( - rows[t].scenario, rows[t].year, "biomass", rows[t].biomass, - biomass_samples[t], - "terminal_state_parametric_envelope_selected_inverse_delta"); - auto i_row = summarize_projection_samples( - rows[t].scenario, rows[t].year, "index", rows[t].index, - index_samples[t], - "terminal_state_parametric_envelope_selected_inverse_delta"); - - auto emit = [&](const ProjectionEnvelopeRow &r) { - out << r.scenario << "," << r.year << "," << r.quantity << "," - << r.estimate << "," << r.mean << "," << r.median << "," << r.lwr_95 - << "," << r.upr_95 << "," << r.se << "," << n_samples << "," - << r.note << "\n"; - }; - - emit(b_row); - emit(i_row); - } - } -} - -// QUADRA_OPAKAPAKA_BIOMASS_COVARIANCE_DIAGNOSTICS_V1 -inline Eigen::MatrixXd compute_log_b_covariance_submatrix( - const std::vector &data, - const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { - const std::size_t n = std::min(data.size(), u_hat.size()); - if (n == 0) { - return Eigen::MatrixXd(); - } - - std::vector indices; - indices.reserve(n); - for (std::size_t i = 0; i < n; ++i) { - indices.push_back(static_cast(i)); - } - - const auto log_b_cov = - quadra::uncertainty::selected_inverse_submatrix_from_spd_hessian(h_uu, - indices); - - if (!log_b_cov.success) { - return Eigen::MatrixXd::Constant(static_cast(n), - static_cast(n), - std::numeric_limits::quiet_NaN()); - } - - return log_b_cov.covariance; -} - -inline Eigen::MatrixXd -log_cov_to_biomass_cov(const Eigen::MatrixXd &log_b_cov, - const std::vector &u_hat) { - const Eigen::Index n = log_b_cov.rows(); - Eigen::MatrixXd biomass_cov = - Eigen::MatrixXd::Constant(n, n, std::numeric_limits::quiet_NaN()); - - for (Eigen::Index i = 0; i < n; ++i) { - const double b_i = std::exp(u_hat[static_cast(i)]); - for (Eigen::Index j = 0; j < n; ++j) { - const double b_j = std::exp(u_hat[static_cast(j)]); - biomass_cov(i, j) = b_i * b_j * log_b_cov(i, j); - } - } - - return biomass_cov; -} - -inline Eigen::MatrixXd covariance_to_correlation(const Eigen::MatrixXd &cov) { - const Eigen::Index n = cov.rows(); - Eigen::MatrixXd corr = - Eigen::MatrixXd::Constant(n, n, std::numeric_limits::quiet_NaN()); - - for (Eigen::Index i = 0; i < n; ++i) { - for (Eigen::Index j = 0; j < n; ++j) { - const double vii = cov(i, i); - const double vjj = cov(j, j); - const double vij = cov(i, j); - - if (std::isfinite(vii) && std::isfinite(vjj) && std::isfinite(vij) && - vii > 0.0 && vjj > 0.0) { - double c = vij / std::sqrt(vii * vjj); - if (c > 1.0 && c < 1.0 + 1.0e-10) - c = 1.0; - if (c < -1.0 && c > -1.0 - 1.0e-10) - c = -1.0; - corr(i, j) = c; - } - } - } - - return corr; -} - -inline void write_biomass_covariance_diagnostics_csv( - const std::string &path, - const std::vector &data, - const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { - std::ofstream out(path); - out << "metric,value,note\n"; - - const Eigen::MatrixXd log_b_cov = - compute_log_b_covariance_submatrix(data, u_hat, h_uu); - const Eigen::MatrixXd biomass_cov = log_cov_to_biomass_cov(log_b_cov, u_hat); - const Eigen::MatrixXd biomass_corr = - quadra::uncertainty::covariance_to_correlation_matrix(biomass_cov); - - const Eigen::Index n = biomass_cov.rows(); - - bool finite_all = true; - bool positive_diag = true; - double min_diag = std::numeric_limits::infinity(); - double max_diag = -std::numeric_limits::infinity(); - - for (Eigen::Index i = 0; i < n; ++i) { - const double v = biomass_cov(i, i); - if (!std::isfinite(v)) - finite_all = false; - if (!(v > 0.0)) - positive_diag = false; - if (std::isfinite(v)) { - min_diag = std::min(min_diag, v); - max_diag = std::max(max_diag, v); - } - - for (Eigen::Index j = 0; j < n; ++j) { - if (!std::isfinite(biomass_cov(i, j))) - finite_all = false; - } - } - - double max_abs_asymmetry = 0.0; - if (n > 0) { - max_abs_asymmetry = - (biomass_cov - biomass_cov.transpose()).cwiseAbs().maxCoeff(); - } - - bool ldlt_success = false; - double min_eigenvalue = std::numeric_limits::quiet_NaN(); - double max_eigenvalue = std::numeric_limits::quiet_NaN(); - - if (n > 0 && finite_all) { - Eigen::LDLT ldlt(biomass_cov); - ldlt_success = (ldlt.info() == Eigen::Success && - (ldlt.vectorD().array() > -1.0e-10).all()); - - Eigen::SelfAdjointEigenSolver eig( - 0.5 * (biomass_cov + biomass_cov.transpose())); - if (eig.info() == Eigen::Success) { - min_eigenvalue = eig.eigenvalues().minCoeff(); - max_eigenvalue = eig.eigenvalues().maxCoeff(); - } - } - - double mean_nearest_neighbor_corr = std::numeric_limits::quiet_NaN(); - double min_nearest_neighbor_corr = std::numeric_limits::quiet_NaN(); - double max_nearest_neighbor_corr = std::numeric_limits::quiet_NaN(); - - if (n > 1) { - double sum = 0.0; - int count = 0; - min_nearest_neighbor_corr = std::numeric_limits::infinity(); - max_nearest_neighbor_corr = -std::numeric_limits::infinity(); - - for (Eigen::Index i = 0; i + 1 < n; ++i) { - const double c = biomass_corr(i, i + 1); - if (std::isfinite(c)) { - sum += c; - ++count; - min_nearest_neighbor_corr = std::min(min_nearest_neighbor_corr, c); - max_nearest_neighbor_corr = std::max(max_nearest_neighbor_corr, c); - } - } - - if (count > 0) { - mean_nearest_neighbor_corr = sum / static_cast(count); - } - } - - double mean_lag2_corr = std::numeric_limits::quiet_NaN(); - if (n > 2) { - double sum = 0.0; - int count = 0; - for (Eigen::Index i = 0; i + 2 < n; ++i) { - const double c = biomass_corr(i, i + 2); - if (std::isfinite(c)) { - sum += c; - ++count; - } - } - if (count > 0) - mean_lag2_corr = sum / static_cast(count); - } - - double mean_lag5_corr = std::numeric_limits::quiet_NaN(); - if (n > 5) { - double sum = 0.0; - int count = 0; - for (Eigen::Index i = 0; i + 5 < n; ++i) { - const double c = biomass_corr(i, i + 5); - if (std::isfinite(c)) { - sum += c; - ++count; - } - } - if (count > 0) - mean_lag5_corr = sum / static_cast(count); - } - - const bool valid_covariance = - finite_all && positive_diag && max_abs_asymmetry < 1.0e-8 && - ldlt_success && std::isfinite(min_eigenvalue) && min_eigenvalue > -1.0e-8; - - auto emit = [&](const std::string &metric, const auto &value, - const std::string ¬e) { - out << metric << "," << value << "," << note << "\n"; - }; - - emit("n_years", n, "number of fitted biomass states in covariance block"); - emit("finite_all", finite_all ? "yes" : "no", - "all covariance entries finite"); - emit("positive_diagonal", positive_diag ? "yes" : "no", - "all variances positive"); - emit("valid_covariance", valid_covariance ? "yes" : "no", - "finite positive-diagonal symmetric positive-semidefinite check"); - emit("ldlt_success", ldlt_success ? "yes" : "no", - "dense LDLT check on biomass covariance matrix"); - emit("max_abs_asymmetry", max_abs_asymmetry, - "max absolute covariance asymmetry"); - emit("min_variance", min_diag, "minimum biomass variance"); - emit("max_variance", max_diag, "maximum biomass variance"); - emit("min_eigenvalue", min_eigenvalue, "self-adjoint eigenvalue diagnostic"); - emit("max_eigenvalue", max_eigenvalue, "self-adjoint eigenvalue diagnostic"); - emit("mean_nearest_neighbor_corr", mean_nearest_neighbor_corr, - "average Corr(B_t,B_tplus1)"); - emit("min_nearest_neighbor_corr", min_nearest_neighbor_corr, - "minimum Corr(B_t,B_tplus1)"); - emit("max_nearest_neighbor_corr", max_nearest_neighbor_corr, - "maximum Corr(B_t,B_tplus1)"); - emit("mean_lag2_corr", mean_lag2_corr, "average Corr(B_t,B_tplus2)"); - emit("mean_lag5_corr", mean_lag5_corr, "average Corr(B_t,B_tplus5)"); -} - -inline void write_biomass_correlation_decay_csv( - const std::string &path, - const std::vector &data, - const std::vector &u_hat, const Eigen::SparseMatrix &h_uu) { - std::ofstream out(path); - out << "lag,count,mean_correlation,min_correlation,max_correlation\n"; - - const Eigen::MatrixXd log_b_cov = - compute_log_b_covariance_submatrix(data, u_hat, h_uu); - const Eigen::MatrixXd biomass_cov = log_cov_to_biomass_cov(log_b_cov, u_hat); - const Eigen::MatrixXd biomass_corr = - quadra::uncertainty::covariance_to_correlation_matrix(biomass_cov); - - const Eigen::Index n = biomass_corr.rows(); - - for (Eigen::Index lag = 0; lag < n; ++lag) { - double sum = 0.0; - double min_corr = std::numeric_limits::infinity(); - double max_corr = -std::numeric_limits::infinity(); - int count = 0; - - for (Eigen::Index i = 0; i + lag < n; ++i) { - const double c = biomass_corr(i, i + lag); - if (std::isfinite(c)) { - sum += c; - min_corr = std::min(min_corr, c); - max_corr = std::max(max_corr, c); - ++count; - } - } - - const double mean_corr = count > 0 - ? sum / static_cast(count) - : std::numeric_limits::quiet_NaN(); - - out << lag << "," << count << "," << mean_corr << "," << min_corr << "," - << max_corr << "\n"; - } -} - -int main() { - using namespace opakapaka_example; - - std::cout << "Synthetic opakapaka-style fit + projection example\n"; - std::cout << "==================================================\n\n"; - std::cout - << "Synthetic and public-data-safe. Not an official assessment.\n\n"; - - auto data = - read_opakapaka_history_csv("examples/NMFS/pifsc_opakapaka/data/" - "synthetic_opakapaka_projection_data.csv"); - - std::cout << "Loaded shared CSV fit rows: " << data.size() << "\n\n"; - - OpakapakaProjectionModel model(data); - auto params = model.initial_parameters(); - - quadra::LaplaceOptions opts = quadra::default_laplace_options(); - - // Public Quadra workflow: - // instantiate model -> optimize_lbfgs -> inspect fit -> project - const auto fit_start = std::chrono::steady_clock::now(); - quadra::OptResult fit; - bool primary_optimizer_converged = false; - bool fallback_used = false; - std::string primary_optimizer_name = "profiled scalar Laplace"; - std::string primary_optimizer_status = "not run"; - double primary_optimizer_grad_norm = std::numeric_limits::quiet_NaN(); - -#ifndef OPAKAPAKA_USE_LBFGS_PRIMARY - // Opakapaka has one fixed effect and twenty random effects. For this - // geometry, the safeguarded profiled scalar Laplace optimizer is the - // appropriate primary optimizer: it directly optimizes log_q while profiling - // over the random effects and avoids quasi-Newton line-search pathologies. - fit = fit_log_q_fd_newton_fallback(model, params, opts, - params.params.at(0).value); - - if (fit.converged) { - fit.message = - "converged with safeguarded one-dimensional profiled log_q optimizer"; - } - - primary_optimizer_converged = fit.converged; - primary_optimizer_status = fit.message; - primary_optimizer_grad_norm = fit.grad_norm; -#else - primary_optimizer_name = "L-BFGS"; - try { - fit = quadra::optimize_lbfgs(model, params, opts); - primary_optimizer_converged = fit.converged; - primary_optimizer_status = fit.message; - primary_optimizer_grad_norm = fit.grad_norm; - } catch (const std::runtime_error &e) { - const std::string msg = e.what(); - if (msg.find("line search") == std::string::npos && - msg.find("sufficiently decrease") == std::string::npos) { - throw; - } - - fallback_used = true; - primary_optimizer_converged = false; - primary_optimizer_status = msg; - - std::cout << "L-BFGS line-search stall detected in Opakapaka example. " - << "Using local safeguarded one-dimensional log_q fallback.\n"; - - fit = fit_log_q_fd_newton_fallback(model, params, opts, - params.params.at(0).value); - } -#endif - - const double fit_value_before_polish = fit.value; - const double fit_grad_before_polish = fit.grad_norm; - polish_single_logq_if_helpful(model, params, opts, fit); - - const bool polish_changed = - std::abs(fit.value - fit_value_before_polish) > 1.0e-10 || - std::abs(fit.grad_norm - fit_grad_before_polish) > 1.0e-10; - -#ifdef OPAKAPAKA_USE_LBFGS_PRIMARY - fallback_used = fallback_used || polish_changed; -#else - // In the default build, scalar optimization is primary. Optional scalar - // polishing is still part of that primary scalar workflow, not a fallback. - fallback_used = false; - primary_optimizer_converged = fit.converged; - primary_optimizer_status = fit.message; - primary_optimizer_grad_norm = fit.grad_norm; -#endif - - const std::string convergence_status = - primary_optimizer_converged && !fallback_used - ? "primary_optimizer_converged" - : (fallback_used ? "fallback_polished" : "not_converged"); - - { - std::ofstream state_out( - "examples/NMFS/pifsc_opakapaka/outputs/quadra_fitted_states.csv"); - - state_out << "index,log_B,B\n"; - - for (std::size_t i = 0; i < fit.u_hat.size(); ++i) { - state_out << i << "," << std::setprecision(15) << fit.u_hat[i] << "," - << std::setprecision(15) << std::exp(fit.u_hat[i]) << "\n"; - } - } - - const auto fit_stop = std::chrono::steady_clock::now(); - const double fit_runtime_ms = - std::chrono::duration(fit_stop - fit_start).count(); - - ProjectionOptions projection_options; - projection_options.start_year = data.back().year + 1; - projection_options.years = 10; - projection_options.scenarios = { - {"zero_catch", 0.0}, - {"status_quo", 1.0}, - {"low_catch", 0.75}, - {"high_catch", 1.25}, - }; - - auto projection = model.project(fit, projection_options); - - const Eigen::SparseMatrix Huu_final = - compute_final_random_effect_hessian(model, params, opts, fit); - const int final_hessian_nonzeros = static_cast(Huu_final.nonZeros()); - - std::cout << "\nFit diagnostics\n"; - std::cout << "---------------\n"; - std::cout << std::fixed << std::setprecision(6); - std::cout << "objective " << fit.value << "\n"; - std::cout << "final_grad_norm " << fit.grad_norm << "\n"; - std::cout << "runtime_ms " << fit_runtime_ms << "\n"; - std::cout << "iterations " << fit.iterations << "\n"; - std::cout << "converged " - << ((fit.converged || fallback_used) ? "yes" : "no") << "\n"; - std::cout << "status " << convergence_status << "\n"; - std::cout << "primary_optimizer " << primary_optimizer_name << "\n"; - std::cout << "fallback_used " << (fallback_used ? "yes" : "no") << "\n"; - std::cout << "primary_converged " - << (primary_optimizer_converged ? "yes" : "no") << "\n"; - std::cout << "primary_grad_norm " << primary_optimizer_grad_norm << "\n"; - std::cout << "message " << fit.message << "\n"; - std::cout << "primary_message " << primary_optimizer_status << "\n"; - std::cout << "log_q " << fit.par.at(0) << "\n"; - std::cout << "q " << std::exp(fit.par.at(0)) << "\n"; - - const std::size_t reported_random_effects = - fit.u_hat.empty() - ? static_cast(fit.pattern.random_effect_count) - : fit.u_hat.size(); - - const bool pattern_available = - fit.pattern.available || fit.pattern.random_effect_count > 0 || - fit.pattern.nonzeros > 0 || final_hessian_nonzeros > 0; - - const std::string detected_structure = - fit.pattern.detected_structure.empty() || - fit.pattern.detected_structure == "unknown" - ? "sparse" - : fit.pattern.detected_structure; - - const std::string laplace_backend = - fit.pattern.backend.empty() || fit.pattern.backend == "unknown" - ? "final Huu reconstruction" - : fit.pattern.backend; - - const std::string random_solver = - fit.pattern.solver.empty() || fit.pattern.solver == "unknown" - ? "Laplace mode solve" - : fit.pattern.solver; - - std::cout << "\nOptimizer structure diagnostics\n"; - std::cout << "-------------------------------\n"; - std::cout << "random effects " << reported_random_effects << "\n"; - std::cout << "pattern available " << (pattern_available ? "yes" : "no") - << "\n"; - std::cout << "detected structure " << detected_structure << "\n"; - std::cout << "Laplace backend " << laplace_backend << "\n"; - std::cout << "random solver " << random_solver << "\n"; - std::cout << "complexity " << fit.pattern.complexity << "\n"; - std::cout << "bandwidth " << fit.pattern.bandwidth << "\n"; - std::cout << "Hessian nonzeros " << final_hessian_nonzeros << "\n"; - - std::cout << "\nProjection preview\n"; - std::cout << "------------------\n"; - std::cout << "scenario,year,catch_mt,biomass,index\n"; - int printed = 0; - for (const auto &row : projection) { - if (printed >= 12) { - break; - } - std::cout << row.scenario << "," << row.year << "," << row.catch_mt << "," - << row.biomass << "," << row.index << "\n"; - ++printed; - } - - write_fit_summary_csv( - "examples/NMFS/pifsc_opakapaka/outputs/synthetic_fit_summary.csv", fit); - - const auto logq_uncertainty = - compute_log_q_uncertainty_report(model, params, opts, fit); - - write_uncertainty_summary_csv( - "examples/NMFS/pifsc_opakapaka/outputs/uncertainty_summary.csv", - logq_uncertainty); - write_covariance_matrix_csv( - "examples/NMFS/pifsc_opakapaka/outputs/covariance_matrix.csv", - logq_uncertainty); - write_correlation_matrix_csv( - "examples/NMFS/pifsc_opakapaka/outputs/correlation_matrix.csv"); - write_standard_errors_csv( - "examples/NMFS/pifsc_opakapaka/outputs/standard_errors.csv", - logq_uncertainty); - write_confidence_intervals_csv( - "examples/NMFS/pifsc_opakapaka/outputs/confidence_intervals.csv", - logq_uncertainty); - const auto final_h_uu = - compute_final_random_effect_hessian(model, params, opts, fit); - write_random_effect_uncertainty_csv( - "examples/NMFS/pifsc_opakapaka/outputs/random_effect_uncertainty.csv", - fit.u_hat, final_h_uu); - write_derived_quantities_csv( - "examples/NMFS/pifsc_opakapaka/outputs/derived_quantities.csv", data, - fit.u_hat, std::exp(fit.par.at(0))); - const auto random_effect_covariance_diag = - quadra::uncertainty::selected_inverse_diagonal_from_spd_hessian( - final_h_uu); - write_derived_quantity_uncertainty_csv( - "examples/NMFS/pifsc_opakapaka/outputs/derived_quantity_uncertainty.csv", - data, fit.u_hat, std::exp(fit.par.at(0)), random_effect_covariance_diag, - final_h_uu); - - { - std::vector> depletion_covariance_pairs; - depletion_covariance_pairs.reserve(fit.u_hat.size()); - for (std::size_t i = 0; i < fit.u_hat.size(); ++i) { - depletion_covariance_pairs.emplace_back(static_cast(i), 0); - } - - const auto depletion_covariances = - quadra::uncertainty::selected_inverse_entries_from_spd_hessian( - final_h_uu, depletion_covariance_pairs); - - write_derived_quantity_correlation_csv( - "examples/NMFS/pifsc_opakapaka/outputs/" - "derived_quantity_correlation.csv", - data, random_effect_covariance_diag, depletion_covariances); - } - - write_biomass_covariance_matrix_csv( - "examples/NMFS/pifsc_opakapaka/outputs/biomass_covariance_matrix.csv", - data, fit.u_hat, final_h_uu); - - write_biomass_correlation_matrix_csv( - "examples/NMFS/pifsc_opakapaka/outputs/biomass_correlation_matrix.csv", - data, fit.u_hat, final_h_uu); - - write_biomass_covariance_diagnostics_csv( - "examples/NMFS/pifsc_opakapaka/outputs/" - "biomass_covariance_diagnostics.csv", - data, fit.u_hat, final_h_uu); - - write_biomass_correlation_decay_csv( - "examples/NMFS/pifsc_opakapaka/outputs/biomass_correlation_decay.csv", - data, fit.u_hat, final_h_uu); - - // Core uncertainty reporting parity outputs. - { - const std::size_t n = std::min(data.size(), fit.u_hat.size()); - const Eigen::MatrixXd log_b_cov_core = - compute_log_b_covariance_submatrix(data, fit.u_hat, final_h_uu); - Eigen::VectorXd log_b_core(static_cast(n)); - for (std::size_t i = 0; i < n; ++i) { - log_b_core[static_cast(i)] = fit.u_hat[i]; - } - - const Eigen::MatrixXd biomass_cov_core = - quadra::uncertainty::lognormal_delta_covariance(log_b_core, - log_b_cov_core); - const Eigen::MatrixXd biomass_corr_core = - quadra::uncertainty::covariance_to_correlation_matrix(biomass_cov_core); - - const auto biomass_cov_diag_core = - quadra::uncertainty::diagnose_covariance_matrix(biomass_cov_core); - quadra::uncertainty::write_covariance_diagnostics_csv( - "examples/NMFS/pifsc_opakapaka/outputs/" - "biomass_covariance_diagnostics_core.csv", - biomass_cov_diag_core); - - const auto biomass_decay_core = - quadra::uncertainty::correlation_decay_summary(biomass_corr_core); - quadra::uncertainty::write_correlation_decay_csv( - "examples/NMFS/pifsc_opakapaka/outputs/" - "biomass_correlation_decay_core.csv", - biomass_decay_core); - } - { - const double terminal_log_b_variance = - (!random_effect_covariance_diag.variance.empty()) - ? random_effect_covariance_diag.variance.back() - : std::numeric_limits::quiet_NaN(); - - write_projection_uncertainty_envelopes_csv( - "examples/NMFS/pifsc_opakapaka/outputs/projection_uncertainty.csv", - projection, fit.u_hat, std::exp(fit.par.at(0)), terminal_log_b_variance, - 1000); - } - write_runtime_memory_summary_csv( - "examples/NMFS/pifsc_opakapaka/outputs/runtime_memory_summary.csv", - std::numeric_limits::quiet_NaN(), fit.u_hat.size(), 58); - - write_projection_csv("examples/NMFS/pifsc_opakapaka/outputs/" - "synthetic_projection_scenarios.csv", - projection); - - std::cout << "\nWrote outputs:\n"; - std::cout << " examples/NMFS/pifsc_opakapaka/outputs/" - "synthetic_fit_summary.csv\n"; - std::cout << " examples/NMFS/pifsc_opakapaka/outputs/" - "synthetic_projection_scenarios.csv\n"; - - return 0; -} diff --git a/examples/NMFS/pifsc_opakapaka/reference_points/opakapaka_reference_points.hpp b/examples/NMFS/pifsc_opakapaka/reference_points/opakapaka_reference_points.hpp new file mode 100644 index 0000000..157eebe --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/reference_points/opakapaka_reference_points.hpp @@ -0,0 +1,97 @@ +#pragma once + +#include "../quadra/opakapaka_model.hpp" + +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +struct OpakapakaReferencePoints { + double q = std::numeric_limits::quiet_NaN(); + double r = std::numeric_limits::quiet_NaN(); + double K = std::numeric_limits::quiet_NaN(); + + double B_MSY = std::numeric_limits::quiet_NaN(); + double F_MSY = std::numeric_limits::quiet_NaN(); + double MSY = std::numeric_limits::quiet_NaN(); + + double B_terminal = std::numeric_limits::quiet_NaN(); + double B_terminal_over_B_MSY = std::numeric_limits::quiet_NaN(); + double F_status_quo = std::numeric_limits::quiet_NaN(); + double F_status_quo_over_F_MSY = std::numeric_limits::quiet_NaN(); +}; + +inline OpakapakaReferencePoints +compute_opakapaka_reference_points(const quadra::OptResult &fit, + const std::vector &data) { + OpakapakaReferencePoints out; + + if (fit.par.size() < 3 || fit.u_hat.empty()) { + return out; + } + + out.q = std::exp(fit.par.at(0)); + out.r = std::exp(fit.par.at(1)); + out.K = std::exp(fit.par.at(2)); + + out.B_MSY = 0.5 * out.K; + out.F_MSY = 0.5 * out.r; + out.MSY = 0.25 * out.r * out.K; + + out.B_terminal = std::exp(fit.u_hat.back()); + if (std::isfinite(out.B_MSY) && out.B_MSY > 0.0) { + out.B_terminal_over_B_MSY = out.B_terminal / out.B_MSY; + } + + if (!data.empty() && std::isfinite(out.B_terminal) && out.B_terminal > 0.0) { + const double recent_catch = data.back().catch_mt; + out.F_status_quo = recent_catch / out.B_terminal; + if (std::isfinite(out.F_MSY) && out.F_MSY > 0.0) { + out.F_status_quo_over_F_MSY = out.F_status_quo / out.F_MSY; + } + } + + return out; +} + +inline void +write_opakapaka_reference_points_csv(const std::string &path, + const OpakapakaReferencePoints &rp) { + std::ofstream out(path); + out << "quantity,value,note\n"; + out << "q," << rp.q << ",catchability estimate\n"; + out << "r," << rp.r << ",intrinsic growth rate estimate\n"; + out << "K," << rp.K << ",carrying capacity estimate\n"; + out << "B_MSY," << rp.B_MSY + << ",Schaefer surplus-production biomass at MSY equals K/2\n"; + out << "F_MSY," << rp.F_MSY + << ",Schaefer surplus-production fishing mortality proxy equals r/2\n"; + out << "MSY," << rp.MSY + << ",Schaefer surplus-production maximum sustainable yield equals " + "r*K/4\n"; + out << "B_terminal," << rp.B_terminal << ",terminal fitted biomass state\n"; + out << "B_terminal_over_B_MSY," << rp.B_terminal_over_B_MSY + << ",terminal biomass relative to B_MSY\n"; + out << "F_status_quo," << rp.F_status_quo + << ",recent catch divided by terminal biomass\n"; + out << "F_status_quo_over_F_MSY," << rp.F_status_quo_over_F_MSY + << ",status quo fishing mortality proxy relative to F_MSY\n"; +} + +inline void +write_opakapaka_reference_points_csv(const std::string &path, + const quadra::OptResult &fit, + const std::vector &data) { + write_opakapaka_reference_points_csv( + path, compute_opakapaka_reference_points(fit, data)); +} + +} // namespace opakapaka_example + +using opakapaka_example::compute_opakapaka_reference_points; +using opakapaka_example::OpakapakaReferencePoints; +using opakapaka_example::write_opakapaka_reference_points_csv; diff --git a/examples/NMFS/pifsc_opakapaka/reports/opakapaka_report_suite.hpp b/examples/NMFS/pifsc_opakapaka/reports/opakapaka_report_suite.hpp new file mode 100644 index 0000000..30a0adb --- /dev/null +++ b/examples/NMFS/pifsc_opakapaka/reports/opakapaka_report_suite.hpp @@ -0,0 +1,329 @@ +#pragma once + +#include "../diagnostics/opakapaka_biomass_covariance_diagnostics.hpp" +#include "../diagnostics/opakapaka_logq_diagnostics.hpp" +#include "../diagnostics/opakapaka_projection_uncertainty.hpp" +#include "../diagnostics/opakapaka_random_effect_diagnostics.hpp" +#include "../quadra/opakapaka_model.hpp" + +#include "../../../../core/uncertainty/reporting.hpp" +#include "../../../../core/uncertainty/selected_inverse_diagonal.hpp" + +#include +#include + +#include "../reference_points/opakapaka_reference_points.hpp" +#include +#include +#include +#include +#include +#include + +namespace opakapaka_example { + +inline void write_derived_quantities_csv( + const std::string &path, + const std::vector &data, + const std::vector &u_hat, double q_hat) { + std::ofstream out(path); + out << "year,biomass,index_hat,depletion,F_proxy\n"; + const double b0 = u_hat.empty() ? std::numeric_limits::quiet_NaN() + : std::exp(u_hat.front()); + for (std::size_t i = 0; i < data.size() && i < u_hat.size(); ++i) { + const double biomass = std::exp(u_hat[i]); + const double depletion = + b0 > 0.0 ? biomass / b0 : std::numeric_limits::quiet_NaN(); + const double f_proxy = biomass > 0.0 + ? data[i].catch_mt / biomass + : std::numeric_limits::quiet_NaN(); + out << data[i].year << "," << biomass << "," << q_hat * biomass << "," + << depletion << "," << f_proxy << "\n"; + } +} + +inline void write_derived_quantity_uncertainty_csv( + const std::string &path, + const std::vector &data, + const std::vector &u_hat, double q_hat, + const quadra::uncertainty::SelectedInverseDiagonalResult &u_cov, + const Eigen::SparseMatrix &h_uu) { + std::ofstream out(path); + out << "year,quantity,estimate,se,lwr_95,upr_95,note\n"; + + if (u_hat.empty() || data.empty()) { + return; + } + + const double b0 = std::exp(u_hat.front()); + const double var_log_b0 = (u_cov.success && !u_cov.variance.empty()) + ? u_cov.variance.front() + : std::numeric_limits::quiet_NaN(); + + // QUADRA_OPAKAPAKA_DEPLETION_COVARIANCE_PAIRS_V1 + // Request Cov(log_B[t], log_B[0]) so depletion uncertainty uses: + // Var(log(B_t/B_0)) = Var(log_B_t) + Var(log_B_0) - 2 Cov(log_B_t, log_B_0). + std::vector> depletion_covariance_pairs; + depletion_covariance_pairs.reserve(u_hat.size()); + for (std::size_t i = 0; i < u_hat.size(); ++i) { + depletion_covariance_pairs.emplace_back(static_cast(i), 0); + } + + const auto depletion_covariances = + quadra::uncertainty::selected_inverse_entries_from_spd_hessian( + h_uu, depletion_covariance_pairs); + + for (std::size_t i = 0; i < data.size() && i < u_hat.size(); ++i) { + const double log_b = u_hat[i]; + const double biomass = std::exp(log_b); + const double index_hat = q_hat * biomass; + const double depletion = + b0 > 0.0 ? biomass / b0 : std::numeric_limits::quiet_NaN(); + const double f_proxy = biomass > 0.0 + ? data[i].catch_mt / biomass + : std::numeric_limits::quiet_NaN(); + + const double var_log_b = (u_cov.success && i < u_cov.variance.size()) + ? u_cov.variance[i] + : std::numeric_limits::quiet_NaN(); + + const double se_biomass = (std::isfinite(var_log_b) && var_log_b >= 0.0) + ? biomass * std::sqrt(var_log_b) + : std::numeric_limits::quiet_NaN(); + + const double se_index = (std::isfinite(var_log_b) && var_log_b >= 0.0) + ? index_hat * std::sqrt(var_log_b) + : std::numeric_limits::quiet_NaN(); + + double cov_log_b_i_b0 = std::numeric_limits::quiet_NaN(); + if (depletion_covariances.success && + i < depletion_covariances.entries.size()) { + cov_log_b_i_b0 = depletion_covariances.entries[i].covariance; + } + + const double var_log_depletion = + (std::isfinite(var_log_b) && std::isfinite(var_log_b0) && + std::isfinite(cov_log_b_i_b0)) + ? var_log_b + var_log_b0 - 2.0 * cov_log_b_i_b0 + : std::numeric_limits::quiet_NaN(); + + const double se_depletion = + (std::isfinite(var_log_depletion) && var_log_depletion >= 0.0) + ? depletion * std::sqrt(var_log_depletion) + : std::numeric_limits::quiet_NaN(); + + const double se_f_proxy = (std::isfinite(var_log_b) && var_log_b >= 0.0) + ? f_proxy * std::sqrt(var_log_b) + : std::numeric_limits::quiet_NaN(); + + auto write_row = [&](const char *quantity, double estimate, double se, + const char *note) { + const double lwr = std::isfinite(se) + ? estimate - 1.96 * se + : std::numeric_limits::quiet_NaN(); + const double upr = std::isfinite(se) + ? estimate + 1.96 * se + : std::numeric_limits::quiet_NaN(); + out << data[i].year << "," << quantity << "," << estimate << "," << se + << "," << lwr << "," << upr << "," << note << "\n"; + }; + + write_row("biomass", biomass, se_biomass, + "level1_delta_method_conditional_random_effect_diagonal"); + write_row("index_hat", index_hat, se_index, + "level1_delta_method_conditional_random_effect_diagonal"); + write_row("depletion", depletion, se_depletion, + "level1_delta_method_selected_inverse_cov_logBt_logB0"); + write_row("F_proxy", f_proxy, se_f_proxy, + "level1_delta_method_conditional_random_effect_diagonal"); + } +} + +inline void write_derived_quantity_correlation_csv( + const std::string &path, + const std::vector &data, + const quadra::uncertainty::SelectedInverseDiagonalResult &u_cov, + const quadra::uncertainty::SelectedInverseEntriesResult + &depletion_covariances) { + std::ofstream out(path); + out << "year,variance_logB0,variance_logBt,covariance_logBt_logB0," + << "correlation_logBt_logB0,note\n"; + + const double var_log_b0 = (u_cov.success && !u_cov.variance.empty()) + ? u_cov.variance.front() + : std::numeric_limits::quiet_NaN(); + + const std::size_t n = std::min(data.size(), u_cov.variance.size()); + + for (std::size_t i = 0; i < n; ++i) { + const double var_log_bt = u_cov.variance[i]; + + double cov_log_bt_b0 = std::numeric_limits::quiet_NaN(); + if (depletion_covariances.success && + i < depletion_covariances.entries.size()) { + cov_log_bt_b0 = depletion_covariances.entries[i].covariance; + } + + double corr = std::numeric_limits::quiet_NaN(); + if (std::isfinite(var_log_b0) && std::isfinite(var_log_bt) && + std::isfinite(cov_log_bt_b0) && var_log_b0 > 0.0 && var_log_bt > 0.0) { + corr = cov_log_bt_b0 / std::sqrt(var_log_b0 * var_log_bt); + + // Guard tiny numerical drift outside [-1, 1]. + if (corr > 1.0 && corr < 1.0 + 1.0e-10) + corr = 1.0; + if (corr < -1.0 && corr > -1.0 - 1.0e-10) + corr = -1.0; + } + + out << data[i].year << "," << var_log_b0 << "," << var_log_bt << "," + << cov_log_bt_b0 << "," << corr << "," + << "selected_inverse_covariance_diagnostic_logBt_logB0\n"; + } +} + +inline void write_runtime_memory_summary_csv(const std::string &path, + double runtime_ms, + std::size_t random_effects, + std::size_t hessian_nonzeros) { + std::ofstream out(path); + out << "field,value\n"; + out << "fit_runtime_ms," << runtime_ms << "\n"; + out << "random_effects," << random_effects << "\n"; + out << "hessian_nonzeros," << hessian_nonzeros << "\n"; + out << "peak_rss_mb,\n"; + out << "note,peak RSS is captured by benchmark runner rather than model " + "executable\n"; +} + +template +inline void write_opakapaka_report_suite( + Model &model, quadra::ParameterVector ¶ms, quadra::LaplaceOptions &opts, + const quadra::OptResult &fit, const std::vector &data, + const std::vector &projection, + const Eigen::SparseMatrix &final_h_uu) { + write_fit_summary_csv( + "examples/NMFS/pifsc_opakapaka/outputs/synthetic_fit_summary.csv", fit); + + if (fit.par.size() == 1) { + const auto logq_uncertainty = + compute_log_q_uncertainty_report(model, params, opts, fit); + + write_uncertainty_summary_csv( + "examples/NMFS/pifsc_opakapaka/outputs/uncertainty_summary.csv", + logq_uncertainty); + write_covariance_matrix_csv( + "examples/NMFS/pifsc_opakapaka/outputs/covariance_matrix.csv", + logq_uncertainty); + write_correlation_matrix_csv( + "examples/NMFS/pifsc_opakapaka/outputs/correlation_matrix.csv"); + write_standard_errors_csv( + "examples/NMFS/pifsc_opakapaka/outputs/standard_errors.csv", + logq_uncertainty); + write_confidence_intervals_csv( + "examples/NMFS/pifsc_opakapaka/outputs/confidence_intervals.csv", + logq_uncertainty); + } + + write_random_effect_uncertainty_csv( + "examples/NMFS/pifsc_opakapaka/outputs/random_effect_uncertainty.csv", + fit.u_hat, final_h_uu); + + write_derived_quantities_csv( + "examples/NMFS/pifsc_opakapaka/outputs/derived_quantities.csv", data, + fit.u_hat, std::exp(fit.par.at(0))); + + const auto random_effect_covariance_diag = + quadra::uncertainty::selected_inverse_diagonal_from_spd_hessian( + final_h_uu); + + write_derived_quantity_uncertainty_csv( + "examples/NMFS/pifsc_opakapaka/outputs/derived_quantity_uncertainty.csv", + data, fit.u_hat, std::exp(fit.par.at(0)), random_effect_covariance_diag, + final_h_uu); + + { + std::vector> depletion_covariance_pairs; + depletion_covariance_pairs.reserve(fit.u_hat.size()); + for (std::size_t i = 0; i < fit.u_hat.size(); ++i) { + depletion_covariance_pairs.emplace_back(static_cast(i), 0); + } + + const auto depletion_covariances = + quadra::uncertainty::selected_inverse_entries_from_spd_hessian( + final_h_uu, depletion_covariance_pairs); + + write_derived_quantity_correlation_csv( + "examples/NMFS/pifsc_opakapaka/outputs/" + "derived_quantity_correlation.csv", + data, random_effect_covariance_diag, depletion_covariances); + } + + write_biomass_covariance_matrix_csv( + "examples/NMFS/pifsc_opakapaka/outputs/biomass_covariance_matrix.csv", + data, fit.u_hat, final_h_uu); + + write_biomass_correlation_matrix_csv( + "examples/NMFS/pifsc_opakapaka/outputs/biomass_correlation_matrix.csv", + data, fit.u_hat, final_h_uu); + + write_biomass_covariance_diagnostics_csv( + "examples/NMFS/pifsc_opakapaka/outputs/" + "biomass_covariance_diagnostics.csv", + data, fit.u_hat, final_h_uu); + + write_biomass_correlation_decay_csv( + "examples/NMFS/pifsc_opakapaka/outputs/biomass_correlation_decay.csv", + data, fit.u_hat, final_h_uu); + + { + const std::size_t n = std::min(data.size(), fit.u_hat.size()); + const Eigen::MatrixXd log_b_cov = + compute_log_b_covariance_submatrix(data, fit.u_hat, final_h_uu); + Eigen::VectorXd log_b_core(static_cast(n)); + for (std::size_t i = 0; i < n; ++i) { + log_b_core[static_cast(i)] = fit.u_hat[i]; + } + const Eigen::MatrixXd biomass_cov_core = + quadra::uncertainty::lognormal_delta_covariance(log_b_core, log_b_cov); + const Eigen::MatrixXd biomass_corr_core = + quadra::uncertainty::covariance_to_correlation_matrix(biomass_cov_core); + const auto biomass_diag_core = + quadra::uncertainty::diagnose_covariance_matrix(biomass_cov_core); + quadra::uncertainty::write_covariance_diagnostics_csv( + "examples/NMFS/pifsc_opakapaka/outputs/" + "biomass_covariance_diagnostics_core.csv", + biomass_diag_core); + const auto biomass_decay_core = + quadra::uncertainty::correlation_decay_summary(biomass_corr_core); + quadra::uncertainty::write_correlation_decay_csv( + "examples/NMFS/pifsc_opakapaka/outputs/" + "biomass_correlation_decay_core.csv", + biomass_decay_core); + } + + const double terminal_log_b_variance = + (!random_effect_covariance_diag.variance.empty()) + ? random_effect_covariance_diag.variance.back() + : std::numeric_limits::quiet_NaN(); + + write_projection_uncertainty_envelopes_csv( + "examples/NMFS/pifsc_opakapaka/outputs/projection_uncertainty.csv", + projection, fit.u_hat, std::exp(fit.par.at(0)), terminal_log_b_variance, + 1000); + + write_runtime_memory_summary_csv( + "examples/NMFS/pifsc_opakapaka/outputs/runtime_memory_summary.csv", + std::numeric_limits::quiet_NaN(), fit.u_hat.size(), 58); + + write_opakapaka_reference_points_csv( + "examples/NMFS/pifsc_opakapaka/outputs/reference_points.csv", fit, data); + + write_projection_csv("examples/NMFS/pifsc_opakapaka/outputs/" + "synthetic_projection_scenarios.csv", + projection); +} + +} // namespace opakapaka_example + +using opakapaka_example::write_opakapaka_report_suite; diff --git a/examples/NMFS/sefsc_red_snapper/diagnostics/red_snapper_functional_analysis_diagnostics.hpp b/examples/NMFS/sefsc_red_snapper/diagnostics/red_snapper_functional_analysis_diagnostics.hpp new file mode 100644 index 0000000..52474c4 --- /dev/null +++ b/examples/NMFS/sefsc_red_snapper/diagnostics/red_snapper_functional_analysis_diagnostics.hpp @@ -0,0 +1,169 @@ +#pragma once + +#include "../objective/red_snapper_quadra_objective.hpp" + +#include "../../../../core/laplace/functional_analysis_report.hpp" +#include "../../../../core/laplace/laplace_structure_report.hpp" +#include "../../../../core/optimizer.hpp" + +#include + +#include +#include +#include + +namespace sefsc_red_snapper { + +inline std::size_t +red_snapper_max_fixed_gradient_index(const quadra::OptResult &fit) { + std::size_t max_i = 0; + double max_abs = -1.0; + + for (std::size_t i = 0; i < fit.fixed_gradient.size(); ++i) { + const double a = std::abs(fit.fixed_gradient[i]); + if (std::isfinite(a) && a > max_abs) { + max_abs = a; + max_i = i; + } + } + + return max_i; +} + +template +Eigen::MatrixXd red_snapper_fd_huu(Objective &objective, + const quadra::ParameterVector & /*params*/, + const quadra::OptResult &fit, + double rel_step = 1.0e-4) { + const std::size_t n_fixed = fit.par.size(); + const std::size_t n_random = fit.u_hat.size(); + + Eigen::MatrixXd H = Eigen::MatrixXd::Zero( + static_cast(n_random), static_cast(n_random)); + + if (n_fixed == 0 || n_random == 0) { + return H; + } + + auto make_x = [&](const std::vector &u) { + std::vector x; + x.reserve(n_fixed + n_random); + for (double v : fit.par) { + x.push_back(v); + } + for (double v : u) { + x.push_back(v); + } + return x; + }; + + auto eval_u = [&](const std::vector &u) { + return static_cast(objective(make_x(u))); + }; + + const std::vector u0 = fit.u_hat; + const double f0 = eval_u(u0); + + std::vector hi(n_random); + for (std::size_t i = 0; i < n_random; ++i) { + hi[i] = std::max(1.0e-5, rel_step * (1.0 + std::abs(u0[i]))); + } + + for (std::size_t i = 0; i < n_random; ++i) { + auto up = u0; + auto um = u0; + up[i] += hi[i]; + um[i] -= hi[i]; + + const double fp = eval_u(up); + const double fm = eval_u(um); + + H(static_cast(i), static_cast(i)) = + (fp - 2.0 * f0 + fm) / (hi[i] * hi[i]); + } + + for (std::size_t i = 0; i < n_random; ++i) { + for (std::size_t j = i + 1; j < n_random; ++j) { + auto xpp = u0; + auto xpm = u0; + auto xmp = u0; + auto xmm = u0; + + xpp[i] += hi[i]; + xpp[j] += hi[j]; + xpm[i] += hi[i]; + xpm[j] -= hi[j]; + xmp[i] -= hi[i]; + xmp[j] += hi[j]; + xmm[i] -= hi[i]; + xmm[j] -= hi[j]; + + const double fpp = eval_u(xpp); + const double fpm = eval_u(xpm); + const double fmp = eval_u(xmp); + const double fmm = eval_u(xmm); + + const double hij = (fpp - fpm - fmp + fmm) / (4.0 * hi[i] * hi[j]); + + H(static_cast(i), static_cast(j)) = hij; + H(static_cast(j), static_cast(i)) = hij; + } + } + + return H; +} + +template +void write_red_snapper_laplace_structure_report( + const std::string &text_path, const std::string &csv_path, + Objective &objective, const quadra::ParameterVector ¶ms, + const quadra::OptResult &fit, double nonzero_tol = 1.0e-8) { + const Eigen::MatrixXd H = red_snapper_fd_huu(objective, params, fit); + const auto report = + quadra::summarize_laplace_hessian_structure(H, nonzero_tol); + + quadra::write_laplace_structure_report_text(report, text_path); + quadra::write_laplace_structure_report_csv(report, csv_path); +} + +template +void write_red_snapper_functional_analysis_report( + const std::string &text_path, const std::string &csv_path, + Objective &objective, const quadra::ParameterVector ¶ms, + const quadra::OptResult &fit, double nonzero_tol = 1.0e-8) { + const Eigen::MatrixXd H = red_snapper_fd_huu(objective, params, fit); + + quadra::FunctionalOptimizationSummary opt; + opt.objective_value = fit.value; + opt.gradient_norm = fit.grad_norm; + opt.iterations = fit.iterations; + opt.converged = fit.converged; + opt.message = fit.message; + + if (!fit.fixed_gradient.empty()) { + const std::size_t max_i = red_snapper_max_fixed_gradient_index(fit); + opt.max_gradient_parameter = (max_i < fit.fixed_gradient_names.size()) + ? fit.fixed_gradient_names[max_i] + : ("fixed_" + std::to_string(max_i)); + opt.max_gradient_value = fit.fixed_gradient[max_i]; + opt.max_abs_gradient = std::abs(fit.fixed_gradient[max_i]); + } + + std::vector random_names; + random_names.reserve(fit.u_hat.size()); + for (std::size_t i = 0; i < fit.u_hat.size(); ++i) { + random_names.push_back("log_rec_dev_" + std::to_string(i + 1)); + } + + auto report = quadra::make_functional_analysis_report( + opt, H, fit.u_hat, nonzero_tol, random_names); + + quadra::write_functional_analysis_report_text(report, text_path); + quadra::write_functional_analysis_report_csv(report, csv_path); +} + +} // namespace sefsc_red_snapper + +using sefsc_red_snapper::red_snapper_fd_huu; +using sefsc_red_snapper::write_red_snapper_functional_analysis_report; +using sefsc_red_snapper::write_red_snapper_laplace_structure_report; diff --git a/examples/NMFS/sefsc_red_snapper/objective/red_snapper_quadra_objective.hpp b/examples/NMFS/sefsc_red_snapper/objective/red_snapper_quadra_objective.hpp new file mode 100644 index 0000000..c9cfd97 --- /dev/null +++ b/examples/NMFS/sefsc_red_snapper/objective/red_snapper_quadra_objective.hpp @@ -0,0 +1,222 @@ +#pragma once + +#include "../quadra/red_snapper_age_structured.hpp" + +#include +#include +#include +#include + +namespace sefsc_red_snapper { + +template T exp_t(const T &x) { + using std::exp; + return exp(x); +} + +template T log_t(const T &x) { + using std::log; + return log(x); +} + +template T invlogit_t(const T &x) { + return T(1.0) / (T(1.0) + exp_t(-x)); +} + +template T max_t(const T &x, double floor) { + return x > T(floor) ? x : T(floor); +} + +template T square_t(const T &x) { return x * x; } + +template +T logistic_selectivity_t(const T &age, const T &a50, const T &slope) { + return T(1.0) / (T(1.0) + exp_t(-slope * (age - a50))); +} + +template +T age_comp_nll(const std::array &observed, + const std::array &predicted, double effective_n, + double floor = 1.0e-12) { + T nll = T(0.0); + for (int a = 0; a < kAges; ++a) { + const auto i = static_cast(a); + const double obs = std::max(observed[i], 0.0); + if (obs > 0.0) { + nll = nll - T(effective_n * obs) * log_t(max_t(predicted[i], floor)); + } + } + return nll; +} + +class RedSnapperQuadraObjective { +public: + explicit RedSnapperQuadraObjective(std::vector observations) + : observations_(std::move(observations)) {} + + template T operator()(const std::vector &par) const { + if (par.size() < 5 + observations_.size()) { + throw std::runtime_error("RedSnapperQuadraObjective expected parameters: " + "log_r0, log_fbar, log_q"); + } + + const T log_r0 = par[0]; + const T log_fbar = par[1]; + const T log_q = par[2]; + const T logit_sel_a50 = par[3]; + const T log_sel_slope = par[4]; + + const T r0 = exp_t(log_r0); + const T m = T(0.18); + const T fbar = exp_t(log_fbar); + const T q = exp_t(log_q); + const T sel_a50 = T(1.0) + T(9.0) * invlogit_t(logit_sel_a50); + const T sel_slope = exp_t(log_sel_slope); + + const T sigma_log_index = T(0.20); + const T sigma_log_catch = T(0.15); + + const T sigma_rec_dev = T(0.35); + const double age_comp_effective_n = 2.0; + const double min_positive = 1.0e-12; + + const auto weight = default_weight_at_age(); + const auto maturity = default_maturity_at_age(); + + std::array selectivity{}; + for (int a = 0; a < kAges; ++a) { + selectivity[static_cast(a)] = + logistic_selectivity_t(T(a + 1), sel_a50, sel_slope); + } + + std::array n{}; + n[0] = r0; + for (int a = 1; a < kAges; ++a) { + n[static_cast(a)] = + n[static_cast(a - 1)] * exp_t(-m); + } + n[static_cast(kAges - 1)] = + n[static_cast(kAges - 1)] / (T(1.0) - exp_t(-m)); + + T nll = T(0.0); + T fixed_prior_nll = T(0.0); + T rec_prior_nll = T(0.0); + T index_nll = T(0.0); + T catch_nll = T(0.0); + T age_comp_nll_total = T(0.0); + + auto normal_prior = [](const T &x, double mean, double sd) { + const T z = (x - T(mean)) / T(sd); + return T(0.5) * z * z; + }; + + fixed_prior_nll = + fixed_prior_nll + normal_prior(log_r0, std::log(1200.0), 1.0); + fixed_prior_nll = + fixed_prior_nll + normal_prior(log_fbar, std::log(0.025), 0.75); + fixed_prior_nll = + fixed_prior_nll + normal_prior(log_q, std::log(0.00005), 1.0); + fixed_prior_nll = fixed_prior_nll + normal_prior(sel_a50, 4.0, 0.75); + fixed_prior_nll = + fixed_prior_nll + normal_prior(log_sel_slope, std::log(1.2), 0.35); + + nll = nll + fixed_prior_nll; + + for (std::size_t t = 0; t < observations_.size(); ++t) { + + const auto &obs = observations_[t]; + + const T rec_dev = par[5 + t]; + + { + T term = T(0.5) * square_t(rec_dev / sigma_rec_dev); + rec_prior_nll = rec_prior_nll + term; + nll = nll + term; + } + T biomass = T(0.0); + for (int a = 0; a < kAges; ++a) { + biomass = biomass + n[static_cast(a)] * + T(weight[static_cast(a)]); + } + + T catch_hat = T(0.0); + for (int a = 0; a < kAges; ++a) { + const auto i = static_cast(a); + const T f_a = fbar * selectivity[i]; + const T z_a = m + f_a; + const T harvest_rate = (f_a / z_a) * (T(1.0) - exp_t(-z_a)); + catch_hat = catch_hat + n[i] * T(weight[i]) * harvest_rate; + } + + const T index_hat = q * biomass; + + if (obs.index > 0.0) { + const T z = + (log_t(T(obs.index)) - log_t(max_t(index_hat, min_positive))) / + sigma_log_index; + { + T term = T(0.5) * square_t(z); + index_nll = index_nll + term; + nll = nll + term; + } + } + + if (obs.catch_mt > 0.0) { + const T z = + (log_t(T(obs.catch_mt)) - log_t(max_t(catch_hat, min_positive))) / + sigma_log_catch; + { + T term = T(0.5) * square_t(z); + catch_nll = catch_nll + term; + nll = nll + term; + } + } + + std::array pred_age_comp{}; + T selected_numbers_sum = T(0.0); + for (int a = 0; a < kAges; ++a) { + const auto i = static_cast(a); + pred_age_comp[i] = n[i] * selectivity[i]; + selected_numbers_sum = selected_numbers_sum + pred_age_comp[i]; + } + for (int a = 0; a < kAges; ++a) { + const auto i = static_cast(a); + pred_age_comp[i] = + pred_age_comp[i] / max_t(selected_numbers_sum, min_positive); + } + + { + T term = age_comp_nll(obs.age_comp, pred_age_comp, age_comp_effective_n, + min_positive); + age_comp_nll_total = age_comp_nll_total + term; + nll = nll + term; + } + + std::array next{}; + next[0] = r0 * exp_t(rec_dev); + + for (int a = 1; a < kAges; ++a) { + const auto prev = static_cast(a - 1); + const T f_prev = fbar * selectivity[prev]; + const T z_prev = m + f_prev; + next[static_cast(a)] = n[prev] * exp_t(-z_prev); + } + + const auto last = static_cast(kAges - 1); + const T f_last = fbar * selectivity[last]; + const T z_last = m + f_last; + next[last] = next[last] + n[last] * exp_t(-z_last); + + n = next; + } + + return nll; + } + +private: + std::vector observations_; +}; + +} // namespace sefsc_red_snapper + +using sefsc_red_snapper::RedSnapperQuadraObjective; diff --git a/examples/NMFS/sefsc_red_snapper/quadra/red_snapper_quadra_fit.cpp b/examples/NMFS/sefsc_red_snapper/quadra/red_snapper_quadra_fit.cpp index 8803a53..303b47d 100644 --- a/examples/NMFS/sefsc_red_snapper/quadra/red_snapper_quadra_fit.cpp +++ b/examples/NMFS/sefsc_red_snapper/quadra/red_snapper_quadra_fit.cpp @@ -1,3 +1,6 @@ +#include "../diagnostics/red_snapper_functional_analysis_diagnostics.hpp" +#include "../objective/red_snapper_quadra_objective.hpp" +#include "../reports/red_snapper_report_suite.hpp" #include "red_snapper_age_structured.hpp" #include "../../../../core/optimizer.hpp" @@ -10,581 +13,11 @@ #include #include -namespace sefsc_red_snapper { - -template T exp_t(const T &x) { - using std::exp; - return exp(x); -} - -template T log_t(const T &x) { - using std::log; - return log(x); -} - -template T invlogit_t(const T &x) { - return T(1.0) / (T(1.0) + exp_t(-x)); -} - -template T max_t(const T &x, double floor) { - return x > T(floor) ? x : T(floor); -} - -template T square_t(const T &x) { return x * x; } - -template -T logistic_selectivity_t(const T &age, const T &a50, const T &slope) { - return T(1.0) / (T(1.0) + exp_t(-slope * (age - a50))); -} - -template -T age_comp_nll(const std::array &observed, - const std::array &predicted, double effective_n, - double floor = 1.0e-12) { - T nll = T(0.0); - for (int a = 0; a < kAges; ++a) { - const auto i = static_cast(a); - const double obs = std::max(observed[i], 0.0); - if (obs > 0.0) { - nll = nll - T(effective_n * obs) * log_t(max_t(predicted[i], floor)); - } - } - return nll; -} - -class RedSnapperQuadraObjective { -public: - explicit RedSnapperQuadraObjective(std::vector observations) - : observations_(std::move(observations)) {} - - template T operator()(const std::vector &par) const { - if (par.size() < 5 + observations_.size()) { - throw std::runtime_error("RedSnapperQuadraObjective expected parameters: " - "log_r0, log_fbar, log_q"); - } - - const T log_r0 = par[0]; - const T log_fbar = par[1]; - const T log_q = par[2]; - const T logit_sel_a50 = par[3]; - const T log_sel_slope = par[4]; - - const T r0 = exp_t(log_r0); - const T m = T(0.18); - const T fbar = exp_t(log_fbar); - const T q = exp_t(log_q); - const T sel_a50 = T(1.0) + T(9.0) * invlogit_t(logit_sel_a50); - const T sel_slope = exp_t(log_sel_slope); - - const T sigma_log_index = T(0.20); - const T sigma_log_catch = T(0.15); - - const T sigma_rec_dev = T(0.35); - const double age_comp_effective_n = 2.0; - const double min_positive = 1.0e-12; - - const auto weight = default_weight_at_age(); - const auto maturity = default_maturity_at_age(); - - std::array selectivity{}; - for (int a = 0; a < kAges; ++a) { - selectivity[static_cast(a)] = - logistic_selectivity_t(T(a + 1), sel_a50, sel_slope); - } - - std::array n{}; - n[0] = r0; - for (int a = 1; a < kAges; ++a) { - n[static_cast(a)] = - n[static_cast(a - 1)] * exp_t(-m); - } - n[static_cast(kAges - 1)] = - n[static_cast(kAges - 1)] / (T(1.0) - exp_t(-m)); - - T nll = T(0.0); - T fixed_prior_nll = T(0.0); - T rec_prior_nll = T(0.0); - T index_nll = T(0.0); - T catch_nll = T(0.0); - T age_comp_nll_total = T(0.0); - - auto normal_prior = [](const T &x, double mean, double sd) { - const T z = (x - T(mean)) / T(sd); - return T(0.5) * z * z; - }; - - fixed_prior_nll = - fixed_prior_nll + normal_prior(log_r0, std::log(1200.0), 1.0); - fixed_prior_nll = - fixed_prior_nll + normal_prior(log_fbar, std::log(0.025), 0.75); - fixed_prior_nll = - fixed_prior_nll + normal_prior(log_q, std::log(0.00005), 1.0); - fixed_prior_nll = fixed_prior_nll + normal_prior(sel_a50, 4.0, 0.75); - fixed_prior_nll = - fixed_prior_nll + normal_prior(log_sel_slope, std::log(1.2), 0.35); - - nll = nll + fixed_prior_nll; - - for (std::size_t t = 0; t < observations_.size(); ++t) { - - const auto &obs = observations_[t]; - - const T rec_dev = par[5 + t]; - - { - T term = T(0.5) * square_t(rec_dev / sigma_rec_dev); - rec_prior_nll = rec_prior_nll + term; - nll = nll + term; - } - T biomass = T(0.0); - for (int a = 0; a < kAges; ++a) { - biomass = biomass + n[static_cast(a)] * - T(weight[static_cast(a)]); - } - - T catch_hat = T(0.0); - for (int a = 0; a < kAges; ++a) { - const auto i = static_cast(a); - const T f_a = fbar * selectivity[i]; - const T z_a = m + f_a; - const T harvest_rate = (f_a / z_a) * (T(1.0) - exp_t(-z_a)); - catch_hat = catch_hat + n[i] * T(weight[i]) * harvest_rate; - } - - const T index_hat = q * biomass; - - if (obs.index > 0.0) { - const T z = - (log_t(T(obs.index)) - log_t(max_t(index_hat, min_positive))) / - sigma_log_index; - { - T term = T(0.5) * square_t(z); - index_nll = index_nll + term; - nll = nll + term; - } - } - - if (obs.catch_mt > 0.0) { - const T z = - (log_t(T(obs.catch_mt)) - log_t(max_t(catch_hat, min_positive))) / - sigma_log_catch; - { - T term = T(0.5) * square_t(z); - catch_nll = catch_nll + term; - nll = nll + term; - } - } - - std::array pred_age_comp{}; - T selected_numbers_sum = T(0.0); - for (int a = 0; a < kAges; ++a) { - const auto i = static_cast(a); - pred_age_comp[i] = n[i] * selectivity[i]; - selected_numbers_sum = selected_numbers_sum + pred_age_comp[i]; - } - for (int a = 0; a < kAges; ++a) { - const auto i = static_cast(a); - pred_age_comp[i] = - pred_age_comp[i] / max_t(selected_numbers_sum, min_positive); - } - - { - T term = age_comp_nll(obs.age_comp, pred_age_comp, age_comp_effective_n, - min_positive); - age_comp_nll_total = age_comp_nll_total + term; - nll = nll + term; - } - - std::array next{}; - next[0] = r0 * exp_t(rec_dev); - - for (int a = 1; a < kAges; ++a) { - const auto prev = static_cast(a - 1); - const T f_prev = fbar * selectivity[prev]; - const T z_prev = m + f_prev; - next[static_cast(a)] = n[prev] * exp_t(-z_prev); - } - - const auto last = static_cast(kAges - 1); - const T f_last = fbar * selectivity[last]; - const T z_last = m + f_last; - next[last] = next[last] + n[last] * exp_t(-z_last); - - n = next; - } - - return nll; - } - -private: - std::vector observations_; -}; - -void write_fit_summary(const std::string &path, const quadra::OptResult &fit) { - std::ofstream out(path); - if (!out) { - throw std::runtime_error("Could not open fit summary CSV: " + path); - } - - out << "field,value\n"; - out << std::setprecision(12); - out << "objective," << fit.value << "\n"; - out << "joint_objective," << fit.joint_objective << "\n"; - out << "laplace_logdet," << fit.laplace_logdet << "\n"; - out << "laplace_constant," << fit.laplace_constant << "\n"; - out << "grad_norm," << fit.grad_norm << "\n"; - out << "iterations," << fit.iterations << "\n"; - out << "converged," << (fit.converged ? "yes" : "no") << "\n"; - out << "message," << fit.message << "\n"; - out << "laplace,yes\n"; - out << "random_effects," << fit.u_hat.size() << "\n"; - - if (fit.par.size() >= 3) { - out << "log_r0," << fit.par[0] << "\n"; - out << "r0," << std::exp(fit.par[0]) << "\n"; - out << "log_fbar," << fit.par[1] << "\n"; - out << "fbar," << std::exp(fit.par[1]) << "\n"; - out << "log_q," << fit.par[2] << "\n"; - out << "q," << std::exp(fit.par[2]) << "\n"; - if (fit.par.size() >= 5) { - const double sel_a50 = 1.0 + 9.0 / (1.0 + std::exp(-fit.par[3])); - const double sel_slope = std::exp(fit.par[4]); - out << "logit_sel_a50," << fit.par[3] << "\n"; - out << "sel_a50," << sel_a50 << "\n"; - out << "log_sel_slope," << fit.par[4] << "\n"; - out << "sel_slope," << sel_slope << "\n"; - } - } -} - -} // namespace sefsc_red_snapper - -void write_fitted_trajectory( - const std::string &path, - const std::vector &observations, - const quadra::OptResult &fit) { - if (fit.par.size() < 3) { - throw std::runtime_error( - "Cannot write fitted trajectory: expected at least 3 fixed parameters"); - } - - sefsc_red_snapper::AgeStructuredParams params; - params.log_r0 = fit.par[0]; - params.log_fbar = fit.par[1]; - params.log_q = fit.par[2]; - if (fit.par.size() >= 5) { - params.sel_a50 = 1.0 + 9.0 / (1.0 + std::exp(-fit.par[3])); - params.sel_slope = std::exp(fit.par[4]); - } - - const auto rows = sefsc_red_snapper::run_deterministic_age_structured_model( - observations, params); - - std::ofstream out(path); - if (!out) { - throw std::runtime_error("Could not open fitted trajectory CSV: " + path); - } - - out << "year,recruitment,total_biomass,ssb_proxy,depletion,Fbar," - << "catch_obs,catch_hat,catch_log_residual,index_obs,index_hat," - << "index_log_residual\n"; - - out << std::fixed << std::setprecision(6); - - for (const auto &row : rows) { - const double catch_log_residual = - std::log(std::max(row.catch_obs, 1.0e-12)) - - std::log(std::max(row.catch_hat, 1.0e-12)); - const double index_log_residual = - std::log(std::max(row.index_obs, 1.0e-12)) - - std::log(std::max(row.index_hat, 1.0e-12)); - - out << row.year << "," << row.recruitment << "," << row.total_biomass << "," - << row.ssb_proxy << "," << row.depletion << "," << row.fbar << "," - << row.catch_obs << "," << row.catch_hat << "," << catch_log_residual - << "," << row.index_obs << "," << row.index_hat << "," - << index_log_residual << "\n"; - } -} - -struct ResidualDiagnostics { - int n = 0; - double catch_rmse_log = 0.0; - double index_rmse_log = 0.0; - double catch_mean_log_residual = 0.0; - double index_mean_log_residual = 0.0; - double max_abs_catch_log_residual = 0.0; - double max_abs_index_log_residual = 0.0; -}; - -void write_residual_diagnostics( - const std::string &path, - const std::vector &observations, - const quadra::OptResult &fit) { - sefsc_red_snapper::AgeStructuredParams params; - params.log_r0 = fit.par[0]; - params.log_fbar = fit.par[1]; - params.log_q = fit.par[2]; - if (fit.par.size() >= 5) { - params.sel_a50 = 1.0 + 9.0 / (1.0 + std::exp(-fit.par[3])); - params.sel_slope = std::exp(fit.par[4]); - } - - const auto rows = sefsc_red_snapper::run_deterministic_age_structured_model( - observations, params); - - ResidualDiagnostics d; - d.n = static_cast(rows.size()); - - double catch_sum = 0.0, catch_ss = 0.0; - double index_sum = 0.0, index_ss = 0.0; - - for (const auto &row : rows) { - const double cr = std::log(std::max(row.catch_obs, 1.0e-12)) - - std::log(std::max(row.catch_hat, 1.0e-12)); - const double ir = std::log(std::max(row.index_obs, 1.0e-12)) - - std::log(std::max(row.index_hat, 1.0e-12)); - - catch_sum += cr; - catch_ss += cr * cr; - index_sum += ir; - index_ss += ir * ir; - - d.max_abs_catch_log_residual = - std::max(d.max_abs_catch_log_residual, std::abs(cr)); - d.max_abs_index_log_residual = - std::max(d.max_abs_index_log_residual, std::abs(ir)); - } - - if (d.n > 0) { - d.catch_mean_log_residual = catch_sum / d.n; - d.index_mean_log_residual = index_sum / d.n; - d.catch_rmse_log = std::sqrt(catch_ss / d.n); - d.index_rmse_log = std::sqrt(index_ss / d.n); - } - - std::ofstream out(path); - out << "metric,value,note\n"; - out << std::setprecision(12); - out << "n," << d.n << ",number of fitted years\n"; - out << "catch_rmse_log," << d.catch_rmse_log - << ",root mean squared log catch residual\n"; - out << "index_rmse_log," << d.index_rmse_log - << ",root mean squared log index residual\n"; - out << "catch_mean_log_residual," << d.catch_mean_log_residual - << ",mean log observed minus predicted catch\n"; - out << "index_mean_log_residual," << d.index_mean_log_residual - << ",mean log observed minus predicted index\n"; - out << "max_abs_catch_log_residual," << d.max_abs_catch_log_residual - << ",maximum absolute log catch residual\n"; - out << "max_abs_index_log_residual," << d.max_abs_index_log_residual - << ",maximum absolute log index residual\n"; -} - -void write_selectivity_at_age(const std::string &path, - const quadra::OptResult &fit) { - if (fit.par.size() < 5) { - return; - } - - const double a50 = 1.0 + 9.0 / (1.0 + std::exp(-fit.par[3])); - const double slope = std::exp(fit.par[4]); - - std::ofstream out(path); - out << "age,selectivity\n"; - - for (int age = 1; age <= sefsc_red_snapper::kAges; ++age) { - const double sel = 1.0 / (1.0 + std::exp(-slope * (age - a50))); - out << age << "," << sel << "\n"; - } -} - -void write_recruitment_deviations(const std::string &path, - const quadra::OptResult &fit) { - std::ofstream out(path); - out << "year,log_rec_dev,rec_multiplier\n"; - out << std::setprecision(12); - - for (std::size_t i = 0; i < fit.u_hat.size(); ++i) { - const double u = fit.u_hat[i]; - out << (i + 1) << "," << u << "," << std::exp(u) << "\n"; - } -} - -void write_objective_components( - const std::string &path, - const std::vector &observations, - const quadra::OptResult &fit) { - if (fit.par.size() < 5 || fit.u_hat.size() < observations.size()) { - throw std::runtime_error( - "Cannot write objective components: missing fit values"); - } - - const double log_r0 = fit.par[0]; - const double log_fbar = fit.par[1]; - const double log_q = fit.par[2]; - const double logit_sel_a50 = fit.par[3]; - const double log_sel_slope = fit.par[4]; - - const double r0 = std::exp(log_r0); - const double m = 0.18; - const double fbar = std::exp(log_fbar); - const double q = std::exp(log_q); - const double sel_a50 = 1.0 + 9.0 / (1.0 + std::exp(-logit_sel_a50)); - const double sel_slope = std::exp(log_sel_slope); - - const double sigma_log_index = 0.20; - const double sigma_log_catch = 0.15; - const double sigma_rec_dev = 0.35; - const double age_comp_effective_n = 2.0; - const double min_positive = 1.0e-12; - - const auto weight = sefsc_red_snapper::default_weight_at_age(); - - std::array selectivity{}; - for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { - selectivity[static_cast(a)] = - sefsc_red_snapper::logistic_selectivity(static_cast(a + 1), - sel_a50, sel_slope); - } - - std::array n{}; - n[0] = r0; - for (int a = 1; a < sefsc_red_snapper::kAges; ++a) { - n[static_cast(a)] = - n[static_cast(a - 1)] * std::exp(-m); - } - n[static_cast(sefsc_red_snapper::kAges - 1)] = - n[static_cast(sefsc_red_snapper::kAges - 1)] / - (1.0 - std::exp(-m)); - - auto normal_prior = [](double x, double mean, double sd) { - const double z = (x - mean) / sd; - return 0.5 * z * z; - }; - - double fixed_prior_nll = 0.0; - double rec_prior_nll = 0.0; - double index_nll = 0.0; - double catch_nll = 0.0; - double age_comp_nll = 0.0; - - fixed_prior_nll += normal_prior(log_r0, std::log(1200.0), 1.0); - fixed_prior_nll += normal_prior(log_fbar, std::log(0.025), 0.75); - fixed_prior_nll += normal_prior(log_q, std::log(0.00005), 1.0); - fixed_prior_nll += normal_prior(sel_a50, 4.0, 0.75); - fixed_prior_nll += normal_prior(log_sel_slope, std::log(1.2), 0.35); - - for (std::size_t t = 0; t < observations.size(); ++t) { - const auto &obs = observations[t]; - const double rec_dev = fit.u_hat[t]; - - rec_prior_nll += 0.5 * std::pow(rec_dev / sigma_rec_dev, 2.0); - - double biomass = 0.0; - for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { - biomass += - n[static_cast(a)] * weight[static_cast(a)]; - } - - double catch_hat = 0.0; - for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { - const auto i = static_cast(a); - const double f_a = fbar * selectivity[i]; - const double z_a = m + f_a; - const double harvest_rate = (f_a / z_a) * (1.0 - std::exp(-z_a)); - catch_hat += n[i] * weight[i] * harvest_rate; - } - - const double index_hat = q * biomass; - - if (obs.index > 0.0) { - const double z = - (std::log(obs.index) - std::log(std::max(index_hat, min_positive))) / - sigma_log_index; - index_nll += 0.5 * z * z; - } - - if (obs.catch_mt > 0.0) { - const double z = (std::log(obs.catch_mt) - - std::log(std::max(catch_hat, min_positive))) / - sigma_log_catch; - catch_nll += 0.5 * z * z; - } - - std::array pred_age_comp{}; - double selected_numbers_sum = 0.0; - for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { - const auto i = static_cast(a); - pred_age_comp[i] = n[i] * selectivity[i]; - selected_numbers_sum += pred_age_comp[i]; - } - - for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { - const auto i = static_cast(a); - pred_age_comp[i] = - pred_age_comp[i] / std::max(selected_numbers_sum, min_positive); - - const double obs_a = std::max(obs.age_comp[i], 0.0); - if (obs_a > 0.0) { - age_comp_nll -= age_comp_effective_n * obs_a * - std::log(std::max(pred_age_comp[i], min_positive)); - } - } - - std::array next{}; - next[0] = r0 * std::exp(rec_dev); - for (int a = 1; a < sefsc_red_snapper::kAges; ++a) { - const auto prev = static_cast(a - 1); - const auto cur = static_cast(a); - const double f_prev = fbar * selectivity[prev]; - const double z_prev = m + f_prev; - next[cur] = n[prev] * std::exp(-z_prev); - } - - const int plus_group = sefsc_red_snapper::kAges - 1; - const auto pg = static_cast(plus_group); - const double f_pg = fbar * selectivity[pg]; - const double z_pg = m + f_pg; - next[pg] += n[pg] * std::exp(-z_pg); - - n = next; - } - - std::ofstream out(path); - if (!out) { - throw std::runtime_error("Could not open component CSV: " + path); - } - - out << "component,value\n"; - out << std::setprecision(12); - out << "fixed_prior_nll," << fixed_prior_nll << "\n"; - out << "rec_prior_nll," << rec_prior_nll << "\n"; - out << "index_nll," << index_nll << "\n"; - out << "catch_nll," << catch_nll << "\n"; - out << "age_comp_nll," << age_comp_nll << "\n"; - out << "joint_total," - << fixed_prior_nll + rec_prior_nll + index_nll + catch_nll + age_comp_nll - << "\n"; -} - int main() { const std::string input_path = "examples/NMFS/sefsc_red_snapper/data/" "synthetic_red_snapper_observations.csv"; - const std::string summary_path = - "examples/NMFS/sefsc_red_snapper/outputs/quadra_fit_summary.csv"; - const std::string trajectory_path = - "examples/NMFS/sefsc_red_snapper/outputs/quadra_fitted_trajectory.csv"; - const std::string residual_diagnostics_path = - "examples/NMFS/sefsc_red_snapper/outputs/" - "quadra_fit_residual_diagnostics.csv"; - const std::string selectivity_path = - "examples/NMFS/sefsc_red_snapper/outputs/selectivity_at_age.csv"; - const std::string recruitment_deviations_path = - "examples/NMFS/sefsc_red_snapper/outputs/recruitment_deviations.csv"; - const std::string objective_components_path = - "examples/NMFS/sefsc_red_snapper/outputs/" - "quadra_fit_objective_components.csv"; + const auto report_paths = + sefsc_red_snapper::default_red_snapper_report_paths(); const auto observations = sefsc_red_snapper::read_observations(input_path); sefsc_red_snapper::RedSnapperQuadraObjective objective(observations); @@ -610,23 +43,34 @@ int main() { auto fit = quadra::optimize_lbfgs(objective, params, opts); - sefsc_red_snapper::write_fit_summary(summary_path, fit); - write_fitted_trajectory(trajectory_path, observations, fit); - write_residual_diagnostics(residual_diagnostics_path, observations, fit); - write_selectivity_at_age(selectivity_path, fit); - write_recruitment_deviations(recruitment_deviations_path, fit); - write_objective_components(objective_components_path, observations, fit); + sefsc_red_snapper::write_red_snapper_report_suite(report_paths, observations, + objective, params, fit); + sefsc_red_snapper::write_red_snapper_functional_analysis_report( + "examples/NMFS/sefsc_red_snapper/outputs/" + "red_snapper_functional_analysis_report.txt", + "examples/NMFS/sefsc_red_snapper/outputs/" + "red_snapper_functional_analysis_report.csv", + objective, params, fit); std::cout << "SEFSC red-snapper-style Quadra Laplace recruitment-deviation fit\n"; std::cout << "objective: " << fit.value << "\n"; std::cout << "grad_norm: " << fit.grad_norm << "\n"; std::cout << "converged: " << (fit.converged ? "yes" : "no") << "\n"; std::cout << "message: " << fit.message << "\n"; - std::cout << "wrote: " << summary_path << "\n"; - std::cout << "wrote: " << trajectory_path << "\n"; - std::cout << "wrote: " << residual_diagnostics_path << "\n"; - std::cout << "wrote: " << selectivity_path << "\n"; - std::cout << "wrote: " << recruitment_deviations_path << "\n"; - std::cout << "wrote: " << objective_components_path << "\n"; + std::cout << "wrote: " << report_paths.summary << "\n"; + std::cout << "wrote: " << report_paths.trajectory << "\n"; + std::cout << "wrote: " << report_paths.residual_diagnostics << "\n"; + std::cout << "wrote: " << report_paths.selectivity << "\n"; + std::cout << "wrote: " << report_paths.recruitment_deviations << "\n"; + std::cout << "wrote: " << report_paths.objective_components << "\n"; + std::cout << "wrote: " << report_paths.laplace_structure_text << "\n"; + std::cout << "wrote: " << report_paths.laplace_structure_csv << "\n"; + std::cout << "wrote: " + << "examples/NMFS/sefsc_red_snapper/outputs/" + "red_snapper_functional_analysis_report.txt\n"; + + std::cout << "wrote: " + << "examples/NMFS/sefsc_red_snapper/outputs/" + "red_snapper_functional_analysis_report.csv\n"; return 0; } diff --git a/examples/NMFS/sefsc_red_snapper/reports/red_snapper_fit_reports.hpp b/examples/NMFS/sefsc_red_snapper/reports/red_snapper_fit_reports.hpp new file mode 100644 index 0000000..026c6f1 --- /dev/null +++ b/examples/NMFS/sefsc_red_snapper/reports/red_snapper_fit_reports.hpp @@ -0,0 +1,371 @@ +#pragma once + +#include "../quadra/red_snapper_age_structured.hpp" + +#include "../../../../core/optimizer.hpp" + +#include +#include +#include +#include +#include +#include +#include + +namespace sefsc_red_snapper { + +inline void write_fit_summary(const std::string &path, + const quadra::OptResult &fit) { + std::ofstream out(path); + if (!out) { + throw std::runtime_error("Could not open fit summary CSV: " + path); + } + + out << "field,value\n"; + out << std::setprecision(12); + out << "objective," << fit.value << "\n"; + out << "joint_objective," << fit.joint_objective << "\n"; + out << "laplace_logdet," << fit.laplace_logdet << "\n"; + out << "laplace_constant," << fit.laplace_constant << "\n"; + out << "grad_norm," << fit.grad_norm << "\n"; + out << "iterations," << fit.iterations << "\n"; + out << "converged," << (fit.converged ? "yes" : "no") << "\n"; + out << "message," << fit.message << "\n"; + out << "laplace,yes\n"; + out << "random_effects," << fit.u_hat.size() << "\n"; + + if (fit.par.size() >= 3) { + out << "log_r0," << fit.par[0] << "\n"; + out << "r0," << std::exp(fit.par[0]) << "\n"; + out << "log_fbar," << fit.par[1] << "\n"; + out << "fbar," << std::exp(fit.par[1]) << "\n"; + out << "log_q," << fit.par[2] << "\n"; + out << "q," << std::exp(fit.par[2]) << "\n"; + if (fit.par.size() >= 5) { + const double sel_a50 = 1.0 + 9.0 / (1.0 + std::exp(-fit.par[3])); + const double sel_slope = std::exp(fit.par[4]); + out << "logit_sel_a50," << fit.par[3] << "\n"; + out << "sel_a50," << sel_a50 << "\n"; + out << "log_sel_slope," << fit.par[4] << "\n"; + out << "sel_slope," << sel_slope << "\n"; + } + } +} + +inline void write_fitted_trajectory( + const std::string &path, + const std::vector &observations, + const quadra::OptResult &fit) { + if (fit.par.size() < 3) { + throw std::runtime_error( + "Cannot write fitted trajectory: expected at least 3 fixed parameters"); + } + + sefsc_red_snapper::AgeStructuredParams params; + params.log_r0 = fit.par[0]; + params.log_fbar = fit.par[1]; + params.log_q = fit.par[2]; + if (fit.par.size() >= 5) { + params.sel_a50 = 1.0 + 9.0 / (1.0 + std::exp(-fit.par[3])); + params.sel_slope = std::exp(fit.par[4]); + } + + const auto rows = sefsc_red_snapper::run_deterministic_age_structured_model( + observations, params); + + std::ofstream out(path); + if (!out) { + throw std::runtime_error("Could not open fitted trajectory CSV: " + path); + } + + out << "year,recruitment,total_biomass,ssb_proxy,depletion,Fbar," + << "catch_obs,catch_hat,catch_log_residual,index_obs,index_hat," + << "index_log_residual\n"; + + out << std::fixed << std::setprecision(6); + + for (const auto &row : rows) { + const double catch_log_residual = + std::log(std::max(row.catch_obs, 1.0e-12)) - + std::log(std::max(row.catch_hat, 1.0e-12)); + const double index_log_residual = + std::log(std::max(row.index_obs, 1.0e-12)) - + std::log(std::max(row.index_hat, 1.0e-12)); + + out << row.year << "," << row.recruitment << "," << row.total_biomass << "," + << row.ssb_proxy << "," << row.depletion << "," << row.fbar << "," + << row.catch_obs << "," << row.catch_hat << "," << catch_log_residual + << "," << row.index_obs << "," << row.index_hat << "," + << index_log_residual << "\n"; + } +} + +struct ResidualDiagnostics { + int n = 0; + double catch_rmse_log = 0.0; + double index_rmse_log = 0.0; + double catch_mean_log_residual = 0.0; + double index_mean_log_residual = 0.0; + double max_abs_catch_log_residual = 0.0; + double max_abs_index_log_residual = 0.0; +}; + +inline void write_residual_diagnostics( + const std::string &path, + const std::vector &observations, + const quadra::OptResult &fit) { + sefsc_red_snapper::AgeStructuredParams params; + params.log_r0 = fit.par[0]; + params.log_fbar = fit.par[1]; + params.log_q = fit.par[2]; + if (fit.par.size() >= 5) { + params.sel_a50 = 1.0 + 9.0 / (1.0 + std::exp(-fit.par[3])); + params.sel_slope = std::exp(fit.par[4]); + } + + const auto rows = sefsc_red_snapper::run_deterministic_age_structured_model( + observations, params); + + ResidualDiagnostics d; + d.n = static_cast(rows.size()); + + double catch_sum = 0.0, catch_ss = 0.0; + double index_sum = 0.0, index_ss = 0.0; + + for (const auto &row : rows) { + const double cr = std::log(std::max(row.catch_obs, 1.0e-12)) - + std::log(std::max(row.catch_hat, 1.0e-12)); + const double ir = std::log(std::max(row.index_obs, 1.0e-12)) - + std::log(std::max(row.index_hat, 1.0e-12)); + + catch_sum += cr; + catch_ss += cr * cr; + index_sum += ir; + index_ss += ir * ir; + + d.max_abs_catch_log_residual = + std::max(d.max_abs_catch_log_residual, std::abs(cr)); + d.max_abs_index_log_residual = + std::max(d.max_abs_index_log_residual, std::abs(ir)); + } + + if (d.n > 0) { + d.catch_mean_log_residual = catch_sum / d.n; + d.index_mean_log_residual = index_sum / d.n; + d.catch_rmse_log = std::sqrt(catch_ss / d.n); + d.index_rmse_log = std::sqrt(index_ss / d.n); + } + + std::ofstream out(path); + out << "metric,value,note\n"; + out << std::setprecision(12); + out << "n," << d.n << ",number of fitted years\n"; + out << "catch_rmse_log," << d.catch_rmse_log + << ",root mean squared log catch residual\n"; + out << "index_rmse_log," << d.index_rmse_log + << ",root mean squared log index residual\n"; + out << "catch_mean_log_residual," << d.catch_mean_log_residual + << ",mean log observed minus predicted catch\n"; + out << "index_mean_log_residual," << d.index_mean_log_residual + << ",mean log observed minus predicted index\n"; + out << "max_abs_catch_log_residual," << d.max_abs_catch_log_residual + << ",maximum absolute log catch residual\n"; + out << "max_abs_index_log_residual," << d.max_abs_index_log_residual + << ",maximum absolute log index residual\n"; +} + +inline void write_selectivity_at_age(const std::string &path, + const quadra::OptResult &fit) { + if (fit.par.size() < 5) { + return; + } + + const double a50 = 1.0 + 9.0 / (1.0 + std::exp(-fit.par[3])); + const double slope = std::exp(fit.par[4]); + + std::ofstream out(path); + out << "age,selectivity\n"; + + for (int age = 1; age <= sefsc_red_snapper::kAges; ++age) { + const double sel = 1.0 / (1.0 + std::exp(-slope * (age - a50))); + out << age << "," << sel << "\n"; + } +} + +inline void write_recruitment_deviations(const std::string &path, + const quadra::OptResult &fit) { + std::ofstream out(path); + out << "year,log_rec_dev,rec_multiplier\n"; + out << std::setprecision(12); + + for (std::size_t i = 0; i < fit.u_hat.size(); ++i) { + const double u = fit.u_hat[i]; + out << (i + 1) << "," << u << "," << std::exp(u) << "\n"; + } +} + +inline void write_objective_components( + const std::string &path, + const std::vector &observations, + const quadra::OptResult &fit) { + if (fit.par.size() < 5 || fit.u_hat.size() < observations.size()) { + throw std::runtime_error( + "Cannot write objective components: missing fit values"); + } + + const double log_r0 = fit.par[0]; + const double log_fbar = fit.par[1]; + const double log_q = fit.par[2]; + const double logit_sel_a50 = fit.par[3]; + const double log_sel_slope = fit.par[4]; + + const double r0 = std::exp(log_r0); + const double m = 0.18; + const double fbar = std::exp(log_fbar); + const double q = std::exp(log_q); + const double sel_a50 = 1.0 + 9.0 / (1.0 + std::exp(-logit_sel_a50)); + const double sel_slope = std::exp(log_sel_slope); + + const double sigma_log_index = 0.20; + const double sigma_log_catch = 0.15; + const double sigma_rec_dev = 0.35; + const double age_comp_effective_n = 2.0; + const double min_positive = 1.0e-12; + + const auto weight = sefsc_red_snapper::default_weight_at_age(); + + std::array selectivity{}; + for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { + selectivity[static_cast(a)] = + sefsc_red_snapper::logistic_selectivity(static_cast(a + 1), + sel_a50, sel_slope); + } + + std::array n{}; + n[0] = r0; + for (int a = 1; a < sefsc_red_snapper::kAges; ++a) { + n[static_cast(a)] = + n[static_cast(a - 1)] * std::exp(-m); + } + n[static_cast(sefsc_red_snapper::kAges - 1)] = + n[static_cast(sefsc_red_snapper::kAges - 1)] / + (1.0 - std::exp(-m)); + + auto normal_prior = [](double x, double mean, double sd) { + const double z = (x - mean) / sd; + return 0.5 * z * z; + }; + + double fixed_prior_nll = 0.0; + double rec_prior_nll = 0.0; + double index_nll = 0.0; + double catch_nll = 0.0; + double age_comp_nll = 0.0; + + fixed_prior_nll += normal_prior(log_r0, std::log(1200.0), 1.0); + fixed_prior_nll += normal_prior(log_fbar, std::log(0.025), 0.75); + fixed_prior_nll += normal_prior(log_q, std::log(0.00005), 1.0); + fixed_prior_nll += normal_prior(sel_a50, 4.0, 0.75); + fixed_prior_nll += normal_prior(log_sel_slope, std::log(1.2), 0.35); + + for (std::size_t t = 0; t < observations.size(); ++t) { + const auto &obs = observations[t]; + const double rec_dev = fit.u_hat[t]; + + rec_prior_nll += 0.5 * std::pow(rec_dev / sigma_rec_dev, 2.0); + + double biomass = 0.0; + for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { + biomass += + n[static_cast(a)] * weight[static_cast(a)]; + } + + double catch_hat = 0.0; + for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { + const auto i = static_cast(a); + const double f_a = fbar * selectivity[i]; + const double z_a = m + f_a; + const double harvest_rate = (f_a / z_a) * (1.0 - std::exp(-z_a)); + catch_hat += n[i] * weight[i] * harvest_rate; + } + + const double index_hat = q * biomass; + + if (obs.index > 0.0) { + const double z = + (std::log(obs.index) - std::log(std::max(index_hat, min_positive))) / + sigma_log_index; + index_nll += 0.5 * z * z; + } + + if (obs.catch_mt > 0.0) { + const double z = (std::log(obs.catch_mt) - + std::log(std::max(catch_hat, min_positive))) / + sigma_log_catch; + catch_nll += 0.5 * z * z; + } + + std::array pred_age_comp{}; + double selected_numbers_sum = 0.0; + for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { + const auto i = static_cast(a); + pred_age_comp[i] = n[i] * selectivity[i]; + selected_numbers_sum += pred_age_comp[i]; + } + + for (int a = 0; a < sefsc_red_snapper::kAges; ++a) { + const auto i = static_cast(a); + pred_age_comp[i] = + pred_age_comp[i] / std::max(selected_numbers_sum, min_positive); + + const double obs_a = std::max(obs.age_comp[i], 0.0); + if (obs_a > 0.0) { + age_comp_nll -= age_comp_effective_n * obs_a * + std::log(std::max(pred_age_comp[i], min_positive)); + } + } + + std::array next{}; + next[0] = r0 * std::exp(rec_dev); + for (int a = 1; a < sefsc_red_snapper::kAges; ++a) { + const auto prev = static_cast(a - 1); + const auto cur = static_cast(a); + const double f_prev = fbar * selectivity[prev]; + const double z_prev = m + f_prev; + next[cur] = n[prev] * std::exp(-z_prev); + } + + const int plus_group = sefsc_red_snapper::kAges - 1; + const auto pg = static_cast(plus_group); + const double f_pg = fbar * selectivity[pg]; + const double z_pg = m + f_pg; + next[pg] += n[pg] * std::exp(-z_pg); + + n = next; + } + + std::ofstream out(path); + if (!out) { + throw std::runtime_error("Could not open component CSV: " + path); + } + + out << "component,value\n"; + out << std::setprecision(12); + out << "fixed_prior_nll," << fixed_prior_nll << "\n"; + out << "rec_prior_nll," << rec_prior_nll << "\n"; + out << "index_nll," << index_nll << "\n"; + out << "catch_nll," << catch_nll << "\n"; + out << "age_comp_nll," << age_comp_nll << "\n"; + out << "joint_total," + << fixed_prior_nll + rec_prior_nll + index_nll + catch_nll + age_comp_nll + << "\n"; +} + +} // namespace sefsc_red_snapper + +using sefsc_red_snapper::write_fit_summary; +using sefsc_red_snapper::write_fitted_trajectory; +using sefsc_red_snapper::write_objective_components; +using sefsc_red_snapper::write_recruitment_deviations; +using sefsc_red_snapper::write_residual_diagnostics; +using sefsc_red_snapper::write_selectivity_at_age; diff --git a/examples/NMFS/sefsc_red_snapper/reports/red_snapper_report_suite.hpp b/examples/NMFS/sefsc_red_snapper/reports/red_snapper_report_suite.hpp new file mode 100644 index 0000000..57c739c --- /dev/null +++ b/examples/NMFS/sefsc_red_snapper/reports/red_snapper_report_suite.hpp @@ -0,0 +1,58 @@ +#pragma once + +#include "../diagnostics/red_snapper_functional_analysis_diagnostics.hpp" +#include "red_snapper_fit_reports.hpp" + +#include "../../../../core/optimizer.hpp" + +#include +#include + +namespace sefsc_red_snapper { + +struct RedSnapperReportPaths { + std::string summary = + "examples/NMFS/sefsc_red_snapper/outputs/quadra_fit_summary.csv"; + std::string trajectory = + "examples/NMFS/sefsc_red_snapper/outputs/quadra_fitted_trajectory.csv"; + std::string residual_diagnostics = "examples/NMFS/sefsc_red_snapper/outputs/" + "quadra_fit_residual_diagnostics.csv"; + std::string selectivity = + "examples/NMFS/sefsc_red_snapper/outputs/selectivity_at_age.csv"; + std::string recruitment_deviations = + "examples/NMFS/sefsc_red_snapper/outputs/recruitment_deviations.csv"; + std::string objective_components = "examples/NMFS/sefsc_red_snapper/outputs/" + "quadra_fit_objective_components.csv"; + std::string laplace_structure_text = + "examples/NMFS/sefsc_red_snapper/outputs/" + "red_snapper_laplace_structure_report.txt"; + std::string laplace_structure_csv = + "examples/NMFS/sefsc_red_snapper/outputs/" + "red_snapper_laplace_structure_report.csv"; +}; + +inline RedSnapperReportPaths default_red_snapper_report_paths() { + return RedSnapperReportPaths{}; +} + +template +inline void write_red_snapper_report_suite( + const RedSnapperReportPaths &paths, + const std::vector &observations, Objective &objective, + const quadra::ParameterVector ¶ms, const quadra::OptResult &fit) { + write_fit_summary(paths.summary, fit); + write_fitted_trajectory(paths.trajectory, observations, fit); + write_residual_diagnostics(paths.residual_diagnostics, observations, fit); + write_selectivity_at_age(paths.selectivity, fit); + write_recruitment_deviations(paths.recruitment_deviations, fit); + write_objective_components(paths.objective_components, observations, fit); + write_red_snapper_laplace_structure_report(paths.laplace_structure_text, + paths.laplace_structure_csv, + objective, params, fit); +} + +} // namespace sefsc_red_snapper + +using sefsc_red_snapper::default_red_snapper_report_paths; +using sefsc_red_snapper::RedSnapperReportPaths; +using sefsc_red_snapper::write_red_snapper_report_suite;