diff --git a/R/analyse.R b/R/analyse.R index 8aadd01..45d3aea 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -94,99 +94,26 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU stop("data's class is not ModelArray!") } + # Validate that the formula's response matches the requested scalar + .validate_formula_response(formula, scalar) + + # Early collision check: referenced scalar predictors vs phenotypes columns + all_vars <- all.vars(formula) + lhs_name <- tryCatch(as.character(formula[[2]]), error = function(e) NULL) + rhs_vars <- setdiff(all_vars, lhs_name) + scalar_names <- names(scalars(data)) + scalar_predictors <- intersect(rhs_vars, scalar_names) + .check_name_collisions(phenotypes, scalar_names, c(scalar, scalar_predictors), context = "modeling") + ## element.subset: if (is.null(element.subset)) { # request all elements num.element.total <- numElementsTotal(modelarray = data, scalar_name = scalar) element.subset <- 1:num.element.total } - # checker for min and max of element.subset; and whether elements are integer - if (min(element.subset) < 1) { - stop("Minimal value in element.subset should >= 1") - } - if (max(element.subset) > nrow(scalars(data)[[scalar]])) { - stop( - paste0( - "Maximal value in element.subset should <= number of elements = ", - as.character(nrow(scalars(data)[[scalar]])) - ) - ) - } - if (class(element.subset) != "integer") { - stop("Please enter integers for element.subset!") - } - - ### sanity check: whether they match: modelarray's source file list and phenotypes' source file list: - sources.modelarray <- sources(data)[[scalar]] - sources.phenotypes <- phenotypes[["source_file"]] - if (is.null(sources.phenotypes)) { - stop(paste0("Did not find column 'source_file' in argument 'phenotypes'. Please check!")) - } - - ## length should be the same: - if (length(sources.modelarray) != length(sources.phenotypes)) { - stop( - paste0( - "The length of source file list from phenotypes's column 'source_file' ", - "is not the same as that in ModelArray 'data'! Please check out! ", - "The latter one can be accessed by: sources(data)[[scalar]]" - ) - ) - } - - ## check if the list is unique: - if (length(sources.modelarray) != length(unique(sources.modelarray))) { - stop( - paste0( - "The source files in ModelArray 'data' are not unique! Please check out! ", - "It can be accessed by: sources(data)[[scalar]]" - ) - ) - } - if (length(sources.phenotypes) != length(unique(sources.phenotypes))) { - stop( - paste0( - "The source files from phenotypes's column 'source_file' ", - "are not unique! Please check out and remove the duplicated one!" - ) - ) - } + .validate_element_subset(data, scalar, element.subset) - if (identical(sources.modelarray, sources.phenotypes)) { - # identical, pass - } else { # not identical (but length is the same): - # check if two lists can be matched (i.e. no unmatched source filename) - if ((all(sources.modelarray %in% sources.phenotypes)) && ((all(sources.phenotypes %in% sources.modelarray)))) { - # can be matched, just the order is different. Use match() function: - reorder_idx <- match( - sources.modelarray, # vector of values in the order we want - sources.phenotypes - ) # vector to be reordered - # apply to phenotypes: - phenotypes <- phenotypes[reorder_idx, ] - # reset the row name, just to be safe for later adding scalar values... - # see ModelArray_paper/notebooks/test_match_sourceFiles.Rmd - row.names(phenotypes) <- NULL - if (!identical(phenotypes[["source_file"]], sources.modelarray)) { - stop("matching source file names were not successful...") - } - } else { - stop( - paste0( - "phenotypes's column 'source_file' have different element(s) from source file list", - " in ModelArray 'data'! Please check out! ", - "The latter one can be accessed by: sources(data)[[scalar]]" - ) - ) - } - - # stop( - # paste0( - # "The source file list from phenotypes's column 'source_file' is not identical to that in ModelArray 'data'! ", - # "Please check out! ", - # "The latter one can be accessed by: sources(data)[[scalar]] " - # ) - # ) - } + ### sanity check and alignment of phenotypes to sources + phenotypes <- .align_phenotypes_to_sources_or_error(sources(data)[[scalar]], phenotypes, scalar) @@ -683,26 +610,23 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N stop("data's class is not ModelArray!") } + # Validate that the formula's response matches the requested scalar + .validate_formula_response(formula, scalar) + + # Early collision check: referenced scalar predictors vs phenotypes columns + all_vars <- all.vars(formula) + lhs_name <- tryCatch(as.character(formula[[2]]), error = function(e) NULL) + rhs_vars <- setdiff(all_vars, lhs_name) + scalar_names <- names(scalars(data)) + scalar_predictors <- intersect(rhs_vars, scalar_names) + .check_name_collisions(phenotypes, scalar_names, c(scalar, scalar_predictors), context = "modeling") + ## element.subset: if (is.null(element.subset)) { # request all elements num.element.total <- numElementsTotal(modelarray = data, scalar_name = scalar) element.subset <- 1:num.element.total } - # checker for min and max of element.subset; and whether elements are integer - if (min(element.subset) < 1) { - stop("Minimal value in element.subset should >= 1") - } - if (max(element.subset) > nrow(scalars(data)[[scalar]])) { - stop( - paste0( - "Maximal value in element.subset should <= number of elements = ", - as.character(nrow(scalars(data)[[scalar]])) - ) - ) - } - if (class(element.subset) != "integer") { - stop("Please enter integers for element.subset!") - } + .validate_element_subset(data, scalar, element.subset) # check if the formula is valid in terms of mgcv::gam() tryCatch( @@ -729,78 +653,8 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N checker_gam_formula(formula, gam.formula.breakdown) - ### sanity check: whether they match: modelarray's source file list and phenotypes' source file list: - sources.modelarray <- sources(data)[[scalar]] - sources.phenotypes <- phenotypes[["source_file"]] - if (is.null(sources.phenotypes)) { - stop(paste0("Did not find column 'source_file' in argument 'phenotypes'. Please check!")) - } - - ## length should be the same: - if (length(sources.modelarray) != length(sources.phenotypes)) { - stop( - paste0( - "The length of source file list from phenotypes's column 'source_file'", - " is not the same as that in ModelArray 'data'! Please check out! ", - "The latter one can be accessed by: sources(data)[[scalar]]" - ) - ) - } - - ## check if the list is unique: - if (length(sources.modelarray) != length(unique(sources.modelarray))) { - stop( - paste0( - "The source files in ModelArray 'data' are not unique! Please check out!", - " It can be accessed by: sources(data)[[scalar]]" - ) - ) - } - if (length(sources.phenotypes) != length(unique(sources.phenotypes))) { - stop( - paste0( - "The source files from phenotypes's column 'source_file' are not unique! ", - "Please check out and remove the duplicated one!" - ) - ) - } - - if (identical(sources.modelarray, sources.phenotypes)) { - # identical, pass - } else { # not identical (but length is the same): - # check if two lists can be matched (i.e. no unmatched source filename) - if ((all(sources.modelarray %in% sources.phenotypes)) && ((all(sources.phenotypes %in% sources.modelarray)))) { - # can be matched, just the order is different. Use match() function: - reorder_idx <- match( - sources.modelarray, # vector of values in the order we want - sources.phenotypes - ) # vector to be reordered - # apply to phenotypes: - phenotypes <- phenotypes[reorder_idx, ] - row.names(phenotypes) <- NULL - # reset the row name, just to be safe for later adding scalar values... - # see ModelArray_paper/notebooks/test_match_sourceFiles.Rmd - if (!identical(phenotypes[["source_file"]], sources.modelarray)) { - stop("matching source file names were not successful...") - } - } else { - stop( - paste0( - "phenotypes's column 'source_file' have different element(s) from source file ", - "list in ModelArray 'data'! Please check out! The latter one can be accessed by: ", - "sources(data)[[scalar]]" - ) - ) - } - - # stop( - # paste0( - # "The source file list from phenotypes's column 'source_file' is not identical ", - # "to that in ModelArray 'data'! Please check out! The latter one can be accessed by: ", - # "sources(data)[[scalar]] " - # ) - # ) - } + ### sanity check and alignment of phenotypes to sources + phenotypes <- .align_phenotypes_to_sources_or_error(sources(data)[[scalar]], phenotypes, scalar) diff --git a/R/utils.R b/R/utils.R index 6a3c085..de650b6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -489,3 +489,97 @@ bind_cols_check_emptyTibble <- function(a, b) { c } + + +# Internal helpers for multi-scalar validation/alignment +# These are intentionally lightweight and operate on vectors/indices to avoid data copies. +# @noRd +.validate_formula_response <- function(formula, scalar) { + lhs_name <- tryCatch(as.character(formula[[2]]), error = function(e) NULL) + if (is.null(lhs_name) || lhs_name != scalar) { + stop(paste0( + "The formula's response variable ('", + if (is.null(lhs_name)) "" else lhs_name, + "') must match the 'scalar' argument ('", scalar, "')." + )) + } + invisible(TRUE) +} + +# @noRd +.validate_element_subset <- function(data, scalar, element.subset) { + if (min(element.subset) < 1) { + stop("Minimal value in element.subset should >= 1") + } + if (max(element.subset) > nrow(scalars(data)[[scalar]])) { + stop(paste0( + "Maximal value in element.subset should <= number of elements = ", + as.character(nrow(scalars(data)[[scalar]])) + )) + } + if (class(element.subset) != "integer") { + stop("Please enter integers for element.subset!") + } + invisible(TRUE) +} + +# context: one of "modeling" or "wrapping" to preserve exact wording +# used_scalars: character vector of scalar names referenced (response and/or predictors) +# @noRd +.check_name_collisions <- function(phenotypes, scalar_names, used_scalars, context = "modeling") { + collisions <- intersect(used_scalars, colnames(phenotypes)) + if (length(collisions) > 0) { + tail_msg <- if (identical(context, "wrapping")) "before wrapping." else "before modeling." + stop(paste0( + "Column name collision between phenotypes and scalar names: ", + paste(collisions, collapse = ", "), + ". Please rename or remove these columns from phenotypes ", tail_msg + )) + } + invisible(TRUE) +} + +# Returns reordered phenotypes (may be identical) or errors with existing messages +# @noRd +.align_phenotypes_to_sources_or_error <- function(model_sources, phenotypes, scalar) { + sources.phenotypes <- phenotypes[["source_file"]] + if (is.null(sources.phenotypes)) { + stop(paste0("Did not find column 'source_file' in argument 'phenotypes'. Please check!")) + } + if (length(model_sources) != length(sources.phenotypes)) { + stop(paste0( + "The length of source file list from phenotypes's column 'source_file' ", + "is not the same as that in ModelArray 'data'! Please check out! ", + "The latter one can be accessed by: sources(data)[[scalar]]" + )) + } + if (length(model_sources) != length(unique(model_sources))) { + stop(paste0( + "The source files in ModelArray 'data' are not unique! Please check out! ", + "It can be accessed by: sources(data)[[scalar]]" + )) + } + if (length(sources.phenotypes) != length(unique(sources.phenotypes))) { + stop(paste0( + "The source files from phenotypes's column 'source_file' ", + "are not unique! Please check out and remove the duplicated one!" + )) + } + if (identical(model_sources, sources.phenotypes)) { + return(phenotypes) + } + if ((all(model_sources %in% sources.phenotypes)) && (all(sources.phenotypes %in% model_sources))) { + reorder_idx <- match(model_sources, sources.phenotypes) + phenotypes <- phenotypes[reorder_idx, ] + row.names(phenotypes) <- NULL + if (!identical(phenotypes[["source_file"]], model_sources)) { + stop("matching source file names were not successful...") + } + return(phenotypes) + } + stop(paste0( + "phenotypes's column 'source_file' have different element(s) from source file list", + " in ModelArray 'data'! Please check out! ", + "The latter one can be accessed by: sources(data)[[scalar]]" + )) +} diff --git a/tests/testthat/test-ModelArray_gam.R b/tests/testthat/test-ModelArray_gam.R index bea0a08..2c7b218 100644 --- a/tests/testthat/test-ModelArray_gam.R +++ b/tests/testthat/test-ModelArray_gam.R @@ -348,6 +348,57 @@ test_that("test that ModelArray.gam() works as expected", { # if there is any NA in the output data.frame, the result is FALSE expect_true((!mygam_onlyParametricTermsStat %>% is.na()) %>% all()) expect_true((!mygam_onlySmoothTermsStat %>% is.na()) %>% all()) + # Multi-scalar: scalar predictors referenced in formula + set.seed(42) + num_elements <- 2L + num_subj <- nrow(phenotypes) + subj <- phenotypes$source_file + MD <- matrix(rnorm(num_elements * num_subj), nrow = num_elements, ncol = num_subj) + FA <- matrix(NA_real_, nrow = num_elements, ncol = num_subj) + for (i in seq_len(num_elements)) { + FA[i, ] <- 0.5 * MD[i, ] + 0.02 * phenotypes$age + rnorm(num_subj, sd = 0.02) + } + ma_ms <- methods::new( + "ModelArray", + scalars = list(FA = FA, MD = MD), + sources = list(FA = subj, MD = subj), + results = list(), + path = "" + ) + res_ms <- ModelArray.gam( + FA ~ s(MD) + s(age), + data = ma_ms, + phenotypes = phenotypes, + scalar = "FA", + element.subset = as.integer(1:num_elements), + num.subj.lthr.abs = 2L, num.subj.lthr.rel = 0, + verbose = FALSE, pbar = FALSE + ) + expect_true(is.data.frame(res_ms)) + expect_true(any(grepl("s_MD", colnames(res_ms), fixed = TRUE))) + + # Predictor scalar sources mismatch against phenotypes$source_file + subj_bad <- subj + subj_bad[1] <- paste0(subj_bad[1], "_bad") + ma_ms_bad <- methods::new( + "ModelArray", + scalars = list(FA = FA, MD = MD), + sources = list(FA = subj, MD = subj_bad), + results = list(), + path = "" + ) + expect_error( + ModelArray.gam( + FA ~ s(MD) + s(age), + data = ma_ms_bad, + phenotypes = phenotypes, + scalar = "FA", + element.subset = as.integer(1:num_elements), + num.subj.lthr.abs = 2L, num.subj.lthr.rel = 0, + verbose = FALSE, pbar = FALSE + ), + regexp = "The source files for predictor scalar 'MD' do not match phenotypes\\$source_file\\." + ) # no any model stat: expect_error(ModelArray.gam(FD ~ s(age) + sex, @@ -357,6 +408,21 @@ test_that("test that ModelArray.gam() works as expected", { pbar = FALSE )) + # Collision: add MD column to phenotypes + phen_bad <- phenotypes + phen_bad$MD <- 1 + expect_error( + ModelArray.gam( + FA ~ s(MD) + s(age), + data = ma_ms, + phenotypes = phen_bad, + scalar = "FA", + element.subset = as.integer(1), + verbose = FALSE, pbar = FALSE + ), + regexp = "Column name collision between phenotypes and scalar names:" + ) + ## multiple smooth terms: s(age) + s(factorA) # cannot use s(sex) as sex are # characters (M or F), not values! diff --git a/tests/testthat/test-ModelArray_lm.R b/tests/testthat/test-ModelArray_lm.R index 39cdb42..e778fb4 100644 --- a/tests/testthat/test-ModelArray_lm.R +++ b/tests/testthat/test-ModelArray_lm.R @@ -421,6 +421,70 @@ test_that("ModelArray.lm() works as expected", { !(c("model.p.value.fdr") %in% colnames(mylm_corr_pvalues_4)) %>% all() ) + # Multi-scalar: support scalar predictors and collisions + # Build a tiny in-memory ModelArray with two scalars (FA, MD) using existing phenotypes + set.seed(123) + num_elements <- 3L + num_subj <- nrow(phenotypes) + subj <- phenotypes$source_file + MD <- matrix(rnorm(num_elements * num_subj), nrow = num_elements, ncol = num_subj) + FA <- matrix(NA_real_, nrow = num_elements, ncol = num_subj) + for (i in seq_len(num_elements)) { + FA[i, ] <- 0.6 * MD[i, ] + 0.05 * phenotypes$age + 0.2 * phenotypes$sex + rnorm(num_subj, sd = 0.05) + } + + ma2 <- methods::new( + "ModelArray", + scalars = list(FA = FA, MD = MD), + sources = list(FA = subj, MD = subj), + results = list(), + path = "" + ) + + res_ms <- ModelArray.lm( + FA ~ MD + age + sex, + data = ma2, + phenotypes = phenotypes, + scalar = "FA", + element.subset = as.integer(1:2), + num.subj.lthr.abs = 2L, num.subj.lthr.rel = 0, + verbose = FALSE, pbar = FALSE + ) + expect_true(is.data.frame(res_ms)) + expect_true(any(grepl("MD\\.", colnames(res_ms)))) + + # Collision: add MD column to phenotypes + phen_bad <- phenotypes + phen_bad$MD <- 1 + expect_error( + ModelArray.lm(FA ~ MD + age, data = ma2, phenotypes = phen_bad, scalar = "FA", + element.subset = as.integer(1), verbose = FALSE, pbar = FALSE), + regexp = "Column name collision" + ) + + # Predictor scalar sources mismatch against phenotypes$source_file + subj_bad <- subj + subj_bad[1] <- paste0(subj_bad[1], "_bad") + ma2_bad <- methods::new( + "ModelArray", + scalars = list(FA = FA, MD = MD), + sources = list(FA = subj, MD = subj_bad), + results = list(), + path = "" + ) + expect_error( + ModelArray.lm( + FA ~ MD + age, + data = ma2_bad, + phenotypes = phenotypes, + scalar = "FA", + element.subset = as.integer(1:2), + verbose = FALSE, + pbar = FALSE + ), + regexp = "The source files for predictor scalar 'MD' do not match phenotypes\\$source_file\\." + ) + # Test warning when p.value not provided in var.terms expect_warning( temp <- ModelArray.lm( diff --git a/tests/testthat/test-ModelArray_wrap.R b/tests/testthat/test-ModelArray_wrap.R index d44d0eb..f3b2789 100644 --- a/tests/testthat/test-ModelArray_wrap.R +++ b/tests/testthat/test-ModelArray_wrap.R @@ -98,6 +98,103 @@ test_that("ModelArray.wrap reproduces ModelArray.lm with a custom FUN", { rhdf5::h5closeAll() }) + +test_that("ModelArray.wrap exposes all scalars and uses intersection threshold", { + set.seed(7) + num_elements <- 2L + num_subj <- 6L + subj <- paste0("sub-", seq_len(num_subj)) + phen <- data.frame(source_file = subj, z = rnorm(num_subj)) + + A <- matrix(rnorm(num_elements * num_subj), nrow = num_elements) + B <- matrix(rnorm(num_elements * num_subj), nrow = num_elements) + # introduce NAs in different subjects for element 2 + A[2, 1] <- NA_real_ + B[2, 2] <- NA_real_ + + ma <- methods::new( + "ModelArray", + scalars = list(A = A, B = B), + sources = list(A = subj, B = subj), + results = list(), + path = "" + ) + + # FUN that checks presence of both scalars in data + my_fun <- function(data) { + stopifnot(all(c("A", "B") %in% names(data))) + c(meanA = mean(data$A, na.rm = TRUE), meanB = mean(data$B, na.rm = TRUE)) + } + + out <- ModelArray.wrap( + FUN = my_fun, + data = ma, + phenotypes = phen, + scalar = "A", + element.subset = as.integer(1:2), + num.subj.lthr.abs = 6L, + num.subj.lthr.rel = 0, + verbose = FALSE, pbar = FALSE + ) + # element 2 (element_id == 1) should be all NaN due to intersection mask + row2 <- out[out$element_id == 1, , drop = FALSE] + expect_true(all(is.nan(as.numeric(row2[,-1])))) +}) +test_that("ModelArray.wrap errors on collisions and source mismatches for multi-scalars", { + set.seed(11) + num_elements <- 2L + num_subj <- 5L + subj <- paste0("sub-", seq_len(num_subj)) + phen <- data.frame(source_file = subj, z = rnorm(num_subj)) + + A <- matrix(rnorm(num_elements * num_subj), nrow = num_elements) + B <- matrix(rnorm(num_elements * num_subj), nrow = num_elements) + + ma <- methods::new( + "ModelArray", + scalars = list(A = A, B = B), + sources = list(A = subj, B = subj), + results = list(), + path = "" + ) + + # Collision: add a scalar-name column to phenotypes + phen_bad <- phen + phen_bad$A <- 1 + expect_error( + ModelArray.wrap( + FUN = function(data) tibble::tibble(x = 1), + data = ma, + phenotypes = phen_bad, + scalar = "A", + element.subset = as.integer(1), + verbose = FALSE, pbar = FALSE + ), + regexp = "Column name collision between phenotypes and scalar names: .* before wrapping\\." + ) + + # Source mismatch for a non-response scalar + subj_bad <- subj + subj_bad[1] <- paste0(subj_bad[1], "_bad") + ma_bad <- methods::new( + "ModelArray", + scalars = list(A = A, B = B), + sources = list(A = subj, B = subj_bad), + results = list(), + path = "" + ) + expect_error( + ModelArray.wrap( + FUN = function(data) tibble::tibble(x = 1), + data = ma_bad, + phenotypes = phen, + scalar = "A", + element.subset = as.integer(1), + verbose = FALSE, pbar = FALSE + ), + regexp = "The source files for scalar 'B' do not match phenotypes\\$source_file\\." + ) +}) test_that("ModelArray.wrap runs simple group means function and writes outputs", { h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") diff --git a/vignettes/multi_scalars.Rmd b/vignettes/multi_scalars.Rmd new file mode 100644 index 0000000..aed01d3 --- /dev/null +++ b/vignettes/multi_scalars.Rmd @@ -0,0 +1,144 @@ +--- +title: "Using Multiple Scalars in ModelArray" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using Multiple Scalars in ModelArray} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```r +knitr::opts_chunk$set(collapse = TRUE, comment = "#>") +``` + +### Overview + +ModelArray supports data sets that contain more than one element-wise scalar (e.g., FA and MD). This vignette shows how to: + +- Fit models where the response is one scalar and predictors can include other scalars. +- Ensure sources alignment and avoid name collisions in `phenotypes`. +- Use `ModelArray.wrap` to run custom per-element functions with all scalars attached. + +We will use both the package example data and small in-memory examples for clarity and speed. + +### Setup + +```r +library(ModelArray) +library(dplyr) +set.seed(123) + +h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") +csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") +phenotypes <- read.csv(csv_path) +``` + +### In-memory multi-scalar object + +```r +# Build a tiny in-memory ModelArray with two scalars (FA, MD) +num_elements <- 3L +num_subj <- nrow(phenotypes) +subj <- phenotypes$source_file + +MD <- matrix(rnorm(num_elements * num_subj), nrow = num_elements, ncol = num_subj) +FA <- matrix(NA_real_, nrow = num_elements, ncol = num_subj) +for (i in seq_len(num_elements)) { + FA[i, ] <- 0.6 * MD[i, ] + 0.05 * phenotypes$age + rnorm(num_subj, sd = 0.05) +} + +ma <- methods::new( + "ModelArray", + scalars = list(FA = FA, MD = MD), + sources = list(FA = subj, MD = subj), + results = list(), + path = "" +) +``` + +### Linear models with scalar predictors + +You can reference another scalar (here `MD`) as a predictor when modeling `FA`: + +```r +res_lm <- ModelArray.lm( + FA ~ MD + age, + data = ma, + phenotypes = phenotypes, + scalar = "FA", + element.subset = as.integer(1:2), + num.subj.lthr.abs = 2L, num.subj.lthr.rel = 0, + verbose = FALSE, pbar = FALSE +) +colnames(res_lm)[1:10] +``` + +Requirements and helpful checks: + +- The formula response (LHS) must match `scalar` (here `FA`). +- `phenotypes$source_file` is aligned to `sources(data)[[scalar]]` automatically, with informative errors if mismatched. +- If `phenotypes` contains a column named like any scalar (e.g., `MD`), modeling will error with a clear collision message. Rename such columns. + +### GAMs with scalar predictors + +Similarly, you may use scalar predictors in smooth terms: + +```r +res_gam <- ModelArray.gam( + FA ~ s(MD) + s(age), + data = ma, + phenotypes = phenotypes, + scalar = "FA", + element.subset = as.integer(1:2), + num.subj.lthr.abs = 2L, num.subj.lthr.rel = 0, + verbose = FALSE, pbar = FALSE +) +colnames(res_gam)[1:10] +``` + +The same alignment and collision rules apply. If the predictor scalar's source list does not match `phenotypes$source_file`, you will get an informative error describing the mismatch. + +### Wrapping custom functions with all scalars + +`ModelArray.wrap` attaches all scalars into the per-element data.frame before calling your function. This enables custom analyses that use multiple scalars at once. + +```r +my_fun <- function(data) { + stopifnot(all(c("FA", "MD") %in% names(data))) + # Example: simple correlation between FA and MD for the current element + keep <- is.finite(data$FA) & is.finite(data$MD) + r <- suppressWarnings(stats::cor(data$FA[keep], data$MD[keep])) + tibble::tibble(cor_FA_MD = r) +} + +res_wrap <- ModelArray.wrap( + FUN = my_fun, + data = ma, + phenotypes = phenotypes, + scalar = "FA", + element.subset = as.integer(1:2), + num.subj.lthr.abs = 2L, num.subj.lthr.rel = 0, + verbose = FALSE, pbar = FALSE +) +res_wrap +``` + +Notes: + +- `ModelArray.wrap` checks for collisions between scalar names and `phenotypes` columns and errors with guidance to rename. +- Sources must match for every scalar attached; mismatches generate informative errors identifying the offending scalar. +- Subject-thresholding uses the intersection of valid (finite) values across all attached scalars. + +### Tips for avoiding common pitfalls + +- If you need a `phenotypes` column with the same name as a scalar (e.g., for an external measurement), rename it (e.g., `MD_ext`). +- When bringing additional scalars, ensure their `sources` vectors refer to the same subject IDs as `phenotypes$source_file` (order is handled automatically). + +### See also + +- The wrapping vignette: see `vignettes/wrap_function.Rmd` for a broader introduction to custom per-element functions. +```r +# For pkgdown sites, this will appear in the Articles menu +``` + + diff --git a/vignettes/wrap_function.Rmd b/vignettes/wrap_function.Rmd index 06fbcd6..22cd003 100644 --- a/vignettes/wrap_function.Rmd +++ b/vignettes/wrap_function.Rmd @@ -22,6 +22,10 @@ computed as part of the Reproducible Brain Charts project. We'll develop a function that computes the mean cortical thickness within each site, and then use `ModelArray.wrap` to run it across elements. +If you are looking for examples that attach and use multiple scalars (e.g., referencing one scalar as a predictor for another, or exposing all scalars in a wrapped function), also see the companion vignette: + +- Using Multiple Scalars in ModelArray (multi_scalars.Rmd) + ## Step 1. Prepare your data