diff --git a/DESCRIPTION b/DESCRIPTION index 6cffdb3d7d..a3a9165519 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,6 +16,7 @@ Imports: cardx (>= 0.2.4.9003), cvTools, dobson, + dplyr, forcats, fs, glmmTMB, @@ -35,12 +36,12 @@ Imports: stringr, table1, tibble, + utils, webshot2 Suggests: tidyverse, conflicted, dagitty, - dplyr, eha, ggfortify, ggdag, diff --git a/NAMESPACE b/NAMESPACE index 41bd850088..46f21cbd0f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(km_by_hand_table) +export(na_by_hand_table) importFrom(fs,path_package) importFrom(rlang,.data) diff --git a/R/km_by_hand_table.R b/R/km_by_hand_table.R new file mode 100644 index 0000000000..6280c7ab99 --- /dev/null +++ b/R/km_by_hand_table.R @@ -0,0 +1,146 @@ +#' Build a Kaplan-Meier by-hand table +#' +#' Takes a data frame with `time` and `death` columns and returns a +#' `knitr_kable` table showing the time-point, risk set, contribution, and +#' survival estimate at each event or censoring time. +#' +#' @param data A data frame with columns `time` (numeric) and `death` +#' (0/1 integer or logical). +#' +#' @returns A `knitr_kable` object (LaTeX/HTML math strings, `escape = FALSE`). +#' @export +#' @examples +#' d <- data.frame(time = c(1, 2, 3, 3, 5), death = c(1, 0, 1, 1, 0)) +#' km_by_hand_table(d) +km_by_hand_table <- function(data) { + stopifnot( + "data must have a 'time' column" = "time" %in% names(data), + "data must have a 'death' column" = "death" %in% names(data), + "'death' must be 0/1 or logical" = all(data$death %in% c(0, 1)), + "'time' must not contain NA" = !anyNA(data$time) + ) + + time <- data$time + status <- data$death + n <- length(time) + + gcd_fn <- function(a, b) { + while (b != 0) { + tmp <- b + b <- a %% b + a <- tmp + } + a + } + + # `format()` falls back to scientific notation for small values (e.g. + # `format(1e-6)` -> "1e-06"), which is invalid inside a `$...$` math + # environment; this always produces fixed-point output. + fmt_dec <- function(x) { + formatC(x, format = "f", digits = 4, drop0trailing = TRUE) + } + + df <- dplyr::tibble(time = time, status = status) |> + dplyr::arrange(time) |> + dplyr::group_by(time) |> + dplyr::summarise( + deaths = sum(status), + censored = sum(1L - as.integer(status)), + .groups = "drop" + ) + + # Number at risk at each time = sample size minus everyone who left + # (via death or censoring) at an *earlier* time point. + left_before <- cumsum(c(0L, utils::head(df$deaths + df$censored, -1L))) + df$n_risk <- n - left_before + + # Running survival numerator/denominator are doubles so that whole-number + # products beyond .Machine$integer.max stay exact instead of overflowing. + surv_n <- 1 + surv_d <- 1 + prev_surv_str <- "1" + + tbl <- dplyr::tibble( + `Time-point` = numeric(nrow(df)), + `Risk set` = integer(nrow(df)), + `Contribution` = character(nrow(df)), + `Survival Estimate` = character(nrow(df)) + ) + + for (i in seq_len(nrow(df))) { + ni <- as.integer(df$n_risk[i]) + di <- as.integer(df$deaths[i]) + cn_raw <- ni - di + + if (cn_raw == ni) { + c_n <- 1 + c_d <- 1 + } else if (cn_raw == 0L) { + c_n <- 0 + c_d <- 1 + } else { + g <- gcd_fn(cn_raw, ni) + c_n <- cn_raw %/% g + c_d <- ni %/% g + } + + surv_n <- surv_n * c_n + surv_d <- surv_d * c_d + if (surv_n > 0) { + g <- gcd_fn(surv_n, surv_d) + surv_n <- surv_n %/% g + surv_d <- surv_d %/% g + } + + frac_str <- if (c_n == 1 && c_d == 1) { + "1" + } else if (c_n == 0) { + "0" + } else { + sprintf("\\frac{%.0f}{%.0f}", c_n, c_d) + } + + contrib_cell <- if (di == 0L) { + sprintf( + "$\\left(1 - \\frac{%d}{%d}\\right) = 1$", + di, ni + ) + } else if (c_n == 0) { + sprintf( + "$\\left(1 - \\frac{%d}{%d}\\right) = 0$", + di, ni + ) + } else { + sprintf( + "$\\left(1 - \\frac{%d}{%d}\\right) = \\frac{%.0f}{%.0f} = %s$", + di, ni, c_n, c_d, fmt_dec(c_n / c_d) + ) + } + + surv_cell <- if (di == 0L) { + sprintf("$%s$", prev_surv_str) + } else if (surv_n == 0) { + sprintf("$%s \\times %s = 0$", prev_surv_str, frac_str) + } else { + sprintf( + "$%s \\times %s = \\frac{%.0f}{%.0f} = %s$", + prev_surv_str, frac_str, surv_n, surv_d, fmt_dec(surv_n / surv_d) + ) + } + + if (di > 0L) { + prev_surv_str <- if (surv_n == 0) { + "0" + } else { + sprintf("\\frac{%.0f}{%.0f}", surv_n, surv_d) + } + } + + tbl$`Time-point`[i] <- df$time[i] + tbl$`Risk set`[i] <- ni + tbl$`Contribution`[i] <- contrib_cell + tbl$`Survival Estimate`[i] <- surv_cell + } + + knitr::kable(tbl, escape = FALSE) +} diff --git a/R/na_by_hand_table.R b/R/na_by_hand_table.R new file mode 100644 index 0000000000..a2a967ec48 --- /dev/null +++ b/R/na_by_hand_table.R @@ -0,0 +1,127 @@ +#' Build a Nelson-Aalen by-hand table +#' +#' Takes a data frame with `time` and `death` columns and returns a +#' `knitr_kable` table showing the time-point, risk set, hazard increment, +#' cumulative hazard, and Nelson-Aalen survival estimate at each time point. +#' +#' @param data A data frame with columns `time` (numeric) and `death` +#' (0/1 integer or logical). +#' +#' @details +#' The "NA Survival" column is rendered with the `\expf` macro from the +#' book's `latex-macros/macros.qmd`, so this function is intended for use +#' inside the `rme` book render pipeline. Outside that environment (a bare +#' `knitr` document or `pkgdown` page) define `\expf` or that column will +#' show an undefined control sequence. +#' +#' @returns A `knitr_kable` object (LaTeX/HTML math strings, `escape = FALSE`). +#' @export +#' @examples +#' d <- data.frame(time = c(1, 2, 3, 3, 5), death = c(1, 0, 1, 1, 0)) +#' na_by_hand_table(d) +na_by_hand_table <- function(data) { + stopifnot( + "data must have a 'time' column" = "time" %in% names(data), + "data must have a 'death' column" = "death" %in% names(data), + "'death' must be 0/1 or logical" = all(data$death %in% c(0, 1)), + "'time' must not contain NA" = !anyNA(data$time) + ) + + time <- data$time + status <- data$death + n <- length(time) + + gcd_fn <- function(a, b) { + while (b != 0) { + tmp <- b + b <- a %% b + a <- tmp + } + a + } + + # `format()` falls back to scientific notation for small values (e.g. + # `format(1e-6)` -> "1e-06"), which is invalid inside a `$...$` math + # environment; this always produces fixed-point output. + fmt_dec <- function(x) { + formatC(x, format = "f", digits = 4, drop0trailing = TRUE) + } + + df <- dplyr::tibble(time = time, status = status) |> + dplyr::arrange(time) |> + dplyr::group_by(time) |> + dplyr::summarise( + deaths = sum(status), + censored = sum(1L - as.integer(status)), + .groups = "drop" + ) + + # Number at risk at each time = sample size minus everyone who left + # (via death or censoring) at an *earlier* time point. + left_before <- cumsum(c(0L, utils::head(df$deaths + df$censored, -1L))) + df$n_risk <- n - left_before + + # Running cumulative-hazard numerator/denominator are doubles so that + # whole-number products beyond .Machine$integer.max stay exact instead + # of overflowing. + cum_haz_n <- 0 + cum_haz_d <- 1 + cum_haz_str <- "0" + + tbl <- dplyr::tibble( + `Time-point` = numeric(nrow(df)), + `Risk set` = integer(nrow(df)), + `Hazard increment` = character(nrow(df)), + `Cumulative hazard` = character(nrow(df)), + `NA Survival` = character(nrow(df)) + ) + + for (i in seq_len(nrow(df))) { + ni <- as.integer(df$n_risk[i]) + di <- as.integer(df$deaths[i]) + + inc_str <- if (di == 0L) { + sprintf("$\\frac{0}{%d} = 0$", ni) + } else { + sprintf("$\\frac{%d}{%d}$", di, ni) + } + + prev_cum_haz_str <- cum_haz_str + + if (di > 0L) { + new_num <- cum_haz_n * ni + di * cum_haz_d + new_den <- cum_haz_d * ni + g <- gcd_fn(new_num, new_den) + cum_haz_n <- new_num %/% g + cum_haz_d <- new_den %/% g + + cum_haz_str <- if (cum_haz_d == 1) { + sprintf("%.0f", cum_haz_n) + } else { + sprintf("\\frac{%.0f}{%.0f}", cum_haz_n, cum_haz_d) + } + + cum_haz_cell <- sprintf( + "$%s + \\frac{%d}{%d} = %s$", + prev_cum_haz_str, di, ni, cum_haz_str + ) + } else { + cum_haz_cell <- sprintf("$%s$", cum_haz_str) + } + + surv_val <- exp(-cum_haz_n / cum_haz_d) + surv_cell <- sprintf( + "$\\expf{-%s} \\approx %s$", + cum_haz_str, + fmt_dec(surv_val) + ) + + tbl$`Time-point`[i] <- df$time[i] + tbl$`Risk set`[i] <- ni + tbl$`Hazard increment`[i] <- inc_str + tbl$`Cumulative hazard`[i] <- cum_haz_cell + tbl$`NA Survival`[i] <- surv_cell + } + + knitr::kable(tbl, escape = FALSE) +} diff --git a/man/km_by_hand_table.Rd b/man/km_by_hand_table.Rd new file mode 100644 index 0000000000..aea357ea62 --- /dev/null +++ b/man/km_by_hand_table.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/km_by_hand_table.R +\name{km_by_hand_table} +\alias{km_by_hand_table} +\title{Build a Kaplan-Meier by-hand table} +\usage{ +km_by_hand_table(data) +} +\arguments{ +\item{data}{A data frame with columns \code{time} (numeric) and \code{death} +(0/1 integer or logical).} +} +\value{ +A \code{knitr_kable} object (LaTeX/HTML math strings, \code{escape = FALSE}). +} +\description{ +Takes a data frame with \code{time} and \code{death} columns and returns a +\code{knitr_kable} table showing the time-point, risk set, contribution, and +survival estimate at each event or censoring time. +} +\examples{ +d <- data.frame(time = c(1, 2, 3, 3, 5), death = c(1, 0, 1, 1, 0)) +km_by_hand_table(d) +} diff --git a/man/na_by_hand_table.Rd b/man/na_by_hand_table.Rd new file mode 100644 index 0000000000..f8670766fc --- /dev/null +++ b/man/na_by_hand_table.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/na_by_hand_table.R +\name{na_by_hand_table} +\alias{na_by_hand_table} +\title{Build a Nelson-Aalen by-hand table} +\usage{ +na_by_hand_table(data) +} +\arguments{ +\item{data}{A data frame with columns \code{time} (numeric) and \code{death} +(0/1 integer or logical).} +} +\value{ +A \code{knitr_kable} object (LaTeX/HTML math strings, \code{escape = FALSE}). +} +\description{ +Takes a data frame with \code{time} and \code{death} columns and returns a +\code{knitr_kable} table showing the time-point, risk set, hazard increment, +cumulative hazard, and Nelson-Aalen survival estimate at each time point. +} +\details{ +The "NA Survival" column is rendered with the \verb{\\expf} macro from the +book's \code{latex-macros/macros.qmd}, so this function is intended for use +inside the \code{rme} book render pipeline. Outside that environment (a bare +\code{knitr} document or \code{pkgdown} page) define \verb{\\expf} or that column will +show an undefined control sequence. +} +\examples{ +d <- data.frame(time = c(1, 2, 3, 3, 5), death = c(1, 0, 1, 1, 0)) +na_by_hand_table(d) +}