Skip to content
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Imports:
cardx (>= 0.2.4.9003),
cvTools,
dobson,
dplyr,
forcats,
fs,
glmmTMB,
Expand All @@ -35,12 +36,12 @@ Imports:
stringr,
table1,
tibble,
utils,
webshot2
Suggests:
tidyverse,
conflicted,
dagitty,
dplyr,
eha,
ggfortify,
ggdag,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(km_by_hand_table)
export(na_by_hand_table)
importFrom(fs,path_package)
importFrom(rlang,.data)
146 changes: 146 additions & 0 deletions R/km_by_hand_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#' Build a Kaplan-Meier by-hand table
#'
#' Takes a data frame with `time` and `death` columns and returns a
#' `knitr_kable` table showing the time-point, risk set, contribution, and
#' survival estimate at each event or censoring time.
#'
#' @param data A data frame with columns `time` (numeric) and `death`
#' (0/1 integer or logical).
#'
#' @returns A `knitr_kable` object (LaTeX/HTML math strings, `escape = FALSE`).
#' @export
#' @examples
#' d <- data.frame(time = c(1, 2, 3, 3, 5), death = c(1, 0, 1, 1, 0))
#' km_by_hand_table(d)
km_by_hand_table <- function(data) {
stopifnot(
"data must have a 'time' column" = "time" %in% names(data),
"data must have a 'death' column" = "death" %in% names(data),
"'death' must be 0/1 or logical" = all(data$death %in% c(0, 1)),
"'time' must not contain NA" = !anyNA(data$time)
)

time <- data$time
status <- data$death
n <- length(time)

gcd_fn <- function(a, b) {
while (b != 0) {
tmp <- b
b <- a %% b
a <- tmp
}
a
}

# `format()` falls back to scientific notation for small values (e.g.
# `format(1e-6)` -> "1e-06"), which is invalid inside a `$...$` math
# environment; this always produces fixed-point output.
fmt_dec <- function(x) {
formatC(x, format = "f", digits = 4, drop0trailing = TRUE)
}

df <- dplyr::tibble(time = time, status = status) |>
dplyr::arrange(time) |>
dplyr::group_by(time) |>
dplyr::summarise(
deaths = sum(status),
censored = sum(1L - as.integer(status)),
.groups = "drop"
)

# Number at risk at each time = sample size minus everyone who left
# (via death or censoring) at an *earlier* time point.
left_before <- cumsum(c(0L, utils::head(df$deaths + df$censored, -1L)))
df$n_risk <- n - left_before

# Running survival numerator/denominator are doubles so that whole-number
# products beyond .Machine$integer.max stay exact instead of overflowing.
surv_n <- 1
surv_d <- 1
prev_surv_str <- "1"

tbl <- dplyr::tibble(
`Time-point` = numeric(nrow(df)),
`Risk set` = integer(nrow(df)),
`Contribution` = character(nrow(df)),
`Survival Estimate` = character(nrow(df))
)

for (i in seq_len(nrow(df))) {
ni <- as.integer(df$n_risk[i])
di <- as.integer(df$deaths[i])
cn_raw <- ni - di

if (cn_raw == ni) {
c_n <- 1
c_d <- 1
} else if (cn_raw == 0L) {
c_n <- 0
c_d <- 1
} else {
g <- gcd_fn(cn_raw, ni)
c_n <- cn_raw %/% g
c_d <- ni %/% g
}

surv_n <- surv_n * c_n
surv_d <- surv_d * c_d
if (surv_n > 0) {
g <- gcd_fn(surv_n, surv_d)
surv_n <- surv_n %/% g
surv_d <- surv_d %/% g
}

frac_str <- if (c_n == 1 && c_d == 1) {
"1"
} else if (c_n == 0) {
"0"
} else {
sprintf("\\frac{%.0f}{%.0f}", c_n, c_d)
}

contrib_cell <- if (di == 0L) {
sprintf(
"$\\left(1 - \\frac{%d}{%d}\\right) = 1$",
di, ni
)
} else if (c_n == 0) {
sprintf(
"$\\left(1 - \\frac{%d}{%d}\\right) = 0$",
di, ni
)
} else {
sprintf(
"$\\left(1 - \\frac{%d}{%d}\\right) = \\frac{%.0f}{%.0f} = %s$",
di, ni, c_n, c_d, fmt_dec(c_n / c_d)
)
}

surv_cell <- if (di == 0L) {
sprintf("$%s$", prev_surv_str)
} else if (surv_n == 0) {
sprintf("$%s \\times %s = 0$", prev_surv_str, frac_str)
} else {
sprintf(
"$%s \\times %s = \\frac{%.0f}{%.0f} = %s$",
prev_surv_str, frac_str, surv_n, surv_d, fmt_dec(surv_n / surv_d)
)
}

if (di > 0L) {
prev_surv_str <- if (surv_n == 0) {
"0"
} else {
sprintf("\\frac{%.0f}{%.0f}", surv_n, surv_d)
}
}

tbl$`Time-point`[i] <- df$time[i]
tbl$`Risk set`[i] <- ni
tbl$`Contribution`[i] <- contrib_cell
tbl$`Survival Estimate`[i] <- surv_cell
}

knitr::kable(tbl, escape = FALSE)
}
127 changes: 127 additions & 0 deletions R/na_by_hand_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#' Build a Nelson-Aalen by-hand table
#'
#' Takes a data frame with `time` and `death` columns and returns a
#' `knitr_kable` table showing the time-point, risk set, hazard increment,
#' cumulative hazard, and Nelson-Aalen survival estimate at each time point.
#'
#' @param data A data frame with columns `time` (numeric) and `death`
#' (0/1 integer or logical).
#'
#' @details
#' The "NA Survival" column is rendered with the `\expf` macro from the
#' book's `latex-macros/macros.qmd`, so this function is intended for use
#' inside the `rme` book render pipeline. Outside that environment (a bare
#' `knitr` document or `pkgdown` page) define `\expf` or that column will
#' show an undefined control sequence.
#'
#' @returns A `knitr_kable` object (LaTeX/HTML math strings, `escape = FALSE`).
#' @export
#' @examples
#' d <- data.frame(time = c(1, 2, 3, 3, 5), death = c(1, 0, 1, 1, 0))
#' na_by_hand_table(d)
na_by_hand_table <- function(data) {
stopifnot(
"data must have a 'time' column" = "time" %in% names(data),
"data must have a 'death' column" = "death" %in% names(data),
"'death' must be 0/1 or logical" = all(data$death %in% c(0, 1)),
"'time' must not contain NA" = !anyNA(data$time)
)

time <- data$time
status <- data$death
n <- length(time)

gcd_fn <- function(a, b) {
while (b != 0) {
tmp <- b
b <- a %% b
a <- tmp
}
a
}

# `format()` falls back to scientific notation for small values (e.g.
# `format(1e-6)` -> "1e-06"), which is invalid inside a `$...$` math
# environment; this always produces fixed-point output.
fmt_dec <- function(x) {
formatC(x, format = "f", digits = 4, drop0trailing = TRUE)
}

df <- dplyr::tibble(time = time, status = status) |>
dplyr::arrange(time) |>
dplyr::group_by(time) |>
dplyr::summarise(
deaths = sum(status),
censored = sum(1L - as.integer(status)),
.groups = "drop"
)

# Number at risk at each time = sample size minus everyone who left
# (via death or censoring) at an *earlier* time point.
left_before <- cumsum(c(0L, utils::head(df$deaths + df$censored, -1L)))
df$n_risk <- n - left_before

# Running cumulative-hazard numerator/denominator are doubles so that
# whole-number products beyond .Machine$integer.max stay exact instead
# of overflowing.
cum_haz_n <- 0
cum_haz_d <- 1
cum_haz_str <- "0"

tbl <- dplyr::tibble(
`Time-point` = numeric(nrow(df)),
`Risk set` = integer(nrow(df)),
`Hazard increment` = character(nrow(df)),
`Cumulative hazard` = character(nrow(df)),
`NA Survival` = character(nrow(df))
)

for (i in seq_len(nrow(df))) {
ni <- as.integer(df$n_risk[i])
di <- as.integer(df$deaths[i])

inc_str <- if (di == 0L) {
sprintf("$\\frac{0}{%d} = 0$", ni)
} else {
sprintf("$\\frac{%d}{%d}$", di, ni)
}

prev_cum_haz_str <- cum_haz_str

if (di > 0L) {
new_num <- cum_haz_n * ni + di * cum_haz_d
new_den <- cum_haz_d * ni
g <- gcd_fn(new_num, new_den)
cum_haz_n <- new_num %/% g
cum_haz_d <- new_den %/% g

cum_haz_str <- if (cum_haz_d == 1) {
sprintf("%.0f", cum_haz_n)
} else {
sprintf("\\frac{%.0f}{%.0f}", cum_haz_n, cum_haz_d)
}

cum_haz_cell <- sprintf(
"$%s + \\frac{%d}{%d} = %s$",
prev_cum_haz_str, di, ni, cum_haz_str
)
} else {
cum_haz_cell <- sprintf("$%s$", cum_haz_str)
}

surv_val <- exp(-cum_haz_n / cum_haz_d)
surv_cell <- sprintf(
"$\\expf{-%s} \\approx %s$",
cum_haz_str,
fmt_dec(surv_val)
)

tbl$`Time-point`[i] <- df$time[i]
tbl$`Risk set`[i] <- ni
tbl$`Hazard increment`[i] <- inc_str
tbl$`Cumulative hazard`[i] <- cum_haz_cell
tbl$`NA Survival`[i] <- surv_cell
}

knitr::kable(tbl, escape = FALSE)
}
24 changes: 24 additions & 0 deletions man/km_by_hand_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions man/na_by_hand_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading