SWADA/forestplot.r at main · rvpanaro/SWADA · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
# interaction_forest_plot.R
#
#    Copyright (C) 2024  Renato Panaro
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
# R code to generate interaction forest plots for subgroup analyses,
# visualizing effect size estimates and confidence intervals.
#
#   Authors: Renato Panaro
#   Last Updated: December 2024
#
#' Generate Interaction Forest Plots
#'
#' This function creates forest plots to visualize effect size estimates and confidence intervals
#' for subgroup analyses, including interaction effects.
#' The function relies on the `ckbplotr` package for forest plot visualization and the `metafor`
#' package for effect size and variance calculations.
#'
#' @param yi Numeric vector of effect size estimates.
#' @param vi Numeric vector of variances for the effect size estimates.
#' @param labels Character vector of labels for each subgroup.
#' @param subgroups Character vector indicating subgroup membership.
#' @param subgroup_intervals Data frame containing confidence intervals for subgroups.
#' @param interaction_intervals Data frame containing confidence intervals for interaction effects.
#' @param model_names Character vector of model names.
#' @param treat.events Numeric vector of events in the treatment group.
#' @param treat.total Numeric vector of total subjects in the treatment group.
#' @param cont.events Numeric vector of events in the control group.
#' @param cont.total Numeric vector of total subjects in the control group.
#' @param reference_group Integer specifying the reference group index.
#' @param group_labels Character vector for labeling groups.
#' @param xlab Character string for x-axis label.
#' @param exponentiate Logical; whether to exponentiate effect sizes.
#' @param col_right_heading Character vector for right column headings.
#' @param stroke Numeric; stroke width of points.
#' @param pointsize Numeric; size of points.
#' @param base_size Numeric; base font size.
#' @param prob Numeric; probability level for confidence intervals.
#' @param mid.space Unit; spacing for mid-section (from `grid::unit()`).
#' @param left.space Unit; spacing for left section (from `grid::unit()`).
#' @param plot.margin Margin; plot margin settings (from `ggplot2::margin()`).
#' @param color_palette Character vector of colors for subgroups.
#' @param xlim Numeric vector defining x-axis limits.
#' @param xticks Numeric vector of x-axis tick positions.
#' @param ... Additional arguments passed to `ckbplotr::forest_plot`.
#'
#' @return A forest plot object (from `ckbplotr`).
#'
#' @details
#' This function leverages:
#' - **`ckbplotr::forest_plot()`**: For rendering forest plots.
#' - **`metafor::escalc()`**: For computing effect sizes and variances.
#' - **`magrittr` pipes (`%>%`)**: For streamlined data manipulation.
#' - **`dplyr::group_by()` and `dplyr::do()`**: For grouped calculations.
#' - **`grid::unit()`**: To define spacing in plot layouts.
#' - **`ggplot2::margin()`**: For setting plot margins.
#'
#' @examples
#' # Example usage
#' interaction_forest_plot(
#'   yi = c(0.5, 1.2, 0.8),
#'   vi = c(0.04, 0.06, 0.05),
#'   labels = c("Subgroup 1", "Subgroup 2", "Subgroup 3"),
#'   subgroups = c("Group A", "Group B", "Group C"),
#'   subgroup_intervals = data.frame(
#'     lower = c(0.3, 1.0, 0.6),
#'     upper = c(0.7, 1.4, 1.0)
#'   ),
#'   interaction_intervals = data.frame(
#'     lower = c(0.2, 0.9, 0.5),
#'     upper = c(0.8, 1.5, 1.1)
#'   )
#' )
#'
#' @import ckbplotr
#' @import metafor
#' @import dplyr
#' @import magrittr
#' @importFrom grid unit
#' @importFrom ggplot2 margin
#' @export

interaction_forest_plot <-
  function(yi, vi, labels, subgroups,
           subgroup_intervals, interaction_intervals,
           model_names = "Model 1",
           treat.events = "",
           treat.total = "",
           cont.events = "",
           cont.total = "",
           subgroup.total = "",
           total = "",
           reference_group = 1,
           xlab, exponentiate = TRUE, col_right_heading,
           stroke = 3,
           digits = 2,
           fill = "white",
           pointsize = 13, base_size = 20, prob = 0.95,
           left.space = unit(30, "mm"),
           xlim = c(0.125, 8),
           xticks = c(0.125, 1, 8),
           addaes = c("identity", "log"),
           grouping,
           row.labels.space = c(0,1,0,0),
           estimates_only = FALSE, ...) {

    group_labels = rep("", length(labels))     # (deprecated)

    # Load required libraries
    library(ckbplotr)
    library(metafor)
    library(dplyr)
    library(magrittr)
    library(grid)
    library(ggplot2)
    library(ggtext)
    # Argument validation
    addaes <- match.arg(addaes)  # Garante que seja "identity" ou "log"

    # switch function application
    addaes <- switch(
      addaes,
      log = list(
        paste0(
          "stroke = stroke / 10 * ((2 * 1.96) / ",
          "(log(uci_transformed) - log(lci_transformed)))^(1/2)"
        )
      ),
      identity = list(
        paste0(
          "stroke = stroke / 10 * ((2 * 1.96) / ",
          "(uci_transformed - lci_transformed))^(1/2)"
        )
      )
    )

    # Ensure subgroup_intervals has row names
    stopifnot(!is.null(rownames(subgroup_intervals)))

    # Extract relevant properties for plotting
    num_subgroups <- length(unique(rownames(subgroup_intervals))) # Count unique subgroups
    k <- nrow(subgroup_intervals)                          # Number of rows in subgroups intervals

    # Prepare row labels data frame
    row_labels <- data.frame(
      subheading = labels,
      heading = group_labels,
      label = subgroups
    )

    # Extend row labels for model names and subgroups details
    n <- nrow(row_labels) # Get current number of rows in row_labels
    additional_rows <- num_subgroups * length(model_names) # Calculate additional rows needed

    row_labels[(n + 1):(n + additional_rows), ] <- cbind(
      rep(model_names, each = num_subgroups),                     # Repeat model names for each subgroups
      sub("\\|.*", "", rownames(subgroup_intervals)),         # Extract subgroup name before '|' as heading
      sub(".*\\|", "", rownames(subgroup_intervals))          # Extract subgroup detail after '|' as label
    )

    # Calculate effect size summaries for plotting
    resultsA <- metafor::escalc(yi = yi, vi = vi) %>%
      summary(level = 100 * prob, transf = identity) # Summarize with confidence intervals

    # Replace large confidence intervals with NA for better visualization
    resultsA[resultsA$ci.ub > 195, ] <- NA

    # Append subgroup intervals to results
    resultsA[(n + 1):(n + additional_rows), ] <- subgroup_intervals

    # Set colors and formatting for results
    resultsA$fill <- resultsA$colour <- NA
    resultsA$fill[row_labels$subheading %in% labels] <- "black"
    resultsA$colour[row_labels$subheading %in% labels] <- "black"
    resultsA$fill[!(row_labels$subheading %in% labels)] <- "white"
    resultsA$colour[!(row_labels$subheading %in% labels)] <- "royalblue3"
    resultsA$bold <- resultsA$colour != "black"
    resultsA$colour[resultsA$bold] <- "royalblue3"

    # Assign row identifiers for linking labels and results
    resultsA$variable <- row_labels$variable <- apply(row_labels, 1, paste0, collapse = "")
    resultsB <- resultsA

    # Calculate interaction effects based on subgroup endpoints
    compute_interactions <- function(grouped, reference = 1, point = TRUE) {
      if (!all(c("endpoints", "yi", "vi") %in% colnames(grouped))) {
        stop("Input data 'grouped' must contain 'endpoints', 'yi', and 'vi' columns.")
      }

      unique_subgroups <- unique(subgroups)

      if (reference < 1 || reference > length(unique_subgroups)) {
        stop("Reference group index is out of bounds.")
      }

      C <- diag(length(unique_subgroups))
      C[, reference] <- C[, reference] - 1

      idx <- match(grouped$endpoints, unique_subgroups)
      if (any(is.na(idx))) {
        stop("Some endpoints in 'grouped' do not match the subgroups.")
      }

      if (!point) {
        C <- abs(C)
        result <- C[idx, idx] %*% grouped$vi
      } else {
        result <- C[idx, idx] %*% grouped$yi
      }

      return(result)
    }

    # Step 1: Create the subset data frame
    data_subset <- data.frame(
      yi = yi,
      vi = vi,
      labels = labels,
      endpoints = subgroups,
      groups = group_labels
    )

    # Step 2: Group data by 'groups' and 'labels'
    grouped_data <- data_subset %>%
      group_by(groups, labels)

    # Step 3: Compute interactions for 'yi' and 'vi'
    interaction_results_yi <- grouped_data %>%
      do(data.frame(yi = compute_interactions(., reference = reference_group)))

    interaction_results_vi <- grouped_data %>%
      do(data.frame(vi = compute_interactions(., point = FALSE, reference = reference_group)))

    # Step 4: Match labels and extract corresponding 'yi' and 'vi' values
    matched_yi <- interaction_results_yi %>%
      .[pmatch(labels, .$labels), ] %>%
      .$yi

    matched_vi <- interaction_results_vi %>%
      .[pmatch(labels, .$labels), ] %>%
      .$vi

    # Step 5: Update 'resultsB$yi' and 'resultsB$vi' for entries where 'bold' is FALSE
    resultsB$yi[!resultsB$bold] <- matched_yi
    resultsB$vi[!resultsB$bold] <- matched_vi

    resultsB[, c("ci.lb", "ci.ub")] <-
      summary(escalc(yi = resultsB$yi, vi = resultsB$vi),
              level = 100 * prob)[, c("ci.lb", "ci.ub")]

    # Step 1: Identify rows to update based on subheading matching model names
    rows_to_update <- which(row_labels$subheading %in% model_names, c("yi", "ci.lb", "ci.ub"))

    # Step 2: Calculate indices to update
    include_indices <- seq(rows_to_update[1], rows_to_update[k], num_subgroups) + reference_group-1

    # Step 3: Update selected rows with non-missing interaction intervals
    resultsB[include_indices, c("yi", "ci.lb", "ci.ub")] <- na.omit(interaction_intervals)
    resultsB[resultsB$ci.ub > 195 & !is.na(resultsB$ci.ub), ] <- NA
    resultsB[which(resultsB$vi == 0), c("yi", "ci.lb", "ci.ub")] <- NA
    resultsB[is.na(resultsB$ci.lb), c("yi", "ci.lb", "ci.ub")] <- NA
    resultsB$colour[!is.na(resultsB$yi)] <- "black"
    resultsB$colour[resultsB$bold] <- "chartreuse3"

    if (missing(xlab)) {
      xlab <- c("", "")
    }
    if (missing(col_right_heading)) {
      col_right_heading <- list(paste0("Subgroup \n(", 100 * prob, "% interval)"), paste0("Interaction \n(", 100 * prob, "% interval)"))
    }

    row_labels$heading <- row_labels$subheading
    row_labels$subheading <- NA_character_

    # Define new shapes
    resultsA$shape <- 22
    resultsB$shape <- 21

    # Define diamond shape for summary estimates
    resultsA$shape[resultsA$bold] <- 23
    resultsB$shape[resultsB$bold] <- 23

    # Initialize outputs
    col.left <- NULL
    col.left.heading <- ""

    resultsA$treat <- resultsA$cont <- resultsA$prev <- ""

    # Check if totals are provided
    if(all(treat.total != "")) {
      # Totals for control and treatment groups
      resultsA$treat <- c(paste0(treat.events, "/", treat.total),
                          rep("", nrow(resultsA) - length(treat.total)))
      resultsA$cont <- c(paste0(cont.events, "/", cont.total),
                         rep("", nrow(resultsA) - length(treat.total)))
      col.left.heading_list1 <- list(c("Control \n (n/N)", "Treatment \n (n/N)"), c("", ""))

      resultsA$treat[resultsA$treat == "0/0"] <- ""
      resultsA$cont[resultsA$cont == "0/0"] <- ""
    }


    # Check if prevalence is provided
    if(all(total != "")) {
      # Prevalence formatting
      resultsA$prev <- c(paste0("       ", subgroup.total, " (",round(subgroup.total/total*100, digits =  0), "%)"),
                         rep("", nrow(resultsA) - length(total)))
      col.left.heading_list2 <- list(paste0("Prevalence \n (n/N)"), "")
    }

    # Determine which columns to display
    if(all(treat.total != "") & !all(total != "")) {
      # Only totals
      col.left = c("cont", "treat")
      col.left.heading <- col.left.heading_list1

    } else if(!all(treat.total != "") & all(total != "")) {
      # Only prevalence
      col.left = c("prev")
      col.left.heading <- col.left.heading_list2

    } else if(all(treat.total != "") & all(total != "")) {
      # Both totals
      col.left = c("cont", "treat", "prev")
      col.left.heading <- c(col.left.heading_list1[[1]], col.left.heading_list2)

    } else {
      # Neither totals nor prevalence provided
      col.left <- NULL
      col.left.heading <- ""
    }

    resultsB$treat <- resultsB$cont <- resultsB$prev <- ""

    ## remove points with "no weight"
    idxA <- which(resultsA[,"ci.ub"] - resultsA[,"ci.lb"] > 39)
    idxB <- which(resultsB[,"ci.ub"] - resultsB[,"ci.lb"] > 39)

    resultsA[idxA, c("yi", "ci.lb", "ci.ub")] <- c(NA,NA,NA)
    resultsB[idxB, c("yi", "ci.lb", "ci.ub")] <- c(NA,NA,NA)

    if (is.list(fill)) {
      if (length(fill) != 2) {
        stop("`fill` must be a list of length 2 when provided as a list")
      }
      fill_list <- fill

    } else if (is.null(fill)) {
      # NULL → both white
      fill_list <- list("white", "white")

    } else {
      # anything else (a single string) → replicate to length 2
      fill_list <- rep(fill, 2)
    }

    ## define filling
    resultsA$fill <- fill_list[[1]]
    resultsB$fill <- fill_list[[2]]

    ## define grouping
    if(!missing(grouping)) {
      row_labels$subheading <- grouping
    }
    bold <- is.na(row_labels$heading)

    ## whether to plot only the estimates
    if(estimates_only){
      idx <- resultsB$bold
      idx[is.na(idx)] <- FALSE

      row_labels <- row_labels[idx, ]
      resultsA <- resultsA[idx, ]
      resultsB <- resultsB[idx, ]


      bold <- bold[resultsB$bold]
    }

    forestplot <-
      ckbplotr::forest_plot(
        base_size = base_size,
        panels = list(resultsA, resultsB),
        col.key = "variable", col.estimate = "yi",
        col.lci = "ci.lb", col.uci = "ci.ub",
        col.right.heading = col_right_heading,
        col.heading.space = 1,
        row.labels = row_labels,
        row.labels.space = row.labels.space,
        xlab = xlab,
        showcode = FALSE,
        digits = digits,
        fill = "fill",
        shape = "shape",
        colour = "colour",
        col.left = col.left,
        col.left.heading = col.left.heading,
        bold.labels = bold,
        pointsize = pointsize,
        envir = environment(),
        left.space = left.space,
        exponentiate = exponentiate,
        xlim = xlim,
        xticks = xticks,
        addaes = list(point = paste0("stroke = stroke/10*((2*1.96)/(log(uci_transformed)-log(lci_transformed)))^(1/2)")), ...
      )

    attr(forestplot, "panels") <- list(resultsA, resultsB)

    return(forestplot)
  }