Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 98 additions & 7 deletions R/google_sheets.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,27 @@ uploadDataToGoogleSheets <- function(df, type = "newSpreadSheet", spreadSheetNam
stop("Invalid 'type' parameter provided.")
}
}
#' Helper function to pad col_types string when column count changes
#'
#' Looks for the phrase "there are N columns" in the error text. When N is
#' larger than the number of characters in `col_types`, one `?` is appended
#' per extra column. In every other situation (phrase not found, sheet has
#' the same or fewer columns, or unusable inputs) NULL is returned.
#'
#' @param col_types - original col_types string (one character per column)
#' @param error_msg - error message from googlesheets4
#' @return padded col_types string or NULL if cannot pad
#' @keywords internal
.pad_col_types_for_column_mismatch <- function(col_types, error_msg) {
  # Only a single, non-missing compact string can be padded character-wise.
  if (!is.character(col_types) || length(col_types) != 1L || is.na(col_types)) {
    return(NULL)
  }
  # No usable message text means there is nothing to parse the count from.
  if (!is.character(error_msg) || length(error_msg) == 0L) {
    return(NULL)
  }
  # Collapse multi-element messages so the search sees the whole text.
  error_msg <- paste(error_msg[!is.na(error_msg)], collapse = "\n")

  # Extract actual column count from error message
  # Pattern: "But there are X columns found in sheets" or similar
  hit <- regmatches(error_msg, regexec("there are (\\d+) columns", error_msg))[[1]]
  if (length(hit) < 2L) {
    return(NULL)
  }

  # as.integer() yields NA (with a warning) for digit runs that overflow.
  actual_count <- suppressWarnings(as.integer(hit[2]))
  current_count <- nchar(col_types)
  if (is.na(actual_count) || actual_count <= current_count) {
    return(NULL) # Nothing to add: the sheet is not wider than col_types
  }

  # Pad with '?' (guess/default) characters for extra columns
  paste0(col_types, strrep("?", actual_count - current_count))
}

#' API to normalize data for Google Sheets Export
#' @param df - data frame
#
Expand All @@ -145,6 +166,80 @@ normalizeDataForGoogleSheetsExport <- function (df) {
}


#' Helper function to read Google Sheet with col_types padding support
#'
#' Calls `googlesheets4::read_sheet()` once. If that call fails with the
#' "Length of `col_types` is not compatible ..." error and the column types
#' were given as a single unnamed string, the string is padded with `?` via
#' `.pad_col_types_for_column_mismatch()` and the read is retried once. Any
#' other error, or a mismatch that cannot be padded, is re-thrown unchanged.
#'
#' @param gsheet - Google Sheet object from googledrive
#' @param sheetName - name of worksheet
#' @param skipNRows - rows to skip
#' @param treatTheseAsNA - NA values; when NULL the `na` argument is not passed
#' @param firstRowAsHeader - use first row as header
#' @param col_types - column types specification
#' @param guess_max - max rows to guess types
#' @param original_col_types - original col_types for retry logic; when not
#'   NULL it is used in place of `col_types`
#' @return whatever `googlesheets4::read_sheet()` returns for the successful call
#' @keywords internal
.read_sheet_with_col_types_padding <- function(gsheet, sheetName, skipNRows, treatTheseAsNA, firstRowAsHeader, col_types, guess_max, original_col_types = NULL) {
  # Use original_col_types when provided to control the retry/padding behavior,
  # otherwise fall back to the current col_types argument.
  col_types_to_use <- if (!is.null(original_col_types)) original_col_types else col_types

  # Single place that performs the read, so the first attempt and the retry
  # cannot drift apart. `na` is only passed when a value was supplied.
  read_with <- function(types) {
    if (is.null(treatTheseAsNA)) {
      googlesheets4::read_sheet(
        gsheet,
        range = sheetName,
        skip = skipNRows,
        col_names = firstRowAsHeader,
        col_types = types,
        guess_max = guess_max
      )
    } else {
      googlesheets4::read_sheet(
        gsheet,
        range = sheetName,
        skip = skipNRows,
        na = treatTheseAsNA,
        col_names = firstRowAsHeader,
        col_types = types,
        guess_max = guess_max
      )
    }
  }

  tryCatch(
    read_with(col_types_to_use),
    error = function(e) {
      # conditionMessage() is used instead of e$message: for rlang/cli-style
      # conditions the detail bullets (which carry the "there are N columns"
      # line) may not be part of the `$message` field, while
      # conditionMessage() returns the full formatted text.
      msg <- conditionMessage(e)

      # Padding only makes sense for the compact one-string form of col_types.
      paddable <- is.character(col_types_to_use) &&
        length(col_types_to_use) == 1 &&
        is.null(names(col_types_to_use))

      # Matched only up to "sheet" so that both a "...found in sheet" and a
      # "...found in sheets" wording are recognised.
      # NOTE(review): confirm the exact wording emitted by the installed
      # googlesheets4 version.
      is_length_mismatch <- paddable && isTRUE(grepl(
        "Length of `col_types` is not compatible with columns found in sheet",
        msg,
        fixed = TRUE
      ))

      if (is_length_mismatch) {
        # Try to pad col_types with '?' for extra columns
        padded_types <- .pad_col_types_for_column_mismatch(col_types_to_use, msg)
        if (!is.null(padded_types)) {
          # Retry once with padded col_types; an error here propagates as-is.
          return(read_with(padded_types))
        }
      }

      # If we can't handle it, re-throw the original error
      stop(e)
    }
  )
}

#' API to get google sheet data
#' @export
#' @param title name of a sheet on Google Sheets.
Expand Down Expand Up @@ -192,13 +287,9 @@ getGoogleSheet <- function(title, sheetName, skipNRows = 0, treatTheseAsNA = NUL
col_types <- NULL
}
}
# The "na" argument of googlesheets4::read_sheet does not accept null,
# so if the treatTheseAsNA is null, do not pass it to googlesheets4::read_sheet
if(!is.null(treatTheseAsNA)) {
df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, na = treatTheseAsNA, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max)
} else {
df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max)
}
# Read the sheet with col_types padding support for column count changes
df <- .read_sheet_with_col_types_padding(gsheet, sheetName, skipNRows, treatTheseAsNA, firstRowAsHeader, col_types, guess_max)

if(!is.null(tzone)) { # if timezone is specified, apply the timezone to POSIXct columns
df <- df %>% dplyr::mutate(across(where(lubridate::is.POSIXct), ~ lubridate::force_tz(.x, tzone=tzone)))
}
Expand Down
58 changes: 58 additions & 0 deletions tests/testthat/test_google_sheets.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,61 @@ test_that("normalizeDataForGoogleSheetsExport", {
expect_equal(class(df_cleaned$numAllInf), "numeric")
expect_equal(class(df_cleaned$numAllMixed), "numeric")
})

# Tests for col_types padding helper function (Issue #33700)
test_that(".pad_col_types_for_column_mismatch pads col_types correctly", {
  # Test case 1: Standard error message with column count
  error_msg <- "Length of `col_types` is not compatible with columns found in sheets:\n- 14 column types specified.\n- 14 un-skipped column types specified.\n- But there are 15 columns found in sheets."
  result <- exploratory:::.pad_col_types_for_column_mismatch("cinTDDDDDDDDDT", error_msg)
  expect_equal(result, "cinTDDDDDDDDDT?") # Original 14 chars + 1 '?' for the extra column

  # Test case 2: Multiple extra columns
  error_msg2 <- "Length of `col_types` is not compatible with columns found in sheets:\n- 5 column types specified.\n- 5 un-skipped column types specified.\n- But there are 8 columns found in sheets."
  result2 <- exploratory:::.pad_col_types_for_column_mismatch("cinTD", error_msg2)
  expect_equal(result2, "cinTD???") # Original 5 chars + 3 '?' for extra columns

  # Test case 3: Error message doesn't match expected pattern
  error_msg3 <- "Some other error message"
  result3 <- exploratory:::.pad_col_types_for_column_mismatch("cinT", error_msg3)
  expect_null(result3)

  # Test case 4: col_types already matches or exceeds actual columns (edge case)
  error_msg4 <- "Length of `col_types` is not compatible with columns found in sheets:\n- 5 column types specified.\n- But there are 3 columns found in sheets."
  result4 <- exploratory:::.pad_col_types_for_column_mismatch("cinTD", error_msg4)
  expect_null(result4) # Should return NULL since actual < specified

  # Same edge case, "matches" half: equal counts leave nothing to pad
  error_msg5 <- "Length of `col_types` is not compatible with columns found in sheets:\n- 5 column types specified.\n- But there are 5 columns found in sheets."
  result5 <- exploratory:::.pad_col_types_for_column_mismatch("cinTD", error_msg5)
  expect_null(result5) # Should return NULL since actual == specified
})

test_that(".read_sheet_with_col_types_padding retries with padded col_types", {
  skip_if_not_installed("googlesheets4")

  mismatch_msg <- "Length of `col_types` is not compatible with columns found in sheets:\n- 4 column types specified.\n- 4 un-skipped column types specified.\n- But there are 5 columns found in sheets."

  # col_types value seen on each invocation of the stand-in reader, in order
  seen_col_types <- list()

  # Stand-in for googlesheets4::read_sheet: records col_types, fails with the
  # mismatch message on the first invocation and succeeds afterwards.
  fake_read_sheet <- function(...) {
    dots <- list(...)
    seen_col_types <<- c(seen_col_types, list(dots$col_types))
    is_first_attempt <- length(seen_col_types) == 1
    if (is_first_attempt) {
      stop(mismatch_msg)
    }
    data.frame(a = 1)
  }

  # Swap the stand-in into the googlesheets4 namespace and restore the real
  # function when the test body exits.
  real_read_sheet <- get("read_sheet", envir = asNamespace("googlesheets4"))
  assignInNamespace("read_sheet", fake_read_sheet, ns = "googlesheets4")
  on.exit(assignInNamespace("read_sheet", real_read_sheet, ns = "googlesheets4"), add = TRUE)

  result <- exploratory:::.read_sheet_with_col_types_padding(
    gsheet = "dummy",
    sheetName = "Sheet1",
    skipNRows = 0,
    treatTheseAsNA = NULL,
    firstRowAsHeader = TRUE,
    col_types = "cinT",
    guess_max = 100
  )

  # Exactly one failed attempt plus one retry, the retry carrying one extra '?'
  expect_equal(length(seen_col_types), 2)
  expect_equal(seen_col_types[[1]], "cinT")
  expect_equal(seen_col_types[[2]], "cinT?")
  expect_equal(result$a, 1)
})