From 81b61dfbf4270b4ffb5b0552fc06b2cd976f530a Mon Sep 17 00:00:00 2001 From: Hide Kojima Date: Sun, 1 Feb 2026 20:32:21 -0800 Subject: [PATCH 1/3] fix(googlesheets): auto-pad col_types when column count changes When a Google Sheet has more columns than specified in col_types, automatically pad with '?' (guess/default) for the extra columns instead of failing with "Length of col_types is not compatible" error. This fixes issues where: - User imports Google Sheet with column types specified - Additional columns are added to the sheet later - On refresh (Desktop or Server), the data source now succeeds with the new columns using automatic type detection Fixes exploratory-io/tam#33700 Co-Authored-By: Claude Opus 4.5 --- R/google_sheets.R | 70 ++++++++++++++++++++++++++--- tests/testthat/test_google_sheets.R | 23 ++++++++++ 2 files changed, 86 insertions(+), 7 deletions(-) diff --git a/R/google_sheets.R b/R/google_sheets.R index 2a9eca8c9..e8ab1aa68 100644 --- a/R/google_sheets.R +++ b/R/google_sheets.R @@ -127,6 +127,27 @@ uploadDataToGoogleSheets <- function(df, type = "newSpreadSheet", spreadSheetNam stop("Invalid 'type' parameter provided.") } } +#' Helper function to pad col_types string when column count changes +#' @param col_types - original col_types string +#' @param error_msg - error message from googlesheets4 +#' @return padded col_types string or NULL if cannot pad +#' @keywords internal +.pad_col_types_for_column_mismatch <- function(col_types, error_msg) { + # Extract actual column count from error message + # Pattern: "But there are X columns found in sheets" or similar + match <- regmatches(error_msg, regexec("there are (\\d+) columns", error_msg)) + if (length(match[[1]]) >= 2) { + actual_count <- as.integer(match[[1]][2]) + current_count <- nchar(col_types) + if (actual_count > current_count) { + # Pad with '?' (guess/default) characters for extra columns + padding <- paste(rep("?", actual_count - current_count), collapse = "") + return(paste0(col_types, padding)) + } + } + return(NULL) # Return NULL if can't pad +} + #' API to normalize data for Google Sheets Export #' @param df - data frame # @@ -145,6 +166,45 @@ normalizeDataForGoogleSheetsExport <- function (df) { } +#' Helper function to read Google Sheet with col_types padding support +#' @param gsheet - Google Sheet object from googledrive +#' @param sheetName - name of worksheet +#' @param skipNRows - rows to skip +#' @param treatTheseAsNA - NA values +#' @param firstRowAsHeader - use first row as header +#' @param col_types - column types specification +#' @param guess_max - max rows to guess types +#' @param original_col_types - original col_types for retry logic +#' @keywords internal +.read_sheet_with_col_types_padding <- function(gsheet, sheetName, skipNRows, treatTheseAsNA, firstRowAsHeader, col_types, guess_max, original_col_types = NULL) { + tryCatch({ + if (!is.null(treatTheseAsNA)) { + df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, na = treatTheseAsNA, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max) + } else { + df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max) + } + df + }, error = function(e) { + # Check if this is a col_types length mismatch error and we have a string col_types to pad + # Error pattern: "Length of `col_types` is not compatible with columns found in sheets" + if (is.character(col_types) && length(col_types) == 1 && + stringr::str_detect(e$message, "Length of `col_types` is not compatible with columns found in sheets")) { + # Try to pad col_types with '?' for extra columns + padded_types <- .pad_col_types_for_column_mismatch(col_types, e$message) + if (!is.null(padded_types)) { + # Retry with padded col_types + if (!is.null(treatTheseAsNA)) { + return(gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, na = treatTheseAsNA, col_names = firstRowAsHeader, col_types = padded_types, guess_max = guess_max)) + } else { + return(gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, col_names = firstRowAsHeader, col_types = padded_types, guess_max = guess_max)) + } + } + } + # If we can't handle it, re-throw the original error + stop(e) + }) +} + #' API to get google sheet data #' @export #' @param title name of a sheet on Google Sheets. @@ -192,13 +252,9 @@ getGoogleSheet <- function(title, sheetName, skipNRows = 0, treatTheseAsNA = NUL col_types <- NULL } } - # The "na" argument of googlesheets4::read_sheet does not accept null, - # so if the treatTheseAsNA is null, do not pass it to googlesheets4::read_sheet - if(!is.null(treatTheseAsNA)) { - df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, na = treatTheseAsNA, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max) - } else { - df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max) - } + # Read the sheet with col_types padding support for column count changes + df <- .read_sheet_with_col_types_padding(gsheet, sheetName, skipNRows, treatTheseAsNA, firstRowAsHeader, col_types, guess_max) + if(!is.null(tzone)) { # if timezone is specified, apply the timezone to POSIXct columns df <- df %>% dplyr::mutate(across(where(lubridate::is.POSIXct), ~ lubridate::force_tz(.x, tzone=tzone))) } diff --git a/tests/testthat/test_google_sheets.R b/tests/testthat/test_google_sheets.R index 671a434f3..acbed6a47 100644 --- a/tests/testthat/test_google_sheets.R +++ b/tests/testthat/test_google_sheets.R @@ -12,3 +12,26 @@ test_that("normalizeDataForGoogleSheetsExport", { expect_equal(class(df_cleaned$numAllInf), "numeric") expect_equal(class(df_cleaned$numAllMixed), "numeric") }) + +# Tests for col_types padding helper function (Issue #33700) +test_that(".pad_col_types_for_column_mismatch pads col_types correctly", { + # Test case 1: Standard error message with column count + error_msg <- "Length of `col_types` is not compatible with columns found in sheets:\n- 14 column types specified.\n- 14 un-skipped column types specified.\n- But there are 15 columns found in sheets." + result <- exploratory:::.pad_col_types_for_column_mismatch("cinTDDDDDDDDDT", error_msg) + expect_equal(result, "cinTDDDDDDDDDT?") # Original 14 chars + 1 '?' for the extra column + + # Test case 2: Multiple extra columns + error_msg2 <- "Length of `col_types` is not compatible with columns found in sheets:\n- 5 column types specified.\n- 5 un-skipped column types specified.\n- But there are 8 columns found in sheets." + result2 <- exploratory:::.pad_col_types_for_column_mismatch("cinTD", error_msg2) + expect_equal(result2, "cinTD???") # Original 5 chars + 3 '?' for extra columns + + # Test case 3: Error message doesn't match expected pattern + error_msg3 <- "Some other error message" + result3 <- exploratory:::.pad_col_types_for_column_mismatch("cinT", error_msg3) + expect_null(result3) + + # Test case 4: col_types already matches or exceeds actual columns (edge case) + error_msg4 <- "Length of `col_types` is not compatible with columns found in sheets:\n- 5 column types specified.\n- But there are 3 columns found in sheets." + result4 <- exploratory:::.pad_col_types_for_column_mismatch("cinTD", error_msg4) + expect_null(result4) # Should return NULL since actual < specified +}) From a109c4654ab10a27b7eca16bd840542c2f7eae58 Mon Sep 17 00:00:00 2001 From: hide kojima Date: Tue, 3 Feb 2026 23:07:56 -0800 Subject: [PATCH 2/3] Update R/google_sheets.R Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- R/google_sheets.R | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/R/google_sheets.R b/R/google_sheets.R index e8ab1aa68..395fa7a54 100644 --- a/R/google_sheets.R +++ b/R/google_sheets.R @@ -177,26 +177,61 @@ normalizeDataForGoogleSheetsExport <- function (df) { #' @param original_col_types - original col_types for retry logic #' @keywords internal .read_sheet_with_col_types_padding <- function(gsheet, sheetName, skipNRows, treatTheseAsNA, firstRowAsHeader, col_types, guess_max, original_col_types = NULL) { + # Use original_col_types when provided to control the retry/padding behavior, + # otherwise fall back to the current col_types argument. + col_types_to_use <- if (!is.null(original_col_types)) original_col_types else col_types tryCatch({ if (!is.null(treatTheseAsNA)) { - df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, na = treatTheseAsNA, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max) + df <- gsheet %>% googlesheets4::read_sheet( + range = sheetName, + skip = skipNRows, + na = treatTheseAsNA, + col_names = firstRowAsHeader, + col_types = col_types_to_use, + guess_max = guess_max + ) } else { - df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max) + df <- gsheet %>% googlesheets4::read_sheet( + range = sheetName, + skip = skipNRows, + col_names = firstRowAsHeader, + col_types = col_types_to_use, + guess_max = guess_max + ) } df }, error = function(e) { # Check if this is a col_types length mismatch error and we have a string col_types to pad # Error pattern: "Length of `col_types` is not compatible with columns found in sheets" - if (is.character(col_types) && length(col_types) == 1 && + if (is.character(col_types_to_use) && length(col_types_to_use) == 1 && stringr::str_detect(e$message, "Length of `col_types` is not compatible with columns found in sheets")) { # Try to pad col_types with '?' for extra columns - padded_types <- .pad_col_types_for_column_mismatch(col_types, e$message) + padded_types <- .pad_col_types_for_column_mismatch(col_types_to_use, e$message) if (!is.null(padded_types)) { # Retry with padded col_types if (!is.null(treatTheseAsNA)) { - return(gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, na = treatTheseAsNA, col_names = firstRowAsHeader, col_types = padded_types, guess_max = guess_max)) + return( + gsheet %>% + googlesheets4::read_sheet( + range = sheetName, + skip = skipNRows, + na = treatTheseAsNA, + col_names = firstRowAsHeader, + col_types = padded_types, + guess_max = guess_max + ) + ) } else { - return(gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, col_names = firstRowAsHeader, col_types = padded_types, guess_max = guess_max)) + return( + gsheet %>% + googlesheets4::read_sheet( + range = sheetName, + skip = skipNRows, + col_names = firstRowAsHeader, + col_types = padded_types, + guess_max = guess_max + ) + ) } } } From 57dd0fdd11ebdc0762cbd87fb4ae1bcbcd170ac0 Mon Sep 17 00:00:00 2001 From: Hide Kojima Date: Tue, 3 Feb 2026 23:40:47 -0800 Subject: [PATCH 3/3] Fix Google Sheets col_types retry --- R/google_sheets.R | 5 ++--- tests/testthat/test_google_sheets.R | 35 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/R/google_sheets.R b/R/google_sheets.R index e8ab1aa68..a12f35010 100644 --- a/R/google_sheets.R +++ b/R/google_sheets.R @@ -174,9 +174,8 @@ normalizeDataForGoogleSheetsExport <- function (df) { #' @param firstRowAsHeader - use first row as header #' @param col_types - column types specification #' @param guess_max - max rows to guess types -#' @param original_col_types - original col_types for retry logic #' @keywords internal -.read_sheet_with_col_types_padding <- function(gsheet, sheetName, skipNRows, treatTheseAsNA, firstRowAsHeader, col_types, guess_max, original_col_types = NULL) { +.read_sheet_with_col_types_padding <- function(gsheet, sheetName, skipNRows, treatTheseAsNA, firstRowAsHeader, col_types, guess_max) { tryCatch({ if (!is.null(treatTheseAsNA)) { df <- gsheet %>% googlesheets4::read_sheet(range = sheetName, skip = skipNRows, na = treatTheseAsNA, col_names = firstRowAsHeader, col_types = col_types, guess_max = guess_max) @@ -187,7 +186,7 @@ normalizeDataForGoogleSheetsExport <- function (df) { }, error = function(e) { # Check if this is a col_types length mismatch error and we have a string col_types to pad # Error pattern: "Length of `col_types` is not compatible with columns found in sheets" - if (is.character(col_types) && length(col_types) == 1 && + if (is.character(col_types) && length(col_types) == 1 && is.null(names(col_types)) && stringr::str_detect(e$message, "Length of `col_types` is not compatible with columns found in sheets")) { # Try to pad col_types with '?' for extra columns padded_types <- .pad_col_types_for_column_mismatch(col_types, e$message) diff --git a/tests/testthat/test_google_sheets.R b/tests/testthat/test_google_sheets.R index acbed6a47..d677cf11b 100644 --- a/tests/testthat/test_google_sheets.R +++ b/tests/testthat/test_google_sheets.R @@ -35,3 +35,38 @@ test_that(".pad_col_types_for_column_mismatch pads col_types correctly", { result4 <- exploratory:::.pad_col_types_for_column_mismatch("cinTD", error_msg4) expect_null(result4) # Should return NULL since actual < specified }) + +test_that(".read_sheet_with_col_types_padding retries with padded col_types", { + skip_if_not_installed("googlesheets4") + + calls <- list() + error_msg <- "Length of `col_types` is not compatible with columns found in sheets:\n- 4 column types specified.\n- 4 un-skipped column types specified.\n- But there are 5 columns found in sheets." + + stub_read_sheet <- function(...) { + args <- list(...) + calls <<- c(calls, list(args$col_types)) + if (length(calls) == 1) { + stop(error_msg) + } + data.frame(a = 1) + } + + original_read_sheet <- get("read_sheet", envir = asNamespace("googlesheets4")) + assignInNamespace("read_sheet", stub_read_sheet, ns = "googlesheets4") + on.exit(assignInNamespace("read_sheet", original_read_sheet, ns = "googlesheets4"), add = TRUE) + + df <- exploratory:::.read_sheet_with_col_types_padding( + gsheet = "dummy", + sheetName = "Sheet1", + skipNRows = 0, + treatTheseAsNA = NULL, + firstRowAsHeader = TRUE, + col_types = "cinT", + guess_max = 100 + ) + + expect_equal(length(calls), 2) + expect_equal(calls[[1]], "cinT") + expect_equal(calls[[2]], "cinT?") + expect_equal(df$a, 1) +})