diff --git a/DESCRIPTION b/DESCRIPTION index 4e907041..d1d5e9a4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: vroom Title: Read and Write Rectangular Text Data Quickly -Version: 1.7.0.2 +Version: 1.7.0.3 Authors@R: c( person("Jim", "Hester", role = "aut", comment = c(ORCID = "0000-0002-2739-7082")), diff --git a/src/DateTimeParser.h b/src/DateTimeParser.h index ce6a77bf..77649de1 100644 --- a/src/DateTimeParser.h +++ b/src/DateTimeParser.h @@ -205,7 +205,7 @@ class DateTimeParser { switch (datePart[i]) { case 'y': - if (!consumeInteger(4, &year_)) return false; + if (!consumeYearFlexible()) return false; break; case 'm': if (!consumeInteger(2, &mon_, false)) return false; @@ -252,43 +252,87 @@ class DateTimeParser { return isComplete(); } - // Heuristic for year-last date patterns: D/M/YYYY or M/D/YYYY - // Matches: \d{1,2}[sep]\d{1,2}[sep]\d{4} - // Disambiguation: if part1 > 12 → DMY; if part2 > 12 → MDY; else → MDY (default) - bool parseYearLastHeuristic() { - int part1, part2; - - if (!consumeInteger(2, &part1, false)) return false; - if (!consumeDateSeparator()) return false; - if (!consumeInteger(2, &part2, false)) return false; - if (!consumeDateSeparator()) return false; - if (!consumeInteger(4, &year_)) return false; - if (!isComplete()) return false; - - // Validate year is plausible - if (year_ < 1000) return false; + // Consume a year via consumeInteger(4, &year_, false) - presumably 1 to 4 digits; TODO confirm. + // Values below 100 take the same pivot as the %y format specifier (00-68 -> 2000s, 69-99 -> 1900s). + // Values 100-999 are implausible and rejected; the checks test the parsed value, not the digit count. (Issue #36088) + bool consumeYearFlexible() { + if (!consumeInteger(4, &year_, false)) return false; + if (year_ < 100) { + year_ += (year_ < 69) ? 2000 : 1900; + } else if (year_ < 1000) { + return false; + } + return true; + } + // Disambiguate a year-last date's first two components into month and day. + // part1 > 12 -> DMY; part2 > 12 -> MDY; otherwise default to MDY (US). 
+ // Returns false if the resulting month/day are out of range. + bool disambiguateDayMonth(int part1, int part2) { if (part1 > 12) { - // Must be DMY day_ = part1; mon_ = part2; } else if (part2 > 12) { - // Must be MDY mon_ = part1; day_ = part2; } else { - // Ambiguous: default to MDY (US convention) mon_ = part1; day_ = part2; } - - // Validate month and day are in plausible range if (mon_ < 1 || mon_ > 12) return false; if (day_ < 1 || day_ > 31) return false; - return true; } + // Heuristic for year-last date patterns: D/M/Y or M/D/Y (Y = 2 or 4 digits) + // Matches: \d{1,2}[sep]\d{1,2}[sep]\d{2,4} (NOTE(review): the year is read by consumeYearFlexible; confirm it really needs 2+ digits) + // Disambiguation: if part1 > 12 → DMY; if part2 > 12 → MDY; else → MDY (default) + bool parseYearLastHeuristic() { + int part1, part2; + + if (!consumeInteger(2, &part1, false)) return false; + if (!consumeDateSeparator()) return false; + if (!consumeInteger(2, &part2, false)) return false; + if (!consumeDateSeparator()) return false; + if (!consumeYearFlexible()) return false; + if (!isComplete()) return false; + + return disambiguateDayMonth(part1, part2); + } + + // Year-last datetime heuristic: a year-last date (M/D/Y or D/M/Y, 2 or 4 digit + // year) followed by a T/space separator and a HH[:MM[:SS]] time with optional + // timezone offset; the ':' separators, minutes and seconds are consumed without checking the result. Mirrors the time tail of parseISO8601. (Issue #36088) + bool parseYearLastHeuristicDateTime() { + int part1, part2; + + if (!consumeInteger(2, &part1, false)) return false; + if (!consumeDateSeparator()) return false; + if (!consumeInteger(2, &part2, false)) return false; + if (!consumeDateSeparator()) return false; + if (!consumeYearFlexible()) return false; + if (!disambiguateDayMonth(part1, part2)) return false; + + // Time portion is required (date-only is handled by parseYearLastHeuristic). 
+ char next; + if (!consumeChar(&next)) return false; + if (next != 'T' && next != ' ') return false; + + if (!consumeInteger(2, &hour_)) return false; + consumeThisChar(':'); + consumeInteger(2, &min_); + consumeThisChar(':'); + consumeSeconds(&sec_, &psec_); + + if (isComplete()) return true; + + // Optional timezone offset: tz_ is set to UTC and the rest must parse as an offset + tz_ = "UTC"; + if (!consumeTzOffset(&tzOffsetHours_, &tzOffsetMinutes_)) return false; + + return isComplete(); + } + bool isComplete() { return dateItr_ == dateEnd_; } void setDate(const char* start, const char* end) { diff --git a/src/guess_type.cc b/src/guess_type.cc index 9b119728..050e12bb 100644 --- a/src/guess_type.cc +++ b/src/guess_type.cc @@ -120,11 +120,17 @@ static bool isDateTime(const std::string& x, LocaleInfo* pLocale) { return false; } - // Auto-detection: ISO8601 only (existing behavior — no change) - bool ok = parser.parseISO8601(); - if (!ok) return false; - DateTime dt = parser.makeDateTime(); - return dt.validDateTime(); + // Auto-detection: ISO8601 first, then year-last (M/D/Y or D/M/Y) heuristic + // so MDY/DMY datetimes (including 2-digit years) are recognized. (Issue #36088) + if (parser.parseISO8601()) { + DateTime dt = parser.makeDateTime(); + if (dt.validDateTime()) return true; + } + + parser.setDate(x.c_str(), x.c_str() + x.size()); + if (!parser.parseYearLastHeuristicDateTime()) return false; + DateTime dt2 = parser.makeDateTime(); + return dt2.validDateTime(); } std::string guess_type__( diff --git a/src/vroom_dttm.cc b/src/vroom_dttm.cc index 194d526a..0a9188a2 100644 --- a/src/vroom_dttm.cc +++ b/src/vroom_dttm.cc @@ -21,6 +21,12 @@ double parse_dttm( res = parser.parseDateOrder(locale->dateOrder_); } else if (format.empty()) { res = parser.parseISO8601(); + if (!res) { + // Fall back to the year-last (M/D/Y or D/M/Y) heuristic so MDY/DMY + // datetimes (including 2-digit years) materialize. 
(Issue #36088) + parser.setDate(begin, end); + res = parser.parseYearLastHeuristicDateTime(); + } } else { res = parser.parse(format); } diff --git a/tests/testthat/test-datetime.R b/tests/testthat/test-datetime.R index db46ccc5..7c5b973a 100644 --- a/tests/testthat/test-datetime.R +++ b/tests/testthat/test-datetime.R @@ -596,3 +596,71 @@ test_that("vroom() reads dot-separated MDY dates", { expect_s3_class(result$date, "Date") expect_equal(result$date, as.Date(c("2024-10-02", "2024-03-15"))) }) + +# --- 2-digit-year (M/D/YY) auto-detection (Issue exploratory-io/tam#36088) --- + +test_that("vroom() auto-detects 2-digit-year MDY dates (M/D/YY)", { + csv <- "id,date\n1,5/29/26\n2,5/31/26\n3,12/25/26" + result <- vroom::vroom(I(csv), delim = ",", show_col_types = FALSE) + expect_s3_class(result$date, "Date") + expect_equal(result$date, as.Date(c("2026-05-29", "2026-05-31", "2026-12-25"))) +}) + +test_that("vroom() auto-detects 2-digit-year DMY dates (D/M/YY)", { + # 29 > 12 in first part: unambiguously DMY + csv <- "id,date\n1,29/5/26\n2,20/1/26" + result <- vroom::vroom(I(csv), delim = ",", show_col_types = FALSE) + expect_s3_class(result$date, "Date") + expect_equal(result$date, as.Date(c("2026-05-29", "2026-01-20"))) +}) + +test_that("vroom() applies the %y pivot to 2-digit years (00-68 -> 2000s, 69-99 -> 1900s)", { + csv <- "id,date\n1,5/29/68\n2,5/29/69" + result <- vroom::vroom(I(csv), delim = ",", show_col_types = FALSE) + expect_s3_class(result$date, "Date") + expect_equal(result$date, as.Date(c("2068-05-29", "1969-05-29"))) +}) + +test_that("vroom guess_type detects 2-digit-year year-last dates", { + expect_true(inherits(vroom::guess_type(c("5/29/26", "5/31/26")), "collector_date")) +}) + +test_that("vroom() does not treat invalid or 3-digit-year values as dates", { + # 13/25/26: invalid as both MDY and DMY; 100/200/300: 3-digit year rejected + for (v in c("13/25/26", "100/200/300")) { + result <- vroom::vroom(I(paste0("x\n", v, "\n")), delim = ",", 
show_col_types = FALSE) + expect_type(result$x, "character") + } +}) + +test_that("vroom() auto-detects 2-digit-year MDY datetimes (M/D/YY HH:MM:SS)", { + csv <- "id,dt\n1,5/29/26 14:30:00\n2,12/25/26 23:59:59" + result <- vroom::vroom(I(csv), delim = ",", show_col_types = FALSE) + expect_s3_class(result$dt, "POSIXct") + expect_equal( + result$dt, + as.POSIXct(c("2026-05-29 14:30:00", "2026-12-25 23:59:59"), tz = "UTC") + ) +}) + +test_that("vroom() auto-detects 4-digit-year MDY datetimes (M/D/YYYY HH:MM:SS)", { + csv <- "id,dt\n1,5/29/2026 14:30:00\n2,10/15/2024 09:00:00" + result <- vroom::vroom(I(csv), delim = ",", show_col_types = FALSE) + expect_s3_class(result$dt, "POSIXct") + expect_equal( + result$dt, + as.POSIXct(c("2026-05-29 14:30:00", "2024-10-15 09:00:00"), tz = "UTC") + ) +}) + +test_that("vroom() reads 2-digit-year dates with explicit date_order", { + csv_mdy <- "id,date\n1,5/29/26\n2,3/15/26" + res_mdy <- vroom::vroom(I(csv_mdy), locale = locale(date_order = "mdy"), show_col_types = FALSE) + expect_s3_class(res_mdy$date, "Date") + expect_equal(res_mdy$date, as.Date(c("2026-05-29", "2026-03-15"))) + + csv_dmy <- "id,date\n1,29/5/26\n2,15/3/26" + res_dmy <- vroom::vroom(I(csv_dmy), locale = locale(date_order = "dmy"), show_col_types = FALSE) + expect_s3_class(res_dmy$date, "Date") + expect_equal(res_dmy$date, as.Date(c("2026-05-29", "2026-03-15"))) +})