Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: vroom
Title: Read and Write Rectangular Text Data Quickly
Version: 1.7.0.2
Version: 1.7.0.3
Authors@R: c(
person("Jim", "Hester", role = "aut",
comment = c(ORCID = "0000-0002-2739-7082")),
Expand Down
88 changes: 66 additions & 22 deletions src/DateTimeParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ class DateTimeParser {

switch (datePart[i]) {
case 'y':
if (!consumeInteger(4, &year_)) return false;
if (!consumeYearFlexible()) return false;
break;
case 'm':
if (!consumeInteger(2, &mon_, false)) return false;
Expand Down Expand Up @@ -252,43 +252,87 @@ class DateTimeParser {
return isComplete();
}

// Heuristic for year-last date patterns: D/M/YYYY or M/D/YYYY
// Matches: \d{1,2}[sep]\d{1,2}[sep]\d{4}
// Disambiguation: if part1 > 12 → DMY; if part2 > 12 → MDY; else → MDY (default)
bool parseYearLastHeuristic() {
int part1, part2;

if (!consumeInteger(2, &part1, false)) return false;
if (!consumeDateSeparator()) return false;
if (!consumeInteger(2, &part2, false)) return false;
if (!consumeDateSeparator()) return false;
if (!consumeInteger(4, &year_)) return false;
if (!isComplete()) return false;

// Validate year is plausible
if (year_ < 1000) return false;
// Read a year via consumeInteger(4, &year_, false) and normalise it in place.
//   value < 100     -> two-digit year: 00-68 become 20xx, 69-99 become 19xx
//   100 <= value <= 999 -> rejected as implausible
//   value >= 1000   -> kept unchanged
// Returns false when the integer cannot be read or the value is rejected.
// (Issue #36088)
//
// NOTE(review): the decision is made on the parsed value, not on how many
// digits were consumed, so a one-digit year or a zero-padded "026" would
// presumably be pivoted like a two-digit year -- confirm against
// consumeInteger's non-exact behaviour.
bool consumeYearFlexible() {
  const bool parsed = consumeInteger(4, &year_, false);
  if (!parsed) {
    return false;
  }

  // Four-digit years need no adjustment.
  if (year_ >= 1000) {
    return true;
  }

  // 100-999 is neither a two-digit nor a four-digit year.
  if (year_ >= 100) {
    return false;
  }

  // Two-digit pivot: 69 and above belong to the 1900s, the rest to the 2000s.
  year_ += (year_ >= 69) ? 1900 : 2000;
  return true;
}

// Assign the first two components of a year-last date to mon_ and day_.
// Only part1 > 12 selects day-first (DMY); every other case, including the
// ambiguous one where both parts are <= 12, is read month-first (MDY, the US
// convention).
// Returns true when the resulting month is in 1..12 and the day in 1..31.
bool disambiguateDayMonth(int part1, int part2) {
  const bool dayFirst = part1 > 12;

  mon_ = dayFirst ? part2 : part1;
  day_ = dayFirst ? part1 : part2;

  // Coarse range check only; the day is not validated against the month here.
  const bool monthOk = mon_ >= 1 && mon_ <= 12;
  const bool dayOk = day_ >= 1 && day_ <= 31;
  return monthOk && dayOk;
}

// Date-only heuristic for year-last input: D/M/Y or M/D/Y.
// Expected shape: <1-2 digits><sep><1-2 digits><sep><year>, with the year read
// by consumeYearFlexible() and nothing left over afterwards.
// The day/month order is decided by disambiguateDayMonth():
// part1 > 12 -> DMY, otherwise MDY.
bool parseYearLastHeuristic() {
  int first = 0;
  int second = 0;

  // Short-circuit evaluation keeps the consume calls in input order and stops
  // at the first one that fails.
  const bool matched = consumeInteger(2, &first, false) &&
                       consumeDateSeparator() &&
                       consumeInteger(2, &second, false) &&
                       consumeDateSeparator() &&
                       consumeYearFlexible() &&
                       isComplete();

  return matched && disambiguateDayMonth(first, second);
}

// Datetime heuristic for year-last input: a D/M/Y or M/D/Y date (year read by
// consumeYearFlexible()), then a 'T' or ' ' separator, then a time and an
// optional timezone offset. The time tail is intended to mirror the one in
// parseISO8601. (Issue #36088)
// Returns true only when the whole input has been consumed.
bool parseYearLastHeuristicDateTime() {
  int first = 0;
  int second = 0;

  // Date part; short-circuiting stops at the first component that fails.
  const bool dateOk = consumeInteger(2, &first, false) &&
                      consumeDateSeparator() &&
                      consumeInteger(2, &second, false) &&
                      consumeDateSeparator() &&
                      consumeYearFlexible() &&
                      disambiguateDayMonth(first, second);
  if (!dateOk) {
    return false;
  }

  // A time must follow; date-only input belongs to parseYearLastHeuristic().
  char sep = '\0';
  if (!consumeChar(&sep)) {
    return false;
  }
  if (!(sep == 'T' || sep == ' ')) {
    return false;
  }

  // The hour is mandatory.
  if (!consumeInteger(2, &hour_)) {
    return false;
  }

  // Minutes and seconds are optional: the results of these calls are
  // deliberately ignored.
  consumeThisChar(':');
  consumeInteger(2, &min_);
  consumeThisChar(':');
  consumeSeconds(&sec_, &psec_);

  if (isComplete()) {
    return true;
  }

  // Anything left must be a timezone offset; tz_ is set to UTC before the
  // offset is read.
  tz_ = "UTC";
  return consumeTzOffset(&tzOffsetHours_, &tzOffsetMinutes_) && isComplete();
}

// True when the read position has reached the end of the input.
bool isComplete() {
  return dateEnd_ == dateItr_;
}

void setDate(const char* start, const char* end) {
Expand Down
16 changes: 11 additions & 5 deletions src/guess_type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,17 @@ static bool isDateTime(const std::string& x, LocaleInfo* pLocale) {
return false;
}

// Auto-detection: ISO8601 only (existing behavior — no change)
bool ok = parser.parseISO8601();
if (!ok) return false;
DateTime dt = parser.makeDateTime();
return dt.validDateTime();
// Auto-detection: ISO8601 first, then year-last (M/D/Y or D/M/Y) heuristic
// so MDY/DMY datetimes (including 2-digit years) are recognized. (Issue #36088)
if (parser.parseISO8601()) {
DateTime dt = parser.makeDateTime();
if (dt.validDateTime()) return true;
}

parser.setDate(x.c_str(), x.c_str() + x.size());
if (!parser.parseYearLastHeuristicDateTime()) return false;
DateTime dt2 = parser.makeDateTime();
return dt2.validDateTime();
}

std::string guess_type__(
Expand Down
6 changes: 6 additions & 0 deletions src/vroom_dttm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ double parse_dttm(
res = parser.parseDateOrder(locale->dateOrder_);
} else if (format.empty()) {
res = parser.parseISO8601();
if (!res) {
// Fall back to the year-last (M/D/Y or D/M/Y) heuristic so MDY/DMY
// datetimes (including 2-digit years) materialize. (Issue #36088)
parser.setDate(begin, end);
res = parser.parseYearLastHeuristicDateTime();
}
} else {
res = parser.parse(format);
}
Expand Down
68 changes: 68 additions & 0 deletions tests/testthat/test-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -596,3 +596,71 @@ test_that("vroom() reads dot-separated MDY dates", {
expect_s3_class(result$date, "Date")
expect_equal(result$date, as.Date(c("2024-10-02", "2024-03-15")))
})

# --- 2-digit-year (M/D/YY) auto-detection (Issue exploratory-io/tam#36088) ---

test_that("vroom() auto-detects 2-digit-year MDY dates (M/D/YY)", {
  # Month-first rows with a two-digit year should be guessed as Date and land
  # in the 2000s.
  input <- I("id,date\n1,5/29/26\n2,5/31/26\n3,12/25/26")
  expected <- as.Date(c("2026-05-29", "2026-05-31", "2026-12-25"))

  parsed <- vroom::vroom(input, delim = ",", show_col_types = FALSE)

  expect_s3_class(parsed$date, "Date")
  expect_equal(parsed$date, expected)
})

test_that("vroom() auto-detects 2-digit-year DMY dates (D/M/YY)", {
  # Both rows start with a value above 12, so the first component can only be
  # the day.
  input <- I("id,date\n1,29/5/26\n2,20/1/26")
  expected <- as.Date(c("2026-05-29", "2026-01-20"))

  parsed <- vroom::vroom(input, delim = ",", show_col_types = FALSE)

  expect_s3_class(parsed$date, "Date")
  expect_equal(parsed$date, expected)
})

test_that("vroom() applies the %y pivot to 2-digit years (00-68 -> 2000s, 69-99 -> 1900s)", {
  # 68 and 69 sit on either side of the pivot.
  input <- I("id,date\n1,5/29/68\n2,5/29/69")
  expected <- as.Date(c("2068-05-29", "1969-05-29"))

  parsed <- vroom::vroom(input, delim = ",", show_col_types = FALSE)

  expect_s3_class(parsed$date, "Date")
  expect_equal(parsed$date, expected)
})

test_that("vroom guess_type detects 2-digit-year year-last dates", {
  # The guesser itself, without a full read, should pick the date collector.
  guessed <- vroom::guess_type(c("5/29/26", "5/31/26"))
  expect_true(inherits(guessed, "collector_date"))
})

test_that("vroom() does not treat invalid or 3-digit-year values as dates", {
  # 13/25/26    : 13 > 12 forces day-first, leaving month 25 -> invalid as
  #               both MDY and DMY.
  # 100/200/300 : every component has three digits, so this input does not
  #               isolate the year check.
  # 5/29/300    : plausible month and day with a 3-digit year, so only the
  #               year check can reject it.
  not_dates <- c("13/25/26", "100/200/300", "5/29/300")

  for (v in not_dates) {
    result <- vroom::vroom(I(paste0("x\n", v, "\n")), delim = ",", show_col_types = FALSE)
    expect_type(result$x, "character")
  }
})

test_that("vroom() auto-detects 2-digit-year MDY datetimes (M/D/YY HH:MM:SS)", {
  # A year-last date followed by a space and a full time should be guessed as
  # a UTC datetime.
  input <- I("id,dt\n1,5/29/26 14:30:00\n2,12/25/26 23:59:59")
  expected <- as.POSIXct(c("2026-05-29 14:30:00", "2026-12-25 23:59:59"), tz = "UTC")

  parsed <- vroom::vroom(input, delim = ",", show_col_types = FALSE)

  expect_s3_class(parsed$dt, "POSIXct")
  expect_equal(parsed$dt, expected)
})

test_that("vroom() auto-detects 4-digit-year MDY datetimes (M/D/YYYY HH:MM:SS)", {
  # Same shape as the two-digit-year case, with the year written in full.
  input <- I("id,dt\n1,5/29/2026 14:30:00\n2,10/15/2024 09:00:00")
  expected <- as.POSIXct(c("2026-05-29 14:30:00", "2024-10-15 09:00:00"), tz = "UTC")

  parsed <- vroom::vroom(input, delim = ",", show_col_types = FALSE)

  expect_s3_class(parsed$dt, "POSIXct")
  expect_equal(parsed$dt, expected)
})

test_that("vroom() reads 2-digit-year dates with explicit date_order", {
  # Both inputs describe the same two days; only the component order differs.
  expected <- as.Date(c("2026-05-29", "2026-03-15"))

  # Month-first, selected through the locale.
  mdy_input <- I("id,date\n1,5/29/26\n2,3/15/26")
  mdy_parsed <- vroom::vroom(mdy_input, locale = locale(date_order = "mdy"), show_col_types = FALSE)
  expect_s3_class(mdy_parsed$date, "Date")
  expect_equal(mdy_parsed$date, expected)

  # Day-first, selected through the locale.
  dmy_input <- I("id,date\n1,29/5/26\n2,15/3/26")
  dmy_parsed <- vroom::vroom(dmy_input, locale = locale(date_order = "dmy"), show_col_types = FALSE)
  expect_s3_class(dmy_parsed$date, "Date")
  expect_equal(dmy_parsed$date, expected)
})