Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: readr
Title: Read Rectangular Text Data
Version: 2.2.0.2
Version: 2.2.0.3
Authors@R: c(
person("Hadley", "Wickham", , "hadley@posit.co", role = "aut"),
person("Jim", "Hester", role = "aut"),
Expand Down
14 changes: 14 additions & 0 deletions src/Collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ void CollectorDate::setValue(int i, const Token& t) {
res = parser_.parseDateOrder(pLocale_->dateOrder_);
} else {
res = parser_.parseLocaleDate();
if (!res) {
// Fall back to the year-last (M/D/Y or D/M/Y) heuristic so MDY/DMY
// dates (including 2-digit years) materialize, matching the guesser
// in CollectorGuess.cpp isDate(). (Issue #36088)
parser_.setDate(std_string.c_str());
res = parser_.parseYearLastHeuristic();
}
}

if (!res) {
Expand Down Expand Up @@ -165,6 +172,13 @@ void CollectorDateTime::setValue(int i, const Token& t) {
res = parser_.parseDateOrder(pLocale_->dateOrder_);
} else {
res = parser_.parseISO8601();
if (!res) {
// Fall back to the year-last (M/D/Y or D/M/Y) datetime heuristic so
// MDY/DMY datetimes (including 2-digit years) materialize, matching
// the guesser in CollectorGuess.cpp isDateTime(). (Issue #36088)
parser_.setDate(std_string.c_str());
res = parser_.parseYearLastHeuristicDateTime();
}
}

if (!res) {
Expand Down
14 changes: 9 additions & 5 deletions src/CollectorGuess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,16 @@ static bool isDateTime(const std::string& x, LocaleInfo* pLocale) {
return false;
}

// Auto-detection: ISO8601 only (YMD, existing behavior — no change)
bool ok = parser.parseISO8601();
if (!ok) return false;
// Auto-detection: ISO8601 first (YMD), then year-last (M/D/Y or D/M/Y)
// heuristic so MDY/DMY datetimes (including 2-digit years) are recognized.
// (Issue #36088)
if (parser.parseISO8601()) {
if (!parser.compactDate()) return true;
return parser.year() > 999;
}

if (!parser.compactDate()) return true;
return parser.year() > 999;
parser.setDate(x.c_str());
return parser.parseYearLastHeuristicDateTime();
}

[[cpp11::register]] std::string collectorGuess(
Expand Down
88 changes: 66 additions & 22 deletions src/DateTimeParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class DateTimeParser {

switch (datePart[i]) {
case 'y':
if (!consumeInteger(4, &year_)) return false;
if (!consumeYearFlexible()) return false;
break;
case 'm':
if (!consumeInteger(2, &mon_, false)) return false;
Expand Down Expand Up @@ -192,43 +192,87 @@ class DateTimeParser {
return isComplete();
}

// Heuristic for year-last date patterns: D/M/YYYY or M/D/YYYY
// Matches: \d{1,2}[sep]\d{1,2}[sep]\d{4}
// Disambiguation: if part1 > 12 → DMY; if part2 > 12 → MDY; else → MDY (default)
bool parseYearLastHeuristic() {
int part1, part2;

if (!consumeInteger(2, &part1, false)) return false;
if (!consumeDateSeparator()) return false;
if (!consumeInteger(2, &part2, false)) return false;
if (!consumeDateSeparator()) return false;
if (!consumeInteger(4, &year_)) return false;
if (!isComplete()) return false;

// Validate year is plausible
if (year_ < 1000) return false;
// Consume a year that may be 2 or 4 digits. 2-digit years use the same pivot
// as the %y format specifier (00-68 -> 2000s, 69-99 -> 1900s). 3-digit values
// (100-999) are implausible and rejected. (Issue #36088)
bool consumeYearFlexible() {
if (!consumeInteger(4, &year_, false)) return false;
if (year_ < 100) {
year_ += (year_ < 69) ? 2000 : 1900;
} else if (year_ < 1000) {
return false;
}
return true;
}

// Disambiguate a year-last date's first two components into month and day.
// part1 > 12 -> DMY; part2 > 12 -> MDY; otherwise default to MDY (US).
// Returns false if the resulting month/day are out of range.
bool disambiguateDayMonth(int part1, int part2) {
if (part1 > 12) {
// Must be DMY
day_ = part1;
mon_ = part2;
} else if (part2 > 12) {
// Must be MDY
mon_ = part1;
day_ = part2;
} else {
// Ambiguous: default to MDY (US convention)
mon_ = part1;
day_ = part2;
}

// Validate month and day are in plausible range
if (mon_ < 1 || mon_ > 12) return false;
if (day_ < 1 || day_ > 31) return false;

return true;
}

// Heuristic for year-last date patterns: D/M/Y or M/D/Y (Y = 2 or 4 digits)
// Matches: \d{1,2}[sep]\d{1,2}[sep]\d{2,4}
// Disambiguation: if part1 > 12 → DMY; if part2 > 12 → MDY; else → MDY (default)
bool parseYearLastHeuristic() {
int part1, part2;

if (!consumeInteger(2, &part1, false)) return false;
if (!consumeDateSeparator()) return false;
if (!consumeInteger(2, &part2, false)) return false;
if (!consumeDateSeparator()) return false;
if (!consumeYearFlexible()) return false;
if (!isComplete()) return false;

return disambiguateDayMonth(part1, part2);
}

// Year-last datetime heuristic: a year-last date (M/D/Y or D/M/Y, 2 or 4 digit
// year) followed by a T/space separator and a HH[:MM[:SS]] time with optional
// timezone. Mirrors the time tail of parseISO8601. (Issue #36088)
bool parseYearLastHeuristicDateTime() {
int part1, part2;

if (!consumeInteger(2, &part1, false)) return false;
if (!consumeDateSeparator()) return false;
if (!consumeInteger(2, &part2, false)) return false;
if (!consumeDateSeparator()) return false;
if (!consumeYearFlexible()) return false;
if (!disambiguateDayMonth(part1, part2)) return false;

// Time portion is required (date-only is handled by parseYearLastHeuristic).
char next;
if (!consumeChar(&next)) return false;
if (next != 'T' && next != ' ') return false;

if (!consumeInteger(2, &hour_)) return false;
consumeThisChar(':');
consumeInteger(2, &min_);
consumeThisChar(':');
consumeSeconds(&sec_, &psec_);

if (isComplete()) return true;

// Optional timezone
tz_ = "UTC";
if (!consumeTzOffset(&tzOffsetHours_, &tzOffsetMinutes_)) return false;

return isComplete();
}

bool isComplete() { return dateItr_ == dateEnd_; }

void setDate(const char* date) {
Expand Down
75 changes: 75 additions & 0 deletions tests/testthat/test-parsing-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -446,3 +446,78 @@ test_that("read_csv() reads dot-separated MDY dates", {
expect_s3_class(result$date, "Date")
expect_equal(result$date, as.Date(c("2024-10-02", "2024-03-15")))
})

# --- 2-digit-year (M/D/YY) auto-detection (Issue exploratory-io/tam#36088) ---

test_that("read_csv() auto-detects 2-digit-year MDY dates (M/D/YY)", {
csv <- "id,date\n1,5/29/26\n2,5/31/26\n3,12/25/26"
result <- read_csv(I(csv), show_col_types = FALSE)
expect_s3_class(result$date, "Date")
expect_equal(result$date, as.Date(c("2026-05-29", "2026-05-31", "2026-12-25")))
})

test_that("read_csv() auto-detects 2-digit-year DMY dates (D/M/YY)", {
# 29 > 12 in first part: unambiguously DMY
csv <- "id,date\n1,29/5/26\n2,20/1/26"
result <- read_csv(I(csv), show_col_types = FALSE)
expect_s3_class(result$date, "Date")
expect_equal(result$date, as.Date(c("2026-05-29", "2026-01-20")))
})

test_that("type_convert() materializes 4-digit-year MDY dates (regression: was NA)", {
# The guesser returned "date" but CollectorDate::setValue used only
# parseLocaleDate(), so year-last dates materialized to NA. (#36088)
df <- data.frame(date = c("5/29/2026", "3/15/2026"), stringsAsFactors = FALSE)
result <- type_convert(df, guess_integer = FALSE)
expect_s3_class(result$date, "Date")
expect_equal(result$date, as.Date(c("2026-05-29", "2026-03-15")))
})

test_that("read_csv() applies the %y pivot to 2-digit years", {
csv <- "id,date\n1,5/29/68\n2,5/29/69"
result <- read_csv(I(csv), show_col_types = FALSE)
expect_s3_class(result$date, "Date")
expect_equal(result$date, as.Date(c("2068-05-29", "1969-05-29")))
})

test_that("guess_parser detects 2-digit-year year-last dates", {
expect_equal(guess_parser(c("5/29/26", "5/31/26"), locale()), "date")
})

test_that("read_csv() does not treat invalid or 3-digit-year values as dates", {
for (v in c("13/25/26", "100/200/300")) {
result <- read_csv(I(paste0("x\n", v, "\n")), show_col_types = FALSE)
expect_type(result$x, "character")
}
})

test_that("read_csv() auto-detects 2-digit-year MDY datetimes (M/D/YY HH:MM:SS)", {
csv <- "id,dt\n1,5/29/26 14:30:00\n2,12/25/26 23:59:59"
result <- read_csv(I(csv), show_col_types = FALSE)
expect_s3_class(result$dt, "POSIXct")
expect_equal(
result$dt,
as.POSIXct(c("2026-05-29 14:30:00", "2026-12-25 23:59:59"), tz = "UTC")
)
})

test_that("read_csv() auto-detects 4-digit-year MDY datetimes (M/D/YYYY HH:MM:SS)", {
csv <- "id,dt\n1,5/29/2026 14:30:00\n2,10/15/2024 09:00:00"
result <- read_csv(I(csv), show_col_types = FALSE)
expect_s3_class(result$dt, "POSIXct")
expect_equal(
result$dt,
as.POSIXct(c("2026-05-29 14:30:00", "2024-10-15 09:00:00"), tz = "UTC")
)
})

test_that("parse_date parses 2-digit-year MDY/DMY with locale date_order", {
expect_equal(
parse_date("5/29/26", locale = locale(date_order = "mdy")),
as.Date("2026-05-29")
)
expect_equal(
parse_date("29/5/26", locale = locale(date_order = "dmy")),
as.Date("2026-05-29")
)
})
Loading