From 200afd1ea91415d7129120376e8cd4548b65a994 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 12 Feb 2026 23:18:56 +0100 Subject: [PATCH] date: fix subfmt-up1, fill-1, pct-pct, and invalid-high-bit-set tests --- Cargo.lock | 1 + fuzz/Cargo.lock | 33 ++ src/uu/date/Cargo.toml | 1 + src/uu/date/src/date.rs | 197 ++++++++-- src/uu/date/src/format_modifiers.rs | 569 ++++++++++++++++++++++++++++ tests/by-util/test_date.rs | 407 ++++++++++++++++++++ 6 files changed, 1181 insertions(+), 27 deletions(-) create mode 100644 src/uu/date/src/format_modifiers.rs diff --git a/Cargo.lock b/Cargo.lock index bb70d341367..0ae80cc4654 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3348,6 +3348,7 @@ dependencies = [ "jiff-icu", "nix", "parse_datetime", + "regex", "tempfile", "uucore", "windows-sys 0.61.2", diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index e6d6ad3c558..520fd775028 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -21,6 +21,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -1398,11 +1407,34 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "rust-ini" @@ -1754,6 +1786,7 @@ dependencies = [ "jiff-icu", "nix", "parse_datetime", + "regex", "uucore", "windows-sys 0.61.2", ] diff --git a/src/uu/date/Cargo.toml b/src/uu/date/Cargo.toml index c1a6040358e..36604d0c337 100644 --- a/src/uu/date/Cargo.toml +++ b/src/uu/date/Cargo.toml @@ -40,6 +40,7 @@ jiff = { workspace = true, features = [ "tzdb-concatenated", ] } parse_datetime = { workspace = true } +regex = { workspace = true } uucore = { workspace = true, features = ["parser", "i18n-datetime"] } [target.'cfg(unix)'.dependencies] diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 80a97e0de25..02321c0c5f4 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -5,6 +5,7 @@ // spell-checker:ignore strtime ; (format) DATEFILE MMDDhhmm ; (vars) datetime datetimes getres AWST ACST AEST foobarbaz +mod format_modifiers; mod locale; use clap::{Arg, ArgAction, Command}; @@ -14,7 +15,7 @@ use jiff::{Timestamp, Zoned}; use std::borrow::Cow; use std::collections::HashMap; use std::fs::File; -use std::io::{BufRead, BufReader, BufWriter, Write}; +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; use std::path::PathBuf; use std::sync::OnceLock; use uucore::display::Quotable; @@ -57,6 +58,25 @@ struct Settings { format: Format, date_source: DateSource, set_to: Option, + debug: bool, +} + +/// Options for parsing dates +#[derive(Clone, Copy)] +struct DebugOptions { + /// Enable debug output + debug: bool, + /// Warn when midnight is used without explicit time specification + warn_midnight: bool, +} + +impl DebugOptions { + fn new(debug: bool, warn_midnight: bool) -> Self { + Self { + debug, + warn_midnight, + } + } } /// Various ways of displaying the date @@ -133,6 +153,40 @@ enum DayDelta { Next, } +/// Escape invalid UTF-8 bytes in GNU-compatible octal notation. 
+/// +/// Converts bytes to a string with printable ASCII characters preserved +/// and non-printable/invalid UTF-8 bytes escaped as `\NNN` octal sequences. +/// +/// This matches GNU date's behavior for invalid input. +/// +/// # Arguments +/// * `bytes` - The byte sequence to escape +/// +/// # Returns +/// A string with invalid bytes escaped in octal notation +/// +/// # Example +/// ```ignore +/// let invalid = b"\xb0"; +/// assert_eq!(escape_invalid_bytes(invalid), "\\260"); +/// ``` +fn escape_invalid_bytes(bytes: &[u8]) -> String { + let escaped = bytes + .iter() + .flat_map(|&b| { + // Preserve printable ASCII except backslash + if (0x20..0x7f).contains(&b) && b != b'\\' { + vec![b] + } else { + // Escape as octal: \NNN + format!("\\{b:03o}").into_bytes() + } + }) + .collect::>(); + String::from_utf8_lossy(&escaped).into_owned() +} + /// Strip parenthesized comments from a date string. /// /// GNU date removes balanced parentheses and their content, treating them as comments. @@ -270,6 +324,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { }; let utc = matches.get_flag(OPT_UNIVERSAL); + let debug_mode = matches.get_flag(OPT_DEBUG); // Get the current time, either in the local time zone or UTC. 
let now = if utc { @@ -278,7 +333,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { Zoned::now() }; - let date_source = if let Some(date) = matches.get_one::(OPT_DATE) { + let date_source = if let Some(date_os) = matches.get_one::(OPT_DATE) { + // Convert OsString to String, handling invalid UTF-8 with GNU-compatible error + let date = date_os.to_str().ok_or_else(|| { + let bytes = date_os.as_encoded_bytes(); + let escaped_str = escape_invalid_bytes(bytes); + USimpleError::new(1, format!("invalid date '{escaped_str}'")) + })?; DateSource::Human(date.into()) } else if let Some(file) = matches.get_one::(OPT_FILE) { match file.as_ref() { @@ -295,7 +356,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let set_to = match matches .get_one::(OPT_SET) - .map(|s| parse_date(s, &now)) + .map(|s| parse_date(s, &now, DebugOptions::new(debug_mode, true))) { None => None, Some(Err((input, _err))) => { @@ -312,6 +373,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { format, date_source, set_to, + debug: debug_mode, }; if let Some(date) = settings.set_to { @@ -363,7 +425,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } else { format!("{date_part} 00:00 {offset}") }; - parse_date(composed, &now) + if settings.debug { + eprintln!("date: warning: using midnight as starting time: 00:00:00"); + } + parse_date(composed, &now, DebugOptions::new(settings.debug, false)) } else if let Some((total_hours, day_delta)) = military_tz_with_offset { // Military timezone with optional hour offset // Convert to UTC time: midnight + military_tz_offset + additional_hours @@ -383,7 +448,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { DayDelta::Previous => format_date_with_epoch_fallback(now.yesterday()), }; let composed = format!("{date_part} {total_hours:02}:00:00 +00:00"); - parse_date(composed, &now) + parse_date(composed, &now, DebugOptions::new(settings.debug, false)) } else if is_pure_digits { // Derive HH and MM from the input let 
(hh_opt, mm_opt) = if input.len() <= 2 { @@ -409,23 +474,23 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } else { format!("{date_part} {hh:02}:{mm:02} {offset}") }; - parse_date(composed, &now) + parse_date(composed, &now, DebugOptions::new(settings.debug, false)) } else { // Fallback on parse failure of digits - parse_date(input, &now) + parse_date(input, &now, DebugOptions::new(settings.debug, true)) } } else { - parse_date(input, &now) + parse_date(input, &now, DebugOptions::new(settings.debug, true)) }; let iter = std::iter::once(date); Box::new(iter) } - DateSource::Stdin => { - let lines = BufReader::new(std::io::stdin()).lines(); - let iter = lines.map_while(Result::ok).map(|s| parse_date(s, &now)); - Box::new(iter) - } + DateSource::Stdin => parse_dates_from_reader( + std::io::stdin(), + &now, + DebugOptions::new(settings.debug, true), + ), DateSource::File(ref path) => { if path.is_dir() { return Err(USimpleError::new( @@ -435,9 +500,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } let file = File::open(path).map_err_context(|| path.as_os_str().maybe_quote().to_string())?; - let lines = BufReader::new(file).lines(); - let iter = lines.map_while(Result::ok).map(|s| parse_date(s, &now)); - Box::new(iter) + parse_dates_from_reader(file, &now, DebugOptions::new(settings.debug, true)) } DateSource::FileMtime(ref path) => { let metadata = std::fs::metadata(path) @@ -528,6 +591,7 @@ pub fn uu_app() -> Command { .value_name("STRING") .allow_hyphen_values(true) .overrides_with(OPT_DATE) + .value_parser(clap::value_parser!(std::ffi::OsString)) .help(translate!("date-help-date")), ) .arg( @@ -630,15 +694,25 @@ fn format_date_with_locale_aware_months( format_string: &str, config: &Config, skip_localization: bool, -) -> Result { +) -> Result { + // First check if format string has GNU modifiers (width/flags) and format if present + // This optimization combines detection and formatting in a single pass + if let Some(result) = + 
format_modifiers::format_with_modifiers_if_present(date, format_string, config) + { + return result.map_err(|e| e.to_string()); + } + let broken_down = BrokenDownTime::from(date); - if !should_use_icu_locale() || skip_localization { - return broken_down.to_string_with_config(config, format_string); - } + let result = if !should_use_icu_locale() || skip_localization { + broken_down.to_string_with_config(config, format_string) + } else { + let fmt = localize_format_string(format_string, date.date()); + broken_down.to_string_with_config(config, &fmt) + }; - let fmt = localize_format_string(format_string, date.date()); - broken_down.to_string_with_config(config, &fmt) + result.map_err(|e| e.to_string()) } /// Return the appropriate format string for the given settings. @@ -788,6 +862,23 @@ fn try_parse_with_abbreviation>(date_str: S) -> Option { /// Parse a `String` into a `DateTime`. /// If it fails, return a tuple of the `String` along with its `ParseError`. +/// Helper function to parse dates from a line-based reader (stdin or file) +/// +/// Takes any `Read` source, reads it line by line, and parses each line as a date. +/// Returns a boxed iterator over the parse results. 
+fn parse_dates_from_reader( + reader: R, + now: &Zoned, + dbg_opts: DebugOptions, +) -> Box> + '_> { + let lines = BufReader::new(reader).lines(); + Box::new( + lines + .map_while(Result::ok) + .map(move |s| parse_date(s, now, dbg_opts)), + ) +} + /// /// **Update for parse_datetime 0.13:** /// - parse_datetime 0.11: returned `chrono::DateTime` → required conversion to `jiff::Zoned` @@ -798,17 +889,64 @@ fn try_parse_with_abbreviation>(date_str: S) -> Option { fn parse_date + Clone>( s: S, now: &Zoned, + dbg_opts: DebugOptions, ) -> Result { + let input_str = s.as_ref(); + + if dbg_opts.debug { + eprintln!("date: input string: {input_str}"); + } + // First, try to parse any timezone abbreviations - if let Some(zoned) = try_parse_with_abbreviation(s.as_ref()) { + if let Some(zoned) = try_parse_with_abbreviation(input_str) { + if dbg_opts.debug { + eprintln!( + "date: parsed date part: (Y-M-D) {}", + strtime::format("%Y-%m-%d", &zoned).unwrap_or_default() + ); + eprintln!( + "date: parsed time part: {}", + strtime::format("%H:%M:%S", &zoned).unwrap_or_default() + ); + let tz_display = zoned.time_zone().iana_name().unwrap_or("system default"); + eprintln!("date: input timezone: {tz_display}"); + } return Ok(zoned); } - match parse_datetime::parse_datetime_at_date(now.clone(), s.as_ref()) { + match parse_datetime::parse_datetime_at_date(now.clone(), input_str) { // Convert to system timezone for display // (parse_datetime 0.13 returns Zoned in the input's timezone) - Ok(date) => Ok(date.timestamp().to_zoned(now.time_zone().clone())), - Err(e) => Err((s.as_ref().into(), e)), + Ok(date) => { + let result = date.timestamp().to_zoned(now.time_zone().clone()); + if dbg_opts.debug { + // Show final parsed date and time + eprintln!( + "date: parsed date part: (Y-M-D) {}", + strtime::format("%Y-%m-%d", &result).unwrap_or_default() + ); + eprintln!( + "date: parsed time part: {}", + strtime::format("%H:%M:%S", &result).unwrap_or_default() + ); + + // Show timezone information 
+ eprintln!("date: input timezone: system default"); + + // Check if time component was specified, if not warn about midnight usage + // Only warn for date-only inputs (no time specified), but not for epoch formats (@N) + // or inputs that explicitly specify a time (containing ':') + if dbg_opts.warn_midnight && !input_str.contains(':') && !input_str.contains('@') { + // Input likely didn't specify a time, so midnight was assumed + let time_str = strtime::format("%H:%M:%S", &result).unwrap_or_default(); + if time_str == "00:00:00" { + eprintln!("date: warning: using midnight as starting time: 00:00:00"); + } + } + } + Ok(result) + } + Err(e) => Err((input_str.into(), e)), } } @@ -965,7 +1103,12 @@ mod tests { fn test_utc_conversion_preserves_offset() { let now = Zoned::now(); - let date = parse_date("Sat 20 Mar 2021 14:53:01 AWST", &now).unwrap(); + let date = parse_date( + "Sat 20 Mar 2021 14:53:01 AWST", + &now, + DebugOptions::new(false, false), + ) + .unwrap(); let utc = convert_for_set(date, true); assert_eq!((utc.hour(), utc.minute(), utc.second()), (6, 53, 1)); // AWST(+08:00) -> -8h } diff --git a/src/uu/date/src/format_modifiers.rs b/src/uu/date/src/format_modifiers.rs new file mode 100644 index 00000000000..c6a3d01c857 --- /dev/null +++ b/src/uu/date/src/format_modifiers.rs @@ -0,0 +1,569 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// spell-checker:ignore strtime + +//! GNU date format modifier support +//! +//! This module implements GNU-compatible format modifiers for date formatting. +//! These modifiers extend standard strftime format specifiers with optional +//! width and flag modifiers. +//! +//! ## Syntax +//! +//! Format: `%[flags][width]specifier` +//! +//! ### Flags +//! - `-`: Do not pad the field +//! - `_`: Pad with spaces instead of zeros +//! 
- `0`: Pad with zeros (default for numeric fields) +//! - `^`: Convert to uppercase +//! - `#`: Use opposite case (uppercase becomes lowercase and vice versa) +//! - `+`: Force display of sign (+ for positive, - for negative) +//! +//! ### Width +//! - One or more digits specifying minimum field width +//! - Field will be padded to this width using the padding character +//! +//! ### Examples +//! - `%10Y`: Year padded to 10 digits with zeros (0000001999) +//! - `%_10m`: Month padded to 10 digits with spaces ( 06) +//! - `%-d`: Day without padding (1 instead of 01) +//! - `%^B`: Month name in uppercase (JUNE) +//! - `%+4C`: Century with sign, padded to 4 characters (+019) + +use jiff::Zoned; +use jiff::fmt::strtime::{BrokenDownTime, Config, PosixCustom}; +use regex::Regex; +use std::fmt; +use std::sync::OnceLock; + +/// Error type for format modifier operations +#[derive(Debug)] +pub enum FormatError { + /// Error from the underlying jiff library + JiffError(jiff::Error), + /// Custom error message (reserved for future use) + #[allow(dead_code)] + Custom(String), +} + +impl fmt::Display for FormatError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::JiffError(e) => write!(f, "{e}"), + Self::Custom(s) => write!(f, "{s}"), + } + } +} + +impl From for FormatError { + fn from(e: jiff::Error) -> Self { + Self::JiffError(e) + } +} + +/// Regex to match format specifiers with optional modifiers +/// Pattern: % \[flags\] \[width\] specifier +/// Flags: -, _, 0, ^, #, + +/// Width: one or more digits +/// Specifier: any letter or special sequence like :z, ::z, :::z +fn format_spec_regex() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| Regex::new(r"%([_0^#+-]*)(\d*)(:*[a-zA-Z])").unwrap()) +} + +/// Check if format string contains any GNU modifiers and format if present. +/// +/// This function combines modifier detection and formatting in a single pass +/// for better performance. 
If no modifiers are found, returns None and the +/// caller should use standard formatting. If modifiers are found, returns +/// the formatted string. +pub fn format_with_modifiers_if_present( + date: &Zoned, + format_string: &str, + config: &Config, +) -> Option> { + let re = format_spec_regex(); + + // Quick check: does the string contain any modifiers? + let has_modifiers = re.captures_iter(format_string).any(|cap| { + let flags = cap.get(1).map_or("", |m| m.as_str()); + let width_str = cap.get(2).map_or("", |m| m.as_str()); + !flags.is_empty() || !width_str.is_empty() + }); + + if !has_modifiers { + return None; + } + + // If we have modifiers, format the string + Some(format_with_modifiers(date, format_string, config)) +} + +/// Process a format string with GNU modifiers. +/// +/// # Arguments +/// * `date` - The date to format +/// * `format_string` - Format string with GNU modifiers +/// * `config` - Strftime configuration +/// +/// # Returns +/// Formatted string with modifiers applied +/// +/// # Errors +/// Returns `FormatError` if formatting fails +fn format_with_modifiers( + date: &Zoned, + format_string: &str, + config: &Config, +) -> Result { + // First, replace %% with a placeholder to avoid matching it + let placeholder = "\x00PERCENT\x00"; + let temp_format = format_string.replace("%%", placeholder); + + let re = format_spec_regex(); + let mut result = String::new(); + let mut last_end = 0; + + let broken_down = BrokenDownTime::from(date); + + for cap in re.captures_iter(&temp_format) { + let whole_match = cap.get(0).unwrap(); + let flags = cap.get(1).map_or("", |m| m.as_str()); + let width_str = cap.get(2).map_or("", |m| m.as_str()); + let spec = cap.get(3).unwrap().as_str(); + + // Add text before this match + result.push_str(&temp_format[last_end..whole_match.start()]); + + // Format the base specifier first + let base_format = format!("%{spec}"); + let formatted = broken_down.to_string_with_config(config, &base_format)?; + + // Check if this 
specifier has modifiers + if !flags.is_empty() || !width_str.is_empty() { + // Apply modifiers to the formatted value + let width: usize = width_str.parse().unwrap_or(0); + let modified = apply_modifiers(&formatted, flags, width, spec); + result.push_str(&modified); + } else { + // No modifiers, use formatted value as-is + result.push_str(&formatted); + } + + last_end = whole_match.end(); + } + + // Add remaining text + result.push_str(&temp_format[last_end..]); + + // Restore %% by converting placeholder to % + let result = result.replace(placeholder, "%"); + + Ok(result) +} + +/// Returns true if the specifier produces text output (default pad is space) +/// rather than numeric output (default pad is zero). +fn is_text_specifier(specifier: &str) -> bool { + matches!( + specifier.chars().last(), + Some('A' | 'a' | 'B' | 'b' | 'h' | 'Z' | 'p' | 'P') + ) +} + +/// Strip default padding (leading zeros or leading spaces) from a value, +/// preserving at least one character. +fn strip_default_padding(value: &str) -> String { + if value.starts_with('0') && value.len() >= 2 { + let stripped = value.trim_start_matches('0'); + if stripped.is_empty() { + return "0".to_string(); + } + if let Some(first_char) = stripped.chars().next() { + if first_char.is_ascii_digit() { + return stripped.to_string(); + } + } + } + if value.starts_with(' ') { + let stripped = value.trim_start(); + if !stripped.is_empty() { + return stripped.to_string(); + } + } + value.to_string() +} + +/// Apply width and flag modifiers to a formatted value. +/// +/// The `specifier` parameter is the format specifier (e.g., "d", "B", "Y") +/// which determines the default padding character (space for text, zero for numeric). +/// Flags are processed in order so that when conflicting flags appear, +/// the last one takes precedence (e.g., `_+` means `+` wins for padding). 
+fn apply_modifiers(value: &str, flags: &str, width: usize, specifier: &str) -> String { + let mut result = value.to_string(); + + // Determine default pad character based on specifier type + let default_pad = if is_text_specifier(specifier) { + ' ' + } else { + '0' + }; + + // Process flags in order - last conflicting flag wins + let mut pad_char = default_pad; + let mut no_pad = false; + let mut uppercase = false; + let mut swap_case = false; + let mut force_sign = false; + + for flag in flags.chars() { + match flag { + '-' => { + no_pad = true; + } + '_' => { + no_pad = false; + pad_char = ' '; + } + '0' => { + no_pad = false; + pad_char = '0'; + } + '^' => { + uppercase = true; + swap_case = false; // ^ overrides # + } + '#' => { + if !uppercase { + // Only apply # if ^ hasn't been set + swap_case = true; + } + } + '+' => { + force_sign = true; + no_pad = false; + pad_char = '0'; + } + _ => {} + } + } + + // Apply case modifications (uppercase takes precedence over swap_case) + if uppercase { + result = result.to_uppercase(); + } else if swap_case { + if result + .chars() + .all(|c| !c.is_alphabetic() || c.is_uppercase()) + { + result = result.to_lowercase(); + } else { + result = result.to_uppercase(); + } + } + + // If no_pad flag is active, suppress all padding and return + if no_pad { + return strip_default_padding(&result); + } + + // Handle width smaller than result: strip default padding to fit + if width > 0 && width < result.len() { + return strip_default_padding(&result); + } + + // Strip leading zeros when switching to space padding on numeric fields + if pad_char == ' ' + && !is_text_specifier(specifier) + && result.starts_with('0') + && result.len() >= 2 + { + result = strip_default_padding(&result); + } + + // Apply force sign for numeric values + if force_sign && !result.starts_with('+') && !result.starts_with('-') { + if result.chars().next().is_some_and(|c| c.is_ascii_digit()) { + result.insert(0, '+'); + } + } + + // Apply width padding + if 
width > result.len() { + let padding = width - result.len(); + let has_sign = result.starts_with('+') || result.starts_with('-'); + + if pad_char == '0' && has_sign { + // Zero padding: sign first, then zeros (e.g., "-0022") + let sign = result.chars().next().unwrap(); + let rest = &result[1..]; + result = format!("{sign}{}{rest}", "0".repeat(padding)); + } else { + // Default: pad on the left (e.g., " -22" or " 1999") + result = format!("{}{result}", pad_char.to_string().repeat(padding)); + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use jiff::{civil, tz::TimeZone}; + + fn make_test_date(year: i16, month: i8, day: i8, hour: i8) -> Zoned { + civil::date(year, month, day) + .at(hour, 0, 0, 0) + .to_zoned(TimeZone::UTC) + .unwrap() + } + + fn get_config() -> Config { + Config::new().custom(PosixCustom::new()).lenient(true) + } + + #[test] + fn test_width_and_padding_modifiers() { + let date = make_test_date(1999, 6, 1, 0); + let config = get_config(); + + // Test basic width with zero padding + let result = format_with_modifiers(&date, "%10Y", &config).unwrap(); + assert_eq!(result, "0000001999"); + + // Test large width + let result = format_with_modifiers(&date, "%20Y", &config).unwrap(); + assert_eq!(result, "00000000000000001999"); + assert_eq!(result.len(), 20); + + // Test underscore (space) padding with month + let result = format_with_modifiers(&date, "%_10m", &config).unwrap(); + assert_eq!(result, " 6"); + assert_eq!(result.len(), 10); + + // Test underscore padding with day + let date_day5 = make_test_date(1999, 6, 5, 0); + let result = format_with_modifiers(&date_day5, "%_10d", &config).unwrap(); + assert_eq!(result, " 5"); + } + + #[test] + fn test_no_pad_and_case_flags() { + let date = make_test_date(1999, 6, 1, 0); + let config = get_config(); + + // Test no-pad: %-10Y suppresses all padding (width ignored) + let result = format_with_modifiers(&date, "%-10Y", &config).unwrap(); + assert_eq!(result, "1999"); + + // Test no-pad: 
%-d strips default zero padding + let result = format_with_modifiers(&date, "%-d", &config).unwrap(); + assert_eq!(result, "1"); + + // Test uppercase: %^B should uppercase month name + let result = format_with_modifiers(&date, "%^B", &config).unwrap(); + assert_eq!(result, "JUNE"); + + // Test uppercase with width: %^10B should uppercase and space-pad (text specifier) + let result = format_with_modifiers(&date, "%^10B", &config).unwrap(); + assert_eq!(result, " JUNE"); + assert_eq!(result.len(), 10); + } + + #[test] + fn test_sign_flags() { + let date = make_test_date(1970, 1, 1, 0); + let config = get_config(); + + // Test force sign with century: %+4C + let result = format_with_modifiers(&date, "%+4C", &config).unwrap(); + assert!(result.starts_with('+')); + assert_eq!(result.len(), 4); + + // Test force sign with zero padding: %+6Y + let result = format_with_modifiers(&date, "%+6Y", &config).unwrap(); + assert_eq!(result, "+01970"); + } + + #[test] + fn test_combined_flags_underscore_and_sign() { + let date = make_test_date(1970, 1, 1, 0); + let config = get_config(); + // %_+6Y: _ sets space pad, then + overrides to zero pad with sign (last wins) + let result = format_with_modifiers(&date, "%_+6Y", &config).unwrap(); + assert_eq!(result, "+01970"); + } + + #[test] + fn test_combined_flags_no_pad_and_uppercase() { + let date = make_test_date(1999, 6, 1, 0); + let config = get_config(); + // %-^10B: uppercase + no-pad (- suppresses all padding, width ignored) + let result = format_with_modifiers(&date, "%-^10B", &config).unwrap(); + assert_eq!(result, "JUNE"); + } + + #[test] + fn test_swap_case_flag() { + let date = make_test_date(1999, 6, 1, 0); + let config = get_config(); + // %#B: swap case on "June" (mixed case) → uppercase + let result = format_with_modifiers(&date, "%#B", &config).unwrap(); + assert_eq!(result, "JUNE"); + } + + #[test] + fn test_width_smaller_than_result() { + let date = make_test_date(1999, 6, 1, 0); + let config = get_config(); + // 
%1d: width 1 < "01".len() → strip zero padding → "1" + let result = format_with_modifiers(&date, "%1d", &config).unwrap(); + assert_eq!(result, "1"); + } + + #[test] + fn test_edge_cases_and_special_formats() { + let date = make_test_date(1999, 6, 1, 0); + let config = get_config(); + + // Test width zero (no effect) + let result = format_with_modifiers(&date, "%Y", &config).unwrap(); + assert_eq!(result, "1999"); + + // Test no modifiers (standard format) + let result = format_with_modifiers(&date, "%Y-%m-%d", &config).unwrap(); + assert_eq!(result, "1999-06-01"); + + // Test %% escape sequence + let result = format_with_modifiers(&date, "%%Y=%Y", &config).unwrap(); + assert_eq!(result, "%Y=1999"); + + // Test multiple modifiers in one format string + // %-5d: no-pad suppresses all padding → "1" (width ignored) + let result = format_with_modifiers(&date, "%10Y-%_5m-%-5d", &config).unwrap(); + assert_eq!(result, "0000001999- 6-1"); + } + + #[test] + fn test_modifier_detection() { + let date = make_test_date(1999, 6, 1, 0); + let config = get_config(); + + // Should detect modifiers + let result = format_with_modifiers_if_present(&date, "%10Y", &config); + assert!(result.is_some()); + + // Should not detect modifiers + let result = format_with_modifiers_if_present(&date, "%Y-%m-%d", &config); + assert!(result.is_none()); + + // Should detect flag without width + let result = format_with_modifiers_if_present(&date, "%^B", &config); + assert!(result.is_some()); + } + + #[test] + fn test_negative_values_with_space_padding() { + // Test case from GNU test: neg-secs2 + // Format: %_5s with value -22 should produce " -22" (space-padded) + use jiff::Timestamp; + + let ts = Timestamp::from_second(-22).unwrap(); + let date = ts.to_zoned(TimeZone::UTC); + let config = get_config(); + + let result = format_with_modifiers(&date, "%_5s", &config).unwrap(); + assert_eq!( + result, " -22", + "Space padding should pad before the sign for negative numbers" + ); + } + + // Unit tests 
for apply_modifiers function
+    #[test]
+    fn test_apply_modifiers_basic() {
+        // No modifiers (numeric specifier)
+        assert_eq!(apply_modifiers("1999", "", 0, "Y"), "1999");
+        // Zero padding
+        assert_eq!(apply_modifiers("1999", "0", 10, "Y"), "0000001999");
+        // Space padding (strips leading zeros)
+        assert_eq!(apply_modifiers("06", "_", 5, "m"), "    6");
+        // No-pad (strips leading zeros, width ignored)
+        assert_eq!(apply_modifiers("01", "-", 5, "d"), "1");
+        // Uppercase
+        assert_eq!(apply_modifiers("june", "^", 0, "B"), "JUNE");
+        // Swap case: all uppercase → lowercase
+        assert_eq!(apply_modifiers("UTC", "#", 0, "Z"), "utc");
+        // Swap case: mixed case → uppercase
+        assert_eq!(apply_modifiers("June", "#", 0, "B"), "JUNE");
+    }
+
+    #[test]
+    fn test_apply_modifiers_signs() {
+        // Force sign
+        assert_eq!(apply_modifiers("1970", "+", 6, "Y"), "+01970");
+        // Negative with zero padding: sign first, then zeros
+        assert_eq!(apply_modifiers("-22", "0", 5, "s"), "-0022");
+        // Negative with space padding: spaces first, then sign
+        assert_eq!(apply_modifiers("-22", "_", 5, "s"), "  -22");
+        // Force sign (_+): + is last, overrides _ → zero pad with sign
+        assert_eq!(apply_modifiers("5", "_+", 5, "s"), "+0005");
+        // No-pad + uppercase: no padding applied
+        assert_eq!(apply_modifiers("june", "-^", 10, "B"), "JUNE");
+    }
+
+    #[test]
+    fn test_case_flag_precedence() {
+        // Test that ^ (uppercase) overrides # (swap case)
+        assert_eq!(apply_modifiers("June", "^#", 0, "B"), "JUNE");
+        assert_eq!(apply_modifiers("June", "#^", 0, "B"), "JUNE");
+        // Test # alone (swap case)
+        assert_eq!(apply_modifiers("June", "#", 0, "B"), "JUNE");
+        assert_eq!(apply_modifiers("JUNE", "#", 0, "B"), "june");
+    }
+
+    #[test]
+    fn test_apply_modifiers_text_specifiers() {
+        // Text specifiers default to space padding
+        assert_eq!(apply_modifiers("June", "", 10, "B"), "      June");
+        assert_eq!(apply_modifiers("Mon", "", 10, "a"), "       Mon");
+        // Numeric specifiers default to zero padding
+        assert_eq!(apply_modifiers("6", "", 10, "m"), "0000000006");
+    }
+
+    #[test]
+    fn test_apply_modifiers_width_smaller_than_result() {
+        // Width smaller than result strips default padding
+        assert_eq!(apply_modifiers("01", "", 1, "d"), "1");
+        assert_eq!(apply_modifiers("06", "", 1, "m"), "6");
+    }
+
+    #[test]
+    fn test_apply_modifiers_parametrized() {
+        let test_cases = [
+            ("1", "0", 3, "Y", "001"),
+            ("1", "_", 3, "d", "  1"),
+            ("1", "-", 3, "d", "1"),       // no-pad: width ignored
+            ("abc", "^", 5, "B", "  ABC"), // text specifier: space pad
+            ("5", "+", 4, "s", "+005"),
+            ("5", "_+", 4, "s", "+005"), // + is last: zero pad with sign
+            ("-3", "0", 5, "s", "-0003"),
+            ("05", "_", 3, "d", "  5"),
+            ("09", "-", 4, "d", "9"),         // no-pad: width ignored
+            ("1970", "_+", 6, "Y", "+01970"), // + is last: zero pad with sign
+        ];
+
+        for (value, flags, width, spec, expected) in test_cases {
+            assert_eq!(
+                apply_modifiers(value, flags, width, spec),
+                expected,
+                "value='{value}', flags='{flags}', width={width}, spec='{spec}'",
+            );
+        }
+    }
+}
diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs
index ffbb4a2bea9..e037e83d309 100644
--- a/tests/by-util/test_date.rs
+++ b/tests/by-util/test_date.rs
@@ -2171,3 +2171,410 @@ fn test_date_cross_tz_mishandled() {
         .stdout_contains("21:00:00")
         .stdout_contains("1969");
 }
+
+// Tests for GNU test invalid-high-bit-set: invalid UTF-8 in date string
+#[test]
+#[cfg(unix)]
+fn test_date_invalid_high_bit_set() {
+    use std::os::unix::ffi::OsStrExt;
+
+    // GNU test invalid-high-bit-set: Invalid UTF-8 byte (0xb0) should produce
+    // GNU-compatible error message with octal escape sequence
+    let invalid_bytes = b"\xb0";
+    let invalid_arg = std::ffi::OsStr::from_bytes(invalid_bytes);
+
+    new_ucmd!()
+        .args(&[std::ffi::OsStr::new("-d"), invalid_arg])
+        .fails()
+        .code_is(1)
+        .stderr_contains("invalid date '\\260'");
+}
+
+// Tests for GNU format modifiers
+#[test]
+fn test_date_format_modifier_width() {
+    // Test width modifier: %10Y should pad year to 10 digits
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["-d", "1999-06-01", "+%10Y"])
+        .succeeds()
+        .stdout_is("0000001999\n");
+}
+
+#[test]
+fn test_date_format_modifier_underscore_padding() {
+    // Test underscore flag: %_10m should pad month with spaces
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["-d", "1999-06-01", "+%_10m"])
+        .succeeds()
+        .stdout_is("         6\n");
+}
+
+#[test]
+fn test_date_format_modifier_no_pad() {
+    // Test no-pad flag: %-10Y suppresses all padding (width ignored)
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["-d", "1999-06-01", "+%-10Y"])
+        .succeeds()
+        .stdout_is("1999\n");
+
+    // Test no-pad on day: %-d strips default zero padding
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["-d", "1999-06-01", "+%-d"])
+        .succeeds()
+        .stdout_is("1\n");
+}
+
+#[test]
+fn test_date_format_modifier_uppercase() {
+    // Test uppercase flag: %^B should uppercase month name
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .env("LC_ALL", "C")
+        .args(&["-d", "1999-06-01", "+%^B"])
+        .succeeds()
+        .stdout_is("JUNE\n");
+}
+
+#[test]
+fn test_date_format_modifier_force_sign() {
+    // Test force sign flag: %+6Y should show + sign for positive years
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["-d", "1970-01-01", "+%+6Y"])
+        .succeeds()
+        .stdout_is("+01970\n");
+}
+
+#[test]
+fn test_date_format_modifier_combined_flags() {
+    // Test combined flags: %-^10B should uppercase, no-pad suppresses all padding
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .env("LC_ALL", "C")
+        .args(&["-d", "1999-06-01", "+%-^10B"])
+        .succeeds()
+        .stdout_is("JUNE\n");
+}
+
+#[test]
+fn test_date_format_modifier_case_precedence() {
+    // Test that ^ (uppercase) takes precedence over # (swap case) regardless of order
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .env("LC_ALL", "C")
+        .args(&["-d", "1999-06-01", "+%^#B"])
+        .succeeds()
+        .stdout_is("JUNE\n");
+
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .env("LC_ALL", "C")
+        .args(&["-d", "1999-06-01", "+%#^B"])
+        .succeeds()
+        .stdout_is("JUNE\n");
+}
+
+#[test]
+fn test_date_format_modifier_multiple() {
+    // Test multiple modifiers in one format string
+    // %-5d: no-pad suppresses all padding → "1"
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["-d", "1999-06-01", "+%10Y-%_5m-%-5d"])
+        .succeeds()
+        .stdout_is("0000001999-    6-1\n");
+}
+
+#[test]
+fn test_date_format_modifier_percent_escape() {
+    // Test that %% is preserved correctly with modifiers
+    new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["-d", "1999-06-01", "+%%Y=%10Y"])
+        .succeeds()
+        .stdout_is("%Y=0000001999\n");
+}
+
+// Tests for --debug flag
+#[test]
+fn test_date_debug_basic() {
+    // Test that --debug outputs to stderr, not stdout
+    let result = new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["--debug", "-d", "2005-01-01", "+%Y"])
+        .succeeds();
+
+    // Stdout should contain only the formatted date
+    assert_eq!(result.stdout_str().trim(), "2005");
+
+    // Stderr should contain debug information
+    let stderr = result.stderr_str();
+    assert!(stderr.contains("date: input string:"));
+    assert!(stderr.contains("date: parsed date part:"));
+    assert!(stderr.contains("date: parsed time part:"));
+    assert!(stderr.contains("date: input timezone:"));
+}
+
+#[test]
+fn test_date_debug_various_formats() {
+    // Test debug mode with various date formats and expected output
+    let test_cases = [
+        // (input, format, expected_stdout_contains, expected_stderr_contains, stderr_not_contains, check_input_string)
+        (
+            "2005-01-01 +345 day",
+            "+%Y-%m-%d",
+            "2005-12-12",
+            "date: parsed date part: (Y-M-D) 2005-12-12",
+            "",
+            true,
+        ),
+        (
+            "@0",
+            "+%Y-%m-%d",
+            "1970-01-01",
+            "date: parsed date part: (Y-M-D) 1970-01-01",
+            "warning: using midnight",
+            true,
+        ),
+        (
+            "@-22",
+            "+%s",
+            "-22",
+            "date: parsed date part: (Y-M-D) 1969-12-31",
+            "",
+            true,
+        ),
+        (
+            "2021-03-20 14:53:01 EST",
+            "+%Y-%m-%d",
+            "2021-03-20",
+            "date: parsed date part: (Y-M-D) 2021-03-20",
+            "",
+            true,
+        ),
+        (
+            "m9",
+            "+%T",
+            "21:00:00",
+            "date: parsed time part:",
+            "",
+            false,
+        ), // Military TZ is composed before parsing
+        (
+            " ",
+            "+%T",
+            "00:00:00",
+            "date: warning: using midnight",
+            "",
+            false,
+        ), // Whitespace is composed
+        (
+            "1 day ago",
+            "+%Y-%m-%d",
+            "",
+            "date: parsed date part: (Y-M-D)",
+            "",
+            true,
+        ),
+    ];
+
+    for (
+        input,
+        format,
+        stdout_contains,
+        stderr_contains,
+        stderr_not_contains,
+        check_input_string,
+    ) in test_cases
+    {
+        let result = new_ucmd!()
+            .env("TZ", "UTC")
+            .args(&["--debug", "-d", input, format])
+            .succeeds();
+
+        if !stdout_contains.is_empty() {
+            assert!(
+                result.stdout_str().contains(stdout_contains),
+                "For input '{input}': stdout should contain '{stdout_contains}', got: {}",
+                result.stdout_str()
+            );
+        }
+
+        let stderr = result.stderr_str();
+        assert!(
+            stderr.contains(stderr_contains),
+            "For input '{input}': stderr should contain '{stderr_contains}'"
+        );
+
+        if check_input_string {
+            assert!(
+                stderr.contains(&format!("date: input string: {input}")),
+                "For input '{input}': stderr should contain input string"
+            );
+        } else {
+            // Just check that there is some input string
+            assert!(
+                stderr.contains("date: input string:"),
+                "For input '{input}': stderr should contain some input string"
+            );
+        }
+
+        if !stderr_not_contains.is_empty() {
+            assert!(
+                !stderr.contains(stderr_not_contains),
+                "For input '{input}': stderr should not contain '{stderr_not_contains}'"
+            );
+        }
+    }
+}
+
+#[test]
+fn test_date_debug_midnight_warnings() {
+    // Test midnight warning behavior with various inputs
+    let test_cases = [
+        // (input, format, should_warn)
+        ("2005-01-01", "+%Y", true),                    // No time specified
+        ("1997-01-19 08:17:48 +0", "+%Y-%m-%d", false), // Time specified
+        ("@0", "+%Y-%m-%d", false),                     // Epoch format
+        (" ", "+%T", true),                             // Whitespace (defaults to midnight)
+    ];
+
+    for (input, format, should_warn) in test_cases {
+        let result = new_ucmd!()
+            .env("TZ", "UTC")
+            .args(&["--debug", "-d", input, format])
+            .succeeds();
+
+        let stderr = result.stderr_str();
+        if should_warn {
+            assert!(
+                stderr.contains("date: warning: using midnight"),
+                "Input '{input}' should produce midnight warning"
+            );
+        } else {
+            assert!(
+                !stderr.contains("warning: using midnight"),
+                "Input '{input}' should not produce midnight warning"
+            );
+        }
+    }
+}
+
+#[test]
+fn test_date_debug_without_flag() {
+    // Test that without --debug, no debug output appears
+    let result = new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["-d", "2005-01-01", "+%Y"])
+        .succeeds();
+
+    let stderr = result.stderr_str();
+    assert!(!stderr.contains("date: input string:"));
+    assert!(!stderr.contains("date: parsed date part:"));
+}
+
+#[test]
+fn test_date_debug_with_multiple_inputs() {
+    // Test debug mode with file and stdin input (multiple dates)
+    let (at, mut ucmd) = at_and_ucmd!();
+    let file = "debug_test_file";
+    at.write(file, "2005-01-01\n2006-02-02\n");
+
+    let result = ucmd
+        .env("TZ", "UTC")
+        .args(&["--debug", "-f", file, "+%Y"])
+        .succeeds();
+
+    assert_eq!(result.stdout_str(), "2005\n2006\n");
+
+    let stderr = result.stderr_str();
+    // Should show debug output for both lines
+    assert!(stderr.contains("date: input string: 2005-01-01"));
+    assert!(stderr.contains("date: input string: 2006-02-02"));
+    assert!(stderr.contains("date: parsed date part: (Y-M-D) 2005-01-01"));
+    assert!(stderr.contains("date: parsed date part: (Y-M-D) 2006-02-02"));
+
+    // Test with stdin
+    let result = new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["--debug", "-f", "-", "+%Y"])
+        .pipe_in("2005-01-01\n2006-02-02\n")
+        .succeeds();
+
+    assert_eq!(result.stdout_str(), "2005\n2006\n");
+    let stderr = result.stderr_str();
+    assert!(stderr.contains("date: input string: 2005-01-01"));
+    assert!(stderr.contains("date: input string: 2006-02-02"));
+}
+
+#[test]
+fn test_date_debug_with_flags() {
+    // Test debug mode combined with other flags and exit codes
+    let test_cases = [
+        // (args, should_succeed, stdout_contains, stderr_contains)
+        (
+            vec!["--debug", "-d", "2005-01-01", "+%Y"],
+            true,
+            "2005",
+            "date: input string:",
+        ),
+        (
+            vec!["--debug", "-u", "-d", "2005-01-01", "+%Y-%m-%d %Z"],
+            true,
+            "UTC",
+            "date: parsed date part:",
+        ),
+        (
+            vec!["--debug", "-R", "-d", "2005-01-01"],
+            true,
+            "Sat, 01 Jan 2005",
+            "date: input string:",
+        ),
+        (
+            vec!["--debug", "-d", "invalid", "+%Y"],
+            false,
+            "",
+            "invalid date",
+        ),
+    ];
+
+    for (args, should_succeed, stdout_contains, stderr_contains) in test_cases {
+        let mut cmd = new_ucmd!();
+        cmd.env("TZ", "UTC").args(&args);
+
+        if should_succeed {
+            let result = cmd.succeeds();
+            assert!(
+                result.stdout_str().contains(stdout_contains),
+                "Args {args:?}: stdout should contain '{stdout_contains}'"
+            );
+            assert!(
+                result.stderr_str().contains(stderr_contains),
+                "Args {args:?}: stderr should contain '{stderr_contains}'"
+            );
+        } else {
+            let result = cmd.fails();
+            assert!(
+                result.stderr_str().contains(stderr_contains),
+                "Args {args:?}: stderr should contain '{stderr_contains}'"
+            );
+        }
+    }
+}
+
+#[test]
+fn test_date_debug_current_time() {
+    // Test that debug mode without -d doesn't produce debug output (no parsing)
+    let result = new_ucmd!()
+        .env("TZ", "UTC")
+        .args(&["--debug", "+%Y"])
+        .succeeds();
+
+    let stderr = result.stderr_str();
+    // No parsing happens for "now", so no debug output
+    assert_eq!(stderr, "");
+}