diff --git a/docs/content/docs/(messaging)/email-setup.mdx b/docs/content/docs/(messaging)/email-setup.mdx index 1e3dc6769..ab9592312 100644 --- a/docs/content/docs/(messaging)/email-setup.mdx +++ b/docs/content/docs/(messaging)/email-setup.mdx @@ -61,6 +61,7 @@ from_name = "Spacebot" poll_interval_secs = 30 folders = ["INBOX"] allowed_senders = [] +sync_max_age_days = 0 # 0 = no limit; cap backfill at N days of unread mail (IMAP SINCE = inclusive midnight boundary) ``` Credentials support `env:VAR_NAME` references. @@ -142,6 +143,29 @@ poll_interval_secs = 30 Use a longer interval if your provider rate limits IMAP polling. +## Limiting backfill on first connect + +By default, the first poll after connecting an account imports every unread email in the configured folders. On an inbox with years of unread mail this floods the agent with stale messages it shouldn't respond to. + +Set `sync_max_age_days` to cap how far back the poller looks. The IMAP server filters server-side, so the check is cheap even on large mailboxes. + +```toml +[messaging.email] +sync_max_age_days = 1 # cap backfill at one day of unread mail +``` + +The same knob works on named instances: + +```toml +[[messaging.email.instances]] +name = "support" +sync_max_age_days = 7 # cap backfill at one week of unread support mail +``` + +Default is `0` (no limit), which preserves the original behavior. + +**Semantics.** IMAP `SINCE` (RFC 3501 §6.4.4) is inclusive and matches against whole dates at midnight in the server's local time, not rolling 24-hour windows. `sync_max_age_days = 1` therefore bounds the *oldest* mail the poller will import to "received on or after yesterday's date", which can include mail up to ~48 hours old depending on the current time and the server's timezone. Treat the value as a backfill cap, not a literal "last N hours" window: a smaller value is stricter (imports less old mail) and a larger value is more lenient, with an effective floor of roughly N×24h. 
Round up if you'd rather over-fetch than miss mail near the date boundary. + ## Verify it's working 1. Send an email to the configured mailbox from an allowed sender. diff --git a/src/config.rs b/src/config.rs index 7a4c1e3f0..a54549d3a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1749,6 +1749,7 @@ id = "main" allowed_senders: vec![], max_body_bytes: 1_000_000, max_attachment_bytes: 10_000_000, + sync_max_age_days: 0, instances: vec![], }), webhook: None, diff --git a/src/config/load.rs b/src/config/load.rs index b4c4e5df5..9e7ffb6cb 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -2210,6 +2210,7 @@ impl Config { allowed_senders: instance.allowed_senders, max_body_bytes: instance.max_body_bytes, max_attachment_bytes: instance.max_attachment_bytes, + sync_max_age_days: instance.sync_max_age_days, } }) .collect::>(); @@ -2280,6 +2281,7 @@ impl Config { allowed_senders: email.allowed_senders, max_body_bytes: email.max_body_bytes, max_attachment_bytes: email.max_attachment_bytes, + sync_max_age_days: email.sync_max_age_days, instances, }) }), diff --git a/src/config/toml_schema.rs b/src/config/toml_schema.rs index b336ad354..8c56208ef 100644 --- a/src/config/toml_schema.rs +++ b/src/config/toml_schema.rs @@ -647,6 +647,8 @@ pub(super) struct TomlEmailConfig { #[serde(default = "default_email_max_attachment_bytes")] pub(super) max_attachment_bytes: usize, #[serde(default)] + pub(super) sync_max_age_days: u64, + #[serde(default)] pub(super) instances: Vec, } @@ -681,6 +683,8 @@ pub(super) struct TomlEmailInstanceConfig { pub(super) max_body_bytes: usize, #[serde(default = "default_email_max_attachment_bytes")] pub(super) max_attachment_bytes: usize, + #[serde(default)] + pub(super) sync_max_age_days: u64, } #[derive(Deserialize)] diff --git a/src/config/types.rs b/src/config/types.rs index 65d646c8f..f05bc7c5b 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -2721,6 +2721,7 @@ pub struct EmailConfig { pub allowed_senders: Vec, pub 
max_body_bytes: usize, pub max_attachment_bytes: usize, + pub sync_max_age_days: u64, pub instances: Vec, } @@ -2746,6 +2747,7 @@ pub struct EmailInstanceConfig { pub allowed_senders: Vec, pub max_body_bytes: usize, pub max_attachment_bytes: usize, + pub sync_max_age_days: u64, } impl std::fmt::Debug for EmailInstanceConfig { @@ -2770,6 +2772,7 @@ impl std::fmt::Debug for EmailInstanceConfig { .field("allowed_senders", &"[REDACTED]") .field("max_body_bytes", &self.max_body_bytes) .field("max_attachment_bytes", &self.max_attachment_bytes) + .field("sync_max_age_days", &self.sync_max_age_days) .finish() } } @@ -2795,6 +2798,7 @@ impl std::fmt::Debug for EmailConfig { .field("allowed_senders", &"[REDACTED]") .field("max_body_bytes", &self.max_body_bytes) .field("max_attachment_bytes", &self.max_attachment_bytes) + .field("sync_max_age_days", &self.sync_max_age_days) .finish() } } diff --git a/src/messaging/email.rs b/src/messaging/email.rs index 884dab226..06ff993a8 100644 --- a/src/messaging/email.rs +++ b/src/messaging/email.rs @@ -90,6 +90,7 @@ struct EmailPollConfig { poll_interval: Duration, allowed_senders: Vec, max_body_bytes: usize, + sync_max_age_days: u64, runtime_key: String, } @@ -142,6 +143,7 @@ pub struct EmailAdapter { allowed_senders: Vec, max_body_bytes: usize, max_attachment_bytes: usize, + sync_max_age_days: u64, smtp_transport: AsyncSmtpTransport, shutdown_tx: Arc>>>, poll_task: Arc>>>, @@ -199,6 +201,7 @@ impl EmailAdapter { allowed_senders: config.allowed_senders.clone(), max_body_bytes: config.max_body_bytes, max_attachment_bytes: config.max_attachment_bytes, + sync_max_age_days: config.sync_max_age_days, instances: Vec::new(), }; Self::build(runtime_key.into(), &email_config) @@ -238,6 +241,7 @@ impl EmailAdapter { allowed_senders: config.allowed_senders.clone(), max_body_bytes: config.max_body_bytes.max(1024), max_attachment_bytes: config.max_attachment_bytes.max(1024), + sync_max_age_days: config.sync_max_age_days, smtp_transport, 
shutdown_tx: Arc::new(RwLock::new(None)), poll_task: Arc::new(RwLock::new(None)), @@ -257,6 +261,7 @@ impl EmailAdapter { poll_interval: self.poll_interval, allowed_senders: self.allowed_senders.clone(), max_body_bytes: self.max_body_bytes, + sync_max_age_days: self.sync_max_age_days, runtime_key: self.runtime_key.clone(), } } @@ -713,8 +718,14 @@ fn poll_inbox_once(config: &EmailPollConfig) -> anyhow::Result String { clauses.push(format!("TEXT {}", quote_imap_search_value(&text))); } - if let Some(since_days) = query.since_days.filter(|days| *days > 0) { - let since_date = (Utc::now() - ChronoDuration::days(since_days as i64)) - .format("%d-%b-%Y") - .to_string(); + if let Some(since_date) = build_since_date(query.since_days) { clauses.push(format!("SINCE {since_date}")); } @@ -1394,6 +1403,50 @@ fn build_imap_search_criterion(query: &EmailSearchQuery) -> String { } } +/// Compute a `dd-MMM-YYYY` IMAP SINCE date for the given day count, or +/// `None` if the count is zero / missing / larger than `MAX_SINCE_DAYS`, +/// or if the cutoff date cannot be written with a four-digit year. +/// +/// chrono prints years outside 0..=9999 with a sign, which is not a valid +/// IMAP date, so any result that is not exactly 11 characters is dropped. +/// The checked subtraction returns `None` instead of panicking on overflow. +/// +/// Shared between the poll path (`poll_inbox_once`) and the search path +/// (`build_imap_search_criterion`) so format and guards stay in lockstep. +fn build_since_date(days: Option<u32>) -> Option<String> { + let days = days.filter(|d| *d > 0 && *d <= MAX_SINCE_DAYS)?; + let span = ChronoDuration::days(i64::from(days)); + let since = Utc::now().checked_sub_signed(span)?.format("%d-%b-%Y").to_string(); + // Close to the cap the year turns negative and chrono emits e.g. `-0713`, + // which an IMAP server rejects. Returning `None` drops the SINCE clause, + // the same fallback used for every other out-of-range input. + Some(since).filter(|date| date.len() == 11) +} + +/// Build the IMAP search query for a poll cycle. +/// +/// Returns `"UNSEEN"` when `sync_max_age_days` is zero (no limit) or +/// outside a safe range; returns `"UNSEEN SINCE "` otherwise. 
+/// +/// The IMAP `SINCE` filter operates on whole dates with an inclusive, +/// midnight-anchored boundary (RFC 3501 §6.4.4). That means +/// `sync_max_age_days = 1` can include mail up to ~48h old depending on +/// the current local time and the server's date, not strictly the last +/// 24h. Document and name the field as a *backfill cap*, not a literal +/// time window. +fn build_poll_search_query(sync_max_age_days: u64) -> String { + let since_days = u32::try_from(sync_max_age_days).ok(); + match build_since_date(since_days) { + Some(date) => format!("UNSEEN SINCE {date}"), + None => "UNSEEN".to_string(), + } +} + +/// Maximum day count accepted by `build_since_date`. 1_000_000 days is +/// ~2739 years, far past anything a user would type in TOML, and small +/// enough to stay inside chrono's internal `TimeDelta` range. +const MAX_SINCE_DAYS: u32 = 1_000_000; + fn sanitize_imap_search_value(value: Option<&str>) -> Option { let value = value?.trim(); if value.is_empty() { @@ -1755,9 +1808,10 @@ struct EmailReplyContext { #[cfg(test)] mod tests { use super::{ - EmailSearchHit, EmailSearchQuery, build_imap_search_criterion, derive_thread_key, - extract_message_ids, is_local_mail_host, normalize_email_target, normalize_reply_subject, - normalize_search_folders, parse_primary_mailbox, sort_and_limit_search_hits, + EmailSearchHit, EmailSearchQuery, build_imap_search_criterion, build_poll_search_query, + build_since_date, derive_thread_key, extract_message_ids, is_local_mail_host, + normalize_email_target, normalize_reply_subject, normalize_search_folders, + parse_primary_mailbox, sort_and_limit_search_hits, }; #[test] @@ -1862,6 +1916,75 @@ mod tests { assert!(criterion.contains("TEXT \"release \\\\\\\"candidate\\\\\\\"\"")); } + #[test] + fn build_since_date_returns_none_for_zero_and_missing() { + assert_eq!(build_since_date(None), None); + assert_eq!(build_since_date(Some(0)), None); + } + + #[test] + fn build_poll_search_query_returns_unseen_for_zero() { + // No 
backfill cap means we use the original `UNSEEN` query exactly. + assert_eq!(build_poll_search_query(0), "UNSEEN"); + } + + #[test] + fn build_poll_search_query_appends_since_for_nonzero() { + // A non-zero cap composes `UNSEEN SINCE `. The exact date + // depends on the local clock; only assert the structure here. + let query = build_poll_search_query(7); + assert!(query.starts_with("UNSEEN SINCE "), "got {query:?}"); + // The date suffix is 11 chars (dd-MMM-YYYY). + assert_eq!(query.len(), "UNSEEN SINCE ".len() + 11, "got {query:?}"); + } + + #[test] + fn build_poll_search_query_degrades_to_unseen_for_absurd_inputs() { + // u64::MAX is way past MAX_SINCE_DAYS. The helper must not panic and + // must not produce a future-dated query (which would silently exclude + // every message). Falling back to plain `UNSEEN` is the safe behavior. + assert_eq!(build_poll_search_query(u64::MAX), "UNSEEN"); + } + + #[test] + fn build_since_date_emits_imap_date_format() { + // The IMAP SINCE clause requires dd-MMM-YYYY with a 4-digit year + // (RFC 3501 §6.4.4). Verify length and dash positions. + let date = build_since_date(Some(1)).expect("non-zero days should produce a date"); + assert_eq!(date.len(), 11, "expected dd-MMM-YYYY (got {date:?})"); + let bytes = date.as_bytes(); + assert!( + bytes[2] == b'-' && bytes[6] == b'-', + "expected dashes at idx 2 and 6 (got {date:?})" + ); + assert!( + bytes[7..].iter().all(|b| b.is_ascii_digit()), + "year must be ASCII digits (got {date:?})" + ); + } + + #[test] + fn build_since_date_handles_large_day_counts_without_overflow() { + // A year is ~365 days; 1_000 days is ~3 years. Should produce a + // well-formed date in the past. 
+ let date = build_since_date(Some(1_000)).expect("1000 days should produce a date"); + let year: u32 = date[7..].parse().expect("year must parse as u32"); + assert!( + (1900..=2100).contains(&year), + "expected a sane past year, got {date:?}" + ); + + // Chrono's TimeDelta is internally bounded — past a few million days + // the `Utc::now() - ChronoDuration::days(n)` call panics. We accept + // those values by returning None (i.e. degrade to "no SINCE clause" + // rather than crashing the poll task). + assert_eq!( + build_since_date(Some(100_000_000)), + None, + "absurdly large day counts must not produce a date" + ); + } + #[test] fn normalize_search_folders_falls_back_to_inbox() { let folders = normalize_search_folders(&[], &[]);