From 3b808556fcd425175c0d2e53239f0a5c293520d8 Mon Sep 17 00:00:00 2001 From: Connor Black Date: Mon, 6 Apr 2026 15:31:45 -0400 Subject: [PATCH 1/4] add sync_max_age_days to email adapter When connecting an email account for the first time, every unread email in the inbox gets imported and treated as new. This floods the agent with stale messages it shouldn't respond to. Add a `sync_max_age_days` config option (default: 0 / no limit) that combines IMAP's UNSEEN flag with a SINCE date filter so only recent unread emails are imported. The SINCE query is evaluated server-side by the IMAP server, so it's efficient even on large mailboxes. Supported on both the default email config and per-instance configs. --- src/config.rs | 1 + src/config/load.rs | 2 ++ src/config/toml_schema.rs | 4 ++++ src/config/types.rs | 2 ++ src/messaging/email.rs | 19 ++++++++++++++++++- 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index eb0891a9a..04677cef0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1706,6 +1706,7 @@ maintenance_merge_similarity_threshold = 1.1 allowed_senders: vec![], max_body_bytes: 1_000_000, max_attachment_bytes: 10_000_000, + sync_max_age_days: 0, instances: vec![], }), webhook: None, diff --git a/src/config/load.rs b/src/config/load.rs index 1e6997515..c0fd56d01 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -2161,6 +2161,7 @@ impl Config { allowed_senders: instance.allowed_senders, max_body_bytes: instance.max_body_bytes, max_attachment_bytes: instance.max_attachment_bytes, + sync_max_age_days: instance.sync_max_age_days, } }) .collect::>(); @@ -2231,6 +2232,7 @@ impl Config { allowed_senders: email.allowed_senders, max_body_bytes: email.max_body_bytes, max_attachment_bytes: email.max_attachment_bytes, + sync_max_age_days: email.sync_max_age_days, instances, }) }), diff --git a/src/config/toml_schema.rs b/src/config/toml_schema.rs index 5ab153d86..9f8f7b447 100644 --- a/src/config/toml_schema.rs +++ 
b/src/config/toml_schema.rs @@ -627,6 +627,8 @@ pub(super) struct TomlEmailConfig { #[serde(default = "default_email_max_attachment_bytes")] pub(super) max_attachment_bytes: usize, #[serde(default)] + pub(super) sync_max_age_days: u64, + #[serde(default)] pub(super) instances: Vec, } @@ -661,6 +663,8 @@ pub(super) struct TomlEmailInstanceConfig { pub(super) max_body_bytes: usize, #[serde(default = "default_email_max_attachment_bytes")] pub(super) max_attachment_bytes: usize, + #[serde(default)] + pub(super) sync_max_age_days: u64, } #[derive(Deserialize)] diff --git a/src/config/types.rs b/src/config/types.rs index 1cbd07501..5e508fd06 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -2600,6 +2600,7 @@ pub struct EmailConfig { pub allowed_senders: Vec, pub max_body_bytes: usize, pub max_attachment_bytes: usize, + pub sync_max_age_days: u64, pub instances: Vec, } @@ -2625,6 +2626,7 @@ pub struct EmailInstanceConfig { pub allowed_senders: Vec, pub max_body_bytes: usize, pub max_attachment_bytes: usize, + pub sync_max_age_days: u64, } impl std::fmt::Debug for EmailInstanceConfig { diff --git a/src/messaging/email.rs b/src/messaging/email.rs index 884dab226..1c12237d8 100644 --- a/src/messaging/email.rs +++ b/src/messaging/email.rs @@ -90,6 +90,7 @@ struct EmailPollConfig { poll_interval: Duration, allowed_senders: Vec, max_body_bytes: usize, + sync_max_age_days: u64, runtime_key: String, } @@ -142,6 +143,7 @@ pub struct EmailAdapter { allowed_senders: Vec, max_body_bytes: usize, max_attachment_bytes: usize, + sync_max_age_days: u64, smtp_transport: AsyncSmtpTransport, shutdown_tx: Arc>>>, poll_task: Arc>>>, @@ -199,6 +201,7 @@ impl EmailAdapter { allowed_senders: config.allowed_senders.clone(), max_body_bytes: config.max_body_bytes, max_attachment_bytes: config.max_attachment_bytes, + sync_max_age_days: config.sync_max_age_days, instances: Vec::new(), }; Self::build(runtime_key.into(), &email_config) @@ -238,6 +241,7 @@ impl EmailAdapter { 
allowed_senders: config.allowed_senders.clone(), max_body_bytes: config.max_body_bytes.max(1024), max_attachment_bytes: config.max_attachment_bytes.max(1024), + sync_max_age_days: config.sync_max_age_days, smtp_transport, shutdown_tx: Arc::new(RwLock::new(None)), poll_task: Arc::new(RwLock::new(None)), @@ -257,6 +261,7 @@ impl EmailAdapter { poll_interval: self.poll_interval, allowed_senders: self.allowed_senders.clone(), max_body_bytes: self.max_body_bytes, + sync_max_age_days: self.sync_max_age_days, runtime_key: self.runtime_key.clone(), } } @@ -713,8 +718,19 @@ fn poll_inbox_once(config: &EmailPollConfig) -> anyhow::Result 0 { + let since_date = (Utc::now() - ChronoDuration::days(config.sync_max_age_days as i64)) + .format("%d-%b-%Y") + .to_string(); + format!("UNSEEN SINCE {since_date}") + } else { + "UNSEEN".to_string() + }; + let message_uids = session - .uid_search("UNSEEN") + .uid_search(&search_query) .with_context(|| format!("failed to search unseen messages in folder '{folder}'"))?; for uid in message_uids { @@ -1210,6 +1226,7 @@ pub fn search_mailbox( poll_interval: Duration::from_secs(config.poll_interval_secs.max(5)), allowed_senders: config.allowed_senders.clone(), max_body_bytes: config.max_body_bytes.max(1024), + sync_max_age_days: config.sync_max_age_days, runtime_key: "email".to_string(), })?; From 6531919de91b024f1c20e28edd1bb2676dea76ac Mon Sep 17 00:00:00 2001 From: Connor Black Date: Fri, 12 Jun 2026 18:05:42 -0400 Subject: [PATCH 2/4] fix(email): share since-date helper, guard against day-count overflow Extract the UNSEEN SINCE construction into build_since_date() and use it from both poll_inbox_once and build_imap_search_criterion. 
The helper clamps day counts to MAX_SINCE_DAYS (1_000_000) and returns None for values past chrono::TimeDelta's internal bounds, replacing the original 'u64 as i64' cast in poll_inbox_once that would wrap to a negative ChronoDuration for inputs past i64::MAX and produce a future SINCE date that silently excluded every message. Also adds 3 unit tests for build_since_date and updates the email-setup docs to document sync_max_age_days on the default config and per-instance configs. --- docs/content/docs/(messaging)/email-setup.mdx | 22 +++++ src/messaging/email.rs | 97 ++++++++++++++++--- 2 files changed, 105 insertions(+), 14 deletions(-) diff --git a/docs/content/docs/(messaging)/email-setup.mdx b/docs/content/docs/(messaging)/email-setup.mdx index 1e3dc6769..e93364fd4 100644 --- a/docs/content/docs/(messaging)/email-setup.mdx +++ b/docs/content/docs/(messaging)/email-setup.mdx @@ -61,6 +61,7 @@ from_name = "Spacebot" poll_interval_secs = 30 folders = ["INBOX"] allowed_senders = [] +sync_max_age_days = 0 # 0 = no limit; only import unread mail newer than N days ``` Credentials support `env:VAR_NAME` references. @@ -142,6 +143,27 @@ poll_interval_secs = 30 Use a longer interval if your provider rate limits IMAP polling. +## Limiting backfill on first connect + +By default, the first poll after connecting an account imports every unread email in the configured folders. On an inbox with years of unread mail this floods the agent with stale messages it shouldn't respond to. + +Set `sync_max_age_days` to bound how far back the poller looks. The IMAP server filters server-side, so the check is cheap even on large mailboxes. + +```toml +[messaging.email] +sync_max_age_days = 1 # only import unread emails from the last 24h +``` + +The same knob works on named instances: + +```toml +[[messaging.email.instances]] +name = "support" +sync_max_age_days = 7 # import the last week of unread support mail +``` + +Default is `0` (no limit), which preserves the original behavior. 
+ ## Verify it's working 1. Send an email to the configured mailbox from an allowed sender. diff --git a/src/messaging/email.rs b/src/messaging/email.rs index 1c12237d8..454893779 100644 --- a/src/messaging/email.rs +++ b/src/messaging/email.rs @@ -720,13 +720,14 @@ fn poll_inbox_once(config: &EmailPollConfig) -> anyhow::Result 0 { - let since_date = (Utc::now() - ChronoDuration::days(config.sync_max_age_days as i64)) - .format("%d-%b-%Y") - .to_string(); - format!("UNSEEN SINCE {since_date}") - } else { - "UNSEEN".to_string() + // Narrow the config field from u64 to u32 here (IMAP dates don't need + // more than ~5 billion years of range); build_since_date further guards + // against pathological inputs by returning None instead of producing a + // future SINCE date. + let since_days = u32::try_from(config.sync_max_age_days).ok(); + let search_query = match build_since_date(since_days) { + Some(date) => format!("UNSEEN SINCE {date}"), + None => "UNSEEN".to_string(), }; let message_uids = session @@ -1397,10 +1398,7 @@ fn build_imap_search_criterion(query: &EmailSearchQuery) -> String { clauses.push(format!("TEXT {}", quote_imap_search_value(&text))); } - if let Some(since_days) = query.since_days.filter(|days| *days > 0) { - let since_date = (Utc::now() - ChronoDuration::days(since_days as i64)) - .format("%d-%b-%Y") - .to_string(); + if let Some(since_date) = build_since_date(query.since_days) { clauses.push(format!("SINCE {since_date}")); } @@ -1411,6 +1409,31 @@ fn build_imap_search_criterion(query: &EmailSearchQuery) -> String { } } +/// Compute a `dd-MMM-YYYY` IMAP SINCE date for the given day count, or +/// `None` if the count is zero / missing / larger than `MAX_SINCE_DAYS`. +/// +/// `MAX_SINCE_DAYS` (~2739 years) is well past any realistic config value +/// and stays inside chrono's `TimeDelta` bounds. Without this clamp a +/// sufficiently large input would panic during `Utc::now() - Duration::days(n)`. 
+/// +/// Shared between the poll path (`poll_inbox_once`) and the search path +/// (`build_imap_search_criterion`) so the date format and the overflow +/// guard stay in lockstep. +fn build_since_date(days: Option) -> Option { + let days = days.filter(|d| *d > 0 && *d <= MAX_SINCE_DAYS)?; + let days_i64 = i64::from(days); + Some( + (Utc::now() - ChronoDuration::days(days_i64)) + .format("%d-%b-%Y") + .to_string(), + ) +} + +/// Maximum day count accepted by `build_since_date`. 1_000_000 days is +/// ~2739 years, far past anything a user would type in TOML, and small +/// enough to stay inside chrono's internal `TimeDelta` range. +const MAX_SINCE_DAYS: u32 = 1_000_000; + fn sanitize_imap_search_value(value: Option<&str>) -> Option { let value = value?.trim(); if value.is_empty() { @@ -1772,9 +1795,10 @@ struct EmailReplyContext { #[cfg(test)] mod tests { use super::{ - EmailSearchHit, EmailSearchQuery, build_imap_search_criterion, derive_thread_key, - extract_message_ids, is_local_mail_host, normalize_email_target, normalize_reply_subject, - normalize_search_folders, parse_primary_mailbox, sort_and_limit_search_hits, + EmailSearchHit, EmailSearchQuery, build_imap_search_criterion, build_since_date, + derive_thread_key, extract_message_ids, is_local_mail_host, normalize_email_target, + normalize_reply_subject, normalize_search_folders, parse_primary_mailbox, + sort_and_limit_search_hits, }; #[test] @@ -1879,6 +1903,51 @@ mod tests { assert!(criterion.contains("TEXT \"release \\\\\\\"candidate\\\\\\\"\"")); } + #[test] + fn build_since_date_returns_none_for_zero_and_missing() { + assert_eq!(build_since_date(None), None); + assert_eq!(build_since_date(Some(0)), None); + } + + #[test] + fn build_since_date_emits_imap_date_format() { + // The IMAP SINCE clause requires dd-MMM-YYYY with a 4-digit year + // (RFC 3501 §6.4.4). Verify length and dash positions. 
+ let date = build_since_date(Some(1)).expect("non-zero days should produce a date"); + assert_eq!(date.len(), 11, "expected dd-MMM-YYYY (got {date:?})"); + let bytes = date.as_bytes(); + assert!( + bytes[2] == b'-' && bytes[6] == b'-', + "expected dashes at idx 2 and 6 (got {date:?})" + ); + assert!( + bytes[7..].iter().all(|b| b.is_ascii_digit()), + "year must be ASCII digits (got {date:?})" + ); + } + + #[test] + fn build_since_date_handles_large_day_counts_without_overflow() { + // A year is ~365 days; 1_000 days is ~3 years. Should produce a + // well-formed date in the past. + let date = build_since_date(Some(1_000)).expect("1000 days should produce a date"); + let year: u32 = date[7..].parse().expect("year must parse as u32"); + assert!( + (1900..=2100).contains(&year), + "expected a sane past year, got {date:?}" + ); + + // Chrono's TimeDelta is internally bounded — past a few million days + // the `Utc::now() - ChronoDuration::days(n)` call panics. We accept + // those values by returning None (i.e. degrade to "no SINCE clause" + // rather than crashing the poll task). + assert_eq!( + build_since_date(Some(100_000_000)), + None, + "absurdly large day counts must not produce a date" + ); + } + #[test] fn normalize_search_folders_falls_back_to_inbox() { let folders = normalize_search_folders(&[], &[]); From ca32577ada565713683e6fa5f1ba903d275468b7 Mon Sep 17 00:00:00 2001 From: Connor Black Date: Sat, 13 Jun 2026 02:24:56 -0400 Subject: [PATCH 3/4] fix(email): extract poll-query helper, fix Debug impls, clarify SINCE semantics Addresses the remaining review comments on PR #547: - Extract build_poll_search_query() so the UNSEEN / UNSEEN SINCE assembly is unit-testable independently of the IMAP session. Add three tests covering zero, non-zero, and absurd input. - Add sync_max_age_days to the Debug impls for both EmailConfig and EmailInstanceConfig. The field is non-sensitive and omitting it made config dumps misleading when diagnosing polling behavior. 
- Update docs and PR description to describe the actual IMAP SINCE semantics precisely: inclusive midnight boundary, not a rolling 24h window. Treat the value as a backfill cap. --- docs/content/docs/(messaging)/email-setup.mdx | 10 +-- src/config/types.rs | 2 + src/messaging/email.rs | 67 ++++++++++++++----- 3 files changed, 60 insertions(+), 19 deletions(-) diff --git a/docs/content/docs/(messaging)/email-setup.mdx b/docs/content/docs/(messaging)/email-setup.mdx index e93364fd4..f6510b134 100644 --- a/docs/content/docs/(messaging)/email-setup.mdx +++ b/docs/content/docs/(messaging)/email-setup.mdx @@ -61,7 +61,7 @@ from_name = "Spacebot" poll_interval_secs = 30 folders = ["INBOX"] allowed_senders = [] -sync_max_age_days = 0 # 0 = no limit; only import unread mail newer than N days +sync_max_age_days = 0 # 0 = no limit; cap backfill at N days of unread mail (IMAP SINCE = inclusive midnight boundary) ``` Credentials support `env:VAR_NAME` references. @@ -147,11 +147,11 @@ Use a longer interval if your provider rate limits IMAP polling. By default, the first poll after connecting an account imports every unread email in the configured folders. On an inbox with years of unread mail this floods the agent with stale messages it shouldn't respond to. -Set `sync_max_age_days` to bound how far back the poller looks. The IMAP server filters server-side, so the check is cheap even on large mailboxes. +Set `sync_max_age_days` to cap how far back the poller looks. The IMAP server filters server-side, so the check is cheap even on large mailboxes. 
```toml [messaging.email] -sync_max_age_days = 1 # only import unread emails from the last 24h +sync_max_age_days = 1 # cap backfill at one day of unread mail ``` The same knob works on named instances: @@ -159,11 +159,13 @@ The same knob works on named instances: ```toml [[messaging.email.instances]] name = "support" -sync_max_age_days = 7 # import the last week of unread support mail +sync_max_age_days = 7 # cap backfill at one week of unread support mail ``` Default is `0` (no limit), which preserves the original behavior. +**Semantics.** IMAP `SINCE` (RFC 3501 §6.4.4) is inclusive and matches against whole dates at midnight in the server's local time, not rolling 24-hour windows. `sync_max_age_days = 1` therefore bounds the *oldest* mail the poller will import to "received on or after yesterday's date", which can include mail up to ~48 hours old depending on the current time and the server's timezone. Treat the value as a backfill cap, not a literal "last N hours" window — pick a value that's comfortably larger than your real cutoff if you need to be strict (e.g. use 2 to approximate "last 24h"). + ## Verify it's working 1. Send an email to the configured mailbox from an allowed sender. 
diff --git a/src/config/types.rs b/src/config/types.rs index 18ebbdb9a..f05bc7c5b 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -2772,6 +2772,7 @@ impl std::fmt::Debug for EmailInstanceConfig { .field("allowed_senders", &"[REDACTED]") .field("max_body_bytes", &self.max_body_bytes) .field("max_attachment_bytes", &self.max_attachment_bytes) + .field("sync_max_age_days", &self.sync_max_age_days) .finish() } } @@ -2797,6 +2798,7 @@ impl std::fmt::Debug for EmailConfig { .field("allowed_senders", &"[REDACTED]") .field("max_body_bytes", &self.max_body_bytes) .field("max_attachment_bytes", &self.max_attachment_bytes) + .field("sync_max_age_days", &self.sync_max_age_days) .finish() } } diff --git a/src/messaging/email.rs b/src/messaging/email.rs index 454893779..06ff993a8 100644 --- a/src/messaging/email.rs +++ b/src/messaging/email.rs @@ -718,17 +718,11 @@ fn poll_inbox_once(config: &EmailPollConfig) -> anyhow::Result format!("UNSEEN SINCE {date}"), - None => "UNSEEN".to_string(), - }; + // Combine UNSEEN with a SINCE date filter when sync_max_age_days is + // set, so first-connect doesn't flood the agent with years of unread + // email. Assembly is in build_poll_search_query so the query + // construction is unit-testable. + let search_query = build_poll_search_query(config.sync_max_age_days); let message_uids = session .uid_search(&search_query) @@ -1429,6 +1423,25 @@ fn build_since_date(days: Option) -> Option { ) } +/// Build the IMAP search query for a poll cycle. +/// +/// Returns `"UNSEEN"` when `sync_max_age_days` is zero (no limit) or +/// outside a safe range; returns `"UNSEEN SINCE "` otherwise. +/// +/// The IMAP `SINCE` filter operates on whole dates with an inclusive, +/// midnight-anchored boundary (RFC 3501 §6.4.4). That means +/// `sync_max_age_days = 1` can include mail up to ~48h old depending on +/// the current local time and the server's date, not strictly the last +/// 24h. 
Document and name the field as a *backfill cap*, not a literal +/// time window. +fn build_poll_search_query(sync_max_age_days: u64) -> String { + let since_days = u32::try_from(sync_max_age_days).ok(); + match build_since_date(since_days) { + Some(date) => format!("UNSEEN SINCE {date}"), + None => "UNSEEN".to_string(), + } +} + /// Maximum day count accepted by `build_since_date`. 1_000_000 days is /// ~2739 years, far past anything a user would type in TOML, and small /// enough to stay inside chrono's internal `TimeDelta` range. @@ -1795,10 +1808,10 @@ struct EmailReplyContext { #[cfg(test)] mod tests { use super::{ - EmailSearchHit, EmailSearchQuery, build_imap_search_criterion, build_since_date, - derive_thread_key, extract_message_ids, is_local_mail_host, normalize_email_target, - normalize_reply_subject, normalize_search_folders, parse_primary_mailbox, - sort_and_limit_search_hits, + EmailSearchHit, EmailSearchQuery, build_imap_search_criterion, build_poll_search_query, + build_since_date, derive_thread_key, extract_message_ids, is_local_mail_host, + normalize_email_target, normalize_reply_subject, normalize_search_folders, + parse_primary_mailbox, sort_and_limit_search_hits, }; #[test] @@ -1909,6 +1922,30 @@ mod tests { assert_eq!(build_since_date(Some(0)), None); } + #[test] + fn build_poll_search_query_returns_unseen_for_zero() { + // No backfill cap means we use the original `UNSEEN` query exactly. + assert_eq!(build_poll_search_query(0), "UNSEEN"); + } + + #[test] + fn build_poll_search_query_appends_since_for_nonzero() { + // A non-zero cap composes `UNSEEN SINCE `. The exact date + // depends on the local clock; only assert the structure here. + let query = build_poll_search_query(7); + assert!(query.starts_with("UNSEEN SINCE "), "got {query:?}"); + // The date suffix is 11 chars (dd-MMM-YYYY). 
+ assert_eq!(query.len(), "UNSEEN SINCE ".len() + 11, "got {query:?}"); + } + + #[test] + fn build_poll_search_query_degrades_to_unseen_for_absurd_inputs() { + // u64::MAX is way past MAX_SINCE_DAYS. The helper must not panic and + // must not produce a future-dated query (which would silently exclude + // every message). Falling back to plain `UNSEEN` is the safe behavior. + assert_eq!(build_poll_search_query(u64::MAX), "UNSEEN"); + } + #[test] fn build_since_date_emits_imap_date_format() { // The IMAP SINCE clause requires dd-MMM-YYYY with a 4-digit year From 7d09764c102cd5aec4c0e64d9085a66c07ff67c2 Mon Sep 17 00:00:00 2001 From: Connor Black Date: Fri, 19 Jun 2026 12:21:31 -0400 Subject: [PATCH 4/4] docs(email): clarify sync_max_age_days boundary semantics --- docs/content/docs/(messaging)/email-setup.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/(messaging)/email-setup.mdx b/docs/content/docs/(messaging)/email-setup.mdx index f6510b134..ab9592312 100644 --- a/docs/content/docs/(messaging)/email-setup.mdx +++ b/docs/content/docs/(messaging)/email-setup.mdx @@ -164,7 +164,7 @@ sync_max_age_days = 7 # cap backfill at one week of unread support mail Default is `0` (no limit), which preserves the original behavior. -**Semantics.** IMAP `SINCE` (RFC 3501 §6.4.4) is inclusive and matches against whole dates at midnight in the server's local time, not rolling 24-hour windows. `sync_max_age_days = 1` therefore bounds the *oldest* mail the poller will import to "received on or after yesterday's date", which can include mail up to ~48 hours old depending on the current time and the server's timezone. Treat the value as a backfill cap, not a literal "last N hours" window — pick a value that's comfortably larger than your real cutoff if you need to be strict (e.g. use 2 to approximate "last 24h"). 
+**Semantics.** IMAP `SINCE` (RFC 3501 §6.4.4) is inclusive and matches against whole dates at midnight in the server's local time, not rolling 24-hour windows. `sync_max_age_days = 1` therefore bounds the *oldest* mail the poller will import to "received on or after yesterday's date", which can include mail up to ~48 hours old depending on the current time and the server's timezone. Treat the value as a backfill cap, not a literal "last N hours" window: a smaller value is stricter (imports less old mail) and a larger value is more lenient. For a value of N, the window always reaches back at least roughly N×24h and at most roughly (N+1)×24h. Round up if you'd rather over-fetch than miss mail near the date boundary. ## Verify it's working