diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 798ec7de6..9613cdb9e 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -2,3 +2,7 @@ **Vulnerability:** The `OperationQueue` worker in `mill-server` executed file operations (create, write, delete, rename) using raw paths from the operation object without validating they were within the project root. **Learning:** Background workers that process serialized operations are a common bypass for security checks enforced at the API layer. The API layer might validate the request, but if the worker is "dumb" and blindly executes the queued operation, an internal attacker or a buggy component can exploit it. **Prevention:** Validation must happen at the *execution point* (in the worker), not just at the ingestion point. We introduced `validate_path` in the worker loop to enforce project root containment using `canonicalize` (handling non-existent files correctly). +## 2026-02-14 - SSRF vulnerability in web_fetch tool +**Vulnerability:** The `web_fetch` tool used a naive `reqwest::blocking::get` call, allowing it to fetch internal network resources or localhost services, exposing the system to Server-Side Request Forgery (SSRF). +**Learning:** Tools that fetch external URLs must explicitly block internal/private IPs. Relying on default HTTP client behavior is insufficient as it seamlessly follows redirects to internal resources and resolves local hostnames. +**Prevention:** Implement strict IP validation (`is_allowed_ip`) and pin DNS resolution to a verified IP using `reqwest::blocking::ClientBuilder::resolve()`. Ensure all resolved IPs for a hostname are validated to prevent DNS rebinding attacks where an attacker controls multiple A records. 
/// Returns `true` when `ip` is a publicly routable address that an outbound
/// fetch may contact, and `false` for anything internal or special-purpose.
///
/// This is the SSRF gate: every address a hostname resolves to must pass this
/// check before a connection is attempted.
///
/// Rejected IPv4 ranges:
/// - `0.0.0.0/8` ("this network"), `127.0.0.0/8` (loopback)
/// - `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16` (private)
/// - `100.64.0.0/10` (shared/CGNAT address space, often used for
///   provider-internal services)
/// - `169.254.0.0/16` (link-local, includes cloud metadata endpoints)
/// - `192.0.0.0/24`, `198.18.0.0/15` (protocol assignments, benchmarking)
/// - `192.0.2.0/24`, `198.51.100.0/24`, `203.0.113.0/24` (documentation)
/// - `224.0.0.0/4` and above (multicast, reserved, broadcast)
///
/// Rejected IPv6 ranges:
/// - `::` and `::1`
/// - `fc00::/7` (unique local), `fe80::/10` (link-local),
///   `fec0::/10` (deprecated site-local), `ff00::/8` (multicast)
/// - `2001::/32` (Teredo) and `2001:db8::/32` (documentation)
///
/// IPv6 forms that carry an IPv4 address (IPv4-mapped, IPv4-compatible,
/// NAT64 `64:ff9b::/96`, and 6to4 `2002::/16`) are judged by the IPv4
/// address they embed, so an internal IPv4 target cannot be disguised as IPv6.
fn is_allowed_ip(ip: &std::net::IpAddr) -> bool {
    use std::net::{IpAddr, Ipv4Addr};

    /// IPv4 policy, shared by the native IPv4 path and every IPv6 form that
    /// embeds an IPv4 address.
    fn is_allowed_v4(ip: Ipv4Addr) -> bool {
        let [a, b, c, _] = ip.octets();
        !matches!(
            (a, b, c),
            (0, _, _)                 // 0.0.0.0/8
                | (10, _, _)          // 10.0.0.0/8
                | (100, 64..=127, _)  // 100.64.0.0/10
                | (127, _, _)         // 127.0.0.0/8
                | (169, 254, _)       // 169.254.0.0/16
                | (172, 16..=31, _)   // 172.16.0.0/12
                | (192, 0, 0)         // 192.0.0.0/24
                | (192, 0, 2)         // 192.0.2.0/24
                | (192, 168, _)       // 192.168.0.0/16
                | (198, 18..=19, _)   // 198.18.0.0/15
                | (198, 51, 100)      // 198.51.100.0/24
                | (203, 0, 113)       // 203.0.113.0/24
                | (224..=255, _, _) // multicast, reserved, broadcast
        )
    }

    match ip {
        IpAddr::V4(v4) => is_allowed_v4(*v4),
        IpAddr::V6(v6) => {
            let seg = v6.segments();
            // Rebuild an IPv4 address from two consecutive 16-bit segments.
            let embedded_v4 =
                |hi: u16, lo: u16| Ipv4Addr::from((u32::from(hi) << 16) | u32::from(lo));

            // IPv4-mapped (::ffff:a.b.c.d) and IPv4-compatible (::a.b.c.d).
            // `::` and `::1` also land here and are rejected via 0.0.0.0/8.
            if seg[..5].iter().all(|&s| s == 0) && (seg[5] == 0 || seg[5] == 0xffff) {
                return is_allowed_v4(embedded_v4(seg[6], seg[7]));
            }

            // NAT64: only the well-known 64:ff9b::/96 prefix maps to a global
            // IPv4 address; the rest of 64:ff9b::/32 (e.g. the local-use
            // 64:ff9b:1::/48) is never public.
            if seg[0] == 0x0064 && seg[1] == 0xff9b {
                return seg[2..6].iter().all(|&s| s == 0)
                    && is_allowed_v4(embedded_v4(seg[6], seg[7]));
            }

            // 6to4: the IPv4 address sits directly after the 2002::/16 prefix.
            if seg[0] == 0x2002 {
                return is_allowed_v4(embedded_v4(seg[1], seg[2]));
            }

            let blocked = (seg[0] == 0x2001 && (seg[1] == 0x0000 || seg[1] == 0x0db8)) // Teredo, documentation
                || seg[0] & 0xfe00 == 0xfc00 // unique local fc00::/7
                || seg[0] & 0xffc0 == 0xfe80 // link-local fe80::/10
                || seg[0] & 0xffc0 == 0xfec0 // site-local fec0::/10 (deprecated)
                || seg[0] & 0xff00 == 0xff00; // multicast ff00::/8
            !blocked
        }
    }
}
handle_web_fetch(params: Value) -> PluginResult { debug!(url = %args.url, "Fetching URL content"); - // Use reqwest to fetch the URL content - let response = reqwest::blocking::get(&args.url).map_err(|e| PluginSystemError::IoError { - message: format!("Failed to fetch URL: {}", e), - })?; + let mut current_url_str = args.url.clone(); + let mut redirects = 0; + const MAX_REDIRECTS: usize = 5; - let html_content = response.text().map_err(|e| PluginSystemError::IoError { - message: format!("Failed to read response text: {}", e), - })?; + let html_content = loop { + if redirects > MAX_REDIRECTS { + return Err(PluginSystemError::IoError { + message: "Too many redirects".to_string(), + }); + } + + let parsed_url = Url::parse(¤t_url_str).map_err(|e| PluginSystemError::IoError { + message: format!("Invalid URL: {}", e), + })?; + + if parsed_url.scheme() != "http" && parsed_url.scheme() != "https" { + return Err(PluginSystemError::IoError { + message: "Only HTTP/HTTPS allowed".to_string(), + }); + } + + let host = parsed_url + .host_str() + .ok_or_else(|| PluginSystemError::IoError { + message: "No host in URL".to_string(), + })?; + let port = + parsed_url + .port_or_known_default() + .ok_or_else(|| PluginSystemError::IoError { + message: "No port could be determined".to_string(), + })?; + + // Important memory: do not strip brackets when appending port for resolution + let addr_str = format!("{}:{}", host, port); + + // DNS Resolution (SSRF prevention) + let addrs = addr_str + .to_socket_addrs() + .map_err(|e| PluginSystemError::IoError { + message: format!("DNS error: {}", e), + })?; + + let mut resolved_ips = Vec::new(); + for addr in addrs { + resolved_ips.push(addr.ip()); + } + + if resolved_ips.is_empty() { + return Err(PluginSystemError::IoError { + message: "No IPs resolved".to_string(), + }); + } + + // Validate all resolved IPs to prevent DNS rebinding attacks + for ip in &resolved_ips { + if !is_allowed_ip(ip) { + return Err(PluginSystemError::IoError { + 
message: format!("Blocked access to private/internal IP: {}", ip), + }); + } + } + + // Pin DNS resolution to the first safe IP to prevent TOCTOU DNS rebinding, while preserving original host for SNI + let client = reqwest::blocking::Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .resolve(host, std::net::SocketAddr::new(resolved_ips[0], port)) + .build() + .map_err(|e| PluginSystemError::IoError { + message: format!("Client build error: {}", e), + })?; + + let response = + client + .get(parsed_url.clone()) + .send() + .map_err(|e| PluginSystemError::IoError { + message: format!("Failed to fetch URL: {}", e), + })?; + + // Manual redirect tracking + if response.status().is_redirection() { + if let Some(loc) = response.headers().get(reqwest::header::LOCATION) { + let loc_str = loc.to_str().map_err(|_| PluginSystemError::IoError { + message: "Invalid location header".to_string(), + })?; + let next_url = + parsed_url + .join(loc_str) + .map_err(|_| PluginSystemError::IoError { + message: "Invalid redirect URL".to_string(), + })?; + current_url_str = next_url.to_string(); + redirects += 1; + continue; + } else { + return Err(PluginSystemError::IoError { + message: "Redirect missing location header".to_string(), + }); + } + } + + if !response.status().is_success() { + return Err(PluginSystemError::IoError { + message: format!("HTTP error: {}", response.status()), + }); + } + + break response.text().map_err(|e| PluginSystemError::IoError { + message: format!("Failed to read response text: {}", e), + })?; + }; // Convert HTML to Markdown for easier AI processing let markdown_content = html2md_rs::to_md::safe_from_html_to_md(html_content).map_err(|e| {