diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 798ec7de6..198ab3be3 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -2,3 +2,8 @@ **Vulnerability:** The `OperationQueue` worker in `mill-server` executed file operations (create, write, delete, rename) using raw paths from the operation object without validating they were within the project root. **Learning:** Background workers that process serialized operations are a common bypass for security checks enforced at the API layer. The API layer might validate the request, but if the worker is "dumb" and blindly executes the queued operation, an internal attacker or a buggy component can exploit it. **Prevention:** Validation must happen at the *execution point* (in the worker), not just at the ingestion point. We introduced `validate_path` in the worker loop to enforce project root containment using `canonicalize` (handling non-existent files correctly). + +## 2024-05-23 - SSRF in Web Fetch Tool +**Vulnerability:** The `web_fetch` tool used `reqwest::blocking::get` directly on user-provided URLs, allowing SSRF. +**Learning:** Simple HTTP clients follow redirects and resolve DNS without checking if the destination IP is internal, allowing attackers to access internal services and metadata endpoints. +**Prevention:** Mitigate SSRF by disabling automatic redirects, manually tracking them, resolving DNS ahead of the request, validating all resolved IP addresses, and pinning the validated IP using `ClientBuilder::resolve` to prevent TOCTOU DNS rebinding. diff --git a/crates/mill-plugin-system/src/system_tools_plugin.rs b/crates/mill-plugin-system/src/system_tools_plugin.rs index e20b7d1ae..553398a7c 100644 --- a/crates/mill-plugin-system/src/system_tools_plugin.rs +++ b/crates/mill-plugin-system/src/system_tools_plugin.rs @@ -12,9 +12,11 @@ use ignore::WalkBuilder; use mill_plugin_api::language::detect_package_manager; use serde::Deserialize; use serde_json::{json, Value}; +use std::net::{IpAddr, ToSocketAddrs}; use std::path::Path; use std::sync::Arc; use tracing::{debug, warn}; +use url::Url; /// System tools plugin for non-LSP workspace operations pub struct SystemToolsPlugin { @@ -292,6 +294,71 @@ async fn handle_bulk_update_dependencies(params: Value) -> PluginResult { })) } +fn is_allowed_ip(ip: std::net::IpAddr) -> bool { + match ip { + IpAddr::V4(ipv4) => { + let octets = ipv4.octets(); + // Block 0.0.0.0/8 (Unspecified/current network) + if octets[0] == 0 { + return false; + } + // Block loopback (127.0.0.0/8) + if octets[0] == 127 { + return false; + } + // Block private network (10.0.0.0/8) + if octets[0] == 10 { + return false; + } + // Block private network (172.16.0.0/12) + if octets[0] == 172 && (16..=31).contains(&octets[1]) { + return false; + } + // Block private network (192.168.0.0/16) + if octets[0] == 192 && octets[1] == 168 { + return false; + } + // Block link-local (169.254.0.0/16) + if octets[0] == 169 && octets[1] == 254 { + return false; + } + // Block broadcast (255.255.255.255) + if ipv4.is_broadcast() { + return false; + } + // Block multicast (224.0.0.0/4) + if ipv4.is_multicast() { + return false; + } + true + } + IpAddr::V6(ipv6) => { + // If IPv4-mapped, check it against IPv4 rules + if let Some(ipv4) = ipv6.to_ipv4_mapped() { + return is_allowed_ip(IpAddr::V4(ipv4)); + } + // Block loopback (::1) and unspecified (::) + if ipv6.is_loopback() || ipv6.is_unspecified() { + return false; + } + let segments = ipv6.segments(); + // Block Unique Local Addresses (fc00::/7) + if segments[0] & 0xfe00 == 0xfc00 { + return false; + } + // Block Link-Local (fe80::/10) + if segments[0] & 0xffc0 == 0xfe80 { + return false; + } + // Block multicast (ff00::/8) + if segments[0] & 0xff00 == 0xff00 { + return false; + } + true + } + } +} + /// Handle web_fetch tool fn handle_web_fetch(params: Value) -> PluginResult { #[derive(Debug, Deserialize)] @@ -307,14 +374,108 @@ fn handle_web_fetch(params: Value) -> PluginResult { debug!(url = %args.url, "Fetching URL content"); - // Use reqwest to fetch the URL content - let response = reqwest::blocking::get(&args.url).map_err(|e| PluginSystemError::IoError { - message: format!("Failed to fetch URL: {}", e), + let mut current_url = Url::parse(&args.url).map_err(|e| PluginSystemError::IoError { + message: format!("Invalid URL: {}", e), })?; - let html_content = response.text().map_err(|e| PluginSystemError::IoError { - message: format!("Failed to read response text: {}", e), - })?; + let mut redirects = 0; + let max_redirects = 5; + + let html_content = loop { + let host_str = current_url + .host_str() + .ok_or_else(|| PluginSystemError::IoError { + message: "URL must have a host".to_string(), + })?; + + let port = current_url.port_or_known_default().unwrap_or(80); + + // Use proper IPv6 literal format for ToSocketAddrs + let socket_addr_str = if host_str.contains(':') && !host_str.starts_with('[') { + format!("[{}]:{}", host_str, port) + } else { + format!("{}:{}", host_str, port) + }; + + // Resolve DNS and validate ALL IPs + let mut addrs = socket_addr_str + .to_socket_addrs() + .map_err(|e| PluginSystemError::IoError { + message: format!("Failed to resolve host: {}", e), + })? + .peekable(); + + if addrs.peek().is_none() { + return Err(PluginSystemError::IoError { + message: "Host did not resolve to any IP addresses".to_string(), + }); + } + + let mut pinned_ip = None; + + for addr in addrs { + let ip = addr.ip(); + if !is_allowed_ip(ip) { + return Err(PluginSystemError::IoError { + message: "Access to private or local IP addresses is not allowed".to_string(), + }); + } + if pinned_ip.is_none() { + pinned_ip = Some(addr); + } + } + + let pinned_ip = pinned_ip.unwrap(); + + // Ensure we send the request to the exact IP we validated, preserving the SNI/Host header + let request_client = reqwest::blocking::Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .resolve(host_str, pinned_ip) + .build() + .map_err(|e| PluginSystemError::IoError { + message: format!("Failed to build client with pinned IP: {}", e), + })?; + + let response = request_client + .get(current_url.clone()) + .send() + .map_err(|e| PluginSystemError::IoError { + message: format!("Failed to fetch URL: {}", e), + })?; + + if response.status().is_redirection() { + if redirects >= max_redirects { + return Err(PluginSystemError::IoError { + message: "Too many redirects".to_string(), + }); + } + + let location = response + .headers() + .get(reqwest::header::LOCATION) + .and_then(|l| l.to_str().ok()) + .ok_or_else(|| PluginSystemError::IoError { + message: "Redirect missing Location header".to_string(), + })?; + + current_url = current_url + .join(location) + .map_err(|e| PluginSystemError::IoError { + message: format!("Invalid redirect URL: {}", e), + })?; + + redirects += 1; + continue; + } else if !response.status().is_success() { + return Err(PluginSystemError::IoError { + message: format!("HTTP error: {}", response.status()), + }); + } + + break response.text().map_err(|e| PluginSystemError::IoError { + message: format!("Failed to read response text: {}", e), + })?; + }; // Convert HTML to Markdown for easier AI processing let markdown_content = html2md_rs::to_md::safe_from_html_to_md(html_content).map_err(|e| {