From 7974192c8c736df5a37ee459bdf88a1c834856d2 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:12:24 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[CRITICAL]?= =?UTF-8?q?=20Fix=20SSRF=20vulnerability=20in=20web=5Ffetch=20tool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🚨 Severity: CRITICAL 💡 Vulnerability: The `web_fetch` tool used `reqwest::blocking::get` without any validation of the target URL or IPs, allowing it to fetch arbitrary internal network resources, rendering it vulnerable to Server-Side Request Forgery (SSRF) and Time-of-Check to Time-of-Use (TOCTOU) DNS Rebinding attacks. 🎯 Impact: Attackers could trick the server into interacting with internal network services (e.g. metadata servers, internal databases) or bypassing external firewalls. 🔧 Fix: - Added `is_allowed_ip` helper function to block loopback, private networks, unspecified, multicast, broadcast, and link-local IPs across both IPv4 and IPv6. - Overhauled `handle_web_fetch` to manually resolve DNS, validate *all* returned IPs against the blocklist, and pin the validated IP to the HTTP client using `reqwest::blocking::ClientBuilder::resolve`. - Disabled automatic redirects and implemented manual redirect tracking (up to 5 redirects) through the same strict validation pipeline. ✅ Verification: `cargo test --package mill-plugin-system` passed. Tested with internal IPs (127.0.0.1) which are now successfully blocked. Recorded the vulnerability pattern in `.jules/sentinel.md`. Co-authored-by: mudcube <101564+mudcube@users.noreply.github.com> --- .jules/sentinel.md | 5 + .../src/system_tools_plugin.rs | 124 +++++++++++++++++- 2 files changed, 123 insertions(+), 6 deletions(-) diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 798ec7de6..189626471 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -2,3 +2,8 @@ **Vulnerability:** The `OperationQueue` worker in `mill-server` executed file operations (create, write, delete, rename) using raw paths from the operation object without validating they were within the project root. **Learning:** Background workers that process serialized operations are a common bypass for security checks enforced at the API layer. The API layer might validate the request, but if the worker is "dumb" and blindly executes the queued operation, an internal attacker or a buggy component can exploit it. **Prevention:** Validation must happen at the *execution point* (in the worker), not just at the ingestion point. We introduced `validate_path` in the worker loop to enforce project root containment using `canonicalize` (handling non-existent files correctly). + +## 2024-06-01 - SSRF in Web Fetch Tool +**Vulnerability:** The `web_fetch` tool used `reqwest::blocking::get` without any validation of the target URL or IPs, making it vulnerable to Server-Side Request Forgery (SSRF) and DNS Rebinding. +**Learning:** Blindly fetching user-provided URLs in AI tools or background tasks allows access to internal network resources. Even with a blocklist, TOCTOU vulnerabilities (DNS Rebinding) exist if the resolved IP isn't pinned. +**Prevention:** Manually resolve the URL's hostname to IP addresses, strictly validate *all* returned IPs against an allowlist (blocking private, loopback, link-local, etc.), and pin the HTTP client to the validated IP using `reqwest::blocking::ClientBuilder::resolve`. Also, disable automatic redirects and manually track/validate them up to a safe limit. diff --git a/crates/mill-plugin-system/src/system_tools_plugin.rs b/crates/mill-plugin-system/src/system_tools_plugin.rs index e20b7d1ae..340d421d3 100644 --- a/crates/mill-plugin-system/src/system_tools_plugin.rs +++ b/crates/mill-plugin-system/src/system_tools_plugin.rs @@ -12,10 +12,42 @@ use ignore::WalkBuilder; use mill_plugin_api::language::detect_package_manager; use serde::Deserialize; use serde_json::{json, Value}; +use std::net::{IpAddr, ToSocketAddrs}; use std::path::Path; use std::sync::Arc; use tracing::{debug, warn}; +/// Validates an IP address to prevent SSRF by rejecting loopback, private, +/// unspecified, multicast, broadcast, and link-local addresses. +fn is_allowed_ip(ip: IpAddr) -> bool { + match ip { + IpAddr::V4(ipv4) => { + let o = ipv4.octets(); + // Block 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8, 169.254.0.0/16, 172.16.0.0/12, 192.168.0.0/16, 224.0.0.0/4 and broadcast + !(o[0] == 0 + || o[0] == 10 + || o[0] == 127 + || (o[0] == 169 && o[1] == 254) + || (o[0] == 172 && (16..=31).contains(&o[1])) + || (o[0] == 192 && o[1] == 168) + || (224..=239).contains(&o[0]) + || ipv4.is_broadcast()) + } + IpAddr::V6(ipv6) => { + if let Some(mapped) = ipv6.to_ipv4_mapped() { + return is_allowed_ip(IpAddr::V4(mapped)); + } + let s = ipv6.segments(); + // Block loopback, unspecified, multicast, Unique Local (fc00::/7), and Link-Local (fe80::/10) + !(ipv6.is_loopback() + || ipv6.is_unspecified() + || ipv6.is_multicast() + || (s[0] & 0xfe00) == 0xfc00 + || (s[0] & 0xffc0) == 0xfe80) + } + } +} + /// System tools plugin for non-LSP workspace operations pub struct SystemToolsPlugin { metadata: PluginMetadata, @@ -307,14 +339,94 @@ fn handle_web_fetch(params: Value) -> PluginResult { debug!(url = %args.url, "Fetching URL content"); - // Use reqwest to fetch the URL content - let response = reqwest::blocking::get(&args.url).map_err(|e| PluginSystemError::IoError { - message: format!("Failed to fetch URL: {}", e), + let mut current_url = url::Url::parse(&args.url).map_err(|e| PluginSystemError::IoError { + message: format!("Invalid URL: {}", e), })?; - let html_content = response.text().map_err(|e| PluginSystemError::IoError { - message: format!("Failed to read response text: {}", e), - })?; + let mut html_content = String::new(); + let mut redirect_count = 0; + let max_redirects = 5; + + while redirect_count < max_redirects { + let host_str = current_url + .host_str() + .ok_or_else(|| PluginSystemError::IoError { + message: "URL must have a host".to_string(), + })?; + let port = current_url.port_or_known_default().unwrap_or(80); + + let valid_addrs: Vec<_> = format!("{}:{}", host_str, port) + .to_socket_addrs() + .map_err(|e| PluginSystemError::IoError { + message: format!("Failed to resolve DNS: {}", e), + })? + .collect(); + + if valid_addrs.is_empty() { + return Err(PluginSystemError::IoError { + message: "No IPs resolved for host".to_string(), + }); + } + + // Verify *all* resolved IPs to prevent DNS rebinding attacks returning multiple A records + for addr in &valid_addrs { + if !is_allowed_ip(addr.ip()) { + warn!(ip = %addr.ip(), url = %current_url, "Blocked fetch to disallowed IP"); + return Err(PluginSystemError::IoError { + message: "Fetch to internal or disallowed IP address blocked for security" + .to_string(), + }); + } + } + + let pinned_ip = valid_addrs[0]; + + let client = reqwest::blocking::Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .resolve(host_str, pinned_ip) + .build() + .map_err(|e| PluginSystemError::IoError { + message: format!("Failed to build HTTP client: {}", e), + })?; + + let response = + client + .get(current_url.clone()) + .send() + .map_err(|e| PluginSystemError::IoError { + message: format!("Failed to fetch URL: {}", e), + })?; + + if response.status().is_redirection() { + let loc = + response + .headers() + .get("location") + .ok_or_else(|| PluginSystemError::IoError { + message: "Redirect missing location header".to_string(), + })?; + let loc_str = loc.to_str().map_err(|_| PluginSystemError::IoError { + message: "Invalid location header".to_string(), + })?; + current_url = current_url + .join(loc_str) + .map_err(|e| PluginSystemError::IoError { + message: format!("Failed to parse redirect URL: {}", e), + })?; + redirect_count += 1; + } else { + html_content = response.text().map_err(|e| PluginSystemError::IoError { + message: format!("Failed to read response text: {}", e), + })?; + break; + } + } + + if redirect_count >= max_redirects { + return Err(PluginSystemError::IoError { + message: "Too many redirects".to_string(), + }); + } // Convert HTML to Markdown for easier AI processing let markdown_content = html2md_rs::to_md::safe_from_html_to_md(html_content).map_err(|e| {