Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .jules/sentinel.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
**Vulnerability:** The `OperationQueue` worker in `mill-server` executed file operations (create, write, delete, rename) using raw paths from the operation object without validating they were within the project root.
**Learning:** Background workers that process serialized operations are a common bypass for security checks enforced at the API layer. The API layer might validate the request, but if the worker is "dumb" and blindly executes the queued operation, an internal attacker or a buggy component can exploit it.
**Prevention:** Validation must happen at the *execution point* (in the worker), not just at the ingestion point. We introduced `validate_path` in the worker loop to enforce project root containment using `canonicalize` (handling non-existent files correctly).

## 2025-06-03 - SSRF Vulnerability in Web Fetch Tool
**Vulnerability:** The `web_fetch` tool in `mill-plugin-system` used `reqwest::blocking::get` to fetch arbitrary user-provided URLs without validating the resolved IP addresses, allowing Server-Side Request Forgery (SSRF) against internal services.
**Learning:** Checking the URL string is not enough to prevent SSRF. Attackers can use DNS Rebinding (TOCTOU) to point a seemingly benign domain to an internal IP after the initial validation. Furthermore, if a domain resolves to multiple IPs, an attacker can mix safe and malicious IPs.
**Prevention:** We must manually resolve the domain to its IPs using `std::net::ToSocketAddrs`, validate that *all* returned IPs are allowed (not local/private subnets), and then pin the connection to one of the validated IPs using `reqwest::blocking::ClientBuilder::resolve()`. We must also disable automatic redirects and manually track them to apply this same validation to each redirect destination, up to a maximum limit.
187 changes: 180 additions & 7 deletions crates/mill-plugin-system/src/system_tools_plugin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,68 @@ use ignore::WalkBuilder;
use mill_plugin_api::language::detect_package_manager;
use serde::Deserialize;
use serde_json::{json, Value};
use std::net::{IpAddr, ToSocketAddrs};
use std::path::Path;
use std::sync::Arc;
use tracing::{debug, warn};
use url::Url;

/// Check if an IP address is allowed to be accessed by the web_fetch tool.
/// Blocks loopback, private, unspecified, multicast, broadcast, and link-local IPs.
fn is_allowed_ip(ip: &IpAddr) -> bool {
match ip {
IpAddr::V4(ipv4) => {
let octets = ipv4.octets();
if octets[0] == 127 {
return false;
}
if octets[0] == 10 {
return false;
}
if octets[0] == 172 && (16..=31).contains(&octets[1]) {
return false;
}
if octets[0] == 192 && octets[1] == 168 {
return false;
}
if octets[0] == 0 {
return false;
}
if (224..=239).contains(&octets[0]) {
return false;
}
if octets == [255, 255, 255, 255] {
return false;
}
if octets[0] == 169 && octets[1] == 254 {
return false;
}
true

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Block shared internal IPv4 ranges

Because this fall-through allows every IPv4 address not explicitly listed, web_fetch still permits non-public ranges such as 100.64.0.0/10 and 198.18.0.0/15. In deployments that use those ranges for VPC/container/internal services, an attacker can fetch http://100.64.x.y/... and bypass the SSRF protection even though the address is not globally routable; the allowlist should reject all special-use/non-global ranges, not just RFC1918/link-local.

Useful? React with πŸ‘Β / πŸ‘Ž.

}
IpAddr::V6(ipv6) => {
if let Some(ipv4) = ipv6.to_ipv4_mapped() {
return is_allowed_ip(&IpAddr::V4(ipv4));
}
if ipv6.is_loopback() {
return false;
}
if ipv6.is_unspecified() {
return false;
}
if ipv6.is_multicast() {
return false;
}
let segments = ipv6.segments();
if (segments[0] & 0xfe00) == 0xfc00 {
return false;
}
if (segments[0] & 0xffc0) == 0xfe80 {
return false;
}
true
}
}
}

/// System tools plugin for non-LSP workspace operations
pub struct SystemToolsPlugin {
Expand Down Expand Up @@ -307,14 +366,128 @@ fn handle_web_fetch(params: Value) -> PluginResult<Value> {

debug!(url = %args.url, "Fetching URL content");

// Use reqwest to fetch the URL content
let response = reqwest::blocking::get(&args.url).map_err(|e| PluginSystemError::IoError {
message: format!("Failed to fetch URL: {}", e),
})?;
let mut current_url_str = args.url.clone();
let max_redirects = 5;
let mut redirect_count = 0;
let html_content: String;

let html_content = response.text().map_err(|e| PluginSystemError::IoError {
message: format!("Failed to read response text: {}", e),
})?;
loop {
if redirect_count >= max_redirects {
return Err(PluginSystemError::IoError {
message: "Too many redirects".to_string(),
});
}

let parsed_url = Url::parse(&current_url_str).map_err(|e| PluginSystemError::IoError {
message: format!("Invalid URL: {}", e),
})?;

// Only allow HTTP/HTTPS
let scheme = parsed_url.scheme();
if scheme != "http" && scheme != "https" {
return Err(PluginSystemError::IoError {
message: "Unsupported URL scheme. Only HTTP and HTTPS are allowed.".to_string(),
});
}

let host = parsed_url
.host_str()
.ok_or_else(|| PluginSystemError::IoError {
message: "Missing host in URL".to_string(),
})?;

let port = parsed_url
.port_or_known_default()
.unwrap_or(if scheme == "https" { 443 } else { 80 });

// Resolve IPs
// Note: For IPv6 literals we need to make sure the brackets are preserved before appending port
let host_for_resolution = if host.contains(':') && !host.starts_with('[') {
format!("[{}]:{}", host, port)
} else {
format!("{}:{}", host, port)
};

let addrs_iter =
host_for_resolution
.to_socket_addrs()
.map_err(|e| PluginSystemError::IoError {
message: format!("Failed to resolve hostname: {}", e),
})?;

let addrs: Vec<_> = addrs_iter.collect();
if addrs.is_empty() {
return Err(PluginSystemError::IoError {
message: "No IP addresses found for hostname".to_string(),
});
}

// Validate ALL resolved IPs to prevent DNS rebinding attacks mixing safe and malicious IPs
for addr in &addrs {
if !is_allowed_ip(&addr.ip()) {
return Err(PluginSystemError::IoError {
message: "Access to private/local IP address is blocked (SSRF Protection)"
.to_string(),
});
}
}

// Pin the connection to the first validated IP, disabling automatic redirects.
// This ensures the connection is made to a validated IP, mitigating DNS rebinding (TOCTOU) attacks,
// while preserving the original host for SNI and HTTP headers.
let pinned_ip = addrs[0];

let client = reqwest::blocking::Client::builder()
.redirect(reqwest::redirect::Policy::none())
.resolve(host, pinned_ip)
.build()
.map_err(|e| PluginSystemError::IoError {
message: format!("Failed to build HTTP client: {}", e),
})?;

let response =
client
.get(parsed_url.clone())
.send()
.map_err(|e| PluginSystemError::IoError {
message: format!("Failed to fetch URL: {}", e),
})?;

if response.status().is_redirection() {
if let Some(loc) = response.headers().get(reqwest::header::LOCATION) {
let loc_str = loc.to_str().map_err(|_| PluginSystemError::IoError {
message: "Invalid Location header".to_string(),
})?;

let next_url =
parsed_url
.join(loc_str)
.map_err(|e| PluginSystemError::IoError {
message: format!("Invalid redirect URL: {}", e),
})?;

current_url_str = next_url.into();
redirect_count += 1;
continue;
} else {
return Err(PluginSystemError::IoError {
message: "Redirect missing Location header".to_string(),
});
}
}

if !response.status().is_success() {
return Err(PluginSystemError::IoError {
message: format!("HTTP error: {}", response.status()),
});
}

let content = response.text().map_err(|e| PluginSystemError::IoError {
message: format!("Failed to read response text: {}", e),
})?;
html_content = content;
break;
}

// Convert HTML to Markdown for easier AI processing
let markdown_content = html2md_rs::to_md::safe_from_html_to_md(html_content).map_err(|e| {
Expand Down
Loading
Loading