diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 798ec7de6..ba65020dd 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -2,3 +2,8 @@ **Vulnerability:** The `OperationQueue` worker in `mill-server` executed file operations (create, write, delete, rename) using raw paths from the operation object without validating they were within the project root. **Learning:** Background workers that process serialized operations are a common bypass for security checks enforced at the API layer. The API layer might validate the request, but if the worker is "dumb" and blindly executes the queued operation, an internal attacker or a buggy component can exploit it. **Prevention:** Validation must happen at the *execution point* (in the worker), not just at the ingestion point. We introduced `validate_path` in the worker loop to enforce project root containment using `canonicalize` (handling non-existent files correctly). + +## 2025-06-07 - Path Traversal Bypass via `starts_with` and `..` Components +**Vulnerability:** The `validate_path` function in `mill-server` checked if a canonical path was within the `project_root` using `canonical.starts_with(&canonical_root)`. However, if the requested path did not exist on the filesystem, the code constructed the path by appending components (including `..`) to the nearest existing ancestor. The resulting `PathBuf` contained unresolved `..` components. Since `Path::starts_with` operates on components lexically without resolving them, a path like `/tmp/root/../../etc/passwd` correctly bypassed the check, evaluating as "starting with" `/tmp/root`. +**Learning:** `Path::starts_with` is unsafe for containment checks if the path has not been fully lexically normalized or canonicalized, as it evaluates component-by-component, not by absolute filesystem location. +**Prevention:** If `fs::canonicalize` cannot be used (e.g., the path doesn't exist), you must strictly lexically normalize the path (resolving all `.` and `..` components) before using `starts_with` for boundary checks. diff --git a/crates/mill-server/src/lib.rs b/crates/mill-server/src/lib.rs index 944370357..5aac51f69 100644 --- a/crates/mill-server/src/lib.rs +++ b/crates/mill-server/src/lib.rs @@ -281,6 +281,25 @@ impl ServerHandle { } } +/// Helper function to lexically normalize a path, resolving `.` and `..` components. +/// Security concern: we need this because `fs::canonicalize` on non-existent paths +/// can't resolve `..` components fully, leaving them in the path string, +/// which can later bypass `starts_with` checks if not strictly normalized. +fn normalize_path(path: &std::path::Path) -> PathBuf { + use std::path::Component; + let mut normalized = PathBuf::new(); + for component in path.components() { + match component { + Component::ParentDir => { + normalized.pop(); + } + Component::CurDir => {} + _ => normalized.push(component), + } + } + normalized +} + /// Convert path to absolute and verify it's within project root async fn validate_path( project_root: &std::path::Path, @@ -358,15 +377,20 @@ async fn validate_path( canonical }; + // Security concern: the reconstructed canonical path might contain `..` + // components if the path doesn't exist, which can spoof `starts_with` checks. + // Lexically normalizing the path fully removes these `..` components. + let normalized_canonical = normalize_path(&canonical); + // Verify containment within project root - if !canonical.starts_with(&canonical_root) { + if !normalized_canonical.starts_with(&canonical_root) { return Err(ServerError::permission_denied(format!( "Path traversal detected: {:?} escapes project root {:?}", path, project_root ))); } - Ok(canonical) + Ok(normalized_canonical) } /// Spawn a worker to process file operations in the background diff --git a/crates/mill-server/src/worker_tests.rs b/crates/mill-server/src/worker_tests.rs index 04d3b37eb..8ceabda56 100644 --- a/crates/mill-server/src/worker_tests.rs +++ b/crates/mill-server/src/worker_tests.rs @@ -64,6 +64,67 @@ async fn test_worker_path_traversal_prevention() { } } +#[tokio::test] +async fn test_worker_path_traversal_non_existent() { + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path().to_path_buf(); + + // Create a mock "outside" dir parallel to root just to verify it can write there + let parent_dir = root.parent().unwrap(); + let malicious_target = parent_dir.join("hacked_via_traversal.txt"); + + // Setup queue + let lock_manager = Arc::new(LockManager::new()); + let queue = Arc::new(OperationQueue::new(lock_manager)); + + // Spawn worker + spawn_operation_worker(queue.clone(), root.clone()); + + // Exploit: path is non_existent_dir/../../hacked_via_traversal.txt + // Since non_existent_dir doesn't exist, validate_path steps back to root, + // canonicalizes root, and without `normalize_path`, literally appends ".." and ".." + // and "hacked_via_traversal.txt". + // This results in /tmp/root/../../hacked_via_traversal.txt. + // canonical.starts_with(root) returns TRUE because the literal string "/tmp/root/../../..." + // starts with "/tmp/root". + // BUT the OS will interpret it as escaping! + + let exploit_path = PathBuf::from("non_existent_dir") + .join("..") + .join("..") + .join("hacked_via_traversal.txt"); + + let op = FileOperation::new( + "malicious_tool".to_string(), + OperationType::Write, + exploit_path.clone(), + json!({"content": "hacked"}), + ); + + queue.enqueue(op).await.unwrap(); + + // Wait for processing + tokio::time::sleep(Duration::from_millis(500)).await; + + let stats = queue.get_stats().await; + + // Check if it created the file + let created = malicious_target.exists(); + if created { + std::fs::remove_file(&malicious_target).unwrap(); + } + + assert!( + !created, + "Vulnerability works! File created at: {:?}", + malicious_target + ); + assert_eq!( + stats.failed_operations, 1, + "Should have failed the operation due to path traversal prevention" + ); +} + #[tokio::test] async fn test_worker_absolute_path_traversal() { let temp_dir = TempDir::new().unwrap();