From 6ccc5cc99376ccf049ba8d6bcad397fb0679e919 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 4 Mar 2026 12:47:00 -0800 Subject: [PATCH 01/14] feat(vm): add navigator-vm crate with libkrun microVM and k3s gateway Add a new navigator-vm library crate that boots k3s inside a libkrun microVM, accessible from the host via gvproxy port forwarding. Key components: - FFI bindings to libkrun C API (krun_create_ctx, krun_add_net_unixgram, etc.) - VmConfig with gateway() preset for k3s and custom exec mode - gvproxy integration: virtio-net via unixgram, DHCP, native HTTP port forwarding - gateway-init.sh: PID 1 init script with DHCP via udhcpc, mounts, k3s exec - build-rootfs.sh: builds Ubuntu 22.04 arm64 rootfs with k3s + busybox-static - Kubeconfig auto-extraction to ~/.kube/gateway.yaml - CLI integration as 'ncl gateway' with --exec, --port, --net flags - macOS codesigning and DYLD_FALLBACK_LIBRARY_PATH in ncl wrapper --- .gitignore | 3 + Cargo.lock | 810 +++++++++----------- crates/openshell-bootstrap/src/lib.rs | 2 +- crates/openshell-bootstrap/src/paths.rs | 12 +- crates/openshell-cli/Cargo.toml | 1 + crates/openshell-cli/src/main.rs | 116 +++ crates/openshell-core/src/paths.rs | 13 + crates/openshell-vm/Cargo.toml | 23 + crates/openshell-vm/build.rs | 35 + crates/openshell-vm/entitlements.plist | 8 + crates/openshell-vm/scripts/api-proxy.py | 132 ++++ crates/openshell-vm/scripts/build-rootfs.sh | 107 +++ crates/openshell-vm/scripts/gateway-init.sh | 124 +++ crates/openshell-vm/scripts/hello-server.py | 49 ++ crates/openshell-vm/src/ffi.rs | 86 +++ crates/openshell-vm/src/lib.rs | 718 +++++++++++++++++ scripts/bin/openshell | 19 +- 17 files changed, 1791 insertions(+), 467 deletions(-) create mode 100644 crates/openshell-vm/Cargo.toml create mode 100644 crates/openshell-vm/build.rs create mode 100644 crates/openshell-vm/entitlements.plist create mode 100644 crates/openshell-vm/scripts/api-proxy.py create mode 100755 crates/openshell-vm/scripts/build-rootfs.sh 
create mode 100755 crates/openshell-vm/scripts/gateway-init.sh create mode 100644 crates/openshell-vm/scripts/hello-server.py create mode 100644 crates/openshell-vm/src/ffi.rs create mode 100644 crates/openshell-vm/src/lib.rs diff --git a/.gitignore b/.gitignore index 32610f71..145c3069 100644 --- a/.gitignore +++ b/.gitignore @@ -181,6 +181,9 @@ kubeconfig # Documentation build output _build/ +# Gateway microVM rootfs build artifacts +rootfs/ + # Docker build artifacts (image tarballs, packaged helm charts) deploy/docker/.build/ diff --git a/Cargo.lock b/Cargo.lock index 3d01356a..8417effa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,9 +91,9 @@ dependencies = [ [[package]] name = "anstream" -version = "1.0.0" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -112,9 +112,9 @@ checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" -version = "1.0.0" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] @@ -141,9 +141,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.102" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" [[package]] name = "argon2" @@ -186,7 +186,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -197,7 +197,7 
@@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -287,7 +287,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core 0.5.6", - "base64 0.22.1", "bytes", "form_urlencoded", "futures-util", @@ -306,10 +305,8 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sha1 0.10.6", "sync_wrapper", "tokio", - "tokio-tungstenite 0.28.0", "tower 0.5.3", "tower-layer", "tower-service", @@ -433,9 +430,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.11.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" dependencies = [ "serde_core", ] @@ -542,9 +539,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.20.2" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "byteorder" @@ -584,9 +581,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.56" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "jobserver", @@ -643,9 +640,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.6.0" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +checksum = 
"6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" dependencies = [ "clap_builder", "clap_derive", @@ -653,45 +650,51 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.6.0" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" dependencies = [ "anstream", "anstyle", - "clap_lex", + "clap_lex 0.7.7", "strsim", ] [[package]] name = "clap_complete" -version = "4.6.0" +version = "4.5.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19c9f1dde76b736e3681f28cec9d5a61299cbaae0fce80a68e43724ad56031eb" +checksum = "c757a3b7e39161a4e56f9365141ada2a6c915a8622c408ab6bb4b5d047371031" dependencies = [ "clap", - "clap_lex", + "clap_lex 1.0.0", "is_executable", "shlex", ] [[package]] name = "clap_derive" -version = "4.6.0" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] name = "clap_lex" -version = "1.1.0" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "clap_lex" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" @@ -983,7 +986,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] 
[[package]] @@ -1007,7 +1010,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -1018,7 +1021,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -1053,7 +1056,7 @@ checksum = "780eb241654bf097afb00fc5f054a09b687dad862e485fdcf8399bb056565370" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -1080,9 +1083,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.8" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" dependencies = [ "powerfmt", ] @@ -1142,7 +1145,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -1247,7 +1250,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -1267,7 +1270,7 @@ checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -1403,9 +1406,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "futures" -version = "0.3.32" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -1418,9 +1421,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.32" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -1428,15 +1431,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.32" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.32" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -1456,38 +1459,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.32" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-macro" -version = "0.3.32" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] name = "futures-sink" -version = "0.3.32" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.32" +version = "0.3.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.32" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -1497,6 +1500,7 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", + "pin-utils", "slab", ] @@ -1683,7 +1687,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -1797,9 +1801,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hybrid-array" -version = "0.4.8" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8655f91cd07f2b9d0c24137bd650fe69617773435ee5ec83022377777ce65ef1" +checksum = "e1b229d73f5803b562cc26e4da0396c8610a4ee209f4fac8fa4f8d709166dc45" dependencies = [ "typenum", ] @@ -1891,7 +1895,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2 0.6.2", "tokio", "tower-service", "tracing", @@ -2137,9 +2141,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.12.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" @@ -2217,9 +2221,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -2341,7 +2345,7 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -2398,9 +2402,9 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" -version = "0.2.183" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libcrux-intrinsics" @@ -2476,14 +2480,13 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.14" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ "bitflags", "libc", - "plain", - "redox_syscall 0.7.3", + "redox_syscall 0.7.0", ] [[package]] @@ -2505,9 +2508,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "linux-raw-sys" -version = "0.12.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" @@ -2584,9 +2587,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.8.0" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" 
+checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "miette" @@ -2615,7 +2618,7 @@ checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -2674,135 +2677,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" [[package]] -name = "nix" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" -dependencies = [ - "bitflags", - "cfg-if", - "cfg_aliases", - "libc", -] - -[[package]] -name = "nu-ansi-term" -version = "0.50.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", - "rand 0.8.5", -] - -[[package]] -name = "num-bigint-dig" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" -dependencies = [ - "lazy_static", - "libm", - "num-integer", - "num-iter", - "num-traits", - "rand 0.8.5", - "serde", - "smallvec", - "zeroize", -] - -[[package]] -name = "num-conv" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" 
-dependencies = [ - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - -[[package]] -name = "object" -version = "0.37.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.21.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" - -[[package]] -name = "once_cell_polyfill" -version = "1.70.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" - -[[package]] -name = "opaque-debug" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" - -[[package]] -name = "openshell-bootstrap" +name = "navigator-bootstrap" version = "0.1.0" dependencies = [ "base64 0.22.1", @@ -2810,10 +2685,10 @@ 
dependencies = [ "bytes", "futures", "miette", - "openshell-core", "rcgen", "serde", "serde_json", + "serde_yaml", "tar", "tempfile", "tokio", @@ -2821,7 +2696,7 @@ dependencies = [ ] [[package]] -name = "openshell-cli" +name = "navigator-cli" version = "0.1.0" dependencies = [ "anyhow", @@ -2837,12 +2712,12 @@ dependencies = [ "hyper-util", "indicatif", "miette", - "nix", - "openshell-bootstrap", - "openshell-core", - "openshell-policy", - "openshell-providers", - "openshell-tui", + "navigator-bootstrap", + "navigator-core", + "navigator-policy", + "navigator-providers", + "navigator-tui", + "navigator-vm", "owo-colors", "prost-types", "rcgen", @@ -2851,14 +2726,13 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", + "serde_yaml", "tar", - "temp-env", "tempfile", "thiserror 2.0.18", "tokio", "tokio-rustls", "tokio-stream", - "tokio-tungstenite 0.26.2", "tonic", "tracing", "tracing-subscriber", @@ -2866,7 +2740,7 @@ dependencies = [ ] [[package]] -name = "openshell-core" +name = "navigator-core" version = "0.1.0" dependencies = [ "miette", @@ -2875,7 +2749,6 @@ dependencies = [ "protobuf-src", "serde", "serde_json", - "tempfile", "thiserror 2.0.18", "tonic", "tonic-build", @@ -2883,29 +2756,29 @@ dependencies = [ ] [[package]] -name = "openshell-policy" +name = "navigator-policy" version = "0.1.0" dependencies = [ "miette", - "openshell-core", + "navigator-core", "serde", "serde_yaml", ] [[package]] -name = "openshell-providers" +name = "navigator-providers" version = "0.1.0" dependencies = [ - "openshell-core", + "navigator-core", "thiserror 2.0.18", ] [[package]] -name = "openshell-router" +name = "navigator-router" version = "0.1.0" dependencies = [ "bytes", - "openshell-core", + "navigator-core", "reqwest", "serde", "serde_json", @@ -2919,7 +2792,7 @@ dependencies = [ ] [[package]] -name = "openshell-sandbox" +name = "navigator-sandbox" version = "0.1.0" dependencies = [ "anyhow", @@ -2931,10 +2804,10 @@ dependencies = [ "landlock", "libc", "miette", 
+ "navigator-core", + "navigator-policy", + "navigator-router", "nix", - "openshell-core", - "openshell-policy", - "openshell-router", "rand_core 0.6.4", "rcgen", "regorus", @@ -2945,7 +2818,6 @@ dependencies = [ "serde_json", "serde_yaml", "sha2 0.10.9", - "temp-env", "tempfile", "thiserror 2.0.18", "tokio", @@ -2960,7 +2832,7 @@ dependencies = [ ] [[package]] -name = "openshell-server" +name = "navigator-server" version = "0.1.0" dependencies = [ "anyhow", @@ -2968,7 +2840,6 @@ dependencies = [ "bytes", "clap", "futures", - "futures-util", "hex", "hmac", "http", @@ -2981,16 +2852,14 @@ dependencies = [ "kube", "kube-runtime", "miette", - "openshell-core", - "openshell-policy", - "openshell-router", + "navigator-core", + "navigator-policy", "petname", "pin-project-lite", "prost", "prost-types", "rand 0.9.2", "rcgen", - "reqwest", "russh", "rustls", "rustls-pemfile", @@ -3003,37 +2872,170 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-stream", - "tokio-tungstenite 0.26.2", "tonic", "tower 0.5.3", "tower-http 0.6.8", "tracing", "tracing-subscriber", "uuid", - "wiremock", ] [[package]] -name = "openshell-tui" +name = "navigator-tui" version = "0.1.0" dependencies = [ - "base64 0.22.1", "crossterm 0.28.1", "miette", - "openshell-bootstrap", - "openshell-core", - "openshell-policy", - "openshell-providers", + "navigator-bootstrap", + "navigator-core", + "navigator-policy", + "navigator-providers", "owo-colors", "ratatui", "serde", - "terminal-colorsaurus", "tokio", "tonic", "tracing", "url", ] +[[package]] +name = "navigator-vm" +version = "0.1.0" +dependencies = [ + "libc", + "miette", + "thiserror 2.0.18", +] + +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "serde", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + 
+[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "openssh" version = "0.11.6" @@ -3065,9 +3067,9 @@ dependencies = [ [[package]] name = "owo-colors" -version = "4.3.0" +version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" +checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" [[package]] name = "p256" @@ -3252,7 +3254,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -3291,29 +3293,29 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.11" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" 
-version = "1.1.11" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] name = "pin-project-lite" -version = "0.2.17" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -3385,12 +3387,6 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" -[[package]] -name = "plain" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" - [[package]] name = "poly1305" version = "0.8.0" @@ -3451,7 +3447,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -3482,7 +3478,7 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -3520,7 +3516,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.117", + "syn 2.0.114", "tempfile", ] @@ -3534,7 +3530,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -3568,7 +3564,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.3", + "socket2 0.6.2", "thiserror 2.0.18", "tokio", "tracing", @@ -3577,9 +3573,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.14" +version = 
"0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ "bytes", "getrandom 0.3.4", @@ -3605,16 +3601,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2 0.6.2", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.45" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -3740,9 +3736,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.7.3" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" dependencies = [ "bitflags", ] @@ -3772,9 +3768,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.10" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "regorus" @@ -4020,22 +4016,22 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.4" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys 0.12.1", + "linux-raw-sys 0.11.0", "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.37" +version = 
"0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "log", "once_cell", @@ -4096,9 +4092,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.23" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" [[package]] name = "salsa20" @@ -4111,9 +4107,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.29" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ "windows-sys 0.61.2", ] @@ -4139,7 +4135,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -4194,9 +4190,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.7.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ "bitflags", "core-foundation", @@ -4207,9 +4203,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.17.0" +version = "2.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" dependencies = [ "core-foundation-sys", "libc", @@ -4258,7 +4254,7 
@@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -4269,7 +4265,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -4304,7 +4300,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -4498,12 +4494,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -4594,7 +4590,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -4617,7 +4613,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.117", + "syn 2.0.114", "tokio", "url", ] @@ -4761,7 +4757,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d904e7009df136af5297832a3ace3370cd14ff1546a232f4f185036c2736fcac" dependencies = [ "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -4812,7 +4808,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -4855,9 +4851,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.117" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -4881,7 +4877,7 @@ checksum = 
"728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -4895,51 +4891,16 @@ dependencies = [ "xattr", ] -[[package]] -name = "temp-env" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" -dependencies = [ - "parking_lot", -] - [[package]] name = "tempfile" -version = "3.27.0" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", - "getrandom 0.4.2", + "getrandom 0.3.4", "once_cell", - "rustix 1.1.4", - "windows-sys 0.61.2", -] - -[[package]] -name = "terminal-colorsaurus" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a46bb5364467da040298c573c8a95dbf9a512efc039630409a03126e3703e90" -dependencies = [ - "cfg-if", - "libc", - "memchr", - "mio 1.1.1", - "terminal-trx", - "windows-sys 0.61.2", - "xterm-color", -] - -[[package]] -name = "terminal-trx" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3f27d9a8a177e57545481faec87acb45c6e854ed1e5a3658ad186c106f38ed" -dependencies = [ - "cfg-if", - "libc", + "rustix 1.1.3", "windows-sys 0.61.2", ] @@ -4949,7 +4910,7 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" dependencies = [ - "rustix 1.1.4", + "rustix 1.1.3", "windows-sys 0.60.2", ] @@ -4989,7 +4950,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -5000,7 +4961,7 @@ checksum = 
"ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -5086,14 +5047,14 @@ checksum = "2d2e76690929402faae40aebdda620a2c0e25dd6d3b9afe48867dfd95991f4bd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] name = "tokio" -version = "1.50.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ "bytes", "libc", @@ -5101,20 +5062,20 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.3", + "socket2 0.6.2", "tokio-macros", "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.6.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -5138,34 +5099,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-tungstenite" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9daff607c6d2bf6c16fd681ccb7eecc83e4e2cdc1ca067ffaadfca5de7f084" -dependencies = [ - "futures-util", - "log", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "tokio", - "tokio-rustls", - "tungstenite 0.26.2", -] - -[[package]] -name = "tokio-tungstenite" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857" -dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite 0.28.0", -] - [[package]] name = "tokio-util" version = "0.7.18" @@ -5201,7 
+5134,6 @@ dependencies = [ "percent-encoding", "pin-project", "prost", - "rustls-native-certs", "rustls-pemfile", "socket2 0.5.10", "tokio", @@ -5224,7 +5156,7 @@ dependencies = [ "prost-build", "prost-types", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -5345,7 +5277,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -5406,42 +5338,6 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "tungstenite" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4793cb5e56680ecbb1d843515b23b6de9a75eb04b66643e256a396d43be33c13" -dependencies = [ - "bytes", - "data-encoding", - "http", - "httparse", - "log", - "rand 0.9.2", - "rustls", - "rustls-pki-types", - "sha1 0.10.6", - "thiserror 2.0.18", - "utf-8", -] - -[[package]] -name = "tungstenite" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" -dependencies = [ - "bytes", - "data-encoding", - "http", - "httparse", - "log", - "rand 0.9.2", - "sha1 0.10.6", - "thiserror 2.0.18", - "utf-8", -] - [[package]] name = "typenum" version = "1.19.0" @@ -5462,9 +5358,9 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.24" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-linebreak" @@ -5562,12 +5458,6 @@ dependencies = [ "serde", ] -[[package]] -name = "utf-8" -version = "0.7.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -5582,9 +5472,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.22.0" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -5650,9 +5540,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -5663,9 +5553,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.64" +version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ "cfg-if", "futures-util", @@ -5677,9 +5567,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5687,22 +5577,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.108" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] @@ -5743,9 +5633,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.91" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -5864,7 +5754,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -5875,7 +5765,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -6205,7 +6095,7 @@ dependencies = [ "heck", "indexmap 2.13.0", "prettyplease", - "syn 2.0.117", + "syn 2.0.114", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -6221,7 +6111,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -6276,15 +6166,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix 1.1.4", + "rustix 1.1.3", ] -[[package]] -name = 
"xterm-color" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7008a9d8ba97a7e47d9b2df63fcdb8dade303010c5a7cd5bf2469d4da6eba673" - [[package]] name = "yasna" version = "0.5.2" @@ -6313,28 +6197,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -6354,7 +6238,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", "synstructure", ] @@ -6375,7 +6259,7 @@ checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] @@ -6408,11 +6292,11 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.117", + "syn 2.0.114", ] [[package]] name = "zmij" -version = "1.0.21" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" +checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" diff 
--git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index 8bcb60fd..40acf737 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -10,7 +10,7 @@ mod constants; mod docker; mod metadata; mod mtls; -mod paths; +pub mod paths; mod pki; pub(crate) mod push; mod runtime; diff --git a/crates/openshell-bootstrap/src/paths.rs b/crates/openshell-bootstrap/src/paths.rs index cd3cb769..ff31b021 100644 --- a/crates/openshell-bootstrap/src/paths.rs +++ b/crates/openshell-bootstrap/src/paths.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use miette::Result; -use openshell_core::paths::xdg_config_dir; +use openshell_core::paths::{xdg_config_dir, xdg_data_dir}; use std::path::PathBuf; /// Path to the file that stores the active gateway name. @@ -26,6 +26,16 @@ pub fn last_sandbox_path(gateway: &str) -> Result { Ok(gateways_dir()?.join(gateway).join("last_sandbox")) } +/// Default rootfs directory for gateway microVMs. +/// +/// Location: `$XDG_DATA_HOME/openshell/gateway/rootfs` +pub fn default_rootfs_dir() -> Result { + Ok(xdg_data_dir()? 
+ .join("openshell") + .join("gateway") + .join("rootfs")) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index 61c20450..69db0281 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -20,6 +20,7 @@ openshell-core = { path = "../openshell-core" } openshell-policy = { path = "../openshell-policy" } openshell-providers = { path = "../openshell-providers" } openshell-tui = { path = "../openshell-tui" } +openshell-vm = { path = "../openshell-vm" } serde = { workspace = true } serde_json = { workspace = true } prost-types = { workspace = true } diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 84a323b5..5dfe86a5 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -9,6 +9,7 @@ use clap_complete::env::CompleteEnv; use miette::Result; use owo_colors::OwoColorize; use std::io::Write; +use std::path::PathBuf; use openshell_bootstrap::{ edge_token::load_edge_token, get_gateway_metadata, list_gateways, load_active_gateway, @@ -457,6 +458,55 @@ enum Commands { theme: openshell_tui::ThemeMode, }, + /// Boot a libkrun microVM. + /// + /// By default, starts a k3s Kubernetes cluster inside the VM with the + /// API server on port 6443. Use `--exec` to run a custom process instead. + #[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] + Vm { + /// Path to the rootfs directory (aarch64 Linux). + /// Defaults to `~/.local/share/openshell/gateway/rootfs`. + #[arg(long, value_hint = ValueHint::DirPath)] + rootfs: Option, + + /// Executable path inside the VM. When set, runs this instead of + /// the default k3s server. + #[arg(long)] + exec: Option, + + /// Arguments to the executable (requires `--exec`). + #[arg(long, num_args = 1..)] + args: Vec, + + /// Environment variables in `KEY=VALUE` form (requires `--exec`). 
+ #[arg(long, num_args = 1..)] + env: Vec, + + /// Working directory inside the VM. + #[arg(long, default_value = "/")] + workdir: String, + + /// Port mappings (`host_port:guest_port`). + #[arg(long, short, num_args = 1..)] + port: Vec, + + /// Number of virtual CPUs. + #[arg(long, default_value_t = 2)] + vcpus: u8, + + /// RAM in MiB. + #[arg(long, default_value_t = 2048)] + mem: u32, + + /// libkrun log level (0=Off .. 5=Trace). + #[arg(long, default_value_t = 1)] + krun_log_level: u32, + + /// Networking backend: "gvproxy" (default), "tsi", or "none". + #[arg(long, default_value = "gvproxy")] + net: String, + }, + /// Generate shell completions. #[command(after_long_help = COMPLETIONS_HELP, help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] Completions { @@ -2145,6 +2195,72 @@ async fn main() -> Result<()> { let channel = openshell_cli::tls::build_channel(&ctx.endpoint, &tls).await?; openshell_tui::run(channel, &ctx.name, &ctx.endpoint, theme).await?; } + Some(Commands::Vm { + rootfs, + exec, + args, + env, + workdir, + port, + vcpus, + mem, + krun_log_level, + net, + }) => { + let net_backend = match net.as_str() { + "tsi" => openshell_vm::NetBackend::Tsi, + "none" => openshell_vm::NetBackend::None, + "gvproxy" => openshell_vm::NetBackend::Gvproxy { + binary: PathBuf::from( + [ + "/opt/podman/bin/gvproxy", + "/opt/homebrew/bin/gvproxy", + "/usr/local/bin/gvproxy", + ] + .iter() + .find(|p| std::path::Path::new(p).exists()) + .unwrap_or(&"/opt/podman/bin/gvproxy"), + ), + }, + other => { + return Err(miette::miette!( + "unknown --net backend: {other} (expected: gvproxy, tsi, none)" + )); + } + }; + + let rootfs = + rootfs.map_or_else(openshell_bootstrap::paths::default_rootfs_dir, Ok)?; + let mut config = if let Some(exec_path) = exec { + openshell_vm::VmConfig { + rootfs, + vcpus, + mem_mib: mem, + exec_path, + args, + env, + workdir, + port_map: port, + log_level: krun_log_level, + console_output: None, + net: net_backend.clone(), + } + } else 
{ + let mut c = openshell_vm::VmConfig::gateway(rootfs); + if !port.is_empty() { + c.port_map = port; + } + c.vcpus = vcpus; + c.mem_mib = mem; + c.net = net_backend; + c + }; + config.log_level = krun_log_level; + let code = openshell_vm::launch(&config).map_err(|e| miette::miette!("{e}"))?; + if code != 0 { + std::process::exit(code); + } + } Some(Commands::Completions { shell }) => { let exe = std::env::current_exe() .map_err(|e| miette::miette!("failed to find current executable: {e}"))?; diff --git a/crates/openshell-core/src/paths.rs b/crates/openshell-core/src/paths.rs index bd9ce23d..fd0a141b 100644 --- a/crates/openshell-core/src/paths.rs +++ b/crates/openshell-core/src/paths.rs @@ -29,6 +29,19 @@ pub fn openshell_config_dir() -> Result { Ok(xdg_config_dir()?.join("openshell")) } +/// Resolve the XDG data base directory. +/// +/// Returns `$XDG_DATA_HOME` if set, otherwise `$HOME/.local/share`. +pub fn xdg_data_dir() -> Result { + if let Ok(path) = std::env::var("XDG_DATA_HOME") { + return Ok(PathBuf::from(path)); + } + let home = std::env::var("HOME") + .into_diagnostic() + .wrap_err("HOME is not set")?; + Ok(PathBuf::from(home).join(".local").join("share")) +} + /// Create a directory (and parents) with owner-only permissions (`0o700`) on /// Unix. On non-Unix platforms, falls back to default permissions. /// diff --git a/crates/openshell-vm/Cargo.toml b/crates/openshell-vm/Cargo.toml new file mode 100644 index 00000000..4b9e85f5 --- /dev/null +++ b/crates/openshell-vm/Cargo.toml @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "openshell-vm" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true +description = "MicroVM runtime using libkrun for hardware-isolated execution" + +[lib] +name = "openshell_vm" +path = "src/lib.rs" + +[dependencies] +libc = "0.2" +miette = { workspace = true } +thiserror = { workspace = true } + +[lints] +workspace = true diff --git a/crates/openshell-vm/build.rs b/crates/openshell-vm/build.rs new file mode 100644 index 00000000..7f789395 --- /dev/null +++ b/crates/openshell-vm/build.rs @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Build script for openshell-vm. +//! +//! Discovers the Homebrew library path for libkrun and emits the appropriate +//! cargo link-search directives. On macOS ARM64, libkrun is typically installed +//! via `brew tap slp/krun && brew install libkrun`. 
+ +fn main() { + // Discover Homebrew prefix (handles both /opt/homebrew and /usr/local) + let homebrew_prefix = std::process::Command::new("brew") + .args(["--prefix"]) + .output() + .ok() + .and_then(|o| { + if o.status.success() { + String::from_utf8(o.stdout) + .ok() + .map(|s| s.trim().to_string()) + } else { + None + } + }) + .unwrap_or_else(|| "/opt/homebrew".to_string()); + + let lib_dir = format!("{homebrew_prefix}/lib"); + + println!("cargo:rustc-link-search=native={lib_dir}"); + println!("cargo:rustc-link-lib=dylib=krun"); + + // Re-run if the library changes + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-env-changed=LIBRARY_PATH"); +} diff --git a/crates/openshell-vm/entitlements.plist b/crates/openshell-vm/entitlements.plist new file mode 100644 index 00000000..154f3308 --- /dev/null +++ b/crates/openshell-vm/entitlements.plist @@ -0,0 +1,8 @@ + + + + + com.apple.security.hypervisor + + + diff --git a/crates/openshell-vm/scripts/api-proxy.py b/crates/openshell-vm/scripts/api-proxy.py new file mode 100644 index 00000000..6da224f1 --- /dev/null +++ b/crates/openshell-vm/scripts/api-proxy.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +TCP proxy that waits for the k3s apiserver to be ready on 127.0.0.1:6444, +then accepts connections on 0.0.0.0:6443 and forwards them to the apiserver. + +This decouples the TSI-exposed port from k3s's internal dynamiclistener, +which has TLS handshake issues when accessed through TSI. 
+""" + +import os +import socket +import sys +import threading +import time + +LISTEN_HOST = "0.0.0.0" +LISTEN_PORT = int(os.environ.get("PROXY_LISTEN_PORT", "6443")) +UPSTREAM_HOST = "127.0.0.1" +UPSTREAM_PORT = int(os.environ.get("PROXY_UPSTREAM_PORT", "6444")) +BUFFER_SIZE = 65536 + + +def wait_for_upstream(): + """Block until the upstream apiserver completes a TLS handshake. + + A raw TCP connect succeeds as soon as the port is bound, but the TLS + server may not be ready yet. We do a full TLS handshake to confirm. + """ + import ssl + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + attempt = 0 + while True: + attempt += 1 + try: + sock = socket.create_connection((UPSTREAM_HOST, UPSTREAM_PORT), timeout=5) + ssock = ctx.wrap_socket(sock, server_hostname="localhost") + ssock.close() + print(f"[proxy] upstream TLS ready after {attempt} attempts", flush=True) + return + except ( + ConnectionRefusedError, + ConnectionResetError, + OSError, + ssl.SSLError, + ) as e: + if attempt % 5 == 0: + print( + f"[proxy] waiting for upstream (attempt {attempt}): {e}", flush=True + ) + time.sleep(1) + + +def forward(src, dst, label): + """Forward data between two sockets until one closes.""" + try: + while True: + data = src.recv(BUFFER_SIZE) + if not data: + break + dst.sendall(data) + except (BrokenPipeError, ConnectionResetError, OSError): + pass + finally: + try: + dst.shutdown(socket.SHUT_WR) + except OSError: + pass + + +def handle_client(client_sock, client_addr): + """Connect to upstream and forward bidirectionally.""" + print(f"[proxy] accepted connection from {client_addr}", flush=True) + try: + upstream = socket.create_connection((UPSTREAM_HOST, UPSTREAM_PORT), timeout=5) + print(f"[proxy] connected to upstream for {client_addr}", flush=True) + except OSError as e: + print( + f"[proxy] failed to connect to upstream for {client_addr}: {e}", flush=True + ) + client_sock.close() + return + + # Forward in both 
directions + t1 = threading.Thread( + target=forward, args=(client_sock, upstream, "client->upstream"), daemon=True + ) + t2 = threading.Thread( + target=forward, args=(upstream, client_sock, "upstream->client"), daemon=True + ) + t1.start() + t2.start() + t1.join() + t2.join() + print(f"[proxy] connection closed for {client_addr}", flush=True) + client_sock.close() + upstream.close() + + +def main(): + # Wait for the real apiserver to be ready before accepting connections + print( + f"[proxy] waiting for upstream at {UPSTREAM_HOST}:{UPSTREAM_PORT}...", + flush=True, + ) + wait_for_upstream() + + # Start listening + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind((LISTEN_HOST, LISTEN_PORT)) + server.listen(64) + print( + f"[proxy] listening on {LISTEN_HOST}:{LISTEN_PORT} -> {UPSTREAM_HOST}:{UPSTREAM_PORT}", + flush=True, + ) + + while True: + client_sock, client_addr = server.accept() + threading.Thread( + target=handle_client, args=(client_sock, client_addr), daemon=True + ).start() + + +if __name__ == "__main__": + main() diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh new file mode 100755 index 00000000..9f01a5b7 --- /dev/null +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build an aarch64 Ubuntu rootfs for the gateway microVM. +# +# Produces a rootfs with k3s pre-installed, plus the gateway-init.sh script +# that runs as PID 1 inside the libkrun VM. 
+# +# Usage: +# ./crates/openshell-vm/scripts/build-rootfs.sh [output_dir] +# +# Requires: Docker (or compatible container runtime), curl + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DEFAULT_ROOTFS="${XDG_DATA_HOME:-${HOME}/.local/share}/openshell/gateway/rootfs" +ROOTFS_DIR="${1:-${DEFAULT_ROOTFS}}" +CONTAINER_NAME="krun-rootfs-builder" +IMAGE_TAG="krun-rootfs:gateway" +# K3S_VERSION uses the semver "+" form for GitHub releases. +# The mise env may provide the Docker-tag form with "-" instead of "+"; +# normalise to "+" so the GitHub download URL works. +K3S_VERSION="${K3S_VERSION:-v1.29.8+k3s1}" +K3S_VERSION="${K3S_VERSION//-k3s/+k3s}" + +echo "==> Building gateway rootfs" +echo " k3s version: ${K3S_VERSION}" +echo " Output: ${ROOTFS_DIR}" + +# ── Download k3s binary (outside Docker — much faster) ───────────────── + +K3S_BIN="/tmp/k3s-arm64-${K3S_VERSION}" +if [ -f "${K3S_BIN}" ]; then + echo "==> Using cached k3s binary: ${K3S_BIN}" +else + echo "==> Downloading k3s ${K3S_VERSION} for arm64..." + curl -fSL "https://github.com/k3s-io/k3s/releases/download/${K3S_VERSION}/k3s-arm64" \ + -o "${K3S_BIN}" + chmod +x "${K3S_BIN}" +fi + +# ── Build base image with dependencies ───────────────────────────────── + +# Clean up any previous run +docker rm -f "${CONTAINER_NAME}" 2>/dev/null || true + +echo "==> Building base image..." +docker build --platform linux/arm64 -t "${IMAGE_TAG}" -f - . <<'DOCKERFILE' +FROM ubuntu:22.04 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + iptables \ + iproute2 \ + python3 \ + busybox-static \ + && rm -rf /var/lib/apt/lists/* +# busybox-static provides udhcpc for DHCP inside the VM. +RUN mkdir -p /usr/share/udhcpc && \ + ln -sf /bin/busybox /sbin/udhcpc +RUN mkdir -p /var/lib/rancher/k3s /etc/rancher/k3s +DOCKERFILE + +# Create a container and export the filesystem +echo "==> Creating container..."
+docker create --platform linux/arm64 --name "${CONTAINER_NAME}" "${IMAGE_TAG}" /bin/true + +echo "==> Exporting filesystem..." +rm -rf "${ROOTFS_DIR}" +mkdir -p "${ROOTFS_DIR}" +docker export "${CONTAINER_NAME}" | tar -C "${ROOTFS_DIR}" -xf - + +docker rm "${CONTAINER_NAME}" + +# ── Inject k3s binary ──────────────────────────────────────────────── + +echo "==> Injecting k3s binary..." +cp "${K3S_BIN}" "${ROOTFS_DIR}/usr/local/bin/k3s" +chmod +x "${ROOTFS_DIR}/usr/local/bin/k3s" +ln -sf /usr/local/bin/k3s "${ROOTFS_DIR}/usr/local/bin/kubectl" + +# ── Inject scripts ──────────────────────────────────────────────────── + +echo "==> Injecting gateway-init.sh..." +mkdir -p "${ROOTFS_DIR}/srv" +cp "${SCRIPT_DIR}/gateway-init.sh" "${ROOTFS_DIR}/srv/gateway-init.sh" +chmod +x "${ROOTFS_DIR}/srv/gateway-init.sh" + +# Keep the hello server around for debugging +cp "${SCRIPT_DIR}/hello-server.py" "${ROOTFS_DIR}/srv/hello-server.py" +chmod +x "${ROOTFS_DIR}/srv/hello-server.py" + +# ── Verify ──────────────────────────────────────────────────────────── + +if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then + echo "ERROR: k3s binary not found in rootfs. Something went wrong." + exit 1 +fi + +echo "" +echo "==> Rootfs ready at: ${ROOTFS_DIR}" +echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)" +echo "" +echo "Next steps:" +echo " 1. Run: ncl gateway" diff --git a/crates/openshell-vm/scripts/gateway-init.sh b/crates/openshell-vm/scripts/gateway-init.sh new file mode 100755 index 00000000..f59b2906 --- /dev/null +++ b/crates/openshell-vm/scripts/gateway-init.sh @@ -0,0 +1,124 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Init script for the gateway microVM. Runs as PID 1 inside the libkrun VM. +# +# Mounts essential virtual filesystems, then execs k3s server. 
+ +set -e + +# ── Mount essential filesystems ───────────────────────────────────────── + +mount -t proc proc /proc 2>/dev/null || true +mount -t sysfs sysfs /sys 2>/dev/null || true +mount -t tmpfs tmpfs /tmp 2>/dev/null || true +mount -t tmpfs tmpfs /run 2>/dev/null || true + +# devtmpfs is usually auto-mounted by the kernel, but ensure it's there. +mount -t devtmpfs devtmpfs /dev 2>/dev/null || true +mkdir -p /dev/pts /dev/shm +mount -t devpts devpts /dev/pts 2>/dev/null || true +mount -t tmpfs tmpfs /dev/shm 2>/dev/null || true + +# cgroup2 (unified hierarchy) — required by k3s/containerd. +mkdir -p /sys/fs/cgroup +mount -t cgroup2 cgroup2 /sys/fs/cgroup 2>/dev/null || true + +# ── Networking ────────────────────────────────────────────────────────── + +hostname gateway 2>/dev/null || true + +# Ensure loopback is up (k3s binds to 127.0.0.1). +ip link set lo up 2>/dev/null || true + +# Detect whether we have a real network interface (gvproxy) or need a +# dummy interface (TSI / no networking). +if ip link show eth0 >/dev/null 2>&1; then + # gvproxy networking — bring up eth0 and get an IP via DHCP. + # gvproxy has a built-in DHCP server that assigns 192.168.127.2/24 + # with gateway 192.168.127.1 and configures ARP properly. + echo "[gateway-init] detected eth0 (gvproxy networking)" + ip link set eth0 up 2>/dev/null || true + + # Use DHCP to get IP and configure routes. gvproxy's DHCP server + # handles ARP resolution which static config does not. + if command -v udhcpc >/dev/null 2>&1; then + echo "[gateway-init] running DHCP (udhcpc)..." + # udhcpc needs a script to apply the lease. Use the busybox + # default script if available, otherwise write a minimal one. + UDHCPC_SCRIPT="/usr/share/udhcpc/default.script" + if [ ! 
-f "$UDHCPC_SCRIPT" ]; then + mkdir -p /usr/share/udhcpc + cat > "$UDHCPC_SCRIPT" << 'DHCP_SCRIPT' +#!/bin/sh +case "$1" in + bound|renew) + ip addr flush dev "$interface" + ip addr add "$ip/$mask" dev "$interface" + if [ -n "$router" ]; then + ip route add default via $router dev "$interface" + fi + if [ -n "$dns" ]; then + echo -n > /etc/resolv.conf + for d in $dns; do + echo "nameserver $d" >> /etc/resolv.conf + done + fi + ;; +esac +DHCP_SCRIPT + chmod +x "$UDHCPC_SCRIPT" + fi + # -f: stay in foreground, -q: quit after obtaining lease, + # -n: exit if no lease, -T 2: 2s between retries, -t 5: 5 retries + udhcpc -i eth0 -f -q -n -T 2 -t 5 -s "$UDHCPC_SCRIPT" 2>&1 || true + else + # Fallback to static config if no DHCP client available. + echo "[gateway-init] no DHCP client, using static config" + ip addr add 192.168.127.2/24 dev eth0 2>/dev/null || true + ip route add default via 192.168.127.1 2>/dev/null || true + fi + + # Read back the IP we got (from DHCP or static). + NODE_IP=$(ip -4 addr show eth0 | grep -oP 'inet \K[^/]+' || echo "192.168.127.2") + echo "[gateway-init] eth0 IP: $NODE_IP" +else + # TSI or no networking — create a dummy interface for k3s. + echo "[gateway-init] no eth0 found, using dummy interface (TSI mode)" + ip link add dummy0 type dummy 2>/dev/null || true + ip addr add 10.0.2.15/24 dev dummy0 2>/dev/null || true + ip link set dummy0 up 2>/dev/null || true + ip route add default dev dummy0 2>/dev/null || true + + NODE_IP="10.0.2.15" +fi + +echo "[gateway-init] node IP: $NODE_IP" + +# ── k3s data directories ─────────────────────────────────────────────── + +mkdir -p /var/lib/rancher/k3s +mkdir -p /etc/rancher/k3s + +# Clean stale runtime artifacts from previous boots (virtio-fs persists +# the rootfs between VM restarts). +echo "[gateway-init] cleaning stale runtime artifacts..." 
+rm -rf /var/lib/rancher/k3s/server/tls/temporary-certs 2>/dev/null || true +rm -f /var/lib/rancher/k3s/server/kine.sock 2>/dev/null || true +# Also clean any stale pid files and unix sockets +find /var/lib/rancher/k3s -name '*.sock' -delete 2>/dev/null || true +find /run -name '*.sock' -delete 2>/dev/null || true + +# ── Start k3s ────────────────────────────────────────────────────────── + +echo "[gateway-init] starting k3s server..." +exec /usr/local/bin/k3s server \ + --disable=traefik \ + --write-kubeconfig-mode=644 \ + --node-ip="$NODE_IP" \ + --flannel-backend=none \ + --disable-network-policy \ + --disable-kube-proxy \ + --kube-apiserver-arg=bind-address=0.0.0.0 \ + --tls-san=localhost,127.0.0.1,10.0.2.15,192.168.127.2 diff --git a/crates/openshell-vm/scripts/hello-server.py b/crates/openshell-vm/scripts/hello-server.py new file mode 100644 index 00000000..f02d7d72 --- /dev/null +++ b/crates/openshell-vm/scripts/hello-server.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Minimal HTTP server that responds with 'Hello from libkrun VM!' 
on port 8080.""" + +import json +import os +import platform +from http.server import HTTPServer, BaseHTTPRequestHandler + + +class HelloHandler(BaseHTTPRequestHandler): + def do_GET(self): + body = json.dumps( + { + "message": "Hello from libkrun VM!", + "hostname": platform.node(), + "platform": platform.platform(), + "arch": platform.machine(), + "pid": os.getpid(), + "path": self.path, + }, + indent=2, + ) + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body.encode()) + + def log_message(self, format, *args): + print(f"[hello-server] {args[0]}") + + +def main(): + host = "0.0.0.0" + port = 8080 + server = HTTPServer((host, port), HelloHandler) + print(f"Hello server listening on {host}:{port}") + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nShutting down.") + server.server_close() + + +if __name__ == "__main__": + main() diff --git a/crates/openshell-vm/src/ffi.rs b/crates/openshell-vm/src/ffi.rs new file mode 100644 index 00000000..b9bb59d4 --- /dev/null +++ b/crates/openshell-vm/src/ffi.rs @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Minimal FFI bindings for the libkrun C API. +//! +//! libkrun is a `cdylib` — it cannot be consumed as a Rust dependency. We link +//! against the Homebrew-installed system library and declare `extern "C"` for +//! the subset of functions we need. +//! +//! See: + +use libc::c_char; + +#[link(name = "krun")] +#[allow(dead_code)] +unsafe extern "C" { + /// Sets the log level for the library (0=Off .. 5=Trace). + pub fn krun_set_log_level(level: u32) -> i32; + + /// Creates a configuration context. Returns context ID (>= 0) or negative error. + pub fn krun_create_ctx() -> i32; + + /// Frees a configuration context. 
+ pub fn krun_free_ctx(ctx_id: u32) -> i32; + + /// Sets vCPUs and RAM (MiB) for the microVM. + pub fn krun_set_vm_config(ctx_id: u32, num_vcpus: u8, ram_mib: u32) -> i32; + + /// Sets the root filesystem path (virtio-fs backed directory). + pub fn krun_set_root(ctx_id: u32, root_path: *const c_char) -> i32; + + /// Sets the working directory inside the VM. + pub fn krun_set_workdir(ctx_id: u32, workdir_path: *const c_char) -> i32; + + /// Sets the executable path, argv, and envp for the process inside the VM. + /// + /// **Important:** If `envp` is NULL, libkrun serializes the entire host + /// environment into the kernel command line, which can overflow its 4096-byte + /// limit. Always pass an explicit minimal env. + pub fn krun_set_exec( + ctx_id: u32, + exec_path: *const c_char, + argv: *const *const c_char, + envp: *const *const c_char, + ) -> i32; + + /// Configures host-to-guest TCP port mapping. + /// + /// Format: null-terminated array of `"host_port:guest_port"` C strings. + /// Passing NULL auto-exposes all listening guest ports. + pub fn krun_set_port_map(ctx_id: u32, port_map: *const *const c_char) -> i32; + + /// Redirects console output to a file (ignores stdin). + pub fn krun_set_console_output(ctx_id: u32, filepath: *const c_char) -> i32; + + /// Starts and enters the microVM. **Never returns** on success — calls + /// `exit()` with the workload's exit code. Only returns on config error. + pub fn krun_start_enter(ctx_id: u32) -> i32; + + /// Disables the implicit vsock device. Must be called before + /// `krun_add_vsock` to manually configure TSI features. + pub fn krun_disable_implicit_vsock(ctx_id: u32) -> i32; + + /// Adds a vsock device with specified TSI features. 
+ /// + /// `tsi_features` is a bitmask: + /// - `KRUN_TSI_HIJACK_INET` (1 << 0): intercept AF_INET sockets + /// - `KRUN_TSI_HIJACK_UNIX` (1 << 1): intercept AF_UNIX sockets + /// - 0: vsock without any TSI hijacking + pub fn krun_add_vsock(ctx_id: u32, tsi_features: u32) -> i32; + + /// Adds a virtio-net device connected to a unixgram-based backend + /// (e.g., gvproxy in vfkit mode). + /// + /// `c_path` and `fd` are mutually exclusive: set one to NULL/-1. + /// `c_mac` is 6 bytes. `features` is virtio-net feature bitmask. + /// `flags` may include `NET_FLAG_VFKIT` (1 << 0) for gvproxy vfkit mode. + pub fn krun_add_net_unixgram( + ctx_id: u32, + c_path: *const c_char, + fd: i32, + c_mac: *const u8, + features: u32, + flags: u32, + ) -> i32; +} diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs new file mode 100644 index 00000000..49617e5f --- /dev/null +++ b/crates/openshell-vm/src/lib.rs @@ -0,0 +1,718 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! `MicroVM` runtime using libkrun for hardware-isolated execution. +//! +//! This crate provides a thin wrapper around the libkrun C API to boot +//! lightweight VMs backed by virtio-fs root filesystems. On macOS ARM64, +//! it uses Apple's Hypervisor.framework; on Linux it uses KVM. +//! +//! # Codesigning (macOS) +//! +//! The calling binary must be codesigned with the +//! `com.apple.security.hypervisor` entitlement. See `entitlements.plist`. + +#![allow(unsafe_code)] + +mod ffi; + +use std::ffi::CString; +use std::os::unix::process::CommandExt as _; +use std::path::{Path, PathBuf}; +use std::ptr; + +// ── Error type ───────────────────────────────────────────────────────── + +/// Errors that can occur when configuring or launching a microVM. +#[derive(Debug, thiserror::Error, miette::Diagnostic)] +pub enum VmError { + /// A libkrun FFI call returned a negative error code. 
+ #[error("{func} failed with error code {code}")] + Krun { func: &'static str, code: i32 }, + + /// The rootfs directory does not exist. + #[error( + "rootfs directory not found: {path}\nRun: ./crates/openshell-vm/scripts/build-rootfs.sh" + )] + RootfsNotFound { path: String }, + + /// A path contained invalid UTF-8. + #[error("path is not valid UTF-8: {0}")] + InvalidPath(String), + + /// `CString::new` failed (embedded NUL byte). + #[error("invalid C string: {0}")] + CString(#[from] std::ffi::NulError), + + /// A required host binary was not found. + #[error("required binary not found: {path}\n{hint}")] + BinaryNotFound { path: String, hint: String }, + + /// `fork()` failed. + #[error("fork() failed: {0}")] + Fork(String), +} + +/// Check a libkrun return code; negative values are errors. +fn check(ret: i32, func: &'static str) -> Result<(), VmError> { + if ret < 0 { + Err(VmError::Krun { func, code: ret }) + } else { + Ok(()) + } +} + +// ── Configuration ────────────────────────────────────────────────────── + +/// Networking backend for the microVM. +#[derive(Debug, Clone)] +pub enum NetBackend { + /// TSI (Transparent Socket Impersonation) — default libkrun networking. + /// Simple but intercepts guest loopback connections, breaking k3s. + Tsi, + + /// No networking — disable vsock/TSI entirely. For debugging only. + None, + + /// gvproxy (vfkit mode) — real `eth0` interface via virtio-net. + /// Requires gvproxy binary on the host. Port forwarding is done + /// through gvproxy's HTTP API. + Gvproxy { + /// Path to the gvproxy binary. + binary: PathBuf, + }, +} + +/// Configuration for a libkrun microVM. +pub struct VmConfig { + /// Path to the extracted rootfs directory (aarch64 Linux). + pub rootfs: PathBuf, + + /// Number of virtual CPUs. + pub vcpus: u8, + + /// RAM in MiB. + pub mem_mib: u32, + + /// Executable path inside the VM. + pub exec_path: String, + + /// Arguments to the executable (argv, excluding argv\[0\]). 
+    pub args: Vec<String>,
+
+    /// Environment variables in `KEY=VALUE` form.
+    /// If empty, a minimal default set is used.
+    pub env: Vec<String>,
+
+    /// Working directory inside the VM.
+    pub workdir: String,
+
+    /// TCP port mappings in `"host_port:guest_port"` form.
+    /// Only used with TSI networking.
+    pub port_map: Vec<String>,
+
+    /// libkrun log level (0=Off .. 5=Trace).
+    pub log_level: u32,
+
+    /// Optional file path for VM console output. If `None`, console output
+    /// goes to the parent directory of the rootfs as `console.log`.
+    pub console_output: Option<PathBuf>,
+fn c_string_array(strings: &[&str]) -> Result<(Vec<CString>, Vec<*const libc::c_char>), VmError> {
+    let owned: Vec<CString> = strings
+        .iter()
+        .map(|s| CString::new(*s))
+        .collect::<Result<Vec<_>, _>>()?;
+    let mut ptrs: Vec<*const libc::c_char> = owned.iter().map(|c| c.as_ptr()).collect();
+    ptrs.push(ptr::null()); // null terminator
+    Ok((owned, ptrs))
+}
+
+/// Discover the Homebrew lib directory.
+fn homebrew_lib_dir() -> String {
+    std::process::Command::new("brew")
+        .args(["--prefix"])
+        .output()
+        .ok()
+        .and_then(|o| {
+            if o.status.success() {
+                String::from_utf8(o.stdout)
+                    .ok()
+                    .map(|s| format!("{}/lib", s.trim()))
+            } else {
+                None
+            }
+        })
+        .unwrap_or_else(|| "/opt/homebrew/lib".to_string())
+}
+
+/// Ensure `DYLD_FALLBACK_LIBRARY_PATH` includes the Homebrew lib directory.
+///
+/// libkrun loads `libkrunfw.5.dylib` at runtime via `dlopen`. On macOS, dyld
+/// only reads `DYLD_FALLBACK_LIBRARY_PATH` at process startup — setting it
+/// programmatically after launch has no effect. If the variable isn't already
+/// set, we re-exec the current process with it configured so dyld picks it up.
+///
+/// Returns `Ok(())` if the path is already set, or does not return (re-execs).
+fn ensure_krunfw_path() -> Result<(), VmError> {
+    let key = "DYLD_FALLBACK_LIBRARY_PATH";
+    let homebrew_lib = homebrew_lib_dir();
+
+    if let Ok(existing) = std::env::var(key)
+        && existing.contains(&homebrew_lib)
+    {
+        return Ok(()); // Already set — nothing to do.
+    }
+
+    // Re-exec ourselves with the library path set. dyld will process it
+    // at startup, making libkrunfw discoverable for libkrun's dlopen.
+    let exe = std::env::current_exe().map_err(|e| VmError::Fork(e.to_string()))?;
+    let args: Vec<String> = std::env::args().collect();
+ unsafe { + std::env::set_var(key, &new_val); + } + + // exec replaces the process — if it returns, something went wrong. + let err = std::process::Command::new(exe).args(&args[1..]).exec(); + Err(VmError::Fork(format!("re-exec failed: {err}"))) +} + +/// Try to find gvproxy in common locations. +fn find_gvproxy() -> Option { + // Check PATH first + if let Ok(output) = std::process::Command::new("which").arg("gvproxy").output() { + if output.status.success() { + let path = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if !path.is_empty() { + return Some(PathBuf::from(path)); + } + } + } + // Common Podman installation paths + for p in &[ + "/opt/podman/bin/gvproxy", + "/opt/homebrew/bin/gvproxy", + "/usr/local/bin/gvproxy", + ] { + let path = PathBuf::from(p); + if path.exists() { + return Some(path); + } + } + None +} + +/// Issue a gvproxy expose call via its HTTP API (unix socket). +/// +/// Sends a raw HTTP/1.1 POST request over the unix socket to avoid +/// depending on `curl` being installed on the host. +fn gvproxy_expose(api_sock: &Path, body: &str) -> Result<(), String> { + use std::io::{Read, Write}; + use std::os::unix::net::UnixStream; + + let mut stream = + UnixStream::connect(api_sock).map_err(|e| format!("connect to gvproxy API socket: {e}"))?; + + let request = format!( + "POST /services/forwarder/expose HTTP/1.1\r\n\ + Host: localhost\r\n\ + Content-Type: application/json\r\n\ + Content-Length: {}\r\n\ + Connection: close\r\n\ + \r\n\ + {}", + body.len(), + body, + ); + + stream + .write_all(request.as_bytes()) + .map_err(|e| format!("write to gvproxy API: {e}"))?; + + // Read just enough of the response to get the status line. + let mut buf = [0u8; 1024]; + let n = stream + .read(&mut buf) + .map_err(|e| format!("read from gvproxy API: {e}"))?; + let response = String::from_utf8_lossy(&buf[..n]); + + // Parse the HTTP status code from the first line (e.g. "HTTP/1.1 200 OK"). 
+ let status = response + .lines() + .next() + .and_then(|line| line.split_whitespace().nth(1)) + .unwrap_or("0"); + + match status { + "200" | "204" => Ok(()), + _ => { + let first_line = response.lines().next().unwrap_or(""); + Err(format!("gvproxy API: {first_line}")) + } + } +} + +fn path_to_cstring(path: &Path) -> Result { + let s = path + .to_str() + .ok_or_else(|| VmError::InvalidPath(path.display().to_string()))?; + Ok(CString::new(s)?) +} + +// ── Launch ────────────────────────────────────────────────────────────── + +/// Configure and launch a libkrun microVM. +/// +/// This forks the process. The child enters the VM (never returns); the +/// parent blocks until the VM exits or a signal is received. +/// +/// Returns the VM exit code (from `waitpid`). +#[allow(clippy::similar_names)] +pub fn launch(config: &VmConfig) -> Result { + // Validate rootfs + if !config.rootfs.is_dir() { + return Err(VmError::RootfsNotFound { + path: config.rootfs.display().to_string(), + }); + } + + eprintln!("rootfs: {}", config.rootfs.display()); + eprintln!("vm: {} vCPU(s), {} MiB RAM", config.vcpus, config.mem_mib); + + // Ensure libkrunfw is discoverable. On macOS, dyld only reads + // DYLD_FALLBACK_LIBRARY_PATH at startup, so if it's not set we + // re-exec ourselves with it configured (this call won't return). 
+ ensure_krunfw_path()?; + + // ── Configure the microVM ────────────────────────────────────── + + unsafe { + check( + ffi::krun_set_log_level(config.log_level), + "krun_set_log_level", + )?; + } + + let ctx_id = unsafe { ffi::krun_create_ctx() }; + if ctx_id < 0 { + return Err(VmError::Krun { + func: "krun_create_ctx", + code: ctx_id, + }); + } + #[allow(clippy::cast_sign_loss)] + let ctx_id = ctx_id as u32; + + unsafe { + check( + ffi::krun_set_vm_config(ctx_id, config.vcpus, config.mem_mib), + "krun_set_vm_config", + )?; + } + + // Root filesystem (virtio-fs) + let rootfs_c = path_to_cstring(&config.rootfs)?; + unsafe { + check( + ffi::krun_set_root(ctx_id, rootfs_c.as_ptr()), + "krun_set_root", + )?; + } + + // Working directory + let workdir_c = CString::new(config.workdir.as_str())?; + unsafe { + check( + ffi::krun_set_workdir(ctx_id, workdir_c.as_ptr()), + "krun_set_workdir", + )?; + } + + // Networking setup + let mut gvproxy_child: Option = None; + let mut gvproxy_api_sock: Option = None; + + match &config.net { + NetBackend::Tsi => { + // Default TSI — no special setup needed. 
+ } + NetBackend::None => { + unsafe { + check( + ffi::krun_disable_implicit_vsock(ctx_id), + "krun_disable_implicit_vsock", + )?; + check(ffi::krun_add_vsock(ctx_id, 0), "krun_add_vsock")?; + } + eprintln!("Networking: disabled (no TSI, no virtio-net)"); + } + NetBackend::Gvproxy { binary } => { + if !binary.exists() { + return Err(VmError::BinaryNotFound { + path: binary.display().to_string(), + hint: "Install Podman Desktop or place gvproxy in PATH".to_string(), + }); + } + + // Create temp socket paths + let run_dir = config + .rootfs + .parent() + .unwrap_or(&config.rootfs) + .to_path_buf(); + let vfkit_sock = run_dir.join("gvproxy-vfkit.sock"); + let api_sock = run_dir.join("gvproxy-api.sock"); + + // Clean stale sockets + let _ = std::fs::remove_file(&vfkit_sock); + let _ = std::fs::remove_file(&api_sock); + + // Start gvproxy + eprintln!("Starting gvproxy: {}", binary.display()); + let gvproxy_log = run_dir.join("gvproxy.log"); + let gvproxy_log_file = std::fs::File::create(&gvproxy_log) + .map_err(|e| VmError::Fork(format!("failed to create gvproxy log: {e}")))?; + let child = std::process::Command::new(binary) + .arg("-listen-vfkit") + .arg(format!("unixgram://{}", vfkit_sock.display())) + .arg("-listen") + .arg(format!("unix://{}", api_sock.display())) + .stdout(std::process::Stdio::null()) + .stderr(gvproxy_log_file) + .spawn() + .map_err(|e| VmError::Fork(format!("failed to start gvproxy: {e}")))?; + + eprintln!("gvproxy started (pid {})", child.id()); + + // Wait for the socket to appear + for _ in 0..50 { + if vfkit_sock.exists() { + break; + } + std::thread::sleep(std::time::Duration::from_millis(100)); + } + if !vfkit_sock.exists() { + return Err(VmError::Fork( + "gvproxy socket did not appear within 5s".to_string(), + )); + } + + // Disable implicit TSI and add virtio-net via gvproxy + unsafe { + check( + ffi::krun_disable_implicit_vsock(ctx_id), + "krun_disable_implicit_vsock", + )?; + check(ffi::krun_add_vsock(ctx_id, 0), "krun_add_vsock")?; + } 
+ + let sock_c = path_to_cstring(&vfkit_sock)?; + // This MAC matches gvproxy's default static DHCP lease for + // 192.168.127.2. Using a different MAC can cause the gVisor + // network stack to misroute or drop packets. + let mac: [u8; 6] = [0x5a, 0x94, 0xef, 0xe4, 0x0c, 0xee]; + + // COMPAT_NET_FEATURES from libkrun.h + const NET_FEATURE_CSUM: u32 = 1 << 0; + const NET_FEATURE_GUEST_CSUM: u32 = 1 << 1; + const NET_FEATURE_GUEST_TSO4: u32 = 1 << 7; + const NET_FEATURE_GUEST_UFO: u32 = 1 << 10; + const NET_FEATURE_HOST_TSO4: u32 = 1 << 11; + const NET_FEATURE_HOST_UFO: u32 = 1 << 14; + const COMPAT_NET_FEATURES: u32 = NET_FEATURE_CSUM + | NET_FEATURE_GUEST_CSUM + | NET_FEATURE_GUEST_TSO4 + | NET_FEATURE_GUEST_UFO + | NET_FEATURE_HOST_TSO4 + | NET_FEATURE_HOST_UFO; + const NET_FLAG_VFKIT: u32 = 1 << 0; + + unsafe { + check( + ffi::krun_add_net_unixgram( + ctx_id, + sock_c.as_ptr(), + -1, + mac.as_ptr(), + COMPAT_NET_FEATURES, + NET_FLAG_VFKIT, + ), + "krun_add_net_unixgram", + )?; + } + + eprintln!("Networking: gvproxy (virtio-net via {vfkit_sock:?})"); + gvproxy_child = Some(child); + gvproxy_api_sock = Some(api_sock); + } + } + + // Port mapping (TSI only) + if !config.port_map.is_empty() && matches!(config.net, NetBackend::Tsi) { + let port_strs: Vec<&str> = config.port_map.iter().map(String::as_str).collect(); + let (_port_owners, port_ptrs) = c_string_array(&port_strs)?; + unsafe { + check( + ffi::krun_set_port_map(ctx_id, port_ptrs.as_ptr()), + "krun_set_port_map", + )?; + } + } + + // Console output + let console_log = config.console_output.clone().unwrap_or_else(|| { + config + .rootfs + .parent() + .unwrap_or(&config.rootfs) + .join("console.log") + }); + let console_c = path_to_cstring(&console_log)?; + unsafe { + check( + ffi::krun_set_console_output(ctx_id, console_c.as_ptr()), + "krun_set_console_output", + )?; + } + + // Executable, argv, envp + let exec_c = CString::new(config.exec_path.as_str())?; + + // argv: libkrun's init sets argv[0] from 
exec_path internally, + // so we only pass the actual arguments here. + let argv_strs: Vec<&str> = config.args.iter().map(String::as_str).collect(); + let (_argv_owners, argv_ptrs) = c_string_array(&argv_strs)?; + + // envp: use provided env or minimal defaults + let env_strs: Vec<&str> = if config.env.is_empty() { + vec![ + "HOME=/root", + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm", + ] + } else { + config.env.iter().map(String::as_str).collect() + }; + let (_env_owners, env_ptrs) = c_string_array(&env_strs)?; + + unsafe { + check( + ffi::krun_set_exec( + ctx_id, + exec_c.as_ptr(), + argv_ptrs.as_ptr(), + env_ptrs.as_ptr(), + ), + "krun_set_exec", + )?; + } + + // ── Fork and enter the VM ────────────────────────────────────── + // + // krun_start_enter() never returns — it calls exit() when the guest + // process exits. We fork so the parent can monitor and report. + + eprintln!("Booting microVM..."); + + let pid = unsafe { libc::fork() }; + match pid { + -1 => Err(VmError::Fork(std::io::Error::last_os_error().to_string())), + 0 => { + // Child process: enter the VM (never returns on success) + let ret = unsafe { ffi::krun_start_enter(ctx_id) }; + eprintln!("krun_start_enter failed: {ret}"); + std::process::exit(1); + } + _ => { + // Parent: wait for child + eprintln!("VM started (child pid {pid})"); + for pm in &config.port_map { + let host_port = pm.split(':').next().unwrap_or(pm); + eprintln!(" port {pm} -> http://localhost:{host_port}"); + } + eprintln!("Console output: {}", console_log.display()); + + // Set up gvproxy port forwarding via its HTTP API. + // The port_map entries use the same "host:guest" format + // as TSI, but here we translate them into gvproxy expose + // calls targeting the guest IP (192.168.127.2). 
+ if let Some(ref api_sock) = gvproxy_api_sock { + // Wait for gvproxy API socket to be ready + std::thread::sleep(std::time::Duration::from_millis(500)); + eprintln!("Setting up gvproxy port forwarding..."); + + let guest_ip = "192.168.127.2"; + + for pm in &config.port_map { + let parts: Vec<&str> = pm.split(':').collect(); + let (host_port, guest_port) = match parts.len() { + 2 => (parts[0], parts[1]), + 1 => (parts[0], parts[0]), + _ => { + eprintln!(" skipping invalid port mapping: {pm}"); + continue; + } + }; + + let expose_body = format!( + r#"{{"local":":{host_port}","remote":"{guest_ip}:{guest_port}","protocol":"tcp"}}"# + ); + + match gvproxy_expose(api_sock, &expose_body) { + Ok(()) => { + eprintln!(" port {host_port} -> {guest_ip}:{guest_port}"); + } + Err(e) => { + eprintln!(" port {host_port}: {e}"); + } + } + } + } + + // Wait for k3s kubeconfig to appear (virtio-fs makes it + // visible on the host). Only do this for the gateway preset + // (when exec_path is the default init script). + if config.exec_path == "/srv/gateway-init.sh" { + let kubeconfig_src = config.rootfs.join("etc/rancher/k3s/k3s.yaml"); + eprintln!("Waiting for kubeconfig..."); + let mut found = false; + for _ in 0..60 { + if kubeconfig_src.is_file() + && std::fs::metadata(&kubeconfig_src) + .map(|m| m.len() > 0) + .unwrap_or(false) + { + found = true; + break; + } + std::thread::sleep(std::time::Duration::from_secs(1)); + } + + if found { + // Copy kubeconfig to ~/.kube/gateway.yaml, rewriting + // the server URL to point at the forwarded host port. + let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); + let kube_dir = PathBuf::from(home).join(".kube"); + let _ = std::fs::create_dir_all(&kube_dir); + let dest = kube_dir.join("gateway.yaml"); + + match std::fs::read_to_string(&kubeconfig_src) { + Ok(contents) => { + // The kubeconfig has server: https://127.0.0.1:6443 + // which is correct since we forward host:6443 -> guest:6444. 
+ if let Err(e) = std::fs::write(&dest, &contents) { + eprintln!(" failed to write kubeconfig: {e}"); + } else { + eprintln!("Kubeconfig: {}", dest.display()); + eprintln!(" export KUBECONFIG={}", dest.display()); + } + } + Err(e) => { + eprintln!(" failed to read kubeconfig: {e}"); + } + } + } else { + eprintln!(" kubeconfig not found after 60s (k3s may still be starting)"); + } + } + + eprintln!("Press Ctrl+C to stop."); + + // Forward signals to child + unsafe { + libc::signal( + libc::SIGINT, + forward_signal as *const () as libc::sighandler_t, + ); + libc::signal( + libc::SIGTERM, + forward_signal as *const () as libc::sighandler_t, + ); + CHILD_PID.store(pid, std::sync::atomic::Ordering::Relaxed); + } + + let mut status: libc::c_int = 0; + unsafe { + libc::waitpid(pid, &raw mut status, 0); + } + + // Clean up gvproxy + if let Some(mut child) = gvproxy_child { + let _ = child.kill(); + let _ = child.wait(); + eprintln!("gvproxy stopped"); + } + + if libc::WIFEXITED(status) { + let code = libc::WEXITSTATUS(status); + eprintln!("VM exited with code {code}"); + return Ok(code); + } else if libc::WIFSIGNALED(status) { + let sig = libc::WTERMSIG(status); + eprintln!("VM killed by signal {sig}"); + return Ok(128 + sig); + } + + Ok(status) + } + } +} + +static CHILD_PID: std::sync::atomic::AtomicI32 = std::sync::atomic::AtomicI32::new(0); + +extern "C" fn forward_signal(_sig: libc::c_int) { + let pid = CHILD_PID.load(std::sync::atomic::Ordering::Relaxed); + if pid > 0 { + unsafe { + libc::kill(pid, libc::SIGTERM); + } + } +} diff --git a/scripts/bin/openshell b/scripts/bin/openshell index 8b8a9c21..9ca015ca 100755 --- a/scripts/bin/openshell +++ b/scripts/bin/openshell @@ -42,7 +42,7 @@ else return 0 ;; crates/openshell-cli/*|crates/openshell-core/*|crates/openshell-bootstrap/*) return 0 ;; - crates/openshell-policy/*|crates/openshell-providers/*|crates/openshell-tui/*) + 
crates/openshell-policy/*|crates/openshell-providers/*|crates/openshell-tui/*|crates/openshell-vm/*) return 0 ;; *) return 1 ;; @@ -90,6 +90,13 @@ fi if [[ "$needs_build" == "1" ]]; then echo "Recompiling openshell..." >&2 cargo build --package openshell-cli --quiet + + # On macOS, codesign with the hypervisor entitlement so libkrun can use + # Apple's Hypervisor.framework. Re-sign after every build. + ENTITLEMENTS="$PROJECT_ROOT/crates/openshell-vm/entitlements.plist" + if [[ "$(uname)" == "Darwin" ]] && [[ -f "$ENTITLEMENTS" ]]; then + codesign --entitlements "$ENTITLEMENTS" --force -s - "$BINARY" 2>/dev/null + fi # Persist state after successful build mkdir -p "$(dirname "$STATE_FILE")" cd "$PROJECT_ROOT" @@ -110,7 +117,7 @@ if [[ "$needs_build" == "1" ]]; then return 0 ;; crates/openshell-cli/*|crates/openshell-core/*|crates/openshell-bootstrap/*) return 0 ;; - crates/openshell-policy/*|crates/openshell-providers/*|crates/openshell-tui/*) + crates/openshell-policy/*|crates/openshell-providers/*|crates/openshell-tui/*|crates/openshell-vm/*) return 0 ;; *) return 1 ;; @@ -140,4 +147,12 @@ fingerprint=${new_fingerprint} EOF fi +# Ensure libkrunfw is discoverable by libkrun's dlopen on macOS. +# dyld only reads DYLD_FALLBACK_LIBRARY_PATH at process startup, so we +# set it here before exec. +if [[ "$(uname)" == "Darwin" ]]; then + HOMEBREW_LIB="$(brew --prefix 2>/dev/null || echo /opt/homebrew)/lib" + export DYLD_FALLBACK_LIBRARY_PATH="${HOMEBREW_LIB}${DYLD_FALLBACK_LIBRARY_PATH:+:$DYLD_FALLBACK_LIBRARY_PATH}" +fi + exec "$BINARY" "$@" From 2c9b04b7b3318ea9e97d7083e385135de92e0b7e Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 4 Mar 2026 16:11:13 -0800 Subject: [PATCH 02/14] feat(vm): deploy NemoClaw helm chart inside gateway VM Enable full NemoClaw control plane deployment inside the libkrun microVM so e2e tests can run against the VM instead of Docker. 
Build-time (build-rootfs.sh): - Package helm chart and inject into k3s static charts directory - Copy HelmChart CR and agent-sandbox manifests into rootfs - Pull and save arm64 container images as tarballs for airgap boot Boot-time (gateway-init.sh): - Enable flannel CNI (remove --flannel-backend=none and related flags) - Deploy bundled manifests to k3s auto-deploy directory - Patch HelmChart CR for VM context (pullPolicy, SSH placeholders) - Ensure DNS fallback when DHCP doesn't configure resolv.conf Post-boot (lib.rs): - Wait for navigator namespace created by Helm controller - Generate PKI and apply TLS secrets via host kubectl - Store cluster metadata and mTLS creds for CLI/SDK access - Set 'gateway' as active cluster for e2e test discovery Also bump VM to 8GB RAM / 4 vCPUs, add port 30051 forwarding, fix nemoclaw wrapper fingerprint to include navigator-vm crate, and add test:e2e:vm mise task. --- Cargo.lock | 3 + crates/openshell-bootstrap/src/lib.rs | 6 +- crates/openshell-vm/Cargo.toml | 3 + crates/openshell-vm/scripts/build-rootfs.sh | 88 +++++++- crates/openshell-vm/scripts/gateway-init.sh | 55 ++++- crates/openshell-vm/src/lib.rs | 237 +++++++++++++++++++- tasks/test.toml | 6 + 7 files changed, 377 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8417effa..4eaecdb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2903,8 +2903,11 @@ dependencies = [ name = "navigator-vm" version = "0.1.0" dependencies = [ + "base64 0.22.1", "libc", "miette", + "navigator-bootstrap", + "serde_json", "thiserror 2.0.18", ] diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index 40acf737..2d7db436 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -6,12 +6,12 @@ pub mod edge_token; pub mod errors; pub mod image; -mod constants; +pub mod constants; mod docker; mod metadata; -mod mtls; +pub mod mtls; pub mod paths; -mod pki; +pub mod pki; pub(crate) mod push; mod 
runtime; diff --git a/crates/openshell-vm/Cargo.toml b/crates/openshell-vm/Cargo.toml index 4b9e85f5..d76be7aa 100644 --- a/crates/openshell-vm/Cargo.toml +++ b/crates/openshell-vm/Cargo.toml @@ -15,8 +15,11 @@ name = "openshell_vm" path = "src/lib.rs" [dependencies] +base64 = "0.22" libc = "0.2" miette = { workspace = true } +navigator-bootstrap = { path = "../navigator-bootstrap" } +serde_json = "1" thiserror = { workspace = true } [lints] diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh index 9f01a5b7..2b58a2ce 100755 --- a/crates/openshell-vm/scripts/build-rootfs.sh +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -4,13 +4,13 @@ # Build an aarch64 Ubuntu rootfs for the gateway microVM. # -# Produces a rootfs with k3s pre-installed, plus the gateway-init.sh script -# that runs as PID 1 inside the libkrun VM. +# Produces a rootfs with k3s pre-installed, the NemoClaw helm chart and +# manifests baked in, and container images pre-loaded for airgap boot. # # Usage: # ./crates/navigator-vm/scripts/build-rootfs.sh [output_dir] # -# Requires: Docker (or compatible container runtime), curl +# Requires: Docker (or compatible container runtime), curl, helm set -euo pipefail @@ -25,8 +25,19 @@ IMAGE_TAG="krun-rootfs:gateway" K3S_VERSION="${K3S_VERSION:-v1.29.8+k3s1}" K3S_VERSION="${K3S_VERSION//-k3s/+k3s}" +# Project root (three levels up from crates/navigator-vm/scripts/) +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" + +# Container images to pre-load into k3s (arm64).
+IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-d1i0nduu2f6qxk.cloudfront.net/navigator}" +IMAGE_TAG="${IMAGE_TAG:-latest}" +SERVER_IMAGE="${IMAGE_REPO_BASE}/server:${IMAGE_TAG}" +SANDBOX_IMAGE="${IMAGE_REPO_BASE}/sandbox:${IMAGE_TAG}" +AGENT_SANDBOX_IMAGE="registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0" + echo "==> Building gateway rootfs" echo " k3s version: ${K3S_VERSION}" +echo " Images: ${SERVER_IMAGE}, ${SANDBOX_IMAGE}" echo " Output: ${ROOTFS_DIR}" # ── Download k3s binary (outside Docker — much faster) ───────────────── @@ -92,6 +103,69 @@ chmod +x "${ROOTFS_DIR}/srv/gateway-init.sh" cp "${SCRIPT_DIR}/hello-server.py" "${ROOTFS_DIR}/srv/hello-server.py" chmod +x "${ROOTFS_DIR}/srv/hello-server.py" +# ── Package and inject helm chart ──────────────────────────────────── + +HELM_CHART_DIR="${PROJECT_ROOT}/deploy/helm/navigator" +CHART_DEST="${ROOTFS_DIR}/var/lib/rancher/k3s/server/static/charts" + +if [ -d "${HELM_CHART_DIR}" ]; then + echo "==> Packaging helm chart..." + mkdir -p "${CHART_DEST}" + helm package "${HELM_CHART_DIR}" -d "${CHART_DEST}" + echo " $(ls "${CHART_DEST}"/*.tgz 2>/dev/null | xargs -I{} basename {})" +else + echo "WARNING: Helm chart not found at ${HELM_CHART_DIR}, skipping" +fi + +# ── Inject Kubernetes manifests ────────────────────────────────────── +# These are copied to /opt/navigator/manifests/ (staging). gateway-init.sh +# moves them to /var/lib/rancher/k3s/server/manifests/ at boot so the +# k3s Helm Controller auto-deploys them. + +MANIFEST_SRC="${PROJECT_ROOT}/deploy/kube/manifests" +MANIFEST_DEST="${ROOTFS_DIR}/opt/navigator/manifests" + +echo "==> Injecting Kubernetes manifests..." 
+mkdir -p "${MANIFEST_DEST}" + +for manifest in navigator-helmchart.yaml agent-sandbox.yaml; do + if [ -f "${MANIFEST_SRC}/${manifest}" ]; then + cp "${MANIFEST_SRC}/${manifest}" "${MANIFEST_DEST}/" + echo " ${manifest}" + else + echo "WARNING: ${manifest} not found in ${MANIFEST_SRC}" + fi +done + +# ── Pre-load container images ──────────────────────────────────────── +# Pull arm64 images and save as tarballs in the k3s airgap images +# directory. k3s auto-imports from /var/lib/rancher/k3s/agent/images/ +# on startup, so no internet access is needed at boot time. + +IMAGES_DIR="${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images" +mkdir -p "${IMAGES_DIR}" + +echo "==> Pre-loading container images (arm64)..." + +pull_and_save() { + local image="$1" + local output="$2" + + if [ -f "${output}" ]; then + echo " cached: $(basename "${output}")" + return 0 + fi + + echo " pulling: ${image}..." + docker pull --platform linux/arm64 "${image}" --quiet + echo " saving: $(basename "${output}")..." + docker save "${image}" -o "${output}" +} + +pull_and_save "${SERVER_IMAGE}" "${IMAGES_DIR}/navigator-server.tar" +pull_and_save "${SANDBOX_IMAGE}" "${IMAGES_DIR}/navigator-sandbox.tar" +pull_and_save "${AGENT_SANDBOX_IMAGE}" "${IMAGES_DIR}/agent-sandbox-controller.tar" + # ── Verify ──────────────────────────────────────────────────────────── if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then @@ -102,6 +176,14 @@ fi echo "" echo "==> Rootfs ready at: ${ROOTFS_DIR}" echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)" + +# Show image sizes +echo " Images:" +for img in "${IMAGES_DIR}"/*.tar; do + [ -f "$img" ] || continue + echo " $(basename "$img"): $(du -sh "$img" | cut -f1)" +done + echo "" echo "Next steps:" echo " 1. 
Run: ncl gateway" diff --git a/crates/openshell-vm/scripts/gateway-init.sh b/crates/openshell-vm/scripts/gateway-init.sh index f59b2906..af8c6566 100755 --- a/crates/openshell-vm/scripts/gateway-init.sh +++ b/crates/openshell-vm/scripts/gateway-init.sh @@ -4,7 +4,8 @@ # Init script for the gateway microVM. Runs as PID 1 inside the libkrun VM. # -# Mounts essential virtual filesystems, then execs k3s server. +# Mounts essential virtual filesystems, deploys bundled manifests (helm chart, +# agent-sandbox controller), then execs k3s server. set -e @@ -80,6 +81,14 @@ DHCP_SCRIPT ip route add default via 192.168.127.1 2>/dev/null || true fi + # Ensure DNS is configured. DHCP should have set /etc/resolv.conf, + # but if it didn't (or static fallback was used), provide a default. + if [ ! -s /etc/resolv.conf ]; then + echo "[gateway-init] no DNS configured, using public DNS" + echo "nameserver 8.8.8.8" > /etc/resolv.conf + echo "nameserver 8.8.4.4" >> /etc/resolv.conf + fi + # Read back the IP we got (from DHCP or static). NODE_IP=$(ip -4 addr show eth0 | grep -oP 'inet \K[^/]+' || echo "192.168.127.2") echo "[gateway-init] eth0 IP: $NODE_IP" @@ -110,6 +119,46 @@ rm -f /var/lib/rancher/k3s/server/kine.sock 2>/dev/null || true find /var/lib/rancher/k3s -name '*.sock' -delete 2>/dev/null || true find /run -name '*.sock' -delete 2>/dev/null || true +# ── Deploy bundled manifests ──────────────────────────────────────────── +# Copy manifests from the staging directory to the k3s auto-deploy path. +# This mirrors the approach in cluster-entrypoint.sh for the Docker path. + +K3S_MANIFESTS="/var/lib/rancher/k3s/server/manifests" +BUNDLED_MANIFESTS="/opt/navigator/manifests" + +mkdir -p "$K3S_MANIFESTS" + +if [ -d "$BUNDLED_MANIFESTS" ]; then + echo "[gateway-init] deploying bundled manifests..." + for manifest in "$BUNDLED_MANIFESTS"/*.yaml; do + [ ! 
-f "$manifest" ] && continue + cp "$manifest" "$K3S_MANIFESTS/" + echo " $(basename "$manifest")" + done + + # Remove stale navigator-managed manifests from previous boots. + for existing in "$K3S_MANIFESTS"/navigator-*.yaml \ + "$K3S_MANIFESTS"/agent-*.yaml; do + [ ! -f "$existing" ] && continue + basename=$(basename "$existing") + if [ ! -f "$BUNDLED_MANIFESTS/$basename" ]; then + echo " removing stale: $basename" + rm -f "$existing" + fi + done +fi + +# Patch the HelmChart manifest for VM deployment. +HELMCHART="$K3S_MANIFESTS/navigator-helmchart.yaml" +if [ -f "$HELMCHART" ]; then + echo "[gateway-init] patching HelmChart manifest..." + # Use pre-loaded images — don't pull from registry. + sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" + # Clear SSH gateway placeholders (default 127.0.0.1 is correct for local VM). + sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" + sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" +fi + # ── Start k3s ────────────────────────────────────────────────────────── echo "[gateway-init] starting k3s server..." @@ -117,8 +166,6 @@ exec /usr/local/bin/k3s server \ --disable=traefik \ --write-kubeconfig-mode=644 \ --node-ip="$NODE_IP" \ - --flannel-backend=none \ - --disable-network-policy \ - --disable-kube-proxy \ --kube-apiserver-arg=bind-address=0.0.0.0 \ + --resolv-conf=/etc/resolv.conf \ --tls-san=localhost,127.0.0.1,10.0.2.15,192.168.127.2 diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index 49617e5f..060801ed 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -51,6 +51,10 @@ pub enum VmError { /// `fork()` failed. #[error("fork() failed: {0}")] Fork(String), + + /// Post-boot bootstrap failed. + #[error("bootstrap failed: {0}")] + Bootstrap(String), } /// Check a libkrun return code; negative values are errors. 
@@ -125,13 +129,15 @@ pub struct VmConfig { impl VmConfig { /// Default gateway configuration: boots k3s server inside the VM. /// - /// Runs `/srv/gateway-init.sh` which mounts essential filesystems and - /// execs `k3s server`. Exposes the Kubernetes API on port 6443. + /// Runs `/srv/gateway-init.sh` which mounts essential filesystems, + /// deploys the `NemoClaw` helm chart, and execs `k3s server`. + /// Exposes the Kubernetes API on port 6443 and the `NemoClaw` + /// gateway (navigator server `NodePort`) on port 30051. pub fn gateway(rootfs: PathBuf) -> Self { Self { rootfs, - vcpus: 2, - mem_mib: 2048, + vcpus: 4, + mem_mib: 8192, exec_path: "/srv/gateway-init.sh".to_string(), args: vec![], env: vec![ @@ -140,10 +146,15 @@ impl VmConfig { "TERM=xterm".to_string(), ], workdir: "/".to_string(), - // Map host 6443 -> guest 6444 (real kube-apiserver). - // The k3s dynamiclistener on 6443 has TLS issues through - // port forwarding, so we go directly to the apiserver. - port_map: vec!["6443:6444".to_string()], + port_map: vec![ + // Map host 6443 -> guest 6444 (real kube-apiserver). + // The k3s dynamiclistener on 6443 has TLS issues through + // port forwarding, so we go directly to the apiserver. + "6443:6444".to_string(), + // Navigator server NodePort — the gateway endpoint for + // CLI clients and e2e tests. + "30051:30051".to_string(), + ], log_level: 3, // Info — for debugging console_output: None, net: NetBackend::Gvproxy { @@ -624,7 +635,7 @@ pub fn launch(config: &VmConfig) -> Result { let kubeconfig_src = config.rootfs.join("etc/rancher/k3s/k3s.yaml"); eprintln!("Waiting for kubeconfig..."); let mut found = false; - for _ in 0..60 { + for _ in 0..120 { if kubeconfig_src.is_file() && std::fs::metadata(&kubeconfig_src) .map(|m| m.len() > 0) @@ -640,7 +651,7 @@ pub fn launch(config: &VmConfig) -> Result { // Copy kubeconfig to ~/.kube/gateway.yaml, rewriting // the server URL to point at the forwarded host port. 
let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); - let kube_dir = PathBuf::from(home).join(".kube"); + let kube_dir = PathBuf::from(&home).join(".kube"); let _ = std::fs::create_dir_all(&kube_dir); let dest = kube_dir.join("gateway.yaml"); @@ -659,8 +670,16 @@ pub fn launch(config: &VmConfig) -> Result { eprintln!(" failed to read kubeconfig: {e}"); } } + + // Bootstrap the NemoClaw control plane: generate PKI, + // create TLS secrets, and store cluster metadata so CLI + // clients and e2e tests can connect. + if let Err(e) = bootstrap_gateway(&dest) { + eprintln!("Bootstrap failed: {e}"); + eprintln!(" The VM is running but NemoClaw may not be fully operational."); + } } else { - eprintln!(" kubeconfig not found after 60s (k3s may still be starting)"); + eprintln!(" kubeconfig not found after 120s (k3s may still be starting)"); } } @@ -706,6 +725,202 @@ pub fn launch(config: &VmConfig) -> Result { } } +// ── Post-boot bootstrap ──────────────────────────────────────────────── + +/// Cluster name used for metadata and mTLS storage. +const GATEWAY_CLUSTER_NAME: &str = "gateway"; + +/// Gateway port: the host port mapped to the navigator `NodePort` (30051). +const GATEWAY_PORT: u16 = 30051; + +/// Bootstrap the `NemoClaw` control plane after k3s is ready. +/// +/// This mirrors the Docker bootstrap path in `navigator-bootstrap` but runs +/// kubectl from the host against the VM's forwarded kube-apiserver port. +/// +/// Steps: +/// 1. Wait for the `navigator` namespace (created by the Helm controller) +/// 2. Generate a PKI bundle (CA, server cert, client cert) +/// 3. Apply TLS secrets to the cluster via `kubectl` +/// 4. Store cluster metadata and mTLS credentials on the host +fn bootstrap_gateway(kubeconfig: &Path) -> Result<(), VmError> { + let kc = kubeconfig + .to_str() + .ok_or_else(|| VmError::InvalidPath(kubeconfig.display().to_string()))?; + + // 1. Wait for the navigator namespace. 
+ eprintln!("Waiting for navigator namespace..."); + wait_for_namespace(kc)?; + + // 2. Generate PKI. + eprintln!("Generating TLS certificates..."); + let pki_bundle = navigator_bootstrap::pki::generate_pki(&[]) + .map_err(|e| VmError::Bootstrap(format!("PKI generation failed: {e}")))?; + + // 3. Apply TLS secrets. + eprintln!("Creating TLS secrets..."); + apply_tls_secrets(kc, &pki_bundle)?; + + // 4. Store cluster metadata and mTLS credentials. + eprintln!("Storing cluster metadata..."); + let metadata = navigator_bootstrap::ClusterMetadata { + name: GATEWAY_CLUSTER_NAME.to_string(), + gateway_endpoint: format!("https://127.0.0.1:{GATEWAY_PORT}"), + is_remote: false, + gateway_port: GATEWAY_PORT, + kube_port: Some(6443), + remote_host: None, + resolved_host: None, + }; + + navigator_bootstrap::store_cluster_metadata(GATEWAY_CLUSTER_NAME, &metadata) + .map_err(|e| VmError::Bootstrap(format!("failed to store cluster metadata: {e}")))?; + + navigator_bootstrap::mtls::store_pki_bundle(GATEWAY_CLUSTER_NAME, &pki_bundle) + .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS credentials: {e}")))?; + + navigator_bootstrap::save_active_cluster(GATEWAY_CLUSTER_NAME) + .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?; + + eprintln!("Bootstrap complete."); + eprintln!(" Cluster: {GATEWAY_CLUSTER_NAME}"); + eprintln!(" Gateway: https://127.0.0.1:{GATEWAY_PORT}"); + eprintln!(" mTLS: ~/.config/nemoclaw/clusters/{GATEWAY_CLUSTER_NAME}/mtls/"); + + Ok(()) +} + +/// Poll kubectl until the `navigator` namespace exists. 
+fn wait_for_namespace(kubeconfig: &str) -> Result<(), VmError> { + let max_attempts = 120; + for attempt in 0..max_attempts { + let output = std::process::Command::new("kubectl") + .args(["--kubeconfig", kubeconfig]) + .args(["get", "namespace", "navigator", "-o", "name"]) + .output(); + + if let Ok(output) = output + && output.status.success() + { + let stdout = String::from_utf8_lossy(&output.stdout); + if stdout.contains("navigator") { + return Ok(()); + } + } + + if attempt % 10 == 9 { + eprintln!( + " still waiting for navigator namespace ({}/{})", + attempt + 1, + max_attempts + ); + } + std::thread::sleep(std::time::Duration::from_secs(2)); + } + + Err(VmError::Bootstrap( + "timed out waiting for navigator namespace (240s). \ + Check console.log for k3s errors." + .to_string(), + )) +} + +/// Apply the three TLS K8s secrets required by the `NemoClaw` server. +/// +/// Uses `kubectl apply -f -` on the host, piping JSON manifests via stdin. +fn apply_tls_secrets( + kubeconfig: &str, + bundle: &navigator_bootstrap::pki::PkiBundle, +) -> Result<(), VmError> { + use base64::Engine; + use base64::engine::general_purpose::STANDARD; + + let secrets = [ + // 1. navigator-server-tls (kubernetes.io/tls) + serde_json::json!({ + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": navigator_bootstrap::constants::SERVER_TLS_SECRET_NAME, + "namespace": "navigator" + }, + "type": "kubernetes.io/tls", + "data": { + "tls.crt": STANDARD.encode(&bundle.server_cert_pem), + "tls.key": STANDARD.encode(&bundle.server_key_pem) + } + }), + // 2. navigator-server-client-ca (Opaque) + serde_json::json!({ + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": navigator_bootstrap::constants::SERVER_CLIENT_CA_SECRET_NAME, + "namespace": "navigator" + }, + "type": "Opaque", + "data": { + "ca.crt": STANDARD.encode(&bundle.ca_cert_pem) + } + }), + // 3. 
navigator-client-tls (Opaque) — shared by CLI and sandbox pods + serde_json::json!({ + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": navigator_bootstrap::constants::CLIENT_TLS_SECRET_NAME, + "namespace": "navigator" + }, + "type": "Opaque", + "data": { + "tls.crt": STANDARD.encode(&bundle.client_cert_pem), + "tls.key": STANDARD.encode(&bundle.client_key_pem), + "ca.crt": STANDARD.encode(&bundle.ca_cert_pem) + } + }), + ]; + + for secret in &secrets { + let name = secret["metadata"]["name"].as_str().unwrap_or("unknown"); + kubectl_apply(kubeconfig, &secret.to_string()) + .map_err(|e| VmError::Bootstrap(format!("failed to create secret {name}: {e}")))?; + eprintln!(" secret/{name} created"); + } + + Ok(()) +} + +/// Run `kubectl apply -f -` with the given manifest piped via stdin. +fn kubectl_apply(kubeconfig: &str, manifest: &str) -> Result<(), String> { + use std::io::Write; + use std::process::{Command, Stdio}; + + let mut child = Command::new("kubectl") + .args(["--kubeconfig", kubeconfig, "apply", "-f", "-"]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| format!("failed to spawn kubectl: {e}"))?; + + if let Some(mut stdin) = child.stdin.take() { + stdin + .write_all(manifest.as_bytes()) + .map_err(|e| format!("failed to write manifest to kubectl stdin: {e}"))?; + } + + let output = child + .wait_with_output() + .map_err(|e| format!("failed to wait for kubectl: {e}"))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("kubectl apply failed: {stderr}")); + } + + Ok(()) +} + static CHILD_PID: std::sync::atomic::AtomicI32 = std::sync::atomic::AtomicI32::new(0); extern "C" fn forward_signal(_sig: libc::c_int) { diff --git a/tasks/test.toml b/tasks/test.toml index f53f9152..af1955d0 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -43,3 +43,9 @@ description = "Run Python GPU e2e tests" depends = ["python:proto", "cluster"] env 
= { UV_NO_SYNC = "1", PYTHONPATH = "python" } run = "uv run pytest -o python_files='test_*.py' -m gpu -n ${E2E_PARALLEL:-1} e2e/python" + +["e2e:vm"] +description = "Run e2e tests against a gateway VM (macOS ARM64)" +depends = ["python:proto"] +env = { UV_NO_SYNC = "1", PYTHONPATH = "python", OPENSHELL_GATEWAY = "gateway" } +run = "uv run pytest -o python_files='test_*.py' e2e/python" From 14ba703aff135b54591e52f59f833d05faf9ab9f Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Fri, 6 Mar 2026 11:37:15 -0800 Subject: [PATCH 03/14] fix(vm): preserve containerd metadata across boots for fast startup Stop deleting meta.db in gateway-init.sh and include the native snapshotter, content store, and metadata DB in the rootfs built by build-rootfs.sh. Without meta.db, containerd re-extracts all image layers on every boot (~2 min for navigator/server on virtio-fs), causing kubelet CreateContainer timeouts. Also replace the etcd-snapshot approach with direct SQLite cleanup of the kine DB to remove stale pod/event/lease records. --- crates/openshell-cli/src/main.rs | 24 +- crates/openshell-vm/scripts/build-rootfs.sh | 537 +++++++++++++++++- crates/openshell-vm/scripts/gateway-init.sh | 246 ++++++-- crates/openshell-vm/src/lib.rs | 536 ++++++++++++++--- .../helm/openshell/templates/statefulset.yaml | 29 + deploy/helm/openshell/values.yaml | 26 + .../kube/manifests/openshell-helmchart.yaml | 7 + 7 files changed, 1251 insertions(+), 154 deletions(-) diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 5dfe86a5..404dc7e4 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -490,13 +490,13 @@ enum Commands { #[arg(long, short, num_args = 1..)] port: Vec, - /// Number of virtual CPUs. - #[arg(long, default_value_t = 2)] - vcpus: u8, + /// Number of virtual CPUs (default: 4 for gateway, 2 for --exec). + #[arg(long)] + vcpus: Option, - /// RAM in MiB. 
- #[arg(long, default_value_t = 2048)] - mem: u32, + /// RAM in MiB (default: 8192 for gateway, 2048 for --exec). + #[arg(long)] + mem: Option, /// libkrun log level (0=Off .. 5=Trace). #[arg(long, default_value_t = 1)] @@ -2234,8 +2234,8 @@ async fn main() -> Result<()> { let mut config = if let Some(exec_path) = exec { openshell_vm::VmConfig { rootfs, - vcpus, - mem_mib: mem, + vcpus: vcpus.unwrap_or(2), + mem_mib: mem.unwrap_or(2048), exec_path, args, env, @@ -2250,8 +2250,12 @@ async fn main() -> Result<()> { if !port.is_empty() { c.port_map = port; } - c.vcpus = vcpus; - c.mem_mib = mem; + if let Some(v) = vcpus { + c.vcpus = v; + } + if let Some(m) = mem { + c.mem_mib = m; + } c.net = net_backend; c }; diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh index 2b58a2ce..68c7b4ac 100755 --- a/crates/openshell-vm/scripts/build-rootfs.sh +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -5,12 +5,16 @@ # Build an aarch64 Ubuntu rootfs for the gateway microVM. # # Produces a rootfs with k3s pre-installed, the NemoClaw helm chart and -# manifests baked in, and container images pre-loaded for airgap boot. +# manifests baked in, container images pre-loaded, AND a fully initialized +# k3s cluster state (database, TLS, images imported, all services deployed). +# +# On first VM boot, k3s resumes from this pre-baked state instead of +# cold-starting, achieving ~3-5s startup times. 
# # Usage: # ./crates/navigator-vm/scripts/build-rootfs.sh [output_dir] # -# Requires: Docker (or compatible container runtime), curl, helm +# Requires: Docker (or compatible container runtime), curl, helm, zstd set -euo pipefail @@ -18,19 +22,20 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" DEFAULT_ROOTFS="${XDG_DATA_HOME:-${HOME}/.local/share}/nemoclaw/gateway/rootfs" ROOTFS_DIR="${1:-${DEFAULT_ROOTFS}}" CONTAINER_NAME="krun-rootfs-builder" -IMAGE_TAG="krun-rootfs:gateway" +INIT_CONTAINER_NAME="krun-k3s-init" +BASE_IMAGE_TAG="krun-rootfs:gateway" # K3S_VERSION uses the semver "+" form for GitHub releases. # The mise env may provide the Docker-tag form with "-" instead of "+"; # normalise to "+" so the GitHub download URL works. -K3S_VERSION="${K3S_VERSION:-v1.29.8+k3s1}" +K3S_VERSION="${K3S_VERSION:-v1.35.2+k3s1}" K3S_VERSION="${K3S_VERSION//-k3s/+k3s}" # Project root (three levels up from crates/navigator-vm/scripts/) PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" # Container images to pre-load into k3s (arm64). -IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-d1i0nduu2f6qxk.cloudfront.net/navigator}" -IMAGE_TAG="${IMAGE_TAG:-latest}" +IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-navigator}" +IMAGE_TAG="${IMAGE_TAG:-dev}" SERVER_IMAGE="${IMAGE_REPO_BASE}/server:${IMAGE_TAG}" SANDBOX_IMAGE="${IMAGE_REPO_BASE}/sandbox:${IMAGE_TAG}" AGENT_SANDBOX_IMAGE="registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0" @@ -56,9 +61,10 @@ fi # Clean up any previous run docker rm -f "${CONTAINER_NAME}" 2>/dev/null || true +docker rm -f "${INIT_CONTAINER_NAME}" 2>/dev/null || true echo "==> Building base image..." -docker build --platform linux/arm64 -t "${IMAGE_TAG}" -f - . +docker build --platform linux/arm64 -t "${BASE_IMAGE_TAG}" -f - .
<<'DOCKERFILE' FROM ubuntu:22.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -67,6 +73,7 @@ RUN apt-get update && \ iproute2 \ python3 \ busybox-static \ + zstd \ && rm -rf /var/lib/apt/lists/* # busybox-static provides udhcpc for DHCP inside the VM. RUN mkdir -p /usr/share/udhcpc && \ @@ -76,10 +83,15 @@ DOCKERFILE # Create a container and export the filesystem echo "==> Creating container..." -docker create --platform linux/arm64 --name "${CONTAINER_NAME}" "${IMAGE_TAG}" /bin/true +docker create --platform linux/arm64 --name "${CONTAINER_NAME}" "${BASE_IMAGE_TAG}" /bin/true echo "==> Exporting filesystem..." -rm -rf "${ROOTFS_DIR}" +# Previous builds may leave overlayfs work/ dirs with permissions that +# prevent rm on macOS. Force-fix permissions before removing. +if [ -d "${ROOTFS_DIR}" ]; then + chmod -R u+rwx "${ROOTFS_DIR}" 2>/dev/null || true + rm -rf "${ROOTFS_DIR}" +fi mkdir -p "${ROOTFS_DIR}" docker export "${CONTAINER_NAME}" | tar -C "${ROOTFS_DIR}" -xf - @@ -141,30 +153,501 @@ done # Pull arm64 images and save as tarballs in the k3s airgap images # directory. k3s auto-imports from /var/lib/rancher/k3s/agent/images/ # on startup, so no internet access is needed at boot time. +# +# Tarballs are cached in a persistent directory outside the rootfs so +# they survive rebuilds. This avoids re-pulling and re-saving ~1 GiB +# of images each time. IMAGES_DIR="${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images" -mkdir -p "${IMAGES_DIR}" +IMAGE_CACHE_DIR="${XDG_CACHE_HOME:-${HOME}/.cache}/nemoclaw/gateway/images" +mkdir -p "${IMAGES_DIR}" "${IMAGE_CACHE_DIR}" echo "==> Pre-loading container images (arm64)..." pull_and_save() { local image="$1" local output="$2" + local cache="${IMAGE_CACHE_DIR}/$(basename "${output}")" - if [ -f "${output}" ]; then + # Use cached tarball if available. 
+ if [ -f "${cache}" ]; then echo " cached: $(basename "${output}")" + cp "${cache}" "${output}" return 0 fi + # Try to pull; if the registry is unavailable, fall back to the + # local Docker image cache (image may exist from a previous pull). echo " pulling: ${image}..." - docker pull --platform linux/arm64 "${image}" --quiet + if ! docker pull --platform linux/arm64 "${image}" --quiet 2>/dev/null; then + echo " pull failed, checking local Docker cache..." + if ! docker image inspect "${image}" >/dev/null 2>&1; then + echo "ERROR: image ${image} not available locally or from registry" + exit 1 + fi + echo " using locally cached image" + fi + echo " saving: $(basename "${output}")..." - docker save "${image}" -o "${output}" + # Pipe through zstd for faster decompression and smaller tarballs. + # k3s auto-imports .tar.zst files from the airgap images directory. + # -T0 uses all CPU cores; -3 is a good speed/ratio tradeoff. + docker save "${image}" | zstd -T0 -3 -o "${output}" + # Cache for next rebuild. + cp "${output}" "${cache}" +} + +pull_and_save "${SERVER_IMAGE}" "${IMAGES_DIR}/navigator-server.tar.zst" +pull_and_save "${SANDBOX_IMAGE}" "${IMAGES_DIR}/navigator-sandbox.tar.zst" +pull_and_save "${AGENT_SANDBOX_IMAGE}" "${IMAGES_DIR}/agent-sandbox-controller.tar.zst" + +# ── Pre-initialize k3s cluster state ───────────────────────────────── +# Boot k3s inside a Docker container using the rootfs we just built. +# Wait for it to fully initialize (import images, deploy manifests, +# create database), then capture the state back into the rootfs. +# +# This eliminates cold-start latency: on VM boot, k3s finds existing +# state and resumes in ~3-5 seconds instead of 30-60s. + +echo "" +echo "==> Pre-initializing k3s cluster state..." +echo " This boots k3s in a container, waits for full readiness," +echo " then captures the initialized state into the rootfs." + +# Patch the HelmChart manifest for the init container (same patches +# gateway-init.sh applies at runtime). 
+INIT_MANIFESTS="${ROOTFS_DIR}/var/lib/rancher/k3s/server/manifests" +mkdir -p "${INIT_MANIFESTS}" + +# Copy manifests from staging to the k3s manifest directory. +for manifest in "${MANIFEST_DEST}"/*.yaml; do + [ -f "$manifest" ] || continue + cp "$manifest" "${INIT_MANIFESTS}/" +done + +# Patch HelmChart for local images and VM settings. +HELMCHART="${INIT_MANIFESTS}/navigator-helmchart.yaml" +if [ -f "$HELMCHART" ]; then + # Use local images — explicitly imported into containerd. + sed -i '' 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" 2>/dev/null \ + || sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" + # Fill image placeholders. + sed -i '' "s|__IMAGE_REPO_BASE__/server|${SERVER_IMAGE%:*}|g" "$HELMCHART" 2>/dev/null \ + || sed -i "s|__IMAGE_REPO_BASE__/server|${SERVER_IMAGE%:*}|g" "$HELMCHART" + sed -i '' "s|__IMAGE_REPO_BASE__/sandbox:__IMAGE_TAG__|${SANDBOX_IMAGE}|g" "$HELMCHART" 2>/dev/null \ + || sed -i "s|__IMAGE_REPO_BASE__/sandbox:__IMAGE_TAG__|${SANDBOX_IMAGE}|g" "$HELMCHART" + sed -i '' "s|__IMAGE_TAG__|${IMAGE_TAG}|g" "$HELMCHART" 2>/dev/null \ + || sed -i "s|__IMAGE_TAG__|${IMAGE_TAG}|g" "$HELMCHART" + # Enable hostNetwork for VM (no kube-proxy / iptables). + sed -i '' 's|__HOST_NETWORK__|true|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__HOST_NETWORK__|true|g' "$HELMCHART" + # Disable SA token automount. The projected volume at + # /var/run/secrets/kubernetes.io/serviceaccount fails on sandbox + # re-creation because /var/run is a symlink to /run in the container + # image and the native snapshotter + virtiofs combination can't + # resolve it correctly on the second mount. + sed -i '' 's|__AUTOMOUNT_SA_TOKEN__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__AUTOMOUNT_SA_TOKEN__|false|g' "$HELMCHART" + # Mount the k3s kubeconfig into the pod since SA token isn't mounted. 
+ sed -i '' 's|__KUBECONFIG_HOST_PATH__|"/etc/rancher/k3s"|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__KUBECONFIG_HOST_PATH__|"/etc/rancher/k3s"|g' "$HELMCHART" + # Disable persistence — use /tmp for the SQLite database. PVC mounts + # are unreliable on virtiofs. + sed -i '' 's|__PERSISTENCE_ENABLED__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__PERSISTENCE_ENABLED__|false|g' "$HELMCHART" + sed -i '' 's|__DB_URL__|"sqlite:/tmp/navigator.db"|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DB_URL__|"sqlite:/tmp/navigator.db"|g' "$HELMCHART" + # Clear SSH gateway placeholders. + sed -i '' 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" + sed -i '' 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" +fi + +# Boot k3s in a privileged container. We use a Docker volume for the +# k3s data directory because kine (SQLite) creates Unix sockets that +# don't work over bind mounts from macOS. After k3s is ready, we +# copy the state back into the rootfs. +docker rm -f "${INIT_CONTAINER_NAME}" 2>/dev/null || true +docker volume rm krun-k3s-init-data 2>/dev/null || true +docker volume create krun-k3s-init-data >/dev/null + +# Seed the volume with the airgap images and manifests from the rootfs. +echo " Seeding Docker volume with airgap images and manifests..." 
+docker run --rm \ + --platform linux/arm64 \ + -v krun-k3s-init-data:/var/lib/rancher/k3s \ + -v "${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images:/src/images:ro" \ + -v "${ROOTFS_DIR}/var/lib/rancher/k3s/server/static/charts:/src/charts:ro" \ + -v "${ROOTFS_DIR}/var/lib/rancher/k3s/server/manifests:/src/manifests:ro" \ + "${BASE_IMAGE_TAG}" \ + sh -c ' + mkdir -p /var/lib/rancher/k3s/agent/images \ + /var/lib/rancher/k3s/server/static/charts \ + /var/lib/rancher/k3s/server/manifests && + cp /src/images/* /var/lib/rancher/k3s/agent/images/ 2>/dev/null || true && + cp /src/charts/* /var/lib/rancher/k3s/server/static/charts/ 2>/dev/null || true && + cp /src/manifests/* /var/lib/rancher/k3s/server/manifests/ 2>/dev/null || true + ' + +echo " Starting k3s in container..." +# Use --hostname=gateway so the k3s node name matches the VM's hostname. +# This ensures the pre-baked pod schedule (node affinity) is valid when +# the VM boots — avoiding a stale Docker-hostname node in the cluster. +docker run -d \ + --name "${INIT_CONTAINER_NAME}" \ + --hostname gateway \ + --platform linux/arm64 \ + --privileged \ + --tmpfs /run \ + --tmpfs /tmp \ + -v "${K3S_BIN}:/usr/local/bin/k3s:ro" \ + -v krun-k3s-init-data:/var/lib/rancher/k3s \ + "${BASE_IMAGE_TAG}" \ + /usr/local/bin/k3s server \ + --disable=traefik,servicelb,metrics-server,coredns,local-path-provisioner \ + --disable-network-policy \ + --write-kubeconfig-mode=644 \ + --flannel-backend=host-gw \ + --snapshotter=native + +# Wait for kubeconfig to appear. k3s writes it to +# /etc/rancher/k3s/k3s.yaml inside the container. +echo " Waiting for kubeconfig..." 
+for i in $(seq 1 90); do + if docker exec "${INIT_CONTAINER_NAME}" test -s /etc/rancher/k3s/k3s.yaml 2>/dev/null; then + echo " Kubeconfig ready (${i}s)" + break + fi + if [ "$i" -eq 90 ]; then + echo "ERROR: kubeconfig did not appear in 90s" + docker logs "${INIT_CONTAINER_NAME}" --tail 50 + docker rm -f "${INIT_CONTAINER_NAME}" 2>/dev/null || true + docker volume rm krun-k3s-init-data 2>/dev/null || true + exit 1 + fi + sleep 1 +done + +# Wait for containerd to be fully ready before importing images. +# The kubeconfig may appear before containerd's gRPC socket is +# accepting requests. `k3s ctr version` exercises the full path. +echo " Waiting for containerd..." +for i in $(seq 1 60); do + if docker exec "${INIT_CONTAINER_NAME}" /usr/local/bin/k3s ctr version >/dev/null 2>&1; then + echo " Containerd ready (${i}s)" + break + fi + if [ "$i" -eq 60 ]; then + echo "ERROR: containerd did not become ready in 60s" + docker logs "${INIT_CONTAINER_NAME}" --tail 30 + docker rm -f "${INIT_CONTAINER_NAME}" 2>/dev/null || true + docker volume rm krun-k3s-init-data 2>/dev/null || true + exit 1 + fi + sleep 1 +done + +# Explicitly import images into containerd's k8s.io namespace, then +# tag them with the docker.io/ prefix that kubelet expects. +# +# When Docker saves "navigator/server:dev", the tarball stores the +# reference as "navigator/server:dev". But kubelet normalises all +# short names to "docker.io/navigator/server:dev". Without the +# re-tag, kubelet can't find the image and falls back to pulling. +echo " Importing images into containerd..." +docker exec "${INIT_CONTAINER_NAME}" sh -c ' + # Prefer system zstd (installed in base image), fall back to k3s bundled. + if command -v zstd >/dev/null 2>&1; then + ZSTD=zstd + else + ZSTD=$(find /var/lib/rancher/k3s/data -name zstd -type f 2>/dev/null | head -1) + fi + + for f in /var/lib/rancher/k3s/agent/images/*.tar.zst; do + [ -f "$f" ] || continue + base=$(basename "$f") + echo " importing ${base}..." 
+ if [ -n "$ZSTD" ]; then + "$ZSTD" -d -c "$f" | /usr/local/bin/k3s ctr images import - + rc=$? + else + echo " ERROR: no zstd available, cannot decompress ${base}" + rc=1 + fi + if [ $rc -ne 0 ]; then + echo " ERROR: import failed for ${base} (rc=$rc)" + fi + done + + echo "" + echo " Images after import:" + /usr/local/bin/k3s ctr images list -q | grep -v "^sha256:" | sort + + # Re-tag short-name images with docker.io/ prefix so kubelet can + # find them. kubelet normalises "navigator/server:dev" to + # "docker.io/navigator/server:dev". Only re-tag images that look + # like short Docker Hub names (contain "/" but no "." before the + # first "/", i.e. not registry.k8s.io/... or ghcr.io/...). + echo "" + echo " Re-tagging short names with docker.io/ prefix..." + for ref in $(/usr/local/bin/k3s ctr images list -q | grep -v "^sha256:"); do + # Skip already-qualified names (contain a dot before the first slash). + case "$ref" in + *.*/*) continue ;; + esac + fqdn="docker.io/${ref}" + echo " ${ref} -> ${fqdn}" + /usr/local/bin/k3s ctr images tag "${ref}" "${fqdn}" 2>/dev/null || true + done + + echo "" + echo " Final image list:" + /usr/local/bin/k3s ctr images list -q | grep -v "^sha256:" | sort +' 2>&1 | sed 's/^/ /' + +# Wait for the navigator namespace (Helm controller creates it). +echo " Waiting for navigator namespace..." +for i in $(seq 1 120); do + if docker exec "${INIT_CONTAINER_NAME}" \ + /usr/local/bin/k3s kubectl get namespace navigator -o name 2>/dev/null | grep -q navigator; then + echo " Namespace ready (${i}s)" + break + fi + if [ "$i" -eq 120 ]; then + echo "ERROR: navigator namespace did not appear in 120s" + docker logs "${INIT_CONTAINER_NAME}" --tail 50 + docker rm -f "${INIT_CONTAINER_NAME}" 2>/dev/null || true + docker volume rm krun-k3s-init-data 2>/dev/null || true + exit 1 + fi + sleep 1 +done + +# Generate PKI and create TLS secrets inside the cluster. +echo " Generating TLS certificates and creating secrets..." 
+
+# We generate certs outside the container, then apply them via kubectl.
+# Use openssl for cert generation at build time (simpler than pulling in
+# the Rust PKI library). The navigator-bootstrap Rust code will detect
+# these pre-baked secrets at runtime and skip its own generation.
+
+PKI_DIR=$(mktemp -d)
+trap 'rm -rf "${PKI_DIR}"' EXIT
+
+# Generate CA
+openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \
+    -keyout "${PKI_DIR}/ca.key" -out "${PKI_DIR}/ca.crt" \
+    -days 3650 -nodes -subj "/O=navigator/CN=navigator-ca" 2>/dev/null
+
+# Generate server cert with SANs
+# NOTE(review): the server.cnf heredoc and the server-CSR generation
+# command in this section were lost to extraction garbling (the text
+# collapsed to `cat > ... </dev/null`). Reconstructed below from the
+# surrounding commands: the x509 signing step passes
+# `-extensions v3_req -extfile server.cnf`, so the config must define a
+# v3_req section with subjectAltName, and `server.key`/`server.csr` are
+# consumed later. Verify the SAN list against the --tls-san values used
+# by gateway-init.sh before merging.
+cat > "${PKI_DIR}/server.cnf" <<EOF
+[req]
+distinguished_name = dn
+[dn]
+[v3_req]
+subjectAltName = DNS:localhost, IP:127.0.0.1, IP:10.0.2.15, IP:192.168.127.2
+EOF
+openssl req -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \
+    -keyout "${PKI_DIR}/server.key" -out "${PKI_DIR}/server.csr" \
+    -nodes -subj "/CN=navigator-server" -config "${PKI_DIR}/server.cnf" 2>/dev/null
+openssl x509 -req -in "${PKI_DIR}/server.csr" \
+    -CA "${PKI_DIR}/ca.crt" -CAkey "${PKI_DIR}/ca.key" -CAcreateserial \
+    -out "${PKI_DIR}/server.crt" -days 3650 \
+    -extensions v3_req -extfile "${PKI_DIR}/server.cnf" 2>/dev/null
+
+# Generate client cert
+openssl req -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \
+    -keyout "${PKI_DIR}/client.key" -out "${PKI_DIR}/client.csr" \
+    -nodes -subj "/CN=navigator-client" 2>/dev/null
+openssl x509 -req -in "${PKI_DIR}/client.csr" \
+    -CA "${PKI_DIR}/ca.crt" -CAkey "${PKI_DIR}/ca.key" -CAcreateserial \
+    -out "${PKI_DIR}/client.crt" -days 3650 2>/dev/null
+
+# Apply TLS secrets to the cluster via kubectl inside the container.
+# We create JSON manifests and pipe them in.
+apply_secret() { + local name="$1" + local json="$2" + echo "$json" | docker exec -i "${INIT_CONTAINER_NAME}" \ + /usr/local/bin/k3s kubectl apply -f - 2>&1 | sed 's/^/ /' } -pull_and_save "${SERVER_IMAGE}" "${IMAGES_DIR}/navigator-server.tar" -pull_and_save "${SANDBOX_IMAGE}" "${IMAGES_DIR}/navigator-sandbox.tar" -pull_and_save "${AGENT_SANDBOX_IMAGE}" "${IMAGES_DIR}/agent-sandbox-controller.tar" +# Base64 encode the cert files +CA_CRT_B64=$(base64 < "${PKI_DIR}/ca.crt" | tr -d '\n') +SERVER_CRT_B64=$(base64 < "${PKI_DIR}/server.crt" | tr -d '\n') +SERVER_KEY_B64=$(base64 < "${PKI_DIR}/server.key" | tr -d '\n') +CLIENT_CRT_B64=$(base64 < "${PKI_DIR}/client.crt" | tr -d '\n') +CLIENT_KEY_B64=$(base64 < "${PKI_DIR}/client.key" | tr -d '\n') + +apply_secret "navigator-server-tls" "$(cat </dev/null || echo "0") + if [ "$ready" = "1" ]; then + echo " Navigator pod ready (${i}s)" + break + fi + if [ "$i" -eq 120 ]; then + echo "WARNING: navigator pod not ready after 120s, continuing anyway" + docker exec "${INIT_CONTAINER_NAME}" \ + /usr/local/bin/k3s kubectl -n navigator get pods 2>/dev/null | sed 's/^/ /' || true + break + fi + sleep 1 +done + +# Bake PKI materials into the rootfs so the host-side bootstrap can +# find them without waiting for the cluster. This is the key to +# skipping the namespace wait + kubectl apply on every boot. +echo " Baking PKI into rootfs..." +PKI_DEST="${ROOTFS_DIR}/opt/navigator/pki" +mkdir -p "${PKI_DEST}" +cp "${PKI_DIR}/ca.crt" "${PKI_DEST}/ca.crt" +cp "${PKI_DIR}/ca.key" "${PKI_DEST}/ca.key" +cp "${PKI_DIR}/server.crt" "${PKI_DEST}/server.crt" +cp "${PKI_DIR}/server.key" "${PKI_DEST}/server.key" +cp "${PKI_DIR}/client.crt" "${PKI_DEST}/client.crt" +cp "${PKI_DIR}/client.key" "${PKI_DEST}/client.key" + +# Stop k3s gracefully so the kine SQLite DB is flushed. +echo " Stopping k3s..." +docker stop "${INIT_CONTAINER_NAME}" --timeout 10 + +# Surgically clean the kine SQLite DB. 
While k3s was running, +# controllers maintained pods, events, leases, and endpoints. These +# runtime objects would cause the VM's kubelet to reconcile against an +# empty containerd (SandboxChanged) on boot. With k3s stopped, we can +# safely strip them directly from the DB — no race condition, no auth. +echo " Cleaning runtime objects from kine DB..." +CLEANUP_SQL=$(mktemp) +cat > "$CLEANUP_SQL" << 'EOSQL' +DELETE FROM kine WHERE name LIKE '/registry/pods/%'; +DELETE FROM kine WHERE name LIKE '/registry/events/%'; +DELETE FROM kine WHERE name LIKE '/registry/leases/%'; +DELETE FROM kine WHERE name LIKE '/registry/endpointslices/%'; +DELETE FROM kine WHERE name LIKE '/registry/masterleases/%'; +PRAGMA wal_checkpoint(TRUNCATE); +VACUUM; +EOSQL +docker run --rm \ + -v krun-k3s-init-data:/data \ + -v "${CLEANUP_SQL}:/tmp/clean.sql:ro" \ + alpine:latest \ + sh -c ' + apk add --no-cache sqlite >/dev/null 2>&1 + DB=/data/server/db/state.db + if [ ! -f "$DB" ]; then echo "ERROR: state.db not found"; exit 1; fi + echo " Before: $(sqlite3 "$DB" "SELECT COUNT(*) FROM kine;") kine records" + sqlite3 "$DB" < /tmp/clean.sql + echo " After: $(sqlite3 "$DB" "SELECT COUNT(*) FROM kine;") kine records" + ' 2>&1 | sed 's/^/ /' +rm -f "$CLEANUP_SQL" + +# Copy the initialized k3s state from the Docker volume back into the +# rootfs. We use a helper container to access the volume. +echo " Extracting k3s state from Docker volume..." +if [ -d "${ROOTFS_DIR}/var/lib/rancher/k3s" ]; then + chmod -R u+rwx "${ROOTFS_DIR}/var/lib/rancher/k3s" 2>/dev/null || true + rm -rf "${ROOTFS_DIR}/var/lib/rancher/k3s" +fi +mkdir -p "${ROOTFS_DIR}/var/lib/rancher/k3s" +# Use tar instead of cp to handle special files that can't be created +# on the macOS-backed bind mount. tar's --ignore-failed-read and +# warning suppression let us capture everything that matters (database, +# TLS, containerd image store in native snapshotter format) while +# skipping uncopiable metadata. 
+# +# Exclude the overlayfs snapshotter — Docker's init container uses it +# but we use the native snapshotter in the VM. The overlayfs snapshots +# contain full image layer trees that are massive and create files with +# Docker Desktop VirtioFS ownership xattrs that are undeletable on macOS. +# Also exclude runtime task state (stale shim PIDs, sockets) and the +# containerd bolt database (we'll wipe it in the surgical cleanup below). +# Use alpine (native platform) instead of the arm64 base image to avoid +# QEMU emulation overhead. tar doesn't need ARM — it's just copying files. +# Include the containerd native snapshotter, content store, and metadata +# database (meta.db) so the VM doesn't need to re-extract image layers +# at boot time. Exclude the overlayfs snapshotter (Docker's init uses +# overlayfs internally but the VM uses native), runtime task state (stale +# PIDs/sockets), and airgap tarballs (restored from cache below). +# +# The native snapshotter data is ~1-3 GB depending on images. Copying +# through Docker Desktop VirtioFS is slower than native but necessary +# for fast boot times — without it, each boot spends >2 min extracting +# layers on virtio-fs, causing kubelet CreateContainer timeouts. +docker run --rm \ + -v krun-k3s-init-data:/src:ro \ + -v "${ROOTFS_DIR}/var/lib/rancher/k3s:/dst" \ + alpine:latest \ + sh -c 'cd /src && tar cf - \ + --exclude="./agent/containerd/io.containerd.snapshotter.v1.overlayfs" \ + --exclude="./agent/containerd/io.containerd.runtime.v2.task" \ + --exclude="./agent/containerd/io.containerd.sandbox.controller.v1.shim" \ + --exclude="./agent/containerd/tmpmounts" \ + --exclude="./agent/containerd/containerd.log" \ + --exclude="./agent/images" \ + . 2>/dev/null | (cd /dst && tar xf - 2>/dev/null); true' + +# Clean up runtime artifacts that shouldn't persist (same cleanup +# gateway-init.sh does on warm boot). +echo " Cleaning runtime artifacts..." 
+rm -rf "${ROOTFS_DIR}/var/lib/rancher/k3s/server/tls/temporary-certs" 2>/dev/null || true +rm -f "${ROOTFS_DIR}/var/lib/rancher/k3s/server/kine.sock" 2>/dev/null || true +find "${ROOTFS_DIR}/var/lib/rancher/k3s" -name '*.sock' -delete 2>/dev/null || true +find "${ROOTFS_DIR}/run" -name '*.sock' -delete 2>/dev/null || true + +# Restore airgap image tarballs. The extraction above excluded +# ./agent/images (to avoid pulling them from the Docker volume) and the +# rm -rf earlier wiped the pre-loaded copies. Copy them back from the +# persistent cache so k3s can import them on first VM boot. +echo " Restoring airgap image tarballs..." +mkdir -p "${IMAGES_DIR}" +for f in "${IMAGE_CACHE_DIR}"/*.tar.zst; do + [ -f "$f" ] || continue + cp "$f" "${IMAGES_DIR}/" +done +echo " Images: $(ls "${IMAGES_DIR}"/*.tar.zst 2>/dev/null | wc -l | tr -d ' ') tarballs ($(du -sh "${IMAGES_DIR}" 2>/dev/null | cut -f1))" + +# Write sentinel file so gateway-init.sh and the host-side bootstrap +# know this rootfs has pre-initialized state. +echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "${ROOTFS_DIR}/opt/navigator/.initialized" + +docker rm "${INIT_CONTAINER_NAME}" 2>/dev/null || true +docker volume rm krun-k3s-init-data 2>/dev/null || true + +echo " Pre-initialization complete." # ── Verify ──────────────────────────────────────────────────────────── @@ -173,17 +656,27 @@ if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then exit 1 fi +if [ ! -f "${ROOTFS_DIR}/opt/navigator/.initialized" ]; then + echo "WARNING: Pre-initialization sentinel not found. Cold starts will be slow." 
+fi + echo "" echo "==> Rootfs ready at: ${ROOTFS_DIR}" echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)" +echo " Pre-initialized: $(cat "${ROOTFS_DIR}/opt/navigator/.initialized" 2>/dev/null || echo 'no')" -# Show image sizes -echo " Images:" -for img in "${IMAGES_DIR}"/*.tar; do - [ -f "$img" ] || continue - echo " $(basename "$img"): $(du -sh "$img" | cut -f1)" -done +# Show k3s data size +K3S_DATA="${ROOTFS_DIR}/var/lib/rancher/k3s" +if [ -d "${K3S_DATA}" ]; then + echo " k3s state: $(du -sh "${K3S_DATA}" | cut -f1)" +fi + +# Show PKI +if [ -d "${ROOTFS_DIR}/opt/navigator/pki" ]; then + echo " PKI: baked ($(ls "${ROOTFS_DIR}/opt/navigator/pki/" | wc -l | tr -d ' ') files)" +fi echo "" echo "Next steps:" echo " 1. Run: ncl gateway" +echo " Expected startup time: ~3-5 seconds (pre-initialized)" diff --git a/crates/openshell-vm/scripts/gateway-init.sh b/crates/openshell-vm/scripts/gateway-init.sh index af8c6566..bc37541f 100755 --- a/crates/openshell-vm/scripts/gateway-init.sh +++ b/crates/openshell-vm/scripts/gateway-init.sh @@ -4,27 +4,49 @@ # Init script for the gateway microVM. Runs as PID 1 inside the libkrun VM. # -# Mounts essential virtual filesystems, deploys bundled manifests (helm chart, -# agent-sandbox controller), then execs k3s server. +# Mounts essential virtual filesystems, configures networking, then execs +# k3s server. If the rootfs was pre-initialized by build-rootfs.sh (sentinel +# at /opt/navigator/.initialized), the full manifest setup is skipped and +# k3s resumes from its persisted state (~3-5s startup). 
set -e -# ── Mount essential filesystems ───────────────────────────────────────── +BOOT_START=$(date +%s%3N 2>/dev/null || date +%s) -mount -t proc proc /proc 2>/dev/null || true -mount -t sysfs sysfs /sys 2>/dev/null || true -mount -t tmpfs tmpfs /tmp 2>/dev/null || true -mount -t tmpfs tmpfs /run 2>/dev/null || true +ts() { + local now + now=$(date +%s%3N 2>/dev/null || date +%s) + local elapsed=$(( (now - BOOT_START) )) + printf "[%d.%03ds] %s\n" $((elapsed / 1000)) $((elapsed % 1000)) "$*" +} -# devtmpfs is usually auto-mounted by the kernel, but ensure it's there. -mount -t devtmpfs devtmpfs /dev 2>/dev/null || true +PRE_INITIALIZED=false +if [ -f /opt/navigator/.initialized ]; then + PRE_INITIALIZED=true + ts "pre-initialized rootfs detected (fast path)" +fi + +# ── Mount essential filesystems (parallel) ────────────────────────────── +# These are independent; mount them concurrently. + +mount -t proc proc /proc 2>/dev/null & +mount -t sysfs sysfs /sys 2>/dev/null & +mount -t tmpfs tmpfs /tmp 2>/dev/null & +mount -t tmpfs tmpfs /run 2>/dev/null & +mount -t devtmpfs devtmpfs /dev 2>/dev/null & +wait + +# These depend on /dev being mounted. mkdir -p /dev/pts /dev/shm -mount -t devpts devpts /dev/pts 2>/dev/null || true -mount -t tmpfs tmpfs /dev/shm 2>/dev/null || true +mount -t devpts devpts /dev/pts 2>/dev/null & +mount -t tmpfs tmpfs /dev/shm 2>/dev/null & # cgroup2 (unified hierarchy) — required by k3s/containerd. mkdir -p /sys/fs/cgroup -mount -t cgroup2 cgroup2 /sys/fs/cgroup 2>/dev/null || true +mount -t cgroup2 cgroup2 /sys/fs/cgroup 2>/dev/null & +wait + +ts "filesystems mounted" # ── Networking ────────────────────────────────────────────────────────── @@ -39,13 +61,12 @@ if ip link show eth0 >/dev/null 2>&1; then # gvproxy networking — bring up eth0 and get an IP via DHCP. # gvproxy has a built-in DHCP server that assigns 192.168.127.2/24 # with gateway 192.168.127.1 and configures ARP properly. 
- echo "[gateway-init] detected eth0 (gvproxy networking)" + ts "detected eth0 (gvproxy networking)" ip link set eth0 up 2>/dev/null || true # Use DHCP to get IP and configure routes. gvproxy's DHCP server # handles ARP resolution which static config does not. if command -v udhcpc >/dev/null 2>&1; then - echo "[gateway-init] running DHCP (udhcpc)..." # udhcpc needs a script to apply the lease. Use the busybox # default script if available, otherwise write a minimal one. UDHCPC_SCRIPT="/usr/share/udhcpc/default.script" @@ -72,11 +93,12 @@ DHCP_SCRIPT chmod +x "$UDHCPC_SCRIPT" fi # -f: stay in foreground, -q: quit after obtaining lease, - # -n: exit if no lease, -T 2: 2s between retries, -t 5: 5 retries - udhcpc -i eth0 -f -q -n -T 2 -t 5 -s "$UDHCPC_SCRIPT" 2>&1 || true + # -n: exit if no lease, -T 1: 1s between retries, -t 3: 3 retries + # -A 1: wait 1s before first retry (aggressive for local gvproxy) + udhcpc -i eth0 -f -q -n -T 1 -t 3 -A 1 -s "$UDHCPC_SCRIPT" 2>&1 || true else # Fallback to static config if no DHCP client available. - echo "[gateway-init] no DHCP client, using static config" + ts "no DHCP client, using static config" ip addr add 192.168.127.2/24 dev eth0 2>/dev/null || true ip route add default via 192.168.127.1 2>/dev/null || true fi @@ -84,17 +106,16 @@ DHCP_SCRIPT # Ensure DNS is configured. DHCP should have set /etc/resolv.conf, # but if it didn't (or static fallback was used), provide a default. if [ ! -s /etc/resolv.conf ]; then - echo "[gateway-init] no DNS configured, using public DNS" echo "nameserver 8.8.8.8" > /etc/resolv.conf echo "nameserver 8.8.4.4" >> /etc/resolv.conf fi # Read back the IP we got (from DHCP or static). NODE_IP=$(ip -4 addr show eth0 | grep -oP 'inet \K[^/]+' || echo "192.168.127.2") - echo "[gateway-init] eth0 IP: $NODE_IP" + ts "eth0 IP: $NODE_IP" else # TSI or no networking — create a dummy interface for k3s. 
- echo "[gateway-init] no eth0 found, using dummy interface (TSI mode)" + ts "no eth0 found, using dummy interface (TSI mode)" ip link add dummy0 type dummy 2>/dev/null || true ip addr add 10.0.2.15/24 dev dummy0 2>/dev/null || true ip link set dummy0 up 2>/dev/null || true @@ -103,8 +124,6 @@ else NODE_IP="10.0.2.15" fi -echo "[gateway-init] node IP: $NODE_IP" - # ── k3s data directories ─────────────────────────────────────────────── mkdir -p /var/lib/rancher/k3s @@ -112,60 +131,171 @@ mkdir -p /etc/rancher/k3s # Clean stale runtime artifacts from previous boots (virtio-fs persists # the rootfs between VM restarts). -echo "[gateway-init] cleaning stale runtime artifacts..." rm -rf /var/lib/rancher/k3s/server/tls/temporary-certs 2>/dev/null || true rm -f /var/lib/rancher/k3s/server/kine.sock 2>/dev/null || true +# Clean stale node password so k3s doesn't fail validation on reboot. +# Each k3s start generates a new random node password; the old hash in +# the database will not match. Removing the local password file forces +# k3s to re-register with a fresh one. +rm -f /var/lib/rancher/k3s/server/cred/node-passwd 2>/dev/null || true # Also clean any stale pid files and unix sockets find /var/lib/rancher/k3s -name '*.sock' -delete 2>/dev/null || true find /run -name '*.sock' -delete 2>/dev/null || true -# ── Deploy bundled manifests ──────────────────────────────────────────── -# Copy manifests from the staging directory to the k3s auto-deploy path. -# This mirrors the approach in cluster-entrypoint.sh for the Docker path. +# Clean stale containerd runtime state from previous boots. +# +# The rootfs persists across VM restarts via virtio-fs. We PRESERVE the +# bolt metadata database (meta.db) because it contains snapshot and image +# metadata that containerd needs to avoid re-extracting all image layers +# on every boot. 
The native snapshotter on virtio-fs takes ~2 min to +# extract the navigator/server image; keeping meta.db lets containerd +# know the snapshots already exist. +# +# The kine (SQLite) DB cleanup in build-rootfs.sh already removes stale +# pod/sandbox records from k3s etcd, preventing kubelet from reconciling +# against stale sandboxes. Containerd's internal sandbox records in +# meta.db are harmless because the CRI plugin reconciles with kubelet +# on startup — any sandboxes unknown to kubelet are cleaned up gracefully +# without triggering SandboxChanged events. +CONTAINERD_DIR="/var/lib/rancher/k3s/agent/containerd" +if [ -d "$CONTAINERD_DIR" ]; then + # Remove runtime task state (stale shim PIDs, sockets from dead processes). + rm -rf "${CONTAINERD_DIR}/io.containerd.runtime.v2.task" 2>/dev/null || true + # Clean stale ingest temp files from the content store. + rm -rf "${CONTAINERD_DIR}/io.containerd.content.v1.content/ingest" 2>/dev/null || true + mkdir -p "${CONTAINERD_DIR}/io.containerd.content.v1.content/ingest" + # Preserve meta.db — snapshot/image metadata avoids re-extraction. + ts "cleaned containerd runtime state (preserved meta.db + content store + snapshotter)" +fi +rm -rf /run/k3s 2>/dev/null || true + +ts "stale artifacts cleaned" -K3S_MANIFESTS="/var/lib/rancher/k3s/server/manifests" -BUNDLED_MANIFESTS="/opt/navigator/manifests" +# ── Deploy bundled manifests (cold boot only) ─────────────────────────── +# On pre-initialized rootfs, manifests are already in place from the +# build-time k3s boot. Skip this entirely for fast startup. -mkdir -p "$K3S_MANIFESTS" +if [ "$PRE_INITIALIZED" = false ]; then + K3S_MANIFESTS="/var/lib/rancher/k3s/server/manifests" + BUNDLED_MANIFESTS="/opt/navigator/manifests" -if [ -d "$BUNDLED_MANIFESTS" ]; then - echo "[gateway-init] deploying bundled manifests..." - for manifest in "$BUNDLED_MANIFESTS"/*.yaml; do - [ ! 
-f "$manifest" ] && continue - cp "$manifest" "$K3S_MANIFESTS/" - echo " $(basename "$manifest")" - done + mkdir -p "$K3S_MANIFESTS" - # Remove stale navigator-managed manifests from previous boots. - for existing in "$K3S_MANIFESTS"/navigator-*.yaml \ - "$K3S_MANIFESTS"/agent-*.yaml; do - [ ! -f "$existing" ] && continue - basename=$(basename "$existing") - if [ ! -f "$BUNDLED_MANIFESTS/$basename" ]; then - echo " removing stale: $basename" - rm -f "$existing" - fi - done + if [ -d "$BUNDLED_MANIFESTS" ]; then + ts "deploying bundled manifests (cold boot)..." + for manifest in "$BUNDLED_MANIFESTS"/*.yaml; do + [ ! -f "$manifest" ] && continue + cp "$manifest" "$K3S_MANIFESTS/" + done + + # Remove stale navigator-managed manifests from previous boots. + for existing in "$K3S_MANIFESTS"/navigator-*.yaml \ + "$K3S_MANIFESTS"/agent-*.yaml; do + [ ! -f "$existing" ] && continue + basename=$(basename "$existing") + if [ ! -f "$BUNDLED_MANIFESTS/$basename" ]; then + rm -f "$existing" + fi + done + fi + + # Patch the HelmChart manifest for VM deployment. + HELMCHART="$K3S_MANIFESTS/navigator-helmchart.yaml" + if [ -f "$HELMCHART" ]; then + # Use pre-loaded images — don't pull from registry. + sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" + # Clear SSH gateway placeholders (default 127.0.0.1 is correct for local VM). + sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" + sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" + fi + + ts "manifests deployed" +else + ts "skipping manifest deploy (pre-initialized)" fi -# Patch the HelmChart manifest for VM deployment. -HELMCHART="$K3S_MANIFESTS/navigator-helmchart.yaml" -if [ -f "$HELMCHART" ]; then - echo "[gateway-init] patching HelmChart manifest..." - # Use pre-loaded images — don't pull from registry. 
- sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" - # Clear SSH gateway placeholders (default 127.0.0.1 is correct for local VM). - sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" - sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" +# ── CNI configuration (iptables-free) ─────────────────────────────────── +# The libkrun VM kernel has no netfilter/iptables support. Flannel's +# masquerade rules and kube-proxy both require iptables and crash without +# it. We disable both and use a simple bridge CNI with host-local IPAM +# instead. This is sufficient for single-node pod networking. +# +# ipMasq=false avoids any iptables calls in the bridge plugin. +# portmap plugin removed — it requires iptables for DNAT rules. +# +# containerd falls back to default CNI paths: +# conf_dir = /etc/cni/net.d +# bin_dir = /opt/cni/bin +# We write the config to the default path and symlink k3s CNI binaries. + +CNI_CONF_DIR="/etc/cni/net.d" +CNI_BIN_DIR="/opt/cni/bin" +mkdir -p "$CNI_CONF_DIR" "$CNI_BIN_DIR" + +cat > "$CNI_CONF_DIR/10-bridge.conflist" << 'CNICFG' +{ + "cniVersion": "1.0.0", + "name": "bridge", + "plugins": [ + { + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "ipMasq": false, + "hairpinMode": true, + "ipam": { + "type": "host-local", + "ranges": [[{ "subnet": "10.42.0.0/24" }]], + "routes": [{ "dst": "0.0.0.0/0" }] + } + }, + { + "type": "loopback" + } + ] +} +CNICFG + +# Symlink k3s-bundled CNI binaries to the default containerd bin path. +# k3s extracts its tools to /var/lib/rancher/k3s/data//bin/. 
+K3S_DATA_BIN=$(find /var/lib/rancher/k3s/data -maxdepth 2 -name bin -type d 2>/dev/null | head -1) +if [ -n "$K3S_DATA_BIN" ]; then + for plugin in bridge host-local loopback bandwidth; do + [ -f "$K3S_DATA_BIN/$plugin" ] && ln -sf "$K3S_DATA_BIN/$plugin" "$CNI_BIN_DIR/$plugin" + done + ts "CNI binaries linked from $K3S_DATA_BIN" +else + ts "WARNING: k3s data bin dir not found, CNI binaries may be missing" fi +# Also clean up any flannel config from the k3s-specific CNI directory +# (pre-baked state from the Docker build used host-gw flannel). +rm -f "/var/lib/rancher/k3s/agent/etc/cni/net.d/10-flannel.conflist" 2>/dev/null || true + +ts "bridge CNI configured (iptables-free)" + # ── Start k3s ────────────────────────────────────────────────────────── +# Flags tuned for fast single-node startup: +# --disable=traefik,servicelb,metrics-server: skip unused controllers +# --disable=coredns,local-path-provisioner: can't run without bridge CNI +# (no CONFIG_BRIDGE in libkrunfw kernel). Only hostNetwork pods work. +# --disable-network-policy: skip network policy controller +# --disable-kube-proxy: VM kernel has no netfilter/iptables +# --flannel-backend=none: replaced with bridge CNI above +# --snapshotter=native: overlayfs is incompatible with virtiofs (the +# host-backed filesystem in libkrun). Operations inside overlayfs +# mounts on virtiofs fail with ECONNRESET. The native snapshotter +# uses simple directory copies and works reliably on any filesystem. -echo "[gateway-init] starting k3s server..." 
+ts "starting k3s server" exec /usr/local/bin/k3s server \ - --disable=traefik \ + --disable=traefik,servicelb,metrics-server,coredns,local-path-provisioner \ + --disable-network-policy \ + --disable-kube-proxy \ --write-kubeconfig-mode=644 \ --node-ip="$NODE_IP" \ --kube-apiserver-arg=bind-address=0.0.0.0 \ --resolv-conf=/etc/resolv.conf \ - --tls-san=localhost,127.0.0.1,10.0.2.15,192.168.127.2 + --tls-san=localhost,127.0.0.1,10.0.2.15,192.168.127.2 \ + --flannel-backend=none \ + --snapshotter=native diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index 060801ed..db362c2b 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -20,6 +20,7 @@ use std::ffi::CString; use std::os::unix::process::CommandExt as _; use std::path::{Path, PathBuf}; use std::ptr; +use std::time::Instant; // ── Error type ───────────────────────────────────────────────────────── @@ -151,9 +152,12 @@ impl VmConfig { // The k3s dynamiclistener on 6443 has TLS issues through // port forwarding, so we go directly to the apiserver. "6443:6444".to_string(), - // Navigator server NodePort — the gateway endpoint for - // CLI clients and e2e tests. - "30051:30051".to_string(), + // Navigator server — with hostNetwork the server binds + // directly to port 8080 on the VM's interface, bypassing + // NodePort (which requires kube-proxy / iptables). + // Map host 30051 -> guest 8080 so the external-facing + // port stays the same for CLI clients. + "30051:8080".to_string(), ], log_level: 3, // Info — for debugging console_output: None, @@ -311,6 +315,24 @@ fn gvproxy_expose(api_sock: &Path, body: &str) -> Result<(), String> { } } +/// Kill any stale gvproxy process from a previous gateway run. +/// +/// If the CLI crashes or is killed before cleanup, gvproxy keeps running +/// and holds port 2222. A new gvproxy instance then fails with +/// "bind: address already in use". 
+fn kill_stale_gvproxy() { + let output = std::process::Command::new("pkill") + .args(["-x", "gvproxy"]) + .output(); + if let Ok(o) = output { + if o.status.success() { + eprintln!("Killed stale gvproxy process"); + // Brief pause for the port to be released. + std::thread::sleep(std::time::Duration::from_millis(200)); + } + } +} + fn path_to_cstring(path: &Path) -> Result { let s = path .to_str() @@ -335,6 +357,7 @@ pub fn launch(config: &VmConfig) -> Result { }); } + let launch_start = Instant::now(); eprintln!("rootfs: {}", config.rootfs.display()); eprintln!("vm: {} vCPU(s), {} MiB RAM", config.vcpus, config.mem_mib); @@ -422,9 +445,17 @@ pub fn launch(config: &VmConfig) -> Result { let vfkit_sock = run_dir.join("gvproxy-vfkit.sock"); let api_sock = run_dir.join("gvproxy-api.sock"); - // Clean stale sockets + // Kill any stale gvproxy process from a previous run. + // If gvproxy is still holding port 2222, the new instance + // will fail with "bind: address already in use". + kill_stale_gvproxy(); + + // Clean stale sockets (including the -krun.sock file that + // libkrun creates as its datagram endpoint). let _ = std::fs::remove_file(&vfkit_sock); let _ = std::fs::remove_file(&api_sock); + let krun_sock = run_dir.join("gvproxy-vfkit.sock-krun.sock"); + let _ = std::fs::remove_file(&krun_sock); // Start gvproxy eprintln!("Starting gvproxy: {}", binary.display()); @@ -441,19 +472,25 @@ pub fn launch(config: &VmConfig) -> Result { .spawn() .map_err(|e| VmError::Fork(format!("failed to start gvproxy: {e}")))?; - eprintln!("gvproxy started (pid {})", child.id()); + eprintln!( + "gvproxy started (pid {}) [{:.1}s]", + child.id(), + launch_start.elapsed().as_secs_f64() + ); - // Wait for the socket to appear - for _ in 0..50 { - if vfkit_sock.exists() { - break; + // Wait for the socket to appear (exponential backoff: 5ms → 100ms). 
+ { + let deadline = Instant::now() + std::time::Duration::from_secs(5); + let mut interval = std::time::Duration::from_millis(5); + while !vfkit_sock.exists() { + if Instant::now() >= deadline { + return Err(VmError::Fork( + "gvproxy socket did not appear within 5s".to_string(), + )); + } + std::thread::sleep(interval); + interval = (interval * 2).min(std::time::Duration::from_millis(100)); } - std::thread::sleep(std::time::Duration::from_millis(100)); - } - if !vfkit_sock.exists() { - return Err(VmError::Fork( - "gvproxy socket did not appear within 5s".to_string(), - )); } // Disable implicit TSI and add virtio-net via gvproxy @@ -500,7 +537,10 @@ pub fn launch(config: &VmConfig) -> Result { )?; } - eprintln!("Networking: gvproxy (virtio-net via {vfkit_sock:?})"); + eprintln!( + "Networking: gvproxy (virtio-net) [{:.1}s]", + launch_start.elapsed().as_secs_f64() + ); gvproxy_child = Some(child); gvproxy_api_sock = Some(api_sock); } @@ -571,6 +611,7 @@ pub fn launch(config: &VmConfig) -> Result { // krun_start_enter() never returns — it calls exit() when the guest // process exits. We fork so the parent can monitor and report. + let boot_start = Instant::now(); eprintln!("Booting microVM..."); let pid = unsafe { libc::fork() }; @@ -584,7 +625,10 @@ pub fn launch(config: &VmConfig) -> Result { } _ => { // Parent: wait for child - eprintln!("VM started (child pid {pid})"); + eprintln!( + "VM started (child pid {pid}) [{:.1}s]", + boot_start.elapsed().as_secs_f64() + ); for pm in &config.port_map { let host_port = pm.split(':').next().unwrap_or(pm); eprintln!(" port {pm} -> http://localhost:{host_port}"); @@ -595,10 +639,27 @@ pub fn launch(config: &VmConfig) -> Result { // The port_map entries use the same "host:guest" format // as TSI, but here we translate them into gvproxy expose // calls targeting the guest IP (192.168.127.2). + // + // Instead of a fixed 500ms sleep, poll the API socket with + // exponential backoff (5ms → 200ms, ~1s total budget). 
if let Some(ref api_sock) = gvproxy_api_sock { - // Wait for gvproxy API socket to be ready - std::thread::sleep(std::time::Duration::from_millis(500)); - eprintln!("Setting up gvproxy port forwarding..."); + let fwd_start = Instant::now(); + // Wait for the API socket to appear (it lags slightly + // behind the vfkit data socket). + { + let deadline = Instant::now() + std::time::Duration::from_secs(2); + let mut interval = std::time::Duration::from_millis(5); + while !api_sock.exists() { + if Instant::now() >= deadline { + eprintln!( + "warning: gvproxy API socket not ready after 2s, attempting anyway" + ); + break; + } + std::thread::sleep(interval); + interval = (interval * 2).min(std::time::Duration::from_millis(200)); + } + } let guest_ip = "192.168.127.2"; @@ -626,6 +687,10 @@ pub fn launch(config: &VmConfig) -> Result { } } } + eprintln!( + "Port forwarding ready [{:.1}s]", + fwd_start.elapsed().as_secs_f64() + ); } // Wait for k3s kubeconfig to appear (virtio-fs makes it @@ -633,9 +698,15 @@ pub fn launch(config: &VmConfig) -> Result { // (when exec_path is the default init script). if config.exec_path == "/srv/gateway-init.sh" { let kubeconfig_src = config.rootfs.join("etc/rancher/k3s/k3s.yaml"); + let kc_start = Instant::now(); eprintln!("Waiting for kubeconfig..."); + + // Aggressive polling initially (100ms) then back off to 1s. + // Total budget: ~90s (enough for k3s cold start). 
let mut found = false; - for _ in 0..120 { + let deadline = Instant::now() + std::time::Duration::from_secs(90); + let mut interval = std::time::Duration::from_millis(100); + while Instant::now() < deadline { if kubeconfig_src.is_file() && std::fs::metadata(&kubeconfig_src) .map(|m| m.len() > 0) @@ -644,10 +715,15 @@ pub fn launch(config: &VmConfig) -> Result { found = true; break; } - std::thread::sleep(std::time::Duration::from_secs(1)); + std::thread::sleep(interval); + interval = (interval * 2).min(std::time::Duration::from_secs(1)); } if found { + eprintln!( + "Kubeconfig appeared [{:.1}s]", + kc_start.elapsed().as_secs_f64() + ); // Copy kubeconfig to ~/.kube/gateway.yaml, rewriting // the server URL to point at the forwarded host port. let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); @@ -674,15 +750,34 @@ pub fn launch(config: &VmConfig) -> Result { // Bootstrap the NemoClaw control plane: generate PKI, // create TLS secrets, and store cluster metadata so CLI // clients and e2e tests can connect. - if let Err(e) = bootstrap_gateway(&dest) { + // + // If the rootfs has pre-baked PKI (from build-rootfs.sh), + // this skips the namespace wait and kubectl apply entirely. + if let Err(e) = bootstrap_gateway(&dest, &config.rootfs) { eprintln!("Bootstrap failed: {e}"); eprintln!(" The VM is running but NemoClaw may not be fully operational."); } } else { - eprintln!(" kubeconfig not found after 120s (k3s may still be starting)"); + eprintln!(" kubeconfig not found after 90s (k3s may still be starting)"); } + + // On warm reboots (rootfs persists via virtio-fs), the k3s + // database may have stale pod records from the previous + // session. containerd v2 doesn't always recover these + // automatically. Force-delete any pods stuck in Unknown + // or failed state so the StatefulSet controller recreates + // them. 
+ let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); + let kubeconfig_dest = PathBuf::from(&home).join(".kube/gateway.yaml"); + recover_stale_pods(&kubeconfig_dest); + + // Wait for the gRPC service to be reachable before + // declaring "Ready". The navigator pod needs a few + // seconds after k3s starts to bind its port. + wait_for_gateway_service(); } + eprintln!("Ready [{:.1}s total]", boot_start.elapsed().as_secs_f64()); eprintln!("Press Ctrl+C to stop."); // Forward signals to child @@ -735,34 +830,22 @@ const GATEWAY_PORT: u16 = 30051; /// Bootstrap the `NemoClaw` control plane after k3s is ready. /// -/// This mirrors the Docker bootstrap path in `navigator-bootstrap` but runs -/// kubectl from the host against the VM's forwarded kube-apiserver port. +/// Three paths, fastest first: /// -/// Steps: -/// 1. Wait for the `navigator` namespace (created by the Helm controller) -/// 2. Generate a PKI bundle (CA, server cert, client cert) -/// 3. Apply TLS secrets to the cluster via `kubectl` -/// 4. Store cluster metadata and mTLS credentials on the host -fn bootstrap_gateway(kubeconfig: &Path) -> Result<(), VmError> { - let kc = kubeconfig - .to_str() - .ok_or_else(|| VmError::InvalidPath(kubeconfig.display().to_string()))?; - - // 1. Wait for the navigator namespace. - eprintln!("Waiting for navigator namespace..."); - wait_for_namespace(kc)?; - - // 2. Generate PKI. - eprintln!("Generating TLS certificates..."); - let pki_bundle = navigator_bootstrap::pki::generate_pki(&[]) - .map_err(|e| VmError::Bootstrap(format!("PKI generation failed: {e}")))?; - - // 3. Apply TLS secrets. - eprintln!("Creating TLS secrets..."); - apply_tls_secrets(kc, &pki_bundle)?; +/// 1. **Pre-baked PKI** (from `build-rootfs.sh`): reads PEM files directly +/// from the rootfs, stores creds + metadata on the host. No cluster +/// interaction at all. Completes in <50ms. +/// +/// 2. **Warm boot**: host-side metadata + mTLS certs survive across VM +/// restarts. 
Waits for the navigator namespace, then returns. +/// +/// 3. **Cold boot**: generates fresh PKI, waits for namespace, applies +/// secrets via kubectl, stores everything on the host. +fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { + let bootstrap_start = Instant::now(); - // 4. Store cluster metadata and mTLS credentials. - eprintln!("Storing cluster metadata..."); + // Build cluster metadata early — it only depends on knowing the port and + // cluster name, not on the cluster being ready. let metadata = navigator_bootstrap::ClusterMetadata { name: GATEWAY_CLUSTER_NAME.to_string(), gateway_endpoint: format!("https://127.0.0.1:{GATEWAY_PORT}"), @@ -773,16 +856,98 @@ fn bootstrap_gateway(kubeconfig: &Path) -> Result<(), VmError> { resolved_host: None, }; + // ── Path 1: Pre-baked PKI from build-rootfs.sh ───────────────── + // + // If the rootfs was pre-initialized, PKI files are baked into + // /opt/navigator/pki/. Read them directly — no cluster interaction + // needed. The TLS secrets already exist inside the cluster from + // the build-time k3s boot. + let pki_dir = rootfs.join("opt/navigator/pki"); + if pki_dir.join("ca.crt").is_file() { + eprintln!("Pre-baked PKI detected — fast bootstrap"); + + let read = |name: &str| -> Result { + std::fs::read_to_string(pki_dir.join(name)) + .map_err(|e| VmError::Bootstrap(format!("failed to read {name}: {e}"))) + }; + + let pki_bundle = navigator_bootstrap::pki::PkiBundle { + ca_cert_pem: read("ca.crt")?, + ca_key_pem: read("ca.key")?, + server_cert_pem: read("server.crt")?, + server_key_pem: read("server.key")?, + client_cert_pem: read("client.crt")?, + client_key_pem: read("client.key")?, + }; + + // Store metadata and credentials on the host. 
+ navigator_bootstrap::store_cluster_metadata(GATEWAY_CLUSTER_NAME, &metadata) + .map_err(|e| VmError::Bootstrap(format!("failed to store metadata: {e}")))?; + + navigator_bootstrap::mtls::store_pki_bundle(GATEWAY_CLUSTER_NAME, &pki_bundle) + .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS creds: {e}")))?; + + navigator_bootstrap::save_active_cluster(GATEWAY_CLUSTER_NAME) + .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?; + + eprintln!( + "Bootstrap complete [{:.1}s]", + bootstrap_start.elapsed().as_secs_f64() + ); + eprintln!(" Cluster: {GATEWAY_CLUSTER_NAME}"); + eprintln!(" Gateway: https://127.0.0.1:{GATEWAY_PORT}"); + eprintln!(" mTLS: ~/.config/nemoclaw/clusters/{GATEWAY_CLUSTER_NAME}/mtls/"); + return Ok(()); + } + + // ── Path 2: Warm boot ────────────────────────────────────────── + // + // Host-side metadata + mTLS certs survive from a previous boot. + // Just wait for the namespace to confirm k3s is ready. + let kc = kubeconfig + .to_str() + .ok_or_else(|| VmError::InvalidPath(kubeconfig.display().to_string()))?; + + if is_warm_boot() { + eprintln!("Warm boot detected — reusing existing PKI and metadata."); + eprintln!("Waiting for navigator namespace..."); + wait_for_namespace(kc)?; + eprintln!( + "Warm boot ready [{:.1}s]", + bootstrap_start.elapsed().as_secs_f64() + ); + eprintln!(" Cluster: {GATEWAY_CLUSTER_NAME}"); + eprintln!(" Gateway: https://127.0.0.1:{GATEWAY_PORT}"); + eprintln!(" mTLS: ~/.config/nemoclaw/clusters/{GATEWAY_CLUSTER_NAME}/mtls/"); + return Ok(()); + } + + // ── Path 3: Cold boot (no pre-baked state) ───────────────────── + eprintln!("Generating TLS certificates..."); + let pki_bundle = navigator_bootstrap::pki::generate_pki(&[]) + .map_err(|e| VmError::Bootstrap(format!("PKI generation failed: {e}")))?; + navigator_bootstrap::store_cluster_metadata(GATEWAY_CLUSTER_NAME, &metadata) .map_err(|e| VmError::Bootstrap(format!("failed to store cluster metadata: {e}")))?; + let ns_start = 
Instant::now(); + eprintln!("Waiting for navigator namespace..."); + wait_for_namespace(kc)?; + eprintln!("Namespace ready [{:.1}s]", ns_start.elapsed().as_secs_f64()); + + eprintln!("Creating TLS secrets..."); + apply_tls_secrets(kc, &pki_bundle)?; + navigator_bootstrap::mtls::store_pki_bundle(GATEWAY_CLUSTER_NAME, &pki_bundle) .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS credentials: {e}")))?; navigator_bootstrap::save_active_cluster(GATEWAY_CLUSTER_NAME) .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?; - eprintln!("Bootstrap complete."); + eprintln!( + "Bootstrap complete [{:.1}s]", + bootstrap_start.elapsed().as_secs_f64() + ); eprintln!(" Cluster: {GATEWAY_CLUSTER_NAME}"); eprintln!(" Gateway: https://127.0.0.1:{GATEWAY_PORT}"); eprintln!(" mTLS: ~/.config/nemoclaw/clusters/{GATEWAY_CLUSTER_NAME}/mtls/"); @@ -790,10 +955,249 @@ fn bootstrap_gateway(kubeconfig: &Path) -> Result<(), VmError> { Ok(()) } +/// Check whether a previous bootstrap left valid state on disk. +/// +/// A warm boot is detected when both: +/// - Cluster metadata exists: `$XDG_CONFIG_HOME/nemoclaw/clusters/gateway_metadata.json` +/// - mTLS certs exist: `$XDG_CONFIG_HOME/nemoclaw/clusters/gateway/mtls/{ca.crt,tls.crt,tls.key}` +/// +/// When true, the host-side bootstrap (PKI generation, kubectl apply, metadata +/// storage) can be skipped because the virtio-fs rootfs persists k3s state +/// (TLS certs, kine/sqlite, containerd images, helm releases) across VM restarts. +fn is_warm_boot() -> bool { + let Ok(home) = std::env::var("HOME") else { + return false; + }; + + let config_base = + std::env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| format!("{home}/.config")); + + let config_dir = PathBuf::from(&config_base) + .join("nemoclaw") + .join("clusters"); + + // Check metadata file. 
+ let metadata_path = config_dir.join(format!("{GATEWAY_CLUSTER_NAME}_metadata.json")); + if !metadata_path.is_file() { + return false; + } + + // Check mTLS cert files. + let mtls_dir = config_dir.join(GATEWAY_CLUSTER_NAME).join("mtls"); + for name in &["ca.crt", "tls.crt", "tls.key"] { + let path = mtls_dir.join(name); + match std::fs::metadata(&path) { + Ok(m) if m.is_file() && m.len() > 0 => {} + _ => return false, + } + } + + true +} + +/// Wait for the navigator pod to become Ready inside the k3s cluster +/// and verify the gRPC service is reachable from the host. +/// +/// Stale pod/lease records are cleaned from the kine DB at build time +/// (see `build-rootfs.sh`). Containerd metadata (meta.db) is preserved +/// across boots so the native snapshotter doesn't re-extract image layers. +/// Runtime task state is cleaned by `gateway-init.sh` on each boot. +/// +/// We poll kubectl for `Ready=True`, then verify with a host-side TCP +/// probe to `127.0.0.1:30051` to confirm the full gvproxy->VM->pod +/// path works. gvproxy accepts TCP connections even when nothing listens +/// in the guest, but those connections reset immediately. A connection +/// that stays open (server waiting for TLS `ClientHello`) proves the pod +/// is genuinely serving. +fn wait_for_gateway_service() { + let start = Instant::now(); + let timeout = std::time::Duration::from_secs(90); + let poll_interval = std::time::Duration::from_secs(1); + + let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); + let kubeconfig = PathBuf::from(&home).join(".kube/gateway.yaml"); + let kc = kubeconfig.to_string_lossy(); + + eprintln!("Waiting for gateway service..."); + + loop { + // Check if the pod is Ready. 
+ let is_ready = std::process::Command::new("kubectl") + .args(["--kubeconfig", &kc]) + .args([ + "-n", + "navigator", + "get", + "pod", + "navigator-0", + "-o", + "jsonpath={.status.conditions[?(@.type==\"Ready\")].status}", + ]) + .output() + .ok() + .filter(|o| o.status.success()) + .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string()) + .is_some_and(|s| s == "True"); + + if is_ready { + // Pod reports Ready — verify with a host-side TCP probe to + // confirm the full gvproxy -> VM -> pod path works. + if host_tcp_probe() { + eprintln!("Service healthy [{:.1}s]", start.elapsed().as_secs_f64()); + return; + } + eprintln!(" pod Ready but host TCP probe failed, retrying..."); + } + + if start.elapsed() >= timeout { + eprintln!( + " gateway service not ready after {:.0}s, continuing anyway", + timeout.as_secs_f64() + ); + return; + } + + std::thread::sleep(poll_interval); + } +} + +/// Force-delete pods stuck in `Unknown` or failed states (safety net). +/// +/// On warm reboots (virtio-fs persists rootfs across VM restarts), the +/// k3s database retains pod records from the previous session. Containerd +/// runtime task state is cleaned but metadata (meta.db) is preserved to +/// avoid re-extracting image layers. This function is a safety net for +/// edge cases where reconciliation fails — it force-deletes pods in +/// `Unknown` or `Failed` state so controllers can recreate them. +fn recover_stale_pods(kubeconfig: &Path) { + let kc = kubeconfig.to_string_lossy(); + + // Wait briefly for the API server to be responsive. 
+ let deadline = Instant::now() + std::time::Duration::from_secs(30); + let mut interval = std::time::Duration::from_millis(500); + loop { + if let Ok(output) = std::process::Command::new("kubectl") + .args(["--kubeconfig", &kc]) + .args(["get", "nodes", "-o", "name"]) + .output() + { + if output.status.success() { + break; + } + } + if Instant::now() >= deadline { + eprintln!(" API server not ready after 30s, skipping pod recovery"); + return; + } + std::thread::sleep(interval); + interval = (interval * 2).min(std::time::Duration::from_secs(2)); + } + + // Get all pods in a parseable format: namespace/name status + let output = std::process::Command::new("kubectl") + .args(["--kubeconfig", &kc]) + .args([ + "get", "pods", "-A", + "-o", "jsonpath={range .items[*]}{.metadata.namespace}/{.metadata.name} {.status.phase}\\n{end}", + ]) + .output(); + + let Ok(output) = output else { return }; + if !output.status.success() { + return; + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut stale_count = 0u32; + + for line in stdout.lines() { + let parts: Vec<&str> = line.trim().split_whitespace().collect(); + if parts.len() != 2 { + continue; + } + let (ns_name, phase) = (parts[0], parts[1]); + // Delete pods in Unknown or Failed state — they can't recover + // from stale containerd sandbox state. 
+ if phase == "Unknown" || phase == "Failed" { + let ns_and_name: Vec<&str> = ns_name.splitn(2, '/').collect(); + if ns_and_name.len() != 2 { + continue; + } + let (ns, name) = (ns_and_name[0], ns_and_name[1]); + let result = std::process::Command::new("kubectl") + .args(["--kubeconfig", &kc]) + .args([ + "-n", + ns, + "delete", + "pod", + name, + "--force", + "--grace-period=0", + ]) + .output(); + + if let Ok(r) = result { + if r.status.success() { + stale_count += 1; + } + } + } + } + + if stale_count > 0 { + eprintln!("Recovered {stale_count} stale pod(s)"); + } +} + +/// Probe `127.0.0.1:30051` from the host to verify the full +/// gvproxy → VM → pod path is working. +/// +/// gvproxy accepts TCP connections even when the guest port is closed, +/// but those connections are immediately reset. A server that is truly +/// listening will hold the connection open (waiting for a TLS +/// ClientHello). We exploit this: connect, then try a short read. If +/// the read **times out** the server is alive; if it returns an error +/// (reset/EOF) the server is down. +fn host_tcp_probe() -> bool { + use std::io::Read; + use std::net::{SocketAddr, TcpStream}; + use std::time::Duration; + + let addr: SocketAddr = ([127, 0, 0, 1], GATEWAY_PORT).into(); + let Ok(mut stream) = TcpStream::connect_timeout(&addr, Duration::from_secs(2)) else { + return false; + }; + + // A short read timeout: if the server is alive it will wait for us + // to send a TLS ClientHello, so the read will time out (= good). + // If the connection resets or closes, the server is dead. + stream + .set_read_timeout(Some(Duration::from_millis(200))) + .ok(); + let mut buf = [0u8; 1]; + match stream.read(&mut buf) { + Err(e) + if e.kind() == std::io::ErrorKind::WouldBlock + || e.kind() == std::io::ErrorKind::TimedOut => + { + true // Timeout = server alive, waiting for ClientHello. + } + _ => false, // Reset, EOF, or unexpected data = not healthy. 
+ } +} + /// Poll kubectl until the `navigator` namespace exists. +/// +/// Uses exponential backoff (500ms → 3s) to minimize latency when the +/// namespace appears quickly while avoiding kubectl spam. fn wait_for_namespace(kubeconfig: &str) -> Result<(), VmError> { - let max_attempts = 120; - for attempt in 0..max_attempts { + let start = Instant::now(); + let timeout = std::time::Duration::from_secs(180); + let mut interval = std::time::Duration::from_millis(500); + let mut attempts = 0u32; + + loop { let output = std::process::Command::new("kubectl") .args(["--kubeconfig", kubeconfig]) .args(["get", "namespace", "navigator", "-o", "name"]) @@ -808,21 +1212,25 @@ fn wait_for_namespace(kubeconfig: &str) -> Result<(), VmError> { } } - if attempt % 10 == 9 { + if start.elapsed() >= timeout { + return Err(VmError::Bootstrap( + "timed out waiting for navigator namespace (180s). \ + Check console.log for k3s errors." + .to_string(), + )); + } + + attempts += 1; + if attempts.is_multiple_of(10) { eprintln!( - " still waiting for navigator namespace ({}/{})", - attempt + 1, - max_attempts + " still waiting for navigator namespace ({:.0}s elapsed)", + start.elapsed().as_secs_f64() ); } - std::thread::sleep(std::time::Duration::from_secs(2)); - } - Err(VmError::Bootstrap( - "timed out waiting for navigator namespace (240s). \ - Check console.log for k3s errors." - .to_string(), - )) + std::thread::sleep(interval); + interval = (interval * 2).min(std::time::Duration::from_secs(3)); + } } /// Apply the three TLS K8s secrets required by the `NemoClaw` server. 
diff --git a/deploy/helm/openshell/templates/statefulset.yaml b/deploy/helm/openshell/templates/statefulset.yaml index 83ece499..ee0b38e0 100644 --- a/deploy/helm/openshell/templates/statefulset.yaml +++ b/deploy/helm/openshell/templates/statefulset.yaml @@ -26,11 +26,16 @@ spec: {{- end }} spec: terminationGracePeriodSeconds: {{ .Values.podLifecycle.terminationGracePeriodSeconds }} + {{- if .Values.hostNetwork }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + {{- end }} {{- with .Values.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} serviceAccountName: {{ include "openshell.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} {{- if .Values.server.hostGatewayIP }} hostAliases: - ip: {{ .Values.server.hostGatewayIP | quote }} @@ -94,10 +99,16 @@ spec: - name: OPENSHELL_DISABLE_GATEWAY_AUTH value: "true" {{- end }} + {{- if and (not .Values.automountServiceAccountToken) .Values.kubeconfig.hostPath }} + - name: KUBECONFIG + value: /etc/openshell/kubeconfig + {{- end }} {{- end }} volumeMounts: + {{- if .Values.persistence.enabled }} - name: openshell-data mountPath: /var/openshell + {{- end }} {{- if not .Values.server.disableTls }} - name: tls-cert mountPath: /etc/openshell-tls/server @@ -105,6 +116,12 @@ spec: - name: tls-client-ca mountPath: /etc/openshell-tls/client-ca readOnly: true + {{- if and (not .Values.automountServiceAccountToken) .Values.kubeconfig.hostPath }} + - name: kubeconfig + mountPath: /etc/openshell/kubeconfig + subPath: k3s.yaml + readOnly: true + {{- end }} {{- end }} ports: - name: grpc @@ -134,6 +151,16 @@ spec: - name: tls-client-ca secret: secretName: {{ .Values.server.tls.clientCaSecretName }} + {{- if not .Values.persistence.enabled }} + - name: openshell-data + emptyDir: {} + {{- end }} + {{- if and (not .Values.automountServiceAccountToken) .Values.kubeconfig.hostPath }} + - name: kubeconfig + hostPath: + path: {{ .Values.kubeconfig.hostPath }} + 
type: Directory + {{- end }} {{- end }} {{- with .Values.nodeSelector }} nodeSelector: @@ -147,6 +174,7 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.persistence.enabled }} volumeClaimTemplates: - metadata: name: openshell-data @@ -155,3 +183,4 @@ spec: resources: requests: storage: 1Gi + {{- end }} diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index 2691fc48..4af63f23 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -19,6 +19,19 @@ serviceAccount: annotations: {} name: "" +# Whether to auto-mount the ServiceAccount token into the pod. Disabled +# in microVM gateway mode because the projected volume mount at +# /var/run/secrets/kubernetes.io/serviceaccount hits a containerd +# native-snapshotter + virtiofs incompatibility on sandbox re-creation. +automountServiceAccountToken: true + +# When automountServiceAccountToken is false, the navigator server needs +# a kubeconfig to reach the API server. Point this to the directory +# containing the k3s kubeconfig (k3s.yaml). Only used when +# automountServiceAccountToken is false. +kubeconfig: + hostPath: "" + podAnnotations: {} podLabels: {} @@ -56,6 +69,19 @@ probes: resources: {} +# Persistent storage for the navigator database. When disabled, an +# emptyDir volume is used instead of a PVC. This is useful in microVM +# environments where overlayfs-on-virtiofs doesn't support PVC mounts +# reliably. +persistence: + enabled: true + +# Run the pod directly on the host network. Useful in microVM +# environments where kube-proxy is unavailable (no iptables). +# When true, the pod binds to the VM's eth0 and NodePort is +# unnecessary — gvproxy forwards host ports to the pod directly. 
+hostNetwork: false + nodeSelector: {} tolerations: [] diff --git a/deploy/kube/manifests/openshell-helmchart.yaml b/deploy/kube/manifests/openshell-helmchart.yaml index 2245c72e..cf07bf00 100644 --- a/deploy/kube/manifests/openshell-helmchart.yaml +++ b/deploy/kube/manifests/openshell-helmchart.yaml @@ -28,11 +28,18 @@ spec: repository: ghcr.io/nvidia/openshell/gateway tag: latest pullPolicy: Always + hostNetwork: __HOST_NETWORK__ + automountServiceAccountToken: __AUTOMOUNT_SA_TOKEN__ + kubeconfig: + hostPath: __KUBECONFIG_HOST_PATH__ + persistence: + enabled: __PERSISTENCE_ENABLED__ server: sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest sshGatewayHost: __SSH_GATEWAY_HOST__ sshGatewayPort: __SSH_GATEWAY_PORT__ sshHandshakeSecret: __SSH_HANDSHAKE_SECRET__ + dbUrl: __DB_URL__ grpcEndpoint: "https://openshell.openshell.svc.cluster.local:8080" hostGatewayIP: __HOST_GATEWAY_IP__ disableGatewayAuth: __DISABLE_GATEWAY_AUTH__ From 3154fd248db017f079b24563197327aee33f3315 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Fri, 6 Mar 2026 13:30:30 -0800 Subject: [PATCH 04/14] refactor(vm): extract gateway into standalone binary Move the gateway VM launching out of `nemoclaw gateway` into its own `gateway` binary built from the navigator-vm crate. The nemoclaw CLI no longer links against libkrun or requires macOS hypervisor codesigning. Add scripts/bin/gateway wrapper (build + codesign + exec) and clean up scripts/bin/nemoclaw to remove navigator-vm artifacts. 
--- Cargo.lock | 4 +- crates/openshell-cli/Cargo.toml | 1 - crates/openshell-cli/src/main.rs | 120 ------------------------- crates/openshell-vm/Cargo.toml | 9 +- crates/openshell-vm/src/main.rs | 150 +++++++++++++++++++++++++++++++ scripts/bin/gateway | 25 ++++++ scripts/bin/openshell | 19 +--- 7 files changed, 189 insertions(+), 139 deletions(-) create mode 100644 crates/openshell-vm/src/main.rs create mode 100755 scripts/bin/gateway diff --git a/Cargo.lock b/Cargo.lock index 4eaecdb3..a0649536 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2717,7 +2717,6 @@ dependencies = [ "navigator-policy", "navigator-providers", "navigator-tui", - "navigator-vm", "owo-colors", "prost-types", "rcgen", @@ -2904,11 +2903,14 @@ name = "navigator-vm" version = "0.1.0" dependencies = [ "base64 0.22.1", + "clap", "libc", "miette", "navigator-bootstrap", "serde_json", "thiserror 2.0.18", + "tracing", + "tracing-subscriber", ] [[package]] diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index 69db0281..61c20450 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -20,7 +20,6 @@ openshell-core = { path = "../openshell-core" } openshell-policy = { path = "../openshell-policy" } openshell-providers = { path = "../openshell-providers" } openshell-tui = { path = "../openshell-tui" } -openshell-vm = { path = "../openshell-vm" } serde = { workspace = true } serde_json = { workspace = true } prost-types = { workspace = true } diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 404dc7e4..84a323b5 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -9,7 +9,6 @@ use clap_complete::env::CompleteEnv; use miette::Result; use owo_colors::OwoColorize; use std::io::Write; -use std::path::PathBuf; use openshell_bootstrap::{ edge_token::load_edge_token, get_gateway_metadata, list_gateways, load_active_gateway, @@ -458,55 +457,6 @@ enum Commands { theme: 
openshell_tui::ThemeMode, }, - /// Boot a libkrun microVM. - /// - /// By default, starts a k3s Kubernetes cluster inside the VM with the - /// API server on port 6443. Use `--exec` to run a custom process instead. - #[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] - Vm { - /// Path to the rootfs directory (aarch64 Linux). - /// Defaults to `~/.local/share/openshell/gateway/rootfs`. - #[arg(long, value_hint = ValueHint::DirPath)] - rootfs: Option, - - /// Executable path inside the VM. When set, runs this instead of - /// the default k3s server. - #[arg(long)] - exec: Option, - - /// Arguments to the executable (requires `--exec`). - #[arg(long, num_args = 1..)] - args: Vec, - - /// Environment variables in `KEY=VALUE` form (requires `--exec`). - #[arg(long, num_args = 1..)] - env: Vec, - - /// Working directory inside the VM. - #[arg(long, default_value = "/")] - workdir: String, - - /// Port mappings (`host_port:guest_port`). - #[arg(long, short, num_args = 1..)] - port: Vec, - - /// Number of virtual CPUs (default: 4 for gateway, 2 for --exec). - #[arg(long)] - vcpus: Option, - - /// RAM in MiB (default: 8192 for gateway, 2048 for --exec). - #[arg(long)] - mem: Option, - - /// libkrun log level (0=Off .. 5=Trace). - #[arg(long, default_value_t = 1)] - krun_log_level: u32, - - /// Networking backend: "gvproxy" (default), "tsi", or "none". - #[arg(long, default_value = "gvproxy")] - net: String, - }, - /// Generate shell completions. 
#[command(after_long_help = COMPLETIONS_HELP, help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] Completions { @@ -2195,76 +2145,6 @@ async fn main() -> Result<()> { let channel = openshell_cli::tls::build_channel(&ctx.endpoint, &tls).await?; openshell_tui::run(channel, &ctx.name, &ctx.endpoint, theme).await?; } - Some(Commands::Vm { - rootfs, - exec, - args, - env, - workdir, - port, - vcpus, - mem, - krun_log_level, - net, - }) => { - let net_backend = match net.as_str() { - "tsi" => openshell_vm::NetBackend::Tsi, - "none" => openshell_vm::NetBackend::None, - "gvproxy" => openshell_vm::NetBackend::Gvproxy { - binary: PathBuf::from( - [ - "/opt/podman/bin/gvproxy", - "/opt/homebrew/bin/gvproxy", - "/usr/local/bin/gvproxy", - ] - .iter() - .find(|p| std::path::Path::new(p).exists()) - .unwrap_or(&"/opt/podman/bin/gvproxy"), - ), - }, - other => { - return Err(miette::miette!( - "unknown --net backend: {other} (expected: gvproxy, tsi, none)" - )); - } - }; - - let rootfs = - rootfs.map_or_else(openshell_bootstrap::paths::default_rootfs_dir, Ok)?; - let mut config = if let Some(exec_path) = exec { - openshell_vm::VmConfig { - rootfs, - vcpus: vcpus.unwrap_or(2), - mem_mib: mem.unwrap_or(2048), - exec_path, - args, - env, - workdir, - port_map: port, - log_level: krun_log_level, - console_output: None, - net: net_backend.clone(), - } - } else { - let mut c = openshell_vm::VmConfig::gateway(rootfs); - if !port.is_empty() { - c.port_map = port; - } - if let Some(v) = vcpus { - c.vcpus = v; - } - if let Some(m) = mem { - c.mem_mib = m; - } - c.net = net_backend; - c - }; - config.log_level = krun_log_level; - let code = openshell_vm::launch(&config).map_err(|e| miette::miette!("{e}"))?; - if code != 0 { - std::process::exit(code); - } - } Some(Commands::Completions { shell }) => { let exe = std::env::current_exe() .map_err(|e| miette::miette!("failed to find current executable: {e}"))?; diff --git a/crates/openshell-vm/Cargo.toml 
b/crates/openshell-vm/Cargo.toml index d76be7aa..5fc6f062 100644 --- a/crates/openshell-vm/Cargo.toml +++ b/crates/openshell-vm/Cargo.toml @@ -14,13 +14,20 @@ description = "MicroVM runtime using libkrun for hardware-isolated execution" name = "openshell_vm" path = "src/lib.rs" +[[bin]] +name = "gateway" +path = "src/main.rs" + [dependencies] base64 = "0.22" +clap = { workspace = true } libc = "0.2" miette = { workspace = true } -navigator-bootstrap = { path = "../navigator-bootstrap" } +openshell-bootstrap = { path = "../openshell-bootstrap" } serde_json = "1" thiserror = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } [lints] workspace = true diff --git a/crates/openshell-vm/src/main.rs b/crates/openshell-vm/src/main.rs new file mode 100644 index 00000000..144d28eb --- /dev/null +++ b/crates/openshell-vm/src/main.rs @@ -0,0 +1,150 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Standalone gateway binary. +//! +//! Boots a libkrun microVM running the OpenShell control plane (k3s + +//! openshell-server). By default it uses the pre-built rootfs at +//! `~/.local/share/openshell/gateway/rootfs`. +//! +//! # Codesigning (macOS) +//! +//! This binary must be codesigned with the `com.apple.security.hypervisor` +//! entitlement. See `entitlements.plist` in this crate. +//! +//! ```sh +//! codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - target/debug/gateway +//! ``` + +use std::path::PathBuf; + +use clap::{Parser, ValueHint}; + +/// Boot the OpenShell gateway microVM. +/// +/// Starts a libkrun microVM running a k3s Kubernetes cluster with the +/// OpenShell control plane. Use `--exec` to run a custom process instead. +#[derive(Parser)] +#[command(name = "gateway", version)] +struct Cli { + /// Path to the rootfs directory (aarch64 Linux). 
+ /// Defaults to `~/.local/share/openshell/gateway/rootfs`. + #[arg(long, value_hint = ValueHint::DirPath)] + rootfs: Option, + + /// Executable path inside the VM. When set, runs this instead of + /// the default k3s server. + #[arg(long)] + exec: Option, + + /// Arguments to the executable (requires `--exec`). + #[arg(long, num_args = 1..)] + args: Vec, + + /// Environment variables in `KEY=VALUE` form (requires `--exec`). + #[arg(long, num_args = 1..)] + env: Vec, + + /// Working directory inside the VM. + #[arg(long, default_value = "/")] + workdir: String, + + /// Port mappings (`host_port:guest_port`). + #[arg(long, short, num_args = 1..)] + port: Vec, + + /// Number of virtual CPUs (default: 4 for gateway, 2 for --exec). + #[arg(long)] + vcpus: Option, + + /// RAM in MiB (default: 8192 for gateway, 2048 for --exec). + #[arg(long)] + mem: Option, + + /// libkrun log level (0=Off .. 5=Trace). + #[arg(long, default_value_t = 1)] + krun_log_level: u32, + + /// Networking backend: "gvproxy" (default), "tsi", or "none". 
+ #[arg(long, default_value = "gvproxy")] + net: String, +} + +fn main() { + tracing_subscriber::fmt::init(); + + let cli = Cli::parse(); + + let code = match run(cli) { + Ok(code) => code, + Err(e) => { + eprintln!("Error: {e}"); + 1 + } + }; + + if code != 0 { + std::process::exit(code); + } +} + +fn run(cli: Cli) -> Result> { + let net_backend = match cli.net.as_str() { + "tsi" => openshell_vm::NetBackend::Tsi, + "none" => openshell_vm::NetBackend::None, + "gvproxy" => openshell_vm::NetBackend::Gvproxy { + binary: PathBuf::from( + [ + "/opt/podman/bin/gvproxy", + "/opt/homebrew/bin/gvproxy", + "/usr/local/bin/gvproxy", + ] + .iter() + .find(|p| std::path::Path::new(p).exists()) + .unwrap_or(&"/opt/podman/bin/gvproxy"), + ), + }, + other => { + return Err( + format!("unknown --net backend: {other} (expected: gvproxy, tsi, none)").into(), + ); + } + }; + + let rootfs = match cli.rootfs { + Some(p) => p, + None => openshell_bootstrap::paths::default_rootfs_dir()?, + }; + + let mut config = if let Some(exec_path) = cli.exec { + openshell_vm::VmConfig { + rootfs, + vcpus: cli.vcpus.unwrap_or(2), + mem_mib: cli.mem.unwrap_or(2048), + exec_path, + args: cli.args, + env: cli.env, + workdir: cli.workdir, + port_map: cli.port, + log_level: cli.krun_log_level, + console_output: None, + net: net_backend.clone(), + } + } else { + let mut c = openshell_vm::VmConfig::gateway(rootfs); + if !cli.port.is_empty() { + c.port_map = cli.port; + } + if let Some(v) = cli.vcpus { + c.vcpus = v; + } + if let Some(m) = cli.mem { + c.mem_mib = m; + } + c.net = net_backend; + c + }; + config.log_level = cli.krun_log_level; + + Ok(openshell_vm::launch(&config)?) +} diff --git a/scripts/bin/gateway b/scripts/bin/gateway new file mode 100755 index 00000000..8438dfdf --- /dev/null +++ b/scripts/bin/gateway @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +BINARY="$PROJECT_ROOT/target/debug/gateway" + +cargo build --package openshell-vm --bin gateway --quiet + +# On macOS, codesign with the hypervisor entitlement so libkrun can use +# Apple's Hypervisor.framework. Re-sign after every build. +ENTITLEMENTS="$PROJECT_ROOT/crates/openshell-vm/entitlements.plist" +if [[ "$(uname)" == "Darwin" ]] && [[ -f "$ENTITLEMENTS" ]]; then + codesign --entitlements "$ENTITLEMENTS" --force -s - "$BINARY" 2>/dev/null +fi + +# Ensure libkrunfw is discoverable by libkrun's dlopen on macOS. +# dyld only reads DYLD_FALLBACK_LIBRARY_PATH at process startup, so we +# set it here before exec. +if [[ "$(uname)" == "Darwin" ]]; then + HOMEBREW_LIB="$(brew --prefix 2>/dev/null || echo /opt/homebrew)/lib" + export DYLD_FALLBACK_LIBRARY_PATH="${HOMEBREW_LIB}${DYLD_FALLBACK_LIBRARY_PATH:+:$DYLD_FALLBACK_LIBRARY_PATH}" +fi + +exec "$BINARY" "$@" diff --git a/scripts/bin/openshell b/scripts/bin/openshell index 9ca015ca..19a55c2e 100755 --- a/scripts/bin/openshell +++ b/scripts/bin/openshell @@ -42,7 +42,7 @@ else return 0 ;; crates/openshell-cli/*|crates/openshell-core/*|crates/openshell-bootstrap/*) return 0 ;; - crates/openshell-policy/*|crates/openshell-providers/*|crates/openshell-tui/*|crates/openshell-vm/*) + crates/openshell-policy/*|crates/openshell-providers/*|crates/openshell-tui/*) return 0 ;; *) return 1 ;; @@ -91,12 +91,7 @@ if [[ "$needs_build" == "1" ]]; then echo "Recompiling openshell..." >&2 cargo build --package openshell-cli --quiet - # On macOS, codesign with the hypervisor entitlement so libkrun can use - # Apple's Hypervisor.framework. Re-sign after every build. 
- ENTITLEMENTS="$PROJECT_ROOT/crates/openshell-vm/entitlements.plist" - if [[ "$(uname)" == "Darwin" ]] && [[ -f "$ENTITLEMENTS" ]]; then - codesign --entitlements "$ENTITLEMENTS" --force -s - "$BINARY" 2>/dev/null - fi + # Persist state after successful build mkdir -p "$(dirname "$STATE_FILE")" cd "$PROJECT_ROOT" @@ -117,7 +112,7 @@ if [[ "$needs_build" == "1" ]]; then return 0 ;; crates/openshell-cli/*|crates/openshell-core/*|crates/openshell-bootstrap/*) return 0 ;; - crates/openshell-policy/*|crates/openshell-providers/*|crates/openshell-tui/*|crates/openshell-vm/*) + crates/openshell-policy/*|crates/openshell-providers/*|crates/openshell-tui/*) return 0 ;; *) return 1 ;; @@ -147,12 +142,4 @@ fingerprint=${new_fingerprint} EOF fi -# Ensure libkrunfw is discoverable by libkrun's dlopen on macOS. -# dyld only reads DYLD_FALLBACK_LIBRARY_PATH at process startup, so we -# set it here before exec. -if [[ "$(uname)" == "Darwin" ]]; then - HOMEBREW_LIB="$(brew --prefix 2>/dev/null || echo /opt/homebrew)/lib" - export DYLD_FALLBACK_LIBRARY_PATH="${HOMEBREW_LIB}${DYLD_FALLBACK_LIBRARY_PATH:+:$DYLD_FALLBACK_LIBRARY_PATH}" -fi - exec "$BINARY" "$@" From acd6befd0d1cd5fb4164db221ea069ebd8830f60 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Fri, 6 Mar 2026 13:39:34 -0800 Subject: [PATCH 05/14] test(vm): add e2e integration tests for gateway binary Two #[ignore] tests that require libkrun + pre-built rootfs: - gateway_boots_and_service_becomes_reachable: starts the full gateway and verifies the gRPC service on port 30051 - gateway_exec_runs_guest_command: runs /bin/true inside the VM via --exec and checks the exit code --- .../navigator-vm/tests/gateway_integration.rs | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 crates/navigator-vm/tests/gateway_integration.rs diff --git a/crates/navigator-vm/tests/gateway_integration.rs b/crates/navigator-vm/tests/gateway_integration.rs new file mode 100644 index 00000000..060ac533 --- /dev/null +++ 
b/crates/navigator-vm/tests/gateway_integration.rs @@ -0,0 +1,132 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for the standalone `gateway` binary. +//! +//! These tests require: +//! - libkrun installed (e.g. `brew tap slp/krun && brew install libkrun`) +//! - macOS ARM64 with Apple Hypervisor.framework +//! - A pre-built rootfs at `~/.local/share/nemoclaw/gateway/rootfs` +//! +//! All tests are `#[ignore]` — run them explicitly: +//! +//! ```sh +//! cargo test -p navigator-vm --test gateway_integration -- --ignored +//! ``` + +#![allow(unsafe_code)] + +use std::net::{SocketAddr, TcpStream}; +use std::process::{Command, Stdio}; +use std::time::{Duration, Instant}; + +/// Path to the built `gateway` binary (resolved by Cargo at compile time). +const GATEWAY: &str = env!("CARGO_BIN_EXE_gateway"); + +// ── Helpers ──────────────────────────────────────────────────────────── + +/// Codesign the binary on macOS so it can access Hypervisor.framework. +fn codesign_if_needed() { + if cfg!(target_os = "macos") { + let entitlements = format!("{}/entitlements.plist", env!("CARGO_MANIFEST_DIR")); + let status = Command::new("codesign") + .args([ + "--entitlements", + &entitlements, + "--force", + "-s", + "-", + GATEWAY, + ]) + .status() + .expect("codesign command failed to execute"); + assert!(status.success(), "failed to codesign gateway binary"); + } +} + +/// Build environment variables so libkrun can find libkrunfw at runtime. 
+fn libkrun_env() -> Vec<(&'static str, String)> { + if cfg!(target_os = "macos") { + let homebrew_lib = Command::new("brew") + .args(["--prefix"]) + .output() + .ok() + .and_then(|o| String::from_utf8(o.stdout).ok()) + .map(|s| format!("{}/lib", s.trim())) + .unwrap_or_else(|| "/opt/homebrew/lib".to_string()); + + let existing = std::env::var("DYLD_FALLBACK_LIBRARY_PATH").unwrap_or_default(); + let val = if existing.is_empty() { + homebrew_lib + } else { + format!("{homebrew_lib}:{existing}") + }; + vec![("DYLD_FALLBACK_LIBRARY_PATH", val)] + } else { + vec![] + } +} + +// ── Tests ────────────────────────────────────────────────────────────── + +/// Boot the full NemoClaw gateway and verify the gRPC service becomes +/// reachable on port 30051. +#[test] +#[ignore] // requires libkrun + rootfs +fn gateway_boots_and_service_becomes_reachable() { + codesign_if_needed(); + + let mut cmd = Command::new(GATEWAY); + cmd.stdout(Stdio::null()).stderr(Stdio::piped()); + for (k, v) in libkrun_env() { + cmd.env(k, v); + } + + let mut child = cmd.spawn().expect("failed to start gateway"); + + // Poll for the navigator gRPC service. + let addr: SocketAddr = ([127, 0, 0, 1], 30051).into(); + let timeout = Duration::from_secs(180); + let start = Instant::now(); + let mut reachable = false; + + while start.elapsed() < timeout { + if TcpStream::connect_timeout(&addr, Duration::from_secs(1)).is_ok() { + reachable = true; + break; + } + std::thread::sleep(Duration::from_secs(2)); + } + + // Tear down regardless of result. + let _ = unsafe { libc::kill(child.id() as i32, libc::SIGTERM) }; + let _ = child.wait(); + + assert!( + reachable, + "gateway service on port 30051 not reachable within {timeout:?}" + ); +} + +/// Run a trivial command inside the VM via `--exec` and verify it exits +/// successfully, proving the VM boots and can execute guest processes. 
+#[test] +#[ignore] // requires libkrun + rootfs +fn gateway_exec_runs_guest_command() { + codesign_if_needed(); + + let mut cmd = Command::new(GATEWAY); + cmd.args(["--exec", "/bin/true"]); + for (k, v) in libkrun_env() { + cmd.env(k, v); + } + + let output = cmd.output().expect("failed to run gateway --exec"); + + assert!( + output.status.success(), + "gateway --exec /bin/true failed with status {:?}\nstderr: {}", + output.status, + String::from_utf8_lossy(&output.stderr), + ); +} From 2002e9cf7295021f04ba782cfe17c149a8a141e6 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Tue, 17 Mar 2026 15:57:36 -0700 Subject: [PATCH 06/14] fix(vm): migrate navigator-vm references to openshell naming convention Move orphaned integration test from crates/navigator-vm/ to crates/openshell-vm/tests/ and update all navigator_bootstrap references to openshell_bootstrap, including renamed types (ClusterMetadata -> GatewayMetadata) and functions. --- Cargo.lock | 442 +++++++++++------- crates/openshell-vm/src/lib.rs | 34 +- .../tests/gateway_integration.rs | 2 +- 3 files changed, 303 insertions(+), 175 deletions(-) rename crates/{navigator-vm => openshell-vm}/tests/gateway_integration.rs (98%) diff --git a/Cargo.lock b/Cargo.lock index a0649536..e3cd5611 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -287,6 +287,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core 0.5.6", + "base64 0.22.1", "bytes", "form_urlencoded", "futures-util", @@ -305,8 +306,10 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", + "sha1 0.10.6", "sync_wrapper", "tokio", + "tokio-tungstenite 0.28.0", "tower 0.5.3", "tower-layer", "tower-service", @@ -2677,7 +2680,135 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" [[package]] -name = "navigator-bootstrap" +name = "nix" 
+version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "serde", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + +[[package]] +name = "openshell-bootstrap" version = "0.1.0" dependencies = [ "base64 0.22.1", @@ -2685,10 +2816,10 @@ dependencies = [ "bytes", "futures", "miette", + "openshell-core", "rcgen", "serde", "serde_json", - "serde_yaml", "tar", "tempfile", "tokio", @@ -2696,7 +2827,7 @@ dependencies = [ ] [[package]] -name = "navigator-cli" +name = "openshell-cli" version = "0.1.0" dependencies = [ "anyhow", @@ -2712,11 +2843,12 @@ dependencies = [ "hyper-util", "indicatif", "miette", - "navigator-bootstrap", - "navigator-core", - "navigator-policy", - "navigator-providers", - "navigator-tui", 
+ "nix", + "openshell-bootstrap", + "openshell-core", + "openshell-policy", + "openshell-providers", + "openshell-tui", "owo-colors", "prost-types", "rcgen", @@ -2725,13 +2857,14 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", - "serde_yaml", "tar", + "temp-env", "tempfile", "thiserror 2.0.18", "tokio", "tokio-rustls", "tokio-stream", + "tokio-tungstenite 0.26.2", "tonic", "tracing", "tracing-subscriber", @@ -2739,7 +2872,7 @@ dependencies = [ ] [[package]] -name = "navigator-core" +name = "openshell-core" version = "0.1.0" dependencies = [ "miette", @@ -2748,6 +2881,7 @@ dependencies = [ "protobuf-src", "serde", "serde_json", + "tempfile", "thiserror 2.0.18", "tonic", "tonic-build", @@ -2755,29 +2889,29 @@ dependencies = [ ] [[package]] -name = "navigator-policy" +name = "openshell-policy" version = "0.1.0" dependencies = [ "miette", - "navigator-core", + "openshell-core", "serde", "serde_yaml", ] [[package]] -name = "navigator-providers" +name = "openshell-providers" version = "0.1.0" dependencies = [ - "navigator-core", + "openshell-core", "thiserror 2.0.18", ] [[package]] -name = "navigator-router" +name = "openshell-router" version = "0.1.0" dependencies = [ "bytes", - "navigator-core", + "openshell-core", "reqwest", "serde", "serde_json", @@ -2791,7 +2925,7 @@ dependencies = [ ] [[package]] -name = "navigator-sandbox" +name = "openshell-sandbox" version = "0.1.0" dependencies = [ "anyhow", @@ -2803,10 +2937,10 @@ dependencies = [ "landlock", "libc", "miette", - "navigator-core", - "navigator-policy", - "navigator-router", "nix", + "openshell-core", + "openshell-policy", + "openshell-router", "rand_core 0.6.4", "rcgen", "regorus", @@ -2817,6 +2951,7 @@ dependencies = [ "serde_json", "serde_yaml", "sha2 0.10.9", + "temp-env", "tempfile", "thiserror 2.0.18", "tokio", @@ -2831,7 +2966,7 @@ dependencies = [ ] [[package]] -name = "navigator-server" +name = "openshell-server" version = "0.1.0" dependencies = [ "anyhow", @@ -2839,6 +2974,7 @@ 
dependencies = [ "bytes", "clap", "futures", + "futures-util", "hex", "hmac", "http", @@ -2851,14 +2987,16 @@ dependencies = [ "kube", "kube-runtime", "miette", - "navigator-core", - "navigator-policy", + "openshell-core", + "openshell-policy", + "openshell-router", "petname", "pin-project-lite", "prost", "prost-types", "rand 0.9.2", "rcgen", + "reqwest", "russh", "rustls", "rustls-pemfile", @@ -2871,27 +3009,31 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-stream", + "tokio-tungstenite 0.26.2", "tonic", "tower 0.5.3", "tower-http 0.6.8", "tracing", "tracing-subscriber", "uuid", + "wiremock", ] [[package]] -name = "navigator-tui" +name = "openshell-tui" version = "0.1.0" dependencies = [ + "base64 0.22.1", "crossterm 0.28.1", "miette", - "navigator-bootstrap", - "navigator-core", - "navigator-policy", - "navigator-providers", + "openshell-bootstrap", + "openshell-core", + "openshell-policy", + "openshell-providers", "owo-colors", "ratatui", "serde", + "terminal-colorsaurus", "tokio", "tonic", "tracing", @@ -2899,148 +3041,20 @@ dependencies = [ ] [[package]] -name = "navigator-vm" +name = "openshell-vm" version = "0.1.0" dependencies = [ "base64 0.22.1", "clap", "libc", "miette", - "navigator-bootstrap", + "openshell-bootstrap", "serde_json", "thiserror 2.0.18", "tracing", "tracing-subscriber", ] -[[package]] -name = "nix" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" -dependencies = [ - "bitflags", - "cfg-if", - "cfg_aliases", - "libc", -] - -[[package]] -name = "nu-ansi-term" -version = "0.50.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", - "rand 0.8.5", -] - -[[package]] -name = "num-bigint-dig" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" -dependencies = [ - "lazy_static", - "libm", - "num-integer", - "num-iter", - "num-traits", - "rand 0.8.5", - "serde", - "smallvec", - "zeroize", -] - -[[package]] -name = "num-conv" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - -[[package]] -name = "object" -version = "0.37.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "once_cell_polyfill" -version = "1.70.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" - -[[package]] -name = "opaque-debug" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" - [[package]] name = "openssh" version = "0.11.6" @@ -4896,6 +4910,15 @@ dependencies = [ "xattr", ] +[[package]] +name = "temp-env" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" +dependencies = [ + "parking_lot", +] + [[package]] name = "tempfile" version = "3.24.0" @@ -4909,6 +4932,32 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "terminal-colorsaurus" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a46bb5364467da040298c573c8a95dbf9a512efc039630409a03126e3703e90" +dependencies = [ + "cfg-if", + "libc", + "memchr", + "mio 1.1.1", + "terminal-trx", + "windows-sys 0.61.2", + "xterm-color", +] + +[[package]] +name = "terminal-trx" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3f27d9a8a177e57545481faec87acb45c6e854ed1e5a3658ad186c106f38ed" +dependencies = [ + "cfg-if", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "terminal_size" version = "0.4.3" @@ -5104,6 +5153,34 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-tungstenite" +version = 
"0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9daff607c6d2bf6c16fd681ccb7eecc83e4e2cdc1ca067ffaadfca5de7f084" +dependencies = [ + "futures-util", + "log", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tungstenite 0.26.2", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.28.0", +] + [[package]] name = "tokio-util" version = "0.7.18" @@ -5139,6 +5216,7 @@ dependencies = [ "percent-encoding", "pin-project", "prost", + "rustls-native-certs", "rustls-pemfile", "socket2 0.5.10", "tokio", @@ -5343,6 +5421,42 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tungstenite" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4793cb5e56680ecbb1d843515b23b6de9a75eb04b66643e256a396d43be33c13" +dependencies = [ + "bytes", + "data-encoding", + "http", + "httparse", + "log", + "rand 0.9.2", + "rustls", + "rustls-pki-types", + "sha1 0.10.6", + "thiserror 2.0.18", + "utf-8", +] + +[[package]] +name = "tungstenite" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" +dependencies = [ + "bytes", + "data-encoding", + "http", + "httparse", + "log", + "rand 0.9.2", + "sha1 0.10.6", + "thiserror 2.0.18", + "utf-8", +] + [[package]] name = "typenum" version = "1.19.0" @@ -5463,6 +5577,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -6174,6 +6294,12 @@ dependencies = [ "rustix 1.1.3", ] +[[package]] +name = "xterm-color" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7008a9d8ba97a7e47d9b2df63fcdb8dade303010c5a7cd5bf2469d4da6eba673" + [[package]] name = "yasna" version = "0.5.2" diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index db362c2b..f55df64c 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -844,16 +844,18 @@ const GATEWAY_PORT: u16 = 30051; fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { let bootstrap_start = Instant::now(); - // Build cluster metadata early — it only depends on knowing the port and - // cluster name, not on the cluster being ready. - let metadata = navigator_bootstrap::ClusterMetadata { + // Build gateway metadata early — it only depends on knowing the port and + // gateway name, not on the cluster being ready. 
+ let metadata = openshell_bootstrap::GatewayMetadata { name: GATEWAY_CLUSTER_NAME.to_string(), gateway_endpoint: format!("https://127.0.0.1:{GATEWAY_PORT}"), is_remote: false, gateway_port: GATEWAY_PORT, - kube_port: Some(6443), remote_host: None, resolved_host: None, + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, }; // ── Path 1: Pre-baked PKI from build-rootfs.sh ───────────────── @@ -871,7 +873,7 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { .map_err(|e| VmError::Bootstrap(format!("failed to read {name}: {e}"))) }; - let pki_bundle = navigator_bootstrap::pki::PkiBundle { + let pki_bundle = openshell_bootstrap::pki::PkiBundle { ca_cert_pem: read("ca.crt")?, ca_key_pem: read("ca.key")?, server_cert_pem: read("server.crt")?, @@ -881,13 +883,13 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { }; // Store metadata and credentials on the host. - navigator_bootstrap::store_cluster_metadata(GATEWAY_CLUSTER_NAME, &metadata) + openshell_bootstrap::store_gateway_metadata(GATEWAY_CLUSTER_NAME, &metadata) .map_err(|e| VmError::Bootstrap(format!("failed to store metadata: {e}")))?; - navigator_bootstrap::mtls::store_pki_bundle(GATEWAY_CLUSTER_NAME, &pki_bundle) + openshell_bootstrap::mtls::store_pki_bundle(GATEWAY_CLUSTER_NAME, &pki_bundle) .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS creds: {e}")))?; - navigator_bootstrap::save_active_cluster(GATEWAY_CLUSTER_NAME) + openshell_bootstrap::save_active_gateway(GATEWAY_CLUSTER_NAME) .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?; eprintln!( @@ -924,10 +926,10 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { // ── Path 3: Cold boot (no pre-baked state) ───────────────────── eprintln!("Generating TLS certificates..."); - let pki_bundle = navigator_bootstrap::pki::generate_pki(&[]) + let pki_bundle = openshell_bootstrap::pki::generate_pki(&[]) .map_err(|e| 
VmError::Bootstrap(format!("PKI generation failed: {e}")))?; - navigator_bootstrap::store_cluster_metadata(GATEWAY_CLUSTER_NAME, &metadata) + openshell_bootstrap::store_gateway_metadata(GATEWAY_CLUSTER_NAME, &metadata) .map_err(|e| VmError::Bootstrap(format!("failed to store cluster metadata: {e}")))?; let ns_start = Instant::now(); @@ -938,10 +940,10 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { eprintln!("Creating TLS secrets..."); apply_tls_secrets(kc, &pki_bundle)?; - navigator_bootstrap::mtls::store_pki_bundle(GATEWAY_CLUSTER_NAME, &pki_bundle) + openshell_bootstrap::mtls::store_pki_bundle(GATEWAY_CLUSTER_NAME, &pki_bundle) .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS credentials: {e}")))?; - navigator_bootstrap::save_active_cluster(GATEWAY_CLUSTER_NAME) + openshell_bootstrap::save_active_gateway(GATEWAY_CLUSTER_NAME) .map_err(|e| VmError::Bootstrap(format!("failed to set active cluster: {e}")))?; eprintln!( @@ -1238,7 +1240,7 @@ fn wait_for_namespace(kubeconfig: &str) -> Result<(), VmError> { /// Uses `kubectl apply -f -` on the host, piping JSON manifests via stdin. 
fn apply_tls_secrets( kubeconfig: &str, - bundle: &navigator_bootstrap::pki::PkiBundle, + bundle: &openshell_bootstrap::pki::PkiBundle, ) -> Result<(), VmError> { use base64::Engine; use base64::engine::general_purpose::STANDARD; @@ -1249,7 +1251,7 @@ fn apply_tls_secrets( "apiVersion": "v1", "kind": "Secret", "metadata": { - "name": navigator_bootstrap::constants::SERVER_TLS_SECRET_NAME, + "name": openshell_bootstrap::constants::SERVER_TLS_SECRET_NAME, "namespace": "navigator" }, "type": "kubernetes.io/tls", @@ -1263,7 +1265,7 @@ fn apply_tls_secrets( "apiVersion": "v1", "kind": "Secret", "metadata": { - "name": navigator_bootstrap::constants::SERVER_CLIENT_CA_SECRET_NAME, + "name": openshell_bootstrap::constants::SERVER_CLIENT_CA_SECRET_NAME, "namespace": "navigator" }, "type": "Opaque", @@ -1276,7 +1278,7 @@ fn apply_tls_secrets( "apiVersion": "v1", "kind": "Secret", "metadata": { - "name": navigator_bootstrap::constants::CLIENT_TLS_SECRET_NAME, + "name": openshell_bootstrap::constants::CLIENT_TLS_SECRET_NAME, "namespace": "navigator" }, "type": "Opaque", diff --git a/crates/navigator-vm/tests/gateway_integration.rs b/crates/openshell-vm/tests/gateway_integration.rs similarity index 98% rename from crates/navigator-vm/tests/gateway_integration.rs rename to crates/openshell-vm/tests/gateway_integration.rs index 060ac533..413d1b3c 100644 --- a/crates/navigator-vm/tests/gateway_integration.rs +++ b/crates/openshell-vm/tests/gateway_integration.rs @@ -11,7 +11,7 @@ //! All tests are `#[ignore]` — run them explicitly: //! //! ```sh -//! cargo test -p navigator-vm --test gateway_integration -- --ignored +//! cargo test -p openshell-vm --test gateway_integration -- --ignored //! 
``` #![allow(unsafe_code)] From 8849291bd3cf66e4842f4be64bf8629c59e15953 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Tue, 17 Mar 2026 16:37:50 -0700 Subject: [PATCH 07/14] fix(ci): exclude openshell-vm from workspace CI tasks openshell-vm links against libkrun which is only available on macOS with Homebrew. Exclude it from cargo check, clippy, and test workspace commands so CI passes on Linux runners. --- tasks/rust.toml | 4 ++-- tasks/test.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/rust.toml b/tasks/rust.toml index 69214ce7..dfa4068f 100644 --- a/tasks/rust.toml +++ b/tasks/rust.toml @@ -5,12 +5,12 @@ ["rust:check"] description = "Check all Rust crates for errors" -run = "cargo check --workspace" +run = "cargo check --workspace --exclude openshell-vm" hide = true ["rust:lint"] description = "Lint Rust code with Clippy" -run = "cargo clippy --workspace --all-targets" +run = "cargo clippy --workspace --all-targets --exclude openshell-vm" hide = true ["rust:format"] diff --git a/tasks/test.toml b/tasks/test.toml index af1955d0..791ca2aa 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -17,7 +17,7 @@ depends = ["e2e:python:gpu"] ["test:rust"] description = "Run Rust tests" -run = "cargo test --workspace" +run = "cargo test --workspace --exclude openshell-vm" hide = true ["test:python"] From ab9a1260b48679a24eb4eedcc139ec31caf79002 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Mon, 23 Mar 2026 18:23:55 -0700 Subject: [PATCH 08/14] wip --- CONTRIBUTING.md | 22 + Cargo.lock | 11 + crates/openshell-vm/Cargo.toml | 1 + crates/openshell-vm/build.rs | 35 -- crates/openshell-vm/scripts/build-rootfs.sh | 94 +-- crates/openshell-vm/src/ffi.rs | 196 ++++-- crates/openshell-vm/src/lib.rs | 582 ++++++++++++------ crates/openshell-vm/src/main.rs | 12 +- .../openshell-vm/tests/gateway_integration.rs | 39 +- tasks/scripts/bundle-vm-runtime.sh | 88 +++ tasks/scripts/codesign-gateway.sh | 12 + 
tasks/scripts/package-gateway-runtime.sh | 27 + tasks/vm.toml | 41 ++ 13 files changed, 794 insertions(+), 366 deletions(-) delete mode 100644 crates/openshell-vm/build.rs create mode 100755 tasks/scripts/bundle-vm-runtime.sh create mode 100755 tasks/scripts/codesign-gateway.sh create mode 100755 tasks/scripts/package-gateway-runtime.sh create mode 100644 tasks/vm.toml diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f558cdeb..28104af8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -140,6 +140,28 @@ These are the primary `mise` tasks for day-to-day development: | `mise run docs` | Build and serve documentation locally | | `mise run clean` | Clean build artifacts | +### MicroVM runtime + +To build and run the standalone `gateway` microVM from `crates/openshell-vm`: + +```bash +mise run vm +``` + +That task builds `openshell-vm`, stages `gateway.runtime/`, builds the default rootfs under `$XDG_DATA_HOME/openshell/gateway/rootfs`, codesigns `target/debug/gateway` on macOS, and then launches the VM. 
+ +If you only want to stage the sidecar runtime bundle without launching the VM: + +```bash +mise run vm:bundle-runtime +``` + +To create a local tarball that contains both `gateway` and `gateway.runtime/`: + +```bash +mise run vm:package:gateway +``` + ## Project Structure | Path | Purpose | diff --git a/Cargo.lock b/Cargo.lock index e3cd5611..76d0b3c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2475,6 +2475,16 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" version = "0.2.16" @@ -3047,6 +3057,7 @@ dependencies = [ "base64 0.22.1", "clap", "libc", + "libloading", "miette", "openshell-bootstrap", "serde_json", diff --git a/crates/openshell-vm/Cargo.toml b/crates/openshell-vm/Cargo.toml index 5fc6f062..c8319765 100644 --- a/crates/openshell-vm/Cargo.toml +++ b/crates/openshell-vm/Cargo.toml @@ -22,6 +22,7 @@ path = "src/main.rs" base64 = "0.22" clap = { workspace = true } libc = "0.2" +libloading = "0.8" miette = { workspace = true } openshell-bootstrap = { path = "../openshell-bootstrap" } serde_json = "1" diff --git a/crates/openshell-vm/build.rs b/crates/openshell-vm/build.rs deleted file mode 100644 index 7f789395..00000000 --- a/crates/openshell-vm/build.rs +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Build script for openshell-vm. -//! -//! Discovers the Homebrew library path for libkrun and emits the appropriate -//! cargo link-search directives. On macOS ARM64, libkrun is typically installed -//! via `brew tap slp/krun && brew install libkrun`. 
- -fn main() { - // Discover Homebrew prefix (handles both /opt/homebrew and /usr/local) - let homebrew_prefix = std::process::Command::new("brew") - .args(["--prefix"]) - .output() - .ok() - .and_then(|o| { - if o.status.success() { - String::from_utf8(o.stdout) - .ok() - .map(|s| s.trim().to_string()) - } else { - None - } - }) - .unwrap_or_else(|| "/opt/homebrew".to_string()); - - let lib_dir = format!("{homebrew_prefix}/lib"); - - println!("cargo:rustc-link-search=native={lib_dir}"); - println!("cargo:rustc-link-lib=dylib=krun"); - - // Re-run if the library changes - println!("cargo:rerun-if-changed=build.rs"); - println!("cargo:rerun-if-env-changed=LIBRARY_PATH"); -} diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh index 68c7b4ac..ece7ee45 100755 --- a/crates/openshell-vm/scripts/build-rootfs.sh +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -4,7 +4,7 @@ # Build an aarch64 Ubuntu rootfs for the gateway microVM. # -# Produces a rootfs with k3s pre-installed, the NemoClaw helm chart and +# Produces a rootfs with k3s pre-installed, the OpenShell helm chart and # manifests baked in, container images pre-loaded, AND a fully initialized # k3s cluster state (database, TLS, images imported, all services deployed). # @@ -12,14 +12,14 @@ # cold-starting, achieving ~3-5s startup times. 
# # Usage: -# ./crates/navigator-vm/scripts/build-rootfs.sh [output_dir] +# ./crates/openshell-vm/scripts/build-rootfs.sh [output_dir] # # Requires: Docker (or compatible container runtime), curl, helm, zstd set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -DEFAULT_ROOTFS="${XDG_DATA_HOME:-${HOME}/.local/share}/nemoclaw/gateway/rootfs" +DEFAULT_ROOTFS="${XDG_DATA_HOME:-${HOME}/.local/share}/openshell/gateway/rootfs" ROOTFS_DIR="${1:-${DEFAULT_ROOTFS}}" CONTAINER_NAME="krun-rootfs-builder" INIT_CONTAINER_NAME="krun-k3s-init" @@ -30,7 +30,7 @@ BASE_IMAGE_TAG="krun-rootfs:gateway" K3S_VERSION="${K3S_VERSION:-v1.35.2+k3s1}" K3S_VERSION="${K3S_VERSION//-k3s/+k3s}" -# Project root (two levels up from crates/navigator-vm/scripts/) +# Project root (two levels up from crates/openshell-vm/scripts/) PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" # Container images to pre-load into k3s (arm64). @@ -117,7 +117,7 @@ chmod +x "${ROOTFS_DIR}/srv/hello-server.py" # ── Package and inject helm chart ──────────────────────────────────── -HELM_CHART_DIR="${PROJECT_ROOT}/deploy/helm/navigator" +HELM_CHART_DIR="${PROJECT_ROOT}/deploy/helm/openshell" CHART_DEST="${ROOTFS_DIR}/var/lib/rancher/k3s/server/static/charts" if [ -d "${HELM_CHART_DIR}" ]; then @@ -140,7 +140,7 @@ MANIFEST_DEST="${ROOTFS_DIR}/opt/navigator/manifests" echo "==> Injecting Kubernetes manifests..." 
mkdir -p "${MANIFEST_DEST}" -for manifest in navigator-helmchart.yaml agent-sandbox.yaml; do +for manifest in openshell-helmchart.yaml agent-sandbox.yaml; do if [ -f "${MANIFEST_SRC}/${manifest}" ]; then cp "${MANIFEST_SRC}/${manifest}" "${MANIFEST_DEST}/" echo " ${manifest}" @@ -197,8 +197,8 @@ pull_and_save() { cp "${output}" "${cache}" } -pull_and_save "${SERVER_IMAGE}" "${IMAGES_DIR}/navigator-server.tar.zst" -pull_and_save "${SANDBOX_IMAGE}" "${IMAGES_DIR}/navigator-sandbox.tar.zst" +pull_and_save "${SERVER_IMAGE}" "${IMAGES_DIR}/openshell-server.tar.zst" +pull_and_save "${SANDBOX_IMAGE}" "${IMAGES_DIR}/openshell-sandbox.tar.zst" pull_and_save "${AGENT_SANDBOX_IMAGE}" "${IMAGES_DIR}/agent-sandbox-controller.tar.zst" # ── Pre-initialize k3s cluster state ───────────────────────────────── @@ -226,18 +226,19 @@ for manifest in "${MANIFEST_DEST}"/*.yaml; do done # Patch HelmChart for local images and VM settings. -HELMCHART="${INIT_MANIFESTS}/navigator-helmchart.yaml" +HELMCHART="${INIT_MANIFESTS}/openshell-helmchart.yaml" if [ -f "$HELMCHART" ]; then # Use local images — explicitly imported into containerd. sed -i '' 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" 2>/dev/null \ || sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" - # Fill image placeholders. - sed -i '' "s|__IMAGE_REPO_BASE__/server|${SERVER_IMAGE%:*}|g" "$HELMCHART" 2>/dev/null \ - || sed -i "s|__IMAGE_REPO_BASE__/server|${SERVER_IMAGE%:*}|g" "$HELMCHART" - sed -i '' "s|__IMAGE_REPO_BASE__/sandbox:__IMAGE_TAG__|${SANDBOX_IMAGE}|g" "$HELMCHART" 2>/dev/null \ - || sed -i "s|__IMAGE_REPO_BASE__/sandbox:__IMAGE_TAG__|${SANDBOX_IMAGE}|g" "$HELMCHART" - sed -i '' "s|__IMAGE_TAG__|${IMAGE_TAG}|g" "$HELMCHART" 2>/dev/null \ - || sed -i "s|__IMAGE_TAG__|${IMAGE_TAG}|g" "$HELMCHART" + # Use the locally imported image references. 
+ sed -i '' -E "s|repository:[[:space:]]*[^[:space:]]+|repository: ${SERVER_IMAGE%:*}|" "$HELMCHART" 2>/dev/null \ + || sed -i -E "s|repository:[[:space:]]*[^[:space:]]+|repository: ${SERVER_IMAGE%:*}|" "$HELMCHART" + sed -i '' -E "s|tag:[[:space:]]*\"?[^\"[:space:]]+\"?|tag: \"${IMAGE_TAG}\"|" "$HELMCHART" 2>/dev/null \ + || sed -i -E "s|tag:[[:space:]]*\"?[^\"[:space:]]+\"?|tag: \"${IMAGE_TAG}\"|" "$HELMCHART" + sed -i '' "s|server:[[:space:]]*sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest|server:\n sandboxImage: ${SANDBOX_IMAGE}|g" "$HELMCHART" 2>/dev/null || true + sed -i '' "s|sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest|sandboxImage: ${SANDBOX_IMAGE}|g" "$HELMCHART" 2>/dev/null \ + || sed -i "s|sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest|sandboxImage: ${SANDBOX_IMAGE}|g" "$HELMCHART" # Enable hostNetwork for VM (no kube-proxy / iptables). sed -i '' 's|__HOST_NETWORK__|true|g' "$HELMCHART" 2>/dev/null \ || sed -i 's|__HOST_NETWORK__|true|g' "$HELMCHART" @@ -255,13 +256,24 @@ if [ -f "$HELMCHART" ]; then # are unreliable on virtiofs. sed -i '' 's|__PERSISTENCE_ENABLED__|false|g' "$HELMCHART" 2>/dev/null \ || sed -i 's|__PERSISTENCE_ENABLED__|false|g' "$HELMCHART" - sed -i '' 's|__DB_URL__|"sqlite:/tmp/navigator.db"|g' "$HELMCHART" 2>/dev/null \ - || sed -i 's|__DB_URL__|"sqlite:/tmp/navigator.db"|g' "$HELMCHART" + sed -i '' 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" # Clear SSH gateway placeholders. 
sed -i '' 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" 2>/dev/null \ || sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" sed -i '' 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" 2>/dev/null \ || sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" + SSH_HANDSHAKE_SECRET="$(head -c 32 /dev/urandom | od -A n -t x1 | tr -d ' \n')" + sed -i '' "s|__SSH_HANDSHAKE_SECRET__|${SSH_HANDSHAKE_SECRET}|g" "$HELMCHART" 2>/dev/null \ + || sed -i "s|__SSH_HANDSHAKE_SECRET__|${SSH_HANDSHAKE_SECRET}|g" "$HELMCHART" + sed -i '' 's|__DISABLE_GATEWAY_AUTH__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DISABLE_GATEWAY_AUTH__|false|g' "$HELMCHART" + sed -i '' 's|__DISABLE_TLS__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__DISABLE_TLS__|false|g' "$HELMCHART" + sed -i '' 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: ""|g' "$HELMCHART" + sed -i '' '/__CHART_CHECKSUM__/d' "$HELMCHART" 2>/dev/null \ + || sed -i '/__CHART_CHECKSUM__/d' "$HELMCHART" fi # Boot k3s in a privileged container. We use a Docker volume for the @@ -406,16 +418,16 @@ docker exec "${INIT_CONTAINER_NAME}" sh -c ' /usr/local/bin/k3s ctr images list -q | grep -v "^sha256:" | sort ' 2>&1 | sed 's/^/ /' -# Wait for the navigator namespace (Helm controller creates it). -echo " Waiting for navigator namespace..." +# Wait for the openshell namespace (Helm controller creates it). +echo " Waiting for openshell namespace..." 
for i in $(seq 1 120); do if docker exec "${INIT_CONTAINER_NAME}" \ - /usr/local/bin/k3s kubectl get namespace navigator -o name 2>/dev/null | grep -q navigator; then + /usr/local/bin/k3s kubectl get namespace openshell -o name 2>/dev/null | grep -q openshell; then echo " Namespace ready (${i}s)" break fi if [ "$i" -eq 120 ]; then - echo "ERROR: navigator namespace did not appear in 120s" + echo "ERROR: openshell namespace did not appear in 120s" docker logs "${INIT_CONTAINER_NAME}" --tail 50 docker rm -f "${INIT_CONTAINER_NAME}" 2>/dev/null || true docker volume rm krun-k3s-init-data 2>/dev/null || true @@ -429,7 +441,7 @@ echo " Generating TLS certificates and creating secrets..." # We generate certs outside the container, then apply them via kubectl. # Use openssl for cert generation at build time (simpler than pulling in -# the Rust PKI library). The navigator-bootstrap Rust code will detect +# the Rust PKI library). The bootstrap Rust code will detect # these pre-baked secrets at runtime and skip its own generation. 
PKI_DIR=$(mktemp -d) @@ -438,7 +450,7 @@ trap 'rm -rf "${PKI_DIR}"' EXIT # Generate CA openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:prime256v1 \ -keyout "${PKI_DIR}/ca.key" -out "${PKI_DIR}/ca.crt" \ - -days 3650 -nodes -subj "/O=navigator/CN=navigator-ca" 2>/dev/null + -days 3650 -nodes -subj "/O=openshell/CN=openshell-ca" 2>/dev/null # Generate server cert with SANs cat > "${PKI_DIR}/server.cnf" </dev/null + -nodes -subj "/CN=openshell-client" 2>/dev/null openssl x509 -req -in "${PKI_DIR}/client.csr" \ -CA "${PKI_DIR}/ca.crt" -CAkey "${PKI_DIR}/ca.key" -CAcreateserial \ -out "${PKI_DIR}/client.crt" -days 3650 2>/dev/null @@ -494,35 +506,35 @@ SERVER_KEY_B64=$(base64 < "${PKI_DIR}/server.key" | tr -d '\n') CLIENT_CRT_B64=$(base64 < "${PKI_DIR}/client.crt" | tr -d '\n') CLIENT_KEY_B64=$(base64 < "${PKI_DIR}/client.key" | tr -d '\n') -apply_secret "navigator-server-tls" "$(cat </dev/null || echo "0") if [ "$ready" = "1" ]; then - echo " Navigator pod ready (${i}s)" + echo " OpenShell pod ready (${i}s)" break fi if [ "$i" -eq 120 ]; then - echo "WARNING: navigator pod not ready after 120s, continuing anyway" + echo "WARNING: openshell pod not ready after 120s, continuing anyway" docker exec "${INIT_CONTAINER_NAME}" \ - /usr/local/bin/k3s kubectl -n navigator get pods 2>/dev/null | sed 's/^/ /' || true + /usr/local/bin/k3s kubectl -n openshell get pods 2>/dev/null | sed 's/^/ /' || true break fi sleep 1 diff --git a/crates/openshell-vm/src/ffi.rs b/crates/openshell-vm/src/ffi.rs index b9bb59d4..06fa2004 100644 --- a/crates/openshell-vm/src/ffi.rs +++ b/crates/openshell-vm/src/ffi.rs @@ -1,86 +1,148 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -//! Minimal FFI bindings for the libkrun C API. +//! Minimal runtime-loaded bindings for the libkrun C API. //! -//! libkrun is a `cdylib` — it cannot be consumed as a Rust dependency. We link -//! 
against the Homebrew-installed system library and declare `extern "C"` for -//! the subset of functions we need. -//! -//! See: +//! We intentionally do not link libkrun at build time. Instead, the +//! `gateway` binary loads `libkrun` from the staged `gateway.runtime/` +//! sidecar bundle on first use. -use libc::c_char; +use std::path::PathBuf; +use std::sync::OnceLock; -#[link(name = "krun")] -#[allow(dead_code)] -unsafe extern "C" { - /// Sets the log level for the library (0=Off .. 5=Trace). - pub fn krun_set_log_level(level: u32) -> i32; +use libc::c_char; +use libloading::Library; - /// Creates a configuration context. Returns context ID (>= 0) or negative error. - pub fn krun_create_ctx() -> i32; +use crate::VmError; - /// Frees a configuration context. - pub fn krun_free_ctx(ctx_id: u32) -> i32; +pub const KRUN_LOG_TARGET_DEFAULT: i32 = -1; +pub const KRUN_LOG_LEVEL_OFF: u32 = 0; +pub const KRUN_LOG_LEVEL_ERROR: u32 = 1; +pub const KRUN_LOG_LEVEL_WARN: u32 = 2; +pub const KRUN_LOG_LEVEL_INFO: u32 = 3; +pub const KRUN_LOG_LEVEL_DEBUG: u32 = 4; +pub const KRUN_LOG_LEVEL_TRACE: u32 = 5; +pub const KRUN_LOG_STYLE_AUTO: u32 = 0; +pub const KRUN_LOG_OPTION_NO_ENV: u32 = 1; - /// Sets vCPUs and RAM (MiB) for the microVM. 
- pub fn krun_set_vm_config(ctx_id: u32, num_vcpus: u8, ram_mib: u32) -> i32; +type KrunInitLog = + unsafe extern "C" fn(target_fd: i32, level: u32, style: u32, options: u32) -> i32; +type KrunCreateCtx = unsafe extern "C" fn() -> i32; +type KrunFreeCtx = unsafe extern "C" fn(ctx_id: u32) -> i32; +type KrunSetVmConfig = unsafe extern "C" fn(ctx_id: u32, num_vcpus: u8, ram_mib: u32) -> i32; +type KrunSetRoot = unsafe extern "C" fn(ctx_id: u32, root_path: *const c_char) -> i32; +type KrunSetWorkdir = unsafe extern "C" fn(ctx_id: u32, workdir_path: *const c_char) -> i32; +type KrunSetExec = unsafe extern "C" fn( + ctx_id: u32, + exec_path: *const c_char, + argv: *const *const c_char, + envp: *const *const c_char, +) -> i32; +type KrunSetPortMap = unsafe extern "C" fn(ctx_id: u32, port_map: *const *const c_char) -> i32; +type KrunSetConsoleOutput = unsafe extern "C" fn(ctx_id: u32, filepath: *const c_char) -> i32; +type KrunAddVsockPort2 = + unsafe extern "C" fn(ctx_id: u32, port: u32, c_filepath: *const c_char, listen: bool) -> i32; +type KrunStartEnter = unsafe extern "C" fn(ctx_id: u32) -> i32; +type KrunDisableImplicitVsock = unsafe extern "C" fn(ctx_id: u32) -> i32; +type KrunAddVsock = unsafe extern "C" fn(ctx_id: u32, tsi_features: u32) -> i32; +type KrunAddNetUnixgram = unsafe extern "C" fn( + ctx_id: u32, + c_path: *const c_char, + fd: i32, + c_mac: *const u8, + features: u32, + flags: u32, +) -> i32; - /// Sets the root filesystem path (virtio-fs backed directory). 
- pub fn krun_set_root(ctx_id: u32, root_path: *const c_char) -> i32; +pub struct LibKrun { + pub krun_init_log: KrunInitLog, + pub krun_create_ctx: KrunCreateCtx, + pub krun_free_ctx: KrunFreeCtx, + pub krun_set_vm_config: KrunSetVmConfig, + pub krun_set_root: KrunSetRoot, + pub krun_set_workdir: KrunSetWorkdir, + pub krun_set_exec: KrunSetExec, + pub krun_set_port_map: KrunSetPortMap, + pub krun_set_console_output: KrunSetConsoleOutput, + pub krun_add_vsock_port2: KrunAddVsockPort2, + pub krun_start_enter: KrunStartEnter, + pub krun_disable_implicit_vsock: KrunDisableImplicitVsock, + pub krun_add_vsock: KrunAddVsock, + pub krun_add_net_unixgram: KrunAddNetUnixgram, +} - /// Sets the working directory inside the VM. - pub fn krun_set_workdir(ctx_id: u32, workdir_path: *const c_char) -> i32; +static LIBKRUN: OnceLock = OnceLock::new(); - /// Sets the executable path, argv, and envp for the process inside the VM. - /// - /// **Important:** If `envp` is NULL, libkrun serializes the entire host - /// environment into the kernel command line, which can overflow its 4096-byte - /// limit. Always pass an explicit minimal env. - pub fn krun_set_exec( - ctx_id: u32, - exec_path: *const c_char, - argv: *const *const c_char, - envp: *const *const c_char, - ) -> i32; +pub fn libkrun() -> Result<&'static LibKrun, VmError> { + if let Some(lib) = LIBKRUN.get() { + return Ok(lib); + } - /// Configures host-to-guest TCP port mapping. - /// - /// Format: null-terminated array of `"host_port:guest_port"` C strings. - /// Passing NULL auto-exposes all listening guest ports. - pub fn krun_set_port_map(ctx_id: u32, port_map: *const *const c_char) -> i32; + let loaded = LibKrun::load()?; + let _ = LIBKRUN.set(loaded); + Ok(LIBKRUN.get().expect("libkrun should be initialized")) +} - /// Redirects console output to a file (ignores stdin). 
- pub fn krun_set_console_output(ctx_id: u32, filepath: *const c_char) -> i32; +impl LibKrun { + fn load() -> Result { + let path = runtime_libkrun_path()?; + let library = Box::leak(Box::new(unsafe { + Library::new(&path).map_err(|e| { + VmError::HostSetup(format!("load libkrun from {}: {e}", path.display())) + })? + })); - /// Starts and enters the microVM. **Never returns** on success — calls - /// `exit()` with the workload's exit code. Only returns on config error. - pub fn krun_start_enter(ctx_id: u32) -> i32; + Ok(Self { + krun_init_log: load_symbol(library, b"krun_init_log\0", &path)?, + krun_create_ctx: load_symbol(library, b"krun_create_ctx\0", &path)?, + krun_free_ctx: load_symbol(library, b"krun_free_ctx\0", &path)?, + krun_set_vm_config: load_symbol(library, b"krun_set_vm_config\0", &path)?, + krun_set_root: load_symbol(library, b"krun_set_root\0", &path)?, + krun_set_workdir: load_symbol(library, b"krun_set_workdir\0", &path)?, + krun_set_exec: load_symbol(library, b"krun_set_exec\0", &path)?, + krun_set_port_map: load_symbol(library, b"krun_set_port_map\0", &path)?, + krun_set_console_output: load_symbol(library, b"krun_set_console_output\0", &path)?, + krun_add_vsock_port2: load_symbol(library, b"krun_add_vsock_port2\0", &path)?, + krun_start_enter: load_symbol(library, b"krun_start_enter\0", &path)?, + krun_disable_implicit_vsock: load_symbol( + library, + b"krun_disable_implicit_vsock\0", + &path, + )?, + krun_add_vsock: load_symbol(library, b"krun_add_vsock\0", &path)?, + krun_add_net_unixgram: load_symbol(library, b"krun_add_net_unixgram\0", &path)?, + }) + } +} - /// Disables the implicit vsock device. Must be called before - /// `krun_add_vsock` to manually configure TSI features. - pub fn krun_disable_implicit_vsock(ctx_id: u32) -> i32; +fn runtime_libkrun_path() -> Result { + Ok(crate::configured_runtime_dir()?.join(required_runtime_lib_name())) +} - /// Adds a vsock device with specified TSI features. 
- /// - /// `tsi_features` is a bitmask: - /// - `KRUN_TSI_HIJACK_INET` (1 << 0): intercept AF_INET sockets - /// - `KRUN_TSI_HIJACK_UNIX` (1 << 1): intercept AF_UNIX sockets - /// - 0: vsock without any TSI hijacking - pub fn krun_add_vsock(ctx_id: u32, tsi_features: u32) -> i32; +fn required_runtime_lib_name() -> &'static str { + #[cfg(target_os = "macos")] + { + "libkrun.dylib" + } + #[cfg(not(target_os = "macos"))] + { + "libkrun.so" + } +} - /// Adds a virtio-net device connected to a unixgram-based backend - /// (e.g., gvproxy in vfkit mode). - /// - /// `c_path` and `fd` are mutually exclusive: set one to NULL/-1. - /// `c_mac` is 6 bytes. `features` is virtio-net feature bitmask. - /// `flags` may include `NET_FLAG_VFKIT` (1 << 0) for gvproxy vfkit mode. - pub fn krun_add_net_unixgram( - ctx_id: u32, - c_path: *const c_char, - fd: i32, - c_mac: *const u8, - features: u32, - flags: u32, - ) -> i32; +fn load_symbol( + library: &'static Library, + symbol: &[u8], + path: &std::path::Path, +) -> Result { + let loaded = unsafe { + library.get::(symbol).map_err(|e| { + VmError::HostSetup(format!( + "resolve {} from {}: {e}", + String::from_utf8_lossy(symbol).trim_end_matches('\0'), + path.display() + )) + })? + }; + Ok(*loaded) } diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index f55df64c..4105aadc 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -17,7 +17,6 @@ mod ffi; use std::ffi::CString; -use std::os::unix::process::CommandExt as _; use std::path::{Path, PathBuf}; use std::ptr; use std::time::Instant; @@ -49,6 +48,10 @@ pub enum VmError { #[error("required binary not found: {path}\n{hint}")] BinaryNotFound { path: String, hint: String }, + /// Host-side VM setup failed before boot. + #[error("host setup failed: {0}")] + HostSetup(String), + /// `fork()` failed. 
#[error("fork() failed: {0}")] Fork(String), @@ -88,6 +91,14 @@ pub enum NetBackend { }, } +/// Host Unix socket bridged into the guest as a vsock port. +#[derive(Debug, Clone)] +pub struct VsockPort { + pub port: u32, + pub socket_path: PathBuf, + pub listen: bool, +} + /// Configuration for a libkrun microVM. pub struct VmConfig { /// Path to the extracted rootfs directory (aarch64 Linux). @@ -116,6 +127,9 @@ pub struct VmConfig { /// Only used with TSI networking. pub port_map: Vec, + /// Optional host Unix sockets exposed to the guest over vsock. + pub vsock_ports: Vec, + /// libkrun log level (0=Off .. 5=Trace). pub log_level: u32, @@ -159,10 +173,11 @@ impl VmConfig { // port stays the same for CLI clients. "30051:8080".to_string(), ], + vsock_ports: vec![], log_level: 3, // Info — for debugging console_output: None, net: NetBackend::Gvproxy { - binary: find_gvproxy().unwrap_or_else(|| PathBuf::from("/opt/podman/bin/gvproxy")), + binary: default_runtime_gvproxy_path(), }, } } @@ -183,86 +198,303 @@ fn c_string_array(strings: &[&str]) -> Result<(Vec, Vec<*const libc::c_ Ok((owned, ptrs)) } -/// Discover the Homebrew lib directory. 
-fn homebrew_lib_dir() -> String { - std::process::Command::new("brew") - .args(["--prefix"]) - .output() - .ok() - .and_then(|o| { - if o.status.success() { - String::from_utf8(o.stdout) - .ok() - .map(|s| format!("{}/lib", s.trim())) - } else { - None - } - }) - .unwrap_or_else(|| "/opt/homebrew/lib".to_string()) +const VM_RUNTIME_DIR_NAME: &str = "gateway.runtime"; +const VM_RUNTIME_DIR_ENV: &str = "OPENSHELL_VM_RUNTIME_DIR"; + +pub(crate) fn configured_runtime_dir() -> Result { + if let Some(path) = std::env::var_os(VM_RUNTIME_DIR_ENV) { + return Ok(PathBuf::from(path)); + } + + let exe = std::env::current_exe().map_err(|e| VmError::HostSetup(e.to_string()))?; + let exe_dir = exe.parent().ok_or_else(|| { + VmError::HostSetup(format!( + "executable has no parent directory: {}", + exe.display() + )) + })?; + Ok(exe_dir.join(VM_RUNTIME_DIR_NAME)) } -/// Ensure `DYLD_FALLBACK_LIBRARY_PATH` includes the Homebrew lib directory. -/// -/// libkrun loads `libkrunfw.5.dylib` at runtime via `dlopen`. On macOS, dyld -/// only reads `DYLD_FALLBACK_LIBRARY_PATH` at process startup — setting it -/// programmatically after launch has no effect. If the variable isn't already -/// set, we re-exec the current process with it configured so dyld picks it up. -/// -/// Returns `Ok(())` if the path is already set, or does not return (re-execs). 
-fn ensure_krunfw_path() -> Result<(), VmError> { - let key = "DYLD_FALLBACK_LIBRARY_PATH"; - let homebrew_lib = homebrew_lib_dir(); +fn required_runtime_lib_name() -> &'static str { + #[cfg(target_os = "macos")] + { + "libkrun.dylib" + } + #[cfg(not(target_os = "macos"))] + { + "libkrun.so" + } +} + +fn validate_runtime_dir(dir: &Path) -> Result { + if !dir.is_dir() { + return Err(VmError::BinaryNotFound { + path: dir.display().to_string(), + hint: format!( + "stage the VM runtime bundle with `mise run vm:bundle-runtime` or set {VM_RUNTIME_DIR_ENV}" + ), + }); + } - if let Ok(existing) = std::env::var(key) - && existing.contains(&homebrew_lib) + let libkrun = dir.join(required_runtime_lib_name()); + if !libkrun.is_file() { + return Err(VmError::BinaryNotFound { + path: libkrun.display().to_string(), + hint: "runtime bundle is incomplete: missing libkrun".to_string(), + }); + } + + let has_krunfw = std::fs::read_dir(dir) + .map_err(|e| VmError::HostSetup(format!("read {}: {e}", dir.display())))? + .filter_map(Result::ok) + .any(|entry| { + entry + .file_name() + .to_string_lossy() + .starts_with("libkrunfw.") + }); + if !has_krunfw { + return Err(VmError::BinaryNotFound { + path: dir.display().to_string(), + hint: "runtime bundle is incomplete: missing libkrunfw".to_string(), + }); + } + + let gvproxy = dir.join("gvproxy"); + if !gvproxy.is_file() { + return Err(VmError::BinaryNotFound { + path: gvproxy.display().to_string(), + hint: "runtime bundle is incomplete: missing gvproxy".to_string(), + }); + } + + #[cfg(unix)] { - return Ok(()); // Already set — nothing to do. + use std::os::unix::fs::PermissionsExt as _; + + let mode = std::fs::metadata(&gvproxy) + .map_err(|e| VmError::HostSetup(format!("stat {}: {e}", gvproxy.display())))? + .permissions() + .mode(); + if mode & 0o111 == 0 { + return Err(VmError::HostSetup(format!( + "gvproxy is not executable: {}", + gvproxy.display() + ))); + } } - // Re-exec ourselves with the library path set. 
dyld will process it - // at startup, making libkrunfw discoverable for libkrun's dlopen. - let exe = std::env::current_exe().map_err(|e| VmError::Fork(e.to_string()))?; - let args: Vec = std::env::args().collect(); + Ok(gvproxy) +} - let new_val = match std::env::var(key) { - Ok(existing) => format!("{homebrew_lib}:{existing}"), - Err(_) => homebrew_lib, - }; +fn resolve_runtime_bundle() -> Result { + let runtime_dir = configured_runtime_dir()?; + validate_runtime_dir(&runtime_dir) +} + +pub fn default_runtime_gvproxy_path() -> PathBuf { + configured_runtime_dir() + .unwrap_or_else(|_| PathBuf::from(VM_RUNTIME_DIR_NAME)) + .join("gvproxy") +} - eprintln!("re-exec: setting {key} for libkrunfw discovery"); - // SAFETY: single-threaded at this point (before fork). +fn raise_nofile_limit() { + #[cfg(unix)] unsafe { - std::env::set_var(key, &new_val); + let mut rlim = libc::rlimit { + rlim_cur: 0, + rlim_max: 0, + }; + if libc::getrlimit(libc::RLIMIT_NOFILE, &raw mut rlim) == 0 { + rlim.rlim_cur = rlim.rlim_max; + let _ = libc::setrlimit(libc::RLIMIT_NOFILE, &rlim); + } } +} - // exec replaces the process — if it returns, something went wrong. - let err = std::process::Command::new(exe).args(&args[1..]).exec(); - Err(VmError::Fork(format!("re-exec failed: {err}"))) +fn clamp_log_level(level: u32) -> u32 { + match level { + 0 => ffi::KRUN_LOG_LEVEL_OFF, + 1 => ffi::KRUN_LOG_LEVEL_ERROR, + 2 => ffi::KRUN_LOG_LEVEL_WARN, + 3 => ffi::KRUN_LOG_LEVEL_INFO, + 4 => ffi::KRUN_LOG_LEVEL_DEBUG, + _ => ffi::KRUN_LOG_LEVEL_TRACE, + } } -/// Try to find gvproxy in common locations. 
-fn find_gvproxy() -> Option { - // Check PATH first - if let Ok(output) = std::process::Command::new("which").arg("gvproxy").output() { - if output.status.success() { - let path = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if !path.is_empty() { - return Some(PathBuf::from(path)); - } +struct VmContext { + krun: &'static ffi::LibKrun, + ctx_id: u32, +} + +impl VmContext { + fn create(log_level: u32) -> Result { + let krun = ffi::libkrun()?; + unsafe { + check( + (krun.krun_init_log)( + ffi::KRUN_LOG_TARGET_DEFAULT, + clamp_log_level(log_level), + ffi::KRUN_LOG_STYLE_AUTO, + ffi::KRUN_LOG_OPTION_NO_ENV, + ), + "krun_init_log", + )?; + } + + let ctx_id = unsafe { (krun.krun_create_ctx)() }; + if ctx_id < 0 { + return Err(VmError::Krun { + func: "krun_create_ctx", + code: ctx_id, + }); + } + + Ok(Self { + krun, + ctx_id: ctx_id as u32, + }) + } + + fn set_vm_config(&self, vcpus: u8, mem_mib: u32) -> Result<(), VmError> { + unsafe { + check( + (self.krun.krun_set_vm_config)(self.ctx_id, vcpus, mem_mib), + "krun_set_vm_config", + ) + } + } + + fn set_root(&self, rootfs: &Path) -> Result<(), VmError> { + let rootfs_c = path_to_cstring(rootfs)?; + unsafe { + check( + (self.krun.krun_set_root)(self.ctx_id, rootfs_c.as_ptr()), + "krun_set_root", + ) + } + } + + fn set_workdir(&self, workdir: &str) -> Result<(), VmError> { + let workdir_c = CString::new(workdir)?; + unsafe { + check( + (self.krun.krun_set_workdir)(self.ctx_id, workdir_c.as_ptr()), + "krun_set_workdir", + ) + } + } + + fn disable_implicit_vsock(&self) -> Result<(), VmError> { + unsafe { + check( + (self.krun.krun_disable_implicit_vsock)(self.ctx_id), + "krun_disable_implicit_vsock", + ) + } + } + + fn add_vsock(&self, tsi_features: u32) -> Result<(), VmError> { + unsafe { + check( + (self.krun.krun_add_vsock)(self.ctx_id, tsi_features), + "krun_add_vsock", + ) + } + } + + fn add_net_unixgram( + &self, + socket_path: &Path, + mac: &[u8; 6], + features: u32, + flags: u32, + ) -> Result<(), 
VmError> { + let sock_c = path_to_cstring(socket_path)?; + unsafe { + check( + (self.krun.krun_add_net_unixgram)( + self.ctx_id, + sock_c.as_ptr(), + -1, + mac.as_ptr(), + features, + flags, + ), + "krun_add_net_unixgram", + ) + } + } + + fn set_port_map(&self, port_map: &[String]) -> Result<(), VmError> { + let port_strs: Vec<&str> = port_map.iter().map(String::as_str).collect(); + let (_port_owners, port_ptrs) = c_string_array(&port_strs)?; + unsafe { + check( + (self.krun.krun_set_port_map)(self.ctx_id, port_ptrs.as_ptr()), + "krun_set_port_map", + ) + } + } + + fn add_vsock_port(&self, port: &VsockPort) -> Result<(), VmError> { + let socket_c = path_to_cstring(&port.socket_path)?; + unsafe { + check( + (self.krun.krun_add_vsock_port2)( + self.ctx_id, + port.port, + socket_c.as_ptr(), + port.listen, + ), + "krun_add_vsock_port2", + ) + } + } + + fn set_console_output(&self, path: &Path) -> Result<(), VmError> { + let console_c = path_to_cstring(path)?; + unsafe { + check( + (self.krun.krun_set_console_output)(self.ctx_id, console_c.as_ptr()), + "krun_set_console_output", + ) + } + } + + fn set_exec(&self, exec_path: &str, args: &[String], env: &[String]) -> Result<(), VmError> { + let exec_c = CString::new(exec_path)?; + let argv_strs: Vec<&str> = args.iter().map(String::as_str).collect(); + let (_argv_owners, argv_ptrs) = c_string_array(&argv_strs)?; + let env_strs: Vec<&str> = env.iter().map(String::as_str).collect(); + let (_env_owners, env_ptrs) = c_string_array(&env_strs)?; + + unsafe { + check( + (self.krun.krun_set_exec)( + self.ctx_id, + exec_c.as_ptr(), + argv_ptrs.as_ptr(), + env_ptrs.as_ptr(), + ), + "krun_set_exec", + ) } } - // Common Podman installation paths - for p in &[ - "/opt/podman/bin/gvproxy", - "/opt/homebrew/bin/gvproxy", - "/usr/local/bin/gvproxy", - ] { - let path = PathBuf::from(p); - if path.exists() { - return Some(path); + + fn start_enter(&self) -> i32 { + unsafe { (self.krun.krun_start_enter)(self.ctx_id) } + } +} + +impl Drop for 
VmContext { + fn drop(&mut self) { + unsafe { + let _ = (self.krun.krun_free_ctx)(self.ctx_id); } } - None } /// Issue a gvproxy expose call via its HTTP API (unix socket). @@ -361,54 +593,17 @@ pub fn launch(config: &VmConfig) -> Result { eprintln!("rootfs: {}", config.rootfs.display()); eprintln!("vm: {} vCPU(s), {} MiB RAM", config.vcpus, config.mem_mib); - // Ensure libkrunfw is discoverable. On macOS, dyld only reads - // DYLD_FALLBACK_LIBRARY_PATH at startup, so if it's not set we - // re-exec ourselves with it configured (this call won't return). - ensure_krunfw_path()?; + // The runtime must already be staged as a sidecar bundle next to the + // binary (or explicitly pointed to via OPENSHELL_VM_RUNTIME_DIR). + resolve_runtime_bundle()?; + raise_nofile_limit(); // ── Configure the microVM ────────────────────────────────────── - unsafe { - check( - ffi::krun_set_log_level(config.log_level), - "krun_set_log_level", - )?; - } - - let ctx_id = unsafe { ffi::krun_create_ctx() }; - if ctx_id < 0 { - return Err(VmError::Krun { - func: "krun_create_ctx", - code: ctx_id, - }); - } - #[allow(clippy::cast_sign_loss)] - let ctx_id = ctx_id as u32; - - unsafe { - check( - ffi::krun_set_vm_config(ctx_id, config.vcpus, config.mem_mib), - "krun_set_vm_config", - )?; - } - - // Root filesystem (virtio-fs) - let rootfs_c = path_to_cstring(&config.rootfs)?; - unsafe { - check( - ffi::krun_set_root(ctx_id, rootfs_c.as_ptr()), - "krun_set_root", - )?; - } - - // Working directory - let workdir_c = CString::new(config.workdir.as_str())?; - unsafe { - check( - ffi::krun_set_workdir(ctx_id, workdir_c.as_ptr()), - "krun_set_workdir", - )?; - } + let vm = VmContext::create(config.log_level)?; + vm.set_vm_config(config.vcpus, config.mem_mib)?; + vm.set_root(&config.rootfs)?; + vm.set_workdir(&config.workdir)?; // Networking setup let mut gvproxy_child: Option = None; @@ -419,13 +614,8 @@ pub fn launch(config: &VmConfig) -> Result { // Default TSI — no special setup needed. 
} NetBackend::None => { - unsafe { - check( - ffi::krun_disable_implicit_vsock(ctx_id), - "krun_disable_implicit_vsock", - )?; - check(ffi::krun_add_vsock(ctx_id, 0), "krun_add_vsock")?; - } + vm.disable_implicit_vsock()?; + vm.add_vsock(0)?; eprintln!("Networking: disabled (no TSI, no virtio-net)"); } NetBackend::Gvproxy { binary } => { @@ -494,15 +684,8 @@ pub fn launch(config: &VmConfig) -> Result { } // Disable implicit TSI and add virtio-net via gvproxy - unsafe { - check( - ffi::krun_disable_implicit_vsock(ctx_id), - "krun_disable_implicit_vsock", - )?; - check(ffi::krun_add_vsock(ctx_id, 0), "krun_add_vsock")?; - } - - let sock_c = path_to_cstring(&vfkit_sock)?; + vm.disable_implicit_vsock()?; + vm.add_vsock(0)?; // This MAC matches gvproxy's default static DHCP lease for // 192.168.127.2. Using a different MAC can cause the gVisor // network stack to misroute or drop packets. @@ -523,19 +706,7 @@ pub fn launch(config: &VmConfig) -> Result { | NET_FEATURE_HOST_UFO; const NET_FLAG_VFKIT: u32 = 1 << 0; - unsafe { - check( - ffi::krun_add_net_unixgram( - ctx_id, - sock_c.as_ptr(), - -1, - mac.as_ptr(), - COMPAT_NET_FEATURES, - NET_FLAG_VFKIT, - ), - "krun_add_net_unixgram", - )?; - } + vm.add_net_unixgram(&vfkit_sock, &mac, COMPAT_NET_FEATURES, NET_FLAG_VFKIT)?; eprintln!( "Networking: gvproxy (virtio-net) [{:.1}s]", @@ -548,14 +719,11 @@ pub fn launch(config: &VmConfig) -> Result { // Port mapping (TSI only) if !config.port_map.is_empty() && matches!(config.net, NetBackend::Tsi) { - let port_strs: Vec<&str> = config.port_map.iter().map(String::as_str).collect(); - let (_port_owners, port_ptrs) = c_string_array(&port_strs)?; - unsafe { - check( - ffi::krun_set_port_map(ctx_id, port_ptrs.as_ptr()), - "krun_set_port_map", - )?; - } + vm.set_port_map(&config.port_map)?; + } + + for vsock_port in &config.vsock_ports { + vm.add_vsock_port(vsock_port)?; } // Console output @@ -566,45 +734,22 @@ pub fn launch(config: &VmConfig) -> Result { .unwrap_or(&config.rootfs) 
.join("console.log") }); - let console_c = path_to_cstring(&console_log)?; - unsafe { - check( - ffi::krun_set_console_output(ctx_id, console_c.as_ptr()), - "krun_set_console_output", - )?; - } - - // Executable, argv, envp - let exec_c = CString::new(config.exec_path.as_str())?; - - // argv: libkrun's init sets argv[0] from exec_path internally, - // so we only pass the actual arguments here. - let argv_strs: Vec<&str> = config.args.iter().map(String::as_str).collect(); - let (_argv_owners, argv_ptrs) = c_string_array(&argv_strs)?; + vm.set_console_output(&console_log)?; // envp: use provided env or minimal defaults - let env_strs: Vec<&str> = if config.env.is_empty() { + let env: Vec = if config.env.is_empty() { vec![ "HOME=/root", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm", ] + .into_iter() + .map(ToOwned::to_owned) + .collect() } else { - config.env.iter().map(String::as_str).collect() + config.env.clone() }; - let (_env_owners, env_ptrs) = c_string_array(&env_strs)?; - - unsafe { - check( - ffi::krun_set_exec( - ctx_id, - exec_c.as_ptr(), - argv_ptrs.as_ptr(), - env_ptrs.as_ptr(), - ), - "krun_set_exec", - )?; - } + vm.set_exec(&config.exec_path, &config.args, &env)?; // ── Fork and enter the VM ────────────────────────────────────── // @@ -619,7 +764,7 @@ pub fn launch(config: &VmConfig) -> Result { -1 => Err(VmError::Fork(std::io::Error::last_os_error().to_string())), 0 => { // Child process: enter the VM (never returns on success) - let ret = unsafe { ffi::krun_start_enter(ctx_id) }; + let ret = vm.start_enter(); eprintln!("krun_start_enter failed: {ret}"); std::process::exit(1); } @@ -1189,7 +1334,7 @@ fn host_tcp_probe() -> bool { } } -/// Poll kubectl until the `navigator` namespace exists. +/// Poll kubectl until the `openshell` namespace exists. /// /// Uses exponential backoff (500ms → 3s) to minimize latency when the /// namespace appears quickly while avoiding kubectl spam. 
@@ -1202,21 +1347,21 @@ fn wait_for_namespace(kubeconfig: &str) -> Result<(), VmError> { loop { let output = std::process::Command::new("kubectl") .args(["--kubeconfig", kubeconfig]) - .args(["get", "namespace", "navigator", "-o", "name"]) + .args(["get", "namespace", "openshell", "-o", "name"]) .output(); if let Ok(output) = output && output.status.success() { let stdout = String::from_utf8_lossy(&output.stdout); - if stdout.contains("navigator") { + if stdout.contains("openshell") { return Ok(()); } } if start.elapsed() >= timeout { return Err(VmError::Bootstrap( - "timed out waiting for navigator namespace (180s). \ + "timed out waiting for openshell namespace (180s). \ Check console.log for k3s errors." .to_string(), )); @@ -1225,7 +1370,7 @@ fn wait_for_namespace(kubeconfig: &str) -> Result<(), VmError> { attempts += 1; if attempts.is_multiple_of(10) { eprintln!( - " still waiting for navigator namespace ({:.0}s elapsed)", + " still waiting for openshell namespace ({:.0}s elapsed)", start.elapsed().as_secs_f64() ); } @@ -1252,7 +1397,7 @@ fn apply_tls_secrets( "kind": "Secret", "metadata": { "name": openshell_bootstrap::constants::SERVER_TLS_SECRET_NAME, - "namespace": "navigator" + "namespace": "openshell" }, "type": "kubernetes.io/tls", "data": { @@ -1266,7 +1411,7 @@ fn apply_tls_secrets( "kind": "Secret", "metadata": { "name": openshell_bootstrap::constants::SERVER_CLIENT_CA_SECRET_NAME, - "namespace": "navigator" + "namespace": "openshell" }, "type": "Opaque", "data": { @@ -1279,7 +1424,7 @@ fn apply_tls_secrets( "kind": "Secret", "metadata": { "name": openshell_bootstrap::constants::CLIENT_TLS_SECRET_NAME, - "namespace": "navigator" + "namespace": "openshell" }, "type": "Opaque", "data": { @@ -1341,3 +1486,68 @@ extern "C" fn forward_signal(_sig: libc::c_int) { } } } + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_runtime_dir() -> PathBuf { + let nanos = SystemTime::now() + 
.duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_nanos(); + std::env::temp_dir().join(format!( + "openshell-vm-runtime-{}-{nanos}", + std::process::id() + )) + } + + fn write_runtime_file(path: &Path) { + fs::write(path, b"test").expect("failed to write runtime file"); + } + + #[test] + fn validate_runtime_dir_accepts_minimal_bundle() { + let dir = temp_runtime_dir(); + fs::create_dir_all(&dir).expect("failed to create runtime dir"); + + write_runtime_file(&dir.join(required_runtime_lib_name())); + write_runtime_file(&dir.join("libkrunfw.test")); + let gvproxy = dir.join("gvproxy"); + write_runtime_file(&gvproxy); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + let mut perms = fs::metadata(&gvproxy).expect("stat gvproxy").permissions(); + perms.set_mode(0o755); + fs::set_permissions(&gvproxy, perms).expect("chmod gvproxy"); + } + + let resolved_gvproxy = validate_runtime_dir(&dir).expect("runtime bundle should validate"); + assert_eq!(resolved_gvproxy, gvproxy); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn validate_runtime_dir_requires_gvproxy() { + let dir = temp_runtime_dir(); + fs::create_dir_all(&dir).expect("failed to create runtime dir"); + + write_runtime_file(&dir.join(required_runtime_lib_name())); + write_runtime_file(&dir.join("libkrunfw.test")); + + let err = validate_runtime_dir(&dir).expect_err("missing gvproxy should fail"); + match err { + VmError::BinaryNotFound { hint, .. 
} => { + assert!(hint.contains("missing gvproxy")); + } + other => panic!("unexpected error: {other:?}"), + } + + let _ = fs::remove_dir_all(&dir); + } +} diff --git a/crates/openshell-vm/src/main.rs b/crates/openshell-vm/src/main.rs index 144d28eb..bc1b5b10 100644 --- a/crates/openshell-vm/src/main.rs +++ b/crates/openshell-vm/src/main.rs @@ -93,16 +93,7 @@ fn run(cli: Cli) -> Result> { "tsi" => openshell_vm::NetBackend::Tsi, "none" => openshell_vm::NetBackend::None, "gvproxy" => openshell_vm::NetBackend::Gvproxy { - binary: PathBuf::from( - [ - "/opt/podman/bin/gvproxy", - "/opt/homebrew/bin/gvproxy", - "/usr/local/bin/gvproxy", - ] - .iter() - .find(|p| std::path::Path::new(p).exists()) - .unwrap_or(&"/opt/podman/bin/gvproxy"), - ), + binary: openshell_vm::default_runtime_gvproxy_path(), }, other => { return Err( @@ -126,6 +117,7 @@ fn run(cli: Cli) -> Result> { env: cli.env, workdir: cli.workdir, port_map: cli.port, + vsock_ports: vec![], log_level: cli.krun_log_level, console_output: None, net: net_backend.clone(), diff --git a/crates/openshell-vm/tests/gateway_integration.rs b/crates/openshell-vm/tests/gateway_integration.rs index 413d1b3c..528e1eaa 100644 --- a/crates/openshell-vm/tests/gateway_integration.rs +++ b/crates/openshell-vm/tests/gateway_integration.rs @@ -44,27 +44,16 @@ fn codesign_if_needed() { } } -/// Build environment variables so libkrun can find libkrunfw at runtime. 
-fn libkrun_env() -> Vec<(&'static str, String)> { - if cfg!(target_os = "macos") { - let homebrew_lib = Command::new("brew") - .args(["--prefix"]) - .output() - .ok() - .and_then(|o| String::from_utf8(o.stdout).ok()) - .map(|s| format!("{}/lib", s.trim())) - .unwrap_or_else(|| "/opt/homebrew/lib".to_string()); - - let existing = std::env::var("DYLD_FALLBACK_LIBRARY_PATH").unwrap_or_default(); - let val = if existing.is_empty() { - homebrew_lib - } else { - format!("{homebrew_lib}:{existing}") - }; - vec![("DYLD_FALLBACK_LIBRARY_PATH", val)] - } else { - vec![] - } +fn assert_runtime_bundle_staged() { + let bundle_dir = std::path::Path::new(GATEWAY) + .parent() + .expect("gateway binary has no parent") + .join("gateway.runtime"); + assert!( + bundle_dir.is_dir(), + "gateway.runtime is missing next to the test binary: {}. Run `mise run vm:bundle-runtime` first.", + bundle_dir.display() + ); } // ── Tests ────────────────────────────────────────────────────────────── @@ -75,12 +64,10 @@ fn libkrun_env() -> Vec<(&'static str, String)> { #[ignore] // requires libkrun + rootfs fn gateway_boots_and_service_becomes_reachable() { codesign_if_needed(); + assert_runtime_bundle_staged(); let mut cmd = Command::new(GATEWAY); cmd.stdout(Stdio::null()).stderr(Stdio::piped()); - for (k, v) in libkrun_env() { - cmd.env(k, v); - } let mut child = cmd.spawn().expect("failed to start gateway"); @@ -114,12 +101,10 @@ fn gateway_boots_and_service_becomes_reachable() { #[ignore] // requires libkrun + rootfs fn gateway_exec_runs_guest_command() { codesign_if_needed(); + assert_runtime_bundle_staged(); let mut cmd = Command::new(GATEWAY); cmd.args(["--exec", "/bin/true"]); - for (k, v) in libkrun_env() { - cmd.env(k, v); - } let output = cmd.output().expect("failed to run gateway --exec"); diff --git a/tasks/scripts/bundle-vm-runtime.sh b/tasks/scripts/bundle-vm-runtime.sh new file mode 100755 index 00000000..05a5a46f --- /dev/null +++ b/tasks/scripts/bundle-vm-runtime.sh @@ -0,0 +1,88 @@ 
+#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +if [ "$(uname -s)" != "Darwin" ]; then + echo "vm:bundle-runtime currently supports macOS only" >&2 + exit 1 +fi + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +LIB_DIR="${OPENSHELL_VM_RUNTIME_SOURCE_DIR:-}" +GVPROXY_BIN="${OPENSHELL_VM_GVPROXY:-}" + +if [ -z "$LIB_DIR" ]; then + BREW_PREFIX="$(brew --prefix 2>/dev/null || true)" + if [ -n "$BREW_PREFIX" ]; then + LIB_DIR="${BREW_PREFIX}/lib" + else + LIB_DIR="/opt/homebrew/lib" + fi +fi + +if [ -z "$GVPROXY_BIN" ]; then + if command -v gvproxy >/dev/null 2>&1; then + GVPROXY_BIN="$(command -v gvproxy)" + elif [ -x /opt/homebrew/bin/gvproxy ]; then + GVPROXY_BIN="/opt/homebrew/bin/gvproxy" + elif [ -x /opt/podman/bin/gvproxy ]; then + GVPROXY_BIN="/opt/podman/bin/gvproxy" + else + echo "gvproxy not found; set OPENSHELL_VM_GVPROXY or install gvproxy" >&2 + exit 1 + fi +fi + +LIBKRUN="${LIB_DIR}/libkrun.dylib" +if [ ! 
-e "$LIBKRUN" ]; then + echo "libkrun not found at ${LIBKRUN}; set OPENSHELL_VM_RUNTIME_SOURCE_DIR" >&2 + exit 1 +fi + +KRUNFW_FILES=() +while IFS= read -r line; do + KRUNFW_FILES+=("$line") +done < <(find "$LIB_DIR" -maxdepth 1 \( -type f -o -type l \) \( -name 'libkrunfw.dylib' -o -name 'libkrunfw.*.dylib' \) | sort -u) + +if [ "${#KRUNFW_FILES[@]}" -eq 0 ]; then + echo "libkrunfw not found under ${LIB_DIR}; set OPENSHELL_VM_RUNTIME_SOURCE_DIR" >&2 + exit 1 +fi + +TARGETS=( + "${ROOT}/target/debug" + "${ROOT}/target/release" + "${ROOT}/target/aarch64-apple-darwin/debug" + "${ROOT}/target/aarch64-apple-darwin/release" +) + +for target_dir in "${TARGETS[@]}"; do + runtime_dir="${target_dir}/gateway.runtime" + mkdir -p "$runtime_dir" + + install -m 0644 "$LIBKRUN" "${runtime_dir}/libkrun.dylib" + install -m 0755 "$GVPROXY_BIN" "${runtime_dir}/gvproxy" + for krunfw in "${KRUNFW_FILES[@]}"; do + install -m 0644 "$krunfw" "${runtime_dir}/$(basename "$krunfw")" + done + + manifest_entries=() + manifest_entries+=(' "libkrun.dylib"') + manifest_entries+=(' "gvproxy"') + for krunfw in "${KRUNFW_FILES[@]}"; do + manifest_entries+=(" \"$(basename "$krunfw")\"") + done + + cat > "${runtime_dir}/manifest.json" <&2 + exit 1 +fi + +if [ ! -d "${TARGET_DIR}/gateway.runtime" ]; then + echo "target/release/gateway.runtime not found; run mise run vm:bundle-runtime first" >&2 + exit 1 +fi + +mkdir -p "${ARTIFACT_DIR}" +tar -czf "${ARTIFACT_DIR}/gateway-aarch64-apple-darwin.tar.gz" \ + -C "${TARGET_DIR}" \ + gateway \ + gateway.runtime + +ls -lh "${ARTIFACT_DIR}/gateway-aarch64-apple-darwin.tar.gz" diff --git a/tasks/vm.toml b/tasks/vm.toml new file mode 100644 index 00000000..17940b56 --- /dev/null +++ b/tasks/vm.toml @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +# openshell-vm development helpers + +[vm] +description = "Build and run the standalone gateway microVM" +depends = ["vm:build", "vm:bundle-runtime", "vm:rootfs", "vm:codesign"] +run = "target/debug/gateway" +hide = false + +["vm:build"] +description = "Build the standalone gateway binary" +run = "cargo build -p openshell-vm" +hide = true + +["vm:build:release"] +description = "Build the standalone gateway binary in release mode" +run = "cargo build -p openshell-vm --release" +hide = true + +["vm:rootfs"] +description = "Build the default gateway rootfs if needed" +run = "crates/openshell-vm/scripts/build-rootfs.sh" +hide = true + +["vm:codesign"] +description = "Codesign the gateway binary for Hypervisor.framework access on macOS" +run = "tasks/scripts/codesign-gateway.sh" +hide = true + +["vm:bundle-runtime"] +description = "Stage the gateway sidecar runtime bundle next to local build outputs" +run = "tasks/scripts/bundle-vm-runtime.sh" +hide = false + +["vm:package:gateway"] +description = "Package the gateway binary with its sidecar runtime bundle" +run = "tasks/scripts/package-gateway-runtime.sh" +depends = ["vm:build:release", "vm:bundle-runtime"] +hide = false From a82af6fd6f5156ec9b186889a5aff2590d003752 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Mon, 23 Mar 2026 21:03:07 -0700 Subject: [PATCH 09/14] wip --- CONTRIBUTING.md | 13 ++ crates/openshell-vm/scripts/build-rootfs.sh | 75 ++++++--- crates/openshell-vm/scripts/gateway-init.sh | 100 ++++++++---- crates/openshell-vm/src/ffi.rs | 62 ++++++- crates/openshell-vm/src/lib.rs | 87 ++++++---- crates/openshell-vm/src/main.rs | 153 +++++++++++++++++- .../openshell-vm/tests/gateway_integration.rs | 6 +- deploy/helm/openshell/values.yaml | 4 +- tasks/scripts/ensure-vm-rootfs.sh | 16 ++ tasks/scripts/run-vm.sh | 15 ++ tasks/vm.toml | 22 ++- 11 files changed, 462 insertions(+), 91 deletions(-) create mode 100755 tasks/scripts/ensure-vm-rootfs.sh create mode 100755 
tasks/scripts/run-vm.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 28104af8..bdfae592 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -150,12 +150,25 @@ mise run vm That task builds `openshell-vm`, stages `gateway.runtime/`, builds the default rootfs under `$XDG_DATA_HOME/openshell/gateway/rootfs`, codesigns `target/debug/gateway` on macOS, and then launches the VM. +Once the VM is running, you can run cluster debug commands against its kubeconfig directly: + +```bash +target/debug/gateway exec -- kubectl get pods -A +target/debug/gateway exec -- kubectl -n openshell logs statefulset/openshell +``` + If you only want to stage the sidecar runtime bundle without launching the VM: ```bash mise run vm:bundle-runtime ``` +To force a fresh rebuild of the binary, bundled runtime, and rootfs without launching the VM: + +```bash +mise run vm:build +``` + To create a local tarball that contains both `gateway` and `gateway.runtime/`: ```bash diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh index ece7ee45..e5483a20 100755 --- a/crates/openshell-vm/scripts/build-rootfs.sh +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -34,9 +34,9 @@ K3S_VERSION="${K3S_VERSION//-k3s/+k3s}" PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" # Container images to pre-load into k3s (arm64). -IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-navigator}" +IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-openshell}" IMAGE_TAG="${IMAGE_TAG:-dev}" -SERVER_IMAGE="${IMAGE_REPO_BASE}/server:${IMAGE_TAG}" +SERVER_IMAGE="${IMAGE_REPO_BASE}/gateway:${IMAGE_TAG}" SANDBOX_IMAGE="${IMAGE_REPO_BASE}/sandbox:${IMAGE_TAG}" AGENT_SANDBOX_IMAGE="registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0" @@ -130,12 +130,12 @@ else fi # ── Inject Kubernetes manifests ────────────────────────────────────── -# These are copied to /opt/navigator/manifests/ (staging). gateway-init.sh +# These are copied to /opt/openshell/manifests/ (staging). 
gateway-init.sh # moves them to /var/lib/rancher/k3s/server/manifests/ at boot so the # k3s Helm Controller auto-deploys them. MANIFEST_SRC="${PROJECT_ROOT}/deploy/kube/manifests" -MANIFEST_DEST="${ROOTFS_DIR}/opt/navigator/manifests" +MANIFEST_DEST="${ROOTFS_DIR}/opt/openshell/manifests" echo "==> Injecting Kubernetes manifests..." mkdir -p "${MANIFEST_DEST}" @@ -159,7 +159,7 @@ done # of images each time. IMAGES_DIR="${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images" -IMAGE_CACHE_DIR="${XDG_CACHE_HOME:-${HOME}/.cache}/nemoclaw/gateway/images" +IMAGE_CACHE_DIR="${XDG_CACHE_HOME:-${HOME}/.cache}/openshell/gateway/images" mkdir -p "${IMAGES_DIR}" "${IMAGE_CACHE_DIR}" echo "==> Pre-loading container images (arm64)..." @@ -276,6 +276,45 @@ if [ -f "$HELMCHART" ]; then || sed -i '/__CHART_CHECKSUM__/d' "$HELMCHART" fi +# Patch agent-sandbox manifest for VM networking constraints. +AGENT_MANIFEST="${INIT_MANIFESTS}/agent-sandbox.yaml" +if [ -f "$AGENT_MANIFEST" ]; then + # Keep agent-sandbox on pod networking to avoid host port clashes. + # Point in-cluster client traffic at the API server node IP because + # kube-proxy is disabled in VM mode. 
+ sed -i '' '/hostNetwork: true/d' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '' '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + sed -i '' 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ + args:\ + - -metrics-bind-address=:8082\ + env:\ + - name: KUBERNETES_SERVICE_HOST\ + value: 192.168.127.2\ + - name: KUBERNETES_SERVICE_PORT\ + value: "6443"|g' "$AGENT_MANIFEST" 2>/dev/null \ + || sed -i 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ + args:\ + - -metrics-bind-address=:8082\ + env:\ + - name: KUBERNETES_SERVICE_HOST\ + value: 192.168.127.2\ + - name: KUBERNETES_SERVICE_PORT\ + value: "6443"|g' "$AGENT_MANIFEST" + if grep -q 'hostNetwork: true' "$AGENT_MANIFEST" \ + || grep -q 'ClusterFirstWithHostNet' "$AGENT_MANIFEST" \ + || ! grep -q 'KUBERNETES_SERVICE_HOST' "$AGENT_MANIFEST" \ + || ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then + echo "ERROR: failed to patch agent-sandbox manifest for VM networking constraints: $AGENT_MANIFEST" >&2 + exit 1 + fi +fi + +# local-storage implies local-path-provisioner, which requires CNI bridge +# networking that is unavailable in the VM kernel. +rm -f "${INIT_MANIFESTS}/local-storage.yaml" 2>/dev/null || true + # Boot k3s in a privileged container. We use a Docker volume for the # k3s data directory because kine (SQLite) creates Unix sockets that # don't work over bind mounts from macOS. 
After k3s is ready, we @@ -317,7 +356,7 @@ docker run -d \ -v krun-k3s-init-data:/var/lib/rancher/k3s \ "${BASE_IMAGE_TAG}" \ /usr/local/bin/k3s server \ - --disable=traefik,servicelb,metrics-server,coredns,local-path-provisioner \ + --disable=traefik,servicelb,metrics-server,coredns,local-storage \ --disable-network-policy \ --write-kubeconfig-mode=644 \ --flannel-backend=host-gw \ @@ -363,9 +402,9 @@ done # Explicitly import images into containerd's k8s.io namespace, then # tag them with the docker.io/ prefix that kubelet expects. # -# When Docker saves "navigator/server:dev", the tarball stores the -# reference as "navigator/server:dev". But kubelet normalises all -# short names to "docker.io/navigator/server:dev". Without the +# When Docker saves "openshell/gateway:dev", the tarball stores the +# reference as "openshell/gateway:dev". But kubelet normalises all +# short names to "docker.io/openshell/gateway:dev". Without the # re-tag, kubelet can't find the image and falls back to pulling. echo " Importing images into containerd..." docker exec "${INIT_CONTAINER_NAME}" sh -c ' @@ -397,8 +436,8 @@ docker exec "${INIT_CONTAINER_NAME}" sh -c ' /usr/local/bin/k3s ctr images list -q | grep -v "^sha256:" | sort # Re-tag short-name images with docker.io/ prefix so kubelet can - # find them. kubelet normalises "navigator/server:dev" to - # "docker.io/navigator/server:dev". Only re-tag images that look + # find them. kubelet normalises "openshell/gateway:dev" to + # "docker.io/openshell/gateway:dev". Only re-tag images that look # like short Docker Hub names (contain "/" but no "." before the # first "/", i.e. not registry.k8s.io/... or ghcr.io/...). echo "" @@ -544,7 +583,7 @@ done # find them without waiting for the cluster. This is the key to # skipping the namespace wait + kubectl apply on every boot. echo " Baking PKI into rootfs..." 
-PKI_DEST="${ROOTFS_DIR}/opt/navigator/pki" +PKI_DEST="${ROOTFS_DIR}/opt/openshell/pki" mkdir -p "${PKI_DEST}" cp "${PKI_DIR}/ca.crt" "${PKI_DEST}/ca.crt" cp "${PKI_DIR}/ca.key" "${PKI_DEST}/ca.key" @@ -654,7 +693,7 @@ echo " Images: $(ls "${IMAGES_DIR}"/*.tar.zst 2>/dev/null | wc -l | tr -d ' ' # Write sentinel file so gateway-init.sh and the host-side bootstrap # know this rootfs has pre-initialized state. -echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "${ROOTFS_DIR}/opt/navigator/.initialized" +echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "${ROOTFS_DIR}/opt/openshell/.initialized" docker rm "${INIT_CONTAINER_NAME}" 2>/dev/null || true docker volume rm krun-k3s-init-data 2>/dev/null || true @@ -668,14 +707,14 @@ if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then exit 1 fi -if [ ! -f "${ROOTFS_DIR}/opt/navigator/.initialized" ]; then +if [ ! -f "${ROOTFS_DIR}/opt/openshell/.initialized" ]; then echo "WARNING: Pre-initialization sentinel not found. Cold starts will be slow." fi echo "" echo "==> Rootfs ready at: ${ROOTFS_DIR}" echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)" -echo " Pre-initialized: $(cat "${ROOTFS_DIR}/opt/navigator/.initialized" 2>/dev/null || echo 'no')" +echo " Pre-initialized: $(cat "${ROOTFS_DIR}/opt/openshell/.initialized" 2>/dev/null || echo 'no')" # Show k3s data size K3S_DATA="${ROOTFS_DIR}/var/lib/rancher/k3s" @@ -684,11 +723,11 @@ if [ -d "${K3S_DATA}" ]; then fi # Show PKI -if [ -d "${ROOTFS_DIR}/opt/navigator/pki" ]; then - echo " PKI: baked ($(ls "${ROOTFS_DIR}/opt/navigator/pki/" | wc -l | tr -d ' ') files)" +if [ -d "${ROOTFS_DIR}/opt/openshell/pki" ]; then + echo " PKI: baked ($(ls "${ROOTFS_DIR}/opt/openshell/pki/" | wc -l | tr -d ' ') files)" fi echo "" echo "Next steps:" -echo " 1. Run: ncl gateway" +echo " 1. 
Run: openshell gateway" echo " Expected startup time: ~3-5 seconds (pre-initialized)" diff --git a/crates/openshell-vm/scripts/gateway-init.sh b/crates/openshell-vm/scripts/gateway-init.sh index bc37541f..5e9d49d0 100755 --- a/crates/openshell-vm/scripts/gateway-init.sh +++ b/crates/openshell-vm/scripts/gateway-init.sh @@ -6,7 +6,7 @@ # # Mounts essential virtual filesystems, configures networking, then execs # k3s server. If the rootfs was pre-initialized by build-rootfs.sh (sentinel -# at /opt/navigator/.initialized), the full manifest setup is skipped and +# at /opt/openshell/.initialized), the full manifest setup is skipped and # k3s resumes from its persisted state (~3-5s startup). set -e @@ -21,7 +21,7 @@ ts() { } PRE_INITIALIZED=false -if [ -f /opt/navigator/.initialized ]; then +if [ -f /opt/openshell/.initialized ]; then PRE_INITIALIZED=true ts "pre-initialized rootfs detected (fast path)" fi @@ -148,7 +148,7 @@ find /run -name '*.sock' -delete 2>/dev/null || true # bolt metadata database (meta.db) because it contains snapshot and image # metadata that containerd needs to avoid re-extracting all image layers # on every boot. The native snapshotter on virtio-fs takes ~2 min to -# extract the navigator/server image; keeping meta.db lets containerd +# extract the openshell/gateway image; keeping meta.db lets containerd # know the snapshots already exist. # # The kine (SQLite) DB cleanup in build-rootfs.sh already removes stale @@ -175,9 +175,10 @@ ts "stale artifacts cleaned" # On pre-initialized rootfs, manifests are already in place from the # build-time k3s boot. Skip this entirely for fast startup. 
+K3S_MANIFESTS="/var/lib/rancher/k3s/server/manifests" +BUNDLED_MANIFESTS="/opt/openshell/manifests" + if [ "$PRE_INITIALIZED" = false ]; then - K3S_MANIFESTS="/var/lib/rancher/k3s/server/manifests" - BUNDLED_MANIFESTS="/opt/navigator/manifests" mkdir -p "$K3S_MANIFESTS" @@ -188,8 +189,8 @@ if [ "$PRE_INITIALIZED" = false ]; then cp "$manifest" "$K3S_MANIFESTS/" done - # Remove stale navigator-managed manifests from previous boots. - for existing in "$K3S_MANIFESTS"/navigator-*.yaml \ + # Remove stale OpenShell-managed manifests from previous boots. + for existing in "$K3S_MANIFESTS"/openshell-*.yaml \ "$K3S_MANIFESTS"/agent-*.yaml; do [ ! -f "$existing" ] && continue basename=$(basename "$existing") @@ -199,28 +200,66 @@ if [ "$PRE_INITIALIZED" = false ]; then done fi - # Patch the HelmChart manifest for VM deployment. - HELMCHART="$K3S_MANIFESTS/navigator-helmchart.yaml" - if [ -f "$HELMCHART" ]; then - # Use pre-loaded images — don't pull from registry. - sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" - # Clear SSH gateway placeholders (default 127.0.0.1 is correct for local VM). - sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" - sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" - fi - ts "manifests deployed" else ts "skipping manifest deploy (pre-initialized)" fi +# Patch manifests for VM deployment constraints. +HELMCHART="$K3S_MANIFESTS/openshell-helmchart.yaml" +if [ -f "$HELMCHART" ]; then + # Use pre-loaded images — don't pull from registry. + sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" + # VM bootstrap runs without CNI bridge networking. 
+ sed -i 's|__HOST_NETWORK__|true|g' "$HELMCHART" + sed -i 's|__AUTOMOUNT_SA_TOKEN__|false|g' "$HELMCHART" + sed -i 's|__KUBECONFIG_HOST_PATH__|"/etc/rancher/k3s"|g' "$HELMCHART" + sed -i 's|__PERSISTENCE_ENABLED__|false|g' "$HELMCHART" + sed -i 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART" + # Clear SSH gateway placeholders (default 127.0.0.1 is correct for local VM). + sed -i 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' "$HELMCHART" + sed -i 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' "$HELMCHART" +fi + +AGENT_MANIFEST="$K3S_MANIFESTS/agent-sandbox.yaml" +if [ -f "$AGENT_MANIFEST" ]; then + # Keep agent-sandbox on pod networking to avoid host port clashes. + # Point in-cluster client traffic at the API server node IP because + # kube-proxy is disabled in VM mode. + sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + if ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then + sed -i 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ + args:\ + - -metrics-bind-address=:8082\ + env:\ + - name: KUBERNETES_SERVICE_HOST\ + value: 192.168.127.2\ + - name: KUBERNETES_SERVICE_PORT\ + value: "6443"|g' "$AGENT_MANIFEST" + else + sed -i 's|value: 127.0.0.1|value: 192.168.127.2|g' "$AGENT_MANIFEST" + fi + if grep -q 'hostNetwork: true' "$AGENT_MANIFEST" \ + || grep -q 'ClusterFirstWithHostNet' "$AGENT_MANIFEST" \ + || ! grep -q 'KUBERNETES_SERVICE_HOST' "$AGENT_MANIFEST" \ + || ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then + echo "ERROR: failed to patch agent-sandbox manifest for VM networking constraints: $AGENT_MANIFEST" >&2 + exit 1 + fi +fi + +# local-storage implies local-path-provisioner, which requires CNI bridge +# networking that is unavailable in the VM kernel. 
+rm -f "$K3S_MANIFESTS/local-storage.yaml" 2>/dev/null || true + # ── CNI configuration (iptables-free) ─────────────────────────────────── # The libkrun VM kernel has no netfilter/iptables support. Flannel's # masquerade rules and kube-proxy both require iptables and crash without -# it. We disable both and use a simple bridge CNI with host-local IPAM -# instead. This is sufficient for single-node pod networking. +# it. We disable both and use a simple ptp CNI with host-local IPAM +# instead. This avoids linux bridge requirements in the VM kernel. # -# ipMasq=false avoids any iptables calls in the bridge plugin. +# ipMasq=false avoids any iptables calls in the plugin. # portmap plugin removed — it requires iptables for DNAT rules. # # containerd falls back to default CNI paths: @@ -232,17 +271,14 @@ CNI_CONF_DIR="/etc/cni/net.d" CNI_BIN_DIR="/opt/cni/bin" mkdir -p "$CNI_CONF_DIR" "$CNI_BIN_DIR" -cat > "$CNI_CONF_DIR/10-bridge.conflist" << 'CNICFG' +cat > "$CNI_CONF_DIR/10-ptp.conflist" << 'CNICFG' { "cniVersion": "1.0.0", - "name": "bridge", + "name": "ptp", "plugins": [ { - "type": "bridge", - "bridge": "cni0", - "isGateway": true, + "type": "ptp", "ipMasq": false, - "hairpinMode": true, "ipam": { "type": "host-local", "ranges": [[{ "subnet": "10.42.0.0/24" }]], @@ -260,7 +296,7 @@ CNICFG # k3s extracts its tools to /var/lib/rancher/k3s/data//bin/. K3S_DATA_BIN=$(find /var/lib/rancher/k3s/data -maxdepth 2 -name bin -type d 2>/dev/null | head -1) if [ -n "$K3S_DATA_BIN" ]; then - for plugin in bridge host-local loopback bandwidth; do + for plugin in ptp host-local loopback bandwidth; do [ -f "$K3S_DATA_BIN/$plugin" ] && ln -sf "$K3S_DATA_BIN/$plugin" "$CNI_BIN_DIR/$plugin" done ts "CNI binaries linked from $K3S_DATA_BIN" @@ -272,16 +308,16 @@ fi # (pre-baked state from the Docker build used host-gw flannel). 
rm -f "/var/lib/rancher/k3s/agent/etc/cni/net.d/10-flannel.conflist" 2>/dev/null || true -ts "bridge CNI configured (iptables-free)" +ts "ptp CNI configured (iptables-free, no linux bridge)" # ── Start k3s ────────────────────────────────────────────────────────── # Flags tuned for fast single-node startup: # --disable=traefik,servicelb,metrics-server: skip unused controllers -# --disable=coredns,local-path-provisioner: can't run without bridge CNI -# (no CONFIG_BRIDGE in libkrunfw kernel). Only hostNetwork pods work. +# --disable=coredns,local-storage: local-storage implies local-path +# provisioner and requires bridge-based networking unavailable in VM # --disable-network-policy: skip network policy controller # --disable-kube-proxy: VM kernel has no netfilter/iptables -# --flannel-backend=none: replaced with bridge CNI above +# --flannel-backend=none: replaced with ptp CNI above # --snapshotter=native: overlayfs is incompatible with virtiofs (the # host-backed filesystem in libkrun). Operations inside overlayfs # mounts on virtiofs fail with ECONNRESET. The native snapshotter @@ -289,7 +325,7 @@ ts "bridge CNI configured (iptables-free)" ts "starting k3s server" exec /usr/local/bin/k3s server \ - --disable=traefik,servicelb,metrics-server,coredns,local-path-provisioner \ + --disable=traefik,servicelb,metrics-server,coredns,local-storage \ --disable-network-policy \ --disable-kube-proxy \ --write-kubeconfig-mode=644 \ diff --git a/crates/openshell-vm/src/ffi.rs b/crates/openshell-vm/src/ffi.rs index 06fa2004..c53cc47d 100644 --- a/crates/openshell-vm/src/ffi.rs +++ b/crates/openshell-vm/src/ffi.rs @@ -7,7 +7,8 @@ //! `gateway` binary loads `libkrun` from the staged `gateway.runtime/` //! sidecar bundle on first use. 
-use std::path::PathBuf; +use std::fs; +use std::path::{Path, PathBuf}; use std::sync::OnceLock; use libc::c_char; @@ -86,6 +87,9 @@ pub fn libkrun() -> Result<&'static LibKrun, VmError> { impl LibKrun { fn load() -> Result { let path = runtime_libkrun_path()?; + preload_runtime_support_libraries(path.parent().ok_or_else(|| { + VmError::HostSetup(format!("libkrun has no parent dir: {}", path.display())) + })?)?; let library = Box::leak(Box::new(unsafe { Library::new(&path).map_err(|e| { VmError::HostSetup(format!("load libkrun from {}: {e}", path.display())) @@ -119,6 +123,60 @@ fn runtime_libkrun_path() -> Result { Ok(crate::configured_runtime_dir()?.join(required_runtime_lib_name())) } +fn preload_runtime_support_libraries(runtime_dir: &Path) -> Result<(), VmError> { + let entries = fs::read_dir(runtime_dir) + .map_err(|e| VmError::HostSetup(format!("read {}: {e}", runtime_dir.display())))?; + + let mut support_libs: Vec = entries + .filter_map(Result::ok) + .map(|entry| entry.path()) + .filter(|path| { + path.file_name() + .and_then(|name| name.to_str()) + .map(|name| { + #[cfg(target_os = "macos")] + { + name.starts_with("libkrunfw") && name.ends_with(".dylib") + } + #[cfg(not(target_os = "macos"))] + { + name.starts_with("libkrunfw") && name.contains(".so") + } + }) + .unwrap_or(false) + }) + .collect(); + + support_libs.sort(); + + for path in support_libs { + let path_cstr = std::ffi::CString::new(path.to_string_lossy().as_bytes()).map_err(|e| { + VmError::HostSetup(format!( + "invalid support library path {}: {e}", + path.display() + )) + })?; + let handle = + unsafe { libc::dlopen(path_cstr.as_ptr(), libc::RTLD_NOW | libc::RTLD_GLOBAL) }; + if handle.is_null() { + let error = unsafe { + let err = libc::dlerror(); + if err.is_null() { + "unknown dlopen error".to_string() + } else { + std::ffi::CStr::from_ptr(err).to_string_lossy().into_owned() + } + }; + return Err(VmError::HostSetup(format!( + "preload runtime support library {}: {error}", + path.display() 
+ ))); + } + } + + Ok(()) +} + fn required_runtime_lib_name() -> &'static str { #[cfg(target_os = "macos")] { @@ -133,7 +191,7 @@ fn required_runtime_lib_name() -> &'static str { fn load_symbol( library: &'static Library, symbol: &[u8], - path: &std::path::Path, + path: &Path, ) -> Result { let loaded = unsafe { library.get::(symbol).map_err(|e| { diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index 4105aadc..46076ceb 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -145,9 +145,9 @@ impl VmConfig { /// Default gateway configuration: boots k3s server inside the VM. /// /// Runs `/srv/gateway-init.sh` which mounts essential filesystems, - /// deploys the `NemoClaw` helm chart, and execs `k3s server`. - /// Exposes the Kubernetes API on port 6443 and the `NemoClaw` - /// gateway (navigator server `NodePort`) on port 30051. + /// deploys the OpenShell helm chart, and execs `k3s server`. + /// Exposes the Kubernetes API on port 6443 and the OpenShell + /// gateway (`NodePort`) on port 30051. 
pub fn gateway(rootfs: PathBuf) -> Self { Self { rootfs, @@ -299,6 +299,26 @@ pub fn default_runtime_gvproxy_path() -> PathBuf { .join("gvproxy") } +#[cfg(target_os = "macos")] +fn configure_runtime_loader_env(runtime_dir: &Path) -> Result<(), VmError> { + let existing = std::env::var_os("DYLD_FALLBACK_LIBRARY_PATH"); + let mut paths = vec![runtime_dir.to_path_buf()]; + if let Some(existing) = existing { + paths.extend(std::env::split_paths(&existing)); + } + let joined = std::env::join_paths(paths) + .map_err(|e| VmError::HostSetup(format!("join DYLD_FALLBACK_LIBRARY_PATH: {e}")))?; + unsafe { + std::env::set_var("DYLD_FALLBACK_LIBRARY_PATH", joined); + } + Ok(()) +} + +#[cfg(not(target_os = "macos"))] +fn configure_runtime_loader_env(_runtime_dir: &Path) -> Result<(), VmError> { + Ok(()) +} + fn raise_nofile_limit() { #[cfg(unix)] unsafe { @@ -595,7 +615,14 @@ pub fn launch(config: &VmConfig) -> Result { // The runtime must already be staged as a sidecar bundle next to the // binary (or explicitly pointed to via OPENSHELL_VM_RUNTIME_DIR). - resolve_runtime_bundle()?; + let runtime_gvproxy = resolve_runtime_bundle()?; + let runtime_dir = runtime_gvproxy.parent().ok_or_else(|| { + VmError::HostSetup(format!( + "runtime bundle file has no parent directory: {}", + runtime_gvproxy.display() + )) + })?; + configure_runtime_loader_env(runtime_dir)?; raise_nofile_limit(); // ── Configure the microVM ────────────────────────────────────── @@ -892,7 +919,7 @@ pub fn launch(config: &VmConfig) -> Result { } } - // Bootstrap the NemoClaw control plane: generate PKI, + // Bootstrap the OpenShell control plane: generate PKI, // create TLS secrets, and store cluster metadata so CLI // clients and e2e tests can connect. // @@ -900,7 +927,9 @@ pub fn launch(config: &VmConfig) -> Result { // this skips the namespace wait and kubectl apply entirely. 
if let Err(e) = bootstrap_gateway(&dest, &config.rootfs) { eprintln!("Bootstrap failed: {e}"); - eprintln!(" The VM is running but NemoClaw may not be fully operational."); + eprintln!( + " The VM is running but OpenShell may not be fully operational." + ); } } else { eprintln!(" kubeconfig not found after 90s (k3s may still be starting)"); @@ -917,7 +946,7 @@ pub fn launch(config: &VmConfig) -> Result { recover_stale_pods(&kubeconfig_dest); // Wait for the gRPC service to be reachable before - // declaring "Ready". The navigator pod needs a few + // declaring "Ready". The openshell pod needs a few // seconds after k3s starts to bind its port. wait_for_gateway_service(); } @@ -970,10 +999,10 @@ pub fn launch(config: &VmConfig) -> Result { /// Cluster name used for metadata and mTLS storage. const GATEWAY_CLUSTER_NAME: &str = "gateway"; -/// Gateway port: the host port mapped to the navigator `NodePort` (30051). +/// Gateway port: the host port mapped to the OpenShell `NodePort` (30051). const GATEWAY_PORT: u16 = 30051; -/// Bootstrap the `NemoClaw` control plane after k3s is ready. +/// Bootstrap the OpenShell control plane after k3s is ready. /// /// Three paths, fastest first: /// @@ -982,7 +1011,7 @@ const GATEWAY_PORT: u16 = 30051; /// interaction at all. Completes in <50ms. /// /// 2. **Warm boot**: host-side metadata + mTLS certs survive across VM -/// restarts. Waits for the navigator namespace, then returns. +/// restarts. Waits for the openshell namespace, then returns. /// /// 3. **Cold boot**: generates fresh PKI, waits for namespace, applies /// secrets via kubectl, stores everything on the host. @@ -1006,10 +1035,10 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { // ── Path 1: Pre-baked PKI from build-rootfs.sh ───────────────── // // If the rootfs was pre-initialized, PKI files are baked into - // /opt/navigator/pki/. Read them directly — no cluster interaction + // /opt/openshell/pki/. 
Read them directly — no cluster interaction // needed. The TLS secrets already exist inside the cluster from // the build-time k3s boot. - let pki_dir = rootfs.join("opt/navigator/pki"); + let pki_dir = rootfs.join("opt/openshell/pki"); if pki_dir.join("ca.crt").is_file() { eprintln!("Pre-baked PKI detected — fast bootstrap"); @@ -1043,7 +1072,7 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { ); eprintln!(" Cluster: {GATEWAY_CLUSTER_NAME}"); eprintln!(" Gateway: https://127.0.0.1:{GATEWAY_PORT}"); - eprintln!(" mTLS: ~/.config/nemoclaw/clusters/{GATEWAY_CLUSTER_NAME}/mtls/"); + eprintln!(" mTLS: ~/.config/openshell/gateways/{GATEWAY_CLUSTER_NAME}/mtls/"); return Ok(()); } @@ -1057,7 +1086,7 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { if is_warm_boot() { eprintln!("Warm boot detected — reusing existing PKI and metadata."); - eprintln!("Waiting for navigator namespace..."); + eprintln!("Waiting for openshell namespace..."); wait_for_namespace(kc)?; eprintln!( "Warm boot ready [{:.1}s]", @@ -1065,7 +1094,7 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { ); eprintln!(" Cluster: {GATEWAY_CLUSTER_NAME}"); eprintln!(" Gateway: https://127.0.0.1:{GATEWAY_PORT}"); - eprintln!(" mTLS: ~/.config/nemoclaw/clusters/{GATEWAY_CLUSTER_NAME}/mtls/"); + eprintln!(" mTLS: ~/.config/openshell/gateways/{GATEWAY_CLUSTER_NAME}/mtls/"); return Ok(()); } @@ -1078,7 +1107,7 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { .map_err(|e| VmError::Bootstrap(format!("failed to store cluster metadata: {e}")))?; let ns_start = Instant::now(); - eprintln!("Waiting for navigator namespace..."); + eprintln!("Waiting for openshell namespace..."); wait_for_namespace(kc)?; eprintln!("Namespace ready [{:.1}s]", ns_start.elapsed().as_secs_f64()); @@ -1097,7 +1126,7 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { ); eprintln!(" 
Cluster: {GATEWAY_CLUSTER_NAME}"); eprintln!(" Gateway: https://127.0.0.1:{GATEWAY_PORT}"); - eprintln!(" mTLS: ~/.config/nemoclaw/clusters/{GATEWAY_CLUSTER_NAME}/mtls/"); + eprintln!(" mTLS: ~/.config/openshell/gateways/{GATEWAY_CLUSTER_NAME}/mtls/"); Ok(()) } @@ -1105,8 +1134,8 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { /// Check whether a previous bootstrap left valid state on disk. /// /// A warm boot is detected when both: -/// - Cluster metadata exists: `$XDG_CONFIG_HOME/nemoclaw/clusters/gateway_metadata.json` -/// - mTLS certs exist: `$XDG_CONFIG_HOME/nemoclaw/clusters/gateway/mtls/{ca.crt,tls.crt,tls.key}` +/// - Cluster metadata exists: `$XDG_CONFIG_HOME/openshell/gateways/gateway/metadata.json` +/// - mTLS certs exist: `$XDG_CONFIG_HOME/openshell/gateways/gateway/mtls/{ca.crt,tls.crt,tls.key}` /// /// When true, the host-side bootstrap (PKI generation, kubectl apply, metadata /// storage) can be skipped because the virtio-fs rootfs persists k3s state @@ -1120,11 +1149,11 @@ fn is_warm_boot() -> bool { std::env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| format!("{home}/.config")); let config_dir = PathBuf::from(&config_base) - .join("nemoclaw") - .join("clusters"); + .join("openshell") + .join("gateways"); // Check metadata file. - let metadata_path = config_dir.join(format!("{GATEWAY_CLUSTER_NAME}_metadata.json")); + let metadata_path = config_dir.join(GATEWAY_CLUSTER_NAME).join("metadata.json"); if !metadata_path.is_file() { return false; } @@ -1142,7 +1171,7 @@ fn is_warm_boot() -> bool { true } -/// Wait for the navigator pod to become Ready inside the k3s cluster +/// Wait for the openshell pod to become Ready inside the k3s cluster /// and verify the gRPC service is reachable from the host. 
/// /// Stale pod/lease records are cleaned from the kine DB at build time @@ -1173,10 +1202,10 @@ fn wait_for_gateway_service() { .args(["--kubeconfig", &kc]) .args([ "-n", - "navigator", + "openshell", "get", "pod", - "navigator-0", + "openshell-0", "-o", "jsonpath={.status.conditions[?(@.type==\"Ready\")].status}", ]) @@ -1380,7 +1409,7 @@ fn wait_for_namespace(kubeconfig: &str) -> Result<(), VmError> { } } -/// Apply the three TLS K8s secrets required by the `NemoClaw` server. +/// Apply the three TLS K8s secrets required by the OpenShell server. /// /// Uses `kubectl apply -f -` on the host, piping JSON manifests via stdin. fn apply_tls_secrets( @@ -1391,7 +1420,7 @@ fn apply_tls_secrets( use base64::engine::general_purpose::STANDARD; let secrets = [ - // 1. navigator-server-tls (kubernetes.io/tls) + // 1. openshell-server-tls (kubernetes.io/tls) serde_json::json!({ "apiVersion": "v1", "kind": "Secret", @@ -1405,7 +1434,7 @@ fn apply_tls_secrets( "tls.key": STANDARD.encode(&bundle.server_key_pem) } }), - // 2. navigator-server-client-ca (Opaque) + // 2. openshell-server-client-ca (Opaque) serde_json::json!({ "apiVersion": "v1", "kind": "Secret", @@ -1418,7 +1447,7 @@ fn apply_tls_secrets( "ca.crt": STANDARD.encode(&bundle.ca_cert_pem) } }), - // 3. navigator-client-tls (Opaque) — shared by CLI and sandbox pods + // 3. openshell-client-tls (Opaque) — shared by CLI and sandbox pods serde_json::json!({ "apiVersion": "v1", "kind": "Secret", diff --git a/crates/openshell-vm/src/main.rs b/crates/openshell-vm/src/main.rs index bc1b5b10..6e84c489 100644 --- a/crates/openshell-vm/src/main.rs +++ b/crates/openshell-vm/src/main.rs @@ -16,9 +16,11 @@ //! codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - target/debug/gateway //! 
``` -use std::path::PathBuf; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; -use clap::{Parser, ValueHint}; +use clap::{Parser, Subcommand, ValueHint}; /// Boot the OpenShell gateway microVM. /// @@ -27,6 +29,31 @@ use clap::{Parser, ValueHint}; #[derive(Parser)] #[command(name = "gateway", version)] struct Cli { + #[command(subcommand)] + command: Option, + + #[command(flatten)] + run: RunArgs, +} + +#[derive(Subcommand)] +enum GatewayCommand { + /// Run a command with the gateway kubeconfig pre-configured. + /// + /// Examples: + /// gateway exec -- kubectl get pods -A + /// gateway exec -- kubectl -n openshell logs statefulset/openshell + /// gateway exec -- sh + Exec { + /// Command and arguments to run on the host with KUBECONFIG pointing + /// at the VM-backed gateway cluster. + #[arg(trailing_var_arg = true, required = true)] + command: Vec, + }, +} + +#[derive(clap::Args)] +struct RunArgs { /// Path to the rootfs directory (aarch64 Linux). /// Defaults to `~/.local/share/openshell/gateway/rootfs`. #[arg(long, value_hint = ValueHint::DirPath)] @@ -89,6 +116,14 @@ fn main() { } fn run(cli: Cli) -> Result> { + if let Some(command) = cli.command { + return match command { + GatewayCommand::Exec { command } => exec_with_gateway_kubeconfig(&command), + }; + } + + let cli = cli.run; + let net_backend = match cli.net.as_str() { "tsi" => openshell_vm::NetBackend::Tsi, "none" => openshell_vm::NetBackend::None, @@ -140,3 +175,117 @@ fn run(cli: Cli) -> Result> { Ok(openshell_vm::launch(&config)?) 
} + +fn gateway_kubeconfig_path() -> Result> { + let home = std::env::var("HOME")?; + Ok(PathBuf::from(home).join(".kube").join("gateway.yaml")) +} + +fn workspace_root() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .ancestors() + .nth(2) + .expect("workspace root") + .to_path_buf() +} + +fn openshell_kubectl_wrapper_path() -> PathBuf { + workspace_root().join("scripts/bin/kubectl") +} + +fn is_openshell_kubectl_wrapper(path: &Path) -> bool { + path.canonicalize().ok() == openshell_kubectl_wrapper_path().canonicalize().ok() +} + +fn filtered_path() -> OsString { + let wrapper_dir = openshell_kubectl_wrapper_path() + .parent() + .map(Path::to_path_buf) + .unwrap_or_default(); + let entries = std::env::var_os("PATH") + .map(|path| { + std::env::split_paths(&path) + .filter(|entry| entry != &wrapper_dir) + .collect::>() + }) + .unwrap_or_default(); + + std::env::join_paths(entries).unwrap_or_else(|_| OsString::from("/usr/bin:/bin")) +} + +fn resolve_kubectl_binary() -> Result> { + if let Some(path) = std::env::var_os("OPENSHELL_GATEWAY_KUBECTL") { + return Ok(PathBuf::from(path)); + } + + let path = std::env::var_os("PATH").unwrap_or_default(); + for dir in std::env::split_paths(&path) { + let candidate = dir.join("kubectl"); + if candidate.is_file() && !is_openshell_kubectl_wrapper(&candidate) { + return Ok(candidate); + } + } + + Err( + "could not find a real kubectl binary on PATH; install kubectl or set OPENSHELL_GATEWAY_KUBECTL" + .into(), + ) +} + +fn configure_clean_env(cmd: &mut Command, kubeconfig: &Path) { + cmd.env_clear().env("KUBECONFIG", kubeconfig); + + for key in [ + "HOME", + "TERM", + "COLORTERM", + "NO_COLOR", + "LANG", + "LC_ALL", + "LC_CTYPE", + "TMPDIR", + ] { + if let Some(value) = std::env::var_os(key) { + cmd.env(key, value); + } + } + + cmd.env("PATH", filtered_path()); +} + +fn exec_with_gateway_kubeconfig(command: &[String]) -> Result> { + let kubeconfig = gateway_kubeconfig_path()?; + if !kubeconfig.is_file() { + return 
Err(format!( + "gateway kubeconfig not found: {}\nStart the VM first with `gateway` or `mise run vm`.", + kubeconfig.display() + ) + .into()); + } + + let program = &command[0]; + let mut cmd = if program == "kubectl" { + let mut kubectl = Command::new(resolve_kubectl_binary()?); + let has_kubeconfig = command + .iter() + .skip(1) + .any(|arg| arg == "--kubeconfig" || arg.starts_with("--kubeconfig=")); + if !has_kubeconfig { + kubectl.arg("--kubeconfig").arg(&kubeconfig); + } + kubectl.args(&command[1..]); + kubectl + } else { + let mut other = Command::new(program); + other.args(&command[1..]); + other + }; + + cmd.stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()); + configure_clean_env(&mut cmd, &kubeconfig); + + let status = cmd.status()?; + Ok(status.code().unwrap_or(1)) +} diff --git a/crates/openshell-vm/tests/gateway_integration.rs b/crates/openshell-vm/tests/gateway_integration.rs index 528e1eaa..03419021 100644 --- a/crates/openshell-vm/tests/gateway_integration.rs +++ b/crates/openshell-vm/tests/gateway_integration.rs @@ -6,7 +6,7 @@ //! These tests require: //! - libkrun installed (e.g. `brew tap slp/krun && brew install libkrun`) //! - macOS ARM64 with Apple Hypervisor.framework -//! - A pre-built rootfs at `~/.local/share/nemoclaw/gateway/rootfs` +//! - A pre-built rootfs at `~/.local/share/openshell/gateway/rootfs` //! //! All tests are `#[ignore]` — run them explicitly: //! @@ -58,7 +58,7 @@ fn assert_runtime_bundle_staged() { // ── Tests ────────────────────────────────────────────────────────────── -/// Boot the full NemoClaw gateway and verify the gRPC service becomes +/// Boot the full OpenShell gateway and verify the gRPC service becomes /// reachable on port 30051. #[test] #[ignore] // requires libkrun + rootfs @@ -71,7 +71,7 @@ fn gateway_boots_and_service_becomes_reachable() { let mut child = cmd.spawn().expect("failed to start gateway"); - // Poll for the navigator gRPC service. 
+ // Poll for the OpenShell gRPC service. let addr: SocketAddr = ([127, 0, 0, 1], 30051).into(); let timeout = Duration::from_secs(180); let start = Instant::now(); diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index 4af63f23..2ff42a49 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -25,7 +25,7 @@ serviceAccount: # native-snapshotter + virtiofs incompatibility on sandbox re-creation. automountServiceAccountToken: true -# When automountServiceAccountToken is false, the navigator server needs +# When automountServiceAccountToken is false, the OpenShell gateway needs # a kubeconfig to reach the API server. Point this to the directory # containing the k3s kubeconfig (k3s.yaml). Only used when # automountServiceAccountToken is false. @@ -69,7 +69,7 @@ probes: resources: {} -# Persistent storage for the navigator database. When disabled, an +# Persistent storage for the OpenShell database. When disabled, an # emptyDir volume is used instead of a PVC. This is useful in microVM # environments where overlayfs-on-virtiofs doesn't support PVC mounts # reliably. diff --git a/tasks/scripts/ensure-vm-rootfs.sh b/tasks/scripts/ensure-vm-rootfs.sh new file mode 100755 index 00000000..812e0487 --- /dev/null +++ b/tasks/scripts/ensure-vm-rootfs.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOTFS_DIR="${XDG_DATA_HOME:-${HOME}/.local/share}/openshell/gateway/rootfs" + +if [ "${OPENSHELL_VM_FORCE_ROOTFS_REBUILD:-}" != "1" ] \ + && [ -x "${ROOTFS_DIR}/usr/local/bin/k3s" ] \ + && [ -f "${ROOTFS_DIR}/opt/openshell/.initialized" ]; then + echo "using existing gateway rootfs at ${ROOTFS_DIR}" + exit 0 +fi + +exec crates/openshell-vm/scripts/build-rootfs.sh diff --git a/tasks/scripts/run-vm.sh b/tasks/scripts/run-vm.sh new file mode 100755 index 00000000..229fd91c --- /dev/null +++ b/tasks/scripts/run-vm.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +RUNTIME_DIR="${ROOT}/target/debug/gateway.runtime" +GATEWAY_BIN="${ROOT}/target/debug/gateway" + +if [ "$(uname -s)" = "Darwin" ]; then + export DYLD_FALLBACK_LIBRARY_PATH="${RUNTIME_DIR}${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}" +fi + +exec "${GATEWAY_BIN}" "$@" diff --git a/tasks/vm.toml b/tasks/vm.toml index 17940b56..c8d4ee5b 100644 --- a/tasks/vm.toml +++ b/tasks/vm.toml @@ -5,11 +5,26 @@ [vm] description = "Build and run the standalone gateway microVM" -depends = ["vm:build", "vm:bundle-runtime", "vm:rootfs", "vm:codesign"] -run = "target/debug/gateway" +run = [ + "mise run vm:build:binary", + "tasks/scripts/bundle-vm-runtime.sh", + "tasks/scripts/ensure-vm-rootfs.sh", + "tasks/scripts/codesign-gateway.sh", + "tasks/scripts/run-vm.sh", +] hide = false ["vm:build"] +description = "Force a fresh gateway rebuild, including the rootfs" +run = [ + "mise run vm:build:binary", + "tasks/scripts/bundle-vm-runtime.sh", + "OPENSHELL_VM_FORCE_ROOTFS_REBUILD=1 tasks/scripts/ensure-vm-rootfs.sh", + "tasks/scripts/codesign-gateway.sh", +] +hide = false + +["vm:build:binary"] description = "Build the standalone gateway 
binary" run = "cargo build -p openshell-vm" hide = true @@ -21,11 +36,12 @@ hide = true ["vm:rootfs"] description = "Build the default gateway rootfs if needed" -run = "crates/openshell-vm/scripts/build-rootfs.sh" +run = "tasks/scripts/ensure-vm-rootfs.sh" hide = true ["vm:codesign"] description = "Codesign the gateway binary for Hypervisor.framework access on macOS" +depends = ["vm:build:binary"] run = "tasks/scripts/codesign-gateway.sh" hide = true From 47528ac7d17d81c8d7a262763bb152dc32faad17 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Mon, 23 Mar 2026 22:08:30 -0700 Subject: [PATCH 10/14] feat(vm): add custom libkrunfw runtime with bridge CNI and netfilter support Enable Kubernetes-compatible networking in the gateway microVM by building a custom libkrunfw kernel with CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_CONNTRACK, CONFIG_IP_NF_IPTABLES, and CONFIG_VETH compiled in. Key changes: - Docker-based kernel build pipeline for macOS (build-custom-libkrunfw.sh) - Kernel config fragment enabling bridge/netfilter/conntrack/NAT/IPVS - Feature-flagged bridge CNI with auto-detection fallback to legacy ptp - Runtime provenance tracking (SHA-256, build metadata, manifest validation) - VM capability checker and host-side verification matrix scripts - Mise tasks: vm:build-custom-runtime, vm:verify, vm:check-capabilities - Architecture and operator documentation --- architecture/custom-vm-runtime.md | 135 ++++++ crates/openshell-vm/runtime/README.md | 183 ++++++++ .../runtime/build-custom-libkrunfw.sh | 406 ++++++++++++++++++ .../runtime/kernel/bridge-cni.config | 92 ++++ crates/openshell-vm/scripts/build-rootfs.sh | 4 + .../scripts/check-vm-capabilities.sh | 234 ++++++++++ crates/openshell-vm/scripts/gateway-init.sh | 256 ++++++++--- crates/openshell-vm/scripts/verify-vm.sh | 226 ++++++++++ crates/openshell-vm/src/ffi.rs | 116 ++++- crates/openshell-vm/src/lib.rs | 88 ++++ tasks/scripts/bundle-vm-runtime.sh | 17 + tasks/vm.toml | 15 + 12 files changed, 1701 
insertions(+), 71 deletions(-) create mode 100644 architecture/custom-vm-runtime.md create mode 100644 crates/openshell-vm/runtime/README.md create mode 100755 crates/openshell-vm/runtime/build-custom-libkrunfw.sh create mode 100644 crates/openshell-vm/runtime/kernel/bridge-cni.config create mode 100755 crates/openshell-vm/scripts/check-vm-capabilities.sh create mode 100755 crates/openshell-vm/scripts/verify-vm.sh diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md new file mode 100644 index 00000000..0dbdaf01 --- /dev/null +++ b/architecture/custom-vm-runtime.md @@ -0,0 +1,135 @@ +# Custom libkrunfw VM Runtime + +## Overview + +The OpenShell gateway VM uses [libkrun](https://github.com/containers/libkrun) to boot a +lightweight microVM with Apple Hypervisor.framework (macOS) or KVM (Linux). The kernel +is embedded inside `libkrunfw`, a companion library that packages a pre-built Linux kernel. + +The stock `libkrunfw` from Homebrew ships a minimal kernel without bridge, netfilter, or +conntrack support. This limits Kubernetes networking to a point-to-point (ptp) CNI +configuration without kube-proxy or service VIPs. + +The custom libkrunfw runtime adds bridge CNI, iptables/nftables, and conntrack support to +the VM kernel, enabling standard Kubernetes networking. 
+ +## Architecture + +``` +Host (macOS/Linux) +├── gateway binary +│ ├── Loads libkrun.dylib (VMM) +│ ├── Preloads libkrunfw.dylib (kernel) +│ └── Logs runtime provenance +├── gateway.runtime/ (sidecar bundle) +│ ├── libkrun.dylib +│ ├── libkrunfw.dylib (stock or custom) +│ ├── gvproxy +│ ├── manifest.json +│ └── provenance.json (custom only) +└── gvproxy (networking) + +Guest VM +├── gateway-init.sh (PID 1) +│ ├── Detects kernel capabilities +│ ├── Selects network profile: bridge | legacy-vm-net +│ ├── Configures CNI +│ └── Execs k3s server +└── check-vm-capabilities.sh (diagnostics) +``` + +## Network Profiles + +The VM init script auto-detects kernel capabilities and selects the appropriate +networking profile. This can be overridden via `OPENSHELL_VM_NET_PROFILE`. + +### Bridge Profile (custom runtime) + +- CNI: bridge plugin with `cni0` interface +- IP masquerade: enabled (iptables) +- kube-proxy: enabled +- Service VIPs: functional (ClusterIP, NodePort) +- hostNetwork workarounds: removed + +### Legacy VM Net Profile (stock runtime) + +- CNI: ptp plugin (point-to-point) +- IP masquerade: disabled (no iptables) +- kube-proxy: disabled +- Service VIPs: not functional (direct IP routing only) +- hostNetwork workarounds: required for some pods + +## Runtime Provenance + +At boot, the gateway binary logs provenance metadata about the loaded runtime bundle: + +- Library paths and SHA-256 hashes +- Whether the runtime is custom-built or stock +- For custom runtimes: libkrunfw commit, kernel version, build timestamp + +This information is sourced from `provenance.json` (generated by the build script) +and makes it straightforward to correlate VM behavior with a specific runtime artifact. 
+ +## Build Pipeline + +``` +crates/openshell-vm/runtime/ +├── build-custom-libkrunfw.sh # Clones libkrunfw, applies config, builds +├── kernel/ +│ └── bridge-cni.config # Kernel config fragment +└── README.md # Operator documentation + +Output: target/custom-runtime/ +├── libkrunfw.dylib # Custom library +├── provenance.json # Build metadata +├── bridge-cni.config # Config fragment used +└── kernel.config # Full kernel .config +``` + +## Kernel Config Fragment + +The `bridge-cni.config` fragment enables these kernel features on top of the stock +libkrunfw kernel: + +| Feature | Config | Purpose | +|---------|--------|---------| +| Bridge device | `CONFIG_BRIDGE` | cni0 bridge for pod networking | +| Bridge netfilter | `CONFIG_BRIDGE_NETFILTER` | kube-proxy visibility into bridge traffic | +| Netfilter | `CONFIG_NETFILTER` | iptables/nftables framework | +| Connection tracking | `CONFIG_NF_CONNTRACK` | NAT state tracking | +| NAT | `CONFIG_NF_NAT` | Service VIP DNAT/SNAT | +| iptables | `CONFIG_IP_NF_IPTABLES` | kube-proxy iptables mode | +| nftables | `CONFIG_NF_TABLES` | kube-proxy nft mode (future) | +| veth | `CONFIG_VETH` | Pod network namespace pairs | +| IPVS | `CONFIG_IP_VS` | kube-proxy IPVS mode (optional) | + +## Verification + +Two verification tools are provided: + +1. **Capability checker** (`check-vm-capabilities.sh`): Runs inside the VM to verify + kernel capabilities. Produces pass/fail results for each required feature. + +2. **Verification matrix** (`verify-vm.sh`): Runs from the host against a running VM. + Checks node health, pod status, networking, service reachability, and event logs. + +## Rollout Strategy + +1. Custom runtime support is opt-in via `OPENSHELL_VM_RUNTIME_SOURCE_DIR`. +2. Auto-detection selects the correct network profile at boot. +3. The stock runtime path (`legacy-vm-net`) remains the default until the custom + runtime is proven stable. +4. Rollback: unset the env var and re-bundle with stock libraries. 
+ +## Related Files + +| File | Purpose | +|------|---------| +| `crates/openshell-vm/src/ffi.rs` | Runtime loading, provenance capture | +| `crates/openshell-vm/src/lib.rs` | VM launch, provenance logging | +| `crates/openshell-vm/scripts/gateway-init.sh` | Guest init, network profile selection | +| `crates/openshell-vm/scripts/check-vm-capabilities.sh` | Kernel capability checker | +| `crates/openshell-vm/scripts/verify-vm.sh` | Host-side verification matrix | +| `crates/openshell-vm/runtime/` | Build pipeline and kernel config | +| `tasks/scripts/bundle-vm-runtime.sh` | Runtime bundling (stock + custom) | +| `tasks/vm.toml` | Mise task definitions | diff --git a/crates/openshell-vm/runtime/README.md b/crates/openshell-vm/runtime/README.md new file mode 100644 index 00000000..891d1a23 --- /dev/null +++ b/crates/openshell-vm/runtime/README.md @@ -0,0 +1,183 @@ +# Custom libkrunfw Runtime + +This directory contains the build infrastructure for a custom `libkrunfw` runtime +that enables bridge CNI and netfilter support in the OpenShell gateway VM. + +## Why + +The stock `libkrunfw` (from Homebrew) ships a kernel without bridge, netfilter, +or conntrack support. This means the VM cannot: + +- Create `cni0` bridge interfaces (required by the bridge CNI plugin) +- Run kube-proxy (requires iptables/nftables) +- Route service VIP traffic (requires NAT/conntrack) + +The custom runtime builds libkrunfw with an additional kernel config fragment +that enables these features. 
+ +## Directory Structure + +``` +runtime/ + build-custom-libkrunfw.sh # Build script for custom libkrunfw + kernel/ + bridge-cni.config # Kernel config fragment (bridge + netfilter) +``` + +## Building + +### Prerequisites + +- Rust toolchain +- make, git, curl +- On macOS: Xcode command line tools and cross-compilation tools for aarch64 + +### Quick Build + +```bash +# Build custom libkrunfw (clones libkrunfw repo, applies config, builds) +./crates/openshell-vm/runtime/build-custom-libkrunfw.sh + +# Or via mise task: +mise run vm:build-custom-runtime +``` + +### Output + +Build artifacts are placed in `target/custom-runtime/`: + +``` +target/custom-runtime/ + libkrunfw.dylib # The custom library + libkrunfw..dylib # Version-suffixed copy + provenance.json # Build metadata (commit, hash, timestamp) + bridge-cni.config # The config fragment used + kernel.config # Full kernel .config (for debugging) +``` + +### Using the Custom Runtime + +```bash +# Point the bundle script at the custom build: +export OPENSHELL_VM_RUNTIME_SOURCE_DIR=target/custom-runtime +mise run vm:bundle-runtime + +# Then boot the VM as usual: +mise run vm +``` + +## Network Profiles + +The VM init script (`gateway-init.sh`) auto-detects the kernel capabilities +and selects the appropriate networking profile: + +| Profile | Kernel | CNI | kube-proxy | Service VIPs | +|---------|--------|-----|------------|--------------| +| `bridge` | Custom (bridge+netfilter) | bridge CNI (`cni0`) | Enabled | Yes | +| `legacy-vm-net` | Stock (no netfilter) | ptp CNI | Disabled | No (direct IP) | + +To force a specific profile: + +```bash +# Inside the VM (set in gateway-init.sh env): +export OPENSHELL_VM_NET_PROFILE=bridge # Force bridge CNI +export OPENSHELL_VM_NET_PROFILE=legacy-vm-net # Force legacy ptp CNI +``` + +## Runtime Provenance + +At VM boot, the gateway binary logs provenance information about the loaded +runtime: + +``` +runtime: /path/to/gateway.runtime + libkrunfw: libkrunfw.dylib + sha256: 
a1b2c3d4e5f6... + type: custom (OpenShell-built) + libkrunfw-commit: abc1234 + kernel-version: 6.6.30 + build-timestamp: 2026-03-23T10:00:00Z +``` + +For stock runtimes: +``` +runtime: /path/to/gateway.runtime + libkrunfw: libkrunfw.dylib + sha256: f6e5d4c3b2a1... + type: stock (system/homebrew) +``` + +## Verification + +### Capability Check (inside VM) + +```bash +# Run inside the VM to verify kernel capabilities: +/srv/check-vm-capabilities.sh + +# JSON output for CI: +/srv/check-vm-capabilities.sh --json +``` + +### Full Verification Matrix + +```bash +# Run from the host with a running VM: +./crates/openshell-vm/scripts/verify-vm.sh + +# Or via mise task: +mise run vm:verify +``` + +## Rollback + +To revert to the stock runtime: + +```bash +# Unset the custom runtime source: +unset OPENSHELL_VM_RUNTIME_SOURCE_DIR + +# Re-bundle with stock libraries: +mise run vm:bundle-runtime + +# Boot — will auto-detect legacy-vm-net profile: +mise run vm +``` + +## Troubleshooting + +### "FailedCreatePodSandBox" bridge errors + +The kernel does not have bridge support. Verify: +```bash +# Inside VM: +ip link add test0 type bridge && echo "bridge OK" && ip link del test0 +``` + +If this fails, you are running the stock runtime. Build and use the custom one. + +### kube-proxy CrashLoopBackOff + +The kernel does not have netfilter support. Verify: +```bash +# Inside VM: +iptables -L -n +``` + +If this fails with "iptables not found" or "modprobe: can't change directory", +the kernel lacks CONFIG_NETFILTER. Use the custom runtime. + +### Runtime mismatch after upgrade + +If libkrunfw is updated (e.g., via `brew upgrade`), the stock runtime may +change. 
Check provenance: +```bash +# Look for provenance info in VM boot output +grep "runtime:" ~/.local/share/openshell/gateway/console.log +``` + +Re-build the custom runtime if needed: +```bash +mise run vm:build-custom-runtime +mise run vm:bundle-runtime +``` diff --git a/crates/openshell-vm/runtime/build-custom-libkrunfw.sh b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh new file mode 100755 index 00000000..ab2aa1bf --- /dev/null +++ b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh @@ -0,0 +1,406 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build a custom libkrunfw with bridge/netfilter kernel support. +# +# This script clones libkrunfw, applies the OpenShell kernel config +# fragment (bridge CNI, iptables, conntrack), builds the library, and +# stages the artifact with provenance metadata. +# +# Prerequisites: +# - Rust toolchain (cargo) +# - make, git, curl +# - Cross-compilation toolchain for aarch64 (if building on x86_64) +# - On macOS: Xcode command line tools +# +# Usage: +# ./build-custom-libkrunfw.sh [--output-dir DIR] [--libkrunfw-ref REF] +# +# Environment: +# LIBKRUNFW_REF - git ref to check out (default: main) +# LIBKRUNFW_REPO - git repo URL (default: github.com/containers/libkrunfw) +# OPENSHELL_RUNTIME_OUTPUT_DIR - output directory for built artifacts + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." 
&& pwd)" +KERNEL_CONFIG_FRAGMENT="${SCRIPT_DIR}/kernel/bridge-cni.config" + +# Defaults +LIBKRUNFW_REPO="${LIBKRUNFW_REPO:-https://github.com/containers/libkrunfw.git}" +LIBKRUNFW_REF="${LIBKRUNFW_REF:-main}" +OUTPUT_DIR="${OPENSHELL_RUNTIME_OUTPUT_DIR:-${PROJECT_ROOT}/target/custom-runtime}" +BUILD_DIR="${PROJECT_ROOT}/target/libkrunfw-build" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --output-dir) + OUTPUT_DIR="$2"; shift 2 ;; + --libkrunfw-ref) + LIBKRUNFW_REF="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 [--output-dir DIR] [--libkrunfw-ref REF]" + echo "" + echo "Build a custom libkrunfw with bridge/netfilter kernel support." + echo "" + echo "Options:" + echo " --output-dir DIR Output directory for built artifacts" + echo " --libkrunfw-ref REF Git ref to check out (default: main)" + echo "" + echo "Environment:" + echo " LIBKRUNFW_REPO Git repo URL" + echo " LIBKRUNFW_REF Git ref (branch/tag/commit)" + echo " OPENSHELL_RUNTIME_OUTPUT_DIR Output directory" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +echo "==> Building custom libkrunfw" +echo " Repo: ${LIBKRUNFW_REPO}" +echo " Ref: ${LIBKRUNFW_REF}" +echo " Config fragment: ${KERNEL_CONFIG_FRAGMENT}" +echo " Output: ${OUTPUT_DIR}" +echo "" + +# ── Clone / update libkrunfw ──────────────────────────────────────────── + +if [ -d "${BUILD_DIR}/libkrunfw/.git" ]; then + echo "==> Updating existing libkrunfw checkout..." + git -C "${BUILD_DIR}/libkrunfw" fetch origin + git -C "${BUILD_DIR}/libkrunfw" checkout "${LIBKRUNFW_REF}" + git -C "${BUILD_DIR}/libkrunfw" pull --ff-only 2>/dev/null || true +else + echo "==> Cloning libkrunfw..." 
+ mkdir -p "${BUILD_DIR}" + git clone "${LIBKRUNFW_REPO}" "${BUILD_DIR}/libkrunfw" + git -C "${BUILD_DIR}/libkrunfw" checkout "${LIBKRUNFW_REF}" +fi + +LIBKRUNFW_DIR="${BUILD_DIR}/libkrunfw" +LIBKRUNFW_COMMIT=$(git -C "${LIBKRUNFW_DIR}" rev-parse HEAD) +LIBKRUNFW_SHORT=$(git -C "${LIBKRUNFW_DIR}" rev-parse --short HEAD) + +echo " Commit: ${LIBKRUNFW_COMMIT}" + +# ── Detect the kernel version libkrunfw targets ──────────────────────── + +# libkrunfw's Makefile typically sets KERNEL_VERSION or has it in a +# config file. Try to detect it. +KERNEL_VERSION="" +if [ -f "${LIBKRUNFW_DIR}/Makefile" ]; then + KERNEL_VERSION=$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' "${LIBKRUNFW_DIR}/Makefile" 2>/dev/null | head -1 | sed 's/.*= *//' || true) +fi +if [ -z "$KERNEL_VERSION" ] && [ -f "${LIBKRUNFW_DIR}/kernel_version" ]; then + KERNEL_VERSION=$(cat "${LIBKRUNFW_DIR}/kernel_version") +fi +echo " Kernel version: ${KERNEL_VERSION:-unknown}" + +# ── Apply kernel config fragment ──────────────────────────────────────── + +echo "==> Applying OpenShell kernel config fragment..." + +# libkrunfw builds the kernel with a config generated from its own +# sources. The config merge happens after `make olddefconfig` runs +# on the base config. We use the kernel's scripts/kconfig/merge_config.sh +# when available, otherwise do a simple append+olddefconfig. + +MERGE_HOOK="${LIBKRUNFW_DIR}/openshell-kconfig-hook.sh" +cat > "${MERGE_HOOK}" << 'HOOKEOF' +#!/usr/bin/env bash +# Hook called by the libkrunfw build after extracting the kernel source. +# Merges the OpenShell kernel config fragment into .config. +set -euo pipefail + +KERNEL_DIR="$1" +FRAGMENT="$2" + +if [ ! -d "$KERNEL_DIR" ]; then + echo "ERROR: kernel source dir not found: $KERNEL_DIR" >&2 + exit 1 +fi + +if [ ! 
-f "$FRAGMENT" ]; then + echo "ERROR: config fragment not found: $FRAGMENT" >&2 + exit 1 +fi + +cd "$KERNEL_DIR" + +if [ -f scripts/kconfig/merge_config.sh ]; then + echo " Using kernel merge_config.sh" + KCONFIG_CONFIG=.config ./scripts/kconfig/merge_config.sh -m .config "$FRAGMENT" +else + echo " Appending fragment and running olddefconfig" + cat "$FRAGMENT" >> .config +fi + +make ARCH=arm64 olddefconfig + +# Verify critical configs are set +REQUIRED=( + CONFIG_BRIDGE + CONFIG_BRIDGE_NETFILTER + CONFIG_NETFILTER + CONFIG_NF_CONNTRACK + CONFIG_NF_NAT + CONFIG_IP_NF_IPTABLES + CONFIG_IP_NF_FILTER + CONFIG_IP_NF_NAT + CONFIG_VETH + CONFIG_NET_NS +) + +MISSING=() +for cfg in "${REQUIRED[@]}"; do + if ! grep -q "^${cfg}=[ym]" .config; then + MISSING+=("$cfg") + fi +done + +if [ ${#MISSING[@]} -gt 0 ]; then + echo "ERROR: Required kernel configs not set after merge:" >&2 + printf " %s\n" "${MISSING[@]}" >&2 + exit 1 +fi + +echo " All required kernel configs verified." +HOOKEOF +chmod +x "${MERGE_HOOK}" + +# ── Build libkrunfw ──────────────────────────────────────────────────── + +echo "==> Building libkrunfw (this may take 10-30 minutes)..." + +cd "${LIBKRUNFW_DIR}" + +# Detect macOS vs Linux and pick the right library extension / target +if [ "$(uname -s)" = "Darwin" ]; then + LIB_EXT="dylib" +else + LIB_EXT="so" +fi + +# Detect the kernel source directory name from the Makefile +KERNEL_DIR_NAME=$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' Makefile | head -1 | sed 's/KERNEL_VERSION *= *//') +if [ -z "$KERNEL_DIR_NAME" ]; then + echo "ERROR: Could not detect KERNEL_VERSION from Makefile" >&2 + exit 1 +fi +echo " Kernel source dir: ${KERNEL_DIR_NAME}" + +if [ "$(uname -s)" = "Darwin" ]; then + # On macOS, the entire kernel build (extract, patch, config, compile) + # must happen inside a Linux container since the kernel build system + # requires a Linux toolchain. We mount the checkout, inject the + # config fragment, and produce kernel.c inside the container. 
+ + if ! command -v docker &>/dev/null; then + echo "ERROR: docker is required to build the kernel on macOS" >&2 + exit 1 + fi + + echo "==> Building kernel inside Docker (macOS detected)..." + + # Pre-build a Docker image with all build dependencies so the + # (potentially emulated) apt-get/pip work is cached across runs. + BUILDER_IMAGE="libkrunfw-builder:bookworm" + if ! docker image inspect "${BUILDER_IMAGE}" >/dev/null 2>&1; then + echo " Building Docker toolchain image (first time only)..." + docker build --platform linux/arm64 -t "${BUILDER_IMAGE}" - <<'DOCKERFILE' +FROM debian:bookworm-slim +RUN apt-get update -qq && \ + apt-get install -y -qq make gcc bc flex bison libelf-dev python3 \ + coreutils curl xz-utils patch libssl-dev cpio >/dev/null 2>&1 && \ + rm -rf /var/lib/apt/lists/* +DOCKERFILE + fi + + # Stage only the files the build needs into a temp dir so we + # avoid copying the enormous kernel source tree over virtiofs. + DOCKER_STAGE=$(mktemp -d) + trap 'rm -rf "${DOCKER_STAGE}"' EXIT + + echo " Staging build inputs..." 
+ cp "${LIBKRUNFW_DIR}/Makefile" "${DOCKER_STAGE}/" + cp "${LIBKRUNFW_DIR}/bin2cbundle.py" "${DOCKER_STAGE}/" + # Copy base kernel config(s) and patches + for f in "${LIBKRUNFW_DIR}"/config-libkrunfw*; do + [ -f "$f" ] && cp "$f" "${DOCKER_STAGE}/" + done + if [ -d "${LIBKRUNFW_DIR}/patches" ]; then + cp -r "${LIBKRUNFW_DIR}/patches" "${DOCKER_STAGE}/patches" + fi + if [ -d "${LIBKRUNFW_DIR}/patches-tee" ]; then + cp -r "${LIBKRUNFW_DIR}/patches-tee" "${DOCKER_STAGE}/patches-tee" + fi + # Copy the cached tarball if present so we don't re-download + if [ -d "${LIBKRUNFW_DIR}/tarballs" ]; then + cp -r "${LIBKRUNFW_DIR}/tarballs" "${DOCKER_STAGE}/tarballs" + fi + + docker run --rm \ + -v "${DOCKER_STAGE}:/work" \ + -v "${KERNEL_CONFIG_FRAGMENT}:/fragment/bridge-cni.config:ro" \ + -v "${MERGE_HOOK}:/fragment/merge-hook.sh:ro" \ + -w /build \ + --platform linux/arm64 \ + "${BUILDER_IMAGE}" \ + bash -c ' + set -euo pipefail + + KDIR="'"${KERNEL_DIR_NAME}"'" + + # Copy staged inputs to container-local filesystem so tar + # extraction does not hit macOS APFS bind-mount limitations. + echo " Copying build inputs to container filesystem..." + cp -r /work/* /build/ + + # Patch bin2cbundle.py to make the elftools import lazy. + # We only use -t Image (raw path) which does not need elftools, + # but the top-level import would fail without pyelftools installed. + sed -i "s/^from elftools/# lazy: from elftools/" bin2cbundle.py + + # Step 1: prepare kernel sources (download, extract, patch, base config, olddefconfig) + echo " Preparing kernel sources..." + make "$KDIR" + + # Step 2: merge the OpenShell config fragment + echo " Merging OpenShell kernel config fragment..." + bash /fragment/merge-hook.sh "/build/$KDIR" /fragment/bridge-cni.config + + # Step 3: build the kernel and generate the C bundle + echo " Building kernel (this is the slow part)..." + make -j"$(nproc)" "$KDIR"/arch/arm64/boot/Image + + echo " Generating kernel.c bundle..." 
+ python3 bin2cbundle.py -t Image "$KDIR"/arch/arm64/boot/Image kernel.c + + # Copy artifacts back to the bind-mounted staging dir + echo " Copying artifacts back to host..." + cp /build/kernel.c /work/kernel.c + cp /build/"$KDIR"/.config /work/kernel.config + ' + + # Move artifacts from staging dir to the libkrunfw checkout + cp "${DOCKER_STAGE}/kernel.c" "${LIBKRUNFW_DIR}/kernel.c" + if [ -f "${DOCKER_STAGE}/kernel.config" ]; then + mkdir -p "${LIBKRUNFW_DIR}/${KERNEL_DIR_NAME}" + cp "${DOCKER_STAGE}/kernel.config" "${LIBKRUNFW_DIR}/${KERNEL_DIR_NAME}/.config" + fi + + # Compile the shared library on the host (uses host cc for a .dylib) + echo "==> Compiling libkrunfw.dylib on host..." + ABI_VERSION=$(grep -oE 'ABI_VERSION\s*=\s*[0-9]+' Makefile | head -1 | sed 's/[^0-9]//g') + cc -fPIC -DABI_VERSION="${ABI_VERSION}" -shared -o "libkrunfw.${ABI_VERSION}.dylib" kernel.c +else + # On Linux, we can do everything natively in three steps: + + # Step 1: prepare kernel sources + echo " Preparing kernel sources..." + make "${KERNEL_DIR_NAME}" + + # Step 2: merge config fragment + echo "==> Merging OpenShell kernel config fragment..." + bash "${MERGE_HOOK}" "${LIBKRUNFW_DIR}/${KERNEL_DIR_NAME}" "${KERNEL_CONFIG_FRAGMENT}" + + # Step 3: build the kernel and shared library + make -j"$(nproc)" "$(grep -oE 'KRUNFW_BINARY_Linux\s*=\s*\S+' Makefile | head -1 | sed 's/[^=]*= *//')" || \ + make -j"$(nproc)" libkrunfw.so +fi + +# ── Stage output artifacts ────────────────────────────────────────────── + +echo "==> Staging artifacts..." +mkdir -p "${OUTPUT_DIR}" + +# Find the built library — check versioned names (e.g. 
libkrunfw.5.dylib) first +BUILT_LIB="" +for candidate in \ + "${LIBKRUNFW_DIR}"/libkrunfw*.${LIB_EXT} \ + "${LIBKRUNFW_DIR}/libkrunfw.${LIB_EXT}" \ + "${LIBKRUNFW_DIR}/target/release/libkrunfw.${LIB_EXT}" \ + "${LIBKRUNFW_DIR}/build/libkrunfw.${LIB_EXT}"; do + if [ -f "$candidate" ]; then + BUILT_LIB="$candidate" + break + fi +done + +if [ -z "$BUILT_LIB" ]; then + echo "ERROR: Could not find built libkrunfw.${LIB_EXT}" >&2 + echo " Searched in ${LIBKRUNFW_DIR}/ for libkrunfw*.${LIB_EXT}" + exit 1 +fi + +echo " Found library: ${BUILT_LIB}" + +# Compute SHA-256 (shasum on macOS, sha256sum on Linux) +if command -v sha256sum &>/dev/null; then + ARTIFACT_HASH=$(sha256sum "${BUILT_LIB}" | cut -d' ' -f1) +else + ARTIFACT_HASH=$(shasum -a 256 "${BUILT_LIB}" | cut -d' ' -f1) +fi +ARTIFACT_HASH_SHORT="${ARTIFACT_HASH:0:12}" + +# Copy the library — always stage as libkrunfw.dylib / libkrunfw.so +# (the base name the runtime loader expects) plus the original name +cp "${BUILT_LIB}" "${OUTPUT_DIR}/libkrunfw.${LIB_EXT}" +BUILT_BASENAME="$(basename "${BUILT_LIB}")" +if [ "${BUILT_BASENAME}" != "libkrunfw.${LIB_EXT}" ]; then + cp "${BUILT_LIB}" "${OUTPUT_DIR}/${BUILT_BASENAME}" +fi + +# Copy the kernel config that was actually used (for reproducibility) +KERNEL_SRC_DIR="" +for candidate in \ + "${LIBKRUNFW_DIR}/linux-"* \ + "${LIBKRUNFW_DIR}/build/linux-"* \ + "${LIBKRUNFW_DIR}/kernel/linux-"*; do + if [ -d "$candidate" ] && [ -f "${candidate}/.config" ]; then + KERNEL_SRC_DIR="$candidate" + break + fi +done + +if [ -n "$KERNEL_SRC_DIR" ] && [ -f "${KERNEL_SRC_DIR}/.config" ]; then + cp "${KERNEL_SRC_DIR}/.config" "${OUTPUT_DIR}/kernel.config" +fi + +# Copy our fragment for reference +cp "${KERNEL_CONFIG_FRAGMENT}" "${OUTPUT_DIR}/bridge-cni.config" + +# ── Write provenance metadata ────────────────────────────────────────── + +cat > "${OUTPUT_DIR}/provenance.json" << EOF +{ + "artifact": "libkrunfw-custom", + "version": "0.1.0-openshell", + "build_timestamp": "$(date -u 
+%Y-%m-%dT%H:%M:%SZ)",
+ "libkrunfw_repo": "${LIBKRUNFW_REPO}",
+ "libkrunfw_ref": "${LIBKRUNFW_REF}",
+ "libkrunfw_commit": "${LIBKRUNFW_COMMIT}",
+ "kernel_version": "${KERNEL_VERSION:-unknown}",
+ "kernel_config_fragment": "bridge-cni.config",
+ "artifact_sha256": "${ARTIFACT_HASH}",
+ "host_os": "$(uname -s)",
+ "host_arch": "$(uname -m)",
+ "builder": "build-custom-libkrunfw.sh"
+}
+EOF
+
+echo ""
+echo "==> Build complete"
+echo " Library: ${OUTPUT_DIR}/libkrunfw.${LIB_EXT}"
+echo " SHA256: ${ARTIFACT_HASH_SHORT}..."
+echo " Provenance: ${OUTPUT_DIR}/provenance.json"
+echo " Commit: ${LIBKRUNFW_SHORT}"
+echo ""
+echo "To use this runtime:"
+echo " export OPENSHELL_VM_RUNTIME_SOURCE_DIR=${OUTPUT_DIR}"
+echo " mise run vm:bundle-runtime"
diff --git a/crates/openshell-vm/runtime/kernel/bridge-cni.config b/crates/openshell-vm/runtime/kernel/bridge-cni.config
new file mode 100644
index 00000000..2e484b07
--- /dev/null
+++ b/crates/openshell-vm/runtime/kernel/bridge-cni.config
@@ -0,0 +1,92 @@
+# Custom kernel config fragment for libkrunfw (OpenShell VM)
+#
+# This fragment is applied on top of libkrunfw's base kernel config
+# to enable bridge CNI, netfilter/iptables, and conntrack support
+# required for Kubernetes pod networking in the VM.
+#
+# Applied automatically by runtime/build-custom-libkrunfw.sh (kconfig merge hook).
+#
+# See also: check-vm-capabilities.sh for runtime verification. 
+
+# ── Network Namespaces (required for pod isolation) ─────────────────────
+CONFIG_NET_NS=y
+CONFIG_NAMESPACES=y
+
+# ── Virtual Ethernet (veth pairs for pod networking) ────────────────────
+CONFIG_VETH=y
+
+# ── Linux Bridge (required for bridge CNI plugin) ──────────────────────
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_NETFILTER=y
+CONFIG_BRIDGE_IGMP_SNOOPING=y
+
+# ── Netfilter framework ────────────────────────────────────────────────
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_INGRESS=y
+CONFIG_NETFILTER_NETLINK=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+
+# ── Connection tracking (required for NAT and kube-proxy) ──────────────
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+
+# ── NAT (required for service VIP / DNAT / SNAT) ──────────────────────
+CONFIG_NF_NAT=y
+# CONFIG_NF_NAT_MASQUERADE_IPV4 was folded into CONFIG_NF_NAT in Linux 5.1; not a symbol in 6.6.x.
+
+# ── iptables (kube-proxy iptables mode) ────────────────────────────────
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_REJECT=y
+
+# ── nftables (kube-proxy nft mode, future-proofing) ────────────────────
+CONFIG_NF_TABLES=y
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_CT=y
+CONFIG_NFT_NAT=y
+CONFIG_NFT_MASQ=y
+CONFIG_NFT_REJECT=y
+CONFIG_NFT_COMPAT=y
+CONFIG_NFT_COUNTER=y
+
+# ── IP forwarding and routing (required for pod-to-pod) ────────────────
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_NET_IP_TUNNEL=y
+
+# ── IPVS (optional: kube-proxy IPVS mode) ─────────────────────────────
+CONFIG_IP_VS=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_RR=y
+CONFIG_IP_VS_WRR=y
+CONFIG_IP_VS_SH=y
+CONFIG_IP_VS_NFCT=y
+
+# ── Misc networking required by Kubernetes ─────────────────────────────
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_CGROUP_NET_PRIO=y 
+CONFIG_CGROUP_NET_CLASSID=y + +# ── Dummy interface (fallback networking) ────────────────────────────── +CONFIG_DUMMY=y + +# ── TUN/TAP (used by some CNI plugins) ──────────────────────────────── +CONFIG_TUN=y + +# ── Cgroups (already in base, ensure v2 is available) ────────────────── +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PIDS=y +CONFIG_MEMCG=y diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh index e5483a20..d2fed06e 100755 --- a/crates/openshell-vm/scripts/build-rootfs.sh +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -115,6 +115,10 @@ chmod +x "${ROOTFS_DIR}/srv/gateway-init.sh" cp "${SCRIPT_DIR}/hello-server.py" "${ROOTFS_DIR}/srv/hello-server.py" chmod +x "${ROOTFS_DIR}/srv/hello-server.py" +# Inject VM capability checker for runtime diagnostics. +cp "${SCRIPT_DIR}/check-vm-capabilities.sh" "${ROOTFS_DIR}/srv/check-vm-capabilities.sh" +chmod +x "${ROOTFS_DIR}/srv/check-vm-capabilities.sh" + # ── Package and inject helm chart ──────────────────────────────────── HELM_CHART_DIR="${PROJECT_ROOT}/deploy/helm/openshell" diff --git a/crates/openshell-vm/scripts/check-vm-capabilities.sh b/crates/openshell-vm/scripts/check-vm-capabilities.sh new file mode 100755 index 00000000..2e758f5e --- /dev/null +++ b/crates/openshell-vm/scripts/check-vm-capabilities.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# VM Kernel Capability Checker +# +# Runs inside the guest VM (or a container with the same rootfs) to +# verify that the kernel has the capabilities required for bridge CNI +# networking, kube-proxy, and Kubernetes pod networking. 
+# +# Usage: +# ./check-vm-capabilities.sh [--json] +# +# Exit codes: +# 0 = all required capabilities present +# 1 = one or more required capabilities missing +# 2 = script error + +set -euo pipefail + +JSON_OUTPUT=false +if [ "${1:-}" = "--json" ]; then + JSON_OUTPUT=true +fi + +PASS=0 +FAIL=0 +WARN=0 +RESULTS=() + +# ── Helpers ───────────────────────────────────────────────────────────── + +check() { + local name="$1" + local category="$2" + local required="$3" # "required" or "optional" + local description="$4" + shift 4 + local cmd=("$@") + + if eval "${cmd[@]}" >/dev/null 2>&1; then + RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"pass\",\"required\":\"$required\",\"description\":\"$description\"}") + PASS=$((PASS + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ✓ %-40s %s\n" "$name" "$description" + fi + else + if [ "$required" = "required" ]; then + RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"fail\",\"required\":\"$required\",\"description\":\"$description\"}") + FAIL=$((FAIL + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ✗ %-40s %s (REQUIRED)\n" "$name" "$description" + fi + else + RESULTS+=("{\"name\":\"$name\",\"category\":\"$category\",\"status\":\"warn\",\"required\":\"$required\",\"description\":\"$description\"}") + WARN=$((WARN + 1)) + if [ "$JSON_OUTPUT" = false ]; then + printf " ~ %-40s %s (optional)\n" "$name" "$description" + fi + fi + fi +} + +check_module() { + local module="$1" + # Check /proc/modules (loaded), /proc/config.gz (builtin), or /sys/module + if [ -d "/sys/module/$module" ]; then + return 0 + fi + if grep -q "^${module} " /proc/modules 2>/dev/null; then + return 0 + fi + # Check if compiled in via /proc/config.gz or /boot/config + local config_key + config_key="CONFIG_$(echo "$module" | tr '[:lower:]-' '[:upper:]_')" + if [ -f /proc/config.gz ]; then + if zcat /proc/config.gz 2>/dev/null | grep -q "^${config_key}=[ym]"; then + return 0 + fi + fi + return 1 +} + +# 
── Capability Checks ────────────────────────────────────────────────── + +if [ "$JSON_OUTPUT" = false ]; then + echo "VM Kernel Capability Check" + echo "==========================" + echo "" + echo "Kernel: $(uname -r)" + echo "" +fi + +# --- Network Namespaces --- +if [ "$JSON_OUTPUT" = false ]; then echo "[Network Namespaces]"; fi + +check "net_namespace" "netns" "required" \ + "network namespace support (CONFIG_NET_NS)" \ + "test -d /proc/self/ns && ls /proc/self/ns/net" + +check "veth_pair" "netns" "required" \ + "veth pair creation (CONFIG_VETH)" \ + "ip link add _chk0 type veth peer name _chk1 && ip link del _chk0" + +# --- Linux Bridge --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[Linux Bridge]"; fi + +check "bridge_module" "bridge" "required" \ + "bridge device support (CONFIG_BRIDGE)" \ + "ip link add _chkbr0 type bridge && ip link del _chkbr0" + +check "bridge_nf_call" "bridge" "required" \ + "bridge netfilter (CONFIG_BRIDGE_NETFILTER)" \ + "check_module bridge && test -f /proc/sys/net/bridge/bridge-nf-call-iptables 2>/dev/null || check_module br_netfilter" + +# --- Netfilter / iptables --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[Netfilter / iptables]"; fi + +check "netfilter" "netfilter" "required" \ + "netfilter framework (CONFIG_NETFILTER)" \ + "check_module nf_conntrack || check_module ip_tables || test -d /proc/sys/net/netfilter" + +check "nf_conntrack" "netfilter" "required" \ + "connection tracking (CONFIG_NF_CONNTRACK)" \ + "check_module nf_conntrack" + +check "nf_nat" "netfilter" "required" \ + "NAT support (CONFIG_NF_NAT)" \ + "check_module nf_nat" + +check "iptables_filter" "netfilter" "required" \ + "iptables filter (CONFIG_IP_NF_FILTER)" \ + "check_module ip_tables || iptables -L -n >/dev/null 2>&1" + +check "iptables_nat" "netfilter" "required" \ + "iptables NAT (CONFIG_IP_NF_NAT)" \ + "check_module iptable_nat || iptables -t nat -L -n >/dev/null 2>&1" + +check "iptables_mangle" "netfilter" "optional" \ + "iptables 
mangle (CONFIG_IP_NF_MANGLE)" \ + "check_module iptable_mangle || iptables -t mangle -L -n >/dev/null 2>&1" + +check "nf_conntrack_netlink" "netfilter" "optional" \ + "conntrack netlink (CONFIG_NF_CT_NETLINK)" \ + "check_module nf_conntrack_netlink" + +check "nftables" "netfilter" "optional" \ + "nftables (CONFIG_NF_TABLES)" \ + "check_module nf_tables || nft list ruleset >/dev/null 2>&1" + +# --- IP Forwarding / Routing --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[IP Forwarding]"; fi + +check "ip_forward" "routing" "required" \ + "IP forwarding (sysctl)" \ + "test -f /proc/sys/net/ipv4/ip_forward" + +check "ip_route" "routing" "required" \ + "IP routing" \ + "ip route show >/dev/null 2>&1" + +# --- CNI Plugin Dependencies --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[CNI Plugins]"; fi + +check "cni_bridge_bin" "cni" "required" \ + "bridge CNI plugin binary" \ + "test -x /opt/cni/bin/bridge || find /var/lib/rancher/k3s/data -name bridge -type f 2>/dev/null | head -1 | grep -q ." + +check "cni_host_local_bin" "cni" "required" \ + "host-local IPAM plugin binary" \ + "test -x /opt/cni/bin/host-local || find /var/lib/rancher/k3s/data -name host-local -type f 2>/dev/null | head -1 | grep -q ." + +check "cni_loopback_bin" "cni" "required" \ + "loopback CNI plugin binary" \ + "test -x /opt/cni/bin/loopback || find /var/lib/rancher/k3s/data -name loopback -type f 2>/dev/null | head -1 | grep -q ." + +check "cni_portmap_bin" "cni" "optional" \ + "portmap CNI plugin binary (needs iptables)" \ + "test -x /opt/cni/bin/portmap || find /var/lib/rancher/k3s/data -name portmap -type f 2>/dev/null | head -1 | grep -q ." 
+ +# --- Userspace Tools --- +if [ "$JSON_OUTPUT" = false ]; then echo ""; echo "[Userspace Tools]"; fi + +check "iptables_bin" "userspace" "required" \ + "iptables binary" \ + "command -v iptables" + +check "conntrack_bin" "userspace" "optional" \ + "conntrack binary" \ + "command -v conntrack" + +check "ip_bin" "userspace" "required" \ + "iproute2 (ip command)" \ + "command -v ip" + +# ── Summary ──────────────────────────────────────────────────────────── + +if [ "$JSON_OUTPUT" = true ]; then + echo "{" + echo " \"kernel\": \"$(uname -r)\"," + echo " \"timestamp\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"," + echo " \"pass\": $PASS," + echo " \"fail\": $FAIL," + echo " \"warn\": $WARN," + echo " \"results\": [" + local_first=true + for r in "${RESULTS[@]}"; do + if [ "$local_first" = true ]; then + local_first=false + else + echo "," + fi + printf " %s" "$r" + done + echo "" + echo " ]" + echo "}" +else + echo "" + echo "─────────────────────────────────────────" + printf "Results: %d passed, %d failed, %d warnings\n" "$PASS" "$FAIL" "$WARN" + + if [ "$FAIL" -gt 0 ]; then + echo "" + echo "FAIL: $FAIL required capabilities missing." + echo "The VM kernel needs to be rebuilt with the missing features." + echo "See: crates/openshell-vm/runtime/kernel/README.md" + exit 1 + else + echo "" + echo "PASS: All required capabilities present." + exit 0 + fi +fi diff --git a/crates/openshell-vm/scripts/gateway-init.sh b/crates/openshell-vm/scripts/gateway-init.sh index 5e9d49d0..0d6c9274 100755 --- a/crates/openshell-vm/scripts/gateway-init.sh +++ b/crates/openshell-vm/scripts/gateway-init.sh @@ -210,9 +210,17 @@ HELMCHART="$K3S_MANIFESTS/openshell-helmchart.yaml" if [ -f "$HELMCHART" ]; then # Use pre-loaded images — don't pull from registry. sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" - # VM bootstrap runs without CNI bridge networking. 
- sed -i 's|__HOST_NETWORK__|true|g' "$HELMCHART"
- sed -i 's|__AUTOMOUNT_SA_TOKEN__|false|g' "$HELMCHART"
+
+ if [ "${NET_PROFILE:-legacy-vm-net}" = "bridge" ]; then # NOTE(review): NET_PROFILE is only assigned later in this script; default avoids set -u abort and keeps legacy behavior
+ # Bridge CNI: pods use normal pod networking, not hostNetwork.
+ sed -i 's|__HOST_NETWORK__|false|g' "$HELMCHART"
+ sed -i 's|__AUTOMOUNT_SA_TOKEN__|true|g' "$HELMCHART"
+ else
+ # Legacy: VM bootstrap runs without CNI bridge networking.
+ sed -i 's|__HOST_NETWORK__|true|g' "$HELMCHART"
+ sed -i 's|__AUTOMOUNT_SA_TOKEN__|false|g' "$HELMCHART"
+ fi
+
+ sed -i 's|__KUBECONFIG_HOST_PATH__|"/etc/rancher/k3s"|g' "$HELMCHART"
 sed -i 's|__PERSISTENCE_ENABLED__|false|g' "$HELMCHART"
 sed -i 's|__DB_URL__|"sqlite:/tmp/openshell.db"|g' "$HELMCHART"
@@ -223,55 +231,159 @@ fi
 
 AGENT_MANIFEST="$K3S_MANIFESTS/agent-sandbox.yaml"
 if [ -f "$AGENT_MANIFEST" ]; then
- # Keep agent-sandbox on pod networking to avoid host port clashes.
- # Point in-cluster client traffic at the API server node IP because
- # kube-proxy is disabled in VM mode.
- sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST"
- sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST"
- if ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then
- sed -i 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\
- args:\
- - -metrics-bind-address=:8082\
- env:\
- - name: KUBERNETES_SERVICE_HOST\
- value: 192.168.127.2\
- - name: KUBERNETES_SERVICE_PORT\
- value: "6443"|g' "$AGENT_MANIFEST"
+ if [ "${NET_PROFILE:-legacy-vm-net}" = "bridge" ]; then
+ # Bridge CNI: agent-sandbox uses normal pod networking.
+ # kube-proxy is enabled so kubernetes.default.svc is reachable
+ # via ClusterIP — no need for KUBERNETES_SERVICE_HOST override. 
+ sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + ts "agent-sandbox: using pod networking (bridge profile)" else - sed -i 's|value: 127.0.0.1|value: 192.168.127.2|g' "$AGENT_MANIFEST" - fi - if grep -q 'hostNetwork: true' "$AGENT_MANIFEST" \ - || grep -q 'ClusterFirstWithHostNet' "$AGENT_MANIFEST" \ - || ! grep -q 'KUBERNETES_SERVICE_HOST' "$AGENT_MANIFEST" \ - || ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then - echo "ERROR: failed to patch agent-sandbox manifest for VM networking constraints: $AGENT_MANIFEST" >&2 - exit 1 + # Legacy: keep agent-sandbox on pod networking to avoid host port + # clashes. Point in-cluster client traffic at the API server node + # IP because kube-proxy is disabled in VM mode. + sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + if ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then + sed -i 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ + args:\ + - -metrics-bind-address=:8082\ + env:\ + - name: KUBERNETES_SERVICE_HOST\ + value: 192.168.127.2\ + - name: KUBERNETES_SERVICE_PORT\ + value: "6443"|g' "$AGENT_MANIFEST" + else + sed -i 's|value: 127.0.0.1|value: 192.168.127.2|g' "$AGENT_MANIFEST" + fi + if grep -q 'hostNetwork: true' "$AGENT_MANIFEST" \ + || grep -q 'ClusterFirstWithHostNet' "$AGENT_MANIFEST" \ + || ! grep -q 'KUBERNETES_SERVICE_HOST' "$AGENT_MANIFEST" \ + || ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then + echo "ERROR: failed to patch agent-sandbox manifest for VM networking constraints: $AGENT_MANIFEST" >&2 + exit 1 + fi + ts "agent-sandbox: patched for legacy-vm-net (API server override)" fi fi -# local-storage implies local-path-provisioner, which requires CNI bridge -# networking that is unavailable in the VM kernel. 
-rm -f "$K3S_MANIFESTS/local-storage.yaml" 2>/dev/null || true
+# local-storage implies local-path-provisioner. In legacy mode it
+# requires CNI bridge networking that is unavailable. In bridge mode
+# it can work but we leave it disabled for now until validated.
+if [ "${NET_PROFILE:-legacy-vm-net}" != "bridge" ]; then
+ rm -f "$K3S_MANIFESTS/local-storage.yaml" 2>/dev/null || true
+fi
 
-# ── CNI configuration (iptables-free) ───────────────────────────────────
-# The libkrun VM kernel has no netfilter/iptables support. Flannel's
-# masquerade rules and kube-proxy both require iptables and crash without
-# it. We disable both and use a simple ptp CNI with host-local IPAM
-# instead. This avoids linux bridge requirements in the VM kernel.
+# ── CNI configuration ───────────────────────────────────────────────────
+# Two networking profiles:
+#
+# 1. "bridge" (default when kernel supports it):
+# Uses the bridge CNI plugin with iptables masquerade. Requires
+# CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_NAT in the VM kernel.
+# This is the standard Kubernetes CNI path — compatible with
+# kube-proxy, service VIPs, and portmap.
 #
-# ipMasq=false avoids any iptables calls in the plugin.
-# portmap plugin removed — it requires iptables for DNAT rules.
+# 2. "legacy-vm-net" (fallback for stock libkrunfw without netfilter):
+# Uses ptp CNI without iptables. No masquerade, no portmap.
+# kube-proxy must be disabled. This is the original VM path.
 #
-# containerd falls back to default CNI paths:
-# conf_dir = /etc/cni/net.d
-# bin_dir = /opt/cni/bin
-# We write the config to the default path and symlink k3s CNI binaries.
+# The profile is auto-detected from kernel capabilities but can be
+# forced via OPENSHELL_VM_NET_PROFILE=bridge|legacy-vm-net.
+
+NET_PROFILE="${OPENSHELL_VM_NET_PROFILE:-auto}"
+
+detect_net_profile() {
+ # Check for bridge + netfilter kernel support.
+ # If we can create a bridge and the netfilter sysctl exists, use bridge CNI. 
+ local has_bridge=false + local has_netfilter=false + + if ip link add _probe_br0 type bridge 2>/dev/null; then + ip link del _probe_br0 2>/dev/null || true + has_bridge=true + fi + + if [ -d /proc/sys/net/netfilter ] || [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + has_netfilter=true + fi + + if [ "$has_bridge" = true ] && [ "$has_netfilter" = true ]; then + echo "bridge" + else + echo "legacy-vm-net" + fi +} + +if [ "$NET_PROFILE" = "auto" ]; then + NET_PROFILE=$(detect_net_profile) +fi + +ts "network profile: ${NET_PROFILE}" CNI_CONF_DIR="/etc/cni/net.d" CNI_BIN_DIR="/opt/cni/bin" mkdir -p "$CNI_CONF_DIR" "$CNI_BIN_DIR" -cat > "$CNI_CONF_DIR/10-ptp.conflist" << 'CNICFG' +if [ "$NET_PROFILE" = "bridge" ]; then + # ── Bridge CNI (full Kubernetes networking) ───────────────────── + # This path requires a custom libkrunfw with bridge + netfilter + # kernel support. Creates a cni0 bridge, uses iptables masquerade, + # and is compatible with kube-proxy. + + # Enable IP forwarding (required for masquerade). + echo 1 > /proc/sys/net/ipv4/ip_forward 2>/dev/null || true + + # Enable bridge netfilter call (required for kube-proxy to see + # bridged traffic). + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables 2>/dev/null || true + fi + + cat > "$CNI_CONF_DIR/10-bridge.conflist" << 'CNICFG' +{ + "cniVersion": "1.0.0", + "name": "bridge", + "plugins": [ + { + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "isDefaultGateway": true, + "ipMasq": true, + "hairpinMode": true, + "ipam": { + "type": "host-local", + "ranges": [[{ "subnet": "10.42.0.0/24" }]], + "routes": [{ "dst": "0.0.0.0/0" }] + } + }, + { + "type": "portmap", + "capabilities": { "portMappings": true }, + "snat": true + }, + { + "type": "loopback" + } + ] +} +CNICFG + + # Remove any legacy ptp config. 
+ rm -f "$CNI_CONF_DIR/10-ptp.conflist" 2>/dev/null || true + + ts "bridge CNI configured (cni0 + iptables masquerade)" +else + # ── Legacy ptp CNI (iptables-free) ───────────────────────────── + # The libkrun VM kernel has no netfilter/iptables support. Flannel's + # masquerade rules and kube-proxy both require iptables and crash + # without it. We disable both and use a simple ptp CNI with + # host-local IPAM instead. This avoids linux bridge requirements. + # + # ipMasq=false avoids any iptables calls in the plugin. + # portmap plugin removed — it requires iptables for DNAT rules. + + cat > "$CNI_CONF_DIR/10-ptp.conflist" << 'CNICFG' { "cniVersion": "1.0.0", "name": "ptp", @@ -292,11 +404,17 @@ cat > "$CNI_CONF_DIR/10-ptp.conflist" << 'CNICFG' } CNICFG + # Remove any bridge config. + rm -f "$CNI_CONF_DIR/10-bridge.conflist" 2>/dev/null || true + + ts "ptp CNI configured (iptables-free, no linux bridge)" +fi + # Symlink k3s-bundled CNI binaries to the default containerd bin path. # k3s extracts its tools to /var/lib/rancher/k3s/data//bin/. K3S_DATA_BIN=$(find /var/lib/rancher/k3s/data -maxdepth 2 -name bin -type d 2>/dev/null | head -1) if [ -n "$K3S_DATA_BIN" ]; then - for plugin in ptp host-local loopback bandwidth; do + for plugin in bridge ptp host-local loopback bandwidth portmap; do [ -f "$K3S_DATA_BIN/$plugin" ] && ln -sf "$K3S_DATA_BIN/$plugin" "$CNI_BIN_DIR/$plugin" done ts "CNI binaries linked from $K3S_DATA_BIN" @@ -308,30 +426,36 @@ fi # (pre-baked state from the Docker build used host-gw flannel). 
rm -f "/var/lib/rancher/k3s/agent/etc/cni/net.d/10-flannel.conflist" 2>/dev/null || true -ts "ptp CNI configured (iptables-free, no linux bridge)" - # ── Start k3s ────────────────────────────────────────────────────────── -# Flags tuned for fast single-node startup: -# --disable=traefik,servicelb,metrics-server: skip unused controllers -# --disable=coredns,local-storage: local-storage implies local-path -# provisioner and requires bridge-based networking unavailable in VM -# --disable-network-policy: skip network policy controller -# --disable-kube-proxy: VM kernel has no netfilter/iptables -# --flannel-backend=none: replaced with ptp CNI above -# --snapshotter=native: overlayfs is incompatible with virtiofs (the -# host-backed filesystem in libkrun). Operations inside overlayfs -# mounts on virtiofs fail with ECONNRESET. The native snapshotter -# uses simple directory copies and works reliably on any filesystem. - -ts "starting k3s server" -exec /usr/local/bin/k3s server \ - --disable=traefik,servicelb,metrics-server,coredns,local-storage \ - --disable-network-policy \ - --disable-kube-proxy \ - --write-kubeconfig-mode=644 \ - --node-ip="$NODE_IP" \ - --kube-apiserver-arg=bind-address=0.0.0.0 \ - --resolv-conf=/etc/resolv.conf \ - --tls-san=localhost,127.0.0.1,10.0.2.15,192.168.127.2 \ - --flannel-backend=none \ +# Flags tuned for fast single-node startup. 
The k3s flags vary depending +# on the network profile: +# +# bridge: kube-proxy enabled, flannel disabled (bridge CNI handles it) +# legacy-vm-net: kube-proxy disabled (no iptables), flannel disabled + +K3S_ARGS=( + --disable=traefik,servicelb,metrics-server,coredns + --disable-network-policy + --write-kubeconfig-mode=644 + --node-ip="$NODE_IP" + --kube-apiserver-arg=bind-address=0.0.0.0 + --resolv-conf=/etc/resolv.conf + --tls-san=localhost,127.0.0.1,10.0.2.15,192.168.127.2 + --flannel-backend=none --snapshotter=native +) + +if [ "$NET_PROFILE" = "bridge" ]; then + # With bridge CNI + iptables, kube-proxy can run. Don't disable it. + # local-storage can also work with bridge networking. + ts "starting k3s server (bridge profile — kube-proxy enabled)" +else + # Legacy: no iptables means no kube-proxy and no local-storage. + K3S_ARGS+=( + --disable=local-storage + --disable-kube-proxy + ) + ts "starting k3s server (legacy-vm-net profile — kube-proxy disabled)" +fi + +exec /usr/local/bin/k3s server "${K3S_ARGS[@]}" diff --git a/crates/openshell-vm/scripts/verify-vm.sh b/crates/openshell-vm/scripts/verify-vm.sh new file mode 100755 index 00000000..31e65931 --- /dev/null +++ b/crates/openshell-vm/scripts/verify-vm.sh @@ -0,0 +1,226 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# VM Verification Matrix +# +# Runs a comprehensive set of checks against a running gateway VM to +# validate networking, service reachability, and overall health. +# +# This script is designed to run both locally and in CI as a pass/fail +# gate for merge readiness. 
+# +# Usage: +# ./verify-vm.sh [--kubeconfig PATH] [--timeout SECS] +# +# Prerequisites: +# - A running gateway VM (`mise run vm`) +# - kubectl available in PATH +# +# Exit codes: +# 0 = all checks passed +# 1 = one or more checks failed +# 2 = script error / prerequisites not met + +set -euo pipefail + +KUBECONFIG="${KUBECONFIG:-${HOME}/.kube/gateway.yaml}" +TIMEOUT="${TIMEOUT:-120}" +PASS=0 +FAIL=0 +WARN=0 + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --kubeconfig) KUBECONFIG="$2"; shift 2 ;; + --timeout) TIMEOUT="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 [--kubeconfig PATH] [--timeout SECS]" + exit 0 + ;; + *) echo "Unknown argument: $1" >&2; exit 2 ;; + esac +done + +export KUBECONFIG + +# ── Helpers ───────────────────────────────────────────────────────────── + +check() { + local name="$1" + local category="$2" + shift 2 + local cmd=("$@") + + printf " %-50s " "$name" + if output=$(eval "${cmd[@]}" 2>&1); then + echo "PASS" + PASS=$((PASS + 1)) + else + echo "FAIL" + if [ -n "$output" ]; then + echo " $output" | head -3 + fi + FAIL=$((FAIL + 1)) + fi +} + +wait_for_api() { + local deadline=$((SECONDS + TIMEOUT)) + while [ $SECONDS -lt $deadline ]; do + if kubectl get nodes -o name >/dev/null 2>&1; then + return 0 + fi + sleep 2 + done + return 1 +} + +echo "VM Verification Matrix" +echo "======================" +echo "" +echo "Kubeconfig: ${KUBECONFIG}" +echo "Timeout: ${TIMEOUT}s" +echo "" + +# ── Prerequisites ────────────────────────────────────────────────────── + +if [ ! -f "$KUBECONFIG" ]; then + echo "ERROR: Kubeconfig not found at ${KUBECONFIG}" + echo "Is the gateway VM running? Start with: mise run vm" + exit 2 +fi + +if ! command -v kubectl >/dev/null 2>&1; then + echo "ERROR: kubectl not found in PATH" + exit 2 +fi + +echo "[Waiting for API server...]" +if ! 
wait_for_api; then + echo "ERROR: API server not reachable after ${TIMEOUT}s" + exit 2 +fi +echo "" + +# ── Node Health ──────────────────────────────────────────────────────── + +echo "[Node Health]" + +check "node exists" "node" \ + "kubectl get nodes -o name | grep -q 'node/'" + +check "node is Ready" "node" \ + "kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' | grep -q True" + +echo "" + +# ── System Pods ──────────────────────────────────────────────────────── + +echo "[System Pods]" + +check "kube-system pods running" "pods" \ + "kubectl -n kube-system get pods -o jsonpath='{.items[*].status.phase}' | grep -qv Pending" + +check "no FailedCreatePodSandBox events" "pods" \ + "! kubectl get events -A --field-selector reason=FailedCreatePodSandBox -o name 2>/dev/null | grep -q ." + +check "no CrashLoopBackOff pods" "pods" \ + "! kubectl get pods -A -o jsonpath='{.items[*].status.containerStatuses[*].state.waiting.reason}' 2>/dev/null | grep -q CrashLoopBackOff" + +echo "" + +# ── OpenShell Namespace ──────────────────────────────────────────────── + +echo "[OpenShell Namespace]" + +check "openshell namespace exists" "openshell" \ + "kubectl get namespace openshell -o name" + +check "openshell-0 pod exists" "openshell" \ + "kubectl -n openshell get pod openshell-0 -o name" + +check "openshell-0 pod is Ready" "openshell" \ + "kubectl -n openshell get pod openshell-0 -o jsonpath='{.status.conditions[?(@.type==\"Ready\")].status}' | grep -q True" + +echo "" + +# ── Networking ───────────────────────────────────────────────────────── + +echo "[Networking]" + +check "services exist" "networking" \ + "kubectl get svc -A -o name | grep -q ." + +check "kubernetes service has ClusterIP" "networking" \ + "kubectl get svc kubernetes -o jsonpath='{.spec.clusterIP}' | grep -q ." 
+ +# Check if bridge CNI is in use (cni0 bridge exists) +CNI_PROFILE="unknown" +if kubectl exec -n openshell openshell-0 -- ip link show cni0 >/dev/null 2>&1; then + CNI_PROFILE="bridge" +else + CNI_PROFILE="legacy-vm-net" +fi +echo " CNI profile detected: ${CNI_PROFILE}" + +if [ "$CNI_PROFILE" = "bridge" ]; then + check "cni0 bridge exists in pod" "networking" \ + "kubectl exec -n openshell openshell-0 -- ip link show cni0 2>/dev/null" + + # With bridge CNI, kubernetes.default.svc should be reachable. + check "kubernetes.default.svc reachable from pod" "networking" \ + "kubectl exec -n openshell openshell-0 -- wget -q -O /dev/null --timeout=5 https://kubernetes.default.svc/healthz 2>/dev/null || kubectl exec -n openshell openshell-0 -- curl -sk --connect-timeout 5 https://kubernetes.default.svc/healthz 2>/dev/null" +else + echo " (skipping bridge-specific checks for legacy-vm-net profile)" +fi + +check "no bridge creation errors in events" "networking" \ + "! kubectl get events -A 2>/dev/null | grep -qi 'bridge.*fail\\|cni0.*error\\|FailedCreatePodSandBox.*bridge'" + +echo "" + +# ── Host Port Connectivity ───────────────────────────────────────────── + +echo "[Host Connectivity]" + +check "port 6443 (kube-apiserver) reachable" "host" \ + "timeout 5 bash -c 'echo > /dev/tcp/127.0.0.1/6443' 2>/dev/null || nc -z -w5 127.0.0.1 6443 2>/dev/null" + +check "port 30051 (gateway service) reachable" "host" \ + "timeout 5 bash -c 'echo > /dev/tcp/127.0.0.1/30051' 2>/dev/null || nc -z -w5 127.0.0.1 30051 2>/dev/null" + +echo "" + +# ── Event / Log Checks ──────────────────────────────────────────────── + +echo "[Events / Logs]" + +check "no repeated bind/listen conflicts" "events" \ + "! kubectl get events -A 2>/dev/null | grep -ci 'bind.*address already in use\\|listen.*address already in use' | grep -qv '^0$'" + +check "no hostNetwork fallback warnings" "events" \ + "! 
kubectl get events -A 2>/dev/null | grep -ci 'hostNetwork.*fallback' | grep -qv '^0$'"
+
+echo ""
+
+# ── Summary ──────────────────────────────────────────────────────────
+
+echo "─────────────────────────────────────────────────────"
+printf "Results: %d passed, %d failed\n" "$PASS" "$FAIL"
+echo "CNI Profile: ${CNI_PROFILE}"
+echo ""
+
+if [ "$FAIL" -gt 0 ]; then
+    echo "FAIL: ${FAIL} check(s) failed."
+    echo ""
+    echo "Debugging:"
+    echo "  kubectl get nodes,pods -A"
+    echo "  kubectl get events -A --sort-by=.lastTimestamp"
+    echo "  cat ~/.local/share/openshell/gateway/console.log"
+    exit 1
+else
+    echo "PASS: All checks passed."
+    exit 0
+fi
diff --git a/crates/openshell-vm/src/ffi.rs b/crates/openshell-vm/src/ffi.rs
index c53cc47d..96c3f5de 100644
--- a/crates/openshell-vm/src/ffi.rs
+++ b/crates/openshell-vm/src/ffi.rs
@@ -16,6 +16,21 @@
 use libloading::Library;
 
 use crate::VmError;
 
+/// Runtime provenance information extracted from the bundle.
+#[derive(Debug, Clone)]
+pub struct RuntimeProvenance {
+    /// Path to the libkrun library that was loaded.
+    pub libkrun_path: PathBuf,
+    /// Paths to all libkrunfw libraries that were preloaded.
+    pub libkrunfw_paths: Vec<PathBuf>,
+    /// SHA-256 hash of the primary libkrunfw artifact (if computable).
+    pub libkrunfw_sha256: Option<String>,
+    /// Contents of provenance.json if present in the runtime bundle.
+    pub provenance_json: Option<String>,
+    /// Whether this is a custom (OpenShell-built) runtime.
+    pub is_custom: bool,
+}
+
 pub const KRUN_LOG_TARGET_DEFAULT: i32 = -1;
 pub const KRUN_LOG_LEVEL_OFF: u32 = 0;
 pub const KRUN_LOG_LEVEL_ERROR: u32 = 1;
@@ -73,6 +88,7 @@ pub struct LibKrun {
 }
 
 static LIBKRUN: OnceLock<LibKrun> = OnceLock::new();
+static RUNTIME_PROVENANCE: OnceLock<RuntimeProvenance> = OnceLock::new();
 
 pub fn libkrun() -> Result<&'static LibKrun, VmError> {
     if let Some(lib) = LIBKRUN.get() {
@@ -84,12 +100,37 @@
     Ok(LIBKRUN.get().expect("libkrun should be initialized"))
 }
 
+/// Return the provenance information for the loaded runtime.
+///
+/// Only available after [`libkrun()`] has been called successfully.
+pub fn runtime_provenance() -> Option<&'static RuntimeProvenance> {
+    RUNTIME_PROVENANCE.get()
+}
+
 impl LibKrun {
     fn load() -> Result<Self, VmError> {
         let path = runtime_libkrun_path()?;
-        preload_runtime_support_libraries(path.parent().ok_or_else(|| {
+        let runtime_dir = path.parent().ok_or_else(|| {
             VmError::HostSetup(format!("libkrun has no parent dir: {}", path.display()))
-        })?)?;
+        })?;
+        let krunfw_paths = preload_runtime_support_libraries(runtime_dir)?;
+
+        // Build and store provenance information.
+        let provenance_json_path = runtime_dir.join("provenance.json");
+        let provenance_json = fs::read_to_string(&provenance_json_path).ok();
+        let is_custom = provenance_json.is_some();
+
+        let libkrunfw_sha256 = krunfw_paths.first().and_then(|p| compute_sha256(p).ok());
+
+        let provenance = RuntimeProvenance {
+            libkrun_path: path.clone(),
+            libkrunfw_paths: krunfw_paths,
+            libkrunfw_sha256,
+            provenance_json,
+            is_custom,
+        };
+        let _ = RUNTIME_PROVENANCE.set(provenance);
+
         let library = Box::leak(Box::new(unsafe {
             Library::new(&path).map_err(|e| {
                 VmError::HostSetup(format!("load libkrun from {}: {e}", path.display()))
@@ -123,7 +164,7 @@
 fn runtime_libkrun_path() -> Result<PathBuf, VmError> {
     Ok(crate::configured_runtime_dir()?.join(required_runtime_lib_name()))
 }
 
-fn preload_runtime_support_libraries(runtime_dir: &Path) -> Result<(), VmError> {
+fn preload_runtime_support_libraries(runtime_dir: &Path) -> Result<Vec<PathBuf>, VmError> {
     let entries = fs::read_dir(runtime_dir)
         .map_err(|e| VmError::HostSetup(format!("read {}: {e}", runtime_dir.display())))?;
@@ -149,7 +190,7 @@
     support_libs.sort();
 
-    for path in support_libs {
+    for path in &support_libs {
         let path_cstr = std::ffi::CString::new(path.to_string_lossy().as_bytes()).map_err(|e| {
             VmError::HostSetup(format!(
                 "invalid support library path {}: {e}",
@@ -174,7 +215,7 @@
         }
     }
 
-    Ok(())
+    Ok(support_libs)
 }
 
 fn required_runtime_lib_name() -> &'static str {
@@ -188,6 +229,71 @@
     }
 }
 
+/// Compute SHA-256 hash of a file, returning hex string.
+fn compute_sha256(path: &Path) -> Result<String, std::io::Error> {
+    use std::io::Read;
+    let mut file = fs::File::open(path)?;
+    let mut hasher = sha2_hasher();
+    let mut buf = [0u8; 8192];
+    loop {
+        let n = file.read(&mut buf)?;
+        if n == 0 {
+            break;
+        }
+        hasher_update(&mut hasher, &buf[..n]);
+    }
+    Ok(hasher_finalize(hasher))
+}
+
+// Minimal SHA-256 using the sha2 crate if available, otherwise shell out.
+// We attempt a runtime `shasum` call to avoid adding a crate dependency.
+fn sha2_hasher() -> Sha256State {
+    Sha256State {
+        data: Vec::with_capacity(1024 * 1024),
+    }
+}
+
+struct Sha256State {
+    data: Vec<u8>,
+}
+
+fn hasher_update(state: &mut Sha256State, bytes: &[u8]) {
+    state.data.extend_from_slice(bytes);
+}
+
+fn hasher_finalize(state: Sha256State) -> String {
+    // Use shasum via process for simplicity — avoids adding a crypto dependency.
+    use std::io::Write;
+    use std::process::{Command, Stdio};
+
+    let mut child = match Command::new("shasum")
+        .args(["-a", "256"])
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::null())
+        .spawn()
+    {
+        Ok(c) => c,
+        Err(_) => return "unknown".to_string(),
+    };
+
+    if let Some(mut stdin) = child.stdin.take() {
+        let _ = stdin.write_all(&state.data);
+    }
+
+    match child.wait_with_output() {
+        Ok(output) if output.status.success() => {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            stdout
+                .split_whitespace()
+                .next()
+                .unwrap_or("unknown")
+                .to_string()
+        }
+        _ => "unknown".to_string(),
+    }
+}
+
 fn load_symbol(
     library: &'static Library,
     symbol: &[u8],
diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs
index 46076ceb..b14c234f 100644
--- a/crates/openshell-vm/src/lib.rs
+++ b/crates/openshell-vm/src/lib.rs
@@ -285,6 +285,34 @@
         }
     }
 
+    // Validate manifest.json if present — warn but don't fail if files
+    // listed in the manifest are missing (backwards compatibility).
+ let manifest_path = dir.join("manifest.json"); + if manifest_path.is_file() { + if let Ok(contents) = std::fs::read_to_string(&manifest_path) { + // Simple check: verify all listed files exist. + // The manifest lists files as JSON strings in a "files" array. + for line in contents.lines() { + let trimmed = line.trim().trim_matches(|c| c == '"' || c == ','); + if !trimmed.is_empty() + && !trimmed.starts_with('{') + && !trimmed.starts_with('}') + && !trimmed.starts_with('[') + && !trimmed.starts_with(']') + && !trimmed.contains(':') + { + let file_path = dir.join(trimmed); + if !file_path.exists() { + eprintln!( + "warning: manifest.json references missing file: {}", + trimmed + ); + } + } + } + } + } + Ok(gvproxy) } @@ -333,6 +361,60 @@ fn raise_nofile_limit() { } } +/// Log runtime provenance information for diagnostics. +/// +/// Prints the libkrun/libkrunfw versions, artifact hashes, and whether +/// a custom runtime is in use. This makes it easy to correlate VM issues +/// with the specific runtime bundle. +fn log_runtime_provenance(runtime_dir: &Path) { + if let Some(prov) = ffi::runtime_provenance() { + eprintln!("runtime: {}", runtime_dir.display()); + for krunfw in &prov.libkrunfw_paths { + let name = krunfw + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_else(|| "unknown".to_string()); + eprintln!(" libkrunfw: {name}"); + } + if let Some(ref sha) = prov.libkrunfw_sha256 { + let short = if sha.len() > 12 { &sha[..12] } else { sha }; + eprintln!(" sha256: {short}..."); + } + if prov.is_custom { + eprintln!(" type: custom (OpenShell-built)"); + // Parse provenance.json for additional details. + if let Some(ref json) = prov.provenance_json { + // Extract key fields without pulling in serde_json for this. 
+                for key in &["libkrunfw_commit", "kernel_version", "build_timestamp"] {
+                    if let Some(val) = extract_json_string(json, key) {
+                        eprintln!(" {}: {}", key.replace('_', "-"), val);
+                    }
+                }
+            }
+        } else {
+            eprintln!(" type: stock (system/homebrew)");
+        }
+    }
+}
+
+/// Simple JSON string value extractor (avoids serde_json dependency
+/// for this single use case).
+fn extract_json_string(json: &str, key: &str) -> Option<String> {
+    let pattern = format!("\"{}\"", key);
+    let idx = json.find(&pattern)?;
+    let after_key = &json[idx + pattern.len()..];
+    // Skip whitespace and colon
+    let after_colon = after_key.trim_start().strip_prefix(':')?;
+    let after_ws = after_colon.trim_start();
+    if after_ws.starts_with('"') {
+        let value_start = &after_ws[1..];
+        let end = value_start.find('"')?;
+        Some(value_start[..end].to_string())
+    } else {
+        None
+    }
+}
+
 fn clamp_log_level(level: u32) -> u32 {
     match level {
         0 => ffi::KRUN_LOG_LEVEL_OFF,
@@ -625,6 +707,12 @@
     configure_runtime_loader_env(runtime_dir)?;
     raise_nofile_limit();
+
+    // ── Log runtime provenance ─────────────────────────────────────
+    // After configuring the loader, trigger library loading so that
+    // provenance is captured before we proceed with VM configuration.
+ let _ = ffi::libkrun()?; + log_runtime_provenance(runtime_dir); + // ── Configure the microVM ────────────────────────────────────── let vm = VmContext::create(config.log_level)?; diff --git a/tasks/scripts/bundle-vm-runtime.sh b/tasks/scripts/bundle-vm-runtime.sh index 05a5a46f..bc91e9b3 100755 --- a/tasks/scripts/bundle-vm-runtime.sh +++ b/tasks/scripts/bundle-vm-runtime.sh @@ -51,6 +51,14 @@ if [ "${#KRUNFW_FILES[@]}" -eq 0 ]; then exit 1 fi +# Check for provenance.json (custom runtime indicator) +PROVENANCE_FILE="${LIB_DIR}/provenance.json" +IS_CUSTOM="false" +if [ -f "$PROVENANCE_FILE" ]; then + IS_CUSTOM="true" + echo "custom runtime detected (provenance.json present)" +fi + TARGETS=( "${ROOT}/target/debug" "${ROOT}/target/release" @@ -68,16 +76,25 @@ for target_dir in "${TARGETS[@]}"; do install -m 0644 "$krunfw" "${runtime_dir}/$(basename "$krunfw")" done + # Copy provenance.json if this is a custom runtime. + if [ "$IS_CUSTOM" = "true" ] && [ -f "$PROVENANCE_FILE" ]; then + install -m 0644 "$PROVENANCE_FILE" "${runtime_dir}/provenance.json" + fi + manifest_entries=() manifest_entries+=(' "libkrun.dylib"') manifest_entries+=(' "gvproxy"') for krunfw in "${KRUNFW_FILES[@]}"; do manifest_entries+=(" \"$(basename "$krunfw")\"") done + if [ "$IS_CUSTOM" = "true" ]; then + manifest_entries+=(' "provenance.json"') + fi cat > "${runtime_dir}/manifest.json" < Date: Tue, 24 Mar 2026 10:22:53 -0700 Subject: [PATCH 11/14] fix(vm): resolve CNI bridge and nftables errors for reliable pod networking Switch kube-proxy to nftables mode and add missing kernel config options (NFT_NUMGEN, NFT_FIB_IPV4/6, NFT_LIMIT, NFT_REDIR, NFT_TPROXY) plus xtables match modules required by CNI bridge masquerade. Add stale CNI state cleanup on boot (cni0 bridge, veth pairs, IPAM allocations, pod network namespaces, sandbox controller shim) to prevent 'route already exists' errors from persistent rootfs. 
Remove dual bridge/legacy-vm-net profile system in favor of bridge-only with fail-fast kernel validation. Drop host-mapped port 6443 (kube-apiserver) since it is not needed for normal gateway operation. Update bundle script to fall back to Homebrew for libkrun.dylib (VMM) while still requiring custom libkrunfw (kernel). --- architecture/custom-vm-runtime.md | 44 ++- crates/openshell-vm/runtime/README.md | 40 ++- .../runtime/build-custom-libkrunfw.sh | 161 ++++------ .../runtime/kernel/bridge-cni.config | 33 +- crates/openshell-vm/scripts/build-rootfs.sh | 2 +- crates/openshell-vm/scripts/gateway-init.sh | 299 ++++++++---------- crates/openshell-vm/scripts/verify-vm.sh | 23 +- crates/openshell-vm/src/lib.rs | 10 +- tasks/scripts/bundle-vm-runtime.sh | 13 +- 9 files changed, 284 insertions(+), 341 deletions(-) diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md index 0dbdaf01..7d0be284 100644 --- a/architecture/custom-vm-runtime.md +++ b/architecture/custom-vm-runtime.md @@ -7,8 +7,7 @@ lightweight microVM with Apple Hypervisor.framework (macOS) or KVM (Linux). The is embedded inside `libkrunfw`, a companion library that packages a pre-built Linux kernel. The stock `libkrunfw` from Homebrew ships a minimal kernel without bridge, netfilter, or -conntrack support. This limits Kubernetes networking to a point-to-point (ptp) CNI -configuration without kube-proxy or service VIPs. +conntrack support. This is insufficient for Kubernetes pod networking. The custom libkrunfw runtime adds bridge CNI, iptables/nftables, and conntrack support to the VM kernel, enabling standard Kubernetes networking. 
@@ -31,33 +30,25 @@ Host (macOS/Linux) Guest VM ├── gateway-init.sh (PID 1) -│ ├── Detects kernel capabilities -│ ├── Selects network profile: bridge | legacy-vm-net -│ ├── Configures CNI +│ ├── Validates kernel capabilities (fail-fast) +│ ├── Configures bridge CNI │ └── Execs k3s server └── check-vm-capabilities.sh (diagnostics) ``` -## Network Profiles +## Network Profile -The VM init script auto-detects kernel capabilities and selects the appropriate -networking profile. This can be overridden via `OPENSHELL_VM_NET_PROFILE`. +The VM uses the bridge CNI profile, which requires a custom libkrunfw with bridge and +netfilter kernel support. The init script validates these capabilities at boot and fails +fast with an actionable error if they are missing. -### Bridge Profile (custom runtime) +### Bridge Profile - CNI: bridge plugin with `cni0` interface -- IP masquerade: enabled (iptables) -- kube-proxy: enabled +- IP masquerade: enabled (iptables-legacy via CNI bridge plugin) +- kube-proxy: enabled (nftables mode) - Service VIPs: functional (ClusterIP, NodePort) -- hostNetwork workarounds: removed - -### Legacy VM Net Profile (stock runtime) - -- CNI: ptp plugin (point-to-point) -- IP masquerade: disabled (no iptables) -- kube-proxy: disabled -- Service VIPs: not functional (direct IP routing only) -- hostNetwork workarounds: required for some pods +- hostNetwork workarounds: not required ## Runtime Provenance @@ -98,10 +89,12 @@ libkrunfw kernel: | Netfilter | `CONFIG_NETFILTER` | iptables/nftables framework | | Connection tracking | `CONFIG_NF_CONNTRACK` | NAT state tracking | | NAT | `CONFIG_NF_NAT` | Service VIP DNAT/SNAT | -| iptables | `CONFIG_IP_NF_IPTABLES` | kube-proxy iptables mode | -| nftables | `CONFIG_NF_TABLES` | kube-proxy nft mode (future) | +| iptables | `CONFIG_IP_NF_IPTABLES` | CNI bridge masquerade | +| nftables | `CONFIG_NF_TABLES` | kube-proxy nftables mode (primary) | | veth | `CONFIG_VETH` | Pod network namespace pairs | | IPVS | 
`CONFIG_IP_VS` | kube-proxy IPVS mode (optional) | +| Landlock | `CONFIG_SECURITY_LANDLOCK` | Filesystem sandboxing support | +| Seccomp filter | `CONFIG_SECCOMP_FILTER` | Syscall filtering support | ## Verification @@ -116,10 +109,9 @@ Two verification tools are provided: ## Rollout Strategy 1. Custom runtime support is opt-in via `OPENSHELL_VM_RUNTIME_SOURCE_DIR`. -2. Auto-detection selects the correct network profile at boot. -3. The stock runtime path (`legacy-vm-net`) remains the default until the custom - runtime is proven stable. -4. Rollback: unset the env var and re-bundle with stock libraries. +2. The init script validates kernel capabilities at boot and fails fast if missing. +3. Rollback: unset the env var and re-bundle with stock libraries (note: stock + libraries lack bridge/netfilter and pod networking will not work). ## Related Files diff --git a/crates/openshell-vm/runtime/README.md b/crates/openshell-vm/runtime/README.md index 891d1a23..e8289685 100644 --- a/crates/openshell-vm/runtime/README.md +++ b/crates/openshell-vm/runtime/README.md @@ -9,11 +9,11 @@ The stock `libkrunfw` (from Homebrew) ships a kernel without bridge, netfilter, or conntrack support. This means the VM cannot: - Create `cni0` bridge interfaces (required by the bridge CNI plugin) -- Run kube-proxy (requires iptables/nftables) +- Run kube-proxy (requires nftables) - Route service VIP traffic (requires NAT/conntrack) The custom runtime builds libkrunfw with an additional kernel config fragment -that enables these features. +that enables these networking and sandboxing features. ## Directory Structure @@ -21,7 +21,7 @@ that enables these features. 
runtime/ build-custom-libkrunfw.sh # Build script for custom libkrunfw kernel/ - bridge-cni.config # Kernel config fragment (bridge + netfilter) + bridge-cni.config # Kernel config fragment (networking + sandboxing) ``` ## Building @@ -66,23 +66,16 @@ mise run vm:bundle-runtime mise run vm ``` -## Network Profiles +## Networking -The VM init script (`gateway-init.sh`) auto-detects the kernel capabilities -and selects the appropriate networking profile: +The VM uses bridge CNI for pod networking with nftables-mode kube-proxy for +service VIP / ClusterIP support. The kernel config fragment enables both +iptables (for CNI bridge masquerade) and nftables (for kube-proxy). -| Profile | Kernel | CNI | kube-proxy | Service VIPs | -|---------|--------|-----|------------|--------------| -| `bridge` | Custom (bridge+netfilter) | bridge CNI (`cni0`) | Enabled | Yes | -| `legacy-vm-net` | Stock (no netfilter) | ptp CNI | Disabled | No (direct IP) | - -To force a specific profile: - -```bash -# Inside the VM (set in gateway-init.sh env): -export OPENSHELL_VM_NET_PROFILE=bridge # Force bridge CNI -export OPENSHELL_VM_NET_PROFILE=legacy-vm-net # Force legacy ptp CNI -``` +k3s is started with `--kube-proxy-arg=proxy-mode=nftables` because the +bundled iptables binaries in k3s have revision-negotiation issues with the +libkrun kernel's xt_MARK module. nftables mode uses the kernel's nf_tables +subsystem directly and avoids this entirely. ## Runtime Provenance @@ -158,14 +151,17 @@ If this fails, you are running the stock runtime. Build and use the custom one. ### kube-proxy CrashLoopBackOff -The kernel does not have netfilter support. Verify: +kube-proxy runs in nftables mode. If it crashes, verify nftables support: ```bash # Inside VM: -iptables -L -n +nft list ruleset ``` -If this fails with "iptables not found" or "modprobe: can't change directory", -the kernel lacks CONFIG_NETFILTER. Use the custom runtime. +If this fails, the kernel may lack `CONFIG_NF_TABLES`. 
Use the custom runtime. + +Common errors: +- `unknown option "--xor-mark"`: kube-proxy is running in iptables mode instead + of nftables. Verify `--kube-proxy-arg=proxy-mode=nftables` is in the k3s args. ### Runtime mismatch after upgrade diff --git a/crates/openshell-vm/runtime/build-custom-libkrunfw.sh b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh index ab2aa1bf..a69fc0c1 100755 --- a/crates/openshell-vm/runtime/build-custom-libkrunfw.sh +++ b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh @@ -152,6 +152,13 @@ REQUIRED=( CONFIG_IP_NF_IPTABLES CONFIG_IP_NF_FILTER CONFIG_IP_NF_NAT + CONFIG_NF_TABLES + CONFIG_NFT_NUMGEN + CONFIG_NFT_FIB_IPV4 + CONFIG_NFT_FIB_IPV6 + CONFIG_NFT_CT + CONFIG_NFT_NAT + CONFIG_NFT_MASQ CONFIG_VETH CONFIG_NET_NS ) @@ -195,103 +202,75 @@ fi echo " Kernel source dir: ${KERNEL_DIR_NAME}" if [ "$(uname -s)" = "Darwin" ]; then - # On macOS, the entire kernel build (extract, patch, config, compile) - # must happen inside a Linux container since the kernel build system - # requires a Linux toolchain. We mount the checkout, inject the - # config fragment, and produce kernel.c inside the container. - - if ! command -v docker &>/dev/null; then - echo "ERROR: docker is required to build the kernel on macOS" >&2 + # On macOS, use krunvm to build the kernel inside a lightweight Linux VM. + # This matches the upstream libkrunfw build approach and avoids all the + # issues with Docker emulation and APFS filesystem limitations. + # + # Prerequisites: brew tap slp/krun && brew install krunvm + + if ! command -v krunvm &>/dev/null; then + echo "ERROR: krunvm is required to build the kernel on macOS" >&2 + echo " Install with: brew tap slp/krun && brew install krunvm" >&2 exit 1 fi - echo "==> Building kernel inside Docker (macOS detected)..." - - # Pre-build a Docker image with all build dependencies so the - # (potentially emulated) apt-get/pip work is cached across runs. - BUILDER_IMAGE="libkrunfw-builder:bookworm" - if ! 
docker image inspect "${BUILDER_IMAGE}" >/dev/null 2>&1; then - echo " Building Docker toolchain image (first time only)..." - docker build --platform linux/arm64 -t "${BUILDER_IMAGE}" - <<'DOCKERFILE' -FROM debian:bookworm-slim -RUN apt-get update -qq && \ - apt-get install -y -qq make gcc bc flex bison libelf-dev python3 \ - coreutils curl xz-utils patch libssl-dev cpio >/dev/null 2>&1 && \ - rm -rf /var/lib/apt/lists/* -DOCKERFILE + echo "==> Building kernel inside krunvm (macOS detected)..." + + VM_NAME="libkrunfw-openshell" + + # Clean up any leftover VM from a previous failed run + krunvm delete "${VM_NAME}" 2>/dev/null || true + + # Copy the config fragment into the libkrunfw tree so the VM can see it. + # The merge hook (MERGE_HOOK) is already written there by the cat above. + cp -f "${KERNEL_CONFIG_FRAGMENT}" "${LIBKRUNFW_DIR}/openshell-bridge-cni.config" + + echo " Creating VM..." + # krunvm may print "The volume has been configured" on first use of a + # volume path and exit non-zero. Retry once if that happens. + if ! krunvm create fedora \ + --name "${VM_NAME}" \ + --cpus 4 \ + --mem 4096 \ + -v "${LIBKRUNFW_DIR}:/work" \ + -w /work; then + echo " Retrying VM creation..." + krunvm create fedora \ + --name "${VM_NAME}" \ + --cpus 4 \ + --mem 4096 \ + -v "${LIBKRUNFW_DIR}:/work" \ + -w /work fi - # Stage only the files the build needs into a temp dir so we - # avoid copying the enormous kernel source tree over virtiofs. - DOCKER_STAGE=$(mktemp -d) - trap 'rm -rf "${DOCKER_STAGE}"' EXIT - - echo " Staging build inputs..." 
- cp "${LIBKRUNFW_DIR}/Makefile" "${DOCKER_STAGE}/" - cp "${LIBKRUNFW_DIR}/bin2cbundle.py" "${DOCKER_STAGE}/" - # Copy base kernel config(s) and patches - for f in "${LIBKRUNFW_DIR}"/config-libkrunfw*; do - [ -f "$f" ] && cp "$f" "${DOCKER_STAGE}/" - done - if [ -d "${LIBKRUNFW_DIR}/patches" ]; then - cp -r "${LIBKRUNFW_DIR}/patches" "${DOCKER_STAGE}/patches" - fi - if [ -d "${LIBKRUNFW_DIR}/patches-tee" ]; then - cp -r "${LIBKRUNFW_DIR}/patches-tee" "${DOCKER_STAGE}/patches-tee" - fi - # Copy the cached tarball if present so we don't re-download - if [ -d "${LIBKRUNFW_DIR}/tarballs" ]; then - cp -r "${LIBKRUNFW_DIR}/tarballs" "${DOCKER_STAGE}/tarballs" - fi + echo " Installing build dependencies..." + krunvm start "${VM_NAME}" /usr/bin/dnf -- install -y \ + 'dnf-command(builddep)' python3-pyelftools + + krunvm start "${VM_NAME}" /usr/bin/dnf -- builddep -y kernel + + # Step 1: prepare kernel sources (download, extract, patch, base config) + echo " Preparing kernel sources..." + krunvm start "${VM_NAME}" /usr/bin/make -- "${KERNEL_DIR_NAME}" + + # Step 2: merge the OpenShell config fragment + echo " Merging OpenShell kernel config fragment..." + krunvm start "${VM_NAME}" /usr/bin/bash -- \ + /work/openshell-kconfig-hook.sh "/work/${KERNEL_DIR_NAME}" /work/openshell-bridge-cni.config + + # Step 3: build the kernel and generate the C bundle + echo " Building kernel (this is the slow part)..." + krunvm start "${VM_NAME}" /usr/bin/make -- -j4 - docker run --rm \ - -v "${DOCKER_STAGE}:/work" \ - -v "${KERNEL_CONFIG_FRAGMENT}:/fragment/bridge-cni.config:ro" \ - -v "${MERGE_HOOK}:/fragment/merge-hook.sh:ro" \ - -w /build \ - --platform linux/arm64 \ - "${BUILDER_IMAGE}" \ - bash -c ' - set -euo pipefail - - KDIR="'"${KERNEL_DIR_NAME}"'" - - # Copy staged inputs to container-local filesystem so tar - # extraction does not hit macOS APFS bind-mount limitations. - echo " Copying build inputs to container filesystem..." 
- cp -r /work/* /build/ - - # Patch bin2cbundle.py to make the elftools import lazy. - # We only use -t Image (raw path) which does not need elftools, - # but the top-level import would fail without pyelftools installed. - sed -i "s/^from elftools/# lazy: from elftools/" bin2cbundle.py - - # Step 1: prepare kernel sources (download, extract, patch, base config, olddefconfig) - echo " Preparing kernel sources..." - make "$KDIR" - - # Step 2: merge the OpenShell config fragment - echo " Merging OpenShell kernel config fragment..." - bash /fragment/merge-hook.sh "/build/$KDIR" /fragment/bridge-cni.config - - # Step 3: build the kernel and generate the C bundle - echo " Building kernel (this is the slow part)..." - make -j"$(nproc)" "$KDIR"/arch/arm64/boot/Image - - echo " Generating kernel.c bundle..." - python3 bin2cbundle.py -t Image "$KDIR"/arch/arm64/boot/Image kernel.c - - # Copy artifacts back to the bind-mounted staging dir - echo " Copying artifacts back to host..." - cp /build/kernel.c /work/kernel.c - cp /build/"$KDIR"/.config /work/kernel.config - ' - - # Move artifacts from staging dir to the libkrunfw checkout - cp "${DOCKER_STAGE}/kernel.c" "${LIBKRUNFW_DIR}/kernel.c" - if [ -f "${DOCKER_STAGE}/kernel.config" ]; then - mkdir -p "${LIBKRUNFW_DIR}/${KERNEL_DIR_NAME}" - cp "${DOCKER_STAGE}/kernel.config" "${LIBKRUNFW_DIR}/${KERNEL_DIR_NAME}/.config" + echo " Cleaning up VM..." + krunvm delete "${VM_NAME}" + + # Clean up temp files from the libkrunfw tree + rm -f "${LIBKRUNFW_DIR}/openshell-bridge-cni.config" + + if [ ! 
-f "${LIBKRUNFW_DIR}/kernel.c" ]; then + echo "ERROR: kernel.c was not produced — build failed" >&2 + exit 1 fi # Compile the shared library on the host (uses host cc for a .dylib) diff --git a/crates/openshell-vm/runtime/kernel/bridge-cni.config b/crates/openshell-vm/runtime/kernel/bridge-cni.config index 2e484b07..7b9610e3 100644 --- a/crates/openshell-vm/runtime/kernel/bridge-cni.config +++ b/crates/openshell-vm/runtime/kernel/bridge-cni.config @@ -35,11 +35,27 @@ CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y +# ── Netfilter xtables match modules (required by kube-proxy & kubelet) ─ +# kube-proxy uses xt_conntrack for stateful rules and xt_comment for +# labeling chains. Without these, iptables fails with: +# "Couldn't load match 'conntrack': No such file or directory" +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y +CONFIG_NETFILTER_XT_MATCH_RECENT=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y + # ── NAT (required for service VIP / DNAT / SNAT) ────────────────────── CONFIG_NF_NAT=y CONFIG_NF_NAT_MASQUERADE_IPV4=y -# ── iptables (kube-proxy iptables mode) ──────────────────────────────── +# ── iptables (CNI bridge masquerade + compat) ────────────────────────── CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_NAT=y @@ -47,7 +63,9 @@ CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_REJECT=y -# ── nftables (kube-proxy nft mode, future-proofing) ──────────────────── +# ── nftables (kube-proxy nftables mode — primary proxy backend) ───────── +# kube-proxy nftables proxier requires: numgen (random LB), fib (local +# address detection), counter, ct, nat, masq, reject, limit, redir. 
CONFIG_NF_TABLES=y CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=y @@ -55,7 +73,12 @@ CONFIG_NFT_NAT=y CONFIG_NFT_MASQ=y CONFIG_NFT_REJECT=y CONFIG_NFT_COMPAT=y -CONFIG_NFT_COUNTER=y +CONFIG_NFT_NUMGEN=y +CONFIG_NFT_FIB_IPV4=y +CONFIG_NFT_FIB_IPV6=y +CONFIG_NFT_LIMIT=y +CONFIG_NFT_REDIR=y +CONFIG_NFT_TPROXY=y # ── IP forwarding and routing (required for pod-to-pod) ──────────────── CONFIG_IP_ADVANCED_ROUTER=y @@ -90,3 +113,7 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PIDS=y CONFIG_MEMCG=y + +# ── Security features required by the sandbox runtime ─────────────────── +CONFIG_SECURITY_LANDLOCK=y +CONFIG_SECCOMP_FILTER=y diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh index d2fed06e..4c406497 100755 --- a/crates/openshell-vm/scripts/build-rootfs.sh +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -65,7 +65,7 @@ docker rm -f "${INIT_CONTAINER_NAME}" 2>/dev/null || true echo "==> Building base image..." docker build --platform linux/arm64 -t "${BASE_IMAGE_TAG}" -f - . <<'DOCKERFILE' -FROM ubuntu:22.04 +FROM nvcr.io/nvidia/base/ubuntu:noble-20251013 RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ diff --git a/crates/openshell-vm/scripts/gateway-init.sh b/crates/openshell-vm/scripts/gateway-init.sh index 0d6c9274..ab1b8c08 100755 --- a/crates/openshell-vm/scripts/gateway-init.sh +++ b/crates/openshell-vm/scripts/gateway-init.sh @@ -153,14 +153,16 @@ find /run -name '*.sock' -delete 2>/dev/null || true # # The kine (SQLite) DB cleanup in build-rootfs.sh already removes stale # pod/sandbox records from k3s etcd, preventing kubelet from reconciling -# against stale sandboxes. Containerd's internal sandbox records in -# meta.db are harmless because the CRI plugin reconciles with kubelet -# on startup — any sandboxes unknown to kubelet are cleaned up gracefully -# without triggering SandboxChanged events. +# against stale sandboxes. 
CONTAINERD_DIR="/var/lib/rancher/k3s/agent/containerd" if [ -d "$CONTAINERD_DIR" ]; then # Remove runtime task state (stale shim PIDs, sockets from dead processes). rm -rf "${CONTAINERD_DIR}/io.containerd.runtime.v2.task" 2>/dev/null || true + # Remove sandbox controller shim state. Stale sandbox records cause + # containerd to reuse network namespaces from previous boots, which + # already have routes configured. The CNI bridge plugin then fails + # with "file exists" when adding the default route on retry. + rm -rf "${CONTAINERD_DIR}/io.containerd.sandbox.controller.v1.shim" 2>/dev/null || true # Clean stale ingest temp files from the content store. rm -rf "${CONTAINERD_DIR}/io.containerd.content.v1.content/ingest" 2>/dev/null || true mkdir -p "${CONTAINERD_DIR}/io.containerd.content.v1.content/ingest" @@ -171,6 +173,83 @@ rm -rf /run/k3s 2>/dev/null || true ts "stale artifacts cleaned" +# ── Clean stale CNI / pod networking state ────────────────────────────── +# The rootfs persists across VM restarts via virtio-fs. Previous pod +# sandboxes leave behind veth pairs, bridge routes, host-local IPAM +# allocations, and network namespaces. If not cleaned, the bridge CNI +# plugin fails with: +# "failed to add route ... file exists" +# because the default route via cni0 already exists from the prior boot, +# or a stale network namespace already has the route configured. + +# Tear down the CNI bridge and its associated routes. +if ip link show cni0 >/dev/null 2>&1; then + ip link set cni0 down 2>/dev/null || true + ip link delete cni0 2>/dev/null || true + ts "deleted stale cni0 bridge" +fi + +# Remove any leftover veth pairs (CNI bridge plugin creates vethXXXX). +for veth in $(ip -o link show type veth 2>/dev/null | awk -F': ' '{print $2}' | cut -d'@' -f1); do + ip link delete "$veth" 2>/dev/null || true +done + +# Flush host-local IPAM allocations so IPs can be reassigned cleanly. 
+rm -rf /var/lib/cni/networks 2>/dev/null || true +rm -rf /var/lib/cni/results 2>/dev/null || true + +# Flush any stale CNI-added routes for the pod CIDR. These can conflict +# with routes the bridge plugin tries to add on the next boot. +ip route flush 10.42.0.0/24 2>/dev/null || true + +# Clean up stale pod network namespaces from previous boots. Containerd +# creates named netns under /var/run/netns/ for each pod sandbox. If +# these persist across VM restarts, the CNI bridge plugin fails when +# adding routes because the stale netns already has the default route +# configured from the prior boot. Removing all named network namespaces +# forces containerd to create fresh ones. +if [ -d /var/run/netns ]; then + for ns in $(ip netns list 2>/dev/null | awk '{print $1}'); do + ip netns delete "$ns" 2>/dev/null || true + done +fi +# Also clean the netns bind-mount directory used by containerd/CRI. +# Containerd may use /run/netns/ or /var/run/netns/ (same via tmpfs). +rm -rf /run/netns/* 2>/dev/null || true +rm -rf /var/run/netns/* 2>/dev/null || true + +ts "stale CNI networking state cleaned" + +# ── Network profile detection ─────────────────────────────────────────── +# Detect early so manifest patching and k3s flags both use the same value. +# +# "bridge" is the only supported profile. It requires a custom libkrunfw +# with CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_NAT built in. If the +# kernel lacks these capabilities the VM cannot run pod networking and we +# fail fast with an actionable error. + +NET_PROFILE="bridge" + +ts "network profile: ${NET_PROFILE}" + +# Validate that the kernel actually has the required capabilities. +_caps_ok=true +if ! ip link add _cap_br0 type bridge 2>/dev/null; then + echo "ERROR: kernel lacks bridge support (CONFIG_BRIDGE). Use a custom libkrunfw." >&2 + _caps_ok=false +else + ip link del _cap_br0 2>/dev/null || true +fi +if [ ! -d /proc/sys/net/netfilter ] && [ ! 
-f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + echo "ERROR: kernel lacks netfilter support (CONFIG_NETFILTER). Use a custom libkrunfw." >&2 + _caps_ok=false +fi +if [ "$_caps_ok" = false ]; then + echo "FATAL: required kernel capabilities missing — cannot configure pod networking." >&2 + echo "See: architecture/custom-vm-runtime.md for build instructions." >&2 + exit 1 +fi + # ── Deploy bundled manifests (cold boot only) ─────────────────────────── # On pre-initialized rootfs, manifests are already in place from the # build-time k3s boot. Skip this entirely for fast startup. @@ -211,15 +290,13 @@ if [ -f "$HELMCHART" ]; then # Use pre-loaded images — don't pull from registry. sed -i 's|pullPolicy: Always|pullPolicy: IfNotPresent|' "$HELMCHART" - if [ "$NET_PROFILE" = "bridge" ]; then - # Bridge CNI: pods use normal pod networking, not hostNetwork. - sed -i 's|__HOST_NETWORK__|false|g' "$HELMCHART" - sed -i 's|__AUTOMOUNT_SA_TOKEN__|true|g' "$HELMCHART" - else - # Legacy: VM bootstrap runs without CNI bridge networking. - sed -i 's|__HOST_NETWORK__|true|g' "$HELMCHART" - sed -i 's|__AUTOMOUNT_SA_TOKEN__|false|g' "$HELMCHART" - fi + # Bridge CNI: pods use normal pod networking, not hostNetwork. + # The pre-init in build-rootfs.sh replaces __HOST_NETWORK__ with "true" + # for Docker container networking. At VM boot with bridge CNI we need + # to override it back to "false" so pods use the CNI bridge network. + sed -i 's|hostNetwork: true|hostNetwork: false|g' "$HELMCHART" + sed -i 's|__HOST_NETWORK__|false|g' "$HELMCHART" + sed -i 's|__AUTOMOUNT_SA_TOKEN__|true|g' "$HELMCHART" sed -i 's|__KUBECONFIG_HOST_PATH__|"/etc/rancher/k3s"|g' "$HELMCHART" sed -i 's|__PERSISTENCE_ENABLED__|false|g' "$HELMCHART" @@ -231,115 +308,34 @@ fi AGENT_MANIFEST="$K3S_MANIFESTS/agent-sandbox.yaml" if [ -f "$AGENT_MANIFEST" ]; then - if [ "$NET_PROFILE" = "bridge" ]; then - # Bridge CNI: agent-sandbox uses normal pod networking. 
- # kube-proxy is enabled so kubernetes.default.svc is reachable - # via ClusterIP — no need for KUBERNETES_SERVICE_HOST override. - sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" - sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" - ts "agent-sandbox: using pod networking (bridge profile)" - else - # Legacy: keep agent-sandbox on pod networking to avoid host port - # clashes. Point in-cluster client traffic at the API server node - # IP because kube-proxy is disabled in VM mode. - sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" - sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" - if ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then - sed -i 's|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0|image: registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0\ - args:\ - - -metrics-bind-address=:8082\ - env:\ - - name: KUBERNETES_SERVICE_HOST\ - value: 192.168.127.2\ - - name: KUBERNETES_SERVICE_PORT\ - value: "6443"|g' "$AGENT_MANIFEST" - else - sed -i 's|value: 127.0.0.1|value: 192.168.127.2|g' "$AGENT_MANIFEST" - fi - if grep -q 'hostNetwork: true' "$AGENT_MANIFEST" \ - || grep -q 'ClusterFirstWithHostNet' "$AGENT_MANIFEST" \ - || ! grep -q 'KUBERNETES_SERVICE_HOST' "$AGENT_MANIFEST" \ - || ! grep -q 'metrics-bind-address=:8082' "$AGENT_MANIFEST"; then - echo "ERROR: failed to patch agent-sandbox manifest for VM networking constraints: $AGENT_MANIFEST" >&2 - exit 1 - fi - ts "agent-sandbox: patched for legacy-vm-net (API server override)" - fi -fi - -# local-storage implies local-path-provisioner. In legacy mode it -# requires CNI bridge networking that is unavailable. In bridge mode -# it can work but we leave it disabled for now until validated. -if [ "$NET_PROFILE" != "bridge" ]; then - rm -f "$K3S_MANIFESTS/local-storage.yaml" 2>/dev/null || true -fi - -# ── CNI configuration ─────────────────────────────────────────────────── -# Two networking profiles: -# -# 1. 
"bridge" (default when kernel supports it): -# Uses the bridge CNI plugin with iptables masquerade. Requires -# CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_NAT in the VM kernel. -# This is the standard Kubernetes CNI path — compatible with -# kube-proxy, service VIPs, and portmap. -# -# 2. "legacy-vm-net" (fallback for stock libkrunfw without netfilter): -# Uses ptp CNI without iptables. No masquerade, no portmap. -# kube-proxy must be disabled. This is the original VM path. -# -# The profile is auto-detected from kernel capabilities but can be -# forced via OPENSHELL_VM_NET_PROFILE=bridge|legacy-vm-net. - -NET_PROFILE="${OPENSHELL_VM_NET_PROFILE:-auto}" - -detect_net_profile() { - # Check for bridge + netfilter kernel support. - # If we can create a bridge and the netfilter sysctl exists, use bridge CNI. - local has_bridge=false - local has_netfilter=false - - if ip link add _probe_br0 type bridge 2>/dev/null; then - ip link del _probe_br0 2>/dev/null || true - has_bridge=true - fi - - if [ -d /proc/sys/net/netfilter ] || [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then - has_netfilter=true - fi - - if [ "$has_bridge" = true ] && [ "$has_netfilter" = true ]; then - echo "bridge" - else - echo "legacy-vm-net" - fi -} - -if [ "$NET_PROFILE" = "auto" ]; then - NET_PROFILE=$(detect_net_profile) + # Bridge CNI: agent-sandbox uses normal pod networking. + # kube-proxy is enabled so kubernetes.default.svc is reachable + # via ClusterIP — no need for KUBERNETES_SERVICE_HOST override. + sed -i '/hostNetwork: true/d' "$AGENT_MANIFEST" + sed -i '/dnsPolicy: ClusterFirstWithHostNet/d' "$AGENT_MANIFEST" + ts "agent-sandbox: using pod networking (bridge profile)" fi -ts "network profile: ${NET_PROFILE}" +# ── CNI configuration (bridge) ────────────────────────────────────────── +# Uses the bridge CNI plugin with iptables masquerade. Requires +# CONFIG_BRIDGE, CONFIG_NETFILTER, CONFIG_NF_NAT in the VM kernel +# (validated above at boot). 
kube-proxy uses nftables mode for service +# VIP routing. CNI_CONF_DIR="/etc/cni/net.d" CNI_BIN_DIR="/opt/cni/bin" mkdir -p "$CNI_CONF_DIR" "$CNI_BIN_DIR" -if [ "$NET_PROFILE" = "bridge" ]; then - # ── Bridge CNI (full Kubernetes networking) ───────────────────── - # This path requires a custom libkrunfw with bridge + netfilter - # kernel support. Creates a cni0 bridge, uses iptables masquerade, - # and is compatible with kube-proxy. - - # Enable IP forwarding (required for masquerade). - echo 1 > /proc/sys/net/ipv4/ip_forward 2>/dev/null || true +# Enable IP forwarding (required for masquerade). +echo 1 > /proc/sys/net/ipv4/ip_forward 2>/dev/null || true - # Enable bridge netfilter call (required for kube-proxy to see - # bridged traffic). - if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then - echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables 2>/dev/null || true - fi +# Enable bridge netfilter call (required for CNI bridge masquerade to +# see bridged traffic). +if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables 2>/dev/null || true +fi - cat > "$CNI_CONF_DIR/10-bridge.conflist" << 'CNICFG' +cat > "$CNI_CONF_DIR/10-bridge.conflist" << 'CNICFG' { "cniVersion": "1.0.0", "name": "bridge", @@ -353,8 +349,7 @@ if [ "$NET_PROFILE" = "bridge" ]; then "hairpinMode": true, "ipam": { "type": "host-local", - "ranges": [[{ "subnet": "10.42.0.0/24" }]], - "routes": [{ "dst": "0.0.0.0/0" }] + "ranges": [[{ "subnet": "10.42.0.0/24" }]] } }, { @@ -369,53 +364,19 @@ if [ "$NET_PROFILE" = "bridge" ]; then } CNICFG - # Remove any legacy ptp config. - rm -f "$CNI_CONF_DIR/10-ptp.conflist" 2>/dev/null || true - - ts "bridge CNI configured (cni0 + iptables masquerade)" -else - # ── Legacy ptp CNI (iptables-free) ───────────────────────────── - # The libkrun VM kernel has no netfilter/iptables support. Flannel's - # masquerade rules and kube-proxy both require iptables and crash - # without it. 
We disable both and use a simple ptp CNI with - # host-local IPAM instead. This avoids linux bridge requirements. - # - # ipMasq=false avoids any iptables calls in the plugin. - # portmap plugin removed — it requires iptables for DNAT rules. - - cat > "$CNI_CONF_DIR/10-ptp.conflist" << 'CNICFG' -{ - "cniVersion": "1.0.0", - "name": "ptp", - "plugins": [ - { - "type": "ptp", - "ipMasq": false, - "ipam": { - "type": "host-local", - "ranges": [[{ "subnet": "10.42.0.0/24" }]], - "routes": [{ "dst": "0.0.0.0/0" }] - } - }, - { - "type": "loopback" - } - ] -} -CNICFG - - # Remove any bridge config. - rm -f "$CNI_CONF_DIR/10-bridge.conflist" 2>/dev/null || true +# Remove any stale legacy ptp config. +rm -f "$CNI_CONF_DIR/10-ptp.conflist" 2>/dev/null || true - ts "ptp CNI configured (iptables-free, no linux bridge)" -fi +ts "bridge CNI configured (cni0 + iptables masquerade)" # Symlink k3s-bundled CNI binaries to the default containerd bin path. # k3s extracts its tools to /var/lib/rancher/k3s/data//bin/. +# Use -e (not -f) because k3s ships these as symlinks to a `cni` multicall +# binary. K3S_DATA_BIN=$(find /var/lib/rancher/k3s/data -maxdepth 2 -name bin -type d 2>/dev/null | head -1) if [ -n "$K3S_DATA_BIN" ]; then - for plugin in bridge ptp host-local loopback bandwidth portmap; do - [ -f "$K3S_DATA_BIN/$plugin" ] && ln -sf "$K3S_DATA_BIN/$plugin" "$CNI_BIN_DIR/$plugin" + for plugin in bridge host-local loopback bandwidth portmap; do + [ -e "$K3S_DATA_BIN/$plugin" ] && ln -sf "$K3S_DATA_BIN/$plugin" "$CNI_BIN_DIR/$plugin" done ts "CNI binaries linked from $K3S_DATA_BIN" else @@ -427,11 +388,15 @@ fi rm -f "/var/lib/rancher/k3s/agent/etc/cni/net.d/10-flannel.conflist" 2>/dev/null || true # ── Start k3s ────────────────────────────────────────────────────────── -# Flags tuned for fast single-node startup. The k3s flags vary depending -# on the network profile: +# Flags tuned for fast single-node startup. 
Bridge CNI handles pod +# networking; kube-proxy runs in nftables mode for service VIP / ClusterIP +# support. # -# bridge: kube-proxy enabled, flannel disabled (bridge CNI handles it) -# legacy-vm-net: kube-proxy disabled (no iptables), flannel disabled +# nftables mode: k3s bundles its own iptables binaries whose MARK target +# doesn't negotiate xt_MARK revision 2 correctly with the libkrun kernel, +# causing --xor-mark failures. nftables mode uses the kernel's nf_tables +# subsystem directly and sidesteps the issue entirely. The kernel is +# configured with CONFIG_NF_TABLES=y and related modules. K3S_ARGS=( --disable=traefik,servicelb,metrics-server,coredns @@ -443,19 +408,9 @@ K3S_ARGS=( --tls-san=localhost,127.0.0.1,10.0.2.15,192.168.127.2 --flannel-backend=none --snapshotter=native + --kube-proxy-arg=proxy-mode=nftables ) -if [ "$NET_PROFILE" = "bridge" ]; then - # With bridge CNI + iptables, kube-proxy can run. Don't disable it. - # local-storage can also work with bridge networking. - ts "starting k3s server (bridge profile — kube-proxy enabled)" -else - # Legacy: no iptables means no kube-proxy and no local-storage. - K3S_ARGS+=( - --disable=local-storage - --disable-kube-proxy - ) - ts "starting k3s server (legacy-vm-net profile — kube-proxy disabled)" -fi +ts "starting k3s server (bridge CNI + nftables kube-proxy)" exec /usr/local/bin/k3s server "${K3S_ARGS[@]}" diff --git a/crates/openshell-vm/scripts/verify-vm.sh b/crates/openshell-vm/scripts/verify-vm.sh index 31e65931..a314301f 100755 --- a/crates/openshell-vm/scripts/verify-vm.sh +++ b/crates/openshell-vm/scripts/verify-vm.sh @@ -157,24 +157,18 @@ check "kubernetes service has ClusterIP" "networking" \ "kubectl get svc kubernetes -o jsonpath='{.spec.clusterIP}' | grep -q ." 
# Check if bridge CNI is in use (cni0 bridge exists) -CNI_PROFILE="unknown" if kubectl exec -n openshell openshell-0 -- ip link show cni0 >/dev/null 2>&1; then - CNI_PROFILE="bridge" + echo " CNI profile detected: bridge" else - CNI_PROFILE="legacy-vm-net" + echo " WARNING: cni0 bridge not detected — bridge CNI may not be running yet" fi -echo " CNI profile detected: ${CNI_PROFILE}" -if [ "$CNI_PROFILE" = "bridge" ]; then - check "cni0 bridge exists in pod" "networking" \ - "kubectl exec -n openshell openshell-0 -- ip link show cni0 2>/dev/null" +check "cni0 bridge exists in pod" "networking" \ + "kubectl exec -n openshell openshell-0 -- ip link show cni0 2>/dev/null" - # With bridge CNI, kubernetes.default.svc should be reachable. - check "kubernetes.default.svc reachable from pod" "networking" \ - "kubectl exec -n openshell openshell-0 -- wget -q -O /dev/null --timeout=5 https://kubernetes.default.svc/healthz 2>/dev/null || kubectl exec -n openshell openshell-0 -- curl -sk --connect-timeout 5 https://kubernetes.default.svc/healthz 2>/dev/null" -else - echo " (skipping bridge-specific checks for legacy-vm-net profile)" -fi +# With bridge CNI, kubernetes.default.svc should be reachable. +check "kubernetes.default.svc reachable from pod" "networking" \ + "kubectl exec -n openshell openshell-0 -- wget -q -O /dev/null --timeout=5 https://kubernetes.default.svc/healthz 2>/dev/null || kubectl exec -n openshell openshell-0 -- curl -sk --connect-timeout 5 https://kubernetes.default.svc/healthz 2>/dev/null" check "no bridge creation errors in events" "networking" \ "! 
kubectl get events -A 2>/dev/null | grep -qi 'bridge.*fail\\|cni0.*error\\|FailedCreatePodSandBox.*bridge'" @@ -185,9 +179,6 @@ echo "" echo "[Host Connectivity]" -check "port 6443 (kube-apiserver) reachable" "host" \ - "timeout 5 bash -c 'echo > /dev/tcp/127.0.0.1/6443' 2>/dev/null || nc -z -w5 127.0.0.1 6443 2>/dev/null" - check "port 30051 (gateway service) reachable" "host" \ "timeout 5 bash -c 'echo > /dev/tcp/127.0.0.1/30051' 2>/dev/null || nc -z -w5 127.0.0.1 30051 2>/dev/null" diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index b14c234f..e6bd5ce7 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -146,8 +146,7 @@ impl VmConfig { /// /// Runs `/srv/gateway-init.sh` which mounts essential filesystems, /// deploys the OpenShell helm chart, and execs `k3s server`. - /// Exposes the Kubernetes API on port 6443 and the OpenShell - /// gateway (`NodePort`) on port 30051. + /// Exposes the OpenShell gateway on port 30051. pub fn gateway(rootfs: PathBuf) -> Self { Self { rootfs, @@ -162,10 +161,6 @@ impl VmConfig { ], workdir: "/".to_string(), port_map: vec![ - // Map host 6443 -> guest 6444 (real kube-apiserver). - // The k3s dynamiclistener on 6443 has TLS issues through - // port forwarding, so we go directly to the apiserver. - "6443:6444".to_string(), // Navigator server — with hostNetwork the server binds // directly to port 8080 on the VM's interface, bypassing // NodePort (which requires kube-proxy / iptables). 
@@ -369,6 +364,7 @@ fn raise_nofile_limit() { fn log_runtime_provenance(runtime_dir: &Path) { if let Some(prov) = ffi::runtime_provenance() { eprintln!("runtime: {}", runtime_dir.display()); + eprintln!(" libkrun: {}", prov.libkrun_path.display()); for krunfw in &prov.libkrunfw_paths { let name = krunfw .file_name() @@ -993,8 +989,6 @@ pub fn launch(config: &VmConfig) -> Result { match std::fs::read_to_string(&kubeconfig_src) { Ok(contents) => { - // The kubeconfig has server: https://127.0.0.1:6443 - // which is correct since we forward host:6443 -> guest:6444. if let Err(e) = std::fs::write(&dest, &contents) { eprintln!(" failed to write kubeconfig: {e}"); } else { diff --git a/tasks/scripts/bundle-vm-runtime.sh b/tasks/scripts/bundle-vm-runtime.sh index bc91e9b3..f391363a 100755 --- a/tasks/scripts/bundle-vm-runtime.sh +++ b/tasks/scripts/bundle-vm-runtime.sh @@ -35,10 +35,19 @@ if [ -z "$GVPROXY_BIN" ]; then fi fi +# libkrun.dylib: prefer the custom runtime dir, fall back to Homebrew. +# libkrun is the VMM and does not need a custom build; only libkrunfw +# carries the custom kernel. LIBKRUN="${LIB_DIR}/libkrun.dylib" if [ ! 
-e "$LIBKRUN" ]; then - echo "libkrun not found at ${LIBKRUN}; set OPENSHELL_VM_RUNTIME_SOURCE_DIR" >&2 - exit 1 + BREW_PREFIX="${BREW_PREFIX:-$(brew --prefix 2>/dev/null || true)}" + if [ -n "$BREW_PREFIX" ] && [ -e "${BREW_PREFIX}/lib/libkrun.dylib" ]; then + LIBKRUN="${BREW_PREFIX}/lib/libkrun.dylib" + echo "using Homebrew libkrun at ${LIBKRUN}" + else + echo "libkrun not found at ${LIB_DIR}/libkrun.dylib or Homebrew; install libkrun or set OPENSHELL_VM_RUNTIME_SOURCE_DIR" >&2 + exit 1 + fi fi KRUNFW_FILES=() From e012e092b741298f24a53b3ea0ae527c4c53e8dd Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Tue, 24 Mar 2026 10:32:30 -0700 Subject: [PATCH 12/14] fix(vm): fix gateway readiness timeout with correct port mapping and aligned pre-bake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues caused the gateway service readiness check to time out: 1. Port mapping mismatch: gvproxy mapped host:30051 → VM:8080, but with bridge CNI the pod listens on 8080 inside its network namespace, not on the VM's root namespace. Changed to 30051:30051 so traffic flows through the NodePort service (kube-proxy nftables → pod:8080). 2. Pod cycling from helm upgrade: build-rootfs.sh pre-baked with hostNetwork=true and automountServiceAccountToken=false, but gateway-init.sh changed these at boot, triggering a HelmChart reconcile that killed the pre-baked pod ~90s in. Aligned pre-bake values (hostNetwork=false, automountServiceAccountToken=true) to match runtime, eliminating the manifest delta. 
--- crates/openshell-vm/scripts/build-rootfs.sh | 22 ++++++++++----------- crates/openshell-vm/src/lib.rs | 12 +++++------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh index 4c406497..e79a9c7d 100755 --- a/crates/openshell-vm/scripts/build-rootfs.sh +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -243,17 +243,17 @@ if [ -f "$HELMCHART" ]; then sed -i '' "s|server:[[:space:]]*sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest|server:\n sandboxImage: ${SANDBOX_IMAGE}|g" "$HELMCHART" 2>/dev/null || true sed -i '' "s|sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest|sandboxImage: ${SANDBOX_IMAGE}|g" "$HELMCHART" 2>/dev/null \ || sed -i "s|sandboxImage: ghcr.io/nvidia/openshell-community/sandboxes/base:latest|sandboxImage: ${SANDBOX_IMAGE}|g" "$HELMCHART" - # Enable hostNetwork for VM (no kube-proxy / iptables). - sed -i '' 's|__HOST_NETWORK__|true|g' "$HELMCHART" 2>/dev/null \ - || sed -i 's|__HOST_NETWORK__|true|g' "$HELMCHART" - # Disable SA token automount. The projected volume at - # /var/run/secrets/kubernetes.io/serviceaccount fails on sandbox - # re-creation because /var/run is a symlink to /run in the container - # image and the native snapshotter + virtiofs combination can't - # resolve it correctly on the second mount. - sed -i '' 's|__AUTOMOUNT_SA_TOKEN__|false|g' "$HELMCHART" 2>/dev/null \ - || sed -i 's|__AUTOMOUNT_SA_TOKEN__|false|g' "$HELMCHART" - # Mount the k3s kubeconfig into the pod since SA token isn't mounted. + # Bridge CNI: pods use normal pod networking, not hostNetwork. + # This must match what gateway-init.sh applies at runtime so the + # HelmChart manifest is unchanged at boot — preventing a helm + # upgrade job that would cycle the pre-baked pod. 
+ sed -i '' 's|__HOST_NETWORK__|false|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__HOST_NETWORK__|false|g' "$HELMCHART" + # Enable SA token automount for bridge CNI mode. Must match + # gateway-init.sh runtime value to avoid manifest delta. + sed -i '' 's|__AUTOMOUNT_SA_TOKEN__|true|g' "$HELMCHART" 2>/dev/null \ + || sed -i 's|__AUTOMOUNT_SA_TOKEN__|true|g' "$HELMCHART" + # Mount the k3s kubeconfig into the pod for VM mode. sed -i '' 's|__KUBECONFIG_HOST_PATH__|"/etc/rancher/k3s"|g' "$HELMCHART" 2>/dev/null \ || sed -i 's|__KUBECONFIG_HOST_PATH__|"/etc/rancher/k3s"|g' "$HELMCHART" # Disable persistence — use /tmp for the SQLite database. PVC mounts diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index e6bd5ce7..28308598 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -161,12 +161,12 @@ impl VmConfig { ], workdir: "/".to_string(), port_map: vec![ - // Navigator server — with hostNetwork the server binds - // directly to port 8080 on the VM's interface, bypassing - // NodePort (which requires kube-proxy / iptables). - // Map host 30051 -> guest 8080 so the external-facing - // port stays the same for CLI clients. - "30051:8080".to_string(), + // Navigator server — with bridge CNI the pod listens on + // 8080 inside its own network namespace (10.42.0.x), not + // on the VM's root namespace. The NodePort service + // (kube-proxy nftables) forwards VM:30051 → pod:8080. + // gvproxy maps host:30051 → VM:30051 to complete the path. 
+ "30051:30051".to_string(), ], vsock_ports: vec![], log_level: 3, // Info — for debugging From 9e340ea75504bde4676fd6eb95f4a550d76ca122 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Tue, 24 Mar 2026 10:56:15 -0700 Subject: [PATCH 13/14] fix(vm): restore kube-apiserver port forwarding for host-side kubectl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit (070bcca1) dropped port 6443 from the gvproxy port_map, breaking all host-side kubectl commands including the readiness check and stale pod recovery. k3s runs the API server with host networking so VM:6443 is directly reachable — restore the 6443:6443 mapping alongside the 30051:30051 NodePort mapping. --- crates/openshell-vm/src/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index 28308598..cc98392c 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -161,6 +161,12 @@ impl VmConfig { ], workdir: "/".to_string(), port_map: vec![ + // kube-apiserver — k3s runs the API server with host + // networking so it listens on VM:6443 directly. The + // host-side readiness check (`wait_for_gateway_service`) + // and `recover_stale_pods` both use kubectl against + // 127.0.0.1:6443 via the copied kubeconfig. + "6443:6443".to_string(), // Navigator server — with bridge CNI the pod listens on // 8080 inside its own network namespace (10.42.0.x), not // on the VM's root namespace. The NodePort service From 86d7b3d6b80244647fc14adb58019eb11c104f03 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Tue, 24 Mar 2026 11:08:17 -0700 Subject: [PATCH 14/14] refactor(vm): eliminate host-side kubectl dependency from boot pipeline Remove all kubectl calls from the host-side boot sequence, eliminating the need to forward port 6443 (kube-apiserver) outside the VM. 
Changes: - wait_for_gateway_service: TCP probe only (30051), no kubectl pod check - bootstrap_gateway: cold boot writes TLS secret manifests via virtio-fs into k3s auto-deploy dir instead of kubectl apply - bootstrap_gateway: warm boot skips namespace wait (TCP probe suffices) - recover_stale_pods: removed entirely (gateway-init.sh already cleans containerd runtime/sandbox state, CNI state, and network namespaces) - Kubeconfig copy moved to best-effort post-readiness (for debugging) - Port 6443 removed from gvproxy port_map Removed functions: recover_stale_pods, wait_for_namespace, apply_tls_secrets, kubectl_apply. Net: -362 lines, +147 lines. No kubectl binary required on host. --- crates/openshell-vm/src/lib.rs | 509 ++++++++++----------------------- 1 file changed, 147 insertions(+), 362 deletions(-) diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index cc98392c..3a6818f4 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -161,13 +161,7 @@ impl VmConfig { ], workdir: "/".to_string(), port_map: vec![ - // kube-apiserver — k3s runs the API server with host - // networking so it listens on VM:6443 directly. The - // host-side readiness check (`wait_for_gateway_service`) - // and `recover_stale_pods` both use kubectl against - // 127.0.0.1:6443 via the copied kubeconfig. - "6443:6443".to_string(), - // Navigator server — with bridge CNI the pod listens on + // OpenShell server — with bridge CNI the pod listens on // 8080 inside its own network namespace (10.42.0.x), not // on the VM's root namespace. The NodePort service // (kube-proxy nftables) forwards VM:30051 → pod:8080. @@ -955,88 +949,27 @@ pub fn launch(config: &VmConfig) -> Result { ); } - // Wait for k3s kubeconfig to appear (virtio-fs makes it - // visible on the host). Only do this for the gateway preset - // (when exec_path is the default init script). + // Bootstrap the OpenShell control plane and wait for the + // service to be reachable. 
Only for the gateway preset. if config.exec_path == "/srv/gateway-init.sh" { - let kubeconfig_src = config.rootfs.join("etc/rancher/k3s/k3s.yaml"); - let kc_start = Instant::now(); - eprintln!("Waiting for kubeconfig..."); - - // Aggressive polling initially (100ms) then back off to 1s. - // Total budget: ~90s (enough for k3s cold start). - let mut found = false; - let deadline = Instant::now() + std::time::Duration::from_secs(90); - let mut interval = std::time::Duration::from_millis(100); - while Instant::now() < deadline { - if kubeconfig_src.is_file() - && std::fs::metadata(&kubeconfig_src) - .map(|m| m.len() > 0) - .unwrap_or(false) - { - found = true; - break; - } - std::thread::sleep(interval); - interval = (interval * 2).min(std::time::Duration::from_secs(1)); - } - - if found { - eprintln!( - "Kubeconfig appeared [{:.1}s]", - kc_start.elapsed().as_secs_f64() - ); - // Copy kubeconfig to ~/.kube/gateway.yaml, rewriting - // the server URL to point at the forwarded host port. - let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); - let kube_dir = PathBuf::from(&home).join(".kube"); - let _ = std::fs::create_dir_all(&kube_dir); - let dest = kube_dir.join("gateway.yaml"); - - match std::fs::read_to_string(&kubeconfig_src) { - Ok(contents) => { - if let Err(e) = std::fs::write(&dest, &contents) { - eprintln!(" failed to write kubeconfig: {e}"); - } else { - eprintln!("Kubeconfig: {}", dest.display()); - eprintln!(" export KUBECONFIG={}", dest.display()); - } - } - Err(e) => { - eprintln!(" failed to read kubeconfig: {e}"); - } - } - - // Bootstrap the OpenShell control plane: generate PKI, - // create TLS secrets, and store cluster metadata so CLI - // clients and e2e tests can connect. - // - // If the rootfs has pre-baked PKI (from build-rootfs.sh), - // this skips the namespace wait and kubectl apply entirely. 
-                if let Err(e) = bootstrap_gateway(&dest, &config.rootfs) {
-                    eprintln!("Bootstrap failed: {e}");
-                    eprintln!(
-                        "  The VM is running but OpenShell may not be fully operational."
-                    );
-                }
-            } else {
-                eprintln!("  kubeconfig not found after 90s (k3s may still be starting)");
+            // Bootstrap stores host-side metadata and mTLS creds.
+            // With pre-baked rootfs (Path 1) this reads PKI directly
+            // from virtio-fs — no kubectl or port forwarding needed.
+            // Cold boot (Path 3) writes secret manifests into the
+            // k3s auto-deploy directory via virtio-fs.
+            if let Err(e) = bootstrap_gateway(&config.rootfs) {
+                eprintln!("Bootstrap failed: {e}");
+                eprintln!("  The VM is running but OpenShell may not be fully operational.");
             }

-            // On warm reboots (rootfs persists via virtio-fs), the k3s
-            // database may have stale pod records from the previous
-            // session. containerd v2 doesn't always recover these
-            // automatically. Force-delete any pods stuck in Unknown
-            // or failed state so the StatefulSet controller recreates
-            // them.
-            let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
-            let kubeconfig_dest = PathBuf::from(&home).join(".kube/gateway.yaml");
-            recover_stale_pods(&kubeconfig_dest);
-
-            // Wait for the gRPC service to be reachable before
-            // declaring "Ready". The openshell pod needs a few
-            // seconds after k3s starts to bind its port.
+            // Wait for the gRPC service to be reachable via TCP
+            // probe on host:30051. This confirms the full path
+            // (gvproxy → kube-proxy nftables → pod:8080) is working.
             wait_for_gateway_service();
+
+            // Best-effort: copy kubeconfig for manual debugging.
+            // Not blocking — the boot pipeline doesn't need it.
+            copy_kubeconfig_best_effort(&config.rootfs);
         }

         eprintln!("Ready [{:.1}s total]", boot_start.elapsed().as_secs_f64());
@@ -1092,18 +1025,25 @@ const GATEWAY_PORT: u16 = 30051;
 /// Bootstrap the OpenShell control plane after k3s is ready.
/// -/// Three paths, fastest first: +/// All operations use the virtio-fs rootfs — no kubectl or API server +/// port forwarding required. This avoids exposing port 6443 outside the +/// VM. /// -/// 1. **Pre-baked PKI** (from `build-rootfs.sh`): reads PEM files directly -/// from the rootfs, stores creds + metadata on the host. No cluster -/// interaction at all. Completes in <50ms. +/// Three paths, in priority order: +/// +/// 1. **Pre-baked rootfs** (from `build-rootfs.sh`): PKI files at +/// `rootfs/opt/openshell/pki/`. TLS secrets already exist in the k3s +/// database. Reads certs from the filesystem and stores metadata on the +/// host. /// /// 2. **Warm boot**: host-side metadata + mTLS certs survive across VM -/// restarts. Waits for the openshell namespace, then returns. +/// restarts. Nothing to do — service readiness is confirmed by the TCP +/// probe in `wait_for_gateway_service()`. /// -/// 3. **Cold boot**: generates fresh PKI, waits for namespace, applies -/// secrets via kubectl, stores everything on the host. -fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { +/// 3. **Cold boot**: generates fresh PKI and writes TLS secret manifests +/// into the k3s auto-deploy directory (`/var/lib/rancher/k3s/server/manifests/`) +/// via virtio-fs. k3s picks them up automatically. +fn bootstrap_gateway(rootfs: &Path) -> Result<(), VmError> { let bootstrap_start = Instant::now(); // Build gateway metadata early — it only depends on knowing the port and @@ -1167,15 +1107,10 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { // ── Path 2: Warm boot ────────────────────────────────────────── // // Host-side metadata + mTLS certs survive from a previous boot. - // Just wait for the namespace to confirm k3s is ready. 
- let kc = kubeconfig - .to_str() - .ok_or_else(|| VmError::InvalidPath(kubeconfig.display().to_string()))?; - + // Service readiness is confirmed by the TCP probe in + // `wait_for_gateway_service()` — no kubectl needed here. if is_warm_boot() { eprintln!("Warm boot detected — reusing existing PKI and metadata."); - eprintln!("Waiting for openshell namespace..."); - wait_for_namespace(kc)?; eprintln!( "Warm boot ready [{:.1}s]", bootstrap_start.elapsed().as_secs_f64() @@ -1187,6 +1122,10 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { } // ── Path 3: Cold boot (no pre-baked state) ───────────────────── + // + // Generate fresh PKI and write TLS secret manifests into the k3s + // auto-deploy directory via virtio-fs. k3s watches this directory + // and applies any YAML files automatically. eprintln!("Generating TLS certificates..."); let pki_bundle = openshell_bootstrap::pki::generate_pki(&[]) .map_err(|e| VmError::Bootstrap(format!("PKI generation failed: {e}")))?; @@ -1194,13 +1133,12 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { openshell_bootstrap::store_gateway_metadata(GATEWAY_CLUSTER_NAME, &metadata) .map_err(|e| VmError::Bootstrap(format!("failed to store cluster metadata: {e}")))?; - let ns_start = Instant::now(); - eprintln!("Waiting for openshell namespace..."); - wait_for_namespace(kc)?; - eprintln!("Namespace ready [{:.1}s]", ns_start.elapsed().as_secs_f64()); - - eprintln!("Creating TLS secrets..."); - apply_tls_secrets(kc, &pki_bundle)?; + // Write TLS secrets as k3s auto-deploy manifests via virtio-fs. + // k3s watches /var/lib/rancher/k3s/server/manifests/ and applies + // any YAML files dropped there, eliminating the need for kubectl + // or API server port forwarding. 
+ eprintln!("Writing TLS secret manifests via virtio-fs..."); + write_tls_secret_manifests(rootfs, &pki_bundle)?; openshell_bootstrap::mtls::store_pki_bundle(GATEWAY_CLUSTER_NAME, &pki_bundle) .map_err(|e| VmError::Bootstrap(format!("failed to store mTLS credentials: {e}")))?; @@ -1225,9 +1163,10 @@ fn bootstrap_gateway(kubeconfig: &Path, rootfs: &Path) -> Result<(), VmError> { /// - Cluster metadata exists: `$XDG_CONFIG_HOME/openshell/gateways/gateway/metadata.json` /// - mTLS certs exist: `$XDG_CONFIG_HOME/openshell/gateways/gateway/mtls/{ca.crt,tls.crt,tls.key}` /// -/// When true, the host-side bootstrap (PKI generation, kubectl apply, metadata -/// storage) can be skipped because the virtio-fs rootfs persists k3s state -/// (TLS certs, kine/sqlite, containerd images, helm releases) across VM restarts. +/// When true, the host-side bootstrap (PKI generation, secret manifest writing, +/// metadata storage) can be skipped because the virtio-fs rootfs persists k3s +/// state (TLS certs, kine/sqlite, containerd images, helm releases) across VM +/// restarts. fn is_warm_boot() -> bool { let Ok(home) = std::env::var("HOME") else { return false; @@ -1267,50 +1206,24 @@ fn is_warm_boot() -> bool { /// across boots so the native snapshotter doesn't re-extract image layers. /// Runtime task state is cleaned by `gateway-init.sh` on each boot. /// -/// We poll kubectl for `Ready=True`, then verify with a host-side TCP -/// probe to `127.0.0.1:30051` to confirm the full gvproxy->VM->pod -/// path works. gvproxy accepts TCP connections even when nothing listens -/// in the guest, but those connections reset immediately. A connection -/// that stays open (server waiting for TLS `ClientHello`) proves the pod -/// is genuinely serving. +/// Wait for the OpenShell gRPC service to be reachable from the host. +/// +/// Polls `host_tcp_probe()` on `127.0.0.1:30051` with 1s intervals. +/// The probe confirms the full networking path: gvproxy → kube-proxy +/// nftables → pod:8080. 
A successful probe means the pod is running, +/// the NodePort service is routing, and the server is accepting +/// connections. No kubectl or API server access required. fn wait_for_gateway_service() { let start = Instant::now(); let timeout = std::time::Duration::from_secs(90); let poll_interval = std::time::Duration::from_secs(1); - let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); - let kubeconfig = PathBuf::from(&home).join(".kube/gateway.yaml"); - let kc = kubeconfig.to_string_lossy(); - eprintln!("Waiting for gateway service..."); loop { - // Check if the pod is Ready. - let is_ready = std::process::Command::new("kubectl") - .args(["--kubeconfig", &kc]) - .args([ - "-n", - "openshell", - "get", - "pod", - "openshell-0", - "-o", - "jsonpath={.status.conditions[?(@.type==\"Ready\")].status}", - ]) - .output() - .ok() - .filter(|o| o.status.success()) - .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string()) - .is_some_and(|s| s == "True"); - - if is_ready { - // Pod reports Ready — verify with a host-side TCP probe to - // confirm the full gvproxy -> VM -> pod path works. - if host_tcp_probe() { - eprintln!("Service healthy [{:.1}s]", start.elapsed().as_secs_f64()); - return; - } - eprintln!(" pod Ready but host TCP probe failed, retrying..."); + if host_tcp_probe() { + eprintln!("Service healthy [{:.1}s]", start.elapsed().as_secs_f64()); + return; } if start.elapsed() >= timeout { @@ -1325,95 +1238,6 @@ fn wait_for_gateway_service() { } } -/// Force-delete pods stuck in `Unknown` or failed states (safety net). -/// -/// On warm reboots (virtio-fs persists rootfs across VM restarts), the -/// k3s database retains pod records from the previous session. Containerd -/// runtime task state is cleaned but metadata (meta.db) is preserved to -/// avoid re-extracting image layers. 
This function is a safety net for -/// edge cases where reconciliation fails — it force-deletes pods in -/// `Unknown` or `Failed` state so controllers can recreate them. -fn recover_stale_pods(kubeconfig: &Path) { - let kc = kubeconfig.to_string_lossy(); - - // Wait briefly for the API server to be responsive. - let deadline = Instant::now() + std::time::Duration::from_secs(30); - let mut interval = std::time::Duration::from_millis(500); - loop { - if let Ok(output) = std::process::Command::new("kubectl") - .args(["--kubeconfig", &kc]) - .args(["get", "nodes", "-o", "name"]) - .output() - { - if output.status.success() { - break; - } - } - if Instant::now() >= deadline { - eprintln!(" API server not ready after 30s, skipping pod recovery"); - return; - } - std::thread::sleep(interval); - interval = (interval * 2).min(std::time::Duration::from_secs(2)); - } - - // Get all pods in a parseable format: namespace/name status - let output = std::process::Command::new("kubectl") - .args(["--kubeconfig", &kc]) - .args([ - "get", "pods", "-A", - "-o", "jsonpath={range .items[*]}{.metadata.namespace}/{.metadata.name} {.status.phase}\\n{end}", - ]) - .output(); - - let Ok(output) = output else { return }; - if !output.status.success() { - return; - } - - let stdout = String::from_utf8_lossy(&output.stdout); - let mut stale_count = 0u32; - - for line in stdout.lines() { - let parts: Vec<&str> = line.trim().split_whitespace().collect(); - if parts.len() != 2 { - continue; - } - let (ns_name, phase) = (parts[0], parts[1]); - // Delete pods in Unknown or Failed state — they can't recover - // from stale containerd sandbox state. 
- if phase == "Unknown" || phase == "Failed" { - let ns_and_name: Vec<&str> = ns_name.splitn(2, '/').collect(); - if ns_and_name.len() != 2 { - continue; - } - let (ns, name) = (ns_and_name[0], ns_and_name[1]); - let result = std::process::Command::new("kubectl") - .args(["--kubeconfig", &kc]) - .args([ - "-n", - ns, - "delete", - "pod", - name, - "--force", - "--grace-period=0", - ]) - .output(); - - if let Ok(r) = result { - if r.status.success() { - stale_count += 1; - } - } - } - } - - if stale_count > 0 { - eprintln!("Recovered {stale_count} stale pod(s)"); - } -} - /// Probe `127.0.0.1:30051` from the host to verify the full /// gvproxy → VM → pod path is working. /// @@ -1451,146 +1275,107 @@ fn host_tcp_probe() -> bool { } } -/// Poll kubectl until the `openshell` namespace exists. -/// -/// Uses exponential backoff (500ms → 3s) to minimize latency when the -/// namespace appears quickly while avoiding kubectl spam. -fn wait_for_namespace(kubeconfig: &str) -> Result<(), VmError> { - let start = Instant::now(); - let timeout = std::time::Duration::from_secs(180); - let mut interval = std::time::Duration::from_millis(500); - let mut attempts = 0u32; - - loop { - let output = std::process::Command::new("kubectl") - .args(["--kubeconfig", kubeconfig]) - .args(["get", "namespace", "openshell", "-o", "name"]) - .output(); - - if let Ok(output) = output - && output.status.success() - { - let stdout = String::from_utf8_lossy(&output.stdout); - if stdout.contains("openshell") { - return Ok(()); - } - } - - if start.elapsed() >= timeout { - return Err(VmError::Bootstrap( - "timed out waiting for openshell namespace (180s). \ - Check console.log for k3s errors." 
- .to_string(), - )); - } - - attempts += 1; - if attempts.is_multiple_of(10) { - eprintln!( - " still waiting for openshell namespace ({:.0}s elapsed)", - start.elapsed().as_secs_f64() - ); - } - - std::thread::sleep(interval); - interval = (interval * 2).min(std::time::Duration::from_secs(3)); - } -} - -/// Apply the three TLS K8s secrets required by the OpenShell server. +/// Write TLS secret manifests into the k3s auto-deploy directory via virtio-fs. /// -/// Uses `kubectl apply -f -` on the host, piping JSON manifests via stdin. -fn apply_tls_secrets( - kubeconfig: &str, +/// k3s watches `/var/lib/rancher/k3s/server/manifests/` and automatically +/// applies any YAML files placed there. This avoids the need for kubectl +/// or API server port forwarding from the host. +fn write_tls_secret_manifests( + rootfs: &Path, bundle: &openshell_bootstrap::pki::PkiBundle, ) -> Result<(), VmError> { use base64::Engine; use base64::engine::general_purpose::STANDARD; - let secrets = [ - // 1. openshell-server-tls (kubernetes.io/tls) - serde_json::json!({ - "apiVersion": "v1", - "kind": "Secret", - "metadata": { - "name": openshell_bootstrap::constants::SERVER_TLS_SECRET_NAME, - "namespace": "openshell" - }, - "type": "kubernetes.io/tls", - "data": { - "tls.crt": STANDARD.encode(&bundle.server_cert_pem), - "tls.key": STANDARD.encode(&bundle.server_key_pem) - } - }), - // 2. openshell-server-client-ca (Opaque) - serde_json::json!({ - "apiVersion": "v1", - "kind": "Secret", - "metadata": { - "name": openshell_bootstrap::constants::SERVER_CLIENT_CA_SECRET_NAME, - "namespace": "openshell" - }, - "type": "Opaque", - "data": { - "ca.crt": STANDARD.encode(&bundle.ca_cert_pem) - } - }), - // 3. 
openshell-client-tls (Opaque) — shared by CLI and sandbox pods - serde_json::json!({ - "apiVersion": "v1", - "kind": "Secret", - "metadata": { - "name": openshell_bootstrap::constants::CLIENT_TLS_SECRET_NAME, - "namespace": "openshell" - }, - "type": "Opaque", - "data": { - "tls.crt": STANDARD.encode(&bundle.client_cert_pem), - "tls.key": STANDARD.encode(&bundle.client_key_pem), - "ca.crt": STANDARD.encode(&bundle.ca_cert_pem) - } - }), - ]; - - for secret in &secrets { - let name = secret["metadata"]["name"].as_str().unwrap_or("unknown"); - kubectl_apply(kubeconfig, &secret.to_string()) - .map_err(|e| VmError::Bootstrap(format!("failed to create secret {name}: {e}")))?; - eprintln!(" secret/{name} created"); - } + let manifests_dir = rootfs.join("var/lib/rancher/k3s/server/manifests"); + std::fs::create_dir_all(&manifests_dir) + .map_err(|e| VmError::Bootstrap(format!("failed to create manifests dir: {e}")))?; + + let server_tls_name = openshell_bootstrap::constants::SERVER_TLS_SECRET_NAME; + let client_ca_name = openshell_bootstrap::constants::SERVER_CLIENT_CA_SECRET_NAME; + let client_tls_name = openshell_bootstrap::constants::CLIENT_TLS_SECRET_NAME; + + // Combine all three secrets into a single multi-document YAML file. + // k3s applies the entire file atomically. 
+ let manifest = format!( + r#"--- +apiVersion: v1 +kind: Namespace +metadata: + name: openshell +--- +apiVersion: v1 +kind: Secret +metadata: + name: {server_tls_name} + namespace: openshell +type: kubernetes.io/tls +data: + tls.crt: {server_crt} + tls.key: {server_key} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {client_ca_name} + namespace: openshell +type: Opaque +data: + ca.crt: {ca_crt} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {client_tls_name} + namespace: openshell +type: Opaque +data: + tls.crt: {client_crt} + tls.key: {client_key} + ca.crt: {ca_crt} +"#, + server_crt = STANDARD.encode(&bundle.server_cert_pem), + server_key = STANDARD.encode(&bundle.server_key_pem), + ca_crt = STANDARD.encode(&bundle.ca_cert_pem), + client_crt = STANDARD.encode(&bundle.client_cert_pem), + client_key = STANDARD.encode(&bundle.client_key_pem), + ); + + let dest = manifests_dir.join("openshell-tls-secrets.yaml"); + std::fs::write(&dest, manifest) + .map_err(|e| VmError::Bootstrap(format!("failed to write TLS manifest: {e}")))?; + eprintln!(" TLS secret manifests written to {}", dest.display()); Ok(()) } -/// Run `kubectl apply -f -` with the given manifest piped via stdin. -fn kubectl_apply(kubeconfig: &str, manifest: &str) -> Result<(), String> { - use std::io::Write; - use std::process::{Command, Stdio}; - - let mut child = Command::new("kubectl") - .args(["--kubeconfig", kubeconfig, "apply", "-f", "-"]) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .map_err(|e| format!("failed to spawn kubectl: {e}"))?; - - if let Some(mut stdin) = child.stdin.take() { - stdin - .write_all(manifest.as_bytes()) - .map_err(|e| format!("failed to write manifest to kubectl stdin: {e}"))?; +/// Best-effort: copy the k3s kubeconfig to `~/.kube/gateway.yaml` for +/// manual debugging. Not required for the boot pipeline — runs after +/// the service is already confirmed healthy. 
+fn copy_kubeconfig_best_effort(rootfs: &Path) { + let kubeconfig_src = rootfs.join("etc/rancher/k3s/k3s.yaml"); + if !kubeconfig_src.is_file() { + return; } - let output = child - .wait_with_output() - .map_err(|e| format!("failed to wait for kubectl: {e}"))?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(format!("kubectl apply failed: {stderr}")); + let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); + let kube_dir = PathBuf::from(&home).join(".kube"); + let _ = std::fs::create_dir_all(&kube_dir); + let dest = kube_dir.join("gateway.yaml"); + + match std::fs::read_to_string(&kubeconfig_src) { + Ok(contents) => { + if let Err(e) = std::fs::write(&dest, &contents) { + eprintln!(" failed to write kubeconfig: {e}"); + } else { + eprintln!("Kubeconfig: {}", dest.display()); + eprintln!(" export KUBECONFIG={}", dest.display()); + } + } + Err(e) => { + eprintln!(" failed to read kubeconfig: {e}"); + } } - - Ok(()) } static CHILD_PID: std::sync::atomic::AtomicI32 = std::sync::atomic::AtomicI32::new(0);