Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ cloud-hypervisor
cloud-hypervisor/**
lib/system/exec_agent/exec-agent
lib/system/guest_agent/guest-agent
lib/system/guest_agent/guest_agent
lib/system/init/init

# Envoy binaries
lib/ingress/binaries/**
Expand Down
20 changes: 14 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SHELL := /bin/bash
.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries build-preview-cli release-prep clean
.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean build-embedded

# Directory where local binaries will be installed
BIN_DIR ?= $(CURDIR)/bin
Expand Down Expand Up @@ -165,26 +165,33 @@ ensure-caddy-binaries:
fi

# Build guest-agent (guest binary) into its own directory for embedding
lib/system/guest_agent/guest-agent: lib/system/guest_agent/main.go
lib/system/guest_agent/guest-agent: lib/system/guest_agent/*.go
@echo "Building guest-agent..."
cd lib/system/guest_agent && CGO_ENABLED=0 go build -ldflags="-s -w" -o guest-agent .

# Build init binary (runs as PID 1 in guest VM) for embedding
lib/system/init/init: lib/system/init/*.go
@echo "Building init binary..."
cd lib/system/init && CGO_ENABLED=0 go build -ldflags="-s -w" -o init .

build-embedded: lib/system/guest_agent/guest-agent lib/system/init/init

# Build the binary
build: ensure-ch-binaries ensure-caddy-binaries lib/system/guest_agent/guest-agent | $(BIN_DIR)
build: ensure-ch-binaries ensure-caddy-binaries build-embedded | $(BIN_DIR)
go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api

# Build all binaries
build-all: build

# Run in development mode with hot reload
dev: ensure-ch-binaries ensure-caddy-binaries lib/system/guest_agent/guest-agent $(AIR)
dev: ensure-ch-binaries ensure-caddy-binaries build-embedded $(AIR)
@rm -f ./tmp/main
$(AIR) -c .air.toml

# Run tests (as root for network capabilities, enables caching and parallelism)
# Usage: make test - runs all tests
# make test TEST=TestCreateInstanceWithNetwork - runs specific test
test: ensure-ch-binaries ensure-caddy-binaries lib/system/guest_agent/guest-agent
test: ensure-ch-binaries ensure-caddy-binaries build-embedded
@if [ -n "$(TEST)" ]; then \
echo "Running specific test: $(TEST)"; \
sudo env "PATH=$$PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" go test -tags containers_image_openpgp -run=$(TEST) -v -timeout=180s ./...; \
Expand All @@ -203,8 +210,9 @@ clean:
rm -rf lib/vmm/binaries/cloud-hypervisor/
rm -rf lib/ingress/binaries/
rm -f lib/system/guest_agent/guest-agent
rm -f lib/system/init/init

# Prepare for release build (called by GoReleaser)
# Downloads all embedded binaries and builds embedded components
release-prep: download-ch-binaries build-caddy-binaries lib/system/guest_agent/guest-agent
release-prep: download-ch-binaries build-caddy-binaries build-embedded
go mod tidy
29 changes: 16 additions & 13 deletions cmd/api/api/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ var upgrader = websocket.Upgrader{

// ExecRequest represents the JSON body for exec requests
type ExecRequest struct {
Command []string `json:"command"`
TTY bool `json:"tty"`
Env map[string]string `json:"env,omitempty"`
Cwd string `json:"cwd,omitempty"`
Timeout int32 `json:"timeout,omitempty"` // seconds
Command []string `json:"command"`
TTY bool `json:"tty"`
Env map[string]string `json:"env,omitempty"`
Cwd string `json:"cwd,omitempty"`
Timeout int32 `json:"timeout,omitempty"` // seconds
WaitForAgent int32 `json:"wait_for_agent,omitempty"` // seconds to wait for guest agent to be ready
}

// ExecHandler handles exec requests via WebSocket for bidirectional streaming
Expand Down Expand Up @@ -106,6 +107,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
"tty", execReq.TTY,
"cwd", execReq.Cwd,
"timeout", execReq.Timeout,
"wait_for_agent", execReq.WaitForAgent,
)

// Create WebSocket read/writer wrapper
Expand All @@ -122,14 +124,15 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {

// Execute via vsock
exit, err := guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{
Command: execReq.Command,
Stdin: wsConn,
Stdout: wsConn,
Stderr: wsConn,
TTY: execReq.TTY,
Env: execReq.Env,
Cwd: execReq.Cwd,
Timeout: execReq.Timeout,
Command: execReq.Command,
Stdin: wsConn,
Stdout: wsConn,
Stderr: wsConn,
TTY: execReq.TTY,
Env: execReq.Env,
Cwd: execReq.Cwd,
Timeout: execReq.Timeout,
WaitForAgent: time.Duration(execReq.WaitForAgent) * time.Second,
})

duration := time.Since(startTime)
Expand Down
35 changes: 10 additions & 25 deletions cmd/api/api/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,38 +115,23 @@ func TestExecInstanceNonTTY(t *testing.T) {
t.Logf("vsock socket exists: %s", actualInst.VsockSocket)
}

// Wait for exec agent to be ready (retry a few times)
var exit *guest.ExitStatus
var stdout, stderr outputBuffer
var execErr error

dialer, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID)
require.NoError(t, err)

t.Log("Testing exec command: whoami")
maxRetries := 10
for i := 0; i < maxRetries; i++ {
stdout = outputBuffer{}
stderr = outputBuffer{}

exit, execErr = guest.ExecIntoInstance(ctx(), dialer, guest.ExecOptions{
Command: []string{"/bin/sh", "-c", "whoami"},
Stdin: nil,
Stdout: &stdout,
Stderr: &stderr,
TTY: false,
})

if execErr == nil {
break
}

t.Logf("Exec attempt %d/%d failed, retrying: %v", i+1, maxRetries, execErr)
time.Sleep(1 * time.Second)
}
exit, execErr := guest.ExecIntoInstance(ctx(), dialer, guest.ExecOptions{
Command: []string{"/bin/sh", "-c", "whoami"},
Stdin: nil,
Stdout: &stdout,
Stderr: &stderr,
TTY: false,
WaitForAgent: 10 * time.Second, // Wait up to 10s for guest agent to be ready
})

// Assert exec worked
require.NoError(t, execErr, "exec should succeed after retries")
require.NoError(t, execErr, "exec should succeed")
require.NotNil(t, exit, "exit status should be returned")
require.Equal(t, 0, exit.Code, "whoami should exit with code 0")

Expand Down Expand Up @@ -251,7 +236,7 @@ func TestExecWithDebianMinimal(t *testing.T) {

// Verify the app exited but VM is still usable (key behavior this test validates)
logs = collectTestLogs(t, svc, inst.Id, 200)
assert.Contains(t, logs, "overlay-init: app exited with code", "App should have exited")
assert.Contains(t, logs, "[exec] app exited with code", "App should have exited")

// Test exec commands work even though the main app (bash) has exited
dialer2, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID)
Expand Down
213 changes: 213 additions & 0 deletions integration/systemd_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
package integration

import (
"bytes"
"context"
"os"
"strings"
"testing"
"time"

"github.com/onkernel/hypeman/cmd/api/config"
"github.com/onkernel/hypeman/lib/devices"
"github.com/onkernel/hypeman/lib/guest"
"github.com/onkernel/hypeman/lib/hypervisor"
"github.com/onkernel/hypeman/lib/images"
"github.com/onkernel/hypeman/lib/instances"
"github.com/onkernel/hypeman/lib/network"
"github.com/onkernel/hypeman/lib/paths"
"github.com/onkernel/hypeman/lib/system"
"github.com/onkernel/hypeman/lib/volumes"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestSystemdMode verifies that hypeman correctly detects and runs
// systemd-based images with systemd as PID 1.
//
// This test uses the jrei/systemd-ubuntu image from Docker Hub which runs
// systemd as its CMD. The test verifies that hypeman auto-detects this and:
//   - Uses systemd mode (chroot to container rootfs)
//   - Starts systemd as PID 1
//   - Injects and starts the hypeman-agent.service
//
// The test is skipped in -short mode and on hosts without /dev/kvm.
func TestSystemdMode(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	// Skip if KVM is not available
	if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) {
		t.Skip("/dev/kvm not available")
	}

	// Upper bound for the whole test body (image pull, boot, and all execs).
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	// Set up test environment
	tmpDir := t.TempDir()
	p := paths.New(tmpDir)

	cfg := &config.Config{
		DataDir:    tmpDir,
		BridgeName: "vmbr0",
		SubnetCIDR: "10.100.0.0/16",
		DNSServer:  "1.1.1.1",
	}

	// Create managers
	imageManager, err := images.NewManager(p, 1, nil)
	require.NoError(t, err)

	systemManager := system.NewManager(p)
	networkManager := network.NewManager(p, cfg, nil)
	deviceManager := devices.NewManager(p)
	volumeManager := volumes.NewManager(p, 0, nil)

	limits := instances.ResourceLimits{
		MaxOverlaySize:       100 * 1024 * 1024 * 1024,
		MaxVcpusPerInstance:  0,
		MaxMemoryPerInstance: 0,
		MaxTotalVcpus:        0,
		MaxTotalMemory:       0,
	}

	instanceManager := instances.NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, "", nil, nil)

	// Cleanup any orphaned instances.
	//
	// Cleanup callbacks run after the test function has returned, i.e. after
	// the deferred cancel() above has already cancelled ctx. Use a dedicated
	// context here so the delete is not issued with a cancelled context.
	t.Cleanup(func() {
		cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cleanupCancel()
		// Best effort: the instance may not exist if the test failed early.
		instanceManager.DeleteInstance(cleanupCtx, "systemd-test")
	})

	imageName := "docker.io/jrei/systemd-ubuntu:22.04"

	// Pull the systemd image
	t.Log("Pulling systemd image:", imageName)
	_, err = imageManager.CreateImage(ctx, images.CreateImageRequest{
		Name: imageName,
	})
	require.NoError(t, err)

	// Wait for image to be ready: poll once per second, up to 120 attempts,
	// stopping early if the test context ends.
	t.Log("Waiting for image build...")
	var img *images.Image
	for attempt := 0; attempt < 120; attempt++ {
		img, err = imageManager.GetImage(ctx, imageName)
		if err == nil && img != nil && img.Status == images.StatusReady {
			break
		}
		select {
		case <-ctx.Done():
			require.NoError(t, ctx.Err(), "context ended while waiting for image")
		case <-time.After(1 * time.Second):
		}
	}
	// Fail with a clear message instead of dereferencing a nil image when the
	// last lookup errored.
	require.NoError(t, err, "image lookup should succeed")
	require.NotNil(t, img, "image should be returned")
	require.Equal(t, images.StatusReady, img.Status, "image should be ready")

	// Verify systemd detection
	t.Run("IsSystemdImage", func(t *testing.T) {
		isSystemd := images.IsSystemdImage(img.Entrypoint, img.Cmd)
		assert.True(t, isSystemd, "image should be detected as systemd, entrypoint=%v cmd=%v", img.Entrypoint, img.Cmd)
	})

	// Ensure system files (kernel, initrd)
	t.Log("Ensuring system files...")
	err = systemManager.EnsureSystemFiles(ctx)
	require.NoError(t, err)

	// Create the systemd instance
	t.Log("Creating systemd instance...")
	inst, err := instanceManager.CreateInstance(ctx, instances.CreateInstanceRequest{
		Name:           "systemd-test",
		Image:          imageName,
		Size:           2 * 1024 * 1024 * 1024, // 2GB
		HotplugSize:    512 * 1024 * 1024,
		OverlaySize:    1024 * 1024 * 1024,
		Vcpus:          2,
		NetworkEnabled: false, // No network needed for this test
	})
	require.NoError(t, err)
	t.Logf("Instance created: %s", inst.Id)

	// Wait for guest agent to be ready
	t.Log("Waiting for guest agent...")
	err = waitForGuestAgent(ctx, instanceManager, inst.Id, 60*time.Second)
	require.NoError(t, err, "guest agent should be ready")

	// Test: Verify systemd is PID 1
	t.Run("SystemdIsPID1", func(t *testing.T) {
		output, exitCode, err := execInInstance(ctx, inst, "cat", "/proc/1/comm")
		require.NoError(t, err, "exec should work")
		require.Equal(t, 0, exitCode, "command should succeed")

		pid1Name := strings.TrimSpace(output)
		assert.Equal(t, "systemd", pid1Name, "PID 1 should be systemd")
		t.Logf("PID 1 is: %s", pid1Name)
	})

	// Test: Verify guest-agent binary exists
	t.Run("GuestAgentExists", func(t *testing.T) {
		output, exitCode, err := execInInstance(ctx, inst, "test", "-x", "/opt/hypeman/guest-agent")
		require.NoError(t, err, "exec should work")
		assert.Equal(t, 0, exitCode, "guest-agent binary should exist at /opt/hypeman/guest-agent, output: %s", output)
	})

	// Test: Verify hypeman-agent.service is active
	t.Run("AgentServiceActive", func(t *testing.T) {
		output, exitCode, err := execInInstance(ctx, inst, "systemctl", "is-active", "hypeman-agent")
		require.NoError(t, err, "exec should work")
		status := strings.TrimSpace(output)
		assert.Equal(t, 0, exitCode, "hypeman-agent service should be active, status: %s", status)
		assert.Equal(t, "active", status, "service status should be 'active'")
		t.Logf("hypeman-agent service status: %s", status)
	})

	// Test: Verify we can view agent logs via journalctl
	t.Run("AgentLogsAccessible", func(t *testing.T) {
		output, exitCode, err := execInInstance(ctx, inst, "journalctl", "-u", "hypeman-agent", "--no-pager", "-n", "5")
		require.NoError(t, err, "exec should work")
		assert.Equal(t, 0, exitCode, "journalctl should succeed")
		t.Logf("Agent logs (last 5 lines):\n%s", output)
	})

	t.Log("All systemd mode tests passed!")
}

// waitForGuestAgent blocks until a trivial `echo ready` command can be
// executed inside the instance identified by instanceID.
//
// The instance is looked up through mgr and reached over a vsock dialer built
// from its hypervisor type, socket path and CID. timeout is handed to the exec
// call as its WaitForAgent option, so the waiting itself happens inside
// guest.ExecIntoInstance. Any lookup, dial or exec error is returned as-is;
// the command's exit status is not inspected.
func waitForGuestAgent(ctx context.Context, mgr instances.Manager, instanceID string, timeout time.Duration) error {
	target, lookupErr := mgr.GetInstance(ctx, instanceID)
	if lookupErr != nil {
		return lookupErr
	}

	conn, dialErr := hypervisor.NewVsockDialer(target.HypervisorType, target.VsockSocket, target.VsockCID)
	if dialErr != nil {
		return dialErr
	}

	// The probe's output is captured only so the command has somewhere to
	// write; it is never read back.
	probeOutput := new(bytes.Buffer)
	probe := guest.ExecOptions{
		Command:      []string{"echo", "ready"},
		Stdout:       probeOutput,
		TTY:          false,
		WaitForAgent: timeout,
	}

	_, execErr := guest.ExecIntoInstance(ctx, conn, probe)
	return execErr
}

// execInInstance runs command (non-TTY) inside inst over vsock and reports
// the result.
//
// Returns:
//   - on success: the captured stdout, the command's exit code, and a nil error
//     (captured stderr is discarded in this case);
//   - if the vsock dialer cannot be created: "", -1 and the dialer error;
//   - if the exec call itself fails: the captured stderr, -1 and the exec error.
func execInInstance(ctx context.Context, inst *instances.Instance, command ...string) (string, int, error) {
	conn, dialErr := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID)
	if dialErr != nil {
		return "", -1, dialErr
	}

	var (
		outBuf bytes.Buffer
		errBuf bytes.Buffer
	)
	opts := guest.ExecOptions{
		Command: command,
		Stdout:  &outBuf,
		Stderr:  &errBuf,
		TTY:     false,
	}

	status, execErr := guest.ExecIntoInstance(ctx, conn, opts)
	if execErr != nil {
		// Surface whatever the command wrote to stderr alongside the error.
		return errBuf.String(), -1, execErr
	}

	return outBuf.String(), status.Code, nil
}
Loading