diff --git a/.env b/.env new file mode 100644 index 0000000..516be13 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +export PATH="$PWD/bin:$PATH" diff --git a/.github/workflows/ci-pr.yml b/.github/workflows/ci-pr.yml new file mode 100644 index 0000000..01e590e --- /dev/null +++ b/.github/workflows/ci-pr.yml @@ -0,0 +1,33 @@ +name: PR CI + +on: + pull_request: + branches: + - main + types: + - opened + - reopened + - synchronize + +jobs: + checks: + name: Test and Build + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24.x' + cache: true + + - name: Run tests + run: make test + + - name: Build CLI + run: make hind-cli + + - name: Build Images + run: ./bin/hind build all diff --git a/.gitignore b/.gitignore index 09f8a39..abefe68 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ .vscode /bin/ TODO + +skills-lock.json +.claude/skills/golang-pro/ +.claude/worktrees/ +.worktrees/ diff --git a/.team/backlog.md b/.team/backlog.md new file mode 100644 index 0000000..5c3f29f --- /dev/null +++ b/.team/backlog.md @@ -0,0 +1,23 @@ +# Team Backlog + +Closed items: `.team/done/` + +| ID | Title | Type | Priority | Size | Source | Spec | +|-------|-------|------|----------|------|--------|------| +| W-026 | Add GitHub Actions CI for pre-merge checks | maintenance | P1 | S | User | `W-026.md` | +| W-033 | Fix top-priority CLI correctness bugs (`get`/`rm` missing cluster, `stop` partial failure exits) | maintenance | P1 | M | QA audit | `W-033.md` | +| W-017 | Close `hind-stop.feature` behavior gaps | feature | P2 | L | B-015 audit | `W-015.md` | +| W-019 | Enforce `default-cluster.feature` profile-selection contracts | feature | P2 | M | B-015 audit | `W-015.md` | +| W-022 | Refactor `pkg/build/release` for a more ergonomic interface | maintenance | P2 | M | User | `W-022.md` | +| W-034 | Fix low-priority CLI behavior/UX bugs from QA (`list`/`start`/`build`/`set`) | maintenance | P3 | M | QA audit | `W-034.md` | +| W-020 | Fix all `go` test files that use duplicate mocks | maintenance | P3 | S | User | `W-020.md` | +| W-021 | Address B-013 staff review additional observations | maintenance | P3 | S | B-013 review | `W-021.md` | +| W-023 | Allow users to pass arbitrary package versions to `hind build` | feature | P3 | M | User | `W-023.md` | +| W-024 | Allow users to pass arbitrary package versions to `hind start` | feature | P3 | M | User | `W-024.md` | +| W-025 | Publish container images to an OCI registry on version update | feature | P4 | XL | User | `W-025.md` | +| W-027 | Add ID value to cluster nodes `pkg/cluster/types.go` | maintenance | P3 | S | User | `W-027.md` | +| W-028 | Migrate `*_test.go` test cases to `stretchr/testify` | maintenance | P4 | L | User | - | +| W-029 | Add ingress controller for routing traffic to the internal network | feature | P4 | XL | User | - | +| W-030 | Build and publish releases to brew for macos install | feature | P3 | L | User | - | +| W-031 | Add open subcommand to open the web ui of a component | feature | P3 | M | User | - | +| W-032 | Add login subcommand to exec into an interactive shell in a node | feature | P3 | M | User | - | diff --git a/.team/bugs.md b/.team/bugs.md new file mode 100644 index 0000000..d4ab998 --- /dev/null +++ b/.team/bugs.md @@ -0,0 +1,162 @@ +# Bugs + +Closed items: `.team/done/bugs/` + +--- + + + +--- + +## BUG-001 — `hind get` succeeds silently when cluster has no config file on disk + +**Severity:** P2 +**Status:** open +**Source:** QA audit of assigned commands (get, list, stop, set profile) + +**Root cause file:** `pkg/cluster/manager.go:325-331` (`LoadPersistedConfig`) + +**Repro steps:** +1. Ensure no cluster named "ghost" has ever been created (no config file at `~/.config/hind/cluster/ghost/cluster.json`). +2. Run `hind get ghost`. +3. Observe exit code and output. + +**Expected:** Command exits non-zero with a "cluster not found" error message. + +**Actual:** `LoadPersistedConfig` (called by `Manager.Get`) falls through the `!m.ConfigFileExists()` branch without returning an error because `m.config.Name` is non-empty (it was set by `cluster.New` from the supplied arg). The manager then calls `provider.InspectNetwork` and `provider.InspectContainer` using the freshly-synthesised default config, both return `nil` (no such resources), and `Get` returns an empty `ClusterInfo` with no error. `get.runE` renders a table showing `Status: N/A`, `Network: ` (empty), and exits 0. The user receives no indication that the cluster does not exist. + +**Contrast with `hind stop`:** `stop.runE` explicitly calls `clusterMgr.ConfigFileExists()` before proceeding and returns an error if false. `get.runE` has no equivalent guard. + +--- + +## BUG-002 — `hind list` swallows `tabwriter.Flush` error + +**Severity:** P3 +**Status:** open +**Source:** QA audit of assigned commands (get, list, stop, set profile) + +**Root cause file:** `pkg/cmd/hind/list/list.go:110` + +**Repro steps:** +1. Any `hind list` invocation that reaches the table-printing path (at least one cluster exists). +2. Observe: the return value of `w.Flush()` is discarded. + +**Expected:** The error returned by `w.Flush()` (e.g. a broken-pipe or closed writer) is propagated and causes `runE` to return a non-zero exit code, consistent with how `get.runE` handles `w.Flush()` at line 85-87 of `get.go`. + +**Actual:** `w.Flush()` is called as a bare statement with no error check (`w.Flush()` on line 110, no `if err :=` wrapper). If the flush fails the error is silently dropped and the command exits 0, potentially producing truncated output. + +--- + +## BUG-003 — `hind stop` exits 0 even when containers fail to stop + +**Severity:** P2 +**Status:** open +**Source:** QA audit of assigned commands (get, list, stop, set profile) + +**Root cause file:** `pkg/cmd/hind/stop/stop.go:115-121` + +**Two related issues in the same function:** + +**Issue A — `FailedCount > 0` branch always returns nil (line 121)** +When one or more containers fail to stop (without `--force`), `runE` prints the partial-stop warning to `ErrOut` but returns `nil` — exit 0. Scripts cannot detect the failure. + +**Issue B — `--force` branch (line 115) evaluated before `FailedCount` check (line 119)** +When `force=true` and containers failed to kill, the `if force` branch fires first and returns `nil` with "force stopped", so the `FailedCount` path is never reached at all. Individual failure messages are still printed to `ErrOut` (lines 103-105) but the exit code and final status message are incorrect. + +**Repro steps (Issue A):** +1. Start a cluster with at least two containers. +2. Arrange for one container's stop to fail (e.g. Docker daemon error on that container). +3. Run `hind stop ` (no --force). +4. Observe exit code is 0 despite partial failure. + +**Repro steps (Issue B):** +1. Same setup but run `hind stop --force `. +2. Observe "force stopped" message and exit 0 — `FailedCount` path is never reached. + +**Expected:** Both paths should return a non-nil error when `result.FailedCount > 0`. + +--- + +## BUG-004 — `hind rm` succeeds silently when cluster does not exist + +**Severity:** P2 +**Status:** open +**Source:** QA audit of assigned commands (rm, start, build) + +**Root cause file:** `pkg/cluster/manager.go:221-276` (`Delete`) + +**Repro steps:** +1. Ensure no cluster named `ghost` exists. +2. Run `hind rm ghost`. +3. Observe output and exit code. + +**Expected:** Command exits non-zero with an error such as `cluster 'ghost' does not exist`. + +**Actual:** Command exits 0 and prints `Cluster 'ghost' deleted successfully`. `Delete()` does not fail when config/resources are absent, so non-existent deletion is treated as success. + +--- + +## BUG-005 — `hind start` never returns `StartResultAlreadyRunning` + +**Severity:** P3 +**Status:** open +**Source:** QA audit of assigned commands (rm, start, build) + +**Root cause file:** `pkg/cluster/manager.go:72-108` (`Start`) + +**Repro steps:** +1. Run `hind start mycluster`. +2. Run `hind start mycluster` again while already fully running. + +**Expected:** Already-running no-op path should return `StartResultAlreadyRunning` and avoid redundant connection-info output. + +**Actual:** Existing-cluster path always returns `StartResultResumed`, even when reconcile has no actions. `StartResultAlreadyRunning` appears to be dead/unused. + +--- + +## BUG-006 — `hind build` wrong-arg error message uses slice formatting + +**Severity:** P3 +**Status:** open +**Source:** QA audit of assigned commands (rm, start, build) + +**Root cause file:** `pkg/cmd/hind/build/build.go:39` + +**Repro steps:** +1. Run `hind build` (no args), or `hind build a b`. + +**Expected:** Error should report argument count cleanly (e.g. `received 0` / `received 2`) or use Cobra's default exact-args error. + +**Actual:** Custom message uses `%s` with `[]string`, producing messages like `accepts 1 arg, received []` or `[a b]`. + +--- + +## BUG-007 — `hind set profile` writes success message to stderr + +**Severity:** P3 +**Status:** open +**Source:** QA audit of assigned commands (get, list, stop, set profile) + +**Root cause file:** `pkg/cmd/hind/set/set.go:42` + +**Repro steps:** +1. Run `hind set profile `. +2. Redirect stderr to `/dev/null`. + +**Expected:** Success output should be on stdout if treated as user-facing command result. + +**Actual:** Success message is written to `streams.ErrOut`, so it disappears when stderr is redirected. diff --git a/.team/deferred/2026-04-30-hind-releases.md b/.team/deferred/2026-04-30-hind-releases.md new file mode 100644 index 0000000..04bc6af --- /dev/null +++ b/.team/deferred/2026-04-30-hind-releases.md @@ -0,0 +1,405 @@ +# Hind Releases Feature Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Implement the `hind releases` command surface so `hind-releases.feature` scenario "List available hind versions" is fully covered, and the feature file is normalized to remove the two incomplete scenario stubs. + +**Architecture:** A new `pkg/cmd/hind/releases/` command package follows the existing Cobra command pattern (constructor + `runE`). All version data comes from `pkg/build/release` (already implemented); the command layer is presentation-only and uses `text/tabwriter` to render a sorted, column-labeled table matching the feature contract. No new release-layer logic is required. + +**Tech Stack:** Go 1.21+, Cobra, `text/tabwriter`, `pkg/build/release`, `cmd.IOStreams`. + +--- + +## File structure + +| Path | Status | Responsibility | +|------|--------|---------------| +| `pkg/cmd/hind/releases/releases.go` | Create | Command constructor + `runE` — renders sorted version table | +| `pkg/cmd/hind/releases/releases_test.go` | Create | Table-driven tests for column order, row order, header format, and empty-data edge case | +| `pkg/cmd/hind/root.go` | Modify | Register `releases.NewCommand` on root | +| `features/hind-releases.feature` | Modify | Remove the two empty/stub scenarios; normalize Background and List scenario wording | + +--- + +## Task 1: Normalize `hind-releases.feature` + +**Files:** +- Modify: `features/hind-releases.feature` + +The feature file has two malformed stub scenarios ("Create new hind cluster" and "Run non existent hind version") that have no steps and are out of scope for B-020. Remove them. Normalize the Background and the "List available hind versions" scenario to be precise enough for acceptance testing. + +- [ ] **Step 1: Read the current feature file** + +Open `features/hind-releases.feature` and confirm the three scenarios and their step states. + +- [ ] **Step 2: Write the normalized feature file** + +Replace the full file with: + +```gherkin +Feature: HIND releases menu + As a maintainer of the HIND CLI + I want an easy way to view the hind versions and the version of the HashiCorp binaries that are included + So that releases can easily be built and published + + Background: + Given I have defined the hind version in the version configuration + And the hind version has the defined consul version + And the vault version + And the nomad version + + Scenario: List available hind versions + Given I run the "hind releases" command + When I execute the command + Then the CLI will list in a table the available hind versions + And the column header row is printed first with columns: HIND, CONSUL, NOMAD, VAULT + And the first column is the hind version + And the remaining columns are displayed in alphabetical order: consul, nomad, vault + And the latest version is on the first row + And the oldest version is on the last row +``` + +- [ ] **Step 3: Commit** + +```bash +git add features/hind-releases.feature +git commit -m "feat(B-020): normalize hind-releases.feature — remove stubs, clarify list scenario" +``` + +--- + +## Task 2: Implement `pkg/cmd/hind/releases` command + +**Files:** +- Create: `pkg/cmd/hind/releases/releases.go` + +The command lists all hind releases sorted newest-first as a `tabwriter` table with columns: `HIND`, `CONSUL`, `NOMAD`, `VAULT` (alphabetical after HIND). The latest version appears on the first row. + +- [ ] **Step 1: Write the failing test first (see Task 3 — do Task 3 step 1 before this)** + +(Tests are in Task 3. Write tests before implementation per TDD sequence. Come back here after Task 3 Step 1.) + +- [ ] **Step 2: Write the implementation** + +Create `/Users/james/dev/github/stenh0use/hind/pkg/cmd/hind/releases/releases.go`: + +```go +// Package releases implements the "hind releases" command. +// It renders a sorted table of all hind releases and their HashiCorp component versions. +package releases + +import ( + "fmt" + "sort" + "text/tabwriter" + + "github.com/apex/log" + "github.com/spf13/cobra" + + "github.com/stenh0use/hind/pkg/build/release" + "github.com/stenh0use/hind/pkg/cmd" +) + +// NewCommand returns a new cobra.Command for listing hind releases. +func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command { + command := &cobra.Command{ + Use: "releases", + Short: "List available hind releases and their HashiCorp component versions", + Long: "List all available hind releases in a table sorted by version (latest first).", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + return runE(logger, streams) + }, + } + return command +} + +func runE(logger *log.Logger, streams cmd.IOStreams) error { + logger.Debug("Listing hind releases") + + versions := release.List() + if len(versions) == 0 { + fmt.Fprintln(streams.ErrOut, "No releases found") + return nil + } + + // Sort versions descending (latest first) using semver-style string comparison. + // Versions follow "MAJOR.MINOR.PATCH" format so lexicographic sort is valid + // when zero-padded; use sort.Slice with reverse string comparison as a conservative + // baseline. For strict semver ordering in future, switch to golang.org/x/mod/semver. + sort.Slice(versions, func(i, j int) bool { + return versions[i] > versions[j] + }) + + w := tabwriter.NewWriter(streams.Out, 0, 0, 3, ' ', 0) + fmt.Fprintln(w, "HIND\tCONSUL\tNOMAD\tVAULT") + + for _, v := range versions { + info, err := release.Get(v) + if err != nil { + // Skip unknown versions; should not occur with List() output. + logger.Warnf("skipping unknown release %q: %v", v, err) + continue + } + fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", info.Hind, info.Consul, info.Nomad, info.Vault) + } + + return w.Flush() +} +``` + +- [ ] **Step 3: Run go vet** + +```bash +go vet ./pkg/cmd/hind/releases/... +``` + +Expected: no output (success). + +- [ ] **Step 4: Run the tests** + +```bash +go test ./pkg/cmd/hind/releases/ +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add pkg/cmd/hind/releases/releases.go +git commit -m "feat(B-020): add hind releases command — table-rendered sorted release list" +``` + +--- + +## Task 3: Write tests for `releases` command + +**Files:** +- Create: `pkg/cmd/hind/releases/releases_test.go` + +Tests use `cmd.IOStreams` with `bytes.Buffer` and a test-scoped `release.Data` to assert header format, column order, row order (latest first), and the empty-data branch. + +Because `runE` calls `release.List()` and `release.Get()` from the package-level store (which is fixed at compile time), the test approach injects via a function var seam — the same pattern used in `pkg/cmd/hind/build/build.go`. Alternatively, since the package-level store is immutable and deterministic, we can test against the real store and assert structural properties (header present, at least one row, latest version on first row) rather than injecting a fake store. This is simpler and avoids adding a seam solely for tests. + +Choose the real-store approach: tests assert: +1. Header row is first and contains exactly `HIND`, `CONSUL`, `NOMAD`, `VAULT`. +2. Output has at least two rows (header + at least one version). +3. First data row corresponds to `release.Latest().Hind`. +4. Each data row has four tab-separated fields. +5. Empty error output on success. + +- [ ] **Step 1: Write the failing tests** + +Create `/Users/james/dev/github/stenh0use/hind/pkg/cmd/hind/releases/releases_test.go`: + +```go +package releases + +import ( + "bytes" + "strings" + "testing" + + "github.com/apex/log" + + "github.com/stenh0use/hind/pkg/build/release" + "github.com/stenh0use/hind/pkg/cmd" +) + +func testStreams() (cmd.IOStreams, *bytes.Buffer, *bytes.Buffer) { + out := &bytes.Buffer{} + errOut := &bytes.Buffer{} + return cmd.IOStreams{Out: out, ErrOut: errOut}, out, errOut +} + +func testLogger() *log.Logger { + return log.Log +} + +func TestRunE_HeaderRow(t *testing.T) { + streams, out, errOut := testStreams() + if err := runE(testLogger(), streams); err != nil { + t.Fatalf("runE() unexpected error: %v", err) + } + if errOut.Len() != 0 { + t.Errorf("expected empty stderr, got: %q", errOut.String()) + } + + lines := splitLines(out.String()) + if len(lines) < 2 { + t.Fatalf("expected at least 2 lines (header + 1 release), got %d", len(lines)) + } + + header := lines[0] + for _, col := range []string{"HIND", "CONSUL", "NOMAD", "VAULT"} { + if !strings.Contains(header, col) { + t.Errorf("header row %q missing column %q", header, col) + } + } +} + +func TestRunE_LatestVersionFirstRow(t *testing.T) { + streams, out, _ := testStreams() + if err := runE(testLogger(), streams); err != nil { + t.Fatalf("runE() unexpected error: %v", err) + } + + lines := splitLines(out.String()) + if len(lines) < 2 { + t.Fatalf("expected at least 2 lines, got %d", len(lines)) + } + + // First data row (index 1) should start with latest hind version. + latest := release.Latest().Hind + firstDataRow := lines[1] + if !strings.HasPrefix(strings.TrimSpace(firstDataRow), latest) { + t.Errorf("first data row %q does not start with latest version %q", firstDataRow, latest) + } +} + +func TestRunE_DataRowsHaveFourFields(t *testing.T) { + streams, out, _ := testStreams() + if err := runE(testLogger(), streams); err != nil { + t.Fatalf("runE() unexpected error: %v", err) + } + + lines := splitLines(out.String()) + // Skip header line. + for i, line := range lines[1:] { + fields := strings.Fields(line) + if len(fields) != 4 { + t.Errorf("data row %d %q: expected 4 fields, got %d", i+1, line, len(fields)) + } + } +} + +func TestNewCommand_Structure(t *testing.T) { + streams, _, _ := testStreams() + cmd := NewCommand(testLogger(), streams) + + if cmd.Use != "releases" { + t.Errorf("Use = %q, want %q", cmd.Use, "releases") + } + if cmd.Args == nil { + t.Error("Args validator is nil; expected cobra.NoArgs") + } + if cmd.RunE == nil { + t.Error("RunE is nil") + } +} + +// splitLines returns non-empty lines from output. +func splitLines(s string) []string { + var result []string + for _, line := range strings.Split(s, "\n") { + if strings.TrimSpace(line) != "" { + result = append(result, line) + } + } + return result +} +``` + +- [ ] **Step 2: Run the test to confirm it fails (package does not exist yet)** + +```bash +go test ./pkg/cmd/hind/releases/ +``` + +Expected: compile error — package not found. This is the TDD red state. + +- [ ] **Step 3: Implement (return to Task 2, Step 2 above)** + +- [ ] **Step 4: Re-run tests after implementation** + +```bash +go test ./pkg/cmd/hind/releases/ +``` + +Expected: PASS (all four tests green). + +- [ ] **Step 5: Commit test file** + +```bash +git add pkg/cmd/hind/releases/releases_test.go +git commit -m "test(B-020): add releases command table-driven tests" +``` + +--- + +## Task 4: Register releases command on root + +**Files:** +- Modify: `pkg/cmd/hind/root.go` + +- [ ] **Step 1: Add the import** + +In `/Users/james/dev/github/stenh0use/hind/pkg/cmd/hind/root.go`, add to the import block: + +```go +"github.com/stenh0use/hind/pkg/cmd/hind/releases" +``` + +- [ ] **Step 2: Register the command** + +In the `NewCommand` function body, after the existing `rootCmd.AddCommand(...)` calls, add: + +```go +rootCmd.AddCommand(releases.NewCommand(logger, streams)) +``` + +- [ ] **Step 3: Run go vet** + +```bash +go vet ./pkg/cmd/hind/... +``` + +Expected: no output. + +- [ ] **Step 4: Run full test suite** + +```bash +make test +``` + +Expected: PASS across all packages. + +- [ ] **Step 5: Verify CLI wiring** + +```bash +make hind-cli +./bin/hind releases +``` + +Expected: table with header `HIND CONSUL NOMAD VAULT` followed by release rows, latest version (0.4.0) on first row. + +- [ ] **Step 6: Commit** + +```bash +git add pkg/cmd/hind/root.go +git commit -m "feat(B-020): register releases subcommand on root hind command" +``` + +--- + +## Self-review against spec + +**Spec coverage check:** + +| Feature requirement | Task that covers it | +|---|---| +| "list in a table the available hind versions" | Task 2 — tabwriter table output | +| "column names on the first row" | Task 2 — `HIND\tCONSUL\tNOMAD\tVAULT` header; Task 3 TestRunE_HeaderRow | +| "first column is hind version" | Task 2 — first tab field is `info.Hind`; Task 3 TestRunE_DataRowsHaveFourFields | +| "remaining columns in alphabetical order consul, nomad, vault" | Task 2 — column order is hardcoded `CONSUL\tNOMAD\tVAULT`; Task 3 TestRunE_HeaderRow | +| "latest version on first row" | Task 2 — descending sort; Task 3 TestRunE_LatestVersionFirstRow | +| "oldest version on last row" | Task 2 — descending sort covers this implicitly; no separate test needed since sort order is the same invariant | +| Feature file normalization — remove stubs | Task 1 | +| Command registered and reachable | Task 4 | + +**Gap check:** No spec requirement without a task. The two incomplete stubs ("Create new hind cluster", "Run non existent hind version") are explicitly out of scope for B-020; they are removed in Task 1 rather than implemented. + +**Placeholder scan:** No TBD/TODO patterns; all steps have concrete code. + +**Type consistency:** `release.List()` returns `[]string`; `release.Get(v)` returns `(release.Info, error)` — both used consistently in Task 2 and Task 3. diff --git a/.team/deferred/README.md b/.team/deferred/README.md new file mode 100644 index 0000000..d9ffc07 --- /dev/null +++ b/.team/deferred/README.md @@ -0,0 +1,2 @@ +# Deferred items +This folder contains deferred or won't do items diff --git a/.team/done/plans/P-013.md b/.team/done/plans/P-013.md new file mode 100644 index 0000000..dfbad48 --- /dev/null +++ b/.team/done/plans/P-013.md @@ -0,0 +1,367 @@ +# P-013 — Implementation Plan: Migrate image build runtime to pkg/provider +Status: approved +Spec: B-013.md + +--- + +## Overview + +This plan sequences the elimination of `pkg/build/image/internal/docker` as a runtime dependency of `pkg/build/image`, routing all Docker operations through `pkg/provider` interfaces. Five phases with a coupled-commit constraint on Phases 3 and 4. + +--- + +## Phase 1 — Provider contract expansion + +**Goal:** Extend `pkg/provider/image.go` with `BuildImageResult` and the full `BuildImageOptions` fields. Update the `Client` interface signature. Everything compiles; no runtime behaviour changes. + +### Files to change + +**`pkg/provider/image.go`** + +Replace the current `BuildImageOptions` struct with: + +```go +type BuildImageOptions struct { + Name string + Tag string + ContextDir string + BuildArgs map[string]string + Dockerfile string // optional; empty means default Dockerfile + WithCache bool // pass --no-cache when false + Platform string // optional; empty means omit --platform — do not supply a default +} +``` + +Add the new result type immediately below `BuildImageOptions`: + +```go +// BuildImageResult is the structured result of a successful image build. +// Both fields are non-empty on success; an empty Digest or ImageRef is an error condition. +type BuildImageResult struct { + Digest string // sha256 digest, e.g. "sha256:abc123..." + ImageRef string // fully qualified ref, e.g. "hind/consul:0.1.0" +} +``` + +**`pkg/provider/provider.go`** + +Change the `BuildImage` method signature on `Client`: + +```go +BuildImage(ctx context.Context, opts BuildImageOptions) (BuildImageResult, error) +``` + +**`pkg/provider/dockercli/build.go`** + +Update the stub implementation to match the new signature (return `BuildImageResult` instead of `string`). The body can return `BuildImageResult{}, fmt.Errorf(...)` for now — full implementation is Phase 2. This is just enough to restore compilation. + +### Tests to add or update + +- No new tests required in this phase (it is additive and the adapter body is still a stub). +- Confirm `make test` passes (compilation gate only). + +### Definition of done + +- `go build ./...` succeeds with no errors. +- `make test` passes. +- `provider.Client.BuildImage` signature matches `(ctx, BuildImageOptions) (BuildImageResult, error)` exactly. +- `BuildImageOptions` carries all six fields listed in the spec. +- `BuildImageResult` is exported with `Digest` and `ImageRef` string fields. + +--- + +## Phase 2 — dockercli adapter parity (buildx + digest extraction) + +**Goal:** Implement `BuildImage` in `pkg/provider/dockercli/build.go` with full behavioral parity: buildx availability check at call time, `--load` flag, metadata-file-based digest extraction, structured result return. Introduce a `CommandExecutor`-style seam so unit tests can inject fake execution. + +### Files to change + +**`pkg/provider/dockercli/build.go`** + +1. **CommandExecutor seam.** Define a `CommandExecutor` interface local to the `dockercli` package (or import it from a shared location if one is preferred later). Mirror the interface already in `internal/docker/docker.go`: + + ```go + type CommandExecutor interface { + Run(ctx context.Context, dir string, stdout, stderr io.Writer, name string, args ...string) error + Output(ctx context.Context, dir string, name string, args ...string) ([]byte, error) + } + ``` + + Add an `executor CommandExecutor` field to `Client` in `pkg/provider/dockercli/client.go`. Populate it from a package-level `osCommandExecutor` default in `New(...)`. Tests set it directly before calling `BuildImage`. + + Alternatively (simpler), add a `withExecutor` constructor for tests only: + ```go + func newWithExecutor(logger *log.Logger, exec CommandExecutor) *Client + ``` + +2. **Buildx availability check — at call time.** Inside `BuildImage`, before running the build, call a package-private `checkBuildxAvailable(ctx, executor)` that runs: + + ``` + docker system info --format {{json .}} + ``` + + Parse the JSON (reuse or mirror the `DockerInfo`/`HasClientPlugin` logic from `internal/docker/info.go`). If buildx is absent, return: + + ```go + return BuildImageResult{}, fmt.Errorf("buildx client plugin is needed but not installed") + ``` + + `New(...)` in `client.go` must not call this check. + +3. **Build command.** Construct and run: + + ``` + docker buildx build + -t : + --load + --metadata-file /metadata.json + [--no-cache] // when opts.WithCache == false + [-f ] // when non-empty + [--platform ] // only when non-empty — no default + [--build-arg K=V ...] + . + ``` + + Run the command with the executor, capturing stdout and stderr. On non-zero exit, return a wrapped error including stderr. + +4. **Digest extraction.** After a successful build, read `/metadata.json`, unmarshal into a local `buildMetadata` struct: + + ```go + type buildMetadata struct { + ContainerImageDigest string `json:"containerimage.config.digest"` + ImageName string `json:"image.name"` + } + ``` + + Extract `ContainerImageDigest`. If it is empty or does not begin with `sha256:`, return an error rather than a zero-value result. + +5. **Return.** On success: + + ```go + return provider.BuildImageResult{ + Digest: metadata.ContainerImageDigest, + ImageRef: fmt.Sprintf("%s:%s", opts.Name, opts.Tag), + }, nil + ``` + + Validate both fields are non-empty before returning; return an error if either is empty. + +**`pkg/provider/dockercli/client.go`** + +Add `executor CommandExecutor` field to `Client`. Set it to `osCommandExecutor{}` in `New(...)`. Add unexported `newWithExecutor` for tests. + +**`pkg/provider/dockercli/info.go`** (new file, or inline in build.go) + +Copy or adapt `DockerInfo` and `HasClientPlugin` from `internal/docker/info.go` into the `dockercli` package to support the buildx check. Do not import from `internal/docker` — the goal is provider self-containment. + +### Tests to add or update + +Create `pkg/provider/dockercli/build_test.go`: + +1. **`TestBuildImage_BuildxAbsent`** — inject a fake executor whose `Output` returns JSON with no buildx plugin. Assert `BuildImage` returns a non-nil error containing "buildx". Assert the error is returned immediately (no build command executed). + +2. **`TestBuildImage_Success`** — inject a fake executor that: + - Returns valid `docker system info` JSON with buildx listed. + - On the buildx build command, returns exit 0. + - Writes a minimal `metadata.json` to a temp `ContextDir` with a known `containerimage.config.digest` value (`sha256:abc123...`). + Assert `BuildImageResult.Digest` starts with `sha256:` and equals the injected value. Assert `BuildImageResult.ImageRef` equals `":"`. + +3. **`TestNew_SucceedsWithoutBuildx`** — call `New(logger)` with a fake executor that would return a buildx-absent system info response. Assert `New` returns without error. Assert the returned `provider.Client` is non-nil. + +4. **`TestBuildImage_EmptyDigestIsError`** — inject a fake executor that returns a metadata file with an empty `containerimage.config.digest`. Assert `BuildImage` returns an error. + +5. **`TestBuildImage_LoadFlagPresent`** — capture the args passed to the build command in the fake executor. Assert `--load` is present in the args unconditionally. + +6. **`TestBuildImage_PlatformOmittedWhenEmpty`** — assert `--platform` is absent in args when `opts.Platform == ""`. + +7. **`TestBuildImage_NoCacheWhenWithCacheFalse`** — assert `--no-cache` is present in args when `opts.WithCache == false`. + +### Definition of done + +- All seven tests pass. +- `make test` passes. +- `New(logger)` requires no buildx to succeed (AC-3). +- `BuildImage` returns `BuildImageResult` with non-empty `Digest` (starts `sha256:`) and non-empty `ImageRef` on fake-success path (AC-2). +- `BuildImage` returns a descriptive error when buildx is absent (AC-3). +- `--load` is always present in buildx invocation (spec requirement). +- `--platform` is omitted when `Platform` field is empty (spec requirement). +- No import of `pkg/build/image/internal/docker` in the dockercli package. + +--- + +## Phase 3 + Phase 4 — Decouple image.go and rewire builder.go + +> **Coupled-commit note (from spec):** Phases 3 and 4 must land in the same commit. After Phase 3 alone, `builder.go` still references `internal/docker` types that `image.go` no longer provides, causing a compile break. Implement both changes together and commit atomically. + +### Phase 3 — Remove docker.BuildArg from image.go + +**Goal:** Replace `[]docker.BuildArg` with `map[string]string` in `pkg/build/image/image.go`. Remove the `internal/docker` import from that file. + +#### Files to change + +**`pkg/build/image/image.go`** + +- Remove `import "github.com/stenh0use/hind/pkg/build/image/internal/docker"`. +- Change `packagesToBuildArgs()` return type from `([]docker.BuildArg, error)` to `(map[string]string, error)`. Return a `map[string]string` (key = `CONSUL_VERSION` etc., value = version string). +- Change `buildArgs()` return type from `([]docker.BuildArg, error)` to `(map[string]string, error)`. Merge the package args map and add `HIND_VERSION` and `BASE_IMAGE` keys. +- Both methods remain unexported; only their return types change. + +### Phase 4 — Rewire builder.go to use provider only + +**Goal:** Replace all `internal/docker` usage in `pkg/build/image/builder.go` with `pkg/provider` calls. Remove the `internal/docker` import from that file. Preserve the dependency-check error message verbatim (AC-5). + +#### Files to change + +**`pkg/build/image/builder.go`** + +1. **Constructor change.** `Builder` must receive a `provider.Client`. Update `NewBuilder`: + + ```go + func NewBuilder(logger *log.Logger, client provider.Client, kind release.ImageKind) (*Builder, error) + ``` + + Add `client provider.Client` field to `Builder` struct. + +2. **Remove `internal/docker` import.** Delete all references to `docker.NewImage`, `docker.BuildOptions`, `docker.BuildArg`, `dockerImg.UpdateBuildOptions`, `dockerImg.BuildImage`, `dockerImg.TagExists`. + +3. **Rewire `BuildImage`.** Replace `dockerImg.BuildImage(ctx)` with: + + ```go + result, err := b.client.BuildImage(ctx, provider.BuildImageOptions{ + Name: b.image.Kind.ImageName(), + Tag: b.image.Release, + ContextDir: buildFiles.BuildDir(), + BuildArgs: buildArgs, // map[string]string from b.image.buildArgs() + WithCache: false, + Platform: "", + }) + ``` + + On success, log using `result.ImageRef` (or construct the ref from `Name`/`Tag` as before). The logging line must remain: + + ```go + b.logger.WithField("image", result.ImageRef).Info("Successfully built image") + ``` + +4. **Rewire `checkDependencies`.** Replace `docker.NewImage(...).TagExists(ctx)` with: + + ```go + exists, err := b.client.TagExists(ctx, sanitizedName, b.image.BaseImage.Tag) + ``` + + The error message strings must remain byte-for-byte identical (AC-5): + + ```go + return fmt.Errorf("base image dependency not met: %s\n"+ + "Resolution: Run 'hind build %s' to build the required dependency", + sanitizedName, component) + ``` + +5. **Remove logger from NewBuilder if it is now only used for log calls.** Keep `logger` field if logging is still in use (it is — log start/success). Do not remove it. + +**Callers of `NewBuilder`** — find all call sites with: +``` +grep -rn "NewBuilder(" pkg/ cmd/ +``` +Update each call site to pass `client provider.Client` as the second argument. + +### Tests to add or update + +**`pkg/build/image/builder_test.go`** + +- Existing tests for `NewBuilder` (valid/invalid kinds) must be updated to pass a `provider.Client` argument. Use a fake/stub `provider.Client` that satisfies the interface (implement only the methods exercised — `TagExists` and `BuildImage`). +- Add `TestBuilder_BuildImage_CallsProviderBuildImage` — use a stub client that records the `BuildImageOptions` it receives. Assert `Name`, `Tag`, `ContextDir`, and at least one `BuildArgs` key match expected values. +- Add `TestBuilder_CheckDependencies_CallsProviderTagExists` — use a stub that returns `false` from `TagExists`. Assert the returned error contains both required strings. +- Add `TestBuilder_CheckDependencies_SkipsWhenPull` — stub client whose `TagExists` panics (to detect accidental calls). Set `BaseImage.Pull = true`. Assert no error. + +**`pkg/build/image/image_test.go`** (new file if it does not exist) + +- `TestBuildArgs_ReturnsMapWithExpectedKeys` — call `buildArgs()` on a consul `Image`. Assert map contains keys `CONSUL_VERSION`, `HIND_VERSION`, `BASE_IMAGE`. Assert no `docker.BuildArg` type appears (enforced by compile). +- `TestPackagesToBuildArgs_KnownPackages` — assert all packages for a nomad `Image` produce expected version keys. + +### Definition of done (Phases 3+4 together) + +- `go build ./...` succeeds with no errors. +- `make test` passes. +- `grep "internal/docker" pkg/build/image/image.go` returns no matches (AC-4). +- `grep "internal/docker" pkg/build/image/builder.go` returns no matches (AC-1). +- `packagesToBuildArgs()` and `buildArgs()` return `map[string]string` (AC-4). +- Dependency-check error message strings are character-for-character identical to pre-migration output (AC-5). +- All builder tests pass using only `provider.Client` stub — no `internal/docker` concrete type in test file. + +--- + +## Phase 5 — Delete or shim pkg/build/image/internal/docker + +**Goal:** Satisfy AC-7. Preferred outcome per spec is full deletion. + +### Preconditions + +- AC-1 and AC-4 are green: no `.go` file in `pkg/build/image` (outside `internal/docker` itself) imports `internal/docker`. +- Verify with: `grep -r "internal/docker" pkg/build/image/ --include="*.go"` — only matches inside `pkg/build/image/internal/docker/` itself. + +### Files to change + +**Delete the entire directory:** + +``` +pkg/build/image/internal/docker/docker.go +pkg/build/image/internal/docker/docker_test.go +pkg/build/image/internal/docker/info.go +``` + +If any logic in `internal/docker` is still needed (e.g., `DockerInfo` JSON parsing), verify it has been reproduced in `pkg/provider/dockercli` (Phase 2) before deleting. + +**If full deletion is not yet safe** (e.g., a test dependency remains), reduce to a shim: + +- Replace `docker.go` and `info.go` with a single `shim.go` containing only the package declaration and a prominent comment: + + ```go + // Package docker is a deprecated shim scheduled for removal. + // Tracked by: + // No production code path imports this package. + package docker + ``` + + Delete `docker_test.go` entirely (tests for deleted logic have no value). + +### Tests to add or update + +- No new tests. Ensure `make test` still passes after deletion. +- Run `go vet ./...` to catch any stale references. + +### Definition of done + +- `make test` passes. +- `go build ./...` succeeds. +- Either: + - `pkg/build/image/internal/docker` directory does not exist, or + - It contains only a shim file with a comment referencing a removal ticket, and `grep -r "internal/docker" pkg/build/image/ --include="*.go"` returns matches only inside that directory. +- AC-7 is satisfied. + +--- + +## Acceptance criteria verification checklist + +| AC | Verified by | +|----|-------------| +| AC-1 | `grep -r "internal/docker" pkg/build/image/ --include="*.go"` — no matches outside the `internal/docker` dir itself | +| AC-2 | Unit test `TestBuildImage_Success` passes; asserts `Digest` starts `sha256:` and `ImageRef` is non-empty | +| AC-3 | `TestBuildImage_BuildxAbsent` and `TestNew_SucceedsWithoutBuildx` pass | +| AC-4 | `grep "internal/docker" pkg/build/image/image.go` — no matches; return types compile as `map[string]string` | +| AC-5 | `TestBuilder_CheckDependencies_CallsProviderTagExists` asserts exact error strings; manual smoke test | +| AC-6 | Manual: `make hind-cli && ./bin/hind build consul`; then `docker images -q hind/consul:` returns non-empty | +| AC-7 | `internal/docker` directory deleted or reduced to documented shim with no production importers | + +--- + +## Commit strategy + +| Phase | Commit message | +|-------|---------------| +| 1 | `feat(provider): add BuildImageResult and expand BuildImageOptions` | +| 2 | `feat(dockercli): implement BuildImage with buildx, --load, and digest extraction` | +| 3+4 | `refactor(image): decouple from internal/docker, wire builder through provider` | +| 5 | `chore(image): delete internal/docker package` | + +Each phase commit must leave `go build ./...` and `make test` green, except that Phases 3 and 4 are a single commit and must not be split. diff --git a/.team/done/specs/W-013.md b/.team/done/specs/W-013.md new file mode 100644 index 0000000..df381d9 --- /dev/null +++ b/.team/done/specs/W-013.md @@ -0,0 +1,186 @@ +# B-013 Spec — Migrate image build runtime interactions from `internal/docker` to `pkg/provider` + +Status: revised spec — staff approved (2026-05-01) +Source work item: B-013 + +## Goal + +Eliminate the direct `pkg/build/image/internal/docker` dependency from `pkg/build/image`, routing all runtime Docker operations through `pkg/provider` interfaces instead. As part of this migration, `BuildImage` on the provider interface gains a structured, digest-bearing result type so callers never parse provider-private files. + +## Scope completed +- Discovery/spec only completed for B-013; no product-code edits were made. +- Runtime interactions in image build flows were traced and mapped from `pkg/build/image/internal/docker` to target `pkg/provider` seams. + +## Inventory of `internal/docker` usages in image build flow +- Direct package imports and call paths: + - `pkg/build/image/builder.go` + - imports `pkg/build/image/internal/docker` + - constructs `docker.NewImage(...)` + - uses `UpdateBuildOptions(...)` + - invokes `BuildImage(ctx)` for runtime build + - uses `TagExists(ctx)` during base-image dependency checks + - `pkg/build/image/image.go` + - imports `pkg/build/image/internal/docker` + - returns `[]docker.BuildArg` from `packagesToBuildArgs()` and `buildArgs()` (domain/model leak) +- Runtime command interactions encapsulated in `internal/docker/docker.go`: + - `docker system info --format {{json .}}` (plugin/dependency preflight) + - `docker buildx build ... --metadata-file metadata.json` (image build) + - `docker images -q ` (tag existence) + - metadata file read/parse from build context (`metadata.json`) to obtain digest + +## Provider interfaces/adapters required for replacement +- Existing provider surface (present): + - `provider.Client.BuildImage(ctx, opts)` + - `provider.Client.TagExists(ctx, name, tag)` + - `provider.Client.PullImage(ctx, name, tag)` +- Required additive contract for parity: + - `BuildImage` must return structured output (at least digest + image ref), not empty string. + - Build options must support deterministic build args and any buildx parity options needed by current flow (metadata capture, platform/cache toggles where required). + - Buildx availability must be verified inside `BuildImage` at call time (see API contract below). + - Adapter-owned metadata extraction strategy (provider returns digest directly; callers should not parse provider-private files). +- Adapter changes: + - `pkg/provider/dockercli/build.go` must migrate from `docker build` to buildx-capable flow (or equivalent digest-producing strategy) to match existing behavior. + +## API contract + +### BuildImage return type + +The current `BuildImage(ctx, opts) (string, error)` signature on `provider.Client` changes to: + +```go +BuildImage(ctx context.Context, opts BuildImageOptions) (BuildImageResult, error) +``` + +`BuildImageResult` is a new exported struct defined in `pkg/provider/image.go`: + +```go +type BuildImageResult struct { + Digest string // sha256 digest of the built image (e.g. "sha256:abc123...") + ImageRef string // fully qualified image reference (e.g. "hind/consul:0.1.0") +} +``` + +Both fields must be non-empty on a successful build. A result with an empty `Digest` or empty `ImageRef` is an error condition; the adapter must return an error rather than a zero-value struct. + +### Buildx availability check — at call time, inside `BuildImage`, not at construction + +Buildx availability is checked inside the `dockercli` adapter's `BuildImage` implementation, not in `dockercli.New(...)`. `dockercli.New` must succeed regardless of whether buildx is installed; it performs no buildx probe. There is no capability-check method on `provider.Client`. When `BuildImage` is called and buildx is not present, the method returns a descriptive error immediately. When buildx is available, the build proceeds normally. Users who start a cluster using already-published images and never call `BuildImage` are entirely unaffected. + +### Docker CLI adapter parity — `--load` flag + +The `dockercli` adapter must pass `--load` to the buildx command unconditionally. Without `--load`, buildx may complete successfully without making the image available in the local Docker image store, which would silently break any subsequent `TagExists` check or container creation referencing that image. + +### BuildImageOptions expansion + +`BuildImageOptions` in `pkg/provider/image.go` must be extended to carry the fields the adapter needs: + +```go +type BuildImageOptions struct { + Name string + Tag string + ContextDir string + BuildArgs map[string]string + Dockerfile string // optional; empty means default Dockerfile + WithCache bool // pass --no-cache when false + Platform string // optional; empty string means omit --platform entirely — do not pass a default +} +``` + +`BuildArgs` remains `map[string]string` (provider-neutral). The `internal/docker.BuildArg` slice type is not exposed outside `pkg/build/image/internal/docker`. + +The `Platform` field zero-value is intentionally "omit". When `Platform` is empty the adapter must not pass a `--platform` flag to buildx. The adapter must not substitute any default platform string. + +## Behavioral invariant — dependency-check error message + +The error output produced when a required base image is absent must contain the following exact strings: + +- The plugin-not-found message: `base image dependency not met: ` +- The resolution hint: `Resolution: Run 'hind build ' to build the required dependency` + +The orchestration path responsible for this message may be refactored, but the output observed by the user must match these strings verbatim. QA validates by removing the local base image and running the dependent build command (see AC-5). + +## Migration estimate by component/call path +- Component A: Provider contract expansion (`pkg/provider` types + interface) + - Size: M + - Risk: low-medium (additive API changes, downstream compile impact manageable) +- Component B: Docker CLI adapter parity (`pkg/provider/dockercli/build.go`) + - Size: M-L + - Risk: medium-high (behavior parity around digest/metadata/error handling, `--load` flag, buildx constructor check) +- Component C: Image domain type decoupling (`pkg/build/image/image.go`) + - Size: S-M + - Risk: medium (touches argument plumbing/tests) +- Component D: Build orchestrator rewiring (`pkg/build/image/builder.go`) + - Size: S-M + - Risk: medium (must preserve dependency resolution UX/messages) +- Component E: Legacy package retirement/shim (`pkg/build/image/internal/docker`) + - Size: S-M + - Risk: medium (test migration + cleanup sequencing) + +## Recommended sequencing and blockers +- Phase 1: Add provider result/types and expand `BuildImageOptions` (additive). +- Phase 2: Implement buildx availability check inside `BuildImage`; implement adapter parity for digest-producing builds (buildx + `--load` + metadata extraction). +- Phase 3: Decouple `image.go` from `docker.BuildArg` into provider-neutral args. +- Phase 4: Rewire `builder.go` to provider-only runtime interface. +- Phase 5: Remove direct runtime dependency on `internal/docker`; retain short-lived shim only if needed for rollout safety. + +**Coupled commit note:** Phase 3 (`image.go` type changes) and Phase 4 (`builder.go` rewiring) must land in the same commit. The intermediate state after Phase 3 alone produces a compile break because `builder.go` still references `internal/docker` types that have been removed from `image.go`. Do not leave a passing build between these two phases. + +- Primary blocker: + - `pkg/provider/dockercli/build.go` currently returns an empty build result and lacks buildx metadata parity; orchestration switch should not proceed until adapter parity (digest return, `--load`, call-time buildx capability check inside `BuildImage`) is demonstrated. + +## Guidance for non-conforming call paths +- Any call path that reads `metadata.json` outside provider boundary is non-conforming; move digest derivation into provider adapter and return result via provider types. +- Any domain package returning `internal/docker` types is non-conforming; replace with local/provider-neutral structs and transform at boundary. +- Any direct docker command orchestration outside provider adapters is non-conforming for target architecture. + +## Acceptance criteria + +Each item below is a pass/fail condition. All must pass before this item is closed. + +**AC-1 — No internal/docker import in pkg/build/image after migration** +`grep -r "internal/docker" pkg/build/image/` returns no matches in any `.go` file outside of `pkg/build/image/internal/docker` itself. This includes `builder.go` and `image.go`. + +**AC-2 — BuildImage returns a non-empty structured result** +`provider.Client.BuildImage` returns `(BuildImageResult, error)`. On a successful build, `BuildImageResult.Digest` is non-empty and begins with `sha256:`, and `BuildImageResult.ImageRef` is non-empty and matches the expected `name:tag` form. A unit test on the `dockercli` adapter must assert both fields are populated after a successful mock/fake build invocation. + +**AC-3 — BuildImage returns a descriptive error when buildx is not available** +When `BuildImage` is called on the `dockercli` adapter and the buildx plugin is not present, the method returns a non-nil, descriptive error. When buildx is available, `BuildImage` proceeds normally. `dockercli.New(...)` does not check for buildx and must succeed regardless of whether buildx is installed. `provider.Client` declares no capability-check method. A unit test must assert that when buildx is absent, `BuildImage` returns the error; a separate test must assert that `dockercli.New` succeeds in a buildx-absent environment. Users who never call `BuildImage` are unaffected. + +**AC-4 — image.go build arg functions return provider-neutral types** +`packagesToBuildArgs()` and `buildArgs()` in `pkg/build/image/image.go` return `[]provider.BuildArg` or an equivalent local struct — not `[]docker.BuildArg`. The file has no import of `pkg/build/image/internal/docker`. + +**AC-5 — Dependency-check failure message is unchanged (regression guard)** +Running `hind build nomad` when the `consul` base image does not exist locally produces output containing exactly: +``` +base image dependency not met: +Resolution: Run 'hind build ' to build the required dependency +``` +This message must be identical to the pre-migration output. QA validates by removing the local consul image and running the build command. + +**AC-6 — End-to-end build produces a locally tagged image** +`hind build consul` (or any valid component) completes without error and `docker images -q hind/consul:` returns a non-empty image ID. This validates functional parity with the pre-migration flow, including the `--load` flag ensuring local image store availability. + +**AC-7 — internal/docker is deleted or reduced to a documented shim** +After migration, either: +- `pkg/build/image/internal/docker` is deleted entirely, or +- It is reduced to a named shim with a comment referencing a tracking ticket for final removal, and no production code path imports it. + +A shim is acceptable only if a concrete removal ticket exists. The spec author considers full deletion the preferred outcome. + +## Test migration guidance +- Unit tests to add/update: + - Provider contract tests for digest-bearing build results and validation errors. + - `dockercli` adapter tests for buildx invocation args (including `--load` presence), metadata/digest parsing behavior, and error wrapping. + - `BuildImage` tests asserting the buildx-absent error path, and a separate `dockercli.New` test asserting it succeeds when buildx is absent. + - `builder` tests asserting provider interaction only (no `internal/docker` concrete dependency). + - `image` tests asserting build arg generation without `internal/docker` type coupling. +- Testability guidance — digest extraction seam: + - The `dockercli` adapter should preserve a command-execution seam analogous to the `CommandExecutor` interface in `internal/docker/docker.go`. This allows unit tests to inject a fake executor and assert digest extraction behavior (metadata file parsing, error handling) without running real Docker. The seam may be a field on the adapter struct populated at construction, following the same pattern already established in `internal/docker`. +- Regression expectations: + - Preserve dependency-check failure UX (`hind build ` guidance) and existing tag lookup semantics. + - Preserve build success/failure logging semantics at orchestration layer. + +## Staff verdict +- Verdict: approved +- Reason: B-013 acceptance criteria are fully satisfied as discovery/spec output with explicit migration boundaries, interface requirements, sequencing, blockers, and test guidance. +- Next role: engineer implementation planning/execution, then QA parity validation gate. diff --git a/.team/handoff.md b/.team/handoff.md new file mode 100644 index 0000000..fb3d122 --- /dev/null +++ b/.team/handoff.md @@ -0,0 +1,11 @@ +# Handoff + +## Active Work +- None. + +## Completed +- WI-001 / W-026: Finalized spec, implemented PR CI workflow, passed verification, staff-approved, QA-approved. + +## Notes +- Ready for next work item. +- Commit created for W-026 deliverables. diff --git a/.team/log.md b/.team/log.md new file mode 100644 index 0000000..6d08840 --- /dev/null +++ b/.team/log.md @@ -0,0 +1,30 @@ +# Team Log + +## Staff Verdict: Plan Sign-off for WI-001 (W-026) +Plan approved. Scope is limited to PR CI workflow with `make test` and `make hind-cli` on Go 1.24.x for pull requests to main. No architecture concerns. + +## QA Sign-off Review Dispatch for WI-001 +Mode: sign-off review. +Staff verdict heading: "Staff Verdict: Plan Sign-off for WI-001 (W-026)". +Relevant files: `.team/specs/W-026.md`, `.team/plans/P-026.md`. +Acceptance criteria: W-026 spec criteria 1-6. +Output target: write defects to `.team/bugs.md`; write a no-findings line in `.team/log.md`. + +## QA No-Findings: WI-001 Plan Gate +No defects found in plan/spec alignment for W-026 at plan gate. + +## Staff Verdict: Final Implementation Review for WI-001 (W-026) +Approved. Workflow implementation matches the approved plan and acceptance criteria. Required CI checks are correctly defined for PRs to main. + +## QA Sign-off Review Dispatch for WI-001 (Implementation) +Mode: sign-off review, then CLI QA run. +Staff verdict heading: "Staff Verdict: Final Implementation Review for WI-001 (W-026)". +Relevant files: `.github/workflows/ci-pr.yml`, `.team/specs/W-026.md`. +Acceptance criteria: W-026 spec criteria 1-6. +Output target: write defects to `.team/bugs.md`; write a no-findings line in `.team/log.md`. + +## QA No-Findings: WI-001 Implementation Gate +Independent QA validation found no defects for W-026 implementation. + +## Completion Summary: WI-001 +W-026 is complete with a new GitHub Actions PR CI workflow that triggers on pull requests to main (opened, reopened, synchronize), sets up Go 1.24.x, runs `make test`, and builds the CLI with `make hind-cli`. Local verification passed for both required checks. Staff-engineer approved both plan and final implementation, and QA recorded no findings at plan and implementation sign-off gates. diff --git a/.team/plans/P-026.md b/.team/plans/P-026.md new file mode 100644 index 0000000..f43bcb5 --- /dev/null +++ b/.team/plans/P-026.md @@ -0,0 +1,16 @@ +# P-026: Implement PR CI workflow for W-026 + +## Work item +WI-001 / W-026 + +## Steps +1. Add `.github/workflows/ci-pr.yml` with pull_request trigger to main on opened/reopened/synchronize. +2. Configure job on ubuntu-latest with Go 1.24.x, checkout, cache via setup-go, then run: + - `make test` + - `make hind-cli` +3. Validate locally with `make test` and `make hind-cli`. +4. Update `.team` artifacts with implementation, QA, and staff review outcomes. +5. Commit changes with a focused message for W-026. + +## Risks +- CI runtime length may increase due to test suite duration. diff --git a/.team/specs/W-015.md b/.team/specs/W-015.md new file mode 100644 index 0000000..536fdf4 --- /dev/null +++ b/.team/specs/W-015.md @@ -0,0 +1,38 @@ +# B-015 — Feature spec vs implementation audit + +Date: 2026-04-30 +Scope: `hind-releases.feature`, `hind-build.feature`, `default-cluster.feature`, `hind-start.feature`, `hind-stop.feature` + +## Status matrix + +- `hind-start.feature`: **partially implemented** +- `hind-stop.feature`: **partially implemented** +- `hind-build.feature`: **partially implemented** +- `default-cluster.feature`: **partially implemented** +- `hind-releases.feature`: **not implemented** + +## Evidence summary + +### `hind-stop.feature` — partially implemented +Implemented: +- default/positional/explicit cluster name selection behavior (with active-cluster fallback) +- stop flow with success message and preserved config semantics +- timeout flag exists +- cluster-not-found error path exists + +Gaps: +- no `--force` flag behavior +- no `--verbose` flag behavior/log sequence +- no partial-stop warning/success semantics for per-container failures +- no explicit already-stopped idempotent success message contract +- no explicit unhealthy-container handling message contract + +### `default-cluster.feature` — partially implemented +Implemented: +- successful `hind start` sets active cluster +- `hind set profile [name]` command exists + +Gaps: +- `hind set profile [name]` does not verify cluster existence before setting active cluster +- active cluster reset behavior references `hind rm`; actual command surface is `hind rm`, and reset semantics need explicit spec alignment +- failure message contract for non-existent profile is not enforced diff --git a/.team/specs/W-020.md b/.team/specs/W-020.md new file mode 100644 index 0000000..9209e86 --- /dev/null +++ b/.team/specs/W-020.md @@ -0,0 +1,50 @@ +# B-020 Spec — Review all `go` test files for mock interface duplication + +Status: open +Source: User + +## Goal + +Audit every test file for locally-defined mock/stub/fake types, identify which duplicate interfaces already served by `pkg/provider/mock.ClientStub`, and consolidate where it is safe to do so. + +## Inventory + +| File | Type | Interface | Status | +|------|------|-----------|--------| +| `pkg/build/image/builder_test.go` | `providerStub` | `provider.Client` | **DUPLICATE** — see Item 1 | +| `pkg/provider/dockercli/build_test.go` | `fakeExecutor` | `CommandExecutor` | OK — internal `dockercli` interface, no shared mock | +| `pkg/cmd/hind/start/start_test.go` | `stubStartManager` | `clusterStarter` | OK — narrow command-layer interface | +| `pkg/cmd/hind/get/get_test.go` | `stubClusterManager` | `clusterManager` | OK — narrow command-layer interface | +| `pkg/cmd/hind/rm/rm_test.go` | `stubDeleter` | `clusterDeleter` | OK — narrow command-layer interface | +| `pkg/cmd/hind/stop/stop_test.go` | `fakeStopManager` | `clusterStopper` | OK — narrow command-layer interface | + +## Items + +### Item 1 — `providerStub` duplicates `mock.ClientStub` + +**File:** `pkg/build/image/builder_test.go:17-70` + +`providerStub` manually implements all 14 methods of `provider.Client`. The canonical shared mock, `pkg/provider/mock.ClientStub`, already covers every method with the same optional-func-field injection pattern. The only behavioural difference is the `BuildImage` default return: `providerStub` returns a non-zero result (`Digest: "sha256:stub"`, `ImageRef: opts.Name+":"+opts.Tag`), whereas `mock.ClientStub` returns the zero value (which is intentionally invalid per B-021 AC-2). + +Replace `providerStub` with `mock.ClientStub`: +- Wire `BuildImageFn` explicitly in any test that asserts on a `BuildImageResult` (currently `TestBuilder_BuildImage_CallsProviderBuildImage` and the two tests that need a default non-error build). +- Wire `TagExistsFn` explicitly where needed (already done by the existing tests that set it). +- Delete the `providerStub` type and its 14 method definitions, and the `newStubClient` helper. +- Update imports: replace the `provider` import used only by `providerStub` if it becomes unused; add `"github.com/stenh0use/hind/pkg/provider/mock"`. + +Note: `builder_test.go` is `package image` (white-box), so it can import `pkg/provider/mock` without a cycle. + +## Out of scope + +The command-layer stubs (`stubStartManager`, `stubClusterManager`, `stubDeleter`, `fakeStopManager`) implement narrow local interfaces defined inside each command package. They are the correct pattern for that layer and should not be consolidated. + +## Acceptance criteria + +**AC-1 — `providerStub` removed** +`pkg/build/image/builder_test.go` contains no `providerStub` type or `newStubClient` helper. All tests in that file compile and pass. + +**AC-2 — `mock.ClientStub` used instead** +`builder_test.go` imports `pkg/provider/mock` and constructs test doubles using `&mock.ClientStub{...}` with explicit `BuildImageFn`/`TagExistsFn` wiring where needed. + +**AC-3 — No behaviour change** +`make test` passes with no new failures. diff --git a/.team/specs/W-021.md b/.team/specs/W-021.md new file mode 100644 index 0000000..92a8d67 --- /dev/null +++ b/.team/specs/W-021.md @@ -0,0 +1,61 @@ +# B-021 Spec — Address B-013 staff review additional observations + +Status: open +Source: `.team/log.md` B-013 review — "Additional observations (non-blocking, engineer discretion)" +Related spec: `.team/done/specs/B-013.md` + +## Goal + +Clean up the three non-blocking observations raised during the B-013 staff review that were deferred for follow-up. + +## Items + +### Item 1 — `TagExists`/`PullImage` bypass `c.executor` seam + +**File:** `pkg/provider/dockercli/build.go` + +`TagExists` and `PullImage` call `baseClientCmd` directly instead of routing through `c.executor`. This is a pre-existing inconsistency not introduced by B-013. The consequence is that these two methods cannot be faked via the `CommandExecutor` seam, so unit tests cannot intercept their Docker calls. + +Decide and document one of: +- Route both through `c.executor` (enables unit testing, aligns with `BuildImage` pattern) +- Leave as-is and add a comment noting the inconsistency and the testing limitation + +### Item 2 — `mock.ClientStub.BuildImage` default returns invalid zero value + +**File:** `pkg/provider/mock/mock.go:81` + +The default `BuildImage` implementation (when `BuildImageFn` is nil) returns `provider.BuildImageResult{}, nil` — empty `Digest` and `ImageRef`. Per the B-013 spec contract (AC-2 and `B-013.md` API contract), a result with either field empty is an error condition. A test using `&mock.ClientStub{}` directly without wiring `BuildImageFn` can silently assert against an invalid result. + +Add a comment on the `BuildImage` method explaining that the zero-value default is intentionally invalid per the spec contract, and that callers must wire `BuildImageFn` when they need to assert on the result. + +### Item 3 — `imageRef` intermediate variable in `BuildImage` + +**File:** `pkg/provider/dockercli/build.go:52-57` + +The intermediate `imageRef` variable is set and immediately used in the return literal. It can be inlined: + +```go +// before +imageRef := fmt.Sprintf("%s:%s", opts.Name, opts.Tag) +return provider.BuildImageResult{ + Digest: digest, + ImageRef: imageRef, +}, nil + +// after +return provider.BuildImageResult{ + Digest: digest, + ImageRef: fmt.Sprintf("%s:%s", opts.Name, opts.Tag), +}, nil +``` + +## Acceptance criteria + +**AC-1 — Item 1 resolved** +Either `TagExists` and `PullImage` route through `c.executor` (with tests updated to verify), or a comment is present explaining the bypass and its testability consequence. + +**AC-2 — Item 2 comment added** +`mock.ClientStub.BuildImage` has a comment stating that the zero-value default is invalid per spec contract and that `BuildImageFn` must be wired when asserting on the result. + +**AC-3 — Item 3 inlined** +`imageRef` intermediate variable is removed; `fmt.Sprintf` is inlined into the return literal in `BuildImage`. diff --git a/.team/specs/W-022.md b/.team/specs/W-022.md new file mode 100644 index 0000000..26ad4b3 --- /dev/null +++ b/.team/specs/W-022.md @@ -0,0 +1,79 @@ +# S-022 Spec — Simplify `pkg/build/release` to a single latest-versions record + +Status: open +Source: User + +## Goal + +Replace the multi-release historical map in `pkg/build/release` with a single, flat latest-versions record. Eliminate the maintenance burden of adding a new versioned map entry every time a package is updated. + +## Context + +`pkg/build/release/versions.go` holds a `map[string]Info` keyed by hind release version (e.g., `"0.4.0"`). Every time any package version changes, a developer must add a new full map entry and bump the `latest` constant. + +In practice, the build pipeline only ever calls `release.Latest()`. No build path calls `release.Get(version)` for a historical version. The only consumer of the historical map is the `hind releases` command, which lists past release rows in a table. + +The current `Info` struct also carries a `Hind` field that duplicates the map key, and the `Data` type with its `Get`/`List` methods exists solely to support multi-version lookup. + +## Decisions + +**Keep only a single latest-versions record.** +Remove the versioned map. Replace it with a single `Info` value that represents the current tested package versions. There is no use case in a local dev tool for building against historical versions. + +**Remove the `releases` command.** +With only one version record, the `hind releases` table has a single row and provides no value. Remove the `hind releases` subcommand. The `hind version` command already communicates the hind build version; package versions do not need a dedicated subcommand for local dev use. + +**Do not add arbitrary version flags to `hind build`.** +Allowing users to pass arbitrary package versions is out of scope for this item. That is a separate feature decision with its own testing and validation surface. + +**Do not add a config file.** +Out of scope. Adds complexity for no identified local dev need. + +**Remove the `Hind` field from `Info`.** +It duplicates the hind binary version already reported by `hind version`. Nothing in the build pipeline uses `Info.Hind` for meaningful differentiation after the map is removed. + +**Remove `Data`, `Get`, and `List`.** +These types and functions exist only to support multi-version lookup. After collapsing to a single record they have no purpose. + +## In scope + +- Replace the versioned map in `versions.go` with a single exported `Latest` `Info` value (or a package-level `func Latest() Info` returning a literal). +- Remove the `Hind` field from `release.Info`. +- Delete `release.Data`, `release.New`, `release.Get`, and `release.List`. +- Update `pkg/build/image/image.go`: `NewImage` already calls `release.Latest()`; adjust call site to the new shape. +- Update `pkg/build/image/image.go`: `packagesToBuildArgs` calls `release.Get(i.Release)` using `i.Release` (which was `Info.Hind`); remove this indirection and pass the `Info` directly or inline the lookup. +- Remove `pkg/cmd/hind/releases/` (command package and tests). +- Remove the `releases` subcommand registration from `pkg/cmd/hind/root.go`. +- Remove `features/hind-releases.feature` (or mark it deleted) since the command no longer exists. +- Update `pkg/build/release/release_test.go` to cover the simplified interface. + +## Out of scope + +- Adding a `--version` flag to `hind build`. +- A config file for package versions. +- A registry of available per-package versions. +- Semver resolution or fetching latest versions from upstream at runtime. +- Changes to `pkg/build/image/builder.go`, Dockerfiles, or build args format. + +## Acceptance criteria + +**AC-1 — Single latest-versions record** +`pkg/build/release/versions.go` contains no map keyed by release version string. It defines a single set of current package versions accessible via `release.Latest()`. + +**AC-2 — `release.Info` has no `Hind` field** +The `Info` struct does not contain a `Hind` field. No call site sets or reads `Info.Hind`. + +**AC-3 — `Data`, `New`, `Get`, and `List` are removed** +`pkg/build/release/release.go` exports no `Data` type, `New` constructor, `Get` function, or `List` function. No call site references them. + +**AC-4 — Build pipeline unaffected** +`hind build consul`, `hind build nomad`, `hind build vault`, `hind build all` execute without error. The Docker build args passed to each image (e.g., `CONSUL_VERSION`, `NOMAD_VERSION`) are unchanged in name and value. + +**AC-5 — `hind releases` command is removed** +`hind releases` is not a registered subcommand. Running `hind releases` returns a "unknown command" error. `pkg/cmd/hind/releases/` does not exist. + +**AC-6 — `make test` passes** +All tests pass with no new failures after the changes. + +**AC-7 — No orphaned references** +`go build ./...` and `go vet ./...` report no errors. No file imports `pkg/cmd/hind/releases` or references `release.Data`, `release.Get`, `release.List`, or `release.New`. diff --git a/.team/specs/W-023.md b/.team/specs/W-023.md new file mode 100644 index 0000000..8290d56 --- /dev/null +++ b/.team/specs/W-023.md @@ -0,0 +1,13 @@ +# Goal +Allow users to pass arbitrary package versions to `hind build`. + +# Context +`hind build` currently uses a fixed set of tested package versions from `pkg/build/release`. +Users may want to test against a specific upstream release (e.g. a new Nomad version) without modifying source code. + +# Constraints and open questions +- Which packages should support version overrides (all, or per-package flags)? +- Flag syntax: `--nomad-version`, `--consul-version`, etc., or a single `--versions key=val,...`? +- Should overrides be validated against a known set, or accepted as-is? +- Should overrides be persisted (e.g. in a config file) or one-shot per command invocation? +- How does this interact with image caching — should a version override force a rebuild? diff --git a/.team/specs/W-024.md b/.team/specs/W-024.md new file mode 100644 index 0000000..c4c9a69 --- /dev/null +++ b/.team/specs/W-024.md @@ -0,0 +1,16 @@ +# Goal +Allow users to pass arbitrary package versions to `hind start`. + +# Context +`hind start` launches a cluster using pre-built images. Users may want to start a cluster +with a specific package version (e.g. a particular Nomad release) without having to manually +run `hind build` with version overrides first. + +This is the `hind start` counterpart to B-023 (arbitrary versions for `hind build`). + +# Constraints and open questions +- Should `hind start` accept version flags directly and trigger a build with those versions if the image is not already available? +- Or should it only reference already-built images and error if the requested version image is missing? +- How does this interact with B-023 — should version flag syntax be consistent across `build` and `start`? +- Should version overrides apply to all packages or be per-package flags? +- Should a version mismatch between a running cluster node and the requested version be surfaced as a warning or an error? diff --git a/.team/specs/W-025.md b/.team/specs/W-025.md new file mode 100644 index 0000000..4c0e689 --- /dev/null +++ b/.team/specs/W-025.md @@ -0,0 +1,16 @@ +# Goal +Publish pre-built container images to an OCI registry whenever the tracked package versions are updated. + +# Context +Currently users must build images locally with `hind build`. If images were published to a +registry on each version update, users could pull pre-built images instead of building from +scratch, reducing setup time and ensuring a canonical set of tested images exists. + +# Constraints and open questions +- Which OCI registry should images be pushed to (GHCR, Docker Hub, other)? +- Should publishing be triggered by a CI pipeline on merge to main, or via a manual `hind publish` command, or both? +- How should images be tagged — by package version (e.g. `nomad:1.9.0`), by hind release, or a combination? +- Should the existing `hind build` command gain a `--push` flag, or should publishing be a separate command/workflow? +- Are multi-platform images (amd64/arm64) required? +- What authentication/credentials model is needed for pushing to the registry? +- Should locally-pulled images be preferred over local builds if a matching tag exists in the registry? diff --git a/.team/specs/W-026.md b/.team/specs/W-026.md new file mode 100644 index 0000000..35322d7 --- /dev/null +++ b/.team/specs/W-026.md @@ -0,0 +1,14 @@ +# Goal +Add GitHub Actions CI to run pre-merge checks on every pull request. + +# Context +There are currently no automated checks gating merges to main. Tests, linting, and builds +run manually. A CI pipeline would catch regressions earlier and enforce quality gates +consistently across contributors. + +# Constraints and open questions +- Which checks should be required to pass before merge: `make test`, `go vet`, `go build`, linting (staticcheck/golangci-lint)? +- Should the workflow also run `hind build` to validate Docker image builds, or is that too slow/expensive for CI? +- What Go version(s) should the matrix target? +- Should checks run on push to all branches or only on PRs targeting main? +- Is a dependabot / automated dependency update workflow in scope here? diff --git a/.team/specs/W-027.md b/.team/specs/W-027.md new file mode 100644 index 0000000..bd45347 --- /dev/null +++ b/.team/specs/W-027.md @@ -0,0 +1,8 @@ +# Goal +Reduce unnecessary parsing code by storing the node number/id in state + +# Example + +pkg/cluster/types.go:20-37 `config.Node` is created with id in the string + +pkg/cluster/types.go:39-56 parseClientNodeNumber(...) extracts the id from the string diff --git a/.team/specs/W-033.md b/.team/specs/W-033.md new file mode 100644 index 0000000..c802378 --- /dev/null +++ b/.team/specs/W-033.md @@ -0,0 +1,43 @@ +# Goal +Fix top-priority CLI correctness bugs where commands return success for missing clusters or partial failures. + +# Context +QA identified multiple P2 bugs where command exit codes and success messages do not reflect reality. These are high-impact for both human users and scripts that rely on non-zero exits. + +This spec bundles: +- BUG-001 (`hind get` silent success when cluster does not exist) +- BUG-003 (`hind stop` returns success on partial stop failures) +- BUG-004 (`hind rm` silent success when cluster does not exist) + +# In scope +- Ensure `hind get ` returns a non-zero error when the cluster config does not exist. +- Ensure `hind stop` returns non-zero when any container fails to stop. +- Ensure `hind stop --force` does not mask partial failures behind a success exit. +- Ensure `hind rm ` returns a non-zero error when the cluster does not exist. +- Add/update tests in affected command/cluster packages to lock expected behavior. + +# Out of scope +- Output wording polish beyond what is needed for correctness. +- Refactors unrelated to existence/exit-code correctness. +- P3 issues from the same QA pass (handled in separate spec). + +# Proposed changes +- `pkg/cluster/manager.go` + - Update missing-config behavior used by `Get` path to return a not-found error for non-existent clusters. + - Update `Delete()` to return a not-found error when the cluster does not exist. +- `pkg/cmd/hind/stop/stop.go` + - Reorder/adjust branch logic so `FailedCount > 0` is evaluated as failure and returns a non-nil error for both forced and non-forced stop flows. +- Tests + - Add/adjust tests for: + - `hind get` missing cluster -> non-zero error + - `hind rm` missing cluster -> non-zero error + - `hind stop` partial failure -> non-zero error + - `hind stop --force` partial failure -> non-zero error (and no masked success) + +# Acceptance criteria +- **AC-1:** `hind get ` exits non-zero and reports cluster-not-found. +- **AC-2:** `hind rm ` exits non-zero and does not print successful deletion. +- **AC-3:** `hind stop ` exits non-zero when one or more containers fail to stop. +- **AC-4:** `hind stop --force ` exits non-zero when one or more containers fail to stop. +- **AC-5:** Updated/new tests fail before fix and pass after fix. +- **AC-6:** `make test` passes. diff --git a/.team/specs/W-034.md b/.team/specs/W-034.md new file mode 100644 index 0000000..3ac2766 --- /dev/null +++ b/.team/specs/W-034.md @@ -0,0 +1,37 @@ +# Goal +Fix low-priority CLI behavior/UX issues from QA that do not block core correctness but improve reliability and consistency. + +# Context +QA identified three P3 issues that should be cleaned up after top-priority correctness bugs: +- BUG-002 (`hind list` ignores `tabwriter.Flush` errors) +- BUG-005 (`hind start` never returns already-running result) +- BUG-006 (`hind build` malformed exact-args error message) +- BUG-007 (`hind set profile` success output stream consistency) + +# In scope +- `hind list` should propagate flush/write errors. +- `hind start` should correctly signal already-running no-op state (or intentionally remove dead state if not desired). +- `hind build` wrong-arg error should be clear and count-based. +- `hind set profile` success stream behavior should be made consistent and tested (stdout vs stderr decision). + +# Out of scope +- New CLI features. +- Major output redesign. +- Changes to top-priority P2 bug behavior (covered by separate spec). + +# Proposed changes +- `pkg/cmd/hind/list/list.go` + - Check `w.Flush()` error and return wrapped error on failure. +- `pkg/cluster/manager.go` and/or `pkg/cmd/hind/start/start.go` + - Ensure already-running reconcile no-op returns `StartResultAlreadyRunning` and consumers handle it correctly; if project chooses not to surface this state, remove dead enum path and align behavior/tests explicitly. +- `pkg/cmd/hind/build/build.go` + - Replace slice-formatted arg-count error with count-based message or propagate Cobra exact-args error. +- `pkg/cmd/hind/set/set.go` + - Decide and standardize success output stream for `set profile` confirmation; add test coverage for chosen behavior. + +# Acceptance criteria +- **AC-1:** `hind list` returns non-zero on flush/write failure. +- **AC-2:** `hind start` already-running no-op behavior is explicit, implemented, and covered by tests. +- **AC-3:** `hind build` wrong-arg error message is clear and no longer prints raw slice formatting. +- **AC-4:** `hind set profile` success message stream is intentional, consistent, and tested. +- **AC-5:** `make test` passes. diff --git a/.team/work-items.md b/.team/work-items.md new file mode 100644 index 0000000..1aefd03 --- /dev/null +++ b/.team/work-items.md @@ -0,0 +1,7 @@ +# Work Items + +- ID: WI-001 + Description: W-026 GitHub Actions CI pre-merge checks for pull requests. + Assigned role: team-lead + Status: done + Blockers: none diff --git a/AGENTS.md b/AGENTS.md index cc51c07..b84549d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,233 +1,62 @@ -# Claude Code Assistant Instructions - Hashistack in Docker (hind) +# hind Claude Guide -## 🤖 AI Context & Project Overview +Use this file as fast project context for working in `hind`. -You are assisting with **hind** - a Go-based CLI tool that builds and runs different components from the HashiCorp ecosystem (the "Hashistack") in Docker containers. This project provides a quick playground for Nomad, Consul, and related services, similar to how `kind` works for Kubernetes. +## Project at a glance -### Key Project Components +`hind` is a Go CLI (Cobra) for running HashiCorp components locally in Docker for development/testing. -- **CLI Tool**: `hind` binary built with Cobra framework -- **Docker Images**: Custom images for Nomad, Consul, and supporting services -- **Cluster Management**: Multi-node Nomad clusters with service discovery -- **Network Integration**: Optional support for CNI and Service Mesh - -### Key Project Files - -- `cmd/hind/` - CLI entry point and command structure -- `pkg/` - Core Go packages organized by functionality -- `Makefile` - Build and deployment automation - -## 🎯 Primary Objectives - -1. **Build reliable HashiCorp service containers** - Custom images optimized for development -2. **Provide simple cluster lifecycle management** - Easy up/down operations -3. **Enable multi-node testing scenarios** - Scalable client nodes -4. **Support advanced networking** - CNI integration -5. **Maintain Go best practices** - Clean, idiomatic Go code - -## 🏛️ Architecture Decisions - -**Why Docker CLI via provider abstraction instead of Docker SDK?** -- Better compatibility with existing Docker installations -- Simpler debugging (can replicate issues with docker commands) -- Matches kind's approach (proven pattern for local clusters) -- Easy to add alternative container runtimes (podman, etc.) later - -**Why Cobra for CLI framework?** -- Industry standard for Go CLIs (kubectl, gh, docker use it) -- Built-in help generation and shell completion -- Easy subcommand management and flag parsing -- Excellent documentation and community support - -**Why separate provider abstraction layer?** -- Allows future support for different container runtimes -- Makes testing easier (can mock container operations) -- Keeps Docker-specific logic isolated -- Follows dependency inversion principle - -## ⚡ Quick Command Reference +## Core commands ```bash -# Build the CLI tool +# Build CLI make hind-cli - -# Build Docker images -./bin/hind build all # Build all images -./bin/hind build nomad # Build specific image - -# Cluster management -./bin/hind start # Start cluster (default profile) -./bin/hind start # Start with named profile -./bin/hind start --clients=3 # Start with 3 client nodes -./bin/hind list # List all clusters -./bin/hind get # Get cluster details -./bin/hind rm # Delete a cluster - -# Go development commands -go build -o bin/hind # Build CLI -go test ./... # Run all tests -go mod tidy # Clean dependencies -go fmt ./... # Format code -go vet ./... # Lint code -make test # Run fmt, vet, and tests -``` - -## 🚨 CRITICAL RULES - NO EXCEPTIONS - -### After Every Code Change - -1. ✅ Run `make test ` - Format all Go code -2. ✅ Test CLI functionality manually if applicable -3. ✅ Never skip quality checks for "small changes" - -### Go Code Style Mandates - -- **Follow Go conventions** - Use `gofmt`, `golint`, and `go vet` -- **Package organization** - Keep packages focused and well-named -- **Error handling** - Always handle errors appropriately -- **No global state** - Use dependency injection patterns -- **Interfaces over structs** - Keep interfaces small and focused -- **120 char line limit** - Keep code readable -- **Comments explain WHY, not WHAT** - Code should be self-documenting (see [docs/STYLE_GUIDE.md](docs/STYLE_GUIDE.md)) - -## ⚠️ Common Pitfalls - -**Container Naming:** -- ❌ Don't use arbitrary container names -- ✅ Always use the pattern: `hind...` -- Example: `hind.default.nomad.01`, `hind.test.consul.01` - -**Network Cleanup:** -- ❌ Networks won't delete if containers still reference them -- ✅ Always delete containers before deleting networks -- ✅ Use `./bin/hind delete ` to ensure proper cleanup order - -**Image Building:** -- ❌ Don't assume cached layers are current -- ✅ Use `docker build --no-cache` if build behavior seems inconsistent -- ✅ Check base image digests in `pkg/build/image/` when debugging - -**Provider Abstraction:** -- ❌ Don't call Docker commands directly in cluster code -- ✅ Always go through the `pkg/provider` interface -- ✅ This keeps the code testable and runtime-agnostic - -**Configuration Management:** -- ❌ Don't hardcode HashiCorp versions in cluster code -- ✅ Always use `pkg/build/release/` for version management -- ✅ This ensures consistency across images and runtime - -**Test Cleanup:** -- ❌ Don't leave test clusters running -- ✅ Always defer cleanup in tests: `defer cluster.Delete(ctx)` -- ✅ Use unique cluster names per test to avoid conflicts - -**For detailed development guidelines, see:** -- [docs/CONTRIBUTING.md](docs/CONTRIBUTING.md) - Development workflow and implementation checklist -- [docs/STYLE_GUIDE.md](docs/STYLE_GUIDE.md) - Code style guidelines -- [docs/TESTING.md](docs/TESTING.md) - Testing patterns and best practices -- [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) - Debugging guides - -## 🏗️ Project Structure - -``` -hind/ -├── cmd/hind/ # CLI application entry point -│ ├── main.go # Main CLI entry -│ └── app/ # Application setup and initialization -│ -├── pkg/ # Core Go packages -│ │ -│ ├── cmd/hind/ # Cobra CLI commands implementation -│ │ ├── root.go # Root command setup, adds all subcommands -│ │ ├── build/ # Build command - builds Docker images -│ │ ├── start/ # Start command - creates/starts clusters -│ │ ├── get/ # Get command - retrieves cluster details -│ │ ├── list/ # List command - lists all clusters -│ │ ├── rm/ # Delete command - removes clusters -│ │ ├── format/ # Format utilities for CLI output -│ │ └── version/ # Version command - displays version info -│ │ -│ ├── build/ # Image building and release management -│ │ ├── image/ # Docker image specifications and building -│ │ │ # WHEN: Adding new HashiCorp service images -│ │ │ # WHEN: Modifying image build configurations -│ │ └── release/ # Release version management for services -│ │ # WHEN: Adding new HashiCorp version support -│ │ # WHEN: Defining image metadata and versions -│ │ -│ ├── cluster/ # Cluster orchestration and lifecycle -│ │ ├── cluster.go # Main cluster type and operations (Create, Start, Stop, Delete) -│ │ │ # WHEN: Implementing cluster lifecycle features -│ │ ├── types.go # Cluster type definitions and defaults -│ │ ├── cni/ # Container Network Interface implementations -│ │ │ ├── cni.go # CNI interface definition -│ │ │ ├── none/ # No CNI (basic Docker networking) -│ │ │ ├── cilium/ # Cilium CNI implementation -│ │ │ └── factory/ # CNI factory pattern for creating CNI instances -│ │ │ # WHEN: Adding new CNI providers -│ │ │ # WHEN: Implementing network policies -│ │ └── runtime/ # Runtime configuration and container orchestration -│ │ # WHEN: Adding runtime-specific features -│ │ -│ ├── provider/ # Container provider abstraction layer -│ │ ├── provider.go # Interface for container/network operations -│ │ │ # WHEN: Adding support for new container runtimes -│ │ └── dockercli/ # Docker CLI implementation -│ │ ├── client.go # Docker client wrapper -│ │ ├── container.go # Container lifecycle operations -│ │ ├── network.go # Network management -│ │ ├── image.go # Image operations -│ │ └── build.go # Image building -│ │ # WHEN: Implementing Docker-specific features -│ │ # WHEN: Adding new container operations -│ │ -│ ├── config/ # Configuration types and structures -│ │ └── config.go # Cluster, Node, Network, Volume configs -│ │ # WHEN: Adding new configuration options -│ │ # WHEN: Defining node/cluster properties -│ │ -│ └── file/ # File system utilities -│ └── file.go # File/directory operations, path management -│ # WHEN: Adding file I/O operations -│ # WHEN: Managing cluster state files -│ -├── jobs/ # Example Nomad job files for testing -│ -└── features/ # Feature definitions and planning documents +# or +go build -o bin/hind + +# Build images +./bin/hind build all +./bin/hind build nomad + +# Cluster lifecycle +./bin/hind start [cluster-name] +./bin/hind start --clients=3 +./bin/hind list +./bin/hind get +./bin/hind stop [cluster-name] +./bin/hind rm + +# Quality checks +make test ``` -### Package Responsibilities Guide - -**When adding NEW features, consider:** - -- **CLI Commands** → `pkg/cmd/hind//` - User-facing commands -- **Image Changes** → `pkg/build/image/` - New services or image configurations -- **Cluster Logic** → `pkg/cluster/` - Cluster orchestration, lifecycle management -- **Networking** → `pkg/cluster/cni/` - CNI providers, network policies -- **Container Operations** → `pkg/provider/dockercli/` - Low-level container/network ops -- **Configuration** → `pkg/config/` - New config types, node properties -- **File Operations** → `pkg/file/` - State persistence, file management - +## Required workflow -## 🚀 Quick Start for Claude Code +- After code changes, run `make test`. +- If CLI behavior changed, also validate manually (for example: `make hind-cli && ./bin/hind --help`). -When starting a session: +## Architecture map -1. **Read this file first** for Go project context -2. **Check current branch** - Should be working on `feat/feat-name` -3. **Review recent commits** - Understand latest changes -4. **Run tests** - `go test ./...` to see current state -5. **Check CLI functionality** - `make hind-cli && ./bin/hind --help` +- `cmd/hind/` — CLI entrypoint. +- `pkg/cmd/hind/` — Cobra commands and formatting. +- `pkg/cluster/` — cluster lifecycle/orchestration. +- `pkg/provider/` — container runtime abstraction (`dockercli` implementation). +- `pkg/build/image/` — image definitions/build logic. +- `pkg/build/release/` — service versions/metadata. +- `pkg/config/` — config types. +- `pkg/file/` — file/path utilities. -## 📌 Remember +## High-signal rules -- **Go conventions are mandatory** - `gofmt`, `go vet`, proper error handling -- **Test-driven development** - Write tests first when possible -- **Docker implications** - Consider container impact of changes -- **CLI usability** - Commands should be intuitive and well-documented -- **HashiCorp ecosystem** - Understand service interactions +- Container names follow: `hind...`. +- In cluster/business logic, go through `pkg/provider` interfaces; avoid direct Docker command usage there. +- Delete containers before deleting networks. +- Do not hardcode HashiCorp versions in cluster code; use `pkg/build/release`. +- Tests creating clusters should clean up (`defer cluster.Delete(ctx)`) and use unique cluster names. ---- +## References -_This document is optimized for Claude Code working on the hind Go CLI project. Always refer to current code structure and `features/*.feature` for authoritative requirements._ +- `docs/CONTRIBUTING.md` — workflow and implementation checklist. +- `docs/STYLE_GUIDE.md` — style and conventions. +- `docs/TESTING.md` — testing patterns. +- `docs/TROUBLESHOOTING.md` — debugging guidance. diff --git a/README.md b/README.md index 216d7ed..af8dfa3 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,6 @@ nomad run jobs/example.hcl ```bash ./bin/hind start [cluster-name] # Create and start a cluster --clients int # Number of client nodes (default: 1) - --version string # Hind image version to use (default: "latest") --timeout duration # Timeout for starting cluster (default: 5m) --verbose # Enable verbose output diff --git a/docs/cilium.md b/docs/cilium.md index 4431c2f..f4b1891 100644 --- a/docs/cilium.md +++ b/docs/cilium.md @@ -6,11 +6,8 @@ The following setup is based on the Cosmonic [blog post](https://cosmonic.com/bl ## Enable Cilium -Start a cluster with Cilium CNI enabled: +Cilium cannot be enabled via `hind start` right now. The previous `--cni=cilium` flag was removed with the CNI package cleanup, so there is currently no supported CLI path to enable Cilium in `hind`. -```bash -./bin/hind start --cni=cilium -``` Check Cilium health status (may take 2-5 minutes to become fully healthy): diff --git a/features/default-cluster.feature b/features/default-cluster.feature deleted file mode 100644 index c970093..0000000 --- a/features/default-cluster.feature +++ /dev/null @@ -1,30 +0,0 @@ -Feature: hind active cluster selection - As a user of the hind cli - I want to be able to select an active selected cluster - So that when I run any cli commands I do not need to specify the profile name of the cluster - - Scenario: - Given I run the command `hind start` - And the command is successfull - When the command is executed - Then the active cluster profile name will be set as the newly active cluster - - Scenario: - Given I run the command `hind delete` - And the cluster to be deleted is the active selected cluster - And the command is successfull - When the command is executed - Then the active cluster profile name will be reset to "default" - - Scenario: - Given I run the command `hind set profile [name]` - And the cluster [name] exists - When the command is executed - Then the active selected cluster will be change to name - - Scenario: - Given I run the command `hind set profile [name]` - And the cluster [name] does not exist - When the command is executed - Then the command will fail - And the command will print a message that lets the user no that the profile does not exist diff --git a/features/hind-build.feature b/features/hind-build.feature deleted file mode 100644 index 4ee343d..0000000 --- a/features/hind-build.feature +++ /dev/null @@ -1,43 +0,0 @@ -Feature: hind build container images - As a maintainer or user of the hind cli - I want an easy way to build specific versions of the hind container images - So that they are can be built - - Background: - Given I have defined the hind version in the version configuration - And the hind version has the defined consul version - And the vault version - And the nomad version - - Scenario: Build consul image without version - Given I run the cli `hind build consul` command - When I execute the command without a version provided - Then the build package will leverage the versions package - And the versions package will provide the latest hind version - And in the hind version will be the consul version for that release - And the consul version will be passed to the build command as a build arg - And the consul image will be built - And the consul image will be tagged as `hind.consul:` - - Scenario: Build image dependencies met - Given I run a build command for a target image with dependent base images - When the target image is built - Then the build functionality will check the target for base image dependencies - And the base image dependencies will be checked to confirm they exist - And the build functionality will build the target image - - Scenario: Build image dependencies not met - Given I run a build command for a target image with dependent base images - When the target image is built - Then the build functionality will check the target for base image dependencies - And the base image dependencies will be checked to confirm they exist - And the build will fail with an error message for the missing dependencies - And the error message will include instructions to resolved the missing dependencies - - Scenario: Build all images - Given I run a build command for the target image `all` - When the command is executed - Then the build will determine the order of the build chain - And the first image built with no build dependencies will be built - And the next image built that has it's dependencies met wil be built - And the remaining images will be built once all of their dependencies are met diff --git a/features/hind-releases.feature b/features/hind-releases.feature deleted file mode 100644 index 2af818d..0000000 --- a/features/hind-releases.feature +++ /dev/null @@ -1,29 +0,0 @@ -Feature: HIND releases menu - As a maintainer of the HIND CLI - I want an easy way to maintain the hind version and the version of the hashicorp binaries that are include - So that releases can easily be built and published - - Background: - Given I have defined the hind version in the version configuration - And the hind version has the defined consul version - And the vault version - And the nomad version - - Scenario: List available hind versions - Given I run the cli versions menu - When I execute the command - Then the cli will list in a table the available hind versions - And the hashistack component versions that are included in a specific version will be displayed on the same row - And the names of the columns of the table will be listed on the first row - And the first column will be the hind version - And the remaining columns will be displayed in alphabetical order consul, nomad, vault - And the latest version will be on the first row - And the oldest version will be on the last row - - Scenario: Create new hind cluster - Given I run the cli command create - When I execute the command with the - Then the - - - Scenario: Run non existent hind version diff --git a/features/hind-start.feature b/features/hind-start.feature deleted file mode 100644 index 6019590..0000000 --- a/features/hind-start.feature +++ /dev/null @@ -1,135 +0,0 @@ -Feature: hind start cluster - As a user of the hind cli - I want an easy way to start or create a cluster - So that I can quickly begin working with HashiCorp services - - Background: - Given I have hind cli installed - And a docker daemon is running - And no clusters are currently running - - # Cluster Name Argument - Scenario: Start command uses default cluster name when no name specified - When I run `hind start` - Then the CLI should use cluster name "default" - - Scenario: Start command uses specified cluster name - When I run `hind start dev` - Then the CLI should use cluster name "dev" - - Scenario: Start command accepts cluster name as positional argument - When I run `hind start my-test-cluster` - Then the CLI should use cluster name "my-test-cluster" - - # Cluster Creation Flow - Scenario: Start creates a new cluster when none exists - Given no cluster named "default" exists - When I run `hind start` - Then the CLI should detect no existing cluster - And the CLI should create a new cluster with the following: - | Component | Count | - | Nomad Server | 1 | - | Nomad Client | 1 | - | Consul Server | 1 | - And all containers should be in running state - And the CLI should output "Cluster 'default' started successfully" - And the CLI should display connection information - - Scenario: Start creates a named cluster when none exists - Given no cluster named "dev" exists - When I run `hind start dev` - Then the CLI should detect no existing cluster - And the CLI should create a new cluster named "dev" - And all containers should be in running state - And the CLI should output "Cluster 'dev' started successfully" - - Scenario: Start resumes a stopped cluster - Given a cluster named "default" exists - And the cluster containers are stopped - When I run `hind start` - Then the CLI should detect existing cluster containers - And the CLI should start all stopped containers - And all containers should be in running state - And the CLI should output "Cluster 'default' started successfully" - - Scenario: Start command is idempotent when cluster already running - Given a cluster named "default" exists - And the cluster containers are running - When I run `hind start` - Then the CLI should detect the cluster is already running - And the CLI should output "Cluster 'default' is already running" - And no containers should be created or restarted - - # Configuration Options - Scenario: Start cluster with custom node count - Given no cluster named "default" exists - When I run `hind start --clients 3` - Then the CLI should create a cluster with 3 client nodes - And all 3 client containers should be running - - Scenario: Start named cluster with custom node count - Given no cluster named "staging" exists - When I run `hind start staging --clients 5` - Then the CLI should create a cluster named "staging" with 5 client nodes - And all 5 client containers should be running - - Scenario: Start uses existing cluster configuration when no flags provided - Given a cluster named "default" exists with 3 client nodes - And the cluster containers are stopped - When I run `hind start` - Then the CLI should start the cluster with 3 client nodes - And the CLI should not modify the cluster configuration - And the CLI should output "Cluster 'default' started successfully" - - Scenario: Start scales existing cluster when clients flag provided - Given a cluster named "default" exists with 3 client nodes - And the cluster containers are running - When I run `hind start --clients 5` - Then the CLI should scale the cluster to 5 client nodes - And the CLI should create 2 additional client containers - And all 5 client containers should be running - And the cluster configuration should be updated - - Scenario: Start scales down existing cluster when clients flag is lower - Given a cluster named "default" exists with 5 client nodes - And the cluster containers are running - When I run `hind start --clients 2` - Then the CLI should scale the cluster down to 2 client nodes - And the CLI should remove 3 client containers - And 2 client containers should be running - And the cluster configuration should be updated - - # Error Scenarios - Scenario: Start fails when Docker daemon is not running - Given the docker daemon is not running - When I run `hind start` - Then the CLI should output an error "Docker daemon is not accessible" - And the CLI should exit with code 1 - - Scenario: Start fails when port conflicts exist - Given port 4646 is already in use - When I run `hind start` - Then the CLI should output an error "Port conflict detected: 4646" - And the CLI should suggest "Stop the conflicting service or use a different profile" - And the CLI should exit with code 1 - - Scenario: Start partially recovers from unhealthy containers - Given a cluster named "default" exists - And some containers are in failed state - When I run `hind start` - Then the CLI should detect unhealthy containers - And the CLI should recreate failed containers - And all containers should be in running state - - # Verbose Output - Scenario: Start with verbose flag shows detailed progress - Given no cluster named "default" exists - When I run `hind start --verbose` - Then the CLI should output detailed progress including: - | Log Entry | - | Checking for existing cluster | - | Creating network 'hind-default' | - | Pulling image 'hind/nomad:latest' | - | Starting container 'nomad-server' | - | Waiting for Nomad API readiness | - | Cluster health check passed | diff --git a/features/hind-stop.feature b/features/hind-stop.feature deleted file mode 100644 index 27a26a2..0000000 --- a/features/hind-stop.feature +++ /dev/null @@ -1,159 +0,0 @@ -Feature: hind stop cluster - As a user of the hind cli - I want an easy way to stop a running cluster - So that I can pause my work and free up resources without losing my cluster configuration - - Background: - Given I have hind cli installed - And a docker daemon is running - - # Cluster Name Argument - Scenario: Stop command uses default cluster name when no name specified - Given a cluster named "default" exists - And the cluster containers are running - When I run `hind stop` - Then the CLI should use cluster name "default" - - Scenario: Stop command uses specified cluster name - Given a cluster named "dev" exists - And the cluster containers are running - When I run `hind stop dev` - Then the CLI should use cluster name "dev" - - Scenario: Stop command accepts cluster name as positional argument - Given a cluster named "my-test-cluster" exists - And the cluster containers are running - When I run `hind stop my-test-cluster` - Then the CLI should use cluster name "my-test-cluster" - - # Basic Stop Flow - Scenario: Stop stops all containers in a running cluster - Given a cluster named "default" exists with the following: - | Component | Count | - | Nomad Server | 1 | - | Nomad Client | 3 | - | Consul Server | 1 | - And all cluster containers are running - When I run `hind stop` - Then the CLI should stop all cluster containers - And all containers should be in stopped state - And the CLI should output "Cluster 'default' stopped successfully" - And the cluster configuration should be preserved - - Scenario: Stop stops a named cluster - Given a cluster named "staging" exists - And the cluster containers are running - When I run `hind stop staging` - Then the CLI should stop all containers for cluster "staging" - And all containers should be in stopped state - And the CLI should output "Cluster 'staging' stopped successfully" - - Scenario: Stop command is idempotent when cluster already stopped - Given a cluster named "default" exists - And all cluster containers are stopped - When I run `hind stop` - Then the CLI should detect the cluster is already stopped - And the CLI should output "Cluster 'default' is already stopped" - And no containers should be modified - - Scenario: Stop preserves cluster configuration for future restart - Given a cluster named "default" exists with 5 client nodes - And the cluster containers are running - When I run `hind stop` - Then the CLI should stop all cluster containers - And the cluster configuration should be preserved - And the configuration should show 5 client nodes - And a subsequent `hind start` should resume with the same configuration - - # Partial States - Scenario: Stop handles partially running cluster - Given a cluster named "default" exists - And some cluster containers are running - And some cluster containers are stopped - When I run `hind stop` - Then the CLI should stop all running containers - And all containers should be in stopped state - And the CLI should output "Cluster 'default' stopped successfully" - - Scenario: Stop handles unhealthy containers gracefully - Given a cluster named "default" exists - And some containers are in failed state - And some containers are running - When I run `hind stop` - Then the CLI should stop all running containers - And the CLI should not attempt to stop already failed containers - And the CLI should output "Cluster 'default' stopped (some containers were already failed)" - - # Error Scenarios - Scenario: Stop fails when cluster does not exist - Given no cluster named "nonexistent" exists - When I run `hind stop nonexistent` - Then the CLI should output an error "Cluster 'nonexistent' not found" - And the CLI should exit with code 1 - - Scenario: Stop fails when Docker daemon is not running - Given a cluster named "default" exists - And the docker daemon is not running - When I run `hind stop` - Then the CLI should output an error "Docker daemon is not accessible" - And the CLI should exit with code 1 - - Scenario: Stop continues despite container stop failures - Given a cluster named "default" exists with 3 client nodes - And the cluster containers are running - And container "hind.default.nomad-client.02" cannot be stopped - When I run `hind stop` - Then the CLI should attempt to stop all containers - And the CLI should stop containers 1 and 3 successfully - And the CLI should output a warning "Failed to stop container 'hind.default.nomad-client.02'" - And the CLI should output "Cluster 'default' partially stopped" - And the CLI should exit with code 0 - - # Force Stop - Scenario: Stop with force flag kills containers immediately - Given a cluster named "default" exists - And the cluster containers are running - When I run `hind stop --force` - Then the CLI should kill all containers without graceful shutdown - And all containers should be in stopped state - And the CLI should output "Cluster 'default' force stopped" - - Scenario: Stop with timeout flag waits specified duration - Given a cluster named "default" exists - And the cluster containers are running - When I run `hind stop --timeout 30` - Then the CLI should wait up to 30 seconds for graceful shutdown - And all containers should be in stopped state - And the CLI should output "Cluster 'default' stopped successfully" - - # Verbose Output - Scenario: Stop with verbose flag shows detailed progress - Given a cluster named "default" exists with 2 client nodes - And the cluster containers are running - When I run `hind stop --verbose` - Then the CLI should output detailed progress including: - | Log Entry | - | Checking cluster 'default' status | - | Stopping container 'hind.default.nomad.01' | - | Stopping container 'hind.default.nomad.02' | - | Stopping container 'hind.default.nomad.03' | - | Stopping container 'hind.default.consul.01' | - | All containers stopped successfully | - - # Integration with Other Commands - Scenario: Stop followed by start resumes cluster with same configuration - Given a cluster named "prod" exists with 4 client nodes - And the cluster containers are running - When I run `hind stop prod` - And I run `hind start prod` - Then the cluster should start with 4 client nodes - And all containers should be in running state - And the CLI should output "Cluster 'prod' started successfully" - - Scenario: Stop does not affect other running clusters - Given a cluster named "dev" exists and is running - And a cluster named "staging" exists and is running - When I run `hind stop dev` - Then cluster "dev" containers should be stopped - And cluster "staging" containers should still be running - And the CLI should output "Cluster 'dev' stopped successfully" diff --git a/pkg/build/image/builder.go b/pkg/build/image/builder.go index 8f53e3e..f9de638 100644 --- a/pkg/build/image/builder.go +++ b/pkg/build/image/builder.go @@ -7,16 +7,20 @@ import ( "github.com/apex/log" "github.com/stenh0use/hind/pkg/build/image/files" - "github.com/stenh0use/hind/pkg/build/image/internal/docker" "github.com/stenh0use/hind/pkg/build/release" + "github.com/stenh0use/hind/pkg/provider" ) +// Builder orchestrates building a single hind Docker image via a provider.Client. type Builder struct { logger *log.Logger + client provider.Client image Image } -func NewBuilder(logger *log.Logger, kind release.ImageKind) (*Builder, error) { +// NewBuilder constructs a Builder for the given image kind, using the provided +// provider.Client for all runtime Docker interactions. +func NewBuilder(logger *log.Logger, client provider.Client, kind release.ImageKind) (*Builder, error) { image, err := NewImage(kind) if err != nil { return nil, fmt.Errorf("failed to create image definition: %w", err) @@ -24,10 +28,13 @@ func NewBuilder(logger *log.Logger, kind release.ImageKind) (*Builder, error) { return &Builder{ logger: logger, + client: client, image: image, }, nil } +// BuildImage builds the image, writing build files to a temporary directory and +// delegating the Docker build to the provider client. func (b *Builder) BuildImage(ctx context.Context) error { if err := b.checkDependencies(ctx); err != nil { return fmt.Errorf("dependency check failed: %w", err) @@ -42,42 +49,37 @@ func (b *Builder) BuildImage(ctx context.Context) error { return fmt.Errorf("failed to write build files for %s: %w", b.image.Kind, err) } - imageName := b.image.Kind.ImageName() - dockerImg := docker.NewImage(b.logger, imageName, b.image.Release) - buildArgs, err := b.image.buildArgs() if err != nil { return fmt.Errorf("failed to generate build args: %w", err) } - dockerImg.UpdateBuildOptions( - &docker.BuildOptions{ - ContextDir: buildFiles.BuildDir(), - BuildArgs: buildArgs, - }) - - _, err = dockerImg.BuildImage(ctx) + result, err := b.client.BuildImage(ctx, provider.BuildImageOptions{ + Name: b.image.Kind.ImageName(), + Tag: b.image.Release, + ContextDir: buildFiles.BuildDir(), + BuildArgs: buildArgs, + WithCache: false, + Platform: "", + }) if err != nil { return fmt.Errorf("failed to build image %s: %w", b.image.Kind, err) } - b.logger.WithField("image", fmt.Sprintf("%s:%s", b.image.Name, b.image.Release)). - Info("Successfully built image") + b.logger.WithField("image", result.ImageRef).Info("Successfully built image") return nil } -// checkDependencies implements feature requirement for dependency validation +// checkDependencies verifies that required base images exist locally before building. func (b *Builder) checkDependencies(ctx context.Context) error { if b.image.BaseImage.Pull { - // Base image is from registry (e.g., debian:bullseye-slim), no local dependency + // Base image is from registry (e.g., debian:bullseye-slim), no local dependency. return nil } sanitizedName, _ := strings.CutPrefix(b.image.BaseImage.Name, release.ImageRegistry+"/") - i := docker.NewImage(b.logger, sanitizedName, b.image.BaseImage.Tag) - - exists, err := i.TagExists(ctx) + exists, err := b.client.TagExists(ctx, sanitizedName, b.image.BaseImage.Tag) if err != nil { return fmt.Errorf("failed to check tag exists: %w", err) } diff --git a/pkg/build/image/builder_test.go b/pkg/build/image/builder_test.go index 58d03fb..2823984 100644 --- a/pkg/build/image/builder_test.go +++ b/pkg/build/image/builder_test.go @@ -1,56 +1,92 @@ package image import ( + "context" "strings" "testing" "github.com/apex/log" "github.com/apex/log/handlers/discard" "github.com/stenh0use/hind/pkg/build/release" + "github.com/stenh0use/hind/pkg/config" + "github.com/stenh0use/hind/pkg/provider" ) +// providerStub is a minimal stub implementing provider.Client for use in builder tests. +// Only BuildImage and TagExists need real behaviour; all others are no-ops. +type providerStub struct { + buildImageFn func(ctx context.Context, opts provider.BuildImageOptions) (provider.BuildImageResult, error) + tagExistsFn func(ctx context.Context, name, tag string) (bool, error) +} + +func (s *providerStub) BuildImage(ctx context.Context, opts provider.BuildImageOptions) (provider.BuildImageResult, error) { + if s.buildImageFn != nil { + return s.buildImageFn(ctx, opts) + } + return provider.BuildImageResult{Digest: "sha256:stub", ImageRef: opts.Name + ":" + opts.Tag}, nil +} + +func (s *providerStub) TagExists(ctx context.Context, name, tag string) (bool, error) { + if s.tagExistsFn != nil { + return s.tagExistsFn(ctx, name, tag) + } + return true, nil +} + +// Remaining provider.Client methods — no-op stubs. +func (s *providerStub) CreateContainer(ctx context.Context, cfg provider.ContainerSpec) (string, error) { + return "", nil +} +func (s *providerStub) StartContainer(ctx context.Context, name string) error { return nil } +func (s *providerStub) StopContainer(ctx context.Context, name string) error { return nil } +func (s *providerStub) KillContainer(ctx context.Context, name string) error { return nil } +func (s *providerStub) DeleteContainer(ctx context.Context, name string) error { return nil } +func (s *providerStub) InspectContainer(ctx context.Context, name string) (*provider.ContainerInfo, error) { + return nil, nil +} +func (s *providerStub) ListContainers(ctx context.Context, filters []string) ([]provider.ContainerInfo, error) { + return nil, nil +} +func (s *providerStub) PullImage(ctx context.Context, name, tag string) error { return nil } +func (s *providerStub) CreateNetwork(ctx context.Context, cfg config.Network) (string, error) { + return "", nil +} +func (s *providerStub) DeleteNetwork(ctx context.Context, name string) error { return nil } +func (s *providerStub) ListNetworks(ctx context.Context, filters []string) ([]provider.NetworkInfo, error) { + return nil, nil +} +func (s *providerStub) InspectNetwork(ctx context.Context, name string) (*provider.NetworkInfo, error) { + return nil, nil +} + +// newTestLogger returns a logger that discards all output. +func newTestLogger() *log.Logger { + return &log.Logger{Handler: discard.New()} +} + +// newStubClient returns a providerStub that satisfies provider.Client. +func newStubClient() *providerStub { + return &providerStub{} +} + func TestNewBuilder(t *testing.T) { tests := []struct { name string kind release.ImageKind wantErr bool }{ - { - name: "valid consul image", - kind: release.Consul, - wantErr: false, - }, - { - name: "valid nomad image", - kind: release.Nomad, - wantErr: false, - }, - { - name: "valid nomad-client image", - kind: release.NomadClient, - wantErr: false, - }, - { - name: "valid vault image", - kind: release.Vault, - wantErr: false, - }, - { - name: "invalid image kind", - kind: release.ImageKind("invalid"), - wantErr: true, - }, - { - name: "empty image kind", - kind: release.ImageKind(""), - wantErr: true, - }, + {name: "valid consul image", kind: release.Consul, wantErr: false}, + {name: "valid nomad image", kind: release.Nomad, wantErr: false}, + {name: "valid nomad-client image", kind: release.NomadClient, wantErr: false}, + {name: "valid vault image", kind: release.Vault, wantErr: false}, + {name: "invalid image kind", kind: release.ImageKind("invalid"), wantErr: true}, + {name: "empty image kind", kind: release.ImageKind(""), wantErr: true}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - logger := &log.Logger{Handler: discard.New()} - got, err := NewBuilder(logger, tt.kind) + logger := newTestLogger() + got, err := NewBuilder(logger, newStubClient(), tt.kind) if tt.wantErr { if err == nil { @@ -62,15 +98,12 @@ func TestNewBuilder(t *testing.T) { if err != nil { t.Fatalf("NewBuilder(%v) unexpected error: %v", tt.kind, err) } - if got == nil { t.Errorf("NewBuilder(%v) = nil, want non-nil Builder", tt.kind) } - if got.logger == nil { t.Errorf("NewBuilder(%v).logger = nil, want non-nil logger", tt.kind) } - if got.image.Kind != tt.kind { t.Errorf("NewBuilder(%v).image.Kind = %v, want %v", tt.kind, got.image.Kind, tt.kind) } @@ -118,12 +151,10 @@ func TestConstructName(t *testing.T) { if !strings.HasPrefix(got, tt.wantPrefix) { t.Errorf("constructName(%v) = %q, want prefix %q", tt.imageKind, got, tt.wantPrefix) } - if !strings.HasSuffix(got, tt.wantSuffix) { t.Errorf("constructName(%v) = %q, want suffix %q", tt.imageKind, got, tt.wantSuffix) } - // Verify full format: registry/repo/prefix.kind expectedFormat := "docker.io/stenh0use/hind." + string(tt.imageKind) if got != expectedFormat { t.Errorf("constructName(%v) = %q, want %q", tt.imageKind, got, expectedFormat) @@ -133,8 +164,6 @@ func TestConstructName(t *testing.T) { } func TestBuilder_ImageConfiguration(t *testing.T) { - logger := &log.Logger{Handler: discard.New()} - tests := []struct { name string kind release.ImageKind @@ -145,31 +174,31 @@ func TestBuilder_ImageConfiguration(t *testing.T) { name: "consul uses debian base", kind: release.Consul, wantImageName: "consul", - wantBaseImagePull: true, // Pulls from registry + wantBaseImagePull: true, }, { name: "nomad depends on consul", kind: release.Nomad, wantImageName: "nomad", - wantBaseImagePull: false, // Uses local consul image + wantBaseImagePull: false, }, { name: "nomad-client depends on nomad", kind: release.NomadClient, wantImageName: "nomad-client", - wantBaseImagePull: false, // Uses local nomad image + wantBaseImagePull: false, }, { name: "vault depends on consul", kind: release.Vault, wantImageName: "vault", - wantBaseImagePull: false, // Uses local consul image + wantBaseImagePull: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - builder, err := NewBuilder(logger, tt.kind) + builder, err := NewBuilder(newTestLogger(), newStubClient(), tt.kind) if err != nil { t.Fatalf("NewBuilder(%v) unexpected error: %v", tt.kind, err) } @@ -177,15 +206,90 @@ func TestBuilder_ImageConfiguration(t *testing.T) { if builder.image.Name != tt.wantImageName { t.Errorf("Builder.image.Name = %q, want %q", builder.image.Name, tt.wantImageName) } - if builder.image.BaseImage.Pull != tt.wantBaseImagePull { t.Errorf("Builder.image.BaseImage.Pull = %v, want %v", builder.image.BaseImage.Pull, tt.wantBaseImagePull) } - - // Verify packages are set if len(builder.image.Packages) == 0 { t.Errorf("Builder.image.Packages is empty, want non-empty package list") } }) } } + +func TestBuilder_CheckDependencies_CallsProviderTagExists(t *testing.T) { + // nomad has BaseImage.Pull=false, so checkDependencies should call TagExists. + stub := &providerStub{ + tagExistsFn: func(_ context.Context, _, _ string) (bool, error) { + return false, nil // simulate missing base image + }, + } + + builder, err := NewBuilder(newTestLogger(), stub, release.Nomad) + if err != nil { + t.Fatalf("NewBuilder: %v", err) + } + + err = builder.checkDependencies(context.Background()) + if err == nil { + t.Fatal("expected error when base image is absent, got nil") + } + if !strings.Contains(err.Error(), "base image dependency not met") { + t.Errorf("error should contain 'base image dependency not met', got: %q", err.Error()) + } + if !strings.Contains(err.Error(), "Resolution: Run 'hind build") { + t.Errorf("error should contain resolution hint, got: %q", err.Error()) + } +} + +func TestBuilder_BuildImage_CallsProviderBuildImage(t *testing.T) { + var capturedOpts provider.BuildImageOptions + + stub := &providerStub{ + buildImageFn: func(_ context.Context, opts provider.BuildImageOptions) (provider.BuildImageResult, error) { + capturedOpts = opts + return provider.BuildImageResult{Digest: "sha256:abc", ImageRef: "name:tag"}, nil + }, + } + + // Use consul: BaseImage.Pull=true so checkDependencies skips TagExists. + builder, err := NewBuilder(newTestLogger(), stub, release.Consul) + if err != nil { + t.Fatalf("NewBuilder: %v", err) + } + + if err := builder.BuildImage(context.Background()); err != nil { + t.Fatalf("BuildImage returned unexpected error: %v", err) + } + + expectedName := release.Consul.ImageName() + if capturedOpts.Name != expectedName { + t.Errorf("BuildImageOptions.Name = %q, want %q", capturedOpts.Name, expectedName) + } + if capturedOpts.Tag == "" { + t.Errorf("BuildImageOptions.Tag is empty, want non-empty release tag") + } + if capturedOpts.ContextDir == "" { + t.Errorf("BuildImageOptions.ContextDir is empty, want a non-empty path") + } + if len(capturedOpts.BuildArgs) == 0 { + t.Errorf("BuildImageOptions.BuildArgs is empty, want at least one entry") + } +} + +func TestBuilder_CheckDependencies_SkipsWhenPull(t *testing.T) { + // consul has BaseImage.Pull=true — TagExists must never be called. + stub := &providerStub{ + tagExistsFn: func(_ context.Context, _, _ string) (bool, error) { + panic("TagExists must not be called when BaseImage.Pull is true") + }, + } + + builder, err := NewBuilder(newTestLogger(), stub, release.Consul) + if err != nil { + t.Fatalf("NewBuilder: %v", err) + } + + if err := builder.checkDependencies(context.Background()); err != nil { + t.Errorf("checkDependencies should return nil for pull=true image, got: %v", err) + } +} diff --git a/pkg/build/image/files/files.go b/pkg/build/image/files/files.go index 435bfbf..97a0f47 100644 --- a/pkg/build/image/files/files.go +++ b/pkg/build/image/files/files.go @@ -65,7 +65,7 @@ func imageFS(i string) (fs.FS, error) { } func (i *Image) WriteFiles() error { - if err := i.manager.EnsureDir(i.buildDir); err != nil { + if err := i.manager.EnsureDir("."); err != nil { return fmt.Errorf("failed to create build dir: %w", err) } diff --git a/pkg/build/image/files/files_test.go b/pkg/build/image/files/files_test.go new file mode 100644 index 0000000..ec13518 --- /dev/null +++ b/pkg/build/image/files/files_test.go @@ -0,0 +1,53 @@ +package files + +import ( + "os" + "path/filepath" + "testing" +) + +func TestImageWriteFiles_WritesEmbeddedBuildContext(t *testing.T) { + tests := []struct { + name string + imageName string + expectFiles []string + }{ + { + name: "consul build context", + imageName: "consul", + expectFiles: []string{ + "Dockerfile", + }, + }, + { + name: "nomad build context", + imageName: "nomad", + expectFiles: []string{ + "Dockerfile", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + homeDir := t.TempDir() + t.Setenv("HOME", homeDir) + + imageFiles, err := New(tt.imageName) + if err != nil { + t.Fatalf("New(%q) error = %v", tt.imageName, err) + } + + if err := imageFiles.WriteFiles(); err != nil { + t.Fatalf("WriteFiles() error = %v", err) + } + + for _, rel := range tt.expectFiles { + fullPath := filepath.Join(imageFiles.BuildDir(), rel) + if _, err := os.Stat(fullPath); err != nil { + t.Fatalf("expected file %q to exist: %v", fullPath, err) + } + } + }) + } +} diff --git a/pkg/build/image/image.go b/pkg/build/image/image.go index 60737dd..ebf2d91 100644 --- a/pkg/build/image/image.go +++ b/pkg/build/image/image.go @@ -7,7 +7,6 @@ import ( "fmt" "strings" - "github.com/stenh0use/hind/pkg/build/image/internal/docker" "github.com/stenh0use/hind/pkg/build/release" ) @@ -106,38 +105,33 @@ func newVault(rel release.Info) Image { } } -func (i *Image) packagesToBuildArgs() ([]docker.BuildArg, error) { +// packagesToBuildArgs converts the image's package list to a map of build arg +// key-value pairs (e.g. CONSUL_VERSION -> "1.17.0"). +func (i *Image) packagesToBuildArgs() (map[string]string, error) { rel, err := release.Get(i.Release) if err != nil { return nil, fmt.Errorf("failed to get release %s: %w", i.Release, err) } - args := make([]docker.BuildArg, 0, len(i.Packages)) + args := make(map[string]string, len(i.Packages)) for _, name := range i.Packages { if version, err := rel.GetPackage(name); err == nil { - args = append(args, docker.BuildArg{ - Arg: strings.ToUpper(name) + "_VERSION", - Value: version, - }) + args[strings.ToUpper(name)+"_VERSION"] = version } } return args, nil } -func (i *Image) buildArgs() ([]docker.BuildArg, error) { +// buildArgs returns the full set of build arguments for the image, including +// package versions, the hind version, and the base image reference. +func (i *Image) buildArgs() (map[string]string, error) { args, err := i.packagesToBuildArgs() if err != nil { return nil, fmt.Errorf("failed to generate build args for image %s: %w", i.Name, err) } - args = append(args, docker.BuildArg{ - Arg: "HIND_VERSION", - Value: i.Release, - }) - args = append(args, docker.BuildArg{ - Arg: "BASE_IMAGE", - Value: fmt.Sprintf("%s:%s", i.BaseImage.Name, i.BaseImage.Tag), - }) + args["HIND_VERSION"] = i.Release + args["BASE_IMAGE"] = fmt.Sprintf("%s:%s", i.BaseImage.Name, i.BaseImage.Tag) return args, nil } diff --git a/pkg/build/image/image_test.go b/pkg/build/image/image_test.go new file mode 100644 index 0000000..5bb8046 --- /dev/null +++ b/pkg/build/image/image_test.go @@ -0,0 +1,62 @@ +package image + +import ( + "testing" + + "github.com/stenh0use/hind/pkg/build/release" +) + +func TestBuildArgs_ReturnsMapWithExpectedKeys(t *testing.T) { + img, err := NewImage(release.Consul) + if err != nil { + t.Fatalf("NewImage(Consul): %v", err) + } + + args, err := img.buildArgs() + if err != nil { + t.Fatalf("buildArgs(): %v", err) + } + + wantKeys := []string{"CONSUL_VERSION", "HIND_VERSION", "BASE_IMAGE"} + for _, k := range wantKeys { + if _, ok := args[k]; !ok { + t.Errorf("buildArgs() missing key %q; got keys: %v", k, mapKeys(args)) + } + } +} + +func TestPackagesToBuildArgs_KnownPackages(t *testing.T) { + img, err := NewImage(release.Nomad) + if err != nil { + t.Fatalf("NewImage(Nomad): %v", err) + } + + args, err := img.packagesToBuildArgs() + if err != nil { + t.Fatalf("packagesToBuildArgs(): %v", err) + } + + // Nomad image packages: consul, nomad — expect both version keys. + wantKeys := []string{"CONSUL_VERSION", "NOMAD_VERSION"} + for _, k := range wantKeys { + if _, ok := args[k]; !ok { + t.Errorf("packagesToBuildArgs() missing key %q; got keys: %v", k, mapKeys(args)) + } + } + + // Verify values are non-empty. + for k, v := range args { + if v == "" { + t.Errorf("packagesToBuildArgs() key %q has empty value", k) + } + } +} + +// mapKeys returns the keys of a map for error messages. +func mapKeys(m map[string]string) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +} diff --git a/pkg/build/image/internal/docker/docker.go b/pkg/build/image/internal/docker/docker.go deleted file mode 100644 index 529c79f..0000000 --- a/pkg/build/image/internal/docker/docker.go +++ /dev/null @@ -1,231 +0,0 @@ -// Package docker provides Docker CLI integration for building and managing images. -// It wraps Docker buildx commands and provides utilities for checking Docker daemon -// capabilities and installed plugins. -package docker - -import ( - "context" - "encoding/json" - "fmt" - "os" - "os/exec" - "strings" - - "github.com/apex/log" -) - -const defaultBuilder string = "buildx" - -// Image holds options for building and running a Docker image using the Docker CLI. -type Image struct { - Name string // Name of the image to build - Tag string // Tag part of Name:tag for the built image - logger *log.Logger // Logger for build output - BuildOptions *BuildOptions // Options for building the image (nil if not building) - metadata *BuildMetadata // Cached metadata about built image -} - -type BuildOptions struct { - ContextDir string - Dockerfile string - BuildArgs []BuildArg - WithCache bool // Whether to use the build cache - Platform string // Optional platform to build for -} - -// BuildMetadata is extracted from the docker buildx metadata.json -type BuildMetadata struct { - ContainerImageDigest string `json:"containerimage.config.digest"` - ImageName string `json:"image.name"` -} - -type BuildArg struct { - Arg string - Value string -} - -func NewImage(logger *log.Logger, name, tag string) Image { - return Image{ - logger: logger, - Name: name, - Tag: tag, - } -} - -func (i *Image) UpdateBuildOptions(opts *BuildOptions) { - if i.BuildOptions == nil { - i.BuildOptions = opts - return - } - - if opts.ContextDir != "" { - i.BuildOptions.ContextDir = opts.ContextDir - } - if opts.Dockerfile != "" { - i.BuildOptions.Dockerfile = opts.Dockerfile - } - i.BuildOptions.WithCache = opts.WithCache - if opts.Platform != "" { - i.BuildOptions.Platform = opts.Platform - } - if opts.BuildArgs != nil { - i.BuildOptions.BuildArgs = opts.BuildArgs - } -} - -func (i *Image) FormatBuildArgs() []string { - if i.BuildOptions == nil || i.BuildOptions.BuildArgs == nil { - return []string{} - } - - args := make([]string, 0, len(i.BuildOptions.BuildArgs)) - for _, v := range i.BuildOptions.BuildArgs { - args = append(args, "--build-arg", fmt.Sprintf("%s=%s", v.Arg, v.Value)) - } - - return args -} - -// RefreshBuildMetadata reads and parses the metadata.json file from disk, updating the cache -func (i *Image) RefreshBuildMetadata(ctx context.Context) (*BuildMetadata, error) { - if i.BuildOptions == nil { - return nil, fmt.Errorf("build options not set: cannot read metadata file") - } - - metadataFile := i.BuildOptions.ContextDir + "/metadata.json" - data, err := os.ReadFile(metadataFile) - if err != nil { - return nil, fmt.Errorf("failed to read metadata file %s: %w", metadataFile, err) - } - - var metadata BuildMetadata - if err := json.Unmarshal(data, &metadata); err != nil { - return nil, fmt.Errorf("failed to unmarshal metadata from %s: %w", metadataFile, err) - } - - // Cache the metadata for future calls - i.metadata = &metadata - return i.metadata, nil -} - -// GetBuildMetadata returns cached metadata, loading from file if not already cached -func (i *Image) GetBuildMetadata(ctx context.Context) (*BuildMetadata, error) { - // Return cached metadata if available - if i.metadata != nil { - return i.metadata, nil - } - - // Load from file and cache - return i.RefreshBuildMetadata(ctx) -} - -func (i *Image) BuildImage(ctx context.Context) (string, error) { - if err := checkDependencies(ctx); err != nil { - return "", fmt.Errorf("failed to build image %s:%s: %w", i.Name, i.Tag, err) - } - - if i.BuildOptions == nil { - return "", fmt.Errorf("build options not set: cannot build image") - } - - i.logger.WithFields(log.Fields{"name": i.Name, "tag": i.Tag}).Info("Building image") - - cmd := i.buildCommand(ctx) - - i.logger.WithField("command", cmd.String()).Debug("Running Docker build command") - - var stdout, stderr strings.Builder - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - i.logger.WithFields(log.Fields{ - "stdout": stdout.String(), - "stderr": stderr.String(), - "error": err, - }).Debug("failed to build image") - return "", fmt.Errorf("failed to build image: %w: %s", err, stderr.String()) - } - - i.logger.WithFields(log.Fields{"name": i.Name, "tag": i.Tag}).Info("Successfully built image") - - return i.getImageDigest(ctx) -} - -// imageRef constructs the full image name -func (i *Image) imageRef() string { - return fmt.Sprintf("%s:%s", i.Name, i.Tag) -} - -// buildCommand creates the docker buildx command with all options -func (i *Image) buildCommand(ctx context.Context) *exec.Cmd { - cmd := exec.CommandContext( - ctx, - "docker", - "buildx", - "build", - "-t", i.imageRef(), - "--metadata-file", "metadata.json", - ) - - cmd.Dir = i.BuildOptions.ContextDir - - if i.BuildOptions.Dockerfile != "" { - cmd.Args = append(cmd.Args, "-f", i.BuildOptions.Dockerfile) - } - - if !i.BuildOptions.WithCache { - cmd.Args = append(cmd.Args, "--no-cache") - } - - if i.BuildOptions.Platform != "" { - cmd.Args = append(cmd.Args, "--platform", i.BuildOptions.Platform) - } - - cmd.Args = append(cmd.Args, i.FormatBuildArgs()...) - cmd.Args = append(cmd.Args, ".") - - return cmd -} - -// getImageDigest retrieves and logs the built image digest -func (i *Image) getImageDigest(ctx context.Context) (string, error) { - imageMeta, err := i.GetBuildMetadata(ctx) - if err != nil { - return "", fmt.Errorf("failed to read image ID from metadata: %w", err) - } - - i.logger.WithField("imageMeta", imageMeta).Info("Image metadata") - return imageMeta.ContainerImageDigest, nil -} - -func (i *Image) TagExists(ctx context.Context) (bool, error) { - cmd := exec.CommandContext(ctx, "docker", "images", "-q", i.imageRef()) - var stdout, stderr strings.Builder - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - return false, fmt.Errorf("failed to check if tag exists: %w: %s", err, stderr.String()) - } - return strings.TrimSpace(stdout.String()) != "", nil -} - -func checkDependencies(ctx context.Context) error { - info := DockerInfo{} - if err := info.Get(ctx); err != nil { - return fmt.Errorf("failed to get docker system info: %w", err) - } - - if !info.HasClientPlugin(defaultBuilder) { - return fmt.Errorf("%s client plugin is needed but not installed", defaultBuilder) - } - - // This is only required for multi platform builds - // const snapshotter = "io.containerd.snapshotter.v1" - // if !info.HasDriverType(snapshotter) { - // return fmt.Errorf("'%s' driver is needed but not configured", snapshotter) - // } - - return nil -} diff --git a/pkg/build/image/internal/docker/docker_test.go b/pkg/build/image/internal/docker/docker_test.go deleted file mode 100644 index 049bf1b..0000000 --- a/pkg/build/image/internal/docker/docker_test.go +++ /dev/null @@ -1,226 +0,0 @@ -package docker - -import ( - "testing" - - "github.com/apex/log" - "github.com/apex/log/handlers/discard" -) - -func TestFormatBuildArgs(t *testing.T) { - tests := []struct { - name string - args []BuildArg - want []string - }{ - { - name: "no build args", - args: nil, - want: []string{}, - }, - { - name: "empty build args", - args: []BuildArg{}, - want: []string{}, - }, - { - name: "single build arg", - args: []BuildArg{ - {Arg: "VERSION", Value: "1.0"}, - }, - want: []string{"--build-arg", "VERSION=1.0"}, - }, - { - name: "multiple build args", - args: []BuildArg{ - {Arg: "VERSION", Value: "1.0"}, - {Arg: "BASE", Value: "alpine"}, - }, - want: []string{ - "--build-arg", "VERSION=1.0", - "--build-arg", "BASE=alpine", - }, - }, - { - name: "build args with special characters", - args: []BuildArg{ - {Arg: "URL", Value: "https://example.com/path?query=value"}, - {Arg: "MESSAGE", Value: "hello world"}, - }, - want: []string{ - "--build-arg", "URL=https://example.com/path?query=value", - "--build-arg", "MESSAGE=hello world", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - logger := &log.Logger{Handler: discard.New()} - img := NewImage(logger, "test", "latest") - img.UpdateBuildOptions(&BuildOptions{ - BuildArgs: tt.args, - }) - - got := img.FormatBuildArgs() - - if len(got) != len(tt.want) { - t.Errorf("FormatBuildArgs() length = %d, want %d\ngot: %v\nwant: %v", - len(got), len(tt.want), got, tt.want) - return - } - - for i := range got { - if got[i] != tt.want[i] { - t.Errorf("FormatBuildArgs()[%d] = %q, want %q", i, got[i], tt.want[i]) - } - } - }) - } -} - -func TestUpdateBuildOptions(t *testing.T) { - logger := &log.Logger{Handler: discard.New()} - - t.Run("set options when nil", func(t *testing.T) { - img := NewImage(logger, "test", "latest") - - if img.BuildOptions != nil { - t.Fatal("Expected BuildOptions to be nil initially") - } - - opts := &BuildOptions{ - ContextDir: "/build", - Dockerfile: "Dockerfile", - BuildArgs: []BuildArg{ - {Arg: "VERSION", Value: "1.0"}, - }, - } - - img.UpdateBuildOptions(opts) - - if img.BuildOptions == nil { - t.Fatal("BuildOptions should not be nil after update") - } - - if img.BuildOptions.ContextDir != "/build" { - t.Errorf("ContextDir = %q, want %q", img.BuildOptions.ContextDir, "/build") - } - }) - - t.Run("merge non-empty values", func(t *testing.T) { - img := NewImage(logger, "test", "latest") - img.BuildOptions = &BuildOptions{ - ContextDir: "/original", - Dockerfile: "Dockerfile.original", - } - - img.UpdateBuildOptions(&BuildOptions{ - ContextDir: "/updated", - // Dockerfile intentionally empty - should not override - }) - - if img.BuildOptions.ContextDir != "/updated" { - t.Errorf("ContextDir = %q, want %q", img.BuildOptions.ContextDir, "/updated") - } - - if img.BuildOptions.Dockerfile != "Dockerfile.original" { - t.Errorf("Dockerfile = %q, want %q (should not override with empty)", - img.BuildOptions.Dockerfile, "Dockerfile.original") - } - }) - - t.Run("update build args", func(t *testing.T) { - img := NewImage(logger, "test", "latest") - img.BuildOptions = &BuildOptions{ - BuildArgs: []BuildArg{ - {Arg: "OLD", Value: "value"}, - }, - } - - newArgs := []BuildArg{ - {Arg: "NEW", Value: "value"}, - } - - img.UpdateBuildOptions(&BuildOptions{ - BuildArgs: newArgs, - }) - - if len(img.BuildOptions.BuildArgs) != 1 { - t.Errorf("BuildArgs length = %d, want 1", len(img.BuildOptions.BuildArgs)) - } - - if img.BuildOptions.BuildArgs[0].Arg != "NEW" { - t.Errorf("BuildArgs[0].Arg = %q, want %q", - img.BuildOptions.BuildArgs[0].Arg, "NEW") - } - }) -} - -func TestImageRef(t *testing.T) { - tests := []struct { - name string - imgName string - imgTag string - want string - }{ - { - name: "standard image ref", - imgName: "myapp", - imgTag: "v1.0.0", - want: "myapp:v1.0.0", - }, - { - name: "latest tag", - imgName: "myapp", - imgTag: "latest", - want: "myapp:latest", - }, - { - name: "image with registry", - imgName: "docker.io/user/myapp", - imgTag: "sha256abc", - want: "docker.io/user/myapp:sha256abc", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - logger := &log.Logger{Handler: discard.New()} - img := NewImage(logger, tt.imgName, tt.imgTag) - - got := img.imageRef() - - if got != tt.want { - t.Errorf("imageRef() = %q, want %q", got, tt.want) - } - }) - } -} - -func TestNewImage(t *testing.T) { - logger := &log.Logger{Handler: discard.New()} - - t.Run("creates image with correct fields", func(t *testing.T) { - name := "test-image" - tag := "v1.0.0" - - img := NewImage(logger, name, tag) - - if img.Name != name { - t.Errorf("Name = %q, want %q", img.Name, name) - } - - if img.Tag != tag { - t.Errorf("Tag = %q, want %q", img.Tag, tag) - } - - if img.logger == nil { - t.Error("Logger should not be nil") - } - - if img.BuildOptions != nil { - t.Error("BuildOptions should be nil initially") - } - }) -} diff --git a/pkg/build/image/internal/docker/info.go b/pkg/build/image/internal/docker/info.go deleted file mode 100644 index 3fcda8d..0000000 --- a/pkg/build/image/internal/docker/info.go +++ /dev/null @@ -1,59 +0,0 @@ -package docker - -import ( - "context" - "encoding/json" - "fmt" - "os/exec" -) - -type DockerInfo struct { - ClientInfo ClientInfo `json:"ClientInfo"` - DriverStatus [][2]string `json:"DriverStatus"` -} - -type ClientInfo struct { - Plugins []Plugin `json:"Plugins"` - Version string `json:"Version"` -} - -type Plugin struct { - SchemaVersion string `json:"SchemaVersion"` - Vendor string `json:"Vendor"` - Version string `json:"Version"` - ShortDescription string `json:"ShortDescription"` - Name string `json:"Name"` - Path string `json:"Path"` -} - -func (i *DockerInfo) Get(ctx context.Context) error { - cmd := exec.CommandContext(ctx, "docker", "system", "info", "-f", "json") - data, err := cmd.Output() - if err != nil { - return fmt.Errorf("failed to get docker info: %w", err) - } - - if err := json.Unmarshal(data, &i); err != nil { - return fmt.Errorf("failed to unmarshal docker info: %w", err) - } - - return nil -} - -func (i *DockerInfo) HasClientPlugin(name string) bool { - for _, plugin := range i.ClientInfo.Plugins { - if plugin.Name == name { - return true - } - } - return false -} - -func (i *DockerInfo) HasDriverType(name string) bool { - for _, ds := range i.DriverStatus { - if ds[0] == "driver-type" && ds[1] == name { - return true - } - } - return false -} diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index ac6eb22..0925f26 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -4,7 +4,11 @@ package cluster import ( + "errors" "fmt" + "os" + "path/filepath" + "strings" "time" "github.com/stenh0use/hind/pkg/file" @@ -23,6 +27,39 @@ const ( DefaultContainerPollInterval = 1 * time.Second ) +// ValidateClusterName ensures a cluster name cannot be used for path traversal +// or absolute/root escape when constructing persisted config paths. +func ValidateClusterName(name string) error { + trimmed := strings.TrimSpace(name) + if trimmed == "" { + return errors.New("cluster name cannot be empty") + } + + if filepath.IsAbs(trimmed) { + return errors.New("cluster name must be relative") + } + + segments := strings.FieldsFunc(trimmed, func(r rune) bool { + return r == '/' || r == '\\' + }) + for _, segment := range segments { + if segment == ".." { + return errors.New("cluster name cannot contain traversal segments") + } + } + + cleaned := filepath.Clean(trimmed) + if cleaned == "." { + return errors.New("cluster name cannot resolve to current directory") + } + + if strings.HasPrefix(cleaned, "..") { + return errors.New("cluster name cannot escape root") + } + + return nil +} + // List returns all cluster names found in the cluster configuration directory. func List() ([]string, error) { var clusters []string @@ -32,6 +69,9 @@ func List() ([]string, error) { } entries, err := fm.ListDir(ClusterConfigDir) if err != nil { + if errors.Is(err, os.ErrNotExist) { + return clusters, nil + } return nil, err } @@ -66,6 +106,10 @@ func GetActiveCluster() (string, error) { // SetActiveCluster sets the currently active cluster func SetActiveCluster(clusterName string) error { + if err := ValidateClusterName(clusterName); err != nil { + return fmt.Errorf("invalid cluster name %q: %w", clusterName, err) + } + fm, err := file.NewFromHomeDir(DefaultConfigParentDir, DefaultConfigName) if err != nil { return err diff --git a/pkg/cluster/cluster_test.go b/pkg/cluster/cluster_test.go index c16b7fe..9bd957e 100644 --- a/pkg/cluster/cluster_test.go +++ b/pkg/cluster/cluster_test.go @@ -1,8 +1,14 @@ package cluster import ( + "context" + "reflect" + "slices" "testing" + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/build/release" "github.com/stenh0use/hind/pkg/config" ) @@ -207,3 +213,194 @@ func TestStartResult(t *testing.T) { t.Errorf("expected 3 unique StartResult values, got %d", len(seen)) } } + +func TestListReturnsEmptyWhenConfigDirMissing(t *testing.T) { + t.Setenv("HOME", t.TempDir()) + + clusters, err := List() + if err != nil { + t.Fatalf("List() returned error when config dir is missing: %v", err) + } + + if len(clusters) != 0 { + t.Fatalf("List() expected 0 clusters on first run, got %d", len(clusters)) + } +} + +func TestAddClientNodes_UsesNextAvailableNumbering(t *testing.T) { + version := release.Latest().Hind + clusterConfig := &config.Cluster{ + Name: "demo", + Version: version, + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{ + {Name: "hind.demo.consul.01", Role: config.Server}, + {Name: "hind.demo.client.01", Role: config.Client}, + {Name: "hind.demo.client.03", Role: config.Client}, + }, + } + + m := &Manager{ + logger: &log.Logger{Handler: discard.New(), Level: log.ErrorLevel}, + config: clusterConfig, + } + + if err := m.addClientNodes(1); err != nil { + t.Fatalf("addClientNodes() error = %v", err) + } + + var gotClientNames []string + for _, node := range m.config.Nodes { + if node.Role == config.Client { + gotClientNames = append(gotClientNames, node.Name) + } + } + slices.Sort(gotClientNames) + + wantClientNames := []string{ + "hind.demo.client.01", + "hind.demo.client.03", + "hind.demo.client.04", + } + if !slices.Equal(gotClientNames, wantClientNames) { + t.Fatalf("client names = %v, want %v", gotClientNames, wantClientNames) + } +} + +func TestNewNomadClientNode_ReturnsConsistentClientConfig(t *testing.T) { + node := newNomadClientNode("demo", "hind.demo", "1.8.0", 7) + + if node.Name != "hind.demo.client.07" { + t.Fatalf("node.Name = %q, want %q", node.Name, "hind.demo.client.07") + } + if node.Kind != config.NomadNode { + t.Fatalf("node.Kind = %q, want %q", node.Kind, config.NomadNode) + } + if node.Role != config.Client { + t.Fatalf("node.Role = %q, want %q", node.Role, config.Client) + } + if node.Network != "hind.demo" { + t.Fatalf("node.Network = %q, want %q", node.Network, "hind.demo") + } + if node.Image.Name != release.NomadClient.ImageName() { + t.Fatalf("node.Image.Name = %q, want %q", node.Image.Name, release.NomadClient.ImageName()) + } + if node.Image.Tag != "1.8.0" { + t.Fatalf("node.Image.Tag = %q, want %q", node.Image.Tag, "1.8.0") + } + if len(node.Devices) != 1 || node.Devices[0] != "/dev/fuse" { + t.Fatalf("node.Devices = %v, want [/dev/fuse]", node.Devices) + } + if node.Environment["CONSUL_AGENT_MODE"] != "client" { + t.Fatalf("CONSUL_AGENT_MODE = %q, want %q", node.Environment["CONSUL_AGENT_MODE"], "client") + } + if node.Environment["CONSUL_SERVER_ADDRESS"] != "hind.demo.consul.01" { + t.Fatalf("CONSUL_SERVER_ADDRESS = %q, want %q", node.Environment["CONSUL_SERVER_ADDRESS"], "hind.demo.consul.01") + } + if node.Environment["NOMAD_AGENT_MODE"] != "client" { + t.Fatalf("NOMAD_AGENT_MODE = %q, want %q", node.Environment["NOMAD_AGENT_MODE"], "client") + } +} + +func TestNewClusterConfig_UsesClientNodeFactory(t *testing.T) { + cfg, err := newClusterConfig("test", release.Latest().Hind) + if err != nil { + t.Fatalf("newClusterConfig() error = %v", err) + } + + var clients []config.Node + for _, node := range cfg.Nodes { + if node.Role == config.Client { + clients = append(clients, node) + } + } + if len(clients) != 1 { + t.Fatalf("client node count = %d, want 1", len(clients)) + } + + expected := newNomadClientNode("test", "hind.test", cfg.Version, 1) + client := clients[0] + if client.Name != expected.Name { + t.Fatalf("client.Name = %q, want %q", client.Name, expected.Name) + } + if client.Image != expected.Image { + t.Fatalf("client.Image = %+v, want %+v", client.Image, expected.Image) + } + if !slices.Equal(client.Devices, expected.Devices) { + t.Fatalf("client.Devices = %v, want %v", client.Devices, expected.Devices) + } + if !slices.Equal(client.Ports, expected.Ports) { + t.Fatalf("client.Ports = %v, want %v", client.Ports, expected.Ports) + } + if len(client.Volumes) != len(expected.Volumes) { + t.Fatalf("client.Volumes len = %d, want %d", len(client.Volumes), len(expected.Volumes)) + } + if len(client.Environment) != len(expected.Environment) { + t.Fatalf("environment size = %d, want %d", len(client.Environment), len(expected.Environment)) + } + for key, wantValue := range expected.Environment { + if got := client.Environment[key]; got != wantValue { + t.Fatalf("environment[%q] = %q, want %q", key, got, wantValue) + } + } +} + +func TestSetClientCount_UsesClientNodeFactory(t *testing.T) { + version := release.Latest().Hind + clusterConfig := &config.Cluster{ + Name: "demo", + Version: version, + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{ + {Name: "hind.demo.consul.01", Role: config.Server}, + {Name: "hind.demo.nomad.01", Role: config.Server}, + {Name: "hind.demo.client.01", Role: config.Client}, + }, + } + + m := &Manager{config: clusterConfig} + + if err := m.SetClientCount(context.Background(), 2); err != nil { + t.Fatalf("SetClientCount() error = %v", err) + } + + var clients []config.Node + for _, node := range m.config.Nodes { + if node.Role == config.Client { + clients = append(clients, node) + } + } + + if len(clients) != 2 { + t.Fatalf("client node count = %d, want 2", len(clients)) + } + + expectedClients := []config.Node{ + newNomadClientNode("demo", "hind.demo", version, 1), + newNomadClientNode("demo", "hind.demo", version, 2), + } + + if !reflect.DeepEqual(clients, expectedClients) { + t.Fatalf("client nodes = %#v, want %#v", clients, expectedClients) + } + + var nonClientNames []string + for _, node := range m.config.Nodes { + if node.Role != config.Client { + nonClientNames = append(nonClientNames, node.Name) + } + } + + wantNonClientNames := []string{"hind.demo.consul.01", "hind.demo.nomad.01"} + if !reflect.DeepEqual(nonClientNames, wantNonClientNames) { + t.Fatalf("non-client names = %v, want %v", nonClientNames, wantNonClientNames) + } +} + +func TestSetClientCount_RejectsCountBelowOne(t *testing.T) { + m := &Manager{config: &config.Cluster{Name: "demo"}} + + if err := m.SetClientCount(context.Background(), 0); err == nil { + t.Fatal("SetClientCount() error = nil, want non-nil") + } +} diff --git a/pkg/cluster/cni/cilium/cilium.go b/pkg/cluster/cni/cilium/cilium.go deleted file mode 100644 index a79b96b..0000000 --- a/pkg/cluster/cni/cilium/cilium.go +++ /dev/null @@ -1,89 +0,0 @@ -package cilium - -import ( - "fmt" - "strconv" - - "github.com/stenh0use/hind/pkg/cluster/cni" -) - -// CiliumCNI implements CNI interface for Cilium -type CiliumCNI struct { - name string - ipv4Range string - options map[string]string - enabled bool -} - -// NewCiliumCNI creates a new instance of Cilium CNI -func NewCiliumCNI(name, ipv4Range string, options map[string]string) *CiliumCNI { - if options == nil { - options = make(map[string]string) - } - - return &CiliumCNI{ - name: name, - ipv4Range: ipv4Range, - options: options, - enabled: true, - } -} - -// Type returns the CNI type -func (c *CiliumCNI) Type() cni.CNIType { - return cni.CNITypeCilium -} - -// Enabled returns whether this CNI is enabled -func (c *CiliumCNI) Enabled() bool { - return c.enabled -} - -// Start starts the Cilium CNI -func (c *CiliumCNI) Start() error { - if !c.enabled { - return nil - } - - // TODO: Implement proper Cilium startup - return nil -} - -// Stop stops the Cilium CNI -func (c *CiliumCNI) Stop() error { - if !c.enabled { - return nil - } - - // TODO: Implement proper Cilium shutdown - return nil -} - -// Status returns the current status of Cilium CNI -func (c *CiliumCNI) Status() (string, error) { - if !c.enabled { - return "disabled", nil - } - - // TODO: Implement proper status checking - return "running", nil -} - -// GetEnvironmentVars returns environment variables for Cilium CNI -func (c *CiliumCNI) GetEnvironmentVars() map[string]string { - if !c.enabled { - return map[string]string{} - } - - envVars := map[string]string{ - "CILIUM_ENABLED": strconv.FormatBool(c.enabled), - "CILIUM_IPV4_RANGE": c.ipv4Range, - } - - // Add any custom options - for k, v := range c.options { - envVars[fmt.Sprintf("CILIUM_%s", k)] = v - } - - return envVars -} diff --git a/pkg/cluster/cni/cni.go b/pkg/cluster/cni/cni.go deleted file mode 100644 index 68dcfd6..0000000 --- a/pkg/cluster/cni/cni.go +++ /dev/null @@ -1,35 +0,0 @@ -package cni - -// CNI represents a Container Network Interface implementation -type CNI interface { - // Type returns the CNI type (none, cilium) - Type() CNIType - - // Enabled returns whether this CNI is enabled - Enabled() bool - - // Start starts the CNI - Start() error - - // Stop stops the CNI - Stop() error - - // Status returns the current status - Status() (string, error) - - // GetEnvironmentVars returns environment variables to inject into containers - GetEnvironmentVars() map[string]string -} - -// CNIType represents the type of CNI -type CNIType string - -const ( - CNITypeNone CNIType = "none" - CNITypeCilium CNIType = "cilium" -) - -// Factory can create CNI instances -type Factory interface { - CreateCNI(cniType CNIType, config map[string]string) (CNI, error) -} diff --git a/pkg/cluster/cni/factory/factory.go b/pkg/cluster/cni/factory/factory.go deleted file mode 100644 index db59bfe..0000000 --- a/pkg/cluster/cni/factory/factory.go +++ /dev/null @@ -1,49 +0,0 @@ -package factory - -import ( - "fmt" - - "github.com/stenh0use/hind/pkg/cluster/cni" - "github.com/stenh0use/hind/pkg/cluster/cni/cilium" - "github.com/stenh0use/hind/pkg/cluster/cni/none" -) - -// DefaultFactory implements CNI factory -type DefaultFactory struct{} - -// NewDefaultFactory creates a new default CNI factory -func NewDefaultFactory() *DefaultFactory { - return &DefaultFactory{} -} - -// CreateCNI creates a CNI instance based on type and configuration -func (f *DefaultFactory) CreateCNI(cniType cni.CNIType, config map[string]string) (cni.CNI, error) { - switch cniType { - case cni.CNITypeNone: - return none.NewNoneCNI(), nil - - case cni.CNITypeCilium: - name := config["name"] - if name == "" { - name = "cilium" - } - - ipv4Range := config["ipv4_range"] - if ipv4Range == "" { - ipv4Range = "10.8.0.0/16" - } - - // Remove standard config keys and pass the rest as options - options := make(map[string]string) - for k, v := range config { - if k != "name" && k != "ipv4_range" { - options[k] = v - } - } - - return cilium.NewCiliumCNI(name, ipv4Range, options), nil - - default: - return nil, fmt.Errorf("unsupported CNI type: %s", cniType) - } -} diff --git a/pkg/cluster/cni/none/none.go b/pkg/cluster/cni/none/none.go deleted file mode 100644 index 9b21caf..0000000 --- a/pkg/cluster/cni/none/none.go +++ /dev/null @@ -1,43 +0,0 @@ -package none - -import ( - "github.com/stenh0use/hind/pkg/cluster/cni" -) - -// NoneCNI represents no CNI (disabled networking) -type NoneCNI struct{} - -// NewNoneCNI creates a new instance of NoneCNI -func NewNoneCNI() *NoneCNI { - return &NoneCNI{} -} - -// Type returns the CNI type -func (n *NoneCNI) Type() cni.CNIType { - return cni.CNITypeNone -} - -// Enabled returns whether this CNI is enabled (always false for none) -func (n *NoneCNI) Enabled() bool { - return false -} - -// Start is a no-op for none CNI -func (n *NoneCNI) Start() error { - return nil -} - -// Stop is a no-op for none CNI -func (n *NoneCNI) Stop() error { - return nil -} - -// Status returns that CNI is disabled -func (n *NoneCNI) Status() (string, error) { - return "disabled", nil -} - -// GetEnvironmentVars returns empty environment variables -func (n *NoneCNI) GetEnvironmentVars() map[string]string { - return map[string]string{} -} diff --git a/pkg/cluster/manager.go b/pkg/cluster/manager.go index bc1820a..3f91c85 100644 --- a/pkg/cluster/manager.go +++ b/pkg/cluster/manager.go @@ -11,7 +11,6 @@ import ( "github.com/stenh0use/hind/pkg/config" "github.com/stenh0use/hind/pkg/file" "github.com/stenh0use/hind/pkg/provider" - "github.com/stenh0use/hind/pkg/provider/dockercli" ) // Manager handles cluster lifecycle operations. @@ -35,7 +34,14 @@ func (m *Manager) SetConfig(cfg *config.Cluster) { // New creates a new cluster manager with the given name and default configuration. // It initializes the file manager, provider, and cluster configuration for the specified cluster name. -func New(logger *log.Logger, name string) (*Manager, error) { +func New(logger *log.Logger, name string, client provider.Client) (*Manager, error) { + if err := ValidateClusterName(name); err != nil { + return nil, fmt.Errorf("invalid cluster name %q: %w", name, err) + } + if client == nil { + return nil, fmt.Errorf("provider client cannot be nil") + } + cfg, err := newClusterConfig(name, release.Latest().Hind) if err != nil { return nil, fmt.Errorf("failed to create default cluster config for '%s': %w", name, err) @@ -49,10 +55,10 @@ func New(logger *log.Logger, name string) (*Manager, error) { m := &Manager{ logger: logger, - provider: dockercli.New(logger), + provider: client, config: cfg, fm: fm, - configFile: file.JoinPath(fm.GetRootDir(), ClusterConfigDir, name, ClusterConfigFile), + configFile: file.JoinPath(ClusterConfigDir, name, ClusterConfigFile), } return m, nil } @@ -79,7 +85,7 @@ func (m *Manager) Start(ctx context.Context) (StartResult, error) { } else { // Use the config created by New() - it already has defaults // Just ensure the directory exists - clusterDir := file.JoinPath(m.fm.GetRootDir(), ClusterConfigDir, m.config.Name) + clusterDir := file.JoinPath(ClusterConfigDir, m.config.Name) if err := m.fm.EnsureDir(clusterDir); err != nil { return StartResultCreated, fmt.Errorf("failed to create cluster dir: %w", err) } @@ -125,11 +131,14 @@ func (m *Manager) waitForContainersRunning(ctx context.Context, timeout time.Dur return nil } - // Check if context is done + timer := time.NewTimer(DefaultContainerPollInterval) select { case <-ctx.Done(): + if !timer.Stop() { + <-timer.C + } return ctx.Err() - case <-time.After(DefaultContainerPollInterval): + case <-timer.C: // Continue waiting } } @@ -137,55 +146,76 @@ func (m *Manager) waitForContainersRunning(ctx context.Context, timeout time.Dur return fmt.Errorf("timeout waiting for containers to reach running state") } +type StopOptions struct { + Force bool + Verbose bool +} + +type StopResult struct { + StoppedCount int + AlreadyStoppedCount int + FailedCount int + FailedPreStopCount int + Failures []string + VerboseLines []string +} + +func (r StopResult) AlreadyStopped() bool { + return r.StoppedCount == 0 && r.FailedCount == 0 && r.AlreadyStoppedCount > 0 +} + func (m *Manager) Stop(ctx context.Context) error { - // Load cluster config from disk if not already in memory - // This allows Stop to work even if Manager was created without loading config - if m.config == nil || m.config.Name == "" { - cfg, err := m.loadConfig() - if err != nil { - return fmt.Errorf("failed to load cluster config: %w", err) - } - m.config = cfg - } + _, err := m.StopWithOptions(ctx, StopOptions{}) + return err +} - // Track how many containers were stopped - stoppedCount := 0 - alreadyStoppedCount := 0 +func (m *Manager) StopWithOptions(ctx context.Context, opts StopOptions) (StopResult, error) { + result := StopResult{} + if err := m.LoadPersistedConfig(); err != nil { + return result, err + } - // Stop each node container for _, node := range m.config.Nodes { + if opts.Verbose { + result.VerboseLines = append(result.VerboseLines, fmt.Sprintf("Checking container '%s' status", node.Name)) + } containerInfo, err := m.provider.InspectContainer(ctx, node.Name) - - // Skip if container doesn't exist + if err != nil { + return result, fmt.Errorf("failed to inspect container %s: %w", node.Name, err) + } if containerInfo == nil { - m.logger.WithField("name", node.Name).Debug("container not found, skipping...") + if opts.Verbose { + result.VerboseLines = append(result.VerboseLines, fmt.Sprintf("Container '%s' not found, skipping", node.Name)) + } continue - } else if err != nil { - return err } - // Check current status and stop if running if containerInfo.Status == provider.Running.String() { - m.logger.WithField("name", node.Name).Debug("stopping container") - if err := m.provider.StopContainer(ctx, node.Name); err != nil { - return fmt.Errorf("failed to stop container %s: %w", node.Name, err) + if opts.Verbose { + result.VerboseLines = append(result.VerboseLines, fmt.Sprintf("Stopping container '%s'", node.Name)) + } + if opts.Force { + err = m.provider.KillContainer(ctx, node.Name) + } else { + err = m.provider.StopContainer(ctx, node.Name) } - m.logger.WithField("name", node.Name).Info("stopped container") - stoppedCount++ - } else { - m.logger.WithField("name", node.Name).Debug("container already stopped") - alreadyStoppedCount++ + if err != nil { + result.FailedCount++ + result.Failures = append(result.Failures, node.Name) + m.logger.Warnf("Failed to stop container '%s': %v", node.Name, err) + continue + } + result.StoppedCount++ + continue } - } - // Log summary - if stoppedCount == 0 && alreadyStoppedCount > 0 { - m.logger.Debug("all containers already stopped") - } else if stoppedCount > 0 { - m.logger.Debugf("stopped %d container(s)", stoppedCount) + if containerInfo.Status == provider.Error.String() { + result.FailedPreStopCount++ + } + result.AlreadyStoppedCount++ } - return nil + return result, nil } func (m *Manager) Delete(ctx context.Context) error { @@ -206,14 +236,16 @@ func (m *Manager) Delete(ctx context.Context) error { // Delete cluster nodes for _, node := range m.config.Nodes { containerInfo, err := m.provider.InspectContainer(ctx, node.Name) + if err != nil { + return fmt.Errorf("failed to inspect container %s: %w", node.Name, err) + } if containerInfo == nil { m.logger.WithField("name", node.Name).Debug("container not found, skipping...") continue - } else if err != nil { - return err - } else if containerInfo.Status == provider.Running.String() { + } + if containerInfo.Status == provider.Running.String() { if err = m.provider.StopContainer(ctx, node.Name); err != nil { - return err + return fmt.Errorf("failed to stop container %s: %w", node.Name, err) } } @@ -225,7 +257,10 @@ func (m *Manager) Delete(ctx context.Context) error { // Check if network exists netInfo, err := m.provider.InspectNetwork(ctx, m.config.Network.Name) - if err == nil && netInfo != nil { + if err != nil { + return fmt.Errorf("failed to inspect network %s: %w", m.config.Network.Name, err) + } + if netInfo != nil { if err := m.provider.DeleteNetwork(ctx, m.config.Network.Name); err != nil { return fmt.Errorf("failed to delete network: %w", err) } @@ -240,16 +275,20 @@ func (m *Manager) Delete(ctx context.Context) error { return nil } -func (m *Manager) Get(ctx context.Context) (*provider.ClusterInfo, error) { - state := &provider.ClusterInfo{} +func (m *Manager) Get(ctx context.Context) (*ClusterInfo, error) { + state := &ClusterInfo{} + + if err := m.LoadPersistedConfig(); err != nil { + return nil, err + } - // Use in-memory config (don't load from disk) - // This allows Get() to work during reconciliation before config is saved networkInfo, err := m.provider.InspectNetwork(ctx, m.config.Network.Name) if err != nil { return nil, fmt.Errorf("failed to inspect network: %w", err) } - state.Network = *networkInfo + if networkInfo != nil { + state.Network = *networkInfo + } containerInfos := []provider.ContainerInfo{} for _, node := range m.config.Nodes { @@ -275,9 +314,31 @@ func (m *Manager) Provider() provider.Client { // ConfigFileExists checks if the cluster config file exists func (m *Manager) ConfigFileExists() bool { + if m.fm == nil { + return false + } return m.fm.FileExists(m.configFile) } +// LoadPersistedConfig loads cluster configuration from disk when available. +// If no persisted config exists, the current in-memory config is left unchanged. +func (m *Manager) LoadPersistedConfig() error { + if !m.ConfigFileExists() { + if m.config == nil || m.config.Name == "" { + return fmt.Errorf("cluster config not found") + } + return nil + } + + cfg, err := m.loadConfig() + if err != nil { + return fmt.Errorf("failed to load cluster config: %w", err) + } + + m.config = cfg + return nil +} + // SetClientCount updates the number of client nodes in the cluster configuration func (m *Manager) SetClientCount(ctx context.Context, count int) error { if count < 1 { @@ -300,23 +361,7 @@ func (m *Manager) SetClientCount(ctx context.Context, count int) error { } for i := 0; i < count; i++ { - nomadClient := config.Node{ - Name: fmt.Sprintf("hind.%s.client.%.2d", name, i+1), - Kind: config.NomadNode, - Role: config.Client, - Network: m.config.Network.Name, - Image: config.Image{ - Name: release.NomadClient.ImageName(), - Tag: v.Hind, - }, - Devices: []string{"/dev/fuse"}, - Environment: map[string]string{ - "CONSUL_AGENT_MODE": "client", - "CONSUL_SERVER_ADDRESS": fmt.Sprintf("hind.%s.consul.%.2d", name, 1), - "NOMAD_AGENT_MODE": "client", - }, - } - newNodes = append(newNodes, nomadClient) + newNodes = append(newNodes, newNomadClientNode(name, m.config.Network.Name, v.Hind, i+1)) } m.config.Nodes = newNodes @@ -421,7 +466,6 @@ func (m *Manager) Scale(ctx context.Context, targetClientCount int) error { func (m *Manager) addClientNodes(count int) error { m.logger.Debugf("Adding %d client node configs", count) - currentClientCount := m.CountClientNodes() v, err := release.Get(m.config.Version) if err != nil { return fmt.Errorf("failed to get version: %w", err) @@ -430,24 +474,8 @@ func (m *Manager) addClientNodes(count int) error { name := m.config.Name for i := 0; i < count; i++ { - nodeNum := currentClientCount + i + 1 - nomadClient := config.Node{ - Name: fmt.Sprintf("hind.%s.client.%.2d", name, nodeNum), - Kind: config.NomadNode, - Role: config.Client, - Network: m.config.Network.Name, - Image: config.Image{ - Name: release.NomadClient.ImageName(), - Tag: v.Hind, - }, - Devices: []string{"/dev/fuse"}, - Environment: map[string]string{ - "CONSUL_AGENT_MODE": "client", - "CONSUL_SERVER_ADDRESS": fmt.Sprintf("hind.%s.consul.%.2d", name, 1), - "NOMAD_AGENT_MODE": "client", - }, - } - m.config.Nodes = append(m.config.Nodes, nomadClient) + nodeNum := nextClientNodeNumber(name, m.config.Nodes) + m.config.Nodes = append(m.config.Nodes, newNomadClientNode(name, m.config.Network.Name, v.Hind, nodeNum)) } return nil diff --git a/pkg/cluster/manager_behavior_test.go b/pkg/cluster/manager_behavior_test.go new file mode 100644 index 0000000..ed0aa1a --- /dev/null +++ b/pkg/cluster/manager_behavior_test.go @@ -0,0 +1,212 @@ +package cluster + +import ( + "context" + "encoding/json" + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + + "github.com/stenh0use/hind/pkg/config" + "github.com/stenh0use/hind/pkg/file" + "github.com/stenh0use/hind/pkg/provider" + "github.com/stenh0use/hind/pkg/provider/mock" +) + +func newManagerForBehaviorTests(t *testing.T, clusterName string, cfg *config.Cluster, stub *mock.ClientStub) *Manager { + t.Helper() + + root := t.TempDir() + fm, err := file.New(root) + if err != nil { + t.Fatalf("file.New() error = %v", err) + } + + return &Manager{ + logger: &log.Logger{Handler: discard.New(), Level: log.ErrorLevel}, + provider: stub, + config: cfg, + fm: fm, + configFile: file.JoinPath(ClusterConfigDir, clusterName, ClusterConfigFile), + } +} + +func TestManagerStart_ReturnsErrorWhenPersistedConfigInvalid(t *testing.T) { + t.Parallel() + + m := newManagerForBehaviorTests(t, "demo", &config.Cluster{Name: "demo"}, &mock.ClientStub{}) + + if err := m.fm.WriteFile(m.configFile, []byte("{")); err != nil { + t.Fatalf("WriteFile() error = %v", err) + } + + result, err := m.Start(context.Background()) + if err == nil { + t.Fatal("Start() expected error, got nil") + } + if result != StartResultCreated { + t.Fatalf("Start() result = %v, want %v on error", result, StartResultCreated) + } + if !strings.Contains(err.Error(), "failed to load cluster config") { + t.Fatalf("Start() error = %q, want load-config context", err) + } +} + +func TestManagerStart_UsesPersistedConfigForReconcile(t *testing.T) { + t.Parallel() + + persistedOnlyNode := "hind.demo.client.03" + wantErr := errors.New("persisted node inspected") + + stub := &mock.ClientStub{ + InspectNetworkFn: func(context.Context, string) (*provider.NetworkInfo, error) { + return &provider.NetworkInfo{Name: "hind.demo"}, nil + }, + InspectContainerFn: func(_ context.Context, name string) (*provider.ContainerInfo, error) { + if name == persistedOnlyNode { + return nil, wantErr + } + return nil, nil + }, + } + + m := newManagerForBehaviorTests(t, "demo", &config.Cluster{ + Name: "demo", + Nodes: []config.Node{ + {Name: "hind.demo.consul.01"}, + }, + Network: config.Network{Name: "hind.demo-default"}, + }, stub) + + persisted := &config.Cluster{ + Name: "demo", + Nodes: []config.Node{ + {Name: "hind.demo.consul.01"}, + {Name: persistedOnlyNode}, + }, + Network: config.Network{Name: "hind.demo"}, + } + + data, err := json.Marshal(persisted) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + if err := m.fm.WriteFile(m.configFile, data); err != nil { + t.Fatalf("WriteFile() error = %v", err) + } + + _, err = m.Start(context.Background()) + if err == nil { + t.Fatal("Start() expected reconcile error, got nil") + } + if !errors.Is(err, wantErr) { + t.Fatalf("Start() error = %v, want wrapped %v", err, wantErr) + } +} + +func TestManagerGet_ReturnsErrorWhenNoPersistedConfigAndNoDefaults(t *testing.T) { + t.Parallel() + + m := newManagerForBehaviorTests(t, "demo", &config.Cluster{}, &mock.ClientStub{}) + + _, err := m.Get(context.Background()) + if err == nil { + t.Fatal("Get() expected error, got nil") + } + if !strings.Contains(err.Error(), "cluster config not found") { + t.Fatalf("Get() error = %q, want missing-config error", err) + } +} + +func TestManagerStop_AggregatesStopContainerError(t *testing.T) { + t.Parallel() + + nodeName := "hind.demo.consul.01" + + stub := &mock.ClientStub{ + InspectContainerFn: func(context.Context, string) (*provider.ContainerInfo, error) { + return &provider.ContainerInfo{Name: nodeName, Status: provider.Running.String()}, nil + }, + StopContainerFn: func(context.Context, string) error { + return errors.New("stop failed") + }, + } + + m := newManagerForBehaviorTests(t, "demo", &config.Cluster{ + Name: "demo", + Nodes: []config.Node{ + {Name: nodeName}, + }, + }, stub) + + result, err := m.StopWithOptions(context.Background(), StopOptions{}) + if err != nil { + t.Fatalf("StopWithOptions() unexpected error: %v", err) + } + if result.FailedCount != 1 { + t.Fatalf("FailedCount = %d, want 1", result.FailedCount) + } + if len(result.Failures) != 1 || result.Failures[0] != nodeName { + t.Fatalf("Failures = %v, want [%s]", result.Failures, nodeName) + } +} + +func TestManagerDelete_ReturnsWrappedStopContainerError(t *testing.T) { + t.Parallel() + + wantErr := errors.New("stop failed") + nodeName := "hind.demo.consul.01" + + stub := &mock.ClientStub{ + InspectContainerFn: func(context.Context, string) (*provider.ContainerInfo, error) { + return &provider.ContainerInfo{Name: nodeName, Status: provider.Running.String()}, nil + }, + StopContainerFn: func(context.Context, string) error { + return wantErr + }, + } + + m := newManagerForBehaviorTests(t, "demo", &config.Cluster{ + Name: "demo", + Nodes: []config.Node{ + {Name: nodeName}, + }, + Network: config.Network{Name: "hind.demo"}, + }, stub) + + err := m.Delete(context.Background()) + if err == nil { + t.Fatal("Delete() expected error, got nil") + } + if !errors.Is(err, wantErr) { + t.Fatalf("Delete() error = %v, want wrapped %v", err, wantErr) + } + if !strings.Contains(err.Error(), "failed to stop container") { + t.Fatalf("Delete() error = %q, want stop-container context", err) + } +} + +func TestList_ReturnsErrorWhenClusterPathIsFile(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + + baseDir := filepath.Join(home, DefaultConfigParentDir, DefaultConfigName) + if err := os.MkdirAll(baseDir, 0o755); err != nil { + t.Fatalf("MkdirAll() error = %v", err) + } + + clusterPath := filepath.Join(baseDir, ClusterConfigDir) + if err := os.WriteFile(clusterPath, []byte("not-a-directory"), 0o644); err != nil { + t.Fatalf("WriteFile() error = %v", err) + } + + _, err := List() + if err == nil { + t.Fatal("List() expected error when cluster path is a file, got nil") + } +} diff --git a/pkg/cluster/manager_get_test.go b/pkg/cluster/manager_get_test.go new file mode 100644 index 0000000..5a4a88c --- /dev/null +++ b/pkg/cluster/manager_get_test.go @@ -0,0 +1,411 @@ +package cluster + +import ( + "context" + "encoding/json" + "errors" + "slices" + "strings" + "testing" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/config" + "github.com/stenh0use/hind/pkg/file" + "github.com/stenh0use/hind/pkg/provider" + "github.com/stenh0use/hind/pkg/provider/mock" +) + +func TestManagerGet_NetworkNotFoundDoesNotPanic(t *testing.T) { + t.Parallel() + + root := t.TempDir() + fm, err := file.New(root) + if err != nil { + t.Fatalf("file.New() error = %v", err) + } + + persisted := &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{ + {Name: "hind.demo.consul.01"}, + }, + } + persistedData, err := json.Marshal(persisted) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + configPath := file.JoinPath(ClusterConfigDir, "demo", ClusterConfigFile) + if err := fm.WriteFile(configPath, persistedData); err != nil { + t.Fatalf("WriteFile() error = %v", err) + } + + m := &Manager{ + provider: &mock.ClientStub{ + InspectNetworkFn: func(ctx context.Context, name string) (*provider.NetworkInfo, error) { + return nil, nil + }, + InspectContainerFn: func(ctx context.Context, name string) (*provider.ContainerInfo, error) { + return nil, nil + }, + }, + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo-default"}, + Nodes: []config.Node{ + {Name: "hind.demo.client.01"}, + }, + }, + fm: fm, + configFile: configPath, + } + + got, err := m.Get(context.Background()) + if err != nil { + t.Fatalf("Get() unexpected error: %v", err) + } + if got == nil { + t.Fatal("Get() returned nil state") + } + if got.Network.Name != "" { + t.Errorf("Get().Network.Name = %q, want empty string when network missing", got.Network.Name) + } + if len(got.Containers) != 0 { + t.Errorf("Get().Containers len = %d, want 0", len(got.Containers)) + } +} + +func TestManagerGet_ReturnsInspectNetworkError(t *testing.T) { + t.Parallel() + + wantErr := errors.New("docker daemon unavailable") + m := &Manager{ + provider: &mock.ClientStub{ + InspectNetworkFn: func(ctx context.Context, name string) (*provider.NetworkInfo, error) { + return nil, wantErr + }, + }, + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo"}, + }, + } + + _, err := m.Get(context.Background()) + if err == nil { + t.Fatal("Get() expected error, got nil") + } + if !errors.Is(err, wantErr) { + t.Fatalf("Get() error = %v, want wrapped %v", err, wantErr) + } + if !strings.Contains(err.Error(), "failed to inspect network") { + t.Fatalf("Get() error = %q, want context about network inspect", err) + } +} + +func TestManagerGet_ReturnsInspectContainerError(t *testing.T) { + t.Parallel() + + wantErr := errors.New("inspect container failed") + m := &Manager{ + provider: &mock.ClientStub{ + InspectNetworkFn: func(ctx context.Context, name string) (*provider.NetworkInfo, error) { + return &provider.NetworkInfo{Name: name}, nil + }, + InspectContainerFn: func(ctx context.Context, name string) (*provider.ContainerInfo, error) { + return nil, wantErr + }, + }, + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{{Name: "hind.demo.consul.01"}}, + }, + } + + _, err := m.Get(context.Background()) + if err == nil { + t.Fatal("Get() expected error, got nil") + } + if !errors.Is(err, wantErr) { + t.Fatalf("Get() error = %v, want wrapped %v", err, wantErr) + } + if !strings.Contains(err.Error(), "failed to inspect node 'hind.demo.consul.01'") { + t.Fatalf("Get() error = %q, missing node context", err) + } +} + +func TestManagerGet_UsesPersistedTopology(t *testing.T) { + t.Parallel() + + root := t.TempDir() + fm, err := file.New(root) + if err != nil { + t.Fatalf("file.New() error = %v", err) + } + + persisted := &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{ + {Name: "hind.demo.consul.01"}, + {Name: "hind.demo.nomad.01"}, + {Name: "hind.demo.client.01"}, + {Name: "hind.demo.client.02"}, + {Name: "hind.demo.client.03"}, + }, + } + + persistedData, err := json.Marshal(persisted) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + + configPath := file.JoinPath(ClusterConfigDir, "demo", ClusterConfigFile) + if err := fm.WriteFile(configPath, persistedData); err != nil { + t.Fatalf("WriteFile() error = %v", err) + } + + inspected := []string{} + m := &Manager{ + provider: &mock.ClientStub{ + InspectNetworkFn: func(ctx context.Context, name string) (*provider.NetworkInfo, error) { + return &provider.NetworkInfo{Name: name}, nil + }, + InspectContainerFn: func(ctx context.Context, name string) (*provider.ContainerInfo, error) { + inspected = append(inspected, name) + return &provider.ContainerInfo{Name: name, Status: provider.Running.String()}, nil + }, + }, + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo-default"}, + Nodes: []config.Node{ + {Name: "hind.demo.consul.01"}, + {Name: "hind.demo.nomad.01"}, + {Name: "hind.demo.client.01"}, + }, + }, + fm: fm, + configFile: configPath, + } + + state, err := m.Get(context.Background()) + if err != nil { + t.Fatalf("Get() unexpected error: %v", err) + } + + if state.Network.Name != "hind.demo" { + t.Fatalf("Get().Network.Name = %q, want %q", state.Network.Name, "hind.demo") + } + + if len(state.Containers) != len(persisted.Nodes) { + t.Fatalf("Get().Containers len = %d, want %d", len(state.Containers), len(persisted.Nodes)) + } + + if !slices.Contains(inspected, "hind.demo.client.03") { + t.Fatalf("Get() did not inspect persisted scaled node hind.demo.client.03; inspected=%v", inspected) + } +} + +func TestManagerStop_UsesPersistedTopology(t *testing.T) { + t.Parallel() + + root := t.TempDir() + fm, err := file.New(root) + if err != nil { + t.Fatalf("file.New() error = %v", err) + } + + persisted := &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{ + {Name: "hind.demo.consul.01"}, + {Name: "hind.demo.nomad.01"}, + {Name: "hind.demo.client.01"}, + {Name: "hind.demo.client.02"}, + {Name: "hind.demo.client.03"}, + }, + } + persistedData, err := json.Marshal(persisted) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + + configPath := file.JoinPath(ClusterConfigDir, "demo", ClusterConfigFile) + if err := fm.WriteFile(configPath, persistedData); err != nil { + t.Fatalf("WriteFile() error = %v", err) + } + + stopped := []string{} + m := &Manager{ + logger: &log.Logger{Handler: discard.New(), Level: log.ErrorLevel}, + provider: &mock.ClientStub{ + InspectContainerFn: func(ctx context.Context, name string) (*provider.ContainerInfo, error) { + return &provider.ContainerInfo{Name: name, Status: provider.Running.String()}, nil + }, + StopContainerFn: func(ctx context.Context, name string) error { + stopped = append(stopped, name) + return nil + }, + }, + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo-default"}, + Nodes: []config.Node{ + {Name: "hind.demo.consul.01"}, + {Name: "hind.demo.nomad.01"}, + {Name: "hind.demo.client.01"}, + }, + }, + fm: fm, + configFile: configPath, + } + + if err := m.Stop(context.Background()); err != nil { + t.Fatalf("Stop() unexpected error: %v", err) + } + + if len(stopped) != len(persisted.Nodes) { + t.Fatalf("Stop() stopped %d nodes, want %d", len(stopped), len(persisted.Nodes)) + } + + if !slices.Contains(stopped, "hind.demo.client.03") { + t.Fatalf("Stop() did not stop persisted scaled node hind.demo.client.03; stopped=%v", stopped) + } +} + +func TestManagerLoadPersistedConfig_MissingFileKeepsDefaults(t *testing.T) { + t.Parallel() + + m := &Manager{ + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo-default"}, + Nodes: []config.Node{{Name: "hind.demo.consul.01"}}, + }, + } + + if err := m.LoadPersistedConfig(); err != nil { + t.Fatalf("LoadPersistedConfig() unexpected error: %v", err) + } + + if m.config.Network.Name != "hind.demo-default" { + t.Fatalf("LoadPersistedConfig() changed defaults unexpectedly; got network %q", m.config.Network.Name) + } +} + +func TestManagerLoadPersistedConfig_MissingAndNoDefaultsErrors(t *testing.T) { + t.Parallel() + + m := &Manager{} + if err := m.LoadPersistedConfig(); err == nil { + t.Fatal("LoadPersistedConfig() expected error when no persisted file and no in-memory config") + } +} + +func TestManagerStop_PropagatesInspectContainerError(t *testing.T) { + t.Parallel() + + wantErr := errors.New("container inspect failed") + m := &Manager{ + logger: &log.Logger{Handler: discard.New(), Level: log.ErrorLevel}, + provider: &mock.ClientStub{ + InspectContainerFn: func(ctx context.Context, name string) (*provider.ContainerInfo, error) { + // Return nil info with a real error (e.g. docker daemon error) + return nil, wantErr + }, + }, + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{{Name: "hind.demo.consul.01"}}, + }, + } + + err := m.Stop(context.Background()) + if err == nil { + t.Fatal("Stop() expected error when InspectContainer returns error, got nil") + } + if !errors.Is(err, wantErr) { + t.Fatalf("Stop() error = %v, want wrapped %v", err, wantErr) + } +} + +func TestManagerDelete_PropagatesInspectContainerError(t *testing.T) { + t.Parallel() + + root := t.TempDir() + fm, err := file.New(root) + if err != nil { + t.Fatalf("file.New() error = %v", err) + } + + wantErr := errors.New("container inspect failed") + m := &Manager{ + logger: &log.Logger{Handler: discard.New(), Level: log.ErrorLevel}, + provider: &mock.ClientStub{ + InspectContainerFn: func(ctx context.Context, name string) (*provider.ContainerInfo, error) { + // Return nil info with a real error (e.g. docker daemon error) + return nil, wantErr + }, + }, + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{{Name: "hind.demo.consul.01"}}, + }, + fm: fm, + configFile: file.JoinPath(ClusterConfigDir, "demo", ClusterConfigFile), + } + + err = m.Delete(context.Background()) + if err == nil { + t.Fatal("Delete() expected error when InspectContainer returns error, got nil") + } + if !errors.Is(err, wantErr) { + t.Fatalf("Delete() error = %v, want wrapped %v", err, wantErr) + } +} + +func TestManagerDelete_PropagatesInspectNetworkError(t *testing.T) { + t.Parallel() + + root := t.TempDir() + fm, err := file.New(root) + if err != nil { + t.Fatalf("file.New() error = %v", err) + } + + wantErr := errors.New("network inspect failed") + m := &Manager{ + logger: &log.Logger{Handler: discard.New(), Level: log.ErrorLevel}, + provider: &mock.ClientStub{ + InspectContainerFn: func(ctx context.Context, name string) (*provider.ContainerInfo, error) { + // Container does not exist — nil, nil is the not-found signal + return nil, nil + }, + InspectNetworkFn: func(ctx context.Context, name string) (*provider.NetworkInfo, error) { + // Return nil info with a real error + return nil, wantErr + }, + }, + config: &config.Cluster{ + Name: "demo", + Network: config.Network{Name: "hind.demo"}, + Nodes: []config.Node{}, + }, + fm: fm, + configFile: file.JoinPath(ClusterConfigDir, "demo", ClusterConfigFile), + } + + err = m.Delete(context.Background()) + if err == nil { + t.Fatal("Delete() expected error when InspectNetwork returns error, got nil") + } + if !errors.Is(err, wantErr) { + t.Fatalf("Delete() error = %v, want wrapped %v", err, wantErr) + } +} diff --git a/pkg/cluster/manager_new_test.go b/pkg/cluster/manager_new_test.go new file mode 100644 index 0000000..7b0d4fe --- /dev/null +++ b/pkg/cluster/manager_new_test.go @@ -0,0 +1,36 @@ +package cluster + +import ( + "testing" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/provider/mock" +) + +func TestNewUsesInjectedProvider(t *testing.T) { + logger := &log.Logger{Handler: discard.New()} + injectedProvider := &mock.ClientStub{} + + manager, err := New(logger, "di-seam", injectedProvider) + if err != nil { + t.Fatalf("New() error = %v", err) + } + + if manager.Provider() != injectedProvider { + t.Fatalf("Provider() did not return injected provider") + } +} + +func TestNewReturnsErrorWhenProviderIsNil(t *testing.T) { + logger := &log.Logger{Handler: discard.New()} + + manager, err := New(logger, "di-seam", nil) + if err == nil { + t.Fatal("New() error = nil, want non-nil") + } + + if manager != nil { + t.Fatal("New() manager = non-nil, want nil") + } +} diff --git a/pkg/cluster/manager_wait_test.go b/pkg/cluster/manager_wait_test.go new file mode 100644 index 0000000..bd1b836 --- /dev/null +++ b/pkg/cluster/manager_wait_test.go @@ -0,0 +1,52 @@ +package cluster + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/config" + "github.com/stenh0use/hind/pkg/provider" + "github.com/stenh0use/hind/pkg/provider/mock" +) + +func TestWaitForContainersRunning_ReturnsContextErrorPromptly(t *testing.T) { + m := &Manager{ + logger: &log.Logger{Handler: discard.New()}, + provider: &mock.ClientStub{ + InspectContainerFn: func(_ context.Context, name string) (*provider.ContainerInfo, error) { + return &provider.ContainerInfo{Name: name, Status: "exited"}, nil + }, + InspectNetworkFn: func(_ context.Context, name string) (*provider.NetworkInfo, error) { + return &provider.NetworkInfo{Name: name}, nil + }, + }, + config: &config.Cluster{ + Name: "test", + Network: config.Network{ + Name: "hind.test", + }, + Nodes: []config.Node{ + {Name: "hind.test.consul.01"}, + }, + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + start := time.Now() + err := m.waitForContainersRunning(ctx, 5*time.Second) + elapsed := time.Since(start) + + if !errors.Is(err, context.Canceled) { + t.Fatalf("waitForContainersRunning() error = %v, want context.Canceled", err) + } + + if elapsed > 200*time.Millisecond { + t.Fatalf("waitForContainersRunning() took %s, expected prompt return after canceled context", elapsed) + } +} diff --git a/pkg/cluster/path_confinement_test.go b/pkg/cluster/path_confinement_test.go new file mode 100644 index 0000000..3db75f7 --- /dev/null +++ b/pkg/cluster/path_confinement_test.go @@ -0,0 +1,81 @@ +package cluster + +import ( + "strings" + "testing" +) + +func TestValidateClusterName(t *testing.T) { + tests := []struct { + name string + clusterName string + wantErr bool + }{ + { + name: "valid simple name", + clusterName: "default", + wantErr: false, + }, + { + name: "valid with punctuation", + clusterName: "dev-cluster_01", + wantErr: false, + }, + { + name: "empty name", + clusterName: "", + wantErr: true, + }, + { + name: "whitespace name", + clusterName: " ", + wantErr: true, + }, + { + name: "unix traversal", + clusterName: "../../etc", + wantErr: true, + }, + { + name: "windows traversal", + clusterName: "..\\..\\windows", + wantErr: true, + }, + { + name: "absolute unix path", + clusterName: "/tmp/escape", + wantErr: true, + }, + { + name: "clean resolves up", + clusterName: "../cluster", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateClusterName(tt.clusterName) + if tt.wantErr && err == nil { + t.Fatalf("ValidateClusterName(%q) expected error, got nil", tt.clusterName) + } + if !tt.wantErr && err != nil { + t.Fatalf("ValidateClusterName(%q) expected no error, got %v", tt.clusterName, err) + } + }) + } +} + +func TestSetActiveCluster_RejectsTraversalName(t *testing.T) { + tmpDir := t.TempDir() + t.Setenv("HOME", tmpDir) + + err := SetActiveCluster("../../etc") + if err == nil { + t.Fatal("expected error for traversal cluster name, got nil") + } + + if !strings.Contains(err.Error(), "invalid cluster name") { + t.Fatalf("expected invalid cluster name error, got %v", err) + } +} diff --git a/pkg/cluster/reconcile.go b/pkg/cluster/reconcile.go index cb380f5..3c0f704 100644 --- a/pkg/cluster/reconcile.go +++ b/pkg/cluster/reconcile.go @@ -49,7 +49,7 @@ func (m *Manager) Reconcile(ctx context.Context) error { } // 2. Calculate what needs to change - plan, err := m.calculateReconcilePlan(ctx, actual) + plan, err := m.calculateReconcilePlan(actual) if err != nil { return fmt.Errorf("failed to calculate reconcile plan: %w", err) } @@ -114,7 +114,7 @@ func (m *Manager) getActualState(ctx context.Context) (*ActualState, error) { } // calculateReconcilePlan compares desired vs actual and produces a plan -func (m *Manager) calculateReconcilePlan(ctx context.Context, actual *ActualState) (*ReconcilePlan, error) { +func (m *Manager) calculateReconcilePlan(actual *ActualState) (*ReconcilePlan, error) { plan := &ReconcilePlan{ ContainersToCreate: []config.Node{}, ContainersToStart: []string{}, @@ -190,8 +190,9 @@ func (m *Manager) executeReconcilePlan(ctx context.Context, plan *ReconcilePlan) } // Recreate - action.NewConfig.Labels = labels - id, err := m.provider.CreateContainer(ctx, action.NewConfig) + nodeConfig := action.NewConfig + nodeConfig.Labels = labels + id, err := m.provider.CreateContainer(ctx, provider.ContainerSpecFromNode(nodeConfig)) if err != nil { return fmt.Errorf("failed to recreate container '%s': %w", action.ExistingName, err) } @@ -202,7 +203,7 @@ func (m *Manager) executeReconcilePlan(ctx context.Context, plan *ReconcilePlan) for _, node := range plan.ContainersToCreate { m.logger.Infof("Creating container '%s'", node.Name) node.Labels = labels - id, err := m.provider.CreateContainer(ctx, node) + id, err := m.provider.CreateContainer(ctx, provider.ContainerSpecFromNode(node)) if err != nil { return fmt.Errorf("failed to create container '%s': %w", node.Name, err) } diff --git a/pkg/cluster/reconcile_test.go b/pkg/cluster/reconcile_test.go index aec5744..6c91792 100644 --- a/pkg/cluster/reconcile_test.go +++ b/pkg/cluster/reconcile_test.go @@ -1,7 +1,6 @@ package cluster import ( - "context" "testing" "github.com/stenh0use/hind/pkg/config" @@ -75,7 +74,7 @@ func TestCalculateReconcilePlan_NewCluster(t *testing.T) { Containers: map[string]*provider.ContainerInfo{}, } - plan, err := m.calculateReconcilePlan(context.Background(), actual) + plan, err := m.calculateReconcilePlan(actual) if err != nil { t.Fatalf("calculateReconcilePlan() error = %v", err) } @@ -118,7 +117,7 @@ func TestCalculateReconcilePlan_AllRunning(t *testing.T) { }, } - plan, err := m.calculateReconcilePlan(context.Background(), actual) + plan, err := m.calculateReconcilePlan(actual) if err != nil { t.Fatalf("calculateReconcilePlan() error = %v", err) } @@ -145,16 +144,16 @@ func TestCalculateReconcilePlan_StoppedContainers(t *testing.T) { Containers: map[string]*provider.ContainerInfo{ "hind.test.consul.01": { Name: "hind.test.consul.01", - Status: "exited", + Status: provider.Stopped.String(), }, "hind.test.nomad.01": { Name: "hind.test.nomad.01", - Status: "exited", + Status: provider.Stopped.String(), }, }, } - plan, err := m.calculateReconcilePlan(context.Background(), actual) + plan, err := m.calculateReconcilePlan(actual) if err != nil { t.Fatalf("calculateReconcilePlan() error = %v", err) } @@ -193,7 +192,7 @@ func TestCalculateReconcilePlan_UnhealthyContainers(t *testing.T) { }, } - plan, err := m.calculateReconcilePlan(context.Background(), actual) + plan, err := m.calculateReconcilePlan(actual) if err != nil { t.Fatalf("calculateReconcilePlan() error = %v", err) } @@ -234,7 +233,7 @@ func TestCalculateReconcilePlan_MixedStates(t *testing.T) { }, "hind.test.nomad.01": { Name: "hind.test.nomad.01", - Status: "exited", + Status: provider.Stopped.String(), }, "hind.test.vault.01": { Name: "hind.test.vault.01", @@ -244,7 +243,7 @@ func TestCalculateReconcilePlan_MixedStates(t *testing.T) { }, } - plan, err := m.calculateReconcilePlan(context.Background(), actual) + plan, err := m.calculateReconcilePlan(actual) if err != nil { t.Fatalf("calculateReconcilePlan() error = %v", err) } diff --git a/pkg/cluster/stop_test.go b/pkg/cluster/stop_test.go new file mode 100644 index 0000000..2a64272 --- /dev/null +++ b/pkg/cluster/stop_test.go @@ -0,0 +1,84 @@ +package cluster + +import ( + "context" + "errors" + "testing" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + + "github.com/stenh0use/hind/pkg/config" + "github.com/stenh0use/hind/pkg/provider" + "github.com/stenh0use/hind/pkg/provider/mock" +) + +func TestStopWithOptions(t *testing.T) { + tests := []struct { + name string + statuses map[string]string + force bool + stopErrFor string + wantStopped int + wantAlready int + wantFailed int + wantPreFailed int + wantFailListSize int + }{ + {name: "already stopped idempotent", statuses: map[string]string{"n1": provider.Stopped.String(), "n2": provider.Stopped.String()}, wantAlready: 2}, + {name: "partial failure continues", statuses: map[string]string{"n1": provider.Running.String(), "n2": provider.Running.String()}, stopErrFor: "n2", wantStopped: 1, wantFailed: 1, wantFailListSize: 1}, + {name: "unhealthy counted", statuses: map[string]string{"n1": provider.Error.String(), "n2": provider.Running.String()}, wantStopped: 1, wantAlready: 1, wantPreFailed: 1}, + {name: "force uses kill", statuses: map[string]string{"n1": provider.Running.String()}, force: true, wantStopped: 1}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + stops := 0 + kills := 0 + client := &mock.ClientStub{} + client.InspectContainerFn = func(_ context.Context, name string) (*provider.ContainerInfo, error) { + status, ok := tt.statuses[name] + if !ok { + return nil, nil + } + return &provider.ContainerInfo{Name: name, Status: status}, nil + } + client.StopContainerFn = func(_ context.Context, name string) error { + stops++ + if name == tt.stopErrFor { + return errors.New("stop failed") + } + return nil + } + client.KillContainerFn = func(_ context.Context, _ string) error { + kills++ + return nil + } + + m := &Manager{ + logger: &log.Logger{Handler: discard.New(), Level: log.ErrorLevel}, + provider: client, + config: &config.Cluster{Name: tt.name, Nodes: []config.Node{{Name: "n1"}, {Name: "n2"}}}, + } + + res, err := m.StopWithOptions(context.Background(), StopOptions{Force: tt.force}) + if err != nil { + t.Fatalf("StopWithOptions() error = %v", err) + } + if res.StoppedCount != tt.wantStopped || res.AlreadyStoppedCount != tt.wantAlready || res.FailedCount != tt.wantFailed || res.FailedPreStopCount != tt.wantPreFailed { + t.Fatalf("unexpected result: %+v", res) + } + if len(res.Failures) != tt.wantFailListSize { + t.Fatalf("failures len = %d, want %d", len(res.Failures), tt.wantFailListSize) + } + if tt.force { + if kills == 0 { + t.Fatalf("expected kill calls") + } + if stops != 0 { + t.Fatalf("expected no stop calls when force=true") + } + } + }) + } +} diff --git a/pkg/cluster/types.go b/pkg/cluster/types.go index d544c47..7f15870 100644 --- a/pkg/cluster/types.go +++ b/pkg/cluster/types.go @@ -2,9 +2,12 @@ package cluster import ( "fmt" + "strconv" + "strings" "github.com/stenh0use/hind/pkg/build/release" "github.com/stenh0use/hind/pkg/config" + "github.com/stenh0use/hind/pkg/provider" ) const ( @@ -14,9 +17,72 @@ const ( DefaultVaultServers = 1 ) +func newNomadClientNode(clusterName, networkName, version string, nodeNumber int) config.Node { + return config.Node{ + Name: fmt.Sprintf("hind.%s.client.%.2d", clusterName, nodeNumber), + Kind: config.NomadNode, + Role: config.Client, + Network: networkName, + Image: config.Image{ + Name: release.NomadClient.ImageName(), + Tag: version, + }, + Devices: []string{"/dev/fuse"}, + Environment: map[string]string{ + "CONSUL_AGENT_MODE": "client", + "CONSUL_SERVER_ADDRESS": fmt.Sprintf("hind.%s.consul.%.2d", clusterName, 1), + "NOMAD_AGENT_MODE": "client", + }, + } +} + +func parseClientNodeNumber(clusterName, nodeName string) (int, bool) { + prefix := fmt.Sprintf("hind.%s.client.", clusterName) + if !strings.HasPrefix(nodeName, prefix) { + return 0, false + } + + suffix := strings.TrimPrefix(nodeName, prefix) + if suffix == "" { + return 0, false + } + + number, err := strconv.Atoi(suffix) + if err != nil || number < 1 { + return 0, false + } + + return number, true +} + +func nextClientNodeNumber(clusterName string, nodes []config.Node) int { + maxNodeNumber := 0 + for _, node := range nodes { + if node.Role != config.Client { + continue + } + + number, ok := parseClientNodeNumber(clusterName, node.Name) + if !ok { + continue + } + + if number > maxNodeNumber { + maxNodeNumber = number + } + } + + return maxNodeNumber + 1 +} + // StartResult indicates the outcome of a cluster start operation type StartResult int +type ClusterInfo struct { + Containers []provider.ContainerInfo + Network provider.NetworkInfo +} + const ( // StartResultCreated indicates a new cluster was created StartResultCreated StartResult = iota @@ -92,23 +158,7 @@ func newClusterConfig(name string, version string) (*config.Cluster, error) { } for count := range DefaultNomadClients { - nomadClient := config.Node{ - Name: fmt.Sprintf("hind.%s.client.%.2d", name, count+1), - Kind: config.NomadNode, - Role: config.Client, - Network: networkName, - Image: config.Image{ - Name: release.NomadClient.ImageName(), - Tag: v.Hind, - }, - Devices: []string{"/dev/fuse"}, - Environment: map[string]string{ - "CONSUL_AGENT_MODE": "client", - "CONSUL_SERVER_ADDRESS": fmt.Sprintf("hind.%s.consul.%.2d", name, 1), - "NOMAD_AGENT_MODE": "client", - }, - } - nodes = append(nodes, nomadClient) + nodes = append(nodes, newNomadClientNode(name, networkName, v.Hind, count+1)) } for count := range DefaultVaultServers { @@ -121,13 +171,6 @@ func newClusterConfig(name string, version string) (*config.Cluster, error) { Name: release.Vault.ImageName(), Tag: v.Hind, }, - Ports: []config.PortMapping{ - { - HostPort: 8200, - ContainerPort: 8200, - Protocol: "tcp", - }, - }, Environment: map[string]string{ "CONSUL_AGENT_MODE": "client", "CONSUL_SERVER_ADDRESS": fmt.Sprintf("hind.%s.consul.%.2d", name, 1), diff --git a/pkg/cluster/types_test.go b/pkg/cluster/types_test.go new file mode 100644 index 0000000..0f90430 --- /dev/null +++ b/pkg/cluster/types_test.go @@ -0,0 +1,28 @@ +package cluster + +import "testing" + +func TestNewClusterConfig_VaultPortsAssignedOnce(t *testing.T) { + cfg, err := newClusterConfig("test", "0.4.0") + if err != nil { + t.Fatalf("newClusterConfig() error = %v", err) + } + + vaultCount := 0 + for _, node := range cfg.Nodes { + if node.Kind != "vault" { + continue + } + vaultCount++ + if len(node.Ports) != 1 { + t.Fatalf("vault node ports len = %d, want 1", len(node.Ports)) + } + if node.Ports[0].HostPort != 8200 || node.Ports[0].ContainerPort != 8200 { + t.Fatalf("vault port mapping = %+v, want 8200:8200", node.Ports[0]) + } + } + + if vaultCount == 0 { + t.Fatal("expected at least one vault node") + } +} diff --git a/pkg/cmd/AGENTS.md b/pkg/cmd/AGENTS.md index cb78a98..6471e7f 100644 --- a/pkg/cmd/AGENTS.md +++ b/pkg/cmd/AGENTS.md @@ -1,495 +1,72 @@ -# HIND CLI Package - Development Guide +# pkg/cmd Claude Guide -## Package Overview +Use this file for command-layer work in `pkg/cmd`. -The `pkg/cmd` package contains all CLI command implementations for the hind tool, organized around the Cobra framework. +## Scope -**Package Structure:** -``` -pkg/cmd/ -├── iostreams.go # IO abstraction for testable output -├── logger.go # Logger factory and configuration -├── hind/ # Root and subcommands -│ ├── root.go # Root command (hind) -│ ├── build/ # Build command for Docker images -│ ├── get/ # Get command for cluster details -│ ├── list/ # List command for all clusters -│ ├── rm/ # Remove command to delete clusters -│ ├── start/ # Start command to create/start clusters -│ ├── stop/ # Stop command to stop clusters -│ ├── set/ # Set command group for configuration -│ ├── version/ # Version command -│ └── format/ # Shared formatting utilities -``` - -**Key Principles:** -- **Separation of Concerns**: CLI layer handles only user interaction, delegates business logic to `pkg/cluster`, `pkg/build` -- **Testability**: Commands accept dependencies (logger, IO streams) rather than creating them -- **Consistency**: All commands follow the same structural patterns - ---- - -## Command Structure Patterns - -### Standard Command Signature - -All commands use this signature: -```go -func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command -``` - -### Command Types - -**Leaf Command** (performs actual work): -```go -func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command { - cmd := &cobra.Command{ - Use: "list", - Short: "List all hind clusters", - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - return runE(cmd.Context(), logger, streams) - }, - } - return cmd -} - -func runE(ctx context.Context, logger *log.Logger, streams IOStreams) error { - // Implementation here -} -``` - -**Group Command** (organizes subcommands): -```go -func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command { - cmd := &cobra.Command{ - Use: "set", - Short: "Set hind configuration options", - } - cmd.AddCommand(newProfileCommand(logger, streams)) - return cmd -} -``` - -### RunE Separation Pattern - -**Always separate RunE logic** from NewCommand for testability: - -```go -func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command { - cmd := &cobra.Command{ - RunE: func(cmd *cobra.Command, args []string) error { - return runE(cmd.Context(), logger, streams, args) - }, - } - return cmd -} - -// runE contains the actual command logic -func runE(ctx context.Context, logger *log.Logger, streams IOStreams, args []string) error { - // 1. Parse arguments - // 2. Validate input - // 3. Call business logic (cluster.Manager, build.Builder) - // 4. Format output - return nil -} -``` - ---- +- `pkg/cmd` handles CLI interaction only. +- Delegate business logic to `pkg/cluster`, `pkg/build`, and related packages. -## Dependency Injection +## Package layout -### Logger Injection +- `iostreams.go` — standard input/output abstraction. +- `logger.go` — logger setup. +- `hind/` — root command + subcommands. -**Production usage**: -```go -logLevel := cmd.GetLogLevelFromEnv() // Reads HIND_LOGLEVEL -logger := cmd.NewLogger(logLevel, "text") -``` - -**Test usage**: -```go -logger := &log.Logger{ - Handler: discard.New(), // No-op handler for tests - Level: log.ErrorLevel, -} -``` - -**Logger levels**: DebugLevel (verbose), InfoLevel, WarnLevel, ErrorLevel +## Command structure -### IOStreams Injection +- Standard constructor for command packages under `pkg/cmd/hind/*`: -**IOStreams type** (`pkg/cmd/iostreams.go`): ```go -type IOStreams struct { - In io.Reader // stdin - Out io.Writer // stdout - program output - ErrOut io.Writer // stderr - status messages -} +func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command ``` -**Production**: `streams := cmd.StandardIOStreams()` -**Test**: Capture with `bytes.Buffer` for output verification - ---- - -## IO Guidelines +- Keep constructor focused on wiring flags/args. +- Put command behavior in `runE(...)` for testability. +- Use Cobra arg validators (`cobra.NoArgs`, `cobra.ExactArgs`, etc.). -### Stream Usage +## Flags -- **streams.Out** - Program output (parseable, machine-readable) - - Structured data, tables, JSON - - Content users might pipe or parse +- Use local vars for a few flags. +- Use a `flagpole` struct when a command has many flags (typically 4+). +- Prefer stable defaults and explicit flag descriptions. -- **streams.ErrOut** - Status messages (human-readable) - - Progress updates, completion messages - - Warnings that don't fail the command +## IO and logging rules -- **logger** - Internal logging (controlled by log level) - - Debug information (requires --verbose) - - Info/Warn for internal state - - Never use logger.Error() in commands - return errors instead +- `streams.Out`: program output users may parse. +- `streams.ErrOut`: status/progress messages. +- `logger`: debug/internal logs. +- Return errors from command logic; do not report command failures via `logger.Error()`. +- Avoid direct `fmt.Println` / `os.Stdout` / `os.Stderr` in commands. -### Output Rules +## Error handling -**DO:** -- ✅ Use `streams.Out` for data users might pipe or parse -- ✅ Use `streams.ErrOut` for status messages -- ✅ Use `logger` for debug/verbose information -- ✅ Use tabwriter for aligned columns +- Wrap errors with context using `%w`. +- Include useful identifiers (cluster name, operation) in user-facing error context. -**DON'T:** -- ❌ Use `fmt.Println()` or `fmt.Printf()` directly -- ❌ Mix program output and status messages on stdout -- ❌ Use `logger.Error()` to report command failures - return errors -- ❌ Write to `os.Stdout` or `os.Stderr` directly +## Active cluster behavior ---- - -## Flag Management - -### Flagpole Pattern (4+ flags) - -```go -type flagpole struct { - hindVersion string - timeout time.Duration - clients int -} - -func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command { - flags := &flagpole{} - - cmd := &cobra.Command{ - Use: "start [cluster-name]", - Short: "Start or create a hind cluster", - RunE: func(cmd *cobra.Command, args []string) error { - return runE(cmd, cmd.Context(), logger, streams, flags, args) - }, - } - - cmd.Flags().StringVar(&flags.hindVersion, "version", "latest", "Hind version to use") - cmd.Flags().DurationVar(&flags.timeout, "timeout", DefaultStartTimeout, "Timeout for startup") - cmd.Flags().IntVar(&flags.clients, "clients", 1, "Number of client nodes") - - return cmd -} -``` - -### Simple Pattern (0-3 flags) - -```go -func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command { - var timeout time.Duration - - cmd := &cobra.Command{ - RunE: func(cmd *cobra.Command, args []string) error { - return runE(cmd.Context(), logger, streams, timeout) - }, - } - - cmd.Flags().DurationVar(&timeout, "timeout", DefaultTimeout, "Operation timeout") - return cmd -} -``` - -### Flag Conventions - -- Use lowercase with hyphens: `--cluster-name`, `--timeout` -- Always provide sensible defaults -- Use constants for default values: `DefaultStartTimeout` -- Check explicit vs default with `cmd.Flags().Changed("flag-name")` - ---- - -## Error Handling - -### Error Wrapping - -**Always wrap business logic errors** with user-facing context: - -```go -func runE(cmd *cobra.Command, ctx context.Context, logger *log.Logger, - streams IOStreams, args []string) error { - mgr, err := cluster.New(logger, clusterName) - if err != nil { - return fmt.Errorf("failed to initialize cluster manager: %w", err) - } - - if err := mgr.Start(ctx); err != nil { - return fmt.Errorf("failed to start cluster %q: %w", clusterName, err) - } - - return nil -} -``` - -### Typed Errors - -```go -import "errors" - -func runE(...) error { - if err := mgr.Delete(ctx); err != nil { - var notFoundErr *cluster.NotFoundError - if errors.As(err, ¬FoundErr) { - logger.Warnf("Cluster %q not found, nothing to delete", notFoundErr.Name) - return nil - } - return fmt.Errorf("failed to delete cluster: %w", err) - } - return nil -} -``` +- For optional cluster args, prefer: + 1. explicit arg, + 2. active cluster from `cluster.GetActiveCluster()`, + 3. fallback default. +- Commands that select/create a cluster should set active cluster when appropriate. +- Removing the active cluster should clear it. -### Error Conventions +## Testing essentials -- ✅ Wrap errors with `fmt.Errorf("context: %w", err)` -- ✅ Include relevant identifiers in messages (cluster names, etc.) -- ✅ Return errors from runE - let app layer format them -- ❌ Don't log errors with logger.Error() - return them instead -- ❌ Don't panic or call os.Exit() in command code +- Prefer table-driven tests. +- Use `t.Parallel()` only when safe. +- In parallel table tests, capture range var (`tt := tt`). +- Verify output by injecting `cmd.IOStreams` with `bytes.Buffer`. ---- +## Canonical examples -## Testing Patterns - -### Command Construction Tests - -```go -func TestNewCommand(t *testing.T) { - logger := &log.Logger{Handler: discard.New(), Level: log.ErrorLevel} - streams := IOStreams{In: strings.NewReader(""), Out: io.Discard, ErrOut: io.Discard} - - cmd := NewCommand(logger, streams) - - if cmd == nil { - t.Fatal("NewCommand() returned nil") - } - - if cmd.Use != "list" { - t.Errorf("Expected Use to be 'list', got '%s'", cmd.Use) - } -} -``` - -### Table-Driven Tests - -```go -func TestCommandArgs(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - args []string - wantError bool - }{ - {"no args is valid", []string{}, false}, - {"one arg is valid", []string{"dev"}, false}, - {"two args is invalid", []string{"dev", "extra"}, true}, - } - - for _, tt := range tests { - tt := tt // Capture range variable - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - // Test implementation - }) - } -} -``` - -### Output Verification - -```go -func TestListCommand_Output(t *testing.T) { - var buf bytes.Buffer - streams := IOStreams{Out: &buf, ErrOut: io.Discard} - - cmd := NewCommand(logger, streams) - cmd.SetArgs([]string{}) - - if err := cmd.Execute(); err != nil { - t.Fatalf("Command execution failed: %v", err) - } - - output := buf.String() - if !strings.Contains(output, "NAME") { - t.Error("Expected header 'NAME' in output") - } -} -``` - -### Parallel Tests - -Use `t.Parallel()` for faster test runs: -- ✅ Pure logic tests (no shared state) -- ✅ Tests that don't modify environment variables -- ❌ Tests that modify global state -- ❌ Integration tests with real Docker operations - -**CRITICAL**: Always capture range variables in parallel tests: -```go -for _, tt := range tests { - tt := tt // Required for parallel tests - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - // Test logic - }) -} -``` - ---- - -## Active Cluster Management - -### Pattern - -```go -func runE(cmd *cobra.Command, ctx context.Context, logger *log.Logger, - streams IOStreams, args []string) error { - var clusterName string - - if len(args) > 0 { - clusterName = args[0] - } else { - activeCluster, err := cluster.GetActiveCluster() - if err != nil { - logger.Debugf("Failed to get active cluster: %v", err) - } - - if activeCluster == "" { - clusterName = "default" - logger.Debugf("No active cluster, using default") - } else { - clusterName = activeCluster - logger.Debugf("Using active cluster: %s", clusterName) - } - } - - // ... rest of command logic -} -``` - -### Setting Active Cluster - -Commands that create/start clusters should set the active cluster: -```go -if err := cluster.SetActiveCluster(clusterName); err != nil { - logger.Warnf("Failed to set active cluster: %v", err) -} -``` - -### Clearing Active Cluster - -Commands that delete clusters should clear if deleting the active cluster: -```go -activeCluster, _ := cluster.GetActiveCluster() -if activeCluster == clusterName { - if err := cluster.ClearActiveCluster(); err != nil { - logger.Warnf("Failed to clear active cluster: %v", err) - } -} -``` - ---- - -## Implementation Checklist - -When implementing a new command: - -- [ ] **Signature**: `func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command` -- [ ] **Flagpole**: Use flagpole struct if command has 4+ flags -- [ ] **RunE separation**: Extract logic to separate `runE()` function -- [ ] **IO streams**: All output through `streams.Out` or `streams.ErrOut` -- [ ] **Error wrapping**: Wrap business logic errors with context -- [ ] **Active cluster**: Handle active cluster logic (if applicable) -- [ ] **Args validation**: Set appropriate `Args` validator (`NoArgs`, `ExactArgs`, etc.) -- [ ] **Documentation**: Provide clear `Short` and `Long` descriptions -- [ ] **Test file**: Create `_test.go` with table-driven tests -- [ ] **Output tests**: Verify output format using buffer streams -- [ ] **Flag tests**: Verify flags exist and have correct defaults -- [ ] **Parallel tests**: Add `t.Parallel()` where safe, capture range variables - ---- - -## Reference Examples - -For complete working examples, see: -- **Simple command**: [pkg/cmd/hind/list/list.go](../hind/list/list.go) -- **Complex command with flags**: [pkg/cmd/hind/start/start.go](../hind/start/start.go) -- **Group command**: [pkg/cmd/hind/set/set.go](../hind/set/set.go) -- **Testing patterns**: [pkg/cmd/hind/list/list_test.go](../hind/list/list_test.go) - ---- - -## Quick Reference Templates - -### Minimal Command -```go -func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command { - cmd := &cobra.Command{ - Use: "command-name", - Short: "Brief description", - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - return runE(cmd.Context(), logger, streams) - }, - } - return cmd -} - -func runE(ctx context.Context, logger *log.Logger, streams IOStreams) error { - // Implementation - return nil -} -``` - -### Command with Flags -```go -type flagpole struct { - flag1 string - flag2 int -} - -func NewCommand(logger *log.Logger, streams IOStreams) *cobra.Command { - flags := &flagpole{} - - cmd := &cobra.Command{ - Use: "command-name", - Short: "Brief description", - RunE: func(cmd *cobra.Command, args []string) error { - return runE(cmd, cmd.Context(), logger, streams, flags, args) - }, - } - - cmd.Flags().StringVar(&flags.flag1, "flag1", "default", "description") - cmd.Flags().IntVar(&flags.flag2, "flag2", 0, "description") - - return cmd -} -``` +- `pkg/cmd/hind/list/list.go` +- `pkg/cmd/hind/start/start.go` +- `pkg/cmd/hind/set/set.go` +- `pkg/cmd/hind/rm/rm.go` ---- +## Validation -This guide documents hind-specific patterns. For general Go/Cobra best practices, refer to the official documentation. +- Run `make test` after command-layer changes. diff --git a/pkg/cmd/hind/build/build.go b/pkg/cmd/hind/build/build.go index a065b46..05a9d48 100644 --- a/pkg/cmd/hind/build/build.go +++ b/pkg/cmd/hind/build/build.go @@ -13,6 +13,7 @@ import ( "github.com/stenh0use/hind/pkg/build/image" "github.com/stenh0use/hind/pkg/build/release" "github.com/stenh0use/hind/pkg/cmd" + "github.com/stenh0use/hind/pkg/provider/dockercli" ) const ( @@ -62,24 +63,31 @@ func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, flags kinds = []release.ImageKind{release.ImageKind(target)} } + client := dockercli.New(logger) + for _, k := range kinds { - // For single image build, use the specified timeout buildCtx, cancel := context.WithTimeout(ctx, flags.timeout) - defer cancel() + err := func() error { + defer cancel() - logger.WithField("timeout", flags.timeout).Debug("Building image with timeout") - fmt.Fprintf(streams.ErrOut, "Building %s image...\n", k) + logger.WithField("timeout", flags.timeout).Debug("Building image with timeout") + fmt.Fprintf(streams.ErrOut, "Building %s image...\n", k) - builder, err := image.NewBuilder(logger, k) - if err != nil { - return fmt.Errorf("failed to create builder for %s: %w", k, err) - } + builder, err := image.NewBuilder(logger, client, k) + if err != nil { + return fmt.Errorf("failed to create builder for %s: %w", k, err) + } - if err := builder.BuildImage(buildCtx); err != nil { - return fmt.Errorf("failed to build %s image: %w", k, err) - } + if err := builder.BuildImage(buildCtx); err != nil { + return fmt.Errorf("failed to build %s image: %w", k, err) + } - fmt.Fprintf(streams.ErrOut, "Successfully built %s image\n", k) + fmt.Fprintf(streams.ErrOut, "Successfully built %s image\n", k) + return nil + }() + if err != nil { + return err + } } return nil diff --git a/pkg/cmd/hind/get/get.go b/pkg/cmd/hind/get/get.go index 2f8a41c..b43fa2d 100644 --- a/pkg/cmd/hind/get/get.go +++ b/pkg/cmd/hind/get/get.go @@ -3,6 +3,7 @@ package get import ( "context" "fmt" + "strings" "text/tabwriter" "time" @@ -11,11 +12,23 @@ import ( "github.com/stenh0use/hind/pkg/cluster" "github.com/stenh0use/hind/pkg/cmd" + "github.com/stenh0use/hind/pkg/provider" + "github.com/stenh0use/hind/pkg/provider/dockercli" ) // DefaultGetTimeout is the default timeout for getting a cluster const DefaultGetTimeout = 2 * time.Minute +type clusterManager interface { + Get(ctx context.Context) (*cluster.ClusterInfo, error) +} + +type clusterManagerFactory func(logger *log.Logger, name string) (clusterManager, error) + +var newClusterManager clusterManagerFactory = func(logger *log.Logger, name string) (clusterManager, error) { + return cluster.New(logger, name, dockercli.New(logger)) +} + // NewCommand creates the cluster delete command func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command { var timeout time.Duration @@ -42,20 +55,19 @@ func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeou getCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - // Create cluster configuration - cluster, err := cluster.New(logger, clusterName) + manager, err := newClusterManager(logger, clusterName) if err != nil { return fmt.Errorf("failed to create cluster manager: %w", err) } - state, err := cluster.Get(getCtx) + state, err := manager.Get(getCtx) if err != nil { return fmt.Errorf("failed to get cluster: %w", err) } // Print cluster information - fmt.Fprintf(streams.Out, "---\nCluster: %s\n", cluster.Config().Name) - fmt.Fprintf(streams.Out, "Status: created\n") + fmt.Fprintf(streams.Out, "---\nCluster: %s\n", clusterName) + fmt.Fprintf(streams.Out, "Status: %s\n", aggregateStatus(state)) fmt.Fprintf(streams.Out, "Network: %s\n", state.Network.Name) if len(state.Containers) > 0 { @@ -67,11 +79,59 @@ func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeou node.HostName, node.Image, node.Status, - node.Ports, + formatPorts(node.Ports), ) } - w.Flush() + if err := w.Flush(); err != nil { + return fmt.Errorf("failed to flush output: %w", err) + } } return nil } + +func aggregateStatus(state *cluster.ClusterInfo) string { + if len(state.Containers) == 0 { + return provider.NA.String() + } + + hasRunning := false + hasStopped := false + hasError := false + allRunning := true + allStopped := true + + for _, container := range state.Containers { + switch strings.ToLower(container.Status) { + case provider.Running.String(): + hasRunning = true + allStopped = false + case provider.Stopped.String(): + hasStopped = true + allRunning = false + default: + hasError = true + allRunning = false + allStopped = false + } + } + + switch { + case allRunning: + return provider.Running.String() + case allStopped: + return provider.Stopped.String() + case hasError || (hasRunning && hasStopped): + return provider.Error.String() + default: + return provider.Error.String() + } +} + +func formatPorts(ports []string) string { + if len(ports) == 0 { + return "-" + } + + return strings.Join(ports, ", ") +} diff --git a/pkg/cmd/hind/get/get_test.go b/pkg/cmd/hind/get/get_test.go index c6084f6..9366a20 100644 --- a/pkg/cmd/hind/get/get_test.go +++ b/pkg/cmd/hind/get/get_test.go @@ -1,14 +1,19 @@ package get import ( + "bytes" + "context" "io" + "strings" "testing" "time" "github.com/apex/log" "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/cluster" "github.com/stenh0use/hind/pkg/cmd" + "github.com/stenh0use/hind/pkg/provider" ) func TestNewCommand(t *testing.T) { @@ -110,3 +115,122 @@ func TestCommandArgs(t *testing.T) { }) } } + +type stubClusterManager struct { + state *cluster.ClusterInfo + err error +} + +func (s *stubClusterManager) Get(ctx context.Context) (*cluster.ClusterInfo, error) { + if s.err != nil { + return nil, s.err + } + return s.state, nil +} + +func TestRunE_FormatsStatusAndPortsFromRuntimeState(t *testing.T) { + logger := &log.Logger{Handler: discard.New(), Level: log.ErrorLevel} + + var out bytes.Buffer + streams := cmd.IOStreams{Out: &out, ErrOut: io.Discard} + + originalFactory := newClusterManager + newClusterManager = func(logger *log.Logger, name string) (clusterManager, error) { + return &stubClusterManager{state: &cluster.ClusterInfo{ + Network: provider.NetworkInfo{Name: "hind.test"}, + Containers: []provider.ContainerInfo{ + { + HostName: "hind.demo.server.01", + Image: "nomad:latest", + Status: "running", + Ports: []string{"127.0.0.1:4646->4646/tcp", "127.0.0.1:4647->4647/tcp"}, + }, + }, + }}, nil + } + defer func() { newClusterManager = originalFactory }() + + err := runE(context.Background(), logger, streams, time.Second, []string{"demo"}) + if err != nil { + t.Fatalf("runE returned error: %v", err) + } + + output := out.String() + if !strings.Contains(output, "Status: running") { + t.Fatalf("expected running status in output, got: %s", output) + } + if strings.Contains(output, "%!s(") { + t.Fatalf("expected no fmt artifact in output, got: %s", output) + } + if !strings.Contains(output, "127.0.0.1:4646->4646/tcp, 127.0.0.1:4647->4647/tcp") { + t.Fatalf("expected joined ports in output, got: %s", output) + } +} + +func TestAggregateStatus(t *testing.T) { + tests := []struct { + name string + containers []provider.ContainerInfo + expected string + }{ + { + name: "no containers", + expected: provider.NA.String(), + }, + { + name: "all running", + containers: []provider.ContainerInfo{{Status: "running"}, {Status: "running"}}, + expected: provider.Running.String(), + }, + { + name: "all stopped", + containers: []provider.ContainerInfo{{Status: provider.Stopped.String()}, {Status: provider.Stopped.String()}}, + expected: provider.Stopped.String(), + }, + { + name: "exited is unknown without provider normalization", + containers: []provider.ContainerInfo{{Status: "exited"}}, + expected: provider.Error.String(), + }, + { + name: "mixed running and stopped reports error", + containers: []provider.ContainerInfo{{Status: "running"}, {Status: "stopped"}}, + expected: provider.Error.String(), + }, + { + name: "unknown state reports error", + containers: []provider.ContainerInfo{{Status: "restarting"}}, + expected: provider.Error.String(), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + status := aggregateStatus(&cluster.ClusterInfo{Containers: tt.containers}) + if status != tt.expected { + t.Fatalf("expected status %q, got %q", tt.expected, status) + } + }) + } +} + +func TestFormatPorts(t *testing.T) { + tests := []struct { + name string + ports []string + expected string + }{ + {name: "empty ports", ports: nil, expected: "-"}, + {name: "single port", ports: []string{"127.0.0.1:4646->4646/tcp"}, expected: "127.0.0.1:4646->4646/tcp"}, + {name: "multiple ports", ports: []string{"4646/tcp", "4647/tcp"}, expected: "4646/tcp, 4647/tcp"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := formatPorts(tt.ports) + if actual != tt.expected { + t.Fatalf("expected ports %q, got %q", tt.expected, actual) + } + }) + } +} diff --git a/pkg/cmd/hind/list/list.go b/pkg/cmd/hind/list/list.go index 8b1a689..a9a817b 100644 --- a/pkg/cmd/hind/list/list.go +++ b/pkg/cmd/hind/list/list.go @@ -13,6 +13,7 @@ import ( "github.com/stenh0use/hind/pkg/cmd" "github.com/stenh0use/hind/pkg/config" "github.com/stenh0use/hind/pkg/provider" + "github.com/stenh0use/hind/pkg/provider/dockercli" ) // DefaultListTimeout is the default timeout for listing clusters @@ -116,7 +117,7 @@ func getClusterStatus(ctx context.Context, logger *log.Logger, clusterName strin defer cancel() // Create cluster manager - manager, err := cluster.New(logger, clusterName) + manager, err := cluster.New(logger, clusterName, dockercli.New(logger)) if err != nil { return nil, fmt.Errorf("failed to create cluster manager: %w", err) } @@ -132,7 +133,7 @@ func getClusterStatus(ctx context.Context, logger *log.Logger, clusterName strin } // aggregateClusterStatus computes cluster-level status from container statuses -func aggregateClusterStatus(info *provider.ClusterInfo, cfg *config.Cluster) *clusterStatus { +func aggregateClusterStatus(info *cluster.ClusterInfo, cfg *config.Cluster) *clusterStatus { status := &clusterStatus{ TotalNodes: len(cfg.Nodes), } diff --git a/pkg/cmd/hind/list/list_test.go b/pkg/cmd/hind/list/list_test.go index 4170127..35e9136 100644 --- a/pkg/cmd/hind/list/list_test.go +++ b/pkg/cmd/hind/list/list_test.go @@ -1,15 +1,45 @@ package list import ( + "bytes" + "context" + "strings" "testing" "time" + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + + "github.com/stenh0use/hind/pkg/cluster" + "github.com/stenh0use/hind/pkg/cmd" "github.com/stenh0use/hind/pkg/config" "github.com/stenh0use/hind/pkg/provider" ) +func TestRunE_NoClustersOnFirstRunWhenConfigDirMissing(t *testing.T) { + t.Setenv("HOME", t.TempDir()) + + logger := &log.Logger{Handler: discard.New(), Level: log.ErrorLevel} + var stdout bytes.Buffer + var stderr bytes.Buffer + streams := cmd.IOStreams{In: strings.NewReader(""), Out: &stdout, ErrOut: &stderr} + + err := runE(context.Background(), logger, streams, DefaultListTimeout) + if err != nil { + t.Fatalf("runE() returned error on first-run missing config dir: %v", err) + } + + if got := stderr.String(); !strings.Contains(got, "No clusters found") { + t.Fatalf("expected empty-state output in stderr, got %q", got) + } + + if got := stdout.String(); got != "" { + t.Fatalf("expected no stdout table output, got %q", got) + } +} + func TestAggregateClusterStatus_AllRunning(t *testing.T) { - info := &provider.ClusterInfo{ + info := &cluster.ClusterInfo{ Containers: []provider.ContainerInfo{ {Name: "node1", Status: provider.Running.String(), Created: time.Now().Format(time.RFC3339)}, {Name: "node2", Status: provider.Running.String(), Created: time.Now().Format(time.RFC3339)}, @@ -35,7 +65,7 @@ func TestAggregateClusterStatus_AllRunning(t *testing.T) { } func TestAggregateClusterStatus_AllStopped(t *testing.T) { - info := &provider.ClusterInfo{ + info := &cluster.ClusterInfo{ Containers: []provider.ContainerInfo{ {Name: "node1", Status: provider.Stopped.String(), Created: time.Now().Format(time.RFC3339)}, {Name: "node2", Status: provider.Stopped.String(), Created: time.Now().Format(time.RFC3339)}, @@ -57,7 +87,7 @@ func TestAggregateClusterStatus_AllStopped(t *testing.T) { } func TestAggregateClusterStatus_Mixed(t *testing.T) { - info := &provider.ClusterInfo{ + info := &cluster.ClusterInfo{ Containers: []provider.ContainerInfo{ {Name: "node1", Status: provider.Running.String(), Created: time.Now().Format(time.RFC3339)}, {Name: "node2", Status: provider.Stopped.String(), Created: time.Now().Format(time.RFC3339)}, @@ -80,7 +110,7 @@ func TestAggregateClusterStatus_Mixed(t *testing.T) { } func TestAggregateClusterStatus_WithErrors(t *testing.T) { - info := &provider.ClusterInfo{ + info := &cluster.ClusterInfo{ Containers: []provider.ContainerInfo{ {Name: "node1", Status: provider.Running.String(), Created: time.Now().Format(time.RFC3339)}, {Name: "node2", Status: provider.Error.String(), Created: time.Now().Format(time.RFC3339)}, @@ -99,7 +129,7 @@ func TestAggregateClusterStatus_WithErrors(t *testing.T) { } func TestAggregateClusterStatus_NoContainers(t *testing.T) { - info := &provider.ClusterInfo{ + info := &cluster.ClusterInfo{ Containers: []provider.ContainerInfo{}, } @@ -115,7 +145,7 @@ func TestAggregateClusterStatus_NoContainers(t *testing.T) { } func TestAggregateClusterStatus_PartialRunning(t *testing.T) { - info := &provider.ClusterInfo{ + info := &cluster.ClusterInfo{ Containers: []provider.ContainerInfo{ {Name: "node1", Status: provider.Running.String(), Created: time.Now().Format(time.RFC3339)}, {Name: "node2", Status: provider.Running.String(), Created: time.Now().Format(time.RFC3339)}, @@ -235,7 +265,7 @@ func TestAggregateClusterStatus_OldestCreationTime(t *testing.T) { middle := time.Now().Add(-24 * time.Hour) newest := time.Now().Add(-1 * time.Hour) - info := &provider.ClusterInfo{ + info := &cluster.ClusterInfo{ Containers: []provider.ContainerInfo{ {Name: "node1", Status: provider.Running.String(), Created: newest.Format(time.RFC3339)}, {Name: "node2", Status: provider.Running.String(), Created: oldest.Format(time.RFC3339)}, @@ -255,8 +285,46 @@ func TestAggregateClusterStatus_OldestCreationTime(t *testing.T) { } } +func TestAggregateClusterStatus_StoppedStatusComesFromProvider(t *testing.T) { + info := &cluster.ClusterInfo{ + Containers: []provider.ContainerInfo{ + {Name: "node1", Status: provider.Stopped.String(), Created: time.Now().Format(time.RFC3339)}, + {Name: "node2", Status: provider.Stopped.String(), Created: time.Now().Format(time.RFC3339)}, + }, + } + + cfg := &config.Cluster{ + Nodes: []config.Node{{}, {}}, + } + + result := aggregateClusterStatus(info, cfg) + + if result.Status != "stopped" { + t.Errorf("Expected status 'stopped' for stopped containers, got '%s'", result.Status) + } +} + +func TestAggregateClusterStatus_ExitedStatusWithoutNormalizationIsPartial(t *testing.T) { + info := &cluster.ClusterInfo{ + Containers: []provider.ContainerInfo{ + {Name: "node1", Status: "exited", Created: time.Now().Format(time.RFC3339)}, + {Name: "node2", Status: "exited", Created: time.Now().Format(time.RFC3339)}, + }, + } + + cfg := &config.Cluster{ + Nodes: []config.Node{{}, {}}, + } + + result := aggregateClusterStatus(info, cfg) + + if result.Status != "partial" { + t.Errorf("Expected status 'partial' without provider normalization, got '%s'", result.Status) + } +} + func TestAggregateClusterStatus_InvalidCreationTime(t *testing.T) { - info := &provider.ClusterInfo{ + info := &cluster.ClusterInfo{ Containers: []provider.ContainerInfo{ {Name: "node1", Status: provider.Running.String(), Created: "invalid-time"}, }, diff --git a/pkg/cmd/hind/releases/releases.go b/pkg/cmd/hind/releases/releases.go new file mode 100644 index 0000000..7292007 --- /dev/null +++ b/pkg/cmd/hind/releases/releases.go @@ -0,0 +1,68 @@ +// Package releases implements the "hind releases" command. +// It lists all available hind releases and the HashiCorp component versions +// each release includes, rendered as a tab-aligned table. +package releases + +import ( + "context" + "fmt" + "sort" + "text/tabwriter" + + "github.com/apex/log" + "github.com/spf13/cobra" + + "github.com/stenh0use/hind/pkg/build/release" + "github.com/stenh0use/hind/pkg/cmd" +) + +// NewCommand returns a cobra.Command that prints the hind releases table. +func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command { + command := &cobra.Command{ + Use: "releases", + Short: "List available hind releases", + Long: "List all available hind releases and the HashiCorp component versions they include.", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + return runE(cmd.Context(), logger, streams) + }, + } + + return command +} + +// runE fetches the release list, sorts descending by hind version, and writes +// a tabwriter-aligned table to streams.Out. +// +// Column order: HIND, CONSUL, NOMAD, VAULT (hind first; remaining in +// alphabetical order as required by hind-releases.feature). +// +// NOTE: Sorting uses lexicographic descending order which is correct for the +// current release set (MAJOR.MINOR.PATCH with no ambiguous zero-padding). +// TODO: switch to golang.org/x/mod/semver when the release count grows. +func runE(_ context.Context, _ *log.Logger, streams cmd.IOStreams) error { + versions := release.List() + if len(versions) == 0 { + fmt.Fprintln(streams.ErrOut, "No releases found") + return nil + } + + // Sort descending so that the latest version appears on the first row. + sort.Slice(versions, func(i, j int) bool { + return versions[i] > versions[j] + }) + + w := tabwriter.NewWriter(streams.Out, 0, 0, 3, ' ', 0) + fmt.Fprintln(w, "HIND\tCONSUL\tNOMAD\tVAULT") + + for _, v := range versions { + info, err := release.Get(v) + if err != nil { + // Skip unknown entries; this should not happen with the built-in store. + continue + } + fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", info.Hind, info.Consul, info.Nomad, info.Vault) + } + + return w.Flush() +} diff --git a/pkg/cmd/hind/releases/releases_test.go b/pkg/cmd/hind/releases/releases_test.go new file mode 100644 index 0000000..85c223d --- /dev/null +++ b/pkg/cmd/hind/releases/releases_test.go @@ -0,0 +1,151 @@ +package releases + +import ( + "bytes" + "context" + "io" + "strings" + "testing" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + + "github.com/stenh0use/hind/pkg/build/release" + "github.com/stenh0use/hind/pkg/cmd" +) + +// testStreams returns a logger and IOStreams whose stdout is captured in the +// returned buffer. +func testStreams() (*log.Logger, cmd.IOStreams, *bytes.Buffer) { + logger := &log.Logger{Handler: discard.New(), Level: log.ErrorLevel} + var buf bytes.Buffer + streams := cmd.IOStreams{ + In: strings.NewReader(""), + Out: &buf, + ErrOut: io.Discard, + } + return logger, streams, &buf +} + +// TestRunE_HeaderRow asserts that the first output line contains the four +// required column labels and that they appear in alphabetical order after HIND. +func TestRunE_HeaderRow(t *testing.T) { + t.Parallel() + + logger, streams, buf := testStreams() + + if err := runE(context.Background(), logger, streams); err != nil { + t.Fatalf("runE() returned unexpected error: %v", err) + } + + lines := strings.Split(strings.TrimSpace(buf.String()), "\n") + if len(lines) == 0 { + t.Fatal("runE() produced no output") + } + + header := lines[0] + for _, col := range []string{"HIND", "CONSUL", "NOMAD", "VAULT"} { + if !strings.Contains(header, col) { + t.Errorf("header row missing column %q; got: %q", col, header) + } + } + + // Confirm alphabetical column order after HIND: + // CONSUL must precede NOMAD, NOMAD must precede VAULT. + idxConsul := strings.Index(header, "CONSUL") + idxNomad := strings.Index(header, "NOMAD") + idxVault := strings.Index(header, "VAULT") + + if idxConsul >= idxNomad { + t.Errorf("expected CONSUL before NOMAD in header; got: %q", header) + } + if idxNomad >= idxVault { + t.Errorf("expected NOMAD before VAULT in header; got: %q", header) + } +} + +// TestRunE_AlphabeticalColumnOrder verifies that HIND is the leftmost column +// (first field in the header row) and that CONSUL < NOMAD < VAULT follow it. +func TestRunE_AlphabeticalColumnOrder(t *testing.T) { + t.Parallel() + + logger, streams, buf := testStreams() + + if err := runE(context.Background(), logger, streams); err != nil { + t.Fatalf("runE() returned unexpected error: %v", err) + } + + lines := strings.Split(strings.TrimSpace(buf.String()), "\n") + if len(lines) == 0 { + t.Fatal("runE() produced no output") + } + + fields := strings.Fields(lines[0]) + if len(fields) < 4 { + t.Fatalf("expected at least 4 header fields, got %d: %v", len(fields), fields) + } + + if fields[0] != "HIND" { + t.Errorf("first column must be HIND; got %q", fields[0]) + } + if fields[1] != "CONSUL" { + t.Errorf("second column must be CONSUL; got %q", fields[1]) + } + if fields[2] != "NOMAD" { + t.Errorf("third column must be NOMAD; got %q", fields[2]) + } + if fields[3] != "VAULT" { + t.Errorf("fourth column must be VAULT; got %q", fields[3]) + } +} + +// TestRunE_LatestVersionFirstRow asserts that the first data row (line after +// the header) starts with the latest hind version from release.Latest(). +func TestRunE_LatestVersionFirstRow(t *testing.T) { + t.Parallel() + + logger, streams, buf := testStreams() + + if err := runE(context.Background(), logger, streams); err != nil { + t.Fatalf("runE() returned unexpected error: %v", err) + } + + lines := strings.Split(strings.TrimSpace(buf.String()), "\n") + // lines[0] is the header; lines[1] is the first data row. + if len(lines) < 2 { + t.Fatalf("expected at least a header and one data row, got %d line(s)", len(lines)) + } + + latest := release.Latest().Hind + firstDataRow := lines[1] + fields := strings.Fields(firstDataRow) + if len(fields) < 1 { + t.Fatalf("first data row is empty") + } + + if fields[0] != latest { + t.Errorf("expected first data row to start with latest version %q; got %q", latest, fields[0]) + } +} + +// TestNewCommand_Structure asserts that the command is wired correctly so that +// it is reachable as "hind releases". +func TestNewCommand_Structure(t *testing.T) { + t.Parallel() + + logger, streams, _ := testStreams() + c := NewCommand(logger, streams) + + if c == nil { + t.Fatal("NewCommand() returned nil") + } + if c.Use != "releases" { + t.Errorf("expected Use=%q, got %q", "releases", c.Use) + } + if c.Args == nil { + t.Error("expected Args validator to be set (NoArgs)") + } + if c.RunE == nil { + t.Error("expected RunE to be set") + } +} diff --git a/pkg/cmd/hind/rm/rm.go b/pkg/cmd/hind/rm/rm.go index 0ed2e24..f314e60 100644 --- a/pkg/cmd/hind/rm/rm.go +++ b/pkg/cmd/hind/rm/rm.go @@ -10,11 +10,24 @@ import ( "github.com/stenh0use/hind/pkg/cluster" "github.com/stenh0use/hind/pkg/cmd" + "github.com/stenh0use/hind/pkg/provider/dockercli" ) // DefaultDeleteTimeout is the default timeout for destroying a cluster const DefaultDeleteTimeout = 2 * time.Minute +// clusterDeleter is the minimal interface required by runE to delete a cluster. +// It is satisfied by *cluster.Manager and can be replaced in tests to avoid Docker. +type clusterDeleter interface { + Delete(ctx context.Context) error +} + +// newClusterManagerFn is the factory used to create a clusterDeleter for a given cluster +// name. Tests may replace this variable to inject a stub without a real Docker daemon. +var newClusterManagerFn = func(logger *log.Logger, clusterName string) (clusterDeleter, error) { + return cluster.New(logger, clusterName, dockercli.New(logger)) +} + // NewCommand creates the cluster delete command func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command { var timeout time.Duration @@ -45,7 +58,7 @@ func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeou logger.Debugf("Failed to get active cluster: %v", err) } - // If no cluster name provided, use active cluster or fall back to "default" + // If no cluster name provided, use active cluster or fall back to "default". if clusterName == "" { if activeCluster == "" { clusterName = "default" @@ -59,8 +72,8 @@ func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeou deleteCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - // Create cluster configuration - clusterMgr, err := cluster.New(logger, clusterName) + // Create cluster manager via factory seam (replaceable in tests) + clusterMgr, err := newClusterManagerFn(logger, clusterName) if err != nil { return fmt.Errorf("failed to create cluster manager: %w", err) } @@ -69,7 +82,11 @@ func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeou return fmt.Errorf("failed to delete cluster: %w", err) } - // If the deleted cluster was the active cluster, clear the active cluster setting + // If the deleted cluster was the active cluster, clear the active cluster setting. + // Clearing removes the active file entirely, leaving no active cluster (empty string). + // All command resolution paths treat an empty/missing active cluster as "default", so + // clearing is semantically equivalent to resetting to "default" without writing that + // literal value to disk (which would conflict with a real cluster named "default"). if activeCluster == clusterName { if err := cluster.ClearActiveCluster(); err != nil { logger.Warnf("Failed to clear active cluster: %v", err) diff --git a/pkg/cmd/hind/rm/rm_test.go b/pkg/cmd/hind/rm/rm_test.go index df92291..c49d833 100644 --- a/pkg/cmd/hind/rm/rm_test.go +++ b/pkg/cmd/hind/rm/rm_test.go @@ -1,14 +1,18 @@ package rm import ( + "context" "io" + "os" "testing" "time" "github.com/apex/log" "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/cluster" "github.com/stenh0use/hind/pkg/cmd" + "github.com/stenh0use/hind/pkg/file" ) func TestNewCommand(t *testing.T) { @@ -66,6 +70,63 @@ func TestCommandFlags(t *testing.T) { } } +// stubDeleter is a no-op clusterDeleter used to bypass real Docker calls in tests. +type stubDeleter struct{} + +func (s *stubDeleter) Delete(_ context.Context) error { return nil } + +// TestRunE_ClearsActiveClusterOnDelete verifies that when the cluster being removed +// is the currently active cluster, runE calls ClearActiveCluster so that subsequent +// commands fall back to the "default" cluster resolution path. +func TestRunE_ClearsActiveClusterOnDelete(t *testing.T) { + // Redirect HOME so cluster state is isolated to this test. + tmpDir := t.TempDir() + oldHome := os.Getenv("HOME") + os.Setenv("HOME", tmpDir) + defer os.Setenv("HOME", oldHome) + + const clusterName = "my-cluster" + + // Pre-create the cluster directory so SetActiveCluster accepts the name. + fm, err := file.NewFromHomeDir(cluster.DefaultConfigParentDir, cluster.DefaultConfigName) + if err != nil { + t.Fatalf("NewFromHomeDir: %v", err) + } + clusterDir := file.JoinPath(cluster.ClusterConfigDir, clusterName) + if err := fm.EnsureDir(clusterDir); err != nil { + t.Fatalf("EnsureDir: %v", err) + } + + // Set the cluster as active. + if err := cluster.SetActiveCluster(clusterName); err != nil { + t.Fatalf("SetActiveCluster: %v", err) + } + + // Replace the factory with a stub so Delete() succeeds without Docker. + orig := newClusterManagerFn + newClusterManagerFn = func(_ *log.Logger, _ string) (clusterDeleter, error) { + return &stubDeleter{}, nil + } + defer func() { newClusterManagerFn = orig }() + + logger := &log.Logger{Handler: discard.New(), Level: log.ErrorLevel} + streams := cmd.IOStreams{Out: io.Discard, ErrOut: io.Discard} + + if err := runE(context.Background(), logger, streams, DefaultDeleteTimeout, clusterName); err != nil { + t.Fatalf("runE returned unexpected error: %v", err) + } + + // After deletion of the active cluster the active cluster file must be cleared, + // yielding an empty string from GetActiveCluster (the canonical "no active cluster" state). + active, err := cluster.GetActiveCluster() + if err != nil { + t.Fatalf("GetActiveCluster: %v", err) + } + if active != "" { + t.Errorf("expected active cluster to be cleared (empty string), got %q", active) + } +} + func TestCommandArgs(t *testing.T) { logger := &log.Logger{ Handler: discard.New(), diff --git a/pkg/cmd/hind/root.go b/pkg/cmd/hind/root.go index 4ddec66..05d0471 100644 --- a/pkg/cmd/hind/root.go +++ b/pkg/cmd/hind/root.go @@ -10,6 +10,7 @@ import ( "github.com/stenh0use/hind/pkg/cmd/hind/build" "github.com/stenh0use/hind/pkg/cmd/hind/get" "github.com/stenh0use/hind/pkg/cmd/hind/list" + "github.com/stenh0use/hind/pkg/cmd/hind/releases" "github.com/stenh0use/hind/pkg/cmd/hind/rm" "github.com/stenh0use/hind/pkg/cmd/hind/set" "github.com/stenh0use/hind/pkg/cmd/hind/start" @@ -40,6 +41,7 @@ func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command { rootCmd.AddCommand(build.NewCommand(logger, streams)) rootCmd.AddCommand(get.NewCommand(logger, streams)) rootCmd.AddCommand(list.NewCommand(logger, streams)) + rootCmd.AddCommand(releases.NewCommand(logger, streams)) rootCmd.AddCommand(rm.NewCommand(logger, streams)) rootCmd.AddCommand(set.NewCommand(logger, streams)) rootCmd.AddCommand(start.NewCommand(logger, streams)) diff --git a/pkg/cmd/hind/set/set_test.go b/pkg/cmd/hind/set/set_test.go index 60f5d07..d8e8bce 100644 --- a/pkg/cmd/hind/set/set_test.go +++ b/pkg/cmd/hind/set/set_test.go @@ -3,6 +3,7 @@ package set import ( "io" "os" + "strings" "testing" "github.com/apex/log" @@ -79,14 +80,26 @@ func TestSetProfileCommand_NonExistentCluster(t *testing.T) { // Create command command := NewCommand(logger, streams) - // Set args to non-existent cluster - command.SetArgs([]string{"profile", "non-existent-cluster"}) + clusterName := "non-existent-cluster" + command.SetArgs([]string{"profile", clusterName}) // Execute command - should fail err := command.Execute() if err == nil { t.Fatal("Expected error when setting non-existent cluster as active, got nil") } + + // Assert exact-message contract: the error must identify the cluster and state it does not exist. + // SetActiveCluster returns "cluster '' does not exist"; the command wraps it with + // "failed to set active cluster: ...". Both the cluster name and "does not exist" must + // appear in the final user-visible error message so the user knows which profile is missing. + errMsg := err.Error() + if !strings.Contains(errMsg, clusterName) { + t.Errorf("error message %q does not contain cluster name %q", errMsg, clusterName) + } + if !strings.Contains(errMsg, "does not exist") { + t.Errorf("error message %q does not contain 'does not exist'", errMsg) + } } func TestSetProfileCommand_NoArgs(t *testing.T) { diff --git a/pkg/cmd/hind/start/start.go b/pkg/cmd/hind/start/start.go index 3f14a13..b9d2905 100644 --- a/pkg/cmd/hind/start/start.go +++ b/pkg/cmd/hind/start/start.go @@ -10,17 +10,33 @@ import ( "github.com/stenh0use/hind/pkg/cluster" "github.com/stenh0use/hind/pkg/cmd" + "github.com/stenh0use/hind/pkg/provider/dockercli" ) // DefaultStartTimeout is the default timeout for starting a cluster const DefaultStartTimeout = 5 * time.Minute +// clusterStarter is the minimal interface required by runE to start a cluster. +// It is satisfied by *cluster.Manager and can be replaced in tests to avoid Docker. +type clusterStarter interface { + ConfigFileExists() bool + SetClientCount(ctx context.Context, count int) error + Start(ctx context.Context) (cluster.StartResult, error) + CountClientNodes() int + Scale(ctx context.Context, targetClientCount int) error +} + +// newStartManagerFn is the factory used to create a clusterStarter for a given cluster +// name. Tests may replace this variable to inject a stub without a real Docker daemon. +var newStartManagerFn = func(logger *log.Logger, clusterName string) (clusterStarter, error) { + return cluster.New(logger, clusterName, dockercli.New(logger)) +} + // flagpole holds all flags for the start command type flagpole struct { - hindVersion string - timeout time.Duration - clients int - verbose bool + timeout time.Duration + clients int + verbose bool } // NewCommand creates the cluster start command @@ -37,7 +53,6 @@ func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command { }, } - command.Flags().StringVar(&flags.hindVersion, "version", "latest", "Hind image version to use") command.Flags().DurationVar(&flags.timeout, "timeout", DefaultStartTimeout, "Timeout for starting the cluster") command.Flags().IntVar(&flags.clients, "clients", 1, "Number of client nodes to create") command.Flags().BoolVar(&flags.verbose, "verbose", false, "Enable verbose output") @@ -45,6 +60,10 @@ func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command { return command } +// checkDockerDaemonFn is the function used to verify Docker daemon accessibility. +// Tests may replace this variable to bypass the real Docker check. +var checkDockerDaemonFn = checkDockerDaemon + func runE(cmd *cobra.Command, ctx context.Context, logger *log.Logger, streams cmd.IOStreams, flags *flagpole, args []string) error { // Get cluster name from args or use default var clusterName string @@ -75,14 +94,14 @@ func runE(cmd *cobra.Command, ctx context.Context, logger *log.Logger, streams c startCtx, cancel := context.WithTimeout(ctx, flags.timeout) defer cancel() - // Check if Docker daemon is accessible first + // Check if Docker daemon is accessible first (replaceable via checkDockerDaemonFn in tests) logger.Debug("Checking Docker daemon accessibility") - if err := checkDockerDaemon(startCtx, logger); err != nil { + if err := checkDockerDaemonFn(startCtx, logger); err != nil { return fmt.Errorf("Docker daemon is not accessible: %w", err) } - // Create cluster manager - mgr, err := cluster.New(logger, clusterName) + // Create cluster manager via factory seam (replaceable in tests) + mgr, err := newStartManagerFn(logger, clusterName) if err != nil { return fmt.Errorf("failed to create cluster manager: %w", err) } @@ -129,7 +148,7 @@ func runE(cmd *cobra.Command, ctx context.Context, logger *log.Logger, streams c func checkDockerDaemon(ctx context.Context, logger *log.Logger) error { // Create a temporary manager to test Docker connectivity // This is a lightweight check before we do any real work - tempMgr, err := cluster.New(logger, "temp-check") + tempMgr, err := cluster.New(logger, "temp-check", dockercli.New(logger)) if err != nil { return err } diff --git a/pkg/cmd/hind/start/start_test.go b/pkg/cmd/hind/start/start_test.go index 4a59914..b075b46 100644 --- a/pkg/cmd/hind/start/start_test.go +++ b/pkg/cmd/hind/start/start_test.go @@ -1,42 +1,203 @@ package start import ( + "context" + "io" + "os" "testing" + "time" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + + "github.com/stenh0use/hind/pkg/cluster" + "github.com/stenh0use/hind/pkg/cmd" + "github.com/stenh0use/hind/pkg/file" ) +// stubStartManager is a no-op clusterStarter used to bypass real Docker calls in tests. +type stubStartManager struct { + startResult cluster.StartResult +} + +func (s *stubStartManager) ConfigFileExists() bool { return false } +func (s *stubStartManager) SetClientCount(_ context.Context, _ int) error { return nil } +func (s *stubStartManager) Start(_ context.Context) (cluster.StartResult, error) { + return s.startResult, nil +} +func (s *stubStartManager) CountClientNodes() int { return 1 } +func (s *stubStartManager) Scale(_ context.Context, _ int) error { return nil } + +func TestNewCommand(t *testing.T) { + logger := &log.Logger{ + Handler: discard.New(), + Level: log.ErrorLevel, + } + streams := cmd.IOStreams{ + Out: io.Discard, + ErrOut: io.Discard, + } + + command := NewCommand(logger, streams) + + if command == nil { + t.Fatal("NewCommand() returned nil") + } + + if command.Use != "start [cluster-name]" { + t.Errorf("Expected Use to be 'start [cluster-name]', got '%s'", command.Use) + } + + if command.Short != "Start or create a hind cluster" { + t.Errorf("Expected Short to be 'Start or create a hind cluster', got '%s'", command.Short) + } +} + +func TestDefaultTimeout(t *testing.T) { + expected := 5 * time.Minute + if DefaultStartTimeout != expected { + t.Errorf("Expected DefaultStartTimeout to be %v, got %v", expected, DefaultStartTimeout) + } +} + +func TestCommandFlags(t *testing.T) { + logger := &log.Logger{ + Handler: discard.New(), + Level: log.ErrorLevel, + } + streams := cmd.IOStreams{ + Out: io.Discard, + ErrOut: io.Discard, + } + + command := NewCommand(logger, streams) + + timeoutFlag := command.Flags().Lookup("timeout") + if timeoutFlag == nil { + t.Fatal("Expected 'timeout' flag to exist") + } + + if timeoutFlag.DefValue != "5m0s" { + t.Errorf("Expected timeout default value to be '5m0s', got '%s'", timeoutFlag.DefValue) + } + + clientsFlag := command.Flags().Lookup("clients") + if clientsFlag == nil { + t.Fatal("Expected 'clients' flag to exist") + } + + if clientsFlag.DefValue != "1" { + t.Errorf("Expected clients default value to be '1', got '%s'", clientsFlag.DefValue) + } + + verboseFlag := command.Flags().Lookup("verbose") + if verboseFlag == nil { + t.Fatal("Expected 'verbose' flag to exist") + } + + if command.Flags().Lookup("version") != nil { + t.Fatal("Expected 'version' flag to be absent") + } +} + +// TestRunE_SetsActiveCluster verifies that after a successful start runE calls +// SetActiveCluster so that the started cluster becomes the active profile. +func TestRunE_SetsActiveCluster(t *testing.T) { + // Redirect HOME so cluster state is isolated to this test. + tmpDir := t.TempDir() + oldHome := os.Getenv("HOME") + os.Setenv("HOME", tmpDir) + defer os.Setenv("HOME", oldHome) + + const clusterName = "test-start-cluster" + + // Pre-create the cluster directory so SetActiveCluster accepts the name. + // runE calls SetActiveCluster after mgr.Start succeeds; SetActiveCluster verifies + // the cluster directory exists before writing the active file. + fm, err := file.NewFromHomeDir(cluster.DefaultConfigParentDir, cluster.DefaultConfigName) + if err != nil { + t.Fatalf("NewFromHomeDir: %v", err) + } + clusterDir := file.JoinPath(cluster.ClusterConfigDir, clusterName) + if err := fm.EnsureDir(clusterDir); err != nil { + t.Fatalf("EnsureDir: %v", err) + } + + // Stub checkDockerDaemonFn so runE does not require a live Docker daemon. + origDockerCheck := checkDockerDaemonFn + checkDockerDaemonFn = func(_ context.Context, _ *log.Logger) error { return nil } + defer func() { checkDockerDaemonFn = origDockerCheck }() + + // Stub newStartManagerFn so mgr.Start() does not require Docker. + origManagerFn := newStartManagerFn + newStartManagerFn = func(_ *log.Logger, _ string) (clusterStarter, error) { + return &stubStartManager{startResult: cluster.StartResultCreated}, nil + } + defer func() { newStartManagerFn = origManagerFn }() + + logger := &log.Logger{Handler: discard.New(), Level: log.ErrorLevel} + streams := cmd.IOStreams{Out: io.Discard, ErrOut: io.Discard} + flags := &flagpole{ + timeout: DefaultStartTimeout, + clients: 1, + } + + // Build a minimal cobra command to satisfy the cmd parameter expected by runE. + cobraCmd := NewCommand(logger, streams) + + if err := runE(cobraCmd, context.Background(), logger, streams, flags, []string{clusterName}); err != nil { + t.Fatalf("runE returned unexpected error: %v", err) + } + + // After a successful start the active cluster must be set to the started cluster name. + active, err := cluster.GetActiveCluster() + if err != nil { + t.Fatalf("GetActiveCluster: %v", err) + } + if active != clusterName { + t.Errorf("expected active cluster %q, got %q", clusterName, active) + } +} + func TestClusterNameExtraction(t *testing.T) { + logger := &log.Logger{ + Handler: discard.New(), + Level: log.ErrorLevel, + } + streams := cmd.IOStreams{ + Out: io.Discard, + ErrOut: io.Discard, + } + tests := []struct { - name string - args []string - expected string + name string + args []string + wantError bool }{ { - name: "no args uses default", - args: []string{}, - expected: "default", + name: "no args", + args: []string{}, + wantError: false, }, { - name: "single arg uses cluster name", - args: []string{"dev"}, - expected: "dev", + name: "one arg", + args: []string{"test-cluster"}, + wantError: false, }, { - name: "custom cluster name", - args: []string{"my-test-cluster"}, - expected: "my-test-cluster", + name: "too many args", + args: []string{"cluster1", "cluster2"}, + wantError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Simulate the cluster name extraction logic - clusterName := "default" - if len(tt.args) > 0 { - clusterName = tt.args[0] - } - - if clusterName != tt.expected { - t.Errorf("expected cluster name %q, got %q", tt.expected, clusterName) + command := NewCommand(logger, streams) + command.SetArgs(tt.args) + err := command.Args(command, tt.args) + if (err != nil) != tt.wantError { + t.Errorf("Args validation error = %v, wantError %v", err, tt.wantError) } }) } diff --git a/pkg/cmd/hind/stop/stop.go b/pkg/cmd/hind/stop/stop.go index 57144fa..95024e5 100644 --- a/pkg/cmd/hind/stop/stop.go +++ b/pkg/cmd/hind/stop/stop.go @@ -10,14 +10,27 @@ import ( "github.com/stenh0use/hind/pkg/cluster" "github.com/stenh0use/hind/pkg/cmd" + "github.com/stenh0use/hind/pkg/provider/dockercli" ) +type clusterStopper interface { + ConfigFileExists() bool + StopWithOptions(ctx context.Context, opts cluster.StopOptions) (cluster.StopResult, error) +} + +var getActiveClusterFn = cluster.GetActiveCluster +var newClusterManagerFn = func(logger *log.Logger, clusterName string) (clusterStopper, error) { + return cluster.New(logger, clusterName, dockercli.New(logger)) +} + // DefaultStopTimeout is the default timeout for stopping a cluster const DefaultStopTimeout = 30 * time.Second // NewCommand creates the cluster stop command func NewCommand(logger *log.Logger, streams cmd.IOStreams) *cobra.Command { var timeout time.Duration + var force bool + var verbose bool command := &cobra.Command{ Use: "stop [cluster-name]", @@ -30,18 +43,20 @@ The cluster can be resumed later with 'hind start'.`, if len(args) > 0 { clusterName = args[0] } - return runE(cmd.Context(), logger, streams, timeout, clusterName) + return runE(cmd.Context(), logger, streams, timeout, force, verbose, clusterName) }, } command.Flags().DurationVar(&timeout, "timeout", DefaultStopTimeout, "Timeout for stopping the cluster") + command.Flags().BoolVar(&force, "force", false, "Force stop running containers immediately") + command.Flags().BoolVar(&verbose, "verbose", false, "Show detailed stop progress") return command } -func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeout time.Duration, clusterName string) error { +func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeout time.Duration, force bool, verbose bool, clusterName string) error { // Get active cluster (for informational purposes only) - activeCluster, err := cluster.GetActiveCluster() + activeCluster, err := getActiveClusterFn() if err != nil { logger.Debugf("Failed to get active cluster: %v", err) } @@ -61,7 +76,7 @@ func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeou defer cancel() // Create cluster manager - clusterMgr, err := cluster.New(logger, clusterName) + clusterMgr, err := newClusterManagerFn(logger, clusterName) if err != nil { return fmt.Errorf("failed to create cluster manager: %w", err) } @@ -71,13 +86,40 @@ func runE(ctx context.Context, logger *log.Logger, streams cmd.IOStreams, timeou return fmt.Errorf("cluster '%s' not found", clusterName) } - // Execute stop operation - if err := clusterMgr.Stop(stopCtx); err != nil { + if verbose { + fmt.Fprintf(streams.ErrOut, "Checking cluster '%s' status\n", clusterName) + } + + result, err := clusterMgr.StopWithOptions(stopCtx, cluster.StopOptions{Force: force, Verbose: verbose}) + if verbose { + for _, line := range result.VerboseLines { + fmt.Fprintf(streams.ErrOut, "%s\n", line) + } + } + if err != nil { return fmt.Errorf("failed to stop cluster: %w", err) } - // Note: Unlike delete, we do NOT modify active cluster setting - // The stopped cluster remains the active cluster for future start commands + for _, name := range result.Failures { + fmt.Fprintf(streams.ErrOut, "Failed to stop container '%s'\n", name) + } + + if result.FailedPreStopCount > 0 { + fmt.Fprintf(streams.ErrOut, "Cluster '%s' stopped (some containers were already failed)\n", clusterName) + return nil + } + if result.AlreadyStopped() { + fmt.Fprintf(streams.ErrOut, "Cluster '%s' is already stopped\n", clusterName) + return nil + } + if force { + fmt.Fprintf(streams.ErrOut, "Cluster '%s' force stopped\n", clusterName) + return nil + } + if result.FailedCount > 0 { + fmt.Fprintf(streams.ErrOut, "Cluster '%s' partially stopped\n", clusterName) + return nil + } fmt.Fprintf(streams.ErrOut, "Cluster '%s' stopped successfully\n", clusterName) return nil diff --git a/pkg/cmd/hind/stop/stop_test.go b/pkg/cmd/hind/stop/stop_test.go index 89e190d..d185fea 100644 --- a/pkg/cmd/hind/stop/stop_test.go +++ b/pkg/cmd/hind/stop/stop_test.go @@ -1,21 +1,196 @@ package stop import ( + "bytes" + "context" + "errors" "io" + "strings" "testing" "time" "github.com/apex/log" "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/cluster" "github.com/stenh0use/hind/pkg/cmd" ) -func TestNewCommand(t *testing.T) { - logger := &log.Logger{ - Handler: discard.New(), - Level: log.ErrorLevel, +func testLogger() *log.Logger { + return &log.Logger{Handler: discard.New(), Level: log.ErrorLevel} +} + +type fakeStopManager struct { + configExists bool + result cluster.StopResult + err error + receivedOpts cluster.StopOptions +} + +func (f *fakeStopManager) ConfigFileExists() bool { + return f.configExists +} + +func (f *fakeStopManager) StopWithOptions(_ context.Context, opts cluster.StopOptions) (cluster.StopResult, error) { + f.receivedOpts = opts + if f.err != nil { + return cluster.StopResult{}, f.err + } + return f.result, nil +} + +func withRunEStubs(t *testing.T, active string, build func() clusterStopper) { + t.Helper() + oldActive := getActiveClusterFn + oldNewMgr := newClusterManagerFn + getActiveClusterFn = func() (string, error) { return active, nil } + newClusterManagerFn = func(_ *log.Logger, _ string) (clusterStopper, error) { return build(), nil } + t.Cleanup(func() { + getActiveClusterFn = oldActive + newClusterManagerFn = oldNewMgr + }) +} + +func TestRunEMessageContracts(t *testing.T) { + tests := []struct { + name string + force bool + verbose bool + result cluster.StopResult + wantLines []string + wantForceOpt bool + wantVerboseOpt bool + wantFailContain bool + }{ + { + name: "already stopped", + result: cluster.StopResult{AlreadyStoppedCount: 2}, + wantLines: []string{"Cluster 'default' is already stopped"}, + }, + { + name: "force stopped", + force: true, + result: cluster.StopResult{StoppedCount: 2}, + wantLines: []string{"Cluster 'default' force stopped"}, + wantForceOpt: true, + }, + { + name: "partial failure warnings", + result: cluster.StopResult{StoppedCount: 1, FailedCount: 1, Failures: []string{"n2"}}, + wantLines: []string{"Failed to stop container 'n2'", "Cluster 'default' partially stopped"}, + }, + { + name: "unhealthy pre-failed", + result: cluster.StopResult{AlreadyStoppedCount: 1, FailedPreStopCount: 1}, + wantLines: []string{"Cluster 'default' stopped (some containers were already failed)"}, + }, + { + name: "verbose ordering", + verbose: true, + result: cluster.StopResult{StoppedCount: 1, VerboseLines: []string{ + "Checking container 'n1' status", + "Stopping container 'n1'", + }}, + wantLines: []string{ + "Checking cluster 'default' status", + "Checking container 'n1' status", + "Stopping container 'n1'", + "Cluster 'default' stopped successfully", + }, + wantVerboseOpt: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mgr := &fakeStopManager{configExists: true, result: tt.result} + withRunEStubs(t, "", func() clusterStopper { return mgr }) + errBuf := &bytes.Buffer{} + streams := cmd.IOStreams{Out: io.Discard, ErrOut: errBuf} + + err := runE(context.Background(), testLogger(), streams, DefaultStopTimeout, tt.force, tt.verbose, "") + if err != nil { + t.Fatalf("runE() error = %v", err) + } + + out := strings.TrimSpace(errBuf.String()) + gotLines := []string{} + if out != "" { + gotLines = strings.Split(out, "\n") + } + if len(gotLines) != len(tt.wantLines) { + t.Fatalf("line count=%d want=%d output=%q", len(gotLines), len(tt.wantLines), errBuf.String()) + } + for i := range tt.wantLines { + if gotLines[i] != tt.wantLines[i] { + t.Fatalf("line[%d]=%q want %q", i, gotLines[i], tt.wantLines[i]) + } + } + if mgr.receivedOpts.Force != tt.wantForceOpt { + t.Fatalf("force opt=%v want %v", mgr.receivedOpts.Force, tt.wantForceOpt) + } + if mgr.receivedOpts.Verbose != tt.wantVerboseOpt { + t.Fatalf("verbose opt=%v want %v", mgr.receivedOpts.Verbose, tt.wantVerboseOpt) + } + }) + } +} + +func TestRunEStopError(t *testing.T) { + mgr := &fakeStopManager{configExists: true, err: errors.New("boom")} + withRunEStubs(t, "", func() clusterStopper { return mgr }) + streams := cmd.IOStreams{Out: io.Discard, ErrOut: io.Discard} + + err := runE(context.Background(), testLogger(), streams, DefaultStopTimeout, false, false, "") + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "failed to stop cluster") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestRunEClusterNotFound(t *testing.T) { + mgr := &fakeStopManager{configExists: false} + withRunEStubs(t, "", func() clusterStopper { return mgr }) + streams := cmd.IOStreams{Out: io.Discard, ErrOut: io.Discard} + + err := runE(context.Background(), testLogger(), streams, DefaultStopTimeout, false, false, "") + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "cluster 'default' not found") { + t.Fatalf("unexpected error: %v", err) } +} + +func TestRunEUsesActiveClusterWhenNoArg(t *testing.T) { + mgr := &fakeStopManager{configExists: true, result: cluster.StopResult{AlreadyStoppedCount: 1}} + oldActive := getActiveClusterFn + oldNewMgr := newClusterManagerFn + getActiveClusterFn = func() (string, error) { return "active", nil } + var gotName string + newClusterManagerFn = func(_ *log.Logger, clusterName string) (clusterStopper, error) { + gotName = clusterName + return mgr, nil + } + t.Cleanup(func() { + getActiveClusterFn = oldActive + newClusterManagerFn = oldNewMgr + }) + + streams := cmd.IOStreams{Out: io.Discard, ErrOut: io.Discard} + err := runE(context.Background(), testLogger(), streams, DefaultStopTimeout, false, false, "") + if err != nil { + t.Fatalf("runE() error = %v", err) + } + if gotName != "active" { + t.Fatalf("cluster name=%q want active", gotName) + } +} + +func TestNewCommand(t *testing.T) { + logger := testLogger() streams := cmd.IOStreams{ Out: io.Discard, ErrOut: io.Discard, @@ -44,10 +219,7 @@ func TestDefaultTimeout(t *testing.T) { } func TestCommandFlags(t *testing.T) { - logger := &log.Logger{ - Handler: discard.New(), - Level: log.ErrorLevel, - } + logger := testLogger() streams := cmd.IOStreams{ Out: io.Discard, ErrOut: io.Discard, @@ -55,22 +227,33 @@ func TestCommandFlags(t *testing.T) { command := NewCommand(logger, streams) - // Check if timeout flag exists + // Check flags exist timeoutFlag := command.Flags().Lookup("timeout") if timeoutFlag == nil { t.Fatal("Expected 'timeout' flag to exist") } + forceFlag := command.Flags().Lookup("force") + if forceFlag == nil { + t.Fatal("Expected 'force' flag to exist") + } + verboseFlag := command.Flags().Lookup("verbose") + if verboseFlag == nil { + t.Fatal("Expected 'verbose' flag to exist") + } if timeoutFlag.DefValue != "30s" { t.Errorf("Expected timeout default value to be '30s', got '%s'", timeoutFlag.DefValue) } + if forceFlag.DefValue != "false" { + t.Errorf("Expected force default value to be 'false', got '%s'", forceFlag.DefValue) + } + if verboseFlag.DefValue != "false" { + t.Errorf("Expected verbose default value to be 'false', got '%s'", verboseFlag.DefValue) + } } func TestCommandArgs(t *testing.T) { - logger := &log.Logger{ - Handler: discard.New(), - Level: log.ErrorLevel, - } + logger := testLogger() streams := cmd.IOStreams{ Out: io.Discard, ErrOut: io.Discard, diff --git a/pkg/file/file.go b/pkg/file/file.go index 711d43c..9267317 100644 --- a/pkg/file/file.go +++ b/pkg/file/file.go @@ -36,7 +36,7 @@ func NewFromHomeDir(paths ...string) (*Manager, error) { // New creates a new file manager for the specified root directory func New(rootDir string) (*Manager, error) { // Validate rootDir - if err := validatePath(rootDir); err != nil { + if err := validateRootPath(rootDir); err != nil { return nil, fmt.Errorf("invalid path for rootDir: %w", err) } @@ -60,7 +60,11 @@ func (f *Manager) EnsureDir(path string) error { return fmt.Errorf("invalid path for EnsureDir: %w", err) } - fullPath := f.resolvePath(path) + fullPath, err := f.resolvePath(path) + if err != nil { + return fmt.Errorf("invalid path for EnsureDir: %w", err) + } + if err := os.MkdirAll(fullPath, dirPermissions); err != nil { return fmt.Errorf("failed to create directory %s: %w", fullPath, err) } @@ -73,7 +77,11 @@ func (f *Manager) RemoveDir(path string) error { return fmt.Errorf("invalid path for RemoveDir: %w", err) } - fullPath := f.resolvePath(path) + fullPath, err := f.resolvePath(path) + if err != nil { + return fmt.Errorf("invalid path for RemoveDir: %w", err) + } + if err := os.RemoveAll(fullPath); err != nil { return fmt.Errorf("failed to remove directory %s: %w", fullPath, err) } @@ -86,7 +94,11 @@ func (f *Manager) DirExists(path string) bool { return false } - fullPath := f.resolvePath(path) + fullPath, err := f.resolvePath(path) + if err != nil { + return false + } + info, err := os.Stat(fullPath) if err != nil { return false @@ -100,7 +112,11 @@ func (f *Manager) ListDir(path string) ([]os.DirEntry, error) { return nil, fmt.Errorf("invalid path for ListDir: %w", err) } - fullPath := f.resolvePath(path) + fullPath, err := f.resolvePath(path) + if err != nil { + return nil, fmt.Errorf("invalid path for ListDir: %w", err) + } + entries, err := os.ReadDir(fullPath) if err != nil { return nil, fmt.Errorf("failed to read directory %s: %w", fullPath, err) @@ -120,7 +136,10 @@ func (f *Manager) WriteFile(path string, data []byte) error { return errors.New("data cannot be nil") } - fullPath := f.resolvePath(path) + fullPath, err := f.resolvePath(path) + if err != nil { + return fmt.Errorf("invalid path for WriteFile: %w", err) + } // Ensure parent directory exists parentDir := filepath.Dir(fullPath) @@ -140,7 +159,11 @@ func (f *Manager) ReadFile(path string) ([]byte, error) { return nil, fmt.Errorf("invalid path for ReadFile: %w", err) } - fullPath := f.resolvePath(path) + fullPath, err := f.resolvePath(path) + if err != nil { + return nil, fmt.Errorf("invalid path for ReadFile: %w", err) + } + data, err := os.ReadFile(fullPath) if err != nil { return nil, fmt.Errorf("failed to read file %s: %w", fullPath, err) @@ -154,7 +177,11 @@ func (f *Manager) FileExists(path string) bool { return false } - fullPath := f.resolvePath(path) + fullPath, err := f.resolvePath(path) + if err != nil { + return false + } + info, err := os.Stat(fullPath) if err != nil { return false @@ -171,8 +198,15 @@ func (f *Manager) CopyFile(src, dst string) error { return fmt.Errorf("invalid destination path for CopyFile: %w", err) } - srcPath := f.resolvePath(src) - dstPath := f.resolvePath(dst) + srcPath, err := f.resolvePath(src) + if err != nil { + return fmt.Errorf("invalid source path for CopyFile: %w", err) + } + + dstPath, err := f.resolvePath(dst) + if err != nil { + return fmt.Errorf("invalid destination path for CopyFile: %w", err) + } // Open source file srcFile, err := os.Open(srcPath) @@ -213,7 +247,11 @@ func (f *Manager) RemoveFile(path string) error { return fmt.Errorf("invalid path for RemoveFile: %w", err) } - fullPath := f.resolvePath(path) + fullPath, err := f.resolvePath(path) + if err != nil { + return fmt.Errorf("invalid path for RemoveFile: %w", err) + } + if err := os.Remove(fullPath); err != nil { return fmt.Errorf("failed to remove file %s: %w", fullPath, err) } @@ -227,7 +265,13 @@ func (f *Manager) GetPath(path string) string { if err := validatePath(path); err != nil { return "" } - return f.resolvePath(path) + + fullPath, err := f.resolvePath(path) + if err != nil { + return "" + } + + return fullPath } // GetRootDir returns the root directory @@ -241,30 +285,68 @@ func (f *Manager) Exists(path string) bool { return false } - fullPath := f.resolvePath(path) - _, err := os.Stat(fullPath) + fullPath, err := f.resolvePath(path) + if err != nil { + return false + } + + _, err = os.Stat(fullPath) return err == nil } -// resolvePath resolves a path relative to the root directory -func (f *Manager) resolvePath(path string) string { - if filepath.IsAbs(path) { - return filepath.Clean(path) +// resolvePath resolves a path relative to the root directory and ensures confinement. +func (f *Manager) resolvePath(path string) (string, error) { + fullPath := filepath.Clean(filepath.Join(f.rootDir, path)) + + relPath, err := filepath.Rel(f.rootDir, fullPath) + if err != nil { + return "", fmt.Errorf("failed to evaluate relative path: %w", err) + } + + if relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) { + return "", errors.New("path escapes root directory") } - return JoinPath(f.rootDir, path) + + return fullPath, nil } func JoinPath(paths ...string) string { return filepath.Clean(filepath.Join(paths...)) } -// validatePath validates that a path is not empty and is relative +// validatePath validates that a path is not empty, not absolute, and does not include traversal segments. func validatePath(path string) error { if path == "" { return errors.New("path cannot be empty") } - // Trim whitespace and check again + trimmed := strings.TrimSpace(path) + if trimmed == "" { + return errors.New("path cannot be empty or whitespace") + } + + if filepath.IsAbs(trimmed) { + return errors.New("path must be relative") + } + + segments := strings.FieldsFunc(trimmed, func(r rune) bool { + return r == '/' || r == '\\' + }) + for _, segment := range segments { + if segment == ".." { + return errors.New("path cannot contain traversal segments") + } + } + + return nil +} + +// validateRootPath validates root path input for manager creation. +func validateRootPath(path string) error { + if path == "" { + return errors.New("path cannot be empty") + } + trimmed := strings.TrimSpace(path) if trimmed == "" { return errors.New("path cannot be empty or whitespace") diff --git a/pkg/file/file_test.go b/pkg/file/file_test.go new file mode 100644 index 0000000..1cfd9ce --- /dev/null +++ b/pkg/file/file_test.go @@ -0,0 +1,85 @@ +package file + +import "testing" + +func TestManagerRejectsTraversalAndAbsolutePaths(t *testing.T) { + tests := []struct { + name string + op func(*Manager) error + wantErr bool + }{ + { + name: "write rejects traversal", + op: func(m *Manager) error { + return m.WriteFile("../../escape.txt", []byte("x")) + }, + wantErr: true, + }, + { + name: "read rejects traversal", + op: func(m *Manager) error { + _, err := m.ReadFile("../cluster.json") + return err + }, + wantErr: true, + }, + { + name: "ensure dir rejects traversal", + op: func(m *Manager) error { + return m.EnsureDir("../../cluster") + }, + wantErr: true, + }, + { + name: "remove dir rejects traversal", + op: func(m *Manager) error { + return m.RemoveDir("../cluster") + }, + wantErr: true, + }, + { + name: "write rejects absolute", + op: func(m *Manager) error { + return m.WriteFile("/tmp/escape.txt", []byte("x")) + }, + wantErr: true, + }, + { + name: "valid nested relative path", + op: func(m *Manager) error { + return m.WriteFile("cluster/default/cluster.json", []byte("{}")) + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + root := t.TempDir() + m, err := New(root) + if err != nil { + t.Fatalf("New() failed: %v", err) + } + + err = tt.op(m) + if tt.wantErr && err == nil { + t.Fatal("expected error, got nil") + } + if !tt.wantErr && err != nil { + t.Fatalf("expected no error, got %v", err) + } + }) + } +} + +func TestManagerGetPathRejectsEscape(t *testing.T) { + root := t.TempDir() + m, err := New(root) + if err != nil { + t.Fatalf("New() failed: %v", err) + } + + if got := m.GetPath("../../escape"); got != "" { + t.Fatalf("expected empty path for traversal input, got %q", got) + } +} diff --git a/pkg/provider/container.go b/pkg/provider/container.go index 7a39778..f61433a 100644 --- a/pkg/provider/container.go +++ b/pkg/provider/container.go @@ -17,8 +17,4 @@ type ContainerInfo struct { Image string Ports []string Labels map[string]string - Network string - Address string } - -type ContainerSummary struct{} diff --git a/pkg/provider/container_spec.go b/pkg/provider/container_spec.go new file mode 100644 index 0000000..a2107e9 --- /dev/null +++ b/pkg/provider/container_spec.go @@ -0,0 +1,25 @@ +package provider + +import "github.com/stenh0use/hind/pkg/config" + +type ContainerSpec struct { + Name string + Network string + Image config.Image + Ports []config.PortMapping + Environment map[string]string + Labels config.Labels + Devices []string +} + +func ContainerSpecFromNode(node config.Node) ContainerSpec { + return ContainerSpec{ + Name: node.Name, + Network: node.Network, + Image: node.Image, + Ports: node.Ports, + Environment: node.Environment, + Labels: node.Labels, + Devices: node.Devices, + } +} diff --git a/pkg/provider/dockercli/build.go b/pkg/provider/dockercli/build.go index 6e46bbf..14aa3a8 100644 --- a/pkg/provider/dockercli/build.go +++ b/pkg/provider/dockercli/build.go @@ -1,7 +1,149 @@ package dockercli -import "context" +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" -type BuildOpts struct{} + "github.com/stenh0use/hind/pkg/provider" +) -func (c *Client) BuildImage(ctx context.Context, opts BuildOpts) {} +const metadataFileName = "metadata.json" + +// buildMetadata holds the parsed content of the docker buildx metadata.json file. +type buildMetadata struct { + ContainerImageDigest string `json:"containerimage.digest"` + ImageName string `json:"image.name"` +} + +// BuildImage builds a Docker image using buildx. It checks for buildx availability +// at call time and returns a structured result with the image digest and ref. +func (c *Client) BuildImage(ctx context.Context, opts provider.BuildImageOptions) (provider.BuildImageResult, error) { + if opts.Name == "" { + return provider.BuildImageResult{}, fmt.Errorf("image name is required") + } + if opts.Tag == "" { + return provider.BuildImageResult{}, fmt.Errorf("image tag is required") + } + if opts.ContextDir == "" { + return provider.BuildImageResult{}, fmt.Errorf("build context dir is required") + } + + // Check buildx availability at call time, not at construction. + if err := checkBuildxAvailable(ctx, c.executor); err != nil { + return provider.BuildImageResult{}, err + } + + args := c.buildxArgs(opts) + + var stdout, stderr strings.Builder + if err := c.executor.Run(ctx, opts.ContextDir, &stdout, &stderr, "docker", args...); err != nil { + return provider.BuildImageResult{}, fmt.Errorf("failed to build image: %w: %s", err, stderr.String()) + } + + digest, err := readDigestFromMetadata(filepath.Join(opts.ContextDir, metadataFileName)) + if err != nil { + return provider.BuildImageResult{}, err + } + + imageRef := fmt.Sprintf("%s:%s", opts.Name, opts.Tag) + + return provider.BuildImageResult{ + Digest: digest, + ImageRef: imageRef, + }, nil +} + +// buildxArgs constructs the argument list for `docker buildx build ...`. +// --load is always included so the image is loaded into the local Docker image store. +func (c *Client) buildxArgs(opts provider.BuildImageOptions) []string { + args := []string{ + "buildx", + "build", + "-t", fmt.Sprintf("%s:%s", opts.Name, opts.Tag), + "--load", + "--metadata-file", metadataFileName, + } + + if opts.Dockerfile != "" { + args = append(args, "-f", opts.Dockerfile) + } + + if !opts.WithCache { + args = append(args, "--no-cache") + } + + if opts.Platform != "" { + args = append(args, "--platform", opts.Platform) + } + + for k, v := range opts.BuildArgs { + args = append(args, "--build-arg", fmt.Sprintf("%s=%s", k, v)) + } + + args = append(args, ".") + return args +} + +// readDigestFromMetadata reads and parses metadata.json written by docker buildx. +// Returns an error if the digest is absent or does not begin with "sha256:". +func readDigestFromMetadata(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("failed to read metadata file %s: %w", path, err) + } + + var m buildMetadata + if err := json.Unmarshal(data, &m); err != nil { + return "", fmt.Errorf("failed to parse metadata file %s: %w", path, err) + } + + if m.ContainerImageDigest == "" { + return "", fmt.Errorf("metadata file %s contains an empty digest", path) + } + if !strings.HasPrefix(m.ContainerImageDigest, "sha256:") { + return "", fmt.Errorf("unexpected digest format in %s: %q", path, m.ContainerImageDigest) + } + + return m.ContainerImageDigest, nil +} + +// TagExists reports whether the given image name:tag exists in the local Docker image store. +func (c *Client) TagExists(ctx context.Context, name string, tag string) (bool, error) { + if name == "" { + return false, fmt.Errorf("image name is required") + } + if tag == "" { + return false, fmt.Errorf("image tag is required") + } + + cmd := baseClientCmd(ctx) + cmd.Args = append(cmd.Args, "image", "ls", fmt.Sprintf("%s:%s", name, tag), "--format", "{{ .ID }}") + out, err := cmd.Output() + if err != nil { + return false, fmt.Errorf("failed to inspect image tags: %w", err) + } + + return strings.TrimSpace(string(out)) != "", nil +} + +// PullImage pulls an image from a registry. +func (c *Client) PullImage(ctx context.Context, name string, tag string) error { + if name == "" { + return fmt.Errorf("image name is required") + } + if tag == "" { + return fmt.Errorf("image tag is required") + } + + cmd := baseClientCmd(ctx) + cmd.Args = append(cmd.Args, "pull", fmt.Sprintf("%s:%s", name, tag)) + if _, err := cmd.Output(); err != nil { + return fmt.Errorf("failed to pull image: %w", err) + } + + return nil +} diff --git a/pkg/provider/dockercli/build_test.go b/pkg/provider/dockercli/build_test.go new file mode 100644 index 0000000..4e5f332 --- /dev/null +++ b/pkg/provider/dockercli/build_test.go @@ -0,0 +1,296 @@ +package dockercli + +import ( + "context" + "encoding/json" + "io" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/provider" +) + +// fakeExecutor is a test double for CommandExecutor that records calls and +// returns configured results. +type fakeExecutor struct { + // outputFn is called when Output is invoked. If nil, returns empty bytes. + outputFn func(ctx context.Context, dir, name string, args ...string) ([]byte, error) + // runFn is called when Run is invoked. If nil, returns nil. + runFn func(ctx context.Context, dir string, stdout, stderr io.Writer, name string, args ...string) error + // capturedRunArgs holds the args passed to the most recent Run call. + capturedRunArgs []string +} + +func (f *fakeExecutor) Output(ctx context.Context, dir, name string, args ...string) ([]byte, error) { + if f.outputFn != nil { + return f.outputFn(ctx, dir, name, args...) + } + return []byte("{}"), nil +} + +func (f *fakeExecutor) Run(ctx context.Context, dir string, stdout, stderr io.Writer, name string, args ...string) error { + f.capturedRunArgs = args + if f.runFn != nil { + return f.runFn(ctx, dir, stdout, stderr, name, args...) + } + return nil +} + +// dockerInfoWithBuildx returns a JSON blob representing docker system info with buildx present. +func dockerInfoWithBuildx() []byte { + info := dockerInfo{ + ClientInfo: clientInfo{ + Plugins: []plugin{{Name: "buildx"}}, + }, + } + raw, _ := json.Marshal(info) + return raw +} + +// dockerInfoWithoutBuildx returns a JSON blob representing docker system info with no plugins. +func dockerInfoWithoutBuildx() []byte { + info := dockerInfo{ + ClientInfo: clientInfo{ + Plugins: []plugin{}, + }, + } + raw, _ := json.Marshal(info) + return raw +} + +// newTestLogger returns a logger that discards all output. +func newTestLogger() *log.Logger { + return &log.Logger{Handler: discard.New()} +} + +// writeMetadataFile writes a metadata.json with the given digest to dir. +func writeMetadataFile(t *testing.T, dir, digest string) { + t.Helper() + m := buildMetadata{ContainerImageDigest: digest} + data, err := json.Marshal(m) + if err != nil { + t.Fatalf("writeMetadataFile: marshal error: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, metadataFileName), data, 0o600); err != nil { + t.Fatalf("writeMetadataFile: write error: %v", err) + } +} + +func TestBuildImage_BuildxAbsent(t *testing.T) { + exec := &fakeExecutor{ + outputFn: func(_ context.Context, _, _ string, _ ...string) ([]byte, error) { + return dockerInfoWithoutBuildx(), nil + }, + } + client := newWithExecutor(newTestLogger(), exec) + + ctx := context.Background() + tmpDir := t.TempDir() + + _, err := client.BuildImage(ctx, provider.BuildImageOptions{ + Name: "myimage", + Tag: "latest", + ContextDir: tmpDir, + }) + + if err == nil { + t.Fatal("expected error when buildx is absent, got nil") + } + if !strings.Contains(err.Error(), "buildx") { + t.Errorf("expected error to contain 'buildx', got: %q", err.Error()) + } + // Verify no build command was executed (capturedRunArgs stays nil). + if exec.capturedRunArgs != nil { + t.Errorf("expected no build command to be run, but Run was called with: %v", exec.capturedRunArgs) + } +} + +func TestBuildImage_Success(t *testing.T) { + const wantDigest = "sha256:abc123deadbeef" + const wantName = "myimage" + const wantTag = "v1.0.0" + + tmpDir := t.TempDir() + + exec := &fakeExecutor{ + outputFn: func(_ context.Context, _, _ string, _ ...string) ([]byte, error) { + return dockerInfoWithBuildx(), nil + }, + runFn: func(_ context.Context, _ string, _, _ io.Writer, _ string, _ ...string) error { + writeMetadataFile(t, tmpDir, wantDigest) + return nil + }, + } + + client := newWithExecutor(newTestLogger(), exec) + ctx := context.Background() + + result, err := client.BuildImage(ctx, provider.BuildImageOptions{ + Name: wantName, + Tag: wantTag, + ContextDir: tmpDir, + }) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Digest != wantDigest { + t.Errorf("Digest = %q, want %q", result.Digest, wantDigest) + } + wantImageRef := wantName + ":" + wantTag + if result.ImageRef != wantImageRef { + t.Errorf("ImageRef = %q, want %q", result.ImageRef, wantImageRef) + } + if !strings.HasPrefix(result.Digest, "sha256:") { + t.Errorf("Digest does not start with 'sha256:': %q", result.Digest) + } +} + +func TestNew_SucceedsWithoutBuildx(t *testing.T) { + // New must not check for buildx; it must succeed regardless. + logger := newTestLogger() + client := New(logger) + if client == nil { + t.Error("New returned nil, want non-nil provider.Client") + } +} + +func TestBuildImage_EmptyDigestIsError(t *testing.T) { + tmpDir := t.TempDir() + + exec := &fakeExecutor{ + outputFn: func(_ context.Context, _, _ string, _ ...string) ([]byte, error) { + return dockerInfoWithBuildx(), nil + }, + runFn: func(_ context.Context, _ string, _, _ io.Writer, _ string, _ ...string) error { + // Write metadata with empty digest. + writeMetadataFile(t, tmpDir, "") + return nil + }, + } + + client := newWithExecutor(newTestLogger(), exec) + ctx := context.Background() + + _, err := client.BuildImage(ctx, provider.BuildImageOptions{ + Name: "myimage", + Tag: "v1", + ContextDir: tmpDir, + }) + + if err == nil { + t.Fatal("expected error for empty digest, got nil") + } +} + +func TestBuildImage_LoadFlagPresent(t *testing.T) { + tmpDir := t.TempDir() + const wantDigest = "sha256:loadtest" + + exec := &fakeExecutor{ + outputFn: func(_ context.Context, _, _ string, _ ...string) ([]byte, error) { + return dockerInfoWithBuildx(), nil + }, + runFn: func(_ context.Context, _ string, _, _ io.Writer, _ string, _ ...string) error { + writeMetadataFile(t, tmpDir, wantDigest) + return nil + }, + } + + client := newWithExecutor(newTestLogger(), exec) + ctx := context.Background() + + if _, err := client.BuildImage(ctx, provider.BuildImageOptions{ + Name: "myimage", + Tag: "v1", + ContextDir: tmpDir, + }); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, arg := range exec.capturedRunArgs { + if arg == "--load" { + found = true + break + } + } + if !found { + t.Errorf("--load not found in build args: %v", exec.capturedRunArgs) + } +} + +func TestBuildImage_PlatformOmittedWhenEmpty(t *testing.T) { + tmpDir := t.TempDir() + const wantDigest = "sha256:platformtest" + + exec := &fakeExecutor{ + outputFn: func(_ context.Context, _, _ string, _ ...string) ([]byte, error) { + return dockerInfoWithBuildx(), nil + }, + runFn: func(_ context.Context, _ string, _, _ io.Writer, _ string, _ ...string) error { + writeMetadataFile(t, tmpDir, wantDigest) + return nil + }, + } + + client := newWithExecutor(newTestLogger(), exec) + ctx := context.Background() + + if _, err := client.BuildImage(ctx, provider.BuildImageOptions{ + Name: "myimage", + Tag: "v1", + ContextDir: tmpDir, + Platform: "", // empty — must be omitted + }); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + for _, arg := range exec.capturedRunArgs { + if arg == "--platform" { + t.Errorf("--platform should be absent when Platform is empty, but found in args: %v", exec.capturedRunArgs) + } + } +} + +func TestBuildImage_NoCacheWhenWithCacheFalse(t *testing.T) { + tmpDir := t.TempDir() + const wantDigest = "sha256:nocachetest" + + exec := &fakeExecutor{ + outputFn: func(_ context.Context, _, _ string, _ ...string) ([]byte, error) { + return dockerInfoWithBuildx(), nil + }, + runFn: func(_ context.Context, _ string, _, _ io.Writer, _ string, _ ...string) error { + writeMetadataFile(t, tmpDir, wantDigest) + return nil + }, + } + + client := newWithExecutor(newTestLogger(), exec) + ctx := context.Background() + + if _, err := client.BuildImage(ctx, provider.BuildImageOptions{ + Name: "myimage", + Tag: "v1", + ContextDir: tmpDir, + WithCache: false, + }); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, arg := range exec.capturedRunArgs { + if arg == "--no-cache" { + found = true + break + } + } + if !found { + t.Errorf("--no-cache not found in args when WithCache=false: %v", exec.capturedRunArgs) + } +} diff --git a/pkg/provider/dockercli/client.go b/pkg/provider/dockercli/client.go index 4c403d1..43826a0 100644 --- a/pkg/provider/dockercli/client.go +++ b/pkg/provider/dockercli/client.go @@ -2,6 +2,7 @@ package dockercli import ( "context" + "io" "os/exec" "github.com/apex/log" @@ -10,15 +11,49 @@ import ( const clientBin = "docker" -// Client provides an interface to the Docker API for cluster operations +// CommandExecutor abstracts command execution for Docker operations. +// It allows tests to inject a fake executor without spawning real processes. +type CommandExecutor interface { + Run(ctx context.Context, dir string, stdout, stderr io.Writer, name string, args ...string) error + Output(ctx context.Context, dir string, name string, args ...string) ([]byte, error) +} + +type osCommandExecutor struct{} + +func (osCommandExecutor) Run(ctx context.Context, dir string, stdout, stderr io.Writer, name string, args ...string) error { + cmd := exec.CommandContext(ctx, name, args...) + cmd.Dir = dir + cmd.Stdout = stdout + cmd.Stderr = stderr + return cmd.Run() +} + +func (osCommandExecutor) Output(ctx context.Context, dir string, name string, args ...string) ([]byte, error) { + cmd := exec.CommandContext(ctx, name, args...) + cmd.Dir = dir + return cmd.Output() +} + +// Client provides an interface to the Docker API for cluster operations. type Client struct { - logger *log.Logger + logger *log.Logger + executor CommandExecutor } -// New creates a new Docker client +// New creates a new Docker client. +// It does not perform a buildx availability check; that happens inside BuildImage at call time. func New(logger *log.Logger) provider.Client { return &Client{ - logger: logger, + logger: logger, + executor: osCommandExecutor{}, + } +} + +// newWithExecutor creates a Client with a custom executor for testing. +func newWithExecutor(logger *log.Logger, exec CommandExecutor) *Client { + return &Client{ + logger: logger, + executor: exec, } } diff --git a/pkg/provider/dockercli/container.go b/pkg/provider/dockercli/container.go index 37253c0..db3c2ad 100644 --- a/pkg/provider/dockercli/container.go +++ b/pkg/provider/dockercli/container.go @@ -12,7 +12,6 @@ import ( "github.com/apex/log" "github.com/moby/moby/api/types/container" - "github.com/stenh0use/hind/pkg/config" "github.com/stenh0use/hind/pkg/provider" ) @@ -22,8 +21,15 @@ func baseContainerCmd(ctx context.Context) *exec.Cmd { return baseClientCmd(ctx, containerCmd) } +func normalizeContainerStatus(status string) string { + if strings.EqualFold(status, "exited") { + return provider.Stopped.String() + } + return status +} + // Create and start a container -func (c *Client) CreateContainer(ctx context.Context, cfg config.Node) (string, error) { +func (c *Client) CreateContainer(ctx context.Context, cfg provider.ContainerSpec) (string, error) { if cfg.Name == "" { return "", fmt.Errorf("name is required to create a container") } @@ -61,7 +67,7 @@ func (c *Client) CreateContainer(ctx context.Context, cfg config.Node) (string, } else if cfg.Image.Tag != "" { imgRef = fmt.Sprintf("%s:%s", cfg.Image.Name, cfg.Image.Tag) } else { - imgRef = cfg.Name + imgRef = cfg.Image.Name } // add container name if cfg.Name != "" { @@ -159,6 +165,23 @@ func (c *Client) StopContainer(ctx context.Context, name string) error { return nil } +// KillContainer force-stops a running container immediately. +func (c *Client) KillContainer(ctx context.Context, name string) error { + if name == "" { + return fmt.Errorf("name or id is required to kill a container") + } + cmd := baseContainerCmd(ctx) + cmd.Args = append(cmd.Args, "kill", name) + + c.logger.WithField("container", name).Debug("killing container") + _, err := cmd.Output() + if err != nil { + return fmt.Errorf("failed to kill container: %w", err) + } + + return nil +} + // Delete a container // TODO: need options such as `-v` to remove anonymous volumes on delete func (c *Client) DeleteContainer(ctx context.Context, name string) error { @@ -214,7 +237,7 @@ func (c *Client) InspectContainer(ctx context.Context, name string) (*provider.C Name: res.Name, Created: res.Created, HostName: res.Config.Hostname, - Status: res.State.Status, + Status: normalizeContainerStatus(res.State.Status), Image: res.Config.Image, } @@ -275,7 +298,7 @@ func (c *Client) ListContainers(ctx context.Context, filters []string) ([]provid response = append(response, provider.ContainerInfo{ ID: entry.ID, Name: entry.Names, - Status: entry.State, + Status: normalizeContainerStatus(entry.State), Image: entry.Image, }) } diff --git a/pkg/provider/dockercli/container_test.go b/pkg/provider/dockercli/container_test.go new file mode 100644 index 0000000..2b33877 --- /dev/null +++ b/pkg/provider/dockercli/container_test.go @@ -0,0 +1,92 @@ +package dockercli + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" + "github.com/stenh0use/hind/pkg/config" + "github.com/stenh0use/hind/pkg/provider" +) + +func TestNormalizeContainerStatus(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + {name: "running passthrough", input: provider.Running.String(), expected: provider.Running.String()}, + {name: "stopped passthrough", input: provider.Stopped.String(), expected: provider.Stopped.String()}, + {name: "exited maps to stopped", input: "exited", expected: provider.Stopped.String()}, + {name: "uppercase exited maps to stopped", input: "EXITED", expected: provider.Stopped.String()}, + {name: "unknown passthrough", input: "restarting", expected: "restarting"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := normalizeContainerStatus(tt.input); got != tt.expected { + t.Fatalf("normalizeContainerStatus(%q) = %q, want %q", tt.input, got, tt.expected) + } + }) + } +} + +func TestCreateContainer_UsesImageNameWhenTagAndDigestUnset(t *testing.T) { + tmpDir := t.TempDir() + argsFile := filepath.Join(tmpDir, "docker-args.txt") + dockerBin := filepath.Join(tmpDir, "docker") + script := "#!/bin/sh\nprintf '%s\n' \"$@\" > \"$DOCKER_ARGS_FILE\"\nprintf 'container-id\n'\n" + if err := os.WriteFile(dockerBin, []byte(script), 0o755); err != nil { + t.Fatalf("failed to write fake docker binary: %v", err) + } + + oldPath := os.Getenv("PATH") + if err := os.Setenv("PATH", tmpDir+":"+oldPath); err != nil { + t.Fatalf("failed to set PATH: %v", err) + } + t.Cleanup(func() { + _ = os.Setenv("PATH", oldPath) + }) + + oldArgsFile := os.Getenv("DOCKER_ARGS_FILE") + if err := os.Setenv("DOCKER_ARGS_FILE", argsFile); err != nil { + t.Fatalf("failed to set DOCKER_ARGS_FILE: %v", err) + } + t.Cleanup(func() { + _ = os.Setenv("DOCKER_ARGS_FILE", oldArgsFile) + }) + + c := &Client{logger: &log.Logger{Handler: discard.New()}} + cfg := config.Node{ + Name: "hind.test.consul.01", + Image: config.Image{ + Name: "docker.io/stenh0use/hind.consul", + }, + } + + if _, err := c.CreateContainer(context.Background(), provider.ContainerSpecFromNode(cfg)); err != nil { + t.Fatalf("CreateContainer() error = %v", err) + } + + argsOut, err := os.ReadFile(argsFile) + if err != nil { + t.Fatalf("failed to read docker args file: %v", err) + } + + args := strings.Split(strings.TrimSpace(string(argsOut)), "\n") + if len(args) == 0 { + t.Fatal("expected docker args, got none") + } + + last := args[len(args)-1] + if last != cfg.Image.Name { + t.Fatalf("image arg = %q, want %q", last, cfg.Image.Name) + } + if last == cfg.Name { + t.Fatalf("image arg unexpectedly fell back to container name %q", cfg.Name) + } +} diff --git a/pkg/provider/dockercli/info.go b/pkg/provider/dockercli/info.go new file mode 100644 index 0000000..2af00e9 --- /dev/null +++ b/pkg/provider/dockercli/info.go @@ -0,0 +1,51 @@ +package dockercli + +import ( + "context" + "encoding/json" + "fmt" +) + +// dockerInfo holds the parsed output of `docker system info --format {{json .}}`. +type dockerInfo struct { + ClientInfo clientInfo `json:"ClientInfo"` +} + +type clientInfo struct { + Plugins []plugin `json:"Plugins"` + Version string `json:"Version"` +} + +type plugin struct { + Name string `json:"Name"` +} + +// hasClientPlugin reports whether a named client plugin is present in the docker info. +func (i *dockerInfo) hasClientPlugin(name string) bool { + for _, p := range i.ClientInfo.Plugins { + if p.Name == name { + return true + } + } + return false +} + +// checkBuildxAvailable checks at call time whether the buildx plugin is installed. +// It uses the provided executor so tests can inject fakes. +func checkBuildxAvailable(ctx context.Context, executor CommandExecutor) error { + raw, err := executor.Output(ctx, "", "docker", "system", "info", "--format", "{{json .}}") + if err != nil { + return fmt.Errorf("failed to get docker system info: %w", err) + } + + info := dockerInfo{} + if err := json.Unmarshal(raw, &info); err != nil { + return fmt.Errorf("failed to parse docker system info: %w", err) + } + + if !info.hasClientPlugin("buildx") { + return fmt.Errorf("buildx client plugin is needed but not installed") + } + + return nil +} diff --git a/pkg/provider/dockercli/network.go b/pkg/provider/dockercli/network.go index eb74d1d..b23da29 100644 --- a/pkg/provider/dockercli/network.go +++ b/pkg/provider/dockercli/network.go @@ -138,7 +138,7 @@ func (c *Client) ListNetworks(ctx context.Context, filters []string) ([]provider out, err := cmd.Output() if err != nil { - return response, fmt.Errorf("failed to inspect network: %w", err) + return response, fmt.Errorf("failed to list networks: %w", err) } if len(out) == 0 { diff --git a/pkg/provider/dockercli/network_test.go b/pkg/provider/dockercli/network_test.go new file mode 100644 index 0000000..a64df19 --- /dev/null +++ b/pkg/provider/dockercli/network_test.go @@ -0,0 +1,42 @@ +package dockercli + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/apex/log" + "github.com/apex/log/handlers/discard" +) + +func TestListNetworks_ReturnsListSpecificErrorTextOnFailure(t *testing.T) { + tmpDir := t.TempDir() + dockerBin := filepath.Join(tmpDir, "docker") + script := "#!/bin/sh\nexit 2\n" + if err := os.WriteFile(dockerBin, []byte(script), 0o755); err != nil { + t.Fatalf("failed to write fake docker binary: %v", err) + } + + oldPath := os.Getenv("PATH") + if err := os.Setenv("PATH", tmpDir+":"+oldPath); err != nil { + t.Fatalf("failed to set PATH: %v", err) + } + t.Cleanup(func() { + _ = os.Setenv("PATH", oldPath) + }) + + c := &Client{logger: &log.Logger{Handler: discard.New()}} + _, err := c.ListNetworks(context.Background(), nil) + if err == nil { + t.Fatal("ListNetworks() expected error, got nil") + } + + if !strings.Contains(err.Error(), "failed to list networks") { + t.Fatalf("error = %q, want to contain %q", err.Error(), "failed to list networks") + } + if strings.Contains(err.Error(), "failed to inspect network") { + t.Fatalf("error = %q, should not contain inspect wording", err.Error()) + } +} diff --git a/pkg/provider/image.go b/pkg/provider/image.go new file mode 100644 index 0000000..df638d6 --- /dev/null +++ b/pkg/provider/image.go @@ -0,0 +1,19 @@ +package provider + +// BuildImageOptions holds the parameters for building a Docker image. +type BuildImageOptions struct { + Name string + Tag string + ContextDir string + BuildArgs map[string]string + Dockerfile string // optional; empty means default Dockerfile + WithCache bool // pass --no-cache when false + Platform string // optional; empty means omit --platform — do not supply a default +} + +// BuildImageResult is the structured result of a successful image build. +// Both fields are non-empty on success; an empty Digest or ImageRef is an error condition. +type BuildImageResult struct { + Digest string // sha256 digest, e.g. "sha256:abc123..." + ImageRef string // fully qualified ref, e.g. "hind/consul:0.1.0" +} diff --git a/pkg/provider/mock/mock.go b/pkg/provider/mock/mock.go new file mode 100644 index 0000000..0af4cea --- /dev/null +++ b/pkg/provider/mock/mock.go @@ -0,0 +1,126 @@ +package mock + +import ( + "context" + + "github.com/stenh0use/hind/pkg/config" + "github.com/stenh0use/hind/pkg/provider" +) + +// ClientStub is a stub implementation of provider.Client for testing. +type ClientStub struct { + CreateContainerFn func(context.Context, provider.ContainerSpec) (string, error) + StartContainerFn func(context.Context, string) error + StopContainerFn func(context.Context, string) error + KillContainerFn func(context.Context, string) error + DeleteContainerFn func(context.Context, string) error + InspectContainerFn func(context.Context, string) (*provider.ContainerInfo, error) + ListContainersFn func(context.Context, []string) ([]provider.ContainerInfo, error) + BuildImageFn func(context.Context, provider.BuildImageOptions) (provider.BuildImageResult, error) + TagExistsFn func(context.Context, string, string) (bool, error) + PullImageFn func(context.Context, string, string) error + CreateNetworkFn func(context.Context, config.Network) (string, error) + DeleteNetworkFn func(context.Context, string) error + ListNetworksFn func(context.Context, []string) ([]provider.NetworkInfo, error) + InspectNetworkFn func(context.Context, string) (*provider.NetworkInfo, error) +} + +func (c *ClientStub) CreateContainer(ctx context.Context, cfg provider.ContainerSpec) (string, error) { + if c.CreateContainerFn != nil { + return c.CreateContainerFn(ctx, cfg) + } + return "", nil +} + +func (c *ClientStub) StartContainer(ctx context.Context, name string) error { + if c.StartContainerFn != nil { + return c.StartContainerFn(ctx, name) + } + return nil +} + +func (c *ClientStub) StopContainer(ctx context.Context, name string) error { + if c.StopContainerFn != nil { + return c.StopContainerFn(ctx, name) + } + return nil +} + +func (c *ClientStub) KillContainer(ctx context.Context, name string) error { + if c.KillContainerFn != nil { + return c.KillContainerFn(ctx, name) + } + return nil +} + +func (c *ClientStub) DeleteContainer(ctx context.Context, name string) error { + if c.DeleteContainerFn != nil { + return c.DeleteContainerFn(ctx, name) + } + return nil +} + +func (c *ClientStub) InspectContainer(ctx context.Context, name string) (*provider.ContainerInfo, error) { + if c.InspectContainerFn != nil { + return c.InspectContainerFn(ctx, name) + } + return nil, nil +} + +func (c *ClientStub) ListContainers(ctx context.Context, filters []string) ([]provider.ContainerInfo, error) { + if c.ListContainersFn != nil { + return c.ListContainersFn(ctx, filters) + } + return nil, nil +} + +func (c *ClientStub) BuildImage(ctx context.Context, opts provider.BuildImageOptions) (provider.BuildImageResult, error) { + if c.BuildImageFn != nil { + return c.BuildImageFn(ctx, opts) + } + return provider.BuildImageResult{}, nil +} + +func (c *ClientStub) TagExists(ctx context.Context, name string, tag string) (bool, error) { + if c.TagExistsFn != nil { + return c.TagExistsFn(ctx, name, tag) + } + return false, nil +} + +func (c *ClientStub) PullImage(ctx context.Context, name string, tag string) error { + if c.PullImageFn != nil { + return c.PullImageFn(ctx, name, tag) + } + return nil +} + +func (c *ClientStub) CreateNetwork(ctx context.Context, cfg config.Network) (string, error) { + if c.CreateNetworkFn != nil { + return c.CreateNetworkFn(ctx, cfg) + } + return "", nil +} + +func (c *ClientStub) DeleteNetwork(ctx context.Context, name string) error { + if c.DeleteNetworkFn != nil { + return c.DeleteNetworkFn(ctx, name) + } + return nil +} + +func (c *ClientStub) ListNetworks(ctx context.Context, filters []string) ([]provider.NetworkInfo, error) { + if c.ListNetworksFn != nil { + return c.ListNetworksFn(ctx, filters) + } + return nil, nil +} + +func (c *ClientStub) InspectNetwork(ctx context.Context, name string) (*provider.NetworkInfo, error) { + if c.InspectNetworkFn != nil { + return c.InspectNetworkFn(ctx, name) + } + return nil, nil +} + +var _ provider.Client = (*ClientStub)(nil) diff --git a/pkg/provider/network.go b/pkg/provider/network.go index c9dea10..52a789e 100644 --- a/pkg/provider/network.go +++ b/pkg/provider/network.go @@ -7,12 +7,5 @@ type NetworkInfo struct { Name string Created time.Time Driver string - Status string - Image string - Ports []string Labels map[string]string - Network string - Address string } - -type NetworkSummary struct{} diff --git a/pkg/provider/provider.go b/pkg/provider/provider.go index 6f0d1a7..a4c528c 100644 --- a/pkg/provider/provider.go +++ b/pkg/provider/provider.go @@ -9,11 +9,13 @@ import ( type Client interface { // Container methods // Create and start a node - CreateContainer(ctx context.Context, cfg config.Node) (string, error) + CreateContainer(ctx context.Context, cfg ContainerSpec) (string, error) // Start a node if it is stopped StartContainer(ctx context.Context, name string) error // Stop a node if it is running StopContainer(ctx context.Context, name string) error + // Force stop a node immediately + KillContainer(ctx context.Context, name string) error // Delete a node DeleteContainer(ctx context.Context, name string) error // Inspect node state @@ -21,6 +23,11 @@ type Client interface { // List nodes ListContainers(ctx context.Context, filters []string) ([]ContainerInfo, error) + // Image methods + BuildImage(ctx context.Context, opts BuildImageOptions) (BuildImageResult, error) + TagExists(ctx context.Context, name string, tag string) (bool, error) + PullImage(ctx context.Context, name string, tag string) error + // Network methods // Create a new docker network CreateNetwork(ctx context.Context, cfg config.Network) (string, error) diff --git a/pkg/provider/status.go b/pkg/provider/status.go index c39f42a..825ff40 100644 --- a/pkg/provider/status.go +++ b/pkg/provider/status.go @@ -13,9 +13,3 @@ const ( func (s Status) String() string { return string(s) } - -type ClusterInfo struct { - Name string - Containers []ContainerInfo - Network NetworkInfo -}