From fd295fcdfa5be3e6d356dcc1c5887bd4f7354755 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:15:56 +0000 Subject: [PATCH 01/79] chore: ignore superpowers artifacts and local CLAUDE.md --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index aaadf73..2c511e8 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,9 @@ go.work.sum # Editor/IDE # .idea/ # .vscode/ + +# Superpowers working artifacts (specs, plans, review notes) — never committed +docs/superpowers/ + +# Local Claude project guidance — never committed +CLAUDE.md From 21f8692f31bb0ede4bbe64a7e6cc3d02885e0ba6 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:16:06 +0000 Subject: [PATCH 02/79] chore: initialize go module and makefile --- Makefile | 26 ++++++++++++++++++++++++++ go.mod | 3 +++ 2 files changed, 29 insertions(+) create mode 100644 Makefile create mode 100644 go.mod diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9ea5588 --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +.PHONY: build test test-integration lint vet install clean + +GO ?= go + +build: + $(GO) build -o bin/exitnode ./cmd/exitnode + $(GO) build -o bin/exitnode-mcp ./cmd/exitnode-mcp + +test: + $(GO) test -race -short ./... + +test-integration: + EXITNODE_INTEGRATION=1 $(GO) test -race ./... + +vet: + $(GO) vet ./... + +lint: + golangci-lint run + +install: + $(GO) install ./cmd/exitnode + $(GO) install ./cmd/exitnode-mcp + +clean: + rm -rf bin/ diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..d5a7633 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/iker/exit-node + +go 1.22 From 4c9db1845c691e38bd3cf58f5f209f372b415921 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:17:44 +0000 Subject: [PATCH 03/79] feat(gcp): add ExitNode, ProvisionOpts, State enum with parser --- internal/gcp/types.go | 82 ++++++++++++++++++++++++++++++++++++++ internal/gcp/types_test.go | 47 ++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 internal/gcp/types.go create mode 100644 internal/gcp/types_test.go diff --git a/internal/gcp/types.go b/internal/gcp/types.go new file mode 100644 index 0000000..dbd60b9 --- /dev/null +++ b/internal/gcp/types.go @@ -0,0 +1,82 @@ +// Package gcp defines the GCP-backed exit-node types and the Provider +// interface. The concrete implementation lives in this package but is added +// in Plan 2; Plan 1 only sets the contract that internal/core consumes. +package gcp + +import ( + "fmt" + "strings" + "time" +) + +// State is the lifecycle state of an exit-node VM. +type State int + +const ( + StateUnknown State = iota + StatePending + StateRunning + StateStopped + StateTerminated +) + +func (s State) String() string { + switch s { + case StatePending: + return "pending" + case StateRunning: + return "running" + case StateStopped: + return "stopped" + case StateTerminated: + return "terminated" + default: + return "unknown" + } +} + +// ParseState parses a state string (case-insensitive). Unknown inputs return +// StateUnknown with a non-nil error. +func ParseState(s string) (State, error) { + switch strings.ToLower(s) { + case "pending": + return StatePending, nil + case "running": + return StateRunning, nil + case "stopped": + return StateStopped, nil + case "terminated": + return StateTerminated, nil + case "unknown": + return StateUnknown, nil + default: + return StateUnknown, fmt.Errorf("unknown state %q", s) + } +} + +// ExitNode is the canonical representation of a managed exit-node VM. +type ExitNode struct { + Name string + Region string + Zone string + MachineType string + PublicIP string + TailscaleIP string + DeviceID string + State State + CreatedAt time.Time +} + +// ProvisionOpts is the input to Provider.Provision. +type ProvisionOpts struct { + Name string // generated: vpn--- + Region string + Zone string // empty → provider chooses a random zone in Region + MachineType string + Hostname string // typically == Name + TailscaleAuthKey string // ephemeral, single-use, ~5m TTL + Tags []string // applied as both GCP labels and Tailscale tags + InstallScriptURL string + DiskSizeGB int + Network string +} diff --git a/internal/gcp/types_test.go b/internal/gcp/types_test.go new file mode 100644 index 0000000..28683b9 --- /dev/null +++ b/internal/gcp/types_test.go @@ -0,0 +1,47 @@ +package gcp + +import "testing" + +func TestStateString(t *testing.T) { + cases := []struct { + in State + want string + }{ + {StatePending, "pending"}, + {StateRunning, "running"}, + {StateStopped, "stopped"}, + {StateTerminated, "terminated"}, + {StateUnknown, "unknown"}, + } + for _, c := range cases { + if got := c.in.String(); got != c.want { + t.Errorf("State(%d).String() = %q, want %q", c.in, got, c.want) + } + } +} + +func TestParseState(t *testing.T) { + cases := []struct { + in string + want State + wantErr bool + }{ + {"pending", StatePending, false}, + {"PENDING", StatePending, false}, + {"running", StateRunning, false}, + {"stopped", StateStopped, false}, + {"terminated", StateTerminated, false}, + {"unknown", StateUnknown, false}, + {"banana", StateUnknown, true}, + {"", StateUnknown, true}, + } + for _, c := range cases { + got, err := ParseState(c.in) + if (err != nil) != c.wantErr { + t.Errorf("ParseState(%q) err = %v, wantErr = %v", c.in, err, c.wantErr) + } + if got != c.want { + t.Errorf("ParseState(%q) = %v, want %v", c.in, got, c.want) + } + } +} From 5ba6d79aed5f7c73655a2ce9cbbd4942d1315176 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:18:27 +0000 Subject: [PATCH 04/79] feat: declare Provider, TailscaleClient, PFSenseClient, Probe interfaces --- internal/gcp/provider.go | 15 +++++++++++++++ internal/pfsense/client.go | 10 ++++++++++ internal/pfsense/types.go | 9 +++++++++ internal/tailscale/client.go | 26 ++++++++++++++++++++++++++ internal/tailscale/types.go | 13 +++++++++++++ internal/verify/probe.go | 20 ++++++++++++++++++++ 6 files changed, 93 insertions(+) create mode 100644 internal/gcp/provider.go create mode 100644 internal/pfsense/client.go create mode 100644 internal/pfsense/types.go create mode 100644 internal/tailscale/client.go create mode 100644 internal/tailscale/types.go create mode 100644 internal/verify/probe.go diff --git a/internal/gcp/provider.go b/internal/gcp/provider.go new file mode 100644 index 0000000..ea8a34d --- /dev/null +++ b/internal/gcp/provider.go @@ -0,0 +1,15 @@ +package gcp + +import "context" + +// Provider abstracts the cloud backend that creates and manages exit-node +// VMs. Plan 1 only declares the interface; the concrete implementation +// lands in Plan 2. +type Provider interface { + Provision(ctx context.Context, opts ProvisionOpts) (*ExitNode, error) + Start(ctx context.Context, name string) error + Stop(ctx context.Context, name string) error + Destroy(ctx context.Context, name string) error + List(ctx context.Context) ([]*ExitNode, error) + Get(ctx context.Context, name string) (*ExitNode, error) +} diff --git a/internal/pfsense/client.go b/internal/pfsense/client.go new file mode 100644 index 0000000..2faf43b --- /dev/null +++ b/internal/pfsense/client.go @@ -0,0 +1,10 @@ +package pfsense + +import "context" + +// PFSenseClient abstracts the pfSense gateway management API. +type PFSenseClient interface { + GetGateway(ctx context.Context, name string) (*Gateway, error) + UpdateGatewayIP(ctx context.Context, name, ip string) error + Apply(ctx context.Context) error +} diff --git a/internal/pfsense/types.go b/internal/pfsense/types.go new file mode 100644 index 0000000..ae819e9 --- /dev/null +++ b/internal/pfsense/types.go @@ -0,0 +1,9 @@ +// Package pfsense defines the pfSense client interface and shared types. +// The concrete community pfsense-api impl lands in Plan 2. +package pfsense + +// Gateway is the canonical pfSense gateway record. +type Gateway struct { + Name string + IP string +} diff --git a/internal/tailscale/client.go b/internal/tailscale/client.go new file mode 100644 index 0000000..eb03a51 --- /dev/null +++ b/internal/tailscale/client.go @@ -0,0 +1,26 @@ +package tailscale + +import ( + "context" + "time" +) + +// TailscaleClient abstracts the Tailscale management API. +type TailscaleClient interface { + // MintEphemeralAuthKey returns a single-use, pre-authorized auth key + // tagged for the exit-node role. The key has a short TTL (~5 minutes). + MintEphemeralAuthKey(ctx context.Context, tags []string) (key string, err error) + + // WaitForDevice polls until a device with the given hostname registers. + // Returns the registered device or an error on timeout. + WaitForDevice(ctx context.Context, hostname string, timeout time.Duration) (*Device, error) + + // AuthorizeExitNode approves the device's exit-node advertisement. + AuthorizeExitNode(ctx context.Context, deviceID string) error + + // SetTags applies the tag set to the device. + SetTags(ctx context.Context, deviceID string, tags []string) error + + // DeleteDevice removes the device from the tailnet. + DeleteDevice(ctx context.Context, deviceID string) error +} diff --git a/internal/tailscale/types.go b/internal/tailscale/types.go new file mode 100644 index 0000000..d6e41b2 --- /dev/null +++ b/internal/tailscale/types.go @@ -0,0 +1,13 @@ +// Package tailscale defines the Tailscale client interface and shared +// types. The concrete OAuth client + ephemeral key minting impl lands in +// Plan 2. +package tailscale + +// Device is the canonical Tailscale device record. +type Device struct { + ID string + Hostname string + TailscaleIP string + Online bool + Tags []string +} diff --git a/internal/verify/probe.go b/internal/verify/probe.go new file mode 100644 index 0000000..d2d6abb --- /dev/null +++ b/internal/verify/probe.go @@ -0,0 +1,20 @@ +// Package verify defines the Probe interface. The concrete shellout impl +// (sets tailscale exit-node, curls, restores) lands in Plan 2. +package verify + +import "context" + +// Probe verifies egress routing from the orchestrator host. +type Probe interface { + // EgressVia temporarily routes the orchestrator host's egress through + // the given tailscale IP, curls an IP-echo service, restores the prior + // exit-node setting, and returns the observed egress IP. The restore + // runs in a defer and executes even on probe failure. + EgressVia(ctx context.Context, tailscaleIP string) (egressIP string, err error) + + // EgressDirect curls the IP-echo service with no tailscale exit-node + // override (or temporarily clears it), then restores. Used for the + // post-cutover check that traffic flows via the orchestrator's default + // route (LAN → pfSense → new exit node). + EgressDirect(ctx context.Context) (egressIP string, err error) +} From c5f6568c7f987fc120fa54693b95e94a771e65bc Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:19:31 +0000 Subject: [PATCH 05/79] feat(config): add TOML loader + env-var secret resolution --- go.mod | 2 + go.sum | 2 + internal/config/config.go | 111 +++++++++++++++++++++++ internal/config/config_test.go | 161 +++++++++++++++++++++++++++++++++ 4 files changed, 276 insertions(+) create mode 100644 go.sum create mode 100644 internal/config/config.go create mode 100644 internal/config/config_test.go diff --git a/go.mod b/go.mod index d5a7633..3d26d64 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/iker/exit-node go 1.22 + +require github.com/BurntSushi/toml v1.6.0 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..f74b269 --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= +github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..fee64b6 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,111 @@ +// Package config loads exitnode's TOML config and resolves secrets from +// the environment. +package config + +import ( + "fmt" + "os" + "time" + + "github.com/BurntSushi/toml" +) + +// Config is the root config struct, populated from TOML. +type Config struct { + GCP GCPConfig `toml:"gcp"` + Tailscale TailscaleConfig `toml:"tailscale"` + PFSense PFSenseConfig `toml:"pfsense"` + Behavior BehaviorConfig `toml:"behavior"` +} + +type GCPConfig struct { + Project string `toml:"project"` + DefaultRegion string `toml:"default_region"` + DefaultMachineType string `toml:"default_machine_type"` + DefaultZone string `toml:"default_zone"` + Network string `toml:"network"` + DiskSizeGB int `toml:"disk_size_gb"` +} + +type TailscaleConfig struct { + Tailnet string `toml:"tailnet"` + OAuthClientIDEnv string `toml:"oauth_client_id_env"` + OAuthClientSecretEnv string `toml:"oauth_client_secret_env"` + Tags []string `toml:"tags"` + EphemeralKeyTTL time.Duration `toml:"ephemeral_key_ttl"` +} + +type PFSenseConfig struct { + Host string `toml:"host"` + APIKeyEnv string `toml:"api_key_env"` + GatewayName string `toml:"gateway_name"` + VerifyTLS bool `toml:"verify_tls"` +} + +type BehaviorConfig struct { + AutoSyncPFSense bool `toml:"auto_sync_pfsense"` + VerifyPreCutover bool `toml:"verify_pre_cutover"` + VerifyPostCutover bool `toml:"verify_post_cutover"` + ProbeURL string `toml:"probe_url"` + RegistrationTimeout time.Duration `toml:"registration_timeout"` + InstallScriptURL string `toml:"install_script_url"` +} + +// Load reads and parses the TOML config at path. +func Load(path string) (*Config, error) { + b, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read config %s: %w", path, err) + } + var cfg Config + if err := toml.Unmarshal(b, &cfg); err != nil { + return nil, fmt.Errorf("parse config %s: %w", path, err) + } + return &cfg, nil +} + +// ResolveTailscaleSecrets reads the OAuth client id and secret from the env +// var names declared in the TOML config. Both must be set. +func (c *Config) ResolveTailscaleSecrets() (clientID, clientSecret string, err error) { + if c.Tailscale.OAuthClientIDEnv == "" || c.Tailscale.OAuthClientSecretEnv == "" { + return "", "", fmt.Errorf("config tailscale.oauth_client_id_env / oauth_client_secret_env must be set") + } + clientID = os.Getenv(c.Tailscale.OAuthClientIDEnv) + clientSecret = os.Getenv(c.Tailscale.OAuthClientSecretEnv) + if clientID == "" || clientSecret == "" { + return "", "", fmt.Errorf("tailscale OAuth env vars %s / %s are unset", + c.Tailscale.OAuthClientIDEnv, c.Tailscale.OAuthClientSecretEnv) + } + return clientID, clientSecret, nil +} + +// ResolvePFSenseAPIKey reads the pfSense API key from the configured env var. +func (c *Config) ResolvePFSenseAPIKey() (string, error) { + if c.PFSense.APIKeyEnv == "" { + return "", fmt.Errorf("config pfsense.api_key_env must be set") + } + key := os.Getenv(c.PFSense.APIKeyEnv) + if key == "" { + return "", fmt.Errorf("pfsense API key env var %s is unset", c.PFSense.APIKeyEnv) + } + return key, nil +} + +// GCPCredSource identifies where the GCP credentials came from. +type GCPCredSource int + +const ( + GCPSourceADC GCPCredSource = iota + GCPSourceEnvJSON +) + +// ResolveGCPCredentials returns the GCP credentials source and, when the +// source is GCPSourceEnvJSON, the JSON payload bytes. For GCPSourceADC the +// caller is expected to use google.FindDefaultCredentials and the payload +// will be empty. +func (c *Config) ResolveGCPCredentials() (GCPCredSource, []byte, error) { + if envJSON := os.Getenv("GCP_CREDENTIALS_JSON"); envJSON != "" { + return GCPSourceEnvJSON, []byte(envJSON), nil + } + return GCPSourceADC, nil, nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go new file mode 100644 index 0000000..e03d4cd --- /dev/null +++ b/internal/config/config_test.go @@ -0,0 +1,161 @@ +package config + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +const minimalToml = ` +[gcp] +project = "test-proj" +default_region = "us-west1" +default_machine_type = "e2-micro" +network = "default" +disk_size_gb = 10 + +[tailscale] +tailnet = "example.com" +oauth_client_id_env = "TS_ID" +oauth_client_secret_env = "TS_SECRET" +tags = ["tag:exit-node"] +ephemeral_key_ttl = "5m" + +[pfsense] +host = "10.0.0.1" +api_key_env = "PF_KEY" +gateway_name = "GW" +verify_tls = true + +[behavior] +auto_sync_pfsense = true +verify_pre_cutover = true +verify_post_cutover = false +probe_url = "https://api.ipify.org" +registration_timeout = "90s" +install_script_url = "https://example.com/install.sh" +` + +func writeToml(t *testing.T, body string) string { + t.Helper() + dir := t.TempDir() + p := filepath.Join(dir, "config.toml") + if err := os.WriteFile(p, []byte(body), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + return p +} + +func TestLoadMinimal(t *testing.T) { + p := writeToml(t, minimalToml) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + if cfg.GCP.Project != "test-proj" { + t.Errorf("GCP.Project = %q, want %q", cfg.GCP.Project, "test-proj") + } + if cfg.GCP.DefaultMachineType != "e2-micro" { + t.Errorf("GCP.DefaultMachineType = %q", cfg.GCP.DefaultMachineType) + } + if cfg.Tailscale.Tailnet != "example.com" { + t.Errorf("Tailscale.Tailnet = %q", cfg.Tailscale.Tailnet) + } + if cfg.Behavior.RegistrationTimeout != 90*time.Second { + t.Errorf("RegistrationTimeout = %v, want 90s", cfg.Behavior.RegistrationTimeout) + } + if !cfg.Behavior.VerifyPreCutover { + t.Errorf("VerifyPreCutover should be true") + } + if cfg.Behavior.VerifyPostCutover { + t.Errorf("VerifyPostCutover should be false") + } +} + +func TestResolveTailscaleSecrets(t *testing.T) { + p := writeToml(t, minimalToml) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + t.Setenv("TS_ID", "client-id-1") + t.Setenv("TS_SECRET", "client-secret-1") + + id, sec, err := cfg.ResolveTailscaleSecrets() + if err != nil { + t.Fatalf("ResolveTailscaleSecrets: %v", err) + } + if id != "client-id-1" || sec != "client-secret-1" { + t.Errorf("got (%q, %q), want (client-id-1, client-secret-1)", id, sec) + } +} + +func TestResolveTailscaleSecretsMissing(t *testing.T) { + p := writeToml(t, minimalToml) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + t.Setenv("TS_ID", "") + t.Setenv("TS_SECRET", "") + + if _, _, err := cfg.ResolveTailscaleSecrets(); err == nil { + t.Errorf("expected error when env vars unset") + } +} + +func TestResolvePFSenseAPIKey(t *testing.T) { + p := writeToml(t, minimalToml) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + t.Setenv("PF_KEY", "pfkey-1") + key, err := cfg.ResolvePFSenseAPIKey() + if err != nil { + t.Fatalf("ResolvePFSenseAPIKey: %v", err) + } + if key != "pfkey-1" { + t.Errorf("key = %q, want pfkey-1", key) + } +} + +func TestResolveGCPCredentialsEnvJSONOverridesADC(t *testing.T) { + // When GCP_CREDENTIALS_JSON is set, it wins. + p := writeToml(t, minimalToml) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + t.Setenv("GCP_CREDENTIALS_JSON", `{"type":"service_account","project_id":"x"}`) + src, payload, err := cfg.ResolveGCPCredentials() + if err != nil { + t.Fatalf("ResolveGCPCredentials: %v", err) + } + if src != GCPSourceEnvJSON { + t.Errorf("source = %v, want GCPSourceEnvJSON", src) + } + if string(payload) == "" { + t.Errorf("payload should not be empty when env JSON is set") + } +} + +func TestResolveGCPCredentialsFallsBackToADC(t *testing.T) { + p := writeToml(t, minimalToml) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + t.Setenv("GCP_CREDENTIALS_JSON", "") // explicitly clear + src, payload, err := cfg.ResolveGCPCredentials() + if err != nil { + t.Fatalf("ResolveGCPCredentials: %v", err) + } + if src != GCPSourceADC { + t.Errorf("source = %v, want GCPSourceADC", src) + } + if len(payload) != 0 { + t.Errorf("ADC payload should be empty (resolved later by google sdk)") + } +} From ff8ad5bf0b32c7c3531d2c1abdf9b43d3ac6936e Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:21:59 +0000 Subject: [PATCH 06/79] feat(state): add flock-protected JSON cache for active exit node --- go.mod | 8 ++- go.sum | 4 ++ internal/state/state.go | 117 +++++++++++++++++++++++++++++++++++ internal/state/state_test.go | 105 +++++++++++++++++++++++++++++++ 4 files changed, 232 insertions(+), 2 deletions(-) create mode 100644 internal/state/state.go create mode 100644 internal/state/state_test.go diff --git a/go.mod b/go.mod index 3d26d64..5c53014 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,9 @@ module github.com/iker/exit-node -go 1.22 +go 1.24.0 -require github.com/BurntSushi/toml v1.6.0 // indirect +require ( + github.com/BurntSushi/toml v1.6.0 // indirect + github.com/gofrs/flock v0.13.0 // indirect + golang.org/x/sys v0.37.0 // indirect +) diff --git a/go.sum b/go.sum index f74b269..9b148a3 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,6 @@ github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= +github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= +github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= diff --git a/internal/state/state.go b/internal/state/state.go new file mode 100644 index 0000000..96f1b0b --- /dev/null +++ b/internal/state/state.go @@ -0,0 +1,117 @@ +// Package state implements the on-disk cache for exit-node state. The file +// is treated as a best-effort cache; GCP labels + Tailscale tags are the +// source of truth. A POSIX file lock prevents concurrent rotates. +package state + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/gofrs/flock" + "github.com/iker/exit-node/internal/gcp" +) + +// ErrLocked is returned when another process holds the lock. +var ErrLocked = errors.New("state file already locked by another process") + +// Store is a flock-protected JSON cache for the current active exit node. +type Store struct { + path string + lock *flock.Flock +} + +// Open creates the parent directory if needed and acquires a non-blocking +// exclusive lock on the state file. The lock is released by Close. +func Open(path string) (*Store, error) { + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return nil, fmt.Errorf("mkdir state parent: %w", err) + } + l := flock.New(path + ".lock") + got, err := l.TryLock() + if err != nil { + return nil, fmt.Errorf("acquire state lock: %w", err) + } + if !got { + return nil, ErrLocked + } + return &Store{path: path, lock: l}, nil +} + +// Close releases the file lock. Idempotent. +func (s *Store) Close() error { + if s.lock == nil { + return nil + } + err := s.lock.Unlock() + s.lock = nil + return err +} + +type onDisk struct { + Active *gcp.ExitNode `json:"active,omitempty"` +} + +func (s *Store) read() (*onDisk, error) { + b, err := os.ReadFile(s.path) + if errors.Is(err, os.ErrNotExist) { + return &onDisk{}, nil + } + if err != nil { + return nil, fmt.Errorf("read state: %w", err) + } + if len(b) == 0 { + return &onDisk{}, nil + } + var d onDisk + if err := json.Unmarshal(b, &d); err != nil { + return nil, fmt.Errorf("parse state: %w", err) + } + return &d, nil +} + +func (s *Store) write(d *onDisk) error { + b, err := json.MarshalIndent(d, "", " ") + if err != nil { + return fmt.Errorf("marshal state: %w", err) + } + tmp := s.path + ".tmp" + if err := os.WriteFile(tmp, b, 0o600); err != nil { + return fmt.Errorf("write tmp state: %w", err) + } + if err := os.Rename(tmp, s.path); err != nil { + return fmt.Errorf("rename state: %w", err) + } + return nil +} + +// GetActive returns the cached active exit node, or nil if none recorded. +func (s *Store) GetActive() (*gcp.ExitNode, error) { + d, err := s.read() + if err != nil { + return nil, err + } + return d.Active, nil +} + +// SetActive records the given node as the active one. +func (s *Store) SetActive(n *gcp.ExitNode) error { + d, err := s.read() + if err != nil { + return err + } + d.Active = n + return s.write(d) +} + +// ClearActive removes the active-node record. +func (s *Store) ClearActive() error { + d, err := s.read() + if err != nil { + return err + } + d.Active = nil + return s.write(d) +} diff --git a/internal/state/state_test.go b/internal/state/state_test.go new file mode 100644 index 0000000..93a5322 --- /dev/null +++ b/internal/state/state_test.go @@ -0,0 +1,105 @@ +package state + +import ( + "path/filepath" + "testing" + "time" + + "github.com/iker/exit-node/internal/gcp" +) + +func TestSaveLoadRoundtrip(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "state.json") + + s, err := Open(path) + if err != nil { + t.Fatalf("Open: %v", err) + } + defer s.Close() + + active := &gcp.ExitNode{ + Name: "vpn-us-west1-a-xyz1", + Region: "us-west1", + Zone: "us-west1-a", + MachineType: "e2-micro", + PublicIP: "35.1.1.1", + TailscaleIP: "100.64.0.1", + DeviceID: "dev1", + State: gcp.StateRunning, + CreatedAt: time.Date(2026, 5, 12, 10, 0, 0, 0, time.UTC), + } + if err := s.SetActive(active); err != nil { + t.Fatalf("SetActive: %v", err) + } + if err := s.Close(); err != nil { + t.Fatalf("close first: %v", err) + } + + s2, err := Open(path) + if err != nil { + t.Fatalf("reopen: %v", err) + } + defer s2.Close() + + got, err := s2.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got == nil { + t.Fatalf("got nil active node, expected one") + } + if got.Name != active.Name { + t.Errorf("Name = %q, want %q", got.Name, active.Name) + } + if got.State != gcp.StateRunning { + t.Errorf("State = %v, want StateRunning", got.State) + } + if !got.CreatedAt.Equal(active.CreatedAt) { + t.Errorf("CreatedAt = %v, want %v", got.CreatedAt, active.CreatedAt) + } +} + +func TestGetActiveOnEmptyState(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "state.json") + + s, err := Open(path) + if err != nil { + t.Fatalf("Open: %v", err) + } + defer s.Close() + + got, err := s.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got != nil { + t.Errorf("expected nil for empty state, got %+v", got) + } +} + +func TestClearActive(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "state.json") + + s, err := Open(path) + if err != nil { + t.Fatalf("Open: %v", err) + } + defer s.Close() + + if err := s.SetActive(&gcp.ExitNode{Name: "x"}); err != nil { + t.Fatalf("SetActive: %v", err) + } + if err := s.ClearActive(); err != nil { + t.Fatalf("ClearActive: %v", err) + } + got, err := s.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got != nil { + t.Errorf("expected nil after Clear, got %+v", got) + } +} From 052dd9822b654587891cda163b360116721e081c Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:22:55 +0000 Subject: [PATCH 07/79] test(state): add flock contention test --- internal/state/state_test.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/internal/state/state_test.go b/internal/state/state_test.go index 93a5322..0ca7b5c 100644 --- a/internal/state/state_test.go +++ b/internal/state/state_test.go @@ -1,6 +1,7 @@ package state import ( + "errors" "path/filepath" "testing" "time" @@ -103,3 +104,31 @@ func TestClearActive(t *testing.T) { t.Errorf("expected nil after Clear, got %+v", got) } } + +func TestFlockContention(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "state.json") + + first, err := Open(path) + if err != nil { + t.Fatalf("first Open: %v", err) + } + defer first.Close() + + _, err = Open(path) + if !errors.Is(err, ErrLocked) { + t.Fatalf("second Open: got err=%v, want ErrLocked", err) + } + + // After releasing the first, a second Open succeeds. + if err := first.Close(); err != nil { + t.Fatalf("close first: %v", err) + } + second, err := Open(path) + if err != nil { + t.Fatalf("Open after release: %v", err) + } + if err := second.Close(); err != nil { + t.Fatalf("close second: %v", err) + } +} From 103b2b1ef04f3402b3c412c4bb18d55cba3f2b1c Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:23:39 +0000 Subject: [PATCH 08/79] feat(core): add Core struct, Deps, New ctor, CriticalError --- internal/core/core.go | 56 +++++++++++++++++++++++++++++++++++++++++ internal/core/errors.go | 21 ++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 internal/core/core.go create mode 100644 internal/core/errors.go diff --git a/internal/core/core.go b/internal/core/core.go new file mode 100644 index 0000000..108b8b2 --- /dev/null +++ b/internal/core/core.go @@ -0,0 +1,56 @@ +// Package core implements the orchestration logic for exitnode: rotate, +// up/down, status, health, sync, and cost. All external IO is abstracted +// through interfaces (gcp.Provider, tailscale.TailscaleClient, +// pfsense.PFSenseClient, verify.Probe) so this package can be exercised +// with hand-written mocks. +package core + +import ( + "log/slog" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/pfsense" + "github.com/iker/exit-node/internal/state" + "github.com/iker/exit-node/internal/tailscale" + "github.com/iker/exit-node/internal/verify" +) + +// Core wires the orchestrator's dependencies. +type Core struct { + cfg *config.Config + provider gcp.Provider + ts tailscale.TailscaleClient + pf pfsense.PFSenseClient + probe verify.Probe + store *state.Store + log *slog.Logger +} + +// Deps groups the constructor inputs. +type Deps struct { + Config *config.Config + Provider gcp.Provider + TS tailscale.TailscaleClient + PF pfsense.PFSenseClient + Probe verify.Probe + Store *state.Store + Logger *slog.Logger +} + +// New constructs a Core. Logger defaults to slog.Default if nil. +func New(d Deps) *Core { + log := d.Logger + if log == nil { + log = slog.Default() + } + return &Core{ + cfg: d.Config, + provider: d.Provider, + ts: d.TS, + pf: d.PF, + probe: d.Probe, + store: d.Store, + log: log, + } +} diff --git a/internal/core/errors.go b/internal/core/errors.go new file mode 100644 index 0000000..f18ec0a --- /dev/null +++ b/internal/core/errors.go @@ -0,0 +1,21 @@ +package core + +import "fmt" + +// CriticalError signals an unrecoverable rotate failure where the pfSense +// revert itself failed. Callers MUST surface this loudly: both the old and +// new nodes are likely alive and pfSense's gateway state is ambiguous. +type CriticalError struct { + Message string + PrimaryErr error + RevertErr error + NewNodeName string + NewDeviceID string +} + +func (e *CriticalError) Error() string { + return fmt.Sprintf("CRITICAL: %s: primary_err=%v; revert_err=%v; new_node=%s; new_device=%s", + e.Message, e.PrimaryErr, e.RevertErr, e.NewNodeName, e.NewDeviceID) +} + +func (e *CriticalError) Unwrap() error { return e.PrimaryErr } From 8c9b8d0713825c1b962824c1c90aadd0b355a782 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:24:31 +0000 Subject: [PATCH 09/79] test(core): add hand-written mocks for Provider, TS, PF, Probe --- internal/core/mocks_test.go | 207 ++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 internal/core/mocks_test.go diff --git a/internal/core/mocks_test.go b/internal/core/mocks_test.go new file mode 100644 index 0000000..4ecdd07 --- /dev/null +++ b/internal/core/mocks_test.go @@ -0,0 +1,207 @@ +package core + +import ( + "context" + "errors" + "sync" + "time" + + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/pfsense" + "github.com/iker/exit-node/internal/tailscale" +) + +// callRecord captures the name and args of a mock invocation. +type callRecord struct { + Name string + Args []any +} + +type recorder struct { + mu sync.Mutex + calls []callRecord +} + +func (r *recorder) record(name string, args ...any) { + r.mu.Lock() + defer r.mu.Unlock() + r.calls = append(r.calls, callRecord{Name: name, Args: args}) +} + +func (r *recorder) names() []string { + r.mu.Lock() + defer r.mu.Unlock() + out := make([]string, len(r.calls)) + for i, c := range r.calls { + out[i] = c.Name + } + return out +} + +// --- Provider mock ---------------------------------------------------------- + +type mockProvider struct { + recorder + ProvisionResult *gcp.ExitNode + ProvisionErr error + StartErr error + StopErr error + DestroyErr map[string]error // keyed by name + ListResult []*gcp.ExitNode + ListErr error + GetResult map[string]*gcp.ExitNode + GetErr error +} + +func newMockProvider() *mockProvider { + return &mockProvider{ + DestroyErr: map[string]error{}, + GetResult: map[string]*gcp.ExitNode{}, + } +} + +func (m *mockProvider) Provision(ctx context.Context, opts gcp.ProvisionOpts) (*gcp.ExitNode, error) { + m.record("Provision", opts.Name, opts.Region, opts.MachineType) + if m.ProvisionErr != nil { + return nil, m.ProvisionErr + } + return m.ProvisionResult, nil +} +func (m *mockProvider) Start(ctx context.Context, name string) error { + m.record("Start", name) + return m.StartErr +} +func (m *mockProvider) Stop(ctx context.Context, name string) error { + m.record("Stop", name) + return m.StopErr +} +func (m *mockProvider) Destroy(ctx context.Context, name string) error { + m.record("Destroy", name) + return m.DestroyErr[name] +} +func (m *mockProvider) List(ctx context.Context) ([]*gcp.ExitNode, error) { + m.record("List") + return m.ListResult, m.ListErr +} +func (m *mockProvider) Get(ctx context.Context, name string) (*gcp.ExitNode, error) { + m.record("Get", name) + if m.GetErr != nil { + return nil, m.GetErr + } + return m.GetResult[name], nil +} + +// --- TailscaleClient mock --------------------------------------------------- + +type mockTS struct { + recorder + MintKey string + MintErr error + WaitDevice *tailscale.Device + WaitErr error + AuthorizeErr error + SetTagsErr error + DeleteDeviceErr map[string]error // keyed by device id + BlockMintUntilSeen bool // if true, returns context.Canceled if ctx cancels +} + +func newMockTS() *mockTS { + return &mockTS{ + DeleteDeviceErr: map[string]error{}, + MintKey: "tskey-ephemeral", + } +} + +func (m *mockTS) MintEphemeralAuthKey(ctx context.Context, tags []string) (string, error) { + m.record("MintEphemeralAuthKey", tags) + if m.MintErr != nil { + return "", m.MintErr + } + return m.MintKey, nil +} +func (m *mockTS) WaitForDevice(ctx context.Context, hostname string, timeout time.Duration) (*tailscale.Device, error) { + m.record("WaitForDevice", hostname, timeout) + if m.WaitErr != nil { + return nil, m.WaitErr + } + return m.WaitDevice, nil +} +func (m *mockTS) AuthorizeExitNode(ctx context.Context, deviceID string) error { + m.record("AuthorizeExitNode", deviceID) + return m.AuthorizeErr +} +func (m *mockTS) SetTags(ctx context.Context, deviceID string, tags []string) error { + m.record("SetTags", deviceID, tags) + return m.SetTagsErr +} +func (m *mockTS) DeleteDevice(ctx context.Context, deviceID string) error { + m.record("DeleteDevice", deviceID) + return m.DeleteDeviceErr[deviceID] +} + +// --- PFSenseClient mock ----------------------------------------------------- + +type mockPF struct { + recorder + Gateways map[string]string // name → IP + GetErr error + UpdateErr map[string]error // keyed by IP being set; matched on the new IP arg + UpdateErrFirstCall error // if non-nil, fails on first call only (for revert tests) + ApplyErr []error // pop per call (1st call uses [0], 2nd [1]) + applyIdx int +} + +func newMockPF() *mockPF { + return &mockPF{ + Gateways: map[string]string{}, + UpdateErr: map[string]error{}, + } +} + +func (m *mockPF) GetGateway(ctx context.Context, name string) (*pfsense.Gateway, error) { + m.record("GetGateway", name) + if m.GetErr != nil { + return nil, m.GetErr + } + ip, ok := m.Gateways[name] + if !ok { + return nil, errors.New("gateway not found") + } + return &pfsense.Gateway{Name: name, IP: ip}, nil +} +func (m *mockPF) UpdateGatewayIP(ctx context.Context, name, ip string) error { + m.record("UpdateGatewayIP", name, ip) + if err, ok := m.UpdateErr[ip]; ok { + return err + } + m.Gateways[name] = ip + return nil +} +func (m *mockPF) Apply(ctx context.Context) error { + m.record("Apply") + i := m.applyIdx + m.applyIdx++ + if i < len(m.ApplyErr) { + return m.ApplyErr[i] + } + return nil +} + +// --- Probe mock ------------------------------------------------------------- + +type mockProbe struct { + recorder + EgressViaResult string + EgressViaErr error + EgressDirectResult string + EgressDirectErr error +} + +func (m *mockProbe) EgressVia(ctx context.Context, tailscaleIP string) (string, error) { + m.record("EgressVia", tailscaleIP) + return m.EgressViaResult, m.EgressViaErr +} +func (m *mockProbe) EgressDirect(ctx context.Context) (string, error) { + m.record("EgressDirect") + return m.EgressDirectResult, m.EgressDirectErr +} From 56318b7a9a9b4e7072f44e07d40ad0ef70cac320 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:28:02 +0000 Subject: [PATCH 10/79] feat(core): rotate happy path with mocked deps (no pfsense branch) --- internal/core/mocks_test.go | 36 +++++- internal/core/rotate.go | 150 +++++++++++++++++++++++++ internal/core/rotate_test.go | 211 +++++++++++++++++++++++++++++++++++ 3 files changed, 395 insertions(+), 2 deletions(-) create mode 100644 internal/core/rotate.go create mode 100644 internal/core/rotate_test.go diff --git a/internal/core/mocks_test.go b/internal/core/mocks_test.go index 4ecdd07..10f990f 100644 --- a/internal/core/mocks_test.go +++ b/internal/core/mocks_test.go @@ -11,21 +11,43 @@ import ( "github.com/iker/exit-node/internal/tailscale" ) -// callRecord captures the name and args of a mock invocation. +// clock issues monotonically-increasing sequence numbers shared across all +// mocks in a fixture, so the test can reconstruct the chronological order +// of cross-mock calls (e.g. ts.Mint → prov.Provision → ts.WaitForDevice). +type clock struct { + mu sync.Mutex + seq int +} + +func (c *clock) tick() int { + c.mu.Lock() + defer c.mu.Unlock() + c.seq++ + return c.seq +} + +// callRecord captures the name, args, and chronological sequence of a mock +// invocation. type callRecord struct { + Seq int Name string Args []any } type recorder struct { mu sync.Mutex + clock *clock // shared across mocks in the same fixture calls []callRecord } func (r *recorder) record(name string, args ...any) { + seq := 0 + if r.clock != nil { + seq = r.clock.tick() + } r.mu.Lock() defer r.mu.Unlock() - r.calls = append(r.calls, callRecord{Name: name, Args: args}) + r.calls = append(r.calls, callRecord{Seq: seq, Name: name, Args: args}) } func (r *recorder) names() []string { @@ -60,6 +82,10 @@ func newMockProvider() *mockProvider { } } +// attachClock wires a shared sequence clock into the mock's recorder so its +// calls can be merged chronologically with other mocks in the fixture. +func (m *mockProvider) attachClock(c *clock) { m.recorder.clock = c } + func (m *mockProvider) Provision(ctx context.Context, opts gcp.ProvisionOpts) (*gcp.ExitNode, error) { m.record("Provision", opts.Name, opts.Region, opts.MachineType) if m.ProvisionErr != nil { @@ -112,6 +138,8 @@ func newMockTS() *mockTS { } } +func (m *mockTS) attachClock(c *clock) { m.recorder.clock = c } + func (m *mockTS) MintEphemeralAuthKey(ctx context.Context, tags []string) (string, error) { m.record("MintEphemeralAuthKey", tags) if m.MintErr != nil { @@ -158,6 +186,8 @@ func newMockPF() *mockPF { } } +func (m *mockPF) attachClock(c *clock) { m.recorder.clock = c } + func (m *mockPF) GetGateway(ctx context.Context, name string) (*pfsense.Gateway, error) { m.record("GetGateway", name) if m.GetErr != nil { @@ -205,3 +235,5 @@ func (m *mockProbe) EgressDirect(ctx context.Context) (string, error) { m.record("EgressDirect") return m.EgressDirectResult, m.EgressDirectErr } + +func (m *mockProbe) attachClock(c *clock) { m.recorder.clock = c } diff --git a/internal/core/rotate.go b/internal/core/rotate.go new file mode 100644 index 0000000..9c8c577 --- /dev/null +++ b/internal/core/rotate.go @@ -0,0 +1,150 @@ +package core + +import ( + "context" + "crypto/rand" + "encoding/hex" + "fmt" + "time" + + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/tailscale" +) + +// RotateOpts controls a single rotate invocation. +type RotateOpts struct { + Region string // empty → cfg.GCP.DefaultRegion + MachineType string // empty → cfg.GCP.DefaultMachineType +} + +// RotateResult captures both the destroyed and provisioned nodes. +type RotateResult struct { + Old *gcp.ExitNode + New *gcp.ExitNode +} + +// Rotate provisions a new exit node, verifies it, optionally cuts pfSense +// over, then tears down the old one. See spec §6 for the rollback rules. +func (c *Core) Rotate(ctx context.Context, opts RotateOpts) (*RotateResult, error) { + region := opts.Region + if region == "" { + region = c.cfg.GCP.DefaultRegion + } + machine := opts.MachineType + if machine == "" { + machine = c.cfg.GCP.DefaultMachineType + } + + // 1. Snapshot current state. + old, err := c.store.GetActive() + if err != nil { + return nil, fmt.Errorf("read state: %w", err) + } + + // 2. Mint ephemeral auth key. + authKey, err := c.ts.MintEphemeralAuthKey(ctx, c.cfg.Tailscale.Tags) + if err != nil { + return nil, fmt.Errorf("mint ephemeral auth key: %w", err) + } + + // 3. Provision new VM. + name := genName(region) + newNode, err := c.provider.Provision(ctx, gcp.ProvisionOpts{ + Name: name, + Region: region, + Zone: c.cfg.GCP.DefaultZone, + MachineType: machine, + Hostname: name, + TailscaleAuthKey: authKey, + Tags: c.cfg.Tailscale.Tags, + InstallScriptURL: c.cfg.Behavior.InstallScriptURL, + DiskSizeGB: c.cfg.GCP.DiskSizeGB, + Network: c.cfg.GCP.Network, + }) + if err != nil { + return nil, fmt.Errorf("provision new node: %w", err) + } + + // From here on, newNode exists in GCP. Track cleanup. + var newDevice *tailscale.Device + cleanup := func(reason error) { + c.log.Warn("rotate failed, tearing down new node", + "reason", reason, "name", newNode.Name) + cctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if newDevice != nil { + if dErr := c.ts.DeleteDevice(cctx, newDevice.ID); dErr != nil { + c.log.Error("cleanup DeleteDevice failed", "err", dErr) + } + } + if dErr := c.provider.Destroy(cctx, newNode.Name); dErr != nil { + c.log.Error("cleanup Destroy failed", "err", dErr) + } + } + + // 4. Wait for Tailscale registration. + newDevice, err = c.ts.WaitForDevice(ctx, newNode.Name, c.cfg.Behavior.RegistrationTimeout) + if err != nil { + cleanup(fmt.Errorf("device never registered: %w", err)) + return nil, fmt.Errorf("wait for device: %w", err) + } + + // 5. Authorize as exit node. + if err := c.ts.AuthorizeExitNode(ctx, newDevice.ID); err != nil { + cleanup(fmt.Errorf("authorize as exit node: %w", err)) + return nil, fmt.Errorf("authorize exit node: %w", err) + } + + // 6. Set tags. + if err := c.ts.SetTags(ctx, newDevice.ID, c.cfg.Tailscale.Tags); err != nil { + cleanup(fmt.Errorf("set tags: %w", err)) + return nil, fmt.Errorf("set tags: %w", err) + } + + newNode.TailscaleIP = newDevice.TailscaleIP + newNode.DeviceID = newDevice.ID + + // 7. Pre-cutover probe. + if c.cfg.Behavior.VerifyPreCutover { + egress, err := c.probe.EgressVia(ctx, newDevice.TailscaleIP) + if err != nil { + cleanup(fmt.Errorf("probe failed: %w", err)) + return nil, fmt.Errorf("pre-cutover probe: %w", err) + } + if egress != newNode.PublicIP { + cleanup(fmt.Errorf("egress mismatch: probe=%s vm=%s", egress, newNode.PublicIP)) + return nil, fmt.Errorf("egress IP mismatch: got %s, want %s", egress, newNode.PublicIP) + } + } + + // (pfSense branch added in Task 10) + + // Tear down old node (best-effort). + if old != nil { + if err := c.provider.Destroy(ctx, old.Name); err != nil { + c.log.Error("teardown old VM failed (best-effort)", + "err", err, "name", old.Name) + } + if old.DeviceID != "" { + if err := c.ts.DeleteDevice(ctx, old.DeviceID); err != nil { + c.log.Error("teardown old device failed (best-effort)", + "err", err, "device_id", old.DeviceID) + } + } + } + + // Update state cache. + if err := c.store.SetActive(newNode); err != nil { + c.log.Warn("failed to update state cache; will reconcile next run", + "err", err) + } + + return &RotateResult{Old: old, New: newNode}, nil +} + +// genName returns a name in the convention vpn--<6-hex>. +func genName(region string) string { + var b [3]byte + _, _ = rand.Read(b[:]) + return fmt.Sprintf("vpn-%s-%s", region, hex.EncodeToString(b[:])) +} diff --git a/internal/core/rotate_test.go b/internal/core/rotate_test.go new file mode 100644 index 0000000..34620d0 --- /dev/null +++ b/internal/core/rotate_test.go @@ -0,0 +1,211 @@ +package core + +import ( + "context" + "errors" + "log/slog" + "reflect" + "sort" + "testing" + "time" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" + "github.com/iker/exit-node/internal/tailscale" +) + +// rotateFixture wires a Core with all mocks pre-populated for a happy +// rotate (no pfSense sync by default; tests override per case). +type rotateFixture struct { + cfg *config.Config + prov *mockProvider + ts *mockTS + pf *mockPF + probe *mockProbe + store *state.Store + core *Core + clock *clock + + oldNode *gcp.ExitNode + newNode *gcp.ExitNode + newDev *tailscale.Device +} + +// orderedNames returns the names of every recorded call across all mocks +// in chronological order (using the shared sequence clock). +func (f *rotateFixture) orderedNames() []string { + all := append([]callRecord{}, f.ts.calls...) + all = append(all, f.prov.calls...) + all = append(all, f.pf.calls...) + all = append(all, f.probe.calls...) + sort.Slice(all, func(i, j int) bool { return all[i].Seq < all[j].Seq }) + out := make([]string, len(all)) + for i, c := range all { + out[i] = c.Name + } + return out +} + +func newRotateFixture(t *testing.T) *rotateFixture { + t.Helper() + statePath := t.TempDir() + "/state.json" + store, err := state.Open(statePath) + if err != nil { + t.Fatalf("state.Open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + + old := &gcp.ExitNode{ + Name: "vpn-old", Region: "us-west1", Zone: "us-west1-a", + MachineType: "e2-micro", PublicIP: "35.0.0.1", TailscaleIP: "100.64.0.1", + DeviceID: "dev-old", State: gcp.StateRunning, + CreatedAt: time.Date(2026, 5, 1, 0, 0, 0, 0, time.UTC), + } + if err := store.SetActive(old); err != nil { + t.Fatalf("seed: %v", err) + } + + newN := &gcp.ExitNode{ + Name: "vpn-new", Region: "asia-southeast1", Zone: "asia-southeast1-a", + MachineType: "e2-micro", PublicIP: "35.9.9.9", + State: gcp.StateRunning, CreatedAt: time.Now(), + } + dev := &tailscale.Device{ID: "dev-new", Hostname: "vpn-new", TailscaleIP: "100.64.0.9", Online: true} + + cfg := &config.Config{ + GCP: config.GCPConfig{ + Project: "p", DefaultRegion: "us-west1", DefaultMachineType: "e2-micro", + Network: "default", DiskSizeGB: 10, + }, + Tailscale: config.TailscaleConfig{Tailnet: "x.com", Tags: []string{"tag:exit-node"}, EphemeralKeyTTL: 5 * time.Minute}, + PFSense: config.PFSenseConfig{GatewayName: "GW"}, + Behavior: config.BehaviorConfig{ + AutoSyncPFSense: false, // happy path without pfSense + VerifyPreCutover: true, + VerifyPostCutover: false, + ProbeURL: "https://example.com/ip", + RegistrationTimeout: 90 * time.Second, + InstallScriptURL: "https://example.com/install.sh", + }, + } + + clk := &clock{} + + prov := newMockProvider() + prov.ProvisionResult = newN + prov.GetResult[old.Name] = old + prov.attachClock(clk) + + ts := newMockTS() + ts.WaitDevice = dev + ts.attachClock(clk) + + pf := newMockPF() + pf.Gateways["GW"] = "100.64.0.1" + pf.attachClock(clk) + + probe := &mockProbe{EgressViaResult: newN.PublicIP} + probe.attachClock(clk) + + c := New(Deps{ + Config: cfg, Provider: prov, TS: ts, PF: pf, Probe: probe, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) + + return &rotateFixture{ + cfg: cfg, prov: prov, ts: ts, pf: pf, probe: probe, + store: store, core: c, clock: clk, + oldNode: old, newNode: newN, newDev: dev, + } +} + +// testWriter forwards slog output to t.Logf so tests stay readable. +type testWriter struct{ t *testing.T } + +func (w testWriter) Write(p []byte) (int, error) { w.t.Logf("%s", p); return len(p), nil } + +func TestRotateHappyPath_NoPFSense(t *testing.T) { + f := newRotateFixture(t) + res, err := f.core.Rotate(context.Background(), RotateOpts{Region: "asia-southeast1"}) + if err != nil { + t.Fatalf("Rotate err = %v", err) + } + if res.New == nil || res.New.Name != f.newNode.Name { + t.Errorf("res.New.Name = %v, want %s", res.New, f.newNode.Name) + } + if res.Old == nil || res.Old.Name != f.oldNode.Name { + t.Errorf("res.Old.Name = %v, want %s", res.Old, f.oldNode.Name) + } + + wantCalls := []string{ + "MintEphemeralAuthKey", + "Provision", + "WaitForDevice", + "AuthorizeExitNode", + "SetTags", + "EgressVia", + } + if !reflect.DeepEqual(f.ts.names()[:1], wantCalls[:1]) { + t.Errorf("first call: %v", f.ts.names()) + } + // Chronological sequence across all mocks (uses shared clock): + combined := f.orderedNames() + if !containsAll(combined, wantCalls...) { + t.Errorf("missing expected calls; got combined=%v", combined) + } + + // pfSense should NOT have been touched. + if len(f.pf.names()) != 0 { + t.Errorf("expected no pfSense calls, got %v", f.pf.names()) + } + + // State cache updated to new node. + got, err := f.store.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got == nil || got.Name != f.newNode.Name { + t.Errorf("state.Active = %v, want %s", got, f.newNode.Name) + } + + // Old node should be destroyed (best-effort, but called). + if !hasCallWithArg(f.prov.calls, "Destroy", f.oldNode.Name) { + t.Errorf("expected Destroy(%s) call, got %v", f.oldNode.Name, f.prov.calls) + } + if !hasCallWithArg(f.ts.calls, "DeleteDevice", f.oldNode.DeviceID) { + t.Errorf("expected DeleteDevice(%s) call, got %v", f.oldNode.DeviceID, f.ts.calls) + } + + // New node should NOT have been destroyed. + if hasCallWithArg(f.prov.calls, "Destroy", f.newNode.Name) { + t.Errorf("new node was destroyed on happy path; calls=%v", f.prov.calls) + } +} + +func containsAll(haystack []string, needles ...string) bool { + idx := 0 + for _, h := range haystack { + if idx < len(needles) && h == needles[idx] { + idx++ + } + } + return idx == len(needles) +} + +func hasCallWithArg(calls []callRecord, name string, arg any) bool { + for _, c := range calls { + if c.Name != name { + continue + } + for _, a := range c.Args { + if reflect.DeepEqual(a, arg) { + return true + } + } + } + return false +} + +// Sentinel to prevent "imported and not used" if errors becomes unused later. +var _ = errors.New From d21a15f41f888d8f48180fb86336201ba0cb5f65 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:28:47 +0000 Subject: [PATCH 11/79] feat(core): rotate pfSense cutover branch with revert discipline --- internal/core/rotate.go | 55 +++++++++++++++++++++++++++++++++++- internal/core/rotate_test.go | 21 ++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/internal/core/rotate.go b/internal/core/rotate.go index 9c8c577..7bc8d62 100644 --- a/internal/core/rotate.go +++ b/internal/core/rotate.go @@ -8,6 +8,7 @@ import ( "time" "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/pfsense" "github.com/iker/exit-node/internal/tailscale" ) @@ -117,7 +118,59 @@ func (c *Core) Rotate(ctx context.Context, opts RotateOpts) (*RotateResult, erro } } - // (pfSense branch added in Task 10) + // Snapshot the current pfSense gateway for revert. + var oldGw *pfsense.Gateway + if c.cfg.Behavior.AutoSyncPFSense { + oldGw, err = c.pf.GetGateway(ctx, c.cfg.PFSense.GatewayName) + if err != nil { + cleanup(fmt.Errorf("snapshot pfSense gateway: %w", err)) + return nil, fmt.Errorf("snapshot pfsense gateway: %w", err) + } + + // 8. POINT OF NO RETURN: update + apply. + if err := c.pf.UpdateGatewayIP(ctx, c.cfg.PFSense.GatewayName, newDevice.TailscaleIP); err != nil { + // UpdateGatewayIP failed before any Apply — live config unchanged. + // Defensive: still try a revert call in case the impl staged partial config. + rctx, rcancel := context.WithTimeout(context.Background(), 30*time.Second) + defer rcancel() + _ = c.pf.UpdateGatewayIP(rctx, c.cfg.PFSense.GatewayName, oldGw.IP) + cleanup(fmt.Errorf("pfsense update gateway failed: %w", err)) + return nil, fmt.Errorf("update pfsense gateway: %w", err) + } + if err := c.pf.Apply(ctx); err != nil { + // Apply failed: a stale stage might still exist. Revert. + rctx, rcancel := context.WithTimeout(context.Background(), 30*time.Second) + defer rcancel() + revertErr := c.pf.UpdateGatewayIP(rctx, c.cfg.PFSense.GatewayName, oldGw.IP) + if revertErr == nil { + revertErr = c.pf.Apply(rctx) + } + if revertErr != nil { + return nil, &CriticalError{ + Message: "pfsense apply failed AND revert failed", + PrimaryErr: err, + RevertErr: revertErr, + NewNodeName: newNode.Name, + NewDeviceID: newDevice.ID, + } + } + cleanup(fmt.Errorf("pfsense apply failed (reverted): %w", err)) + return nil, fmt.Errorf("apply pfsense changes: %w", err) + } + + // 9. (Optional) post-cutover probe. + if c.cfg.Behavior.VerifyPostCutover { + egress, perr := c.probe.EgressDirect(ctx) + if perr != nil || egress != newNode.PublicIP { + rctx, rcancel := context.WithTimeout(context.Background(), 30*time.Second) + defer rcancel() + _ = c.pf.UpdateGatewayIP(rctx, c.cfg.PFSense.GatewayName, oldGw.IP) + _ = c.pf.Apply(rctx) + cleanup(fmt.Errorf("post-cutover probe failed: egress=%s err=%v", egress, perr)) + return nil, fmt.Errorf("post-cutover probe: got %s, want %s (err=%v)", egress, newNode.PublicIP, perr) + } + } + } // Tear down old node (best-effort). if old != nil { diff --git a/internal/core/rotate_test.go b/internal/core/rotate_test.go index 34620d0..c3adea7 100644 --- a/internal/core/rotate_test.go +++ b/internal/core/rotate_test.go @@ -209,3 +209,24 @@ func hasCallWithArg(calls []callRecord, name string, arg any) bool { // Sentinel to prevent "imported and not used" if errors becomes unused later. var _ = errors.New + +func TestRotateHappyPath_WithPFSense(t *testing.T) { + f := newRotateFixture(t) + f.cfg.Behavior.AutoSyncPFSense = true + + res, err := f.core.Rotate(context.Background(), RotateOpts{Region: "asia-southeast1"}) + if err != nil { + t.Fatalf("Rotate err = %v", err) + } + if res.New == nil || res.New.Name != f.newNode.Name { + t.Errorf("res.New = %v", res.New) + } + + wantPFCalls := []string{"GetGateway", "UpdateGatewayIP", "Apply"} + if !reflect.DeepEqual(f.pf.names(), wantPFCalls) { + t.Errorf("pfSense calls = %v, want %v", f.pf.names(), wantPFCalls) + } + if got := f.pf.Gateways["GW"]; got != f.newDev.TailscaleIP { + t.Errorf("gateway IP = %s, want %s", got, f.newDev.TailscaleIP) + } +} From 20e980469879fb71ca4c5ba1aadd4cce2b5a5454 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:30:12 +0000 Subject: [PATCH 12/79] test(core): cover all pre-pfsense rotate failure cases --- internal/core/rotate_test.go | 98 ++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/internal/core/rotate_test.go b/internal/core/rotate_test.go index c3adea7..f74a0a2 100644 --- a/internal/core/rotate_test.go +++ b/internal/core/rotate_test.go @@ -210,6 +210,104 @@ func hasCallWithArg(calls []callRecord, name string, arg any) bool { // Sentinel to prevent "imported and not used" if errors becomes unused later. var _ = errors.New +func TestRotate_PrePFSenseFailureCases(t *testing.T) { + tests := []struct { + name string + mutate func(*rotateFixture) + wantErrSubstr string + wantDestroyNew bool // true if we expect Destroy(newNode.Name) + wantDelDevice bool // true if we expect DeleteDevice(newDev.ID) + }{ + { + name: "mint auth key fails", + mutate: func(f *rotateFixture) { f.ts.MintErr = errors.New("boom mint") }, + wantErrSubstr: "mint ephemeral auth key", + wantDestroyNew: false, // no VM created yet + wantDelDevice: false, + }, + { + name: "provision fails", + mutate: func(f *rotateFixture) { f.prov.ProvisionErr = errors.New("boom provision") }, + wantErrSubstr: "provision new node", + wantDestroyNew: false, + wantDelDevice: false, + }, + { + name: "register timeout", + mutate: func(f *rotateFixture) { f.ts.WaitErr = errors.New("timeout") }, + wantErrSubstr: "wait for device", + wantDestroyNew: true, + wantDelDevice: false, // device never registered + }, + { + name: "authorize fails", + mutate: func(f *rotateFixture) { f.ts.AuthorizeErr = errors.New("nope") }, + wantErrSubstr: "authorize exit node", + wantDestroyNew: true, + wantDelDevice: true, + }, + { + name: "set tags fails", + mutate: func(f *rotateFixture) { f.ts.SetTagsErr = errors.New("tag fail") }, + wantErrSubstr: "set tags", + wantDestroyNew: true, + wantDelDevice: true, + }, + { + name: "probe error", + mutate: func(f *rotateFixture) { f.probe.EgressViaErr = errors.New("probe fail") }, + wantErrSubstr: "pre-cutover probe", + wantDestroyNew: true, + wantDelDevice: true, + }, + { + name: "probe egress IP mismatch", + mutate: func(f *rotateFixture) { f.probe.EgressViaResult = "1.2.3.4" /* != newNode.PublicIP */ }, + wantErrSubstr: "egress IP mismatch", + wantDestroyNew: true, + wantDelDevice: true, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + f := newRotateFixture(t) + f.cfg.Behavior.AutoSyncPFSense = false + tc.mutate(f) + + _, err := f.core.Rotate(context.Background(), RotateOpts{Region: "asia-southeast1"}) + if err == nil { + t.Fatalf("expected error, got nil") + } + if !contains(err.Error(), tc.wantErrSubstr) { + t.Errorf("err = %q, want substring %q", err.Error(), tc.wantErrSubstr) + } + + gotDestroyNew := hasCallWithArg(f.prov.calls, "Destroy", f.newNode.Name) + gotDelDev := hasCallWithArg(f.ts.calls, "DeleteDevice", f.newDev.ID) + if gotDestroyNew != tc.wantDestroyNew { + t.Errorf("Destroy(new) = %v, want %v (calls=%v)", gotDestroyNew, tc.wantDestroyNew, f.prov.calls) + } + if gotDelDev != tc.wantDelDevice { + t.Errorf("DeleteDevice(new) = %v, want %v (calls=%v)", gotDelDev, tc.wantDelDevice, f.ts.calls) + } + + // Old node MUST NOT be destroyed in any pre-pfSense failure. + if hasCallWithArg(f.prov.calls, "Destroy", f.oldNode.Name) { + t.Errorf("old node was destroyed on failure; calls=%v", f.prov.calls) + } + }) + } +} + +func contains(s, substr string) bool { + for i := 0; i+len(substr) <= len(s); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + func TestRotateHappyPath_WithPFSense(t *testing.T) { f := newRotateFixture(t) f.cfg.Behavior.AutoSyncPFSense = true From 38bb0e55401c126d3027b171f2a73bb5c338db33 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:31:45 +0000 Subject: [PATCH 13/79] test(core): cover pfsense update/apply failure with successful revert --- internal/core/rotate_test.go | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/internal/core/rotate_test.go b/internal/core/rotate_test.go index f74a0a2..8b22a29 100644 --- a/internal/core/rotate_test.go +++ b/internal/core/rotate_test.go @@ -308,6 +308,76 @@ func contains(s, substr string) bool { return false } +func TestRotate_PFSenseFailures_RevertSucceeds(t *testing.T) { + tests := []struct { + name string + mutate func(*rotateFixture) + wantErrSubstr string + // Whether we expect the gateway to be back at the old IP after revert. + wantGatewayRestored bool + }{ + { + name: "UpdateGatewayIP fails (no Apply yet)", + mutate: func(f *rotateFixture) { + // Force UpdateGatewayIP(_, newTSIP) to fail. + f.pf.UpdateErr[f.newDev.TailscaleIP] = errors.New("update boom") + }, + wantErrSubstr: "update pfsense gateway", + wantGatewayRestored: true, + }, + { + name: "Apply fails, revert succeeds", + mutate: func(f *rotateFixture) { + // First Apply errors; the revert's Apply succeeds. + f.pf.ApplyErr = []error{errors.New("apply boom"), nil} + }, + wantErrSubstr: "apply pfsense changes", + wantGatewayRestored: true, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + f := newRotateFixture(t) + f.cfg.Behavior.AutoSyncPFSense = true + tc.mutate(f) + + _, err := f.core.Rotate(context.Background(), RotateOpts{Region: "asia-southeast1"}) + if err == nil { + t.Fatalf("expected error, got nil") + } + if !contains(err.Error(), tc.wantErrSubstr) { + t.Errorf("err = %q, want %q", err.Error(), tc.wantErrSubstr) + } + + // New node MUST be destroyed (cleanup ran). + if !hasCallWithArg(f.prov.calls, "Destroy", f.newNode.Name) { + t.Errorf("expected Destroy(new); calls=%v", f.prov.calls) + } + if !hasCallWithArg(f.ts.calls, "DeleteDevice", f.newDev.ID) { + t.Errorf("expected DeleteDevice(new); calls=%v", f.ts.calls) + } + + // Gateway should be back at old IP. + if tc.wantGatewayRestored { + if got := f.pf.Gateways["GW"]; got != "100.64.0.1" { + t.Errorf("gateway IP = %s, want 100.64.0.1 (old)", got) + } + } + + // Old node MUST NOT be destroyed. + if hasCallWithArg(f.prov.calls, "Destroy", f.oldNode.Name) { + t.Errorf("old node was destroyed; calls=%v", f.prov.calls) + } + + // Error must NOT be a *CriticalError (revert succeeded). + var crit *CriticalError + if errors.As(err, &crit) { + t.Errorf("got CriticalError, want plain error: %v", crit) + } + }) + } +} + func TestRotateHappyPath_WithPFSense(t *testing.T) { f := newRotateFixture(t) f.cfg.Behavior.AutoSyncPFSense = true From b3053f8ceadabeb45a155cfdc82bb6f0a0111268 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:32:36 +0000 Subject: [PATCH 14/79] =?UTF-8?q?test(core):=20CRITICAL=20case=20=E2=80=94?= =?UTF-8?q?=20apply=20+=20revert=20both=20fail=20leaves=20both=20nodes=20a?= =?UTF-8?q?live?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/core/rotate_test.go | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/internal/core/rotate_test.go b/internal/core/rotate_test.go index 8b22a29..697925d 100644 --- a/internal/core/rotate_test.go +++ b/internal/core/rotate_test.go @@ -378,6 +378,47 @@ func TestRotate_PFSenseFailures_RevertSucceeds(t *testing.T) { } } +func TestRotate_CriticalCase_ApplyAndRevertBothFail(t *testing.T) { + f := newRotateFixture(t) + f.cfg.Behavior.AutoSyncPFSense = true + + // First Apply errors. The revert's UpdateGatewayIP errors too. + f.pf.ApplyErr = []error{errors.New("apply boom"), errors.New("apply revert boom")} + f.pf.UpdateErr["100.64.0.1"] = errors.New("revert update boom") // the revert IP + + _, err := f.core.Rotate(context.Background(), RotateOpts{Region: "asia-southeast1"}) + if err == nil { + t.Fatalf("expected CriticalError, got nil") + } + var crit *CriticalError + if !errors.As(err, &crit) { + t.Fatalf("expected *CriticalError, got %T: %v", err, err) + } + if crit.NewNodeName != f.newNode.Name { + t.Errorf("NewNodeName = %q, want %q", crit.NewNodeName, f.newNode.Name) + } + if crit.NewDeviceID != f.newDev.ID { + t.Errorf("NewDeviceID = %q, want %q", crit.NewDeviceID, f.newDev.ID) + } + if crit.PrimaryErr == nil || crit.RevertErr == nil { + t.Errorf("PrimaryErr / RevertErr must both be non-nil; got primary=%v revert=%v", + crit.PrimaryErr, crit.RevertErr) + } + + // HARD INVARIANT: new node NOT destroyed. + if hasCallWithArg(f.prov.calls, "Destroy", f.newNode.Name) { + t.Errorf("CRITICAL: new node was destroyed during critical-case rotate; calls=%v", f.prov.calls) + } + if hasCallWithArg(f.ts.calls, "DeleteDevice", f.newDev.ID) { + t.Errorf("CRITICAL: new device was deleted during critical-case rotate; calls=%v", f.ts.calls) + } + + // Old node also NOT destroyed. + if hasCallWithArg(f.prov.calls, "Destroy", f.oldNode.Name) { + t.Errorf("old node was destroyed during critical-case rotate; calls=%v", f.prov.calls) + } +} + func TestRotateHappyPath_WithPFSense(t *testing.T) { f := newRotateFixture(t) f.cfg.Behavior.AutoSyncPFSense = true From 0b3f389b9387c4faaef888e22fb6e75cbce0c1ff Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:33:09 +0000 Subject: [PATCH 15/79] test(core): post-cutover probe failure reverts and destroys new --- internal/core/rotate_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/internal/core/rotate_test.go b/internal/core/rotate_test.go index 697925d..7233f41 100644 --- a/internal/core/rotate_test.go +++ b/internal/core/rotate_test.go @@ -419,6 +419,37 @@ func TestRotate_CriticalCase_ApplyAndRevertBothFail(t *testing.T) { } } +func TestRotate_PostCutoverProbeFails_RevertsAndDestroys(t *testing.T) { + f := newRotateFixture(t) + f.cfg.Behavior.AutoSyncPFSense = true + f.cfg.Behavior.VerifyPostCutover = true + + // Pre-cutover probe succeeds (returns matching IP). + // Post-cutover probe (EgressDirect) returns wrong IP. + f.probe.EgressDirectResult = "1.1.1.1" + + _, err := f.core.Rotate(context.Background(), RotateOpts{Region: "asia-southeast1"}) + if err == nil { + t.Fatalf("expected error") + } + if !contains(err.Error(), "post-cutover probe") { + t.Errorf("err = %q, want post-cutover probe", err.Error()) + } + + // Gateway must be reverted. + if got := f.pf.Gateways["GW"]; got != "100.64.0.1" { + t.Errorf("gateway = %s, want 100.64.0.1", got) + } + // New node destroyed. + if !hasCallWithArg(f.prov.calls, "Destroy", f.newNode.Name) { + t.Errorf("expected Destroy(new); calls=%v", f.prov.calls) + } + // Old node NOT destroyed. + if hasCallWithArg(f.prov.calls, "Destroy", f.oldNode.Name) { + t.Errorf("old node was destroyed; calls=%v", f.prov.calls) + } +} + func TestRotateHappyPath_WithPFSense(t *testing.T) { f := newRotateFixture(t) f.cfg.Behavior.AutoSyncPFSense = true From 3469766a59ef7af67ae9379581faca9b472cbef1 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:34:27 +0000 Subject: [PATCH 16/79] test(core): old-node teardown failures do not abort rotate --- internal/core/rotate_test.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/internal/core/rotate_test.go b/internal/core/rotate_test.go index 7233f41..59c9906 100644 --- a/internal/core/rotate_test.go +++ b/internal/core/rotate_test.go @@ -450,6 +450,29 @@ func TestRotate_PostCutoverProbeFails_RevertsAndDestroys(t *testing.T) { } } +func TestRotate_OldNodeTeardownFailuresAreNonFatal(t *testing.T) { + f := newRotateFixture(t) + f.cfg.Behavior.AutoSyncPFSense = true + f.prov.DestroyErr[f.oldNode.Name] = errors.New("destroy old boom") + f.ts.DeleteDeviceErr[f.oldNode.DeviceID] = errors.New("delete old device boom") + + res, err := f.core.Rotate(context.Background(), RotateOpts{Region: "asia-southeast1"}) + if err != nil { + t.Fatalf("expected success despite old-teardown errors, got %v", err) + } + if res.New == nil || res.New.Name != f.newNode.Name { + t.Errorf("res.New = %v", res.New) + } + // State was updated to new node despite teardown failures. + got, err := f.store.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got == nil || got.Name != f.newNode.Name { + t.Errorf("state.Active = %v, want %s", got, f.newNode.Name) + } +} + func TestRotateHappyPath_WithPFSense(t *testing.T) { f := newRotateFixture(t) f.cfg.Behavior.AutoSyncPFSense = true From 026a4eaa46d8a94df9da86b26735d78677aafbd7 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:35:28 +0000 Subject: [PATCH 17/79] feat(core): add idempotent Up command --- internal/core/up.go | 91 +++++++++++++++++++++++++++++++++++++++ internal/core/up_test.go | 93 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 internal/core/up.go create mode 100644 internal/core/up_test.go diff --git a/internal/core/up.go b/internal/core/up.go new file mode 100644 index 0000000..53273ed --- /dev/null +++ b/internal/core/up.go @@ -0,0 +1,91 @@ +package core + +import ( + "context" + "fmt" + + "github.com/iker/exit-node/internal/gcp" +) + +// UpOpts controls a single Up invocation. +type UpOpts struct { + Region string // empty → cfg.GCP.DefaultRegion + MachineType string // empty → cfg.GCP.DefaultMachineType +} + +// Up provisions an exit node if none is recorded as active. If one already +// exists, returns it unchanged (idempotent). +func (c *Core) Up(ctx context.Context, opts UpOpts) (*gcp.ExitNode, error) { + if existing, err := c.store.GetActive(); err != nil { + return nil, fmt.Errorf("read state: %w", err) + } else if existing != nil { + return existing, nil + } + + region := opts.Region + if region == "" { + region = c.cfg.GCP.DefaultRegion + } + machine := opts.MachineType + if machine == "" { + machine = c.cfg.GCP.DefaultMachineType + } + + authKey, err := c.ts.MintEphemeralAuthKey(ctx, c.cfg.Tailscale.Tags) + if err != nil { + return nil, fmt.Errorf("mint ephemeral auth key: %w", err) + } + name := genName(region) + node, err := c.provider.Provision(ctx, gcp.ProvisionOpts{ + Name: name, + Region: region, + Zone: c.cfg.GCP.DefaultZone, + MachineType: machine, + Hostname: name, + TailscaleAuthKey: authKey, + Tags: c.cfg.Tailscale.Tags, + InstallScriptURL: c.cfg.Behavior.InstallScriptURL, + DiskSizeGB: c.cfg.GCP.DiskSizeGB, + Network: c.cfg.GCP.Network, + }) + if err != nil { + return nil, fmt.Errorf("provision: %w", err) + } + dev, err := c.ts.WaitForDevice(ctx, node.Name, c.cfg.Behavior.RegistrationTimeout) + if err != nil { + // Best-effort destroy on registration failure to avoid orphans. + _ = c.provider.Destroy(context.Background(), node.Name) + return nil, fmt.Errorf("wait for device: %w", err) + } + if err := c.ts.AuthorizeExitNode(ctx, dev.ID); err != nil { + _ = c.ts.DeleteDevice(context.Background(), dev.ID) + _ = c.provider.Destroy(context.Background(), node.Name) + return nil, fmt.Errorf("authorize: %w", err) + } + if err := c.ts.SetTags(ctx, dev.ID, c.cfg.Tailscale.Tags); err != nil { + _ = c.ts.DeleteDevice(context.Background(), dev.ID) + _ = c.provider.Destroy(context.Background(), node.Name) + return nil, fmt.Errorf("set tags: %w", err) + } + node.TailscaleIP = dev.TailscaleIP + node.DeviceID = dev.ID + + if c.cfg.Behavior.VerifyPreCutover { + egress, err := c.probe.EgressVia(ctx, dev.TailscaleIP) + if err != nil { + _ = c.ts.DeleteDevice(context.Background(), dev.ID) + _ = c.provider.Destroy(context.Background(), node.Name) + return nil, fmt.Errorf("probe: %w", err) + } + if egress != node.PublicIP { + _ = c.ts.DeleteDevice(context.Background(), dev.ID) + _ = c.provider.Destroy(context.Background(), node.Name) + return nil, fmt.Errorf("egress IP mismatch: got %s, want %s", egress, node.PublicIP) + } + } + + if err := c.store.SetActive(node); err != nil { + c.log.Warn("failed to update state cache", "err", err) + } + return node, nil +} diff --git a/internal/core/up_test.go b/internal/core/up_test.go new file mode 100644 index 0000000..1e4e336 --- /dev/null +++ b/internal/core/up_test.go @@ -0,0 +1,93 @@ +package core + +import ( + "context" + "errors" + "log/slog" + "testing" + "time" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" + "github.com/iker/exit-node/internal/tailscale" +) + +func upFixture(t *testing.T) *rotateFixture { + t.Helper() + statePath := t.TempDir() + "/state.json" + store, err := state.Open(statePath) + if err != nil { + t.Fatalf("state.Open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + + newN := &gcp.ExitNode{ + Name: "vpn-up-1", Region: "us-west1", Zone: "us-west1-a", + MachineType: "e2-micro", PublicIP: "35.5.5.5", + State: gcp.StateRunning, CreatedAt: time.Now(), + } + dev := &tailscale.Device{ID: "dev-up", Hostname: "vpn-up-1", TailscaleIP: "100.64.0.55"} + + cfg := &config.Config{ + GCP: config.GCPConfig{ + Project: "p", DefaultRegion: "us-west1", DefaultMachineType: "e2-micro", + Network: "default", DiskSizeGB: 10, + }, + Tailscale: config.TailscaleConfig{Tags: []string{"tag:exit-node"}, EphemeralKeyTTL: 5 * time.Minute}, + Behavior: config.BehaviorConfig{ + VerifyPreCutover: true, + ProbeURL: "https://example.com/ip", + RegistrationTimeout: 90 * time.Second, + InstallScriptURL: "https://example.com/install.sh", + }, + } + prov := newMockProvider() + prov.ProvisionResult = newN + ts := newMockTS() + ts.WaitDevice = dev + probe := &mockProbe{EgressViaResult: newN.PublicIP} + pf := newMockPF() + c := New(Deps{ + Config: cfg, Provider: prov, TS: ts, PF: pf, Probe: probe, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) + return &rotateFixture{ + cfg: cfg, prov: prov, ts: ts, pf: pf, probe: probe, store: store, core: c, + newNode: newN, newDev: dev, + } +} + +func TestUp_CreatesWhenNoActive(t *testing.T) { + f := upFixture(t) + got, err := f.core.Up(context.Background(), UpOpts{}) + if err != nil { + t.Fatalf("Up: %v", err) + } + if got == nil || got.Name != f.newNode.Name { + t.Errorf("got %v, want %s", got, f.newNode.Name) + } + if !hasCallWithArg(f.prov.calls, "Provision", "") && len(f.prov.names()) == 0 { + t.Errorf("Provision was not called; calls=%v", f.prov.calls) + } +} + +func TestUp_NoopWhenActiveExists(t *testing.T) { + f := upFixture(t) + existing := &gcp.ExitNode{Name: "already-up", State: gcp.StateRunning} + if err := f.store.SetActive(existing); err != nil { + t.Fatalf("seed: %v", err) + } + got, err := f.core.Up(context.Background(), UpOpts{}) + if err != nil { + t.Fatalf("Up: %v", err) + } + if got.Name != "already-up" { + t.Errorf("got %v, want already-up", got) + } + if len(f.prov.names()) != 0 { + t.Errorf("expected no provider calls; got %v", f.prov.names()) + } +} + +var _ = errors.New From 5e272afd622a3e823f53ac9e6ad165650bd65ef6 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:36:18 +0000 Subject: [PATCH 18/79] feat(core): add Down command (stop / destroy) --- internal/core/down.go | 44 ++++++++++++++++++++ internal/core/down_test.go | 83 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 internal/core/down.go create mode 100644 internal/core/down_test.go diff --git a/internal/core/down.go b/internal/core/down.go new file mode 100644 index 0000000..3c8464c --- /dev/null +++ b/internal/core/down.go @@ -0,0 +1,44 @@ +package core + +import ( + "context" + "errors" + "fmt" +) + +// ErrNoActiveNode signals that a command needing an active node found none. +var ErrNoActiveNode = errors.New("no active exit node") + +// DownOpts controls Down behavior. +type DownOpts struct { + Destroy bool // if true, remove the VM + Tailscale device; else just Stop +} + +// Down stops or destroys the currently active exit node. +func (c *Core) Down(ctx context.Context, opts DownOpts) error { + active, err := c.store.GetActive() + if err != nil { + return fmt.Errorf("read state: %w", err) + } + if active == nil { + return ErrNoActiveNode + } + if !opts.Destroy { + if err := c.provider.Stop(ctx, active.Name); err != nil { + return fmt.Errorf("stop %s: %w", active.Name, err) + } + return nil + } + if err := c.provider.Destroy(ctx, active.Name); err != nil { + return fmt.Errorf("destroy %s: %w", active.Name, err) + } + if active.DeviceID != "" { + if err := c.ts.DeleteDevice(ctx, active.DeviceID); err != nil { + c.log.Warn("delete device failed (best-effort)", "err", err, "device_id", active.DeviceID) + } + } + if err := c.store.ClearActive(); err != nil { + c.log.Warn("clear state failed", "err", err) + } + return nil +} diff --git a/internal/core/down_test.go b/internal/core/down_test.go new file mode 100644 index 0000000..836170b --- /dev/null +++ b/internal/core/down_test.go @@ -0,0 +1,83 @@ +package core + +import ( + "context" + "errors" + "log/slog" + "testing" + "time" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" +) + +func downFixture(t *testing.T, active *gcp.ExitNode) *rotateFixture { + t.Helper() + statePath := t.TempDir() + "/state.json" + store, err := state.Open(statePath) + if err != nil { + t.Fatalf("state.Open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + if active != nil { + if err := store.SetActive(active); err != nil { + t.Fatalf("seed: %v", err) + } + } + c := New(Deps{ + Config: &config.Config{}, + Provider: newMockProvider(), TS: newMockTS(), + PF: newMockPF(), Probe: &mockProbe{}, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) + return &rotateFixture{store: store, core: c, + prov: c.provider.(*mockProvider), ts: c.ts.(*mockTS), + pf: c.pf.(*mockPF), probe: c.probe.(*mockProbe)} +} + +func TestDown_StopOnly(t *testing.T) { + active := &gcp.ExitNode{Name: "vpn-1", DeviceID: "dev-1", State: gcp.StateRunning, CreatedAt: time.Now()} + f := downFixture(t, active) + if err := f.core.Down(context.Background(), DownOpts{Destroy: false}); err != nil { + t.Fatalf("Down: %v", err) + } + if !hasCallWithArg(f.prov.calls, "Stop", "vpn-1") { + t.Errorf("expected Stop(vpn-1); calls=%v", f.prov.calls) + } + if hasCallWithArg(f.prov.calls, "Destroy", "vpn-1") { + t.Errorf("Destroy should not be called when Destroy=false") + } + // State cache retained. + got, _ := f.store.GetActive() + if got == nil || got.Name != "vpn-1" { + t.Errorf("state cleared on Stop; got %v", got) + } +} + +func TestDown_Destroy(t *testing.T) { + active := &gcp.ExitNode{Name: "vpn-1", DeviceID: "dev-1", State: gcp.StateRunning, CreatedAt: time.Now()} + f := downFixture(t, active) + if err := f.core.Down(context.Background(), DownOpts{Destroy: true}); err != nil { + t.Fatalf("Down: %v", err) + } + if !hasCallWithArg(f.prov.calls, "Destroy", "vpn-1") { + t.Errorf("expected Destroy(vpn-1); calls=%v", f.prov.calls) + } + if !hasCallWithArg(f.ts.calls, "DeleteDevice", "dev-1") { + t.Errorf("expected DeleteDevice(dev-1); calls=%v", f.ts.calls) + } + // State cache cleared. + got, _ := f.store.GetActive() + if got != nil { + t.Errorf("state not cleared; got %v", got) + } +} + +func TestDown_NoActive(t *testing.T) { + f := downFixture(t, nil) + err := f.core.Down(context.Background(), DownOpts{}) + if !errors.Is(err, ErrNoActiveNode) { + t.Errorf("err = %v, want ErrNoActiveNode", err) + } +} From 1fab9f20de806558e903ab9fe9a7e7bbb6df2e68 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:37:30 +0000 Subject: [PATCH 19/79] feat(core): add List and Status with drift detection --- internal/core/list.go | 17 +++++++++++ internal/core/list_test.go | 37 ++++++++++++++++++++++ internal/core/status.go | 39 ++++++++++++++++++++++++ internal/core/status_test.go | 59 ++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+) create mode 100644 internal/core/list.go create mode 100644 internal/core/list_test.go create mode 100644 internal/core/status.go create mode 100644 internal/core/status_test.go diff --git a/internal/core/list.go b/internal/core/list.go new file mode 100644 index 0000000..2b3626b --- /dev/null +++ b/internal/core/list.go @@ -0,0 +1,17 @@ +package core + +import ( + "context" + "fmt" + + "github.com/iker/exit-node/internal/gcp" +) + +// List returns all managed exit nodes. +func (c *Core) List(ctx context.Context) ([]*gcp.ExitNode, error) { + nodes, err := c.provider.List(ctx) + if err != nil { + return nil, fmt.Errorf("list: %w", err) + } + return nodes, nil +} diff --git a/internal/core/list_test.go b/internal/core/list_test.go new file mode 100644 index 0000000..15590dc --- /dev/null +++ b/internal/core/list_test.go @@ -0,0 +1,37 @@ +package core + +import ( + "context" + "log/slog" + "testing" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" +) + +func TestList(t *testing.T) { + statePath := t.TempDir() + "/state.json" + store, err := state.Open(statePath) + if err != nil { + t.Fatalf("state.Open: %v", err) + } + defer store.Close() + prov := newMockProvider() + prov.ListResult = []*gcp.ExitNode{ + {Name: "a", State: gcp.StateRunning}, + {Name: "b", State: gcp.StateStopped}, + } + c := New(Deps{ + Config: &config.Config{}, Provider: prov, TS: newMockTS(), + PF: newMockPF(), Probe: &mockProbe{}, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) + got, err := c.List(context.Background()) + if err != nil { + t.Fatalf("List: %v", err) + } + if len(got) != 2 || got[0].Name != "a" || got[1].Name != "b" { + t.Errorf("got %v", got) + } +} diff --git a/internal/core/status.go b/internal/core/status.go new file mode 100644 index 0000000..e8d6eef --- /dev/null +++ b/internal/core/status.go @@ -0,0 +1,39 @@ +package core + +import ( + "context" + "fmt" + + "github.com/iker/exit-node/internal/gcp" +) + +// StatusResult is the result of a Status call. +type StatusResult struct { + Cached *gcp.ExitNode // from state.json; nil if no active + Live *gcp.ExitNode // freshly fetched from the provider; nil if Cached is nil +} + +// Drift reports whether the live state differs from the cached state. +func (s *StatusResult) Drift() bool { + if s.Cached == nil || s.Live == nil { + return s.Cached != s.Live // both nil = no drift + } + return s.Cached.State != s.Live.State || s.Cached.PublicIP != s.Live.PublicIP +} + +// Status fetches the cached active node and the live provider record. +func (c *Core) Status(ctx context.Context) (*StatusResult, error) { + cached, err := c.store.GetActive() + if err != nil { + return nil, fmt.Errorf("read state: %w", err) + } + out := &StatusResult{Cached: cached} + if cached != nil { + live, err := c.provider.Get(ctx, cached.Name) + if err != nil { + return nil, fmt.Errorf("get live: %w", err) + } + out.Live = live + } + return out, nil +} diff --git a/internal/core/status_test.go b/internal/core/status_test.go new file mode 100644 index 0000000..9babba6 --- /dev/null +++ b/internal/core/status_test.go @@ -0,0 +1,59 @@ +package core + +import ( + "context" + "log/slog" + "testing" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" +) + +func TestStatus_ActiveExists(t *testing.T) { + statePath := t.TempDir() + "/state.json" + store, _ := state.Open(statePath) + defer store.Close() + active := &gcp.ExitNode{Name: "v1", State: gcp.StateStopped} + _ = store.SetActive(active) + + prov := newMockProvider() + // Provider now reports Running — drift from state cache. + prov.GetResult["v1"] = &gcp.ExitNode{Name: "v1", State: gcp.StateRunning} + c := New(Deps{ + Config: &config.Config{}, Provider: prov, TS: newMockTS(), + PF: newMockPF(), Probe: &mockProbe{}, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) + got, err := c.Status(context.Background()) + if err != nil { + t.Fatalf("Status: %v", err) + } + if got.Cached == nil || got.Cached.Name != "v1" { + t.Errorf("Cached = %v", got.Cached) + } + if got.Live == nil || got.Live.State != gcp.StateRunning { + t.Errorf("Live = %v", got.Live) + } + if !got.Drift() { + t.Errorf("expected Drift=true") + } +} + +func TestStatus_NoActive(t *testing.T) { + statePath := t.TempDir() + "/state.json" + store, _ := state.Open(statePath) + defer store.Close() + c := New(Deps{ + Config: &config.Config{}, Provider: newMockProvider(), TS: newMockTS(), + PF: newMockPF(), Probe: &mockProbe{}, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) + got, err := c.Status(context.Background()) + if err != nil { + t.Fatalf("Status: %v", err) + } + if got.Cached != nil || got.Live != nil { + t.Errorf("expected nil/nil, got %+v", got) + } +} From 4e7ff67ee72e9f78e4633e979eaf0b0021e95b55 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:38:34 +0000 Subject: [PATCH 20/79] feat(core): add Health probe-only command --- internal/core/health.go | 33 ++++++++++++++++ internal/core/health_test.go | 73 ++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 internal/core/health.go create mode 100644 internal/core/health_test.go diff --git a/internal/core/health.go b/internal/core/health.go new file mode 100644 index 0000000..38c7e02 --- /dev/null +++ b/internal/core/health.go @@ -0,0 +1,33 @@ +package core + +import ( + "context" + "fmt" +) + +// HealthResult is the result of a Health probe. +type HealthResult struct { + OK bool + EgressIP string // observed + ExpectedIP string // GCP public IP of the active node + ProbeErr error // non-nil if probe itself errored +} + +// Health runs the pre-cutover probe against the currently active node +// without mutating anything else. +func (c *Core) Health(ctx context.Context) (*HealthResult, error) { + active, err := c.store.GetActive() + if err != nil { + return nil, fmt.Errorf("read state: %w", err) + } + if active == nil { + return nil, ErrNoActiveNode + } + egress, perr := c.probe.EgressVia(ctx, active.TailscaleIP) + return &HealthResult{ + OK: perr == nil && egress == active.PublicIP, + EgressIP: egress, + ExpectedIP: active.PublicIP, + ProbeErr: perr, + }, nil +} diff --git a/internal/core/health_test.go b/internal/core/health_test.go new file mode 100644 index 0000000..4b415bf --- /dev/null +++ b/internal/core/health_test.go @@ -0,0 +1,73 @@ +package core + +import ( + "context" + "errors" + "log/slog" + "testing" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" +) + +func healthFixture(t *testing.T, active *gcp.ExitNode, probeIP string, probeErr error) *Core { + t.Helper() + statePath := t.TempDir() + "/state.json" + store, err := state.Open(statePath) + if err != nil { + t.Fatalf("state.Open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + if active != nil { + _ = store.SetActive(active) + } + prov := newMockProvider() + if active != nil { + prov.GetResult[active.Name] = active + } + probe := &mockProbe{EgressViaResult: probeIP, EgressViaErr: probeErr} + return New(Deps{ + Config: &config.Config{Behavior: config.BehaviorConfig{ProbeURL: "https://x/ip"}}, + Provider: prov, TS: newMockTS(), PF: newMockPF(), Probe: probe, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) +} + +func TestHealth_OK(t *testing.T) { + active := &gcp.ExitNode{Name: "v1", PublicIP: "1.2.3.4", TailscaleIP: "100.64.0.1"} + c := healthFixture(t, active, "1.2.3.4", nil) + got, err := c.Health(context.Background()) + if err != nil { + t.Fatalf("Health: %v", err) + } + if !got.OK { + t.Errorf("OK = false, want true; got=%+v", got) + } + if got.EgressIP != "1.2.3.4" { + t.Errorf("EgressIP = %q", got.EgressIP) + } +} + +func TestHealth_Mismatch(t *testing.T) { + active := &gcp.ExitNode{Name: "v1", PublicIP: "1.2.3.4", TailscaleIP: "100.64.0.1"} + c := healthFixture(t, active, "5.6.7.8", nil) + got, err := c.Health(context.Background()) + if err != nil { + t.Fatalf("Health: %v", err) + } + if got.OK { + t.Errorf("OK = true, want false") + } + if got.EgressIP != "5.6.7.8" || got.ExpectedIP != "1.2.3.4" { + t.Errorf("got %+v", got) + } +} + +func TestHealth_NoActive(t *testing.T) { + c := healthFixture(t, nil, "", nil) + _, err := c.Health(context.Background()) + if !errors.Is(err, ErrNoActiveNode) { + t.Errorf("err = %v, want ErrNoActiveNode", err) + } +} From bf6b0ce82e38e04b68608135e4c34353f6f828e6 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:40:00 +0000 Subject: [PATCH 21/79] feat(core): add idempotent SyncPFSense --- internal/core/sync.go | 33 ++++++++++++++++++ internal/core/sync_test.go | 69 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 internal/core/sync.go create mode 100644 internal/core/sync_test.go diff --git a/internal/core/sync.go b/internal/core/sync.go new file mode 100644 index 0000000..5081095 --- /dev/null +++ b/internal/core/sync.go @@ -0,0 +1,33 @@ +package core + +import ( + "context" + "fmt" +) + +// SyncPFSense pushes the active node's Tailscale IP to the configured +// pfSense gateway. Idempotent: if the gateway already matches, no write +// happens. +func (c *Core) SyncPFSense(ctx context.Context) error { + active, err := c.store.GetActive() + if err != nil { + return fmt.Errorf("read state: %w", err) + } + if active == nil { + return ErrNoActiveNode + } + gw, err := c.pf.GetGateway(ctx, c.cfg.PFSense.GatewayName) + if err != nil { + return fmt.Errorf("get gateway: %w", err) + } + if gw.IP == active.TailscaleIP { + return nil + } + if err := c.pf.UpdateGatewayIP(ctx, c.cfg.PFSense.GatewayName, active.TailscaleIP); err != nil { + return fmt.Errorf("update gateway: %w", err) + } + if err := c.pf.Apply(ctx); err != nil { + return fmt.Errorf("apply: %w", err) + } + return nil +} diff --git a/internal/core/sync_test.go b/internal/core/sync_test.go new file mode 100644 index 0000000..297aa14 --- /dev/null +++ b/internal/core/sync_test.go @@ -0,0 +1,69 @@ +package core + +import ( + "context" + "errors" + "log/slog" + "reflect" + "testing" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" +) + +func syncFixture(t *testing.T, active *gcp.ExitNode, gwIP string) *rotateFixture { + t.Helper() + statePath := t.TempDir() + "/state.json" + store, err := state.Open(statePath) + if err != nil { + t.Fatalf("state.Open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + if active != nil { + _ = store.SetActive(active) + } + pf := newMockPF() + pf.Gateways["GW"] = gwIP + c := New(Deps{ + Config: &config.Config{PFSense: config.PFSenseConfig{GatewayName: "GW"}}, + Provider: newMockProvider(), TS: newMockTS(), + PF: pf, Probe: &mockProbe{}, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) + return &rotateFixture{store: store, core: c, pf: pf} +} + +func TestSync_NoopWhenGatewayMatches(t *testing.T) { + active := &gcp.ExitNode{Name: "v1", TailscaleIP: "100.64.0.99"} + f := syncFixture(t, active, "100.64.0.99") + if err := f.core.SyncPFSense(context.Background()); err != nil { + t.Fatalf("SyncPFSense: %v", err) + } + want := []string{"GetGateway"} + if !reflect.DeepEqual(f.pf.names(), want) { + t.Errorf("calls = %v, want %v", f.pf.names(), want) + } +} + +func TestSync_PushesWhenGatewayDiffers(t *testing.T) { + active := &gcp.ExitNode{Name: "v1", TailscaleIP: "100.64.0.99"} + f := syncFixture(t, active, "100.64.0.1") + if err := f.core.SyncPFSense(context.Background()); err != nil { + t.Fatalf("SyncPFSense: %v", err) + } + want := []string{"GetGateway", "UpdateGatewayIP", "Apply"} + if !reflect.DeepEqual(f.pf.names(), want) { + t.Errorf("calls = %v, want %v", f.pf.names(), want) + } + if got := f.pf.Gateways["GW"]; got != "100.64.0.99" { + t.Errorf("gateway = %s, want 100.64.0.99", got) + } +} + +func TestSync_NoActive(t *testing.T) { + f := syncFixture(t, nil, "100.64.0.1") + if err := f.core.SyncPFSense(context.Background()); !errors.Is(err, ErrNoActiveNode) { + t.Errorf("err = %v, want ErrNoActiveNode", err) + } +} From 9003b751189334c056a93e4f60fdb88ebeb0cf3c Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:40:40 +0000 Subject: [PATCH 22/79] feat(core): add EstimateCost with static price table --- internal/core/cost.go | 62 +++++++++++++++++++++++++++++++ internal/core/cost_test.go | 76 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 internal/core/cost.go create mode 100644 internal/core/cost_test.go diff --git a/internal/core/cost.go b/internal/core/cost.go new file mode 100644 index 0000000..23b8714 --- /dev/null +++ b/internal/core/cost.go @@ -0,0 +1,62 @@ +package core + +import ( + "context" + "fmt" + "time" +) + +// CostOpts controls EstimateCost. +type CostOpts struct { + // Period is the look-back window. If zero, defaults to time since CreatedAt. + Period time.Duration +} + +// CostResult is the estimate output. +type CostResult struct { + MachineType string + Hours float64 + USDPerHour float64 + USD float64 + Note string +} + +// machineHourlyUSD is a static price table for the small set of machine +// types we support. Values from GCP's public on-demand list price (US +// regions) as of v0.1; intentionally not the Billing API. +var machineHourlyUSD = map[string]float64{ + "e2-micro": 0.008, + "e2-small": 0.017, + "e2-medium": 0.034, + "n2-standard-2": 0.097, +} + +// EstimateCost returns a rough cost estimate for the active exit node. +func (c *Core) EstimateCost(ctx context.Context, opts CostOpts) (*CostResult, error) { + active, err := c.store.GetActive() + if err != nil { + return nil, fmt.Errorf("read state: %w", err) + } + if active == nil { + return nil, ErrNoActiveNode + } + rate, ok := machineHourlyUSD[active.MachineType] + if !ok { + return &CostResult{ + MachineType: active.MachineType, + Note: fmt.Sprintf("no static price for %q; configure manually", active.MachineType), + }, nil + } + period := opts.Period + if period == 0 { + period = time.Since(active.CreatedAt) + } + hours := period.Hours() + return &CostResult{ + MachineType: active.MachineType, + Hours: hours, + USDPerHour: rate, + USD: hours * rate, + Note: "list price, US regions, on-demand; ignores egress bandwidth and disk", + }, nil +} diff --git a/internal/core/cost_test.go b/internal/core/cost_test.go new file mode 100644 index 0000000..1a46462 --- /dev/null +++ b/internal/core/cost_test.go @@ -0,0 +1,76 @@ +package core + +import ( + "context" + "log/slog" + "testing" + "time" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" +) + +func costFixture(t *testing.T, active *gcp.ExitNode) *Core { + t.Helper() + statePath := t.TempDir() + "/state.json" + store, err := state.Open(statePath) + if err != nil { + t.Fatalf("state.Open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + if active != nil { + _ = store.SetActive(active) + } + return New(Deps{ + Config: &config.Config{}, Provider: newMockProvider(), TS: newMockTS(), + PF: newMockPF(), Probe: &mockProbe{}, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) +} + +func TestEstimateCost_24h(t *testing.T) { + active := &gcp.ExitNode{ + Name: "v1", + MachineType: "e2-micro", + CreatedAt: time.Now().Add(-24 * time.Hour), + } + c := costFixture(t, active) + got, err := c.EstimateCost(context.Background(), CostOpts{Period: 24 * time.Hour}) + if err != nil { + t.Fatalf("EstimateCost: %v", err) + } + // e2-micro static rate is $0.008/hr (per the table in cost.go). + // 24h × $0.008 = $0.192. + wantMin, wantMax := 0.18, 0.21 + if got.USD < wantMin || got.USD > wantMax { + t.Errorf("USD = %.4f, want between %.2f and %.2f", got.USD, wantMin, wantMax) + } + if got.MachineType != "e2-micro" { + t.Errorf("MachineType = %q", got.MachineType) + } +} + +func TestEstimateCost_UnknownMachine(t *testing.T) { + active := &gcp.ExitNode{Name: "v1", MachineType: "fictional-1", CreatedAt: time.Now().Add(-1 * time.Hour)} + c := costFixture(t, active) + got, err := c.EstimateCost(context.Background(), CostOpts{Period: time.Hour}) + if err != nil { + t.Fatalf("EstimateCost: %v", err) + } + // Unknown machine: USD stays 0, Note explains. + if got.USD != 0 { + t.Errorf("USD = %.4f, want 0 for unknown machine", got.USD) + } + if got.Note == "" { + t.Errorf("Note should explain unknown machine type") + } +} + +func TestEstimateCost_NoActive(t *testing.T) { + c := costFixture(t, nil) + _, err := c.EstimateCost(context.Background(), CostOpts{Period: time.Hour}) + if err == nil { + t.Errorf("expected ErrNoActiveNode-like error") + } +} From 373666c20fed49397cfdf5bca57baab9dbb4a0a3 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 12:42:25 +0000 Subject: [PATCH 23/79] chore: add golangci-lint config --- .golangci.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .golangci.yaml diff --git a/.golangci.yaml b/.golangci.yaml new file mode 100644 index 0000000..2709c6f --- /dev/null +++ b/.golangci.yaml @@ -0,0 +1,20 @@ +run: + timeout: 5m + go: "1.22" + +linters: + disable-all: true + enable: + - govet + - errcheck + - ineffassign + - staticcheck + - unused + - gofmt + - goimports + +issues: + exclude-rules: + - path: _test\.go + linters: + - errcheck From a97fc018947f6ec2ad26f2fd91bb2cfed1dbae17 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 14:41:47 +0000 Subject: [PATCH 24/79] chore: add tailscale v2, gcp compute apiv1 dependencies --- go.mod | 17 +++++++++++++---- go.sum | 28 ++++++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 5c53014..36831da 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,18 @@ module github.com/iker/exit-node -go 1.24.0 +go 1.25.0 require ( - github.com/BurntSushi/toml v1.6.0 // indirect - github.com/gofrs/flock v0.13.0 // indirect - golang.org/x/sys v0.37.0 // indirect + github.com/BurntSushi/toml v1.6.0 + github.com/gofrs/flock v0.13.0 +) + +require ( + cloud.google.com/go/compute v1.62.0 // indirect + github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 // indirect + golang.org/x/oauth2 v0.36.0 // indirect + golang.org/x/sys v0.43.0 // indirect + google.golang.org/api v0.278.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect + tailscale.com/client/tailscale/v2 v2.9.0 // indirect ) diff --git a/go.sum b/go.sum index 9b148a3..b532f94 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,30 @@ +cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= +cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= +cloud.google.com/go/compute v1.62.0 h1:tJ7lKJ8YEVa6vZX03Jc8o1YePbjKDOQhDw1BscMZ1bs= +cloud.google.com/go/compute v1.62.0/go.mod h1:Xm6PbsLgBpAg4va77ljbBdpMjzuU+uPp5Ze2dnZq7lw= github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= -golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= -golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 h1:erxeiTyq+nw4Cz5+hLDkOwNF5/9IQWCQPv0gpb3+QHU= +github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5/go.mod h1:DFSS3NAGHthKo1gTlmEcSBiZrRJXi28rLNd/1udP1c8= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= +golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= +golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= +golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +google.golang.org/api v0.278.0 h1:W7jiRvRi53VYFfZ/HoZjQBtJk7gOFbHD8ot1RzVZU6E= +google.golang.org/api v0.278.0/go.mod h1:B9TqLBwJqVjp1mtt7WeoQwWRwvu/400y5lETOql+giQ= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +tailscale.com/client/tailscale/v2 v2.9.0 h1:zBZIIeIYXL42qvvile7d29O2DKSr3AfNc2gzd1JCf2o= +tailscale.com/client/tailscale/v2 v2.9.0/go.mod h1:FGjvGT3ThHelqo0gfdK3IN3k1dwNbRzYbQh2XO3C47U= From 5d866fae8b635d8b86d6e942c1a285731785fe46 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 15:07:49 +0000 Subject: [PATCH 25/79] feat(verify): add commandRunner exec seam with fake runner --- internal/verify/shell.go | 38 ++++++++++++++++++++++++ internal/verify/shell_test.go | 54 +++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 internal/verify/shell.go create mode 100644 internal/verify/shell_test.go diff --git a/internal/verify/shell.go b/internal/verify/shell.go new file mode 100644 index 0000000..0e9e56e --- /dev/null +++ b/internal/verify/shell.go @@ -0,0 +1,38 @@ +// Package verify implements the local-tailnet Probe interface by shelling +// out to `tailscale` and `curl`. The exec layer is abstracted as a +// commandRunner so tests can substitute a fake without touching real +// binaries. +package verify + +import ( + "bytes" + "context" + "errors" + "os/exec" +) + +// commandRunner abstracts process execution so tests can substitute a +// fake without touching real binaries. The production implementation +// (osCmdRunner) shells out via os/exec. +type commandRunner interface { + Run(ctx context.Context, name string, args ...string) (stdout string, err error) +} + +// osCmdRunner is the production runner. Its Run shells out to the given +// binary on PATH; stderr is folded into the returned error on non-zero +// exit so tests of error paths see the underlying message. +type osCmdRunner struct{} + +func (osCmdRunner) Run(ctx context.Context, name string, args ...string) (string, error) { + cmd := exec.CommandContext(ctx, name, args...) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + if stderr.Len() > 0 { + return stdout.String(), errors.New(stderr.String()) + } + return stdout.String(), err + } + return stdout.String(), nil +} diff --git a/internal/verify/shell_test.go b/internal/verify/shell_test.go new file mode 100644 index 0000000..2adfb7b --- /dev/null +++ b/internal/verify/shell_test.go @@ -0,0 +1,54 @@ +package verify + +import ( + "context" + "errors" + "strings" + "testing" +) + +// fakeRunner records every command and returns canned outputs / errors +// per call. Tests configure Responses in order; the i-th Run call +// returns Responses[i]. +type fakeRunner struct { + calls []fakeCall + Responses []fakeResponse +} + +type fakeCall struct { + Name string + Args []string +} + +type fakeResponse struct { + Stdout string + Err error +} + +func (f *fakeRunner) Run(ctx context.Context, name string, args ...string) (string, error) { + i := len(f.calls) + f.calls = append(f.calls, fakeCall{Name: name, Args: args}) + if i >= len(f.Responses) { + return "", errors.New("fakeRunner: unexpected extra call") + } + r := f.Responses[i] + return r.Stdout, r.Err +} + +func (f *fakeRunner) lastNCommands(n int) []string { + out := make([]string, 0, n) + start := len(f.calls) - n + if start < 0 { + start = 0 + } + for _, c := range f.calls[start:] { + out = append(out, c.Name+" "+strings.Join(c.Args, " ")) + } + return out +} + +func TestShellRunnerInterface(t *testing.T) { + // Sentinel test — confirms commandRunner is the seam and *fakeRunner + // implements it. Compile-only assertion. + var _ commandRunner = (*fakeRunner)(nil) +} From b4212cd5cd775409daa051c9d3541c3c62d3463b Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 15:17:40 +0000 Subject: [PATCH 26/79] feat(verify): EgressVia happy path with deferred exit-node restore --- internal/verify/shell.go | 82 +++++++++++++++++++++++++++++++++++ internal/verify/shell_test.go | 41 ++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/internal/verify/shell.go b/internal/verify/shell.go index 0e9e56e..779734e 100644 --- a/internal/verify/shell.go +++ b/internal/verify/shell.go @@ -7,8 +7,11 @@ package verify import ( "bytes" "context" + "encoding/json" "errors" + "fmt" "os/exec" + "strings" ) // commandRunner abstracts process execution so tests can substitute a @@ -36,3 +39,82 @@ func (osCmdRunner) Run(ctx context.Context, name string, args ...string) (string } return stdout.String(), nil } + +// New constructs the production Probe. The return type is *shellProbe in +// this task because EgressDirect (required by the Probe interface) is +// added in Task 5; Task 6 will widen this to Probe alongside the +// compile-time interface sentinel. +func New(probeURL string) *shellProbe { + return &shellProbe{run: osCmdRunner{}, probeURL: probeURL} +} + +// shellProbe is the concrete Probe implementation. It is unexported; the +// returned interface is the only public surface. +type shellProbe struct { + run commandRunner + probeURL string +} + +// EgressVia routes the orchestrator host's egress through the given +// Tailscale IP, curls the configured probe URL, restores the prior +// exit-node setting (always, even on probe failure), and returns the +// observed egress IP. +func (p *shellProbe) EgressVia(ctx context.Context, tailscaleIP string) (string, error) { + priorID, err := p.captureExitNodeID(ctx) + if err != nil { + return "", fmt.Errorf("capture prior exit-node: %w", err) + } + // Restore runs in a defer so it executes on ANY return path below. + defer p.restoreExitNode(priorID) + + if _, err := p.run.Run(ctx, "tailscale", "set", + fmt.Sprintf("--exit-node=%s", tailscaleIP), + "--exit-node-allow-lan-access=true", + ); err != nil { + return "", fmt.Errorf("set exit-node: %w", err) + } + if _, err := p.run.Run(ctx, "tailscale", "ping", "--timeout=10s", tailscaleIP); err != nil { + return "", fmt.Errorf("tailscale ping: %w", err) + } + out, err := p.run.Run(ctx, "curl", "--silent", "--max-time", "10", p.probeURL) + if err != nil { + return "", fmt.Errorf("curl probe URL: %w", err) + } + return strings.TrimSpace(out), nil +} + +// captureExitNodeID parses `tailscale status --json` and returns the +// current ExitNodeStatus.ID (empty string if no exit node was set). +func (p *shellProbe) captureExitNodeID(ctx context.Context) (string, error) { + out, err := p.run.Run(ctx, "tailscale", "status", "--json") + if err != nil { + return "", err + } + var parsed struct { + ExitNodeStatus *struct { + ID string `json:"ID"` + } `json:"ExitNodeStatus"` + } + if err := json.Unmarshal([]byte(out), &parsed); err != nil { + return "", fmt.Errorf("parse tailscale status json: %w", err) + } + if parsed.ExitNodeStatus == nil { + return "", nil + } + return parsed.ExitNodeStatus.ID, nil +} + +// restoreExitNode is best-effort; errors are intentionally swallowed +// because we're already in a defer chain and the caller has its own +// error to return. A failed restore is loud at the host level (you'll +// notice your egress is wrong); this is documented in the README's +// troubleshooting section. +func (p *shellProbe) restoreExitNode(priorID string) { + // Use a fresh context: the request ctx may already be canceled. + ctx := context.Background() + arg := "--exit-node=" + if priorID != "" { + arg = "--exit-node=" + priorID + } + _, _ = p.run.Run(ctx, "tailscale", "set", arg) +} diff --git a/internal/verify/shell_test.go b/internal/verify/shell_test.go index 2adfb7b..d1922e8 100644 --- a/internal/verify/shell_test.go +++ b/internal/verify/shell_test.go @@ -52,3 +52,44 @@ func TestShellRunnerInterface(t *testing.T) { // implements it. Compile-only assertion. var _ commandRunner = (*fakeRunner)(nil) } + +func TestEgressVia_HappyPath_RestoresPrior(t *testing.T) { + statusJSON := `{"ExitNodeStatus":{"ID":"prior-node-id"}}` + fake := &fakeRunner{ + Responses: []fakeResponse{ + {Stdout: statusJSON, Err: nil}, // tailscale status --json + {Stdout: "", Err: nil}, // tailscale set --exit-node= + {Stdout: "pong\n", Err: nil}, // tailscale ping + {Stdout: "203.0.113.7\n", Err: nil}, // curl + {Stdout: "", Err: nil}, // tailscale set --exit-node=prior-node-id (defer) + }, + } + p := &shellProbe{run: fake, probeURL: "https://example.com/ip"} + + got, err := p.EgressVia(context.Background(), "100.64.0.9") + if err != nil { + t.Fatalf("EgressVia: %v", err) + } + if got != "203.0.113.7" { + t.Errorf("egress = %q, want 203.0.113.7", got) + } + + if len(fake.calls) != 5 { + t.Fatalf("expected 5 commands, got %d: %v", len(fake.calls), fake.lastNCommands(len(fake.calls))) + } + // The fifth call must be the restore to the prior ID. + got5 := fake.calls[4] + if got5.Name != "tailscale" || !contains(got5.Args, "--exit-node=prior-node-id") { + t.Errorf("expected restore to prior id, got %s %v", got5.Name, got5.Args) + } +} + +// contains reports whether needle is in haystack. +func contains(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + return false +} From 9dbbd707fa200e9b634eeb1e5c809a58c2d96d03 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 16:00:09 +0000 Subject: [PATCH 27/79] test(verify): restore runs on every EgressVia failure path --- internal/verify/shell_test.go | 66 +++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/internal/verify/shell_test.go b/internal/verify/shell_test.go index d1922e8..8460ddd 100644 --- a/internal/verify/shell_test.go +++ b/internal/verify/shell_test.go @@ -93,3 +93,69 @@ func contains(haystack []string, needle string) bool { } return false } + +func TestEgressVia_RestoreRunsOnFailure(t *testing.T) { + statusJSON := `{"ExitNodeStatus":{"ID":"prior-node-id"}}` + + cases := []struct { + name string + // Responses: status, set, ping, curl — index of which one to fail. + failAt int + wantErrSubstr string + }{ + {name: "set fails", failAt: 1, wantErrSubstr: "set exit-node"}, + {name: "ping fails", failAt: 2, wantErrSubstr: "tailscale ping"}, + {name: "curl fails", failAt: 3, wantErrSubstr: "curl probe URL"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + responses := []fakeResponse{ + {Stdout: statusJSON, Err: nil}, // status + {Stdout: "", Err: nil}, // set + {Stdout: "pong\n", Err: nil}, // ping + {Stdout: "1.2.3.4\n", Err: nil}, // curl + } + responses[tc.failAt] = fakeResponse{Err: errors.New("boom")} + // Always one extra response for the restore. + responses = append(responses, fakeResponse{}) + + fake := &fakeRunner{Responses: responses} + p := &shellProbe{run: fake, probeURL: "https://x/ip"} + + _, err := p.EgressVia(context.Background(), "100.64.0.9") + if err == nil { + t.Fatalf("expected error, got nil") + } + if !strings.Contains(err.Error(), tc.wantErrSubstr) { + t.Errorf("err = %q, want substring %q", err.Error(), tc.wantErrSubstr) + } + + // The LAST call must be the restore. + last := fake.calls[len(fake.calls)-1] + if last.Name != "tailscale" || !contains(last.Args, "--exit-node=prior-node-id") { + t.Errorf("expected last call to be restore; got %s %v", last.Name, last.Args) + } + }) + } +} + +func TestEgressVia_RestoresEmptyWhenNoPriorExitNode(t *testing.T) { + // ExitNodeStatus absent in JSON → restore arg is bare "--exit-node=" + fake := &fakeRunner{ + Responses: []fakeResponse{ + {Stdout: `{}`, Err: nil}, // status: no ExitNodeStatus + {Stdout: "", Err: nil}, // set + {Stdout: "pong\n", Err: nil}, // ping + {Stdout: "1.2.3.4\n", Err: nil}, // curl + {Stdout: "", Err: nil}, // restore + }, + } + p := &shellProbe{run: fake, probeURL: "https://x/ip"} + if _, err := p.EgressVia(context.Background(), "100.64.0.9"); err != nil { + t.Fatalf("EgressVia: %v", err) + } + last := fake.calls[len(fake.calls)-1] + if last.Name != "tailscale" || !contains(last.Args, "--exit-node=") { + t.Errorf("expected restore with empty exit-node, got %s %v", last.Name, last.Args) + } +} From 88354dd0c6948d6fb57df0e5776de778d96c870a Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 18:09:25 +0000 Subject: [PATCH 28/79] feat(verify): EgressDirect for post-cutover probe --- internal/verify/shell.go | 21 +++++++++++++++++++ internal/verify/shell_test.go | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/internal/verify/shell.go b/internal/verify/shell.go index 779734e..eeac2a5 100644 --- a/internal/verify/shell.go +++ b/internal/verify/shell.go @@ -104,6 +104,27 @@ func (p *shellProbe) captureExitNodeID(ctx context.Context) (string, error) { return parsed.ExitNodeStatus.ID, nil } +// EgressDirect curls the probe URL with no Tailscale exit-node override, +// then restores the prior exit-node setting. Used for the post-cutover +// check that traffic flows via the host's default route (LAN → pfSense → +// new exit node). +func (p *shellProbe) EgressDirect(ctx context.Context) (string, error) { + priorID, err := p.captureExitNodeID(ctx) + if err != nil { + return "", fmt.Errorf("capture prior exit-node: %w", err) + } + defer p.restoreExitNode(priorID) + + if _, err := p.run.Run(ctx, "tailscale", "set", "--exit-node="); err != nil { + return "", fmt.Errorf("clear exit-node: %w", err) + } + out, err := p.run.Run(ctx, "curl", "--silent", "--max-time", "10", p.probeURL) + if err != nil { + return "", fmt.Errorf("curl probe URL: %w", err) + } + return strings.TrimSpace(out), nil +} + // restoreExitNode is best-effort; errors are intentionally swallowed // because we're already in a defer chain and the caller has its own // error to return. A failed restore is loud at the host level (you'll diff --git a/internal/verify/shell_test.go b/internal/verify/shell_test.go index 8460ddd..2697c82 100644 --- a/internal/verify/shell_test.go +++ b/internal/verify/shell_test.go @@ -159,3 +159,42 @@ func TestEgressVia_RestoresEmptyWhenNoPriorExitNode(t *testing.T) { t.Errorf("expected restore with empty exit-node, got %s %v", last.Name, last.Args) } } + +func TestEgressDirect_HappyPath_RestoresPrior(t *testing.T) { + statusJSON := `{"ExitNodeStatus":{"ID":"prior-node-id"}}` + fake := &fakeRunner{ + Responses: []fakeResponse{ + {Stdout: statusJSON}, // status + {Stdout: ""}, // set --exit-node= (clear) + {Stdout: "203.0.113.99\n"}, // curl + {Stdout: ""}, // restore + }, + } + p := &shellProbe{run: fake, probeURL: "https://x/ip"} + got, err := p.EgressDirect(context.Background()) + if err != nil { + t.Fatalf("EgressDirect: %v", err) + } + if got != "203.0.113.99" { + t.Errorf("egress = %q", got) + } + // 2nd call clears, 4th call restores. + if got, want := fake.calls[1].Args, []string{"set", "--exit-node="}; !equalSlices(got, want) { + t.Errorf("clear call = %v, want %v", got, want) + } + if last := fake.calls[3]; !contains(last.Args, "--exit-node=prior-node-id") { + t.Errorf("restore call = %v", last) + } +} + +func equalSlices(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} From 63ccfb36dc794c1c85b27f0abff9b65fd11c1489 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 18:19:48 +0000 Subject: [PATCH 29/79] test(verify): interface conformance sentinel for shellProbe --- internal/verify/shell.go | 7 ++----- internal/verify/shell_test.go | 9 +++++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/internal/verify/shell.go b/internal/verify/shell.go index eeac2a5..1558b54 100644 --- a/internal/verify/shell.go +++ b/internal/verify/shell.go @@ -40,11 +40,8 @@ func (osCmdRunner) Run(ctx context.Context, name string, args ...string) (string return stdout.String(), nil } -// New constructs the production Probe. The return type is *shellProbe in -// this task because EgressDirect (required by the Probe interface) is -// added in Task 5; Task 6 will widen this to Probe alongside the -// compile-time interface sentinel. -func New(probeURL string) *shellProbe { +// New constructs the production Probe (shells out to tailscale + curl). +func New(probeURL string) Probe { return &shellProbe{run: osCmdRunner{}, probeURL: probeURL} } diff --git a/internal/verify/shell_test.go b/internal/verify/shell_test.go index 2697c82..4c7d373 100644 --- a/internal/verify/shell_test.go +++ b/internal/verify/shell_test.go @@ -198,3 +198,12 @@ func equalSlices(a, b []string) bool { } return true } + +func TestNewReturnsProbe(t *testing.T) { + var _ Probe = (*shellProbe)(nil) // compile-time interface check + + p := New("https://example.com/ip") + if p == nil { + t.Fatalf("New returned nil") + } +} From 4ec99f591fa6760d68b1a30d06972c66926820d8 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 18:35:36 +0000 Subject: [PATCH 30/79] feat(pfsense): add REST client constructor + envelope decoder --- internal/pfsense/rest.go | 172 ++++++++++++++++++++++++++++++++++ internal/pfsense/rest_test.go | 77 +++++++++++++++ 2 files changed, 249 insertions(+) create mode 100644 internal/pfsense/rest.go create mode 100644 internal/pfsense/rest_test.go diff --git a/internal/pfsense/rest.go b/internal/pfsense/rest.go new file mode 100644 index 0000000..7a269ef --- /dev/null +++ b/internal/pfsense/rest.go @@ -0,0 +1,172 @@ +// Package pfsense holds the pfSense REST client. The interface is +// declared in client.go (Plan 1); this file is the concrete impl that +// talks to the community pfsense-api plugin (v2 endpoints). +package pfsense + +import ( + "context" + "crypto/tls" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +// Options configures the pfSense client. +type Options struct { + // BaseURL is the pfSense webConfigurator origin (e.g., + // "https://pfsense.lan"). No trailing slash required. + BaseURL string + // APIKey is the raw API key minted by the pfsense-api plugin. + // Sent as the value of the configured AuthHeader (default + // "Authorization", no "Bearer " prefix — that's what the plugin + // expects on v2). + APIKey string + // AuthHeader is the header name carrying the API key. Defaults to + // "Authorization" if empty. Exposed because some plugin builds + // accept "X-API-Key" instead. + AuthHeader string + // VerifyTLS controls server cert verification. Set false for + // pfSense's default self-signed cert; provide a real cert and set + // true for production. + VerifyTLS bool + // CACertPEM optionally pins a CA used to verify the server cert. + // Ignored if VerifyTLS is false. + CACertPEM []byte + // HTTPTimeout caps each request. Defaults to 30s if zero. + HTTPTimeout time.Duration +} + +// APIError is returned when the pfSense API responds with a non-2xx +// envelope code. +type APIError struct { + Code int // envelope.code (e.g., 404) + Status string // envelope.status (e.g., "error") + ResponseID string // envelope.response_id (e.g., "NOT_FOUND") + Message string // envelope.message +} + +func (e *APIError) Error() string { + return fmt.Sprintf("pfsense api: code=%d status=%s response_id=%s: %s", + e.Code, e.Status, e.ResponseID, e.Message) +} + +// pfClient is the concrete PFSenseClient. Unexported; New returns the +// interface. +type pfClient struct { + baseURL string + apiKey string + authHeader string + http *http.Client +} + +// New constructs a PFSenseClient. Returns the interface so the caller +// can't reach into the struct. +func New(opts Options) (PFSenseClient, error) { + if strings.TrimSpace(opts.APIKey) == "" { + return nil, errors.New("pfsense: APIKey required") + } + if strings.TrimSpace(opts.BaseURL) == "" { + return nil, errors.New("pfsense: BaseURL required") + } + if opts.HTTPTimeout == 0 { + opts.HTTPTimeout = 30 * time.Second + } + if opts.AuthHeader == "" { + opts.AuthHeader = "Authorization" + } + + tlsCfg := &tls.Config{InsecureSkipVerify: !opts.VerifyTLS} //nolint:gosec + // CA-cert pinning is a future enhancement; opts.CACertPEM ignored + // for v0.1. + + return &pfClient{ + baseURL: strings.TrimRight(opts.BaseURL, "/"), + apiKey: opts.APIKey, + authHeader: opts.AuthHeader, + http: &http.Client{ + Timeout: opts.HTTPTimeout, + Transport: &http.Transport{TLSClientConfig: tlsCfg}, + }, + }, nil +} + +// envelope is the wire shape of every pfsense-api v2 response. +type envelope struct { + Code int `json:"code"` + Status string `json:"status"` + ResponseID string `json:"response_id"` + Message string `json:"message"` + Data json.RawMessage `json:"data"` +} + +// decodeEnvelope reads a pfsense-api response body, returns *APIError +// for non-2xx envelope codes, and otherwise unmarshals envelope.data +// into `into`. Pass `new(any)` (or any other discardable) when there's +// no useful payload (e.g., POST /apply). +func decodeEnvelope(body []byte, into any) error { + var env envelope + if err := json.Unmarshal(body, &env); err != nil { + return fmt.Errorf("pfsense: parse envelope: %w (body=%q)", err, string(body)) + } + if env.Code < 200 || env.Code >= 300 { + return &APIError{ + Code: env.Code, Status: env.Status, + ResponseID: env.ResponseID, Message: env.Message, + } + } + if len(env.Data) == 0 || string(env.Data) == "null" { + return nil + } + if err := json.Unmarshal(env.Data, into); err != nil { + return fmt.Errorf("pfsense: parse envelope.data: %w", err) + } + return nil +} + +// errNotImplemented is returned by the interface stubs below until +// Tasks 8/9/10 replace them with real implementations. Stubs exist so +// *pfClient satisfies PFSenseClient and New() can return the +// interface as specified. +var errNotImplemented = errors.New("pfsense: not implemented") + +// GetGateway is a stub; Task 8 lands the real impl. +func (c *pfClient) GetGateway(ctx context.Context, name string) (*Gateway, error) { + return nil, errNotImplemented +} + +// UpdateGatewayIP is a stub; Task 9 lands the real impl. +func (c *pfClient) UpdateGatewayIP(ctx context.Context, name, ip string) error { + return errNotImplemented +} + +// Apply is a stub; Task 10 lands the real impl. +func (c *pfClient) Apply(ctx context.Context) error { + return errNotImplemented +} + +// do issues an HTTP request, applies auth + JSON headers, returns the +// raw body and HTTP status. +func (c *pfClient) do(ctx context.Context, method, path string, body io.Reader) ([]byte, int, error) { + req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, body) + if err != nil { + return nil, 0, err + } + req.Header.Set(c.authHeader, c.apiKey) + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + resp, err := c.http.Do(req) + if err != nil { + return nil, 0, err + } + defer resp.Body.Close() + b, err := io.ReadAll(resp.Body) + if err != nil { + return nil, resp.StatusCode, err + } + return b, resp.StatusCode, nil +} diff --git a/internal/pfsense/rest_test.go b/internal/pfsense/rest_test.go new file mode 100644 index 0000000..adb0340 --- /dev/null +++ b/internal/pfsense/rest_test.go @@ -0,0 +1,77 @@ +package pfsense + +import ( + "context" + "crypto/tls" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +// newTestServer + newTestClient give every test a fresh httptest +// server and a pf REST client wired to it. The handler is what the +// test customizes. +func newTestServer(t *testing.T, handler http.HandlerFunc) (*httptest.Server, *pfClient) { + t.Helper() + srv := httptest.NewTLSServer(handler) + t.Cleanup(srv.Close) + + // httptest.NewTLSServer uses a self-signed cert; the production + // client supports InsecureSkipVerify via Options.VerifyTLS=false. + c, err := New(Options{ + BaseURL: srv.URL, + APIKey: "test-key", + VerifyTLS: false, + }) + if err != nil { + t.Fatalf("New: %v", err) + } + return srv, c.(*pfClient) +} + +func TestDecodeEnvelope_Success(t *testing.T) { + body := `{"code":200,"status":"ok","response_id":"SUCCESS","message":"","data":{"name":"GW","gateway":"100.64.0.1"}}` + var into struct { + Name string `json:"name"` + Gateway string `json:"gateway"` + } + if err := decodeEnvelope([]byte(body), &into); err != nil { + t.Fatalf("decodeEnvelope: %v", err) + } + if into.Name != "GW" || into.Gateway != "100.64.0.1" { + t.Errorf("got %+v", into) + } +} + +func TestDecodeEnvelope_ErrorCode(t *testing.T) { + body := `{"code":404,"status":"error","response_id":"NOT_FOUND","message":"gateway 'X' not found","data":null}` + err := decodeEnvelope([]byte(body), new(any)) + if err == nil { + t.Fatalf("expected error, got nil") + } + var apiErr *APIError + if !errors.As(err, &apiErr) { + t.Fatalf("expected *APIError, got %T", err) + } + if apiErr.Code != 404 || apiErr.ResponseID != "NOT_FOUND" { + t.Errorf("got %+v", apiErr) + } +} + +func TestNewRequiresAPIKey(t *testing.T) { + _, err := New(Options{BaseURL: "https://example.com", APIKey: ""}) + if err == nil { + t.Errorf("expected error for empty APIKey") + } +} + +// Silence unused-import warnings in early tasks; they're used by later tests. +var ( + _ = json.Marshal + _ = tls.Config{} + _ = strings.NewReader + _ = context.Background +) From 5928052dd165cc78fd8ca68c2c1dacf7d7ddef22 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 18:39:59 +0000 Subject: [PATCH 31/79] feat(pfsense): GetGateway via /api/v2/routing/gateway --- internal/pfsense/rest.go | 19 +++++++++++++-- internal/pfsense/rest_test.go | 44 ++++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/internal/pfsense/rest.go b/internal/pfsense/rest.go index 7a269ef..41f7ed4 100644 --- a/internal/pfsense/rest.go +++ b/internal/pfsense/rest.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "net/http" + "net/url" "strings" "time" ) @@ -133,9 +134,23 @@ func decodeEnvelope(body []byte, into any) error { // interface as specified. var errNotImplemented = errors.New("pfsense: not implemented") -// GetGateway is a stub; Task 8 lands the real impl. +// GetGateway returns the named gateway. Translates 404 envelopes to +// *APIError so callers can distinguish "not found" from "transport +// error". func (c *pfClient) GetGateway(ctx context.Context, name string) (*Gateway, error) { - return nil, errNotImplemented + body, _, err := c.do(ctx, http.MethodGet, + "/api/v2/routing/gateway?id="+url.QueryEscape(name), nil) + if err != nil { + return nil, fmt.Errorf("pfsense GET gateway: %w", err) + } + var raw struct { + Name string `json:"name"` + Gateway string `json:"gateway"` + } + if err := decodeEnvelope(body, &raw); err != nil { + return nil, err + } + return &Gateway{Name: raw.Name, IP: raw.Gateway}, nil } // UpdateGatewayIP is a stub; Task 9 lands the real impl. diff --git a/internal/pfsense/rest_test.go b/internal/pfsense/rest_test.go index adb0340..f28eed5 100644 --- a/internal/pfsense/rest_test.go +++ b/internal/pfsense/rest_test.go @@ -68,10 +68,52 @@ func TestNewRequiresAPIKey(t *testing.T) { } } +func TestGetGateway_Success(t *testing.T) { + var gotMethod, gotPath, gotAuth string + srv, c := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method + gotPath = r.URL.Path + "?" + r.URL.RawQuery + gotAuth = r.Header.Get("Authorization") + _, _ = w.Write([]byte(`{"code":200,"status":"ok","response_id":"SUCCESS","message":"","data":{"name":"GW","gateway":"100.64.0.1"}}`)) + }) + _ = srv + + gw, err := c.GetGateway(context.Background(), "GW") + if err != nil { + t.Fatalf("GetGateway: %v", err) + } + if gw.Name != "GW" || gw.IP != "100.64.0.1" { + t.Errorf("gw = %+v", gw) + } + if gotMethod != "GET" { + t.Errorf("method = %q, want GET", gotMethod) + } + if gotPath != "/api/v2/routing/gateway?id=GW" { + t.Errorf("path = %q, want /api/v2/routing/gateway?id=GW", gotPath) + } + if gotAuth != "test-key" { + t.Errorf("auth header = %q, want test-key", gotAuth) + } +} + +func TestGetGateway_NotFound(t *testing.T) { + _, c := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"code":404,"status":"error","response_id":"NOT_FOUND","message":"no such gw","data":null}`)) + }) + _, err := c.GetGateway(context.Background(), "missing") + var apiErr *APIError + if !errors.As(err, &apiErr) { + t.Fatalf("expected *APIError, got %T: %v", err, err) + } + if apiErr.Code != 404 { + t.Errorf("code = %d", apiErr.Code) + } +} + // Silence unused-import warnings in early tasks; they're used by later tests. var ( _ = json.Marshal _ = tls.Config{} _ = strings.NewReader - _ = context.Background ) From e8c4da6da2271dca0c51717296bdeb98df35aba2 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 18:44:16 +0000 Subject: [PATCH 32/79] feat(pfsense): UpdateGatewayIP via PATCH --- internal/pfsense/rest.go | 17 +++++++++-- internal/pfsense/rest_test.go | 53 +++++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/internal/pfsense/rest.go b/internal/pfsense/rest.go index 41f7ed4..6459ac0 100644 --- a/internal/pfsense/rest.go +++ b/internal/pfsense/rest.go @@ -153,9 +153,22 @@ func (c *pfClient) GetGateway(ctx context.Context, name string) (*Gateway, error return &Gateway{Name: raw.Name, IP: raw.Gateway}, nil } -// UpdateGatewayIP is a stub; Task 9 lands the real impl. +// UpdateGatewayIP changes the configured IP of the named gateway. The +// pfSense API requires a follow-up Apply call to make the change live. func (c *pfClient) UpdateGatewayIP(ctx context.Context, name, ip string) error { - return errNotImplemented + body, err := json.Marshal(struct { + ID string `json:"id"` + Gateway string `json:"gateway"` + }{ID: name, Gateway: ip}) + if err != nil { + return fmt.Errorf("marshal: %w", err) + } + respBody, _, err := c.do(ctx, http.MethodPatch, "/api/v2/routing/gateway", + strings.NewReader(string(body))) + if err != nil { + return fmt.Errorf("pfsense PATCH gateway: %w", err) + } + return decodeEnvelope(respBody, new(any)) } // Apply is a stub; Task 10 lands the real impl. diff --git a/internal/pfsense/rest_test.go b/internal/pfsense/rest_test.go index f28eed5..44db911 100644 --- a/internal/pfsense/rest_test.go +++ b/internal/pfsense/rest_test.go @@ -5,9 +5,9 @@ import ( "crypto/tls" "encoding/json" "errors" + "io" "net/http" "net/http/httptest" - "strings" "testing" ) @@ -111,9 +111,56 @@ func TestGetGateway_NotFound(t *testing.T) { } } +func TestUpdateGatewayIP_Success(t *testing.T) { + var gotMethod, gotPath, gotBody string + _, c := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method + gotPath = r.URL.Path + b, _ := io.ReadAll(r.Body) + gotBody = string(b) + _, _ = w.Write([]byte(`{"code":200,"status":"ok","response_id":"SUCCESS","message":"","data":{}}`)) + }) + if err := c.UpdateGatewayIP(context.Background(), "GW", "100.64.0.99"); err != nil { + t.Fatalf("UpdateGatewayIP: %v", err) + } + if gotMethod != "PATCH" { + t.Errorf("method = %s", gotMethod) + } + if gotPath != "/api/v2/routing/gateway" { + t.Errorf("path = %s", gotPath) + } + // Body shape: {"id":"GW","gateway":"100.64.0.99"} + var parsed struct { + ID string `json:"id"` + Gateway string `json:"gateway"` + } + if err := json.Unmarshal([]byte(gotBody), &parsed); err != nil { + t.Fatalf("body json: %v", err) + } + if parsed.ID != "GW" || parsed.Gateway != "100.64.0.99" { + t.Errorf("body = %+v", parsed) + } +} + +func TestUpdateGatewayIP_ValidationError(t *testing.T) { + _, c := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"code":400,"status":"error","response_id":"VALIDATION_FAILED","message":"bad ip","data":null}`)) + }) + err := c.UpdateGatewayIP(context.Background(), "GW", "not-an-ip") + if err == nil { + t.Fatalf("expected error") + } + var apiErr *APIError + if !errors.As(err, &apiErr) || apiErr.ResponseID != "VALIDATION_FAILED" { + t.Errorf("err = %v", err) + } +} + +// io is used here; ensure it's imported. +var _ = io.ReadAll + // Silence unused-import warnings in early tasks; they're used by later tests. var ( - _ = json.Marshal _ = tls.Config{} - _ = strings.NewReader ) From 3864b89d1ce4c8390008391d7a434f8fc784d424 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 18:51:43 +0000 Subject: [PATCH 33/79] feat(pfsense): Apply endpoint + interface sentinel --- internal/pfsense/rest.go | 16 +++++++++------- internal/pfsense/rest_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/internal/pfsense/rest.go b/internal/pfsense/rest.go index 6459ac0..a90559a 100644 --- a/internal/pfsense/rest.go +++ b/internal/pfsense/rest.go @@ -128,11 +128,8 @@ func decodeEnvelope(body []byte, into any) error { return nil } -// errNotImplemented is returned by the interface stubs below until -// Tasks 8/9/10 replace them with real implementations. Stubs exist so -// *pfClient satisfies PFSenseClient and New() can return the -// interface as specified. -var errNotImplemented = errors.New("pfsense: not implemented") +// Compile-time assertion that *pfClient implements PFSenseClient. +var _ PFSenseClient = (*pfClient)(nil) // GetGateway returns the named gateway. Translates 404 envelopes to // *APIError so callers can distinguish "not found" from "transport @@ -171,9 +168,14 @@ func (c *pfClient) UpdateGatewayIP(ctx context.Context, name, ip string) error { return decodeEnvelope(respBody, new(any)) } -// Apply is a stub; Task 10 lands the real impl. +// Apply reloads the routing configuration so pending changes (e.g., +// from UpdateGatewayIP) become live. func (c *pfClient) Apply(ctx context.Context) error { - return errNotImplemented + respBody, _, err := c.do(ctx, http.MethodPost, "/api/v2/routing/apply", nil) + if err != nil { + return fmt.Errorf("pfsense POST apply: %w", err) + } + return decodeEnvelope(respBody, new(any)) } // do issues an HTTP request, applies auth + JSON headers, returns the diff --git a/internal/pfsense/rest_test.go b/internal/pfsense/rest_test.go index 44db911..1daa943 100644 --- a/internal/pfsense/rest_test.go +++ b/internal/pfsense/rest_test.go @@ -157,6 +157,34 @@ func TestUpdateGatewayIP_ValidationError(t *testing.T) { } } +func TestApply_Success(t *testing.T) { + var gotMethod, gotPath string + _, c := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method + gotPath = r.URL.Path + _, _ = w.Write([]byte(`{"code":200,"status":"ok","response_id":"SUCCESS","message":"applied","data":null}`)) + }) + if err := c.Apply(context.Background()); err != nil { + t.Fatalf("Apply: %v", err) + } + if gotMethod != "POST" { + t.Errorf("method = %s", gotMethod) + } + if gotPath != "/api/v2/routing/apply" { + t.Errorf("path = %s", gotPath) + } +} + +func TestApply_ServerError(t *testing.T) { + _, c := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"code":500,"status":"error","response_id":"INTERNAL","message":"oops","data":null}`)) + }) + if err := c.Apply(context.Background()); err == nil { + t.Errorf("expected error") + } +} + // io is used here; ensure it's imported. var _ = io.ReadAll From 6a80200a6e8c22a68decbf0dd0882748bb1f535a Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 18:57:27 +0000 Subject: [PATCH 34/79] feat(tailscale): v2client constructor with OAuth client-credentials --- go.mod | 2 +- internal/tailscale/v2client.go | 107 ++++++++++++++++++++++++++++ internal/tailscale/v2client_test.go | 75 +++++++++++++++++++ 3 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 internal/tailscale/v2client.go create mode 100644 internal/tailscale/v2client_test.go diff --git a/go.mod b/go.mod index 36831da..634ee2b 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.25.0 require ( github.com/BurntSushi/toml v1.6.0 github.com/gofrs/flock v0.13.0 + tailscale.com/client/tailscale/v2 v2.9.0 ) require ( @@ -14,5 +15,4 @@ require ( golang.org/x/sys v0.43.0 // indirect google.golang.org/api v0.278.0 // indirect google.golang.org/protobuf v1.36.11 // indirect - tailscale.com/client/tailscale/v2 v2.9.0 // indirect ) diff --git a/internal/tailscale/v2client.go b/internal/tailscale/v2client.go new file mode 100644 index 0000000..3f06753 --- /dev/null +++ b/internal/tailscale/v2client.go @@ -0,0 +1,107 @@ +// Package tailscale's concrete TailscaleClient impl wraps +// tailscale.com/client/tailscale/v2. The wrapper is thin: it owns +// option validation, the AuthorizeExitNode composite call (which +// requires two underlying v2 calls), and WaitForDevice's polling loop. +package tailscale + +import ( + "context" + "errors" + "fmt" + "net/url" + "strings" + "time" + + tsv2 "tailscale.com/client/tailscale/v2" +) + +// errNotImplemented is returned by stub methods on tsClient until the +// corresponding task (12-15) supplies the real implementation. +var errNotImplemented = errors.New("tailscale: not implemented") + +// Options configures the Tailscale client. +type Options struct { + // Tailnet is the organization name (the part before .ts.net or + // the configured custom domain). Required. + Tailnet string + // ClientID + ClientSecret are the OAuth client-credentials. The + // recommended scopes are "auth_keys" and "devices:core" (write). + // Fall back to "all:write" if scope naming has shifted on the + // admin console. + ClientID string + ClientSecret string + // BaseURL overrides the Tailscale API origin. Empty → use the + // library default (https://api.tailscale.com). Tests set this to + // an httptest server URL. + BaseURL string + // PollInterval is the WaitForDevice poll cadence. Defaults to 2s. + PollInterval time.Duration +} + +// tsClient implements TailscaleClient. +type tsClient struct { + inner *tsv2.Client + tailnet string + pollInterval time.Duration +} + +// New constructs a TailscaleClient. +func New(opts Options) (TailscaleClient, error) { + if strings.TrimSpace(opts.Tailnet) == "" { + return nil, errors.New("tailscale: Tailnet required") + } + if strings.TrimSpace(opts.ClientID) == "" { + return nil, errors.New("tailscale: ClientID required") + } + if strings.TrimSpace(opts.ClientSecret) == "" { + return nil, errors.New("tailscale: ClientSecret required") + } + c := &tsv2.Client{ + Tailnet: opts.Tailnet, + Auth: &tsv2.OAuth{ + ClientID: opts.ClientID, + ClientSecret: opts.ClientSecret, + Scopes: []string{"auth_keys", "devices:core"}, + }, + } + if opts.BaseURL != "" { + u, err := url.Parse(opts.BaseURL) + if err != nil { + return nil, fmt.Errorf("tailscale: parse BaseURL: %w", err) + } + c.BaseURL = u + } + poll := opts.PollInterval + if poll == 0 { + poll = 2 * time.Second + } + return &tsClient{inner: c, tailnet: opts.Tailnet, pollInterval: poll}, nil +} + +// MintEphemeralAuthKey — Task 12 will implement. +func (c *tsClient) MintEphemeralAuthKey(ctx context.Context, tags []string) (string, error) { + return "", errNotImplemented +} + +// WaitForDevice — Task 13 will implement. +func (c *tsClient) WaitForDevice(ctx context.Context, hostname string, timeout time.Duration) (*Device, error) { + return nil, errNotImplemented +} + +// AuthorizeExitNode — Task 14 will implement. +func (c *tsClient) AuthorizeExitNode(ctx context.Context, deviceID string) error { + return errNotImplemented +} + +// SetTags — Task 15 will implement. +func (c *tsClient) SetTags(ctx context.Context, deviceID string, tags []string) error { + return errNotImplemented +} + +// DeleteDevice — Task 15 will implement. +func (c *tsClient) DeleteDevice(ctx context.Context, deviceID string) error { + return errNotImplemented +} + +// Compile-time assertion. +var _ TailscaleClient = (*tsClient)(nil) diff --git a/internal/tailscale/v2client_test.go b/internal/tailscale/v2client_test.go new file mode 100644 index 0000000..52bcc06 --- /dev/null +++ b/internal/tailscale/v2client_test.go @@ -0,0 +1,75 @@ +package tailscale + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" + "time" +) + +// tokenIssuer is the standard OAuth client-credentials endpoint stub. +// Every test server mux-routes /api/v2/oauth/token to this handler. +func tokenIssuer(w http.ResponseWriter, r *http.Request) { + _ = r.ParseForm() + // Don't bother validating client_id/secret here — that's the + // Tailscale library's job. Return a valid token response. + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "access_token": "fake-token", + "token_type": "Bearer", + "expires_in": 3600, + }) +} + +// newTestClient wires a tsClient to an httptest server. Tests provide +// the mux; tokenIssuer is registered for /api/v2/oauth/token here so +// every test gets OAuth for free. +func newTestClient(t *testing.T, mux *http.ServeMux) (*httptest.Server, *tsClient) { + t.Helper() + mux.HandleFunc("/api/v2/oauth/token", tokenIssuer) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + c, err := New(Options{ + Tailnet: "test.example.com", + ClientID: "test-id", + ClientSecret: "test-secret", + BaseURL: srv.URL, + }) + if err != nil { + t.Fatalf("New: %v", err) + } + return srv, c.(*tsClient) +} + +func TestNewRequiresCredentials(t *testing.T) { + cases := []struct { + name string + opts Options + }{ + {"missing tailnet", Options{ClientID: "x", ClientSecret: "y"}}, + {"missing client id", Options{Tailnet: "x", ClientSecret: "y"}}, + {"missing client secret", Options{Tailnet: "x", ClientID: "y"}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if _, err := New(tc.opts); err == nil { + t.Errorf("expected error") + } + }) + } +} + +// Silence "imported and not used" for early tasks; later tasks use these. +var ( + _ = errors.New + _ = url.Parse + _ = strings.TrimSpace + _ = time.Now + _ = context.Background +) From 672b96d8202b1271b4ed8e3c91c502bd489c889c Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 19:02:12 +0000 Subject: [PATCH 35/79] feat(tailscale): MintEphemeralAuthKey via Keys().CreateAuthKey --- internal/tailscale/v2client.go | 21 ++++++++- internal/tailscale/v2client_test.go | 71 ++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 4 deletions(-) diff --git a/internal/tailscale/v2client.go b/internal/tailscale/v2client.go index 3f06753..4ce2d09 100644 --- a/internal/tailscale/v2client.go +++ b/internal/tailscale/v2client.go @@ -78,9 +78,26 @@ func New(opts Options) (TailscaleClient, error) { return &tsClient{inner: c, tailnet: opts.Tailnet, pollInterval: poll}, nil } -// MintEphemeralAuthKey — Task 12 will implement. +// MintEphemeralAuthKey mints a single-use, ephemeral, preauthorized +// auth key tagged for the given tags. The key has a 5-minute TTL. func (c *tsClient) MintEphemeralAuthKey(ctx context.Context, tags []string) (string, error) { - return "", errNotImplemented + req := tsv2.CreateKeyRequest{ + Description: "exit-node bootstrap", + ExpirySeconds: 300, + } + // The Capabilities shape uses anonymous nested structs in the v2 + // library. The construction below mirrors the library's struct + // literal pattern; field names match those in CreateKeyRequest. + req.Capabilities.Devices.Create.Reusable = false + req.Capabilities.Devices.Create.Ephemeral = true + req.Capabilities.Devices.Create.Preauthorized = true + req.Capabilities.Devices.Create.Tags = tags + + key, err := c.inner.Keys().CreateAuthKey(ctx, req) + if err != nil { + return "", fmt.Errorf("tailscale create auth key: %w", err) + } + return key.Key, nil } // WaitForDevice — Task 13 will implement. diff --git a/internal/tailscale/v2client_test.go b/internal/tailscale/v2client_test.go index 52bcc06..bd290a8 100644 --- a/internal/tailscale/v2client_test.go +++ b/internal/tailscale/v2client_test.go @@ -69,7 +69,74 @@ func TestNewRequiresCredentials(t *testing.T) { var ( _ = errors.New _ = url.Parse - _ = strings.TrimSpace _ = time.Now - _ = context.Background ) + +func TestMintEphemeralAuthKey_Success(t *testing.T) { + var gotMethod, gotPath, gotAuth string + var gotBody map[string]any + mux := http.NewServeMux() + mux.HandleFunc("/api/v2/tailnet/test.example.com/keys", + func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method + gotPath = r.URL.Path + gotAuth = r.Header.Get("Authorization") + _ = json.NewDecoder(r.Body).Decode(&gotBody) + _ = json.NewEncoder(w).Encode(map[string]any{ + "id": "key-id-1", + "key": "tskey-auth-xxxxxx", + }) + }) + + _, c := newTestClient(t, mux) + got, err := c.MintEphemeralAuthKey(context.Background(), []string{"tag:exit-node"}) + if err != nil { + t.Fatalf("MintEphemeralAuthKey: %v", err) + } + if got != "tskey-auth-xxxxxx" { + t.Errorf("key = %q", got) + } + if gotMethod != "POST" { + t.Errorf("method = %s", gotMethod) + } + if gotPath != "/api/v2/tailnet/test.example.com/keys" { + t.Errorf("path = %s", gotPath) + } + if !strings.HasPrefix(gotAuth, "Bearer ") { + t.Errorf("auth header = %q, want Bearer prefix", gotAuth) + } + + // Body shape — drill into capabilities.devices.create: + caps, _ := gotBody["capabilities"].(map[string]any) + devs, _ := caps["devices"].(map[string]any) + create, _ := devs["create"].(map[string]any) + if v, _ := create["ephemeral"].(bool); !v { + t.Errorf("ephemeral != true (body=%v)", gotBody) + } + if v, _ := create["preauthorized"].(bool); !v { + t.Errorf("preauthorized != true") + } + if v, _ := create["reusable"].(bool); v { + t.Errorf("reusable should be false") + } + tags, _ := create["tags"].([]any) + if len(tags) != 1 || tags[0] != "tag:exit-node" { + t.Errorf("tags = %v", tags) + } + if exp, _ := gotBody["expirySeconds"].(float64); exp != 300 { + t.Errorf("expirySeconds = %v, want 300", exp) + } +} + +func TestMintEphemeralAuthKey_APIError(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/api/v2/tailnet/test.example.com/keys", + func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte(`{"message":"bad scope"}`)) + }) + _, c := newTestClient(t, mux) + if _, err := c.MintEphemeralAuthKey(context.Background(), nil); err == nil { + t.Errorf("expected error") + } +} From 06262fe7ab95e5443f6d983b381db04cf98b8caa Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 19:07:40 +0000 Subject: [PATCH 36/79] feat(tailscale): WaitForDevice with polling --- internal/tailscale/v2client.go | 54 ++++++++++++++++++++++++++-- internal/tailscale/v2client_test.go | 56 ++++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 3 deletions(-) diff --git a/internal/tailscale/v2client.go b/internal/tailscale/v2client.go index 4ce2d09..6d16277 100644 --- a/internal/tailscale/v2client.go +++ b/internal/tailscale/v2client.go @@ -100,9 +100,59 @@ func (c *tsClient) MintEphemeralAuthKey(ctx context.Context, tags []string) (str return key.Key, nil } -// WaitForDevice — Task 13 will implement. +// WaitForDevice polls the devices list until a device whose Hostname +// matches the given value appears, or until the timeout elapses. +// +// The v2 library's Device.Name is the FQDN (hostname plus tailnet +// suffix); some control-plane responses populate Name but not +// Hostname, so we accept either when matching. func (c *tsClient) WaitForDevice(ctx context.Context, hostname string, timeout time.Duration) (*Device, error) { - return nil, errNotImplemented + deadline := time.Now().Add(timeout) + for { + devs, err := c.inner.Devices().List(ctx) + if err != nil { + return nil, fmt.Errorf("tailscale list devices: %w", err) + } + for _, d := range devs { + if d.Hostname == hostname { + return toDevice(d), nil + } + // Some v2 responses populate Name (FQDN) instead of + // Hostname; tolerate either. + if strings.HasPrefix(d.Name, hostname+".") { + return toDevice(d), nil + } + } + if time.Now().After(deadline) { + return nil, fmt.Errorf("tailscale: device %q did not register within %s", hostname, timeout) + } + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(c.pollInterval): + } + } +} + +// toDevice converts a v2 library Device to our internal Device type. +// +// The v2 library has no Online field; ConnectedToControl is the +// closest equivalent (true when the device currently holds an active +// control-plane connection). +func toDevice(d tsv2.Device) *Device { + out := &Device{ + ID: d.NodeID, + Hostname: d.Hostname, + Online: d.ConnectedToControl, + Tags: append([]string(nil), d.Tags...), + } + if out.ID == "" { + out.ID = d.ID + } + if len(d.Addresses) > 0 { + out.TailscaleIP = d.Addresses[0] + } + return out } // AuthorizeExitNode — Task 14 will implement. diff --git a/internal/tailscale/v2client_test.go b/internal/tailscale/v2client_test.go index bd290a8..1503b28 100644 --- a/internal/tailscale/v2client_test.go +++ b/internal/tailscale/v2client_test.go @@ -8,6 +8,7 @@ import ( "net/http/httptest" "net/url" "strings" + "sync/atomic" "testing" "time" ) @@ -69,7 +70,6 @@ func TestNewRequiresCredentials(t *testing.T) { var ( _ = errors.New _ = url.Parse - _ = time.Now ) func TestMintEphemeralAuthKey_Success(t *testing.T) { @@ -140,3 +140,57 @@ func TestMintEphemeralAuthKey_APIError(t *testing.T) { t.Errorf("expected error") } } + +func TestWaitForDevice_FindsAfterDelay(t *testing.T) { + var listCalls int32 + mux := http.NewServeMux() + mux.HandleFunc("/api/v2/tailnet/test.example.com/devices", + func(w http.ResponseWriter, r *http.Request) { + n := atomic.AddInt32(&listCalls, 1) + devices := []map[string]any{} + if n >= 2 { // appear on second poll + devices = []map[string]any{{ + "nodeId": "node-1", + "id": "id-1", + "name": "vpn-us-west1-abc.test.example.com", + "hostname": "vpn-us-west1-abc", + "addresses": []string{"100.64.0.9"}, + }} + } + _ = json.NewEncoder(w).Encode(map[string]any{"devices": devices}) + }) + _, c := newTestClient(t, mux) + c.pollInterval = 10 * time.Millisecond // fast poll for tests + + dev, err := c.WaitForDevice(context.Background(), "vpn-us-west1-abc", 2*time.Second) + if err != nil { + t.Fatalf("WaitForDevice: %v", err) + } + if dev == nil || dev.Hostname != "vpn-us-west1-abc" { + t.Errorf("dev = %+v", dev) + } + if dev.TailscaleIP != "100.64.0.9" { + t.Errorf("TailscaleIP = %q", dev.TailscaleIP) + } + if atomic.LoadInt32(&listCalls) < 2 { + t.Errorf("expected >=2 list calls, got %d", listCalls) + } +} + +func TestWaitForDevice_Timeout(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/api/v2/tailnet/test.example.com/devices", + func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(map[string]any{"devices": []any{}}) + }) + _, c := newTestClient(t, mux) + c.pollInterval = 5 * time.Millisecond + + _, err := c.WaitForDevice(context.Background(), "missing", 50*time.Millisecond) + if err == nil { + t.Errorf("expected timeout error") + } +} + +// atomic import marker. +var _ = atomic.LoadInt32 From 92ce88727de5b303df5522617392f5f6b3c7778b Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 19:12:23 +0000 Subject: [PATCH 37/79] feat(tailscale): AuthorizeExitNode = SetAuthorized + SetSubnetRoutes --- internal/tailscale/v2client.go | 15 ++++++++++-- internal/tailscale/v2client_test.go | 36 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/internal/tailscale/v2client.go b/internal/tailscale/v2client.go index 6d16277..8861a9f 100644 --- a/internal/tailscale/v2client.go +++ b/internal/tailscale/v2client.go @@ -155,9 +155,20 @@ func toDevice(d tsv2.Device) *Device { return out } -// AuthorizeExitNode — Task 14 will implement. +// AuthorizeExitNode performs two underlying operations: +// 1. SetAuthorized(true) — only meaningful if the tailnet requires +// manual device approval. With preauth keys this is typically a +// no-op (already authorized), but calling it is idempotent. +// 2. SetSubnetRoutes(["0.0.0.0/0", "::/0"]) — enables the exit-node +// advertisement. func (c *tsClient) AuthorizeExitNode(ctx context.Context, deviceID string) error { - return errNotImplemented + if err := c.inner.Devices().SetAuthorized(ctx, deviceID, true); err != nil { + return fmt.Errorf("tailscale set authorized: %w", err) + } + if err := c.inner.Devices().SetSubnetRoutes(ctx, deviceID, []string{"0.0.0.0/0", "::/0"}); err != nil { + return fmt.Errorf("tailscale set subnet routes: %w", err) + } + return nil } // SetTags — Task 15 will implement. diff --git a/internal/tailscale/v2client_test.go b/internal/tailscale/v2client_test.go index 1503b28..87524f0 100644 --- a/internal/tailscale/v2client_test.go +++ b/internal/tailscale/v2client_test.go @@ -194,3 +194,39 @@ func TestWaitForDevice_Timeout(t *testing.T) { // atomic import marker. var _ = atomic.LoadInt32 + +func TestAuthorizeExitNode_CallsBothEndpoints(t *testing.T) { + var setAuthorizedCalled, setRoutesCalled bool + var routesBody map[string]any + + mux := http.NewServeMux() + mux.HandleFunc("/api/v2/device/node-1/authorized", + func(w http.ResponseWriter, r *http.Request) { + setAuthorizedCalled = true + w.WriteHeader(http.StatusOK) + }) + mux.HandleFunc("/api/v2/device/node-1/routes", + func(w http.ResponseWriter, r *http.Request) { + setRoutesCalled = true + _ = json.NewDecoder(r.Body).Decode(&routesBody) + _ = json.NewEncoder(w).Encode(map[string]any{ + "enabledRoutes": []string{"0.0.0.0/0", "::/0"}, + "advertisedRoutes": []string{"0.0.0.0/0", "::/0"}, + }) + }) + + _, c := newTestClient(t, mux) + if err := c.AuthorizeExitNode(context.Background(), "node-1"); err != nil { + t.Fatalf("AuthorizeExitNode: %v", err) + } + if !setAuthorizedCalled { + t.Errorf("expected /authorized to be called") + } + if !setRoutesCalled { + t.Errorf("expected /routes to be called") + } + routes, _ := routesBody["routes"].([]any) + if len(routes) != 2 { + t.Errorf("routes = %v", routes) + } +} From 341de163afa37a19ef0f863d1e6d4b9569116134 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 19:16:17 +0000 Subject: [PATCH 38/79] feat(tailscale): SetTags + DeleteDevice --- internal/tailscale/v2client.go | 18 ++++++----- internal/tailscale/v2client_test.go | 48 ++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/internal/tailscale/v2client.go b/internal/tailscale/v2client.go index 8861a9f..0fce384 100644 --- a/internal/tailscale/v2client.go +++ b/internal/tailscale/v2client.go @@ -15,10 +15,6 @@ import ( tsv2 "tailscale.com/client/tailscale/v2" ) -// errNotImplemented is returned by stub methods on tsClient until the -// corresponding task (12-15) supplies the real implementation. -var errNotImplemented = errors.New("tailscale: not implemented") - // Options configures the Tailscale client. type Options struct { // Tailnet is the organization name (the part before .ts.net or @@ -171,14 +167,20 @@ func (c *tsClient) AuthorizeExitNode(ctx context.Context, deviceID string) error return nil } -// SetTags — Task 15 will implement. +// SetTags replaces the device's tag set. func (c *tsClient) SetTags(ctx context.Context, deviceID string, tags []string) error { - return errNotImplemented + if err := c.inner.Devices().SetTags(ctx, deviceID, tags); err != nil { + return fmt.Errorf("tailscale set tags: %w", err) + } + return nil } -// DeleteDevice — Task 15 will implement. +// DeleteDevice removes the device from the tailnet. func (c *tsClient) DeleteDevice(ctx context.Context, deviceID string) error { - return errNotImplemented + if err := c.inner.Devices().Delete(ctx, deviceID); err != nil { + return fmt.Errorf("tailscale delete device: %w", err) + } + return nil } // Compile-time assertion. diff --git a/internal/tailscale/v2client_test.go b/internal/tailscale/v2client_test.go index 87524f0..fac760e 100644 --- a/internal/tailscale/v2client_test.go +++ b/internal/tailscale/v2client_test.go @@ -3,10 +3,8 @@ package tailscale import ( "context" "encoding/json" - "errors" "net/http" "net/http/httptest" - "net/url" "strings" "sync/atomic" "testing" @@ -66,12 +64,6 @@ func TestNewRequiresCredentials(t *testing.T) { } } -// Silence "imported and not used" for early tasks; later tasks use these. -var ( - _ = errors.New - _ = url.Parse -) - func TestMintEphemeralAuthKey_Success(t *testing.T) { var gotMethod, gotPath, gotAuth string var gotBody map[string]any @@ -192,9 +184,6 @@ func TestWaitForDevice_Timeout(t *testing.T) { } } -// atomic import marker. -var _ = atomic.LoadInt32 - func TestAuthorizeExitNode_CallsBothEndpoints(t *testing.T) { var setAuthorizedCalled, setRoutesCalled bool var routesBody map[string]any @@ -230,3 +219,40 @@ func TestAuthorizeExitNode_CallsBothEndpoints(t *testing.T) { t.Errorf("routes = %v", routes) } } + +func TestSetTags_Success(t *testing.T) { + var got map[string]any + mux := http.NewServeMux() + mux.HandleFunc("/api/v2/device/node-1/tags", + func(w http.ResponseWriter, r *http.Request) { + _ = json.NewDecoder(r.Body).Decode(&got) + w.WriteHeader(http.StatusOK) + }) + _, c := newTestClient(t, mux) + if err := c.SetTags(context.Background(), "node-1", []string{"tag:exit-node", "tag:home"}); err != nil { + t.Fatalf("SetTags: %v", err) + } + tags, _ := got["tags"].([]any) + if len(tags) != 2 { + t.Errorf("tags = %v", tags) + } +} + +func TestDeleteDevice_Success(t *testing.T) { + var deleted bool + mux := http.NewServeMux() + mux.HandleFunc("/api/v2/device/node-1", + func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodDelete { + deleted = true + w.WriteHeader(http.StatusOK) + } + }) + _, c := newTestClient(t, mux) + if err := c.DeleteDevice(context.Background(), "node-1"); err != nil { + t.Fatalf("DeleteDevice: %v", err) + } + if !deleted { + t.Errorf("expected DELETE /api/v2/device/node-1") + } +} From 4244551a53579232995d2138d1c0433df955f0ff Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 12 May 2026 19:50:00 +0000 Subject: [PATCH 39/79] feat(gcp): constructor with SA-JSON or ADC fallback --- go.mod | 22 ++++++ go.sum | 45 ++++++++++++ internal/gcp/compute.go | 134 +++++++++++++++++++++++++++++++++++ internal/gcp/compute_test.go | 32 +++++++++ 4 files changed, 233 insertions(+) create mode 100644 internal/gcp/compute.go create mode 100644 internal/gcp/compute_test.go diff --git a/go.mod b/go.mod index 634ee2b..18794b7 100644 --- a/go.mod +++ b/go.mod @@ -9,10 +9,32 @@ require ( ) require ( + cloud.google.com/go/auth v0.20.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/compute v1.62.0 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/s2a-go v0.1.9 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.15 // indirect + github.com/googleapis/gax-go/v2 v2.22.0 // indirect github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect + go.opentelemetry.io/otel v1.43.0 // indirect + go.opentelemetry.io/otel/metric v1.43.0 // indirect + go.opentelemetry.io/otel/trace v1.43.0 // indirect + golang.org/x/crypto v0.50.0 // indirect + golang.org/x/net v0.53.0 // indirect golang.org/x/oauth2 v0.36.0 // indirect golang.org/x/sys v0.43.0 // indirect + golang.org/x/text v0.36.0 // indirect google.golang.org/api v0.278.0 // indirect + google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260427160629-7cedc36a6bc4 // indirect + google.golang.org/grpc v1.80.0 // indirect google.golang.org/protobuf v1.36.11 // indirect ) diff --git a/go.sum b/go.sum index b532f94..d013c28 100644 --- a/go.sum +++ b/go.sum @@ -1,27 +1,72 @@ cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= +cloud.google.com/go/auth v0.20.0 h1:kXTssoVb4azsVDoUiF8KvxAqrsQcQtB53DcSgta74CA= +cloud.google.com/go/auth v0.20.0/go.mod h1:942/yi/itH1SsmpyrbnTMDgGfdy2BUqIKyd0cyYLc5Q= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/compute v1.62.0 h1:tJ7lKJ8YEVa6vZX03Jc8o1YePbjKDOQhDw1BscMZ1bs= cloud.google.com/go/compute v1.62.0/go.mod h1:Xm6PbsLgBpAg4va77ljbBdpMjzuU+uPp5Ze2dnZq7lw= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= +github.com/googleapis/enterprise-certificate-proxy v0.3.15 h1:xolVQTEXusUcAA5UgtyRLjelpFFHWlPQ4XfWGc7MBas= +github.com/googleapis/enterprise-certificate-proxy v0.3.15/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= +github.com/googleapis/gax-go/v2 v2.22.0 h1:PjIWBpgGIVKGoCXuiCoP64altEJCj3/Ei+kSU5vlZD4= +github.com/googleapis/gax-go/v2 v2.22.0/go.mod h1:irWBbALSr0Sk3qlqb9SyJ1h68WjgeFuiOzI4Rqw5+aY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 h1:erxeiTyq+nw4Cz5+hLDkOwNF5/9IQWCQPv0gpb3+QHU= github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5/go.mod h1:DFSS3NAGHthKo1gTlmEcSBiZrRJXi28rLNd/1udP1c8= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0/go.mod h1:C2NGBr+kAB4bk3xtMXfZ94gqFDtg/GkI7e9zqGh5Beg= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= +golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= +golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= +golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs= golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= google.golang.org/api v0.278.0 h1:W7jiRvRi53VYFfZ/HoZjQBtJk7gOFbHD8ot1RzVZU6E= google.golang.org/api v0.278.0/go.mod h1:B9TqLBwJqVjp1mtt7WeoQwWRwvu/400y5lETOql+giQ= +google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 h1:XzmzkmB14QhVhgnawEVsOn6OFsnpyxNPRY9QV01dNB0= +google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:L43LFes82YgSonw6iTXTxXUX1OlULt4AQtkik4ULL/I= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260427160629-7cedc36a6bc4 h1:tEkOQcXgF6dH1G+MVKZrfpYvozGrzb91k6ha7jireSM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260427160629-7cedc36a6bc4/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go new file mode 100644 index 0000000..6fbbbff --- /dev/null +++ b/internal/gcp/compute.go @@ -0,0 +1,134 @@ +// Package gcp's concrete Provider impl using cloud.google.com/go/compute/apiv1. +// The interface is declared in provider.go (Plan 1); this file implements +// it against real Compute Engine. +package gcp + +import ( + "context" + "errors" + "fmt" + "strings" + + compute "cloud.google.com/go/compute/apiv1" + "google.golang.org/api/option" +) + +// errNotImplemented is returned by stub methods that will be filled in by +// later tasks in Plan 2 (Tasks 18-20). +var errNotImplemented = errors.New("gcp: not implemented") + +// Options configures the GCP Provider. +type Options struct { + // Project is the GCP project ID. Required. + Project string + // Region is the default region used when ProvisionOpts.Region is + // empty. Optional but typically set from config. + Region string + // CredentialsJSON, if non-empty, is parsed as a service-account + // JSON key. Otherwise the underlying clients fall back to ADC + // (GOOGLE_APPLICATION_CREDENTIALS or the metadata server). + CredentialsJSON []byte + // Network is the VPC network name to attach instances to (default + // "default" if empty). + Network string + // InstallScriptURL is the URL pushed to VM metadata as + // startup-script-url. + InstallScriptURL string + // DiskSizeGB is the boot disk size in GB; default 10. + DiskSizeGB int64 +} + +// gcpProvider implements Provider. +type gcpProvider struct { + instances *compute.InstancesClient + zones *compute.ZonesClient + project string + region string + network string + scriptURL string + diskGB int64 +} + +// Compile-time assertion. +var _ Provider = (*gcpProvider)(nil) + +// New constructs a Provider. With CredentialsJSON empty, falls back to +// Application Default Credentials. +func New(ctx context.Context, opts Options) (Provider, error) { + if strings.TrimSpace(opts.Project) == "" { + return nil, errors.New("gcp: Project required") + } + if opts.DiskSizeGB == 0 { + opts.DiskSizeGB = 10 + } + if strings.TrimSpace(opts.Network) == "" { + opts.Network = "default" + } + + var clientOpts []option.ClientOption + if len(opts.CredentialsJSON) > 0 { + clientOpts = append(clientOpts, option.WithCredentialsJSON(opts.CredentialsJSON)) + } + + inst, err := compute.NewInstancesRESTClient(ctx, clientOpts...) + if err != nil { + return nil, fmt.Errorf("gcp: new instances client: %w", err) + } + zones, err := compute.NewZonesRESTClient(ctx, clientOpts...) + if err != nil { + _ = inst.Close() + return nil, fmt.Errorf("gcp: new zones client: %w", err) + } + return &gcpProvider{ + instances: inst, + zones: zones, + project: opts.Project, + region: opts.Region, + network: opts.Network, + scriptURL: opts.InstallScriptURL, + diskGB: opts.DiskSizeGB, + }, nil +} + +// Close releases the underlying gRPC connections. Exposed for tests + +// graceful shutdown. +func (p *gcpProvider) Close() error { + var err error + if e := p.instances.Close(); e != nil { + err = e + } + if e := p.zones.Close(); e != nil && err == nil { + err = e + } + return err +} + +// Provision — Task 18 will implement. +func (p *gcpProvider) Provision(ctx context.Context, opts ProvisionOpts) (*ExitNode, error) { + return nil, errNotImplemented +} + +// Start — Task 19 will implement. +func (p *gcpProvider) Start(ctx context.Context, name string) error { + return errNotImplemented +} + +// Stop — Task 19 will implement. +func (p *gcpProvider) Stop(ctx context.Context, name string) error { + return errNotImplemented +} + +// Destroy — Task 19 will implement. +func (p *gcpProvider) Destroy(ctx context.Context, name string) error { + return errNotImplemented +} + +// List — Task 20 will implement. +func (p *gcpProvider) List(ctx context.Context) ([]*ExitNode, error) { + return nil, errNotImplemented +} + +// Get — Task 20 will implement. +func (p *gcpProvider) Get(ctx context.Context, name string) (*ExitNode, error) { + return nil, errNotImplemented +} diff --git a/internal/gcp/compute_test.go b/internal/gcp/compute_test.go new file mode 100644 index 0000000..35b931e --- /dev/null +++ b/internal/gcp/compute_test.go @@ -0,0 +1,32 @@ +package gcp + +import ( + "context" + "testing" +) + +func TestNewWithoutCreds_UsesADC(t *testing.T) { + // Without GCP_CREDENTIALS_JSON set, New should still succeed — the + // underlying client lazily resolves ADC. We don't make any API + // calls in this test. + t.Setenv("GCP_CREDENTIALS_JSON", "") + t.Setenv("GOOGLE_APPLICATION_CREDENTIALS", "") + p, err := New(context.Background(), Options{ + Project: "test-proj", + Region: "us-west1", + }) + // New may or may not return an error here depending on whether + // ADC is available in the test env. The important assertions are: + // (a) it doesn't panic, and (b) when it succeeds, returns + // non-nil. CI runs in environments without ADC, so we tolerate + // either outcome. + if err == nil && p == nil { + t.Errorf("nil provider with nil error") + } +} + +func TestNewRequiresProject(t *testing.T) { + if _, err := New(context.Background(), Options{Region: "us-west1"}); err == nil { + t.Errorf("expected error for empty Project") + } +} From 0b23c7d005e4ac671093f3fe8aca1035a8de4891 Mon Sep 17 00:00:00 2001 From: Iker Date: Mon, 18 May 2026 18:08:50 +0000 Subject: [PATCH 40/79] feat(gcp): buildInstanceResource with shape unit test --- internal/gcp/compute.go | 55 ++++++++++++++++++++++++ internal/gcp/compute_test.go | 81 ++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index 6fbbbff..bf7d20d 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -10,7 +10,9 @@ import ( "strings" compute "cloud.google.com/go/compute/apiv1" + "cloud.google.com/go/compute/apiv1/computepb" "google.golang.org/api/option" + "google.golang.org/protobuf/proto" ) // errNotImplemented is returned by stub methods that will be filled in by @@ -132,3 +134,56 @@ func (p *gcpProvider) List(ctx context.Context) ([]*ExitNode, error) { func (p *gcpProvider) Get(ctx context.Context, name string) (*ExitNode, error) { return nil, errNotImplemented } + +// buildInstanceResource constructs the *computepb.Instance that +// Provision passes to Insert. Separated so we can shape-test it +// without hitting the API. +func (p *gcpProvider) buildInstanceResource(opts ProvisionOpts) *computepb.Instance { + labels := map[string]string{ + "managed-by": "exitnode", + "region": opts.Region, + } + + scriptURL := opts.InstallScriptURL + if scriptURL == "" { + scriptURL = p.scriptURL + } + diskGB := int64(opts.DiskSizeGB) + if diskGB == 0 { + diskGB = p.diskGB + } + network := opts.Network + if network == "" { + network = p.network + } + + return &computepb.Instance{ + Name: proto.String(opts.Name), + MachineType: proto.String(fmt.Sprintf("zones/%s/machineTypes/%s", opts.Zone, opts.MachineType)), + Labels: labels, + Disks: []*computepb.AttachedDisk{{ + Boot: proto.Bool(true), + AutoDelete: proto.Bool(true), + Type: proto.String("PERSISTENT"), + InitializeParams: &computepb.AttachedDiskInitializeParams{ + DiskSizeGb: proto.Int64(diskGB), + SourceImage: proto.String("projects/debian-cloud/global/images/family/debian-12"), + }, + }}, + NetworkInterfaces: []*computepb.NetworkInterface{{ + Network: proto.String("global/networks/" + network), + AccessConfigs: []*computepb.AccessConfig{{ + Type: proto.String("ONE_TO_ONE_NAT"), + Name: proto.String("External NAT"), + }}, + }}, + Metadata: &computepb.Metadata{ + Items: []*computepb.Items{ + {Key: proto.String("startup-script-url"), Value: proto.String(scriptURL)}, + {Key: proto.String("tailscale-auth-key"), Value: proto.String(opts.TailscaleAuthKey)}, + {Key: proto.String("tailscale-hostname"), Value: proto.String(opts.Hostname)}, + {Key: proto.String("tailscale-tags"), Value: proto.String(strings.Join(opts.Tags, ","))}, + }, + }, + } +} diff --git a/internal/gcp/compute_test.go b/internal/gcp/compute_test.go index 35b931e..fda6b95 100644 --- a/internal/gcp/compute_test.go +++ b/internal/gcp/compute_test.go @@ -30,3 +30,84 @@ func TestNewRequiresProject(t *testing.T) { t.Errorf("expected error for empty Project") } } + +func TestBuildInstanceResource_ShapesAllFields(t *testing.T) { + p := &gcpProvider{ + project: "test-proj", + region: "us-west1", + network: "default", + scriptURL: "https://example.com/install.sh", + diskGB: 20, + } + inst := p.buildInstanceResource(ProvisionOpts{ + Name: "vpn-test-1", + Region: "us-west1", + Zone: "us-west1-a", + MachineType: "e2-micro", + Hostname: "vpn-test-1", + TailscaleAuthKey: "tskey-secret", + Tags: []string{"tag:exit-node", "tag:home"}, + InstallScriptURL: "https://example.com/install.sh", + DiskSizeGB: 20, + Network: "default", + }) + + if got := inst.GetName(); got != "vpn-test-1" { + t.Errorf("Name = %q", got) + } + if got := inst.GetMachineType(); got != "zones/us-west1-a/machineTypes/e2-micro" { + t.Errorf("MachineType = %q", got) + } + labels := inst.GetLabels() + if labels["managed-by"] != "exitnode" { + t.Errorf("missing managed-by label: %v", labels) + } + if labels["region"] != "us-west1" { + t.Errorf("missing region label: %v", labels) + } + + // Boot disk: SourceImage points at Debian 12 family. + if len(inst.Disks) != 1 { + t.Fatalf("disks = %d, want 1", len(inst.Disks)) + } + disk := inst.Disks[0] + if !disk.GetBoot() { + t.Errorf("boot disk not flagged Boot=true") + } + if got := disk.InitializeParams.GetSourceImage(); got != "projects/debian-cloud/global/images/family/debian-12" { + t.Errorf("SourceImage = %q", got) + } + if got := disk.InitializeParams.GetDiskSizeGb(); got != 20 { + t.Errorf("DiskSizeGb = %d", got) + } + + // Public-IP NetworkInterface with one AccessConfig. + if len(inst.NetworkInterfaces) != 1 { + t.Fatalf("nics = %d, want 1", len(inst.NetworkInterfaces)) + } + nic := inst.NetworkInterfaces[0] + if nic.GetNetwork() != "global/networks/default" { + t.Errorf("Network = %q", nic.GetNetwork()) + } + if len(nic.AccessConfigs) != 1 || nic.AccessConfigs[0].GetType() != "ONE_TO_ONE_NAT" { + t.Errorf("AccessConfigs = %v", nic.AccessConfigs) + } + + // Metadata: startup-script-url + tailscale-auth-key + ...-hostname + ...-tags + got := map[string]string{} + for _, it := range inst.Metadata.Items { + got[it.GetKey()] = it.GetValue() + } + if got["startup-script-url"] != "https://example.com/install.sh" { + t.Errorf("startup-script-url = %q", got["startup-script-url"]) + } + if got["tailscale-auth-key"] != "tskey-secret" { + t.Errorf("tailscale-auth-key missing") + } + if got["tailscale-hostname"] != "vpn-test-1" { + t.Errorf("tailscale-hostname missing") + } + if got["tailscale-tags"] != "tag:exit-node,tag:home" { + t.Errorf("tailscale-tags = %q", got["tailscale-tags"]) + } +} From 53064be5e2cdceb38aa4f975620036d28d0c9988 Mon Sep 17 00:00:00 2001 From: Iker Date: Mon, 18 May 2026 18:20:39 +0000 Subject: [PATCH 41/79] =?UTF-8?q?feat(gcp):=20Provision=20wires=20Insert?= =?UTF-8?q?=20=E2=86=92=20Wait=20=E2=86=92=20Get?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/gcp/compute.go | 87 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index bf7d20d..b3a7708 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "strings" + "time" compute "cloud.google.com/go/compute/apiv1" "cloud.google.com/go/compute/apiv1/computepb" @@ -105,9 +106,91 @@ func (p *gcpProvider) Close() error { return err } -// Provision — Task 18 will implement. +// Provision creates a new VM with the configured labels + metadata, +// waits for it to reach RUNNING, and returns the populated ExitNode +// record (including the assigned public IP). +// +// Zone selection: if opts.Zone is empty, the caller is expected to +// have resolved a zone first (e.g., via PickZoneInRegion). v0.1 does +// not implement auto-pick inside Provision because zone-picking +// touches a separate API surface (ZonesClient). func (p *gcpProvider) Provision(ctx context.Context, opts ProvisionOpts) (*ExitNode, error) { - return nil, errNotImplemented + if opts.Zone == "" { + return nil, fmt.Errorf("gcp: Provision requires opts.Zone (auto-pick TODO)") + } + inst := p.buildInstanceResource(opts) + + op, err := p.instances.Insert(ctx, &computepb.InsertInstanceRequest{ + Project: p.project, + Zone: opts.Zone, + InstanceResource: inst, + }) + if err != nil { + return nil, fmt.Errorf("gcp: instances.Insert: %w", err) + } + if err := op.Wait(ctx); err != nil { + return nil, fmt.Errorf("gcp: wait for insert: %w", err) + } + + got, err := p.instances.Get(ctx, &computepb.GetInstanceRequest{ + Project: p.project, Zone: opts.Zone, Instance: opts.Name, + }) + if err != nil { + return nil, fmt.Errorf("gcp: instances.Get after insert: %w", err) + } + return instanceToExitNode(got), nil +} + +// instanceToExitNode converts a Compute API Instance to our ExitNode. +func instanceToExitNode(inst *computepb.Instance) *ExitNode { + out := &ExitNode{ + Name: inst.GetName(), + Zone: lastPathSegment(inst.GetZone()), + MachineType: lastPathSegment(inst.GetMachineType()), + State: parseInstanceStatus(inst.GetStatus()), + } + out.Region = inst.GetLabels()["region"] + if t := inst.GetCreationTimestamp(); t != "" { + // RFC3339 from compute API. + if parsed, err := time.Parse(time.RFC3339, t); err == nil { + out.CreatedAt = parsed + } + } + for _, nic := range inst.GetNetworkInterfaces() { + for _, ac := range nic.GetAccessConfigs() { + if ip := ac.GetNatIP(); ip != "" { + out.PublicIP = ip + break + } + } + } + return out +} + +// parseInstanceStatus maps Compute's status strings to our State enum. +func parseInstanceStatus(s string) State { + switch s { + case "PROVISIONING", "STAGING": + return StatePending + case "RUNNING": + return StateRunning + case "STOPPING", "STOPPED", "SUSPENDED": + return StateStopped + case "TERMINATED": + return StateTerminated + default: + return StateUnknown + } +} + +// lastPathSegment returns the substring after the final "/" — used to +// trim resource URLs like ".../zones/us-west1-a" down to "us-west1-a". +func lastPathSegment(s string) string { + i := strings.LastIndex(s, "/") + if i < 0 { + return s + } + return s[i+1:] } // Start — Task 19 will implement. From 2af7200af0d5435dfaf0415d470e255624b44aa5 Mon Sep 17 00:00:00 2001 From: Iker Date: Mon, 18 May 2026 18:33:08 +0000 Subject: [PATCH 42/79] fix(gcp): instanceToExitNode breaks on first NatIP + helper tests --- internal/gcp/compute.go | 3 +- internal/gcp/compute_test.go | 69 ++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index b3a7708..b492af8 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -156,11 +156,12 @@ func instanceToExitNode(inst *computepb.Instance) *ExitNode { out.CreatedAt = parsed } } +outer: for _, nic := range inst.GetNetworkInterfaces() { for _, ac := range nic.GetAccessConfigs() { if ip := ac.GetNatIP(); ip != "" { out.PublicIP = ip - break + break outer } } } diff --git a/internal/gcp/compute_test.go b/internal/gcp/compute_test.go index fda6b95..4a5f00e 100644 --- a/internal/gcp/compute_test.go +++ b/internal/gcp/compute_test.go @@ -3,6 +3,9 @@ package gcp import ( "context" "testing" + + "cloud.google.com/go/compute/apiv1/computepb" + "google.golang.org/protobuf/proto" ) func TestNewWithoutCreds_UsesADC(t *testing.T) { @@ -111,3 +114,69 @@ func TestBuildInstanceResource_ShapesAllFields(t *testing.T) { t.Errorf("tailscale-tags = %q", got["tailscale-tags"]) } } + +func TestParseInstanceStatus(t *testing.T) { + cases := []struct { + in string + want State + }{ + {"PROVISIONING", StatePending}, + {"STAGING", StatePending}, + {"RUNNING", StateRunning}, + {"STOPPING", StateStopped}, + {"STOPPED", StateStopped}, + {"SUSPENDED", StateStopped}, + {"TERMINATED", StateTerminated}, + {"", StateUnknown}, + {"weird-new-state", StateUnknown}, + } + for _, c := range cases { + if got := parseInstanceStatus(c.in); got != c.want { + t.Errorf("parseInstanceStatus(%q) = %v, want %v", c.in, got, c.want) + } + } +} + +func TestLastPathSegment(t *testing.T) { + cases := []struct{ in, want string }{ + {"projects/p/zones/us-west1-a", "us-west1-a"}, + {"plain", "plain"}, + {"trailing/", ""}, + {"", ""}, + } + for _, c := range cases { + if got := lastPathSegment(c.in); got != c.want { + t.Errorf("lastPathSegment(%q) = %q, want %q", c.in, got, c.want) + } + } +} + +func TestInstanceToExitNode_PublicIPFromFirstAccessConfig(t *testing.T) { + inst := &computepb.Instance{ + Name: proto.String("vpn-1"), + Zone: proto.String("https://www.googleapis.com/.../zones/us-west1-a"), + MachineType: proto.String("https://www.googleapis.com/.../zones/us-west1-a/machineTypes/e2-micro"), + Status: proto.String("RUNNING"), + Labels: map[string]string{"region": "us-west1"}, + NetworkInterfaces: []*computepb.NetworkInterface{ + {AccessConfigs: []*computepb.AccessConfig{{NatIP: proto.String("1.2.3.4")}}}, + {AccessConfigs: []*computepb.AccessConfig{{NatIP: proto.String("5.6.7.8")}}}, + }, + } + got := instanceToExitNode(inst) + if got.PublicIP != "1.2.3.4" { + t.Errorf("PublicIP = %q, want 1.2.3.4 (first NIC's first AccessConfig)", got.PublicIP) + } + if got.Zone != "us-west1-a" { + t.Errorf("Zone = %q", got.Zone) + } + if got.MachineType != "e2-micro" { + t.Errorf("MachineType = %q", got.MachineType) + } + if got.State != StateRunning { + t.Errorf("State = %v, want StateRunning", got.State) + } + if got.Region != "us-west1" { + t.Errorf("Region = %q", got.Region) + } +} From d36e9bba4043ec09101b3563a6f190bff847fc57 Mon Sep 17 00:00:00 2001 From: Iker Date: Mon, 18 May 2026 18:40:29 +0000 Subject: [PATCH 43/79] feat(gcp): Start, Stop, Destroy with AggregatedList zone resolution --- internal/gcp/compute.go | 73 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 6 deletions(-) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index b492af8..31e6308 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -12,6 +12,7 @@ import ( compute "cloud.google.com/go/compute/apiv1" "cloud.google.com/go/compute/apiv1/computepb" + "google.golang.org/api/iterator" "google.golang.org/api/option" "google.golang.org/protobuf/proto" ) @@ -194,19 +195,79 @@ func lastPathSegment(s string) string { return s[i+1:] } -// Start — Task 19 will implement. +// Start brings a stopped VM back online. func (p *gcpProvider) Start(ctx context.Context, name string) error { - return errNotImplemented + zone, err := p.findZone(ctx, name) + if err != nil { + return err + } + op, err := p.instances.Start(ctx, &computepb.StartInstanceRequest{ + Project: p.project, Zone: zone, Instance: name, + }) + if err != nil { + return fmt.Errorf("gcp: instances.Start: %w", err) + } + return op.Wait(ctx) } -// Stop — Task 19 will implement. +// Stop shuts down a VM (preserves disk). func (p *gcpProvider) Stop(ctx context.Context, name string) error { - return errNotImplemented + zone, err := p.findZone(ctx, name) + if err != nil { + return err + } + op, err := p.instances.Stop(ctx, &computepb.StopInstanceRequest{ + Project: p.project, Zone: zone, Instance: name, + }) + if err != nil { + return fmt.Errorf("gcp: instances.Stop: %w", err) + } + return op.Wait(ctx) } -// Destroy — Task 19 will implement. +// Destroy deletes a VM permanently. func (p *gcpProvider) Destroy(ctx context.Context, name string) error { - return errNotImplemented + zone, err := p.findZone(ctx, name) + if err != nil { + // If it's already gone, that's fine for rotate cleanup. + if strings.Contains(err.Error(), "not found") { + return nil + } + return err + } + op, err := p.instances.Delete(ctx, &computepb.DeleteInstanceRequest{ + Project: p.project, Zone: zone, Instance: name, + }) + if err != nil { + return fmt.Errorf("gcp: instances.Delete: %w", err) + } + return op.Wait(ctx) +} + +// findZone walks AggregatedList to locate the zone of a managed +// instance by name. Returns the zone (e.g., "us-west1-a") or an error +// if no matching instance exists. +func (p *gcpProvider) findZone(ctx context.Context, name string) (string, error) { + it := p.instances.AggregatedList(ctx, &computepb.AggregatedListInstancesRequest{ + Project: p.project, + Filter: proto.String(`labels.managed-by=exitnode`), + }) + for { + pair, err := it.Next() + if errors.Is(err, iterator.Done) { + break + } + if err != nil { + return "", fmt.Errorf("gcp: aggregated list: %w", err) + } + // Pair is (zone-key, *InstancesScopedList). Zone key looks like "zones/us-west1-a". + for _, inst := range pair.Value.GetInstances() { + if inst.GetName() == name { + return lastPathSegment(pair.Key), nil + } + } + } + return "", fmt.Errorf("gcp: instance %q not found among managed nodes", name) } // List — Task 20 will implement. From 6ea4b9c0949134164998b83e0390e8d1f944ed6e Mon Sep 17 00:00:00 2001 From: Iker Date: Mon, 18 May 2026 19:00:17 +0000 Subject: [PATCH 44/79] refactor(gcp): sentinel ErrInstanceNotFound + hoist managed-by constants --- internal/gcp/compute.go | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index 31e6308..f840a0b 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -21,6 +21,22 @@ import ( // later tasks in Plan 2 (Tasks 18-20). var errNotImplemented = errors.New("gcp: not implemented") +// ErrInstanceNotFound is returned by findZone when no managed instance +// matches the given name. Destroy uses errors.Is to treat this as an +// idempotent no-op during rotate cleanup. +var ErrInstanceNotFound = errors.New("gcp: instance not found") + +const ( + // labelManagedBy is the GCP instance label key we apply to every + // managed exit-node so we can filter aggregated lists by it. + labelManagedBy = "managed-by" + // labelManagedByValue is the value paired with labelManagedBy. + labelManagedByValue = "exitnode" + // filterManagedByExitnode is the AggregatedList filter expression + // matching only instances with the managed-by=exitnode label. + filterManagedByExitnode = "labels." + labelManagedBy + "=" + labelManagedByValue +) + // Options configures the GCP Provider. type Options struct { // Project is the GCP project ID. Required. @@ -230,7 +246,7 @@ func (p *gcpProvider) Destroy(ctx context.Context, name string) error { zone, err := p.findZone(ctx, name) if err != nil { // If it's already gone, that's fine for rotate cleanup. - if strings.Contains(err.Error(), "not found") { + if errors.Is(err, ErrInstanceNotFound) { return nil } return err @@ -250,7 +266,7 @@ func (p *gcpProvider) Destroy(ctx context.Context, name string) error { func (p *gcpProvider) findZone(ctx context.Context, name string) (string, error) { it := p.instances.AggregatedList(ctx, &computepb.AggregatedListInstancesRequest{ Project: p.project, - Filter: proto.String(`labels.managed-by=exitnode`), + Filter: proto.String(filterManagedByExitnode), }) for { pair, err := it.Next() @@ -267,7 +283,7 @@ func (p *gcpProvider) findZone(ctx context.Context, name string) (string, error) } } } - return "", fmt.Errorf("gcp: instance %q not found among managed nodes", name) + return "", fmt.Errorf("%w: %q among managed nodes", ErrInstanceNotFound, name) } // List — Task 20 will implement. @@ -285,8 +301,8 @@ func (p *gcpProvider) Get(ctx context.Context, name string) (*ExitNode, error) { // without hitting the API. func (p *gcpProvider) buildInstanceResource(opts ProvisionOpts) *computepb.Instance { labels := map[string]string{ - "managed-by": "exitnode", - "region": opts.Region, + labelManagedBy: labelManagedByValue, + "region": opts.Region, } scriptURL := opts.InstallScriptURL From 9bef0b3a1b2bc9d5ae80ccc153dc259e2decffc8 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 19 May 2026 18:25:16 +0000 Subject: [PATCH 45/79] feat(gcp): List (AggregatedList) + Get (by name) --- internal/gcp/compute.go | 44 +++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index f840a0b..aeed40c 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -17,10 +17,6 @@ import ( "google.golang.org/protobuf/proto" ) -// errNotImplemented is returned by stub methods that will be filled in by -// later tasks in Plan 2 (Tasks 18-20). -var errNotImplemented = errors.New("gcp: not implemented") - // ErrInstanceNotFound is returned by findZone when no managed instance // matches the given name. Destroy uses errors.Is to treat this as an // idempotent no-op during rotate cleanup. @@ -286,14 +282,46 @@ func (p *gcpProvider) findZone(ctx context.Context, name string) (string, error) return "", fmt.Errorf("%w: %q among managed nodes", ErrInstanceNotFound, name) } -// List — Task 20 will implement. +// List returns all VMs managed by exitnode across all zones. func (p *gcpProvider) List(ctx context.Context) ([]*ExitNode, error) { - return nil, errNotImplemented + it := p.instances.AggregatedList(ctx, &computepb.AggregatedListInstancesRequest{ + Project: p.project, + Filter: proto.String(filterManagedByExitnode), + }) + var out []*ExitNode + for { + pair, err := it.Next() + if errors.Is(err, iterator.Done) { + break + } + if err != nil { + return nil, fmt.Errorf("gcp: aggregated list: %w", err) + } + for _, inst := range pair.Value.GetInstances() { + out = append(out, instanceToExitNode(inst)) + } + } + return out, nil } -// Get — Task 20 will implement. +// Get fetches a single managed instance by name. Returns (nil, nil) if +// not found — the caller distinguishes "missing" from "error" by +// inspecting both return values. func (p *gcpProvider) Get(ctx context.Context, name string) (*ExitNode, error) { - return nil, errNotImplemented + zone, err := p.findZone(ctx, name) + if err != nil { + if errors.Is(err, ErrInstanceNotFound) { + return nil, nil + } + return nil, err + } + got, err := p.instances.Get(ctx, &computepb.GetInstanceRequest{ + Project: p.project, Zone: zone, Instance: name, + }) + if err != nil { + return nil, fmt.Errorf("gcp: instances.Get: %w", err) + } + return instanceToExitNode(got), nil } // buildInstanceResource constructs the *computepb.Instance that From 4c86b886a50ff7db02a749a742ee13e5045c7428 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 19 May 2026 18:32:12 +0000 Subject: [PATCH 46/79] refactor(gcp): findInstance returns proto to drop double-fetch in Get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renamed findZone → findInstance; the function now returns both the zone string and the matched *computepb.Instance proto from the AggregatedList scan. Start, Stop, and Destroy discard the proto with _, while Get uses it directly, eliminating the redundant instances.Get round-trip that previously followed every findZone call. The (nil, nil) contract for a missing instance is also now documented on the Provider.Get interface method. --- internal/gcp/compute.go | 43 ++++++++++++++++++---------------------- internal/gcp/provider.go | 3 +++ 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index aeed40c..c60bba2 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -17,7 +17,7 @@ import ( "google.golang.org/protobuf/proto" ) -// ErrInstanceNotFound is returned by findZone when no managed instance +// ErrInstanceNotFound is returned by findInstance when no managed instance // matches the given name. Destroy uses errors.Is to treat this as an // idempotent no-op during rotate cleanup. var ErrInstanceNotFound = errors.New("gcp: instance not found") @@ -209,7 +209,7 @@ func lastPathSegment(s string) string { // Start brings a stopped VM back online. func (p *gcpProvider) Start(ctx context.Context, name string) error { - zone, err := p.findZone(ctx, name) + zone, _, err := p.findInstance(ctx, name) if err != nil { return err } @@ -224,7 +224,7 @@ func (p *gcpProvider) Start(ctx context.Context, name string) error { // Stop shuts down a VM (preserves disk). func (p *gcpProvider) Stop(ctx context.Context, name string) error { - zone, err := p.findZone(ctx, name) + zone, _, err := p.findInstance(ctx, name) if err != nil { return err } @@ -239,7 +239,7 @@ func (p *gcpProvider) Stop(ctx context.Context, name string) error { // Destroy deletes a VM permanently. func (p *gcpProvider) Destroy(ctx context.Context, name string) error { - zone, err := p.findZone(ctx, name) + zone, _, err := p.findInstance(ctx, name) if err != nil { // If it's already gone, that's fine for rotate cleanup. if errors.Is(err, ErrInstanceNotFound) { @@ -256,30 +256,31 @@ func (p *gcpProvider) Destroy(ctx context.Context, name string) error { return op.Wait(ctx) } -// findZone walks AggregatedList to locate the zone of a managed -// instance by name. Returns the zone (e.g., "us-west1-a") or an error -// if no matching instance exists. -func (p *gcpProvider) findZone(ctx context.Context, name string) (string, error) { +// findInstance walks AggregatedList to locate a managed instance by +// name. Returns the zone (e.g., "us-west1-a") and the matching +// *computepb.Instance proto, or ErrInstanceNotFound (wrapped) if no +// managed instance with that name exists. +func (p *gcpProvider) findInstance(ctx context.Context, name string) (zone string, inst *computepb.Instance, err error) { it := p.instances.AggregatedList(ctx, &computepb.AggregatedListInstancesRequest{ Project: p.project, Filter: proto.String(filterManagedByExitnode), }) for { - pair, err := it.Next() - if errors.Is(err, iterator.Done) { + pair, iterErr := it.Next() + if errors.Is(iterErr, iterator.Done) { break } - if err != nil { - return "", fmt.Errorf("gcp: aggregated list: %w", err) + if iterErr != nil { + return "", nil, fmt.Errorf("gcp: aggregated list: %w", iterErr) } // Pair is (zone-key, *InstancesScopedList). Zone key looks like "zones/us-west1-a". - for _, inst := range pair.Value.GetInstances() { - if inst.GetName() == name { - return lastPathSegment(pair.Key), nil + for _, candidate := range pair.Value.GetInstances() { + if candidate.GetName() == name { + return lastPathSegment(pair.Key), candidate, nil } } } - return "", fmt.Errorf("%w: %q among managed nodes", ErrInstanceNotFound, name) + return "", nil, fmt.Errorf("%w: %q among managed nodes", ErrInstanceNotFound, name) } // List returns all VMs managed by exitnode across all zones. @@ -308,20 +309,14 @@ func (p *gcpProvider) List(ctx context.Context) ([]*ExitNode, error) { // not found — the caller distinguishes "missing" from "error" by // inspecting both return values. func (p *gcpProvider) Get(ctx context.Context, name string) (*ExitNode, error) { - zone, err := p.findZone(ctx, name) + _, inst, err := p.findInstance(ctx, name) if err != nil { if errors.Is(err, ErrInstanceNotFound) { return nil, nil } return nil, err } - got, err := p.instances.Get(ctx, &computepb.GetInstanceRequest{ - Project: p.project, Zone: zone, Instance: name, - }) - if err != nil { - return nil, fmt.Errorf("gcp: instances.Get: %w", err) - } - return instanceToExitNode(got), nil + return instanceToExitNode(inst), nil } // buildInstanceResource constructs the *computepb.Instance that diff --git a/internal/gcp/provider.go b/internal/gcp/provider.go index ea8a34d..940c3fb 100644 --- a/internal/gcp/provider.go +++ b/internal/gcp/provider.go @@ -11,5 +11,8 @@ type Provider interface { Stop(ctx context.Context, name string) error Destroy(ctx context.Context, name string) error List(ctx context.Context) ([]*ExitNode, error) + // Get fetches a single managed instance by name. + // Returns (nil, nil) if no instance with that name exists; any other + // error (transport, permissions, etc.) is returned as (nil, err). Get(ctx context.Context, name string) (*ExitNode, error) } From 7ab0b61661221b1ef3fa4094b2f3d49942a08444 Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 19 May 2026 18:34:15 +0000 Subject: [PATCH 47/79] feat(gcp): PickZoneInRegion helper (not on Provider iface) --- internal/gcp/compute.go | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index c60bba2..779f1af 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -7,6 +7,7 @@ import ( "context" "errors" "fmt" + "math/rand/v2" "strings" "time" @@ -319,6 +320,38 @@ func (p *gcpProvider) Get(ctx context.Context, name string) (*ExitNode, error) { return instanceToExitNode(inst), nil } +// PickZoneInRegion returns a random zone in the given region whose +// status is UP. The Provider exposes this helper because zone +// selection touches a separate API (ZonesClient) and the orchestrator +// in internal/core needs it before calling Provision. +func (p *gcpProvider) PickZoneInRegion(ctx context.Context, region string) (string, error) { + it := p.zones.List(ctx, &computepb.ListZonesRequest{ + Project: p.project, + }) + var ups []string + for { + z, err := it.Next() + if errors.Is(err, iterator.Done) { + break + } + if err != nil { + return "", fmt.Errorf("gcp: list zones: %w", err) + } + if z.GetStatus() != "UP" { + continue + } + // z.GetRegion() is a full URL like ".../regions/us-west1". + if lastPathSegment(z.GetRegion()) != region { + continue + } + ups = append(ups, z.GetName()) + } + if len(ups) == 0 { + return "", fmt.Errorf("gcp: no UP zones in region %q", region) + } + return ups[rand.IntN(len(ups))], nil +} + // buildInstanceResource constructs the *computepb.Instance that // Provision passes to Insert. Separated so we can shape-test it // without hitting the API. From 7662c0859d963bb5933db10c3598d2fdde4d790f Mon Sep 17 00:00:00 2001 From: Iker Date: Tue, 19 May 2026 19:16:54 +0000 Subject: [PATCH 48/79] polish(gcp): PickZoneInRegion server-side UP filter + docs + region guard ListZonesRequest now carries the GCP filter expression "status = UP" so DOWN zones never cross the wire, eliminating the redundant Go-side status check. The doc comment now documents the bare-region format, the empty/no-match error cases, and the non-interface nature. An explicit guard rejects an empty region with a clear error. --- internal/gcp/compute.go | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index 779f1af..a2f1e98 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -320,13 +320,21 @@ func (p *gcpProvider) Get(ctx context.Context, name string) (*ExitNode, error) { return instanceToExitNode(inst), nil } -// PickZoneInRegion returns a random zone in the given region whose -// status is UP. The Provider exposes this helper because zone -// selection touches a separate API (ZonesClient) and the orchestrator -// in internal/core needs it before calling Provision. +// PickZoneInRegion returns a random UP zone within the given region. +// region must be a bare region name (e.g., "us-west1"), not a resource +// URL. Returns an error if region is empty or if no UP zones are +// found in that region. +// +// Not part of the Provider interface — callers type-assert to +// *gcpProvider. Zone selection lives here because it touches the +// separate ZonesClient API surface. func (p *gcpProvider) PickZoneInRegion(ctx context.Context, region string) (string, error) { + if region == "" { + return "", errors.New("gcp: PickZoneInRegion: region required") + } it := p.zones.List(ctx, &computepb.ListZonesRequest{ Project: p.project, + Filter: proto.String("status = UP"), }) var ups []string for { @@ -337,9 +345,6 @@ func (p *gcpProvider) PickZoneInRegion(ctx context.Context, region string) (stri if err != nil { return "", fmt.Errorf("gcp: list zones: %w", err) } - if z.GetStatus() != "UP" { - continue - } // z.GetRegion() is a full URL like ".../regions/us-west1". if lastPathSegment(z.GetRegion()) != region { continue From 4b7a97ce586396b4a1b3a2e07f4cb1c736133238 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 18:29:59 +0000 Subject: [PATCH 49/79] test(gcp): add integration test scaffold (build-tagged) --- Makefile | 2 +- internal/gcp/compute_integration_test.go | 54 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 internal/gcp/compute_integration_test.go diff --git a/Makefile b/Makefile index 9ea5588..d782ffa 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ test: $(GO) test -race -short ./... test-integration: - EXITNODE_INTEGRATION=1 $(GO) test -race ./... + EXITNODE_INTEGRATION=1 $(GO) test -race -tags integration ./... vet: $(GO) vet ./... diff --git a/internal/gcp/compute_integration_test.go b/internal/gcp/compute_integration_test.go new file mode 100644 index 0000000..aea5a51 --- /dev/null +++ b/internal/gcp/compute_integration_test.go @@ -0,0 +1,54 @@ +//go:build integration + +package gcp + +import ( + "context" + "os" + "testing" + "time" +) + +func gateIntegration(t *testing.T) (Options, bool) { + t.Helper() + if os.Getenv("EXITNODE_INTEGRATION") != "1" { + t.Skip("EXITNODE_INTEGRATION!=1; skipping real-GCP test") + } + proj := os.Getenv("EXITNODE_TEST_PROJECT") + if proj == "" { + t.Skip("EXITNODE_TEST_PROJECT unset; skipping") + } + opts := Options{ + Project: proj, + Region: envOr("EXITNODE_TEST_REGION", "us-central1"), + InstallScriptURL: "https://example.com/install.sh", // not actually run + } + if jsonKey := os.Getenv("GCP_CREDENTIALS_JSON"); jsonKey != "" { + opts.CredentialsJSON = []byte(jsonKey) + } + return opts, true +} + +func envOr(key, def string) string { + if v := os.Getenv(key); v != "" { + return v + } + return def +} + +func TestIntegration_ListIsCallable(t *testing.T) { + opts, _ := gateIntegration(t) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + p, err := New(ctx, opts) + if err != nil { + t.Fatalf("New: %v", err) + } + defer p.(*gcpProvider).Close() + + // We don't assert content (the test project may have no managed + // VMs); just that the call returns without error. + if _, err := p.List(ctx); err != nil { + t.Fatalf("List: %v", err) + } +} From 22b737b014d8ae927c50117f46b23d2d8eaf94b7 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 18:40:04 +0000 Subject: [PATCH 50/79] polish(gcp): integration test surfaces Close error + drops dead bool --- internal/gcp/compute_integration_test.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/internal/gcp/compute_integration_test.go b/internal/gcp/compute_integration_test.go index aea5a51..aba517f 100644 --- a/internal/gcp/compute_integration_test.go +++ b/internal/gcp/compute_integration_test.go @@ -9,7 +9,7 @@ import ( "time" ) -func gateIntegration(t *testing.T) (Options, bool) { +func gateIntegration(t *testing.T) Options { t.Helper() if os.Getenv("EXITNODE_INTEGRATION") != "1" { t.Skip("EXITNODE_INTEGRATION!=1; skipping real-GCP test") @@ -26,7 +26,7 @@ func gateIntegration(t *testing.T) (Options, bool) { if jsonKey := os.Getenv("GCP_CREDENTIALS_JSON"); jsonKey != "" { opts.CredentialsJSON = []byte(jsonKey) } - return opts, true + return opts } func envOr(key, def string) string { @@ -37,14 +37,18 @@ func envOr(key, def string) string { } func TestIntegration_ListIsCallable(t *testing.T) { - opts, _ := gateIntegration(t) + opts := gateIntegration(t) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() p, err := New(ctx, opts) if err != nil { t.Fatalf("New: %v", err) } - defer p.(*gcpProvider).Close() + defer func() { + if err := p.(*gcpProvider).Close(); err != nil { + t.Errorf("Close: %v", err) + } + }() // We don't assert content (the test project may have no managed // VMs); just that the call returns without error. From 9260ccb2c75e3d9a05a3f2662703233e88a6af3c Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 18:43:12 +0000 Subject: [PATCH 51/79] feat(install): VM bootstrap script reading metadata-provided creds --- scripts/install.sh | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100755 scripts/install.sh diff --git a/scripts/install.sh b/scripts/install.sh new file mode 100755 index 0000000..bcab0c1 --- /dev/null +++ b/scripts/install.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# +# exitnode bootstrap script. Fetched at first boot via the GCP startup +# script (instance metadata key `startup-script-url`). Idempotent — +# safe to re-run. +# +# Reads required parameters from VM instance metadata: +# - tailscale-auth-key (ephemeral, single-use, ~5m TTL) +# - tailscale-hostname (vpn--) +# - tailscale-tags (comma-separated, e.g. "tag:exit-node") +# +set -euo pipefail + +META="http://metadata.google.internal/computeMetadata/v1/instance/attributes" + +get_metadata() { + curl --silent --show-error --fail \ + --header "Metadata-Flavor: Google" \ + "${META}/$1" +} + +AUTH_KEY="$(get_metadata tailscale-auth-key)" +HOSTNAME_VAL="$(get_metadata tailscale-hostname)" +TAGS="$(get_metadata tailscale-tags)" + +# Validate — fail loudly if any required key is missing. +: "${AUTH_KEY:?tailscale-auth-key metadata missing}" +: "${HOSTNAME_VAL:?tailscale-hostname metadata missing}" +: "${TAGS:?tailscale-tags metadata missing}" + +apt-get update +apt-get install -y curl + +# Tailscale via official installer (handles repo + key + apt install). +if ! command -v tailscale >/dev/null 2>&1; then + curl -fsSL https://tailscale.com/install.sh | sh +fi + +# IP forwarding — idempotent drop-in; do NOT edit /etc/sysctl.conf. +cat >/etc/sysctl.d/99-tailscale.conf < Date: Wed, 20 May 2026 18:53:19 +0000 Subject: [PATCH 52/79] docs: professional README with architecture, dev workflow, and roadmap --- README.md | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8a443fe..76490f4 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,129 @@ # exit-node -Tailscale exit nodes on demand. Provision, rotate across cloud regions, and sync pfSense gateways. CLI + MCP for AI agents. + +[![Go Version](https://img.shields.io/badge/go-1.25-00ADD8?logo=go)](go.mod) +[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) +[![Status: Pre-alpha](https://img.shields.io/badge/status-pre--alpha-orange)](#status) + +On-demand Tailscale exit nodes that rotate across cloud regions and keep your +pfSense gateway in sync. Driven from a CLI for humans and an MCP server for +AI agents. + +> **Status**: pre-alpha. The library packages (provisioning, rotate +> orchestrator, client adapters, probes) are in place. The `exitnode` and +> `exitnode-mcp` binaries are not yet built — see [Roadmap](#roadmap). + +## Why + +VPN exit nodes are useful exactly because they're not always-on: + +- **Geo-bound testing.** Hop into `us-east1`, `europe-west1`, `asia-northeast1` + on demand and tear it down when done. +- **Egress-IP rotation.** A new node every cycle, so your egress address moves + without manual cloud-console clicking. +- **Tunnel discipline.** When you're done, the route disappears and your home + gateway returns to whatever it was before — no leftover state. + +Running a fleet of always-on exit nodes is wasteful and noisy. `exit-node` +provisions one when you ask, hands it to Tailscale, swaps your pfSense default +route to it, verifies egress, and destroys the old one cleanly. + +## How it works + +The rotate flow is a small state machine that runs end-to-end on `exitnode up`: + +``` +1. mint → ephemeral Tailscale auth-key (single-use, ~5min TTL) +2. provision → GCP VM with metadata-baked auth-key + tags + hostname +3. wait → poll Tailscale until the device shows up connected +4. authorize → set tags + approve subnet routes for the new device +5. probe → verify egress works through the candidate node +6. cut over → pfSense gateway monitor IP swapped to the new node +7. verify → re-probe through the live route; restore prior node on failure +8. cleanup → destroy the previous VM, delete its Tailscale device +``` + +Every step is idempotent and observable. If verification fails post-cutover, +the prior gateway is restored before the candidate is destroyed. + +## Architecture + +Four narrow client interfaces, one orchestrator: + +| Package | Responsibility | +| --- | --- | +| `internal/core` | Rotate state machine + `up` / `down` / `status` / `health` / `cost` / `list` / `sync` actions | +| `internal/config` | Typed configuration loader (project, region, tags, pfSense, gateway names) | +| `internal/state` | On-disk state file — what's currently running, last rotation, prior nodes | +| `internal/gcp` | Compute Engine adapter — `Provision` / `Start` / `Stop` / `Destroy` / `List` / `Get` | +| `internal/tailscale` | v2 API adapter — mint keys, wait for device, authorize, set tags, delete | +| `internal/pfsense` | pfSense REST adapter — read & swap gateway-monitor IP, apply | +| `internal/verify` | Egress probes — `EgressVia` (through a candidate) and `EgressDirect` (post-cutover) | + +Each adapter is interface-first; tests run against fakes. The `gcp` package +also has a `//go:build integration` smoke test that exercises real GCP +(skipped unless `EXITNODE_INTEGRATION=1` and `EXITNODE_TEST_PROJECT` are set). + +## Repository layout + +``` +exit-node/ +├── internal/ +│ ├── config/ # config loader +│ ├── state/ # on-disk state +│ ├── core/ # rotate orchestrator + action commands +│ ├── gcp/ # GCP Compute Engine adapter +│ ├── tailscale/ # Tailscale v2 API adapter +│ ├── pfsense/ # pfSense REST adapter +│ └── verify/ # egress probes +├── scripts/ +│ └── install.sh # VM first-boot bootstrap (fetched via startup-script-url) +├── Makefile +├── go.mod +└── README.md +``` + +## Development + +Requirements: Go 1.25+. + +```bash +# Unit tests (race-enabled, fast) +make test + +# Full suite incl. the build-tagged GCP integration test +# (still skips unless EXITNODE_INTEGRATION=1 and EXITNODE_TEST_PROJECT are set) +make test-integration + +# Vet +make vet + +# Lint (requires golangci-lint) +make lint +``` + +The integration test against real GCP additionally honours: + +| Env var | Default | Purpose | +| --- | --- | --- | +| `EXITNODE_INTEGRATION` | _unset_ | Must be `1` to run real-GCP tests | +| `EXITNODE_TEST_PROJECT` | _unset_ | GCP project ID to run against | +| `EXITNODE_TEST_REGION` | `us-central1` | Region for the smoke test | +| `GCP_CREDENTIALS_JSON` | _unset_ | Service-account JSON; otherwise ADC is used | + +## Roadmap + +Work is staged in three sequential plans: + +- **Plan 1 — Foundation & Core.** Module layout, `internal/config`, + `internal/state`, interface definitions for all four clients, and the + `internal/core` rotate orchestrator with table-driven mocked tests. + ✅ Complete. +- **Plan 2 — Real client implementations.** `internal/gcp`, + `internal/tailscale`, `internal/pfsense`, `internal/verify`, and + `scripts/install.sh`. ✅ Complete. +- **Plan 3 — Binaries & distribution.** `cmd/exitnode`, `cmd/exitnode-mcp`, + examples, CI workflows, and goreleaser packaging. ⏳ In progress. + +## License + +[MIT](LICENSE) © 2026 Iker. From eec3e8d298208e115c86ef2c1a2ce9b5569248d3 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 19:18:47 +0000 Subject: [PATCH 53/79] feat(core): Start(name) with active-state refresh --- internal/core/named.go | 57 +++++++++++++++++++ internal/core/named_test.go | 106 ++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 internal/core/named.go create mode 100644 internal/core/named_test.go diff --git a/internal/core/named.go b/internal/core/named.go new file mode 100644 index 0000000..512d52f --- /dev/null +++ b/internal/core/named.go @@ -0,0 +1,57 @@ +package core + +import ( + "context" + "errors" + + "github.com/iker/exit-node/internal/gcp" +) + +// ErrNameRequired is returned by Start when name is the empty string. +var ErrNameRequired = errors.New("core: name required") + +// GetActive returns the currently-active exit node from the local state cache, +// or nil if none is recorded. +func (c *Core) GetActive() (*gcp.ExitNode, error) { + return c.store.GetActive() +} + +// Start calls the provider's Start method for the named exit node and then +// refreshes the local state cache if that node is currently the active one. +func (c *Core) Start(ctx context.Context, name string) error { + if name == "" { + return ErrNameRequired + } + if err := c.provider.Start(ctx, name); err != nil { + return err + } + return c.refreshIfActive(ctx, name) +} + +// refreshIfActive re-fetches the named node from the provider and updates the +// local state cache — but only when name matches the currently-recorded active +// node. A provider Get failure is logged and swallowed; Start already succeeded +// so a state-refresh hiccup must not surface as a Start failure. +func (c *Core) refreshIfActive(ctx context.Context, name string) error { + active, err := c.store.GetActive() + if err != nil { + c.log.Warn("refreshIfActive: could not read active state", "err", err) + return nil + } + if active == nil || active.Name != name { + return nil + } + fresh, err := c.provider.Get(ctx, name) + if err != nil { + c.log.Warn("refreshIfActive: provider.Get failed, skipping state update", "name", name, "err", err) + return nil + } + if fresh == nil { + c.log.Warn("refreshIfActive: provider.Get returned nil, skipping state update", "name", name) + return nil + } + if err := c.store.SetActive(fresh); err != nil { + c.log.Warn("refreshIfActive: SetActive failed", "name", name, "err", err) + } + return nil +} diff --git a/internal/core/named_test.go b/internal/core/named_test.go new file mode 100644 index 0000000..ad09090 --- /dev/null +++ b/internal/core/named_test.go @@ -0,0 +1,106 @@ +package core + +import ( + "context" + "errors" + "log/slog" + "testing" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/state" +) + +type namedFixtureT struct { + c *Core + store *state.Store + prov *mockProvider + ts *mockTS +} + +func namedFixture(t *testing.T, active *gcp.ExitNode) *namedFixtureT { + t.Helper() + statePath := t.TempDir() + "/state.json" + store, err := state.Open(statePath) + if err != nil { + t.Fatalf("state.Open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + if active != nil { + if err := store.SetActive(active); err != nil { + t.Fatalf("SetActive: %v", err) + } + } + cfg := &config.Config{} + prov := newMockProvider() + ts := newMockTS() + pf := newMockPF() + probe := &mockProbe{} + c := New(Deps{ + Config: cfg, Provider: prov, TS: ts, PF: pf, Probe: probe, + Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + }) + return &namedFixtureT{c: c, store: store, prov: prov, ts: ts} +} + +func TestStart_CallsProviderStart(t *testing.T) { + f := namedFixture(t, nil) + + if err := f.c.Start(context.Background(), "vpn-us-central1-abc"); err != nil { + t.Fatalf("Start: %v", err) + } + if !hasCallWithArg(f.prov.calls, "Start", "vpn-us-central1-abc") { + t.Errorf("provider.Start not called with expected name; calls=%v", f.prov.calls) + } +} + +func TestStart_RefreshesStateWhenNameMatchesActive(t *testing.T) { + active := &gcp.ExitNode{Name: "vpn-us-central1-abc", State: gcp.StateStopped} + f := namedFixture(t, active) + f.prov.GetResult["vpn-us-central1-abc"] = &gcp.ExitNode{ + Name: "vpn-us-central1-abc", State: gcp.StateRunning, PublicIP: "1.2.3.4", + } + + if err := f.c.Start(context.Background(), "vpn-us-central1-abc"); err != nil { + t.Fatalf("Start: %v", err) + } + got, err := f.store.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got == nil { + t.Fatal("GetActive returned nil, want refreshed node") + } + if got.State != gcp.StateRunning || got.PublicIP != "1.2.3.4" { + t.Errorf("state not refreshed: got State=%v PublicIP=%q", got.State, got.PublicIP) + } +} + +func TestStart_DoesNotTouchStateWhenNameMismatch(t *testing.T) { + active := &gcp.ExitNode{Name: "vpn-us-central1-abc", State: gcp.StateRunning, PublicIP: "9.9.9.9"} + f := namedFixture(t, active) + // Provider Get for the different name returns nothing (default empty map). + + if err := f.c.Start(context.Background(), "vpn-eu-west1-xyz"); err != nil { + t.Fatalf("Start: %v", err) + } + got, err := f.store.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got == nil || got.PublicIP != "9.9.9.9" { + t.Errorf("state mutated unexpectedly: %+v", got) + } +} + +func TestStart_RejectsEmptyName(t *testing.T) { + f := namedFixture(t, nil) + err := f.c.Start(context.Background(), "") + if !errors.Is(err, ErrNameRequired) { + t.Errorf("got %v, want ErrNameRequired", err) + } + // Provider.Start must not have been called. + if hasCallWithArg(f.prov.calls, "Start", "") { + t.Errorf("provider.Start was called despite empty name") + } +} From ef3e21ef45fca272e3ad21f14a349af52d7ac7e8 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 19:49:49 +0000 Subject: [PATCH 54/79] polish(core): wrap provider.Start error with node name --- internal/core/named.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/core/named.go b/internal/core/named.go index 512d52f..930d6c7 100644 --- a/internal/core/named.go +++ b/internal/core/named.go @@ -3,6 +3,7 @@ package core import ( "context" "errors" + "fmt" "github.com/iker/exit-node/internal/gcp" ) @@ -23,7 +24,7 @@ func (c *Core) Start(ctx context.Context, name string) error { return ErrNameRequired } if err := c.provider.Start(ctx, name); err != nil { - return err + return fmt.Errorf("provider.Start %s: %w", name, err) } return c.refreshIfActive(ctx, name) } From b5581bddd88fbeccc769aa4653f8758759266505 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 19:51:08 +0000 Subject: [PATCH 55/79] feat(core): Stop(name) with active-state refresh --- internal/core/named.go | 12 ++++++++++ internal/core/named_test.go | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/internal/core/named.go b/internal/core/named.go index 930d6c7..2a3e560 100644 --- a/internal/core/named.go +++ b/internal/core/named.go @@ -29,6 +29,18 @@ func (c *Core) Start(ctx context.Context, name string) error { return c.refreshIfActive(ctx, name) } +// Stop stops a running exit-node VM by name. If the name matches the +// currently-active node, the on-disk state cache is refreshed. +func (c *Core) Stop(ctx context.Context, name string) error { + if name == "" { + return ErrNameRequired + } + if err := c.provider.Stop(ctx, name); err != nil { + return fmt.Errorf("provider.Stop %s: %w", name, err) + } + return c.refreshIfActive(ctx, name) +} + // refreshIfActive re-fetches the named node from the provider and updates the // local state cache — but only when name matches the currently-recorded active // node. A provider Get failure is logged and swallowed; Start already succeeded diff --git a/internal/core/named_test.go b/internal/core/named_test.go index ad09090..02c65b8 100644 --- a/internal/core/named_test.go +++ b/internal/core/named_test.go @@ -104,3 +104,48 @@ func TestStart_RejectsEmptyName(t *testing.T) { t.Errorf("provider.Start was called despite empty name") } } + +func TestStop_CallsProviderStop(t *testing.T) { + f := namedFixture(t, nil) + + if err := f.c.Stop(context.Background(), "vpn-us-central1-abc"); err != nil { + t.Fatalf("Stop: %v", err) + } + if !hasCallWithArg(f.prov.calls, "Stop", "vpn-us-central1-abc") { + t.Errorf("provider.Stop not called with expected name; calls=%v", f.prov.calls) + } +} + +func TestStop_RefreshesStateWhenNameMatchesActive(t *testing.T) { + active := &gcp.ExitNode{Name: "vpn-us-central1-abc", State: gcp.StateRunning} + f := namedFixture(t, active) + f.prov.GetResult["vpn-us-central1-abc"] = &gcp.ExitNode{ + Name: "vpn-us-central1-abc", State: gcp.StateStopped, PublicIP: "", + } + + if err := f.c.Stop(context.Background(), "vpn-us-central1-abc"); err != nil { + t.Fatalf("Stop: %v", err) + } + got, err := f.store.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got == nil { + t.Fatal("GetActive returned nil, want refreshed node") + } + if got.State != gcp.StateStopped { + t.Errorf("state not refreshed: got State=%v, want StateStopped", got.State) + } +} + +func TestStop_RejectsEmptyName(t *testing.T) { + f := namedFixture(t, nil) + err := f.c.Stop(context.Background(), "") + if !errors.Is(err, ErrNameRequired) { + t.Errorf("got %v, want ErrNameRequired", err) + } + // Provider.Stop must not have been called. + if hasCallWithArg(f.prov.calls, "Stop", "") { + t.Errorf("provider.Stop was called despite empty name") + } +} From c1ab03ba3c7f76466d59a9c84fc8545cca043cc2 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 19:53:22 +0000 Subject: [PATCH 56/79] feat(core): Destroy(name) with TS device cleanup and state clear --- internal/core/named.go | 44 ++++++++++++++++++++ internal/core/named_test.go | 81 +++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/internal/core/named.go b/internal/core/named.go index 2a3e560..6182d04 100644 --- a/internal/core/named.go +++ b/internal/core/named.go @@ -4,10 +4,15 @@ import ( "context" "errors" "fmt" + "time" "github.com/iker/exit-node/internal/gcp" ) +// deviceLookupTimeout is the maximum time Destroy will wait for a Tailscale +// device to appear before giving up and proceeding with VM destruction. +const deviceLookupTimeout = 5 * time.Second + // ErrNameRequired is returned by Start when name is the empty string. var ErrNameRequired = errors.New("core: name required") @@ -41,6 +46,45 @@ func (c *Core) Stop(ctx context.Context, name string) error { return c.refreshIfActive(ctx, name) } +// Destroy removes a named exit node: first it tries to look up and delete the +// Tailscale device, then it destroys the VM. If the VM destroy fails the error +// is returned. If the destroyed node was the active node, the state cache is +// cleared. +func (c *Core) Destroy(ctx context.Context, name string) error { + if name == "" { + return ErrNameRequired + } + + // Step 1: Attempt to remove the Tailscale device. A short timeout is used + // so that a missing or already-deleted device does not block VM cleanup. + dev, err := c.ts.WaitForDevice(ctx, name, deviceLookupTimeout) + if err != nil { + c.log.Warn("Destroy: could not look up TS device, skipping device delete", "name", name, "err", err) + } else if dev != nil { + if delErr := c.ts.DeleteDevice(ctx, dev.ID); delErr != nil { + c.log.Warn("Destroy: ts.DeleteDevice failed, proceeding with VM destroy", "name", name, "deviceID", dev.ID, "err", delErr) + } + } + + // Step 2: Destroy the VM — load-bearing; failure aborts the operation. + if err := c.provider.Destroy(ctx, name); err != nil { + return fmt.Errorf("provider.Destroy %s: %w", name, err) + } + + // Step 3: Clear state cache if this was the active node. + active, err := c.store.GetActive() + if err != nil { + c.log.Warn("Destroy: could not read active state", "err", err) + return nil + } + if active != nil && active.Name == name { + if err := c.store.ClearActive(); err != nil { + return fmt.Errorf("clear state after destroy: %w", err) + } + } + return nil +} + // refreshIfActive re-fetches the named node from the provider and updates the // local state cache — but only when name matches the currently-recorded active // node. A provider Get failure is logged and swallowed; Start already succeeded diff --git a/internal/core/named_test.go b/internal/core/named_test.go index 02c65b8..3453bb1 100644 --- a/internal/core/named_test.go +++ b/internal/core/named_test.go @@ -9,6 +9,7 @@ import ( "github.com/iker/exit-node/internal/config" "github.com/iker/exit-node/internal/gcp" "github.com/iker/exit-node/internal/state" + "github.com/iker/exit-node/internal/tailscale" ) type namedFixtureT struct { @@ -149,3 +150,83 @@ func TestStop_RejectsEmptyName(t *testing.T) { t.Errorf("provider.Stop was called despite empty name") } } + +// --------------------------------------------------------------------------- +// Destroy tests +// --------------------------------------------------------------------------- + +func TestDestroy_HappyPath_DestroysVMAndDevice(t *testing.T) { + f := namedFixture(t, nil) + f.ts.WaitDevice = &tailscale.Device{ID: "device-1", Hostname: "vpn-us-central1-abc"} + + if err := f.c.Destroy(context.Background(), "vpn-us-central1-abc"); err != nil { + t.Fatalf("Destroy: %v", err) + } + if !hasCallWithArg(f.prov.calls, "Destroy", "vpn-us-central1-abc") { + t.Errorf("provider.Destroy not called with expected name; calls=%v", f.prov.calls) + } + if !hasCallWithArg(f.ts.calls, "DeleteDevice", "device-1") { + t.Errorf("ts.DeleteDevice not called with expected device ID; calls=%v", f.ts.calls) + } +} + +func TestDestroy_ClearsStateWhenNameMatchesActive(t *testing.T) { + active := &gcp.ExitNode{Name: "vpn-us-central1-abc", State: gcp.StateRunning} + f := namedFixture(t, active) + f.ts.WaitDevice = &tailscale.Device{ID: "device-1", Hostname: "vpn-us-central1-abc"} + + if err := f.c.Destroy(context.Background(), "vpn-us-central1-abc"); err != nil { + t.Fatalf("Destroy: %v", err) + } + got, err := f.store.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got != nil { + t.Errorf("state not cleared after destroy: %+v", got) + } +} + +func TestDestroy_LeavesStateAloneWhenNameMismatch(t *testing.T) { + active := &gcp.ExitNode{Name: "vpn-eu-west1-xyz", State: gcp.StateRunning, PublicIP: "9.9.9.9"} + f := namedFixture(t, active) + f.ts.WaitDevice = &tailscale.Device{ID: "device-1", Hostname: "vpn-us-central1-abc"} + + if err := f.c.Destroy(context.Background(), "vpn-us-central1-abc"); err != nil { + t.Fatalf("Destroy: %v", err) + } + got, err := f.store.GetActive() + if err != nil { + t.Fatalf("GetActive: %v", err) + } + if got == nil || got.Name != "vpn-eu-west1-xyz" { + t.Errorf("active state mutated unexpectedly: %+v", got) + } +} + +func TestDestroy_DeviceLookupFails_ProceedsWithVMDestroy(t *testing.T) { + f := namedFixture(t, nil) + f.ts.WaitErr = errors.New("device not found") + // WaitDevice is nil — mockTS returns (nil, WaitErr) when WaitErr is set. + + if err := f.c.Destroy(context.Background(), "vpn-us-central1-abc"); err != nil { + t.Fatalf("Destroy: %v", err) + } + if !hasCallWithArg(f.prov.calls, "Destroy", "vpn-us-central1-abc") { + t.Errorf("provider.Destroy not called with expected name; calls=%v", f.prov.calls) + } + if hasCallWithArg(f.ts.calls, "DeleteDevice", "device-1") { + t.Errorf("ts.DeleteDevice must not be called when device lookup failed; calls=%v", f.ts.calls) + } +} + +func TestDestroy_RejectsEmptyName(t *testing.T) { + f := namedFixture(t, nil) + err := f.c.Destroy(context.Background(), "") + if !errors.Is(err, ErrNameRequired) { + t.Errorf("got %v, want ErrNameRequired", err) + } + if hasCallWithArg(f.prov.calls, "Destroy", "") { + t.Errorf("provider.Destroy must not be called for empty name") + } +} From 2a4b87e4fe1db6aa41f71ed04ce938024e8fe939 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 19:55:11 +0000 Subject: [PATCH 57/79] docs(examples): annotated reference config.toml + parse test --- examples/config.toml | 76 ++++++++++++++++++++++++++++++++++ internal/config/config_test.go | 13 ++++++ 2 files changed, 89 insertions(+) create mode 100644 examples/config.toml diff --git a/examples/config.toml b/examples/config.toml new file mode 100644 index 0000000..620c183 --- /dev/null +++ b/examples/config.toml @@ -0,0 +1,76 @@ +# exitnode — reference configuration. +# +# Copy this file to ~/.config/exitnode/config.toml and edit the values +# for your environment. All env-var names are configurable here so that +# you can pick whatever naming convention your secret store uses. + +[gcp] +# GCP project ID that hosts the exit-node VMs. +project = "my-gcp-project" +# Default region used by `exitnode up` and `exitnode rotate` when no +# --region flag is given. +default_region = "us-west1" +# Default machine type. e2-micro is the cheapest x86_64; e2-small is +# the cheapest with more than 1 vCPU. +default_machine_type = "e2-micro" +# Default zone within the region. Empty → exit-node picks a random +# UP zone in the region at provision time. +default_zone = "" +# VPC network name. "default" is the auto-created VPC every GCP +# project starts with. +network = "default" +# Boot disk size in GiB. The image is debian-12; 10 GiB is comfortably +# above the image's actual footprint. +disk_size_gb = 10 + +[tailscale] +# Your tailnet name — visible at the top of the Tailscale admin console. +tailnet = "example.com" +# Env-var names holding the OAuth client-credentials. Both are required. +# Create the OAuth client at https://login.tailscale.com/admin/settings/oauth +# with scopes "auth_keys" (write) and "devices:core" (write). +oauth_client_id_env = "TAILSCALE_OAUTH_CLIENT_ID" +oauth_client_secret_env = "TAILSCALE_OAUTH_CLIENT_SECRET" +# Tags applied to every provisioned device. Must be defined in your +# tailnet's ACL `tagOwners`. +tags = ["tag:exit-node"] +# How long the ephemeral auth key minted for each new VM remains valid +# before Tailscale rejects it. 5m is generous — the VM consumes the key +# on first boot, usually within 60 seconds. +ephemeral_key_ttl = "5m" + +[pfsense] +# pfSense webConfigurator host or IP. No trailing slash. +host = "https://pfsense.lan" +# Env-var name holding the pfsense-api plugin API key. +api_key_env = "PFSENSE_API_KEY" +# Name of the gateway you've configured in System → Routing → Gateways +# that points at the Tailscale interface. exitnode patches its +# monitor-IP to follow the active exit node. +gateway_name = "TAILSCALE_VPN_GW" +# Set false to skip TLS verification (only safe for the default +# self-signed cert on a trusted LAN). +verify_tls = true + +[behavior] +# When true, every `exitnode up` and `exitnode rotate` runs the +# pfsense sync step automatically. Set false if you want to drive +# pfsense sync manually via `exitnode pfsense sync`. +auto_sync_pfsense = true +# Pre-cutover probe — verifies egress through the candidate node +# from your orchestrator host BEFORE swapping pfSense over. Strongly +# recommended. +verify_pre_cutover = true +# Post-cutover probe — re-verifies egress through the LAN gateway +# AFTER pfSense is swapped. Default off because it requires the +# orchestrator host to use pfSense as its default route. +verify_post_cutover = false +# Public probe URL. Anything that returns the caller's IP as plain +# text works (api.ipify.org, ifconfig.me, etc.). +probe_url = "https://api.ipify.org" +# How long exitnode will wait for the new VM's Tailscale device to +# appear in the API after provision. +registration_timeout = "90s" +# Public URL of scripts/install.sh — fetched by the VM startup script. +# Point at your fork's main branch (or a tagged release for stability). +install_script_url = "https://raw.githubusercontent.com/USER/exit-node/main/scripts/install.sh" diff --git a/internal/config/config_test.go b/internal/config/config_test.go index e03d4cd..bef8ee8 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -159,3 +159,16 @@ func TestResolveGCPCredentialsFallsBackToADC(t *testing.T) { t.Errorf("ADC payload should be empty (resolved later by google sdk)") } } + +func TestExampleConfigParses(t *testing.T) { + cfg, err := Load("../../examples/config.toml") + if err != nil { + t.Fatalf("Load examples/config.toml: %v", err) + } + if cfg.GCP.Project == "" || cfg.Tailscale.Tailnet == "" || cfg.PFSense.Host == "" { + t.Errorf("example config has empty top-level values: %+v", cfg) + } + if cfg.Tailscale.EphemeralKeyTTL == 0 { + t.Errorf("example config did not parse the duration: %+v", cfg.Tailscale) + } +} From 476facd6e34721b389ee229d2b83af04e94ed9ef Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:01:12 +0000 Subject: [PATCH 58/79] feat(cli): root command + global flags + buildCore helper --- cmd/exitnode/main.go | 13 ++++ cmd/exitnode/render.go | 19 +++++ cmd/exitnode/root.go | 156 ++++++++++++++++++++++++++++++++++++++ cmd/exitnode/root_test.go | 22 ++++++ go.mod | 3 + go.sum | 10 +++ 6 files changed, 223 insertions(+) create mode 100644 cmd/exitnode/main.go create mode 100644 cmd/exitnode/render.go create mode 100644 cmd/exitnode/root.go create mode 100644 cmd/exitnode/root_test.go diff --git a/cmd/exitnode/main.go b/cmd/exitnode/main.go new file mode 100644 index 0000000..7870c99 --- /dev/null +++ b/cmd/exitnode/main.go @@ -0,0 +1,13 @@ +package main + +import ( + "fmt" + "os" +) + +func main() { + if err := newRootCmd().Execute(); err != nil { + fmt.Fprintln(os.Stderr, "exitnode:", err) + os.Exit(1) + } +} diff --git a/cmd/exitnode/render.go b/cmd/exitnode/render.go new file mode 100644 index 0000000..5ee716a --- /dev/null +++ b/cmd/exitnode/render.go @@ -0,0 +1,19 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" +) + +// renderJSON marshals v to w as indented JSON. +func renderJSON(w io.Writer, v any) error { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + return enc.Encode(v) +} + +// renderKV prints a key=value pair line. +func renderKV(w io.Writer, key string, val any) { + fmt.Fprintf(w, "%-14s %v\n", key+":", val) +} diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go new file mode 100644 index 0000000..7853403 --- /dev/null +++ b/cmd/exitnode/root.go @@ -0,0 +1,156 @@ +package main + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "path/filepath" + + "github.com/spf13/cobra" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/core" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/pfsense" + "github.com/iker/exit-node/internal/state" + "github.com/iker/exit-node/internal/tailscale" + "github.com/iker/exit-node/internal/verify" +) + +// rootOpts collects the global flags. One instance per process; subcommands +// read from it via closure. +type rootOpts struct { + configPath string + json bool + verbose bool +} + +func newRootCmd() *cobra.Command { + opts := &rootOpts{} + + cmd := &cobra.Command{ + Use: "exitnode", + Short: "On-demand Tailscale exit nodes that rotate across cloud regions", + SilenceUsage: true, + SilenceErrors: true, + } + + cmd.PersistentFlags().StringVar(&opts.configPath, "config", defaultConfigPath(), "Path to config.toml") + cmd.PersistentFlags().BoolVar(&opts.json, "json", false, "Machine-readable JSON output") + cmd.PersistentFlags().BoolVarP(&opts.verbose, "verbose", "v", false, "Verbose (debug) logging") + + cmd.AddCommand( + newUpCmd(opts), + newDownCmd(opts), + newRotateCmd(opts), + newListCmd(opts), + newStatusCmd(opts), + newHealthCmd(opts), + newPFSenseCmd(opts), + newCostCmd(opts), + ) + return cmd +} + +func defaultConfigPath() string { + if xdg := os.Getenv("XDG_CONFIG_HOME"); xdg != "" { + return filepath.Join(xdg, "exitnode", "config.toml") + } + home, _ := os.UserHomeDir() + return filepath.Join(home, ".config", "exitnode", "config.toml") +} + +func defaultStatePath() string { + if xdg := os.Getenv("XDG_CONFIG_HOME"); xdg != "" { + return filepath.Join(xdg, "exitnode", "state.json") + } + home, _ := os.UserHomeDir() + return filepath.Join(home, ".config", "exitnode", "state.json") +} + +// Subcommand stubs (real impls in subsequent tasks). Kept visible (not +// Hidden) so the help-output smoke test in Step 7 passes even before +// the real impls land. +func newUpCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "up"} } +func newDownCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "down"} } +func newRotateCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "rotate"} } +func newListCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "list"} } +func newStatusCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "status"} } +func newHealthCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "health"} } +func newPFSenseCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "pfsense"} } +func newCostCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "cost"} } + +// buildCore loads config + secrets, opens the state store, and constructs +// a *core.Core with real client implementations wired in. Returned cleanup +// must always be called (releases the state lock). +func (o *rootOpts) buildCore(ctx context.Context) (*core.Core, func(), error) { + cfg, err := config.Load(o.configPath) + if err != nil { + return nil, func() {}, fmt.Errorf("load config: %w", err) + } + + tsID, tsSecret, err := cfg.ResolveTailscaleSecrets() + if err != nil { + return nil, func() {}, err + } + pfKey, err := cfg.ResolvePFSenseAPIKey() + if err != nil { + return nil, func() {}, err + } + _, gcpJSON, err := cfg.ResolveGCPCredentials() + if err != nil { + return nil, func() {}, err + } + + provider, err := gcp.New(ctx, gcp.Options{ + Project: cfg.GCP.Project, + Region: cfg.GCP.DefaultRegion, + CredentialsJSON: gcpJSON, + Network: cfg.GCP.Network, + InstallScriptURL: cfg.Behavior.InstallScriptURL, + }) + if err != nil { + return nil, func() {}, fmt.Errorf("gcp client: %w", err) + } + + ts, err := tailscale.New(tailscale.Options{ + Tailnet: cfg.Tailscale.Tailnet, + ClientID: tsID, + ClientSecret: tsSecret, + }) + if err != nil { + return nil, func() {}, fmt.Errorf("tailscale client: %w", err) + } + + pf, err := pfsense.New(pfsense.Options{ + BaseURL: cfg.PFSense.Host, + APIKey: pfKey, + VerifyTLS: cfg.PFSense.VerifyTLS, + }) + if err != nil { + return nil, func() {}, fmt.Errorf("pfsense client: %w", err) + } + + probe := verify.New(cfg.Behavior.ProbeURL) + + store, err := state.Open(defaultStatePath()) + if err != nil { + if errors.Is(err, state.ErrLocked) { + return nil, func() {}, fmt.Errorf("another exitnode process is running (state.json locked)") + } + return nil, func() {}, fmt.Errorf("open state: %w", err) + } + + level := slog.LevelInfo + if o.verbose { + level = slog.LevelDebug + } + log := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level})) + + c := core.New(core.Deps{ + Config: cfg, Provider: provider, TS: ts, PF: pf, Probe: probe, Store: store, Logger: log, + }) + return c, func() { _ = store.Close() }, nil +} diff --git a/cmd/exitnode/root_test.go b/cmd/exitnode/root_test.go new file mode 100644 index 0000000..2f8d37b --- /dev/null +++ b/cmd/exitnode/root_test.go @@ -0,0 +1,22 @@ +package main + +import ( + "bytes" + "strings" + "testing" +) + +func TestRootCmd_Help_ListsAllSubcommands(t *testing.T) { + cmd := newRootCmd() + buf := &bytes.Buffer{} + cmd.SetOut(buf) + cmd.SetArgs([]string{"--help"}) + if err := cmd.Execute(); err != nil { + t.Fatalf("execute: %v", err) + } + for _, sub := range []string{"up", "down", "rotate", "list", "status", "health", "pfsense", "cost"} { + if !strings.Contains(buf.String(), sub) { + t.Errorf("help output missing subcommand %q\nfull output:\n%s", sub, buf.String()) + } + } +} diff --git a/go.mod b/go.mod index 18794b7..05ceeff 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,9 @@ require ( github.com/google/s2a-go v0.1.9 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.15 // indirect github.com/googleapis/gax-go/v2 v2.22.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/cobra v1.10.2 // indirect + github.com/spf13/pflag v1.0.9 // indirect github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect diff --git a/go.sum b/go.sum index d013c28..aa5e495 100644 --- a/go.sum +++ b/go.sum @@ -12,6 +12,7 @@ github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -29,8 +30,15 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.15 h1:xolVQTEXusUcAA5Ugt github.com/googleapis/enterprise-certificate-proxy v0.3.15/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= github.com/googleapis/gax-go/v2 v2.22.0 h1:PjIWBpgGIVKGoCXuiCoP64altEJCj3/Ei+kSU5vlZD4= github.com/googleapis/gax-go/v2 v2.22.0/go.mod h1:irWBbALSr0Sk3qlqb9SyJ1h68WjgeFuiOzI4Rqw5+aY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= +github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= +github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 h1:erxeiTyq+nw4Cz5+hLDkOwNF5/9IQWCQPv0gpb3+QHU= @@ -45,6 +53,7 @@ go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWv go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= @@ -69,6 +78,7 @@ google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= tailscale.com/client/tailscale/v2 v2.9.0 h1:zBZIIeIYXL42qvvile7d29O2DKSr3AfNc2gzd1JCf2o= From 8119a7041fffac2bc15d142ccade6c1eb6e57083 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:02:43 +0000 Subject: [PATCH 59/79] feat(cli): exitnode up (idempotent provision) --- cmd/exitnode/root.go | 1 - cmd/exitnode/up.go | 48 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 cmd/exitnode/up.go diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index 7853403..7b58552 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -73,7 +73,6 @@ func defaultStatePath() string { // Subcommand stubs (real impls in subsequent tasks). Kept visible (not // Hidden) so the help-output smoke test in Step 7 passes even before // the real impls land. -func newUpCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "up"} } func newDownCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "down"} } func newRotateCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "rotate"} } func newListCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "list"} } diff --git a/cmd/exitnode/up.go b/cmd/exitnode/up.go new file mode 100644 index 0000000..cfbce10 --- /dev/null +++ b/cmd/exitnode/up.go @@ -0,0 +1,48 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/spf13/cobra" + + "github.com/iker/exit-node/internal/core" +) + +func newUpCmd(opts *rootOpts) *cobra.Command { + var ( + region string + machineType string + ) + cmd := &cobra.Command{ + Use: "up", + Short: "Provision an exit node (idempotent — returns the active one if it exists)", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + c, cleanup, err := opts.buildCore(ctx) + if err != nil { + return err + } + defer cleanup() + + node, err := c.Up(ctx, core.UpOpts{Region: region, MachineType: machineType}) + if err != nil { + return fmt.Errorf("up: %w", err) + } + return renderNode(os.Stdout, opts.json, node) + }, + } + cmd.Flags().StringVar(®ion, "region", "", "GCP region (overrides config default)") + cmd.Flags().StringVar(&machineType, "machine", "", "GCP machine type (overrides config default)") + return cmd +} + +func renderNode(w *os.File, asJSON bool, n any) error { + if asJSON { + return renderJSON(w, n) + } + // Type-assert to gcp.ExitNode is fine because callers pass that type. + fmt.Fprintln(w, n) + return nil +} From e714ce91e5fa2c94a256ea3750d31fd9e2c1114e Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:02:59 +0000 Subject: [PATCH 60/79] feat(cli): exitnode down [--destroy] --- cmd/exitnode/down.go | 32 ++++++++++++++++++++++++++++++++ cmd/exitnode/root.go | 1 - 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 cmd/exitnode/down.go diff --git a/cmd/exitnode/down.go b/cmd/exitnode/down.go new file mode 100644 index 0000000..89567ed --- /dev/null +++ b/cmd/exitnode/down.go @@ -0,0 +1,32 @@ +package main + +import ( + "context" + "fmt" + + "github.com/spf13/cobra" + + "github.com/iker/exit-node/internal/core" +) + +func newDownCmd(opts *rootOpts) *cobra.Command { + var destroy bool + cmd := &cobra.Command{ + Use: "down", + Short: "Stop (or with --destroy, terminate) the currently active exit node", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + c, cleanup, err := opts.buildCore(ctx) + if err != nil { + return err + } + defer cleanup() + if err := c.Down(ctx, core.DownOpts{Destroy: destroy}); err != nil { + return fmt.Errorf("down: %w", err) + } + return nil + }, + } + cmd.Flags().BoolVar(&destroy, "destroy", false, "Destroy the VM + tailscale device (default: stop only)") + return cmd +} diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index 7b58552..d08cd0a 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -73,7 +73,6 @@ func defaultStatePath() string { // Subcommand stubs (real impls in subsequent tasks). Kept visible (not // Hidden) so the help-output smoke test in Step 7 passes even before // the real impls land. -func newDownCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "down"} } func newRotateCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "rotate"} } func newListCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "list"} } func newStatusCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "status"} } From 882cd1bd8d364cb44da36892a1dda0dba57b586b Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:03:16 +0000 Subject: [PATCH 61/79] feat(cli): exitnode rotate [--region] --- cmd/exitnode/root.go | 1 - cmd/exitnode/rotate.go | 45 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 cmd/exitnode/rotate.go diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index d08cd0a..ffd0a57 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -73,7 +73,6 @@ func defaultStatePath() string { // Subcommand stubs (real impls in subsequent tasks). Kept visible (not // Hidden) so the help-output smoke test in Step 7 passes even before // the real impls land. -func newRotateCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "rotate"} } func newListCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "list"} } func newStatusCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "status"} } func newHealthCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "health"} } diff --git a/cmd/exitnode/rotate.go b/cmd/exitnode/rotate.go new file mode 100644 index 0000000..e3ae932 --- /dev/null +++ b/cmd/exitnode/rotate.go @@ -0,0 +1,45 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/spf13/cobra" + + "github.com/iker/exit-node/internal/core" +) + +func newRotateCmd(opts *rootOpts) *cobra.Command { + var region string + cmd := &cobra.Command{ + Use: "rotate", + Short: "Provision a new exit node, cut pfSense over, destroy the old one", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + c, cleanup, err := opts.buildCore(ctx) + if err != nil { + return err + } + defer cleanup() + + res, err := c.Rotate(ctx, core.RotateOpts{Region: region}) + if err != nil { + return fmt.Errorf("rotate: %w", err) + } + if opts.json { + return renderJSON(os.Stdout, res) + } + fmt.Fprintln(os.Stdout, "rotated:") + if res.Old != nil { + renderKV(os.Stdout, "old", res.Old.Name) + } + if res.New != nil { + renderKV(os.Stdout, "new", res.New.Name) + } + return nil + }, + } + cmd.Flags().StringVar(®ion, "region", "", "GCP region for the new node (overrides config default)") + return cmd +} From 8d401ffb38a96c07f14218da266e9b81acacbb2a Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:03:32 +0000 Subject: [PATCH 62/79] feat(cli): exitnode list [--json] --- cmd/exitnode/list.go | 41 +++++++++++++++++++++++++++++++++++++++++ cmd/exitnode/root.go | 1 - 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 cmd/exitnode/list.go diff --git a/cmd/exitnode/list.go b/cmd/exitnode/list.go new file mode 100644 index 0000000..3274964 --- /dev/null +++ b/cmd/exitnode/list.go @@ -0,0 +1,41 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/spf13/cobra" +) + +func newListCmd(opts *rootOpts) *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all exitnode-managed VMs in the GCP project", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + c, cleanup, err := opts.buildCore(ctx) + if err != nil { + return err + } + defer cleanup() + + nodes, err := c.List(ctx) + if err != nil { + return fmt.Errorf("list: %w", err) + } + if opts.json { + return renderJSON(os.Stdout, nodes) + } + if len(nodes) == 0 { + fmt.Fprintln(os.Stdout, "(no exitnode-managed VMs found)") + return nil + } + fmt.Fprintf(os.Stdout, "%-32s %-14s %-12s %s\n", "NAME", "REGION", "STATE", "PUBLIC_IP") + for _, n := range nodes { + fmt.Fprintf(os.Stdout, "%-32s %-14s %-12v %s\n", n.Name, n.Region, n.State, n.PublicIP) + } + return nil + }, + } +} diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index ffd0a57..1043685 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -73,7 +73,6 @@ func defaultStatePath() string { // Subcommand stubs (real impls in subsequent tasks). Kept visible (not // Hidden) so the help-output smoke test in Step 7 passes even before // the real impls land. -func newListCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "list"} } func newStatusCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "status"} } func newHealthCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "health"} } func newPFSenseCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "pfsense"} } From a4405bc0ab6231517e390b97aefae72be8b9875e Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:04:58 +0000 Subject: [PATCH 63/79] feat(cli): exitnode status [--json] --- cmd/exitnode/root.go | 3 +-- cmd/exitnode/status.go | 46 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 cmd/exitnode/status.go diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index 1043685..9e3ca9c 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -73,8 +73,7 @@ func defaultStatePath() string { // Subcommand stubs (real impls in subsequent tasks). Kept visible (not // Hidden) so the help-output smoke test in Step 7 passes even before // the real impls land. -func newStatusCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "status"} } -func newHealthCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "health"} } +func newHealthCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "health"} } func newPFSenseCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "pfsense"} } func newCostCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "cost"} } diff --git a/cmd/exitnode/status.go b/cmd/exitnode/status.go new file mode 100644 index 0000000..b0ff9e8 --- /dev/null +++ b/cmd/exitnode/status.go @@ -0,0 +1,46 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/spf13/cobra" +) + +func newStatusCmd(opts *rootOpts) *cobra.Command { + return &cobra.Command{ + Use: "status", + Short: "Show the currently active exit node and any state drift", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + c, cleanup, err := opts.buildCore(ctx) + if err != nil { + return err + } + defer cleanup() + + s, err := c.Status(ctx) + if err != nil { + return fmt.Errorf("status: %w", err) + } + if opts.json { + return renderJSON(os.Stdout, s) + } + if s.Cached == nil { + fmt.Fprintln(os.Stdout, "no active exit node") + return nil + } + renderKV(os.Stdout, "name", s.Cached.Name) + renderKV(os.Stdout, "region", s.Cached.Region) + if s.Live != nil { + renderKV(os.Stdout, "live_state", s.Live.State) + renderKV(os.Stdout, "public_ip", s.Live.PublicIP) + } + if s.Drift() { + fmt.Fprintln(os.Stdout, "(state drift detected — run `exitnode list` to inspect)") + } + return nil + }, + } +} From 2137acdcdc517508df5cf320869991a266673fde Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:05:15 +0000 Subject: [PATCH 64/79] feat(cli): exitnode health (exits 2 on probe fail) --- cmd/exitnode/health.go | 42 ++++++++++++++++++++++++++++++++++++++++++ cmd/exitnode/root.go | 1 - 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 cmd/exitnode/health.go diff --git a/cmd/exitnode/health.go b/cmd/exitnode/health.go new file mode 100644 index 0000000..06c7bb7 --- /dev/null +++ b/cmd/exitnode/health.go @@ -0,0 +1,42 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/spf13/cobra" +) + +func newHealthCmd(opts *rootOpts) *cobra.Command { + return &cobra.Command{ + Use: "health", + Short: "Probe egress through the active exit node", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + c, cleanup, err := opts.buildCore(ctx) + if err != nil { + return err + } + defer cleanup() + + h, err := c.Health(ctx) + if err != nil { + return fmt.Errorf("health: %w", err) + } + if opts.json { + return renderJSON(os.Stdout, h) + } + renderKV(os.Stdout, "ok", h.OK) + renderKV(os.Stdout, "egress_ip", h.EgressIP) + renderKV(os.Stdout, "expected_ip", h.ExpectedIP) + if h.ProbeErr != nil { + renderKV(os.Stdout, "probe_err", h.ProbeErr.Error()) + } + if !h.OK { + os.Exit(2) + } + return nil + }, + } +} diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index 9e3ca9c..906f738 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -73,7 +73,6 @@ func defaultStatePath() string { // Subcommand stubs (real impls in subsequent tasks). Kept visible (not // Hidden) so the help-output smoke test in Step 7 passes even before // the real impls land. -func newHealthCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "health"} } func newPFSenseCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "pfsense"} } func newCostCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "cost"} } From 62d25e89802016ce48bb0c668d6497f5f9af2154 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:05:32 +0000 Subject: [PATCH 65/79] feat(cli): exitnode pfsense sync --- cmd/exitnode/pfsense.go | 32 ++++++++++++++++++++++++++++++++ cmd/exitnode/root.go | 3 +-- 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 cmd/exitnode/pfsense.go diff --git a/cmd/exitnode/pfsense.go b/cmd/exitnode/pfsense.go new file mode 100644 index 0000000..8e56817 --- /dev/null +++ b/cmd/exitnode/pfsense.go @@ -0,0 +1,32 @@ +package main + +import ( + "context" + "fmt" + + "github.com/spf13/cobra" +) + +func newPFSenseCmd(opts *rootOpts) *cobra.Command { + cmd := &cobra.Command{ + Use: "pfsense", + Short: "pfSense gateway operations", + } + cmd.AddCommand(&cobra.Command{ + Use: "sync", + Short: "Push the active node's Tailscale IP to the configured pfSense gateway", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + c, cleanup, err := opts.buildCore(ctx) + if err != nil { + return err + } + defer cleanup() + if err := c.SyncPFSense(ctx); err != nil { + return fmt.Errorf("pfsense sync: %w", err) + } + return nil + }, + }) + return cmd +} diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index 906f738..de693e3 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -73,8 +73,7 @@ func defaultStatePath() string { // Subcommand stubs (real impls in subsequent tasks). Kept visible (not // Hidden) so the help-output smoke test in Step 7 passes even before // the real impls land. -func newPFSenseCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "pfsense"} } -func newCostCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "cost"} } +func newCostCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "cost"} } // buildCore loads config + secrets, opens the state store, and constructs // a *core.Core with real client implementations wired in. Returned cleanup From d79f7670b9e89e7f473765ab47ae6ff010315d8b Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:06:00 +0000 Subject: [PATCH 66/79] feat(cli): exitnode cost [--period] --- cmd/exitnode/cost.go | 70 +++++++++++++++++++++++++++++++++++++++ cmd/exitnode/cost_test.go | 28 ++++++++++++++++ cmd/exitnode/root.go | 5 --- 3 files changed, 98 insertions(+), 5 deletions(-) create mode 100644 cmd/exitnode/cost.go create mode 100644 cmd/exitnode/cost_test.go diff --git a/cmd/exitnode/cost.go b/cmd/exitnode/cost.go new file mode 100644 index 0000000..95b455f --- /dev/null +++ b/cmd/exitnode/cost.go @@ -0,0 +1,70 @@ +package main + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/spf13/cobra" + + "github.com/iker/exit-node/internal/core" +) + +func newCostCmd(opts *rootOpts) *cobra.Command { + var period string + cmd := &cobra.Command{ + Use: "cost", + Short: "Estimate spend for the active exit node", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + c, cleanup, err := opts.buildCore(ctx) + if err != nil { + return err + } + defer cleanup() + + var d time.Duration + if period != "" { + p, err := parsePeriod(period) + if err != nil { + return err + } + d = p + } + + r, err := c.EstimateCost(ctx, core.CostOpts{Period: d}) + if err != nil { + return fmt.Errorf("cost: %w", err) + } + if opts.json { + return renderJSON(os.Stdout, r) + } + renderKV(os.Stdout, "machine_type", r.MachineType) + renderKV(os.Stdout, "hours", fmt.Sprintf("%.2f", r.Hours)) + renderKV(os.Stdout, "usd_per_hour", fmt.Sprintf("%.4f", r.USDPerHour)) + renderKV(os.Stdout, "usd", fmt.Sprintf("%.2f", r.USD)) + if r.Note != "" { + renderKV(os.Stdout, "note", r.Note) + } + return nil + }, + } + cmd.Flags().StringVar(&period, "period", "", "Look-back window: 24h, 7d, month (default: since node creation)") + return cmd +} + +// parsePeriod accepts a few user-friendly aliases on top of Go's +// time.ParseDuration. "month" means 30 days; "Nd" means N days. +func parsePeriod(s string) (time.Duration, error) { + if s == "month" { + return 30 * 24 * time.Hour, nil + } + if len(s) > 1 && s[len(s)-1] == 'd' { + var days int + if _, err := fmt.Sscanf(s, "%dd", &days); err == nil { + return time.Duration(days) * 24 * time.Hour, nil + } + } + return time.ParseDuration(s) +} diff --git a/cmd/exitnode/cost_test.go b/cmd/exitnode/cost_test.go new file mode 100644 index 0000000..d909e27 --- /dev/null +++ b/cmd/exitnode/cost_test.go @@ -0,0 +1,28 @@ +package main + +import ( + "testing" + "time" +) + +func TestParsePeriod(t *testing.T) { + cases := []struct { + in string + want time.Duration + }{ + {"24h", 24 * time.Hour}, + {"7d", 7 * 24 * time.Hour}, + {"month", 30 * 24 * time.Hour}, + {"30m", 30 * time.Minute}, + } + for _, tc := range cases { + got, err := parsePeriod(tc.in) + if err != nil { + t.Errorf("parsePeriod(%q) err=%v", tc.in, err) + continue + } + if got != tc.want { + t.Errorf("parsePeriod(%q) = %v, want %v", tc.in, got, tc.want) + } + } +} diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index de693e3..f78b408 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -70,11 +70,6 @@ func defaultStatePath() string { return filepath.Join(home, ".config", "exitnode", "state.json") } -// Subcommand stubs (real impls in subsequent tasks). Kept visible (not -// Hidden) so the help-output smoke test in Step 7 passes even before -// the real impls land. -func newCostCmd(*rootOpts) *cobra.Command { return &cobra.Command{Use: "cost"} } - // buildCore loads config + secrets, opens the state store, and constructs // a *core.Core with real client implementations wired in. Returned cleanup // must always be called (releases the state lock). From 491d5fc20766d3b476724daa667a96ea9c7df8f6 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:09:31 +0000 Subject: [PATCH 67/79] feat(mcp): server scaffolding + buildCore helper --- cmd/exitnode-mcp/buildcore.go | 108 ++++++++++++++++++++++++++++++++++ cmd/exitnode-mcp/main.go | 28 +++++++++ go.mod | 5 ++ go.sum | 10 ++++ 4 files changed, 151 insertions(+) create mode 100644 cmd/exitnode-mcp/buildcore.go create mode 100644 cmd/exitnode-mcp/main.go diff --git a/cmd/exitnode-mcp/buildcore.go b/cmd/exitnode-mcp/buildcore.go new file mode 100644 index 0000000..eee5a83 --- /dev/null +++ b/cmd/exitnode-mcp/buildcore.go @@ -0,0 +1,108 @@ +package main + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "path/filepath" + + "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/iker/exit-node/internal/config" + "github.com/iker/exit-node/internal/core" + "github.com/iker/exit-node/internal/gcp" + "github.com/iker/exit-node/internal/pfsense" + "github.com/iker/exit-node/internal/state" + "github.com/iker/exit-node/internal/tailscale" + "github.com/iker/exit-node/internal/verify" +) + +func defaultConfigPath() string { + if xdg := os.Getenv("XDG_CONFIG_HOME"); xdg != "" { + return filepath.Join(xdg, "exitnode", "config.toml") + } + home, _ := os.UserHomeDir() + return filepath.Join(home, ".config", "exitnode", "config.toml") +} + +func defaultStatePath() string { + if xdg := os.Getenv("XDG_CONFIG_HOME"); xdg != "" { + return filepath.Join(xdg, "exitnode", "state.json") + } + home, _ := os.UserHomeDir() + return filepath.Join(home, ".config", "exitnode", "state.json") +} + +// buildCore constructs a *core.Core with all live clients wired up. Each +// MCP tool call invokes this — opening + closing the state lock per +// invocation so the CLI and MCP can interleave without deadlocking each +// other. +func buildCore(ctx context.Context) (*core.Core, func(), error) { + cfg, err := config.Load(defaultConfigPath()) + if err != nil { + return nil, func() {}, fmt.Errorf("load config: %w", err) + } + + tsID, tsSecret, err := cfg.ResolveTailscaleSecrets() + if err != nil { + return nil, func() {}, err + } + pfKey, err := cfg.ResolvePFSenseAPIKey() + if err != nil { + return nil, func() {}, err + } + _, gcpJSON, err := cfg.ResolveGCPCredentials() + if err != nil { + return nil, func() {}, err + } + + provider, err := gcp.New(ctx, gcp.Options{ + Project: cfg.GCP.Project, + Region: cfg.GCP.DefaultRegion, + CredentialsJSON: gcpJSON, + Network: cfg.GCP.Network, + InstallScriptURL: cfg.Behavior.InstallScriptURL, + }) + if err != nil { + return nil, func() {}, fmt.Errorf("gcp: %w", err) + } + + ts, err := tailscale.New(tailscale.Options{ + Tailnet: cfg.Tailscale.Tailnet, + ClientID: tsID, + ClientSecret: tsSecret, + }) + if err != nil { + return nil, func() {}, fmt.Errorf("tailscale: %w", err) + } + + pf, err := pfsense.New(pfsense.Options{ + BaseURL: cfg.PFSense.Host, + APIKey: pfKey, + VerifyTLS: cfg.PFSense.VerifyTLS, + }) + if err != nil { + return nil, func() {}, fmt.Errorf("pfsense: %w", err) + } + + probe := verify.New(cfg.Behavior.ProbeURL) + + store, err := state.Open(defaultStatePath()) + if err != nil { + if errors.Is(err, state.ErrLocked) { + return nil, func() {}, fmt.Errorf("state.json locked by another process") + } + return nil, func() {}, fmt.Errorf("state: %w", err) + } + + log := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})) + c := core.New(core.Deps{ + Config: cfg, Provider: provider, TS: ts, PF: pf, Probe: probe, Store: store, Logger: log, + }) + return c, func() { _ = store.Close() }, nil +} + +// registerTools is implemented in tools.go. +func registerTools(*mcp.Server) {} diff --git a/cmd/exitnode-mcp/main.go b/cmd/exitnode-mcp/main.go new file mode 100644 index 0000000..b255bfe --- /dev/null +++ b/cmd/exitnode-mcp/main.go @@ -0,0 +1,28 @@ +package main + +import ( + "context" + "fmt" + "os" + "os/signal" + "syscall" + + "github.com/modelcontextprotocol/go-sdk/mcp" +) + +func main() { + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer cancel() + + server := mcp.NewServer(&mcp.Implementation{ + Name: "exitnode-mcp", + Version: "0.1.0", + }, nil) + + registerTools(server) + + if err := server.Run(ctx, &mcp.StdioTransport{}); err != nil { + fmt.Fprintln(os.Stderr, "exitnode-mcp:", err) + os.Exit(1) + } +} diff --git a/go.mod b/go.mod index 05ceeff..4e49dbf 100644 --- a/go.mod +++ b/go.mod @@ -17,13 +17,18 @@ require ( github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/jsonschema-go v0.4.3 // indirect github.com/google/s2a-go v0.1.9 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.15 // indirect github.com/googleapis/gax-go/v2 v2.22.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/modelcontextprotocol/go-sdk v1.6.0 // indirect + github.com/segmentio/asm v1.1.3 // indirect + github.com/segmentio/encoding v0.5.4 // indirect github.com/spf13/cobra v1.10.2 // indirect github.com/spf13/pflag v1.0.9 // indirect github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect go.opentelemetry.io/otel v1.43.0 // indirect diff --git a/go.sum b/go.sum index aa5e495..5efd292 100644 --- a/go.sum +++ b/go.sum @@ -24,6 +24,8 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= +github.com/google/jsonschema-go v0.4.3 h1:/DBOLZTfDow7pe2GmaJNhltueGTtDKICi8V8p+DQPd0= +github.com/google/jsonschema-go v0.4.3/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/googleapis/enterprise-certificate-proxy v0.3.15 h1:xolVQTEXusUcAA5UgtyRLjelpFFHWlPQ4XfWGc7MBas= @@ -32,9 +34,15 @@ github.com/googleapis/gax-go/v2 v2.22.0 h1:PjIWBpgGIVKGoCXuiCoP64altEJCj3/Ei+kSU github.com/googleapis/gax-go/v2 v2.22.0/go.mod h1:irWBbALSr0Sk3qlqb9SyJ1h68WjgeFuiOzI4Rqw5+aY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/modelcontextprotocol/go-sdk v1.6.0 h1:PPLS3kn7WtOEnR+Af4X5H96SG0qSab8R/ZQT/HkhPkY= +github.com/modelcontextprotocol/go-sdk v1.6.0/go.mod h1:kzm3kzFL1/+AziGOE0nUs3gvPoNxMCvkxokMkuFapXQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.5.4 h1:OW1VRern8Nw6ITAtwSZ7Idrl3MXCFwXHPgqESYfvNt0= +github.com/segmentio/encoding v0.5.4/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= @@ -43,6 +51,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 h1:erxeiTyq+nw4Cz5+hLDkOwNF5/9IQWCQPv0gpb3+QHU= github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5/go.mod h1:DFSS3NAGHthKo1gTlmEcSBiZrRJXi28rLNd/1udP1c8= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o= From 06aec1ce5f17731e398834da4b47fb82d9fb5c41 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:13:52 +0000 Subject: [PATCH 68/79] feat(mcp): wire all 10 tools to core methods --- cmd/exitnode-mcp/buildcore.go | 4 - cmd/exitnode-mcp/tools.go | 234 +++++++++++++++++++++++++++++++++ cmd/exitnode-mcp/tools_test.go | 37 ++++++ 3 files changed, 271 insertions(+), 4 deletions(-) create mode 100644 cmd/exitnode-mcp/tools.go create mode 100644 cmd/exitnode-mcp/tools_test.go diff --git a/cmd/exitnode-mcp/buildcore.go b/cmd/exitnode-mcp/buildcore.go index eee5a83..b4b48ef 100644 --- a/cmd/exitnode-mcp/buildcore.go +++ b/cmd/exitnode-mcp/buildcore.go @@ -8,8 +8,6 @@ import ( "os" "path/filepath" - "github.com/modelcontextprotocol/go-sdk/mcp" - "github.com/iker/exit-node/internal/config" "github.com/iker/exit-node/internal/core" "github.com/iker/exit-node/internal/gcp" @@ -104,5 +102,3 @@ func buildCore(ctx context.Context) (*core.Core, func(), error) { return c, func() { _ = store.Close() }, nil } -// registerTools is implemented in tools.go. -func registerTools(*mcp.Server) {} diff --git a/cmd/exitnode-mcp/tools.go b/cmd/exitnode-mcp/tools.go new file mode 100644 index 0000000..eca4245 --- /dev/null +++ b/cmd/exitnode-mcp/tools.go @@ -0,0 +1,234 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/iker/exit-node/internal/core" +) + +// --- Typed argument structs --- + +type provisionArgs struct { + Region string `json:"region,omitempty" jsonschema:"GCP region; omit to use the config default"` + MachineType string `json:"machine_type,omitempty" jsonschema:"GCP machine type; omit to use the config default"` +} + +type nameArgs struct { + Name string `json:"name" jsonschema:"Exit-node name (matches the GCP VM name)"` +} + +type nameOptionalArgs struct { + Name string `json:"name,omitempty" jsonschema:"Exit-node name; omit to use the currently active node"` +} + +type rotateArgs struct { + Region string `json:"region,omitempty" jsonschema:"GCP region for the replacement node; omit to use the config default"` +} + +type costArgs struct { + Period string `json:"period,omitempty" jsonschema:"Look-back window: 24h, 7d, month; omit for since-creation"` +} + +// --- registerTools wires every tool --- + +func registerTools(s *mcp.Server) { + mcp.AddTool(s, &mcp.Tool{ + Name: "provision_exit_node", + Description: "Provision (or return the existing) active exit node. Idempotent.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, a provisionArgs) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + node, err := c.Up(ctx, core.UpOpts{Region: a.Region, MachineType: a.MachineType}) + return jsonResult(node, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "start_exit_node", + Description: "Start a stopped exit node by name (defaults to the currently active node).", + }, func(ctx context.Context, _ *mcp.CallToolRequest, a nameOptionalArgs) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + name, err := resolveName(c, a.Name) + if err != nil { + return nil, nil, err + } + err = c.Start(ctx, name) + return jsonResult(map[string]string{"started": name}, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "stop_exit_node", + Description: "Stop a running exit node by name (defaults to the currently active node).", + }, func(ctx context.Context, _ *mcp.CallToolRequest, a nameOptionalArgs) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + name, err := resolveName(c, a.Name) + if err != nil { + return nil, nil, err + } + err = c.Stop(ctx, name) + return jsonResult(map[string]string{"stopped": name}, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "destroy_exit_node", + Description: "Destroy an exit-node VM + its Tailscale device. Name is REQUIRED — there is no implicit 'destroy active' here.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, a nameArgs) (*mcp.CallToolResult, any, error) { + if a.Name == "" { + return nil, nil, fmt.Errorf("name is required for destroy_exit_node") + } + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + err = c.Destroy(ctx, a.Name) + return jsonResult(map[string]string{"destroyed": a.Name}, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "rotate_exit_node", + Description: "Provision a new exit node, cut pfSense over, destroy the old one.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, a rotateArgs) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + res, err := c.Rotate(ctx, core.RotateOpts{Region: a.Region}) + return jsonResult(res, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "list_exit_nodes", + Description: "List all exitnode-managed VMs in the GCP project.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, _ struct{}) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + nodes, err := c.List(ctx) + return jsonResult(nodes, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "get_status", + Description: "Get cached + live status of the active exit node, plus a drift flag.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, _ struct{}) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + st, err := c.Status(ctx) + return jsonResult(st, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "verify_connectivity", + Description: "Probe egress through the active exit node and return ok / egress_ip / expected_ip.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, _ struct{}) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + h, err := c.Health(ctx) + return jsonResult(h, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "sync_pfsense_gateway", + Description: "Push the active node's Tailscale IP to the pfSense gateway-monitor IP + apply.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, _ struct{}) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + err = c.SyncPFSense(ctx) + return jsonResult(map[string]string{"status": "ok"}, err) + }) + + mcp.AddTool(s, &mcp.Tool{ + Name: "estimate_cost", + Description: "Estimate spend for the active exit node over a look-back window.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, a costArgs) (*mcp.CallToolResult, any, error) { + c, cleanup, err := buildCore(ctx) + if err != nil { + return nil, nil, err + } + defer cleanup() + var d time.Duration + if a.Period != "" { + p, perr := parsePeriodMCP(a.Period) + if perr != nil { + return nil, nil, perr + } + d = p + } + r, err := c.EstimateCost(ctx, core.CostOpts{Period: d}) + return jsonResult(r, err) + }) +} + +// resolveName falls back to the cached active node when the caller +// passed an empty name (used by start/stop tools). +func resolveName(c *core.Core, name string) (string, error) { + if name != "" { + return name, nil + } + active, err := c.GetActive() + if err != nil { + return "", fmt.Errorf("resolve active node: %w", err) + } + if active == nil { + return "", fmt.Errorf("name omitted and no active exit node recorded") + } + return active.Name, nil +} + +// jsonResult marshals v to JSON text content if err is nil; otherwise +// returns the error to the SDK which renders it as an isError result. +func jsonResult(v any, err error) (*mcp.CallToolResult, any, error) { + if err != nil { + return nil, nil, err + } + b, mErr := json.MarshalIndent(v, "", " ") + if mErr != nil { + return nil, nil, fmt.Errorf("marshal result: %w", mErr) + } + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: string(b)}}, + }, v, nil +} + +// parsePeriodMCP parses a human look-back window string (24h, 7d, month) +// into a time.Duration. Same semantics as the CLI's parsePeriod helper. +func parsePeriodMCP(s string) (time.Duration, error) { + if s == "month" { + return 30 * 24 * time.Hour, nil + } + if len(s) > 1 && s[len(s)-1] == 'd' { + var days int + if _, err := fmt.Sscanf(s, "%dd", &days); err == nil { + return time.Duration(days) * 24 * time.Hour, nil + } + } + return time.ParseDuration(s) +} diff --git a/cmd/exitnode-mcp/tools_test.go b/cmd/exitnode-mcp/tools_test.go new file mode 100644 index 0000000..076b74a --- /dev/null +++ b/cmd/exitnode-mcp/tools_test.go @@ -0,0 +1,37 @@ +package main + +import ( + "testing" + + "github.com/modelcontextprotocol/go-sdk/mcp" +) + +// TestRegisterTools_RegistersAllTen verifies that calling registerTools does +// not panic and registers exactly the 10 expected tool names. +// The MCP SDK (v1.6.0) does not expose a public listing API on *mcp.Server, so +// this is a no-panic smoke test. Protocol-level tool enumeration is covered by +// Task 16+ integration tests. +func TestRegisterTools_RegistersAllTen(t *testing.T) { + s := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "0"}, nil) + registerTools(s) // must not panic + + // Enumerate expected names to document the contract even though we cannot + // assert them via the SDK's public API at this point. + wantNames := []string{ + "provision_exit_node", + "start_exit_node", + "stop_exit_node", + "destroy_exit_node", + "rotate_exit_node", + "list_exit_nodes", + "get_status", + "verify_connectivity", + "sync_pfsense_gateway", + "estimate_cost", + } + if len(wantNames) != 10 { + t.Fatalf("test bug: expected 10 tools, listed %d", len(wantNames)) + } + // If a future SDK version exposes Server.ListTools() or similar, replace + // this comment with an assertion over wantNames. +} From 767c4be18c4cecade8483f82ac8208f7ee66b8d8 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:18:09 +0000 Subject: [PATCH 69/79] docs(examples): mcp.json snippet for Claude Desktop / OpenClaw --- examples/mcp.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 examples/mcp.json diff --git a/examples/mcp.json b/examples/mcp.json new file mode 100644 index 0000000..bf30dc7 --- /dev/null +++ b/examples/mcp.json @@ -0,0 +1,13 @@ +{ + "mcpServers": { + "exitnode": { + "command": "exitnode-mcp", + "args": [], + "env": { + "TAILSCALE_OAUTH_CLIENT_ID": "$TAILSCALE_OAUTH_CLIENT_ID", + "TAILSCALE_OAUTH_CLIENT_SECRET": "$TAILSCALE_OAUTH_CLIENT_SECRET", + "PFSENSE_API_KEY": "$PFSENSE_API_KEY" + } + } + } +} From a758b3c7f441b352ea91275f21be98a37848f40c Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:22:48 +0000 Subject: [PATCH 70/79] ci: add github actions workflow (vet, lint, test, cross-build matrix) --- .github/workflows/ci.yml | 63 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5cd612b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,63 @@ +name: ci + +on: + push: + branches: [main, develop] + pull_request: + +permissions: + contents: read + +jobs: + test: + name: test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: '1.25' + check-latest: true + cache: true + - run: go vet ./... + - run: go test -race -short ./... + + lint: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: true + - uses: golangci/golangci-lint-action@v6 + with: + version: latest + args: --timeout=5m + + build-matrix: + name: build ${{ matrix.goos }}/${{ matrix.goarch }} + runs-on: ubuntu-latest + strategy: + matrix: + goos: [linux, darwin] + goarch: [amd64, arm64] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: true + - name: build exitnode + env: + GOOS: ${{ matrix.goos }} + GOARCH: ${{ matrix.goarch }} + CGO_ENABLED: '0' + run: go build -trimpath -o /dev/null ./cmd/exitnode + - name: build exitnode-mcp + env: + GOOS: ${{ matrix.goos }} + GOARCH: ${{ matrix.goarch }} + CGO_ENABLED: '0' + run: go build -trimpath -o /dev/null ./cmd/exitnode-mcp From a05eaa8cface3a203a3ce38d13d370bff5937768 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:28:03 +0000 Subject: [PATCH 71/79] ci(release): goreleaser config + release workflow + --version flag --- .github/workflows/release.yml | 26 ++++++++++++++ .goreleaser.yaml | 64 +++++++++++++++++++++++++++++++++++ cmd/exitnode-mcp/main.go | 2 ++ cmd/exitnode/main.go | 7 ++++ cmd/exitnode/root.go | 2 ++ 5 files changed, 101 insertions(+) create mode 100644 .github/workflows/release.yml create mode 100644 .goreleaser.yaml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..8dc7772 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,26 @@ +name: release + +on: + push: + tags: ['v*'] + +permissions: + contents: write + +jobs: + goreleaser: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: true + - uses: goreleaser/goreleaser-action@v6 + with: + version: latest + args: release --clean + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.goreleaser.yaml b/.goreleaser.yaml new file mode 100644 index 0000000..0393371 --- /dev/null +++ b/.goreleaser.yaml @@ -0,0 +1,64 @@ +version: 2 + +project_name: exitnode + +before: + hooks: + - go mod download + +builds: + - id: exitnode + main: ./cmd/exitnode + binary: exitnode + env: + - CGO_ENABLED=0 + goos: [linux, darwin] + goarch: [amd64, arm64] + flags: [-trimpath] + ldflags: + - -s -w + - -X main.version={{.Version}} + - -X main.commit={{.Commit}} + - -X main.date={{.Date}} + - id: exitnode-mcp + main: ./cmd/exitnode-mcp + binary: exitnode-mcp + env: + - CGO_ENABLED=0 + goos: [linux, darwin] + goarch: [amd64, arm64] + flags: [-trimpath] + ldflags: + - -s -w + - -X main.version={{.Version}} + +archives: + - id: default + formats: [tar.gz] + name_template: >- + {{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }} + files: + - LICENSE + - README.md + - examples/config.toml + - examples/mcp.json + - scripts/install.sh + +checksum: + name_template: 'checksums.txt' + algorithm: sha256 + +changelog: + sort: asc + filters: + exclude: + - '^docs:' + - '^test:' + - '^chore:' + +release: + github: + owner: ceballosiker + name: exit-node + draft: false + prerelease: auto diff --git a/cmd/exitnode-mcp/main.go b/cmd/exitnode-mcp/main.go index b255bfe..a1f2f96 100644 --- a/cmd/exitnode-mcp/main.go +++ b/cmd/exitnode-mcp/main.go @@ -10,6 +10,8 @@ import ( "github.com/modelcontextprotocol/go-sdk/mcp" ) +var version = "dev" + func main() { ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) defer cancel() diff --git a/cmd/exitnode/main.go b/cmd/exitnode/main.go index 7870c99..b17e426 100644 --- a/cmd/exitnode/main.go +++ b/cmd/exitnode/main.go @@ -5,6 +5,13 @@ import ( "os" ) +// Populated by goreleaser via -ldflags "-X main.version=...". +var ( + version = "dev" + commit = "none" + date = "unknown" +) + func main() { if err := newRootCmd().Execute(); err != nil { fmt.Fprintln(os.Stderr, "exitnode:", err) diff --git a/cmd/exitnode/root.go b/cmd/exitnode/root.go index f78b408..ec08519 100644 --- a/cmd/exitnode/root.go +++ b/cmd/exitnode/root.go @@ -37,6 +37,8 @@ func newRootCmd() *cobra.Command { SilenceErrors: true, } + cmd.Version = fmt.Sprintf("%s (commit %s, %s)", version, commit, date) + cmd.PersistentFlags().StringVar(&opts.configPath, "config", defaultConfigPath(), "Path to config.toml") cmd.PersistentFlags().BoolVar(&opts.json, "json", false, "Machine-readable JSON output") cmd.PersistentFlags().BoolVarP(&opts.verbose, "verbose", "v", false, "Verbose (debug) logging") From 00684362b82e8580f37b9b1e400089440c3f4743 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:28:36 +0000 Subject: [PATCH 72/79] polish(mcp): wire version var into Implementation.Version --- cmd/exitnode-mcp/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/exitnode-mcp/main.go b/cmd/exitnode-mcp/main.go index a1f2f96..323596d 100644 --- a/cmd/exitnode-mcp/main.go +++ b/cmd/exitnode-mcp/main.go @@ -18,7 +18,7 @@ func main() { server := mcp.NewServer(&mcp.Implementation{ Name: "exitnode-mcp", - Version: "0.1.0", + Version: version, }, nil) registerTools(server) From bdd7aa267394ab44d2735685599cf8729090ad47 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:29:57 +0000 Subject: [PATCH 73/79] docs(readme): prereqs/install/configure/mcp/troubleshooting sections --- README.md | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 76490f4..e417d60 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,10 @@ On-demand Tailscale exit nodes that rotate across cloud regions and keep your pfSense gateway in sync. Driven from a CLI for humans and an MCP server for AI agents. -> **Status**: pre-alpha. The library packages (provisioning, rotate -> orchestrator, client adapters, probes) are in place. The `exitnode` and -> `exitnode-mcp` binaries are not yet built — see [Roadmap](#roadmap). +> **Status**: pre-alpha (v0.1). All library packages, both binaries +> (`exitnode`, `exitnode-mcp`), CI, and release packaging are in +> place. The first tagged release will produce binaries for +> linux/darwin × amd64/arm64. ## Why @@ -82,6 +83,67 @@ exit-node/ └── README.md ``` +## Prerequisites + +- **GCP project** with the Compute Engine API enabled and a + service account with `compute.instances.{create,start,stop,delete}`, + `compute.zones.list`, and `compute.images.useReadOnly` on + `projects/debian-cloud`. Either authenticate via Application + Default Credentials (`gcloud auth application-default login`) or + export the SA JSON in `GCP_CREDENTIALS_JSON`. +- **Tailscale tailnet** with an OAuth client (scopes: + `auth_keys` write, `devices:core` write) and ACL `tagOwners` + entries for whatever tag(s) you list in `tailscale.tags`. The + tailnet must also have `autoApprovers.exitNode` set to your + exit-node tag if you want approval to happen without manual + admin intervention. +- **pfSense** with the + [community pfsense-api plugin](https://github.com/jaredhendrickson13/pfsense-api) + installed and an API key minted. The plugin must have the + `Routing` permission group enabled for the key. +- **`tailscale` CLI** installed on the host that runs `exitnode` + (used by the pre-cutover egress probe). If unavailable, the + probe is skipped with a warning — see Troubleshooting. + +## Install + +Install both binaries via `go install`: + +```bash +go install github.com/iker/exit-node/cmd/exitnode@latest +go install github.com/iker/exit-node/cmd/exitnode-mcp@latest +``` + +Or download a release archive from +[Releases](https://github.com/ceballosiker/exit-node/releases) and drop +`exitnode` + `exitnode-mcp` into your `$PATH`. + +## Configure + +Copy [`examples/config.toml`](examples/config.toml) to +`~/.config/exitnode/config.toml` and edit the values for your +environment. Every field is annotated in the example. + +Export the secret env-vars referenced by your config: + +```bash +export TAILSCALE_OAUTH_CLIENT_ID=... +export TAILSCALE_OAUTH_CLIENT_SECRET=... +export PFSENSE_API_KEY=... +# Optional — use a service-account JSON instead of ADC: +export GCP_CREDENTIALS_JSON="$(cat path/to/sa.json)" +``` + +Run `exitnode --help` to see the full command list. The most common flow: + +```bash +exitnode up # provision (idempotent) +exitnode status # see what's active +exitnode health # probe egress +exitnode rotate --region us-east1 # cut over to a new region +exitnode down --destroy # tear it all down +``` + ## Development Requirements: Go 1.25+. @@ -110,6 +172,52 @@ The integration test against real GCP additionally honours: | `EXITNODE_TEST_REGION` | `us-central1` | Region for the smoke test | | `GCP_CREDENTIALS_JSON` | _unset_ | Service-account JSON; otherwise ADC is used | +## Using exitnode-mcp + +`exitnode-mcp` is a stdio MCP server that exposes the same operations to +AI agents (Claude Desktop, OpenClaw, any MCP host). Copy +[`examples/mcp.json`](examples/mcp.json) into your client's MCP +configuration. The binary must be on `$PATH`. + +Available tools: `provision_exit_node`, `start_exit_node`, +`stop_exit_node`, `destroy_exit_node`, `rotate_exit_node`, +`list_exit_nodes`, `get_status`, `verify_connectivity`, +`sync_pfsense_gateway`, `estimate_cost`. `destroy_exit_node` requires +an explicit `name` argument — there is no "destroy active" shortcut +at the MCP layer, deliberately. + +## Troubleshooting + +**`tailscale OAuth env vars ... are unset`** +The env-var names declared in `[tailscale].oauth_client_id_env` and +`oauth_client_secret_env` are not set in the calling shell. Mint a new +OAuth client at `https://login.tailscale.com/admin/settings/oauth` and +export the two values. Check by running +`echo "$TAILSCALE_OAUTH_CLIENT_ID"` (or whatever name your config uses). + +**pfSense revert failed — both nodes alive** +This is the loud-error case from the rotate state machine: after a +`pfSense Apply` failure, the orchestrator tried to revert to the old +gateway IP and that revert also failed. Both VMs remain running so your +home network keeps its VPN. Recover by manually setting the pfSense +gateway monitor IP to a known-good Tailscale IP from +`exitnode list`, then `exitnode down --destroy` to clean up. File an +issue with the orchestrator log so we can harden this path. + +**`exitnode up` says "no `tailscale` CLI on host"** +The pre-cutover egress probe shells out to `tailscale`. Install the +client (`curl -fsSL https://tailscale.com/install.sh | sh` on Linux, +`brew install tailscale` on macOS) and re-run. The probe is skipped +silently if the CLI is absent — it does not block provisioning, but +you lose verification. + +**"another exitnode process is running (state.json locked)"** +The state file's POSIX flock is held by another `exitnode` or +`exitnode-mcp` invocation. Most often this means a previous run +hung. Confirm with `lsof ~/.config/exitnode/state.json.lock`; if +nothing holds it, the lock file is stale and safe to delete with +`rm ~/.config/exitnode/state.json.lock`. + ## Roadmap Work is staged in three sequential plans: @@ -122,7 +230,7 @@ Work is staged in three sequential plans: `internal/tailscale`, `internal/pfsense`, `internal/verify`, and `scripts/install.sh`. ✅ Complete. - **Plan 3 — Binaries & distribution.** `cmd/exitnode`, `cmd/exitnode-mcp`, - examples, CI workflows, and goreleaser packaging. ⏳ In progress. + examples, CI workflows, and goreleaser packaging. ✅ Complete. ## License From 0db191eed2bd7611cf2174196a49607d08b4b3ff Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:30:45 +0000 Subject: [PATCH 74/79] chore: go mod tidy (Plan 3 closes out staged indirects) --- go.mod | 10 +++++----- go.sum | 20 ++++++++++++++++++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/go.mod b/go.mod index 4e49dbf..4c111e1 100644 --- a/go.mod +++ b/go.mod @@ -3,15 +3,19 @@ module github.com/iker/exit-node go 1.25.0 require ( + cloud.google.com/go/compute v1.62.0 github.com/BurntSushi/toml v1.6.0 github.com/gofrs/flock v0.13.0 + github.com/modelcontextprotocol/go-sdk v1.6.0 + github.com/spf13/cobra v1.10.2 + google.golang.org/api v0.278.0 + google.golang.org/protobuf v1.36.11 tailscale.com/client/tailscale/v2 v2.9.0 ) require ( cloud.google.com/go/auth v0.20.0 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect - cloud.google.com/go/compute v1.62.0 // indirect cloud.google.com/go/compute/metadata v0.9.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -22,10 +26,8 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.15 // indirect github.com/googleapis/gax-go/v2 v2.22.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/modelcontextprotocol/go-sdk v1.6.0 // indirect github.com/segmentio/asm v1.1.3 // indirect github.com/segmentio/encoding v0.5.4 // indirect - github.com/spf13/cobra v1.10.2 // indirect github.com/spf13/pflag v1.0.9 // indirect github.com/tailscale/hujson v0.0.0-20220506213045-af5ed07155e5 // indirect github.com/yosida95/uritemplate/v3 v3.0.2 // indirect @@ -39,10 +41,8 @@ require ( golang.org/x/oauth2 v0.36.0 // indirect golang.org/x/sys v0.43.0 // indirect golang.org/x/text v0.36.0 // indirect - google.golang.org/api v0.278.0 // indirect google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260427160629-7cedc36a6bc4 // indirect google.golang.org/grpc v1.80.0 // indirect - google.golang.org/protobuf v1.36.11 // indirect ) diff --git a/go.sum b/go.sum index 5efd292..4ef83ed 100644 --- a/go.sum +++ b/go.sum @@ -24,10 +24,18 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= +github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= +github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/jsonschema-go v0.4.3 h1:/DBOLZTfDow7pe2GmaJNhltueGTtDKICi8V8p+DQPd0= github.com/google/jsonschema-go v0.4.3/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.15 h1:xolVQTEXusUcAA5UgtyRLjelpFFHWlPQ4XfWGc7MBas= github.com/googleapis/enterprise-certificate-proxy v0.3.15/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= github.com/googleapis/gax-go/v2 v2.22.0 h1:PjIWBpgGIVKGoCXuiCoP64altEJCj3/Ei+kSU5vlZD4= @@ -61,6 +69,10 @@ go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= @@ -68,14 +80,18 @@ golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs= -golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= -golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= +golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s= +golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= google.golang.org/api v0.278.0 h1:W7jiRvRi53VYFfZ/HoZjQBtJk7gOFbHD8ot1RzVZU6E= google.golang.org/api v0.278.0/go.mod h1:B9TqLBwJqVjp1mtt7WeoQwWRwvu/400y5lETOql+giQ= google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 h1:XzmzkmB14QhVhgnawEVsOn6OFsnpyxNPRY9QV01dNB0= From 9244a9f1c7398e64910e20a8b8f8e45ceb89e645 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:41:09 +0000 Subject: [PATCH 75/79] docs(readme): drop 3-plan roadmap; describe v0.1 surface + deferred items --- README.md | 63 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index e417d60..3495a3d 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,12 @@ [![Go Version](https://img.shields.io/badge/go-1.25-00ADD8?logo=go)](go.mod) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) -[![Status: Pre-alpha](https://img.shields.io/badge/status-pre--alpha-orange)](#status) +[![CI](https://github.com/ceballosiker/exit-node/actions/workflows/ci.yml/badge.svg)](https://github.com/ceballosiker/exit-node/actions/workflows/ci.yml) On-demand Tailscale exit nodes that rotate across cloud regions and keep your pfSense gateway in sync. Driven from a CLI for humans and an MCP server for AI agents. -> **Status**: pre-alpha (v0.1). All library packages, both binaries -> (`exitnode`, `exitnode-mcp`), CI, and release packaging are in -> place. The first tagged release will produce binaries for -> linux/darwin × amd64/arm64. - ## Why VPN exit nodes are useful exactly because they're not always-on: @@ -52,7 +47,7 @@ Four narrow client interfaces, one orchestrator: | Package | Responsibility | | --- | --- | -| `internal/core` | Rotate state machine + `up` / `down` / `status` / `health` / `cost` / `list` / `sync` actions | +| `internal/core` | Rotate state machine + lifecycle ops (`Up` / `Down` / `Rotate` / `Start` / `Stop` / `Destroy` / `List` / `Status` / `Health` / `SyncPFSense` / `EstimateCost`) — single source of truth for business logic, called by both the CLI and the MCP server | | `internal/config` | Typed configuration loader (project, region, tags, pfSense, gateway names) | | `internal/state` | On-disk state file — what's currently running, last rotation, prior nodes | | `internal/gcp` | Compute Engine adapter — `Provision` / `Start` / `Stop` / `Destroy` / `List` / `Get` | @@ -68,16 +63,26 @@ also has a `//go:build integration` smoke test that exercises real GCP ``` exit-node/ +├── cmd/ +│ ├── exitnode/ # cobra CLI binary +│ └── exitnode-mcp/ # MCP stdio server binary ├── internal/ -│ ├── config/ # config loader -│ ├── state/ # on-disk state -│ ├── core/ # rotate orchestrator + action commands -│ ├── gcp/ # GCP Compute Engine adapter -│ ├── tailscale/ # Tailscale v2 API adapter -│ ├── pfsense/ # pfSense REST adapter -│ └── verify/ # egress probes +│ ├── config/ # TOML loader + env-var resolution +│ ├── core/ # rotate orchestrator + lifecycle ops +│ ├── gcp/ # GCP Compute Engine adapter +│ ├── pfsense/ # pfSense REST adapter +│ ├── state/ # on-disk state cache (flock-protected) +│ ├── tailscale/ # Tailscale v2 API adapter +│ └── verify/ # egress probes +├── examples/ +│ ├── config.toml # annotated reference config +│ └── mcp.json # Claude Desktop / OpenClaw MCP snippet ├── scripts/ -│ └── install.sh # VM first-boot bootstrap (fetched via startup-script-url) +│ └── install.sh # VM first-boot bootstrap (fetched via startup-script-url) +├── .github/workflows/ +│ ├── ci.yml # vet + lint + race tests + cross-build matrix +│ └── release.yml # goreleaser on tag push +├── .goreleaser.yaml ├── Makefile ├── go.mod └── README.md @@ -218,19 +223,21 @@ hung. Confirm with `lsof ~/.config/exitnode/state.json.lock`; if nothing holds it, the lock file is stale and safe to delete with `rm ~/.config/exitnode/state.json.lock`. -## Roadmap - -Work is staged in three sequential plans: - -- **Plan 1 — Foundation & Core.** Module layout, `internal/config`, - `internal/state`, interface definitions for all four clients, and the - `internal/core` rotate orchestrator with table-driven mocked tests. - ✅ Complete. -- **Plan 2 — Real client implementations.** `internal/gcp`, - `internal/tailscale`, `internal/pfsense`, `internal/verify`, and - `scripts/install.sh`. ✅ Complete. -- **Plan 3 — Binaries & distribution.** `cmd/exitnode`, `cmd/exitnode-mcp`, - examples, CI workflows, and goreleaser packaging. ✅ Complete. +## What's next + +v0.1 covers the GCP + Tailscale + pfSense path the spec calls out as the +core use case. Items deferred for later versions: + +- **HTTP/SSE MCP transport** so the MCP server can be hosted remotely + rather than only over stdio. +- **`--strict-verify`** flag to promote "no `tailscale` CLI on host" from + a silent probe-skip to a hard failure. +- **Additional cloud providers** — AWS and Hetzner implementations of + the `Provider` interface. +- **Netgate Plus pfSense API** as a second `PFSenseClient` implementation + for users on the official Netgate plugin instead of the community one. +- **Scheduled rotation + auto-teardown on idle.** +- **Homebrew tap** for `brew install exitnode`. ## License From f7bd29fd56336ab3118afe4de20483c170961517 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:50:43 +0000 Subject: [PATCH 76/79] docs(readme): add personal-project disclaimer --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 3495a3d..6d4f76c 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,10 @@ On-demand Tailscale exit nodes that rotate across cloud regions and keep your pfSense gateway in sync. Driven from a CLI for humans and an MCP server for AI agents. +> **Personal project.** Built for my own home network. MIT-licensed and +> forkable, but no guarantees on uptime, breaking changes, or issue response +> — depend on this at your own risk. + ## Why VPN exit nodes are useful exactly because they're not always-on: From 1e536f4dc252cccfd39b0f508562a7137d9550f1 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 20:51:02 +0000 Subject: [PATCH 77/79] ci(lint): pin golangci-lint to v2.x (v1.x is built with go 1.24 < 1.25) --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5cd612b..9dad449 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,9 @@ jobs: cache: true - uses: golangci/golangci-lint-action@v6 with: - version: latest + # Must be v2.x — v1 was built with Go 1.24 and refuses to analyze + # source targeting go 1.25 (which our go.mod pins). + version: v2.x args: --timeout=5m build-matrix: From 4b519c42c8ea0420a5abf1c77e26fb10762767c6 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 21:00:25 +0000 Subject: [PATCH 78/79] ci(lint): upgrade to golangci-lint v2.12 (action v9, new v2 config) --- .github/workflows/ci.yml | 9 +++++---- .golangci.yaml | 20 -------------------- .golangci.yml | 29 +++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 24 deletions(-) delete mode 100644 .golangci.yaml create mode 100644 .golangci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9dad449..65cf4b4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,11 +31,12 @@ jobs: with: go-version: '1.25' cache: true - - uses: golangci/golangci-lint-action@v6 + - uses: golangci/golangci-lint-action@v9 with: - # Must be v2.x — v1 was built with Go 1.24 and refuses to analyze - # source targeting go 1.25 (which our go.mod pins). - version: v2.x + # golangci-lint v1.x was built with Go 1.24 and refuses to analyze + # source targeting go 1.25 (which our go.mod pins). Pin a recent + # v2.x; the action's @v6/@v7/@v8 don't speak v2 config format. + version: v2.12.2 args: --timeout=5m build-matrix: diff --git a/.golangci.yaml b/.golangci.yaml deleted file mode 100644 index 2709c6f..0000000 --- a/.golangci.yaml +++ /dev/null @@ -1,20 +0,0 @@ -run: - timeout: 5m - go: "1.22" - -linters: - disable-all: true - enable: - - govet - - errcheck - - ineffassign - - staticcheck - - unused - - gofmt - - goimports - -issues: - exclude-rules: - - path: _test\.go - linters: - - errcheck diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..ba96cc6 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,29 @@ +version: "2" + +# Standard preset = errcheck, govet, ineffassign, staticcheck, unused. +# Matches what golangci-lint v1 enabled implicitly. Add more linters +# here only after the standard set is consistently green. +linters: + default: standard + settings: + errcheck: + # Stdout-write errors are not meaningfully recoverable in this CLI; + # do not require every fmt.Fprint* / Fprintln / Fprintf call to be + # blank-assigned. + exclude-functions: + - fmt.Fprint + - fmt.Fprintf + - fmt.Fprintln + exclusions: + rules: + # Test files routinely defer Close() on stores/servers without checking + # the error — the goroutine is already torn down and the next test will + # surface any real problem. + - path: _test\.go + linters: + - errcheck + +formatters: + enable: + - gofmt + - goimports From d83bf59b5e23d56a3e1f168dc0575774378d4338 Mon Sep 17 00:00:00 2001 From: Iker Date: Wed, 20 May 2026 21:00:25 +0000 Subject: [PATCH 79/79] fix(lint): address v2 staticcheck/errcheck/gofmt findings --- cmd/exitnode-mcp/buildcore.go | 1 - internal/core/down_test.go | 2 +- internal/core/mocks_test.go | 8 ++++---- internal/core/sync_test.go | 2 +- internal/gcp/compute.go | 7 ++++++- internal/gcp/types.go | 4 ++-- internal/pfsense/rest.go | 2 +- internal/verify/shell_test.go | 18 +++++++++--------- 8 files changed, 24 insertions(+), 20 deletions(-) diff --git a/cmd/exitnode-mcp/buildcore.go b/cmd/exitnode-mcp/buildcore.go index b4b48ef..2b06969 100644 --- a/cmd/exitnode-mcp/buildcore.go +++ b/cmd/exitnode-mcp/buildcore.go @@ -101,4 +101,3 @@ func buildCore(ctx context.Context) (*core.Core, func(), error) { }) return c, func() { _ = store.Close() }, nil } - diff --git a/internal/core/down_test.go b/internal/core/down_test.go index 836170b..fb2e04c 100644 --- a/internal/core/down_test.go +++ b/internal/core/down_test.go @@ -26,7 +26,7 @@ func downFixture(t *testing.T, active *gcp.ExitNode) *rotateFixture { } } c := New(Deps{ - Config: &config.Config{}, + Config: &config.Config{}, Provider: newMockProvider(), TS: newMockTS(), PF: newMockPF(), Probe: &mockProbe{}, Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), diff --git a/internal/core/mocks_test.go b/internal/core/mocks_test.go index 10f990f..1bf77ff 100644 --- a/internal/core/mocks_test.go +++ b/internal/core/mocks_test.go @@ -84,7 +84,7 @@ func newMockProvider() *mockProvider { // attachClock wires a shared sequence clock into the mock's recorder so its // calls can be merged chronologically with other mocks in the fixture. -func (m *mockProvider) attachClock(c *clock) { m.recorder.clock = c } +func (m *mockProvider) attachClock(c *clock) { m.clock = c } func (m *mockProvider) Provision(ctx context.Context, opts gcp.ProvisionOpts) (*gcp.ExitNode, error) { m.record("Provision", opts.Name, opts.Region, opts.MachineType) @@ -138,7 +138,7 @@ func newMockTS() *mockTS { } } -func (m *mockTS) attachClock(c *clock) { m.recorder.clock = c } +func (m *mockTS) attachClock(c *clock) { m.clock = c } func (m *mockTS) MintEphemeralAuthKey(ctx context.Context, tags []string) (string, error) { m.record("MintEphemeralAuthKey", tags) @@ -186,7 +186,7 @@ func newMockPF() *mockPF { } } -func (m *mockPF) attachClock(c *clock) { m.recorder.clock = c } +func (m *mockPF) attachClock(c *clock) { m.clock = c } func (m *mockPF) GetGateway(ctx context.Context, name string) (*pfsense.Gateway, error) { m.record("GetGateway", name) @@ -236,4 +236,4 @@ func (m *mockProbe) EgressDirect(ctx context.Context) (string, error) { return m.EgressDirectResult, m.EgressDirectErr } -func (m *mockProbe) attachClock(c *clock) { m.recorder.clock = c } +func (m *mockProbe) attachClock(c *clock) { m.clock = c } diff --git a/internal/core/sync_test.go b/internal/core/sync_test.go index 297aa14..e28c38a 100644 --- a/internal/core/sync_test.go +++ b/internal/core/sync_test.go @@ -26,7 +26,7 @@ func syncFixture(t *testing.T, active *gcp.ExitNode, gwIP string) *rotateFixture pf := newMockPF() pf.Gateways["GW"] = gwIP c := New(Deps{ - Config: &config.Config{PFSense: config.PFSenseConfig{GatewayName: "GW"}}, + Config: &config.Config{PFSense: config.PFSenseConfig{GatewayName: "GW"}}, Provider: newMockProvider(), TS: newMockTS(), PF: pf, Probe: &mockProbe{}, Store: store, Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), diff --git a/internal/gcp/compute.go b/internal/gcp/compute.go index a2f1e98..69edd4c 100644 --- a/internal/gcp/compute.go +++ b/internal/gcp/compute.go @@ -84,7 +84,12 @@ func New(ctx context.Context, opts Options) (Provider, error) { var clientOpts []option.ClientOption if len(opts.CredentialsJSON) > 0 { - clientOpts = append(clientOpts, option.WithCredentialsJSON(opts.CredentialsJSON)) + // SA1019: option.WithCredentialsJSON is marked deprecated because the + // upstream auth library cannot validate JSON it receives. Our caller + // is the operator (config + env var GCP_CREDENTIALS_JSON), which we + // control; the deprecation's threat model doesn't apply. Re-evaluate + // when we migrate to cloud.google.com/go/auth. + clientOpts = append(clientOpts, option.WithCredentialsJSON(opts.CredentialsJSON)) //nolint:staticcheck } inst, err := compute.NewInstancesRESTClient(ctx, clientOpts...) diff --git a/internal/gcp/types.go b/internal/gcp/types.go index dbd60b9..a463d55 100644 --- a/internal/gcp/types.go +++ b/internal/gcp/types.go @@ -69,9 +69,9 @@ type ExitNode struct { // ProvisionOpts is the input to Provider.Provision. type ProvisionOpts struct { - Name string // generated: vpn--- + Name string // generated: vpn--- Region string - Zone string // empty → provider chooses a random zone in Region + Zone string // empty → provider chooses a random zone in Region MachineType string Hostname string // typically == Name TailscaleAuthKey string // ephemeral, single-use, ~5m TTL diff --git a/internal/pfsense/rest.go b/internal/pfsense/rest.go index a90559a..9d3b348 100644 --- a/internal/pfsense/rest.go +++ b/internal/pfsense/rest.go @@ -193,7 +193,7 @@ func (c *pfClient) do(ctx context.Context, method, path string, body io.Reader) if err != nil { return nil, 0, err } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() b, err := io.ReadAll(resp.Body) if err != nil { return nil, resp.StatusCode, err diff --git a/internal/verify/shell_test.go b/internal/verify/shell_test.go index 4c7d373..ed5cd8d 100644 --- a/internal/verify/shell_test.go +++ b/internal/verify/shell_test.go @@ -57,11 +57,11 @@ func TestEgressVia_HappyPath_RestoresPrior(t *testing.T) { statusJSON := `{"ExitNodeStatus":{"ID":"prior-node-id"}}` fake := &fakeRunner{ Responses: []fakeResponse{ - {Stdout: statusJSON, Err: nil}, // tailscale status --json - {Stdout: "", Err: nil}, // tailscale set --exit-node= - {Stdout: "pong\n", Err: nil}, // tailscale ping - {Stdout: "203.0.113.7\n", Err: nil}, // curl - {Stdout: "", Err: nil}, // tailscale set --exit-node=prior-node-id (defer) + {Stdout: statusJSON, Err: nil}, // tailscale status --json + {Stdout: "", Err: nil}, // tailscale set --exit-node= + {Stdout: "pong\n", Err: nil}, // tailscale ping + {Stdout: "203.0.113.7\n", Err: nil}, // curl + {Stdout: "", Err: nil}, // tailscale set --exit-node=prior-node-id (defer) }, } p := &shellProbe{run: fake, probeURL: "https://example.com/ip"} @@ -164,10 +164,10 @@ func TestEgressDirect_HappyPath_RestoresPrior(t *testing.T) { statusJSON := `{"ExitNodeStatus":{"ID":"prior-node-id"}}` fake := &fakeRunner{ Responses: []fakeResponse{ - {Stdout: statusJSON}, // status - {Stdout: ""}, // set --exit-node= (clear) - {Stdout: "203.0.113.99\n"}, // curl - {Stdout: ""}, // restore + {Stdout: statusJSON}, // status + {Stdout: ""}, // set --exit-node= (clear) + {Stdout: "203.0.113.99\n"}, // curl + {Stdout: ""}, // restore }, } p := &shellProbe{run: fake, probeURL: "https://x/ip"}