From 325db552217d75166e260565feb43c66478460ac Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Wed, 6 May 2026 07:58:33 +0000 Subject: [PATCH 1/7] feat(gateway): reconciled gateway-state subcommand + prepare-export integration Adds `y-cluster gateway state` -- a JSON snapshot of the cluster's GatewayClass, Gateway, HTTPRoute, GRPCRoute, ClientTrafficPolicy, and BackendTrafficPolicy resources -- and wires it into the appliance export pipeline so the reconciled snapshot ships alongside the qcow2/OVA/gcp-tar deliverables. Subcommand: - `y-cluster gateway state [--context=NAME]` prints JSON to stdout. Each kind carries spec AND status, so consumers can answer maintenance-relevant questions deterministically: Is HTTPS ready? (find the port==443 entry in gateways[].listeners[], then check the same-named entry in gateways[].status.listeners[] for programmed==true, attachedRoutes>0). Is port 80 redirect-only? (walk httpRoutes[].rules for filters of type RequestRedirect). Are ClientTrafficPolicy settings actually in effect? (walk clientTrafficPolicies[].status.ancestors[] for Accepted=True alongside spec.clientIPDetection.xForwardedFor.numTrustedHops). - `y-cluster gateway clear-dns-hint-ip [--context=NAME] [--gateway-class=y-cluster]` removes the yolean.se/dns-hint-ip annotation from the GatewayClass. Idempotent; used by prepare-export. The shape is documented as a generated JSON Schema at pkg/gateway/state.schema.json (generated by schemagen alongside the provider config schemas). prepare-export reshape: - Now requires a RUNNING cluster. Earlier behavior (require stopped cluster, error otherwise) is inverted: the new live phase runs `gateway clear-dns-hint-ip` (so the per-deploy LB IP doesn't bake into the customer snapshot) followed by `gateway state` (dumping to `<dir>/<name>-gateway-state.json`), both needing the apiserver up. After the live phase, prepare-export stops the VM internally before the existing offline virt-customize phase. The previous explicit `y-cluster stop` step in callers becomes redundant. 
- Preflight reordering: virt-customize + kubectl LookPath checks fire first, so missing-tool errors surface before the running-state check. Export changes: - `pkg/provision/qemu/export.go` copies the gateway-state.json sibling into BUNDLE_DIR. Best-effort: a build that skipped prepare-export (or one that ran before this change) won't have the file -- log + skip rather than fail the export. Script update: - `scripts/appliance-qemu-to-gcp.sh` drops the explicit `y-cluster stop` line before `y-cluster prepare-export`. With the new live phase that step is wrong (would bring down the cluster prepare-export needs up). Schema generation: - `cmd/internal/schemagen/main.go` gains a writeOutputSchema helper for non-provider-config schemas. Generates `pkg/gateway/state.schema.json` from gateway.State{} via the same invopop reflector, but with FieldNameTag=json (output is JSON, not YAML) and no provider-narrowing post-process. - `pkg/gateway.SchemaID` is the canonical $id; a fresh Fetch embeds it as `$schema` in the produced JSON so consumers can validate by URL. Smoke-tested against the live appliance-gcp-build VM: gateway state returns 1 GatewayClass (programmed listener on port 80, attachedRoutes=3), 3 HTTPRoutes (external-http, keycloak-admin, echo), 1 ClientTrafficPolicy (trust-lb-xff with numTrustedHops=1, Accepted=True). The currently-set dns-hint-ip annotation (`127.0.0.1` from the local-provision default) is what prepare-export will clear before snapshot. Tests: - `pkg/gateway/state_test.go` covers the targetRefs-shape flatten (singular vs plural), the case-insensitive Programmed-condition check, the SchemaID surfacing in the JSON output, and the zero-value-no-null-slices invariant. - `pkg/provision/qemu/prepare_export_test.go` updates the VM-state assertion (was: "expects stopped"; now: "expects running") and trims the unused filepath import. 
E2e against /dev/kvm not run in this commit; the existing qemu e2e suite's prepare-export coverage will now exercise the live phase automatically when re-run. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/internal/schemagen/main.go | 77 ++++- cmd/y-cluster/gateway.go | 91 +++++ cmd/y-cluster/main.go | 1 + pkg/gateway/fetch.go | 395 ++++++++++++++++++++++ pkg/gateway/state.go | 349 +++++++++++++++++++ pkg/gateway/state.schema.json | 374 ++++++++++++++++++++ pkg/gateway/state_test.go | 146 ++++++++ pkg/provision/qemu/export.go | 18 + pkg/provision/qemu/prepare_export.go | 90 ++++- pkg/provision/qemu/prepare_export_test.go | 27 +- 10 files changed, 1535 insertions(+), 33 deletions(-) create mode 100644 cmd/y-cluster/gateway.go create mode 100644 pkg/gateway/fetch.go create mode 100644 pkg/gateway/state.go create mode 100644 pkg/gateway/state.schema.json create mode 100644 pkg/gateway/state_test.go diff --git a/cmd/internal/schemagen/main.go b/cmd/internal/schemagen/main.go index dff3b8a..84dd5b6 100644 --- a/cmd/internal/schemagen/main.go +++ b/cmd/internal/schemagen/main.go @@ -1,6 +1,14 @@ -// schemagen generates JSON Schema files into pkg/provision/schema/: -// one per provisioner config struct, plus a portable common.schema.json -// reflected from CommonConfig alone. +// schemagen generates JSON Schema files for two distinct surfaces: +// +// - Provision-config schemas under pkg/provision/schema/: one +// per provisioner config struct (qemu, docker, multipass) plus +// a portable common.schema.json reflected from CommonConfig. +// +// - Output schemas alongside the Go type that produces them +// (e.g. pkg/gateway/state.schema.json next to gateway.State). +// These document the JSON shape published by `y-cluster` +// subcommands so downstream consumers can validate / parse +// against a stable contract. 
// // Each per-provider schema has its `provider` property post-processed // from the inherited enum into a single-value `const` so the file @@ -34,6 +42,7 @@ import ( "github.com/invopop/jsonschema" "sigs.k8s.io/yaml" + "github.com/Yolean/y-cluster/pkg/gateway" "github.com/Yolean/y-cluster/pkg/provision/config" ) @@ -102,9 +111,71 @@ func run() error { } fmt.Printf("wrote %s\n", commonOut) + // Output schemas: not provider-config schemas, but other + // stable JSON shapes y-cluster produces for downstream + // consumption. These live alongside the Go type that + // produces them, NOT under pkg/provision/schema/ (which is + // for input/config schemas). Add new outputs below as more + // y-cluster commands publish stable JSON contracts. + gatewayStateOut := filepath.Join(root, "pkg", "gateway", "state.schema.json") + if err := writeOutputSchema(gatewayStateOut, &gateway.State{}, gateway.SchemaID); err != nil { + return fmt.Errorf("generate %s: %w", gatewayStateOut, err) + } + fmt.Printf("wrote %s\n", gatewayStateOut) + return nil } +// writeOutputSchema reflects a non-provider Go struct into a +// standalone JSON Schema file. Differs from writeProviderSchema +// in two ways: +// +// - Uses the `json` struct tag for property names, since the +// output is JSON (not YAML), and consumers parse the JSON +// directly. Reusing FieldNameTag="yaml" would produce YAML- +// tagged property names that don't match the runtime output. +// - No provider-narrowing post-processing -- the schema covers +// the full output type as-is. +// +// The schemaID is written into the schema's $id so consumers +// can validate by URL reference. SchemaID values live in the +// source package as exported constants (e.g. gateway.SchemaID). 
+func writeOutputSchema(outPath string, sample any, schemaID string) error { + r := &jsonschema.Reflector{ + AllowAdditionalProperties: false, + DoNotReference: false, + FieldNameTag: "json", + // Keep RequiredFromJSONSchemaTags symmetric with the + // provider schemas: omitempty fields fall through to + // non-required without us having to hand-tag each one. + RequiredFromJSONSchemaTags: false, + } + schema := r.Reflect(sample) + data, err := json.MarshalIndent(schema, "", " ") + if err != nil { + return err + } + // Replace the reflector's auto-generated $id with our stable + // one. invopop emits a github.com/-prefixed URL by default; + // we want the schema reachable by a URL operators control. + data, err = setSchemaID(data, schemaID) + if err != nil { + return err + } + return os.WriteFile(outPath, append(data, '\n'), 0o644) +} + +// setSchemaID rewrites the top-level $id field of the schema +// document. Returns the raw JSON bytes with the new $id. +func setSchemaID(data []byte, schemaID string) ([]byte, error) { + var doc map[string]any + if err := json.Unmarshal(data, &doc); err != nil { + return nil, err + } + doc["$id"] = schemaID + return json.MarshalIndent(doc, "", " ") +} + // checkCollisions ensures no two providers declare the same own // (non-embedded) yaml field name. CommonConfig fields are skipped: // they're shared by design and surface in every provider via diff --git a/cmd/y-cluster/gateway.go b/cmd/y-cluster/gateway.go new file mode 100644 index 0000000..be2c35f --- /dev/null +++ b/cmd/y-cluster/gateway.go @@ -0,0 +1,91 @@ +package main + +import ( + "encoding/json" + "fmt" + + "github.com/spf13/cobra" + + "github.com/Yolean/y-cluster/pkg/cluster" + "github.com/Yolean/y-cluster/pkg/gateway" +) + +// gatewayCmd is the parent of `y-cluster gateway *`. Today +// it has one child (`state`); `clear-dns-hint-ip` is wired +// here too because it lives in the same surface area, but +// the canonical caller is prepare-export, not interactive. 
+// +// Future ops (rotate-cert, diff-vs-baseline, route-test) slot +// under this same command group when use cases land. +func gatewayCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "gateway", + Short: "Inspect and manage the y-cluster Gateway state", + } + cmd.AddCommand(gatewayStateCmd()) + cmd.AddCommand(gatewayClearDNSHintIPCmd()) + return cmd +} + +func gatewayStateCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "state", + Short: "Print the cluster's reconciled Gateway state as JSON", + Long: `Snapshot the cluster's GatewayClass, Gateway, HTTPRoute, GRPCRoute, +ClientTrafficPolicy, and BackendTrafficPolicy resources -- including +their reconciliation status conditions -- and print as JSON to stdout. + +The shape is documented in pkg/gateway/state.schema.json +(generated). Consumers parse the JSON to determine HTTPS readiness +(walk gateways[].status.listeners[]), redirect-vs-real-traffic on a +port (walk httpRoutes[].rules), and policy effects (walk +clientTrafficPolicies[].spec for numTrustedHops / trustedCIDRs + +.status for whether envoy-gateway accepted them). + +Used by: + - prepare-export: dumps to /-gateway-state.json so + the appliance bundle ships the snapshot the customer received. 
+ - Operator interactive use: ` + "`y-cluster gateway state | jq ...`" + ` for + debugging.`, + Args: cobra.NoArgs, + RunE: func(c *cobra.Command, _ []string) error { + st, err := gateway.Fetch(c.Context(), contextName) + if err != nil { + return err + } + out, err := json.MarshalIndent(st, "", " ") + if err != nil { + return fmt.Errorf("marshal state: %w", err) + } + fmt.Fprintln(c.OutOrStdout(), string(out)) + return nil + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func gatewayClearDNSHintIPCmd() *cobra.Command { + var contextName string + var gatewayClassName string + cmd := &cobra.Command{ + Use: "clear-dns-hint-ip", + Short: "Remove the yolean.se/dns-hint-ip annotation from the y-cluster GatewayClass", + Long: `Used by prepare-export to strip the per-deploy IP from the appliance +snapshot. The annotation is set by envoygateway.Install at provision +time, scoped to the operator's local LB or public IP -- baking it +into the customer-facing snapshot would point their tooling at our +infrastructure. 
+ +Idempotent: a no-op when the annotation isn't present (or the +GatewayClass doesn't exist).`, + Args: cobra.NoArgs, + RunE: func(c *cobra.Command, _ []string) error { + return gateway.ClearDNSHintIPAnnotation(c.Context(), contextName, gatewayClassName) + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + cmd.Flags().StringVar(&gatewayClassName, "gateway-class", "y-cluster", "GatewayClass name to patch") + return cmd +} diff --git a/cmd/y-cluster/main.go b/cmd/y-cluster/main.go index 8409072..04a3fc7 100644 --- a/cmd/y-cluster/main.go +++ b/cmd/y-cluster/main.go @@ -139,6 +139,7 @@ func rootCmd() *cobra.Command { root.AddCommand(crictlCmd()) root.AddCommand(cacheCmd()) root.AddCommand(echoCmd()) + root.AddCommand(gatewayCmd()) root.AddCommand(localstorageCmd()) return root diff --git a/pkg/gateway/fetch.go b/pkg/gateway/fetch.go new file mode 100644 index 0000000..2885d8c --- /dev/null +++ b/pkg/gateway/fetch.go @@ -0,0 +1,395 @@ +package gateway + +import ( + "context" + "encoding/json" + "sort" + "strings" + + "github.com/Yolean/y-cluster/pkg/provision/envoygateway" +) + +// raw kubectl item shapes. We unmarshal kubectl's JSON output +// into these intermediate types, then project the fields we +// care about into the typed *State* shape. Most fields stay as +// json.RawMessage so kubectl's exact output passes through +// without our partial schema getting between consumers and the +// upstream gateway-api types. 
+ +type rawList struct { + Items []json.RawMessage `json:"items"` +} + +type rawMetadata struct { + Name string `json:"name"` + Namespace string `json:"namespace,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` + Labels map[string]string `json:"labels,omitempty"` +} + +type rawCondition struct { + Type string `json:"type"` + Status string `json:"status"` + Reason string `json:"reason,omitempty"` + Message string `json:"message,omitempty"` +} + +func toConditions(in []rawCondition) []Condition { + if len(in) == 0 { + return nil + } + out := make([]Condition, len(in)) + for i, c := range in { + out[i] = Condition{Type: c.Type, Status: c.Status, Reason: c.Reason, Message: c.Message} + } + return out +} + +// === GatewayClass === + +type rawGatewayClass struct { + Metadata rawMetadata `json:"metadata"` + Spec struct { + ControllerName string `json:"controllerName"` + } `json:"spec"` + Status struct { + Conditions []rawCondition `json:"conditions"` + } `json:"status"` +} + +func fetchGatewayClass(ctx context.Context, kubectlContext string) (*GatewayClass, error) { + var list rawList + if err := kubectlGetJSON(ctx, kubectlContext, "gatewayclass", &list); err != nil { + return nil, err + } + for _, raw := range list.Items { + var gc rawGatewayClass + if err := json.Unmarshal(raw, &gc); err != nil { + return nil, err + } + if gc.Spec.ControllerName != envoygateway.EGControllerName { + continue + } + return &GatewayClass{ + Name: gc.Metadata.Name, + ControllerName: gc.Spec.ControllerName, + Annotations: gc.Metadata.Annotations, + Labels: gc.Metadata.Labels, + Conditions: toConditions(gc.Status.Conditions), + }, nil + } + // No envoy-gateway-controlled GatewayClass yet; return nil + // rather than error -- the cluster may be pre-install. 
+ return nil, nil +} + +// === Gateway === + +type rawGateway struct { + Metadata rawMetadata `json:"metadata"` + Spec struct { + GatewayClassName string `json:"gatewayClassName"` + Listeners []rawListener `json:"listeners"` + } `json:"spec"` + Status struct { + Conditions []rawCondition `json:"conditions"` + Listeners []rawListenerStatus `json:"listeners"` + } `json:"status"` +} + +type rawListener struct { + Name string `json:"name"` + Port int `json:"port"` + Protocol string `json:"protocol"` + Hostname string `json:"hostname,omitempty"` + AllowedRoutes json.RawMessage `json:"allowedRoutes,omitempty"` + TLS json.RawMessage `json:"tls,omitempty"` +} + +type rawListenerStatus struct { + Name string `json:"name"` + AttachedRoutes int `json:"attachedRoutes"` + Conditions []rawCondition `json:"conditions"` +} + +func fetchGateways(ctx context.Context, kubectlContext string, out *State) error { + var list rawList + if err := kubectlGetJSON(ctx, kubectlContext, "gateway", &list); err != nil { + return err + } + gws := make([]Gateway, 0, len(list.Items)) + for _, raw := range list.Items { + var g rawGateway + if err := json.Unmarshal(raw, &g); err != nil { + return err + } + listeners := make([]Listener, len(g.Spec.Listeners)) + for i, l := range g.Spec.Listeners { + listeners[i] = Listener{ + Name: l.Name, + Port: l.Port, + Protocol: l.Protocol, + Hostname: l.Hostname, + AllowedRoutes: l.AllowedRoutes, + TLS: l.TLS, + } + } + listenerStatus := make([]ListenerStatus, len(g.Status.Listeners)) + for i, ls := range g.Status.Listeners { + listenerStatus[i] = ListenerStatus{ + Name: ls.Name, + AttachedRoutes: ls.AttachedRoutes, + Conditions: toConditions(ls.Conditions), + Programmed: hasTrueCondition(ls.Conditions, "Programmed"), + } + } + gws = append(gws, Gateway{ + Namespace: g.Metadata.Namespace, + Name: g.Metadata.Name, + GatewayClassName: g.Spec.GatewayClassName, + Listeners: listeners, + Status: GatewayStatus{ + Conditions: toConditions(g.Status.Conditions), + 
Listeners: listenerStatus, + }, + }) + } + sortGateways(gws) + out.Gateways = gws + return nil +} + +func hasTrueCondition(conditions []rawCondition, t string) bool { + for _, c := range conditions { + if c.Type == t && strings.EqualFold(c.Status, "True") { + return true + } + } + return false +} + +func sortGateways(in []Gateway) { + sort.Slice(in, func(i, j int) bool { + if in[i].Namespace != in[j].Namespace { + return in[i].Namespace < in[j].Namespace + } + return in[i].Name < in[j].Name + }) +} + +// === Routes (HTTPRoute + GRPCRoute share the shape) === + +type rawRoute struct { + Metadata rawMetadata `json:"metadata"` + Spec struct { + ParentRefs json.RawMessage `json:"parentRefs,omitempty"` + Hostnames []string `json:"hostnames,omitempty"` + Rules json.RawMessage `json:"rules,omitempty"` + } `json:"spec"` + Status struct { + Parents []rawRouteParentStatus `json:"parents"` + } `json:"status"` +} + +type rawRouteParentStatus struct { + ParentRef json.RawMessage `json:"parentRef,omitempty"` + ControllerName string `json:"controllerName,omitempty"` + Conditions []rawCondition `json:"conditions"` +} + +func toRouteParents(in []rawRouteParentStatus) []RouteParentStatus { + if len(in) == 0 { + return nil + } + out := make([]RouteParentStatus, len(in)) + for i, p := range in { + out[i] = RouteParentStatus{ + ParentRef: p.ParentRef, + ControllerName: p.ControllerName, + Conditions: toConditions(p.Conditions), + } + } + return out +} + +func fetchHTTPRoutes(ctx context.Context, kubectlContext string, out *State) error { + var list rawList + if err := kubectlGetJSON(ctx, kubectlContext, "httproute", &list); err != nil { + return err + } + routes := make([]HTTPRoute, 0, len(list.Items)) + for _, raw := range list.Items { + var r rawRoute + if err := json.Unmarshal(raw, &r); err != nil { + return err + } + routes = append(routes, HTTPRoute{ + Namespace: r.Metadata.Namespace, + Name: r.Metadata.Name, + ParentRefs: r.Spec.ParentRefs, + Hostnames: r.Spec.Hostnames, + Rules: 
r.Spec.Rules, + Status: RouteStatus{Parents: toRouteParents(r.Status.Parents)}, + }) + } + sort.Slice(routes, func(i, j int) bool { + if routes[i].Namespace != routes[j].Namespace { + return routes[i].Namespace < routes[j].Namespace + } + return routes[i].Name < routes[j].Name + }) + out.HTTPRoutes = routes + return nil +} + +func fetchGRPCRoutes(ctx context.Context, kubectlContext string, out *State) error { + var list rawList + if err := kubectlGetJSON(ctx, kubectlContext, "grpcroute", &list); err != nil { + return err + } + routes := make([]GRPCRoute, 0, len(list.Items)) + for _, raw := range list.Items { + var r rawRoute + if err := json.Unmarshal(raw, &r); err != nil { + return err + } + routes = append(routes, GRPCRoute{ + Namespace: r.Metadata.Namespace, + Name: r.Metadata.Name, + ParentRefs: r.Spec.ParentRefs, + Hostnames: r.Spec.Hostnames, + Rules: r.Spec.Rules, + Status: RouteStatus{Parents: toRouteParents(r.Status.Parents)}, + }) + } + sort.Slice(routes, func(i, j int) bool { + if routes[i].Namespace != routes[j].Namespace { + return routes[i].Namespace < routes[j].Namespace + } + return routes[i].Name < routes[j].Name + }) + out.GRPCRoutes = routes + return nil +} + +// === Envoy Gateway extension policies === + +type rawPolicy struct { + Metadata rawMetadata `json:"metadata"` + Spec json.RawMessage `json:"spec"` + Status struct { + Ancestors []rawPolicyAncestorStatus `json:"ancestors"` + } `json:"status"` +} + +type rawPolicyAncestorStatus struct { + AncestorRef json.RawMessage `json:"ancestorRef,omitempty"` + ControllerName string `json:"controllerName,omitempty"` + Conditions []rawCondition `json:"conditions"` +} + +func toPolicyAncestors(in []rawPolicyAncestorStatus) []PolicyAncestorStatus { + if len(in) == 0 { + return nil + } + out := make([]PolicyAncestorStatus, len(in)) + for i, a := range in { + out[i] = PolicyAncestorStatus{ + AncestorRef: a.AncestorRef, + ControllerName: a.ControllerName, + Conditions: toConditions(a.Conditions), + } + } + 
return out +} + +// extractTargetRefs pulls the policy's targetRefs (or singular +// targetRef) field out of its spec. Both shapes appear in the +// envoy-gateway extension's CRDs depending on version. We +// surface them in a normalised `targetRefs` field at the top +// level of our snapshot so consumers don't have to chase the +// spec shape. +func extractTargetRefs(spec json.RawMessage) json.RawMessage { + if len(spec) == 0 { + return nil + } + var probe struct { + TargetRefs json.RawMessage `json:"targetRefs,omitempty"` + TargetRef json.RawMessage `json:"targetRef,omitempty"` + } + if err := json.Unmarshal(spec, &probe); err != nil { + return nil + } + if len(probe.TargetRefs) > 0 { + return probe.TargetRefs + } + if len(probe.TargetRef) > 0 { + // Wrap the singular targetRef into a single-element array + // so consumers see one consistent shape. + wrapped := []json.RawMessage{probe.TargetRef} + b, err := json.Marshal(wrapped) + if err != nil { + return nil + } + return b + } + return nil +} + +func fetchClientTrafficPolicies(ctx context.Context, kubectlContext string, out *State) error { + var list rawList + if err := kubectlGetJSON(ctx, kubectlContext, "clienttrafficpolicy", &list); err != nil { + return err + } + policies := make([]ClientTrafficPolicy, 0, len(list.Items)) + for _, raw := range list.Items { + var p rawPolicy + if err := json.Unmarshal(raw, &p); err != nil { + return err + } + policies = append(policies, ClientTrafficPolicy{ + Namespace: p.Metadata.Namespace, + Name: p.Metadata.Name, + TargetRefs: extractTargetRefs(p.Spec), + Spec: p.Spec, + Status: PolicyStatus{Ancestors: toPolicyAncestors(p.Status.Ancestors)}, + }) + } + sort.Slice(policies, func(i, j int) bool { + if policies[i].Namespace != policies[j].Namespace { + return policies[i].Namespace < policies[j].Namespace + } + return policies[i].Name < policies[j].Name + }) + out.ClientTrafficPolicies = policies + return nil +} + +func fetchBackendTrafficPolicies(ctx context.Context, 
kubectlContext string, out *State) error { + var list rawList + if err := kubectlGetJSON(ctx, kubectlContext, "backendtrafficpolicy", &list); err != nil { + return err + } + policies := make([]BackendTrafficPolicy, 0, len(list.Items)) + for _, raw := range list.Items { + var p rawPolicy + if err := json.Unmarshal(raw, &p); err != nil { + return err + } + policies = append(policies, BackendTrafficPolicy{ + Namespace: p.Metadata.Namespace, + Name: p.Metadata.Name, + TargetRefs: extractTargetRefs(p.Spec), + Spec: p.Spec, + Status: PolicyStatus{Ancestors: toPolicyAncestors(p.Status.Ancestors)}, + }) + } + sort.Slice(policies, func(i, j int) bool { + if policies[i].Namespace != policies[j].Namespace { + return policies[i].Namespace < policies[j].Namespace + } + return policies[i].Name < policies[j].Name + }) + out.BackendTrafficPolicies = policies + return nil +} diff --git a/pkg/gateway/state.go b/pkg/gateway/state.go new file mode 100644 index 0000000..b6ba55c --- /dev/null +++ b/pkg/gateway/state.go @@ -0,0 +1,349 @@ +// Package gateway exposes a maintainer-facing snapshot of the +// y-cluster Gateway API state in the cluster: the GatewayClass, +// the Gateway resources and their reconciled listener status, +// HTTPRoute / GRPCRoute attachments + filters, and Envoy Gateway +// extension policies (ClientTrafficPolicy, BackendTrafficPolicy) +// that affect maintenance-relevant runtime behavior such as XFF +// trust + client-IP detection. +// +// The snapshot is deliberately RECONCILED -- every kind is read +// from the cluster post-controller (kubectl get -o json includes +// .status), not derived from the operator's source-of-truth +// kustomize. That distinction matters for maintenance: the spec +// says "trust 1 XFF hop"; the status says whether envoy-gateway +// accepted the policy and is actually applying it. A cluster +// where a ClientTrafficPolicy is rejected reports the spec but +// shows Accepted=False on the status, which a downstream +// consumer can flag. 
+// +// The package is also responsible for a small write operation: +// clearing the yolean.se/dns-hint-ip GatewayClass annotation +// before an appliance is exported, so the per-deploy IP is not +// baked into the customer's snapshot. This belongs here because +// the dns-hint annotation is part of the gateway's surface, not +// the lifecycle of the cluster itself. +package gateway + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os/exec" + "strings" + "time" + + "github.com/Yolean/y-cluster/pkg/provision/envoygateway" +) + +// SchemaID is the canonical $id for the generated JSON Schema. +// Kept in sync with the file at pkg/gateway/state.schema.json +// (regenerated by `go generate ./pkg/provision/...`). +const SchemaID = "https://yolean.se/y-cluster/schema/gateway-state.schema.json" + +// State is the top-level snapshot a y-cluster gateway state call +// produces. The shape is a stable contract -- consumers (the +// appliance's gateway-state.json bundled at prepare-export, the +// y-cluster gateway state CLI, future hosting integrations) +// depend on it. New fields are added (with omitempty) without +// breaking them; existing fields are not renamed or retyped. +type State struct { + // GatewayClass identifies the cluster's y-cluster gateway + // class plus its (post-clean) annotations + reconciliation + // status. Returned even when no Gateway has been created + // yet, so consumers can verify the cluster reached the + // envoygateway.Install step. + GatewayClass *GatewayClass `json:"gatewayClass,omitempty"` + + // Gateways are all Gateway resources in the cluster + // (cluster-scoped enumeration). Includes the y-cluster + // shipped Gateway plus any consumer-defined ones. + Gateways []Gateway `json:"gateways"` + + // HTTPRoutes are all HTTPRoute resources in the cluster. + // Each carries enough rule/filter detail for a consumer + // to tell "this port is redirect-only" from "this port + // serves real traffic". 
+ HTTPRoutes []HTTPRoute `json:"httpRoutes"` + + // GRPCRoutes mirrors HTTPRoutes for the gRPC kind. + GRPCRoutes []GRPCRoute `json:"grpcRoutes"` + + // ClientTrafficPolicies are envoy-gateway extension + // resources that adjust client-side listener behavior -- + // the canonical example is xForwardedFor.numTrustedHops + // (controls whether envoy trusts an upstream LB's + // X-Forwarded-Proto). Status conditions tell whether the + // policy was accepted into the listener config. + ClientTrafficPolicies []ClientTrafficPolicy `json:"clientTrafficPolicies"` + + // BackendTrafficPolicies mirror their client-side siblings + // for backend-bound behavior. + BackendTrafficPolicies []BackendTrafficPolicy `json:"backendTrafficPolicies"` + + // FetchedAt is the wall-clock at which the snapshot was + // taken, in RFC 3339 format. Useful when comparing two + // snapshots taken minutes apart during a debug session. + FetchedAt string `json:"fetchedAt"` + + // SchemaID is duplicated from the constant above so a + // snapshot read in isolation knows which schema to + // validate against without the consumer having to know + // the y-cluster version that produced it. + SchemaID string `json:"$schema,omitempty"` +} + +// GatewayClass is the partial GatewayClass shape we surface. +// We don't reflect the full upstream type because the +// upstream changes shape across gateway-api versions; the fields +// below are stable across v1alpha2/v1beta1/v1. +type GatewayClass struct { + Name string `json:"name"` + ControllerName string `json:"controllerName"` + Annotations map[string]string `json:"annotations,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + Conditions []Condition `json:"conditions,omitempty"` +} + +// Gateway is the partial Gateway shape: enough listener config +// for HTTPS-readiness assessment and enough listener status for +// "did envoy-gateway program this?" answers. 
+type Gateway struct { + Namespace string `json:"namespace"` + Name string `json:"name"` + GatewayClassName string `json:"gatewayClassName"` + Listeners []Listener `json:"listeners"` + Status GatewayStatus `json:"status"` +} + +// Listener is the spec-side listener config. TLS and AllowedRoutes +// are passed through as raw JSON so we don't have to chase the +// gateway-api schema's full nested types for fields that are +// pass-through to the consumer. +type Listener struct { + Name string `json:"name"` + Port int `json:"port"` + Protocol string `json:"protocol"` + Hostname string `json:"hostname,omitempty"` + AllowedRoutes json.RawMessage `json:"allowedRoutes,omitempty"` + TLS json.RawMessage `json:"tls,omitempty"` +} + +// GatewayStatus carries the reconciled per-listener state. +// AttachedRoutes + Programmed condition together answer "is +// this listener actually serving?". +type GatewayStatus struct { + Conditions []Condition `json:"conditions,omitempty"` + Listeners []ListenerStatus `json:"listeners,omitempty"` +} + +// ListenerStatus is one row of the Gateway's status.listeners +// array. +type ListenerStatus struct { + Name string `json:"name"` + AttachedRoutes int `json:"attachedRoutes"` + Conditions []Condition `json:"conditions,omitempty"` + // Programmed surfaces the conventional Programmed=True + // condition as a boolean for quick consumer checks. False + // when the condition is missing or its status is anything + // other than "True". + Programmed bool `json:"programmed"` +} + +// HTTPRoute is the partial HTTPRoute shape. Rules are pass- +// through json.RawMessage so consumers can inspect filters + +// backendRefs without forcing y-cluster to track the full +// upstream rule schema. 
+type HTTPRoute struct { + Namespace string `json:"namespace"` + Name string `json:"name"` + ParentRefs json.RawMessage `json:"parentRefs,omitempty"` + Hostnames []string `json:"hostnames,omitempty"` + Rules json.RawMessage `json:"rules,omitempty"` + Status RouteStatus `json:"status"` +} + +// GRPCRoute mirrors HTTPRoute for the gRPC kind. +type GRPCRoute struct { + Namespace string `json:"namespace"` + Name string `json:"name"` + ParentRefs json.RawMessage `json:"parentRefs,omitempty"` + Hostnames []string `json:"hostnames,omitempty"` + Rules json.RawMessage `json:"rules,omitempty"` + Status RouteStatus `json:"status"` +} + +// RouteStatus normalises both HTTPRoute and GRPCRoute status +// (both use the .status.parents shape). +type RouteStatus struct { + Parents []RouteParentStatus `json:"parents,omitempty"` +} + +// RouteParentStatus is one parent's reconciliation record on a +// route. +type RouteParentStatus struct { + ParentRef json.RawMessage `json:"parentRef,omitempty"` + ControllerName string `json:"controllerName,omitempty"` + Conditions []Condition `json:"conditions,omitempty"` +} + +// ClientTrafficPolicy mirrors envoy-gateway's +// gateway.envoyproxy.io/v1alpha1 ClientTrafficPolicy. The Spec +// field is the full spec object as raw JSON; consumers that +// care about specific knobs (numTrustedHops, trustedCIDRs) +// drill into spec.clientIPDetection.xForwardedFor. +type ClientTrafficPolicy struct { + Namespace string `json:"namespace"` + Name string `json:"name"` + TargetRefs json.RawMessage `json:"targetRefs,omitempty"` + Spec json.RawMessage `json:"spec"` + Status PolicyStatus `json:"status"` +} + +// BackendTrafficPolicy mirrors ClientTrafficPolicy on the +// backend side. 
type BackendTrafficPolicy struct {
	Namespace string `json:"namespace"`
	Name      string `json:"name"`
	// TargetRefs is kept as undecoded JSON; omitted when empty.
	TargetRefs json.RawMessage `json:"targetRefs,omitempty"`
	// Spec has no omitempty on its tag.
	Spec   json.RawMessage `json:"spec"`
	Status PolicyStatus    `json:"status"`
}

// PolicyStatus normalises the envoy-gateway extension's
// status.ancestors shape (one entry per resource the policy
// targets).
type PolicyStatus struct {
	// Ancestors is omitted from the output when empty.
	Ancestors []PolicyAncestorStatus `json:"ancestors,omitempty"`
}

// PolicyAncestorStatus is one ancestor's reconciliation record.
type PolicyAncestorStatus struct {
	// AncestorRef is kept as undecoded JSON.
	AncestorRef    json.RawMessage `json:"ancestorRef,omitempty"`
	ControllerName string          `json:"controllerName,omitempty"`
	Conditions     []Condition     `json:"conditions,omitempty"`
}

// Condition is the standard k8s condition shape. Subset of
// metav1.Condition that we actually need.
type Condition struct {
	// Type and Status are always emitted; Reason and Message
	// are dropped from the output when empty.
	Type    string `json:"type"`
	Status  string `json:"status"`
	Reason  string `json:"reason,omitempty"`
	Message string `json:"message,omitempty"`
}

// Fetch shells out to kubectl to read the cluster's reconciled
// gateway state and returns it as a populated *State. The
// returned object's JSON marshalling matches the schema in
// pkg/gateway/state.schema.json -- callers
// ship the result either to stdout (gateway state subcommand)
// or to <cacheDir>/<name>-gateway-state.json (prepare-export).
//
// One kubectl call per kind. Empty results (no Gateways at all,
// no HTTPRoutes, ...) are not errors -- the cluster might be
// pre-workload. The function returns slices that may be empty
// but never nil.
//
// kubectl context defaults to "local"; pass explicitly when
// snapshotting the appliance build cluster from a different
// shell.
// NOTE(review): Fetch hands kubectlContext to its helpers
// unchanged, so the "local" default is presumably applied by
// the caller -- confirm.
+func Fetch(ctx context.Context, kubectlContext string) (*State, error) { + st := &State{ + Gateways: []Gateway{}, + HTTPRoutes: []HTTPRoute{}, + GRPCRoutes: []GRPCRoute{}, + ClientTrafficPolicies: []ClientTrafficPolicy{}, + BackendTrafficPolicies: []BackendTrafficPolicy{}, + FetchedAt: time.Now().UTC().Format(time.RFC3339), + SchemaID: SchemaID, + } + + gc, err := fetchGatewayClass(ctx, kubectlContext) + if err != nil { + return nil, fmt.Errorf("fetch gatewayclass: %w", err) + } + st.GatewayClass = gc + + if err := fetchGateways(ctx, kubectlContext, st); err != nil { + return nil, fmt.Errorf("fetch gateway: %w", err) + } + if err := fetchHTTPRoutes(ctx, kubectlContext, st); err != nil { + return nil, fmt.Errorf("fetch httproute: %w", err) + } + if err := fetchGRPCRoutes(ctx, kubectlContext, st); err != nil { + return nil, fmt.Errorf("fetch grpcroute: %w", err) + } + if err := fetchClientTrafficPolicies(ctx, kubectlContext, st); err != nil { + return nil, fmt.Errorf("fetch clienttrafficpolicy: %w", err) + } + if err := fetchBackendTrafficPolicies(ctx, kubectlContext, st); err != nil { + return nil, fmt.Errorf("fetch backendtrafficpolicy: %w", err) + } + return st, nil +} + +// ClearDNSHintIPAnnotation removes the yolean.se/dns-hint-ip +// annotation from the y-cluster GatewayClass. Idempotent: if +// the annotation isn't there (or the GatewayClass doesn't +// exist), the function is a successful no-op. +// +// Used by prepare-export to strip the per-deploy IP from the +// appliance snapshot. Safe in that context because prepare- +// export is only invoked in the export flow, never against a +// cluster a developer is using interactively. +func ClearDNSHintIPAnnotation(ctx context.Context, kubectlContext, gatewayClassName string) error { + if gatewayClassName == "" { + gatewayClassName = "y-cluster" + } + // JSON-patch's remove op fails if the annotation isn't + // present, so we do a describe-first to decide. 
The patch + // path encodes "/" as ~1 (RFC 6901 JSON Pointer). + annotation := envoygateway.DNSHintIPAnnotation + jsonPath := strings.ReplaceAll(annotation, "/", "~1") + out, err := runKubectl(ctx, kubectlContext, + "get", "gatewayclass", gatewayClassName, + "-o", "jsonpath={.metadata.annotations."+strings.ReplaceAll(annotation, ".", "\\.")+"}") + if err != nil { + return fmt.Errorf("describe gatewayclass: %w", err) + } + if len(bytes.TrimSpace(out)) == 0 { + // Annotation absent (or GatewayClass missing -- both + // fall through to the same no-op). + return nil + } + patch := fmt.Sprintf(`[{"op":"remove","path":"/metadata/annotations/%s"}]`, jsonPath) + if _, err := runKubectl(ctx, kubectlContext, + "patch", "gatewayclass", gatewayClassName, + "--type=json", "-p", patch); err != nil { + return fmt.Errorf("patch gatewayclass: %w", err) + } + return nil +} + +// runKubectl is the shared kubectl shellout helper. Stdout + +// stderr are combined; non-zero exit returns an error with the +// combined output for debugging. +func runKubectl(ctx context.Context, kubectlContext string, args ...string) ([]byte, error) { + full := append([]string{"--context=" + kubectlContext}, args...) + cmd := exec.CommandContext(ctx, "kubectl", full...) + out, err := cmd.CombinedOutput() + if err != nil { + return nil, fmt.Errorf("kubectl %s: %s: %w", strings.Join(args, " "), out, err) + } + return out, nil +} + +// kubectlGetJSON runs `kubectl get -A -o json` and +// unmarshals into the supplied pointer (typically *itemList). +// Returns no error when the kind has zero items. 
+func kubectlGetJSON(ctx context.Context, kubectlContext, kindArg string, into any) error { + out, err := runKubectl(ctx, kubectlContext, "get", kindArg, "-A", "-o", "json") + if err != nil { + return err + } + if err := json.Unmarshal(out, into); err != nil { + return fmt.Errorf("unmarshal %s: %w", kindArg, err) + } + return nil +} diff --git a/pkg/gateway/state.schema.json b/pkg/gateway/state.schema.json new file mode 100644 index 0000000..2872712 --- /dev/null +++ b/pkg/gateway/state.schema.json @@ -0,0 +1,374 @@ +{ + "$defs": { + "BackendTrafficPolicy": { + "additionalProperties": false, + "properties": { + "name": { + "type": "string" + }, + "namespace": { + "type": "string" + }, + "spec": true, + "status": { + "$ref": "#/$defs/PolicyStatus" + }, + "targetRefs": true + }, + "required": [ + "namespace", + "name", + "spec", + "status" + ], + "type": "object" + }, + "ClientTrafficPolicy": { + "additionalProperties": false, + "properties": { + "name": { + "type": "string" + }, + "namespace": { + "type": "string" + }, + "spec": true, + "status": { + "$ref": "#/$defs/PolicyStatus" + }, + "targetRefs": true + }, + "required": [ + "namespace", + "name", + "spec", + "status" + ], + "type": "object" + }, + "Condition": { + "additionalProperties": false, + "properties": { + "message": { + "type": "string" + }, + "reason": { + "type": "string" + }, + "status": { + "type": "string" + }, + "type": { + "type": "string" + } + }, + "required": [ + "type", + "status" + ], + "type": "object" + }, + "GRPCRoute": { + "additionalProperties": false, + "properties": { + "hostnames": { + "items": { + "type": "string" + }, + "type": "array" + }, + "name": { + "type": "string" + }, + "namespace": { + "type": "string" + }, + "parentRefs": true, + "rules": true, + "status": { + "$ref": "#/$defs/RouteStatus" + } + }, + "required": [ + "namespace", + "name", + "status" + ], + "type": "object" + }, + "Gateway": { + "additionalProperties": false, + "properties": { + "gatewayClassName": { + 
"type": "string" + }, + "listeners": { + "items": { + "$ref": "#/$defs/Listener" + }, + "type": "array" + }, + "name": { + "type": "string" + }, + "namespace": { + "type": "string" + }, + "status": { + "$ref": "#/$defs/GatewayStatus" + } + }, + "required": [ + "namespace", + "name", + "gatewayClassName", + "listeners", + "status" + ], + "type": "object" + }, + "GatewayClass": { + "additionalProperties": false, + "properties": { + "annotations": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "conditions": { + "items": { + "$ref": "#/$defs/Condition" + }, + "type": "array" + }, + "controllerName": { + "type": "string" + }, + "labels": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "name": { + "type": "string" + } + }, + "required": [ + "name", + "controllerName" + ], + "type": "object" + }, + "GatewayStatus": { + "additionalProperties": false, + "properties": { + "conditions": { + "items": { + "$ref": "#/$defs/Condition" + }, + "type": "array" + }, + "listeners": { + "items": { + "$ref": "#/$defs/ListenerStatus" + }, + "type": "array" + } + }, + "type": "object" + }, + "HTTPRoute": { + "additionalProperties": false, + "properties": { + "hostnames": { + "items": { + "type": "string" + }, + "type": "array" + }, + "name": { + "type": "string" + }, + "namespace": { + "type": "string" + }, + "parentRefs": true, + "rules": true, + "status": { + "$ref": "#/$defs/RouteStatus" + } + }, + "required": [ + "namespace", + "name", + "status" + ], + "type": "object" + }, + "Listener": { + "additionalProperties": false, + "properties": { + "allowedRoutes": true, + "hostname": { + "type": "string" + }, + "name": { + "type": "string" + }, + "port": { + "type": "integer" + }, + "protocol": { + "type": "string" + }, + "tls": true + }, + "required": [ + "name", + "port", + "protocol" + ], + "type": "object" + }, + "ListenerStatus": { + "additionalProperties": false, + "properties": { + "attachedRoutes": { + "type": 
"integer" + }, + "conditions": { + "items": { + "$ref": "#/$defs/Condition" + }, + "type": "array" + }, + "name": { + "type": "string" + }, + "programmed": { + "type": "boolean" + } + }, + "required": [ + "name", + "attachedRoutes", + "programmed" + ], + "type": "object" + }, + "PolicyAncestorStatus": { + "additionalProperties": false, + "properties": { + "ancestorRef": true, + "conditions": { + "items": { + "$ref": "#/$defs/Condition" + }, + "type": "array" + }, + "controllerName": { + "type": "string" + } + }, + "type": "object" + }, + "PolicyStatus": { + "additionalProperties": false, + "properties": { + "ancestors": { + "items": { + "$ref": "#/$defs/PolicyAncestorStatus" + }, + "type": "array" + } + }, + "type": "object" + }, + "RouteParentStatus": { + "additionalProperties": false, + "properties": { + "conditions": { + "items": { + "$ref": "#/$defs/Condition" + }, + "type": "array" + }, + "controllerName": { + "type": "string" + }, + "parentRef": true + }, + "type": "object" + }, + "RouteStatus": { + "additionalProperties": false, + "properties": { + "parents": { + "items": { + "$ref": "#/$defs/RouteParentStatus" + }, + "type": "array" + } + }, + "type": "object" + }, + "State": { + "additionalProperties": false, + "properties": { + "$schema": { + "type": "string" + }, + "backendTrafficPolicies": { + "items": { + "$ref": "#/$defs/BackendTrafficPolicy" + }, + "type": "array" + }, + "clientTrafficPolicies": { + "items": { + "$ref": "#/$defs/ClientTrafficPolicy" + }, + "type": "array" + }, + "fetchedAt": { + "type": "string" + }, + "gatewayClass": { + "$ref": "#/$defs/GatewayClass" + }, + "gateways": { + "items": { + "$ref": "#/$defs/Gateway" + }, + "type": "array" + }, + "grpcRoutes": { + "items": { + "$ref": "#/$defs/GRPCRoute" + }, + "type": "array" + }, + "httpRoutes": { + "items": { + "$ref": "#/$defs/HTTPRoute" + }, + "type": "array" + } + }, + "required": [ + "gateways", + "httpRoutes", + "grpcRoutes", + "clientTrafficPolicies", + "backendTrafficPolicies", 
+ "fetchedAt" + ], + "type": "object" + } + }, + "$id": "https://yolean.se/y-cluster/schema/gateway-state.schema.json", + "$ref": "#/$defs/State", + "$schema": "https://json-schema.org/draft/2020-12/schema" +} diff --git a/pkg/gateway/state_test.go b/pkg/gateway/state_test.go new file mode 100644 index 0000000..3e9d837 --- /dev/null +++ b/pkg/gateway/state_test.go @@ -0,0 +1,146 @@ +package gateway + +import ( + "encoding/json" + "strings" + "testing" +) + +// TestExtractTargetRefs covers the policy targetRef shape +// flattening: envoy-gateway's CRDs accept both `targetRefs` +// (plural array) and the legacy singular `targetRef` shape; +// our snapshot normalizes to a single `targetRefs` array so +// consumers don't have to branch. +func TestExtractTargetRefs(t *testing.T) { + cases := []struct { + name string + spec string + want string // JSON shape we want, "" for nil + }{ + { + name: "plural targetRefs passes through", + spec: `{"targetRefs":[{"kind":"Gateway","name":"y-cluster"}]}`, + want: `[{"kind":"Gateway","name":"y-cluster"}]`, + }, + { + name: "singular targetRef wrapped to single-element array", + spec: `{"targetRef":{"kind":"Gateway","name":"y-cluster"}}`, + want: `[{"kind":"Gateway","name":"y-cluster"}]`, + }, + { + name: "neither field present", + spec: `{"clientIPDetection":{}}`, + want: "", + }, + { + name: "empty spec", + spec: ``, + want: "", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := extractTargetRefs(json.RawMessage(tc.spec)) + if tc.want == "" { + if got != nil { + t.Errorf("expected nil, got %s", got) + } + return + } + if !jsonEqual(t, got, []byte(tc.want)) { + t.Errorf("got %s, want %s", got, tc.want) + } + }) + } +} + +// TestHasTrueCondition pins the case-insensitive Status compare +// (k8s convention is "True" but we don't want a stray "true" +// from a controller bug to silently flip Programmed to false). 
+func TestHasTrueCondition(t *testing.T) { + conds := []rawCondition{ + {Type: "Accepted", Status: "True"}, + {Type: "Programmed", Status: "true"}, + {Type: "ResolvedRefs", Status: "False"}, + } + if !hasTrueCondition(conds, "Accepted") { + t.Error("Accepted=True should be true") + } + if !hasTrueCondition(conds, "Programmed") { + t.Error("Programmed=true (lowercase) should be true (case-insensitive)") + } + if hasTrueCondition(conds, "ResolvedRefs") { + t.Error("ResolvedRefs=False should be false") + } + if hasTrueCondition(conds, "Missing") { + t.Error("missing condition should be false") + } +} + +// TestSchemaIDOnState locks the $schema serialization on a +// freshly-marshalled State. Consumers compare against the +// SchemaID constant; if these drift the schema doc would +// validate fine but the produced JSON wouldn't reference it. +func TestSchemaIDOnState(t *testing.T) { + st := &State{SchemaID: SchemaID} + out, err := json.Marshal(st) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(out), `"$schema":"`+SchemaID+`"`) { + t.Errorf("$schema not surfaced in JSON output: %s", out) + } +} + +// TestStateZeroValueMarshals: the zero value must produce +// stable, parseable JSON with empty slices (not null) so +// consumers can index without nil-checking. 
+func TestStateZeroValueMarshals(t *testing.T) { + st := &State{ + Gateways: []Gateway{}, + HTTPRoutes: []HTTPRoute{}, + GRPCRoutes: []GRPCRoute{}, + ClientTrafficPolicies: []ClientTrafficPolicy{}, + BackendTrafficPolicies: []BackendTrafficPolicy{}, + FetchedAt: "2026-05-06T00:00:00Z", + SchemaID: SchemaID, + } + out, err := json.Marshal(st) + if err != nil { + t.Fatal(err) + } + for _, want := range []string{ + `"gateways":[]`, + `"httpRoutes":[]`, + `"grpcRoutes":[]`, + `"clientTrafficPolicies":[]`, + `"backendTrafficPolicies":[]`, + `"fetchedAt":"2026-05-06T00:00:00Z"`, + } { + if !strings.Contains(string(out), want) { + t.Errorf("zero-value JSON missing %q\nfull: %s", want, out) + } + } +} + +// jsonEqual compares two JSON byte slices for structural +// equality (ignoring whitespace + key order). +func jsonEqual(t *testing.T, a, b []byte) bool { + t.Helper() + var ax, bx any + if err := json.Unmarshal(a, &ax); err != nil { + t.Fatal(err) + } + if err := json.Unmarshal(b, &bx); err != nil { + t.Fatal(err) + } + ar, err := json.Marshal(ax) + if err != nil { + t.Fatal(err) + } + br, err := json.Marshal(bx) + if err != nil { + t.Fatal(err) + } + return string(ar) == string(br) +} diff --git a/pkg/provision/qemu/export.go b/pkg/provision/qemu/export.go index 74681c1..f611394 100644 --- a/pkg/provision/qemu/export.go +++ b/pkg/provision/qemu/export.go @@ -238,6 +238,24 @@ func Export(ctx context.Context, opts ExportOptions) error { return fmt.Errorf("copy keypair: %w", err) } + // Copy the reconciled gateway-state.json that PrepareExport + // dumped during its live phase. Best-effort: a build that + // skipped prepare-export (or one that ran before this file + // existed) won't have the sibling -- log + skip rather than + // fail the export. The maintainer / customer can re-run + // `y-cluster gateway state` against any boot of the appliance + // to regenerate. 
+ gatewayStateSrc := filepath.Join(cfg.CacheDir, cfg.Name+"-gateway-state.json") + gatewayStateDst := filepath.Join(opts.BundleDir, "gateway-state.json") + if err := copyFile(gatewayStateSrc, gatewayStateDst, 0o644); err != nil { + if os.IsNotExist(err) { + logger.Warn("gateway-state.json not found in cache; bundle ships without it -- run y-cluster prepare-export against a running cluster to produce one", + zap.String("expected", gatewayStateSrc)) + } else { + return fmt.Errorf("copy gateway-state.json: %w", err) + } + } + vmdkSub := opts.VMDKSubformat if vmdkSub == "" { vmdkSub = VMDKSubformatDefault diff --git a/pkg/provision/qemu/prepare_export.go b/pkg/provision/qemu/prepare_export.go index 45b4d02..6d57ac9 100644 --- a/pkg/provision/qemu/prepare_export.go +++ b/pkg/provision/qemu/prepare_export.go @@ -3,12 +3,15 @@ package qemu import ( "context" _ "embed" + "encoding/json" "fmt" "os" "os/exec" "path/filepath" "go.uber.org/zap" + + "github.com/Yolean/y-cluster/pkg/gateway" ) // prepareInguestScript is the shared identity-reset script. The @@ -30,22 +33,33 @@ import ( //go:embed prepare_inguest.sh var prepareInguestScript string -// PrepareExport strips host-specific identity from the offline -// disk image so the same disk boots cleanly when imported on a -// different hypervisor (VMware, KVM, cloud providers). It uses -// libguestfs's virt-customize to mount the qcow2 (no boot, no -// SSH, no host kernel involvement) and run the embedded -// prepare-inguest.sh script inside the chrooted filesystem. +// PrepareExport prepares the cluster's qcow2 for shipping as an +// appliance image. Runs in two phases: +// +// - LIVE phase (cluster running): clears the per-deploy +// yolean.se/dns-hint-ip GatewayClass annotation so the +// customer's snapshot doesn't carry our LB IP, then dumps +// the reconciled Gateway state to /- +// gateway-state.json so the bundle ships a record of what +// the appliance looked like at export time. Then stops +// the cluster. 
+// - OFFLINE phase (cluster stopped): builds the data-seed +// tarball + runs virt-customize to identity-reset the +// filesystem, same as the prior behavior. // -// The same script also runs on the Hetzner Packer build path -// (inline, in a live VM); see prepareInguestScript above. +// The same shared inguest script also runs on the Hetzner +// Packer build path (inline, in a live VM); see +// prepareInguestScript above. // -// VM must be stopped first; virt-customize refuses to operate -// on a disk in use by a running qemu. Run order: +// VM MUST BE RUNNING when invoked. Earlier versions of +// PrepareExport required the VM to be stopped first (operator +// ran `y-cluster stop && y-cluster prepare-export`). The new +// live-phase steps need the apiserver, so callers should drop +// the explicit `y-cluster stop` -- prepare-export stops the VM +// itself between the live and offline phases. Reordered run: // // y-cluster provision -// y-cluster stop -// y-cluster prepare-export +// y-cluster prepare-export # prepare-export now stops internally // // Idempotent. A prepared appliance is no longer a usable dev // cluster locally; the next start runs cloud-init re-init and @@ -55,6 +69,19 @@ func PrepareExport(ctx context.Context, cacheDir, name string, logger *zap.Logge if logger == nil { logger = zap.NewNop() } + // Preflight tool checks first: surface "missing tool" errors + // before doing any cluster-side work that we'd then have to + // undo. virt-customize is needed in the offline phase; + // kubectl is needed in the live phase. Both should be + // addressable by a single `apt install` so it's reasonable + // to surface either error up front. 
+ if _, err := exec.LookPath("virt-customize"); err != nil { + return fmt.Errorf("virt-customize not found in PATH; install with: sudo apt install libguestfs-tools") + } + if _, err := exec.LookPath("kubectl"); err != nil { + return fmt.Errorf("kubectl not found in PATH; install kubectl (prepare-export now snapshots reconciled Gateway state, which needs kubectl)") + } + cfg, err := loadState(cacheDir, name) if err != nil { if os.IsNotExist(err) { @@ -62,17 +89,46 @@ func PrepareExport(ctx context.Context, cacheDir, name string, logger *zap.Logge } return fmt.Errorf("load state: %w", err) } - if running, _ := cfg.IsRunning(); running { - return fmt.Errorf("VM %q is running; run `y-cluster stop` first (virt-customize needs an offline disk)", name) - } - if _, err := exec.LookPath("virt-customize"); err != nil { - return fmt.Errorf("virt-customize not found in PATH; install with: sudo apt install libguestfs-tools") + if running, _ := cfg.IsRunning(); !running { + return fmt.Errorf("VM %q is not running; start the cluster first (prepare-export now needs the apiserver up to snapshot reconciled Gateway state and clear the per-deploy dns-hint-ip annotation -- it stops the VM internally before the offline phase)", name) } diskPath := filepath.Join(cfg.CacheDir, cfg.Name+".qcow2") if _, err := os.Stat(diskPath); err != nil { return fmt.Errorf("disk image not found at %s: %w", diskPath, err) } + // --- LIVE phase --- + // Clear the per-deploy dns-hint-ip annotation so the snapshot + // doesn't ship our LB IP. Then dump reconciled gateway state + // for the bundle. Both steps need the apiserver up. 
+ logger.Info("clearing yolean.se/dns-hint-ip annotation on GatewayClass", + zap.String("context", cfg.Context)) + if err := gateway.ClearDNSHintIPAnnotation(ctx, cfg.Context, "y-cluster"); err != nil { + return fmt.Errorf("clear dns-hint-ip: %w", err) + } + gatewayStatePath := filepath.Join(cacheDir, name+"-gateway-state.json") + logger.Info("snapshotting reconciled gateway state", zap.String("path", gatewayStatePath)) + state, err := gateway.Fetch(ctx, cfg.Context) + if err != nil { + return fmt.Errorf("fetch gateway state: %w", err) + } + stateJSON, err := json.MarshalIndent(state, "", " ") + if err != nil { + return fmt.Errorf("marshal gateway state: %w", err) + } + if err := os.WriteFile(gatewayStatePath, append(stateJSON, '\n'), 0o644); err != nil { + return fmt.Errorf("write gateway state: %w", err) + } + + // Stop the VM. virt-customize (offline phase) needs the disk + // not in use by qemu; libguestfs does its own loopback mount. + logger.Info("stopping VM before offline phase", zap.String("name", name)) + if err := Stop(cacheDir, name, logger); err != nil { + return fmt.Errorf("stop VM: %w", err) + } + + // --- OFFLINE phase --- + scriptPath, err := WritePrepareInguestScript("") if err != nil { return fmt.Errorf("write prepare script: %w", err) diff --git a/pkg/provision/qemu/prepare_export_test.go b/pkg/provision/qemu/prepare_export_test.go index 14648fd..b52a749 100644 --- a/pkg/provision/qemu/prepare_export_test.go +++ b/pkg/provision/qemu/prepare_export_test.go @@ -3,7 +3,6 @@ package qemu import ( "context" "os" - "path/filepath" "strings" "testing" ) @@ -206,28 +205,30 @@ func TestPrepareExport_NoSavedState(t *testing.T) { } } -// TestPrepareExport_VMRunning exercises the IsRunning guard: a -// stale-but-live pidfile (we write our own pid into it) must -// trigger the "run y-cluster stop first" error rather than -// blindly invoking virt-customize on a busy disk. 
-func TestPrepareExport_VMRunning(t *testing.T) { +// TestPrepareExport_VMNotRunning exercises the new running-state +// precondition: prepare-export needs the cluster up so it can +// clear the dns-hint-ip annotation + snapshot reconciled Gateway +// state. A stopped cluster (no pidfile, IsRunning false) must +// produce an error pointing the operator at `start`, not bubble +// up a generic libguestfs/kubectl failure later. +func TestPrepareExport_VMNotRunning(t *testing.T) { cacheDir := t.TempDir() cfg := defaultedRuntimeConfig(t) cfg.CacheDir = cacheDir if err := saveState(cfg); err != nil { t.Fatal(err) } - pidFile := filepath.Join(cacheDir, cfg.Name+".pid") - if err := os.WriteFile(pidFile, []byte("1\n"), 0o644); err != nil { - t.Fatal(err) - } + // No pidfile -> IsRunning() reports false. err := PrepareExport(context.Background(), cacheDir, cfg.Name, nil) if err == nil { - t.Fatal("expected error when VM still running") + t.Fatal("expected error when VM not running") + } + if !strings.Contains(err.Error(), "not running") { + t.Errorf("error should call out the not-running state: %v", err) } - if !strings.Contains(err.Error(), "y-cluster stop") { - t.Errorf("error should hint at stop: %v", err) + if !strings.Contains(err.Error(), "start the cluster") { + t.Errorf("error should hint at start: %v", err) } } From 11b35c434bab996fbacde31e70391d86a30f192e Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Wed, 6 May 2026 10:46:04 +0000 Subject: [PATCH 2/7] feat(gateway): schemaVersion top-level prop + enum-of-one in schema Adds a `schemaVersion` field to the gateway state JSON (currently "1") and constrains it via a single-value enum on the generated schema. Lets a consumer reading the JSON detect a snapshot shape they don't recognise -- the enum value at fetch time must match what the consumer's copy of the schema doc expects, or validation fails. 
The schema URL stays UNVERSIONED (the `$id` / `https://yolean.se/y-cluster/schema/gateway-state.schema.json` always points at the current schema doc). Per-document version comes from the new schemaVersion field. Backward-incompatible shape changes (renames, removals) require: bump `gateway.SchemaVersion`, regenerate the schema (so the enum catches up), update consumers in lockstep. Old snapshots remain identifiable by their schemaVersion field; they would validate against an archived copy of the previous schema doc once we need to serve one. Additive changes (new omitempty fields) do NOT require a bump. Implementation: - `gateway.SchemaVersion = "1"` exported constant. - `State.SchemaVersion` field (json:"schemaVersion"), populated by Fetch() from the constant. - `cmd/internal/schemagen` gains an `enumPin` post-process helper -- a small (DefName, PropName, Values) tuple -- plumbed through `writeOutputSchema` as variadic. The gateway-state schema is the only consumer today; the helper generalises cleanly for future single-value enums on other output schemas. - `pkg/gateway/state.schema.json` regenerated with the `schemaVersion: { enum: ["1"], type: "string" }` constraint. Tests: - TestStateZeroValueMarshals updated to assert `"schemaVersion":"1"` in the marshalled output. - TestSchemaVersionMatchesEnum reads back the regenerated schema doc and asserts its schemaVersion enum equals [SchemaVersion]. Fails fast in CI if the constant gets bumped without a `go generate` follow-up. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/internal/schemagen/main.go | 65 ++++++++++++++++++++++++++++++++-- pkg/gateway/state.go | 27 ++++++++++++++ pkg/gateway/state.schema.json | 9 ++++- pkg/gateway/state_test.go | 37 ++++++++++++++++++- 4 files changed, 134 insertions(+), 4 deletions(-) diff --git a/cmd/internal/schemagen/main.go b/cmd/internal/schemagen/main.go index 84dd5b6..6c51879 100644 --- a/cmd/internal/schemagen/main.go +++ b/cmd/internal/schemagen/main.go @@ -118,7 +118,17 @@ func run() error { // for input/config schemas). Add new outputs below as more // y-cluster commands publish stable JSON contracts. gatewayStateOut := filepath.Join(root, "pkg", "gateway", "state.schema.json") - if err := writeOutputSchema(gatewayStateOut, &gateway.State{}, gateway.SchemaID); err != nil { + // schemaVersion is a top-level field on State; pin it to + // the SOURCE version constant via an enum-of-one on the + // generated schema. A future SchemaVersion bump means + // updating that constant + this enum in lockstep; old + // snapshots would then validate against the previous + // version of the schema doc (which can be served from a + // versioned URL once we need it -- the canonical URL stays + // unversioned). + if err := writeOutputSchema(gatewayStateOut, &gateway.State{}, gateway.SchemaID, + enumPin{DefName: "State", PropName: "schemaVersion", Values: []string{gateway.SchemaVersion}}, + ); err != nil { return fmt.Errorf("generate %s: %w", gatewayStateOut, err) } fmt.Printf("wrote %s\n", gatewayStateOut) @@ -126,6 +136,16 @@ func run() error { return nil } +// enumPin is one (definition, property, values) tuple for the +// schemagen output-schema post-processor. Use it to constrain a +// reflected property to a fixed enum (typically a single-value +// enum representing a schema-version stamp). 
+type enumPin struct { + DefName string + PropName string + Values []string +} + // writeOutputSchema reflects a non-provider Go struct into a // standalone JSON Schema file. Differs from writeProviderSchema // in two ways: @@ -140,7 +160,13 @@ func run() error { // The schemaID is written into the schema's $id so consumers // can validate by URL reference. SchemaID values live in the // source package as exported constants (e.g. gateway.SchemaID). -func writeOutputSchema(outPath string, sample any, schemaID string) error { +// +// enumPins post-process the reflected schema to constrain +// specific properties to a fixed enum -- used today for +// schema-version stamping (single-value enum so consumers +// validate the snapshot's declared version against the schema +// they hold). +func writeOutputSchema(outPath string, sample any, schemaID string, enumPins ...enumPin) error { r := &jsonschema.Reflector{ AllowAdditionalProperties: false, DoNotReference: false, @@ -162,9 +188,44 @@ func writeOutputSchema(outPath string, sample any, schemaID string) error { if err != nil { return err } + for _, pin := range enumPins { + data, err = injectFieldEnum(data, pin.DefName, pin.PropName, pin.Values) + if err != nil { + return fmt.Errorf("inject enum on %s.%s: %w", pin.DefName, pin.PropName, err) + } + } return os.WriteFile(outPath, append(data, '\n'), 0o644) } +// injectFieldEnum sets `enum` on the named property of the named +// definition under $defs. Errors when the definition or property +// can't be found -- a typo there is a real bug in the generator +// wiring, not a runtime data shape question. 
+func injectFieldEnum(data []byte, defName, propName string, values []string) ([]byte, error) { + var doc map[string]any + if err := json.Unmarshal(data, &doc); err != nil { + return nil, err + } + defs, ok := doc["$defs"].(map[string]any) + if !ok { + return nil, fmt.Errorf("$defs missing or wrong type") + } + def, ok := defs[defName].(map[string]any) + if !ok { + return nil, fmt.Errorf("definition %q missing under $defs", defName) + } + props, ok := def["properties"].(map[string]any) + if !ok { + return nil, fmt.Errorf("properties missing on %s", defName) + } + prop, ok := props[propName].(map[string]any) + if !ok { + return nil, fmt.Errorf("property %q missing on %s", propName, defName) + } + prop["enum"] = stringSliceToAny(values) + return json.MarshalIndent(doc, "", " ") +} + // setSchemaID rewrites the top-level $id field of the schema // document. Returns the raw JSON bytes with the new $id. func setSchemaID(data []byte, schemaID string) ([]byte, error) { diff --git a/pkg/gateway/state.go b/pkg/gateway/state.go index b6ba55c..bf8131b 100644 --- a/pkg/gateway/state.go +++ b/pkg/gateway/state.go @@ -39,8 +39,24 @@ import ( // SchemaID is the canonical $id for the generated JSON Schema. // Kept in sync with the file at pkg/gateway/state.schema.json // (regenerated by `go generate ./pkg/provision/...`). +// +// The URL is intentionally UNVERSIONED -- it always points at the +// current schema doc. Per-document version comes from +// SchemaVersion below, which the schema constrains to a single +// value via an enum-of-one. Bumping the schema (a backward- +// incompatible change to the JSON shape) means: increment +// SchemaVersion, update the schemagen post-process to enum the +// new value, regenerate the schema. Old snapshots remain +// readable + identifiable by their schemaVersion field; new +// snapshots are flagged by the new enum value. 
const SchemaID = "https://yolean.se/y-cluster/schema/gateway-state.schema.json" +// SchemaVersion is the current snapshot-shape version. Bumped +// when the JSON shape of State changes in a backward-incompatible +// way (e.g. removing or renaming a field). Additive changes +// (new optional field with omitempty) do NOT require a bump. +const SchemaVersion = "1" + // State is the top-level snapshot a y-cluster gateway state call // produces. The shape is a stable contract -- consumers (the // appliance's gateway-state.json bundled at prepare-export, the @@ -91,6 +107,16 @@ type State struct { // validate against without the consumer having to know // the y-cluster version that produced it. SchemaID string `json:"$schema,omitempty"` + + // SchemaVersion stamps the snapshot with the version of + // the JSON shape it conforms to. Constrained to a single + // value (currently "1") via an enum on the generated + // schema, so a consumer reading the JSON can reject an + // unexpected version explicitly rather than parse a stale + // shape blindly. Bumped only on backward-incompatible + // changes (renames, removals); additive (new omitempty + // fields) doesn't require it. + SchemaVersion string `json:"schemaVersion"` } // GatewayClass is the partial GatewayClass shape we surface. 
@@ -257,6 +283,7 @@ func Fetch(ctx context.Context, kubectlContext string) (*State, error) { BackendTrafficPolicies: []BackendTrafficPolicy{}, FetchedAt: time.Now().UTC().Format(time.RFC3339), SchemaID: SchemaID, + SchemaVersion: SchemaVersion, } gc, err := fetchGatewayClass(ctx, kubectlContext) diff --git a/pkg/gateway/state.schema.json b/pkg/gateway/state.schema.json index 2872712..9eb5c36 100644 --- a/pkg/gateway/state.schema.json +++ b/pkg/gateway/state.schema.json @@ -355,6 +355,12 @@ "$ref": "#/$defs/HTTPRoute" }, "type": "array" + }, + "schemaVersion": { + "enum": [ + "1" + ], + "type": "string" } }, "required": [ @@ -363,7 +369,8 @@ "grpcRoutes", "clientTrafficPolicies", "backendTrafficPolicies", - "fetchedAt" + "fetchedAt", + "schemaVersion" ], "type": "object" } diff --git a/pkg/gateway/state_test.go b/pkg/gateway/state_test.go index 3e9d837..0489f77 100644 --- a/pkg/gateway/state_test.go +++ b/pkg/gateway/state_test.go @@ -2,6 +2,7 @@ package gateway import ( "encoding/json" + "os" "strings" "testing" ) @@ -94,7 +95,9 @@ func TestSchemaIDOnState(t *testing.T) { // TestStateZeroValueMarshals: the zero value must produce // stable, parseable JSON with empty slices (not null) so -// consumers can index without nil-checking. +// consumers can index without nil-checking. Also pins the +// schemaVersion string -- consumers depend on the value +// matching the enum on the schema doc. 
func TestStateZeroValueMarshals(t *testing.T) { st := &State{ Gateways: []Gateway{}, @@ -104,6 +107,7 @@ func TestStateZeroValueMarshals(t *testing.T) { BackendTrafficPolicies: []BackendTrafficPolicy{}, FetchedAt: "2026-05-06T00:00:00Z", SchemaID: SchemaID, + SchemaVersion: SchemaVersion, } out, err := json.Marshal(st) if err != nil { @@ -116,6 +120,7 @@ func TestStateZeroValueMarshals(t *testing.T) { `"clientTrafficPolicies":[]`, `"backendTrafficPolicies":[]`, `"fetchedAt":"2026-05-06T00:00:00Z"`, + `"schemaVersion":"1"`, } { if !strings.Contains(string(out), want) { t.Errorf("zero-value JSON missing %q\nfull: %s", want, out) @@ -123,6 +128,36 @@ func TestStateZeroValueMarshals(t *testing.T) { } } +// TestSchemaVersionMatchesEnum guards the lock between the +// SchemaVersion source constant and the enum value embedded in +// the generated schema. When the constant gets bumped, the +// schema needs to be regenerated; this test fails fast if they +// drift apart in CI before the regenerate step runs. +func TestSchemaVersionMatchesEnum(t *testing.T) { + // The schema doc is generated by `go generate ./pkg/provision/...` + // from the same source constant. Read it back and assert the + // schemaVersion enum contains exactly SchemaVersion. 
+ data, err := os.ReadFile("state.schema.json") + if err != nil { + t.Fatalf("read state.schema.json: %v", err) + } + var doc map[string]any + if err := json.Unmarshal(data, &doc); err != nil { + t.Fatalf("parse schema: %v", err) + } + defs := doc["$defs"].(map[string]any) + state := defs["State"].(map[string]any) + props := state["properties"].(map[string]any) + sv := props["schemaVersion"].(map[string]any) + enum, ok := sv["enum"].([]any) + if !ok { + t.Fatalf("schemaVersion missing enum constraint: %v", sv) + } + if len(enum) != 1 || enum[0] != SchemaVersion { + t.Errorf("schemaVersion enum mismatch: enum=%v want=[%q] -- did you bump SchemaVersion without re-running go generate?", enum, SchemaVersion) + } +} + // jsonEqual compares two JSON byte slices for structural // equality (ignoring whitespace + key order). func jsonEqual(t *testing.T, a, b []byte) bool { From 8a2bfe3b22f6e97e519e49d27279d49320b1f81f Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Wed, 6 May 2026 14:59:56 +0200 Subject: [PATCH 3/7] review: fix CI lint + qemu test PATH dependency Two CI failures on this PR (run 25433079702): - lint (staticcheck S1016) on pkg/gateway/fetch.go: toConditions and the listener projection in fetchGateways used full struct literals to copy from raw* types into the exported types they shadow field-for-field. Identical-shape conversions are clearer and what staticcheck flags. Replaced with `Condition(c)` and `Listener(l)`. - pkg/provision/qemu test failures on ubuntu-latest: TestPrepareExport_NoSavedState and TestPrepareExport_VMNotRunning passed locally on hosts with libguestfs-tools installed but failed on stock GHA runners because PrepareExport's first step is a virt-customize LookPath guard. The tests want to assert the saved-state and not-running error paths that come AFTER the LookPath guards, so we stub virt-customize + kubectl on PATH (empty shims; the assertion-target branches return before invoking either binary). 
The existing TestPrepareExport_MissingVirtCustomize keeps its explicit PATH="" override and still proves the LookPath hint fires when the binary is genuinely absent. Other notes from reviewing the PR (no changes needed, just flagging things I confirmed are sound): - The pkg/gateway split between rawCondition / Condition (and the parallel Listener / rawListener pair) is intentional -- rawCondition is the kubectl-JSON unmarshal target, Condition is the public output type. They happen to be identical today; keeping them separate gives room to project / filter without breaking consumers when the kubectl shape evolves. - schemagen now writes two distinct kinds of schema: provision-config schemas under pkg/provision/schema/ and output schemas alongside the Go type that produces them (e.g. pkg/gateway/state.schema.json). The split is documented in the package doc and the gen path is symmetric with the per-provider one. - The unversioned $id + enum-of-one schemaVersion pattern on gateway.State (SchemaVersion = "1") is the right shape for forward compatibility: the canonical URL stays stable, the version stamp identifies any given snapshot, and a future bump means versioning the schema doc URL while leaving the unversioned pointer at the latest. - gateway.Fetch issues one kubectl invocation per kind. That's fine for the volume here (~6 kinds) but a single `kubectl get gatewayclass,gateway,httproute,... -A -o json` is a non-blocking follow-up if the round-trip count ever matters. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/gateway/fetch.go | 11 ++------ pkg/provision/qemu/prepare_export_test.go | 33 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pkg/gateway/fetch.go b/pkg/gateway/fetch.go index 2885d8c..a406fed 100644 --- a/pkg/gateway/fetch.go +++ b/pkg/gateway/fetch.go @@ -40,7 +40,7 @@ func toConditions(in []rawCondition) []Condition { } out := make([]Condition, len(in)) for i, c := range in { - out[i] = Condition{Type: c.Type, Status: c.Status, Reason: c.Reason, Message: c.Message} + out[i] = Condition(c) } return out } @@ -125,14 +125,7 @@ func fetchGateways(ctx context.Context, kubectlContext string, out *State) error } listeners := make([]Listener, len(g.Spec.Listeners)) for i, l := range g.Spec.Listeners { - listeners[i] = Listener{ - Name: l.Name, - Port: l.Port, - Protocol: l.Protocol, - Hostname: l.Hostname, - AllowedRoutes: l.AllowedRoutes, - TLS: l.TLS, - } + listeners[i] = Listener(l) } listenerStatus := make([]ListenerStatus, len(g.Status.Listeners)) for i, ls := range g.Status.Listeners { diff --git a/pkg/provision/qemu/prepare_export_test.go b/pkg/provision/qemu/prepare_export_test.go index b52a749..23bbde7 100644 --- a/pkg/provision/qemu/prepare_export_test.go +++ b/pkg/provision/qemu/prepare_export_test.go @@ -3,10 +3,41 @@ package qemu import ( "context" "os" + "path/filepath" + "runtime" "strings" "testing" ) +// stubPrepareExportTools writes empty executable shims named +// `virt-customize` and `kubectl` to a fresh temp dir and prepends +// that dir to $PATH for the test. PrepareExport's two LookPath +// guards then resolve them as present, so the test reaches the +// state/precondition checks it actually wants to assert against. +// +// On hosts that already have libguestfs-tools installed (developer +// Linux/macOS) the LookPath would have passed anyway. 
On +// ubuntu-latest GHA runners virt-customize is not present and +// without this shim the test fails at the apt-install hint +// instead of exercising its real target. The shim bodies are +// empty: PrepareExport's no-saved-state and not-running branches +// return before invoking either binary. +// +// Same shape as fakeKubectlOnPATH in pkg/yconverge/kubectl_test.go. +func stubPrepareExportTools(t *testing.T) { + t.Helper() + if runtime.GOOS == "windows" { + t.Skip("PATH stub helper is /bin/sh-only") + } + dir := t.TempDir() + for _, name := range []string{"virt-customize", "kubectl"} { + if err := os.WriteFile(filepath.Join(dir, name), []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatal(err) + } + } + t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH")) +} + // TestPrepareInguestScript_NoMACPinning is the regression guard // for the original Hetzner failure: nothing in the embedded // netplan body anchors a specific MAC. The script lands the @@ -196,6 +227,7 @@ func TestWritePrepareInguestScript(t *testing.T) { // branch: the error must point the user at `y-cluster provision`, // not bubble up an opaque os.IsNotExist. func TestPrepareExport_NoSavedState(t *testing.T) { + stubPrepareExportTools(t) err := PrepareExport(context.Background(), t.TempDir(), "missing", nil) if err == nil { t.Fatal("expected error when no saved state exists") @@ -212,6 +244,7 @@ func TestPrepareExport_NoSavedState(t *testing.T) { // produce an error pointing the operator at `start`, not bubble // up a generic libguestfs/kubectl failure later. 
func TestPrepareExport_VMNotRunning(t *testing.T) { + stubPrepareExportTools(t) cacheDir := t.TempDir() cfg := defaultedRuntimeConfig(t) cfg.CacheDir = cacheDir From e251a7ffc987b520487e8085053986bbab8a850d Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Wed, 6 May 2026 14:04:35 +0000 Subject: [PATCH 4/7] feat(gateway): industry-term Summary + envoy /config_dump in State The raw reconciled-resource dump is hard to consume directly. Add two top-level fields on the state JSON: - summary: a fully-typed routing-tree projection in industry-neutral terms (listener -> host -> route -> match/backend). numTrustedHops + trustedCIDRs surface at listener level, where ClientTrafficPolicy actually attaches. Routes without a hostname bucket under "*" (sorted last). GRPC method matches render as "Method=Type:Service/Method" in the same Path field as HTTP path matches. - envoy: a sample of dataplane state (version + verbatim /config_dump) from any one envoy-gateway proxy pod. envoy admin binds 127.0.0.1:19000 inside a distroless container, so we kubectl port-forward (kubelet's apiserver /pods/NAME:19000/proxy can't reach localhost-bound ports). Best-effort: skipped silently when no proxy pod runs yet. Summary is unit-tested with Gateway API payloads + an empty envoy object as input. Envoy.config is schema-typed as type=object via a jsonschema struct tag.
Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/gateway/envoy.go | 253 ++++++++++++++ pkg/gateway/state.go | 31 ++ pkg/gateway/state.schema.json | 203 +++++++++++ pkg/gateway/summary.go | 612 ++++++++++++++++++++++++++++++++++ pkg/gateway/summary_test.go | 483 +++++++++++++++++++++++++++ 5 files changed, 1582 insertions(+) create mode 100644 pkg/gateway/envoy.go create mode 100644 pkg/gateway/summary.go create mode 100644 pkg/gateway/summary_test.go diff --git a/pkg/gateway/envoy.go b/pkg/gateway/envoy.go new file mode 100644 index 0000000..d243f4f --- /dev/null +++ b/pkg/gateway/envoy.go @@ -0,0 +1,253 @@ +package gateway + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "strconv" + "strings" + "time" +) + +// Envoy is a sample of dataplane state from one envoy-gateway +// proxy pod. Captured live: we pick any one Running pod +// matching the envoy-gateway managed-by label, port-forward to +// its admin port (19000), and pull /server_info + /config_dump. +// +// "Any one pod" is deliberate. envoy-gateway runs the same +// rendered config on every replica, so a single sample +// represents the dataplane truth. Sampling N pods would +// multiply the JSON size without adding signal in the common +// case (a divergence between replicas is itself a bug, not a +// shape consumers should design around). +type Envoy struct { + // Source identifies the proxy pod sampled, in + // "NAMESPACE/NAME" form. Documents WHERE the + // snapshot came from so a future maintainer can correlate + // against `kubectl describe pod` output. + Source string `json:"source,omitempty"` + + // Version is the envoy build version from /server_info + // (e.g. "1.34.1/Distribution/RELEASE/BoringSSL"). Empty + // when /server_info isn't reachable or doesn't carry a + // version field. + Version string `json:"version,omitempty"` + + // Config is the verbatim /config_dump JSON (an envoy + // ConfigDump message rendered as JSON).
Loose-typed + // (object) so envoy admin schema drift across versions + // doesn't break our schema; consumers parse it as JSON. + Config json.RawMessage `json:"config,omitempty" jsonschema:"type=object"` +} + +// FetchEnvoy samples one envoy-gateway proxy pod's admin +// endpoints. Best-effort: returns (nil, nil) when no proxy pod +// is running yet (e.g. the cluster is pre-Gateway), and +// returns (nil, err) for transient kubectl / port-forward +// failures so the caller can decide whether to surface them. +// +// Implementation: kubectl port-forward to the pod's admin +// port. envoy admin binds 127.0.0.1:19000 inside the +// container, which means kubelet's pod-proxy (apiserver +// /pods/:19000/proxy) can't reach it (localhost in the +// pod is not reachable from the host network namespace). +// port-forward streams through kubelet's port-forward +// mechanism, which executes inside the pod's network +// namespace, so it CAN reach localhost-bound admin. +// +// envoy-gateway proxy images are distroless: kubectl exec +// curl is not an option. +func FetchEnvoy(ctx context.Context, kubectlContext string) (*Envoy, error) { + pod, err := pickEnvoyProxyPod(ctx, kubectlContext) + if err != nil { + return nil, fmt.Errorf("find envoy proxy pod: %w", err) + } + if pod == nil { + // Pre-install or pre-reconcile cluster -- not an error. 
+ return nil, nil + } + + port, cancel, err := startEnvoyAdminPortForward(ctx, kubectlContext, pod.Namespace, pod.Name) + if err != nil { + return nil, fmt.Errorf("port-forward to %s/%s: %w", pod.Namespace, pod.Name, err) + } + defer cancel() + + base := fmt.Sprintf("http://127.0.0.1:%d", port) + serverInfo, err := httpGetBody(ctx, base+"/server_info") + if err != nil { + return nil, fmt.Errorf("GET /server_info: %w", err) + } + configDump, err := httpGetBody(ctx, base+"/config_dump") + if err != nil { + return nil, fmt.Errorf("GET /config_dump: %w", err) + } + + return &Envoy{ + Source: pod.Namespace + "/" + pod.Name, + Version: extractEnvoyVersion(serverInfo), + Config: json.RawMessage(configDump), + }, nil +} + +// envoyPod is the minimal pod identifier we need. +type envoyPod struct { + Namespace string + Name string +} + +// pickEnvoyProxyPod returns the first Running pod managed by +// envoy-gateway, or nil when none exists. We don't try to be +// clever about which pod: any replica's config is the same +// reconciled output, and a divergence between replicas would +// indicate a separate bug consumers can investigate via +// /pkg/gateway State. 
+func pickEnvoyProxyPod(ctx context.Context, kubectlContext string) (*envoyPod, error) { + out, err := runKubectl(ctx, kubectlContext, + "get", "pods", "-A", + "-l", "app.kubernetes.io/managed-by=envoy-gateway", + "--field-selector=status.phase=Running", + "-o", "json", + ) + if err != nil { + return nil, err + } + var list struct { + Items []struct { + Metadata struct { + Namespace string `json:"namespace"` + Name string `json:"name"` + } `json:"metadata"` + } `json:"items"` + } + if err := json.Unmarshal(out, &list); err != nil { + return nil, fmt.Errorf("unmarshal pods: %w", err) + } + if len(list.Items) == 0 { + return nil, nil + } + return &envoyPod{ + Namespace: list.Items[0].Metadata.Namespace, + Name: list.Items[0].Metadata.Name, + }, nil +} + +// startEnvoyAdminPortForward starts a `kubectl port-forward +// pod/NAME :19000` and waits for the "Forwarding from +// 127.0.0.1:PORT -> 19000" line. Returns the chosen local +// port plus a cancel func the caller MUST defer to stop the +// background process. +// +// We let kubectl pick the local port (`:19000` syntax) so +// concurrent invocations don't collide.
+func startEnvoyAdminPortForward(ctx context.Context, kubectlContext, namespace, podName string) (int, func(), error) { + pfCtx, cancel := context.WithCancel(ctx) + cmd := exec.CommandContext(pfCtx, "kubectl", + "--context="+kubectlContext, + "port-forward", + "-n", namespace, + "pod/"+podName, + ":19000", + ) + stdout, err := cmd.StdoutPipe() + if err != nil { + cancel() + return 0, nil, err + } + cmd.Stderr = os.Stderr + if err := cmd.Start(); err != nil { + cancel() + return 0, nil, err + } + + cleanup := func() { + cancel() + _ = cmd.Wait() + } + + portCh := make(chan int, 1) + errCh := make(chan error, 1) + go func() { + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { + line := scanner.Text() + const prefix = "Forwarding from 127.0.0.1:" + idx := strings.Index(line, prefix) + if idx < 0 { + continue + } + rest := line[idx+len(prefix):] + portStr := strings.SplitN(rest, " ", 2)[0] + p, err := strconv.Atoi(portStr) + if err != nil { + errCh <- fmt.Errorf("parse port from %q: %w", line, err) + return + } + portCh <- p + // Keep draining stdout so the pipe doesn't fill up + // and block the kubectl process. + go func() { _, _ = io.Copy(io.Discard, stdout) }() + return + } + if err := scanner.Err(); err != nil { + errCh <- err + return + } + errCh <- errors.New("kubectl port-forward exited without printing forwarding line") + }() + + select { + case <-time.After(15 * time.Second): + cleanup() + return 0, nil, errors.New("timed out waiting for kubectl port-forward to bind") + case err := <-errCh: + cleanup() + return 0, nil, err + case p := <-portCh: + return p, cleanup, nil + } +} + +// httpGetBody is a context-aware GET that returns the body +// bytes or an error. Short timeout: envoy admin is local +// (loopback through port-forward) and should respond +// instantly; anything slower is a bug. 
+func httpGetBody(ctx context.Context, url string) ([]byte, error) { + reqCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("status %d: %s", resp.StatusCode, body) + } + return io.ReadAll(resp.Body) +} + +// extractEnvoyVersion pulls the version string out of an envoy +// /server_info response. The doc has a top-level `version` +// field plus a nested build identifier; we surface the top-level +// one (e.g. "1.34.1/Distribution/RELEASE/BoringSSL") which is +// what `envoy --version` prints. +func extractEnvoyVersion(serverInfo []byte) string { + var doc struct { + Version string `json:"version"` + } + if err := json.Unmarshal(serverInfo, &doc); err != nil { + return "" + } + return doc.Version +} diff --git a/pkg/gateway/state.go b/pkg/gateway/state.go index bf8131b..7bf1f77 100644 --- a/pkg/gateway/state.go +++ b/pkg/gateway/state.go @@ -97,6 +97,22 @@ type State struct { // for backend-bound behavior. BackendTrafficPolicies []BackendTrafficPolicy `json:"backendTrafficPolicies"` + // Envoy is a sample of dataplane state from one + // envoy-gateway proxy pod (version + verbatim + // /config_dump). Best-effort and optional: nil when no + // proxy pod is reachable. Consumers needing dataplane + // ground-truth (the actually-programmed envoy config) read + // this; consumers wanting the reconciled k8s view read the + // per-kind slices above. + Envoy *Envoy `json:"envoy,omitempty"` + + // Summary is a fully-typed, industry-term projection of + // the routing tree (listener -> host -> route -> + // match/backend) derived deterministically from the + // per-kind slices. 
The k8s-side data is the source of + // truth; Summary is the ergonomic view on top. + Summary *Summary `json:"summary,omitempty"` + // FetchedAt is the wall-clock at which the snapshot was // taken, in RFC 3339 format. Useful when comparing two // snapshots taken minutes apart during a debug session. @@ -307,6 +323,21 @@ func Fetch(ctx context.Context, kubectlContext string) (*State, error) { if err := fetchBackendTrafficPolicies(ctx, kubectlContext, st); err != nil { return nil, fmt.Errorf("fetch backendtrafficpolicy: %w", err) } + + // Summary is deterministic from the k8s-side slices we + // just populated -- always built. Envoy is best-effort: + // failing to reach a proxy pod doesn't fail the whole + // snapshot (the user might be running this against a + // pre-install cluster, or admin port-forward might be + // blocked by a network policy). Surfacing FetchEnvoy's + // error path to the caller would force them to special- + // case "no proxy yet" everywhere; quietly skipping is + // kinder. Real failures to investigate still print on + // stderr from kubectl itself. 
+ st.Summary = BuildSummary(st) + if env, err := FetchEnvoy(ctx, kubectlContext); err == nil { + st.Envoy = env + } return st, nil } diff --git a/pkg/gateway/state.schema.json b/pkg/gateway/state.schema.json index 9eb5c36..07e96e8 100644 --- a/pkg/gateway/state.schema.json +++ b/pkg/gateway/state.schema.json @@ -68,6 +68,21 @@ ], "type": "object" }, + "Envoy": { + "additionalProperties": false, + "properties": { + "config": { + "type": "object" + }, + "source": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "type": "object" + }, "GRPCRoute": { "additionalProperties": false, "properties": { @@ -332,6 +347,9 @@ }, "type": "array" }, + "envoy": { + "$ref": "#/$defs/Envoy" + }, "fetchedAt": { "type": "string" }, @@ -361,6 +379,9 @@ "1" ], "type": "string" + }, + "summary": { + "$ref": "#/$defs/Summary" } }, "required": [ @@ -373,6 +394,188 @@ "schemaVersion" ], "type": "object" + }, + "Summary": { + "additionalProperties": false, + "properties": { + "listeners": { + "items": { + "$ref": "#/$defs/SummaryListener" + }, + "type": "array" + } + }, + "required": [ + "listeners" + ], + "type": "object" + }, + "SummaryBackend": { + "additionalProperties": false, + "properties": { + "redirect": { + "$ref": "#/$defs/SummaryRedirect" + }, + "service": { + "$ref": "#/$defs/SummaryService" + }, + "type": { + "type": "string" + } + }, + "required": [ + "type" + ], + "type": "object" + }, + "SummaryHost": { + "additionalProperties": false, + "properties": { + "hostname": { + "type": "string" + }, + "routes": { + "items": { + "$ref": "#/$defs/SummaryRoute" + }, + "type": "array" + } + }, + "required": [ + "hostname", + "routes" + ], + "type": "object" + }, + "SummaryListener": { + "additionalProperties": false, + "properties": { + "gateway": { + "type": "string" + }, + "hosts": { + "items": { + "$ref": "#/$defs/SummaryHost" + }, + "type": "array" + }, + "name": { + "type": "string" + }, + "numTrustedHops": { + "type": "integer" + }, + "port": { + "type": 
"integer" + }, + "programmed": { + "type": "boolean" + }, + "protocol": { + "type": "string" + }, + "trustedCIDRs": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "gateway", + "name", + "port", + "protocol", + "programmed", + "hosts" + ], + "type": "object" + }, + "SummaryMatch": { + "additionalProperties": false, + "properties": { + "headers": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "method": { + "type": "string" + }, + "path": { + "type": "string" + } + }, + "type": "object" + }, + "SummaryRedirect": { + "additionalProperties": false, + "properties": { + "hostname": { + "type": "string" + }, + "path": { + "type": "string" + }, + "port": { + "type": "integer" + }, + "scheme": { + "type": "string" + }, + "status": { + "type": "integer" + } + }, + "type": "object" + }, + "SummaryRoute": { + "additionalProperties": false, + "properties": { + "backends": { + "items": { + "$ref": "#/$defs/SummaryBackend" + }, + "type": "array" + }, + "matches": { + "items": { + "$ref": "#/$defs/SummaryMatch" + }, + "type": "array" + }, + "source": { + "type": "string" + } + }, + "required": [ + "source", + "matches", + "backends" + ], + "type": "object" + }, + "SummaryService": { + "additionalProperties": false, + "properties": { + "name": { + "type": "string" + }, + "namespace": { + "type": "string" + }, + "port": { + "type": "integer" + }, + "weight": { + "type": "integer" + } + }, + "required": [ + "name" + ], + "type": "object" } }, "$id": "https://yolean.se/y-cluster/schema/gateway-state.schema.json", diff --git a/pkg/gateway/summary.go b/pkg/gateway/summary.go new file mode 100644 index 0000000..e05982c --- /dev/null +++ b/pkg/gateway/summary.go @@ -0,0 +1,612 @@ +package gateway + +import ( + "encoding/json" + "sort" + "strconv" +) + +// Summary is a derived, fully-typed projection of the cluster's +// reconciled Gateway state into industry-neutral routing-tree +// terms (listener -> host -> route -> 
match/backend). Built +// deterministically by BuildSummary from the existing State +// fields; no kubectl, no dataplane lookups, no json.RawMessage +// in the output. +// +// The intent is a reader-friendly view that avoids both +// kubernetes-specific terminology (HTTPRoute, ClientTrafficPolicy, +// ParentRef) and envoy-internal terminology (virtual_host, HCM, +// RouteConfiguration). Fields below use the words a network +// operator would reach for: listener / host / route / match / +// backend / redirect. +// +// State.Envoy still ships the verbatim envoy /config_dump for +// callers who need ground truth from the dataplane; Summary is +// the ergonomic tier on top. +type Summary struct { + // Listeners enumerates each Gateway listener in the cluster + // (one row per (gateway, listener) pair). A listener with no + // attached routes still appears so a consumer can tell + // "configured but unused" from "not configured at all". + Listeners []SummaryListener `json:"listeners"` +} + +// SummaryListener is the routing-tree root: a single ingress +// terminator (port + protocol) on a specific Gateway. +type SummaryListener struct { + // Gateway is the qualifying owner in "NAMESPACE/NAME" + // form. A cluster with multiple Gateways would surface each + // listener tagged with its Gateway so consumers can tell + // them apart. + Gateway string `json:"gateway"` + + // Name is the listener's section name on the Gateway spec. + // Used to disambiguate when the same Gateway has more than + // one listener on the same protocol family. + Name string `json:"name"` + + // Port is the L4 port the listener binds. + Port int `json:"port"` + + // Protocol is HTTP / HTTPS / TLS / TCP / UDP -- the + // gateway-api protocol enum, surfaced as-is. + Protocol string `json:"protocol"` + + // Programmed is the reconciled Programmed=True signal -- + // "envoy-gateway accepted this listener and is serving it".
+ Programmed bool `json:"programmed"` + + // NumTrustedHops, when non-nil, is the per-listener + // X-Forwarded-For trust depth applied via a + // ClientTrafficPolicy. Pointer (rather than int + omitempty) + // so a deliberate 0 is distinguishable from "policy absent". + // Listener-level placement is correct for envoy-gateway + // today: ClientTrafficPolicy targets a Gateway (with + // optional sectionName), not individual route matches. + NumTrustedHops *int `json:"numTrustedHops,omitempty"` + + // TrustedCIDRs lists the per-listener X-Forwarded-For + // trusted-source CIDRs from the same ClientTrafficPolicy. + // numTrustedHops and trustedCIDRs are alternative tuning + // knobs for the same reverse-proxy-trust problem (count + // hops vs. trust source ranges); newer envoy-gateway + // versions treat them as mutually exclusive on a single + // policy, but a snapshot may surface either or neither. + TrustedCIDRs []string `json:"trustedCIDRs,omitempty"` + + // Hosts groups routes by the hostname declared on the + // underlying HTTPRoute / GRPCRoute. Routes that declare no + // hostname land in the "*" bucket, listed last so a + // consumer eyeballing the JSON sees the named hosts first. + Hosts []SummaryHost `json:"hosts"` +} + +// SummaryHost groups routes by hostname under one listener. +type SummaryHost struct { + // Hostname is the literal value declared on the source + // route, or "*" when the route declares no hostname (catch- + // all on this listener). + Hostname string `json:"hostname"` + + // Routes are the route entries that match this hostname on + // this listener. One entry per (route, rule) pair; a single + // HTTPRoute with three rules produces three entries. + Routes []SummaryRoute `json:"routes"` +} + +// SummaryRoute is one rule of one route attached to a listener. +type SummaryRoute struct { + // Source identifies the origin route + rule index in + // "KIND/NAMESPACE/NAME#RULE_INDEX" form so a consumer + // can find the underlying spec object.
+ Source string `json:"source"` + + // Matches are the OR'd match conditions on this rule. An + // empty list means "match everything on this hostname". + Matches []SummaryMatch `json:"matches"` + + // Backends are the destinations traffic flows to when a + // match hits. Multiple entries when the rule defines + // weighted splits, or when both a redirect filter and a + // backendRef are present. + Backends []SummaryBackend `json:"backends"` +} + +// SummaryMatch is one match clause: path + optional method + +// optional header set. Pluralism (multiple matches per rule) is +// handled by the surrounding SummaryRoute.Matches slice. +type SummaryMatch struct { + // Path is "TYPE=VALUE" -- e.g. "PathPrefix=/auth", + // "Exact=/healthz", "RegularExpression=^/api/v[12]/.*". + // For GRPCRoute matches we render the method clause here as + // "Method=Type:Service/Method" so consumers don't need + // a separate field for the gRPC case. + Path string `json:"path,omitempty"` + + // Method is the HTTP method when the rule constrains it. + Method string `json:"method,omitempty"` + + // Headers, when non-empty, lists header-name -> expected-value + // pairs. The match type ("Exact" vs "RegularExpression") is + // dropped from this projection; consumers who need it walk + // HTTPRoutes[].rules in State. + Headers map[string]string `json:"headers,omitempty"` +} + +// SummaryBackend is one destination on a match. Exactly one of +// Service or Redirect is set; Type discriminates. +type SummaryBackend struct { + // Type is "service" or "redirect". + Type string `json:"type"` + + // Service is set when Type == "service". + Service *SummaryService `json:"service,omitempty"` + + // Redirect is set when Type == "redirect". + Redirect *SummaryRedirect `json:"redirect,omitempty"` +} + +// SummaryService is the resolved upstream-service reference.
+type SummaryService struct { + Namespace string `json:"namespace,omitempty"` + Name string `json:"name"` + Port int `json:"port,omitempty"` + // Weight is the rule's traffic-split weight when present. + // Omitted (0) for single-backend rules. + Weight int `json:"weight,omitempty"` +} + +// SummaryRedirect is a RequestRedirect filter projection. +type SummaryRedirect struct { + Scheme string `json:"scheme,omitempty"` + Hostname string `json:"hostname,omitempty"` + Port int `json:"port,omitempty"` + // Path, when set, is "{type}={value}" (e.g. + // "ReplacePrefixMatch=/v2"). Empty when the redirect keeps + // the original path. + Path string `json:"path,omitempty"` + // Status is the redirect HTTP status code (commonly 301 or + // 302). 0 when the source route omits it. + Status int `json:"status,omitempty"` +} + +// BuildSummary derives a Summary from an already-populated +// *State. Pure function, deterministic, no I/O. Callers +// typically invoke it at the end of Fetch() so the produced +// JSON carries both the raw resource view and this projection. +// +// Nil input yields a Summary with an empty Listeners slice (so +// consumers can json.Marshal the result without nil-checking). +func BuildSummary(s *State) *Summary { + out := &Summary{Listeners: []SummaryListener{}} + if s == nil { + return out + } + for _, gw := range s.Gateways { + gwKey := gw.Namespace + "/" + gw.Name + for _, l := range gw.Listeners { + sl := SummaryListener{ + Gateway: gwKey, + Name: l.Name, + Port: l.Port, + Protocol: l.Protocol, + Programmed: listenerProgrammed(gw, l.Name), + Hosts: collectHosts(s, gw, l), + } + if xff := xForwardedForFor(s.ClientTrafficPolicies, gw, l); xff != nil { + if xff.NumTrustedHops != nil { + v := *xff.NumTrustedHops + sl.NumTrustedHops = &v + } + sl.TrustedCIDRs = xff.TrustedCIDRs + } + out.Listeners = append(out.Listeners, sl) + } + } + return out +} + +// xffSettings is the projection of a ClientTrafficPolicy's +// spec.clientIPDetection.xForwardedFor block. 
Returned by +// xForwardedForFor when at least one CTP applies to the +// listener; nil when no policy applies. +type xffSettings struct { + NumTrustedHops *int + TrustedCIDRs []string +} + +// listenerProgrammed pulls the Programmed=True signal from the +// matching ListenerStatus row. Default false when the controller +// hasn't reported on this listener yet. +func listenerProgrammed(gw Gateway, listenerName string) bool { + for _, ls := range gw.Status.Listeners { + if ls.Name == listenerName { + return ls.Programmed + } + } + return false +} + +// xForwardedForFor walks the ClientTrafficPolicy list and +// returns the first applicable policy's xForwardedFor block +// projected into xffSettings. Returns nil when no CTP applies +// to this listener. +// +// "First" is deterministic because Fetch sorts policies by +// (namespace, name) before populating State. Multiple +// overlapping policies are rare and a reconciliation conflict +// in their own right; consumers wanting full disambiguation +// drill into State.ClientTrafficPolicies. +func xForwardedForFor(ctps []ClientTrafficPolicy, gw Gateway, l Listener) *xffSettings { + for _, ctp := range ctps { + if !ctpAppliesTo(ctp, gw, l) { + continue + } + var spec struct { + ClientIPDetection struct { + XForwardedFor struct { + NumTrustedHops *int `json:"numTrustedHops"` + TrustedCIDRs []string `json:"trustedCIDRs"` + } `json:"xForwardedFor"` + } `json:"clientIPDetection"` + } + if err := json.Unmarshal(ctp.Spec, &spec); err != nil { + continue + } + xff := spec.ClientIPDetection.XForwardedFor + if xff.NumTrustedHops == nil && len(xff.TrustedCIDRs) == 0 { + continue + } + return &xffSettings{ + NumTrustedHops: xff.NumTrustedHops, + TrustedCIDRs: xff.TrustedCIDRs, + } + } + return nil +} + +// ctpAppliesTo returns true if ctp's targetRefs include the +// given Gateway, with sectionName either absent (whole-gateway +// scope) or matching this listener. 
+func ctpAppliesTo(ctp ClientTrafficPolicy, gw Gateway, l Listener) bool { + for _, t := range parseTargetRefs(ctp.TargetRefs, ctp.Namespace) { + if t.Kind != "" && t.Kind != "Gateway" { + continue + } + if t.Namespace != gw.Namespace || t.Name != gw.Name { + continue + } + if t.SectionName != "" && t.SectionName != l.Name { + continue + } + return true + } + return false +} + +// parsedRef is the union shape we extract from parentRefs and +// targetRefs across HTTPRoute / GRPCRoute / *TrafficPolicy. +// Gateway-api and envoy-gateway use the same field names for +// these references, so one parse covers both. +type parsedRef struct { + Group string + Kind string + Namespace string + Name string + SectionName string + Port int +} + +func parseTargetRefs(refs json.RawMessage, defaultNS string) []parsedRef { + if len(refs) == 0 { + return nil + } + var raw []struct { + Group string `json:"group"` + Kind string `json:"kind"` + Namespace string `json:"namespace"` + Name string `json:"name"` + SectionName string `json:"sectionName"` + Port int `json:"port"` + } + if err := json.Unmarshal(refs, &raw); err != nil { + return nil + } + out := make([]parsedRef, 0, len(raw)) + for _, r := range raw { + ns := r.Namespace + if ns == "" { + ns = defaultNS + } + out = append(out, parsedRef{ + Group: r.Group, Kind: r.Kind, + Namespace: ns, Name: r.Name, + SectionName: r.SectionName, Port: r.Port, + }) + } + return out +} + +// routeAttachesTo decides whether a route's parentRefs include +// the given (gw, l). sectionName empty == applies to all +// listeners on the gateway; non-empty must match l.Name. port +// == 0 means "any port"; non-zero must match l.Port. 
+func routeAttachesTo(parentRefs json.RawMessage, gw Gateway, l Listener, defaultNS string) bool { + for _, r := range parseTargetRefs(parentRefs, defaultNS) { + if r.Kind != "" && r.Kind != "Gateway" { + continue + } + if r.Namespace != gw.Namespace || r.Name != gw.Name { + continue + } + if r.SectionName != "" && r.SectionName != l.Name { + continue + } + if r.Port != 0 && r.Port != l.Port { + continue + } + return true + } + return false +} + +// collectHosts walks the route lists in State and bunches them +// into per-hostname buckets under this listener. "*" is the +// catch-all bucket for routes that declare no hostname; it +// sorts to the end so consumers reading the JSON top-down see +// the named hosts first. +func collectHosts(s *State, gw Gateway, l Listener) []SummaryHost { + buckets := map[string][]SummaryRoute{} + addRoute := func(hostnames []string, summarized []SummaryRoute) { + if len(summarized) == 0 { + return + } + if len(hostnames) == 0 { + buckets["*"] = append(buckets["*"], summarized...) + return + } + for _, h := range hostnames { + buckets[h] = append(buckets[h], summarized...) + } + } + for _, r := range s.HTTPRoutes { + if !routeAttachesTo(r.ParentRefs, gw, l, r.Namespace) { + continue + } + addRoute(r.Hostnames, summarizeHTTPRules(r)) + } + for _, r := range s.GRPCRoutes { + if !routeAttachesTo(r.ParentRefs, gw, l, r.Namespace) { + continue + } + addRoute(r.Hostnames, summarizeGRPCRules(r)) + } + keys := make([]string, 0, len(buckets)) + for k := range buckets { + keys = append(keys, k) + } + sort.Slice(keys, func(i, j int) bool { + // "*" sorts last; everything else alphabetical. 
+ if keys[i] == "*" { + return false + } + if keys[j] == "*" { + return true + } + return keys[i] < keys[j] + }) + out := make([]SummaryHost, 0, len(keys)) + for _, k := range keys { + out = append(out, SummaryHost{Hostname: k, Routes: buckets[k]}) + } + return out +} + +// rawHTTPRule mirrors the gateway-api HTTPRoute rule shape at +// the field level we need. Filter / match types are partial -- +// the projection drops the variants we don't render. +type rawHTTPRule struct { + Matches []rawHTTPMatch `json:"matches"` + Filters []rawHTTPFilter `json:"filters"` + BackendRefs []rawHTTPBackendRef `json:"backendRefs"` +} + +type rawHTTPMatch struct { + Path *struct { + Type string `json:"type"` + Value string `json:"value"` + } `json:"path"` + Method string `json:"method"` + Headers []struct { + Name string `json:"name"` + Value string `json:"value"` + Type string `json:"type"` + } `json:"headers"` +} + +type rawHTTPFilter struct { + Type string `json:"type"` + RequestRedirect *struct { + Scheme string `json:"scheme"` + Hostname string `json:"hostname"` + Port int `json:"port"` + StatusCode int `json:"statusCode"` + Path *struct { + Type string `json:"type"` + ReplaceFullPath string `json:"replaceFullPath"` + ReplacePrefixMatch string `json:"replacePrefixMatch"` + } `json:"path"` + } `json:"requestRedirect"` +} + +type rawHTTPBackendRef struct { + Group string `json:"group"` + Kind string `json:"kind"` + Namespace string `json:"namespace"` + Name string `json:"name"` + Port int `json:"port"` + Weight int `json:"weight"` +} + +func summarizeHTTPRules(r HTTPRoute) []SummaryRoute { + if len(r.Rules) == 0 { + return nil + } + var rules []rawHTTPRule + if err := json.Unmarshal(r.Rules, &rules); err != nil { + return nil + } + out := make([]SummaryRoute, 0, len(rules)) + for i, rule := range rules { + out = append(out, SummaryRoute{ + Source: "HTTPRoute/" + r.Namespace + "/" + r.Name + "#" + strconv.Itoa(i), + Matches: summarizeHTTPMatches(rule.Matches), + Backends: 
summarizeHTTPBackends(rule.Filters, rule.BackendRefs, r.Namespace), + }) + } + return out +} + +func summarizeHTTPMatches(matches []rawHTTPMatch) []SummaryMatch { + out := make([]SummaryMatch, 0, len(matches)) + for _, m := range matches { + sm := SummaryMatch{Method: m.Method} + if m.Path != nil { + t := m.Path.Type + if t == "" { + // gateway-api default match type for path is + // PathPrefix when omitted. + t = "PathPrefix" + } + sm.Path = t + "=" + m.Path.Value + } + if len(m.Headers) > 0 { + sm.Headers = make(map[string]string, len(m.Headers)) + for _, h := range m.Headers { + sm.Headers[h.Name] = h.Value + } + } + out = append(out, sm) + } + return out +} + +func summarizeHTTPBackends(filters []rawHTTPFilter, refs []rawHTTPBackendRef, defaultNS string) []SummaryBackend { + var out []SummaryBackend + for _, f := range filters { + if f.Type != "RequestRedirect" || f.RequestRedirect == nil { + continue + } + sr := &SummaryRedirect{ + Scheme: f.RequestRedirect.Scheme, + Hostname: f.RequestRedirect.Hostname, + Port: f.RequestRedirect.Port, + Status: f.RequestRedirect.StatusCode, + } + if p := f.RequestRedirect.Path; p != nil { + switch { + case p.ReplaceFullPath != "": + sr.Path = "ReplaceFullPath=" + p.ReplaceFullPath + case p.ReplacePrefixMatch != "": + sr.Path = "ReplacePrefixMatch=" + p.ReplacePrefixMatch + } + } + out = append(out, SummaryBackend{Type: "redirect", Redirect: sr}) + } + for _, r := range refs { + // Skip non-Service backendRefs (e.g. envoy-gateway's + // Backend CRD); their resolution isn't representable as + // a service tuple. Consumers that need them drill into + // State.HTTPRoutes[].rules. + if r.Kind != "" && r.Kind != "Service" { + continue + } + ns := r.Namespace + if ns == "" { + ns = defaultNS + } + out = append(out, SummaryBackend{ + Type: "service", + Service: &SummaryService{ + Namespace: ns, + Name: r.Name, + Port: r.Port, + Weight: r.Weight, + }, + }) + } + return out +} + +// rawGRPCRule is the GRPCRoute rule shape. 
Distinct from +// HTTPRoute's because gRPC matches on (service, method) rather +// than path. Backends use the same shape. +type rawGRPCRule struct { + Matches []rawGRPCMatch `json:"matches"` + BackendRefs []rawHTTPBackendRef `json:"backendRefs"` +} + +type rawGRPCMatch struct { + Method *struct { + Type string `json:"type"` + Service string `json:"service"` + Method string `json:"method"` + } `json:"method"` + Headers []struct { + Name string `json:"name"` + Value string `json:"value"` + Type string `json:"type"` + } `json:"headers"` +} + +func summarizeGRPCRules(r GRPCRoute) []SummaryRoute { + if len(r.Rules) == 0 { + return nil + } + var rules []rawGRPCRule + if err := json.Unmarshal(r.Rules, &rules); err != nil { + return nil + } + out := make([]SummaryRoute, 0, len(rules)) + for i, rule := range rules { + out = append(out, SummaryRoute{ + Source: "GRPCRoute/" + r.Namespace + "/" + r.Name + "#" + strconv.Itoa(i), + Matches: summarizeGRPCMatches(rule.Matches), + Backends: summarizeHTTPBackends(nil, rule.BackendRefs, r.Namespace), + }) + } + return out +} + +func summarizeGRPCMatches(matches []rawGRPCMatch) []SummaryMatch { + out := make([]SummaryMatch, 0, len(matches)) + for _, m := range matches { + sm := SummaryMatch{} + if m.Method != nil { + t := m.Method.Type + if t == "" { + t = "Exact" + } + svc := m.Method.Service + meth := m.Method.Method + // Render as "Method={type}:{service}/{method}". Empty + // service or method just collapse the separator so we + // don't emit "/" or ":/" with stray punctuation. 
+ body := svc + if svc != "" && meth != "" { + body = svc + "/" + meth + } else if meth != "" { + body = "/" + meth + } + sm.Path = "Method=" + t + ":" + body + } + if len(m.Headers) > 0 { + sm.Headers = make(map[string]string, len(m.Headers)) + for _, h := range m.Headers { + sm.Headers[h.Name] = h.Value + } + } + out = append(out, sm) + } + return out +} diff --git a/pkg/gateway/summary_test.go b/pkg/gateway/summary_test.go new file mode 100644 index 0000000..bd715fe --- /dev/null +++ b/pkg/gateway/summary_test.go @@ -0,0 +1,483 @@ +package gateway + +import ( + "encoding/json" + "testing" +) + +// TestBuildSummary_Nil documents the nil-safety contract: +// callers can BuildSummary(nil) without panicking and get a +// non-nil Summary with an empty Listeners slice (so consumers +// can json.Marshal without nil-checks). +func TestBuildSummary_Nil(t *testing.T) { + s := BuildSummary(nil) + if s == nil { + t.Fatal("BuildSummary(nil) returned nil; want empty Summary") + } + if s.Listeners == nil { + t.Errorf("Listeners is nil; want []SummaryListener{}") + } + if len(s.Listeners) != 0 { + t.Errorf("Listeners len=%d; want 0", len(s.Listeners)) + } +} + +// TestBuildSummary_HTTPRouteServiceBackend pins the basic +// shape: one Gateway with one HTTPS listener; one HTTPRoute +// declaring a hostname, a PathPrefix match, and a Service +// backend. The summary should produce one listener row, one +// host bucket for the declared hostname, one route row with +// path "PathPrefix=/" and a service backend. 
+func TestBuildSummary_HTTPRouteServiceBackend(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + Status: GatewayStatus{ + Listeners: []ListenerStatus{{Name: "https", Programmed: true}}, + }, + }}, + HTTPRoutes: []HTTPRoute{{ + Namespace: "keycloak-v3", + Name: "keycloak-admin", + ParentRefs: rawJSON(t, `[{"name":"y-cluster","namespace":"y-cluster","sectionName":"https"}]`), + Hostnames: []string{"keycloak-admin"}, + Rules: rawJSON(t, `[{ + "matches": [{"path": {"type": "PathPrefix", "value": "/"}}], + "backendRefs": [{"name": "keycloak", "namespace": "keycloak-v3", "port": 8080}] + }]`), + }}, + } + + sum := BuildSummary(st) + if len(sum.Listeners) != 1 { + t.Fatalf("got %d listeners, want 1: %+v", len(sum.Listeners), sum) + } + l := sum.Listeners[0] + if l.Gateway != "y-cluster/y-cluster" { + t.Errorf("listener.gateway=%q want %q", l.Gateway, "y-cluster/y-cluster") + } + if l.Port != 443 || l.Protocol != "HTTPS" || !l.Programmed { + t.Errorf("listener port/proto/programmed: %+v", l) + } + if l.NumTrustedHops != nil { + t.Errorf("numTrustedHops should be nil with no CTP, got %d", *l.NumTrustedHops) + } + if len(l.Hosts) != 1 || l.Hosts[0].Hostname != "keycloak-admin" { + t.Fatalf("hosts: %+v", l.Hosts) + } + rs := l.Hosts[0].Routes + if len(rs) != 1 { + t.Fatalf("routes: %+v", rs) + } + if rs[0].Source != "HTTPRoute/keycloak-v3/keycloak-admin#0" { + t.Errorf("source=%q", rs[0].Source) + } + if len(rs[0].Matches) != 1 || rs[0].Matches[0].Path != "PathPrefix=/" { + t.Errorf("matches: %+v", rs[0].Matches) + } + if len(rs[0].Backends) != 1 || rs[0].Backends[0].Type != "service" { + t.Fatalf("backends: %+v", rs[0].Backends) + } + svc := rs[0].Backends[0].Service + if svc == nil || svc.Name != "keycloak" || svc.Namespace != "keycloak-v3" || svc.Port != 8080 { + t.Errorf("service: %+v", svc) + } +} + +// TestBuildSummary_RedirectFilter pins the 
redirect-only case: +// port 80 with a RequestRedirect filter and no backendRefs +// should surface as a single backend of type "redirect" with +// scheme + status carried through. +func TestBuildSummary_RedirectFilter(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "http", Port: 80, Protocol: "HTTP"}}, + Status: GatewayStatus{ + Listeners: []ListenerStatus{{Name: "http", Programmed: true}}, + }, + }}, + HTTPRoutes: []HTTPRoute{{ + Namespace: "y-cluster", + Name: "external-http", + ParentRefs: rawJSON(t, `[{"name":"y-cluster","sectionName":"http"}]`), + Hostnames: []string{"keycloak-admin.example.com"}, + Rules: rawJSON(t, `[{ + "filters": [{"type": "RequestRedirect", "requestRedirect": {"scheme": "https", "statusCode": 301}}] + }]`), + }}, + } + + sum := BuildSummary(st) + rs := sum.Listeners[0].Hosts[0].Routes + if len(rs) != 1 || len(rs[0].Backends) != 1 { + t.Fatalf("backends: %+v", rs) + } + b := rs[0].Backends[0] + if b.Type != "redirect" || b.Redirect == nil { + t.Fatalf("backend not a redirect: %+v", b) + } + if b.Redirect.Scheme != "https" || b.Redirect.Status != 301 { + t.Errorf("redirect scheme/status: %+v", b.Redirect) + } +} + +// TestBuildSummary_NoHostnameBucketsAsStar pins the wildcard +// bucket: a route declaring no hostname must show up under the +// "*" host so consumers can tell "served on all hosts" from +// "served on no host". 
+func TestBuildSummary_NoHostnameBucketsAsStar(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + HTTPRoutes: []HTTPRoute{{ + Namespace: "y-cluster", + Name: "fallback", + ParentRefs: rawJSON(t, `[{"name":"y-cluster"}]`), + // no hostnames + Rules: rawJSON(t, `[{ + "backendRefs": [{"name": "echo", "port": 80}] + }]`), + }}, + } + + sum := BuildSummary(st) + hosts := sum.Listeners[0].Hosts + if len(hosts) != 1 || hosts[0].Hostname != "*" { + t.Errorf("host bucket: %+v (want single \"*\")", hosts) + } +} + +// TestBuildSummary_StarSortsLast pins the order: named hosts +// alphabetical, "*" trailing. A consumer skimming the JSON +// should see the named hosts first. +func TestBuildSummary_StarSortsLast(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + HTTPRoutes: []HTTPRoute{ + { + Namespace: "y-cluster", + Name: "catch-all", + ParentRefs: rawJSON(t, `[{"name":"y-cluster"}]`), + Rules: rawJSON(t, `[{"backendRefs": [{"name": "echo"}]}]`), + }, + { + Namespace: "y-cluster", + Name: "zeta", + ParentRefs: rawJSON(t, `[{"name":"y-cluster"}]`), + Hostnames: []string{"zeta.example.com"}, + Rules: rawJSON(t, `[{"backendRefs": [{"name": "echo"}]}]`), + }, + { + Namespace: "y-cluster", + Name: "alpha", + ParentRefs: rawJSON(t, `[{"name":"y-cluster"}]`), + Hostnames: []string{"alpha.example.com"}, + Rules: rawJSON(t, `[{"backendRefs": [{"name": "echo"}]}]`), + }, + }, + } + + hosts := BuildSummary(st).Listeners[0].Hosts + if len(hosts) != 3 { + t.Fatalf("hosts: %+v", hosts) + } + want := []string{"alpha.example.com", "zeta.example.com", "*"} + for i, h := range hosts { + if h.Hostname != want[i] { + t.Errorf("host[%d]=%q want %q", i, h.Hostname, want[i]) + } + } +} + +// 
TestBuildSummary_MultiHostnameRouteAppearsInEachBucket pins +// the duplication policy: a single HTTPRoute with three +// hostnames produces one route entry under each hostname +// bucket. A consumer scanning hosts shouldn't have to check +// "does this route also appear elsewhere". +func TestBuildSummary_MultiHostnameRouteAppearsInEachBucket(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + HTTPRoutes: []HTTPRoute{{ + Namespace: "keycloak-v3", + Name: "keycloak-admin", + ParentRefs: rawJSON(t, `[{"name":"y-cluster","namespace":"y-cluster"}]`), + Hostnames: []string{"keycloak-admin", "keycloak-admin.example.com"}, + Rules: rawJSON(t, `[{ + "backendRefs": [{"name": "keycloak", "port": 8080}] + }]`), + }}, + } + + hosts := BuildSummary(st).Listeners[0].Hosts + if len(hosts) != 2 { + t.Fatalf("expected 2 host buckets, got %d: %+v", len(hosts), hosts) + } + for _, h := range hosts { + if len(h.Routes) != 1 { + t.Errorf("host %q routes: %+v", h.Hostname, h.Routes) + } + } +} + +// TestBuildSummary_NumTrustedHopsGatewayScope pins the +// gateway-wide CTP application: a policy with a Gateway +// targetRef (no sectionName) propagates numTrustedHops to +// every listener on that Gateway. 
+func TestBuildSummary_NumTrustedHopsGatewayScope(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{ + {Name: "http", Port: 80, Protocol: "HTTP"}, + {Name: "https", Port: 443, Protocol: "HTTPS"}, + }, + }}, + ClientTrafficPolicies: []ClientTrafficPolicy{{ + Namespace: "y-cluster", + Name: "trust-lb-xff", + TargetRefs: rawJSON(t, `[{"kind":"Gateway","name":"y-cluster","namespace":"y-cluster"}]`), + Spec: rawJSON(t, `{"clientIPDetection":{"xForwardedFor":{"numTrustedHops":1}}}`), + }}, + } + + for _, l := range BuildSummary(st).Listeners { + if l.NumTrustedHops == nil || *l.NumTrustedHops != 1 { + t.Errorf("listener %q numTrustedHops: %v", l.Name, l.NumTrustedHops) + } + } +} + +// TestBuildSummary_NumTrustedHopsListenerScope pins the +// listener-scoped CTP: a sectionName on the targetRef narrows +// application to that listener only; the other listener must +// stay nil. +func TestBuildSummary_NumTrustedHopsListenerScope(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{ + {Name: "http", Port: 80, Protocol: "HTTP"}, + {Name: "https", Port: 443, Protocol: "HTTPS"}, + }, + }}, + ClientTrafficPolicies: []ClientTrafficPolicy{{ + Namespace: "y-cluster", + Name: "https-only", + TargetRefs: rawJSON(t, `[{"kind":"Gateway","name":"y-cluster","namespace":"y-cluster","sectionName":"https"}]`), + Spec: rawJSON(t, `{"clientIPDetection":{"xForwardedFor":{"numTrustedHops":2}}}`), + }}, + } + + listenersByName := map[string]SummaryListener{} + for _, l := range BuildSummary(st).Listeners { + listenersByName[l.Name] = l + } + if listenersByName["http"].NumTrustedHops != nil { + t.Errorf("http listener should have nil numTrustedHops: %v", listenersByName["http"].NumTrustedHops) + } + if h := listenersByName["https"].NumTrustedHops; h == nil || *h != 2 { + t.Errorf("https listener numTrustedHops: %v", h) + } +} + +// 
TestBuildSummary_TrustedCIDRs pins that the alternate XFF +// trust knob (trustedCIDRs) is surfaced on the listener +// alongside numTrustedHops. envoy-gateway treats the two as +// alternative tuning paths for the same problem -- a snapshot +// may carry one, the other, or both -- so the projection +// must surface whichever the policy declares. +func TestBuildSummary_TrustedCIDRs(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + ClientTrafficPolicies: []ClientTrafficPolicy{{ + Namespace: "y-cluster", + Name: "trust-cidrs", + TargetRefs: rawJSON(t, `[{"kind":"Gateway","name":"y-cluster","namespace":"y-cluster"}]`), + Spec: rawJSON(t, `{"clientIPDetection":{"xForwardedFor":{ + "trustedCIDRs":["10.0.0.0/8","100.64.0.0/10"] + }}}`), + }}, + } + + l := BuildSummary(st).Listeners[0] + if l.NumTrustedHops != nil { + t.Errorf("numTrustedHops should be nil with trustedCIDRs-only CTP, got %d", *l.NumTrustedHops) + } + want := []string{"10.0.0.0/8", "100.64.0.0/10"} + if len(l.TrustedCIDRs) != len(want) { + t.Fatalf("trustedCIDRs len: got %v want %v", l.TrustedCIDRs, want) + } + for i, c := range want { + if l.TrustedCIDRs[i] != c { + t.Errorf("trustedCIDRs[%d]=%q want %q", i, l.TrustedCIDRs[i], c) + } + } +} + +// TestBuildSummary_TrustedCIDRsAndNumTrustedHops pins the +// "both knobs set" combination. Older envoy-gateway versions +// allow it; newer ones reject the policy at admission. We +// surface whatever the snapshot saw -- the consumer can then +// notice the policy's Accepted=False status if it matters. 
+func TestBuildSummary_TrustedCIDRsAndNumTrustedHops(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + ClientTrafficPolicies: []ClientTrafficPolicy{{ + Namespace: "y-cluster", + Name: "trust-both", + TargetRefs: rawJSON(t, `[{"kind":"Gateway","name":"y-cluster","namespace":"y-cluster"}]`), + Spec: rawJSON(t, `{"clientIPDetection":{"xForwardedFor":{ + "numTrustedHops": 2, + "trustedCIDRs":["10.0.0.0/8"] + }}}`), + }}, + } + + l := BuildSummary(st).Listeners[0] + if l.NumTrustedHops == nil || *l.NumTrustedHops != 2 { + t.Errorf("numTrustedHops: %v", l.NumTrustedHops) + } + if len(l.TrustedCIDRs) != 1 || l.TrustedCIDRs[0] != "10.0.0.0/8" { + t.Errorf("trustedCIDRs: %v", l.TrustedCIDRs) + } +} + +// TestBuildSummary_GRPCRouteMethodMatch pins the GRPC match +// rendering: a (service, method) clause shows up as +// "Method=Exact:{service}/{method}" on the SummaryMatch.Path +// field, sharing the same projection field as HTTP path +// matches. 
+func TestBuildSummary_GRPCRouteMethodMatch(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + GRPCRoutes: []GRPCRoute{{ + Namespace: "live-v3", + Name: "live-grpc", + ParentRefs: rawJSON(t, `[{"name":"y-cluster","namespace":"y-cluster"}]`), + Hostnames: []string{"live.example.com"}, + Rules: rawJSON(t, `[{ + "matches": [{"method": {"type": "Exact", "service": "live.v1.LiveService", "method": "Subscribe"}}], + "backendRefs": [{"name": "live", "namespace": "live-v3", "port": 9090}] + }]`), + }}, + } + + rs := BuildSummary(st).Listeners[0].Hosts[0].Routes + if len(rs) != 1 || len(rs[0].Matches) != 1 { + t.Fatalf("routes: %+v", rs) + } + got := rs[0].Matches[0].Path + want := "Method=Exact:live.v1.LiveService/Subscribe" + if got != want { + t.Errorf("grpc match path=%q want %q", got, want) + } + if rs[0].Source != "GRPCRoute/live-v3/live-grpc#0" { + t.Errorf("grpc source=%q", rs[0].Source) + } +} + +// TestBuildSummary_NonGatewayParentRefIgnored pins the +// scoping: a route whose parentRef points at a different +// Gateway (or a non-Gateway kind) must not show up under any +// listener of OUR Gateway. Cross-namespace routing on the +// same controller is real; we don't want adjacent Gateways' +// routes leaking into our summary. 
+func TestBuildSummary_NonGatewayParentRefIgnored(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + HTTPRoutes: []HTTPRoute{ + { + Namespace: "other", + Name: "other-route", + ParentRefs: rawJSON(t, `[{"name":"other-gateway","namespace":"other"}]`), + Hostnames: []string{"other.example.com"}, + Rules: rawJSON(t, `[{"backendRefs": [{"name": "echo"}]}]`), + }, + }, + } + + hosts := BuildSummary(st).Listeners[0].Hosts + if len(hosts) != 0 { + t.Errorf("expected no hosts (route on different gateway), got %+v", hosts) + } +} + +// TestBuildSummary_MarshalsWithEmptyEnvoy is the user's stated +// shape contract: tests build a State + Summary and an empty +// Envoy object, then JSON-marshal. The result must surface +// "summary" and "envoy" at the top level alongside the +// existing kind slices, all parsable. +func TestBuildSummary_MarshalsWithEmptyEnvoy(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + Envoy: &Envoy{}, // empty envoy as per the test contract + FetchedAt: "2026-05-06T00:00:00Z", + SchemaID: SchemaID, + SchemaVersion: SchemaVersion, + } + st.Summary = BuildSummary(st) + + out, err := json.Marshal(st) + if err != nil { + t.Fatal(err) + } + var back map[string]any + if err := json.Unmarshal(out, &back); err != nil { + t.Fatalf("re-parse: %v", err) + } + if _, ok := back["summary"]; !ok { + t.Errorf("summary missing from marshalled JSON: %s", out) + } + if _, ok := back["envoy"]; !ok { + t.Errorf("envoy missing from marshalled JSON: %s", out) + } +} + +// rawJSON is a test helper that wraps a literal JSON string in +// json.RawMessage with a syntax check to fail loudly on a typo +// in fixture text. 
+func rawJSON(t *testing.T, s string) json.RawMessage { + t.Helper() + var probe any + if err := json.Unmarshal([]byte(s), &probe); err != nil { + t.Fatalf("invalid fixture JSON %q: %v", s, err) + } + return json.RawMessage(s) +} From 100ddf63b46770738fb90d609ad9d0ae8adf04e5 Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Thu, 7 May 2026 04:01:25 +0000 Subject: [PATCH 5/7] fix(qemu): teardown also removes -gateway-state.json prepare-export's live phase writes the reconciled Gateway snapshot to {cacheDir}/{name}-gateway-state.json, but the teardown artefact list didn't include it. The JSON survived teardown, and the next prepare-export bundle picked up a stale dump from the prior cluster. Add the path to perVMArtefacts and update both the explicit-list teardown test and the TestPerVMArtefacts pin. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/provision/qemu/qemu.go | 17 +++++++++++++---- pkg/provision/qemu/qemu_test.go | 24 +++++++++++------------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/pkg/provision/qemu/qemu.go b/pkg/provision/qemu/qemu.go index 976a2f5..602948b 100644 --- a/pkg/provision/qemu/qemu.go +++ b/pkg/provision/qemu/qemu.go @@ -406,10 +406,13 @@ func TeardownConfig(cfg Config, keepDisk bool, logger *zap.Logger) error { return nil } -// perVMArtefacts returns every path Provision creates for a given -// cluster. Used by TeardownConfig to leave the cache dir clean for -// the next provision -- the keypair in particular must go so the -// per-customer "no key reuse" contract holds. +// perVMArtefacts returns every path Provision or PrepareExport +// creates for a given cluster. Used by TeardownConfig to leave +// the cache dir clean for the next provision -- the keypair in +// particular must go so the per-customer "no key reuse" +// contract holds, and the prepare-export gateway-state JSON +// must go so a stale dump from a prior export doesn't ship in +// the next bundle. 
func perVMArtefacts(cacheDir, name string) []string { prefix := filepath.Join(cacheDir, name) return []string{ @@ -419,6 +422,12 @@ func perVMArtefacts(cacheDir, name string) []string { prefix + "-seed.img", prefix + "-cloud-init.yaml", prefix + "-console.log", + // PrepareExport's live phase writes the reconciled + // Gateway snapshot here; export copies it into + // BUNDLE_DIR/gateway-state.json. Without this entry, + // teardown leaves the JSON behind and a subsequent + // build picks up a stale dump from the prior cluster. + prefix + "-gateway-state.json", } } diff --git a/pkg/provision/qemu/qemu_test.go b/pkg/provision/qemu/qemu_test.go index 627733d..d637524 100644 --- a/pkg/provision/qemu/qemu_test.go +++ b/pkg/provision/qemu/qemu_test.go @@ -174,14 +174,16 @@ func TestTeardownConfig_DeletesKeypair(t *testing.T) { cfg := defaultedRuntimeConfig(t) cfg.CacheDir = t.TempDir() cfg.Kubeconfig = "" - for _, name := range []string{ + artefacts := []string{ cfg.Name + ".qcow2", cfg.Name + "-ssh", cfg.Name + "-ssh.pub", cfg.Name + "-seed.img", cfg.Name + "-cloud-init.yaml", cfg.Name + "-console.log", - } { + cfg.Name + "-gateway-state.json", + } + for _, name := range artefacts { if err := os.WriteFile(filepath.Join(cfg.CacheDir, name), []byte("x"), 0o600); err != nil { t.Fatal(err) } @@ -189,14 +191,7 @@ func TestTeardownConfig_DeletesKeypair(t *testing.T) { if err := TeardownConfig(cfg, false, nil); err != nil { t.Fatal(err) } - for _, name := range []string{ - cfg.Name + ".qcow2", - cfg.Name + "-ssh", - cfg.Name + "-ssh.pub", - cfg.Name + "-seed.img", - cfg.Name + "-cloud-init.yaml", - cfg.Name + "-console.log", - } { + for _, name := range artefacts { if _, err := os.Stat(filepath.Join(cfg.CacheDir, name)); err == nil { t.Errorf("teardown should remove %s", name) } @@ -222,9 +217,11 @@ func TestTeardownConfig_KeepDiskKeepsKeypair(t *testing.T) { } } -// TestPerVMArtefacts pins the path layout. Provision creates these -// files; teardown removes them. 
A drift between the two leaves -// stale state that breaks the no-key-reuse contract. +// TestPerVMArtefacts pins the path layout. Provision and +// PrepareExport create these files; teardown removes them. A +// drift between the two leaves stale state that breaks the +// no-key-reuse contract OR ships a stale gateway-state dump +// in the next prepare-export bundle. func TestPerVMArtefacts(t *testing.T) { got := perVMArtefacts("/c", "n") want := []string{ @@ -234,6 +231,7 @@ func TestPerVMArtefacts(t *testing.T) { "/c/n-seed.img", "/c/n-cloud-init.yaml", "/c/n-console.log", + "/c/n-gateway-state.json", } if len(got) != len(want) { t.Fatalf("got %v, want %v", got, want) From 1a0067fa0186567a71821d20dcaf3fec0292c395 Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Thu, 7 May 2026 05:43:35 +0000 Subject: [PATCH 6/7] feat(gateway): hostnames subcommand for LB SAN derivation The appliance build flow's external HTTPS LoadBalancer stage needs a SAN list for its self-signed cert. Today the operator declares it twice -- once in HTTPRoute manifests, once in TLS_DOMAINS=foo,bar -- and drift between the two means either the cert covers hostnames the cluster doesn't serve, or the cluster serves hostnames the cert doesn't cover. Add `y-cluster gateway hostnames` that reads the existing `gateway state` snapshot and projects unique non-wildcard hostnames from the typed Summary (.summary.listeners[].hosts[].hostname). Default output is one hostname per line; --csv joins with `,` -- exactly the format TLS_DOMAINS / do_tls_frontend expect. Implementation is a small pure-Go helper (`Hostnames(*State) []string`) plus cobra wiring next to `gateway state`. The filter logic (skip "" and "*", dedupe across listeners) is unit-tested. The "*" sentinel from Summary is the catch-all bucket for routes that declare no `.spec.hostnames` -- not a hostname suitable for a cert SAN. Wildcard support (e.g. *.example.com literal SANs) is out of scope. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/y-cluster/gateway.go | 54 +++++++++++++ pkg/gateway/hostnames.go | 47 +++++++++++ pkg/gateway/hostnames_test.go | 144 ++++++++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+) create mode 100644 pkg/gateway/hostnames.go create mode 100644 pkg/gateway/hostnames_test.go diff --git a/cmd/y-cluster/gateway.go b/cmd/y-cluster/gateway.go index be2c35f..aabbf5e 100644 --- a/cmd/y-cluster/gateway.go +++ b/cmd/y-cluster/gateway.go @@ -3,6 +3,7 @@ package main import ( "encoding/json" "fmt" + "strings" "github.com/spf13/cobra" @@ -23,10 +24,63 @@ func gatewayCmd() *cobra.Command { Short: "Inspect and manage the y-cluster Gateway state", } cmd.AddCommand(gatewayStateCmd()) + cmd.AddCommand(gatewayHostnamesCmd()) cmd.AddCommand(gatewayClearDNSHintIPCmd()) return cmd } +// gatewayHostnamesCmd extracts a deduped, sorted list of the +// non-wildcard hostnames the cluster's HTTPRoutes / GRPCRoutes +// declare, derived from the same `gateway.Fetch` snapshot the +// `state` subcommand emits. +// +// The canonical consumer is the appliance build script's TLS LB +// stage: `TLS_DOMAINS=$(y-cluster gateway hostnames --csv)` +// makes the LB cert's SAN list match exactly what the cluster +// serves, eliminating drift between the operator's env var and +// the cluster's HTTPRoute manifests. +// +// Default output is one hostname per line (works with `xargs`, +// `mapfile`, `read`); --csv joins with `,` for the SAN-list +// shape `do_tls_frontend` expects. +func gatewayHostnamesCmd() *cobra.Command { + var contextName string + var csv bool + cmd := &cobra.Command{ + Use: "hostnames", + Short: "Print non-wildcard hostnames from the cluster's gateway state", + Long: `Reads ` + "`gateway state`" + ` and projects unique non-wildcard hostnames +from .summary.listeners[].hosts[].hostname. Default output is one +hostname per line, sorted; --csv joins with "," for the format +TLS_DOMAINS / do_tls_frontend consume. 
+ +Use case: derive an LB cert's SAN list directly from the cluster's +routing plane so the cert and the routes can't drift. + + TLS_DOMAINS=$(y-cluster gateway hostnames --context=local --csv)`, + Args: cobra.NoArgs, + RunE: func(c *cobra.Command, _ []string) error { + st, err := gateway.Fetch(c.Context(), contextName) + if err != nil { + return err + } + hosts := gateway.Hostnames(st) + out := c.OutOrStdout() + if csv { + fmt.Fprintln(out, strings.Join(hosts, ",")) + return nil + } + for _, h := range hosts { + fmt.Fprintln(out, h) + } + return nil + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + cmd.Flags().BoolVar(&csv, "csv", false, "join hostnames with comma instead of newline") + return cmd +} + func gatewayStateCmd() *cobra.Command { var contextName string cmd := &cobra.Command{ diff --git a/pkg/gateway/hostnames.go b/pkg/gateway/hostnames.go new file mode 100644 index 0000000..5c7125a --- /dev/null +++ b/pkg/gateway/hostnames.go @@ -0,0 +1,47 @@ +package gateway + +import "sort" + +// Hostnames returns the deduped, sorted hostname list reachable +// through the cluster's gateways, derived from the typed Summary +// projection on State. The set is exactly: +// +// { h.Hostname for l in state.Summary.Listeners +// for h in l.Hosts +// if h.Hostname not in {"", "*"} } +// +// Used by the `y-cluster gateway hostnames` subcommand and by +// downstream consumers (e.g. an external LoadBalancer setup that +// needs the SAN list for its TLS cert). +// +// "*" is the catch-all bucket (route declared no `.spec.hostnames`) +// and is dropped: a wildcard-SAN cert is a different concern and +// out of scope here. Routes that explicitly declare a wildcard +// hostname like "*.example.com" pass through verbatim -- +// consumers that can't handle them should filter further. 
+// +// Reading from Summary (rather than re-walking HTTPRoutes / +// GRPCRoutes here) keeps the parent-ref / listener-attachment +// filtering in one place. Summary is built deterministically +// from the same State, so this function is a pure projection. +func Hostnames(s *State) []string { + out := []string{} + if s == nil || s.Summary == nil { + return out + } + seen := map[string]struct{}{} + for _, l := range s.Summary.Listeners { + for _, h := range l.Hosts { + if h.Hostname == "" || h.Hostname == "*" { + continue + } + if _, ok := seen[h.Hostname]; ok { + continue + } + seen[h.Hostname] = struct{}{} + out = append(out, h.Hostname) + } + } + sort.Strings(out) + return out +} diff --git a/pkg/gateway/hostnames_test.go b/pkg/gateway/hostnames_test.go new file mode 100644 index 0000000..a80155f --- /dev/null +++ b/pkg/gateway/hostnames_test.go @@ -0,0 +1,144 @@ +package gateway + +import ( + "reflect" + "testing" +) + +// TestHostnames_Nil documents the nil-safety contract: callers +// can pass nil State (or a State with nil Summary) without +// panicking and get a non-nil empty slice back. JSON consumers +// can index without checks; bash consumers see no output. +func TestHostnames_Nil(t *testing.T) { + if got := Hostnames(nil); got == nil { + t.Errorf("nil input should yield empty slice, not nil") + } else if len(got) != 0 { + t.Errorf("nil input should yield empty slice, got %v", got) + } + if got := Hostnames(&State{}); got == nil || len(got) != 0 { + t.Errorf("State with nil Summary should yield empty slice, got %v", got) + } +} + +// TestHostnames_Sorted pins the ordering contract: bash +// consumers piping through `sort -u | diff` would notice +// non-determinism otherwise. 
+func TestHostnames_Sorted(t *testing.T) { + s := &State{Summary: &Summary{Listeners: []SummaryListener{{ + Hosts: []SummaryHost{ + {Hostname: "zeta.example.com"}, + {Hostname: "alpha.example.com"}, + {Hostname: "mid.example.com"}, + }, + }}}} + got := Hostnames(s) + want := []string{"alpha.example.com", "mid.example.com", "zeta.example.com"} + if !reflect.DeepEqual(got, want) { + t.Errorf("got %v, want %v", got, want) + } +} + +// TestHostnames_DropsWildcardBucket pins that the "*" host +// (Summary's bucket for routes declaring no hostname) is not a +// hostname suitable for a cert SAN -- it's the in-snapshot +// sentinel for catch-all routes. Cert generators should not see +// it. +func TestHostnames_DropsWildcardBucket(t *testing.T) { + s := &State{Summary: &Summary{Listeners: []SummaryListener{{ + Hosts: []SummaryHost{ + {Hostname: "real.example.com"}, + {Hostname: "*"}, + {Hostname: ""}, + }, + }}}} + got := Hostnames(s) + if len(got) != 1 || got[0] != "real.example.com" { + t.Errorf("got %v, want [real.example.com]", got) + } +} + +// TestHostnames_DedupAcrossListeners pins that a hostname +// appearing on multiple listeners (e.g. the same HTTPRoute +// attached to both http and https) appears once in the output. +// A duplicated SAN entry isn't strictly invalid in a cert but +// makes downstream review noisy. 
+func TestHostnames_DedupAcrossListeners(t *testing.T) { + s := &State{Summary: &Summary{Listeners: []SummaryListener{ + { + Name: "http", + Hosts: []SummaryHost{ + {Hostname: "site.example.com"}, + }, + }, + { + Name: "https", + Hosts: []SummaryHost{ + {Hostname: "site.example.com"}, + {Hostname: "extra.example.com"}, + }, + }, + }}} + got := Hostnames(s) + want := []string{"extra.example.com", "site.example.com"} + if !reflect.DeepEqual(got, want) { + t.Errorf("got %v, want %v", got, want) + } +} + +// TestHostnames_FromHTTPRouteRoundTrip end-to-end: build a State +// with raw HTTPRoute payloads, call BuildSummary (which routes +// through to Summary.Listeners[].Hosts[]), then Hostnames. This +// guards against a regression where Summary's hostname-bucket +// shape changes; consumers using Hostnames don't have to track +// the intermediate. +func TestHostnames_FromHTTPRouteRoundTrip(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + HTTPRoutes: []HTTPRoute{{ + Namespace: "keycloak-v3", + Name: "keycloak-admin", + ParentRefs: rawJSON(t, `[{"name":"y-cluster","namespace":"y-cluster"}]`), + Hostnames: []string{"keycloak-admin", "keycloak-admin.example.com"}, + Rules: rawJSON(t, `[{"backendRefs":[{"name":"keycloak"}]}]`), + }}, + } + st.Summary = BuildSummary(st) + + got := Hostnames(st) + want := []string{"keycloak-admin", "keycloak-admin.example.com"} + if !reflect.DeepEqual(got, want) { + t.Errorf("got %v, want %v", got, want) + } +} + +// TestHostnames_FromGRPCRouteRoundTrip mirrors the HTTP case for +// gRPC: a GRPCRoute attached to the cluster Gateway contributes +// its hostname to the LB SAN list. (gRPC over TLS through the +// same LB is a real shape; even if rare today, including it +// here costs nothing.) 
+func TestHostnames_FromGRPCRouteRoundTrip(t *testing.T) { + st := &State{ + Gateways: []Gateway{{ + Namespace: "y-cluster", + Name: "y-cluster", + Listeners: []Listener{{Name: "https", Port: 443, Protocol: "HTTPS"}}, + }}, + GRPCRoutes: []GRPCRoute{{ + Namespace: "live-v3", + Name: "live-grpc", + ParentRefs: rawJSON(t, `[{"name":"y-cluster","namespace":"y-cluster"}]`), + Hostnames: []string{"live.example.com"}, + Rules: rawJSON(t, `[{"backendRefs":[{"name":"live"}]}]`), + }}, + } + st.Summary = BuildSummary(st) + + got := Hostnames(st) + if len(got) != 1 || got[0] != "live.example.com" { + t.Errorf("got %v, want [live.example.com]", got) + } +} From 7808a32a1e17b39f5a9864fc2f3273006395f786 Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Thu, 7 May 2026 06:23:22 +0000 Subject: [PATCH 7/7] refactor(gateway): wrap xForwardedFor settings on SummaryListener Two loose props at the listener root (numTrustedHops, trustedCIDRs) didn't carry their context: a consumer reading the JSON saw the values but had to know they were XFF settings, not generic listener tuning. Group them under a `xForwardedFor` wrapper that mirrors the source CRD shape (`spec.clientIPDetection.xForwardedFor` on a ClientTrafficPolicy), at one wrapping level. Single-level wrap matches the only currently-defined detection mechanism in envoy-gateway; if `customHeader` (the alternate clientIPDetection mechanism) becomes relevant, it lands as a sibling at the same level. Schema regenerated; tests updated to walk the new path (`l.XForwardedFor.NumTrustedHops` / `.TrustedCIDRs`). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/gateway/state.schema.json | 25 +++++++++---- pkg/gateway/summary.go | 70 ++++++++++++++++++----------------- pkg/gateway/summary_test.go | 42 ++++++++++++--------- 3 files changed, 77 insertions(+), 60 deletions(-) diff --git a/pkg/gateway/state.schema.json b/pkg/gateway/state.schema.json index 07e96e8..6e06d49 100644 --- a/pkg/gateway/state.schema.json +++ b/pkg/gateway/state.schema.json @@ -462,9 +462,6 @@ "name": { "type": "string" }, - "numTrustedHops": { - "type": "integer" - }, "port": { "type": "integer" }, @@ -474,11 +471,8 @@ "protocol": { "type": "string" }, - "trustedCIDRs": { - "items": { - "type": "string" - }, - "type": "array" + "xForwardedFor": { + "$ref": "#/$defs/XForwardedForSettings" } }, "required": [ @@ -576,6 +570,21 @@ "name" ], "type": "object" + }, + "XForwardedForSettings": { + "additionalProperties": false, + "properties": { + "numTrustedHops": { + "type": "integer" + }, + "trustedCIDRs": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object" } }, "$id": "https://yolean.se/y-cluster/schema/gateway-state.schema.json", diff --git a/pkg/gateway/summary.go b/pkg/gateway/summary.go index e05982c..418ea5d 100644 --- a/pkg/gateway/summary.go +++ b/pkg/gateway/summary.go @@ -56,23 +56,20 @@ type SummaryListener struct { // "envoy-gateway accepted this listener and is serving it". Programmed bool `json:"programmed"` - // NumTrustedHops, when non-nil, is the per-listener - // X-Forwarded-For trust depth applied via a - // ClientTrafficPolicy. Pointer (rather than int + omitempty) - // so a deliberate 0 is distinguishable from "policy absent". - // Listener-level placement is correct for envoy-gateway - // today: ClientTrafficPolicy targets a Gateway (with - // optional sectionName), not individual route matches. 
- NumTrustedHops *int `json:"numTrustedHops,omitempty"` - - // TrustedCIDRs lists the per-listener X-Forwarded-For - // trusted-source CIDRs from the same ClientTrafficPolicy. - // numTrustedHops and trustedCIDRs are alternative tuning - // knobs for the same reverse-proxy-trust problem (count - // hops vs. trust source ranges); newer envoy-gateway - // versions treat them as mutually exclusive on a single - // policy, but a snapshot may surface either or neither. - TrustedCIDRs []string `json:"trustedCIDRs,omitempty"` + // XForwardedFor, when non-nil, is the per-listener + // X-Forwarded-For trust configuration applied via a + // ClientTrafficPolicy. Mirrors the source CRD shape + // (`spec.clientIPDetection.xForwardedFor` on the policy) + // at one wrapping level so a consumer reading the JSON + // sees the field in context rather than two loose + // `numTrustedHops` / `trustedCIDRs` props at the listener + // root. Listener-level placement is correct for + // envoy-gateway today: ClientTrafficPolicy targets a + // Gateway (with optional sectionName), not individual + // route matches. If envoy-gateway's `customHeader` + // alternate detection mechanism becomes relevant, it + // lands as a sibling at the same level here. + XForwardedFor *XForwardedForSettings `json:"xForwardedFor,omitempty"` // Hosts groups routes by the hostname declared on the // underlying HTTPRoute / GRPCRoute. Routes that declare no @@ -81,6 +78,23 @@ type SummaryListener struct { Hosts []SummaryHost `json:"hosts"` } +// XForwardedForSettings projects the +// `spec.clientIPDetection.xForwardedFor` block of a +// ClientTrafficPolicy that targets the surrounding listener. +// Pointer-only fields (`numTrustedHops`) preserve "policy +// absent" vs. "policy says 0"; slices are nil when the policy +// doesn't declare them. +// +// envoy-gateway treats `numTrustedHops` and `trustedCIDRs` as +// alternative tuning knobs for the same reverse-proxy-trust +// problem (count hops vs. 
trust source ranges); newer +// versions treat them as mutually exclusive on a single +// policy, but a snapshot may surface either or neither. +type XForwardedForSettings struct { + NumTrustedHops *int `json:"numTrustedHops,omitempty"` + TrustedCIDRs []string `json:"trustedCIDRs,omitempty"` +} + // SummaryHost groups routes by hostname under one listener. type SummaryHost struct { // Hostname is the literal value declared on the source @@ -194,11 +208,7 @@ func BuildSummary(s *State) *Summary { Hosts: collectHosts(s, gw, l), } if xff := xForwardedForFor(s.ClientTrafficPolicies, gw, l); xff != nil { - if xff.NumTrustedHops != nil { - v := *xff.NumTrustedHops - sl.NumTrustedHops = &v - } - sl.TrustedCIDRs = xff.TrustedCIDRs + sl.XForwardedFor = xff } out.Listeners = append(out.Listeners, sl) } @@ -206,14 +216,6 @@ func BuildSummary(s *State) *Summary { return out } -// xffSettings is the projection of a ClientTrafficPolicy's -// spec.clientIPDetection.xForwardedFor block. Returned by -// xForwardedForFor when at least one CTP applies to the -// listener; nil when no policy applies. -type xffSettings struct { - NumTrustedHops *int - TrustedCIDRs []string -} // listenerProgrammed pulls the Programmed=True signal from the // matching ListenerStatus row. Default false when the controller @@ -229,15 +231,15 @@ func listenerProgrammed(gw Gateway, listenerName string) bool { // xForwardedForFor walks the ClientTrafficPolicy list and // returns the first applicable policy's xForwardedFor block -// projected into xffSettings. Returns nil when no CTP applies -// to this listener. +// projected into XForwardedForSettings. Returns nil when no +// CTP applies to this listener. // // "First" is deterministic because Fetch sorts policies by // (namespace, name) before populating State. Multiple // overlapping policies are rare and a reconciliation conflict // in their own right; consumers wanting full disambiguation // drill into State.ClientTrafficPolicies. 
-func xForwardedForFor(ctps []ClientTrafficPolicy, gw Gateway, l Listener) *xffSettings { +func xForwardedForFor(ctps []ClientTrafficPolicy, gw Gateway, l Listener) *XForwardedForSettings { for _, ctp := range ctps { if !ctpAppliesTo(ctp, gw, l) { continue @@ -257,7 +259,7 @@ func xForwardedForFor(ctps []ClientTrafficPolicy, gw Gateway, l Listener) *xffSe if xff.NumTrustedHops == nil && len(xff.TrustedCIDRs) == 0 { continue } - return &xffSettings{ + return &XForwardedForSettings{ NumTrustedHops: xff.NumTrustedHops, TrustedCIDRs: xff.TrustedCIDRs, } diff --git a/pkg/gateway/summary_test.go b/pkg/gateway/summary_test.go index bd715fe..1c84d48 100644 --- a/pkg/gateway/summary_test.go +++ b/pkg/gateway/summary_test.go @@ -61,8 +61,8 @@ func TestBuildSummary_HTTPRouteServiceBackend(t *testing.T) { if l.Port != 443 || l.Protocol != "HTTPS" || !l.Programmed { t.Errorf("listener port/proto/programmed: %+v", l) } - if l.NumTrustedHops != nil { - t.Errorf("numTrustedHops should be nil with no CTP, got %d", *l.NumTrustedHops) + if l.XForwardedFor != nil { + t.Errorf("xForwardedFor should be nil with no CTP, got %+v", l.XForwardedFor) } if len(l.Hosts) != 1 || l.Hosts[0].Hostname != "keycloak-admin" { t.Fatalf("hosts: %+v", l.Hosts) @@ -257,8 +257,8 @@ func TestBuildSummary_NumTrustedHopsGatewayScope(t *testing.T) { } for _, l := range BuildSummary(st).Listeners { - if l.NumTrustedHops == nil || *l.NumTrustedHops != 1 { - t.Errorf("listener %q numTrustedHops: %v", l.Name, l.NumTrustedHops) + if l.XForwardedFor == nil || l.XForwardedFor.NumTrustedHops == nil || *l.XForwardedFor.NumTrustedHops != 1 { + t.Errorf("listener %q xForwardedFor: %+v", l.Name, l.XForwardedFor) } } } @@ -289,11 +289,11 @@ func TestBuildSummary_NumTrustedHopsListenerScope(t *testing.T) { for _, l := range BuildSummary(st).Listeners { listenersByName[l.Name] = l } - if listenersByName["http"].NumTrustedHops != nil { - t.Errorf("http listener should have nil numTrustedHops: %v", 
listenersByName["http"].NumTrustedHops) + if listenersByName["http"].XForwardedFor != nil { + t.Errorf("http listener should have nil xForwardedFor: %+v", listenersByName["http"].XForwardedFor) } - if h := listenersByName["https"].NumTrustedHops; h == nil || *h != 2 { - t.Errorf("https listener numTrustedHops: %v", h) + if x := listenersByName["https"].XForwardedFor; x == nil || x.NumTrustedHops == nil || *x.NumTrustedHops != 2 { + t.Errorf("https listener xForwardedFor: %+v", x) } } @@ -321,16 +321,19 @@ func TestBuildSummary_TrustedCIDRs(t *testing.T) { } l := BuildSummary(st).Listeners[0] - if l.NumTrustedHops != nil { - t.Errorf("numTrustedHops should be nil with trustedCIDRs-only CTP, got %d", *l.NumTrustedHops) + if l.XForwardedFor == nil { + t.Fatalf("xForwardedFor should be set with trustedCIDRs-only CTP") + } + if l.XForwardedFor.NumTrustedHops != nil { + t.Errorf("numTrustedHops should be nil with trustedCIDRs-only CTP, got %d", *l.XForwardedFor.NumTrustedHops) } want := []string{"10.0.0.0/8", "100.64.0.0/10"} - if len(l.TrustedCIDRs) != len(want) { - t.Fatalf("trustedCIDRs len: got %v want %v", l.TrustedCIDRs, want) + if len(l.XForwardedFor.TrustedCIDRs) != len(want) { + t.Fatalf("trustedCIDRs len: got %v want %v", l.XForwardedFor.TrustedCIDRs, want) } for i, c := range want { - if l.TrustedCIDRs[i] != c { - t.Errorf("trustedCIDRs[%d]=%q want %q", i, l.TrustedCIDRs[i], c) + if l.XForwardedFor.TrustedCIDRs[i] != c { + t.Errorf("trustedCIDRs[%d]=%q want %q", i, l.XForwardedFor.TrustedCIDRs[i], c) } } } @@ -359,11 +362,14 @@ func TestBuildSummary_TrustedCIDRsAndNumTrustedHops(t *testing.T) { } l := BuildSummary(st).Listeners[0] - if l.NumTrustedHops == nil || *l.NumTrustedHops != 2 { - t.Errorf("numTrustedHops: %v", l.NumTrustedHops) + if l.XForwardedFor == nil { + t.Fatalf("xForwardedFor should be set when CTP declares both knobs") + } + if l.XForwardedFor.NumTrustedHops == nil || *l.XForwardedFor.NumTrustedHops != 2 { + t.Errorf("numTrustedHops: %v", 
l.XForwardedFor.NumTrustedHops) } - if len(l.TrustedCIDRs) != 1 || l.TrustedCIDRs[0] != "10.0.0.0/8" { - t.Errorf("trustedCIDRs: %v", l.TrustedCIDRs) + if len(l.XForwardedFor.TrustedCIDRs) != 1 || l.XForwardedFor.TrustedCIDRs[0] != "10.0.0.0/8" { + t.Errorf("trustedCIDRs: %v", l.XForwardedFor.TrustedCIDRs) } }