diff --git a/internal/managementrouter/health_get.go b/internal/managementrouter/health_get.go new file mode 100644 index 000000000..2db846c85 --- /dev/null +++ b/internal/managementrouter/health_get.go @@ -0,0 +1,32 @@ +package managementrouter + +import ( + "encoding/json" + "net/http" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +type GetHealthResponse struct { + Alerting *k8s.AlertingHealth `json:"alerting,omitempty"` +} + +func (hr *httpRouter) GetHealth(w http.ResponseWriter, r *http.Request) { + resp := GetHealthResponse{} + + if hr.managementClient != nil { + health, err := hr.managementClient.GetAlertingHealth(r.Context()) + if err != nil { + handleError(w, err) + return + } + resp.Alerting = &health + } + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Cache-Control", "no-store") + w.WriteHeader(http.StatusOK) + if err := json.NewEncoder(w).Encode(resp); err != nil { + log.WithError(err).Warn("failed to encode health response") + } +} diff --git a/internal/managementrouter/health_get_test.go b/internal/managementrouter/health_get_test.go new file mode 100644 index 000000000..1dc7976a2 --- /dev/null +++ b/internal/managementrouter/health_get_test.go @@ -0,0 +1,261 @@ +package managementrouter_test + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "testing" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" +) + +// stubClient is a configurable stub implementing management.Client. +// Fields are set per-test; all methods default to no-op returns. +type stubClient struct { + getRules func(ctx context.Context, req k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) + alertingHealth func(ctx context.Context) (k8s.AlertingHealth, error) +} + +func (s *stubClient) ListRules(_ context.Context, _ management.PrometheusRuleOptions, _ management.AlertRuleOptions, _ management.PaginationOptions) (management.ListRulesResult, error) { + return management.ListRulesResult{}, nil +} +func (s *stubClient) GetRuleById(_ context.Context, _ string) (monitoringv1.Rule, error) { + return monitoringv1.Rule{}, nil +} +func (s *stubClient) CreateUserDefinedAlertRule(_ context.Context, _ monitoringv1.Rule, _ management.PrometheusRuleOptions) (string, error) { + return "", nil +} +func (s *stubClient) CreatePlatformAlertRule(_ context.Context, _ monitoringv1.Rule) (string, error) { + return "", nil +} +func (s *stubClient) UpdateUserDefinedAlertRule(_ context.Context, _ string, _ monitoringv1.Rule) (string, error) { + return "", nil +} +func (s *stubClient) DeleteUserDefinedAlertRuleById(_ context.Context, _ string) error { return nil } +func (s *stubClient) UpdatePlatformAlertRule(_ context.Context, _ string, _ monitoringv1.Rule) error { + return nil +} +func (s *stubClient) DropPlatformAlertRule(_ context.Context, _ string) error { return nil } +func (s *stubClient) RestorePlatformAlertRule(_ context.Context, _ string) error { return nil } +func (s *stubClient) GetAlerts(_ context.Context, _ k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + return nil, nil +} +func (s *stubClient) GetRules(ctx context.Context, req k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + if s.getRules != nil { + return s.getRules(ctx, req) + } + return []k8s.PrometheusRuleGroup{}, nil +} +func (s *stubClient) GetAlertingHealth(ctx context.Context) (k8s.AlertingHealth, error) { + if s.alertingHealth != nil { + return s.alertingHealth(ctx) + } + return k8s.AlertingHealth{}, nil +} +func (s *stubClient) UpdateAlertRuleClassification(_ context.Context, _ management.UpdateRuleClassificationRequest) error { + return nil +} +func (s *stubClient) BulkUpdateAlertRuleClassification(_ context.Context, _ []management.UpdateRuleClassificationRequest) []error { + return nil +} + +// newStubRouter builds a router backed by stub and adds a Bearer token header +// to requests via the helper get/getNoAuth methods. +func newStubRouter(stub *stubClient) http.Handler { + return managementrouter.New(stub) +} + +func stubGet(router http.Handler, url string) *httptest.ResponseRecorder { + req := httptest.NewRequest(http.MethodGet, url, nil) + req.Header.Set("Authorization", "Bearer test-token") + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + return w +} + +// --- health_get tests --- + +func healthStub() *stubClient { + return &stubClient{ + alertingHealth: func(_ context.Context) (k8s.AlertingHealth, error) { + return k8s.AlertingHealth{ + Platform: &k8s.AlertingStackHealth{ + Prometheus: k8s.AlertingRouteHealth{Name: "prometheus-k8s", Namespace: "openshift-monitoring", Status: k8s.RouteReachable}, + Alertmanager: k8s.AlertingRouteHealth{Name: "alertmanager-main", Namespace: "openshift-monitoring", Status: k8s.RouteReachable}, + }, + UserWorkloadEnabled: true, + UserWorkload: &k8s.AlertingStackHealth{ + Prometheus: k8s.AlertingRouteHealth{Name: "prometheus-user-workload", Namespace: "openshift-user-workload-monitoring", Status: k8s.RouteReachable}, + Alertmanager: k8s.AlertingRouteHealth{Name: "alertmanager-user-workload", Namespace: "openshift-user-workload-monitoring", Status: k8s.RouteReachable}, + }, + }, nil + }, + } +} + +func TestGetHealth_Returns200(t *testing.T) { + router := newStubRouter(healthStub()) + w := stubGet(router, "/api/v1/alerting/health") + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } +} + +func TestGetHealth_ReturnsAlertingStructure(t *testing.T) { + router := newStubRouter(healthStub()) + w := stubGet(router, "/api/v1/alerting/health") + + var response managementrouter.GetHealthResponse + if err := json.NewDecoder(w.Body).Decode(&response); err != nil { + t.Fatalf("decode error: %v", err) + } + if response.Alerting == nil { + t.Error("expected non-nil Alerting in response") + } +} + +func TestGetHealth_Returns500OnError(t *testing.T) { + stub := &stubClient{ + alertingHealth: func(_ context.Context) (k8s.AlertingHealth, error) { + return k8s.AlertingHealth{}, fmt.Errorf("connection refused") + }, + } + router := newStubRouter(stub) + w := stubGet(router, "/api/v1/alerting/health") + + if w.Code != http.StatusInternalServerError { + t.Fatalf("expected 500, got %d: %s", w.Code, w.Body) + } + var errResp map[string]string + if err := json.NewDecoder(w.Body).Decode(&errResp); err != nil { + t.Fatalf("decode error: %v", err) + } + if errResp["error"] != "An unexpected error occurred" { + t.Errorf("unexpected error message: %q", errResp["error"]) + } +} + +// --- rules_get tests --- + +func TestGetRules_ParsesFlatQueryParams(t *testing.T) { + stub := &stubClient{} + var captured k8s.GetRulesRequest + stub.getRules = func(_ context.Context, req k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + captured = req + return []k8s.PrometheusRuleGroup{}, nil + } + router := newStubRouter(stub) + w := stubGet(router, "/api/v1/alerting/rules?namespace=ns1&severity=critical&state=firing&team=sre") + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + if captured.State != "firing" { + t.Errorf("expected state=firing, got %q", captured.State) + } + for k, want := range map[string]string{"namespace": "ns1", "severity": "critical", "team": "sre"} { + if got := captured.Labels[k]; got != want { + t.Errorf("label[%s]: want %q, got %q", k, want, got) + } + } +} + +func TestGetRules_ReturnsGroupsInResponse(t *testing.T) { + stub := &stubClient{ + getRules: func(_ context.Context, _ k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + return []k8s.PrometheusRuleGroup{{Name: "group-a"}}, nil + }, + } + router := newStubRouter(stub) + w := stubGet(router, "/api/v1/alerting/rules") + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + if ct := w.Header().Get("Content-Type"); ct != "application/json" { + t.Errorf("expected application/json, got %q", ct) + } + var response managementrouter.GetRulesResponse + if err := json.NewDecoder(w.Body).Decode(&response); err != nil { + t.Fatalf("decode error: %v", err) + } + if len(response.Data.Groups) != 1 || response.Data.Groups[0].Name != "group-a" { + t.Errorf("unexpected groups: %v", response.Data.Groups) + } +} + +func TestGetRules_WarningWhenUserWorkloadPromRouteMissing(t *testing.T) { + stub := &stubClient{ + alertingHealth: func(_ context.Context) (k8s.AlertingHealth, error) { + return k8s.AlertingHealth{ + UserWorkloadEnabled: true, + UserWorkload: &k8s.AlertingStackHealth{ + Prometheus: k8s.AlertingRouteHealth{Status: k8s.RouteNotFound}, + }, + }, nil + }, + } + router := newStubRouter(stub) + w := stubGet(router, "/api/v1/alerting/rules") + + var response managementrouter.GetRulesResponse + if err := json.NewDecoder(w.Body).Decode(&response); err != nil { + t.Fatalf("decode error: %v", err) + } + found := false + for _, warn := range response.Warnings { + if warn == "user workload Prometheus route is missing" { + found = true + break + } + } + if !found { + t.Errorf("expected Prometheus route warning, got: %v", response.Warnings) + } +} + +func TestGetRules_SuppressesWarningWhenFallbackHealthy(t *testing.T) { + stub := &stubClient{ + alertingHealth: func(_ context.Context) (k8s.AlertingHealth, error) { + return k8s.AlertingHealth{ + UserWorkloadEnabled: true, + UserWorkload: &k8s.AlertingStackHealth{ + Prometheus: k8s.AlertingRouteHealth{Status: k8s.RouteUnreachable, FallbackReachable: true}, + }, + }, nil + }, + } + router := newStubRouter(stub) + w := stubGet(router, "/api/v1/alerting/rules") + + var response managementrouter.GetRulesResponse + if err := json.NewDecoder(w.Body).Decode(&response); err != nil { + t.Fatalf("decode error: %v", err) + } + if len(response.Warnings) != 0 { + t.Errorf("expected no warnings, got: %v", response.Warnings) + } +} + +func TestGetRules_Returns500OnError(t *testing.T) { + stub := &stubClient{ + getRules: func(_ context.Context, _ k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + return nil, fmt.Errorf("connection error") + }, + } + router := newStubRouter(stub) + w := stubGet(router, "/api/v1/alerting/rules") + + if w.Code != http.StatusInternalServerError { + t.Fatalf("expected 500, got %d: %s", w.Code, w.Body) + } + if body := w.Body.String(); !containsStr(body, "An unexpected error occurred") { + t.Errorf("expected error message in body, got: %s", body) + } +} diff --git a/internal/managementrouter/router.go b/internal/managementrouter/router.go index 93326daeb..d5e1688b9 100644 --- a/internal/managementrouter/router.go +++ b/internal/managementrouter/router.go @@ -40,10 +40,12 @@ func New(managementClient management.Client) *mux.Router { BaseURL: "/api/v1/alerting", BaseRouter: r, }) - // GET /alerts and GET /rules are not yet in the OpenAPI spec; registered - // manually until their respective branches add the spec entries. + // GET /alerts, GET /rules, and GET /health are not yet in the OpenAPI + // spec; registered manually until their respective branches add the spec + // entries. r.HandleFunc("/api/v1/alerting/alerts", hr.GetAlerts).Methods(http.MethodGet) r.HandleFunc("/api/v1/alerting/rules", hr.GetRules).Methods(http.MethodGet) + r.HandleFunc("/api/v1/alerting/health", hr.GetHealth).Methods(http.MethodGet) return r } diff --git a/pkg/management/types.go b/pkg/management/types.go index 310c40328..0f7d71f4b 100644 --- a/pkg/management/types.go +++ b/pkg/management/types.go @@ -49,7 +49,7 @@ type Client interface { // GetRules retrieves Prometheus alerting rules and active alerts GetRules(ctx context.Context, req k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) - // GetAlertingHealth retrieves the alerting stack health status + // GetAlertingHealth retrieves alerting health details GetAlertingHealth(ctx context.Context) (k8s.AlertingHealth, error) } @@ -65,7 +65,6 @@ type PrometheusRuleOptions struct { GroupName string `json:"groupName"` } -// AlertRuleOptions specifies additional filtering options for alert rules type AlertRuleOptions struct { // Name filters alert rules by alert name Name string `json:"name,omitempty"` diff --git a/test/e2e/health_test.go b/test/e2e/health_test.go new file mode 100644 index 000000000..d0485293e --- /dev/null +++ b/test/e2e/health_test.go @@ -0,0 +1,57 @@ +package e2e + +import ( + "context" + "encoding/json" + "net/http" + "testing" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + +func TestGetHealth(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + healthURL := f.PluginURL + "/api/v1/alerting/health" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil) + if err != nil { + t.Fatalf("Failed to create HTTP request: %v", err) + } + if f.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + + resp, err := f.HTTPClient().Do(req) + if err != nil { + t.Fatalf("Failed to make health request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("Expected status 200, got %d", resp.StatusCode) + } + + var healthResp struct { + Alerting *k8s.AlertingHealth `json:"alerting"` + } + if err := json.NewDecoder(resp.Body).Decode(&healthResp); err != nil { + t.Fatalf("Failed to decode health response: %v", err) + } + + if healthResp.Alerting == nil { + t.Fatal("Expected 'alerting' field in health response") + } + + if healthResp.Alerting.Platform == nil { + t.Error("Expected 'platform' field in alerting health") + } + + t.Logf("Health response: userWorkloadEnabled=%v", healthResp.Alerting.UserWorkloadEnabled) + t.Log("GET /health e2e test passed successfully") +}