diff --git a/internal/managementrouter/query_filters.go b/internal/managementrouter/query_filters.go index f8e3e5e9d..5f1d58498 100644 --- a/internal/managementrouter/query_filters.go +++ b/internal/managementrouter/query_filters.go @@ -13,23 +13,55 @@ var validStates = map[string]bool{ "silenced": true, } -// parseStateAndLabels returns the optional state filter and label matches. -// Any query param other than "state" is treated as a label match. -// Returns an error if the state value is not one of the known states. -func parseStateAndLabels(q url.Values) (string, map[string]string, error) { +// reservedQueryKeys lists query parameter names that have special meaning +// and must not be treated as label equality filters. +var reservedQueryKeys = map[string]bool{ + "state": true, + "match[]": true, + "limit": true, + "next_token": true, +} + +// parseStateLabelsAndMatchers returns the optional state filter, label equality +// matches, and Prometheus-style label matchers from the query string. +// +// Reserved keys ("state", "match[]") are handled specially. Every other key is +// treated as a label equality filter (e.g. ?severity=critical). 
+// +// match[] values follow upstream Prometheus API conventions and may contain +// equality, inequality, regex, or negative-regex matchers: +// +// ?match[]=severity="critical"&match[]=alertname=~"Kube.*" +func parseStateLabelsAndMatchers(q url.Values) (string, map[string]string, []string, error) { state := strings.ToLower(strings.TrimSpace(q.Get("state"))) if !validStates[state] { - return "", nil, fmt.Errorf("invalid state filter %q: must be one of pending, firing, silenced", q.Get("state")) + return "", nil, nil, fmt.Errorf("invalid state filter %q: must be one of pending, firing, silenced", q.Get("state")) } labels := make(map[string]string) for key, vals := range q { - if key == "state" { + if reservedQueryKeys[key] { continue } if len(vals) > 0 && strings.TrimSpace(vals[0]) != "" { labels[strings.TrimSpace(key)] = strings.TrimSpace(vals[0]) } } - return state, labels, nil + + var matchers []string + for _, raw := range q["match[]"] { + v := strings.TrimSpace(raw) + if v != "" { + matchers = append(matchers, v) + } + } + + return state, labels, matchers, nil +} + +// parseStateAndLabels returns the optional state filter and label matches. +// Any query param other than reserved keys is treated as a label match. 
+func parseStateAndLabels(q url.Values) (string, map[string]string, error) { + state, labels, _, err := parseStateLabelsAndMatchers(q) + return state, labels, err } diff --git a/internal/managementrouter/query_filters_test.go b/internal/managementrouter/query_filters_test.go new file mode 100644 index 000000000..e417245e1 --- /dev/null +++ b/internal/managementrouter/query_filters_test.go @@ -0,0 +1,166 @@ +package managementrouter + +import ( + "net/url" + "testing" +) + +func TestParseStateLabelsAndMatchers(t *testing.T) { + tests := []struct { + name string + query string + wantState string + wantLabels map[string]string + wantMatchers []string + wantMatchersLen int + wantErr bool + }{ + { + name: "empty query", + query: "", + wantState: "", + wantLabels: map[string]string{}, + wantMatchers: nil, + }, + { + name: "state only", + query: "state=firing", + wantState: "firing", + wantLabels: map[string]string{}, + }, + { + name: "flat labels only", + query: "severity=critical&namespace=openshift-monitoring", + wantState: "", + wantLabels: map[string]string{ + "severity": "critical", + "namespace": "openshift-monitoring", + }, + }, + { + name: "match[] only with equality", + query: `match[]=severity="critical"`, + wantState: "", + wantLabels: map[string]string{}, + wantMatchers: []string{ + `severity="critical"`, + }, + }, + { + name: "match[] with regex", + query: `match[]=alertname=~"Kube.*CPU.*"`, + wantState: "", + wantLabels: map[string]string{}, + wantMatchers: []string{ + `alertname=~"Kube.*CPU.*"`, + }, + }, + { + name: "multiple match[] values", + query: `match[]=severity="critical"&match[]=namespace="openshift-monitoring"`, + wantState: "", + wantLabels: map[string]string{}, + wantMatchersLen: 2, + }, + { + name: "mixed flat labels and match[]", + query: `state=firing&team=sre&match[]=severity=~"critical|warning"`, + wantState: "firing", + wantLabels: map[string]string{ + "team": "sre", + }, + wantMatchers: []string{ + `severity=~"critical|warning"`, + }, + 
}, + { + name: "match[] is not treated as a label", + query: `match[]=severity="critical"`, + wantState: "", + wantLabels: map[string]string{}, + }, + { + name: "invalid state", + query: "state=invalid", + wantErr: true, + }, + { + name: "empty match[] values are skipped", + query: `match[]=&match[]=%20&match[]=severity="warning"`, + wantState: "", + wantLabels: map[string]string{}, + wantMatchersLen: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + q, err := url.ParseQuery(tt.query) + if err != nil { + t.Fatalf("invalid test query: %v", err) + } + + state, labels, matchers, err := parseStateLabelsAndMatchers(q) + if tt.wantErr { + if err == nil { + t.Fatal("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if state != tt.wantState { + t.Errorf("state = %q, want %q", state, tt.wantState) + } + + if tt.wantLabels != nil { + if len(labels) != len(tt.wantLabels) { + t.Errorf("labels length = %d, want %d", len(labels), len(tt.wantLabels)) + } + for k, v := range tt.wantLabels { + if labels[k] != v { + t.Errorf("labels[%q] = %q, want %q", k, labels[k], v) + } + } + if _, found := labels["match[]"]; found { + t.Error("match[] should not appear in labels map") + } + } + + if tt.wantMatchers != nil { + if len(matchers) != len(tt.wantMatchers) { + t.Errorf("matchers length = %d, want %d", len(matchers), len(tt.wantMatchers)) + } + for i, want := range tt.wantMatchers { + if i < len(matchers) && matchers[i] != want { + t.Errorf("matchers[%d] = %q, want %q", i, matchers[i], want) + } + } + } + + if tt.wantMatchersLen > 0 && len(matchers) != tt.wantMatchersLen { + t.Errorf("matchers length = %d, want %d", len(matchers), tt.wantMatchersLen) + } + }) + } +} + +func TestParseStateAndLabelsBackcompat(t *testing.T) { + q, _ := url.ParseQuery(`state=firing&severity=critical&match[]=alertname=~"Foo.*"`) + + state, labels, err := parseStateAndLabels(q) + if err != nil { + t.Fatalf("unexpected 
error: %v", err) + } + if state != "firing" { + t.Errorf("state = %q, want %q", state, "firing") + } + if labels["severity"] != "critical" { + t.Errorf("severity = %q, want %q", labels["severity"], "critical") + } + if _, found := labels["match[]"]; found { + t.Error("match[] should not appear in labels map") + } +} diff --git a/internal/managementrouter/router.go b/internal/managementrouter/router.go index 8888648bb..93326daeb 100644 --- a/internal/managementrouter/router.go +++ b/internal/managementrouter/router.go @@ -40,9 +40,10 @@ func New(managementClient management.Client) *mux.Router { BaseURL: "/api/v1/alerting", BaseRouter: r, }) - // GET /alerts is not yet in the OpenAPI spec; registered manually - // until its branch adds the spec entry and generated bindings. + // GET /alerts and GET /rules are not yet in the OpenAPI spec; registered + // manually until their respective branches add the spec entries. r.HandleFunc("/api/v1/alerting/alerts", hr.GetAlerts).Methods(http.MethodGet) + r.HandleFunc("/api/v1/alerting/rules", hr.GetRules).Methods(http.MethodGet) return r } diff --git a/internal/managementrouter/rules_get.go b/internal/managementrouter/rules_get.go new file mode 100644 index 000000000..fe9a59409 --- /dev/null +++ b/internal/managementrouter/rules_get.go @@ -0,0 +1,48 @@ +package managementrouter + +import ( + "encoding/json" + "net/http" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +type GetRulesResponse struct { + Data GetRulesResponseData `json:"data"` + Warnings []string `json:"warnings,omitempty"` +} + +type GetRulesResponseData struct { + Groups []k8s.PrometheusRuleGroup `json:"groups"` +} + +func (hr *httpRouter) GetRules(w http.ResponseWriter, req *http.Request) { + state, labels, matchers, err := parseStateLabelsAndMatchers(req.URL.Query()) + if err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + ctx := req.Context() + + groups, err := hr.managementClient.GetRules(ctx, k8s.GetRulesRequest{ + 
Labels: labels, + Matchers: matchers, + State: state, + }) + if err != nil { + handleError(w, err) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Cache-Control", "no-store") + w.WriteHeader(http.StatusOK) + if err := json.NewEncoder(w).Encode(GetRulesResponse{ + Data: GetRulesResponseData{ + Groups: groups, + }, + Warnings: hr.rulesWarnings(ctx), + }); err != nil { + log.WithError(err).Warn("failed to encode rules response") + } +} diff --git a/pkg/k8s/prometheus_rules_types.go b/pkg/k8s/prometheus_rules_types.go index 3f5c289fb..c41ea4e89 100644 --- a/pkg/k8s/prometheus_rules_types.go +++ b/pkg/k8s/prometheus_rules_types.go @@ -5,6 +5,11 @@ import ( "time" ) +const ( + RuleTypeAlerting = "alerting" + RuleTypeRecording = "recording" +) + // GetRulesRequest holds parameters for filtering rules alerts. type GetRulesRequest struct { // Labels filters rules by exact label equality. The special key "namespace" diff --git a/pkg/management/get_rules.go b/pkg/management/get_rules.go new file mode 100644 index 000000000..f30822d35 --- /dev/null +++ b/pkg/management/get_rules.go @@ -0,0 +1,391 @@ +package management + +import ( + "context" + "fmt" + "math" + "sort" + "strings" + "time" + "unicode" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/promql/parser" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +func (c *client) GetRules(ctx context.Context, req k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + groups, err := c.k8sClient.PrometheusAlerts().GetRules(ctx, req) + if err != nil { + return nil, fmt.Errorf("failed to get prometheus rules: %w", err) + } + + configs := c.k8sClient.RelabeledRules().Config() + relabeledByAlert := 
indexRelabeledRules(c.k8sClient.RelabeledRules().List(ctx)) + applyFilters := req.State != "" || len(req.Labels) > 0 + + // Deduplicate rules that carry the same openshift_io_alert_rule_id across + // groups. This occurs when the same PrometheusRule group name is defined in + // multiple CRDs — Prometheus returns separate groups with identical rules + // that hash to the same ID after enrichment. + seenIDs := make(map[string]struct{}) + + filteredGroups := make([]k8s.PrometheusRuleGroup, 0, len(groups)) + for groupIdx := range groups { + group := groups[groupIdx] + filteredRules := make([]k8s.PrometheusRule, 0, len(group.Rules)) + + for ruleIdx := range group.Rules { + rule := group.Rules[ruleIdx] + if applyFilters && rule.Type != k8s.RuleTypeAlerting { + continue + } + applyRelabeledRuleLabels(&rule, relabeledByAlert) + + if ruleID := rule.Labels[k8s.AlertRuleLabelId]; ruleID != "" { + if _, seen := seenIDs[ruleID]; seen { + continue + } + seenIDs[ruleID] = struct{}{} + } + + if len(rule.Alerts) == 0 { + if applyFilters && rule.Type == k8s.RuleTypeAlerting { + continue + } + filteredRules = append(filteredRules, rule) + continue + } + + relabeledAlerts := make([]k8s.PrometheusRuleAlert, 0, len(rule.Alerts)) + for _, alert := range rule.Alerts { + if alert.State == "pending" || alert.State == "firing" { + if alert.Labels[k8s.AlertSourceLabel] != k8s.AlertSourceUser { + // Apply relabeling to the "real" alert labels only; preserve plugin meta labels. + src := alert.Labels[k8s.AlertSourceLabel] + in := make(map[string]string, len(alert.Labels)) + for k, v := range alert.Labels { + in[k] = v + } + delete(in, k8s.AlertSourceLabel) + + relabeledLabels, keep := relabel.Process(labels.FromMap(in), configs...) 
+ if !keep { + continue + } + alert.Labels = relabeledLabels.Map() + if src != "" { + alert.Labels[k8s.AlertSourceLabel] = src + } + } + } + + if req.State != "" && alert.State != req.State { + continue + } + if !ruleAlertLabelsMatch(&req, &alert) { + continue + } + relabeledAlerts = append(relabeledAlerts, alert) + } + rule.Alerts = relabeledAlerts + + if applyFilters && rule.Type == k8s.RuleTypeAlerting && len(rule.Alerts) == 0 { + continue + } + + filteredRules = append(filteredRules, rule) + } + + group.Rules = filteredRules + if applyFilters && len(group.Rules) == 0 { + continue + } + filteredGroups = append(filteredGroups, group) + } + + return filteredGroups, nil +} + +func indexRelabeledRules(rules []monitoringv1.Rule) map[string][]monitoringv1.Rule { + byAlert := make(map[string][]monitoringv1.Rule, len(rules)) + for _, rule := range rules { + alertName := rule.Alert + if alertName == "" && rule.Labels != nil { + alertName = rule.Labels[managementlabels.AlertNameLabel] + } + if alertName == "" { + continue + } + byAlert[alertName] = append(byAlert[alertName], rule) + } + return byAlert +} + +func relabeledAlertName(rule *monitoringv1.Rule) string { + if rule == nil { + return "" + } + if rule.Alert != "" { + return rule.Alert + } + if rule.Labels != nil { + return rule.Labels[managementlabels.AlertNameLabel] + } + return "" +} + +func applyRelabeledRuleLabels(rule *k8s.PrometheusRule, relabeledByAlert map[string][]monitoringv1.Rule) { + if rule == nil || rule.Name == "" || rule.Type == k8s.RuleTypeRecording { + return + } + + // Preserve plugin meta labels added during API fetch. + source := "" + if rule.Labels != nil { + source = rule.Labels[k8s.AlertSourceLabel] + } + + match := findRelabeledMatch(rule, relabeledByAlert[rule.Name]) + if match == nil || match.Labels == nil { + return + } + + // Replace rule labels with the relabeled cache version so that actions which + // remove/rename labels (e.g. LabelDrop/LabelKeep/LabelMap) are faithfully reflected. 
+ labelsOut := make(map[string]string, len(match.Labels)+1) + for k, v := range match.Labels { + labelsOut[k] = v + } + if source != "" { + labelsOut[k8s.AlertSourceLabel] = source + } + rule.Labels = labelsOut +} + +func findRelabeledMatch(rule *k8s.PrometheusRule, candidates []monitoringv1.Rule) *monitoringv1.Rule { + // Strict match first (preserves correctness when multiple rules share alertname). + for i := range candidates { + candidate := &candidates[i] + if promRuleMatchesRelabeled(rule, candidate) { + return candidate + } + } + + // If relabeling modified rule labels (e.g. severity), strict label matching may fail. + // Retry on a best-effort basis using (alertname, expr, for) only. If this is ambiguous, + // do not guess. + var relaxed *monitoringv1.Rule + for i := range candidates { + candidate := &candidates[i] + if rule == nil || candidate == nil { + continue + } + candidateName := relabeledAlertName(candidate) + if rule.Name == "" || candidateName == "" || rule.Name != candidateName { + continue + } + if canonicalizePromQL(rule.Query) != canonicalizePromQL(candidate.Expr.String()) { + continue + } + if !durationMatches(rule.Duration, candidate.For) { + continue + } + if relaxed != nil { + // ambiguous + relaxed = nil + break + } + relaxed = candidate + } + if relaxed != nil { + return relaxed + } + + // Fallback: if alertname is globally unique, avoid brittle PromQL/metadata matching. + // This helps when Prometheus stringifies PromQL differently than PrometheusRule YAML + // (e.g. label matcher ordering). 
+ if len(candidates) == 1 { + return &candidates[0] + } + return nil +} + +func promRuleMatchesRelabeled(rule *k8s.PrometheusRule, candidate *monitoringv1.Rule) bool { + if rule == nil || candidate == nil { + return false + } + candidateName := relabeledAlertName(candidate) + if rule.Name == "" || candidateName == "" || rule.Name != candidateName { + return false + } + if canonicalizePromQL(rule.Query) != canonicalizePromQL(candidate.Expr.String()) { + return false + } + if !durationMatches(rule.Duration, candidate.For) { + return false + } + if !stringMapEqual(filterBusinessLabels(rule.Labels), filterBusinessLabels(candidate.Labels)) { + return false + } + return true +} + +func canonicalizePromQL(in string) string { + s := strings.TrimSpace(in) + if s == "" { + return "" + } + expr, err := parser.ParseExpr(s) + if err == nil && expr != nil { + parser.Inspect(expr, func(node parser.Node, _ []parser.Node) error { + switch n := node.(type) { + case *parser.VectorSelector: + sort.Slice(n.LabelMatchers, func(i, j int) bool { + mi, mj := n.LabelMatchers[i], n.LabelMatchers[j] + if mi == nil || mj == nil { + return mi != nil + } + if mi.Name != mj.Name { + return mi.Name < mj.Name + } + if mi.Type != mj.Type { + return mi.Type < mj.Type + } + return mi.Value < mj.Value + }) + case *parser.AggregateExpr: + sort.Strings(n.Grouping) + case *parser.BinaryExpr: + if n.VectorMatching != nil { + sort.Strings(n.VectorMatching.MatchingLabels) + sort.Strings(n.VectorMatching.Include) + } + } + return nil + }) + + return expr.String() + } + return normalizeSpaceOutsideQuotes(s) +} + +func normalizeSpaceOutsideQuotes(in string) string { + if in == "" { + return "" + } + in = strings.TrimSpace(in) + + var b strings.Builder + b.Grow(len(in)) + + inQuote := false + escaped := false + pendingSpace := false + lastNoSpaceToken := false + + isNoSpaceToken := func(r rune) bool { + switch r { + case '(', ')', '{', '}', ',', '+', '-', '*', '/', '%', '^', '=', '!', '<', '>': + return true + 
default: + return false + } + } + + for _, r := range in { + if escaped { + if pendingSpace { + if !lastNoSpaceToken { + b.WriteByte(' ') + } + pendingSpace = false + } + b.WriteRune(r) + escaped = false + lastNoSpaceToken = false + continue + } + + if inQuote && r == '\\' { + if pendingSpace { + if !lastNoSpaceToken { + b.WriteByte(' ') + } + pendingSpace = false + } + b.WriteRune(r) + escaped = true + lastNoSpaceToken = false + continue + } + + if r == '"' { + if pendingSpace { + if !lastNoSpaceToken { + b.WriteByte(' ') + } + pendingSpace = false + } + inQuote = !inQuote + b.WriteRune(r) + lastNoSpaceToken = false + continue + } + + if !inQuote && unicode.IsSpace(r) { + pendingSpace = true + continue + } + + if pendingSpace && !lastNoSpaceToken && !isNoSpaceToken(r) { + b.WriteByte(' ') + } + pendingSpace = false + + b.WriteRune(r) + lastNoSpaceToken = !inQuote && isNoSpaceToken(r) + } + + return strings.TrimSpace(b.String()) +} + +func durationMatches(seconds float64, duration *monitoringv1.Duration) bool { + if duration == nil { + return seconds == 0 + } + parsed, err := time.ParseDuration(string(*duration)) + if err != nil { + return false + } + return math.Abs(parsed.Seconds()-seconds) < 0.001 +} + +func stringMapEqual(a, b map[string]string) bool { + if len(a) == 0 && len(b) == 0 { + return true + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if b[k] != v { + return false + } + } + return true +} + +func ruleAlertLabelsMatch(req *k8s.GetRulesRequest, alert *k8s.PrometheusRuleAlert) bool { + for key, value := range req.Labels { + if alertValue, exists := alert.Labels[key]; !exists || alertValue != value { + return false + } + } + + return true +} diff --git a/pkg/management/get_rules_test.go b/pkg/management/get_rules_test.go new file mode 100644 index 000000000..56a5844fe --- /dev/null +++ b/pkg/management/get_rules_test.go @@ -0,0 +1,442 @@ +package management_test + +import ( + "context" + "testing" + + monitoringv1 
"github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/relabel" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +// grFixture builds a management client with a PrometheusAlerts mock returning +// the given groups and a RelabeledRules mock returning the given configs/rules. +type grFixture struct { + groups []k8s.PrometheusRuleGroup + relabelRules []monitoringv1.Rule + relabelConfig []*relabel.Config +} + +func (f grFixture) client(t *testing.T) management.Client { + t.Helper() + mockK8s := &testutils.MockClient{ + PrometheusAlertsFunc: func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetRulesFunc: func(_ context.Context, _ k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + return f.groups, nil + }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(_ context.Context) []monitoringv1.Rule { return f.relabelRules }, + ConfigFunc: func() []*relabel.Config { return f.relabelConfig }, + } + }, + } + return management.New(context.Background(), mockK8s) +} + +// threeAlertGroup returns a rule group containing one alerting rule with +// firing Alert1, pending Alert2, and inactive Alert3. 
+func threeAlertGroup() []k8s.PrometheusRuleGroup { + return []k8s.PrometheusRuleGroup{ + { + Name: "group-a", + Rules: []k8s.PrometheusRule{ + { + Name: "rule-a", + Type: k8s.RuleTypeAlerting, + Alerts: []k8s.PrometheusRuleAlert{ + {State: "firing", Labels: map[string]string{"alertname": "Alert1", "severity": "warning"}}, + {State: "pending", Labels: map[string]string{"alertname": "Alert2", "severity": "critical"}}, + {State: "inactive", Labels: map[string]string{"alertname": "Alert3", "severity": "warning"}}, + }, + }, + }, + }, + } +} + +func dropAlert2ReplaceAlert1Severity() []*relabel.Config { + return []*relabel.Config{ + { + SourceLabels: model.LabelNames{"alertname"}, + Regex: relabel.MustNewRegexp("Alert2"), + Action: relabel.Drop, + NameValidationScheme: model.UTF8Validation, + }, + { + SourceLabels: model.LabelNames{"alertname"}, + Regex: relabel.MustNewRegexp("Alert1"), + TargetLabel: "severity", + Replacement: "critical", + Action: relabel.Replace, + NameValidationScheme: model.UTF8Validation, + }, + } +} + +func TestGetRules_AppliesRelabelConfigsToPendingFiringOnly(t *testing.T) { + f := grFixture{ + groups: threeAlertGroup(), + relabelRules: []monitoringv1.Rule{}, + relabelConfig: dropAlert2ReplaceAlert1Severity(), + } + client := f.client(t) + + groups, err := client.GetRules(context.Background(), k8s.GetRulesRequest{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(groups) != 1 { + t.Fatalf("expected 1 group, got %d", len(groups)) + } + rules := groups[0].Rules + if len(rules) != 1 { + t.Fatalf("expected 1 rule, got %d", len(rules)) + } + alerts := rules[0].Alerts + if len(alerts) != 2 { + t.Fatalf("expected 2 alerts after drop, got %d", len(alerts)) + } + if alerts[0].Labels["alertname"] != "Alert1" || alerts[0].Labels["severity"] != "critical" { + t.Errorf("alert[0]: got alertname=%s severity=%s", alerts[0].Labels["alertname"], alerts[0].Labels["severity"]) + } + if alerts[1].Labels["alertname"] != "Alert3" || 
alerts[1].Labels["severity"] != "warning" { + t.Errorf("alert[1]: got alertname=%s severity=%s", alerts[1].Labels["alertname"], alerts[1].Labels["severity"]) + } +} + +func TestGetRules_FiltersByStateAndLabels(t *testing.T) { + f := grFixture{ + groups: threeAlertGroup(), + relabelRules: []monitoringv1.Rule{}, + relabelConfig: dropAlert2ReplaceAlert1Severity(), + } + client := f.client(t) + + groups, err := client.GetRules(context.Background(), k8s.GetRulesRequest{ + State: "firing", + Labels: map[string]string{"severity": "critical"}, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(groups) != 1 { + t.Fatalf("expected 1 group, got %d", len(groups)) + } + alerts := groups[0].Rules[0].Alerts + if len(alerts) != 1 { + t.Fatalf("expected 1 alert, got %d", len(alerts)) + } + if alerts[0].Labels["alertname"] != "Alert1" || alerts[0].Labels["severity"] != "critical" { + t.Errorf("unexpected alert: %v", alerts[0].Labels) + } +} + +func TestGetRules_DropsNonMatchingRulesWhenFiltered(t *testing.T) { + f := grFixture{ + groups: threeAlertGroup(), + relabelRules: []monitoringv1.Rule{}, + relabelConfig: dropAlert2ReplaceAlert1Severity(), + } + client := f.client(t) + + groups, err := client.GetRules(context.Background(), k8s.GetRulesRequest{ + State: "firing", + Labels: map[string]string{"severity": "does-not-exist"}, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(groups) != 0 { + t.Errorf("expected 0 groups, got %d", len(groups)) + } +} + +func TestGetRules_AddsManagedByLabelsFromRelabeledRules(t *testing.T) { + mockK8s := &testutils.MockClient{ + PrometheusAlertsFunc: func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetRulesFunc: func(_ context.Context, _ k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + return []k8s.PrometheusRuleGroup{ + { + Name: "group-a", + Rules: []k8s.PrometheusRule{ + { + Name: "AlertWithManagedBy", + Type: "alerting", + Query: "up == 0", + 
Labels: map[string]string{"severity": "critical"}, + Annotations: map[string]string{"summary": "test alert"}, + }, + }, + }, + }, nil + }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(_ context.Context) []monitoringv1.Rule { + return []monitoringv1.Rule{ + { + Alert: "AlertWithManagedBy", + Expr: intstr.FromString("up ==\n 0"), + Labels: map[string]string{ + "severity": "critical", + k8s.AlertRuleLabelId: "alert-id-1", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + managementlabels.RuleManagedByLabel: "operator", + managementlabels.RelabelConfigManagedByLabel: "gitops", + }, + Annotations: map[string]string{"summary": "test alert"}, + }, + } + }, + ConfigFunc: func() []*relabel.Config { return []*relabel.Config{} }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + groups, err := client.GetRules(context.Background(), k8s.GetRulesRequest{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(groups) != 1 || len(groups[0].Rules) != 1 { + t.Fatalf("expected 1 group with 1 rule") + } + rule := groups[0].Rules[0] + checks := map[string]string{ + k8s.AlertRuleLabelId: "alert-id-1", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + managementlabels.RuleManagedByLabel: "operator", + managementlabels.RelabelConfigManagedByLabel: "gitops", + } + for k, want := range checks { + if got := rule.Labels[k]; got != want { + t.Errorf("label[%s]: want %q, got %q", k, want, got) + } + } +} + +func TestGetRules_EnrichesWithAllLabelTypes(t *testing.T) { + mockK8s := &testutils.MockClient{ + PrometheusAlertsFunc: func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetRulesFunc: func(_ context.Context, _ k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + return []k8s.PrometheusRuleGroup{ 
+ { + Name: "group-a", + Rules: []k8s.PrometheusRule{ + { + Name: "ARCUpdatedRule", + Type: "alerting", + Query: "up == 0", + Labels: map[string]string{"severity": "warning", k8s.AlertSourceLabel: k8s.AlertSourcePlatform}, + }, + }, + }, + }, nil + }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(_ context.Context) []monitoringv1.Rule { + return []monitoringv1.Rule{ + { + Alert: "ARCUpdatedRule", + Expr: intstr.FromString("up ==\n 0"), + Labels: map[string]string{ + "severity": "critical", + "team": "sre", + k8s.AlertRuleLabelId: "rid-arc-1", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + k8s.AlertRuleClassificationComponentKey: "compute", + k8s.AlertRuleClassificationLayerKey: "cluster", + managementlabels.RuleManagedByLabel: "operator", + managementlabels.RelabelConfigManagedByLabel: "gitops", + }, + }, + } + }, + ConfigFunc: func() []*relabel.Config { return []*relabel.Config{} }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + groups, err := client.GetRules(context.Background(), k8s.GetRulesRequest{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(groups) != 1 || len(groups[0].Rules) != 1 { + t.Fatalf("expected 1 group with 1 rule") + } + rule := groups[0].Rules[0] + checks := map[string]string{ + k8s.AlertSourceLabel: k8s.AlertSourcePlatform, + k8s.AlertRuleLabelId: "rid-arc-1", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + k8s.AlertRuleClassificationComponentKey: "compute", + k8s.AlertRuleClassificationLayerKey: "cluster", + "severity": "critical", + "team": "sre", + managementlabels.RuleManagedByLabel: "operator", + managementlabels.RelabelConfigManagedByLabel: "gitops", + } + for k, want := range checks { + if got := rule.Labels[k]; got != want { + t.Errorf("label[%s]: want %q, got %q", k, want, 
got) + } + } +} + +func TestGetRules_EnrichesWhenAlertFieldEmpty(t *testing.T) { + mockK8s := &testutils.MockClient{ + PrometheusAlertsFunc: func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetRulesFunc: func(_ context.Context, _ k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + return []k8s.PrometheusRuleGroup{ + { + Name: "group-a", + Rules: []k8s.PrometheusRule{ + { + Name: "EmptyAlertFieldRule", + Type: "alerting", + Query: "up == 0", + Labels: map[string]string{"severity": "warning", k8s.AlertSourceLabel: k8s.AlertSourcePlatform}, + }, + }, + }, + }, nil + }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(_ context.Context) []monitoringv1.Rule { + return []monitoringv1.Rule{ + { + Alert: "", + Expr: intstr.FromString("up ==\n 0"), + Labels: map[string]string{ + managementlabels.AlertNameLabel: "EmptyAlertFieldRule", + "severity": "critical", + k8s.AlertRuleLabelId: "rid-empty-alert-1", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + }, + }, + } + }, + ConfigFunc: func() []*relabel.Config { return []*relabel.Config{} }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + groups, err := client.GetRules(context.Background(), k8s.GetRulesRequest{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(groups) != 1 || len(groups[0].Rules) != 1 { + t.Fatalf("expected 1 group with 1 rule") + } + rule := groups[0].Rules[0] + checks := map[string]string{ + k8s.AlertSourceLabel: k8s.AlertSourcePlatform, + k8s.AlertRuleLabelId: "rid-empty-alert-1", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + "severity": "critical", + } + for k, want := range checks { + if got := rule.Labels[k]; got != want { + t.Errorf("label[%s]: want %q, got %q", k, want, got) + } + } +} + +func 
TestGetRules_NoEnrichmentWhenMultipleCandidatesMatch(t *testing.T) { + mockK8s := &testutils.MockClient{ + PrometheusAlertsFunc: func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetRulesFunc: func(_ context.Context, _ k8s.GetRulesRequest) ([]k8s.PrometheusRuleGroup, error) { + return []k8s.PrometheusRuleGroup{ + { + Name: "group-a", + Rules: []k8s.PrometheusRule{ + { + Name: "AmbiguousRule", + Type: "alerting", + Query: "up == 0", + Labels: map[string]string{"severity": "warning", k8s.AlertSourceLabel: k8s.AlertSourcePlatform}, + }, + }, + }, + }, nil + }, + } + }, + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(_ context.Context) []monitoringv1.Rule { + return []monitoringv1.Rule{ + { + Alert: "", + Expr: intstr.FromString("up ==\n 0"), + Labels: map[string]string{ + managementlabels.AlertNameLabel: "AmbiguousRule", + "severity": "critical", + k8s.AlertRuleLabelId: "rid-amb-1", + }, + }, + { + Alert: "", + Expr: intstr.FromString("up==0"), + Labels: map[string]string{ + managementlabels.AlertNameLabel: "AmbiguousRule", + "severity": "critical", + k8s.AlertRuleLabelId: "rid-amb-2", + }, + }, + } + }, + ConfigFunc: func() []*relabel.Config { return []*relabel.Config{} }, + } + }, + } + client := management.New(context.Background(), mockK8s) + + groups, err := client.GetRules(context.Background(), k8s.GetRulesRequest{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(groups) != 1 || len(groups[0].Rules) != 1 { + t.Fatalf("expected 1 group with 1 rule") + } + rule := groups[0].Rules[0] + if rule.Labels[k8s.AlertSourceLabel] != k8s.AlertSourcePlatform { + t.Errorf("expected source=%s, got %s", k8s.AlertSourcePlatform, rule.Labels[k8s.AlertSourceLabel]) + } + if _, hasId := rule.Labels[k8s.AlertRuleLabelId]; hasId { + t.Errorf("expected no AlertRuleLabelId on ambiguous rule, but found: %s", rule.Labels[k8s.AlertRuleLabelId]) + } 
+ if rule.Labels["severity"] != "warning" { + t.Errorf("expected severity=warning (from original), got %s", rule.Labels["severity"]) + } +} diff --git a/pkg/management/list_rules.go b/pkg/management/list_rules.go new file mode 100644 index 000000000..1b3d354eb --- /dev/null +++ b/pkg/management/list_rules.go @@ -0,0 +1,83 @@ +package management + +import ( + "context" + "sort" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +func (c *client) ListRules(ctx context.Context, prOptions PrometheusRuleOptions, arOptions AlertRuleOptions, pgOptions PaginationOptions) (ListRulesResult, error) { + if prOptions.Name != "" && prOptions.Namespace == "" { + return ListRulesResult{}, &ValidationError{Message: "namespace is required when prometheusRuleName is specified"} + } + + allRules := c.k8sClient.RelabeledRules().List(ctx) + var filteredRules []monitoringv1.Rule + + for _, rule := range allRules { + if prOptions.Name != "" && prOptions.Namespace != "" { + namespace := rule.Labels[k8s.PrometheusRuleLabelNamespace] + name := rule.Labels[k8s.PrometheusRuleLabelName] + if namespace != prOptions.Namespace || name != prOptions.Name { + continue + } + } + + if !c.matchesAlertRuleFilters(rule, arOptions) { + continue + } + + filteredRules = append(filteredRules, rule) + } + + sort.Slice(filteredRules, func(i, j int) bool { + return filteredRules[i].Labels[k8s.AlertRuleLabelId] < filteredRules[j].Labels[k8s.AlertRuleLabelId] + }) + + if pgOptions.NextToken != "" { + idx := sort.Search(len(filteredRules), func(i int) bool { + return filteredRules[i].Labels[k8s.AlertRuleLabelId] > pgOptions.NextToken + }) + filteredRules = filteredRules[idx:] + } + + var nextToken string + if pgOptions.Limit > 0 && len(filteredRules) > pgOptions.Limit { + nextToken = filteredRules[pgOptions.Limit-1].Labels[k8s.AlertRuleLabelId] + filteredRules = filteredRules[:pgOptions.Limit] + } + + return 
ListRulesResult{Rules: filteredRules, NextToken: nextToken}, nil +} + +func (c *client) matchesAlertRuleFilters(rule monitoringv1.Rule, arOptions AlertRuleOptions) bool { + // Filter by alert name + if arOptions.Name != "" && string(rule.Alert) != arOptions.Name { + return false + } + + // Filter by source (platform) + if arOptions.Source == k8s.AlertSourcePlatform { + source, exists := rule.Labels[k8s.AlertSourceLabel] + if !exists { + return false + } + + return source == k8s.AlertSourcePlatform + } + + // Filter by labels + if len(arOptions.Labels) > 0 { + for key, value := range arOptions.Labels { + ruleValue, exists := rule.Labels[key] + if !exists || ruleValue != value { + return false + } + } + } + + return true +} diff --git a/pkg/management/list_rules_test.go b/pkg/management/list_rules_test.go new file mode 100644 index 000000000..d3564d382 --- /dev/null +++ b/pkg/management/list_rules_test.go @@ -0,0 +1,444 @@ +package management_test + +import ( + "context" + "errors" + "testing" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var ( + lrRule1 = monitoringv1.Rule{ + Alert: "Alert1", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + k8s.PrometheusRuleLabelNamespace: "namespace1", + k8s.PrometheusRuleLabelName: "rule1", + k8s.AlertRuleLabelId: "rid_aaa", + }, + } + + lrRule2 = monitoringv1.Rule{ + Alert: "Alert2", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "critical", + k8s.PrometheusRuleLabelNamespace: "namespace1", + k8s.PrometheusRuleLabelName: "rule2", + k8s.AlertRuleLabelId: "rid_bbb", + }, + } + + lrRule3 = monitoringv1.Rule{ + Alert: "Alert3", + Expr: intstr.FromString("down == 1"), + Labels: 
map[string]string{ + "severity": "warning", + k8s.PrometheusRuleLabelNamespace: "namespace2", + k8s.PrometheusRuleLabelName: "rule3", + k8s.AlertRuleLabelId: "rid_ccc", + }, + } + + lrPlatformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("node_down == 1"), + Labels: map[string]string{ + "severity": "critical", + k8s.AlertSourceLabel: k8s.AlertSourcePlatform, + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + k8s.AlertRuleLabelId: "rid_ddd", + }, + } + + lrCustomLabelRule = monitoringv1.Rule{ + Alert: "CustomLabelAlert", + Expr: intstr.FromString("custom == 1"), + Labels: map[string]string{ + "severity": "info", + "team": "backend", + "env": "production", + k8s.PrometheusRuleLabelNamespace: "namespace1", + k8s.PrometheusRuleLabelName: "rule1", + k8s.AlertRuleLabelId: "rid_eee", + }, + } +) + +func newListRulesClient(t *testing.T, rules []monitoringv1.Rule) management.Client { + t.Helper() + mockK8s := &testutils.MockClient{ + RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(_ context.Context) []monitoringv1.Rule { return rules }, + } + }, + } + return management.New(context.Background(), mockK8s) +} + +var allLRRules = []monitoringv1.Rule{lrRule1, lrRule2, lrRule3, lrPlatformRule, lrCustomLabelRule} +var noPagination = management.PaginationOptions{} + +func TestListRules_MissingNamespaceReturnsValidationError(t *testing.T) { + client := newListRulesClient(t, allLRRules) + _, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{Name: "rule1"}, + management.AlertRuleOptions{}, + noPagination, + ) + if err == nil { + t.Fatal("expected error, got nil") + } + var ve *management.ValidationError + if !errors.As(err, &ve) { + t.Fatalf("expected ValidationError, got %T: %v", err, err) + } + if !containsSubstring(err.Error(), "namespace is required when prometheusRuleName is specified") 
{ + t.Errorf("unexpected error message: %v", err) + } +} + +func TestListRules_NoFiltersReturnsAll(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 5 { + t.Errorf("expected 5 rules, got %d", len(result.Rules)) + } + if result.NextToken != "" { + t.Errorf("expected no next token, got %q", result.NextToken) + } +} + +func TestListRules_FilterByNameAndNamespace(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{Name: "rule1", Namespace: "namespace1"}, + management.AlertRuleOptions{}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 2 { + t.Fatalf("expected 2 rules, got %d", len(result.Rules)) + } + for _, r := range result.Rules { + if r.Alert != "Alert1" && r.Alert != "CustomLabelAlert" { + t.Errorf("unexpected rule: %s", r.Alert) + } + } +} + +func TestListRules_FilterByNameAndNamespace_NoMatch(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{Name: "nonexistent", Namespace: "namespace1"}, + management.AlertRuleOptions{}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 0 { + t.Errorf("expected 0 rules, got %d", len(result.Rules)) + } +} + +func TestListRules_FilterByAlertName(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{Name: "Alert1"}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 1 || 
result.Rules[0].Alert != "Alert1" { + t.Errorf("expected 1 rule Alert1, got %v", result.Rules) + } +} + +func TestListRules_FilterByAlertName_NoMatch(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{Name: "NonexistentAlert"}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 0 { + t.Errorf("expected 0 rules, got %d", len(result.Rules)) + } +} + +func TestListRules_FilterBySourcePlatform(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{Source: k8s.AlertSourcePlatform}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 1 { + t.Fatalf("expected 1 rule, got %d", len(result.Rules)) + } + if result.Rules[0].Alert != "PlatformAlert" { + t.Errorf("expected PlatformAlert, got %s", result.Rules[0].Alert) + } + if result.Rules[0].Labels[k8s.AlertSourceLabel] != k8s.AlertSourcePlatform { + t.Errorf("expected source=platform, got %s", result.Rules[0].Labels[k8s.AlertSourceLabel]) + } +} + +func TestListRules_FilterBySingleLabel(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{Labels: map[string]string{"severity": "warning"}}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 2 { + t.Errorf("expected 2 warning rules, got %d", len(result.Rules)) + } +} + +func TestListRules_FilterByMultipleLabels(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{Labels: 
map[string]string{"team": "backend", "env": "production"}}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 1 || result.Rules[0].Alert != "CustomLabelAlert" { + t.Errorf("expected 1 CustomLabelAlert, got %v", result.Rules) + } +} + +func TestListRules_FilterByLabels_NoMatch(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{Labels: map[string]string{"nonexistent": "value"}}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 0 { + t.Errorf("expected 0 rules, got %d", len(result.Rules)) + } +} + +func TestListRules_CombinedFilters(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{Name: "rule1", Namespace: "namespace1"}, + management.AlertRuleOptions{Labels: map[string]string{"severity": "warning"}}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 1 || result.Rules[0].Alert != "Alert1" { + t.Errorf("expected 1 Alert1, got %v", result.Rules) + } +} + +func TestListRules_CombinedFilters_NoMatch(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{Name: "rule1", Namespace: "namespace1"}, + management.AlertRuleOptions{Labels: map[string]string{"severity": "critical"}}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 0 { + t.Errorf("expected 0 rules, got %d", len(result.Rules)) + } +} + +func TestListRules_EmptyRelabeledRules(t *testing.T) { + client := newListRulesClient(t, []monitoringv1.Rule{}) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + 
management.AlertRuleOptions{}, + noPagination, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 0 { + t.Errorf("expected 0 rules, got %d", len(result.Rules)) + } +} + +func TestListRules_Pagination_FirstPage(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{}, + management.PaginationOptions{Limit: 2}, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 2 { + t.Fatalf("expected 2 rules, got %d", len(result.Rules)) + } + if result.NextToken == "" { + t.Error("expected next token, got empty") + } + if result.Rules[0].Labels[k8s.AlertRuleLabelId] != "rid_aaa" { + t.Errorf("expected rid_aaa, got %s", result.Rules[0].Labels[k8s.AlertRuleLabelId]) + } + if result.Rules[1].Labels[k8s.AlertRuleLabelId] != "rid_bbb" { + t.Errorf("expected rid_bbb, got %s", result.Rules[1].Labels[k8s.AlertRuleLabelId]) + } +} + +func TestListRules_Pagination_SecondPage(t *testing.T) { + client := newListRulesClient(t, allLRRules) + first, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{}, + management.PaginationOptions{Limit: 2}, + ) + if err != nil { + t.Fatalf("unexpected error on first page: %v", err) + } + + second, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{}, + management.PaginationOptions{Limit: 2, NextToken: first.NextToken}, + ) + if err != nil { + t.Fatalf("unexpected error on second page: %v", err) + } + if len(second.Rules) != 2 { + t.Fatalf("expected 2 rules on second page, got %d", len(second.Rules)) + } + if second.Rules[0].Labels[k8s.AlertRuleLabelId] != "rid_ccc" { + t.Errorf("expected rid_ccc, got %s", second.Rules[0].Labels[k8s.AlertRuleLabelId]) + } + if second.Rules[1].Labels[k8s.AlertRuleLabelId] != "rid_ddd" { + 
t.Errorf("expected rid_ddd, got %s", second.Rules[1].Labels[k8s.AlertRuleLabelId]) + } + if second.NextToken == "" { + t.Error("expected next token for third page") + } +} + +func TestListRules_Pagination_LastPageNoNextToken(t *testing.T) { + client := newListRulesClient(t, allLRRules) + first, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{}, + management.PaginationOptions{Limit: 2}, + ) + if err != nil { + t.Fatalf("page 1 error: %v", err) + } + second, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{}, + management.PaginationOptions{Limit: 2, NextToken: first.NextToken}, + ) + if err != nil { + t.Fatalf("page 2 error: %v", err) + } + third, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{}, + management.PaginationOptions{Limit: 2, NextToken: second.NextToken}, + ) + if err != nil { + t.Fatalf("page 3 error: %v", err) + } + if len(third.Rules) != 1 { + t.Errorf("expected 1 rule on last page, got %d", len(third.Rules)) + } + if third.NextToken != "" { + t.Errorf("expected no next token on last page, got %q", third.NextToken) + } +} + +func TestListRules_Pagination_LimitExceedsTotal(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + management.AlertRuleOptions{}, + management.PaginationOptions{Limit: 100}, + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result.Rules) != 5 { + t.Errorf("expected 5 rules, got %d", len(result.Rules)) + } + if result.NextToken != "" { + t.Errorf("expected no next token, got %q", result.NextToken) + } +} + +func TestListRules_Pagination_SortedByRuleId(t *testing.T) { + client := newListRulesClient(t, allLRRules) + result, err := client.ListRules(context.Background(), + management.PrometheusRuleOptions{}, + 
// containsSubstring reports whether sub occurs anywhere inside s. An empty
// sub is contained in every string, including the empty one.
//
// It is a local stand-in so this test file does not have to import strings
// just for one substring check.
func containsSubstring(s, sub string) bool {
	width := len(sub)
	// Slide a window of len(sub) bytes over s; end is the exclusive upper bound
	// of the window. When sub is longer than s the loop body never runs.
	for end := width; end <= len(s); end++ {
		if s[end-width:end] == sub {
			return true
		}
	}
	return false
}
filters alert rules by alert name + Name string `json:"name,omitempty"` + + // Source filters alert rules by source type (platform or user-defined) + Source string `json:"source,omitempty"` + + // Labels filters alert rules by arbitrary label key-value pairs + Labels map[string]string `json:"labels,omitempty"` +} + +// PaginationOptions controls cursor-based pagination for list endpoints. +type PaginationOptions struct { + // Limit is the maximum number of results to return. Zero means no limit. + Limit int + + // NextToken is an opaque cursor returned by a previous call; results will + // start after the rule identified by this token. + NextToken string +} + +// ListRulesResult holds a page of rules and an optional cursor for the next page. +type ListRulesResult struct { + Rules []monitoringv1.Rule `json:"rules"` + NextToken string `json:"nextToken,omitempty"` +} diff --git a/test/e2e/relabeled_rules_test.go b/test/e2e/relabeled_rules_test.go new file mode 100644 index 000000000..12ceae3ce --- /dev/null +++ b/test/e2e/relabeled_rules_test.go @@ -0,0 +1,290 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "testing" + "time" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + +type listRulesRuleGroup struct { + Name string `json:"name"` + Rules []monitoringv1.Rule `json:"rules"` +} + +type listRulesResponse struct { + Data struct { + Groups []listRulesRuleGroup `json:"groups"` + } `json:"data"` +} + +func listRules(ctx context.Context, f *framework.Framework) ([]monitoringv1.Rule, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, f.PluginURL+"/api/v1/alerting/rules", nil) + if err != 
nil { + return nil, err + } + if f.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + + resp, err := f.HTTPClient().Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + var listResp listRulesResponse + if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil { + return nil, err + } + + var allRules []monitoringv1.Rule + for _, group := range listResp.Data.Groups { + allRules = append(allRules, group.Rules...) + } + return allRules, nil +} + +func TestPrometheusRuleAppearsInMemory(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-prometheus-rule", false) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + testAlertName := "TestAlert" + forDuration := monitoringv1.Duration("5m") + testRule := monitoringv1.Rule{ + Alert: testAlertName, + Expr: intstr.FromString("up == 0"), + For: &forDuration, + Labels: map[string]string{ + "severity": "warning", + }, + Annotations: map[string]string{ + "description": "Test alert for e2e testing", + "summary": "Test alert", + }, + } + + _, err = createPrometheusRule(ctx, f, testNamespace, testRule) + if err != nil { + t.Fatalf("Failed to create PrometheusRule: %v", err) + } + + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + rules, err := listRules(ctx, f) + if err != nil { + t.Logf("Failed to list rules: %v", err) + return false, nil + } + + for _, rule := range rules { + if rule.Alert == testAlertName { + expectedLabels := map[string]string{ + k8s.PrometheusRuleLabelNamespace: testNamespace, + k8s.PrometheusRuleLabelName: "test-prometheus-rule", + } + + if err := 
compareRuleLabels(t, testAlertName, rule.Labels, expectedLabels); err != nil { + return false, err + } + + if _, ok := rule.Labels[k8s.AlertRuleLabelId]; !ok { + t.Errorf("Alert %s missing openshift_io_alert_rule_id label", testAlertName) + return false, fmt.Errorf("alert missing openshift_io_alert_rule_id label") + } + + t.Logf("Found alert %s in memory with all expected labels", testAlertName) + return true, nil + } + } + + t.Logf("Alert %s not found in memory yet (found %d rules)", testAlertName, len(rules)) + return false, nil + }) + + if err != nil { + t.Fatalf("Timeout waiting for alert to appear in memory: %v", err) + } +} + +func TestRelabelAlert(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-relabel-alert", true) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + forDuration := monitoringv1.Duration("5m") + + criticalRule := monitoringv1.Rule{ + Alert: "TestRelabelAlert", + Expr: intstr.FromString("up == 0"), + For: &forDuration, + Labels: map[string]string{ + "severity": "critical", + "team": "web", + }, + Annotations: map[string]string{ + "description": "Critical alert for relabel testing", + "summary": "Critical test alert", + }, + } + + warningRule := monitoringv1.Rule{ + Alert: "TestRelabelAlert", + Expr: intstr.FromString("up == 1"), + For: &forDuration, + Labels: map[string]string{ + "severity": "warning", + "team": "web", + }, + Annotations: map[string]string{ + "description": "Warning alert for relabel testing", + "summary": "Warning test alert", + }, + } + + _, err = createPrometheusRule(ctx, f, testNamespace, criticalRule, warningRule) + if err != nil { + t.Fatalf("Failed to create PrometheusRule: %v", err) + } + + relabelConfigName := "change-critical-team" + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
relabelConfigName, + Namespace: k8s.ClusterMonitoringNamespace, + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname", "severity"}, + Regex: "TestRelabelAlert;critical", + Separator: ";", + TargetLabel: "team", + Replacement: "ops", + Action: "Replace", + }, + }, + }, + } + + _, err = f.Osmv1clientset.MonitoringV1().AlertRelabelConfigs(k8s.ClusterMonitoringNamespace).Create( + ctx, arc, metav1.CreateOptions{}, + ) + if err != nil { + t.Fatalf("Failed to create AlertRelabelConfig: %v", err) + } + defer func() { + err = f.Osmv1clientset.MonitoringV1().AlertRelabelConfigs(k8s.ClusterMonitoringNamespace).Delete(ctx, relabelConfigName, metav1.DeleteOptions{}) + if err != nil { + t.Fatalf("Failed to delete AlertRelabelConfig: %v", err) + } + }() + + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + rules, err := listRules(ctx, f) + if err != nil { + t.Logf("Failed to list rules: %v", err) + return false, nil + } + + foundCriticalWithOps := false + + for _, rule := range rules { + if rule.Alert == "TestRelabelAlert" { + if rule.Labels["team"] == "ops" && rule.Labels["severity"] == "critical" { + t.Logf("Found critical alert with team=ops (relabeling successful)") + foundCriticalWithOps = true + } + } + } + + if foundCriticalWithOps { + t.Logf("Relabeling verified: critical alert has team=ops") + return true, nil + } + + t.Logf("Waiting for relabeling to take effect") + return false, nil + }) + + if err != nil { + t.Fatalf("Timeout waiting for relabeling to take effect: %v", err) + } +} + +func createPrometheusRule(ctx context.Context, f *framework.Framework, namespace string, rules ...monitoringv1.Rule) (*monitoringv1.PrometheusRule, error) { + interval := monitoringv1.Duration("30s") + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-prometheus-rule", + Namespace: namespace, + }, + 
// compareRuleLabels checks that foundLabels contains every key of wantedLabels
// with the same value. Extra keys in foundLabels are ignored.
//
// On the first problem it records a test failure via t.Errorf and returns a
// matching error; it returns nil when all wanted labels are present and equal.
// A nil foundLabels map is always reported as a failure, even when
// wantedLabels is empty.
func compareRuleLabels(t *testing.T, alertName string, foundLabels map[string]string, wantedLabels map[string]string) error {
	t.Helper()

	if foundLabels == nil {
		t.Errorf("Alert %s has no labels", alertName)
		return fmt.Errorf("alert has no labels")
	}

	for name, want := range wantedLabels {
		got, present := foundLabels[name]
		switch {
		case !present:
			t.Errorf("Alert %s missing %s label", alertName, name)
			return fmt.Errorf("alert missing %s label", name)
		case got != want:
			t.Errorf("Alert %s has wrong %s label. Expected %s, got %s",
				alertName, name, want, got)
			return fmt.Errorf("alert has wrong %s label", name)
		}
	}

	return nil
}