diff --git a/api/openapi.yaml b/api/openapi.yaml index 4deda8b93..5ce7e3aa2 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -12,6 +12,52 @@ servers: paths: /rules: + patch: + operationId: BulkUpdateAlertRules + summary: Bulk update alert rules + description: > + Updates one or more alert rules by their stable IDs. Each rule is + updated independently; per-rule status is returned in the response + so partial success is visible to the caller. + Supports label overrides, drop/restore toggles (platform rules only), + and classification label updates. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/BulkUpdateAlertRulesRequest" + responses: + "200": + description: Update results (may include per-rule errors) + content: + application/json: + schema: + $ref: "#/components/schemas/BulkUpdateAlertRulesResponse" + "400": + description: Invalid request body + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "401": + description: Missing or invalid authorization token + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "413": + description: Request body exceeds the 1 MB limit + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "500": + description: Unexpected server error + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" delete: operationId: BulkDeleteUserDefinedAlertRules summary: Bulk delete user-defined alert rules @@ -202,6 +248,9 @@ components: description: The stable alert rule ID that was processed. status_code: type: integer + format: int32 + minimum: 100 + maximum: 599 description: HTTP status code for this rule's deletion result. message: type: string @@ -218,6 +267,96 @@ components: $ref: "#/components/schemas/DeleteAlertRuleResult" description: Per-rule deletion results. + AlertRuleClassificationUpdate: + type: object + description: > + Partial update for alert rule classification labels. + Each field supports three states: omitted (leave unchanged), + null (clear the override), or a string value (set the override). + The three-state semantics require a custom JSON decoder; the Go + type AlertRuleClassificationPatch is used at runtime instead of + the generated struct. + x-go-type: AlertRuleClassificationPatch + properties: + openshift_io_alert_rule_component: + type: string + nullable: true + description: Component classification label override. + openshift_io_alert_rule_layer: + type: string + nullable: true + description: Layer classification label override. + openshift_io_alert_rule_component_from: + type: string + nullable: true + description: Dynamic component source label key. + openshift_io_alert_rule_layer_from: + type: string + nullable: true + description: Dynamic layer source label key. + + BulkUpdateAlertRulesRequest: + type: object + required: + - ruleIds + properties: + ruleIds: + type: array + minItems: 1 + items: + type: string + description: List of stable alert rule IDs to update. + labels: + type: object + additionalProperties: + type: string + nullable: true + description: > + Label key/value pairs to set. A null or empty-string value removes + the label. Omitting this field leaves existing labels unchanged. + alertingRuleEnabled: + type: boolean + nullable: true + description: > + When false, drops (silences) the platform alert rule. + When true, restores a previously dropped rule. 
+ Not applicable to user-defined rules; if set on a user-defined + rule alongside other update fields (labels, classification) that + succeed, the toggle rejection is silently absorbed and the + overall per-rule result is still 204. + classification: + $ref: "#/components/schemas/AlertRuleClassificationUpdate" + + UpdateAlertRuleResult: + type: object + required: + - id + - status_code + properties: + id: + type: string + description: The stable alert rule ID that was processed. + status_code: + type: integer + format: int32 + minimum: 100 + maximum: 599 + description: HTTP status code for this rule's update result. + message: + type: string + description: Error message if update failed; omitted on success. + + BulkUpdateAlertRulesResponse: + type: object + required: + - rules + properties: + rules: + type: array + items: + $ref: "#/components/schemas/UpdateAlertRuleResult" + description: Per-rule update results. + ErrorResponse: type: object required: diff --git a/internal/managementrouter/alert_rule_bulk_update.go b/internal/managementrouter/alert_rule_bulk_update.go new file mode 100644 index 000000000..466e91cd5 --- /dev/null +++ b/internal/managementrouter/alert_rule_bulk_update.go @@ -0,0 +1,218 @@ +package managementrouter + +import ( + "encoding/json" + "errors" + "io" + "net/http" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/management" +) + +func (hr *httpRouter) BulkUpdateAlertRules(w http.ResponseWriter, req *http.Request) { + req.Body = http.MaxBytesReader(w, req.Body, maxRequestBodyBytes) + + body, err := io.ReadAll(req.Body) + if err != nil { + writeError(w, http.StatusRequestEntityTooLarge, "request body too large") + return + } + + // BulkUpdateAlertRulesRequest.Classification is typed as + // *AlertRuleClassificationPatch (via x-go-type in the spec), so the + // three-state omitted/null/string semantics are preserved on decode. 
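+	// Illustrative payload shapes for the three states (the label key and
+	// values here are examples only, not the full schema):
+	//   {"classification": {"openshift_io_alert_rule_component": "etcd"}} sets the override,
+	//   {"classification": {"openshift_io_alert_rule_component": null}}   clears it, and
+	//   {"classification": {}}                                            leaves it unchanged.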
+ var payload BulkUpdateAlertRulesRequest + if err := json.Unmarshal(body, &payload); err != nil { + writeError(w, http.StatusBadRequest, "invalid request body") + return + } + + if len(payload.RuleIds) == 0 { + writeError(w, http.StatusBadRequest, "ruleIds is required") + return + } + + if payload.AlertingRuleEnabled == nil && payload.Labels == nil && payload.Classification == nil { + writeError(w, http.StatusBadRequest, "alertingRuleEnabled (toggle drop/restore) or labels (set/unset) or classification is required") + return + } + + var haveToggle bool + var enabled bool + if payload.AlertingRuleEnabled != nil { + enabled = *payload.AlertingRuleEnabled + haveToggle = true + } + + results := make([]UpdateAlertRuleResult, 0, len(payload.RuleIds)) + + for _, rawId := range payload.RuleIds { + id, err := parseParam(rawId, "ruleId") + if err != nil { + msg := err.Error() + results = append(results, UpdateAlertRuleResult{ + Id: rawId, + StatusCode: int32(http.StatusBadRequest), + Message: &msg, + }) + continue + } + + notAllowedEnabled := false + if haveToggle { + var derr error + if !enabled { + derr = hr.managementClient.DropPlatformAlertRule(req.Context(), id) + } else { + derr = hr.managementClient.RestorePlatformAlertRule(req.Context(), id) + } + if derr != nil { + var na *management.NotAllowedError + if errors.As(derr, &na) { + notAllowedEnabled = true + } else { + status, message := parseError(derr) + results = append(results, UpdateAlertRuleResult{ + Id: id, + StatusCode: int32(status), + Message: &message, + }) + continue + } + } + } + + if payload.Classification != nil { + cl := payload.Classification + update := management.UpdateRuleClassificationRequest{RuleId: id} + if cl.ComponentSet { + update.Component = cl.Component + update.ComponentSet = true + } + if cl.LayerSet { + update.Layer = cl.Layer + update.LayerSet = true + } + if cl.ComponentFromSet { + update.ComponentFrom = cl.ComponentFrom + update.ComponentFromSet = true + } + if cl.LayerFromSet { + update.LayerFrom = cl.LayerFrom + update.LayerFromSet = true + } + + if update.ComponentSet || update.LayerSet || update.ComponentFromSet || update.LayerFromSet { + if err := hr.managementClient.UpdateAlertRuleClassification(req.Context(), update); err != nil { + status, message := parseError(err) + results = append(results, UpdateAlertRuleResult{ + Id: id, + StatusCode: int32(status), + Message: &message, + }) + continue + } + } + } + + if payload.Labels != nil { + currentRule, err := hr.managementClient.GetRuleById(req.Context(), id) + if err != nil { + status, message := parseError(err) + results = append(results, UpdateAlertRuleResult{ + Id: id, + StatusCode: int32(status), + Message: &message, + }) + continue + } + + // platformLabels uses "" to signal "drop this label"; the management + // layer's UpdatePlatformAlertRule interprets "" as a delete directive. + // userLabels is the fully-merged map for user-defined rules where we + // simply omit deleted keys rather than set them to "". 
+ platformLabels := make(map[string]string) + userLabels := make(map[string]string) + for k, v := range currentRule.Labels { + userLabels[k] = v + } + for k, pv := range *payload.Labels { + if pv == nil || *pv == "" { + platformLabels[k] = "" + delete(userLabels, k) + } else { + platformLabels[k] = *pv + userLabels[k] = *pv + } + } + + updatedPlatformRule := monitoringv1.Rule{Labels: platformLabels} + + err = hr.managementClient.UpdatePlatformAlertRule(req.Context(), id, updatedPlatformRule) + if err != nil { + var ve *management.ValidationError + var nf *management.NotFoundError + if errors.As(err, &ve) || errors.As(err, &nf) { + status, message := parseError(err) + results = append(results, UpdateAlertRuleResult{ + Id: id, + StatusCode: int32(status), + Message: &message, + }) + continue + } + + var na *management.NotAllowedError + if errors.As(err, &na) { + updatedUserRule := currentRule + updatedUserRule.Labels = userLabels + + newRuleId, err := hr.managementClient.UpdateUserDefinedAlertRule(req.Context(), id, updatedUserRule) + if err != nil { + status, message := parseError(err) + results = append(results, UpdateAlertRuleResult{ + Id: id, + StatusCode: int32(status), + Message: &message, + }) + continue + } + results = append(results, UpdateAlertRuleResult{ + Id: newRuleId, + StatusCode: int32(http.StatusNoContent), + }) + continue + } + + status, message := parseError(err) + results = append(results, UpdateAlertRuleResult{ + Id: id, + StatusCode: int32(status), + Message: &message, + }) + continue + } + } + + if notAllowedEnabled && payload.Labels == nil && payload.Classification == nil { + results = append(results, UpdateAlertRuleResult{ + Id: id, + StatusCode: int32(http.StatusMethodNotAllowed), + }) + continue + } + + results = append(results, UpdateAlertRuleResult{ + Id: id, + StatusCode: int32(http.StatusNoContent), + }) + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + if err := json.NewEncoder(w).Encode(BulkUpdateAlertRulesResponse{Rules: results}); err != nil { + log.WithError(err).Warn("failed to encode bulk update response") + } +} diff --git a/internal/managementrouter/alert_rule_bulk_update_test.go b/internal/managementrouter/alert_rule_bulk_update_test.go new file mode 100644 index 000000000..3ae77bcee --- /dev/null +++ b/internal/managementrouter/alert_rule_bulk_update_test.go @@ -0,0 +1,560 @@ +package managementrouter_test + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "regexp" + "strings" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +// buFixture holds all mocks and the router under test for bulk-update tests. +// Mutate mockK8s fields, then call rebuild() before the next request. 
+type buFixture struct { + router http.Handler + mockK8sRules *testutils.MockPrometheusRuleInterface + mockK8s *testutils.MockClient + mockRelabeledRules *testutils.MockRelabeledRulesInterface +} + +func (f *buFixture) rebuild() { + mgmt := management.New(context.Background(), f.mockK8s) + f.router = managementrouter.New(mgmt) +} + +func newBUFixture(t *testing.T) *buFixture { + t.Helper() + + userRule1 := monitoringv1.Rule{ + Alert: "user-alert-1", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{"severity": "warning"}, + } + userRule1Id := alertrule.GetAlertingRuleId(&userRule1) + + userRule2 := monitoringv1.Rule{ + Alert: "user-alert-2", + Expr: intstr.FromString("cpu > 80"), + Labels: map[string]string{"severity": "info"}, + } + userRule2Id := alertrule.GetAlertingRuleId(&userRule2) + + platformRule := monitoringv1.Rule{ + Alert: "platform-alert", + Expr: intstr.FromString("memory > 90"), + Labels: map[string]string{"severity": "critical"}, + } + platformRuleId := alertrule.GetAlertingRuleId(&platformRule) + + mockK8sRules := &testutils.MockPrometheusRuleInterface{} + + userPR := monitoringv1.PrometheusRule{} + userPR.Name = "user-pr" + userPR.Namespace = "default" + userPR.Spec.Groups = []monitoringv1.RuleGroup{{ + Name: "g1", + Rules: []monitoringv1.Rule{ + {Alert: userRule1.Alert, Expr: userRule1.Expr, Labels: map[string]string{"severity": "warning", k8s.AlertRuleLabelId: userRule1Id}}, + {Alert: userRule2.Alert, Expr: userRule2.Expr, Labels: map[string]string{"severity": "info", k8s.AlertRuleLabelId: userRule2Id}}, + }, + }} + + platformPR := monitoringv1.PrometheusRule{} + platformPR.Name = "platform-pr" + platformPR.Namespace = "platform-namespace-1" + platformPR.Spec.Groups = []monitoringv1.RuleGroup{{ + Name: "pg1", + Rules: []monitoringv1.Rule{ + {Alert: "platform-alert", Expr: intstr.FromString("memory > 90"), Labels: map[string]string{"severity": "critical"}}, + }, + }} + + mockK8sRules.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "default/user-pr": &userPR, + "platform-namespace-1/platform-pr": &platformPR, + }) + + mockRelabeledRules := &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + switch id { + case userRule1Id: + return monitoringv1.Rule{ + Alert: userRule1.Alert, Expr: userRule1.Expr, + Labels: map[string]string{ + "severity": "warning", k8s.AlertRuleLabelId: userRule1Id, + k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr", + }, + }, true + case userRule2Id: + return monitoringv1.Rule{ + Alert: userRule2.Alert, Expr: userRule2.Expr, + Labels: map[string]string{ + "severity": "info", k8s.AlertRuleLabelId: userRule2Id, + k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr", + }, + }, true + case platformRuleId: + return monitoringv1.Rule{ + Alert: "platform-alert", Expr: intstr.FromString("memory > 90"), + Labels: map[string]string{ + "severity": "critical", k8s.AlertRuleLabelId: platformRuleId, + k8s.PrometheusRuleLabelNamespace: "platform-namespace-1", k8s.PrometheusRuleLabelName: "platform-pr", + }, + }, true + } + return monitoringv1.Rule{}, false + }, + } + + mockK8s := &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockK8sRules }, + NamespaceFunc: func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "platform-namespace-1" || name == "platform-namespace-2" + 
},
+			}
+		},
+		RelabeledRulesFunc: func() k8s.RelabeledRulesInterface { return mockRelabeledRules },
+	}
+
+	f := &buFixture{
+		mockK8sRules:       mockK8sRules,
+		mockK8s:            mockK8s,
+		mockRelabeledRules: mockRelabeledRules,
+	}
+	f.rebuild()
+	return f
+}
+
+// buFixtureIDs returns stable rule IDs for the three default fixture rules in
+// order: user1, user2, platform.
+func buFixtureIDs() (user1, user2, platform string) {
+	r1 := monitoringv1.Rule{Alert: "user-alert-1", Expr: intstr.FromString("up == 0"), Labels: map[string]string{"severity": "warning"}}
+	r2 := monitoringv1.Rule{Alert: "user-alert-2", Expr: intstr.FromString("cpu > 80"), Labels: map[string]string{"severity": "info"}}
+	rp := monitoringv1.Rule{Alert: "platform-alert", Expr: intstr.FromString("memory > 90"), Labels: map[string]string{"severity": "critical"}}
+	return alertrule.GetAlertingRuleId(&r1), alertrule.GetAlertingRuleId(&r2), alertrule.GetAlertingRuleId(&rp)
+}
+
+func (f *buFixture) do(t *testing.T, body any) *httptest.ResponseRecorder {
+	t.Helper()
+	buf, err := json.Marshal(body)
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+	req := httptest.NewRequest(http.MethodPatch, "/api/v1/alerting/rules", bytes.NewReader(buf))
+	req.Header.Set("Authorization", "Bearer test-token")
+	w := httptest.NewRecorder()
+	f.router.ServeHTTP(w, req)
+	return w
+}
+
+func (f *buFixture) decodeResp(t *testing.T, w *httptest.ResponseRecorder) managementrouter.BulkUpdateAlertRulesResponse {
+	t.Helper()
+	var resp managementrouter.BulkUpdateAlertRulesResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode response: %v", err)
+	}
+	return resp
+}
+
+// --- Tests ---
+
+func TestBulkUpdateAlertRules_UpdatesAllUserRules(t *testing.T) {
+	user1Id, user2Id, _ := buFixtureIDs()
+	f := newBUFixture(t)
+
+	expectedId1 := alertrule.GetAlertingRuleId(&monitoringv1.Rule{
+		Alert: "user-alert-1", Expr: intstr.FromString("up == 0"),
+		Labels: map[string]string{"severity": "warning", "component": "api", "team": "backend"},
+	})
+	expectedId2 := alertrule.GetAlertingRuleId(&monitoringv1.Rule{
+		Alert: "user-alert-2", Expr: intstr.FromString("cpu > 80"),
+		Labels: map[string]string{"severity": "info", "component": "api", "team": "backend"},
+	})
+
+	w := f.do(t, map[string]any{
+		"ruleIds": []string{user1Id, user2Id},
+		"labels":  map[string]string{"component": "api", "team": "backend"},
+	})
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body)
+	}
+	resp := f.decodeResp(t, w)
+	if len(resp.Rules) != 2 {
+		t.Fatalf("expected 2 rules, got %d", len(resp.Rules))
+	}
+	if resp.Rules[0].Id != expectedId1 || resp.Rules[0].StatusCode != http.StatusNoContent {
+		t.Errorf("rule[0]: id=%s status=%d", resp.Rules[0].Id, resp.Rules[0].StatusCode)
+	}
+	if resp.Rules[1].Id != expectedId2 || resp.Rules[1].StatusCode != http.StatusNoContent {
+		t.Errorf("rule[1]: id=%s status=%d", resp.Rules[1].Id, resp.Rules[1].StatusCode)
+	}
+}
+
+func TestBulkUpdateAlertRules_DropsLabelWithEmptyString(t *testing.T) {
+	user1Id, _, _ := buFixtureIDs()
+	f := newBUFixture(t)
+
+	// An empty-string label value means "drop", mirroring the null case tested below.
+	expectedId := alertrule.GetAlertingRuleId(&monitoringv1.Rule{
+		Alert: "user-alert-1", Expr: intstr.FromString("up == 0"),
+		Labels: map[string]string{"severity": "critical"},
+	})
+
+	f.mockRelabeledRules.GetFunc = func(_ context.Context, id string) (monitoringv1.Rule, bool) {
+		if id == user1Id {
+			return monitoringv1.Rule{
+				Alert: "user-alert-1", Expr: intstr.FromString("up == 0"),
+				Labels: map[string]string{
+					"severity": "warning",
"team": "backend", + k8s.AlertRuleLabelId: user1Id, k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr", + }, + }, true + } + return monitoringv1.Rule{}, false + } + f.rebuild() + + w := f.do(t, map[string]any{ + "ruleIds": []string{user1Id}, + "labels": map[string]string{"team": "", "severity": "critical"}, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + resp := f.decodeResp(t, w) + if len(resp.Rules) != 1 || resp.Rules[0].Id != expectedId || resp.Rules[0].StatusCode != http.StatusNoContent { + t.Errorf("unexpected result: %+v", resp.Rules) + } +} + +func TestBulkUpdateAlertRules_DropsLabelWithNull(t *testing.T) { + user1Id, _, _ := buFixtureIDs() + f := newBUFixture(t) + + // JSON null for a label key means "drop", same as empty string. + expectedId := alertrule.GetAlertingRuleId(&monitoringv1.Rule{ + Alert: "user-alert-1", Expr: intstr.FromString("up == 0"), + Labels: map[string]string{"severity": "critical"}, + }) + + f.mockRelabeledRules.GetFunc = func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == user1Id { + return monitoringv1.Rule{ + Alert: "user-alert-1", Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", "team": "backend", + k8s.AlertRuleLabelId: user1Id, k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr", + }, + }, true + } + return monitoringv1.Rule{}, false + } + f.rebuild() + + // Send {"team": null, "severity": "critical"} — null drops the label. + body := map[string]any{ + "ruleIds": []string{user1Id}, + "labels": map[string]any{"team": nil, "severity": "critical"}, + } + w := f.do(t, body) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + resp := f.decodeResp(t, w) + if len(resp.Rules) != 1 || resp.Rules[0].Id != expectedId || resp.Rules[0].StatusCode != http.StatusNoContent { + t.Errorf("unexpected result: %+v", resp.Rules) + } +} + +func TestBulkUpdateAlertRules_MixedPlatformAndUserRules(t *testing.T) { + user1Id, _, platformId := buFixtureIDs() + f := newBUFixture(t) + + expectedId1 := alertrule.GetAlertingRuleId(&monitoringv1.Rule{ + Alert: "user-alert-1", Expr: intstr.FromString("up == 0"), + Labels: map[string]string{"severity": "warning", "component": "api"}, + }) + + f.mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{} + } + f.rebuild() + + w := f.do(t, map[string]any{ + "ruleIds": []string{user1Id, platformId}, + "labels": map[string]string{"component": "api"}, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + resp := f.decodeResp(t, w) + if len(resp.Rules) != 2 { + t.Fatalf("expected 2 rules, got %d", len(resp.Rules)) + } + if resp.Rules[0].Id != expectedId1 || resp.Rules[0].StatusCode != http.StatusNoContent { + t.Errorf("rule[0]: id=%s status=%d", resp.Rules[0].Id, resp.Rules[0].StatusCode) + } + if resp.Rules[1].Id != platformId || resp.Rules[1].StatusCode != http.StatusNoContent { + t.Errorf("rule[1]: id=%s status=%d", resp.Rules[1].Id, resp.Rules[1].StatusCode) + } +} + +func TestBulkUpdateAlertRules_InvalidBody(t *testing.T) { + f := newBUFixture(t) + req := httptest.NewRequest(http.MethodPatch, "/api/v1/alerting/rules", bytes.NewBufferString("{")) + req.Header.Set("Authorization", "Bearer test-token") + w := httptest.NewRecorder() + f.router.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + 
t.Fatalf("expected 400, got %d", w.Code) + } + if !strings.Contains(w.Body.String(), "invalid request body") { + t.Errorf("expected 'invalid request body', got: %s", w.Body) + } +} + +func TestBulkUpdateAlertRules_BodyTooLarge(t *testing.T) { + f := newBUFixture(t) + // Build a body larger than maxRequestBodyBytes (1 MB). + large := make([]byte, 1<<20+1) + for i := range large { + large[i] = 'a' + } + req := httptest.NewRequest(http.MethodPatch, "/api/v1/alerting/rules", bytes.NewReader(large)) + req.Header.Set("Authorization", "Bearer test-token") + w := httptest.NewRecorder() + f.router.ServeHTTP(w, req) + + if w.Code != http.StatusRequestEntityTooLarge { + t.Fatalf("expected 413, got %d: %s", w.Code, w.Body) + } + if !strings.Contains(w.Body.String(), "request body too large") { + t.Errorf("expected 'request body too large', got: %s", w.Body) + } +} + +func TestBulkUpdateAlertRules_EmptyRuleIds(t *testing.T) { + f := newBUFixture(t) + w := f.do(t, map[string]any{ + "ruleIds": []string{}, + "labels": map[string]string{"component": "api"}, + }) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } + if !strings.Contains(w.Body.String(), "ruleIds is required") { + t.Errorf("expected 'ruleIds is required', got: %s", w.Body) + } +} + +func TestBulkUpdateAlertRules_MissingAllUpdateFields(t *testing.T) { + user1Id, _, _ := buFixtureIDs() + f := newBUFixture(t) + w := f.do(t, map[string]any{"ruleIds": []string{user1Id}}) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } + if !strings.Contains(w.Body.String(), "alertingRuleEnabled") { + t.Errorf("expected 'alertingRuleEnabled' in message, got: %s", w.Body) + } +} + +func TestBulkUpdateAlertRules_EnabledToggle(t *testing.T) { + user1Id, _, platformId := buFixtureIDs() + f := newBUFixture(t) + + f.mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{} + } + f.rebuild() + + w := f.do(t, map[string]any{ + "ruleIds": []string{platformId, user1Id, "rid_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}, + "alertingRuleEnabled": false, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + resp := f.decodeResp(t, w) + if len(resp.Rules) != 3 { + t.Fatalf("expected 3 rules, got %d", len(resp.Rules)) + } + if resp.Rules[0].Id != platformId || resp.Rules[0].StatusCode != http.StatusNoContent { + t.Errorf("platform[0]: id=%s status=%d", resp.Rules[0].Id, resp.Rules[0].StatusCode) + } + if resp.Rules[1].Id != user1Id || resp.Rules[1].StatusCode != http.StatusMethodNotAllowed { + t.Errorf("user[1]: expected 405, got id=%s status=%d", resp.Rules[1].Id, resp.Rules[1].StatusCode) + } + if resp.Rules[2].StatusCode != http.StatusNotFound { + t.Errorf("missing[2]: expected 404, got status=%d", resp.Rules[2].StatusCode) + } +} + +func TestBulkUpdateAlertRules_MixedNotFound(t *testing.T) { + user1Id, _, _ := buFixtureIDs() + f := newBUFixture(t) + + expectedId := alertrule.GetAlertingRuleId(&monitoringv1.Rule{ + Alert: "user-alert-1", Expr: intstr.FromString("up == 0"), + Labels: map[string]string{"severity": "warning", "component": "api"}, + }) + + f.mockRelabeledRules.GetFunc = func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == user1Id { + return monitoringv1.Rule{ + Alert: "user-alert-1", Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", k8s.AlertRuleLabelId: user1Id, + k8s.PrometheusRuleLabelNamespace: "default", 
k8s.PrometheusRuleLabelName: "user-pr", + }, + }, true + } + return monitoringv1.Rule{}, false + } + f.rebuild() + + w := f.do(t, map[string]any{ + "ruleIds": []string{user1Id, "rid_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}, + "labels": map[string]string{"component": "api"}, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + resp := f.decodeResp(t, w) + if len(resp.Rules) != 2 { + t.Fatalf("expected 2 rules, got %d", len(resp.Rules)) + } + if resp.Rules[0].Id != expectedId || resp.Rules[0].StatusCode != http.StatusNoContent { + t.Errorf("rule[0]: id=%s status=%d", resp.Rules[0].Id, resp.Rules[0].StatusCode) + } + if resp.Rules[1].StatusCode != http.StatusNotFound { + t.Errorf("rule[1]: expected 404, got %d", resp.Rules[1].StatusCode) + } +} + +func TestBulkUpdateAlertRules_InvalidRuleId(t *testing.T) { + user1Id, _, _ := buFixtureIDs() + f := newBUFixture(t) + + expectedId := alertrule.GetAlertingRuleId(&monitoringv1.Rule{ + Alert: "user-alert-1", Expr: intstr.FromString("up == 0"), + Labels: map[string]string{"severity": "warning", "component": "api"}, + }) + + w := f.do(t, map[string]any{ + "ruleIds": []string{user1Id, ""}, + "labels": map[string]string{"component": "api"}, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + resp := f.decodeResp(t, w) + if len(resp.Rules) != 2 { + t.Fatalf("expected 2 rules, got %d", len(resp.Rules)) + } + if resp.Rules[0].Id != expectedId || resp.Rules[0].StatusCode != http.StatusNoContent { + t.Errorf("rule[0]: id=%s status=%d", resp.Rules[0].Id, resp.Rules[0].StatusCode) + } + if resp.Rules[1].StatusCode != http.StatusBadRequest { + t.Errorf("rule[1]: expected 400, got %d", resp.Rules[1].StatusCode) + } + if resp.Rules[1].Message == nil || !strings.Contains(*resp.Rules[1].Message, "missing ruleId") { + t.Errorf("rule[1]: expected 'missing ruleId', got %v", resp.Rules[1].Message) + } +} + +func TestBulkUpdateAlertRules_RestoreToggle(t *testing.T) { + _, _, platformId := buFixtureIDs() + f := newBUFixture(t) + + // Simulate an existing ARC that holds a Drop config for the platform rule. + // RestorePlatformAlertRule will call AlertRelabelConfigs().Get() and then + // Delete() the ARC once the Drop entry is removed (stamp-only ARC gets deleted). 
+ mockARC := &testutils.MockAlertRelabelConfigInterface{} + mockARC.GetFunc = func(_ context.Context, namespace, name string) (*osmv1.AlertRelabelConfig, bool, error) { + arc := &osmv1.AlertRelabelConfig{} + arc.Namespace = namespace + arc.Name = name + arc.Spec.Configs = []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"openshift_io_alert_rule_id"}, + Regex: regexp.QuoteMeta(platformId), + Action: "Drop", + }, + } + return arc, true, nil + } + f.mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { return mockARC } + f.rebuild() + + w := f.do(t, map[string]any{ + "ruleIds": []string{platformId}, + "alertingRuleEnabled": true, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + resp := f.decodeResp(t, w) + if len(resp.Rules) != 1 { + t.Fatalf("expected 1 rule, got %d", len(resp.Rules)) + } + if resp.Rules[0].Id != platformId || resp.Rules[0].StatusCode != http.StatusNoContent { + t.Errorf("restore: id=%s status=%d", resp.Rules[0].Id, resp.Rules[0].StatusCode) + } +} + +func TestBulkUpdateAlertRules_ClassificationOnly(t *testing.T) { + t.Setenv("ENABLE_USER_WORKLOAD_ARCS", "true") + user1Id, user2Id, _ := buFixtureIDs() + + // Build fixture after Setenv so management.New captures the env flag. + f := newBUFixture(t) + f.mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{} + } + f.rebuild() + + w := f.do(t, map[string]any{ + "ruleIds": []string{user1Id, user2Id}, + "classification": map[string]any{ + "openshift_io_alert_rule_component": "team-x", + "openshift_io_alert_rule_layer": "namespace", + }, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body) + } + resp := f.decodeResp(t, w) + if len(resp.Rules) != 2 { + t.Fatalf("expected 2 rules, got %d", len(resp.Rules)) + } + if resp.Rules[0].StatusCode != http.StatusNoContent || resp.Rules[1].StatusCode != http.StatusNoContent { + t.Errorf("expected both rules 204, got %d / %d", resp.Rules[0].StatusCode, resp.Rules[1].StatusCode) + } +} diff --git a/internal/managementrouter/api_generated.go b/internal/managementrouter/api_generated.go index a886b9d8d..e5a5b7c44 100644 --- a/internal/managementrouter/api_generated.go +++ b/internal/managementrouter/api_generated.go @@ -10,6 +10,9 @@ import ( "github.com/gorilla/mux" ) +// AlertRuleClassificationUpdate Partial update for alert rule classification labels. Each field supports three states: omitted (leave unchanged), null (clear the override), or a string value (set the override). The three-state semantics require a custom JSON decoder; the Go type AlertRuleClassificationPatch is used at runtime instead of the generated struct. +type AlertRuleClassificationUpdate = AlertRuleClassificationPatch + // AlertRuleSpec Specification of a Prometheus alerting or recording rule. Maps to prometheus-operator Rule fields. type AlertRuleSpec struct { // Alert Name of the alert. Must be set for alerting rules. @@ -46,6 +49,27 @@ type BulkDeleteAlertRulesResponse struct { Rules []DeleteAlertRuleResult `json:"rules"` } +// BulkUpdateAlertRulesRequest defines model for BulkUpdateAlertRulesRequest. +type BulkUpdateAlertRulesRequest struct { + // AlertingRuleEnabled When false, drops (silences) the platform alert rule. When true, restores a previously dropped rule. 
Not applicable to user-defined rules; if set on a user-defined rule alongside other update fields (labels, classification) that succeed, the toggle rejection is silently absorbed and the overall per-rule result is still 204. + AlertingRuleEnabled *bool `json:"alertingRuleEnabled,omitempty"` + + // Classification Partial update for alert rule classification labels. Each field supports three states: omitted (leave unchanged), null (clear the override), or a string value (set the override). The three-state semantics require a custom JSON decoder; the Go type AlertRuleClassificationPatch is used at runtime instead of the generated struct. + Classification *AlertRuleClassificationUpdate `json:"classification,omitempty"` + + // Labels Label key/value pairs to set. A null or empty-string value removes the label. Omitting this field leaves existing labels unchanged. + Labels *map[string]*string `json:"labels,omitempty"` + + // RuleIds List of stable alert rule IDs to update. + RuleIds []string `json:"ruleIds"` +} + +// BulkUpdateAlertRulesResponse defines model for BulkUpdateAlertRulesResponse. +type BulkUpdateAlertRulesResponse struct { + // Rules Per-rule update results. + Rules []UpdateAlertRuleResult `json:"rules"` +} + // CreateAlertRuleRequest defines model for CreateAlertRuleRequest. type CreateAlertRuleRequest struct { // AlertingRule Specification of a Prometheus alerting or recording rule. Maps to prometheus-operator Rule fields. @@ -70,7 +94,7 @@ type DeleteAlertRuleResult struct { Message *string `json:"message,omitempty"` // StatusCode HTTP status code for this rule's deletion result. - StatusCode int `json:"status_code"` + StatusCode int32 `json:"status_code"` } // ErrorResponse defines model for ErrorResponse. @@ -91,9 +115,24 @@ type PrometheusRuleTarget struct { PrometheusRuleNamespace string `json:"prometheusRuleNamespace"` } +// UpdateAlertRuleResult defines model for UpdateAlertRuleResult. +type UpdateAlertRuleResult struct { + // Id The stable alert rule ID that was processed. + Id string `json:"id"` + + // Message Error message if update failed; omitted on success. + Message *string `json:"message,omitempty"` + + // StatusCode HTTP status code for this rule's update result. + StatusCode int32 `json:"status_code"` +} + // BulkDeleteUserDefinedAlertRulesJSONRequestBody defines body for BulkDeleteUserDefinedAlertRules for application/json ContentType. type BulkDeleteUserDefinedAlertRulesJSONRequestBody = BulkDeleteAlertRulesRequest +// BulkUpdateAlertRulesJSONRequestBody defines body for BulkUpdateAlertRules for application/json ContentType. +type BulkUpdateAlertRulesJSONRequestBody = BulkUpdateAlertRulesRequest + // CreateAlertRuleJSONRequestBody defines body for CreateAlertRule for application/json ContentType. 
type CreateAlertRuleJSONRequestBody = CreateAlertRuleRequest @@ -102,6 +141,9 @@ type ServerInterface interface { // Bulk delete user-defined alert rules // (DELETE /rules) BulkDeleteUserDefinedAlertRules(w http.ResponseWriter, r *http.Request) + // Bulk update alert rules + // (PATCH /rules) + BulkUpdateAlertRules(w http.ResponseWriter, r *http.Request) // Create an alert rule // (POST /rules) CreateAlertRule(w http.ResponseWriter, r *http.Request) @@ -130,6 +172,20 @@ func (siw *ServerInterfaceWrapper) BulkDeleteUserDefinedAlertRules(w http.Respon handler.ServeHTTP(w, r) } +// BulkUpdateAlertRules operation middleware +func (siw *ServerInterfaceWrapper) BulkUpdateAlertRules(w http.ResponseWriter, r *http.Request) { + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.BulkUpdateAlertRules(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + // CreateAlertRule operation middleware func (siw *ServerInterfaceWrapper) CreateAlertRule(w http.ResponseWriter, r *http.Request) { @@ -259,6 +315,8 @@ func HandlerWithOptions(si ServerInterface, options GorillaServerOptions) http.H r.HandleFunc(options.BaseURL+"/rules", wrapper.BulkDeleteUserDefinedAlertRules).Methods("DELETE") + r.HandleFunc(options.BaseURL+"/rules", wrapper.BulkUpdateAlertRules).Methods("PATCH") + r.HandleFunc(options.BaseURL+"/rules", wrapper.CreateAlertRule).Methods("POST") return r diff --git a/internal/managementrouter/router.go b/internal/managementrouter/router.go index 443f4d889..8bb62a765 100644 --- a/internal/managementrouter/router.go +++ b/internal/managementrouter/router.go @@ -40,7 +40,6 @@ func New(managementClient management.Client) *mux.Router { BaseURL: "/api/v1/alerting", BaseRouter: r, }) - return r } diff --git a/internal/managementrouter/user_defined_alert_rule_bulk_delete.go b/internal/managementrouter/user_defined_alert_rule_bulk_delete.go index 167ccceaf..6c130fd4c 100644 --- a/internal/managementrouter/user_defined_alert_rule_bulk_delete.go +++ b/internal/managementrouter/user_defined_alert_rule_bulk_delete.go @@ -27,7 +27,7 @@ func (hr *httpRouter) BulkDeleteUserDefinedAlertRules(w http.ResponseWriter, req msg := err.Error() results = append(results, DeleteAlertRuleResult{ Id: rawId, - StatusCode: http.StatusBadRequest, + StatusCode: int32(http.StatusBadRequest), Message: &msg, }) continue @@ -37,14 +37,14 @@ func (hr *httpRouter) BulkDeleteUserDefinedAlertRules(w http.ResponseWriter, req status, message := parseError(err) results = append(results, DeleteAlertRuleResult{ Id: id, - StatusCode: status, + StatusCode: int32(status), Message: &message, }) continue } results = append(results, DeleteAlertRuleResult{ Id: id, - StatusCode: http.StatusNoContent, + StatusCode: int32(http.StatusNoContent), }) } diff --git a/pkg/management/alert_rule_preconditions.go b/pkg/management/alert_rule_preconditions.go index 3e730156c..ae6939cd9 100644 --- a/pkg/management/alert_rule_preconditions.go +++ b/pkg/management/alert_rule_preconditions.go @@ -8,9 +8,15 @@ import ( "github.com/openshift/monitoring-plugin/pkg/managementlabels" ) +func notAllowedGitOpsEdit() error { + return &NotAllowedError{Message: "This alert is managed by GitOps; edit it in Git."} +} func notAllowedGitOpsRemove() error { return &NotAllowedError{Message: "This alert is managed by GitOps; remove it in Git."} } +func notAllowedOperatorUpdate() error { + return &NotAllowedError{Message: "This alert is managed by an 
operator; it can't be updated and can only be silenced."} +} func notAllowedOperatorDelete() error { return &NotAllowedError{Message: "This alert is managed by an operator; it can't be deleted and can only be silenced."} } @@ -36,6 +42,21 @@ func validateUserDeletePreconditions(relabeled monitoringv1.Rule) error { return nil } +func validateUserUpdatePreconditions(relabeled monitoringv1.Rule, pr *monitoringv1.PrometheusRule) error { + if isRuleManagedByGitOpsLabel(relabeled) { + return notAllowedGitOpsEdit() + } + if isRuleManagedByOperator(relabeled) { + return notAllowedOperatorUpdate() + } + if pr != nil { + if _, operatorManaged := k8s.IsExternallyManagedObject(pr); operatorManaged { + return notAllowedOperatorUpdate() + } + } + return nil +} + func validatePlatformDeletePreconditions(ar *osmv1.AlertingRule) error { if ar != nil { if gitOpsManaged, operatorManaged := k8s.IsExternallyManagedObject(ar); gitOpsManaged { @@ -46,3 +67,49 @@ func validatePlatformDeletePreconditions(ar *osmv1.AlertingRule) error { } return nil } + +// validateGitOpsPreconditions checks only GitOps-related constraints on the +// rule and its parent PrometheusRule. Used by UpdatePlatformAlertRule before +// the ARC is fetched — operator-managed rules are allowed to proceed because +// the ARC path handles them. +func validateGitOpsPreconditions(relabeled monitoringv1.Rule, pr *monitoringv1.PrometheusRule) error { + if isRuleManagedByGitOpsLabel(relabeled) { + return notAllowedGitOpsEdit() + } + if pr != nil { + if gitOpsManaged, _ := k8s.IsExternallyManagedObject(pr); gitOpsManaged { + return notAllowedGitOpsEdit() + } + } + return nil +} + +func validatePlatformUpdatePreconditions(relabeled monitoringv1.Rule, pr *monitoringv1.PrometheusRule, arc *osmv1.AlertRelabelConfig) error { + if isRuleManagedByGitOpsLabel(relabeled) { + return notAllowedGitOpsEdit() + } + if pr != nil { + if gitOpsManaged, _ := k8s.IsExternallyManagedObject(pr); gitOpsManaged { + return notAllowedGitOpsEdit() + } + } + if arc != nil { + if k8s.IsManagedByGitOps(arc.Annotations, arc.Labels) { + return notAllowedGitOpsEdit() + } + } + if isRuleManagedByOperator(relabeled) { + return notAllowedOperatorUpdate() + } + if pr != nil { + if _, operatorManaged := k8s.IsExternallyManagedObject(pr); operatorManaged { + return notAllowedOperatorUpdate() + } + } + if arc != nil { + if _, operatorManaged := k8s.IsExternallyManagedObject(arc); operatorManaged { + return notAllowedOperatorUpdate() + } + } + return nil +} diff --git a/pkg/management/delete_user_defined_alert_rule_by_id.go b/pkg/management/delete_user_defined_alert_rule_by_id.go index e2848e8a3..39c0fe331 100644 --- a/pkg/management/delete_user_defined_alert_rule_by_id.go +++ b/pkg/management/delete_user_defined_alert_rule_by_id.go @@ -64,7 +64,7 @@ func (c *client) deletePlatformAlertRuleById(ctx context.Context, relabeled moni return fmt.Errorf("failed to get AlertingRule %s: %w", arName, err) } if !found || ar == nil { - return &NotFoundError{Resource: "AlertingRule", Id: arName} + return c.deleteUserAlertRuleById(ctx, namespace, name, alertRuleId) } // Common preconditions for platform delete if err := validatePlatformDeletePreconditions(ar); err != nil { @@ -85,9 +85,20 @@ func (c *client) deletePlatformAlertRuleById(ctx context.Context, relabeled moni AdditionalInfo: fmt.Sprintf("alert %q not found in AlertingRule %s", originalRule.Alert, arName), } } - ar.Spec.Groups = newGroups - if err := c.k8sClient.AlertingRules().Update(ctx, *ar); err != nil { - return fmt.Errorf("failed to 
update AlertingRule %s: %w", ar.Name, err) + + if len(newGroups) == 0 { + if err := c.k8sClient.AlertingRules().Delete(ctx, ar.Name); err != nil { + return fmt.Errorf("failed to delete AlertingRule %s: %w", ar.Name, err) + } + } else { + ar.Spec.Groups = newGroups + if err := c.k8sClient.AlertingRules().Update(ctx, *ar); err != nil { + return fmt.Errorf("failed to update AlertingRule %s: %w", ar.Name, err) + } + } + + if err := c.deleteAssociatedARC(ctx, k8s.ClusterMonitoringNamespace, name, alertRuleId); err != nil { + return fmt.Errorf("failed to clean up ARC for platform rule %s: %w", alertRuleId, err) } return nil } @@ -101,6 +112,9 @@ func (c *client) deleteUserAlertRuleById(ctx context.Context, namespace, name, a if !found { return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", namespace, name)} } + if gitOpsManaged, _ := k8s.IsExternallyManagedObject(pr); gitOpsManaged { + return notAllowedGitOpsRemove() + } updated := false var newGroups []monitoringv1.RuleGroup @@ -121,14 +135,39 @@ func (c *client) deleteUserAlertRuleById(ctx context.Context, namespace, name, a if err := c.k8sClient.PrometheusRules().Delete(ctx, pr.Namespace, pr.Name); err != nil { return fmt.Errorf("failed to delete PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) } - return nil + return c.cleanupARCForDeletedRule(ctx, namespace, name, alertRuleId) } pr.Spec.Groups = newGroups if err := c.k8sClient.PrometheusRules().Update(ctx, *pr); err != nil { return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) } - return nil + return c.cleanupARCForDeletedRule(ctx, namespace, name, alertRuleId) +} + +// cleanupARCForDeletedRule attempts to remove any associated ARC after a rule is deleted. +// It determines the ARC namespace from the rule's namespace and silently skips if no ARC exists. +func (c *client) cleanupARCForDeletedRule(ctx context.Context, namespace, name, alertRuleId string) error { + nn := types.NamespacedName{Namespace: namespace, Name: name} + arcNamespace, err := c.arcNamespaceForRule(nn) + if err != nil { + return nil + } + return c.deleteAssociatedARC(ctx, arcNamespace, name, alertRuleId) +} + +// deleteAssociatedARC removes the AlertRelabelConfig associated with an alert rule, if it exists. +// This is best-effort: if the ARC does not exist or is GitOps-managed, it is silently skipped. +func (c *client) deleteAssociatedARC(ctx context.Context, namespace, prName, alertRuleId string) error { + arcName := k8s.GetAlertRelabelConfigName(prName, alertRuleId) + arc, found, err := c.k8sClient.AlertRelabelConfigs().Get(ctx, namespace, arcName) + if err != nil || !found { + return nil + } + if gitOpsManaged, _ := k8s.IsExternallyManagedObject(arc); gitOpsManaged { + return nil + } + return c.k8sClient.AlertRelabelConfigs().Delete(ctx, namespace, arcName) } // removeAlertFromAlertingRuleGroups removes all instances of an alert by alert name across groups. 
diff --git a/pkg/management/get_rule_by_id.go b/pkg/management/get_rule_by_id.go new file mode 100644 index 000000000..e786ee464 --- /dev/null +++ b/pkg/management/get_rule_by_id.go @@ -0,0 +1,16 @@ +package management + +import ( + "context" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" +) + +func (c *client) GetRuleById(ctx context.Context, alertRuleId string) (monitoringv1.Rule, error) { + rule, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found { + return monitoringv1.Rule{}, &NotFoundError{Resource: "AlertRule", Id: alertRuleId} + } + + return rule, nil +} diff --git a/pkg/management/get_rule_by_id_test.go b/pkg/management/get_rule_by_id_test.go new file mode 100644 index 000000000..e9b8ff8eb --- /dev/null +++ b/pkg/management/get_rule_by_id_test.go @@ -0,0 +1,377 @@ +package management_test + +import ( + "context" + "errors" + "maps" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +var ( + grTestRule = monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + k8s.PrometheusRuleLabelNamespace: "test-namespace", + k8s.PrometheusRuleLabelName: "test-rule", + }, + } + grTestRuleId = alertrule.GetAlertingRuleId(&grTestRule) +) + +func newGetRuleClient(t *testing.T) (management.Client, *testutils.MockClient) { + t.Helper() + mockK8s := &testutils.MockClient{} + return management.New(context.Background(), mockK8s), mockK8s +} + +func TestGetRuleById_Found(t *testing.T) { + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == grTestRuleId { + return grTestRule, true + } + return monitoringv1.Rule{}, false + }, + } + } + + rule, err := client.GetRuleById(context.Background(), grTestRuleId) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if rule.Alert != "TestAlert" { + t.Errorf("expected alert %q, got %q", "TestAlert", rule.Alert) + } + if rule.Labels["severity"] != "warning" { + t.Errorf("expected severity %q, got %q", "warning", rule.Labels["severity"]) + } +} + +func TestGetRuleById_NotFound(t *testing.T) { + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, _ string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + } + + _, err := client.GetRuleById(context.Background(), "nonexistent-id") + if err == nil { + t.Fatal("expected NotFoundError") + } + var nf *management.NotFoundError + if !errors.As(err, &nf) { + t.Errorf("expected NotFoundError, got %T: %v", err, err) + } + if nf.Resource != "AlertRule" { + t.Errorf("expected Resource %q, got %q", "AlertRule", nf.Resource) + } + if nf.Id != "nonexistent-id" { + t.Errorf("expected Id %q, got %q", "nonexistent-id", nf.Id) + } +} + +func 
TestGetRuleById_MultipleRules(t *testing.T) { + rule1 := monitoringv1.Rule{Alert: "Alert1", Expr: intstr.FromString("up == 0")} + rule1Id := alertrule.GetAlertingRuleId(&rule1) + rule2 := monitoringv1.Rule{Alert: "Alert2", Expr: intstr.FromString("down == 1")} + rule2Id := alertrule.GetAlertingRuleId(&rule2) + + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + switch id { + case rule1Id: + return rule1, true + case rule2Id: + return rule2, true + } + return monitoringv1.Rule{}, false + }, + } + } + + r1, err := client.GetRuleById(context.Background(), rule1Id) + if err != nil || r1.Alert != "Alert1" { + t.Errorf("rule1: got alert=%q err=%v", r1.Alert, err) + } + r2, err := client.GetRuleById(context.Background(), rule2Id) + if err != nil || r2.Alert != "Alert2" { + t.Errorf("rule2: got alert=%q err=%v", r2.Alert, err) + } +} + +func TestGetRuleById_RecordingRule(t *testing.T) { + recRule := monitoringv1.Rule{ + Record: "job:request_latency_seconds:mean5m", + Expr: intstr.FromString("avg by (job) (request_latency_seconds)"), + } + recRuleId := alertrule.GetAlertingRuleId(&recRule) + + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == recRuleId { + return recRule, true + } + return monitoringv1.Rule{}, false + }, + } + } + + rule, err := client.GetRuleById(context.Background(), recRuleId) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if rule.Record != "job:request_latency_seconds:mean5m" { + t.Errorf("expected record name, got %q", rule.Record) + } +} + +func buildRuleWithManagedBy(base monitoringv1.Rule, ruleId string, prName, prNS string, + ruleManagedBy, relabelManagedBy string) monitoringv1.Rule { + r := base + r.Labels = maps.Clone(base.Labels) + if r.Labels == nil { + r.Labels = make(map[string]string) + } + r.Labels[managementlabels.AlertNameLabel] = r.Alert + r.Labels[k8s.AlertRuleLabelId] = ruleId + r.Labels[k8s.PrometheusRuleLabelNamespace] = prNS + r.Labels[k8s.PrometheusRuleLabelName] = prName + if ruleManagedBy != "" { + r.Labels[managementlabels.RuleManagedByLabel] = ruleManagedBy + } + if relabelManagedBy != "" { + r.Labels[managementlabels.RelabelConfigManagedByLabel] = relabelManagedBy + } + return r +} + +func TestGetRuleById_OperatorManagedByLabel(t *testing.T) { + ctx := context.Background() + mockARC := &testutils.MockAlertRelabelConfigInterface{} + mockNS := &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return false }, + } + + promRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "operator-rule", Namespace: "test-namespace", + OwnerReferences: []metav1.OwnerReference{ + {APIVersion: "apps/v1", Kind: "Deployment", Name: "test-operator", UID: "test-uid"}, + }, + }, + } + ruleManagedBy, relabelManagedBy := k8s.DetermineManagedBy(ctx, mockARC, mockNS, promRule, grTestRuleId) + ruleWithLabel := buildRuleWithManagedBy(grTestRule, grTestRuleId, promRule.Name, promRule.Namespace, ruleManagedBy, relabelManagedBy) + + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) 
(monitoringv1.Rule, bool) { + if id == grTestRuleId { + return ruleWithLabel, true + } + return monitoringv1.Rule{}, false + }, + } + } + + rule, err := client.GetRuleById(context.Background(), grTestRuleId) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if rule.Labels[managementlabels.RuleManagedByLabel] != "operator" { + t.Errorf("expected managed_by=operator, got %q", rule.Labels[managementlabels.RuleManagedByLabel]) + } +} + +func TestGetRuleById_NoManagedByLabelForNormalRule(t *testing.T) { + ctx := context.Background() + mockARC := &testutils.MockAlertRelabelConfigInterface{} + mockNS := &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return false }, + } + + promRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Name: "local-rule", Namespace: "test-namespace"}, + } + ruleManagedBy, relabelManagedBy := k8s.DetermineManagedBy(ctx, mockARC, mockNS, promRule, grTestRuleId) + ruleWithLabel := buildRuleWithManagedBy(grTestRule, grTestRuleId, promRule.Name, promRule.Namespace, ruleManagedBy, relabelManagedBy) + + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == grTestRuleId { + return ruleWithLabel, true + } + return monitoringv1.Rule{}, false + }, + } + } + + rule, err := client.GetRuleById(context.Background(), grTestRuleId) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, ok := rule.Labels[managementlabels.RuleManagedByLabel]; ok { + t.Error("expected no managed_by label for normal rule") + } +} + +func TestGetRuleById_RelabelConfigGitOpsManagedBy(t *testing.T) { + ctx := context.Background() + mockARC := &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, ns, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, Namespace: ns, + Annotations: map[string]string{"argocd.argoproj.io/tracking-id": "test-id"}, + }, + }, true, nil + }, + } + mockNS := &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return true }, + } + + promRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "platform-rule", Namespace: "openshift-monitoring", + OwnerReferences: []metav1.OwnerReference{ + {APIVersion: "apps/v1", Kind: "Deployment", Name: "test-operator", UID: "test-uid"}, + }, + }, + } + ruleManagedBy, relabelManagedBy := k8s.DetermineManagedBy(ctx, mockARC, mockNS, promRule, grTestRuleId) + ruleWithLabel := buildRuleWithManagedBy(grTestRule, grTestRuleId, promRule.Name, promRule.Namespace, ruleManagedBy, relabelManagedBy) + + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == grTestRuleId { + return ruleWithLabel, true + } + return monitoringv1.Rule{}, false + }, + } + } + + rule, err := client.GetRuleById(context.Background(), grTestRuleId) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if rule.Labels[managementlabels.RuleManagedByLabel] != "operator" { + t.Errorf("expected managed_by=operator, got %q", rule.Labels[managementlabels.RuleManagedByLabel]) + } + if rule.Labels[managementlabels.RelabelConfigManagedByLabel] != "gitops" { + 
t.Errorf("expected relabel_config_managed_by=gitops, got %q", rule.Labels[managementlabels.RelabelConfigManagedByLabel]) + } +} + +func TestGetRuleById_GitOpsManagedByLabel(t *testing.T) { + ctx := context.Background() + mockARC := &testutils.MockAlertRelabelConfigInterface{} + mockNS := &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return true }, + } + + promRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "platform-rule", Namespace: "openshift-monitoring", + Annotations: map[string]string{"argocd.argoproj.io/tracking-id": "test-id"}, + }, + } + ruleManagedBy, relabelManagedBy := k8s.DetermineManagedBy(ctx, mockARC, mockNS, promRule, grTestRuleId) + ruleWithLabel := buildRuleWithManagedBy(grTestRule, grTestRuleId, promRule.Name, promRule.Namespace, ruleManagedBy, relabelManagedBy) + + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == grTestRuleId { + return ruleWithLabel, true + } + return monitoringv1.Rule{}, false + }, + } + } + + rule, err := client.GetRuleById(context.Background(), grTestRuleId) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if rule.Labels[managementlabels.RuleManagedByLabel] != "gitops" { + t.Errorf("expected managed_by=gitops, got %q", rule.Labels[managementlabels.RuleManagedByLabel]) + } +} + +func TestGetRuleById_NoRelabelConfigManagedByWhenNotGitOps(t *testing.T) { + ctx := context.Background() + mockARC := &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, ns, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + }, true, nil + }, + } + mockNS := &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return true }, + } + + promRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "platform-rule", Namespace: "openshift-monitoring", + OwnerReferences: []metav1.OwnerReference{ + {APIVersion: "apps/v1", Kind: "Deployment", Name: "test-operator", UID: "test-uid"}, + }, + }, + } + ruleManagedBy, relabelManagedBy := k8s.DetermineManagedBy(ctx, mockARC, mockNS, promRule, grTestRuleId) + ruleWithLabel := buildRuleWithManagedBy(grTestRule, grTestRuleId, promRule.Name, promRule.Namespace, ruleManagedBy, relabelManagedBy) + + client, mockK8s := newGetRuleClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == grTestRuleId { + return ruleWithLabel, true + } + return monitoringv1.Rule{}, false + }, + } + } + + rule, err := client.GetRuleById(context.Background(), grTestRuleId) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if rule.Labels[managementlabels.RuleManagedByLabel] != "operator" { + t.Errorf("expected managed_by=operator, got %q", rule.Labels[managementlabels.RuleManagedByLabel]) + } + if _, ok := rule.Labels[managementlabels.RelabelConfigManagedByLabel]; ok { + t.Error("expected no relabel_config_managed_by label when ARC not GitOps-managed") + } +} diff --git a/pkg/management/label_utils.go b/pkg/management/label_utils.go index a4d4bb2df..3c3d6c211 100644 --- a/pkg/management/label_utils.go +++ b/pkg/management/label_utils.go @@ 
-1,6 +1,20 @@ package management
-import "strings"
+import (
+ "strings"
+
+ "github.com/openshift/monitoring-plugin/pkg/k8s"
+ "github.com/openshift/monitoring-plugin/pkg/managementlabels"
+)
+
+var protectedLabels = map[string]bool{
+ managementlabels.AlertNameLabel: true,
+ k8s.AlertRuleLabelId: true,
+}
+
+func isProtectedLabel(label string) bool {
+ return protectedLabels[label]
+}
 var validSeverities = map[string]bool{
 "critical": true,
diff --git a/pkg/management/types.go b/pkg/management/types.go
index 96fb4cd7d..b636cfc50 100644
--- a/pkg/management/types.go
+++ b/pkg/management/types.go
@@ -8,15 +8,32 @@ import (
 // Client is the interface for managing alert rules
 type Client interface {
+ // GetRuleById retrieves a specific alert rule by its ID
+ GetRuleById(ctx context.Context, alertRuleId string) (monitoringv1.Rule, error)
+
 // CreateUserDefinedAlertRule creates a new user-defined alert rule
 CreateUserDefinedAlertRule(ctx context.Context, alertRule monitoringv1.Rule, prOptions PrometheusRuleOptions) (alertRuleId string, err error)
+ // UpdateUserDefinedAlertRule updates an existing user-defined alert rule by its ID.
+ // Returns the rule's new content-derived ID, which may differ from the ID passed in.
+ UpdateUserDefinedAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) (newRuleId string, err error)
+
 // DeleteUserDefinedAlertRuleById deletes a user-defined alert rule by its ID
 DeleteUserDefinedAlertRuleById(ctx context.Context, alertRuleId string) error
 // CreatePlatformAlertRule creates a new platform alert rule
 CreatePlatformAlertRule(ctx context.Context, alertRule monitoringv1.Rule) (alertRuleId string, err error)
+ // UpdatePlatformAlertRule updates an existing platform alert rule by its ID.
+ // Only the rule's labels can be updated, via AlertRelabelConfigs (or the owning AlertingRule CR).
+ UpdatePlatformAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) error
+
+ // DropPlatformAlertRule hides a platform alert by adding a scoped Drop relabel entry
+ DropPlatformAlertRule(ctx context.Context, alertRuleId string) error
+
+ // RestorePlatformAlertRule restores a previously dropped platform alert by removing its Drop relabel entry
+ RestorePlatformAlertRule(ctx context.Context, alertRuleId string) error
+
 // UpdateAlertRuleClassification updates component/layer for a single alert rule id
 UpdateAlertRuleClassification(ctx context.Context, req UpdateRuleClassificationRequest) error
 // BulkUpdateAlertRuleClassification updates classification for multiple rule ids
diff --git a/pkg/management/update_platform_alert_rule.go b/pkg/management/update_platform_alert_rule.go
new file mode 100644
index 000000000..9b2680545
--- /dev/null
+++ b/pkg/management/update_platform_alert_rule.go
@@ -0,0 +1,417 @@
+package management
+
+import (
+ "context"
+ "fmt"
+ "regexp"
+
+ osmv1 "github.com/openshift/api/monitoring/v1"
+ monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+ "k8s.io/apimachinery/pkg/types"
+
+ "github.com/openshift/monitoring-plugin/pkg/k8s"
+ "github.com/openshift/monitoring-plugin/pkg/managementlabels"
+)
+
+// arcNamespaceForRule returns the ARC namespace for the given rule's PrometheusRule.
+// Platform rules use openshift-monitoring. User-defined workload rules use
+// openshift-user-workload-monitoring when ENABLE_USER_WORKLOAD_ARCS is enabled.
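+//
+// A minimal sketch of the intended behavior (illustrative values; the mocks
+// in the tests treat openshift-monitoring as the cluster-monitoring namespace):
+//
+//	ns, err := c.arcNamespaceForRule(types.NamespacedName{
+//		Namespace: "openshift-monitoring", Name: "platform-rule",
+//	})
+//	// ns == k8s.ClusterMonitoringNamespace, err == nil
+//
+//	ns, err = c.arcNamespaceForRule(types.NamespacedName{
+//		Namespace: "user-namespace", Name: "user-rule",
+//	})
+//	// ns == k8s.UserWorkloadMonitoringNamespace when enableUserWorkloadARCs
+//	// is set; otherwise err is a *NotAllowedError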
+func (c *client) arcNamespaceForRule(nn types.NamespacedName) (string, error) { + if c.isPlatformManagedPrometheusRule(nn) { + return k8s.ClusterMonitoringNamespace, nil + } + if !c.enableUserWorkloadARCs { + return "", &NotAllowedError{ + Message: fmt.Sprintf("alert rule management for user-defined workload rules requires ENABLE_USER_WORKLOAD_ARCS (%s/%s)", nn.Namespace, nn.Name), + } + } + return k8s.UserWorkloadMonitoringNamespace, nil +} + +func (c *client) UpdatePlatformAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) error { + rule, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found { + return &NotFoundError{Resource: "AlertRule", Id: alertRuleId} + } + + namespace := rule.Labels[k8s.PrometheusRuleLabelNamespace] + name := rule.Labels[k8s.PrometheusRuleLabelName] + nn := types.NamespacedName{Namespace: namespace, Name: name} + + arcNamespace, err := c.arcNamespaceForRule(nn) + if err != nil { + return err + } + isPlatform := c.isPlatformManagedPrometheusRule(nn) + + var prMeta *monitoringv1.PrometheusRule + if pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, namespace, name); err != nil { + return err + } else if found { + prMeta = pr + } + if err := validateGitOpsPreconditions(rule, prMeta); err != nil { + return err + } + + originalRule, err := getOriginalPlatformRuleFromPR(prMeta, namespace, name, alertRuleId) + if err != nil { + return err + } + + if v, ok := alertRule.Labels[managementlabels.AlertNameLabel]; ok { + if v != originalRule.Alert { + return &ValidationError{Message: fmt.Sprintf("label %q is immutable", managementlabels.AlertNameLabel)} + } + } + + // AlertingRule CR path is only available for platform rules + if isPlatform { + arName := rule.Labels[managementlabels.AlertingRuleLabelName] + if arName == "" { + arName = defaultAlertingRuleName + } + ar, arFound, arErr := c.getAlertingRule(ctx, arName) + if arErr != nil { + return arErr + } + if arFound && ar != nil { + if gitOpsManaged, operatorManaged := k8s.IsExternallyManagedObject(ar); gitOpsManaged { + return &NotAllowedError{Message: "This alert is managed by GitOps; edit it in Git."} + } else if operatorManaged { + return c.applyLabelChangesViaAlertRelabelConfig(ctx, arcNamespace, alertRuleId, *originalRule, alertRule.Labels) + } + return c.updateAlertingRuleLabels(ctx, ar, originalRule.Alert, alertRuleId, alertRule.Labels, arName) + } + } + + return c.applyLabelChangesViaAlertRelabelConfig(ctx, arcNamespace, alertRuleId, *originalRule, alertRule.Labels) +} + +func filterAndValidatePlatformLabelChanges(labels map[string]string) (map[string]string, error) { + filtered := make(map[string]string) + for k, v := range labels { + if !isProtectedLabel(k) { + filtered[k] = v + } + } + for k, v := range filtered { + if k == managementlabels.AlertNameLabel { + continue + } + if k == "severity" { + if v == "" { + return nil, &NotAllowedError{Message: fmt.Sprintf("label %q cannot be dropped for platform alerts", k)} + } + if !isValidSeverity(v) { + return nil, &ValidationError{Message: fmt.Sprintf("invalid severity %q: must be one of critical|warning|info|none", v)} + } + } + } + return filtered, nil +} + +func (c *client) getAlertingRule(ctx context.Context, name string) (*osmv1.AlertingRule, bool, error) { + ar, found, err := c.k8sClient.AlertingRules().Get(ctx, name) + if err != nil { + return nil, false, fmt.Errorf("failed to get AlertingRule %s: %w", name, err) + } + return ar, found, nil +} + +func (c *client) updateAlertingRuleLabels( + ctx context.Context, 
+ ar *osmv1.AlertingRule, + originalAlertName string, + alertRuleId string, + rawLabels map[string]string, + arName string, +) error { + filteredLabels, err := filterAndValidatePlatformLabelChanges(rawLabels) + if err != nil { + return err + } + target, found := findAlertByNameInAlertingRule(ar, originalAlertName) + if !found || target == nil { + return &NotFoundError{ + Resource: "AlertRule", + Id: alertRuleId, + AdditionalInfo: fmt.Sprintf("alert %q not found in AlertingRule %s", originalAlertName, arName), + } + } + if target.Labels == nil { + target.Labels = map[string]string{} + } + for k, v := range filteredLabels { + if v == "" { + delete(target.Labels, k) + } else { + target.Labels[k] = v + } + } + if err := c.k8sClient.AlertingRules().Update(ctx, *ar); err != nil { + return fmt.Errorf("failed to update AlertingRule %s: %w", ar.Name, err) + } + return nil +} + +func findAlertByNameInAlertingRule(ar *osmv1.AlertingRule, alertName string) (*osmv1.Rule, bool) { + for gi := range ar.Spec.Groups { + for ri := range ar.Spec.Groups[gi].Rules { + r := &ar.Spec.Groups[gi].Rules[ri] + if r.Alert == alertName { + return r, true + } + } + } + return nil, false +} + +func (c *client) applyLabelChangesViaAlertRelabelConfig(ctx context.Context, namespace string, alertRuleId string, originalRule monitoringv1.Rule, rawLabels map[string]string) error { + filtered, err := filterAndValidatePlatformLabelChanges(rawLabels) + if err != nil { + return err + } + relabeled, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found || relabeled.Labels == nil { + return &NotFoundError{ + Resource: "AlertRule", + Id: alertRuleId, + AdditionalInfo: "relabeled rule not found or has no labels", + } + } + prName := relabeled.Labels[k8s.PrometheusRuleLabelName] + arcName := k8s.GetAlertRelabelConfigName(prName, alertRuleId) + + existingArc, found, err := c.k8sClient.AlertRelabelConfigs().Get(ctx, namespace, arcName) + if err != nil { + return fmt.Errorf("failed to get AlertRelabelConfig %s/%s: %w", namespace, arcName, err) + } + if err := validatePlatformUpdatePreconditions(relabeled, nil, relabelConfigIfFound(found, existingArc)); err != nil { + return err + } + + original := copyStringMap(originalRule.Labels) + existingOverrides, existingDrops := collectExistingFromARC(found, existingArc) + existingRuleDrops := getExistingRuleDrops(existingArc, alertRuleId) + effective := computeEffectiveLabels(original, existingOverrides, existingDrops) + + if len(filtered) == 0 { + return nil + } + + desired := buildDesiredLabels(effective, filtered) + nextChanges := buildNextLabelChanges(original, desired) + + if len(nextChanges) == 0 { + if found { + if err := c.k8sClient.AlertRelabelConfigs().Delete(ctx, namespace, arcName); err != nil { + return fmt.Errorf("failed to delete AlertRelabelConfig %s/%s: %w", namespace, arcName, err) + } + } + return nil + } + + relabelConfigs := buildRelabelConfigs(originalRule.Alert, original, alertRuleId, nextChanges) + relabelConfigs = appendPreservedRuleDrops(relabelConfigs, existingRuleDrops) + + return upsertAlertRelabelConfig(c.k8sClient, ctx, namespace, arcName, prName, originalRule.Alert, alertRuleId, found, existingArc, relabelConfigs) +} + +func relabelConfigIfFound(found bool, arc *osmv1.AlertRelabelConfig) *osmv1.AlertRelabelConfig { + if found { + return arc + } + return nil +} + +func ensureStampAndDrop(next *[]osmv1.RelabelConfig, stamp osmv1.RelabelConfig, dropCfg osmv1.RelabelConfig, alertRuleId string) bool { + stampExists := false + dropExists := false + for _, 
rc := range *next { + if rc.Action == "Replace" && rc.TargetLabel == k8s.AlertRuleLabelId && + rc.Regex == stamp.Regex && rc.Replacement == alertRuleId { + stampExists = true + } + if rc.Action == "Drop" && rc.Regex == dropCfg.Regex && + len(rc.SourceLabels) == 1 && rc.SourceLabels[0] == k8s.AlertRuleLabelId { + dropExists = true + } + } + changed := false + if !stampExists { + *next = append(*next, stamp) + changed = true + } + if !dropExists { + *next = append(*next, dropCfg) + changed = true + } + return changed +} + +func filterOutDrop(configs []osmv1.RelabelConfig, alertRuleId string) ([]osmv1.RelabelConfig, bool) { + target := regexp.QuoteMeta(alertRuleId) + var out []osmv1.RelabelConfig + removed := false + for _, rc := range configs { + if rc.Action == "Drop" && (rc.Regex == target || rc.Regex == alertRuleId) { + removed = true + continue + } + out = append(out, rc) + } + return out, removed +} + +func isStampOnly(configs []osmv1.RelabelConfig) bool { + if len(configs) == 0 { + return true + } + for _, rc := range configs { + if rc.Action != "Replace" || rc.TargetLabel != k8s.AlertRuleLabelId { + return false + } + } + return true +} + +func (c *client) DropPlatformAlertRule(ctx context.Context, alertRuleId string) error { + relabeled, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found || relabeled.Labels == nil { + return &NotFoundError{Resource: "AlertRule", Id: alertRuleId} + } + + namespace := relabeled.Labels[k8s.PrometheusRuleLabelNamespace] + name := relabeled.Labels[k8s.PrometheusRuleLabelName] + + arcNamespace, err := c.arcNamespaceForRule(types.NamespacedName{Namespace: namespace, Name: name}) + if err != nil { + return err + } + + originalRule, err := c.getOriginalPlatformRule(ctx, namespace, name, alertRuleId) + if err != nil { + return err + } + + arcName := k8s.GetAlertRelabelConfigName(name, alertRuleId) + + existingArc, arcExists, err := c.k8sClient.AlertRelabelConfigs().Get(ctx, arcNamespace, arcName) + if err != nil { + return fmt.Errorf("failed to get AlertRelabelConfig %s/%s: %w", arcNamespace, arcName, err) + } + if err := validatePlatformUpdatePreconditions(relabeled, nil, relabelConfigIfFound(arcExists, existingArc)); err != nil { + return err + } + + original := copyStringMap(originalRule.Labels) + stampOnly := buildRelabelConfigs(originalRule.Alert, original, alertRuleId, nil) + var stamp osmv1.RelabelConfig + if len(stampOnly) > 0 { + stamp = stampOnly[0] + } + + dropCfg := osmv1.RelabelConfig{ + SourceLabels: []osmv1.LabelName{"openshift_io_alert_rule_id"}, + Regex: regexp.QuoteMeta(alertRuleId), + Action: "Drop", + } + + var next []osmv1.RelabelConfig + if arcExists && existingArc != nil { + next = append(next, existingArc.Spec.Configs...) 
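+ // Note: copying the existing configs first keeps a drop additive. Any
+ // label-override Replace entries already on the ARC survive, and
+ // ensureStampAndDrop below appends the id-stamp and Drop entries only
+ // when they are missing, which is what makes repeated drops idempotent.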
+ } + + changed := ensureStampAndDrop(&next, stamp, dropCfg, alertRuleId) + + if !changed { + return nil + } + + return upsertAlertRelabelConfig(c.k8sClient, ctx, arcNamespace, arcName, name, originalRule.Alert, alertRuleId, arcExists, existingArc, next) +} + +func (c *client) RestorePlatformAlertRule(ctx context.Context, alertRuleId string) error { + relabeled, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + var existingArc *osmv1.AlertRelabelConfig + var arcName string + var arcNamespace string + var err error + if found && relabeled.Labels != nil { + namespace := relabeled.Labels[k8s.PrometheusRuleLabelNamespace] + name := relabeled.Labels[k8s.PrometheusRuleLabelName] + arcNamespace, err = c.arcNamespaceForRule(types.NamespacedName{Namespace: namespace, Name: name}) + if err != nil { + return err + } + arcName = k8s.GetAlertRelabelConfigName(name, alertRuleId) + var arcExists bool + existingArc, arcExists, err = c.k8sClient.AlertRelabelConfigs().Get(ctx, arcNamespace, arcName) + if err != nil { + return fmt.Errorf("failed to get AlertRelabelConfig %s/%s: %w", arcNamespace, arcName, err) + } + if !arcExists || existingArc == nil { + return nil + } + if err := validatePlatformUpdatePreconditions(relabeled, nil, existingArc); err != nil { + return err + } + } else { + // Dropped rules may not appear in the relabeled rules cache because + // the Drop action suppresses them from Prometheus results. Fall back + // to scanning ARCs by annotation to locate the one to restore. + arcNamespace, existingArc, arcName = c.findARCByAlertRuleID(ctx, alertRuleId) + if existingArc == nil { + return nil + } + } + + filtered, removed := filterOutDrop(existingArc.Spec.Configs, alertRuleId) + + if !removed { + return nil + } + + if len(filtered) == 0 || isStampOnly(filtered) { + if err := c.k8sClient.AlertRelabelConfigs().Delete(ctx, arcNamespace, arcName); err != nil { + return fmt.Errorf("failed to delete AlertRelabelConfig %s/%s: %w", arcNamespace, arcName, err) + } + return nil + } + + arc := existingArc + arc.Spec = osmv1.AlertRelabelConfigSpec{Configs: filtered} + if arc.Annotations == nil { + arc.Annotations = map[string]string{} + } + arc.Annotations[managementlabels.ARCAnnotationAlertRuleIDKey] = alertRuleId + + if err := c.k8sClient.AlertRelabelConfigs().Update(ctx, *arc); err != nil { + return fmt.Errorf("failed to update AlertRelabelConfig %s/%s: %w", arc.Namespace, arc.Name, err) + } + return nil +} + +// findARCByAlertRuleID searches for an ARC by its alert-rule-id annotation across +// the platform namespace and (if enabled) the user-workload namespace. 
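+//
+// Illustrative shape of the match (the annotation is the same
+// ARCAnnotationAlertRuleIDKey entry this package stamps onto ARCs; the
+// manifest below is a sketch):
+//
+//	metadata:
+//	  annotations:
+//	    <ARCAnnotationAlertRuleIDKey>: "<alertRuleId>"
+//
+// The first matching ARC wins; a List error in one namespace is skipped so
+// the other namespace is still scanned.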
+func (c *client) findARCByAlertRuleID(ctx context.Context, alertRuleId string) (string, *osmv1.AlertRelabelConfig, string) { + namespaces := []string{k8s.ClusterMonitoringNamespace} + if c.enableUserWorkloadARCs { + namespaces = append(namespaces, k8s.UserWorkloadMonitoringNamespace) + } + for _, ns := range namespaces { + arcs, err := c.k8sClient.AlertRelabelConfigs().List(ctx, ns) + if err != nil { + continue + } + for i := range arcs { + arc := arcs[i] + if arc.Annotations != nil && arc.Annotations[managementlabels.ARCAnnotationAlertRuleIDKey] == alertRuleId { + arcCopy := arc + return ns, &arcCopy, arc.Name + } + } + } + return "", nil, "" +} diff --git a/pkg/management/update_platform_alert_rule_test.go b/pkg/management/update_platform_alert_rule_test.go new file mode 100644 index 000000000..85178810c --- /dev/null +++ b/pkg/management/update_platform_alert_rule_test.go @@ -0,0 +1,864 @@ +package management_test + +import ( + "context" + "errors" + "strings" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +var ( + // upOriginalPlatformRule is as stored in the PrometheusRule (without k8s labels). + upOriginalPlatformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("node_down == 1"), + Labels: map[string]string{ + "severity": "critical", + }, + } + upOriginalPlatformRuleId = alertrule.GetAlertingRuleId(&upOriginalPlatformRule) + + // upPlatformRule is as seen by RelabeledRules (with k8s labels added). 
+ upPlatformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("node_down == 1"), + Labels: map[string]string{ + "severity": "critical", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + k8s.AlertRuleLabelId: upOriginalPlatformRuleId, + }, + } + upPlatformRuleId = alertrule.GetAlertingRuleId(&upPlatformRule) + + upUserRule = monitoringv1.Rule{ + Alert: "UserAlert", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", + }, + } + upUserRuleId = alertrule.GetAlertingRuleId(&upUserRule) +) + +func newUpdatePlatformClient(t *testing.T) (management.Client, *testutils.MockClient) { + t.Helper() + mockK8s := &testutils.MockClient{} + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "openshift-monitoring" + }, + } + } + return management.New(context.Background(), mockK8s), mockK8s +} + +func mockPlatformRelabeledGet(ruleId string, rule monitoringv1.Rule) func() k8s.RelabeledRulesInterface { + return func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == ruleId { + return rule, true + } + return monitoringv1.Rule{}, false + }, + } + } +} + +func makePlatformPR(namespace, name string, rules ...monitoringv1.Rule) *testutils.MockPrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, ns, n string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: n}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{{Name: "grp", Rules: rules}}, + }, + }, true, nil + }, + } +} + +// --- Managed-by / GitOps blocks --- + +func TestUpdatePlatformAlertRule_BlocksOperatorManagedWithGitOpsPR(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + opRule := copyRuleWithLabels(upPlatformRule, managementlabels.RuleManagedByLabel, managementlabels.ManagedByOperator) + mockK8s.RelabeledRulesFunc = mockPlatformRelabeledGet(upPlatformRuleId, opRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + Annotations: map[string]string{"argocd.argoproj.io/tracking-id": "gitops-track"}, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{{Name: "grp", Rules: []monitoringv1.Rule{upOriginalPlatformRule}}}, + }, + }, true, nil + }, + } + } + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { + return nil, false, nil + }, + } + } + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, upOriginalPlatformRule) + if err == nil || !strings.Contains(err.Error(), "managed by GitOps") { + t.Errorf("expected GitOps block, got: %v", err) + } +} + +func TestUpdatePlatformAlertRule_BlocksGitOpsManagedARC(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + opRule := 
copyRuleWithLabels(upPlatformRule, managementlabels.RuleManagedByLabel, managementlabels.ManagedByOperator) + mockK8s.RelabeledRulesFunc = mockPlatformRelabeledGet(upPlatformRuleId, opRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return makePlatformPR("openshift-monitoring", "platform-rule", upOriginalPlatformRule) + } + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, ns, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, Namespace: ns, + Annotations: map[string]string{"argocd.argoproj.io/tracking-id": "abc"}, + }, + }, true, nil + }, + } + } + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, upOriginalPlatformRule) + if err == nil || !strings.Contains(err.Error(), "managed by GitOps") { + t.Errorf("expected GitOps block (ARC), got: %v", err) + } +} + +func TestUpdatePlatformAlertRule_BlocksGitOpsManagedRule(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + gitopsRule := copyRuleWithLabels(upPlatformRule, managementlabels.RuleManagedByLabel, managementlabels.ManagedByGitOps) + mockK8s.RelabeledRulesFunc = mockPlatformRelabeledGet(upPlatformRuleId, gitopsRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return makePlatformPR("openshift-monitoring", "platform-rule", upOriginalPlatformRule) + } + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { + return nil, false, nil + }, + } + } + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, upOriginalPlatformRule) + if err == nil || !strings.Contains(err.Error(), "managed by GitOps") { + t.Errorf("expected GitOps block (rule), got: %v", err) + } +} + +// --- Not found / wrong type --- + +func TestUpdatePlatformAlertRule_NotFound(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, _ string) (monitoringv1.Rule, bool) { return monitoringv1.Rule{}, false }, + } + } + + err := client.UpdatePlatformAlertRule(context.Background(), "nonexistent-id", upPlatformRule) + var nf *management.NotFoundError + if !errors.As(err, &nf) || nf.Resource != "AlertRule" { + t.Errorf("expected NotFoundError for AlertRule, got: %v", err) + } +} + +func TestUpdatePlatformAlertRule_UserRuleReturnsError(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + mockK8s.RelabeledRulesFunc = mockPlatformRelabeledGet(upUserRuleId, upUserRule) + + err := client.UpdatePlatformAlertRule(context.Background(), upUserRuleId, upUserRule) + if err == nil || !strings.Contains(err.Error(), "ENABLE_USER_WORKLOAD_ARCS") { + t.Errorf("expected user workload error, got: %v", err) + } +} + +func TestUpdatePlatformAlertRule_PRNotFound(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + mockK8s.RelabeledRulesFunc = mockPlatformRelabeledGet(upPlatformRuleId, upPlatformRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, _, _ string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, nil + }, + } + 
} + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, upPlatformRule) + var nf *management.NotFoundError + if !errors.As(err, &nf) || nf.Resource != "PrometheusRule" { + t.Errorf("expected NotFoundError for PrometheusRule, got: %v", err) + } +} + +func TestUpdatePlatformAlertRule_PRGetError(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + mockK8s.RelabeledRulesFunc = mockPlatformRelabeledGet(upPlatformRuleId, upPlatformRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, _, _ string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, errors.New("failed to get PrometheusRule") + }, + } + } + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, upPlatformRule) + if err == nil || !strings.Contains(err.Error(), "failed to get PrometheusRule") { + t.Errorf("expected PR get error, got: %v", err) + } +} + +// --- No label changes / revert --- + +func setupPlatformWithARC(t *testing.T, mockK8s *testutils.MockClient, arcFn func() k8s.AlertRelabelConfigInterface) { + t.Helper() + mockK8s.RelabeledRulesFunc = mockPlatformRelabeledGet(upPlatformRuleId, upPlatformRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return makePlatformPR("openshift-monitoring", "platform-rule", upOriginalPlatformRule) + } + mockK8s.AlertRelabelConfigsFunc = arcFn +} + +func TestUpdatePlatformAlertRule_DeletesARCOnRevert(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + deleted := false + setupPlatformWithARC(t, mockK8s, func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, ns, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: osmv1.AlertRelabelConfigSpec{Configs: []osmv1.RelabelConfig{}}, + }, true, nil + }, + DeleteFunc: func(_ context.Context, _, _ string) error { deleted = true; return nil }, + } + }) + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, upOriginalPlatformRule) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !deleted { + t.Error("expected ARC to be deleted on revert") + } +} + +// --- Label changes / ARC creation --- + +func TestUpdatePlatformAlertRule_CreatesARCForLabelChange(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + var createdARC *osmv1.AlertRelabelConfig + setupPlatformWithARC(t, mockK8s, func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { return nil, false, nil }, + CreateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + createdARC = &arc + return &arc, nil + }, + } + }) + + updatedRule := copyRule(upOriginalPlatformRule) + updatedRule.Labels["new_label"] = "new_value" + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, updatedRule) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if createdARC == nil { + t.Fatal("expected ARC to be created") + } + if createdARC.Namespace != "openshift-monitoring" { + t.Errorf("expected ARC namespace openshift-monitoring, got %q", createdARC.Namespace) + } + if !strings.HasPrefix(createdARC.Name, "arc-") { + t.Errorf("expected ARC name to start with 
arc-, got %q", createdARC.Name) + } + if len(createdARC.Spec.Configs) == 0 { + t.Error("expected ARC to have relabel configs") + } +} + +func TestUpdatePlatformAlertRule_IdStampAndSeverityChange(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + var createdARC *osmv1.AlertRelabelConfig + setupPlatformWithARC(t, mockK8s, func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { return nil, false, nil }, + CreateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + createdARC = &arc + return &arc, nil + }, + } + }) + + updatedRule := copyRule(upOriginalPlatformRule) + updatedRule.Labels["severity"] = "info" + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, updatedRule) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if createdARC == nil { + t.Fatal("expected ARC to be created") + } + if len(createdARC.Spec.Configs) != 2 { + t.Fatalf("expected 2 relabel configs (id-stamp + severity), got %d", len(createdARC.Spec.Configs)) + } + cfg0 := createdARC.Spec.Configs[0] + if string(cfg0.Action) != "Replace" || string(cfg0.TargetLabel) != "openshift_io_alert_rule_id" { + t.Errorf("cfg0: expected id-stamp Replace, got action=%s target=%s", cfg0.Action, cfg0.TargetLabel) + } + if cfg0.Replacement != upPlatformRuleId { + t.Errorf("cfg0.Replacement: expected %q, got %q", upPlatformRuleId, cfg0.Replacement) + } + cfg1 := createdARC.Spec.Configs[1] + if string(cfg1.Action) != "Replace" || string(cfg1.TargetLabel) != "severity" || cfg1.Replacement != "info" { + t.Errorf("cfg1: expected severity Replace info, got action=%s target=%s replacement=%s", cfg1.Action, cfg1.TargetLabel, cfg1.Replacement) + } +} + +func TestUpdatePlatformAlertRule_IdStampScopesStaticLabels(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + // Override PR to have extra stable labels. 
+ origWithExtras := copyRule(upOriginalPlatformRule)
+ origWithExtras.Labels = map[string]string{"severity": "critical", "component": "kube", "team": "sre"}
+ idForExtras := alertrule.GetAlertingRuleId(&origWithExtras)
+
+ mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface {
+ return &testutils.MockRelabeledRulesInterface{
+ GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) {
+ if id == idForExtras {
+ return monitoringv1.Rule{
+ Alert: "PlatformAlert", Expr: intstr.FromString("node_down == 1"),
+ Labels: map[string]string{
+ k8s.PrometheusRuleLabelNamespace: "openshift-monitoring",
+ k8s.PrometheusRuleLabelName: "platform-rule",
+ k8s.AlertRuleLabelId: idForExtras,
+ "severity": "critical",
+ },
+ }, true
+ }
+ return monitoringv1.Rule{}, false
+ },
+ }
+ }
+ mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface {
+ return &testutils.MockPrometheusRuleInterface{
+ GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) {
+ return &monitoringv1.PrometheusRule{
+ ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name},
+ Spec: monitoringv1.PrometheusRuleSpec{
+ Groups: []monitoringv1.RuleGroup{{Name: "test-group", Rules: []monitoringv1.Rule{origWithExtras}}},
+ },
+ }, true, nil
+ },
+ }
+ }
+
+ var createdARC *osmv1.AlertRelabelConfig
+ mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface {
+ return &testutils.MockAlertRelabelConfigInterface{
+ GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { return nil, false, nil },
+ CreateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) {
+ createdARC = &arc
+ return &arc, nil
+ },
+ }
+ }
+
+ updatedRule := copyRule(upOriginalPlatformRule)
+ updatedRule.Labels = map[string]string{"severity": "info"}
+
+ err := client.UpdatePlatformAlertRule(context.Background(), idForExtras, updatedRule)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if createdARC == nil {
+ t.Fatal("expected ARC to be created")
+ }
+ if len(createdARC.Spec.Configs) != 2 {
+ t.Fatalf("expected 2 ARC configs, got %d", len(createdARC.Spec.Configs))
+ }
+
+ idCfg := createdARC.Spec.Configs[0]
+ if string(idCfg.Action) != "Replace" || string(idCfg.TargetLabel) != "openshift_io_alert_rule_id" {
+ t.Errorf("expected id-stamp config, got action=%s target=%s", idCfg.Action, idCfg.TargetLabel)
+ }
+ var srcLabels []string
+ for _, s := range idCfg.SourceLabels {
+ srcLabels = append(srcLabels, string(s))
+ }
+ for _, expected := range []string{"alertname", "component", "severity", "team"} {
+ found := false
+ for _, sl := range srcLabels {
+ if sl == expected {
+ found = true
+ break
+ }
+ }
+ if !found {
+ t.Errorf("expected source label %q in id-stamp config", expected)
+ }
+ }
+ for _, unexpected := range []string{"namespace"} {
+ for _, sl := range srcLabels {
+ if sl == unexpected {
+ t.Errorf("unexpected source label %q in id-stamp config", unexpected)
+ }
+ }
+ }
+ if !strings.HasPrefix(idCfg.Regex, "^") || !strings.HasSuffix(idCfg.Regex, "$") {
+ t.Errorf("expected anchored regex, got %q", idCfg.Regex)
+ }
+ if !strings.Contains(idCfg.Regex, "^PlatformAlert;kube;critical;sre$") {
+ t.Errorf("expected sorted label values in regex, got %q", idCfg.Regex)
+ }
+}
+
+func TestUpdatePlatformAlertRule_UpdatesExistingARC(t *testing.T) {
+ client, mockK8s := newUpdatePlatformClient(t)
+
+ expectedArcName := k8s.GetAlertRelabelConfigName("platform-rule", upPlatformRuleId)
+ var updatedARC *osmv1.AlertRelabelConfig
+
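+ // GetAlertRelabelConfigName is assumed to derive a deterministic ARC name
+ // from the PrometheusRule name and the rule id; these tests rely only on
+ // the "arc-" prefix, so the exact suffix format is left to the helper.
+ 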
setupPlatformWithARC(t, mockK8s, func() k8s.AlertRelabelConfigInterface { + existing := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: expectedArcName, Namespace: "openshift-monitoring"}, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{{TargetLabel: "testing2", Replacement: "newlabel2", Action: "Replace"}}, + }, + } + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, name string) (*osmv1.AlertRelabelConfig, bool, error) { + if name == expectedArcName { + return existing, true, nil + } + return nil, false, nil + }, + UpdateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) error { updatedARC = &arc; return nil }, + } + }) + + updatedRule := copyRule(upOriginalPlatformRule) + updatedRule.Labels["severity"] = "info" + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, updatedRule) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if updatedARC == nil { + t.Fatal("expected existing ARC to be updated") + } + if len(updatedARC.Spec.Configs) == 0 { + t.Error("expected updated ARC to have configs") + } +} + +func TestUpdatePlatformAlertRule_DeletesARCWhenNoOverridesRemain(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + expectedArcName := k8s.GetAlertRelabelConfigName("platform-rule", upPlatformRuleId) + deleted := false + var updatedARC *osmv1.AlertRelabelConfig + + setupPlatformWithARC(t, mockK8s, func() k8s.AlertRelabelConfigInterface { + existing := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: expectedArcName, Namespace: "openshift-monitoring"}, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{{TargetLabel: "testing2", Replacement: "newlabel2", Action: "Replace"}}, + }, + } + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, name string) (*osmv1.AlertRelabelConfig, bool, error) { + if name == expectedArcName { + return existing, true, nil + } + return nil, false, nil + }, + UpdateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) error { updatedARC = &arc; return nil }, + DeleteFunc: func(_ context.Context, _, _ string) error { deleted = true; return nil }, + } + }) + + // Drop testing2 (explicit delete); keep severity unchanged (no override needed) + updatedRule := copyRule(upOriginalPlatformRule) + updatedRule.Labels = map[string]string{"severity": "critical", "testing2": ""} + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, updatedRule) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if updatedARC != nil { + t.Error("expected ARC to be deleted, not updated") + } + if !deleted { + t.Error("expected ARC to be deleted when no overrides remain") + } +} + +// --- Validation --- + +func TestUpdatePlatformAlertRule_RejectDropSeverity(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + setupPlatformWithARC(t, mockK8s, func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{} + }) + + updatedRule := copyRule(upOriginalPlatformRule) + updatedRule.Labels = map[string]string{"severity": ""} + + err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, updatedRule) + if err == nil || !strings.Contains(err.Error(), `label "severity" cannot be dropped`) { + t.Errorf("expected severity drop error, got: %v", err) + } +} + +func TestUpdatePlatformAlertRule_IgnoresProtectedLabels(t *testing.T) { + client, mockK8s := newUpdatePlatformClient(t) + + var 
createdARC *osmv1.AlertRelabelConfig
+ setupPlatformWithARC(t, mockK8s, func() k8s.AlertRelabelConfigInterface {
+ return &testutils.MockAlertRelabelConfigInterface{
+ GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { return nil, false, nil },
+ CreateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) {
+ createdARC = &arc
+ return &arc, nil
+ },
+ }
+ })
+
+ updatedRule := copyRule(upOriginalPlatformRule)
+ updatedRule.Labels["openshift_io_alert_rule_id"] = "fake"
+
+ err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, updatedRule)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ // The protected label must be filtered out, so no override carrying its
+ // value may reach a created ARC (for this input no ARC is expected at all).
+ if createdARC != nil {
+ for _, rc := range createdARC.Spec.Configs {
+ if rc.Replacement == "fake" {
+ t.Errorf("protected label override leaked into ARC config: %+v", rc)
+ }
+ }
+ }
+}
+
+func TestUpdatePlatformAlertRule_RejectsAlertNameChange(t *testing.T) {
+ client, mockK8s := newUpdatePlatformClient(t)
+ setupPlatformWithARC(t, mockK8s, func() k8s.AlertRelabelConfigInterface {
+ return &testutils.MockAlertRelabelConfigInterface{}
+ })
+
+ updatedRule := copyRule(upOriginalPlatformRule)
+ updatedRule.Labels = map[string]string{"alertname": "NewName"}
+
+ err := client.UpdatePlatformAlertRule(context.Background(), upPlatformRuleId, updatedRule)
+ if err == nil || !strings.Contains(err.Error(), "immutable") {
+ t.Errorf("expected immutable alertname error, got: %v", err)
+ }
+}
+
+// ============================================================
+// Drop/Restore Platform Alert Rule tests
+// ============================================================
+
+var (
+ drOriginalPlatformRule = monitoringv1.Rule{
+ Alert: "PlatformAlertDrop",
+ Expr: intstr.FromString("up == 0"),
+ Labels: map[string]string{
+ "severity": "warning",
+ "team": "sre",
+ },
+ }
+ drOriginalPlatformRuleId = alertrule.GetAlertingRuleId(&drOriginalPlatformRule)
+
+ drPlatformRule = monitoringv1.Rule{
+ Alert: "PlatformAlertDrop",
+ Expr: intstr.FromString("up == 0"),
+ Labels: map[string]string{
+ "severity": "warning",
+ "team": "sre",
+ k8s.PrometheusRuleLabelNamespace: "openshift-monitoring",
+ k8s.PrometheusRuleLabelName: "platform-rule-drop",
+ k8s.AlertRuleLabelId: drOriginalPlatformRuleId,
+ },
+ }
+ drPlatformRuleId = alertrule.GetAlertingRuleId(&drPlatformRule)
+)
+
+func newDropRestoreClient(t *testing.T) (management.Client, *testutils.MockClient) {
+ t.Helper()
+ mockK8s := &testutils.MockClient{}
+ mockK8s.NamespaceFunc = func() k8s.NamespaceInterface {
+ return &testutils.MockNamespaceInterface{
+ IsClusterMonitoringNamespaceFunc: func(name string) bool { return name == "openshift-monitoring" },
+ }
+ }
+ mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface {
+ return &testutils.MockRelabeledRulesInterface{
+ GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) {
+ if id == drPlatformRuleId {
+ return drPlatformRule, true
+ }
+ return monitoringv1.Rule{}, false
+ },
+ }
+ }
+ mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface {
+ return makePlatformPR("openshift-monitoring", "platform-rule-drop", drOriginalPlatformRule)
+ }
+ return management.New(context.Background(), mockK8s), mockK8s
+}
+
+func TestDropPlatformAlertRule_CreatesARCWithIdStampAndDrop(t *testing.T) {
+ client, mockK8s := newDropRestoreClient(t)
+
+ var result *osmv1.AlertRelabelConfig
+ existing := &osmv1.AlertRelabelConfig{
+ ObjectMeta: metav1.ObjectMeta{Name: "arc-platform-rule-drop-xxxx", Namespace: "openshift-monitoring"},
+ Spec: osmv1.AlertRelabelConfigSpec{
+ Configs: []osmv1.RelabelConfig{{TargetLabel: "component", Replacement: "kube-apiserver", Action: 
"Replace"}}, + }, + } + + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, ns, name string) (*osmv1.AlertRelabelConfig, bool, error) { + if ns == "openshift-monitoring" && strings.HasPrefix(name, "arc-") { + return existing, true, nil + } + return nil, false, nil + }, + UpdateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) error { result = &arc; return nil }, + CreateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + result = &arc + return &arc, nil + }, + } + } + + err := client.DropPlatformAlertRule(context.Background(), drPlatformRuleId) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == nil { + t.Fatal("expected ARC to be created/updated") + } + if result.Namespace != "openshift-monitoring" || !strings.HasPrefix(result.Name, "arc-") { + t.Errorf("unexpected ARC name/namespace: %s/%s", result.Namespace, result.Name) + } + + var hasPriorReplace, hasIdStamp, hasDrop bool + for _, rc := range result.Spec.Configs { + switch string(rc.Action) { + case "Replace": + if string(rc.TargetLabel) == "component" && rc.Replacement == "kube-apiserver" { + hasPriorReplace = true + } + if string(rc.TargetLabel) == "openshift_io_alert_rule_id" && rc.Replacement == drPlatformRuleId { + hasIdStamp = true + } + case "Drop": + if len(rc.SourceLabels) == 1 && string(rc.SourceLabels[0]) == "openshift_io_alert_rule_id" && rc.Regex == drPlatformRuleId { + hasDrop = true + } + } + } + if !hasPriorReplace { + t.Error("expected prior Replace config to be preserved") + } + if !hasIdStamp { + t.Error("expected id-stamp Replace config") + } + if !hasDrop { + t.Error("expected Drop config") + } +} + +func TestDropPlatformAlertRule_Idempotent(t *testing.T) { + client, mockK8s := newDropRestoreClient(t) + + var stored *osmv1.AlertRelabelConfig + var last *osmv1.AlertRelabelConfig + + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { + if stored == nil { + return nil, false, nil + } + return stored, true, nil + }, + CreateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + stored = &arc + last = &arc + return &arc, nil + }, + UpdateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) error { + stored = &arc + last = &arc + return nil + }, + } + } + + if err := client.DropPlatformAlertRule(context.Background(), drPlatformRuleId); err != nil { + t.Fatalf("first drop: %v", err) + } + cfgCount := len(last.Spec.Configs) + + if err := client.DropPlatformAlertRule(context.Background(), drPlatformRuleId); err != nil { + t.Fatalf("second drop: %v", err) + } + if len(last.Spec.Configs) != cfgCount { + t.Errorf("expected same config count after second drop: got %d, want %d", len(last.Spec.Configs), cfgCount) + } +} + +func TestRestorePlatformAlertRule_DeletesARCWhenOnlyDropRemains(t *testing.T) { + client, mockK8s := newDropRestoreClient(t) + deleted := false + + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + onlyDrop := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "arc-to-delete", Namespace: "openshift-monitoring"}, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + {SourceLabels: []osmv1.LabelName{"openshift_io_alert_rule_id"}, Regex: 
drPlatformRuleId, Action: "Drop"}, + }, + }, + } + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { + return onlyDrop, true, nil + }, + DeleteFunc: func(_ context.Context, _, _ string) error { deleted = true; return nil }, + UpdateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) error { return errors.New("should not update") }, + } + } + + if err := client.RestorePlatformAlertRule(context.Background(), drPlatformRuleId); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !deleted { + t.Error("expected ARC to be deleted") + } +} + +func TestRestorePlatformAlertRule_KeepsOtherConfigsRemovesDropOnly(t *testing.T) { + client, mockK8s := newDropRestoreClient(t) + deleted := false + var updated *osmv1.AlertRelabelConfig + + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + withOthers := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "arc-keep", Namespace: "openshift-monitoring"}, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + {TargetLabel: "component", Replacement: "kube-apiserver", Action: "Replace"}, + {SourceLabels: []osmv1.LabelName{"openshift_io_alert_rule_id"}, Regex: drPlatformRuleId, Action: "Drop"}, + }, + }, + } + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { + return withOthers, true, nil + }, + DeleteFunc: func(_ context.Context, _, _ string) error { deleted = true; return nil }, + UpdateFunc: func(_ context.Context, arc osmv1.AlertRelabelConfig) error { updated = &arc; return nil }, + } + } + + if err := client.RestorePlatformAlertRule(context.Background(), drPlatformRuleId); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if deleted { + t.Error("expected ARC to be updated, not deleted") + } + if updated == nil { + t.Fatal("expected ARC to be updated") + } + for _, rc := range updated.Spec.Configs { + if string(rc.Action) == "Drop" { + t.Error("Drop config should have been removed") + } + } + found := false + for _, rc := range updated.Spec.Configs { + if string(rc.Action) == "Replace" && string(rc.TargetLabel) == "component" && rc.Replacement == "kube-apiserver" { + found = true + } + } + if !found { + t.Error("expected Replace config for component to be preserved") + } +} + +func TestRestorePlatformAlertRule_DeletesARCWhenOnlyStampAndDropRemain(t *testing.T) { + client, mockK8s := newDropRestoreClient(t) + deleted := false + var updated *osmv1.AlertRelabelConfig + + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + stampAndDrop := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "arc-stamp-drop", Namespace: "openshift-monitoring"}, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname", "severity", "team"}, + Regex: "^PlatformAlertDrop;warning;sre$", + TargetLabel: "openshift_io_alert_rule_id", + Replacement: drPlatformRuleId, + Action: "Replace", + }, + {SourceLabels: []osmv1.LabelName{"openshift_io_alert_rule_id"}, Regex: drPlatformRuleId, Action: "Drop"}, + }, + }, + } + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(_ context.Context, _, _ string) (*osmv1.AlertRelabelConfig, bool, error) { + return stampAndDrop, true, nil + }, + DeleteFunc: func(_ context.Context, _, _ string) error { deleted = true; return nil }, + UpdateFunc: func(_ context.Context, arc 
osmv1.AlertRelabelConfig) error { updated = &arc; return nil }, + } + } + + if err := client.RestorePlatformAlertRule(context.Background(), drPlatformRuleId); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !deleted { + t.Error("expected ARC to be deleted when only stamp remains after removing Drop") + } + if updated != nil { + t.Error("ARC should not be updated when deleted") + } +} diff --git a/pkg/management/update_user_defined_alert_rule.go b/pkg/management/update_user_defined_alert_rule.go new file mode 100644 index 000000000..e139d7236 --- /dev/null +++ b/pkg/management/update_user_defined_alert_rule.go @@ -0,0 +1,200 @@ +package management + +import ( + "context" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +func (c *client) UpdateUserDefinedAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) (string, error) { + rule, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found { + return "", &NotFoundError{Resource: "AlertRule", Id: alertRuleId} + } + + namespace := rule.Labels[k8s.PrometheusRuleLabelNamespace] + name := rule.Labels[k8s.PrometheusRuleLabelName] + + // Common preconditions on relabeled rule (labels-based) + if err := validateUserUpdatePreconditions(rule, nil); err != nil { + return "", err + } + + if c.isPlatformManagedPrometheusRule(types.NamespacedName{Namespace: namespace, Name: name}) { + return "", &NotAllowedError{Message: "cannot update alert rule in a platform-managed PrometheusRule"} + } + + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, namespace, name) + if err != nil { + return "", err + } + + if !found { + return "", &NotFoundError{ + Resource: "PrometheusRule", + Id: alertRuleId, + AdditionalInfo: fmt.Sprintf("PrometheusRule %s/%s not found", namespace, name), + } + } + + // After fetching the PR, block edits for operator-managed PrometheusRules (they will be reconciled) + if err := validateUserUpdatePreconditions(rule, pr); err != nil { + return "", err + } + + // Locate the target rule once and update it after validation + var foundGroupIdx, foundRuleIdx int + ruleFound := false + for groupIdx := range pr.Spec.Groups { + for ruleIdx := range pr.Spec.Groups[groupIdx].Rules { + rule := &pr.Spec.Groups[groupIdx].Rules[ruleIdx] + if ruleMatchesAlertRuleID(*rule, alertRuleId) { + foundGroupIdx = groupIdx + foundRuleIdx = ruleIdx + ruleFound = true + break + } + } + if ruleFound { + break + } + } + + if !ruleFound { + return "", &NotFoundError{ + Resource: "AlertRule", + Id: alertRuleId, + AdditionalInfo: fmt.Sprintf("in PrometheusRule %s/%s", namespace, name), + } + } + + // Validate severity if present + if sev, ok := alertRule.Labels["severity"]; ok && sev != "" { + if !isValidSeverity(sev) { + return "", &ValidationError{Message: fmt.Sprintf("invalid severity %q: must be one of critical|warning|info|none", sev)} + } + } + + computedId := alertrule.GetAlertingRuleId(&alertRule) + + // Treat "true clones" (spec-identical rules that compute to the same id) as unsupported. + // If the updated rule would collide with some other existing rule, reject the update. + if computedId != "" && computedId != alertRuleId { + // Check within the same PrometheusRule first (authoritative). 
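+ // Illustrative collision (hypothetical rules): if the update would make
+ // this rule spec-identical to another rule, both would hash to computedId
+ // and become indistinguishable, so the update is rejected with a
+ // ConflictError instead of silently merging the two.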
+ for groupIdx := range pr.Spec.Groups {
+ for ruleIdx := range pr.Spec.Groups[groupIdx].Rules {
+ if groupIdx == foundGroupIdx && ruleIdx == foundRuleIdx {
+ continue
+ }
+ existing := pr.Spec.Groups[groupIdx].Rules[ruleIdx]
+ if existing.Alert != "" && alertrule.GetAlertingRuleId(&existing) == computedId {
+ return "", &ConflictError{Message: "alert rule with exact config already exists"}
+ }
+ }
+ }
+
+ _, found := c.k8sClient.RelabeledRules().Get(ctx, computedId)
+ if found {
+ return "", &ConflictError{Message: "alert rule with exact config already exists"}
+ }
+ }
+
+ if alertRule.Labels == nil {
+ alertRule.Labels = map[string]string{}
+ }
+ alertRule.Labels[k8s.AlertRuleLabelId] = computedId
+
+ // Perform the update in-place exactly once
+ pr.Spec.Groups[foundGroupIdx].Rules[foundRuleIdx] = alertRule
+
+ err = c.k8sClient.PrometheusRules().Update(ctx, *pr)
+ if err != nil {
+ return "", fmt.Errorf("failed to update PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err)
+ }
+
+ if err := c.migrateClassificationOverrideIfRuleIDChanged(ctx, namespace, name, alertRuleId, computedId, alertRule.Alert); err != nil {
+ return "", err
+ }
+
+ return computedId, nil
+}
+
+func (c *client) migrateClassificationOverrideIfRuleIDChanged(
+ ctx context.Context,
+ ruleNamespace string,
+ prometheusRuleName string,
+ oldRuleId string,
+ newRuleId string,
+ alertName string,
+) error {
+ if oldRuleId == "" || newRuleId == "" || oldRuleId == newRuleId {
+ return nil
+ }
+
+ if !c.enableUserWorkloadARCs {
+ return nil
+ }
+
+ oldArcName := k8s.GetAlertRelabelConfigName(prometheusRuleName, oldRuleId)
+ arcNamespace := k8s.UserWorkloadMonitoringNamespace
+
+ oldArc, found, err := c.k8sClient.AlertRelabelConfigs().Get(ctx, arcNamespace, oldArcName)
+ if err != nil {
+ return err
+ }
+ if !found {
+ return nil
+ }
+
+ existingOverrides, _ := collectExistingFromARC(true, oldArc)
+ existingRuleDrops := getExistingRuleDrops(oldArc, oldRuleId)
+
+ if err := c.k8sClient.AlertRelabelConfigs().Delete(ctx, arcNamespace, oldArcName); err != nil {
+ return fmt.Errorf("failed to delete old AlertRelabelConfig %s/%s: %w", arcNamespace, oldArcName, err)
+ }
+
+ if len(existingOverrides) == 0 && len(existingRuleDrops) == 0 {
+ return nil
+ }
+
+ pr, prFound, err := c.k8sClient.PrometheusRules().Get(ctx, ruleNamespace, prometheusRuleName)
+ if err != nil {
+ return err
+ }
+ if !prFound {
+ return nil
+ }
+
+ var updatedRule *monitoringv1.Rule
+ for groupIdx := range pr.Spec.Groups {
+ for ruleIdx := range pr.Spec.Groups[groupIdx].Rules {
+ r := &pr.Spec.Groups[groupIdx].Rules[ruleIdx]
+ if ruleMatchesAlertRuleID(*r, newRuleId) {
+ updatedRule = r
+ break
+ }
+ }
+ if updatedRule != nil {
+ break
+ }
+ }
+ if updatedRule == nil {
+ return nil
+ }
+
+ original := copyStringMap(updatedRule.Labels)
+ desired := buildDesiredLabels(original, existingOverrides)
+ nextChanges := buildNextLabelChanges(original, desired)
+
+ newArcName := k8s.GetAlertRelabelConfigName(prometheusRuleName, newRuleId)
+ relabelConfigs := buildRelabelConfigs(updatedRule.Alert, original, newRuleId, nextChanges)
+ relabelConfigs = appendPreservedRuleDrops(relabelConfigs, existingRuleDrops)
+
+ return upsertAlertRelabelConfig(c.k8sClient, ctx, arcNamespace, newArcName, prometheusRuleName, updatedRule.Alert, newRuleId, false, nil, relabelConfigs)
+}
diff --git a/pkg/management/update_user_defined_alert_rule_test.go b/pkg/management/update_user_defined_alert_rule_test.go
new file mode 100644
index 000000000..e4a5cf5e0
--- /dev/null
+++ b/pkg/management/update_user_defined_alert_rule_test.go
@@ -0,0 +1,402 @@
+package management_test
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "strings"
+ "testing"
+
+ monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/util/intstr"
+
+ alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule"
+ "github.com/openshift/monitoring-plugin/pkg/k8s"
+ "github.com/openshift/monitoring-plugin/pkg/management"
+ "github.com/openshift/monitoring-plugin/pkg/management/testutils"
+ "github.com/openshift/monitoring-plugin/pkg/managementlabels"
+)
+
+var (
+ // originalUserRule is as stored in the PrometheusRule (without k8s labels).
+ originalUserRule = monitoringv1.Rule{
+ Alert: "UserAlert",
+ Expr: intstr.FromString("up == 0"),
+ Labels: map[string]string{
+ "severity": "warning",
+ },
+ }
+ originalUserRuleId = alertrule.GetAlertingRuleId(&originalUserRule)
+
+ // udUserRule is as seen by RelabeledRules (with k8s labels added).
+ udUserRule = monitoringv1.Rule{
+ Alert: "UserAlert",
+ Expr: intstr.FromString("up == 0"),
+ Labels: map[string]string{
+ "severity": "warning",
+ k8s.PrometheusRuleLabelNamespace: "user-namespace",
+ k8s.PrometheusRuleLabelName: "user-rule",
+ },
+ }
+
+ udPlatformRule = monitoringv1.Rule{
+ Alert: "PlatformAlert",
+ Labels: map[string]string{
+ k8s.PrometheusRuleLabelNamespace: "openshift-monitoring",
+ k8s.PrometheusRuleLabelName: "platform-rule",
+ },
+ }
+ udPlatformRuleId = alertrule.GetAlertingRuleId(&udPlatformRule)
+)
+
+func newUpdateUserDefinedClient(t *testing.T) (management.Client, *testutils.MockClient) {
+ t.Helper()
+ mockK8s := &testutils.MockClient{}
+ mockK8s.NamespaceFunc = func() k8s.NamespaceInterface {
+ return &testutils.MockNamespaceInterface{
+ IsClusterMonitoringNamespaceFunc: func(name string) bool {
+ return name == "openshift-monitoring"
+ },
+ }
+ }
+ return management.New(context.Background(), mockK8s), mockK8s
+}
+
+func mockUDRelabeledGet(ruleId string, rule monitoringv1.Rule) func() k8s.RelabeledRulesInterface {
+ return func() k8s.RelabeledRulesInterface {
+ return &testutils.MockRelabeledRulesInterface{
+ GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) {
+ if id == ruleId {
+ return rule, true
+ }
+ return monitoringv1.Rule{}, false
+ },
+ }
+ }
+}
+
+func makePRWithRule(ns, name string, rule monitoringv1.Rule) *testutils.MockPrometheusRuleInterface {
+ return &testutils.MockPrometheusRuleInterface{
+ GetFunc: func(_ context.Context, namespace, prName string) (*monitoringv1.PrometheusRule, bool, error) {
+ return &monitoringv1.PrometheusRule{
+ ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: prName},
+ Spec: monitoringv1.PrometheusRuleSpec{
+ Groups: []monitoringv1.RuleGroup{{Name: "test-group", Rules: []monitoringv1.Rule{rule}}},
+ },
+ }, true, nil
+ },
+ }
+}
+
+// --- Managed-by enforcement ---
+
+func TestUpdateUserDefinedAlertRule_BlocksGitOpsManaged(t *testing.T) {
+ client, mockK8s := newUpdateUserDefinedClient(t)
+ gitopsRule := copyRuleWithLabels(udUserRule, managementlabels.RuleManagedByLabel, managementlabels.ManagedByGitOps)
+ mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, gitopsRule)
+
+ _, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, udUserRule)
+ if err == nil || !strings.Contains(err.Error(), 
"managed by GitOps") { + t.Errorf("expected GitOps block error, got: %v", err) + } +} + +func TestUpdateUserDefinedAlertRule_BlocksOperatorManaged(t *testing.T) { + client, mockK8s := newUpdateUserDefinedClient(t) + opRule := copyRuleWithLabels(udUserRule, managementlabels.RuleManagedByLabel, managementlabels.ManagedByOperator) + mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, opRule) + + _, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, udUserRule) + if err == nil || !strings.Contains(err.Error(), "managed by an operator") { + t.Errorf("expected operator block error, got: %v", err) + } +} + +// --- Not found --- + +func TestUpdateUserDefinedAlertRule_NotFound(t *testing.T) { + client, mockK8s := newUpdateUserDefinedClient(t) + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, _ string) (monitoringv1.Rule, bool) { return monitoringv1.Rule{}, false }, + } + } + + _, err := client.UpdateUserDefinedAlertRule(context.Background(), "nonexistent-id", udUserRule) + var nf *management.NotFoundError + if !errors.As(err, &nf) || nf.Resource != "AlertRule" { + t.Errorf("expected NotFoundError for AlertRule, got: %v", err) + } +} + +func TestUpdateUserDefinedAlertRule_PlatformRuleReturnsError(t *testing.T) { + client, mockK8s := newUpdateUserDefinedClient(t) + mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(udPlatformRuleId, udPlatformRule) + + _, err := client.UpdateUserDefinedAlertRule(context.Background(), udPlatformRuleId, udPlatformRule) + if err == nil || !strings.Contains(err.Error(), "cannot update alert rule in a platform-managed PrometheusRule") { + t.Errorf("expected platform rule error, got: %v", err) + } +} + +// --- PrometheusRule errors --- + +func TestUpdateUserDefinedAlertRule_PRNotFound(t *testing.T) { + client, mockK8s := newUpdateUserDefinedClient(t) + mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, _, _ string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, nil + }, + } + } + + _, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, udUserRule) + var nf *management.NotFoundError + if !errors.As(err, &nf) || nf.Resource != "PrometheusRule" { + t.Errorf("expected NotFoundError for PrometheusRule, got: %v", err) + } +} + +func TestUpdateUserDefinedAlertRule_PRGetError(t *testing.T) { + client, mockK8s := newUpdateUserDefinedClient(t) + mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, _, _ string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, errors.New("failed to get PrometheusRule") + }, + } + } + + _, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, udUserRule) + if err == nil || !strings.Contains(err.Error(), "failed to get PrometheusRule") { + t.Errorf("expected PR get error, got: %v", err) + } +} + +func TestUpdateUserDefinedAlertRule_RuleNotInPR(t *testing.T) { + client, mockK8s := newUpdateUserDefinedClient(t) + mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { 
+
+func TestUpdateUserDefinedAlertRule_RuleNotInPR(t *testing.T) {
+	client, mockK8s := newUpdateUserDefinedClient(t)
+	mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule)
+	mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface {
+		return &testutils.MockPrometheusRuleInterface{
+			GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) {
+				return &monitoringv1.PrometheusRule{
+					ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name},
+					Spec: monitoringv1.PrometheusRuleSpec{
+						Groups: []monitoringv1.RuleGroup{{Name: "test-group", Rules: []monitoringv1.Rule{}}},
+					},
+				}, true, nil
+			},
+		}
+	}
+
+	_, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, udUserRule)
+	if err == nil || !strings.Contains(err.Error(), fmt.Sprintf("AlertRule with id %s not found", originalUserRuleId)) {
+		t.Errorf("expected 'not found in PR' error, got: %v", err)
+	}
+}
+
+func TestUpdateUserDefinedAlertRule_PRUpdateError(t *testing.T) {
+	client, mockK8s := newUpdateUserDefinedClient(t)
+	mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule)
+	pr := makePRWithRule("user-namespace", "user-rule", originalUserRule)
+	pr.UpdateFunc = func(_ context.Context, _ monitoringv1.PrometheusRule) error {
+		return errors.New("failed to update PrometheusRule")
+	}
+	mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { return pr }
+
+	_, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, originalUserRule)
+	if err == nil || !strings.Contains(err.Error(), "failed to update PrometheusRule") {
+		t.Errorf("expected PR update error, got: %v", err)
+	}
+}
+
+// --- Successful updates ---
+
+func TestUpdateUserDefinedAlertRule_UpdatesRule(t *testing.T) {
+	client, mockK8s := newUpdateUserDefinedClient(t)
+	mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule)
+
+	var savedPR *monitoringv1.PrometheusRule
+	pr := makePRWithRule("user-namespace", "user-rule", originalUserRule)
+	pr.UpdateFunc = func(_ context.Context, p monitoringv1.PrometheusRule) error {
+		savedPR = &p
+		return nil
+	}
+	mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { return pr }
+
+	updatedRule := copyRule(originalUserRule)
+	updatedRule.Labels["severity"] = "critical"
+	updatedRule.Expr = intstr.FromString("up == 1")
+	expectedId := alertrule.GetAlertingRuleId(&updatedRule)
+
+	newId, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, updatedRule)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if newId != expectedId {
+		t.Errorf("expected newId %q, got %q", expectedId, newId)
+	}
+	if savedPR == nil {
+		t.Fatal("expected PR to be updated")
+	}
+	if savedPR.Spec.Groups[0].Rules[0].Labels["severity"] != "critical" {
+		t.Errorf("expected severity=critical in saved PR")
+	}
+}
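Review note: UpdatesRule above and RuleIdChanges below both hinge on the stable ID being derived purely from the rule's contents, which is why the update returns a new ID. A small illustration under that assumption:

```go
// Illustration only: changing a label or the expression is expected to
// produce a different content-derived ID.
package main

import (
	"fmt"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	"k8s.io/apimachinery/pkg/util/intstr"

	alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule"
)

func main() {
	before := monitoringv1.Rule{
		Alert:  "UserAlert",
		Expr:   intstr.FromString("up == 0"),
		Labels: map[string]string{"severity": "warning"},
	}
	after := before
	after.Labels = map[string]string{"severity": "critical"}

	// Expected to print two different IDs, which is why
	// UpdateUserDefinedAlertRule returns the post-update ID to the caller.
	fmt.Println(alertrule.GetAlertingRuleId(&before))
	fmt.Println(alertrule.GetAlertingRuleId(&after))
}
```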
+
+func TestUpdateUserDefinedAlertRule_RuleIdChanges(t *testing.T) {
+	client, mockK8s := newUpdateUserDefinedClient(t)
+	mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule)
+
+	var savedPR *monitoringv1.PrometheusRule
+	pr := makePRWithRule("user-namespace", "user-rule", originalUserRule)
+	pr.UpdateFunc = func(_ context.Context, p monitoringv1.PrometheusRule) error { savedPR = &p; return nil }
+	mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { return pr }
+
+	updatedRule := copyRule(originalUserRule)
+	updatedRule.Labels["severity"] = "critical"
+	updatedRule.Expr = intstr.FromString("up == 1")
+	expectedId := alertrule.GetAlertingRuleId(&updatedRule)
+
+	newId, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, updatedRule)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if newId == originalUserRuleId {
+		t.Error("expected new ID to differ from original")
+	}
+	if newId != expectedId {
+		t.Errorf("expected new ID %q, got %q", expectedId, newId)
+	}
+	if savedPR == nil {
+		t.Fatal("expected PR to be saved")
+	}
+}
+
+func TestUpdateUserDefinedAlertRule_OnlyMatchingRuleUpdated(t *testing.T) {
+	anotherRule := monitoringv1.Rule{Alert: "AnotherAlert", Expr: intstr.FromString("down == 1")}
+
+	client, mockK8s := newUpdateUserDefinedClient(t)
+	mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule)
+
+	var savedPR *monitoringv1.PrometheusRule
+	mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface {
+		return &testutils.MockPrometheusRuleInterface{
+			GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) {
+				return &monitoringv1.PrometheusRule{
+					ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name},
+					Spec: monitoringv1.PrometheusRuleSpec{
+						Groups: []monitoringv1.RuleGroup{{Name: "test-group", Rules: []monitoringv1.Rule{originalUserRule, anotherRule}}},
+					},
+				}, true, nil
+			},
+			UpdateFunc: func(_ context.Context, p monitoringv1.PrometheusRule) error { savedPR = &p; return nil },
+		}
+	}
+
+	updatedRule := copyRule(originalUserRule)
+	updatedRule.Labels["severity"] = "info"
+	expectedId := alertrule.GetAlertingRuleId(&updatedRule)
+
+	newId, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, updatedRule)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if newId != expectedId {
+		t.Errorf("expected %q, got %q", expectedId, newId)
+	}
+	if savedPR == nil {
+		t.Fatal("expected PR to be updated")
+	}
+	if len(savedPR.Spec.Groups[0].Rules) != 2 {
+		t.Fatalf("expected 2 rules, got %d", len(savedPR.Spec.Groups[0].Rules))
+	}
+	if savedPR.Spec.Groups[0].Rules[0].Labels["severity"] != "info" {
+		t.Error("expected severity=info on first rule")
+	}
+	if savedPR.Spec.Groups[0].Rules[1].Alert != "AnotherAlert" {
+		t.Error("expected second rule to be AnotherAlert")
+	}
+}
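Review note: OnlyMatchingRuleUpdated above and MultipleGroups below pin the expected write pattern: scan all groups, replace only the rule whose content-derived ID matches, and leave siblings and empty groups untouched. A hypothetical sketch of that loop; the real code lives in pkg/management:

```go
// Hypothetical sketch of the write pattern these two tests pin down.
package management

import (
	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"

	alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule"
)

// replaceRuleByID swaps the single rule whose content-derived ID matches id,
// leaving sibling rules and other groups (even empty ones) untouched.
func replaceRuleByID(pr *monitoringv1.PrometheusRule, id string, updated monitoringv1.Rule) bool {
	for gi := range pr.Spec.Groups {
		for ri := range pr.Spec.Groups[gi].Rules {
			if alertrule.GetAlertingRuleId(&pr.Spec.Groups[gi].Rules[ri]) == id {
				pr.Spec.Groups[gi].Rules[ri] = updated
				return true
			}
		}
	}
	return false
}
```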
t.Error("expected group2 to have 1 rule") + } + if savedPR.Spec.Groups[1].Rules[0].Labels["new_label"] != "new_value" { + t.Error("expected new_label in group2 rule") + } +} + +// --- Severity validation --- + +func TestUpdateUserDefinedAlertRule_InvalidSeverity(t *testing.T) { + client, mockK8s := newUpdateUserDefinedClient(t) + mockK8s.RelabeledRulesFunc = mockUDRelabeledGet(originalUserRuleId, udUserRule) + pr := makePRWithRule("user-namespace", "user-rule", originalUserRule) + pr.UpdateFunc = func(_ context.Context, _ monitoringv1.PrometheusRule) error { return nil } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { return pr } + + updatedRule := copyRule(originalUserRule) + updatedRule.Labels = map[string]string{"severity": "urgent"} + _, err := client.UpdateUserDefinedAlertRule(context.Background(), originalUserRuleId, updatedRule) + if err == nil || !strings.Contains(err.Error(), "invalid severity") { + t.Errorf("expected invalid severity error, got: %v", err) + } +} + +// --- Helpers --- + +func copyRule(r monitoringv1.Rule) monitoringv1.Rule { + out := r + out.Labels = make(map[string]string) + for k, v := range r.Labels { + out.Labels[k] = v + } + return out +} + +func copyRuleWithLabels(r monitoringv1.Rule, extraKey, extraVal string) monitoringv1.Rule { + out := copyRule(r) + out.Labels[extraKey] = extraVal + return out +} diff --git a/test/e2e/update_alert_rule_test.go b/test/e2e/update_alert_rule_test.go new file mode 100644 index 000000000..bfd6e9dd4 --- /dev/null +++ b/test/e2e/update_alert_rule_test.go @@ -0,0 +1,220 @@ +package e2e + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + +func TestUpdateAlertRule_DropRestore(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-update-drop", false) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + ruleID := createRuleViaAPI(t, f, ctx, testNamespace, "DropRestoreAlert", "e2e-update-pr") + t.Logf("Created rule with ID: %s", ruleID) + + patchDrop(t, f, ctx, ruleID, false) + + arcList, err := f.Osmv1clientset.MonitoringV1().AlertRelabelConfigs(k8s.ClusterMonitoringNamespace).List(ctx, metav1.ListOptions{}) + if err != nil { + t.Fatalf("Failed to list ARCs: %v", err) + } + + var foundDropARC bool + for _, arc := range arcList.Items { + if hasDropActionForRule(arc, ruleID) { + foundDropARC = true + t.Logf("Found ARC %s/%s with drop action for rule %s", arc.Namespace, arc.Name, ruleID) + break + } + } + if !foundDropARC { + t.Fatal("Expected to find an ARC with drop action after disabling rule") + } + + patchDrop(t, f, ctx, ruleID, true) + + arcList, err = f.Osmv1clientset.MonitoringV1().AlertRelabelConfigs(k8s.ClusterMonitoringNamespace).List(ctx, metav1.ListOptions{}) + if err != nil { + t.Fatalf("Failed to list ARCs after restore: %v", err) + } + + for _, arc := range arcList.Items { + if hasDropActionForRule(arc, ruleID) { + t.Errorf("ARC %s/%s still has drop action after restore", arc.Namespace, arc.Name) + } + } + + 
t.Log("Drop/restore e2e test passed successfully") +} + +func TestUpdateAlertRule_Classification(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-update-class", false) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + ruleID := createRuleViaAPI(t, f, ctx, testNamespace, "ClassificationAlert", "e2e-update-pr") + t.Logf("Created rule with ID: %s", ruleID) + + component := "networking" + layer := "cluster" + classificationPatch := managementrouter.AlertRuleClassificationPatch{ + Component: &component, + ComponentSet: true, + Layer: &layer, + LayerSet: true, + } + + patchReq := managementrouter.BulkUpdateAlertRulesRequest{ + RuleIds: []string{ruleID}, + Classification: &classificationPatch, + } + + reqBody, err := json.Marshal(patchReq) + if err != nil { + t.Fatalf("Failed to marshal classification patch: %v", err) + } + + patchURL := f.PluginURL + "/api/v1/alerting/rules" + req, err := http.NewRequestWithContext(ctx, http.MethodPatch, patchURL, bytes.NewBuffer(reqBody)) + if err != nil { + t.Fatalf("Failed to create patch request: %v", err) + } + req.Header.Set("Content-Type", "application/json") + if f.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + + resp, err := f.HTTPClient().Do(req) + if err != nil { + t.Fatalf("Failed to make classification patch request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("Expected status 200, got %d. Body: %s", resp.StatusCode, string(body)) + } + + var patchResp managementrouter.BulkUpdateAlertRulesResponse + if err := json.NewDecoder(resp.Body).Decode(&patchResp); err != nil { + t.Fatalf("Failed to decode patch response: %v", err) + } + + if len(patchResp.Rules) != 1 { + t.Fatalf("Expected 1 rule result, got %d", len(patchResp.Rules)) + } + if patchResp.Rules[0].StatusCode != http.StatusNoContent { + t.Fatalf("Expected per-rule status 204, got %d: %v", + patchResp.Rules[0].StatusCode, patchResp.Rules[0].Message) + } + + arcList, err := f.Osmv1clientset.MonitoringV1().AlertRelabelConfigs(k8s.ClusterMonitoringNamespace).List(ctx, metav1.ListOptions{}) + if err != nil { + t.Fatalf("Failed to list ARCs after classification: %v", err) + } + + var foundClassificationARC bool + for _, arc := range arcList.Items { + if hasClassificationForRule(arc, "networking", "cluster") { + foundClassificationARC = true + t.Logf("Found ARC %s/%s with classification labels", arc.Namespace, arc.Name) + break + } + } + if !foundClassificationARC { + t.Fatal("Expected to find an ARC with classification relabel configs") + } + + t.Log("Classification e2e test passed successfully") +} + +func patchDrop(t *testing.T, f *framework.Framework, ctx context.Context, ruleID string, enable bool) { + t.Helper() + + patchReq := managementrouter.BulkUpdateAlertRulesRequest{ + RuleIds: []string{ruleID}, + AlertingRuleEnabled: &enable, + } + + reqBody, err := json.Marshal(patchReq) + if err != nil { + t.Fatalf("Failed to marshal drop/restore patch: %v", err) + } + + patchURL := f.PluginURL + "/api/v1/alerting/rules" + req, err := http.NewRequestWithContext(ctx, http.MethodPatch, patchURL, bytes.NewBuffer(reqBody)) + if err != nil { + t.Fatalf("Failed to create patch request: %v", err) + } + req.Header.Set("Content-Type", "application/json") + if f.BearerToken != "" { + 
req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + + resp, err := f.HTTPClient().Do(req) + if err != nil { + t.Fatalf("Failed to make drop/restore request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("Drop/restore: expected 200, got %d. Body: %s", resp.StatusCode, string(body)) + } +} + +func hasDropActionForRule(arc osmv1.AlertRelabelConfig, ruleID string) bool { + hasRuleID := arc.Annotations[managementlabels.ARCAnnotationAlertRuleIDKey] == ruleID + if !hasRuleID { + return false + } + for _, cfg := range arc.Spec.Configs { + if cfg.Action == "Drop" { + return true + } + } + return false +} + +func hasClassificationForRule(arc osmv1.AlertRelabelConfig, component, layer string) bool { + for _, cfg := range arc.Spec.Configs { + if cfg.TargetLabel == "openshift_io_alert_rule_component" && cfg.Replacement == component { + return true + } + if cfg.TargetLabel == "openshift_io_alert_rule_layer" && cfg.Replacement == layer { + return true + } + } + return false +}