From 8c8f53083ccc4c9e180ec90a5d978aaa6cfa50c6 Mon Sep 17 00:00:00 2001 From: Shirly Radco Date: Thu, 12 Mar 2026 19:43:37 +0200 Subject: [PATCH] management: add delete alert rule API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add DELETE /api/v1/alerting/rules endpoint for bulk deletion of user-defined alert rules with full test coverage. Signed-off-by: Shirly Radco Signed-off-by: João Vilaça Signed-off-by: Aviv Litman Co-authored-by: AI Assistant --- .github/workflows/unit-tests.yaml | 21 - api/openapi.yaml | 77 +++ internal/managementrouter/api_generated.go | 46 ++ internal/managementrouter/router.go | 15 + .../user_defined_alert_rule_bulk_delete.go | 56 ++ ...ser_defined_alert_rule_bulk_delete_test.go | 226 ++++++++ pkg/management/alert_rule_id_match.go | 49 ++ pkg/management/alert_rule_preconditions.go | 48 ++ .../delete_user_defined_alert_rule_by_id.go | 156 ++++++ ...lete_user_defined_alert_rule_by_id_test.go | 504 ++++++++++++++++++ pkg/management/types.go | 3 + test/e2e/delete_alert_rule_test.go | 106 ++++ test/e2e/helpers_test.go | 67 +++ 13 files changed, 1353 insertions(+), 21 deletions(-) delete mode 100644 .github/workflows/unit-tests.yaml create mode 100644 internal/managementrouter/user_defined_alert_rule_bulk_delete.go create mode 100644 internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go create mode 100644 pkg/management/alert_rule_id_match.go create mode 100644 pkg/management/alert_rule_preconditions.go create mode 100644 pkg/management/delete_user_defined_alert_rule_by_id.go create mode 100644 pkg/management/delete_user_defined_alert_rule_by_id_test.go create mode 100644 test/e2e/delete_alert_rule_test.go diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml deleted file mode 100644 index 8a29befa9..000000000 --- a/.github/workflows/unit-tests.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: Unit Tests - -on: - pull_request: - branches: - - add-alert-management-api-base - -jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - - - name: Run tests - run: go test -count=1 $(go list ./... | grep -v /test/e2e) diff --git a/api/openapi.yaml b/api/openapi.yaml index 758cac0a3..4deda8b93 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -12,6 +12,44 @@ servers: paths: /rules: + delete: + operationId: BulkDeleteUserDefinedAlertRules + summary: Bulk delete user-defined alert rules + description: > + Deletes one or more user-defined alert rules by their stable IDs. + Each rule is deleted independently; per-rule status is returned in + the response so partial success is visible to the caller. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/BulkDeleteAlertRulesRequest" + responses: + "200": + description: Deletion results (may include per-rule errors) + content: + application/json: + schema: + $ref: "#/components/schemas/BulkDeleteAlertRulesResponse" + "400": + description: Invalid request body + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "401": + description: Missing or invalid authorization token + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "500": + description: Unexpected server error + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" post: operationId: CreateAlertRule summary: Create an alert rule @@ -141,6 +179,45 @@ components: type: string description: Computed stable ID for the created alert rule. + BulkDeleteAlertRulesRequest: + type: object + required: + - ruleIds + properties: + ruleIds: + type: array + minItems: 1 + items: + type: string + description: List of stable alert rule IDs to delete. + + DeleteAlertRuleResult: + type: object + required: + - id + - status_code + properties: + id: + type: string + description: The stable alert rule ID that was processed. + status_code: + type: integer + description: HTTP status code for this rule's deletion result. + message: + type: string + description: Error message if deletion failed; omitted on success. + + BulkDeleteAlertRulesResponse: + type: object + required: + - rules + properties: + rules: + type: array + items: + $ref: "#/components/schemas/DeleteAlertRuleResult" + description: Per-rule deletion results. + ErrorResponse: type: object required: diff --git a/internal/managementrouter/api_generated.go b/internal/managementrouter/api_generated.go index 555d00eaf..a886b9d8d 100644 --- a/internal/managementrouter/api_generated.go +++ b/internal/managementrouter/api_generated.go @@ -34,6 +34,18 @@ type AlertRuleSpec struct { Record *string `json:"record,omitempty"` } +// BulkDeleteAlertRulesRequest defines model for BulkDeleteAlertRulesRequest. +type BulkDeleteAlertRulesRequest struct { + // RuleIds List of stable alert rule IDs to delete. + RuleIds []string `json:"ruleIds"` +} + +// BulkDeleteAlertRulesResponse defines model for BulkDeleteAlertRulesResponse. +type BulkDeleteAlertRulesResponse struct { + // Rules Per-rule deletion results. + Rules []DeleteAlertRuleResult `json:"rules"` +} + // CreateAlertRuleRequest defines model for CreateAlertRuleRequest. type CreateAlertRuleRequest struct { // AlertingRule Specification of a Prometheus alerting or recording rule. Maps to prometheus-operator Rule fields. @@ -49,6 +61,18 @@ type CreateAlertRuleResponse struct { Id string `json:"id"` } +// DeleteAlertRuleResult defines model for DeleteAlertRuleResult. +type DeleteAlertRuleResult struct { + // Id The stable alert rule ID that was processed. + Id string `json:"id"` + + // Message Error message if deletion failed; omitted on success. + Message *string `json:"message,omitempty"` + + // StatusCode HTTP status code for this rule's deletion result. + StatusCode int `json:"status_code"` +} + // ErrorResponse defines model for ErrorResponse. type ErrorResponse struct { // Error Human-readable error message. @@ -67,11 +91,17 @@ type PrometheusRuleTarget struct { PrometheusRuleNamespace string `json:"prometheusRuleNamespace"` } +// BulkDeleteUserDefinedAlertRulesJSONRequestBody defines body for BulkDeleteUserDefinedAlertRules for application/json ContentType. +type BulkDeleteUserDefinedAlertRulesJSONRequestBody = BulkDeleteAlertRulesRequest + // CreateAlertRuleJSONRequestBody defines body for CreateAlertRule for application/json ContentType. type CreateAlertRuleJSONRequestBody = CreateAlertRuleRequest // ServerInterface represents all server handlers. type ServerInterface interface { + // Bulk delete user-defined alert rules + // (DELETE /rules) + BulkDeleteUserDefinedAlertRules(w http.ResponseWriter, r *http.Request) // Create an alert rule // (POST /rules) CreateAlertRule(w http.ResponseWriter, r *http.Request) @@ -86,6 +116,20 @@ type ServerInterfaceWrapper struct { type MiddlewareFunc func(http.Handler) http.Handler +// BulkDeleteUserDefinedAlertRules operation middleware +func (siw *ServerInterfaceWrapper) BulkDeleteUserDefinedAlertRules(w http.ResponseWriter, r *http.Request) { + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.BulkDeleteUserDefinedAlertRules(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + // CreateAlertRule operation middleware func (siw *ServerInterfaceWrapper) CreateAlertRule(w http.ResponseWriter, r *http.Request) { @@ -213,6 +257,8 @@ func HandlerWithOptions(si ServerInterface, options GorillaServerOptions) http.H ErrorHandlerFunc: options.ErrorHandlerFunc, } + r.HandleFunc(options.BaseURL+"/rules", wrapper.BulkDeleteUserDefinedAlertRules).Methods("DELETE") + r.HandleFunc(options.BaseURL+"/rules", wrapper.CreateAlertRule).Methods("POST") return r diff --git a/internal/managementrouter/router.go b/internal/managementrouter/router.go index 85c7a7b31..443f4d889 100644 --- a/internal/managementrouter/router.go +++ b/internal/managementrouter/router.go @@ -7,7 +7,10 @@ package managementrouter import ( "encoding/json" "errors" + "fmt" "net/http" + "net/url" + "strings" "github.com/gorilla/mux" "github.com/sirupsen/logrus" @@ -98,3 +101,15 @@ func parseError(err error) (int, string) { log.WithError(err).Error("unexpected management API error") return http.StatusInternalServerError, "An unexpected error occurred" } + +func parseParam(raw string, name string) (string, error) { + decoded, err := url.PathUnescape(raw) + if err != nil { + return "", fmt.Errorf("invalid %s encoding", name) + } + value := strings.TrimSpace(decoded) + if value == "" { + return "", fmt.Errorf("missing %s", name) + } + return value, nil +} diff --git a/internal/managementrouter/user_defined_alert_rule_bulk_delete.go b/internal/managementrouter/user_defined_alert_rule_bulk_delete.go new file mode 100644 index 000000000..167ccceaf --- /dev/null +++ b/internal/managementrouter/user_defined_alert_rule_bulk_delete.go @@ -0,0 +1,56 @@ +package managementrouter + +import ( + "encoding/json" + "net/http" +) + +// BulkDeleteUserDefinedAlertRules implements ServerInterface. +func (hr *httpRouter) BulkDeleteUserDefinedAlertRules(w http.ResponseWriter, req *http.Request) { + req.Body = http.MaxBytesReader(w, req.Body, maxRequestBodyBytes) + + var payload BulkDeleteAlertRulesRequest + if err := json.NewDecoder(req.Body).Decode(&payload); err != nil { + writeError(w, http.StatusBadRequest, "invalid request body") + return + } + if len(payload.RuleIds) == 0 { + writeError(w, http.StatusBadRequest, "ruleIds is required") + return + } + + results := make([]DeleteAlertRuleResult, 0, len(payload.RuleIds)) + + for _, rawId := range payload.RuleIds { + id, err := parseParam(rawId, "ruleId") + if err != nil { + msg := err.Error() + results = append(results, DeleteAlertRuleResult{ + Id: rawId, + StatusCode: http.StatusBadRequest, + Message: &msg, + }) + continue + } + + if err := hr.managementClient.DeleteUserDefinedAlertRuleById(req.Context(), id); err != nil { + status, message := parseError(err) + results = append(results, DeleteAlertRuleResult{ + Id: id, + StatusCode: status, + Message: &message, + }) + continue + } + results = append(results, DeleteAlertRuleResult{ + Id: id, + StatusCode: http.StatusNoContent, + }) + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + if err := json.NewEncoder(w).Encode(BulkDeleteAlertRulesResponse{Rules: results}); err != nil { + log.WithError(err).Warn("failed to encode bulk delete response") + } +} diff --git a/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go b/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go new file mode 100644 index 000000000..07d90f7dd --- /dev/null +++ b/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go @@ -0,0 +1,226 @@ +package managementrouter_test + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +// deleteRuleTestVars holds the shared rule fixtures used across delete tests. +type deleteRuleTestVars struct { + userRule1Id string + userRule2Id string + platformRuleId string + router http.Handler +} + +func newDeleteRuleRouter(t *testing.T) deleteRuleTestVars { + t.Helper() + + userRule1Name := "u1" + userRule1 := monitoringv1.Rule{Alert: userRule1Name, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr"}} + userRule1Id := alertrule.GetAlertingRuleId(&userRule1) + + userRule2Name := "u2" + userRule2 := monitoringv1.Rule{Alert: userRule2Name, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr"}} + userRule2Id := alertrule.GetAlertingRuleId(&userRule2) + + platformRuleName := "platform" + platformRule := monitoringv1.Rule{Alert: platformRuleName, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "platform-namespace-1", k8s.PrometheusRuleLabelName: "platform-pr"}} + platformRuleId := alertrule.GetAlertingRuleId(&platformRule) + + mockK8s := &testutils.MockClient{} + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + {Rules: []monitoringv1.Rule{userRule1, userRule2, platformRule}}, + }, + }, + }, true, nil + }, + DeleteFunc: func(_ context.Context, _, _ string) error { return nil }, + UpdateFunc: func(_ context.Context, _ monitoringv1.PrometheusRule) error { return nil }, + } + } + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + switch id { + case userRule1Id: + return userRule1, true + case userRule2Id: + return userRule2, true + case platformRuleId: + return platformRule, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.AlertingRulesFunc = func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, name string) (*osmv1.AlertingRule, bool, error) { + if name == "platform-alert-rules" { + return &osmv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "platform-alert-rules", + Namespace: k8s.ClusterMonitoringNamespace, + }, + Spec: osmv1.AlertingRuleSpec{ + Groups: []osmv1.RuleGroup{ + {Name: "test-group", Rules: []osmv1.Rule{{Alert: platformRuleName}}}, + }, + }, + }, true, nil + } + return nil, false, nil + }, + UpdateFunc: func(_ context.Context, _ osmv1.AlertingRule) error { return nil }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return strings.HasPrefix(name, "platform-namespace-") + }, + } + } + + mgmt := management.New(context.Background(), mockK8s) + r := managementrouter.New(mgmt) + + return deleteRuleTestVars{ + userRule1Id: userRule1Id, + userRule2Id: userRule2Id, + platformRuleId: platformRuleId, + router: r, + } +} + +func deleteRequest(t *testing.T, router http.Handler, body any) *httptest.ResponseRecorder { + t.Helper() + buf, err := json.Marshal(body) + if err != nil { + t.Fatalf("marshal request body: %v", err) + } + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewReader(buf)) + req.Header.Set("Authorization", "Bearer test-token") + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + return w +} + +func TestBulkDeleteUserDefinedAlertRules_MixedResults(t *testing.T) { + tv := newDeleteRuleRouter(t) + + w := deleteRequest(t, tv.router, map[string]any{ + "ruleIds": []string{tv.userRule1Id, tv.platformRuleId, ""}, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + + var resp struct { + Rules []struct { + Id string `json:"id"` + StatusCode int `json:"status_code"` + Message string `json:"message"` + } `json:"rules"` + } + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("decode response: %v", err) + } + if len(resp.Rules) != 3 { + t.Fatalf("expected 3 results, got %d", len(resp.Rules)) + } + if resp.Rules[0].Id != tv.userRule1Id || resp.Rules[0].StatusCode != http.StatusNoContent { + t.Errorf("rule[0]: want id=%s status=204, got id=%s status=%d msg=%s", tv.userRule1Id, resp.Rules[0].Id, resp.Rules[0].StatusCode, resp.Rules[0].Message) + } + if resp.Rules[1].Id != tv.platformRuleId || resp.Rules[1].StatusCode != http.StatusNoContent { + t.Errorf("rule[1]: want id=%s status=204, got id=%s status=%d msg=%s", tv.platformRuleId, resp.Rules[1].Id, resp.Rules[1].StatusCode, resp.Rules[1].Message) + } + if resp.Rules[2].Id != "" || resp.Rules[2].StatusCode != http.StatusBadRequest { + t.Errorf("rule[2]: want id='' status=400, got id=%s status=%d", resp.Rules[2].Id, resp.Rules[2].StatusCode) + } + if !strings.Contains(resp.Rules[2].Message, "missing ruleId") { + t.Errorf("rule[2]: want 'missing ruleId' in message, got %q", resp.Rules[2].Message) + } +} + +func TestBulkDeleteUserDefinedAlertRules_AllSucceed(t *testing.T) { + tv := newDeleteRuleRouter(t) + + w := deleteRequest(t, tv.router, map[string]any{ + "ruleIds": []string{tv.userRule1Id, tv.userRule2Id}, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + var resp struct { + Rules []struct { + Id string `json:"id"` + StatusCode int `json:"status_code"` + Message string `json:"message"` + } `json:"rules"` + } + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("decode response: %v", err) + } + if len(resp.Rules) != 2 { + t.Fatalf("expected 2 results, got %d", len(resp.Rules)) + } + for i, rule := range resp.Rules { + if rule.StatusCode != http.StatusNoContent { + t.Errorf("rule[%d]: expected 204, got %d: %s", i, rule.StatusCode, rule.Message) + } + } +} + +func TestBulkDeleteUserDefinedAlertRules_InvalidBody(t *testing.T) { + tv := newDeleteRuleRouter(t) + + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewBufferString("{")) + req.Header.Set("Authorization", "Bearer test-token") + w := httptest.NewRecorder() + tv.router.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } + if !strings.Contains(w.Body.String(), "invalid request body") { + t.Errorf("expected 'invalid request body', got: %s", w.Body.String()) + } +} + +func TestBulkDeleteUserDefinedAlertRules_EmptyRuleIds(t *testing.T) { + tv := newDeleteRuleRouter(t) + + w := deleteRequest(t, tv.router, map[string]interface{}{"ruleIds": []string{}}) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } + if !strings.Contains(w.Body.String(), "ruleIds is required") { + t.Errorf("expected 'ruleIds is required', got: %s", w.Body.String()) + } +} diff --git a/pkg/management/alert_rule_id_match.go b/pkg/management/alert_rule_id_match.go new file mode 100644 index 000000000..b202b16dd --- /dev/null +++ b/pkg/management/alert_rule_id_match.go @@ -0,0 +1,49 @@ +package management + +import ( + "context" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" +) + +// ruleMatchesAlertRuleID returns true when the provided rule's computed, deterministic +// alert rule id matches the requested id. +// +// Note: we intentionally compute the id from the rule spec rather than trusting any +// label value, since labels can be user-controlled/tampered with. +func ruleMatchesAlertRuleID(rule monitoringv1.Rule, alertRuleId string) bool { + return alertRuleId != "" && alertRuleId == alertrule.GetAlertingRuleId(&rule) +} + +func (c *client) getOriginalPlatformRule(ctx context.Context, namespace string, name string, alertRuleId string) (*monitoringv1.Rule, error) { + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, namespace, name) + if err != nil { + return nil, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", namespace, name, err) + } + + if !found { + return nil, &NotFoundError{ + Resource: "PrometheusRule", + Id: alertRuleId, + AdditionalInfo: fmt.Sprintf("PrometheusRule %s/%s not found", namespace, name), + } + } + + for groupIdx := range pr.Spec.Groups { + for ruleIdx := range pr.Spec.Groups[groupIdx].Rules { + rule := &pr.Spec.Groups[groupIdx].Rules[ruleIdx] + if ruleMatchesAlertRuleID(*rule, alertRuleId) { + return rule, nil + } + } + } + + return nil, &NotFoundError{ + Resource: "AlertRule", + Id: alertRuleId, + AdditionalInfo: fmt.Sprintf("in PrometheusRule %s/%s", namespace, name), + } +} diff --git a/pkg/management/alert_rule_preconditions.go b/pkg/management/alert_rule_preconditions.go new file mode 100644 index 000000000..3e730156c --- /dev/null +++ b/pkg/management/alert_rule_preconditions.go @@ -0,0 +1,48 @@ +package management + +import ( + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +func notAllowedGitOpsRemove() error { + return &NotAllowedError{Message: "This alert is managed by GitOps; remove it in Git."} +} +func notAllowedOperatorDelete() error { + return &NotAllowedError{Message: "This alert is managed by an operator; it can't be deleted and can only be silenced."} +} + +func isRuleManagedByGitOpsLabel(relabeled monitoringv1.Rule) bool { + if relabeled.Labels == nil { + return false + } + return relabeled.Labels[managementlabels.RuleManagedByLabel] == managementlabels.ManagedByGitOps +} + +func isRuleManagedByOperator(relabeled monitoringv1.Rule) bool { + return relabeled.Labels != nil && relabeled.Labels[managementlabels.RuleManagedByLabel] == managementlabels.ManagedByOperator +} + +func validateUserDeletePreconditions(relabeled monitoringv1.Rule) error { + if isRuleManagedByGitOpsLabel(relabeled) { + return notAllowedGitOpsRemove() + } + if isRuleManagedByOperator(relabeled) { + return notAllowedOperatorDelete() + } + return nil +} + +func validatePlatformDeletePreconditions(ar *osmv1.AlertingRule) error { + if ar != nil { + if gitOpsManaged, operatorManaged := k8s.IsExternallyManagedObject(ar); gitOpsManaged { + return notAllowedGitOpsRemove() + } else if operatorManaged { + return notAllowedOperatorDelete() + } + } + return nil +} diff --git a/pkg/management/delete_user_defined_alert_rule_by_id.go b/pkg/management/delete_user_defined_alert_rule_by_id.go new file mode 100644 index 000000000..e2848e8a3 --- /dev/null +++ b/pkg/management/delete_user_defined_alert_rule_by_id.go @@ -0,0 +1,156 @@ +package management + +import ( + "context" + "fmt" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/managementlabels" +) + +func (c *client) DeleteUserDefinedAlertRuleById(ctx context.Context, alertRuleId string) error { + rule, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found { + return &NotFoundError{Resource: "AlertRule", Id: alertRuleId} + } + + namespace := rule.Labels[k8s.PrometheusRuleLabelNamespace] + name := rule.Labels[k8s.PrometheusRuleLabelName] + + // Disallow deleting any GitOps-managed rule + if err := validateUserDeletePreconditions(rule); err != nil { + return err + } + + if c.isPlatformManagedPrometheusRule(types.NamespacedName{Namespace: namespace, Name: name}) { + return c.deletePlatformAlertRuleById(ctx, rule, alertRuleId) + } + + // user-source branch: preconditions were validated above + + return c.deleteUserAlertRuleById(ctx, namespace, name, alertRuleId) +} + +func (c *client) filterRulesById(rules []monitoringv1.Rule, alertRuleId string, updated *bool) []monitoringv1.Rule { + var newRules []monitoringv1.Rule + + for _, rule := range rules { + if ruleMatchesAlertRuleID(rule, alertRuleId) { + *updated = true + continue + } + newRules = append(newRules, rule) + } + + return newRules +} + +// deletePlatformAlertRuleById deletes a platform rule from its owning AlertingRule CR. +func (c *client) deletePlatformAlertRuleById(ctx context.Context, relabeled monitoringv1.Rule, alertRuleId string) error { + namespace := relabeled.Labels[k8s.PrometheusRuleLabelNamespace] + name := relabeled.Labels[k8s.PrometheusRuleLabelName] + + // Delete from owning AlertingRule + arName := relabeled.Labels[managementlabels.AlertingRuleLabelName] + if arName == "" { + arName = defaultAlertingRuleName + } + ar, found, err := c.k8sClient.AlertingRules().Get(ctx, arName) + if err != nil { + return fmt.Errorf("failed to get AlertingRule %s: %w", arName, err) + } + if !found || ar == nil { + return &NotFoundError{Resource: "AlertingRule", Id: arName} + } + // Common preconditions for platform delete + if err := validatePlatformDeletePreconditions(ar); err != nil { + return err + } + + // Find original platform rule for reliable match by alert name + originalRule, err := c.getOriginalPlatformRule(ctx, namespace, name, alertRuleId) + if err != nil { + return err + } + + updated, newGroups := removeAlertFromAlertingRuleGroups(ar.Spec.Groups, originalRule.Alert) + if !updated { + return &NotFoundError{ + Resource: "AlertRule", + Id: alertRuleId, + AdditionalInfo: fmt.Sprintf("alert %q not found in AlertingRule %s", originalRule.Alert, arName), + } + } + ar.Spec.Groups = newGroups + if err := c.k8sClient.AlertingRules().Update(ctx, *ar); err != nil { + return fmt.Errorf("failed to update AlertingRule %s: %w", ar.Name, err) + } + return nil +} + +// deleteUserAlertRuleById deletes a user-sourced rule from its PrometheusRule. +func (c *client) deleteUserAlertRuleById(ctx context.Context, namespace, name, alertRuleId string) error { + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, namespace, name) + if err != nil { + return err + } + if !found { + return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", namespace, name)} + } + + updated := false + var newGroups []monitoringv1.RuleGroup + for _, group := range pr.Spec.Groups { + newRules := c.filterRulesById(group.Rules, alertRuleId, &updated) + if len(newRules) > 0 { + group.Rules = newRules + newGroups = append(newGroups, group) + } else if len(newRules) != len(group.Rules) { + updated = true + } + } + if !updated { + return &NotFoundError{Resource: "AlertRule", Id: alertRuleId, AdditionalInfo: "rule not found in the given PrometheusRule"} + } + + if len(newGroups) == 0 { + if err := c.k8sClient.PrometheusRules().Delete(ctx, pr.Namespace, pr.Name); err != nil { + return fmt.Errorf("failed to delete PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) + } + return nil + } + + pr.Spec.Groups = newGroups + if err := c.k8sClient.PrometheusRules().Update(ctx, *pr); err != nil { + return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) + } + return nil +} + +// removeAlertFromAlertingRuleGroups removes all instances of an alert by alert name across groups. +// Returns whether any change occurred and the resulting groups (dropping empty groups). +func removeAlertFromAlertingRuleGroups(groups []osmv1.RuleGroup, alertName string) (bool, []osmv1.RuleGroup) { + updated := false + newGroups := make([]osmv1.RuleGroup, 0, len(groups)) + for _, g := range groups { + var kept []osmv1.Rule + for _, r := range g.Rules { + if r.Alert == alertName { + updated = true + continue + } + kept = append(kept, r) + } + if len(kept) > 0 { + g.Rules = kept + newGroups = append(newGroups, g) + } else if len(g.Rules) > 0 { + updated = true + } + } + return updated, newGroups +} diff --git a/pkg/management/delete_user_defined_alert_rule_by_id_test.go b/pkg/management/delete_user_defined_alert_rule_by_id_test.go new file mode 100644 index 000000000..18b67604f --- /dev/null +++ b/pkg/management/delete_user_defined_alert_rule_by_id_test.go @@ -0,0 +1,504 @@ +package management_test + +import ( + "context" + "errors" + "strings" + "testing" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var ( + deleteUserRule1 = monitoringv1.Rule{ + Alert: "UserAlert1", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", + }, + } + deleteUserRule1Id = alertrule.GetAlertingRuleId(&deleteUserRule1) + + deleteUserRule2 = monitoringv1.Rule{ + Alert: "UserAlert2", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", + }, + } + + deletePlatformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + }, + } + deletePlatformRuleId = alertrule.GetAlertingRuleId(&deletePlatformRule) +) + +func newDeleteClient(mockK8s *testutils.MockClient) management.Client { + return management.New(context.Background(), mockK8s) +} + +func TestDeleteUserDefinedAlertRuleById_NotFoundInRelabeledRules(t *testing.T) { + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, _ string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } + } + client := newDeleteClient(mockK8s) + + err := client.DeleteUserDefinedAlertRuleById(context.Background(), "nonexistent-id") + if err == nil { + t.Fatal("expected error, got nil") + } + var notFoundErr *management.NotFoundError + if !errors.As(err, ¬FoundErr) { + t.Fatalf("expected NotFoundError, got %T: %v", err, err) + } + if notFoundErr.Resource != "AlertRule" { + t.Errorf("expected Resource='AlertRule', got %q", notFoundErr.Resource) + } + if notFoundErr.Id != "nonexistent-id" { + t.Errorf("expected Id='nonexistent-id', got %q", notFoundErr.Id) + } +} + +func TestDeleteUserDefinedAlertRuleById_PlatformRuleNotOperatorManaged(t *testing.T) { + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deletePlatformRuleId { + return deletePlatformRule, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "openshift-monitoring" + }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{{Name: "test-group", Rules: []monitoringv1.Rule{deletePlatformRule}}}, + }, + }, true, nil + }, + } + } + mockK8s.AlertingRulesFunc = func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, name string) (*osmv1.AlertingRule, bool, error) { + if name == "platform-alert-rules" { + return &osmv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "platform-alert-rules", + Namespace: k8s.ClusterMonitoringNamespace, + }, + Spec: osmv1.AlertingRuleSpec{ + Groups: []osmv1.RuleGroup{ + {Name: "test-group", Rules: []osmv1.Rule{{Alert: deletePlatformRule.Alert}}}, + }, + }, + }, true, nil + } + return nil, false, nil + }, + UpdateFunc: func(_ context.Context, _ osmv1.AlertingRule) error { return nil }, + } + } + + if err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deletePlatformRuleId); err != nil { + t.Fatalf("expected no error, got: %v", err) + } +} + +func TestDeleteUserDefinedAlertRuleById_PlatformRuleGitOpsManaged(t *testing.T) { + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deletePlatformRuleId { + return deletePlatformRule, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return name == "openshift-monitoring" }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{{Name: "grp", Rules: []monitoringv1.Rule{deletePlatformRule}}}, + }, + }, true, nil + }, + } + } + mockK8s.AlertingRulesFunc = func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, _ string) (*osmv1.AlertingRule, bool, error) { + return &osmv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"argocd.argoproj.io/tracking-id": "gitops"}, + }, + Spec: osmv1.AlertingRuleSpec{ + Groups: []osmv1.RuleGroup{{Name: "grp", Rules: []osmv1.Rule{{Alert: deletePlatformRule.Alert}}}}, + }, + }, true, nil + }, + } + } + + err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deletePlatformRuleId) + if err == nil { + t.Fatal("expected error for GitOps-managed rule") + } + if !errors.As(err, new(*management.NotAllowedError)) { + t.Errorf("expected NotAllowedError, got %T: %v", err, err) + } +} + +func TestDeleteUserDefinedAlertRuleById_PlatformRuleOperatorManaged(t *testing.T) { + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deletePlatformRuleId { + return deletePlatformRule, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { return name == "openshift-monitoring" }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{{Name: "grp", Rules: []monitoringv1.Rule{deletePlatformRule}}}, + }, + }, true, nil + }, + } + } + controller := true + mockK8s.AlertingRulesFunc = func() k8s.AlertingRuleInterface { + return &testutils.MockAlertingRuleInterface{ + GetFunc: func(_ context.Context, _ string) (*osmv1.AlertingRule, bool, error) { + return &osmv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + {Kind: "SomeOperatorKind", Name: "operator", Controller: &controller}, + }, + }, + Spec: osmv1.AlertingRuleSpec{ + Groups: []osmv1.RuleGroup{{Name: "grp", Rules: []osmv1.Rule{{Alert: deletePlatformRule.Alert}}}}, + }, + }, true, nil + }, + } + } + + err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deletePlatformRuleId) + if err == nil { + t.Fatal("expected error for operator-managed rule") + } + if !errors.As(err, new(*management.NotAllowedError)) { + t.Errorf("expected NotAllowedError, got %T: %v", err, err) + } +} + +func TestDeleteUserDefinedAlertRuleById_PrometheusRuleNotFound(t *testing.T) { + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deleteUserRule1Id { + return deleteUserRule1, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return false }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, _, _ string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, nil + }, + } + } + + err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deleteUserRule1Id) + if err == nil { + t.Fatal("expected error") + } + var notFoundErr *management.NotFoundError + if !errors.As(err, ¬FoundErr) { + t.Fatalf("expected NotFoundError, got %T: %v", err, err) + } + if notFoundErr.Resource != "PrometheusRule" { + t.Errorf("expected Resource='PrometheusRule', got %q", notFoundErr.Resource) + } +} + +func TestDeleteUserDefinedAlertRuleById_PrometheusRuleGetError(t *testing.T) { + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deleteUserRule1Id { + return deleteUserRule1, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return false }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, _, _ string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, errors.New("failed to get PrometheusRule") + }, + } + } + + err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deleteUserRule1Id) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "failed to get PrometheusRule") { + t.Errorf("expected 'failed to get PrometheusRule' in error, got: %v", err) + } +} + +func TestDeleteUserDefinedAlertRuleById_RuleNotInPrometheusRule(t *testing.T) { + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deleteUserRule1Id { + return deleteUserRule1, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return false }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{{Name: "test-group", Rules: []monitoringv1.Rule{deleteUserRule2}}}, + }, + }, true, nil + }, + } + } + + err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deleteUserRule1Id) + if err == nil { + t.Fatal("expected NotFoundError") + } + var notFoundErr *management.NotFoundError + if !errors.As(err, ¬FoundErr) { + t.Fatalf("expected NotFoundError, got %T: %v", err, err) + } + if notFoundErr.Id != deleteUserRule1Id { + t.Errorf("expected Id=%q, got %q", deleteUserRule1Id, notFoundErr.Id) + } +} + +func TestDeleteUserDefinedAlertRuleById_DeletesEntirePrometheusRule(t *testing.T) { + var deleteCalled bool + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deleteUserRule1Id { + return deleteUserRule1, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return false }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{{Name: "test-group", Rules: []monitoringv1.Rule{deleteUserRule1}}}, + }, + }, true, nil + }, + DeleteFunc: func(_ context.Context, _, _ string) error { + deleteCalled = true + return nil + }, + } + } + + if err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deleteUserRule1Id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !deleteCalled { + t.Error("expected PrometheusRule.Delete to be called") + } +} + +func TestDeleteUserDefinedAlertRuleById_UpdatesRemainingRules(t *testing.T) { + var updatedPR *monitoringv1.PrometheusRule + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deleteUserRule1Id { + return deleteUserRule1, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return false }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{{Name: "test-group", Rules: []monitoringv1.Rule{deleteUserRule1, deleteUserRule2}}}, + }, + }, true, nil + }, + UpdateFunc: func(_ context.Context, pr monitoringv1.PrometheusRule) error { + updatedPR = &pr + return nil + }, + } + } + + if err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deleteUserRule1Id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if updatedPR == nil { + t.Fatal("expected PrometheusRule.Update to be called") + } + if len(updatedPR.Spec.Groups) != 1 || len(updatedPR.Spec.Groups[0].Rules) != 1 { + t.Errorf("expected 1 group with 1 rule, got groups=%v", updatedPR.Spec.Groups) + } + if updatedPR.Spec.Groups[0].Rules[0].Alert != "UserAlert2" { + t.Errorf("expected remaining rule to be UserAlert2, got %q", updatedPR.Spec.Groups[0].Rules[0].Alert) + } +} + +func TestDeleteUserDefinedAlertRuleById_RemovesEmptyGroup(t *testing.T) { + anotherRule := monitoringv1.Rule{ + Alert: "AnotherAlert", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", + }, + } + + var updatedPR *monitoringv1.PrometheusRule + mockK8s := &testutils.MockClient{} + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(_ context.Context, id string) (monitoringv1.Rule, bool) { + if id == deleteUserRule1Id { + return deleteUserRule1, true + } + return monitoringv1.Rule{}, false + }, + } + } + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(_ string) bool { return false }, + } + } + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(_ context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + {Name: "group-to-be-empty", Rules: []monitoringv1.Rule{deleteUserRule1}}, + {Name: "group-with-rules", Rules: []monitoringv1.Rule{anotherRule}}, + }, + }, + }, true, nil + }, + UpdateFunc: func(_ context.Context, pr monitoringv1.PrometheusRule) error { + updatedPR = &pr + return nil + }, + } + } + + if err := newDeleteClient(mockK8s).DeleteUserDefinedAlertRuleById(context.Background(), deleteUserRule1Id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(updatedPR.Spec.Groups) != 1 || updatedPR.Spec.Groups[0].Name != "group-with-rules" { + t.Errorf("expected only 'group-with-rules' to remain, got %v", updatedPR.Spec.Groups) + } +} diff --git a/pkg/management/types.go b/pkg/management/types.go index 5ec3fc055..837f75e88 100644 --- a/pkg/management/types.go +++ b/pkg/management/types.go @@ -11,6 +11,9 @@ type Client interface { // CreateUserDefinedAlertRule creates a new user-defined alert rule CreateUserDefinedAlertRule(ctx context.Context, alertRule monitoringv1.Rule, prOptions PrometheusRuleOptions) (alertRuleId string, err error) + // DeleteUserDefinedAlertRuleById deletes a user-defined alert rule by its ID + DeleteUserDefinedAlertRuleById(ctx context.Context, alertRuleId string) error + // CreatePlatformAlertRule creates a new platform alert rule CreatePlatformAlertRule(ctx context.Context, alertRule monitoringv1.Rule) (alertRuleId string, err error) } diff --git a/test/e2e/delete_alert_rule_test.go b/test/e2e/delete_alert_rule_test.go new file mode 100644 index 000000000..fb5b6445d --- /dev/null +++ b/test/e2e/delete_alert_rule_test.go @@ -0,0 +1,106 @@ +package e2e + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + +func TestDeleteAlertRule(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-delete-rule", false) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + ruleNames := []string{"DeleteAlert1", "DeleteAlert2", "KeepAlert3"} + ruleIDs := make([]string, 0, len(ruleNames)) + + for _, name := range ruleNames { + id := createRuleViaAPI(t, f, ctx, testNamespace, name, "e2e-delete-pr") + ruleIDs = append(ruleIDs, id) + } + + t.Logf("Created 3 rules with IDs: %v", ruleIDs) + + deleteReq := managementrouter.BulkDeleteAlertRulesRequest{ + RuleIds: []string{ruleIDs[0], ruleIDs[1]}, + } + reqBody, err := json.Marshal(deleteReq) + if err != nil { + t.Fatalf("Failed to marshal delete request: %v", err) + } + + deleteURL := f.PluginURL + "/api/v1/alerting/rules" + req, err := http.NewRequestWithContext(ctx, http.MethodDelete, deleteURL, bytes.NewBuffer(reqBody)) + if err != nil { + t.Fatalf("Failed to create delete request: %v", err) + } + req.Header.Set("Content-Type", "application/json") + if f.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + + resp, err := f.HTTPClient().Do(req) + if err != nil { + t.Fatalf("Failed to make delete request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("Expected status 200, got %d. Body: %s", resp.StatusCode, string(body)) + } + + var deleteResp managementrouter.BulkDeleteAlertRulesResponse + if err := json.NewDecoder(resp.Body).Decode(&deleteResp); err != nil { + t.Fatalf("Failed to decode delete response: %v", err) + } + + if len(deleteResp.Rules) != 2 { + t.Fatalf("Expected 2 results, got %d", len(deleteResp.Rules)) + } + for _, result := range deleteResp.Rules { + if result.StatusCode != http.StatusNoContent { + t.Errorf("Rule %s deletion failed with status %d: %v", result.Id, result.StatusCode, result.Message) + } + } + + promRule, err := f.Monitoringv1clientset.MonitoringV1().PrometheusRules(testNamespace).Get( + ctx, "e2e-delete-pr", metav1.GetOptions{}, + ) + if err != nil { + t.Fatalf("Failed to get PrometheusRule after deletion: %v", err) + } + + var remainingAlerts []string + for _, group := range promRule.Spec.Groups { + for _, rule := range group.Rules { + remainingAlerts = append(remainingAlerts, rule.Alert) + } + } + + if len(remainingAlerts) != 1 { + t.Fatalf("Expected 1 remaining rule, got %d: %v", len(remainingAlerts), remainingAlerts) + } + if remainingAlerts[0] != "KeepAlert3" { + t.Errorf("Expected remaining rule 'KeepAlert3', got %q", remainingAlerts[0]) + } + + t.Log("Delete alert rule e2e test passed successfully") +} diff --git a/test/e2e/helpers_test.go b/test/e2e/helpers_test.go index 3f060d7fd..8071266fe 100644 --- a/test/e2e/helpers_test.go +++ b/test/e2e/helpers_test.go @@ -1,3 +1,70 @@ package e2e +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "testing" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + func strPtr(s string) *string { return &s } + +func createRuleViaAPI(t *testing.T, f *framework.Framework, ctx context.Context, namespace, alertName, prName string) string { + t.Helper() + + payload := managementrouter.CreateAlertRuleRequest{ + AlertingRule: &managementrouter.AlertRuleSpec{ + Alert: &alertName, + Expr: strPtr("vector(1)"), + For: strPtr("1m"), + Labels: &map[string]string{ + "severity": "info", + }, + }, + PrometheusRule: &managementrouter.PrometheusRuleTarget{ + PrometheusRuleName: prName, + PrometheusRuleNamespace: namespace, + }, + } + + reqBody, err := json.Marshal(payload) + if err != nil { + t.Fatalf("Failed to marshal create request for %s: %v", alertName, err) + } + + createURL := f.PluginURL + "/api/v1/alerting/rules" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, createURL, bytes.NewBuffer(reqBody)) + if err != nil { + t.Fatalf("Failed to create HTTP request for %s: %v", alertName, err) + } + req.Header.Set("Content-Type", "application/json") + if f.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+f.BearerToken) + } + + resp, err := f.HTTPClient().Do(req) + if err != nil { + t.Fatalf("Failed to make create request for %s: %v", alertName, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusCreated { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("Create %s: expected 201, got %d. Body: %s", alertName, resp.StatusCode, string(body)) + } + + var createResp managementrouter.CreateAlertRuleResponse + if err := json.NewDecoder(resp.Body).Decode(&createResp); err != nil { + t.Fatalf("Failed to decode create response for %s: %v", alertName, err) + } + + if createResp.Id == "" { + t.Fatalf("Got empty ID for %s", alertName) + } + return createResp.Id +}