From 3f5ac43bf0cbefca447ab9219c092b017e4071cf Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Mon, 23 Mar 2026 17:53:48 +0100 Subject: [PATCH 01/25] [CRE] [1/5] Gateway handler for confidential relay Add a new gateway handler type "confidential-compute-relay" that fans out enclave JSON-RPC requests to relay DON nodes and aggregates responses using F+1 quorum. Supports secrets_get and capability_exec methods. Part of #21635 --- core/services/gateway/handler_factory.go | 14 +- .../handlers/confidentialrelay/aggregator.go | 53 ++ .../handlers/confidentialrelay/handler.go | 431 ++++++++++++++ .../confidentialrelay/handler_test.go | 533 ++++++++++++++++++ deployment/cre/jobs/pkg/gateway_job.go | 31 +- 5 files changed, 1055 insertions(+), 7 deletions(-) create mode 100644 core/services/gateway/handlers/confidentialrelay/aggregator.go create mode 100644 core/services/gateway/handlers/confidentialrelay/handler.go create mode 100644 core/services/gateway/handlers/confidentialrelay/handler_test.go diff --git a/core/services/gateway/handler_factory.go b/core/services/gateway/handler_factory.go index 76172b3dc9b..5078a9ec6ac 100644 --- a/core/services/gateway/handler_factory.go +++ b/core/services/gateway/handler_factory.go @@ -18,6 +18,7 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/capabilities" v2 "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/capabilities/v2" + "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/confidentialrelay" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/functions" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/vault" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/network" @@ -25,11 +26,12 @@ import ( ) const ( - FunctionsHandlerType HandlerType = "functions" - DummyHandlerType HandlerType = "dummy" - 
WebAPICapabilitiesType HandlerType = "web-api-capabilities" // Handler for v0.1 HTTP capabilities for DAG workflows - HTTPCapabilityType HandlerType = "http-capabilities" // Handler for v1.0 HTTP capabilities for NoDAG workflows - VaultHandlerType HandlerType = "vault" + FunctionsHandlerType HandlerType = "functions" + DummyHandlerType HandlerType = "dummy" + WebAPICapabilitiesType HandlerType = "web-api-capabilities" // Handler for v0.1 HTTP capabilities for DAG workflows + HTTPCapabilityType HandlerType = "http-capabilities" // Handler for v1.0 HTTP capabilities for NoDAG workflows + VaultHandlerType HandlerType = "vault" + ConfidentialRelayHandlerType HandlerType = "confidential-compute-relay" ) type handlerFactory struct { @@ -87,6 +89,8 @@ func (hf *handlerFactory) NewHandler( case VaultHandlerType: requestAuthorizer := vaultcap.NewRequestAuthorizer(hf.lggr, hf.workflowRegistrySyncer) return vault.NewHandler(handlerConfig, donConfig, don, hf.capabilitiesRegistry, requestAuthorizer, hf.lggr, clockwork.NewRealClock(), hf.lf) + case ConfidentialRelayHandlerType: + return confidentialrelay.NewHandler(handlerConfig, donConfig, don, hf.lggr, clockwork.NewRealClock()) default: return nil, fmt.Errorf("unsupported handler type %s", handlerType) } diff --git a/core/services/gateway/handlers/confidentialrelay/aggregator.go b/core/services/gateway/handlers/confidentialrelay/aggregator.go new file mode 100644 index 00000000000..64cec11404f --- /dev/null +++ b/core/services/gateway/handlers/confidentialrelay/aggregator.go @@ -0,0 +1,53 @@ +package confidentialrelay + +import ( + "encoding/json" + "errors" + "strconv" + + jsonrpc "github.com/smartcontractkit/chainlink-common/pkg/jsonrpc2" + "github.com/smartcontractkit/chainlink-common/pkg/logger" +) + +var ( + errInsufficientResponsesForQuorum = errors.New("insufficient valid responses to reach quorum") + errQuorumUnobtainable = errors.New("quorum unobtainable") +) + +type aggregator struct{} + +func (a *aggregator) 
Aggregate(resps map[string]jsonrpc.Response[json.RawMessage], donF int, donMembersCount int, l logger.Logger) (*jsonrpc.Response[json.RawMessage], error) { + // F+1 is sufficient: each honest node independently validates the enclave's + // Nitro attestation, so F+1 matching responses guarantees at least one + // honest node vouched for the result. + requiredQuorum := donF + 1 + + if len(resps) < requiredQuorum { + return nil, errInsufficientResponsesForQuorum + } + + shaToCount := map[string]int{} + maxShaToCount := 0 + for _, r := range resps { + sha, err := r.Digest() + if err != nil { + l.Errorw("failed to compute digest of response during quorum validation, skipping...", "error", err) + continue + } + shaToCount[sha]++ + if shaToCount[sha] > maxShaToCount { + maxShaToCount = shaToCount[sha] + } + if shaToCount[sha] >= requiredQuorum { + return &r, nil + } + } + + remainingResponses := donMembersCount - len(resps) + if maxShaToCount+remainingResponses < requiredQuorum { + l.Warnw("quorum unattainable for request", "requiredQuorum", requiredQuorum, "remainingResponses", remainingResponses, "maxShaToCount", maxShaToCount) + return nil, errors.New(errQuorumUnobtainable.Error() + ". RequiredQuorum=" + strconv.Itoa(requiredQuorum) + ". 
maxShaToCount=" + strconv.Itoa(maxShaToCount) + " remainingResponses=" + strconv.Itoa(remainingResponses)) + } + + return nil, errInsufficientResponsesForQuorum +} diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go new file mode 100644 index 00000000000..17511587011 --- /dev/null +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -0,0 +1,431 @@ +package confidentialrelay + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "maps" + "strconv" + "sync" + "time" + + "github.com/jonboulle/clockwork" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + + "github.com/smartcontractkit/chainlink-common/pkg/beholder" + relaytypes "github.com/smartcontractkit/chainlink-common/pkg/capabilities/v2/actions/confidentialrelay" + jsonrpc "github.com/smartcontractkit/chainlink-common/pkg/jsonrpc2" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/ratelimit" + "github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink/v2/core/services/gateway/api" + "github.com/smartcontractkit/chainlink/v2/core/services/gateway/config" + gwhandlers "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers" +) + +const ( + defaultCleanUpPeriod = 5 * time.Second + + // Re-exported from chainlink-common for local use and test convenience. 
+ MethodSecretsGet = relaytypes.MethodSecretsGet + MethodCapabilityExec = relaytypes.MethodCapabilityExec +) + +var _ gwhandlers.Handler = (*handler)(nil) + +type metrics struct { + requestInternalError metric.Int64Counter + requestUserError metric.Int64Counter + requestSuccess metric.Int64Counter +} + +func newMetrics() (*metrics, error) { + requestInternalError, err := beholder.GetMeter().Int64Counter("confidential_relay_gateway_request_internal_error") + if err != nil { + return nil, fmt.Errorf("failed to register internal error counter: %w", err) + } + + requestUserError, err := beholder.GetMeter().Int64Counter("confidential_relay_gateway_request_user_error") + if err != nil { + return nil, fmt.Errorf("failed to register user error counter: %w", err) + } + + requestSuccess, err := beholder.GetMeter().Int64Counter("confidential_relay_gateway_request_success") + if err != nil { + return nil, fmt.Errorf("failed to register success counter: %w", err) + } + + return &metrics{ + requestInternalError: requestInternalError, + requestUserError: requestUserError, + requestSuccess: requestSuccess, + }, nil +} + +type activeRequest struct { + req jsonrpc.Request[json.RawMessage] + responses map[string]*jsonrpc.Response[json.RawMessage] + mu sync.Mutex + + createdAt time.Time + gwhandlers.Callback +} + +func (ar *activeRequest) addResponseForNode(nodeAddr string, resp *jsonrpc.Response[json.RawMessage]) bool { + ar.mu.Lock() + defer ar.mu.Unlock() + _, exists := ar.responses[nodeAddr] + if exists { + return false + } + + ar.responses[nodeAddr] = resp + return true +} + +func (ar *activeRequest) copiedResponses() map[string]jsonrpc.Response[json.RawMessage] { + ar.mu.Lock() + defer ar.mu.Unlock() + copied := make(map[string]jsonrpc.Response[json.RawMessage], len(ar.responses)) + for k, response := range ar.responses { + var copiedResponse jsonrpc.Response[json.RawMessage] + if response != nil { + copiedResponse = *response + if response.Result != nil { + copiedResult := 
*response.Result + copiedResponse.Result = &copiedResult + } + if response.Error != nil { + copiedError := *response.Error + copiedResponse.Error = &copiedError + } + } + copied[k] = copiedResponse + } + return copied +} + +type relayAggregator interface { + Aggregate(resps map[string]jsonrpc.Response[json.RawMessage], donF int, donMembersCount int, l logger.Logger) (*jsonrpc.Response[json.RawMessage], error) +} + +type Config struct { + NodeRateLimiter ratelimit.RateLimiterConfig `json:"nodeRateLimiter"` + RequestTimeoutSec int `json:"requestTimeoutSec"` +} + +type handler struct { + services.StateMachine + donConfig *config.DONConfig + don gwhandlers.DON + codec api.JsonRPCCodec + lggr logger.Logger + mu sync.RWMutex + stopCh services.StopChan + + nodeRateLimiter *ratelimit.RateLimiter + requestTimeout time.Duration + + activeRequests map[string]*activeRequest + metrics *metrics + + aggregator relayAggregator + + clock clockwork.Clock +} + +func (h *handler) HealthReport() map[string]error { + return map[string]error{h.Name(): h.Healthy()} +} + +func (h *handler) Name() string { + return h.lggr.Name() +} + +func NewHandler(methodConfig json.RawMessage, donConfig *config.DONConfig, don gwhandlers.DON, lggr logger.Logger, clock clockwork.Clock) (*handler, error) { + var cfg Config + if err := json.Unmarshal(methodConfig, &cfg); err != nil { + return nil, fmt.Errorf("failed to unmarshal method config: %w", err) + } + + if cfg.RequestTimeoutSec == 0 { + cfg.RequestTimeoutSec = 30 + } + + nodeRateLimiter, err := ratelimit.NewRateLimiter(cfg.NodeRateLimiter) + if err != nil { + return nil, fmt.Errorf("failed to create node rate limiter: %w", err) + } + + metrics, err := newMetrics() + if err != nil { + return nil, fmt.Errorf("failed to create metrics: %w", err) + } + + return &handler{ + donConfig: donConfig, + don: don, + lggr: logger.Named(lggr, "ConfidentialRelayHandler:"+donConfig.DonId), + requestTimeout: time.Duration(cfg.RequestTimeoutSec) * time.Second, + 
nodeRateLimiter: nodeRateLimiter, + activeRequests: make(map[string]*activeRequest), + mu: sync.RWMutex{}, + stopCh: make(services.StopChan), + metrics: metrics, + aggregator: &aggregator{}, + clock: clock, + }, nil +} + +func (h *handler) Start(_ context.Context) error { + return h.StartOnce("ConfidentialRelayHandler", func() error { + h.lggr.Info("starting confidential relay handler") + go func() { + ctx, cancel := h.stopCh.NewCtx() + defer cancel() + ticker := h.clock.NewTicker(defaultCleanUpPeriod) + defer ticker.Stop() + for { + select { + case <-ticker.Chan(): + h.removeExpiredRequests(ctx) + case <-h.stopCh: + return + } + } + }() + return nil + }) +} + +func (h *handler) Close() error { + return h.StopOnce("ConfidentialRelayHandler", func() error { + h.lggr.Info("closing confidential relay handler") + close(h.stopCh) + return nil + }) +} + +func (h *handler) removeExpiredRequests(ctx context.Context) { + h.mu.RLock() + var expiredRequests []*activeRequest + now := h.clock.Now() + for _, userRequest := range h.activeRequests { + if now.Sub(userRequest.createdAt) > h.requestTimeout { + expiredRequests = append(expiredRequests, userRequest) + } + } + h.mu.RUnlock() + + for _, er := range expiredRequests { + responses := er.copiedResponses() + h.lggr.Debugw("request expired without quorum", "requestID", er.req.ID, "responseCount", len(responses), "required", h.donConfig.F+1) + errMsg := fmt.Sprintf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1) + err := h.sendResponse(ctx, er, h.errorResponse(er.req, api.RequestTimeoutError, errors.New(errMsg), nil)) + if err != nil { + h.lggr.Errorw("error sending response to user", "requestID", er.req.ID, "error", err) + } + } +} + +func (h *handler) Methods() []string { + return []string{MethodSecretsGet, MethodCapabilityExec} +} + +func (h *handler) HandleLegacyUserMessage(_ context.Context, _ *api.Message, _ gwhandlers.Callback) error { + return errors.New("confidential relay handler does not 
support legacy messages") +} + +func (h *handler) HandleJSONRPCUserMessage(ctx context.Context, req jsonrpc.Request[json.RawMessage], callback gwhandlers.Callback) error { + if req.ID == "" { + return errors.New("request ID cannot be empty") + } + if len(req.ID) > 200 { + return errors.New("request ID is too long: " + strconv.Itoa(len(req.ID)) + ". max is 200 characters") + } + + l := logger.With(h.lggr, "method", req.Method, "requestID", req.ID) + l.Debugw("handling confidential relay request") + + ar, err := h.newActiveRequest(req, callback) + if err != nil { + return err + } + + return h.fanOutToNodes(ctx, l, ar) +} + +func (h *handler) newActiveRequest(req jsonrpc.Request[json.RawMessage], callback gwhandlers.Callback) (*activeRequest, error) { + h.mu.Lock() + defer h.mu.Unlock() + if h.activeRequests[req.ID] != nil { + h.lggr.Errorw("request id already exists", "requestID", req.ID) + return nil, errors.New("request ID already exists: " + req.ID) + } + ar := &activeRequest{ + Callback: callback, + req: req, + createdAt: h.clock.Now(), + responses: map[string]*jsonrpc.Response[json.RawMessage]{}, + } + h.activeRequests[req.ID] = ar + return ar, nil +} + +func (h *handler) getActiveRequest(requestID string) *activeRequest { + h.mu.RLock() + defer h.mu.RUnlock() + return h.activeRequests[requestID] +} + +func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[json.RawMessage], nodeAddr string) error { + l := logger.With(h.lggr, "method", resp.Method, "requestID", resp.ID, "nodeAddr", nodeAddr) + l.Debugw("handling node response") + + if !h.nodeRateLimiter.Allow(nodeAddr) { + l.Debugw("node is rate limited", "nodeAddr", nodeAddr) + return nil + } + + ar := h.getActiveRequest(resp.ID) + if ar == nil { + l.Debugw("no pending request found for ID") + return nil + } + + ok := ar.addResponseForNode(nodeAddr, resp) + if !ok { + l.Errorw("duplicate response from node, ignoring", "nodeAddr", nodeAddr) + return nil + } + + copiedResponses := 
ar.copiedResponses() + aggregatedResp, err := h.aggregator.Aggregate(copiedResponses, h.donConfig.F, len(h.donConfig.Members), l) + switch { + case errors.Is(err, errInsufficientResponsesForQuorum): + l.Debugw("aggregating responses, waiting for other nodes...", "error", err) + return nil + case err != nil: + l.Error("quorum unobtainable, returning response to user...", "error", err, "responses", maps.Values(ar.responses)) + return h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.FatalError, err, nil)) + } + + return h.sendSuccessResponse(ctx, l, ar, aggregatedResp) +} + +func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *activeRequest) error { + var nodeErrors []error + for _, node := range h.donConfig.Members { + err := h.don.SendToNode(ctx, node.Address, &ar.req) + if err != nil { + nodeErrors = append(nodeErrors, err) + l.Errorw("error sending request to node", "node", node.Address, "error", err) + } + } + + if len(nodeErrors) == len(h.donConfig.Members) && len(nodeErrors) > 0 { + return h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"), nil)) + } + + l.Debugw("successfully forwarded request to relay nodes") + return nil +} + +func (h *handler) sendSuccessResponse(ctx context.Context, l logger.Logger, ar *activeRequest, resp *jsonrpc.Response[json.RawMessage]) error { + rawResponse, err := jsonrpc.EncodeResponse(resp) + if err != nil { + l.Errorw("failed to encode response", "error", err) + return h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.NodeReponseEncodingError, fmt.Errorf("failed to marshal response: %w", err), nil)) + } + + var errorCode api.ErrorCode + if resp.Error != nil { + errorCode = api.FromJSONRPCErrorCode(resp.Error.Code) + } else { + errorCode = api.NoError + } + + l.Debugw("issued user callback", "errorCode", errorCode) + successResp := gwhandlers.UserCallbackPayload{ + RawResponse: rawResponse, + ErrorCode: errorCode, + } + return 
h.sendResponse(ctx, ar, successResp) +} + +func (h *handler) errorResponse( + req jsonrpc.Request[json.RawMessage], + errorCode api.ErrorCode, + err error, + data []byte, +) gwhandlers.UserCallbackPayload { + switch errorCode { + case api.FatalError: + case api.NodeReponseEncodingError: + h.lggr.Errorw(err.Error(), "requestID", req.ID) + err = errors.New(errorCode.String()) + case api.InvalidParamsError: + h.lggr.Errorw("invalid params", "requestID", req.ID, "params", string(*req.Params)) + err = errors.New("invalid params error: " + err.Error()) + case api.UnsupportedMethodError: + h.lggr.Errorw("unsupported method", "requestID", req.ID, "method", req.Method, "error", err.Error()) + err = errors.New("unsupported method(" + req.Method + "): " + err.Error()) + case api.UserMessageParseError: + h.lggr.Errorw("user message parse error", "requestID", req.ID, "error", err.Error()) + err = errors.New("user message parse error: " + err.Error()) + case api.NoError: + case api.UnsupportedDONIdError: + case api.HandlerError: + case api.RequestTimeoutError: + case api.StaleNodeResponseError: + } + + return gwhandlers.UserCallbackPayload{ + RawResponse: h.codec.EncodeNewErrorResponse( + req.ID, + api.ToJSONRPCErrorCode(errorCode), + err.Error(), + data, + ), + ErrorCode: errorCode, + } +} + +func (h *handler) sendResponse(ctx context.Context, userRequest *activeRequest, resp gwhandlers.UserCallbackPayload) error { + switch resp.ErrorCode { + case api.StaleNodeResponseError: + case api.FatalError: + case api.NodeReponseEncodingError: + case api.RequestTimeoutError: + case api.HandlerError: + h.metrics.requestInternalError.Add(ctx, 1, metric.WithAttributes( + attribute.String("don_id", h.donConfig.DonId), + attribute.String("error", resp.ErrorCode.String()), + )) + case api.InvalidParamsError: + case api.UnsupportedMethodError: + case api.UserMessageParseError: + case api.UnsupportedDONIdError: + h.metrics.requestUserError.Add(ctx, 1, metric.WithAttributes( + 
attribute.String("don_id", h.donConfig.DonId), + )) + case api.NoError: + h.metrics.requestSuccess.Add(ctx, 1, metric.WithAttributes( + attribute.String("don_id", h.donConfig.DonId), + )) + } + + err := userRequest.SendResponse(resp) + if err != nil { + h.lggr.Errorw("error sending response to user", "requestID", userRequest.req.ID, "error", err) + return err + } + + h.mu.Lock() + defer h.mu.Unlock() + delete(h.activeRequests, userRequest.req.ID) + h.lggr.Debugw("response sent to user", "requestID", userRequest.req.ID, "errorCode", resp.ErrorCode) + return nil +} diff --git a/core/services/gateway/handlers/confidentialrelay/handler_test.go b/core/services/gateway/handlers/confidentialrelay/handler_test.go new file mode 100644 index 00000000000..efc6e2c1689 --- /dev/null +++ b/core/services/gateway/handlers/confidentialrelay/handler_test.go @@ -0,0 +1,533 @@ +package confidentialrelay + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "sync" + "testing" + "time" + + "github.com/jonboulle/clockwork" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + jsonrpc "github.com/smartcontractkit/chainlink-common/pkg/jsonrpc2" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/ratelimit" + + "github.com/smartcontractkit/chainlink/v2/core/services/gateway/api" + "github.com/smartcontractkit/chainlink/v2/core/services/gateway/config" + "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/common" + "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/mocks" +) + +var nodeOne = config.NodeConfig{ + Name: "node1", + Address: "0x1234", +} + +func setupHandler(t *testing.T, numNodes int) (*handler, *common.Callback, *mocks.DON, *clockwork.FakeClock) { + t.Helper() + lggr := logger.Test(t) + don := mocks.NewDON(t) + + members := make([]config.NodeConfig, numNodes) + for i := range numNodes { + 
members[i] = config.NodeConfig{ + Name: fmt.Sprintf("node%d", i), + Address: fmt.Sprintf("0x%04d", i), + } + } + + donConfig := &config.DONConfig{ + DonId: "test_relay_don", + F: 1, + Members: members, + } + handlerConfig := Config{ + RequestTimeoutSec: 30, + NodeRateLimiter: ratelimit.RateLimiterConfig{ + GlobalRPS: 100, + GlobalBurst: 100, + PerSenderRPS: 10, + PerSenderBurst: 10, + }, + } + methodConfig, err := json.Marshal(handlerConfig) + require.NoError(t, err) + + clock := clockwork.NewFakeClock() + h, err := NewHandler(methodConfig, donConfig, don, lggr, clock) + require.NoError(t, err) + h.aggregator = &mockAggregator{} + cb := common.NewCallback() + return h, cb, don, clock +} + +type mockAggregator struct { + err error +} + +func (m *mockAggregator) Aggregate(_ map[string]jsonrpc.Response[json.RawMessage], _ int, _ int, _ logger.Logger) (*jsonrpc.Response[json.RawMessage], error) { + return nil, m.err +} + +type respondingMockAggregator struct{} + +func (m *respondingMockAggregator) Aggregate(resps map[string]jsonrpc.Response[json.RawMessage], _ int, _ int, _ logger.Logger) (*jsonrpc.Response[json.RawMessage], error) { + if len(resps) == 0 { + return nil, errInsufficientResponsesForQuorum + } + // Return the first response we find. 
+ for _, r := range resps { + return &r, nil + } + return nil, errInsufficientResponsesForQuorum +} + +func TestConfidentialRelayHandler_Methods(t *testing.T) { + h, _, _, _ := setupHandler(t, 4) + methods := h.Methods() + assert.Equal(t, []string{MethodSecretsGet, MethodCapabilityExec}, methods) +} + +func TestConfidentialRelayHandler_HandleLegacyUserMessage(t *testing.T) { + h, cb, _, _ := setupHandler(t, 4) + err := h.HandleLegacyUserMessage(t.Context(), nil, cb) + require.ErrorContains(t, err, "confidential relay handler does not support legacy messages") +} + +func TestConfidentialRelayHandler_RequestIDTooLong(t *testing.T) { + h, cb, _, _ := setupHandler(t, 4) + + longID := strings.Repeat("x", 201) + req := jsonrpc.Request[json.RawMessage]{ + ID: longID, + Method: MethodCapabilityExec, + } + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + expected := fmt.Sprintf("request ID is too long: %d. max is 200 characters", len(longID)) + require.EqualError(t, err, expected) +} + +func TestConfidentialRelayHandler_EmptyRequestID(t *testing.T) { + h, cb, _, _ := setupHandler(t, 4) + + req := jsonrpc.Request[json.RawMessage]{ + ID: "", + Method: MethodCapabilityExec, + } + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.EqualError(t, err, "request ID cannot be empty") +} + +func TestConfidentialRelayHandler_FanOutAndQuorumSuccess(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + h.aggregator = &respondingMockAggregator{} + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) + + params := json.RawMessage(`{"workflow_id":"wf1","secrets":[{"key":"k","namespace":"ns"}],"enclave_public_key":"pk"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-1", + Method: MethodCapabilityExec, + Params: &params, + } + + resultData := json.RawMessage(`{"secrets":[],"master_public_key":"mpk","threshold":1}`) + response := jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "req-1", + Method: 
MethodCapabilityExec, + Result: &resultData, + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + resp, err := cb.Wait(t.Context()) + assert.NoError(t, err) + assert.Equal(t, api.NoError, resp.ErrorCode) + var jsonResp jsonrpc.Response[json.RawMessage] + err = json.Unmarshal(resp.RawResponse, &jsonResp) + assert.NoError(t, err) + assert.Equal(t, "req-1", jsonResp.ID) + }() + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + err = h.HandleNodeMessage(t.Context(), &response, "0x0000") + require.NoError(t, err) + wg.Wait() +} + +func TestConfidentialRelayHandler_QuorumWithRealAggregator(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + // Use the real aggregator; DON F=1 so quorum = F+1 = 2 + h.aggregator = &aggregator{} + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) + + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-quorum", + Method: MethodCapabilityExec, + Params: &params, + } + + resultData := json.RawMessage(`{"payload":"result"}`) + makeResp := func() *jsonrpc.Response[json.RawMessage] { + rd := make(json.RawMessage, len(resultData)) + copy(rd, resultData) + return &jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "req-quorum", + Method: MethodCapabilityExec, + Result: &rd, + } + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + resp, err := cb.Wait(t.Context()) + assert.NoError(t, err) + assert.Equal(t, api.NoError, resp.ErrorCode) + }() + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + // Send 2 matching responses (F+1 = 2) + for i := range 2 { + err = h.HandleNodeMessage(t.Context(), makeResp(), fmt.Sprintf("0x%04d", i)) + require.NoError(t, err) + } + wg.Wait() +} + +func TestConfidentialRelayHandler_QuorumWithDivergentResponses(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + h.aggregator = &aggregator{} + 
don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) + + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-diverge", + Method: MethodCapabilityExec, + Params: &params, + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + resp, err := cb.Wait(t.Context()) + assert.NoError(t, err) + assert.Equal(t, api.NoError, resp.ErrorCode) + }() + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + // One divergent response + divergentResult := json.RawMessage(`{"secrets":[],"master_public_key":"DIFFERENT","threshold":1}`) + divergentResp := &jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "req-diverge", + Method: MethodCapabilityExec, + Result: &divergentResult, + } + err = h.HandleNodeMessage(t.Context(), divergentResp, "0x0000") + require.NoError(t, err) + + // Two matching responses (quorum = F+1 = 2) + matchingResult := json.RawMessage(`{"secrets":[],"master_public_key":"mpk","threshold":1}`) + for i := 1; i <= 2; i++ { + rd := make(json.RawMessage, len(matchingResult)) + copy(rd, matchingResult) + resp := &jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "req-diverge", + Method: MethodCapabilityExec, + Result: &rd, + } + err = h.HandleNodeMessage(t.Context(), resp, fmt.Sprintf("0x%04d", i)) + require.NoError(t, err) + } + wg.Wait() +} + +func TestConfidentialRelayHandler_QuorumUnobtainable(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + h.aggregator = &mockAggregator{err: errQuorumUnobtainable} + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) + + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-unobtainable", + Method: MethodCapabilityExec, + Params: &params, + } + + response := jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "req-unobtainable", + Method: 
MethodCapabilityExec, + Error: &jsonrpc.WireError{ + Code: -32603, + Message: errQuorumUnobtainable.Error(), + }, + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + resp, err := cb.Wait(t.Context()) + assert.NoError(t, err) + var jsonResp jsonrpc.Response[json.RawMessage] + err = json.Unmarshal(resp.RawResponse, &jsonResp) + assert.NoError(t, err) + assert.Equal(t, "req-unobtainable", jsonResp.ID) + assert.NotNil(t, jsonResp.Error) + assert.Contains(t, jsonResp.Error.Message, "quorum unobtainable") + }() + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + err = h.HandleNodeMessage(t.Context(), &response, "0x0000") + require.NoError(t, err) + wg.Wait() +} + +func TestConfidentialRelayHandler_RequestTimeout(t *testing.T) { + h, cb, don, clock := setupHandler(t, 4) + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) + // Use the real aggregator so responses are not immediately satisfied + h.aggregator = &aggregator{} + + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-timeout", + Method: MethodCapabilityExec, + Params: &params, + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + resp, err := cb.Wait(t.Context()) + assert.NoError(t, err) + assert.Equal(t, api.RequestTimeoutError, resp.ErrorCode) + }() + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + // Advance clock past the request timeout and trigger cleanup + clock.Advance(31 * time.Second) + h.removeExpiredRequests(t.Context()) + wg.Wait() +} + +func TestConfidentialRelayHandler_DuplicateRequestID(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) + + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-dup", + Method: MethodCapabilityExec, + Params: &params, + } + + err := 
h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + cb2 := common.NewCallback() + err = h.HandleJSONRPCUserMessage(t.Context(), req, cb2) + require.ErrorContains(t, err, "request ID already exists") +} + +func TestConfidentialRelayHandler_RateLimitedNode(t *testing.T) { + handlerConfig := Config{ + RequestTimeoutSec: 30, + NodeRateLimiter: ratelimit.RateLimiterConfig{ + GlobalRPS: 100, + GlobalBurst: 100, + PerSenderRPS: 0.001, // Effectively zero + PerSenderBurst: 1, + }, + } + methodConfig, err := json.Marshal(handlerConfig) + require.NoError(t, err) + + lggr := logger.Test(t) + don := mocks.NewDON(t) + donConfig := &config.DONConfig{ + DonId: "test_relay_don", + F: 1, + Members: []config.NodeConfig{nodeOne}, + } + clock := clockwork.NewFakeClock() + h, err := NewHandler(methodConfig, donConfig, don, lggr, clock) + require.NoError(t, err) + h.aggregator = &respondingMockAggregator{} + + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) + + cb := common.NewCallback() + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-ratelimit", + Method: MethodCapabilityExec, + Params: &params, + } + + err = h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + resultData := json.RawMessage(`{"secrets":[]}`) + response := jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "req-ratelimit", + Method: MethodCapabilityExec, + Result: &resultData, + } + + // First response from node uses the burst allowance + err = h.HandleNodeMessage(t.Context(), &response, nodeOne.Address) + require.NoError(t, err) + + // Verify callback was called + ctx, cancel := context.WithTimeout(t.Context(), 100*time.Millisecond) + defer cancel() + resp, err := cb.Wait(ctx) + require.NoError(t, err) + assert.Equal(t, api.NoError, resp.ErrorCode) + + // Start a new request + cb2 := common.NewCallback() + req2 := jsonrpc.Request[json.RawMessage]{ + ID: 
"req-ratelimit-2", + Method: MethodCapabilityExec, + Params: &params, + } + err = h.HandleJSONRPCUserMessage(t.Context(), req2, cb2) + require.NoError(t, err) + + response2 := jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "req-ratelimit-2", + Method: MethodCapabilityExec, + Result: &resultData, + } + + // Second response should be rate limited (silently dropped) + err = h.HandleNodeMessage(t.Context(), &response2, nodeOne.Address) + require.NoError(t, err) + + // Callback should NOT be called - verify with timeout + ctx2, cancel2 := context.WithTimeout(t.Context(), 50*time.Millisecond) + defer cancel2() + _, err = cb2.Wait(ctx2) + require.Error(t, err) // Should timeout +} + +func TestConfidentialRelayHandler_LateNodeResponse(t *testing.T) { + h, cb, _, _ := setupHandler(t, 4) + + resultData := json.RawMessage(`{"secrets":[]}`) + staleResponse := jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "nonexistent-request", + Method: MethodCapabilityExec, + Result: &resultData, + } + + // This should not error, just silently ignore + err := h.HandleNodeMessage(t.Context(), &staleResponse, "0x0000") + require.NoError(t, err) + + // Verify callback was not triggered + ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond) + defer cancel() + _, err = cb.Wait(ctx) + require.Error(t, err) +} + +func TestConfidentialRelayHandler_AllNodesFanOutFail(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(errors.New("connection refused")) + + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-allfail", + Method: MethodCapabilityExec, + Params: &params, + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + resp, err := cb.Wait(t.Context()) + assert.NoError(t, err) + assert.Equal(t, api.FatalError, resp.ErrorCode) + var jsonResp jsonrpc.Response[json.RawMessage] + err = 
json.Unmarshal(resp.RawResponse, &jsonResp) + assert.NoError(t, err) + assert.Contains(t, jsonResp.Error.Message, "failed to forward user request to nodes") + }() + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + wg.Wait() +} + +func TestConfidentialRelayHandler_CapabilityExecMethod(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + h.aggregator = &respondingMockAggregator{} + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) + + params := json.RawMessage(`{"workflow_id":"wf1","capability_id":"cap1","payload":"data"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-cap", + Method: MethodCapabilityExec, + Params: &params, + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + resp, err := cb.Wait(t.Context()) + assert.NoError(t, err) + assert.Equal(t, api.NoError, resp.ErrorCode) + }() + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + resultData := json.RawMessage(`{"payload":"result"}`) + response := jsonrpc.Response[json.RawMessage]{ + Version: jsonrpc.JsonRpcVersion, + ID: "req-cap", + Method: MethodCapabilityExec, + Result: &resultData, + } + err = h.HandleNodeMessage(t.Context(), &response, "0x0000") + require.NoError(t, err) + wg.Wait() + don.AssertCalled(t, "SendToNode", mock.Anything, mock.Anything, mock.Anything) +} diff --git a/deployment/cre/jobs/pkg/gateway_job.go b/deployment/cre/jobs/pkg/gateway_job.go index 950ebc79b41..f73aca48fcd 100644 --- a/deployment/cre/jobs/pkg/gateway_job.go +++ b/deployment/cre/jobs/pkg/gateway_job.go @@ -14,9 +14,11 @@ const ( GatewayHandlerTypeWebAPICapabilities = "web-api-capabilities" GatewayHandlerTypeHTTPCapabilities = "http-capabilities" GatewayHandlerTypeVault = "vault" + GatewayHandlerTypeConfidentialRelay = "confidential-compute-relay" - ServiceNameWorkflows = "workflows" - ServiceNameVault = "vault" + ServiceNameWorkflows = "workflows" + ServiceNameVault = "vault" + 
ServiceNameConfidential = "confidential" minimumRequestTimeoutSec = 5 ) @@ -28,6 +30,8 @@ func HandlerServiceName(handlerType string) string { return ServiceNameVault case GatewayHandlerTypeHTTPCapabilities, GatewayHandlerTypeWebAPICapabilities: return ServiceNameWorkflows + case GatewayHandlerTypeConfidentialRelay: + return ServiceNameConfidential default: return handlerType } @@ -226,6 +230,8 @@ func (g GatewayJob) buildLegacyDons() ([]legacyDON, error) { hs = append(hs, newDefaultVaultHandler(g.RequestTimeoutSec)) case GatewayHandlerTypeHTTPCapabilities: hs = append(hs, newDefaultHTTPCapabilitiesHandler()) + case GatewayHandlerTypeConfidentialRelay: + hs = append(hs, newDefaultConfidentialRelayHandler()) default: return nil, errors.New("unknown handler type: " + ht) } @@ -266,6 +272,8 @@ func (g GatewayJob) buildServicesAndShardedDONs() ([]shardedDON, []service, erro handlers = append(handlers, newDefaultVaultHandler(g.RequestTimeoutSec)) case GatewayHandlerTypeHTTPCapabilities: handlers = append(handlers, newDefaultHTTPCapabilitiesHandler()) + case GatewayHandlerTypeConfidentialRelay: + handlers = append(handlers, newDefaultConfidentialRelayHandler()) default: return nil, nil, errors.New("unknown handler type: " + ht) } @@ -444,3 +452,22 @@ func newDefaultHTTPCapabilitiesHandler() handler { }, } } + +type confidentialRelayHandlerConfig struct { + NodeRateLimiter nodeRateLimiterConfig `toml:"NodeRateLimiter"` +} + +func newDefaultConfidentialRelayHandler() handler { + return handler{ + Name: GatewayHandlerTypeConfidentialRelay, + ServiceName: "confidential", + Config: confidentialRelayHandlerConfig{ + NodeRateLimiter: nodeRateLimiterConfig{ + GlobalBurst: 10, + GlobalRPS: 50, + PerSenderBurst: 10, + PerSenderRPS: 10, + }, + }, + } +} From ac120bef1fe222a54ebbfd96d9ceb9a3d4f27f95 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Mon, 23 Mar 2026 18:18:25 +0100 Subject: [PATCH 02/25] Fix exhaustive switch lint and tidy integration-tests modules Add missing 
api.ConflictError and api.LimitExceededError cases to both switch statements in handler.go. Run go mod tidy on integration-tests and integration-tests/load. --- core/services/gateway/handlers/confidentialrelay/handler.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 17511587011..7e85370d320 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -380,6 +380,8 @@ func (h *handler) errorResponse( case api.HandlerError: case api.RequestTimeoutError: case api.StaleNodeResponseError: + case api.ConflictError: + case api.LimitExceededError: } return gwhandlers.UserCallbackPayload{ @@ -415,6 +417,8 @@ func (h *handler) sendResponse(ctx context.Context, userRequest *activeRequest, h.metrics.requestSuccess.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), )) + case api.ConflictError: + case api.LimitExceededError: } err := userRequest.SendResponse(resp) From 7d8d2aa2df03f92769b39c80f107345d8d350adf Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Mon, 23 Mar 2026 20:06:18 +0100 Subject: [PATCH 03/25] Fix goimports formatting in gateway_job.go --- deployment/cre/jobs/pkg/gateway_job.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deployment/cre/jobs/pkg/gateway_job.go b/deployment/cre/jobs/pkg/gateway_job.go index f73aca48fcd..bf35bf40346 100644 --- a/deployment/cre/jobs/pkg/gateway_job.go +++ b/deployment/cre/jobs/pkg/gateway_job.go @@ -16,9 +16,9 @@ const ( GatewayHandlerTypeVault = "vault" GatewayHandlerTypeConfidentialRelay = "confidential-compute-relay" - ServiceNameWorkflows = "workflows" - ServiceNameVault = "vault" - ServiceNameConfidential = "confidential" + ServiceNameWorkflows = "workflows" + ServiceNameVault = "vault" + ServiceNameConfidential = "confidential" 
minimumRequestTimeoutSec = 5 ) From 24fc4c05550e1b162a80dbe85960b64c6af9b3ce Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Mon, 23 Mar 2026 20:15:57 +0100 Subject: [PATCH 04/25] Use ServiceName constants and add RequestTimeoutSec to relay handler - Replace string literals with ServiceNameVault, ServiceNameWorkflows, ServiceNameConfidential constants in all handler constructors. - Add RequestTimeoutSec to confidentialRelayHandlerConfig, set to gateway timeout minus 1s (matching vault handler pattern). Ensures the handler times out before the gateway, returning a clean error instead of the gateway killing the connection. --- deployment/cre/jobs/pkg/gateway_job.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/deployment/cre/jobs/pkg/gateway_job.go b/deployment/cre/jobs/pkg/gateway_job.go index bf35bf40346..2bbc540b8a0 100644 --- a/deployment/cre/jobs/pkg/gateway_job.go +++ b/deployment/cre/jobs/pkg/gateway_job.go @@ -231,7 +231,7 @@ func (g GatewayJob) buildLegacyDons() ([]legacyDON, error) { case GatewayHandlerTypeHTTPCapabilities: hs = append(hs, newDefaultHTTPCapabilitiesHandler()) case GatewayHandlerTypeConfidentialRelay: - hs = append(hs, newDefaultConfidentialRelayHandler()) + hs = append(hs, newDefaultConfidentialRelayHandler(g.RequestTimeoutSec)) default: return nil, errors.New("unknown handler type: " + ht) } @@ -273,7 +273,7 @@ func (g GatewayJob) buildServicesAndShardedDONs() ([]shardedDON, []service, erro case GatewayHandlerTypeHTTPCapabilities: handlers = append(handlers, newDefaultHTTPCapabilitiesHandler()) case GatewayHandlerTypeConfidentialRelay: - handlers = append(handlers, newDefaultConfidentialRelayHandler()) + handlers = append(handlers, newDefaultConfidentialRelayHandler(g.RequestTimeoutSec)) default: return nil, nil, errors.New("unknown handler type: " + ht) } @@ -315,8 +315,8 @@ type vaultHandlerConfig struct { func newDefaultVaultHandler(requestTimeoutSec int) handler { return handler{ - Name: 
"vault", - ServiceName: "vault", + Name: GatewayHandlerTypeVault, + ServiceName: ServiceNameVault, Config: vaultHandlerConfig{ // must be lower than the overall gateway request timeout. // so we allow for the response to be sent back. @@ -440,7 +440,7 @@ type httpCapabilitiesHandlerConfig struct { func newDefaultHTTPCapabilitiesHandler() handler { return handler{ Name: GatewayHandlerTypeHTTPCapabilities, - ServiceName: "workflows", + ServiceName: ServiceNameWorkflows, Config: httpCapabilitiesHandlerConfig{ CleanUpPeriodMs: 10 * 60 * 1000, // 10 minutes NodeRateLimiter: nodeRateLimiterConfig{ @@ -454,14 +454,16 @@ func newDefaultHTTPCapabilitiesHandler() handler { } type confidentialRelayHandlerConfig struct { - NodeRateLimiter nodeRateLimiterConfig `toml:"NodeRateLimiter"` + RequestTimeoutSec int `toml:"requestTimeoutSec"` + NodeRateLimiter nodeRateLimiterConfig `toml:"NodeRateLimiter"` } -func newDefaultConfidentialRelayHandler() handler { +func newDefaultConfidentialRelayHandler(requestTimeoutSec int) handler { return handler{ Name: GatewayHandlerTypeConfidentialRelay, - ServiceName: "confidential", + ServiceName: ServiceNameConfidential, Config: confidentialRelayHandlerConfig{ + RequestTimeoutSec: requestTimeoutSec - 1, NodeRateLimiter: nodeRateLimiterConfig{ GlobalBurst: 10, GlobalRPS: 50, From 974346fdc334ed093f06fce78f1ea5d863dd4190 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Tue, 24 Mar 2026 16:20:46 +0100 Subject: [PATCH 05/25] Add comment explaining requestTimeoutSec - 1 in relay handler --- deployment/cre/jobs/pkg/gateway_job.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deployment/cre/jobs/pkg/gateway_job.go b/deployment/cre/jobs/pkg/gateway_job.go index 2bbc540b8a0..d472024c0b5 100644 --- a/deployment/cre/jobs/pkg/gateway_job.go +++ b/deployment/cre/jobs/pkg/gateway_job.go @@ -463,6 +463,8 @@ func newDefaultConfidentialRelayHandler(requestTimeoutSec int) handler { Name: GatewayHandlerTypeConfidentialRelay, ServiceName: 
ServiceNameConfidential, Config: confidentialRelayHandlerConfig{ + // must be lower than the overall gateway request timeout, + // so we allow for the response to be sent back. RequestTimeoutSec: requestTimeoutSec - 1, NodeRateLimiter: nodeRateLimiterConfig{ GlobalBurst: 10, From c6ff9200e79a44444da942ba67b1bb90c00fbbff Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Tue, 24 Mar 2026 16:48:56 +0100 Subject: [PATCH 06/25] Use fmt.Errorf instead of errors.New with string concatenation Replace errors.New(x.Error() + ...) and fmt.Sprintf + errors.New patterns with fmt.Errorf throughout the relay handler and aggregator. Use %w for error wrapping where appropriate. Add comment clarifying sendResponse deletes expired requests. --- .../gateway/handlers/confidentialrelay/aggregator.go | 4 ++-- .../gateway/handlers/confidentialrelay/handler.go | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/aggregator.go b/core/services/gateway/handlers/confidentialrelay/aggregator.go index 64cec11404f..8843f122516 100644 --- a/core/services/gateway/handlers/confidentialrelay/aggregator.go +++ b/core/services/gateway/handlers/confidentialrelay/aggregator.go @@ -3,7 +3,7 @@ package confidentialrelay import ( "encoding/json" "errors" - "strconv" + "fmt" jsonrpc "github.com/smartcontractkit/chainlink-common/pkg/jsonrpc2" "github.com/smartcontractkit/chainlink-common/pkg/logger" @@ -46,7 +46,7 @@ func (a *aggregator) Aggregate(resps map[string]jsonrpc.Response[json.RawMessage remainingResponses := donMembersCount - len(resps) if maxShaToCount+remainingResponses < requiredQuorum { l.Warnw("quorum unattainable for request", "requiredQuorum", requiredQuorum, "remainingResponses", remainingResponses, "maxShaToCount", maxShaToCount) - return nil, errors.New(errQuorumUnobtainable.Error() + ". RequiredQuorum=" + strconv.Itoa(requiredQuorum) + ". 
maxShaToCount=" + strconv.Itoa(maxShaToCount) + " remainingResponses=" + strconv.Itoa(remainingResponses)) + return nil, fmt.Errorf("%w: requiredQuorum=%d, maxShaToCount=%d, remainingResponses=%d", errQuorumUnobtainable, requiredQuorum, maxShaToCount, remainingResponses) } return nil, errInsufficientResponsesForQuorum diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 7e85370d320..0f746fa456d 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -222,8 +222,8 @@ func (h *handler) removeExpiredRequests(ctx context.Context) { for _, er := range expiredRequests { responses := er.copiedResponses() h.lggr.Debugw("request expired without quorum", "requestID", er.req.ID, "responseCount", len(responses), "required", h.donConfig.F+1) - errMsg := fmt.Sprintf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1) - err := h.sendResponse(ctx, er, h.errorResponse(er.req, api.RequestTimeoutError, errors.New(errMsg), nil)) + // sendResponse deletes the request from activeRequests after sending. 
+ err := h.sendResponse(ctx, er, h.errorResponse(er.req, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), nil)) if err != nil { h.lggr.Errorw("error sending response to user", "requestID", er.req.ID, "error", err) } @@ -368,13 +368,13 @@ func (h *handler) errorResponse( err = errors.New(errorCode.String()) case api.InvalidParamsError: h.lggr.Errorw("invalid params", "requestID", req.ID, "params", string(*req.Params)) - err = errors.New("invalid params error: " + err.Error()) + err = fmt.Errorf("invalid params error: %w", err) case api.UnsupportedMethodError: h.lggr.Errorw("unsupported method", "requestID", req.ID, "method", req.Method, "error", err.Error()) - err = errors.New("unsupported method(" + req.Method + "): " + err.Error()) + err = fmt.Errorf("unsupported method(%s): %w", req.Method, err) case api.UserMessageParseError: h.lggr.Errorw("user message parse error", "requestID", req.ID, "error", err.Error()) - err = errors.New("user message parse error: " + err.Error()) + err = fmt.Errorf("user message parse error: %w", err) case api.NoError: case api.UnsupportedDONIdError: case api.HandlerError: From 5cd788f12167d5daae7f2b426830a362424b41bb Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Wed, 25 Mar 2026 11:55:33 +0100 Subject: [PATCH 07/25] Improve F+1 quorum comment in relay aggregator Explain why F+1 is correct: relay nodes proxy already-aggregated DON responses through deterministic translation, so honest nodes produce byte-identical outputs. 
--- .../gateway/handlers/confidentialrelay/aggregator.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/aggregator.go b/core/services/gateway/handlers/confidentialrelay/aggregator.go index 8843f122516..5eb00664571 100644 --- a/core/services/gateway/handlers/confidentialrelay/aggregator.go +++ b/core/services/gateway/handlers/confidentialrelay/aggregator.go @@ -17,9 +17,12 @@ var ( type aggregator struct{} func (a *aggregator) Aggregate(resps map[string]jsonrpc.Response[json.RawMessage], donF int, donMembersCount int, l logger.Logger) (*jsonrpc.Response[json.RawMessage], error) { - // F+1 is sufficient: each honest node independently validates the enclave's - // Nitro attestation, so F+1 matching responses guarantees at least one - // honest node vouched for the result. + // F+1 (QuorumFPlusOne) is sufficient because each relay node calls the + // target DON (Vault or capability) through CRE's standard capability + // dispatch, which includes DON-level consensus. Every honest relay node + // receives the same consensus-aggregated response and performs deterministic + // translation, producing byte-identical outputs. F+1 matching responses + // therefore guarantees at least one honest node vouched for the result. requiredQuorum := donF + 1 if len(resps) < requiredQuorum { From 481675e8369cf05f6fee919e4382cc2366d42ffd Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Wed, 25 Mar 2026 12:05:50 +0100 Subject: [PATCH 08/25] Move requestTimeoutSec - 1 to call site for relay handler Make the buffer visible where handlers are wired up instead of hiding it inside the constructor. The vault handler does the same subtraction internally; a follow-up should unify both to use this pattern. 
--- deployment/cre/jobs/pkg/gateway_job.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/deployment/cre/jobs/pkg/gateway_job.go b/deployment/cre/jobs/pkg/gateway_job.go index d472024c0b5..cca265a0bb6 100644 --- a/deployment/cre/jobs/pkg/gateway_job.go +++ b/deployment/cre/jobs/pkg/gateway_job.go @@ -231,7 +231,9 @@ func (g GatewayJob) buildLegacyDons() ([]legacyDON, error) { case GatewayHandlerTypeHTTPCapabilities: hs = append(hs, newDefaultHTTPCapabilitiesHandler()) case GatewayHandlerTypeConfidentialRelay: - hs = append(hs, newDefaultConfidentialRelayHandler(g.RequestTimeoutSec)) + // -1 so the handler times out before the gateway, allowing a clean error response. + // TODO: the vault handler does the same -1 internally; unify both to use this pattern. + hs = append(hs, newDefaultConfidentialRelayHandler(g.RequestTimeoutSec-1)) default: return nil, errors.New("unknown handler type: " + ht) } @@ -273,7 +275,8 @@ func (g GatewayJob) buildServicesAndShardedDONs() ([]shardedDON, []service, erro case GatewayHandlerTypeHTTPCapabilities: handlers = append(handlers, newDefaultHTTPCapabilitiesHandler()) case GatewayHandlerTypeConfidentialRelay: - handlers = append(handlers, newDefaultConfidentialRelayHandler(g.RequestTimeoutSec)) + // -1 so the handler times out before the gateway, allowing a clean error response. + handlers = append(handlers, newDefaultConfidentialRelayHandler(g.RequestTimeoutSec-1)) default: return nil, nil, errors.New("unknown handler type: " + ht) } @@ -463,9 +466,7 @@ func newDefaultConfidentialRelayHandler(requestTimeoutSec int) handler { Name: GatewayHandlerTypeConfidentialRelay, ServiceName: ServiceNameConfidential, Config: confidentialRelayHandlerConfig{ - // must be lower than the overall gateway request timeout, - // so we allow for the response to be sent back. 
- RequestTimeoutSec: requestTimeoutSec - 1, + RequestTimeoutSec: requestTimeoutSec, NodeRateLimiter: nodeRateLimiterConfig{ GlobalBurst: 10, GlobalRPS: 50, From dbe318dfadbbeb037a40d799bf190a26908e3a51 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Thu, 26 Mar 2026 12:02:52 +0100 Subject: [PATCH 09/25] Extract deleteActiveRequest from sendResponse sendResponse no longer has the side effect of deleting from activeRequests. Callers explicitly call deleteActiveRequest after sendResponse, making the cleanup visible at every call site. --- .../handlers/confidentialrelay/handler.go | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 0f746fa456d..56e82e9660a 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -222,11 +222,11 @@ func (h *handler) removeExpiredRequests(ctx context.Context) { for _, er := range expiredRequests { responses := er.copiedResponses() h.lggr.Debugw("request expired without quorum", "requestID", er.req.ID, "responseCount", len(responses), "required", h.donConfig.F+1) - // sendResponse deletes the request from activeRequests after sending. 
err := h.sendResponse(ctx, er, h.errorResponse(er.req, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), nil)) if err != nil { h.lggr.Errorw("error sending response to user", "requestID", er.req.ID, "error", err) } + h.deleteActiveRequest(er.req.ID) } } @@ -309,10 +309,14 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ return nil case err != nil: l.Error("quorum unobtainable, returning response to user...", "error", err, "responses", maps.Values(ar.responses)) - return h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.FatalError, err, nil)) + sendErr := h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.FatalError, err, nil)) + h.deleteActiveRequest(ar.req.ID) + return sendErr } - return h.sendSuccessResponse(ctx, l, ar, aggregatedResp) + sendErr := h.sendSuccessResponse(ctx, l, ar, aggregatedResp) + h.deleteActiveRequest(ar.req.ID) + return sendErr } func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *activeRequest) error { @@ -326,7 +330,9 @@ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *active } if len(nodeErrors) == len(h.donConfig.Members) && len(nodeErrors) > 0 { - return h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"), nil)) + sendErr := h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"), nil)) + h.deleteActiveRequest(ar.req.ID) + return sendErr } l.Debugw("successfully forwarded request to relay nodes") @@ -427,9 +433,12 @@ func (h *handler) sendResponse(ctx context.Context, userRequest *activeRequest, return err } - h.mu.Lock() - defer h.mu.Unlock() - delete(h.activeRequests, userRequest.req.ID) h.lggr.Debugw("response sent to user", "requestID", userRequest.req.ID, "errorCode", resp.ErrorCode) return nil } + +func (h *handler) deleteActiveRequest(id string) { + h.mu.Lock() + 
defer h.mu.Unlock() + delete(h.activeRequests, id) +} From 043ec1decec9e1c7c720f97fb95f5d582d5794d8 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Thu, 26 Mar 2026 12:08:20 +0100 Subject: [PATCH 10/25] Rename sendResponse to sendResponseAndCleanup, fix cleanup-on-error bug The old sendResponse skipped the delete if SendResponse failed, leaving the request in activeRequests forever. Now the delete always runs regardless of send outcome. The method name makes the cleanup explicit. --- .../handlers/confidentialrelay/handler.go | 37 ++++++++----------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 56e82e9660a..1b564949783 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -222,11 +222,10 @@ func (h *handler) removeExpiredRequests(ctx context.Context) { for _, er := range expiredRequests { responses := er.copiedResponses() h.lggr.Debugw("request expired without quorum", "requestID", er.req.ID, "responseCount", len(responses), "required", h.donConfig.F+1) - err := h.sendResponse(ctx, er, h.errorResponse(er.req, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), nil)) + err := h.sendResponseAndCleanup(ctx, er, h.errorResponse(er.req, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), nil)) if err != nil { h.lggr.Errorw("error sending response to user", "requestID", er.req.ID, "error", err) } - h.deleteActiveRequest(er.req.ID) } } @@ -309,14 +308,10 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ return nil case err != nil: l.Error("quorum unobtainable, returning response to user...", "error", err, "responses", maps.Values(ar.responses)) - sendErr := h.sendResponse(ctx, ar, 
h.errorResponse(ar.req, api.FatalError, err, nil)) - h.deleteActiveRequest(ar.req.ID) - return sendErr + return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.FatalError, err, nil)) } - sendErr := h.sendSuccessResponse(ctx, l, ar, aggregatedResp) - h.deleteActiveRequest(ar.req.ID) - return sendErr + return h.sendSuccessResponseAndCleanup(ctx, l, ar, aggregatedResp) } func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *activeRequest) error { @@ -330,20 +325,18 @@ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *active } if len(nodeErrors) == len(h.donConfig.Members) && len(nodeErrors) > 0 { - sendErr := h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"), nil)) - h.deleteActiveRequest(ar.req.ID) - return sendErr + return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"), nil)) } l.Debugw("successfully forwarded request to relay nodes") return nil } -func (h *handler) sendSuccessResponse(ctx context.Context, l logger.Logger, ar *activeRequest, resp *jsonrpc.Response[json.RawMessage]) error { +func (h *handler) sendSuccessResponseAndCleanup(ctx context.Context, l logger.Logger, ar *activeRequest, resp *jsonrpc.Response[json.RawMessage]) error { rawResponse, err := jsonrpc.EncodeResponse(resp) if err != nil { l.Errorw("failed to encode response", "error", err) - return h.sendResponse(ctx, ar, h.errorResponse(ar.req, api.NodeReponseEncodingError, fmt.Errorf("failed to marshal response: %w", err), nil)) + return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.NodeReponseEncodingError, fmt.Errorf("failed to marshal response: %w", err), nil)) } var errorCode api.ErrorCode @@ -358,7 +351,7 @@ func (h *handler) sendSuccessResponse(ctx context.Context, l logger.Logger, ar * RawResponse: rawResponse, ErrorCode: errorCode, } - return h.sendResponse(ctx, ar, 
successResp) + return h.sendResponseAndCleanup(ctx, ar, successResp) } func (h *handler) errorResponse( @@ -401,7 +394,10 @@ func (h *handler) errorResponse( } } -func (h *handler) sendResponse(ctx context.Context, userRequest *activeRequest, resp gwhandlers.UserCallbackPayload) error { +// sendResponseAndCleanup sends the response to the user and removes the +// request from activeRequests. The request is always removed regardless of +// whether the send succeeds, since a failed callback cannot be retried. +func (h *handler) sendResponseAndCleanup(ctx context.Context, userRequest *activeRequest, resp gwhandlers.UserCallbackPayload) error { switch resp.ErrorCode { case api.StaleNodeResponseError: case api.FatalError: @@ -428,6 +424,11 @@ func (h *handler) sendResponse(ctx context.Context, userRequest *activeRequest, } err := userRequest.SendResponse(resp) + + h.mu.Lock() + delete(h.activeRequests, userRequest.req.ID) + h.mu.Unlock() + if err != nil { h.lggr.Errorw("error sending response to user", "requestID", userRequest.req.ID, "error", err) return err @@ -436,9 +437,3 @@ func (h *handler) sendResponse(ctx context.Context, userRequest *activeRequest, h.lggr.Debugw("response sent to user", "requestID", userRequest.req.ID, "errorCode", resp.ErrorCode) return nil } - -func (h *handler) deleteActiveRequest(id string) { - h.mu.Lock() - defer h.mu.Unlock() - delete(h.activeRequests, id) -} From 1bf76d5f05a3260e4d8ccbfbf8bbed4b561e8dd8 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Thu, 26 Mar 2026 21:13:32 +0100 Subject: [PATCH 11/25] Handle errQuorumUnobtainable explicitly in aggregation switch --- core/services/gateway/handlers/confidentialrelay/handler.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 1b564949783..e7333e09576 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ 
b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -5,7 +5,6 @@ import ( "encoding/json" "errors" "fmt" - "maps" "strconv" "sync" "time" @@ -306,8 +305,11 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ case errors.Is(err, errInsufficientResponsesForQuorum): l.Debugw("aggregating responses, waiting for other nodes...", "error", err) return nil + case errors.Is(err, errQuorumUnobtainable): + l.Errorw("quorum unobtainable, returning error to user", "error", err) + return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.FatalError, err, nil)) case err != nil: - l.Error("quorum unobtainable, returning response to user...", "error", err, "responses", maps.Values(ar.responses)) + l.Errorw("unexpected aggregation error", "error", err) return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.FatalError, err, nil)) } From 4edeaf43e345c274f8bd56ef2805d8d858348c27 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Mon, 30 Mar 2026 18:00:46 +0200 Subject: [PATCH 12/25] Merge errorResponse into sendErrorResponseAndCleanup --- .../handlers/confidentialrelay/handler.go | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index e7333e09576..82acd1a212a 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -221,7 +221,7 @@ func (h *handler) removeExpiredRequests(ctx context.Context) { for _, er := range expiredRequests { responses := er.copiedResponses() h.lggr.Debugw("request expired without quorum", "requestID", er.req.ID, "responseCount", len(responses), "required", h.donConfig.F+1) - err := h.sendResponseAndCleanup(ctx, er, h.errorResponse(er.req, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), 
nil)) + err := h.sendErrorResponseAndCleanup(ctx, er, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), nil) if err != nil { h.lggr.Errorw("error sending response to user", "requestID", er.req.ID, "error", err) } @@ -307,10 +307,10 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ return nil case errors.Is(err, errQuorumUnobtainable): l.Errorw("quorum unobtainable, returning error to user", "error", err) - return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.FatalError, err, nil)) + return h.sendErrorResponseAndCleanup(ctx, ar, api.FatalError, err, nil) case err != nil: l.Errorw("unexpected aggregation error", "error", err) - return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.FatalError, err, nil)) + return h.sendErrorResponseAndCleanup(ctx, ar, api.FatalError, err, nil) } return h.sendSuccessResponseAndCleanup(ctx, l, ar, aggregatedResp) @@ -327,7 +327,7 @@ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *active } if len(nodeErrors) == len(h.donConfig.Members) && len(nodeErrors) > 0 { - return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"), nil)) + return h.sendErrorResponseAndCleanup(ctx, ar, api.FatalError, errors.New("failed to forward user request to nodes"), nil) } l.Debugw("successfully forwarded request to relay nodes") @@ -338,7 +338,7 @@ func (h *handler) sendSuccessResponseAndCleanup(ctx context.Context, l logger.Lo rawResponse, err := jsonrpc.EncodeResponse(resp) if err != nil { l.Errorw("failed to encode response", "error", err) - return h.sendResponseAndCleanup(ctx, ar, h.errorResponse(ar.req, api.NodeReponseEncodingError, fmt.Errorf("failed to marshal response: %w", err), nil)) + return h.sendErrorResponseAndCleanup(ctx, ar, api.NodeReponseEncodingError, fmt.Errorf("failed to marshal response: %w", err), nil) } var 
errorCode api.ErrorCode @@ -356,12 +356,10 @@ func (h *handler) sendSuccessResponseAndCleanup(ctx context.Context, l logger.Lo return h.sendResponseAndCleanup(ctx, ar, successResp) } -func (h *handler) errorResponse( - req jsonrpc.Request[json.RawMessage], - errorCode api.ErrorCode, - err error, - data []byte, -) gwhandlers.UserCallbackPayload { +// sendErrorResponseAndCleanup builds a sanitized error payload, records +// metrics, sends the response, and removes the request from activeRequests. +func (h *handler) sendErrorResponseAndCleanup(ctx context.Context, ar *activeRequest, errorCode api.ErrorCode, err error, data []byte) error { + req := ar.req switch errorCode { case api.FatalError: case api.NodeReponseEncodingError: @@ -385,7 +383,7 @@ func (h *handler) errorResponse( case api.LimitExceededError: } - return gwhandlers.UserCallbackPayload{ + resp := gwhandlers.UserCallbackPayload{ RawResponse: h.codec.EncodeNewErrorResponse( req.ID, api.ToJSONRPCErrorCode(errorCode), @@ -394,6 +392,7 @@ func (h *handler) errorResponse( ), ErrorCode: errorCode, } + return h.sendResponseAndCleanup(ctx, ar, resp) } // sendResponseAndCleanup sends the response to the user and removes the From 60a30f46d69fd60cda63147b397bb391b10513e7 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Mon, 30 Mar 2026 18:47:21 +0200 Subject: [PATCH 13/25] Move error sanitization into sendResponseAndCleanup --- .../handlers/confidentialrelay/handler.go | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 82acd1a212a..1e0c9b31b41 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -221,7 +221,7 @@ func (h *handler) removeExpiredRequests(ctx context.Context) { for _, er := range expiredRequests { responses := er.copiedResponses() 
h.lggr.Debugw("request expired without quorum", "requestID", er.req.ID, "responseCount", len(responses), "required", h.donConfig.F+1) - err := h.sendErrorResponseAndCleanup(ctx, er, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), nil) + err := h.sendResponseAndCleanup(ctx, er, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), nil) if err != nil { h.lggr.Errorw("error sending response to user", "requestID", er.req.ID, "error", err) } @@ -307,10 +307,10 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ return nil case errors.Is(err, errQuorumUnobtainable): l.Errorw("quorum unobtainable, returning error to user", "error", err) - return h.sendErrorResponseAndCleanup(ctx, ar, api.FatalError, err, nil) + return h.sendResponseAndCleanup(ctx, ar, api.FatalError, err, nil) case err != nil: l.Errorw("unexpected aggregation error", "error", err) - return h.sendErrorResponseAndCleanup(ctx, ar, api.FatalError, err, nil) + return h.sendResponseAndCleanup(ctx, ar, api.FatalError, err, nil) } return h.sendSuccessResponseAndCleanup(ctx, l, ar, aggregatedResp) @@ -327,7 +327,7 @@ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *active } if len(nodeErrors) == len(h.donConfig.Members) && len(nodeErrors) > 0 { - return h.sendErrorResponseAndCleanup(ctx, ar, api.FatalError, errors.New("failed to forward user request to nodes"), nil) + return h.sendResponseAndCleanup(ctx, ar, api.FatalError, errors.New("failed to forward user request to nodes"), nil) } l.Debugw("successfully forwarded request to relay nodes") @@ -338,7 +338,7 @@ func (h *handler) sendSuccessResponseAndCleanup(ctx context.Context, l logger.Lo rawResponse, err := jsonrpc.EncodeResponse(resp) if err != nil { l.Errorw("failed to encode response", "error", err) - return h.sendErrorResponseAndCleanup(ctx, ar, api.NodeReponseEncodingError, 
fmt.Errorf("failed to marshal response: %w", err), nil) + return h.sendResponseAndCleanup(ctx, ar, api.NodeReponseEncodingError, fmt.Errorf("failed to marshal response: %w", err), nil) } var errorCode api.ErrorCode @@ -349,16 +349,17 @@ func (h *handler) sendSuccessResponseAndCleanup(ctx context.Context, l logger.Lo } l.Debugw("issued user callback", "errorCode", errorCode) - successResp := gwhandlers.UserCallbackPayload{ + payload := gwhandlers.UserCallbackPayload{ RawResponse: rawResponse, ErrorCode: errorCode, } - return h.sendResponseAndCleanup(ctx, ar, successResp) + return h.sendPayloadAndCleanup(ctx, ar, payload) } -// sendErrorResponseAndCleanup builds a sanitized error payload, records -// metrics, sends the response, and removes the request from activeRequests. -func (h *handler) sendErrorResponseAndCleanup(ctx context.Context, ar *activeRequest, errorCode api.ErrorCode, err error, data []byte) error { +// sendResponseAndCleanup sanitizes the error, encodes a JSON-RPC error +// response, records metrics, sends the response to the user, and removes +// the request from activeRequests. +func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, errorCode api.ErrorCode, err error, data []byte) error { req := ar.req switch errorCode { case api.FatalError: @@ -383,7 +384,7 @@ func (h *handler) sendErrorResponseAndCleanup(ctx context.Context, ar *activeReq case api.LimitExceededError: } - resp := gwhandlers.UserCallbackPayload{ + payload := gwhandlers.UserCallbackPayload{ RawResponse: h.codec.EncodeNewErrorResponse( req.ID, api.ToJSONRPCErrorCode(errorCode), @@ -392,13 +393,14 @@ func (h *handler) sendErrorResponseAndCleanup(ctx context.Context, ar *activeReq ), ErrorCode: errorCode, } - return h.sendResponseAndCleanup(ctx, ar, resp) + return h.sendPayloadAndCleanup(ctx, ar, payload) } -// sendResponseAndCleanup sends the response to the user and removes the -// request from activeRequests. 
The request is always removed regardless of -// whether the send succeeds, since a failed callback cannot be retried. -func (h *handler) sendResponseAndCleanup(ctx context.Context, userRequest *activeRequest, resp gwhandlers.UserCallbackPayload) error { +// sendPayloadAndCleanup sends the pre-built payload, records metrics, and +// removes the request from activeRequests. The request is always removed +// regardless of whether the send succeeds, since a failed callback cannot +// be retried. +func (h *handler) sendPayloadAndCleanup(ctx context.Context, userRequest *activeRequest, resp gwhandlers.UserCallbackPayload) error { switch resp.ErrorCode { case api.StaleNodeResponseError: case api.FatalError: From 64daccecd766d964a9a58a5d62e758e94ed95f71 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Mon, 30 Mar 2026 18:49:20 +0200 Subject: [PATCH 14/25] Inline send+cleanup into sendResponseAndCleanup and sendSuccessResponseAndCleanup --- .../handlers/confidentialrelay/handler.go | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 1e0c9b31b41..9026f78c113 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -353,12 +353,27 @@ func (h *handler) sendSuccessResponseAndCleanup(ctx context.Context, l logger.Lo RawResponse: rawResponse, ErrorCode: errorCode, } - return h.sendPayloadAndCleanup(ctx, ar, payload) + + h.recordMetrics(ctx, errorCode) + sendErr := ar.SendResponse(payload) + + h.mu.Lock() + delete(h.activeRequests, ar.req.ID) + h.mu.Unlock() + + if sendErr != nil { + h.lggr.Errorw("error sending response to user", "requestID", ar.req.ID, "error", sendErr) + return sendErr + } + + h.lggr.Debugw("response sent to user", "requestID", ar.req.ID, "errorCode", errorCode) + return nil } // sendResponseAndCleanup 
sanitizes the error, encodes a JSON-RPC error // response, records metrics, sends the response to the user, and removes -// the request from activeRequests. +// the request from activeRequests. The request is always removed regardless +// of whether the send succeeds, since a failed callback cannot be retried. func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, errorCode api.ErrorCode, err error, data []byte) error { req := ar.req switch errorCode { @@ -393,15 +408,25 @@ func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, ), ErrorCode: errorCode, } - return h.sendPayloadAndCleanup(ctx, ar, payload) + + h.recordMetrics(ctx, errorCode) + sendErr := ar.SendResponse(payload) + + h.mu.Lock() + delete(h.activeRequests, ar.req.ID) + h.mu.Unlock() + + if sendErr != nil { + h.lggr.Errorw("error sending response to user", "requestID", ar.req.ID, "error", sendErr) + return sendErr + } + + h.lggr.Debugw("response sent to user", "requestID", ar.req.ID, "errorCode", errorCode) + return nil } -// sendPayloadAndCleanup sends the pre-built payload, records metrics, and -// removes the request from activeRequests. The request is always removed -// regardless of whether the send succeeds, since a failed callback cannot -// be retried. 
-func (h *handler) sendPayloadAndCleanup(ctx context.Context, userRequest *activeRequest, resp gwhandlers.UserCallbackPayload) error { - switch resp.ErrorCode { +func (h *handler) recordMetrics(ctx context.Context, errorCode api.ErrorCode) { + switch errorCode { case api.StaleNodeResponseError: case api.FatalError: case api.NodeReponseEncodingError: @@ -409,7 +434,7 @@ func (h *handler) sendPayloadAndCleanup(ctx context.Context, userRequest *active case api.HandlerError: h.metrics.requestInternalError.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), - attribute.String("error", resp.ErrorCode.String()), + attribute.String("error", errorCode.String()), )) case api.InvalidParamsError: case api.UnsupportedMethodError: @@ -425,18 +450,4 @@ func (h *handler) sendPayloadAndCleanup(ctx context.Context, userRequest *active case api.ConflictError: case api.LimitExceededError: } - - err := userRequest.SendResponse(resp) - - h.mu.Lock() - delete(h.activeRequests, userRequest.req.ID) - h.mu.Unlock() - - if err != nil { - h.lggr.Errorw("error sending response to user", "requestID", userRequest.req.ID, "error", err) - return err - } - - h.lggr.Debugw("response sent to user", "requestID", userRequest.req.ID, "errorCode", resp.ErrorCode) - return nil } From c3d18de7c59495544a2a7eefb302f600c0818509 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Mon, 30 Mar 2026 19:05:30 +0200 Subject: [PATCH 15/25] Unify sendResponseAndCleanup to handle both success and error paths --- .../handlers/confidentialrelay/handler.go | 139 ++++++++---------- 1 file changed, 64 insertions(+), 75 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 9026f78c113..20ad2b018e6 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -221,7 +221,7 @@ func (h *handler) 
removeExpiredRequests(ctx context.Context) { for _, er := range expiredRequests { responses := er.copiedResponses() h.lggr.Debugw("request expired without quorum", "requestID", er.req.ID, "responseCount", len(responses), "required", h.donConfig.F+1) - err := h.sendResponseAndCleanup(ctx, er, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1), nil) + err := h.sendResponseAndCleanup(ctx, er, nil, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1)) if err != nil { h.lggr.Errorw("error sending response to user", "requestID", er.req.ID, "error", err) } @@ -307,13 +307,13 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ return nil case errors.Is(err, errQuorumUnobtainable): l.Errorw("quorum unobtainable, returning error to user", "error", err) - return h.sendResponseAndCleanup(ctx, ar, api.FatalError, err, nil) + return h.sendResponseAndCleanup(ctx, ar, nil, api.FatalError, err) case err != nil: l.Errorw("unexpected aggregation error", "error", err) - return h.sendResponseAndCleanup(ctx, ar, api.FatalError, err, nil) + return h.sendResponseAndCleanup(ctx, ar, nil, api.FatalError, err) } - return h.sendSuccessResponseAndCleanup(ctx, l, ar, aggregatedResp) + return h.sendResponseAndCleanup(ctx, ar, aggregatedResp, 0, nil) } func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *activeRequest) error { @@ -327,86 +327,75 @@ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *active } if len(nodeErrors) == len(h.donConfig.Members) && len(nodeErrors) > 0 { - return h.sendResponseAndCleanup(ctx, ar, api.FatalError, errors.New("failed to forward user request to nodes"), nil) + return h.sendResponseAndCleanup(ctx, ar, nil, api.FatalError, errors.New("failed to forward user request to nodes")) } l.Debugw("successfully forwarded request to relay nodes") return nil } -func (h *handler) 
sendSuccessResponseAndCleanup(ctx context.Context, l logger.Logger, ar *activeRequest, resp *jsonrpc.Response[json.RawMessage]) error { - rawResponse, err := jsonrpc.EncodeResponse(resp) - if err != nil { - l.Errorw("failed to encode response", "error", err) - return h.sendResponseAndCleanup(ctx, ar, api.NodeReponseEncodingError, fmt.Errorf("failed to marshal response: %w", err), nil) - } - - var errorCode api.ErrorCode - if resp.Error != nil { - errorCode = api.FromJSONRPCErrorCode(resp.Error.Code) - } else { - errorCode = api.NoError - } - - l.Debugw("issued user callback", "errorCode", errorCode) - payload := gwhandlers.UserCallbackPayload{ - RawResponse: rawResponse, - ErrorCode: errorCode, - } - - h.recordMetrics(ctx, errorCode) - sendErr := ar.SendResponse(payload) - - h.mu.Lock() - delete(h.activeRequests, ar.req.ID) - h.mu.Unlock() - - if sendErr != nil { - h.lggr.Errorw("error sending response to user", "requestID", ar.req.ID, "error", sendErr) - return sendErr +// sendResponseAndCleanup handles both success and error responses. For +// success, pass the aggregated resp; for errors, pass nil resp with an +// errorCode and err. The request is always removed from activeRequests +// regardless of whether the send succeeds, since a failed callback cannot +// be retried. 
+func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, resp *jsonrpc.Response[json.RawMessage], errorCode api.ErrorCode, err error) error { + var payload gwhandlers.UserCallbackPayload + + if resp != nil { + rawResponse, encErr := jsonrpc.EncodeResponse(resp) + if encErr != nil { + h.lggr.Errorw("failed to encode response", "requestID", ar.req.ID, "error", encErr) + errorCode = api.NodeReponseEncodingError + err = fmt.Errorf("failed to marshal response: %w", encErr) + resp = nil + } else { + if resp.Error != nil { + errorCode = api.FromJSONRPCErrorCode(resp.Error.Code) + } else { + errorCode = api.NoError + } + payload = gwhandlers.UserCallbackPayload{ + RawResponse: rawResponse, + ErrorCode: errorCode, + } + } } - h.lggr.Debugw("response sent to user", "requestID", ar.req.ID, "errorCode", errorCode) - return nil -} - -// sendResponseAndCleanup sanitizes the error, encodes a JSON-RPC error -// response, records metrics, sends the response to the user, and removes -// the request from activeRequests. The request is always removed regardless -// of whether the send succeeds, since a failed callback cannot be retried. 
-func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, errorCode api.ErrorCode, err error, data []byte) error { - req := ar.req - switch errorCode { - case api.FatalError: - case api.NodeReponseEncodingError: - h.lggr.Errorw(err.Error(), "requestID", req.ID) - err = errors.New(errorCode.String()) - case api.InvalidParamsError: - h.lggr.Errorw("invalid params", "requestID", req.ID, "params", string(*req.Params)) - err = fmt.Errorf("invalid params error: %w", err) - case api.UnsupportedMethodError: - h.lggr.Errorw("unsupported method", "requestID", req.ID, "method", req.Method, "error", err.Error()) - err = fmt.Errorf("unsupported method(%s): %w", req.Method, err) - case api.UserMessageParseError: - h.lggr.Errorw("user message parse error", "requestID", req.ID, "error", err.Error()) - err = fmt.Errorf("user message parse error: %w", err) - case api.NoError: - case api.UnsupportedDONIdError: - case api.HandlerError: - case api.RequestTimeoutError: - case api.StaleNodeResponseError: - case api.ConflictError: - case api.LimitExceededError: - } + if resp == nil { + req := ar.req + switch errorCode { + case api.FatalError: + case api.NodeReponseEncodingError: + h.lggr.Errorw(err.Error(), "requestID", req.ID) + err = errors.New(errorCode.String()) + case api.InvalidParamsError: + h.lggr.Errorw("invalid params", "requestID", req.ID, "params", string(*req.Params)) + err = fmt.Errorf("invalid params error: %w", err) + case api.UnsupportedMethodError: + h.lggr.Errorw("unsupported method", "requestID", req.ID, "method", req.Method, "error", err.Error()) + err = fmt.Errorf("unsupported method(%s): %w", req.Method, err) + case api.UserMessageParseError: + h.lggr.Errorw("user message parse error", "requestID", req.ID, "error", err.Error()) + err = fmt.Errorf("user message parse error: %w", err) + case api.NoError: + case api.UnsupportedDONIdError: + case api.HandlerError: + case api.RequestTimeoutError: + case api.StaleNodeResponseError: + case 
api.ConflictError: + case api.LimitExceededError: + } - payload := gwhandlers.UserCallbackPayload{ - RawResponse: h.codec.EncodeNewErrorResponse( - req.ID, - api.ToJSONRPCErrorCode(errorCode), - err.Error(), - data, - ), - ErrorCode: errorCode, + payload = gwhandlers.UserCallbackPayload{ + RawResponse: h.codec.EncodeNewErrorResponse( + req.ID, + api.ToJSONRPCErrorCode(errorCode), + err.Error(), + nil, + ), + ErrorCode: errorCode, + } } h.recordMetrics(ctx, errorCode) From 4e968c5c84f2ed909797e040d35cfd45c7958a1a Mon Sep 17 00:00:00 2001 From: pavel-raykov Date: Mon, 30 Mar 2026 22:25:27 +0200 Subject: [PATCH 16/25] Simplify `sendResponseAndCleanup`. --- .../handlers/confidentialrelay/handler.go | 118 ++++++------------ 1 file changed, 40 insertions(+), 78 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 20ad2b018e6..f0739a3cd23 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -221,7 +221,7 @@ func (h *handler) removeExpiredRequests(ctx context.Context) { for _, er := range expiredRequests { responses := er.copiedResponses() h.lggr.Debugw("request expired without quorum", "requestID", er.req.ID, "responseCount", len(responses), "required", h.donConfig.F+1) - err := h.sendResponseAndCleanup(ctx, er, nil, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1)) + err := h.sendResponseAndCleanup(ctx, er, h.constructErrorResponse(er.req, api.RequestTimeoutError, fmt.Errorf("request expired: got %d/%d responses", len(responses), h.donConfig.F+1))) if err != nil { h.lggr.Errorw("error sending response to user", "requestID", er.req.ID, "error", err) } @@ -307,13 +307,21 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ return nil case errors.Is(err, errQuorumUnobtainable): l.Errorw("quorum 
unobtainable, returning error to user", "error", err) - return h.sendResponseAndCleanup(ctx, ar, nil, api.FatalError, err) + return h.sendResponseAndCleanup(ctx, ar, h.constructErrorResponse(ar.req, api.FatalError, err)) case err != nil: l.Errorw("unexpected aggregation error", "error", err) - return h.sendResponseAndCleanup(ctx, ar, nil, api.FatalError, err) + return h.sendResponseAndCleanup(ctx, ar, h.constructErrorResponse(ar.req, api.FatalError, err)) } - return h.sendResponseAndCleanup(ctx, ar, aggregatedResp, 0, nil) + rawResponse, err := jsonrpc.EncodeResponse(aggregatedResp) + if err != nil { + h.lggr.Errorw("failed to encode response", "requestID", ar.req.ID, "error", err) + return h.sendResponseAndCleanup(ctx, ar, h.constructErrorResponse(ar.req, api.NodeReponseEncodingError, err)) + } + return h.sendResponseAndCleanup(ctx, ar, gwhandlers.UserCallbackPayload{ + RawResponse: rawResponse, + ErrorCode: api.NoError, + }) } func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *activeRequest) error { @@ -327,78 +335,19 @@ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *active } if len(nodeErrors) == len(h.donConfig.Members) && len(nodeErrors) > 0 { - return h.sendResponseAndCleanup(ctx, ar, nil, api.FatalError, errors.New("failed to forward user request to nodes")) + return h.sendResponseAndCleanup(ctx, ar, h.constructErrorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"))) } l.Debugw("successfully forwarded request to relay nodes") return nil } -// sendResponseAndCleanup handles both success and error responses. For -// success, pass the aggregated resp; for errors, pass nil resp with an -// errorCode and err. The request is always removed from activeRequests +// sendResponseAndCleanup sends payload. +// The request is always removed from activeRequests // regardless of whether the send succeeds, since a failed callback cannot // be retried. 
-func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, resp *jsonrpc.Response[json.RawMessage], errorCode api.ErrorCode, err error) error { - var payload gwhandlers.UserCallbackPayload - - if resp != nil { - rawResponse, encErr := jsonrpc.EncodeResponse(resp) - if encErr != nil { - h.lggr.Errorw("failed to encode response", "requestID", ar.req.ID, "error", encErr) - errorCode = api.NodeReponseEncodingError - err = fmt.Errorf("failed to marshal response: %w", encErr) - resp = nil - } else { - if resp.Error != nil { - errorCode = api.FromJSONRPCErrorCode(resp.Error.Code) - } else { - errorCode = api.NoError - } - payload = gwhandlers.UserCallbackPayload{ - RawResponse: rawResponse, - ErrorCode: errorCode, - } - } - } - - if resp == nil { - req := ar.req - switch errorCode { - case api.FatalError: - case api.NodeReponseEncodingError: - h.lggr.Errorw(err.Error(), "requestID", req.ID) - err = errors.New(errorCode.String()) - case api.InvalidParamsError: - h.lggr.Errorw("invalid params", "requestID", req.ID, "params", string(*req.Params)) - err = fmt.Errorf("invalid params error: %w", err) - case api.UnsupportedMethodError: - h.lggr.Errorw("unsupported method", "requestID", req.ID, "method", req.Method, "error", err.Error()) - err = fmt.Errorf("unsupported method(%s): %w", req.Method, err) - case api.UserMessageParseError: - h.lggr.Errorw("user message parse error", "requestID", req.ID, "error", err.Error()) - err = fmt.Errorf("user message parse error: %w", err) - case api.NoError: - case api.UnsupportedDONIdError: - case api.HandlerError: - case api.RequestTimeoutError: - case api.StaleNodeResponseError: - case api.ConflictError: - case api.LimitExceededError: - } - - payload = gwhandlers.UserCallbackPayload{ - RawResponse: h.codec.EncodeNewErrorResponse( - req.ID, - api.ToJSONRPCErrorCode(errorCode), - err.Error(), - nil, - ), - ErrorCode: errorCode, - } - } - - h.recordMetrics(ctx, errorCode) +func (h *handler) sendResponseAndCleanup(ctx 
context.Context, ar *activeRequest, payload gwhandlers.UserCallbackPayload) error { + h.recordMetrics(ctx, payload.ErrorCode) sendErr := ar.SendResponse(payload) h.mu.Lock() @@ -410,24 +359,17 @@ func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, return sendErr } - h.lggr.Debugw("response sent to user", "requestID", ar.req.ID, "errorCode", errorCode) + h.lggr.Debugw("response sent to user", "requestID", ar.req.ID, "errorCode", payload.ErrorCode) return nil } func (h *handler) recordMetrics(ctx context.Context, errorCode api.ErrorCode) { switch errorCode { - case api.StaleNodeResponseError: - case api.FatalError: - case api.NodeReponseEncodingError: - case api.RequestTimeoutError: case api.HandlerError: h.metrics.requestInternalError.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), attribute.String("error", errorCode.String()), )) - case api.InvalidParamsError: - case api.UnsupportedMethodError: - case api.UserMessageParseError: case api.UnsupportedDONIdError: h.metrics.requestUserError.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), @@ -436,7 +378,27 @@ func (h *handler) recordMetrics(ctx context.Context, errorCode api.ErrorCode) { h.metrics.requestSuccess.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), )) - case api.ConflictError: - case api.LimitExceededError: + } +} + +func (h *handler) constructErrorResponse(req jsonrpc.Request[json.RawMessage], errorCode api.ErrorCode, err error) gwhandlers.UserCallbackPayload { + switch errorCode { + case api.NodeReponseEncodingError: + err = errors.New(errorCode.String()) + case api.InvalidParamsError: + err = fmt.Errorf("invalid params error: %w", err) + case api.UnsupportedMethodError: + err = fmt.Errorf("unsupported method(%s): %w", req.Method, err) + case api.UserMessageParseError: + err = fmt.Errorf("user message parse error: %w", err) + } + return gwhandlers.UserCallbackPayload{ + 
RawResponse: h.codec.EncodeNewErrorResponse( + req.ID, + api.ToJSONRPCErrorCode(errorCode), + err.Error(), + nil, + ), + ErrorCode: errorCode, } } From 78a2b67793c35534a42fc95b5cd1b8b810dc4bce Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Tue, 31 Mar 2026 12:01:27 +0200 Subject: [PATCH 17/25] Fix exhaustive lint: restore missing switch cases in recordMetrics and constructErrorResponse --- .../handlers/confidentialrelay/handler.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index f0739a3cd23..e123da52307 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -365,11 +365,18 @@ func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, func (h *handler) recordMetrics(ctx context.Context, errorCode api.ErrorCode) { switch errorCode { + case api.StaleNodeResponseError: + case api.FatalError: + case api.NodeReponseEncodingError: + case api.RequestTimeoutError: case api.HandlerError: h.metrics.requestInternalError.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), attribute.String("error", errorCode.String()), )) + case api.InvalidParamsError: + case api.UnsupportedMethodError: + case api.UserMessageParseError: case api.UnsupportedDONIdError: h.metrics.requestUserError.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), @@ -378,6 +385,8 @@ func (h *handler) recordMetrics(ctx context.Context, errorCode api.ErrorCode) { h.metrics.requestSuccess.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), )) + case api.ConflictError: + case api.LimitExceededError: } } @@ -391,6 +400,14 @@ func (h *handler) constructErrorResponse(req jsonrpc.Request[json.RawMessage], e err = fmt.Errorf("unsupported method(%s): %w", req.Method, err) 
case api.UserMessageParseError: err = fmt.Errorf("user message parse error: %w", err) + case api.NoError: + case api.UnsupportedDONIdError: + case api.HandlerError: + case api.FatalError: + case api.RequestTimeoutError: + case api.StaleNodeResponseError: + case api.ConflictError: + case api.LimitExceededError: } return gwhandlers.UserCallbackPayload{ RawResponse: h.codec.EncodeNewErrorResponse( From 71b884c58befe74ac5c7adf81562ff3909659a79 Mon Sep 17 00:00:00 2001 From: pavel-raykov Date: Tue, 31 Mar 2026 13:04:35 +0200 Subject: [PATCH 18/25] Suppress exhaustive switch warning. --- .../handlers/confidentialrelay/handler.go | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index e123da52307..3a1007dbaaa 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -364,19 +364,13 @@ func (h *handler) sendResponseAndCleanup(ctx context.Context, ar *activeRequest, } func (h *handler) recordMetrics(ctx context.Context, errorCode api.ErrorCode) { + //nolint:exhaustive // do not record other errors switch errorCode { - case api.StaleNodeResponseError: - case api.FatalError: - case api.NodeReponseEncodingError: - case api.RequestTimeoutError: case api.HandlerError: h.metrics.requestInternalError.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), attribute.String("error", errorCode.String()), )) - case api.InvalidParamsError: - case api.UnsupportedMethodError: - case api.UserMessageParseError: case api.UnsupportedDONIdError: h.metrics.requestUserError.Add(ctx, 1, metric.WithAttributes( attribute.String("don_id", h.donConfig.DonId), @@ -385,12 +379,11 @@ func (h *handler) recordMetrics(ctx context.Context, errorCode api.ErrorCode) { h.metrics.requestSuccess.Add(ctx, 1, metric.WithAttributes( 
attribute.String("don_id", h.donConfig.DonId), )) - case api.ConflictError: - case api.LimitExceededError: } } func (h *handler) constructErrorResponse(req jsonrpc.Request[json.RawMessage], errorCode api.ErrorCode, err error) gwhandlers.UserCallbackPayload { + //nolint:exhaustive // do not modify other error codes switch errorCode { case api.NodeReponseEncodingError: err = errors.New(errorCode.String()) @@ -400,14 +393,6 @@ func (h *handler) constructErrorResponse(req jsonrpc.Request[json.RawMessage], e err = fmt.Errorf("unsupported method(%s): %w", req.Method, err) case api.UserMessageParseError: err = fmt.Errorf("user message parse error: %w", err) - case api.NoError: - case api.UnsupportedDONIdError: - case api.HandlerError: - case api.FatalError: - case api.RequestTimeoutError: - case api.StaleNodeResponseError: - case api.ConflictError: - case api.LimitExceededError: } return gwhandlers.UserCallbackPayload{ RawResponse: h.codec.EncodeNewErrorResponse( From 981c2c02b0b17ebfc0ce32f77e0e7a2b34b8ddf4 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Wed, 1 Apr 2026 13:44:47 +0200 Subject: [PATCH 19/25] fan out relay requests to don nodes concurrently --- .../handlers/confidentialrelay/handler.go | 28 ++++-- .../confidentialrelay/handler_test.go | 85 +++++++++++++++++++ 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 3a1007dbaaa..05799529d67 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -12,6 +12,7 @@ import ( "github.com/jonboulle/clockwork" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" + "golang.org/x/sync/errgroup" "github.com/smartcontractkit/chainlink-common/pkg/beholder" relaytypes "github.com/smartcontractkit/chainlink-common/pkg/capabilities/v2/actions/confidentialrelay" @@ -325,16 +326,29 @@ 
func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ } func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *activeRequest) error { - var nodeErrors []error + var ( + group errgroup.Group + nodeErrors int + nodeErrorsMu sync.Mutex + ) + for _, node := range h.donConfig.Members { - err := h.don.SendToNode(ctx, node.Address, &ar.req) - if err != nil { - nodeErrors = append(nodeErrors, err) - l.Errorw("error sending request to node", "node", node.Address, "error", err) - } + node := node + group.Go(func() error { + err := h.don.SendToNode(ctx, node.Address, &ar.req) + if err != nil { + nodeErrorsMu.Lock() + nodeErrors++ + nodeErrorsMu.Unlock() + l.Errorw("error sending request to node", "node", node.Address, "error", err) + } + return nil + }) } - if len(nodeErrors) == len(h.donConfig.Members) && len(nodeErrors) > 0 { + _ = group.Wait() + + if nodeErrors == len(h.donConfig.Members) && nodeErrors > 0 { return h.sendResponseAndCleanup(ctx, ar, h.constructErrorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"))) } diff --git a/core/services/gateway/handlers/confidentialrelay/handler_test.go b/core/services/gateway/handlers/confidentialrelay/handler_test.go index efc6e2c1689..7d3f3056d65 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler_test.go +++ b/core/services/gateway/handlers/confidentialrelay/handler_test.go @@ -25,6 +25,38 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/mocks" ) +type barrierDON struct { + total int + mu sync.Mutex + started int + allStarted chan struct{} + releaseOnce sync.Once +} + +func newBarrierDON(total int) *barrierDON { + return &barrierDON{ + total: total, + allStarted: make(chan struct{}), + } +} + +func (d *barrierDON) SendToNode(_ context.Context, _ string, _ *jsonrpc.Request[json.RawMessage]) error { + d.mu.Lock() + d.started++ + if d.started == d.total { + d.releaseOnce.Do(func() { 
close(d.allStarted) }) + } + ch := d.allStarted + d.mu.Unlock() + + <-ch + return nil +} + +func (d *barrierDON) forceRelease() { + d.releaseOnce.Do(func() { close(d.allStarted) }) +} + var nodeOne = config.NodeConfig{ Name: "node1", Address: "0x1234", @@ -495,6 +527,59 @@ func TestConfidentialRelayHandler_AllNodesFanOutFail(t *testing.T) { wg.Wait() } +func TestConfidentialRelayHandler_FanOutToNodes_IsConcurrent(t *testing.T) { + lggr := logger.Test(t) + don := newBarrierDON(2) + donConfig := &config.DONConfig{ + DonId: "test_relay_don", + F: 1, + Members: []config.NodeConfig{ + {Name: "node0", Address: "0x0000"}, + {Name: "node1", Address: "0x0001"}, + }, + } + + methodConfig, err := json.Marshal(Config{ + RequestTimeoutSec: 30, + NodeRateLimiter: ratelimit.RateLimiterConfig{ + GlobalRPS: 100, + GlobalBurst: 100, + PerSenderRPS: 10, + PerSenderBurst: 10, + }, + }) + require.NoError(t, err) + + h, err := NewHandler(methodConfig, donConfig, don, lggr, clockwork.NewFakeClock()) + require.NoError(t, err) + + cb := common.NewCallback() + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-concurrent-fanout", + Method: MethodCapabilityExec, + Params: &params, + } + + done := make(chan error, 1) + go func() { + done <- h.HandleJSONRPCUserMessage(t.Context(), req, cb) + }() + + select { + case err := <-done: + require.NoError(t, err) + case <-time.After(100 * time.Millisecond): + don.forceRelease() + t.Fatal("HandleJSONRPCUserMessage did not fan out to nodes concurrently") + } + + don.mu.Lock() + started := don.started + don.mu.Unlock() + assert.Equal(t, 2, started) +} + func TestConfidentialRelayHandler_CapabilityExecMethod(t *testing.T) { h, cb, don, _ := setupHandler(t, 4) h.aggregator = &respondingMockAggregator{} From ebbee318d56aa44a49d6a1192cbc72912c5c6de4 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Wed, 1 Apr 2026 15:05:58 +0200 Subject: [PATCH 20/25] Clean up confidential relay concurrency test helper
--- .../confidentialrelay/handler_test.go | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler_test.go b/core/services/gateway/handlers/confidentialrelay/handler_test.go index 7d3f3056d65..7de5705e42c 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler_test.go +++ b/core/services/gateway/handlers/confidentialrelay/handler_test.go @@ -40,7 +40,7 @@ func newBarrierDON(total int) *barrierDON { } } -func (d *barrierDON) SendToNode(_ context.Context, _ string, _ *jsonrpc.Request[json.RawMessage]) error { +func (d *barrierDON) SendToNode(ctx context.Context, _ string, _ *jsonrpc.Request[json.RawMessage]) error { d.mu.Lock() d.started++ if d.started == d.total { @@ -49,12 +49,12 @@ func (d *barrierDON) SendToNode(_ context.Context, _ string, _ *jsonrpc.Request[ ch := d.allStarted d.mu.Unlock() - <-ch - return nil -} - -func (d *barrierDON) forceRelease() { - d.releaseOnce.Do(func() { close(d.allStarted) }) + select { + case <-ch: + return nil + case <-ctx.Done(): + return ctx.Err() + } } var nodeOne = config.NodeConfig{ @@ -561,16 +561,20 @@ func TestConfidentialRelayHandler_FanOutToNodes_IsConcurrent(t *testing.T) { Params: &params, } + ctx, cancel := context.WithCancel(t.Context()) + defer cancel() + done := make(chan error, 1) go func() { - done <- h.HandleJSONRPCUserMessage(t.Context(), req, cb) + done <- h.HandleJSONRPCUserMessage(ctx, req, cb) }() select { case err := <-done: require.NoError(t, err) case <-time.After(100 * time.Millisecond): - don.forceRelease() + cancel() + <-done t.Fatal("HandleJSONRPCUserMessage did not fan out to nodes concurrently") } From 28285475e4f5972b1cf5fce31a0044fbefebbca5 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Wed, 1 Apr 2026 18:18:20 +0200 Subject: [PATCH 21/25] Remove redundant loop variable copy in relay fanout --- core/services/gateway/handlers/confidentialrelay/handler.go | 1 - 1 file changed, 1 deletion(-) diff
--git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 05799529d67..0028488646d 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -333,7 +333,6 @@ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *active ) for _, node := range h.donConfig.Members { - node := node group.Go(func() error { err := h.don.SendToNode(ctx, node.Address, &ar.req) if err != nil { From 2c01583568e821137732b7cc4ccfbfc05a956880 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Thu, 2 Apr 2026 13:37:39 +0200 Subject: [PATCH 22/25] Use atomic counter for relay node send errors --- .../gateway/handlers/confidentialrelay/handler.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 0028488646d..e4c3f4ba0fc 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -7,6 +7,7 @@ import ( "fmt" "strconv" "sync" + "sync/atomic" "time" "github.com/jonboulle/clockwork" @@ -327,18 +328,15 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *activeRequest) error { var ( - group errgroup.Group - nodeErrors int - nodeErrorsMu sync.Mutex + group errgroup.Group + nodeErrors atomic.Uint32 ) for _, node := range h.donConfig.Members { group.Go(func() error { err := h.don.SendToNode(ctx, node.Address, &ar.req) if err != nil { - nodeErrorsMu.Lock() - nodeErrors++ - nodeErrorsMu.Unlock() + nodeErrors.Add(1) l.Errorw("error sending request to node", "node", node.Address, "error", err) } return nil @@ -347,7 +345,8 @@ func (h *handler) fanOutToNodes(ctx context.Context, l 
logger.Logger, ar *active _ = group.Wait() - if nodeErrors == len(h.donConfig.Members) && nodeErrors > 0 { + numNodeErrors := nodeErrors.Load() + if numNodeErrors == uint32(len(h.donConfig.Members)) && numNodeErrors > 0 { return h.sendResponseAndCleanup(ctx, ar, h.constructErrorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"))) } From 40fb4833ed6a403d099bbb3263e1a322fd20ab00 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Thu, 2 Apr 2026 14:55:02 +0200 Subject: [PATCH 23/25] use cre settings for relay gateway rate limits --- core/services/gateway/handler_factory.go | 2 +- .../handlers/confidentialrelay/handler.go | 72 +++++++++++++------ .../confidentialrelay/handler_test.go | 33 +++------ deployment/cre/jobs/pkg/gateway_job.go | 9 +-- deployment/cre/jobs/pkg/gateway_job_test.go | 10 +++ deployment/go.mod | 4 +- deployment/go.sum | 8 +-- go.mod | 4 +- go.sum | 8 +-- 9 files changed, 84 insertions(+), 66 deletions(-) diff --git a/core/services/gateway/handler_factory.go b/core/services/gateway/handler_factory.go index 5078a9ec6ac..c2338503eb7 100644 --- a/core/services/gateway/handler_factory.go +++ b/core/services/gateway/handler_factory.go @@ -90,7 +90,7 @@ func (hf *handlerFactory) NewHandler( requestAuthorizer := vaultcap.NewRequestAuthorizer(hf.lggr, hf.workflowRegistrySyncer) return vault.NewHandler(handlerConfig, donConfig, don, hf.capabilitiesRegistry, requestAuthorizer, hf.lggr, clockwork.NewRealClock(), hf.lf) case ConfidentialRelayHandlerType: - return confidentialrelay.NewHandler(handlerConfig, donConfig, don, hf.lggr, clockwork.NewRealClock()) + return confidentialrelay.NewHandler(handlerConfig, donConfig, don, hf.lggr, clockwork.NewRealClock(), hf.lf) default: return nil, fmt.Errorf("unsupported handler type %s", handlerType) } diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index e4c3f4ba0fc..4048c9c02bf 100644 --- 
a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -19,8 +19,9 @@ import ( relaytypes "github.com/smartcontractkit/chainlink-common/pkg/capabilities/v2/actions/confidentialrelay" jsonrpc "github.com/smartcontractkit/chainlink-common/pkg/jsonrpc2" "github.com/smartcontractkit/chainlink-common/pkg/logger" - "github.com/smartcontractkit/chainlink-common/pkg/ratelimit" "github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink-common/pkg/settings/cresettings" + "github.com/smartcontractkit/chainlink-common/pkg/settings/limits" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/api" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/config" gwhandlers "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers" @@ -113,8 +114,7 @@ type relayAggregator interface { } type Config struct { - NodeRateLimiter ratelimit.RateLimiterConfig `json:"nodeRateLimiter"` - RequestTimeoutSec int `json:"requestTimeoutSec"` + RequestTimeoutSec int `json:"requestTimeoutSec"` } type handler struct { @@ -126,8 +126,9 @@ type handler struct { mu sync.RWMutex stopCh services.StopChan - nodeRateLimiter *ratelimit.RateLimiter - requestTimeout time.Duration + globalNodeRateLimiter limits.RateLimiter + perNodeRateLimiters map[string]limits.RateLimiter + requestTimeout time.Duration activeRequests map[string]*activeRequest metrics *metrics @@ -145,7 +146,7 @@ func (h *handler) Name() string { return h.lggr.Name() } -func NewHandler(methodConfig json.RawMessage, donConfig *config.DONConfig, don gwhandlers.DON, lggr logger.Logger, clock clockwork.Clock) (*handler, error) { +func NewHandler(methodConfig json.RawMessage, donConfig *config.DONConfig, don gwhandlers.DON, lggr logger.Logger, clock clockwork.Clock, limitsFactory limits.Factory) (*handler, error) { var cfg Config if err := json.Unmarshal(methodConfig, &cfg); err != nil { return nil, 
fmt.Errorf("failed to unmarshal method config: %w", err) @@ -155,9 +156,18 @@ func NewHandler(methodConfig json.RawMessage, donConfig *config.DONConfig, don g cfg.RequestTimeoutSec = 30 } - nodeRateLimiter, err := ratelimit.NewRateLimiter(cfg.NodeRateLimiter) + globalNodeRateLimiter, err := limitsFactory.MakeRateLimiter(cresettings.Default.GatewayConfidentialRelayGlobalRate) if err != nil { - return nil, fmt.Errorf("failed to create node rate limiter: %w", err) + return nil, fmt.Errorf("failed to create global node rate limiter: %w", err) + } + + perNodeRateLimiters := make(map[string]limits.RateLimiter, len(donConfig.Members)) + for _, member := range donConfig.Members { + rl, makeErr := limitsFactory.MakeRateLimiter(cresettings.Default.GatewayConfidentialRelayPerNodeRate) + if makeErr != nil { + return nil, fmt.Errorf("failed to create per-node rate limiter for %s: %w", member.Address, makeErr) + } + perNodeRateLimiters[member.Address] = rl } metrics, err := newMetrics() @@ -166,17 +176,18 @@ func NewHandler(methodConfig json.RawMessage, donConfig *config.DONConfig, don g } return &handler{ - donConfig: donConfig, - don: don, - lggr: logger.Named(lggr, "ConfidentialRelayHandler:"+donConfig.DonId), - requestTimeout: time.Duration(cfg.RequestTimeoutSec) * time.Second, - nodeRateLimiter: nodeRateLimiter, - activeRequests: make(map[string]*activeRequest), - mu: sync.RWMutex{}, - stopCh: make(services.StopChan), - metrics: metrics, - aggregator: &aggregator{}, - clock: clock, + donConfig: donConfig, + don: don, + lggr: logger.Named(lggr, "ConfidentialRelayHandler:"+donConfig.DonId), + requestTimeout: time.Duration(cfg.RequestTimeoutSec) * time.Second, + globalNodeRateLimiter: globalNodeRateLimiter, + perNodeRateLimiters: perNodeRateLimiters, + activeRequests: make(map[string]*activeRequest), + mu: sync.RWMutex{}, + stopCh: make(services.StopChan), + metrics: metrics, + aggregator: &aggregator{}, + clock: clock, }, nil } @@ -205,7 +216,14 @@ func (h *handler) Close() 
error { return h.StopOnce("ConfidentialRelayHandler", func() error { h.lggr.Info("closing confidential relay handler") close(h.stopCh) - return nil + var err error + if h.globalNodeRateLimiter != nil { + err = errors.Join(err, h.globalNodeRateLimiter.Close()) + } + for _, rl := range h.perNodeRateLimiters { + err = errors.Join(err, rl.Close()) + } + return err }) } @@ -284,10 +302,18 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ l := logger.With(h.lggr, "method", resp.Method, "requestID", resp.ID, "nodeAddr", nodeAddr) l.Debugw("handling node response") - if !h.nodeRateLimiter.Allow(nodeAddr) { + nodeRateLimiter, ok := h.perNodeRateLimiters[nodeAddr] + if !ok { + return fmt.Errorf("received message from unexpected node %s", nodeAddr) + } + if !nodeRateLimiter.Allow(ctx) { l.Debugw("node is rate limited", "nodeAddr", nodeAddr) return nil } + if !h.globalNodeRateLimiter.Allow(ctx) { + l.Debug("global relay rate limit exceeded") + return nil + } ar := h.getActiveRequest(resp.ID) if ar == nil { @@ -295,8 +321,8 @@ func (h *handler) HandleNodeMessage(ctx context.Context, resp *jsonrpc.Response[ return nil } - ok := ar.addResponseForNode(nodeAddr, resp) - if !ok { + added := ar.addResponseForNode(nodeAddr, resp) + if !added { l.Errorw("duplicate response from node, ignoring", "nodeAddr", nodeAddr) return nil } diff --git a/core/services/gateway/handlers/confidentialrelay/handler_test.go b/core/services/gateway/handlers/confidentialrelay/handler_test.go index 7de5705e42c..0183b1cafdb 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler_test.go +++ b/core/services/gateway/handlers/confidentialrelay/handler_test.go @@ -14,10 +14,12 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "golang.org/x/time/rate" jsonrpc "github.com/smartcontractkit/chainlink-common/pkg/jsonrpc2" "github.com/smartcontractkit/chainlink-common/pkg/logger" - 
"github.com/smartcontractkit/chainlink-common/pkg/ratelimit" + "github.com/smartcontractkit/chainlink-common/pkg/settings/cresettings" + "github.com/smartcontractkit/chainlink-common/pkg/settings/limits" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/api" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/config" @@ -82,18 +84,13 @@ func setupHandler(t *testing.T, numNodes int) (*handler, *common.Callback, *mock } handlerConfig := Config{ RequestTimeoutSec: 30, - NodeRateLimiter: ratelimit.RateLimiterConfig{ - GlobalRPS: 100, - GlobalBurst: 100, - PerSenderRPS: 10, - PerSenderBurst: 10, - }, } methodConfig, err := json.Marshal(handlerConfig) require.NoError(t, err) clock := clockwork.NewFakeClock() - h, err := NewHandler(methodConfig, donConfig, don, lggr, clock) + limitsFactory := limits.Factory{Settings: cresettings.DefaultGetter, Logger: lggr} + h, err := NewHandler(methodConfig, donConfig, don, lggr, clock, limitsFactory) require.NoError(t, err) h.aggregator = &mockAggregator{} cb := common.NewCallback() @@ -394,12 +391,6 @@ func TestConfidentialRelayHandler_DuplicateRequestID(t *testing.T) { func TestConfidentialRelayHandler_RateLimitedNode(t *testing.T) { handlerConfig := Config{ RequestTimeoutSec: 30, - NodeRateLimiter: ratelimit.RateLimiterConfig{ - GlobalRPS: 100, - GlobalBurst: 100, - PerSenderRPS: 0.001, // Effectively zero - PerSenderBurst: 1, - }, } methodConfig, err := json.Marshal(handlerConfig) require.NoError(t, err) @@ -412,9 +403,12 @@ func TestConfidentialRelayHandler_RateLimitedNode(t *testing.T) { Members: []config.NodeConfig{nodeOne}, } clock := clockwork.NewFakeClock() - h, err := NewHandler(methodConfig, donConfig, don, lggr, clock) + limitsFactory := limits.Factory{Settings: cresettings.DefaultGetter, Logger: lggr} + h, err := NewHandler(methodConfig, donConfig, don, lggr, clock, limitsFactory) require.NoError(t, err) h.aggregator = &respondingMockAggregator{} + h.globalNodeRateLimiter = 
limits.GlobalRateLimiter(rate.Limit(100), 100) + h.perNodeRateLimiters[nodeOne.Address] = limits.GlobalRateLimiter(rate.Limit(0.001), 1) don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return(nil) @@ -541,16 +535,11 @@ func TestConfidentialRelayHandler_FanOutToNodes_IsConcurrent(t *testing.T) { methodConfig, err := json.Marshal(Config{ RequestTimeoutSec: 30, - NodeRateLimiter: ratelimit.RateLimiterConfig{ - GlobalRPS: 100, - GlobalBurst: 100, - PerSenderRPS: 10, - PerSenderBurst: 10, - }, }) require.NoError(t, err) - h, err := NewHandler(methodConfig, donConfig, don, lggr, clockwork.NewFakeClock()) + limitsFactory := limits.Factory{Settings: cresettings.DefaultGetter, Logger: lggr} + h, err := NewHandler(methodConfig, donConfig, don, lggr, clockwork.NewFakeClock(), limitsFactory) require.NoError(t, err) cb := common.NewCallback() diff --git a/deployment/cre/jobs/pkg/gateway_job.go b/deployment/cre/jobs/pkg/gateway_job.go index cca265a0bb6..3bc6828cac1 100644 --- a/deployment/cre/jobs/pkg/gateway_job.go +++ b/deployment/cre/jobs/pkg/gateway_job.go @@ -457,8 +457,7 @@ func newDefaultHTTPCapabilitiesHandler() handler { } type confidentialRelayHandlerConfig struct { - RequestTimeoutSec int `toml:"requestTimeoutSec"` - NodeRateLimiter nodeRateLimiterConfig `toml:"NodeRateLimiter"` + RequestTimeoutSec int `toml:"requestTimeoutSec"` } func newDefaultConfidentialRelayHandler(requestTimeoutSec int) handler { @@ -467,12 +466,6 @@ func newDefaultConfidentialRelayHandler(requestTimeoutSec int) handler { ServiceName: ServiceNameConfidential, Config: confidentialRelayHandlerConfig{ RequestTimeoutSec: requestTimeoutSec, - NodeRateLimiter: nodeRateLimiterConfig{ - GlobalBurst: 10, - GlobalRPS: 50, - PerSenderBurst: 10, - PerSenderRPS: 10, - }, }, } } diff --git a/deployment/cre/jobs/pkg/gateway_job_test.go b/deployment/cre/jobs/pkg/gateway_job_test.go index 90d438284b3..d10f56b2df1 100644 --- a/deployment/cre/jobs/pkg/gateway_job_test.go +++ 
b/deployment/cre/jobs/pkg/gateway_job_test.go @@ -30,6 +30,16 @@ func TestGateway_Validate_ServiceCentric(t *testing.T) { require.ErrorContains(t, g.Validate(), "must provide at least one service") } +func TestNewDefaultConfidentialRelayHandler(t *testing.T) { + t.Parallel() + + got := newDefaultConfidentialRelayHandler(14) + + assert.Equal(t, GatewayHandlerTypeConfidentialRelay, got.Name) + assert.Equal(t, ServiceNameConfidential, got.ServiceName) + assert.Equal(t, confidentialRelayHandlerConfig{RequestTimeoutSec: 14}, got.Config) +} + const ( expected = `type = 'gateway' schemaVersion = 1 diff --git a/deployment/go.mod b/deployment/go.mod index e84ed0b4b2b..550136bfd35 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -44,7 +44,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326 - github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 + github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260330133421-5151ea0c3b05 @@ -233,7 +233,7 @@ require ( github.com/felixge/httpsnoop v1.0.4 // indirect github.com/ferranbt/fastssz v0.1.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/gabriel-vasile/mimetype v1.4.13 // indirect github.com/gagliardetto/treeout v0.1.4 // indirect github.com/gagliardetto/utilz v0.1.3 // indirect diff --git a/deployment/go.sum b/deployment/go.sum index 96952eb652c..ee90c55a682 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -519,8 
+519,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4 github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM= github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= github.com/gagliardetto/anchor-go v1.0.0 h1:YNt9I/9NOrNzz5uuzfzByAcbp39Ft07w63iPqC/wi34= @@ -1387,8 +1387,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2 h1:5HdH/A6yn8INZAltYDLb7UkUi5IKemhJzJkDW4Bgxyg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2/go.mod h1:wDHq2E0KwUWG0lQ9f5frW1a7CKVW17MJLPuvKmtSRDg= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 h1:yzkeWzWoPTbpDvVIz0ohmNVqAkvE8UwuLqqcUt47gYk= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843/go.mod h1:6tlxlsiWypGdpaZI+Kz5gFm53gCAcU/pTU3PR9CiFB8= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 h1:oaXslIvcy5HD3zkWhx3nu8vRGdWGedYJ+XCsBD8mYkA= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6/go.mod 
h1:Ea94/OgfFPRTByGO2Qo+uZ7/4sWhwE2HKu7dDwdojME= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/go.mod b/go.mod index 85ee2f01b1a..5d54ffc1011 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/esote/minmaxheap v1.0.0 github.com/ethereum/go-ethereum v1.17.1 github.com/fatih/color v1.18.0 - github.com/fxamacker/cbor/v2 v2.7.0 + github.com/fxamacker/cbor/v2 v2.9.0 github.com/gagliardetto/binary v0.8.0 github.com/gagliardetto/solana-go v1.13.0 github.com/getsentry/sentry-go v0.27.0 @@ -85,7 +85,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5 github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2 - github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 + github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 github.com/smartcontractkit/chainlink-data-streams v0.1.13 diff --git a/go.sum b/go.sum index 528e58b8358..468bdf0820d 100644 --- a/go.sum +++ b/go.sum @@ -425,8 +425,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4 github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/fxamacker/cbor/v2 v2.7.0 
h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0= github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= github.com/gagliardetto/anchor-go v1.0.0 h1:YNt9I/9NOrNzz5uuzfzByAcbp39Ft07w63iPqC/wi34= @@ -1235,8 +1235,8 @@ github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5/go.mod h1:xtZNi6pOKdC3sLvokDvXOhgHzT+cyBqH/gWwvxTxqrg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2 h1:5HdH/A6yn8INZAltYDLb7UkUi5IKemhJzJkDW4Bgxyg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2/go.mod h1:wDHq2E0KwUWG0lQ9f5frW1a7CKVW17MJLPuvKmtSRDg= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 h1:yzkeWzWoPTbpDvVIz0ohmNVqAkvE8UwuLqqcUt47gYk= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843/go.mod h1:6tlxlsiWypGdpaZI+Kz5gFm53gCAcU/pTU3PR9CiFB8= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 h1:oaXslIvcy5HD3zkWhx3nu8vRGdWGedYJ+XCsBD8mYkA= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6/go.mod h1:Ea94/OgfFPRTByGO2Qo+uZ7/4sWhwE2HKu7dDwdojME= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= 
From 7d1d54f566647b3b3644d9a17d193247f97da092 Mon Sep 17 00:00:00 2001 From: Tejaswi Nadahalli Date: Thu, 2 Apr 2026 15:05:37 +0200 Subject: [PATCH 24/25] Fail fast when relay quorum becomes impossible --- .../handlers/confidentialrelay/handler.go | 3 +- .../confidentialrelay/handler_test.go | 71 +++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/core/services/gateway/handlers/confidentialrelay/handler.go b/core/services/gateway/handlers/confidentialrelay/handler.go index 4048c9c02bf..3d99174da44 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler.go +++ b/core/services/gateway/handlers/confidentialrelay/handler.go @@ -372,7 +372,8 @@ func (h *handler) fanOutToNodes(ctx context.Context, l logger.Logger, ar *active _ = group.Wait() numNodeErrors := nodeErrors.Load() - if numNodeErrors == uint32(len(h.donConfig.Members)) && numNodeErrors > 0 { + remainingPossibleResponses := len(h.donConfig.Members) - int(numNodeErrors) + if remainingPossibleResponses < h.donConfig.F+1 && numNodeErrors > 0 { return h.sendResponseAndCleanup(ctx, ar, h.constructErrorResponse(ar.req, api.FatalError, errors.New("failed to forward user request to nodes"))) } diff --git a/core/services/gateway/handlers/confidentialrelay/handler_test.go b/core/services/gateway/handlers/confidentialrelay/handler_test.go index 0183b1cafdb..0cf3537405d 100644 --- a/core/services/gateway/handlers/confidentialrelay/handler_test.go +++ b/core/services/gateway/handlers/confidentialrelay/handler_test.go @@ -521,6 +521,77 @@ func TestConfidentialRelayHandler_AllNodesFanOutFail(t *testing.T) { wg.Wait() } +func TestConfidentialRelayHandler_FanOutWaitsWhileQuorumStillPossible(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return( + func(_ context.Context, nodeAddress string, _ *jsonrpc.Request[json.RawMessage]) error { + switch nodeAddress { + case "0x0000", "0x0001": + return 
errors.New("connection refused") + default: + return nil + } + }, + ) + + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-still-possible", + Method: MethodCapabilityExec, + Params: &params, + } + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + + require.NotNil(t, h.getActiveRequest(req.ID), "request should remain active while quorum is still possible") + + ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond) + defer cancel() + _, err = cb.Wait(ctx) + require.Error(t, err) +} + +func TestConfidentialRelayHandler_FanOutFailsWhenQuorumBecomesImpossible(t *testing.T) { + h, cb, don, _ := setupHandler(t, 4) + don.On("SendToNode", mock.Anything, mock.Anything, mock.Anything).Return( + func(_ context.Context, nodeAddress string, _ *jsonrpc.Request[json.RawMessage]) error { + switch nodeAddress { + case "0x0000", "0x0001", "0x0002": + return errors.New("connection refused") + default: + return nil + } + }, + ) + + params := json.RawMessage(`{"workflow_id":"wf1"}`) + req := jsonrpc.Request[json.RawMessage]{ + ID: "req-quorum-impossible", + Method: MethodCapabilityExec, + Params: &params, + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + resp, err := cb.Wait(t.Context()) + assert.NoError(t, err) + assert.Equal(t, api.FatalError, resp.ErrorCode) + var jsonResp jsonrpc.Response[json.RawMessage] + err = json.Unmarshal(resp.RawResponse, &jsonResp) + assert.NoError(t, err) + assert.Contains(t, jsonResp.Error.Message, "failed to forward user request to nodes") + }() + + err := h.HandleJSONRPCUserMessage(t.Context(), req, cb) + require.NoError(t, err) + wg.Wait() + + require.Nil(t, h.getActiveRequest(req.ID), "request should be cleaned up once quorum is impossible") +} + func TestConfidentialRelayHandler_FanOutToNodes_IsConcurrent(t *testing.T) { lggr := logger.Test(t) don := newBarrierDON(2) From 2c460ea1d6dfed944ac3fb39664b174685bfe431 Mon Sep 17
00:00:00 2001 From: Tejaswi Nadahalli Date: Thu, 2 Apr 2026 17:45:02 +0200 Subject: [PATCH 25/25] Run gomodtidy to fix CI module drift --- core/scripts/go.mod | 2 +- core/scripts/go.sum | 4 ++-- integration-tests/go.mod | 4 ++-- integration-tests/go.sum | 8 ++++---- integration-tests/load/go.mod | 4 ++-- integration-tests/load/go.sum | 8 ++++---- system-tests/lib/go.mod | 2 +- system-tests/lib/go.sum | 4 ++-- system-tests/tests/go.mod | 2 +- system-tests/tests/go.sum | 4 ++-- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/core/scripts/go.mod b/core/scripts/go.mod index d4383bb6274..ec5bcfb4afa 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -45,7 +45,7 @@ require ( github.com/smartcontractkit/chain-selectors v1.0.97 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 - github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 + github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.13 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/core/scripts/go.sum b/core/scripts/go.sum index 576f3339ecd..ca1f837539f 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1634,8 +1634,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2 h1:5HdH/A6yn8INZAltYDLb7UkUi5IKemhJzJkDW4Bgxyg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2/go.mod h1:wDHq2E0KwUWG0lQ9f5frW1a7CKVW17MJLPuvKmtSRDg= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 
h1:yzkeWzWoPTbpDvVIz0ohmNVqAkvE8UwuLqqcUt47gYk= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843/go.mod h1:6tlxlsiWypGdpaZI+Kz5gFm53gCAcU/pTU3PR9CiFB8= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 h1:oaXslIvcy5HD3zkWhx3nu8vRGdWGedYJ+XCsBD8mYkA= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6/go.mod h1:Ea94/OgfFPRTByGO2Qo+uZ7/4sWhwE2HKu7dDwdojME= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index 0e76e93edb5..f92893bdfb1 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -38,7 +38,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 + github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260330133421-5151ea0c3b05 @@ -213,7 +213,7 @@ require ( github.com/felixge/httpsnoop v1.0.4 // indirect github.com/ferranbt/fastssz v0.1.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/gabriel-vasile/mimetype v1.4.13 
// indirect github.com/gagliardetto/anchor-go v1.0.0 // indirect github.com/gagliardetto/binary v0.8.0 // indirect diff --git a/integration-tests/go.sum b/integration-tests/go.sum index fc524bc8e82..30491f0cda2 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -504,8 +504,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4 github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM= github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= github.com/gagliardetto/anchor-go v1.0.0 h1:YNt9I/9NOrNzz5uuzfzByAcbp39Ft07w63iPqC/wi34= @@ -1374,8 +1374,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2 h1:5HdH/A6yn8INZAltYDLb7UkUi5IKemhJzJkDW4Bgxyg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2/go.mod h1:wDHq2E0KwUWG0lQ9f5frW1a7CKVW17MJLPuvKmtSRDg= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 h1:yzkeWzWoPTbpDvVIz0ohmNVqAkvE8UwuLqqcUt47gYk= -github.com/smartcontractkit/chainlink-common 
v0.11.2-0.20260331163339-a3c0d217e843/go.mod h1:6tlxlsiWypGdpaZI+Kz5gFm53gCAcU/pTU3PR9CiFB8= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 h1:oaXslIvcy5HD3zkWhx3nu8vRGdWGedYJ+XCsBD8mYkA= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6/go.mod h1:Ea94/OgfFPRTByGO2Qo+uZ7/4sWhwE2HKu7dDwdojME= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index 689db65f6a0..5dfdd0fa8e3 100644 --- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -27,7 +27,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 + github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260330133421-5151ea0c3b05 github.com/smartcontractkit/chainlink-testing-framework/framework v0.15.3 @@ -208,7 +208,7 @@ require ( github.com/felixge/httpsnoop v1.0.4 // indirect github.com/ferranbt/fastssz v0.1.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/gabriel-vasile/mimetype v1.4.13 // indirect github.com/gagliardetto/anchor-go v1.0.0 // indirect 
github.com/gagliardetto/binary v0.8.0 // indirect diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index 8dae65f12b1..c25f5818c21 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -569,8 +569,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4 github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM= github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= github.com/gagliardetto/anchor-go v1.0.0 h1:YNt9I/9NOrNzz5uuzfzByAcbp39Ft07w63iPqC/wi34= @@ -1588,8 +1588,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2 h1:5HdH/A6yn8INZAltYDLb7UkUi5IKemhJzJkDW4Bgxyg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2/go.mod h1:wDHq2E0KwUWG0lQ9f5frW1a7CKVW17MJLPuvKmtSRDg= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 h1:yzkeWzWoPTbpDvVIz0ohmNVqAkvE8UwuLqqcUt47gYk= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843/go.mod 
h1:6tlxlsiWypGdpaZI+Kz5gFm53gCAcU/pTU3PR9CiFB8= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 h1:oaXslIvcy5HD3zkWhx3nu8vRGdWGedYJ+XCsBD8mYkA= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6/go.mod h1:Ea94/OgfFPRTByGO2Qo+uZ7/4sWhwE2HKu7dDwdojME= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/lib/go.mod b/system-tests/lib/go.mod index 56f655f6b68..3ec3065cb5c 100644 --- a/system-tests/lib/go.mod +++ b/system-tests/lib/go.mod @@ -35,7 +35,7 @@ require ( github.com/smartcontractkit/chain-selectors v1.0.97 github.com/smartcontractkit/chainlink-aptos v0.0.0-20260324144720-484863604698 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 + github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260330133421-5151ea0c3b05 diff --git a/system-tests/lib/go.sum b/system-tests/lib/go.sum index 20cce88dc92..2e7e1bf574e 100644 --- a/system-tests/lib/go.sum +++ b/system-tests/lib/go.sum @@ -1601,8 +1601,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2 h1:5HdH/A6yn8INZAltYDLb7UkUi5IKemhJzJkDW4Bgxyg= 
github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2/go.mod h1:wDHq2E0KwUWG0lQ9f5frW1a7CKVW17MJLPuvKmtSRDg= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 h1:yzkeWzWoPTbpDvVIz0ohmNVqAkvE8UwuLqqcUt47gYk= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843/go.mod h1:6tlxlsiWypGdpaZI+Kz5gFm53gCAcU/pTU3PR9CiFB8= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 h1:oaXslIvcy5HD3zkWhx3nu8vRGdWGedYJ+XCsBD8mYkA= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6/go.mod h1:Ea94/OgfFPRTByGO2Qo+uZ7/4sWhwE2HKu7dDwdojME= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/tests/go.mod b/system-tests/tests/go.mod index 4c2ef6c9094..bb7136e9246 100644 --- a/system-tests/tests/go.mod +++ b/system-tests/tests/go.mod @@ -62,7 +62,7 @@ require ( github.com/rs/zerolog v1.34.0 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chain-selectors v1.0.97 - github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 + github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.13 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/system-tests/tests/go.sum b/system-tests/tests/go.sum index 1481b3e768b..6df70e6640e 100644 --- a/system-tests/tests/go.sum +++ b/system-tests/tests/go.sum @@ -1785,8 +1785,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 
github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2 h1:5HdH/A6yn8INZAltYDLb7UkUi5IKemhJzJkDW4Bgxyg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260324000441-d4cfddc9f7d2/go.mod h1:wDHq2E0KwUWG0lQ9f5frW1a7CKVW17MJLPuvKmtSRDg= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843 h1:yzkeWzWoPTbpDvVIz0ohmNVqAkvE8UwuLqqcUt47gYk= -github.com/smartcontractkit/chainlink-common v0.11.2-0.20260331163339-a3c0d217e843/go.mod h1:6tlxlsiWypGdpaZI+Kz5gFm53gCAcU/pTU3PR9CiFB8= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6 h1:oaXslIvcy5HD3zkWhx3nu8vRGdWGedYJ+XCsBD8mYkA= +github.com/smartcontractkit/chainlink-common v0.11.2-0.20260402120824-48154c0c65a6/go.mod h1:Ea94/OgfFPRTByGO2Qo+uZ7/4sWhwE2HKu7dDwdojME= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4=