From 2af49130817d670c79faefb50ab69484755fc643 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 19 Mar 2026 15:14:39 -0400 Subject: [PATCH 1/9] Initial PoC --- .../beholder/durable_event_store_orm.go | 86 +++++++++++++++++++ .../migrations/0294_chip_durable_events.sql | 15 ++++ go.mod | 2 + go.sum | 2 - 4 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 core/services/beholder/durable_event_store_orm.go create mode 100644 core/store/migrate/migrations/0294_chip_durable_events.sql diff --git a/core/services/beholder/durable_event_store_orm.go b/core/services/beholder/durable_event_store_orm.go new file mode 100644 index 00000000000..1f393cc256e --- /dev/null +++ b/core/services/beholder/durable_event_store_orm.go @@ -0,0 +1,86 @@ +package beholder + +import ( + "context" + "fmt" + "time" + + "github.com/smartcontractkit/chainlink-common/pkg/beholder" + "github.com/smartcontractkit/chainlink-common/pkg/sqlutil" +) + +const chipDurableEventsTable = "cre.chip_durable_events" + +// PgDurableEventStore is a Postgres-backed implementation of beholder.DurableEventStore. 
+type PgDurableEventStore struct { + ds sqlutil.DataSource +} + +var _ beholder.DurableEventStore = (*PgDurableEventStore)(nil) + +func NewPgDurableEventStore(ds sqlutil.DataSource) *PgDurableEventStore { + return &PgDurableEventStore{ds: ds} +} + +func (s *PgDurableEventStore) Insert(ctx context.Context, payload []byte) (int64, error) { + const q = `INSERT INTO ` + chipDurableEventsTable + ` (payload) VALUES ($1) RETURNING id` + var id int64 + if err := s.ds.GetContext(ctx, &id, q, payload); err != nil { + return 0, fmt.Errorf("failed to insert chip durable event: %w", err) + } + return id, nil +} + +func (s *PgDurableEventStore) Delete(ctx context.Context, id int64) error { + const q = `DELETE FROM ` + chipDurableEventsTable + ` WHERE id = $1` + if _, err := s.ds.ExecContext(ctx, q, id); err != nil { + return fmt.Errorf("failed to delete chip durable event id=%d: %w", id, err) + } + return nil +} + +func (s *PgDurableEventStore) ListPending(ctx context.Context, createdBefore time.Time, limit int) ([]beholder.DurableEvent, error) { + const q = ` +SELECT id, payload, created_at +FROM ` + chipDurableEventsTable + ` +WHERE created_at < $1 +ORDER BY created_at ASC +LIMIT $2` + + type row struct { + ID int64 `db:"id"` + Payload []byte `db:"payload"` + CreatedAt time.Time `db:"created_at"` + } + + var rows []row + if err := s.ds.SelectContext(ctx, &rows, q, createdBefore, limit); err != nil { + return nil, fmt.Errorf("failed to list pending chip durable events: %w", err) + } + + out := make([]beholder.DurableEvent, 0, len(rows)) + for _, r := range rows { + out = append(out, beholder.DurableEvent{ + ID: r.ID, + Payload: r.Payload, + CreatedAt: r.CreatedAt, + }) + } + return out, nil +} + +func (s *PgDurableEventStore) DeleteExpired(ctx context.Context, ttl time.Duration) (int64, error) { + const q = ` +WITH deleted AS ( + DELETE FROM ` + chipDurableEventsTable + ` + WHERE created_at < now() - $1::interval + RETURNING id +) +SELECT count(*) FROM deleted` + + var count 
int64 + if err := s.ds.GetContext(ctx, &count, q, ttl.String()); err != nil { + return 0, fmt.Errorf("failed to delete expired chip durable events: %w", err) + } + return count, nil +} diff --git a/core/store/migrate/migrations/0294_chip_durable_events.sql b/core/store/migrate/migrations/0294_chip_durable_events.sql new file mode 100644 index 00000000000..f578699c39f --- /dev/null +++ b/core/store/migrate/migrations/0294_chip_durable_events.sql @@ -0,0 +1,15 @@ +-- +goose Up + +CREATE TABLE IF NOT EXISTS cre.chip_durable_events ( + id BIGSERIAL PRIMARY KEY, + payload BYTEA NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE INDEX idx_chip_durable_events_created_at + ON cre.chip_durable_events (created_at ASC); + +-- +goose Down + +DROP INDEX IF EXISTS cre.idx_chip_durable_events_created_at; +DROP TABLE IF EXISTS cre.chip_durable_events; diff --git a/go.mod b/go.mod index 77430cbacfa..03cd67f68b3 100644 --- a/go.mod +++ b/go.mod @@ -439,4 +439,6 @@ require ( replace github.com/fbsobreira/gotron-sdk => github.com/smartcontractkit/chainlink-tron/relayer/gotron-sdk v0.0.5-0.20260218133534-cbd44da2856b +replace github.com/smartcontractkit/chainlink-common => ../chainlink-common + tool github.com/smartcontractkit/chainlink-common/pkg/loop/cmd/loopinstall diff --git a/go.sum b/go.sum index 126dab1cb32..bcedc093dd5 100644 --- a/go.sum +++ b/go.sum @@ -1235,8 +1235,6 @@ github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5/go.mod h1:xtZNi6pOKdC3sLvokDvXOhgHzT+cyBqH/gWwvxTxqrg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 
h1:nvv1kiv/7jwALkFztO//NhIq4Y9M4kmJ0UCgTZMC/qI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= From f112c7cacc2cf3d533d38d1e474fdf9992750a91 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Mon, 23 Mar 2026 13:22:58 -0400 Subject: [PATCH 2/9] Add tests --- core/config/telemetry_config.go | 1 + core/config/toml/types.go | 4 + core/scripts/go.mod | 2 +- core/scripts/go.sum | 4 +- .../beholder/chip_load_test_demo.pb.go | 169 +++ .../beholder/chip_load_test_demo.proto | 17 + .../beholder/durable_emitter_load_test.go | 1147 +++++++++++++++++ .../beholder/durable_event_store_orm_test.go | 363 ++++++ core/services/chainlink/application.go | 51 + core/services/chainlink/config_telemetry.go | 7 + ...vents.sql => 0295_chip_durable_events.sql} | 0 deployment/go.mod | 2 +- deployment/go.sum | 4 +- go.mod | 4 +- go.sum | 2 + integration-tests/go.mod | 2 +- integration-tests/go.sum | 4 +- integration-tests/load/go.mod | 2 +- integration-tests/load/go.sum | 4 +- system-tests/lib/cre/don/config/config.go | 2 + system-tests/lib/go.mod | 2 +- system-tests/lib/go.sum | 4 +- system-tests/tests/go.mod | 2 +- system-tests/tests/go.sum | 4 +- .../tests/smoke/cre/cre_suite_test.go | 10 + .../smoke/cre/v2_durable_emitter_test.go | 248 ++++ 26 files changed, 2040 insertions(+), 21 deletions(-) create mode 100644 core/services/beholder/chip_load_test_demo.pb.go create mode 100644 core/services/beholder/chip_load_test_demo.proto create mode 100644 core/services/beholder/durable_emitter_load_test.go create mode 100644 
core/services/beholder/durable_event_store_orm_test.go rename core/store/migrate/migrations/{0294_chip_durable_events.sql => 0295_chip_durable_events.sql} (100%) create mode 100644 system-tests/tests/smoke/cre/v2_durable_emitter_test.go diff --git a/core/config/telemetry_config.go b/core/config/telemetry_config.go index db066f0d003..a0a175ff1a7 100644 --- a/core/config/telemetry_config.go +++ b/core/config/telemetry_config.go @@ -18,6 +18,7 @@ type Telemetry interface { EmitterExportTimeout() time.Duration ChipIngressEndpoint() string ChipIngressInsecureConnection() bool + DurableEmitterEnabled() bool HeartbeatInterval() time.Duration LogStreamingEnabled() bool LogLevel() zapcore.Level diff --git a/core/config/toml/types.go b/core/config/toml/types.go index 74984ec5db9..dca2976df90 100644 --- a/core/config/toml/types.go +++ b/core/config/toml/types.go @@ -2762,6 +2762,7 @@ type Telemetry struct { AuthHeadersTTL *commonconfig.Duration ChipIngressEndpoint *string ChipIngressInsecureConnection *bool + DurableEmitterEnabled *bool HeartbeatInterval *commonconfig.Duration LogLevel *string LogStreamingEnabled *bool @@ -2806,6 +2807,9 @@ func (b *Telemetry) setFrom(f *Telemetry) { if v := f.ChipIngressInsecureConnection; v != nil { b.ChipIngressInsecureConnection = v } + if v := f.DurableEmitterEnabled; v != nil { + b.DurableEmitterEnabled = v + } if v := f.HeartbeatInterval; v != nil { b.HeartbeatInterval = v } diff --git a/core/scripts/go.mod b/core/scripts/go.mod index 023e6862a1a..7292cac22ec 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -46,7 +46,7 @@ require ( github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e 
github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.12 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/core/scripts/go.sum b/core/scripts/go.sum index fc6f5475c55..6820ee94cb5 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1634,8 +1634,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 h1:nvv1kiv/7jwALkFztO//NhIq4Y9M4kmJ0UCgTZMC/qI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/core/services/beholder/chip_load_test_demo.pb.go b/core/services/beholder/chip_load_test_demo.pb.go new file mode 100644 index 00000000000..3eb07682288 --- /dev/null +++ b/core/services/beholder/chip_load_test_demo.pb.go @@ -0,0 +1,169 
@@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.11 +// protoc v5.29.3 +// source: chip_load_test_demo.proto + +package beholder + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// Used for testing +type DemoClientPayload struct { + state protoimpl.MessageState `protogen:"open.v1"` + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Domain string `protobuf:"bytes,2,opt,name=domain,proto3" json:"domain,omitempty"` + Entity string `protobuf:"bytes,3,opt,name=entity,proto3" json:"entity,omitempty"` + BatchNum int64 `protobuf:"varint,4,opt,name=batch_num,json=batchNum,proto3" json:"batch_num,omitempty"` + MessageNum int64 `protobuf:"varint,5,opt,name=message_num,json=messageNum,proto3" json:"message_num,omitempty"` + BatchPosition int64 `protobuf:"varint,6,opt,name=batch_position,json=batchPosition,proto3" json:"batch_position,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DemoClientPayload) Reset() { + *x = DemoClientPayload{} + mi := &file_chip_load_test_demo_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DemoClientPayload) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DemoClientPayload) ProtoMessage() {} + +func (x *DemoClientPayload) ProtoReflect() protoreflect.Message { + mi := &file_chip_load_test_demo_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + 
return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DemoClientPayload.ProtoReflect.Descriptor instead. +func (*DemoClientPayload) Descriptor() ([]byte, []int) { + return file_chip_load_test_demo_proto_rawDescGZIP(), []int{0} +} + +func (x *DemoClientPayload) GetId() string { + if x != nil { + return x.Id + } + return "" +} + +func (x *DemoClientPayload) GetDomain() string { + if x != nil { + return x.Domain + } + return "" +} + +func (x *DemoClientPayload) GetEntity() string { + if x != nil { + return x.Entity + } + return "" +} + +func (x *DemoClientPayload) GetBatchNum() int64 { + if x != nil { + return x.BatchNum + } + return 0 +} + +func (x *DemoClientPayload) GetMessageNum() int64 { + if x != nil { + return x.MessageNum + } + return 0 +} + +func (x *DemoClientPayload) GetBatchPosition() int64 { + if x != nil { + return x.BatchPosition + } + return 0 +} + +var File_chip_load_test_demo_proto protoreflect.FileDescriptor + +const file_chip_load_test_demo_proto_rawDesc = "" + + "\n" + + "\x19chip_load_test_demo.proto\x12\x02pb\"\xb8\x01\n" + + "\x11DemoClientPayload\x12\x0e\n" + + "\x02id\x18\x01 \x01(\tR\x02id\x12\x16\n" + + "\x06domain\x18\x02 \x01(\tR\x06domain\x12\x16\n" + + "\x06entity\x18\x03 \x01(\tR\x06entity\x12\x1b\n" + + "\tbatch_num\x18\x04 \x01(\x03R\bbatchNum\x12\x1f\n" + + "\vmessage_num\x18\x05 \x01(\x03R\n" + + "messageNum\x12%\n" + + "\x0ebatch_position\x18\x06 \x01(\x03R\rbatchPositionBJZHgithub.com/smartcontractkit/chainlink/v2/core/services/beholder;beholderb\x06proto3" + +var ( + file_chip_load_test_demo_proto_rawDescOnce sync.Once + file_chip_load_test_demo_proto_rawDescData []byte +) + +func file_chip_load_test_demo_proto_rawDescGZIP() []byte { + file_chip_load_test_demo_proto_rawDescOnce.Do(func() { + file_chip_load_test_demo_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_chip_load_test_demo_proto_rawDesc), len(file_chip_load_test_demo_proto_rawDesc))) + }) + return 
file_chip_load_test_demo_proto_rawDescData +} + +var file_chip_load_test_demo_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_chip_load_test_demo_proto_goTypes = []any{ + (*DemoClientPayload)(nil), // 0: pb.DemoClientPayload +} +var file_chip_load_test_demo_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_chip_load_test_demo_proto_init() } +func file_chip_load_test_demo_proto_init() { + if File_chip_load_test_demo_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_chip_load_test_demo_proto_rawDesc), len(file_chip_load_test_demo_proto_rawDesc)), + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_chip_load_test_demo_proto_goTypes, + DependencyIndexes: file_chip_load_test_demo_proto_depIdxs, + MessageInfos: file_chip_load_test_demo_proto_msgTypes, + }.Build() + File_chip_load_test_demo_proto = out.File + file_chip_load_test_demo_proto_goTypes = nil + file_chip_load_test_demo_proto_depIdxs = nil +} diff --git a/core/services/beholder/chip_load_test_demo.proto b/core/services/beholder/chip_load_test_demo.proto new file mode 100644 index 00000000000..6cb9c51c995 --- /dev/null +++ b/core/services/beholder/chip_load_test_demo.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; + +// Matches atlas/chip-ingress/cmd/demo_client/pb/demo_client_payload.proto for schema subject +// chip-demo-pb.DemoClientPayload (see atlas chip-ingress make create-schema). 
+option go_package = "github.com/smartcontractkit/chainlink/v2/core/services/beholder;beholder"; + +package pb; + +// Used for testing +message DemoClientPayload { + string id = 1; + string domain = 2; + string entity = 3; + int64 batch_num = 4; + int64 message_num = 5; + int64 batch_position = 6; +} diff --git a/core/services/beholder/durable_emitter_load_test.go b/core/services/beholder/durable_emitter_load_test.go new file mode 100644 index 00000000000..084ebe648d2 --- /dev/null +++ b/core/services/beholder/durable_emitter_load_test.go @@ -0,0 +1,1147 @@ +package beholder_test + +// External Chip Ingress (integration): +// +// Set CHIP_INGRESS_TEST_ADDR=host:port to dial a real Chip Ingress instead of the in-process mock. +// Optional: +// CHIP_INGRESS_TEST_TLS=1|true — use TLS (default: insecure plaintext gRPC) +// CHIP_INGRESS_TEST_BASIC_AUTH_USER — basic auth user (e.g. admin) +// CHIP_INGRESS_TEST_BASIC_AUTH_PASS — basic auth password +// +// Tests that inject Chip failures or count in-process receives (outage, slow-Chip) are skipped +// when CHIP_INGRESS_TEST_ADDR is set. +// +// Running a real server: see atlas/chip-ingress/README.md. You need Kafka/Redpanda, the +// `chip-demo` topic, and schema subject `chip-demo-pb.DemoClientPayload` (run +// `make create-topic-and-schema` from atlas/chip-ingress, or equivalent rpk commands). +// Tests call RegisterSchemas with the bundled proto; Chip still needs the topic to exist for Kafka. +// External mode uses the Atlas demo shape: chip-demo / pb.DemoClientPayload + protobuf payload. +// If unset, CHIP_INGRESS_TEST_BASIC_AUTH_USER/PASS default to chip-ingress-demo-client / password +// (atlas docker-compose demo account). Set CHIP_INGRESS_TEST_SKIP_BASIC_AUTH=1 to omit auth. +// Set CHIP_INGRESS_TEST_SKIP_SCHEMA_REGISTRATION=1 to skip RegisterSchemas (schema pre-created only). 
+ +import ( + "context" + "fmt" + "net" + "os" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + cepb "github.com/cloudevents/sdk-go/binding/format/protobuf/v2/pb" + "github.com/jmoiron/sqlx" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + + "github.com/smartcontractkit/chainlink-common/pkg/beholder" + "github.com/smartcontractkit/chainlink-common/pkg/chipingress" + "github.com/smartcontractkit/chainlink-common/pkg/chipingress/pb" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + + beholdersvc "github.com/smartcontractkit/chainlink/v2/core/services/beholder" + + "github.com/smartcontractkit/chainlink/v2/core/config/env" + "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" + "github.com/smartcontractkit/chainlink/v2/core/internal/testutils/pgtest" +) + +// chipLoadTestDemoProto is the raw .proto registered with Chip for subject chip-demo-pb.DemoClientPayload +// (keep in sync with chip_load_test_demo.proto). +const chipLoadTestDemoProto = `syntax = "proto3"; + +option go_package = "github.com/smartcontractkit/chainlink/v2/core/services/beholder;beholder"; + +package pb; + +message DemoClientPayload { + string id = 1; + string domain = 2; + string entity = 3; + int64 batch_num = 4; + int64 message_num = 5; + int64 batch_position = 6; +} +` + +// loadTestServer is a controllable gRPC ChipIngress server for load tests. 
+type loadTestServer struct { + pb.UnimplementedChipIngressServer + + mu sync.Mutex + publishErr error + batchErr error + publishDelay time.Duration + + publishCount atomic.Int64 + batchCount atomic.Int64 + totalEvents atomic.Int64 +} + +func (s *loadTestServer) Publish(_ context.Context, _ *cepb.CloudEvent) (*pb.PublishResponse, error) { + if s.publishDelay > 0 { + time.Sleep(s.publishDelay) + } + s.publishCount.Add(1) + s.totalEvents.Add(1) + s.mu.Lock() + defer s.mu.Unlock() + return &pb.PublishResponse{}, s.publishErr +} + +func (s *loadTestServer) PublishBatch(_ context.Context, in *pb.CloudEventBatch) (*pb.PublishResponse, error) { + s.batchCount.Add(1) + s.totalEvents.Add(int64(len(in.Events))) + s.mu.Lock() + defer s.mu.Unlock() + return &pb.PublishResponse{}, s.batchErr +} + +func (s *loadTestServer) Ping(context.Context, *pb.EmptyRequest) (*pb.PingResponse, error) { + return &pb.PingResponse{Message: "pong"}, nil +} + +func (s *loadTestServer) setPublishErr(err error) { + s.mu.Lock() + defer s.mu.Unlock() + s.publishErr = err +} + +func (s *loadTestServer) setBatchErr(err error) { + s.mu.Lock() + defer s.mu.Unlock() + s.batchErr = err +} + +func startLoadServer(t testing.TB) (*loadTestServer, string) { + t.Helper() + srv := &loadTestServer{} + lis, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + + gs := grpc.NewServer() + pb.RegisterChipIngressServer(gs, srv) + go func() { _ = gs.Serve(lis) }() + t.Cleanup(func() { gs.GracefulStop() }) + + return srv, lis.Addr().String() +} + +func chipClient(t testing.TB, addr string) chipingress.Client { + t.Helper() + c, err := chipingress.NewClient(addr, chipingress.WithInsecureConnection()) + require.NoError(t, err) + t.Cleanup(func() { _ = c.Close() }) + return c +} + +const ( + envChipIngressTestAddr = "CHIP_INGRESS_TEST_ADDR" + envChipIngressTestTLS = "CHIP_INGRESS_TEST_TLS" + envChipIngressTestBasicUser = "CHIP_INGRESS_TEST_BASIC_AUTH_USER" + envChipIngressTestBasicPass = 
"CHIP_INGRESS_TEST_BASIC_AUTH_PASS" +) + +func externalChipConfigured() bool { + return strings.TrimSpace(os.Getenv(envChipIngressTestAddr)) != "" +} + +func newChipClientFromEnv(t testing.TB) chipingress.Client { + t.Helper() + addr := strings.TrimSpace(os.Getenv(envChipIngressTestAddr)) + require.NotEmpty(t, addr, envChipIngressTestAddr) + + var opts []chipingress.Opt + if tlsEnv := os.Getenv(envChipIngressTestTLS); tlsEnv == "1" || strings.EqualFold(tlsEnv, "true") { + opts = append(opts, chipingress.WithTLS()) + } else { + opts = append(opts, chipingress.WithInsecureConnection()) + } + user := os.Getenv(envChipIngressTestBasicUser) + pass := os.Getenv(envChipIngressTestBasicPass) + skipAuth := os.Getenv("CHIP_INGRESS_TEST_SKIP_BASIC_AUTH") + if skipAuth != "1" && !strings.EqualFold(skipAuth, "true") { + if user == "" && pass == "" { + // Default matches atlas/chip-ingress docker-compose CE_SA_CHIP_INGRESS_DEMO_CLIENT. + user = "chip-ingress-demo-client" + pass = "password" + } + } + if user != "" && pass != "" { + opts = append(opts, chipingress.WithBasicAuth(user, pass)) + } + + c, err := chipingress.NewClient(addr, opts...) + require.NoError(t, err) + t.Cleanup(func() { _ = c.Close() }) + return c +} + +// startChipIngressOrMock starts the in-process mock ChipIngress server unless +// CHIP_INGRESS_TEST_ADDR is set; then it returns mock=nil and a client to the external server. +func startChipIngressOrMock(t testing.TB) (mock *loadTestServer, client chipingress.Client) { + t.Helper() + if externalChipConfigured() { + t.Logf("Using external Chip Ingress at %s (%s)", os.Getenv(envChipIngressTestAddr), envChipIngressTestAddr) + c := newChipClientFromEnv(t) + registerChipDemoSchema(t, c) + return nil, c + } + mock, addr := startLoadServer(t) + return mock, chipClient(t, addr) +} + +// registerChipDemoSchema registers the demo protobuf with Chip (via chip-config) so GetSchema +// succeeds for subject chip-demo-pb.DemoClientPayload. 
Skip with CHIP_INGRESS_TEST_SKIP_SCHEMA_REGISTRATION=1. +func registerChipDemoSchema(t testing.TB, client chipingress.Client) { + t.Helper() + if os.Getenv("CHIP_INGRESS_TEST_SKIP_SCHEMA_REGISTRATION") == "1" || + strings.EqualFold(os.Getenv("CHIP_INGRESS_TEST_SKIP_SCHEMA_REGISTRATION"), "true") { + t.Logf("skipping RegisterSchemas (%s)", "CHIP_INGRESS_TEST_SKIP_SCHEMA_REGISTRATION") + return + } + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + _, err := client.RegisterSchemas(ctx, &pb.Schema{ + Subject: "chip-demo-pb.DemoClientPayload", + Schema: chipLoadTestDemoProto, + Format: pb.SchemaType_PROTOBUF, + }) + if err != nil { + // Common when schema was already registered (e.g. make create-schema). + msg := strings.ToLower(err.Error()) + if strings.Contains(msg, "already") || strings.Contains(msg, "exists") || strings.Contains(msg, "duplicate") { + t.Logf("RegisterSchemas: treating as OK (schema likely present): %v", err) + return + } + require.NoError(t, err, "RegisterSchemas for chip-demo; try atlas/chip-ingress make create-topic-and-schema") + } +} + +func skipIfExternalChip(t *testing.T, reason string) { + t.Helper() + if externalChipConfigured() { + t.Skipf("requires in-process mock Chip: %s (unset %s)", reason, envChipIngressTestAddr) + } +} + +func formatMockServerEvents(srv *loadTestServer) string { + if srv == nil { + return "N/A" + } + return strconv.FormatInt(srv.totalEvents.Load(), 10) +} + +func loadEmitAttrs() []any { + if externalChipConfigured() { + // Wire-compatible with atlas chip-ingress demo (see chip_load_test_demo.proto). + return []any{ + "source", "chip-demo", + "type", "pb.DemoClientPayload", + "datacontenttype", "application/protobuf", + "dataschema", "https://example.com/demo-client-schema", + "time", time.Now(), + } + } + return []any{"source", "cre.billing", "type", "workflow_execution_finished"} +} + +// buildLoadTestPayload returns raw bytes for Emit(). 
For the in-process mock, arbitrary bytes are +// fine. For real Chip Ingress, payload must protobuf-decode as pb.DemoClientPayload (subject +// chip-demo-pb.DemoClientPayload in schema registry). +func buildLoadTestPayload(targetSize int) []byte { + if !externalChipConfigured() { + if targetSize < 0 { + targetSize = 0 + } + b := make([]byte, targetSize) + return b + } + if targetSize <= 0 { + targetSize = 1 + } + p := &beholdersvc.DemoClientPayload{ + Domain: "chip-demo", + Entity: "pb.DemoClientPayload", + BatchNum: 1, + MessageNum: 1, + BatchPosition: 0, + } + id := "" + for range targetSize*4 + 512 { + p.Id = id + b, err := proto.Marshal(p) + if err != nil { + return []byte{0x0a, 0x00} + } + if len(b) >= targetSize { + for len(id) > 0 && len(b) > targetSize { + id = id[:len(id)-1] + p.Id = id + b, _ = proto.Marshal(p) + } + return b + } + id += "x" + } + b, _ := proto.Marshal(p) + return b +} + +// TestChipIngressExternalPing is a smoke test: verifies gRPC connectivity when CHIP_INGRESS_TEST_ADDR is set. +func TestChipIngressExternalPing(t *testing.T) { + if !externalChipConfigured() { + t.Skipf("set %s to dial a real Chip Ingress (e.g. 127.0.0.1:50051)", envChipIngressTestAddr) + } + client := newChipClientFromEnv(t) + ctx := testutils.Context(t) + _, err := client.Ping(ctx, &pb.EmptyRequest{}) + require.NoError(t, err) + t.Logf("Ping OK to %s", os.Getenv(envChipIngressTestAddr)) +} + +// ---------- Full-stack load tests: DurableEmitter + Postgres + gRPC ---------- + +// TestFullStack_SustainedThroughput measures steady-state throughput with +// real Postgres persistence and gRPC delivery. This answers: "how many +// events/sec can we sustain end-to-end?" 
+func TestFullStack_SustainedThroughput(t *testing.T) { + db := pgtest.NewSqlxDB(t) + srv, client := startChipIngressOrMock(t) + store := beholdersvc.NewPgDurableEventStore(db) + + cfg := beholder.DefaultDurableEmitterConfig() + cfg.RetransmitInterval = 500 * time.Millisecond + cfg.RetransmitAfter = 2 * time.Second + cfg.RetransmitBatchSize = 200 + cfg.PublishTimeout = 5 * time.Second + + em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Test(t)) + require.NoError(t, err) + + ctx := testutils.Context(t) + em.Start(ctx) + defer em.Close() + + const ( + totalEvents = 1000 + concurrency = 10 + ) + + payload := buildLoadTestPayload(256) // ~256 byte record (protobuf for external Chip) + + start := time.Now() + + var wg sync.WaitGroup + var emitErrors atomic.Int64 + for w := 0; w < concurrency; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < totalEvents/concurrency; i++ { + if err := em.Emit(ctx, payload, loadEmitAttrs()...); err != nil { + emitErrors.Add(1) + } + } + }() + } + wg.Wait() + emitElapsed := time.Since(start) + + t.Logf("--- Emit Phase ---") + t.Logf("Events emitted: %d", totalEvents) + t.Logf("Emit errors: %d", emitErrors.Load()) + t.Logf("Elapsed: %s", emitElapsed.Round(time.Millisecond)) + t.Logf("Emit rate: %.0f events/sec", float64(totalEvents)/emitElapsed.Seconds()) + + assert.Equal(t, int64(0), emitErrors.Load(), "all emits should succeed") + + // Wait for all events to be delivered and store to drain. 
+ require.Eventually(t, func() bool { + pending, _ := store.ListPending(ctx, time.Now().Add(time.Hour), 1) + return len(pending) == 0 + }, 30*time.Second, 100*time.Millisecond, "store should drain completely") + + totalElapsed := time.Since(start) + + t.Logf("--- Delivery Phase ---") + t.Logf("Server received: %s events (mock only; use external Chip metrics otherwise)", formatMockServerEvents(srv)) + if srv != nil { + t.Logf("Publish calls: %d", srv.publishCount.Load()) + t.Logf("Batch calls: %d", srv.batchCount.Load()) + } + t.Logf("Total elapsed: %s", totalElapsed.Round(time.Millisecond)) + t.Logf("End-to-end rate: %.0f events/sec", float64(totalEvents)/totalElapsed.Seconds()) + + if srv != nil { + assert.GreaterOrEqual(t, srv.totalEvents.Load(), int64(totalEvents), + "server should have received all events (may have retransmit duplicates)") + } +} + +// TestFullStack_ChipOutage simulates Chip going down during sustained load, +// then recovering. Measures: how events accumulate in Postgres, and how +// fast they drain once Chip comes back. +func TestFullStack_ChipOutage(t *testing.T) { + skipIfExternalChip(t, "inject Unavailable errors on mock server") + + db := pgtest.NewSqlxDB(t) + srv, client := startChipIngressOrMock(t) + require.NotNil(t, srv) + store := beholdersvc.NewPgDurableEventStore(db) + + cfg := beholder.DefaultDurableEmitterConfig() + cfg.RetransmitInterval = 200 * time.Millisecond + cfg.RetransmitAfter = 100 * time.Millisecond + cfg.RetransmitBatchSize = 100 + cfg.PublishTimeout = 1 * time.Second + + em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Test(t)) + require.NoError(t, err) + + ctx := testutils.Context(t) + em.Start(ctx) + defer em.Close() + + // Phase 1: Chip is available — emit 200 events. 
+ for i := 0; i < 200; i++ { + require.NoError(t, em.Emit(ctx, []byte("pre-outage"), loadEmitAttrs()...)) + } + require.Eventually(t, func() bool { + pending, _ := store.ListPending(ctx, time.Now().Add(time.Hour), 1) + return len(pending) == 0 + }, 10*time.Second, 50*time.Millisecond, "pre-outage events should all deliver") + t.Logf("Phase 1: %d events delivered pre-outage", srv.totalEvents.Load()) + + // Phase 2: Chip goes down — emit 500 more events. + srv.setPublishErr(status.Error(codes.Unavailable, "chip down")) + srv.setBatchErr(status.Error(codes.Unavailable, "chip down")) + + outageStart := time.Now() + for i := 0; i < 500; i++ { + require.NoError(t, em.Emit(ctx, []byte("during-outage"), loadEmitAttrs()...)) + } + t.Logf("Phase 2: emitted 500 events during outage in %s", time.Since(outageStart).Round(time.Millisecond)) + + // Verify events are accumulating in Postgres. + time.Sleep(500 * time.Millisecond) // let some retransmits fail + pending, err := store.ListPending(ctx, time.Now().Add(time.Hour), 1000) + require.NoError(t, err) + t.Logf("Phase 2: %d events pending in Postgres during outage", len(pending)) + assert.Greater(t, len(pending), 0, "events should accumulate during outage") + + // Phase 3: Chip recovers. + srv.setPublishErr(nil) + srv.setBatchErr(nil) + recoveryStart := time.Now() + + require.Eventually(t, func() bool { + pending, _ := store.ListPending(ctx, time.Now().Add(time.Hour), 1) + return len(pending) == 0 + }, 30*time.Second, 100*time.Millisecond, "all events should drain after recovery") + + recoveryElapsed := time.Since(recoveryStart) + t.Logf("Phase 3: drained in %s after recovery (%.0f events/sec drain rate)", + recoveryElapsed.Round(time.Millisecond), + float64(500)/recoveryElapsed.Seconds()) + t.Logf("Total server events: %d", srv.totalEvents.Load()) +} + +// TestFullStack_SlowChip simulates a slow Chip server (high latency per +// publish). This tests whether the async design keeps Emit() fast even +// when gRPC is slow. 
+func TestFullStack_SlowChip(t *testing.T) {
+ skipIfExternalChip(t, "inject publish latency on mock server")
+
+ db := pgtest.NewSqlxDB(t)
+ srv, client := startChipIngressOrMock(t)
+ require.NotNil(t, srv)
+ // NOTE(review): direct (unsynchronized) field write — safe only if no
+ // server goroutine reads publishDelay before this point and the field is
+ // read-only afterwards; otherwise make it atomic or set via a setter.
+ srv.publishDelay = 50 * time.Millisecond // 50ms per publish = ~20 RPS max
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ cfg := beholder.DefaultDurableEmitterConfig()
+ cfg.RetransmitInterval = 500 * time.Millisecond
+ cfg.RetransmitAfter = 2 * time.Second
+ cfg.RetransmitBatchSize = 50
+
+ em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Test(t))
+ require.NoError(t, err)
+
+ ctx := testutils.Context(t)
+ em.Start(ctx)
+ defer em.Close()
+
+ const totalEvents = 200
+
+ // Emit should still be fast because it only does DB insert (async gRPC).
+ start := time.Now()
+ for i := 0; i < totalEvents; i++ {
+ require.NoError(t, em.Emit(ctx, []byte("slow-chip-event"), loadEmitAttrs()...))
+ }
+ emitElapsed := time.Since(start)
+
+ t.Logf("Emit %d events in %s (%.0f events/sec) despite 50ms server latency",
+ totalEvents, emitElapsed.Round(time.Millisecond),
+ float64(totalEvents)/emitElapsed.Seconds())
+
+ // Emit rate should be much higher than the server can handle,
+ // proving the async design works.
+ assert.Less(t, emitElapsed, 5*time.Second,
+ "Emit() should not be bottlenecked by slow gRPC server")
+
+ // Wait for everything to eventually deliver.
+ require.Eventually(t, func() bool {
+ pending, _ := store.ListPending(ctx, time.Now().Add(time.Hour), 1)
+ return len(pending) == 0
+ }, 60*time.Second, 200*time.Millisecond, "all events should eventually deliver")
+
+ t.Logf("All %d events delivered (server received %d, including retransmits)",
+ totalEvents, srv.totalEvents.Load())
+}
+
+// Benchmark_FullStack_EmitThroughput benchmarks the Emit() path with real Postgres
+// and a fast mock gRPC server. This gives the upper bound of events/sec.
+func Benchmark_FullStack_EmitThroughput(b *testing.B) {
+ db := pgtest.NewSqlxDB(b)
+ _, client := startChipIngressOrMock(b)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ cfg := beholder.DefaultDurableEmitterConfig()
+ em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop())
+ require.NoError(b, err)
+
+ ctx := testutils.Context(b)
+ em.Start(ctx)
+ defer em.Close()
+
+ payload := buildLoadTestPayload(256)
+
+ // NOTE(review): b.Loop() (Go 1.24+) resets the timer on first call, so
+ // the explicit ResetTimer here is harmless but redundant — confirm.
+ b.ResetTimer()
+ for b.Loop() {
+ err := em.Emit(ctx, payload, loadEmitAttrs()...)
+ require.NoError(b, err)
+ }
+}
+
+// Benchmark_FullStack_EmitPayloadSizes benchmarks Emit throughput at
+// different payload sizes to understand the DB I/O impact.
+func Benchmark_FullStack_EmitPayloadSizes(b *testing.B) {
+ sizes := []int{64, 256, 1024, 4096}
+ for _, size := range sizes {
+ // One sub-benchmark per payload size; each gets a fresh DB, client,
+ // and emitter so sizes do not interfere with each other.
+ b.Run(fmt.Sprintf("%dB", size), func(b *testing.B) {
+ db := pgtest.NewSqlxDB(b)
+ _, client := startChipIngressOrMock(b)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ cfg := beholder.DefaultDurableEmitterConfig()
+ em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop())
+ require.NoError(b, err)
+
+ ctx := testutils.Context(b)
+ em.Start(ctx)
+ defer em.Close()
+
+ payload := buildLoadTestPayload(size)
+
+ b.ResetTimer()
+ for b.Loop() {
+ err := em.Emit(ctx, payload, loadEmitAttrs()...)
+ require.NoError(b, err)
+ }
+ })
+ }
+}
+
+// ---------- 1k TPS Target Tests ----------
+
+// tpsSummaryBlocks collects human-readable result blocks from each TPS test;
+// TestMain prints them together after the full test run.
+var (
+ // tpsSummaryMu guards tpsSummaryBlocks (written by parallel-capable tests).
+ tpsSummaryMu sync.Mutex
+ tpsSummaryBlocks []string
+
+ // tpsRampMu guards tpsRampRows, the per-level table rows of TestTPS_RampUp.
+ tpsRampMu sync.Mutex
+ tpsRampRows []string
+
+ // tpsPayloadMu guards tpsPayloadRows, the rows of TestTPS_PayloadSizeScaling.
+ tpsPayloadMu sync.Mutex
+ tpsPayloadRows []string
+)
+
+// appendTPSummaryBlock formats a titled block of result lines and appends it
+// to the package-level summary printed by TestMain after the run.
+func appendTPSummaryBlock(title string, lines ...string) {
+ tpsSummaryMu.Lock()
+ defer tpsSummaryMu.Unlock()
+ var b strings.Builder
+ b.WriteString("--- ")
+ b.WriteString(title)
+ b.WriteString(" ---\n")
+ for _, ln := range lines {
+ b.WriteString(ln)
+ b.WriteByte('\n')
+ }
+ tpsSummaryBlocks = append(tpsSummaryBlocks, b.String())
+}
+
+// TestMain runs the package tests, then prints all collected TPS summary
+// blocks in one banner so results survive interleaved test logging.
+func TestMain(m *testing.M) {
+ code := m.Run()
+ tpsSummaryMu.Lock()
+ blocks := append([]string(nil), tpsSummaryBlocks...)
+ tpsSummaryMu.Unlock()
+ if len(blocks) > 0 {
+ fmt.Println()
+ fmt.Println(strings.Repeat("=", 72))
+ fmt.Println("TPS LOAD TEST SUMMARY (full run)")
+ fmt.Println(strings.Repeat("=", 72))
+ for _, blk := range blocks {
+ fmt.Print(blk)
+ fmt.Println()
+ }
+ fmt.Println(strings.Repeat("=", 72))
+ }
+ // Exit code comes from m.Run(); deferred calls would be skipped here,
+ // but this function defers nothing.
+ os.Exit(code)
+}
+
+// progressBar renders a fixed-width text progress bar for pct in [0,1]
+// (values outside the range are clamped), followed by the percentage.
+func progressBar(pct float64, width int) string {
+ if pct < 0 {
+ pct = 0
+ }
+ if pct > 1 {
+ pct = 1
+ }
+ filled := int(pct * float64(width))
+ if filled > width {
+ filled = width
+ }
+ var b strings.Builder
+ b.WriteByte('[')
+ for i := 0; i < width; i++ {
+ if i < filled {
+ b.WriteRune('█')
+ } else {
+ b.WriteRune('░')
+ }
+ }
+ b.WriteByte(']')
+ b.WriteString(fmt.Sprintf(" %3.0f%%", pct*100))
+ return b.String()
+}
+
+// directDB opens a real (non-txdb) Postgres connection for concurrent load tests.
+// txdb serializes all operations through a single transaction, which bottlenecks
+// concurrent writes. For TPS testing we need real connection pooling.
+func directDB(t testing.TB) *sqlx.DB {
+ t.Helper()
+ testutils.SkipShortDB(t)
+ dbURL := string(env.DatabaseURL.Get())
+ if dbURL == "" {
+ t.Fatal("CL_DATABASE_URL is required for TPS tests")
+ }
+ db, err := sqlx.Open("postgres", dbURL)
+ require.NoError(t, err)
+ require.NoError(t, db.Ping())
+ db.SetMaxOpenConns(20)
+ db.SetMaxIdleConns(10)
+
+ // Clean the table before and after the test.
+ // Errors are deliberately ignored: the table may not exist yet, and
+ // cleanup failure should not fail the test itself.
+ _, _ = db.Exec("DELETE FROM cre.chip_durable_events")
+ t.Cleanup(func() {
+ _, _ = db.Exec("DELETE FROM cre.chip_durable_events")
+ _ = db.Close()
+ })
+ return db
+}
+
+// emitLatencyStats tracks Emit() call latencies.
+type emitLatencyStats struct {
+ mu sync.Mutex
+ samples []time.Duration
+ failures atomic.Int64
+}
+
+// record appends one successful-Emit latency sample (mutex-guarded).
+func (s *emitLatencyStats) record(d time.Duration) {
+ s.mu.Lock()
+ s.samples = append(s.samples, d)
+ s.mu.Unlock()
+}
+
+// percentile returns the nearest-rank (floor-indexed) p-quantile of the
+// recorded samples, with p in [0,1]; returns 0 when no samples exist.
+// It sorts a copy, so repeated calls are O(n log n) each.
+func (s *emitLatencyStats) percentile(p float64) time.Duration {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if len(s.samples) == 0 {
+ return 0
+ }
+ sorted := make([]time.Duration, len(s.samples))
+ copy(sorted, s.samples)
+ sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })
+ idx := int(float64(len(sorted)-1) * p)
+ return sorted[idx]
+}
+
+// count returns the number of successful-Emit samples recorded so far.
+func (s *emitLatencyStats) count() int {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return len(s.samples)
+}
+
+// runRateLimitedEmit emits events at a target rate for the given duration,
+// using the specified concurrency. Returns latency stats.
+// If progressLabel is non-empty, prints a live progress bar and emit count to stdout every 500ms.
+func runRateLimitedEmit(
+ ctx context.Context,
+ t testing.TB,
+ em *beholder.DurableEmitter,
+ targetTPS int,
+ duration time.Duration,
+ concurrency int,
+ payloadSize int,
+ progressLabel string,
+) *emitLatencyStats {
+ t.Helper()
+
+ stats := &emitLatencyStats{}
+ var emitCount atomic.Int64
+ payload := buildLoadTestPayload(payloadSize)
+
+ // Each worker gets an equal share of the target TPS.
+ perWorkerTPS := targetTPS / concurrency
+ if perWorkerTPS < 1 {
+ perWorkerTPS = 1
+ }
+ interval := time.Duration(float64(time.Second) / float64(perWorkerTPS))
+
+ var wg sync.WaitGroup
+
+ // Optional progress reporter: one goroutine repaints the bar every 500ms,
+ // a second closes `done` when the emit window elapses.
+ if progressLabel != "" {
+ startAll := time.Now()
+ deadline := time.After(duration)
+ done := make(chan struct{})
+ go func() {
+ ticker := time.NewTicker(500 * time.Millisecond)
+ defer ticker.Stop()
+ for {
+ select {
+ case <-ctx.Done():
+ fmt.Fprintf(os.Stdout, "\n")
+ return
+ case <-done:
+ return
+ case <-ticker.C:
+ elapsed := time.Since(startAll)
+ pct := float64(elapsed) / float64(duration)
+ if pct >= 1 {
+ fmt.Fprintf(os.Stdout, "\r%s %s | %s / %s | emits=%d\n",
+ progressBar(1, 36), progressLabel,
+ duration.Round(time.Millisecond), duration.Round(time.Millisecond), emitCount.Load())
+ return
+ }
+ fmt.Fprintf(os.Stdout, "\r%s %s | %s / %s | emits=%d ",
+ progressBar(pct, 36), progressLabel,
+ elapsed.Round(time.Millisecond), duration.Round(time.Millisecond), emitCount.Load())
+ }
+ }
+ }()
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ <-deadline
+ close(done)
+ }()
+ }
+
+ for w := 0; w < concurrency; w++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ // NOTE(review): time.Ticker drops ticks when the receiver is slow,
+ // so if Emit latency approaches `interval` the achieved rate
+ // undershoots the target rather than bursting to catch up.
+ ticker := time.NewTicker(interval)
+ defer ticker.Stop()
+ localDeadline := time.After(duration)
+
+ for {
+ select {
+ case <-localDeadline:
+ return
+ case <-ctx.Done():
+ return
+ case <-ticker.C:
+ start := time.Now()
+ if err := em.Emit(ctx, payload, loadEmitAttrs()...); err != nil {
+ stats.failures.Add(1)
+ } else {
+ emitCount.Add(1)
+ stats.record(time.Since(start))
+ }
+ }
+ }
+ }()
+ }
+
+ wg.Wait()
+ return stats
+}
+
+// TestTPS_RampUp tests the durable emitter at increasing TPS levels to find
+// the throughput ceiling. Each level gets its own DurableEmitter to avoid
+// carry-over. Measures achieved rate, Emit() latency, and queue depth.
+func TestTPS_RampUp(t *testing.T) {
+ levels := []int{100, 500, 1000, 2000}
+ testStart := time.Now()
+
+ tpsRampMu.Lock()
+ tpsRampRows = nil
+ tpsRampMu.Unlock()
+
+ t.Logf("TPS ramp-up: levels=%v (each level: fresh DB + server + emitter)", levels)
+
+ t.Logf("╔════════════════════════════════════════════════════════════════════════════════════════════╗")
+ t.Logf("║ TPS RAMP-UP TEST RESULTS ║")
+ t.Logf("╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╣")
+ t.Logf("║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Server ║ Queue ║")
+ t.Logf("║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ recv* ║ depth ║")
+ t.Logf("╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╣")
+
+ for _, targetTPS := range levels {
+ t.Run(fmt.Sprintf("%d_tps", targetTPS), func(t *testing.T) {
+ levelStart := time.Now()
+ t.Logf(">>> level %d TPS: provisioning direct DB + Chip endpoint...", targetTPS)
+
+ db := directDB(t)
+ srv, client := startChipIngressOrMock(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ cfg := beholder.DefaultDurableEmitterConfig()
+ cfg.RetransmitInterval = 1 * time.Second
+ cfg.RetransmitAfter = 3 * time.Second
+ cfg.RetransmitBatchSize = 500
+
+ em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop())
+ require.NoError(t, err)
+ ctx := testutils.Context(t)
+ em.Start(ctx)
+ defer em.Close()
+
+ const duration = 10 * time.Second
+ const concurrency = 20
+
+ t.Logf(">>> level %d TPS: emitting for %s @ concurrency=%d (progress bar on stdout)", targetTPS, duration, concurrency)
+ stats := runRateLimitedEmit(ctx, t, em, targetTPS, duration, concurrency, 256,
+ fmt.Sprintf("ramp_up/%d_tps", targetTPS))
+ emitPhase := time.Since(levelStart)
+ t.Logf(">>> level %d TPS: emit phase wall time %s", targetTPS, emitPhase.Round(time.Millisecond))
+
+ // Brief pause for async publishes to complete.
+ t.Logf(">>> level %d TPS: sleeping 2s for async publishes...", targetTPS)
+ time.Sleep(2 * time.Second)
+
+ achieved := float64(stats.count()) / duration.Seconds()
+ p50 := stats.percentile(0.50)
+ p99 := stats.percentile(0.99)
+ serverCol := formatMockServerEvents(srv)
+
+ // NOTE(review): Scan error is ignored — on failure queueDepth reads
+ // as 0, which is indistinguishable from a drained queue in the table.
+ var queueDepth int64
+ row := db.QueryRow("SELECT count(*) FROM cre.chip_durable_events")
+ _ = row.Scan(&queueDepth)
+
+ totalEmits := stats.count()
+ rowLine := fmt.Sprintf("║ %-9d ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-8d ║ %-8s ║ %-8d ║",
+ targetTPS, achieved, totalEmits,
+ float64(p50.Microseconds())/1000.0,
+ float64(p99.Microseconds())/1000.0,
+ stats.failures.Load(),
+ serverCol, queueDepth)
+ t.Log(rowLine)
+
+ tpsRampMu.Lock()
+ tpsRampRows = append(tpsRampRows, rowLine)
+ tpsRampMu.Unlock()
+ })
+ }
+
+ t.Logf("╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╝")
+ t.Logf("* Server recv: in-process mock gRPC publish/batch event count. With CHIP_INGRESS_TEST_ADDR (real Chip), "+
+ "this is N/A — observe Kafka/Chip metrics instead. Total emits = successful Emit() completions in the window.")
+ t.Logf("TestTPS_RampUp finished in %s", time.Since(testStart).Round(time.Millisecond))
+
+ summaryLines := []string{
+ fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)),
+ "╔════════════════════════════════════════════════════════════════════════════════════════════╗",
+ "║ TPS RAMP-UP TEST RESULTS ║",
+ "╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╣",
+ "║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Server ║ Queue ║",
+ "║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ recv* ║ depth ║",
+ "╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╣",
+ }
+ tpsRampMu.Lock()
+ summaryLines = append(summaryLines, tpsRampRows...)
+ tpsRampMu.Unlock()
+ summaryLines = append(summaryLines, "╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╝",
+ "* Server recv: mock-only; N/A with real Chip. Total emits = successful Emit() calls per level.")
+ appendTPSummaryBlock("TestTPS_RampUp", summaryLines...)
+}
+
+// TestTPS_Sustained1k runs at exactly 1000 TPS for 60 seconds and verifies
+// the pipeline keeps up: deletes match inserts, queue stays bounded, and
+// Emit() latency stays low.
+func TestTPS_Sustained1k(t *testing.T) {
+ testStart := time.Now()
+ t.Logf("TestTPS_Sustained1k: provisioning DB + Chip server + emitter...")
+
+ db := directDB(t)
+ srv, client := startChipIngressOrMock(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ cfg := beholder.DefaultDurableEmitterConfig()
+ cfg.RetransmitInterval = 1 * time.Second
+ cfg.RetransmitAfter = 3 * time.Second
+ cfg.RetransmitBatchSize = 500
+
+ em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop())
+ require.NoError(t, err)
+
+ ctx := testutils.Context(t)
+ em.Start(ctx)
+ defer em.Close()
+
+ const targetTPS = 1000
+ const duration = 60 * time.Second
+ const concurrency = 20
+
+ t.Logf("Emit phase: target=%d TPS for %s @ concurrency=%d (progress bar on stdout)", targetTPS, duration, concurrency)
+ emitStart := time.Now()
+
+ stats := runRateLimitedEmit(ctx, t, em, targetTPS, duration, concurrency, 256, "sustained_1k")
+
+ achievedTPS := float64(stats.count()) / duration.Seconds()
+ t.Logf("Emit phase complete in %s: %d events (%.0f TPS)", time.Since(emitStart).Round(time.Millisecond), stats.count(), achievedTPS)
+
+ // Wait for the pipeline to drain.
+ t.Logf("Waiting for pipeline to drain...")
+ drainStart := time.Now()
+ require.Eventually(t, func() bool {
+ var count int64
+ _ = db.QueryRow("SELECT count(*) FROM cre.chip_durable_events").Scan(&count)
+ return count == 0
+ }, 30*time.Second, 500*time.Millisecond, "pipeline should drain after emit phase ends")
+ drainTime := time.Since(drainStart)
+
+ t.Logf("╔════════════════════════════════════════════════════╗")
+ t.Logf("║ SUSTAINED 1k TPS TEST RESULTS ║")
+ t.Logf("╠════════════════════════════════════════════════════╣")
+ t.Logf("║ Target TPS: %-6d ║", targetTPS)
+ t.Logf("║ Duration: %-6s ║", duration)
+ t.Logf("║ Total emitted: %-6d ║", stats.count())
+ t.Logf("║ Achieved TPS: %-6.0f ║", achievedTPS)
+ t.Logf("║ Emit failures: %-6d ║", stats.failures.Load())
+ t.Logf("║ Emit p50 latency: %-6.2f ms ║", float64(stats.percentile(0.50).Microseconds())/1000.0)
+ t.Logf("║ Emit p99 latency: %-6.2f ms ║", float64(stats.percentile(0.99).Microseconds())/1000.0)
+ t.Logf("║ Server received: %-6s (mock event count) ║", formatMockServerEvents(srv))
+ t.Logf("║ Drain time: %-6s ║", drainTime.Round(time.Millisecond))
+ t.Logf("╚════════════════════════════════════════════════════╝")
+ t.Logf("TestTPS_Sustained1k finished in %s", time.Since(testStart).Round(time.Millisecond))
+
+ appendTPSummaryBlock("TestTPS_Sustained1k",
+ fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)),
+ fmt.Sprintf("emit phase: %s", time.Since(emitStart).Round(time.Millisecond)),
+ fmt.Sprintf("target TPS: %d, achieved: %.0f, failures: %d", targetTPS, achievedTPS, stats.failures.Load()),
+ fmt.Sprintf("emit p50/p99 ms: %.2f / %.2f", float64(stats.percentile(0.50).Microseconds())/1000.0, float64(stats.percentile(0.99).Microseconds())/1000.0),
+ fmt.Sprintf("server events: %s, drain time: %s", formatMockServerEvents(srv), drainTime.Round(time.Millisecond)),
+ )
+
+ // NOTE(review): testify treats a single-string message as a literal (not a
+ // format string), so "90%%" will print verbatim with both percent signs —
+ // confirm and drop one '%' if a single sign is intended.
+ assert.GreaterOrEqual(t, achievedTPS, float64(targetTPS)*0.9,
+ "should achieve at least 90%% of target TPS")
+ assert.Equal(t, int64(0), stats.failures.Load(),
+ "no Emit() calls should fail")
+ assert.Less(t, stats.percentile(0.99), 50*time.Millisecond,
+ "p99 Emit() latency should be under 50ms")
+}
+
+// TestTPS_1k_WithChipOutage runs at 1000 TPS, takes Chip down mid-test,
+// and verifies events accumulate safely then drain on recovery.
+func TestTPS_1k_WithChipOutage(t *testing.T) {
+ skipIfExternalChip(t, "inject Unavailable errors on mock server")
+
+ testStart := time.Now()
+ t.Logf("TestTPS_1k_WithChipOutage: provisioning...")
+
+ db := directDB(t)
+ srv, client := startChipIngressOrMock(t)
+ require.NotNil(t, srv)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ cfg := beholder.DefaultDurableEmitterConfig()
+ cfg.RetransmitInterval = 1 * time.Second
+ cfg.RetransmitAfter = 2 * time.Second
+ cfg.RetransmitBatchSize = 500
+
+ em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop())
+ require.NoError(t, err)
+
+ ctx := testutils.Context(t)
+ em.Start(ctx)
+ defer em.Close()
+
+ const targetTPS = 1000
+ const concurrency = 20
+
+ // Phase 1: 15s of healthy operation at 1k TPS.
+ t.Logf("Phase 1: Healthy — emitting at %d TPS for 15s...", targetTPS)
+ p1Start := time.Now()
+ phase1Stats := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase1_healthy")
+ t.Logf("Phase 1 emit finished in %s", time.Since(p1Start).Round(time.Millisecond))
+ time.Sleep(3 * time.Second) // let pipeline drain
+ t.Logf("Phase 1 done: %d events emitted (%.0f TPS)", phase1Stats.count(),
+ float64(phase1Stats.count())/15.0)
+
+ // Phase 2: Chip goes down. Continue emitting for 15s.
+ t.Logf("Phase 2: Chip UNAVAILABLE — emitting at %d TPS for 15s...", targetTPS)
+ srv.setPublishErr(status.Error(codes.Unavailable, "chip down"))
+ srv.setBatchErr(status.Error(codes.Unavailable, "chip down"))
+
+ p2Start := time.Now()
+ phase2Stats := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase2_chip_down")
+ t.Logf("Phase 2 emit finished in %s", time.Since(p2Start).Round(time.Millisecond))
+
+ // Check queue depth during outage.
+ // NOTE(review): Scan error ignored — a query failure reads as depth 0.
+ var queueDuringOutage int64
+ _ = db.QueryRow("SELECT count(*) FROM cre.chip_durable_events").Scan(&queueDuringOutage)
+ t.Logf("Phase 2 done: %d events emitted (%.0f TPS), queue depth: %d",
+ phase2Stats.count(), float64(phase2Stats.count())/15.0, queueDuringOutage)
+
+ assert.Equal(t, int64(0), phase2Stats.failures.Load(),
+ "Emit must not fail during Chip outage — DB insert should still work")
+
+ // Phase 3: Chip recovers. Stop emitting. Measure drain.
+ t.Logf("Phase 3: Chip RECOVERED — measuring drain...")
+ srv.setPublishErr(nil)
+ srv.setBatchErr(nil)
+
+ drainStart := time.Now()
+ require.Eventually(t, func() bool {
+ var count int64
+ _ = db.QueryRow("SELECT count(*) FROM cre.chip_durable_events").Scan(&count)
+ return count == 0
+ }, 60*time.Second, 500*time.Millisecond, "queue should drain after Chip recovery")
+ drainTime := time.Since(drainStart)
+ drainRate := float64(queueDuringOutage) / drainTime.Seconds()
+
+ t.Logf("╔════════════════════════════════════════════════════╗")
+ t.Logf("║ 1k TPS WITH CHIP OUTAGE — RESULTS ║")
+ t.Logf("╠════════════════════════════════════════════════════╣")
+ t.Logf("║ Phase 1 (healthy): ║")
+ t.Logf("║ Emitted: %-6d events ║", phase1Stats.count())
+ t.Logf("║ p99 latency: %-6.2f ms ║", float64(phase1Stats.percentile(0.99).Microseconds())/1000.0)
+ t.Logf("║ Phase 2 (Chip down): ║")
+ t.Logf("║ Emitted: %-6d events ║", phase2Stats.count())
+ t.Logf("║ p99 latency: %-6.2f ms ║", float64(phase2Stats.percentile(0.99).Microseconds())/1000.0)
+ t.Logf("║ Emit failures: %-6d ║", phase2Stats.failures.Load())
+ t.Logf("║ Queue depth: %-6d events ║", queueDuringOutage)
+ t.Logf("║ Phase 3 (recovery): ║")
+ t.Logf("║ Drain time: %-6s ║", drainTime.Round(time.Millisecond))
+ t.Logf("║ Drain rate: %-6.0f events/sec ║", drainRate)
+ t.Logf("║ Server received: %-6d total ║", srv.totalEvents.Load())
+ t.Logf("╚════════════════════════════════════════════════════╝")
+ t.Logf("TestTPS_1k_WithChipOutage finished in %s", time.Since(testStart).Round(time.Millisecond))
+
+ appendTPSummaryBlock("TestTPS_1k_WithChipOutage",
+ fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)),
+ fmt.Sprintf("phase1 events: %d, phase2 events: %d, queue at outage: %d", phase1Stats.count(), phase2Stats.count(), queueDuringOutage),
+ fmt.Sprintf("drain time: %s, drain rate: %.0f ev/s, server total: %d", drainTime.Round(time.Millisecond), drainRate, srv.totalEvents.Load()),
+ )
+}
+
+// TestTPS_PayloadSizeScaling tests 1k TPS at different payload sizes to
+// understand how billing record size affects throughput.
+func TestTPS_PayloadSizeScaling(t *testing.T) {
+ testStart := time.Now()
+ sizes := []struct {
+ name string
+ size int
+ }{
+ {"64B", 64},
+ {"256B", 256},
+ {"1KB", 1024},
+ {"4KB", 4096},
+ }
+
+ tpsPayloadMu.Lock()
+ tpsPayloadRows = nil
+ tpsPayloadMu.Unlock()
+
+ t.Logf("TestTPS_PayloadSizeScaling: 1k TPS × payload sizes %v", sizes)
+
+ const payloadDuration = 15 * time.Second
+
+ t.Logf("╔══════════════════════════════════════════════════════════════════════════╗")
+ t.Logf("║ 1k TPS × PAYLOAD SIZE SCALING ║")
+ t.Logf("╠══════════╦══════════╦═════════════╦══════════╦══════════╦════════════════╣")
+ t.Logf("║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║")
+ t.Logf("║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║")
+ t.Logf("╠══════════╬══════════╬═════════════╬══════════╬══════════╬════════════════╣")
+
+ // One sub-test per payload size; each gets a fresh DB, client, and emitter.
+ for _, s := range sizes {
+ t.Run(s.name, func(t *testing.T) {
+ t.Logf(">>> payload %s: provisioning...", s.name)
+ db := directDB(t)
+ _, client := startChipIngressOrMock(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ cfg := beholder.DefaultDurableEmitterConfig()
+ cfg.RetransmitInterval = 1 * time.Second
+ cfg.RetransmitAfter = 3 * time.Second
+ cfg.RetransmitBatchSize = 500
+
+ em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop())
+ require.NoError(t, err)
+
+ ctx := testutils.Context(t)
+ em.Start(ctx)
+ defer em.Close()
+
+ const targetTPS = 1000
+ const concurrency = 20
+
+ t.Logf(">>> payload %s: emitting %d TPS for %s", s.name, targetTPS, payloadDuration)
+ stats := runRateLimitedEmit(ctx, t, em, targetTPS, payloadDuration, concurrency, s.size,
+ fmt.Sprintf("payload/%s", s.name))
+
+ achieved := float64(stats.count()) / payloadDuration.Seconds()
+ totalEmits := stats.count()
+
+ rowLine := fmt.Sprintf("║ %-8s ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-14d ║",
+ s.name, achieved, totalEmits,
+ float64(stats.percentile(0.50).Microseconds())/1000.0,
+ float64(stats.percentile(0.99).Microseconds())/1000.0,
+ stats.failures.Load())
+ t.Log(rowLine)
+
+ tpsPayloadMu.Lock()
+ tpsPayloadRows = append(tpsPayloadRows, rowLine)
+ tpsPayloadMu.Unlock()
+ })
+ }
+
+ t.Logf("╚══════════╩══════════╩═════════════╩══════════╩══════════╩════════════════╝")
+ t.Logf("Total emits = successful Emit() calls in each %s window (per payload size).", payloadDuration)
+ t.Logf("TestTPS_PayloadSizeScaling finished in %s", time.Since(testStart).Round(time.Millisecond))
+
+ summaryLines := []string{
+ fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)),
+ "╔══════════════════════════════════════════════════════════════════════════╗",
+ "║ 1k TPS × PAYLOAD SIZE SCALING ║",
+ "╠══════════╦══════════╦═════════════╦══════════╦══════════╦════════════════╣",
+ "║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║",
+ "║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║",
+ "╠══════════╬══════════╬═════════════╬══════════╬══════════╬════════════════╣",
+ }
+ tpsPayloadMu.Lock()
+ summaryLines = append(summaryLines, tpsPayloadRows...)
+ tpsPayloadMu.Unlock()
+ summaryLines = append(summaryLines, "╚══════════╩══════════╩═════════════╩══════════╩══════════╩════════════════╝")
+ appendTPSummaryBlock("TestTPS_PayloadSizeScaling", summaryLines...)
+}
diff --git a/core/services/beholder/durable_event_store_orm_test.go b/core/services/beholder/durable_event_store_orm_test.go
new file mode 100644
index 00000000000..3cdd8789994
--- /dev/null
+++ b/core/services/beholder/durable_event_store_orm_test.go
@@ -0,0 +1,363 @@
+package beholder_test
+
+import (
+ "context"
+ "crypto/rand"
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ beholdersvc "github.com/smartcontractkit/chainlink/v2/core/services/beholder"
+
+ "github.com/smartcontractkit/chainlink/v2/core/internal/testutils"
+ "github.com/smartcontractkit/chainlink/v2/core/internal/testutils/pgtest"
+)
+
+// TestPgDurableEventStore_InsertDeleteRoundTrip verifies the basic lifecycle:
+// Insert returns a positive id, ListPending surfaces the stored payload, and
+// Delete removes it.
+func TestPgDurableEventStore_InsertDeleteRoundTrip(t *testing.T) {
+ db := pgtest.NewSqlxDB(t)
+ ctx := testutils.Context(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ id, err := store.Insert(ctx, []byte("test-payload"))
+ require.NoError(t, err)
+ require.Greater(t, id, int64(0))
+
+ events, err := store.ListPending(ctx, time.Now().Add(time.Second), 10)
+ require.NoError(t, err)
+ require.Len(t, events, 1)
+ assert.Equal(t, id, events[0].ID)
+ assert.Equal(t, []byte("test-payload"), events[0].Payload)
+
+ require.NoError(t, store.Delete(ctx, id))
+
+ events, err = store.ListPending(ctx, time.Now().Add(time.Second), 10)
+ require.NoError(t, err)
+ assert.Len(t, events, 0)
+}
+
+func TestPgDurableEventStore_ListPending_RespectsCreatedBefore(t *testing.T) {
+ db := pgtest.NewSqlxDB(t)
+ ctx := testutils.Context(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ _, err := store.Insert(ctx, []byte("event-1"))
+ require.NoError(t, err)
+
+ // createdBefore in the past should return nothing (event was just created).
+ events, err := store.ListPending(ctx, time.Now().Add(-time.Hour), 10)
+ require.NoError(t, err)
+ assert.Len(t, events, 0)
+
+ // createdBefore in the future should return the event.
+ events, err = store.ListPending(ctx, time.Now().Add(time.Hour), 10)
+ require.NoError(t, err)
+ assert.Len(t, events, 1)
+}
+
+// TestPgDurableEventStore_ListPending_RespectsLimit verifies that ListPending
+// caps its result set at the requested limit.
+func TestPgDurableEventStore_ListPending_RespectsLimit(t *testing.T) {
+ db := pgtest.NewSqlxDB(t)
+ ctx := testutils.Context(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ for i := 0; i < 20; i++ {
+ _, err := store.Insert(ctx, []byte(fmt.Sprintf("event-%d", i)))
+ require.NoError(t, err)
+ }
+
+ events, err := store.ListPending(ctx, time.Now().Add(time.Second), 5)
+ require.NoError(t, err)
+ assert.Len(t, events, 5)
+}
+
+// TestPgDurableEventStore_DeleteExpired verifies TTL-based cleanup: a long TTL
+// keeps fresh rows, and a zero TTL deletes everything.
+func TestPgDurableEventStore_DeleteExpired(t *testing.T) {
+ db := pgtest.NewSqlxDB(t)
+ ctx := testutils.Context(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ _, err := store.Insert(ctx, []byte("will-expire"))
+ require.NoError(t, err)
+
+ // TTL of 1 hour — nothing should be deleted (event is <1s old).
+ deleted, err := store.DeleteExpired(ctx, time.Hour)
+ require.NoError(t, err)
+ assert.Equal(t, int64(0), deleted)
+
+ // TTL of 0 — everything should be deleted.
+ deleted, err = store.DeleteExpired(ctx, 0)
+ require.NoError(t, err)
+ assert.Equal(t, int64(1), deleted)
+}
+
+// ---------- Benchmarks ----------
+
+// randomPayload returns `size` bytes of random data. The crypto/rand error is
+// ignored: acceptable in test fixtures, where a short read would only reduce
+// payload entropy, not correctness.
+func randomPayload(size int) []byte {
+ buf := make([]byte, size)
+ _, _ = rand.Read(buf)
+ return buf
+}
+
+// Benchmark_Insert measures raw INSERT throughput for individual events.
+func Benchmark_Insert(b *testing.B) {
+ db := pgtest.NewSqlxDB(b)
+ ctx := testutils.Context(b)
+ store := beholdersvc.NewPgDurableEventStore(db)
+ payload := randomPayload(256)
+
+ b.ResetTimer()
+ for b.Loop() {
+ _, err := store.Insert(ctx, payload)
+ require.NoError(b, err)
+ }
+}
+
+// Benchmark_InsertDelete measures the insert + delete cycle (the hot path when
+// events are delivered successfully on the first attempt).
+func Benchmark_InsertDelete(b *testing.B) {
+ db := pgtest.NewSqlxDB(b)
+ ctx := testutils.Context(b)
+ store := beholdersvc.NewPgDurableEventStore(db)
+ payload := randomPayload(256)
+
+ b.ResetTimer()
+ for b.Loop() {
+ id, err := store.Insert(ctx, payload)
+ require.NoError(b, err)
+ require.NoError(b, store.Delete(ctx, id))
+ }
+}
+
+// Benchmark_InsertPayloadSizes measures INSERT throughput at different payload sizes
+// to understand how payload size affects DB performance.
+func Benchmark_InsertPayloadSizes(b *testing.B) {
+ sizes := []int{64, 256, 1024, 4096}
+ for _, size := range sizes {
+ b.Run(fmt.Sprintf("%dB", size), func(b *testing.B) {
+ db := pgtest.NewSqlxDB(b)
+ ctx := testutils.Context(b)
+ store := beholdersvc.NewPgDurableEventStore(db)
+ payload := randomPayload(size)
+
+ b.ResetTimer()
+ for b.Loop() {
+ _, err := store.Insert(ctx, payload)
+ require.NoError(b, err)
+ }
+ })
+ }
+}
+
+// Benchmark_ListPending measures query performance with varying store depths.
+func Benchmark_ListPending(b *testing.B) {
+ depths := []int{100, 1000}
+ for _, depth := range depths {
+ b.Run(fmt.Sprintf("depth_%d", depth), func(b *testing.B) {
+ db := pgtest.NewSqlxDB(b)
+ ctx := testutils.Context(b)
+ store := beholdersvc.NewPgDurableEventStore(db)
+ payload := randomPayload(256)
+
+ // Pre-populate the table to the target depth before timing queries.
+ for i := 0; i < depth; i++ {
+ _, err := store.Insert(ctx, payload)
+ require.NoError(b, err)
+ }
+
+ b.ResetTimer()
+ for b.Loop() {
+ _, err := store.ListPending(ctx, time.Now().Add(time.Second), 100)
+ require.NoError(b, err)
+ }
+ })
+ }
+}
+
+// ---------- Load tests ----------
+
+// TestLoad_SustainedInsertDelete simulates the durable emitter's steady-state:
+// concurrent inserts with concurrent deletes, measuring achieved throughput
+// and verifying the store drains cleanly.
+func TestLoad_SustainedInsertDelete(t *testing.T) {
+ db := pgtest.NewSqlxDB(t)
+ ctx := testutils.Context(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ const (
+ totalEvents = 2000
+ concurrency = 10
+ )
+
+ payload := randomPayload(256)
+ // Buffered to totalEvents, so producers never block on the channel even
+ // if all consumers exit early.
+ ids := make(chan int64, totalEvents)
+ var insertCount, deleteCount atomic.Int64
+
+ start := time.Now()
+
+ // Producer goroutines: insert events.
+ var insertWg sync.WaitGroup
+ for w := 0; w < concurrency; w++ {
+ insertWg.Add(1)
+ go func() {
+ defer insertWg.Done()
+ for i := 0; i < totalEvents/concurrency; i++ {
+ id, err := store.Insert(ctx, payload)
+ if err != nil {
+ t.Errorf("insert failed: %v", err)
+ return
+ }
+ insertCount.Add(1)
+ ids <- id
+ }
+ }()
+ }
+
+ // Consumer goroutines: delete events as they're inserted.
+ // NOTE(review): a consumer returns on its first delete error; remaining ids
+ // are then drained by the other consumers, and the count assertions below
+ // surface the shortfall.
+ var deleteWg sync.WaitGroup
+ for w := 0; w < concurrency; w++ {
+ deleteWg.Add(1)
+ go func() {
+ defer deleteWg.Done()
+ for id := range ids {
+ if err := store.Delete(ctx, id); err != nil {
+ t.Errorf("delete failed: %v", err)
+ return
+ }
+ deleteCount.Add(1)
+ }
+ }()
+ }
+
+ insertWg.Wait()
+ close(ids)
+ deleteWg.Wait()
+
+ elapsed := time.Since(start)
+ insertRate := float64(insertCount.Load()) / elapsed.Seconds()
+ deleteRate := float64(deleteCount.Load()) / elapsed.Seconds()
+
+ t.Logf("--- Load Test Results ---")
+ t.Logf("Total events: %d", totalEvents)
+ t.Logf("Concurrency: %d", concurrency)
+ t.Logf("Elapsed: %s", elapsed.Round(time.Millisecond))
+ t.Logf("Insert rate: %.0f events/sec", insertRate)
+ t.Logf("Delete rate: %.0f events/sec", deleteRate)
+ t.Logf("Insert+Delete: %.0f ops/sec (combined)", insertRate+deleteRate)
+
+ assert.Equal(t, int64(totalEvents), insertCount.Load())
+ assert.Equal(t, int64(totalEvents), deleteCount.Load())
+
+ // Verify store is fully drained.
+ remaining, err := store.ListPending(ctx, time.Now().Add(time.Hour), totalEvents)
+ require.NoError(t, err)
+ assert.Len(t, remaining, 0, "store should be empty after load test")
+}
+
+// TestLoad_BurstThenDrain simulates Chip going down: a burst of inserts with
+// no deletes (events pile up), then a drain phase where everything is deleted
+// via ListPending + batch Delete.
+func TestLoad_BurstThenDrain(t *testing.T) {
+ db := pgtest.NewSqlxDB(t)
+ ctx := testutils.Context(t)
+ store := beholdersvc.NewPgDurableEventStore(db)
+
+ const burstSize = 1000
+ payload := randomPayload(512)
+
+ // Phase 1: burst insert (simulates events arriving while Chip is down).
+ burstStart := time.Now()
+ for i := 0; i < burstSize; i++ {
+ _, err := store.Insert(ctx, payload)
+ require.NoError(t, err)
+ }
+ burstElapsed := time.Since(burstStart)
+ t.Logf("Burst insert: %d events in %s (%.0f events/sec)",
+ burstSize, burstElapsed.Round(time.Millisecond),
+ float64(burstSize)/burstElapsed.Seconds())
+
+ // Phase 2: drain via ListPending + Delete (simulates retransmit loop).
+ drainStart := time.Now()
+ totalDrained := 0
+ for {
+ batch, err := store.ListPending(ctx, time.Now().Add(time.Second), 100)
+ require.NoError(t, err)
+ if len(batch) == 0 {
+ break
+ }
+ for _, e := range batch {
+ require.NoError(t, store.Delete(ctx, e.ID))
+ }
+ totalDrained += len(batch)
+ }
+ drainElapsed := time.Since(drainStart)
+ t.Logf("Drain: %d events in %s (%.0f events/sec)",
+ totalDrained, drainElapsed.Round(time.Millisecond),
+ float64(totalDrained)/drainElapsed.Seconds())
+
+ assert.Equal(t, burstSize, totalDrained)
+}
+
+// TestLoad_ConcurrentInsertWithListPending simulates the real contention pattern:
+// inserts happening concurrently with ListPending queries from the retransmit loop.
+func TestLoad_ConcurrentInsertWithListPending(t *testing.T) { + db := pgtest.NewSqlxDB(t) + ctx := testutils.Context(t) + store := beholdersvc.NewPgDurableEventStore(db) + + const ( + duration = 3 * time.Second + concurrency = 5 + ) + + payload := randomPayload(256) + var insertCount, queryCount atomic.Int64 + + ctx, cancel := context.WithTimeout(ctx, duration) + defer cancel() + + var wg sync.WaitGroup + + // Inserters. + for w := 0; w < concurrency; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + default: + } + if _, err := store.Insert(ctx, payload); err != nil { + return // context cancelled + } + insertCount.Add(1) + } + }() + } + + // ListPending poller (simulates retransmit loop). + wg.Add(1) + go func() { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + default: + } + if _, err := store.ListPending(ctx, time.Now().Add(time.Second), 100); err != nil { + return + } + queryCount.Add(1) + } + }() + + wg.Wait() + + t.Logf("--- Contention Test Results (%s) ---", duration) + t.Logf("Inserts: %d (%.0f/sec)", insertCount.Load(), float64(insertCount.Load())/duration.Seconds()) + t.Logf("ListPending calls: %d (%.0f/sec)", queryCount.Load(), float64(queryCount.Load())/duration.Seconds()) +} diff --git a/core/services/chainlink/application.go b/core/services/chainlink/application.go index 9ee3b2bc8c9..cbce2c2cad2 100644 --- a/core/services/chainlink/application.go +++ b/core/services/chainlink/application.go @@ -26,6 +26,7 @@ import ( "go.uber.org/zap/zapcore" "github.com/smartcontractkit/chainlink-common/pkg/beholder" + "github.com/smartcontractkit/chainlink-common/pkg/chipingress" "github.com/smartcontractkit/chainlink-common/pkg/loop" nodeauthjwt "github.com/smartcontractkit/chainlink-common/pkg/nodeauth/jwt" commonsrv "github.com/smartcontractkit/chainlink-common/pkg/services" @@ -52,6 +53,8 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/services/ccv/ccvexecutor" 
"github.com/smartcontractkit/chainlink/v2/core/services/cresettings" + beholdersvc "github.com/smartcontractkit/chainlink/v2/core/services/beholder" + "github.com/smartcontractkit/chainlink/v2/core/bridges" "github.com/smartcontractkit/chainlink/v2/core/build" "github.com/smartcontractkit/chainlink/v2/core/capabilities" @@ -378,6 +381,13 @@ func NewApplication(ctx context.Context, opts ApplicationOpts) (Application, err } jwtGenerator := nodeauthjwt.NewNodeJWTGenerator(csaSigner, csaPubKey) + // Wire DurableEmitter for persistent chip ingress delivery when enabled. + if cfg.Telemetry().DurableEmitterEnabled() && cfg.Telemetry().ChipIngressEndpoint() != "" { + if err := setupDurableEmitter(ctx, opts.DS, globalLogger); err != nil { + globalLogger.Warnw("Failed to set up durable emitter, continuing without it", "error", err) + } + } + creServices, err := cre.NewServices( globalLogger, opts.DS, @@ -1259,3 +1269,44 @@ func (app *ChainlinkApplication) DeleteLogPollerDataAfter(ctx context.Context, c return nil } + +// setupDurableEmitter replaces the global beholder emitter with a DurableEmitter +// backed by Postgres. Events are persisted before async gRPC delivery, surviving +// node restarts and chip ingress outages. +func setupDurableEmitter(ctx context.Context, ds sqlutil.DataSource, lggr logger.SugaredLogger) error { + client := beholder.GetClient() + if client == nil { + return fmt.Errorf("beholder client not initialized") + } + + chipClient := client.Chip + if chipClient == nil || isNoopChipClient(chipClient) { + return fmt.Errorf("chip ingress client not available") + } + + pgStore := beholdersvc.NewPgDurableEventStore(ds) + durableCfg := beholder.DefaultDurableEmitterConfig() + durableEmitter, err := beholder.NewDurableEmitter(pgStore, chipClient, durableCfg, lggr) + if err != nil { + return fmt.Errorf("failed to create durable emitter: %w", err) + } + + // Build a new DualSourceEmitter: durable chip + OTLP. 
+ messageLogger := client.MessageLoggerProvider.Logger("durable-emitter") + otlpEmitter := beholder.NewMessageEmitter(messageLogger) + dualEmitter, err := beholder.NewDualSourceEmitter(durableEmitter, otlpEmitter) + if err != nil { + return fmt.Errorf("failed to create dual source emitter: %w", err) + } + + durableEmitter.Start(ctx) + client.Emitter = dualEmitter + + lggr.Infow("Durable emitter enabled — chip events will be persisted to Postgres") + return nil +} + +func isNoopChipClient(c chipingress.Client) bool { + _, ok := c.(*chipingress.NoopClient) + return ok +} diff --git a/core/services/chainlink/config_telemetry.go b/core/services/chainlink/config_telemetry.go index 5d1f65b7442..cc2e50c517c 100644 --- a/core/services/chainlink/config_telemetry.go +++ b/core/services/chainlink/config_telemetry.go @@ -97,6 +97,13 @@ func (b *telemetryConfig) ChipIngressInsecureConnection() bool { return *b.s.ChipIngressInsecureConnection } +func (b *telemetryConfig) DurableEmitterEnabled() bool { + if b.s.DurableEmitterEnabled == nil { + return false + } + return *b.s.DurableEmitterEnabled +} + func (b *telemetryConfig) HeartbeatInterval() time.Duration { if b.s.HeartbeatInterval == nil || b.s.HeartbeatInterval.Duration() <= 0 { return defaultHeartbeatInterval diff --git a/core/store/migrate/migrations/0294_chip_durable_events.sql b/core/store/migrate/migrations/0295_chip_durable_events.sql similarity index 100% rename from core/store/migrate/migrations/0294_chip_durable_events.sql rename to core/store/migrate/migrations/0295_chip_durable_events.sql diff --git a/deployment/go.mod b/deployment/go.mod index 00fe58b1676..df724899f01 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -44,7 +44,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/deployment 
v0.0.0-20260317175207-e9ff89561326 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/deployment/go.sum b/deployment/go.sum index acdecd591f0..b2f5082ab61 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -1387,8 +1387,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 h1:nvv1kiv/7jwALkFztO//NhIq4Y9M4kmJ0UCgTZMC/qI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/go.mod b/go.mod index 03cd67f68b3..65b0d29d153 
100644 --- a/go.mod +++ b/go.mod @@ -85,7 +85,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5 github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 github.com/smartcontractkit/chainlink-data-streams v0.1.12 @@ -439,6 +439,4 @@ require ( replace github.com/fbsobreira/gotron-sdk => github.com/smartcontractkit/chainlink-tron/relayer/gotron-sdk v0.0.5-0.20260218133534-cbd44da2856b -replace github.com/smartcontractkit/chainlink-common => ../chainlink-common - tool github.com/smartcontractkit/chainlink-common/pkg/loop/cmd/loopinstall diff --git a/go.sum b/go.sum index bcedc093dd5..7ebf7e4fd78 100644 --- a/go.sum +++ b/go.sum @@ -1235,6 +1235,8 @@ github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5/go.mod h1:xtZNi6pOKdC3sLvokDvXOhgHzT+cyBqH/gWwvxTxqrg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 
h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index 126d6cf6d10..80fad1bd0d2 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -50,7 +50,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/integration-tests/go.sum b/integration-tests/go.sum index 4315e91a94f..d2e1935b9a4 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1626,8 +1626,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 h1:nvv1kiv/7jwALkFztO//NhIq4Y9M4kmJ0UCgTZMC/qI= -github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260317233127-178dd2eeaa87/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index b7c50bcbf76..e5b9159ffbc 100644 --- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -31,7 +31,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20260119171452-39c98c3b33cd diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index 81fa9f6eff0..4f5d8e5a280 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1604,8 +1604,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod 
h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 h1:nvv1kiv/7jwALkFztO//NhIq4Y9M4kmJ0UCgTZMC/qI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/lib/cre/don/config/config.go b/system-tests/lib/cre/don/config/config.go index 9e17161b6b3..020e81f3638 100644 --- a/system-tests/lib/cre/don/config/config.go +++ b/system-tests/lib/cre/don/config/config.go @@ -348,6 +348,7 @@ func addBootstrapNodeConfig( existingConfig.Telemetry.ChipIngressEndpoint = ptr.Ptr(strings.TrimPrefix(framework.HostDockerInternal(), "http://") + ":" + chipingressset.DEFAULT_CHIP_INGRESS_GRPC_PORT) existingConfig.Telemetry.ChipIngressInsecureConnection = ptr.Ptr(true) + existingConfig.Telemetry.DurableEmitterEnabled = ptr.Ptr(true) existingConfig.Telemetry.HeartbeatInterval = commonconfig.MustNewDuration(30 * time.Second) existingConfig.Billing = coretoml.Billing{ @@ -425,6 +426,7 @@ func addWorkerNodeConfig( 
existingConfig.Telemetry.ChipIngressEndpoint = ptr.Ptr(strings.TrimPrefix(framework.HostDockerInternal(), "http://") + ":" + chipingressset.DEFAULT_CHIP_INGRESS_GRPC_PORT) existingConfig.Telemetry.ChipIngressInsecureConnection = ptr.Ptr(true) + existingConfig.Telemetry.DurableEmitterEnabled = ptr.Ptr(true) existingConfig.Telemetry.HeartbeatInterval = commonconfig.MustNewDuration(30 * time.Second) existingConfig.Billing = coretoml.Billing{ diff --git a/system-tests/lib/go.mod b/system-tests/lib/go.mod index 969b2f67df9..31961aaade5 100644 --- a/system-tests/lib/go.mod +++ b/system-tests/lib/go.mod @@ -32,7 +32,7 @@ require ( github.com/sethvargo/go-retry v0.3.0 github.com/smartcontractkit/chain-selectors v1.0.97 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/system-tests/lib/go.sum b/system-tests/lib/go.sum index 248ce1eef6b..a6a2faa6d33 100644 --- a/system-tests/lib/go.sum +++ b/system-tests/lib/go.sum @@ -1597,8 +1597,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 h1:nvv1kiv/7jwALkFztO//NhIq4Y9M4kmJ0UCgTZMC/qI= 
-github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/tests/go.mod b/system-tests/tests/go.mod index 6476ffef3ef..6b88a78879a 100644 --- a/system-tests/tests/go.mod +++ b/system-tests/tests/go.mod @@ -54,7 +54,7 @@ require ( github.com/rs/zerolog v1.34.0 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chain-selectors v1.0.97 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.12 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/system-tests/tests/go.sum b/system-tests/tests/go.sum index cf24fa02749..5125b702884 100644 --- a/system-tests/tests/go.sum +++ b/system-tests/tests/go.sum @@ -1781,8 +1781,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= 
github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87 h1:nvv1kiv/7jwALkFztO//NhIq4Y9M4kmJ0UCgTZMC/qI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260317233127-178dd2eeaa87/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/system-tests/tests/smoke/cre/cre_suite_test.go b/system-tests/tests/smoke/cre/cre_suite_test.go index 19a0379a3e3..29ff17329b7 100644 --- a/system-tests/tests/smoke/cre/cre_suite_test.go +++ b/system-tests/tests/smoke/cre/cre_suite_test.go @@ -230,6 +230,16 @@ func Test_CRE_V2_Beholder_Suite(t *testing.T) { ExecuteLogStreamingTest(t, testEnv) } +func Test_CRE_V2_DurableEmitter(t *testing.T) { + testEnv := t_helpers.SetupTestEnvironmentWithConfig(t, t_helpers.GetDefaultTestConfig(t)) + ExecuteDurableEmitterTest(t, testEnv) +} + +func Test_CRE_V2_DurableEmitter_Load(t *testing.T) { + testEnv := t_helpers.SetupTestEnvironmentWithConfig(t, t_helpers.GetDefaultTestConfig(t)) + ExecuteDurableEmitterLoadTest(t, testEnv) +} + func Test_CRE_V2_Sharding(t *testing.T) { testEnv := t_helpers.SetupTestEnvironmentWithConfig( t, diff --git a/system-tests/tests/smoke/cre/v2_durable_emitter_test.go 
b/system-tests/tests/smoke/cre/v2_durable_emitter_test.go new file mode 100644 index 00000000000..15a9b351a02 --- /dev/null +++ b/system-tests/tests/smoke/cre/v2_durable_emitter_test.go @@ -0,0 +1,248 @@ +package cre + +import ( + "context" + "database/sql" + "fmt" + "slices" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-testing-framework/framework" + + crontypes "github.com/smartcontractkit/chainlink/core/scripts/cre/environment/examples/workflows/v2/cron/types" + + "github.com/smartcontractkit/chainlink/system-tests/lib/cre" + t_helpers "github.com/smartcontractkit/chainlink/system-tests/tests/test-helpers" + ttypes "github.com/smartcontractkit/chainlink/system-tests/tests/test-helpers/configuration" +) + +// connectWorkflowDONDB connects to the Postgres database of the first workflow +// DON NodeSet. This is where cre.chip_durable_events lives. +func connectWorkflowDONDB(t *testing.T, nodeSets []*cre.NodeSet) *sql.DB { + t.Helper() + + var port int + var label string + for _, ns := range nodeSets { + if slices.Contains(ns.DONTypes, cre.WorkflowDON) { + port = ns.DbInput.Port + label = ns.Name + break + } + } + require.NotZerof(t, port, "no workflow DON NodeSet found") + + dsn := fmt.Sprintf( + "host=localhost port=%d user=chainlink password=thispasswordislongenough dbname=db_0 sslmode=disable", + port, + ) + db, err := sql.Open("postgres", dsn) + require.NoError(t, err) + require.NoError(t, db.PingContext(t.Context())) + t.Logf("connected to %s workflow DON DB (port %d) for durable emitter tracking", label, port) + t.Cleanup(func() { _ = db.Close() }) + return db +} + +type durableEventStats struct { + inserts int64 + deletes int64 +} + +// snapshotDurableEventStats returns cumulative insert/delete counts for +// chip_durable_events from pg_stat_user_tables. 
+func snapshotDurableEventStats(ctx context.Context, db *sql.DB) (durableEventStats, error) { + var s durableEventStats + err := db.QueryRowContext(ctx, + `SELECT COALESCE(n_tup_ins,0), COALESCE(n_tup_del,0) + FROM pg_stat_user_tables + WHERE relname = 'chip_durable_events'`, + ).Scan(&s.inserts, &s.deletes) + if err == sql.ErrNoRows { + return durableEventStats{}, nil + } + return s, err +} + +// countPendingDurableEvents returns the current number of rows in +// cre.chip_durable_events (events that haven't been delivered yet). +func countPendingDurableEvents(ctx context.Context, db *sql.DB) (int64, error) { + var count int64 + err := db.QueryRowContext(ctx, + `SELECT count(*) FROM cre.chip_durable_events`, + ).Scan(&count) + return count, err +} + +// ExecuteDurableEmitterTest verifies the DurableEmitter is active and +// functioning by deploying a cron workflow that emits events, then checking +// that chip_durable_events sees sustained insert+delete activity over time. +func ExecuteDurableEmitterTest(t *testing.T, testEnv *ttypes.TestEnvironment) { + lggr := framework.L + workflowFileLocation := "../../../../core/scripts/cre/environment/examples/workflows/v2/cron/main.go" + + db := connectWorkflowDONDB(t, testEnv.Config.NodeSets) + + _, err := countPendingDurableEvents(t.Context(), db) + require.NoError(t, err, "cre.chip_durable_events table should exist — check migration 0295") + + baseline, err := snapshotDurableEventStats(t.Context(), db) + require.NoError(t, err) + t.Logf("baseline chip_durable_events stats: inserts=%d deletes=%d", baseline.inserts, baseline.deletes) + + // Deploy a cron workflow that fires every 5 seconds. 
+ lggr.Info().Msg("Deploying cron workflow for durable emitter test...") + workflowConfig := crontypes.WorkflowConfig{ + Schedule: "*/5 * * * * *", + } + _ = t_helpers.CompileAndDeployWorkflow(t, testEnv, lggr, "durable-emitter-test", &workflowConfig, workflowFileLocation) + + // Wait for a meaningful volume of events to flow through the pipeline. + // Each cron execution emits ~3-5 beholder events across the DON. + // At every-5s with 4 nodes, expect ~50+ events per minute. + const minExpectedEvents int64 = 30 + + lggr.Info().Msg("Waiting for sustained durable event activity...") + + require.Eventually(t, func() bool { + stats, statsErr := snapshotDurableEventStats(t.Context(), db) + if statsErr != nil { + t.Logf("failed to snapshot stats: %v", statsErr) + return false + } + + newInserts := stats.inserts - baseline.inserts + newDeletes := stats.deletes - baseline.deletes + + pending, _ := countPendingDurableEvents(t.Context(), db) + t.Logf("chip_durable_events: +%d inserts, +%d deletes, %d pending", newInserts, newDeletes, pending) + + return newInserts >= minExpectedEvents && newDeletes >= minExpectedEvents + }, 4*time.Minute, 10*time.Second, "expected at least %d insert+delete events", minExpectedEvents) + + pending, err := countPendingDurableEvents(t.Context(), db) + require.NoError(t, err) + t.Logf("pending durable events at end of test: %d", pending) + assert.LessOrEqual(t, pending, int64(10), + "durable event queue should be near-empty when chip ingress is healthy") + + final, err := snapshotDurableEventStats(t.Context(), db) + require.NoError(t, err) + t.Logf("final chip_durable_events stats: inserts=%d (+%d) deletes=%d (+%d)", + final.inserts, final.inserts-baseline.inserts, + final.deletes, final.deletes-baseline.deletes) + + lggr.Info().Msg("Durable emitter test completed successfully") +} + +// ExecuteDurableEmitterLoadTest deploys multiple high-frequency cron workflows +// to stress the durable emitter pipeline. 
This measures the maximum sustained +// throughput of the persist → publish → delete cycle against real Postgres. +func ExecuteDurableEmitterLoadTest(t *testing.T, testEnv *ttypes.TestEnvironment) { + lggr := framework.L + workflowFileLocation := "../../../../core/scripts/cre/environment/examples/workflows/v2/cron/main.go" + + db := connectWorkflowDONDB(t, testEnv.Config.NodeSets) + + _, err := countPendingDurableEvents(t.Context(), db) + require.NoError(t, err, "cre.chip_durable_events table should exist") + + baseline, err := snapshotDurableEventStats(t.Context(), db) + require.NoError(t, err) + t.Logf("baseline: inserts=%d deletes=%d", baseline.inserts, baseline.deletes) + + // Deploy multiple cron workflows, each firing every second. + // Each execution emits ~3-5 events per node. With 4 nodes and N workflows, + // we expect roughly N * 4 * 4 = 16N events/sec across the DON. + const numWorkflows = 5 + cronConfig := crontypes.WorkflowConfig{ + Schedule: "*/1 * * * * *", // every second + } + + lggr.Info().Msgf("Deploying %d high-frequency cron workflows...", numWorkflows) + for i := 0; i < numWorkflows; i++ { + name := fmt.Sprintf("durable-load-%d", i) + _ = t_helpers.CompileAndDeployWorkflow(t, testEnv, lggr, name, &cronConfig, workflowFileLocation) + } + + // Let the load run for a fixed observation window. 
+ const observationPeriod = 3 * time.Minute + lggr.Info().Msgf("Load running for %s — monitoring durable event stats...", observationPeriod) + + ticker := time.NewTicker(15 * time.Second) + defer ticker.Stop() + deadline := time.After(observationPeriod) + + var maxPending int64 + var lastStats durableEventStats + + for { + select { + case <-deadline: + goto done + case <-ticker.C: + stats, statsErr := snapshotDurableEventStats(t.Context(), db) + if statsErr != nil { + t.Logf("stats error: %v", statsErr) + continue + } + pending, _ := countPendingDurableEvents(t.Context(), db) + + newInserts := stats.inserts - baseline.inserts + newDeletes := stats.deletes - baseline.deletes + + if pending > maxPending { + maxPending = pending + } + + // Calculate rates over the last interval. + var insertRate, deleteRate float64 + if lastStats.inserts > 0 { + insertRate = float64(stats.inserts-lastStats.inserts) / 15.0 + deleteRate = float64(stats.deletes-lastStats.deletes) / 15.0 + } + lastStats = stats + + t.Logf("durable events: +%d ins, +%d del | pending: %d (max %d) | rate: %.1f ins/s, %.1f del/s", + newInserts, newDeletes, pending, maxPending, insertRate, deleteRate) + } + } + +done: + final, err := snapshotDurableEventStats(t.Context(), db) + require.NoError(t, err) + pending, err := countPendingDurableEvents(t.Context(), db) + require.NoError(t, err) + + totalInserts := final.inserts - baseline.inserts + totalDeletes := final.deletes - baseline.deletes + avgInsertRate := float64(totalInserts) / observationPeriod.Seconds() + avgDeleteRate := float64(totalDeletes) / observationPeriod.Seconds() + + t.Logf("╔════════════════════════════════════════════════╗") + t.Logf("║ DURABLE EMITTER LOAD TEST RESULTS ║") + t.Logf("╠════════════════════════════════════════════════╣") + t.Logf("║ Workflows deployed: %d ║", numWorkflows) + t.Logf("║ Observation period: %s ║", observationPeriod) + t.Logf("║ Total inserts: %-6d ║", totalInserts) + t.Logf("║ Total deletes: %-6d ║", totalDeletes) 
+ t.Logf("║ Avg insert rate: %-6.1f events/sec ║", avgInsertRate) + t.Logf("║ Avg delete rate: %-6.1f events/sec ║", avgDeleteRate) + t.Logf("║ Max queue depth: %-6d ║", maxPending) + t.Logf("║ Final pending: %-6d ║", pending) + t.Logf("╚════════════════════════════════════════════════╝") + + // Sanity checks. + assert.Greater(t, totalInserts, int64(100), + "expected significant event volume from %d workflows", numWorkflows) + assert.Greater(t, totalDeletes, int64(0), + "deletes must occur — chip delivery is required") + assert.LessOrEqual(t, pending, int64(50), + "queue should not grow unboundedly with healthy chip ingress") + + lggr.Info().Msg("Durable emitter load test completed") +} From 041d89a9ad61c208d2d2441db3a40d05ce2dc532 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Mon, 23 Mar 2026 15:28:44 -0400 Subject: [PATCH 3/9] Update tests --- .../beholder/durable_emitter_load_test.go | 249 +++++++++++++----- .../tests/smoke/cre/cre_suite_test.go | 2 + .../smoke/cre/v2_durable_emitter_test.go | 49 +++- 3 files changed, 220 insertions(+), 80 deletions(-) diff --git a/core/services/beholder/durable_emitter_load_test.go b/core/services/beholder/durable_emitter_load_test.go index 084ebe648d2..772b2825356 100644 --- a/core/services/beholder/durable_emitter_load_test.go +++ b/core/services/beholder/durable_emitter_load_test.go @@ -144,10 +144,10 @@ func chipClient(t testing.TB, addr string) chipingress.Client { } const ( - envChipIngressTestAddr = "CHIP_INGRESS_TEST_ADDR" - envChipIngressTestTLS = "CHIP_INGRESS_TEST_TLS" - envChipIngressTestBasicUser = "CHIP_INGRESS_TEST_BASIC_AUTH_USER" - envChipIngressTestBasicPass = "CHIP_INGRESS_TEST_BASIC_AUTH_PASS" + envChipIngressTestAddr = "CHIP_INGRESS_TEST_ADDR" + envChipIngressTestTLS = "CHIP_INGRESS_TEST_TLS" + envChipIngressTestBasicUser = "CHIP_INGRESS_TEST_BASIC_AUTH_USER" + envChipIngressTestBasicPass = "CHIP_INGRESS_TEST_BASIC_AUTH_PASS" ) func externalChipConfigured() bool { @@ -690,8 +690,60 @@ func (s 
*emitLatencyStats) count() int { return len(s.samples) } +// rateLimitEmitResult is the outcome of runRateLimitedEmit. +type rateLimitEmitResult struct { + stats *emitLatencyStats + // maxQueueDepth is the maximum observed row count in cre.chip_durable_events + // during the emit window (polled periodically; nil DB disables sampling). + maxQueueDepth int64 + // maxQueuePayloadBytes is the maximum observed sum(octet_length(payload)) for + // rows still in the queue (serialized CloudEvent bytes stored in BYTEA). + maxQueuePayloadBytes int64 +} + +func bumpMaxQueueDepth(maxQ *atomic.Int64, c int64) { + for { + old := maxQ.Load() + if c <= old { + return + } + if maxQ.CompareAndSwap(old, c) { + return + } + } +} + +func bumpMaxQueuePayloadBytes(maxB *atomic.Int64, b int64) { + for { + old := maxB.Load() + if b <= old { + return + } + if maxB.CompareAndSwap(old, b) { + return + } + } +} + +// queuePayloadStats returns row count and total payload bytes for cre.chip_durable_events. +func queuePayloadStats(db *sqlx.DB, ctx context.Context) (rows int64, payloadBytes int64, err error) { + err = db.QueryRowContext(ctx, + `SELECT count(*), coalesce(sum(octet_length(payload)), 0) FROM cre.chip_durable_events`, + ).Scan(&rows, &payloadBytes) + return rows, payloadBytes, err +} + +func formatQueueKB(payloadBytes int64) string { + if payloadBytes == 0 { + return "0.0" + } + return fmt.Sprintf("%.1f", float64(payloadBytes)/1024.0) +} + // runRateLimitedEmit emits events at a target rate for the given duration, -// using the specified concurrency. Returns latency stats. +// using the specified concurrency. Returns latency stats and optional max queue depth. +// If maxQueueDB is non-nil, polls cre.chip_durable_events during the emit window to +// record peak backlog (async publish may lag inserts). // If progressLabel is non-empty, prints a live progress bar and emit count to stdout every 500ms. 
func runRateLimitedEmit( ctx context.Context, @@ -702,10 +754,12 @@ func runRateLimitedEmit( concurrency int, payloadSize int, progressLabel string, -) *emitLatencyStats { + maxQueueDB *sqlx.DB, +) *rateLimitEmitResult { t.Helper() stats := &emitLatencyStats{} + var maxQ, maxPayloadBytes atomic.Int64 var emitCount atomic.Int64 payload := buildLoadTestPayload(payloadSize) @@ -716,6 +770,28 @@ func runRateLimitedEmit( } interval := time.Duration(float64(time.Second) / float64(perWorkerTPS)) + pollCtx, pollCancel := context.WithCancel(ctx) + defer pollCancel() + if maxQueueDB != nil { + go func() { + ticker := time.NewTicker(50 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-pollCtx.Done(): + return + case <-ticker.C: + c, b, err := queuePayloadStats(maxQueueDB, pollCtx) + if err != nil { + continue + } + bumpMaxQueueDepth(&maxQ, c) + bumpMaxQueuePayloadBytes(&maxPayloadBytes, b) + } + } + }() + } + var wg sync.WaitGroup if progressLabel != "" { @@ -783,14 +859,25 @@ func runRateLimitedEmit( } wg.Wait() - return stats + if maxQueueDB != nil { + c, b, err := queuePayloadStats(maxQueueDB, ctx) + if err == nil { + bumpMaxQueueDepth(&maxQ, c) + bumpMaxQueuePayloadBytes(&maxPayloadBytes, b) + } + } + return &rateLimitEmitResult{ + stats: stats, + maxQueueDepth: maxQ.Load(), + maxQueuePayloadBytes: maxPayloadBytes.Load(), + } } // TestTPS_RampUp tests the durable emitter at increasing TPS levels to find // the throughput ceiling. Each level gets its own DurableEmitter to avoid // carry-over. Measures achieved rate, Emit() latency, and queue depth. 
func TestTPS_RampUp(t *testing.T) { - levels := []int{100, 500, 1000, 2000} + levels := []int{100, 500, 1000, 2000, 5000, 10000} testStart := time.Now() tpsRampMu.Lock() @@ -799,12 +886,12 @@ func TestTPS_RampUp(t *testing.T) { t.Logf("TPS ramp-up: levels=%v (each level: fresh DB + server + emitter)", levels) - t.Logf("╔════════════════════════════════════════════════════════════════════════════════════════════╗") - t.Logf("║ TPS RAMP-UP TEST RESULTS ║") - t.Logf("╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╣") - t.Logf("║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Server ║ Queue ║") - t.Logf("║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ recv* ║ depth ║") - t.Logf("╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╣") + t.Logf("╔════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗") + t.Logf("║ TPS RAMP-UP TEST RESULTS ║") + t.Logf("╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣") + t.Logf("║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Server ║ Q max ║ Q end ║ Q max ║ Q end ║") + t.Logf("║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ recv* ║ (rows) ║ (rows) ║ (KB)* ║ (KB)* ║") + t.Logf("╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣") for _, targetTPS := range levels { t.Run(fmt.Sprintf("%d_tps", targetTPS), func(t *testing.T) { @@ -830,8 +917,9 @@ func TestTPS_RampUp(t *testing.T) { const concurrency = 20 t.Logf(">>> level %d TPS: emitting for %s @ concurrency=%d (progress bar on stdout)", targetTPS, duration, concurrency) - stats := runRateLimitedEmit(ctx, t, em, targetTPS, duration, concurrency, 256, - fmt.Sprintf("ramp_up/%d_tps", targetTPS)) + emitRes := runRateLimitedEmit(ctx, t, em, targetTPS, duration, 
concurrency, 256, + fmt.Sprintf("ramp_up/%d_tps", targetTPS), db) + stats := emitRes.stats emitPhase := time.Since(levelStart) t.Logf(">>> level %d TPS: emit phase wall time %s", targetTPS, emitPhase.Round(time.Millisecond)) @@ -844,17 +932,17 @@ func TestTPS_RampUp(t *testing.T) { p99 := stats.percentile(0.99) serverCol := formatMockServerEvents(srv) - var queueDepth int64 - row := db.QueryRow("SELECT count(*) FROM cre.chip_durable_events") - _ = row.Scan(&queueDepth) + queueEnd, queueEndBytes, err := queuePayloadStats(db, ctx) + require.NoError(t, err) totalEmits := stats.count() - rowLine := fmt.Sprintf("║ %-9d ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-8d ║ %-8s ║ %-8d ║", + rowLine := fmt.Sprintf("║ %-9d ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-8d ║ %-8s ║ %-8d ║ %-8d ║ %-8s ║ %-8s ║", targetTPS, achieved, totalEmits, float64(p50.Microseconds())/1000.0, float64(p99.Microseconds())/1000.0, stats.failures.Load(), - serverCol, queueDepth) + serverCol, emitRes.maxQueueDepth, queueEnd, + formatQueueKB(emitRes.maxQueuePayloadBytes), formatQueueKB(queueEndBytes)) t.Log(rowLine) tpsRampMu.Lock() @@ -863,25 +951,26 @@ func TestTPS_RampUp(t *testing.T) { }) } - t.Logf("╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") - t.Logf("* Server recv: in-process mock gRPC publish/batch event count. With CHIP_INGRESS_TEST_ADDR (real Chip), "+ - "this is N/A — observe Kafka/Chip metrics instead. Total emits = successful Emit() completions in the window.") + t.Logf("╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") + t.Logf("* Q max/end rows: peak & final row counts. Q max/end KB: sum(octet_length(payload)) for queued rows / 1024 " + + "(serialized event bytes; excludes index & heap overhead). Sampled ~50ms during emit; Q end after 2s settle. 
" + + "Server recv: mock; N/A with real Chip.") t.Logf("TestTPS_RampUp finished in %s", time.Since(testStart).Round(time.Millisecond)) summaryLines := []string{ fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)), - "╔════════════════════════════════════════════════════════════════════════════════════════════╗", - "║ TPS RAMP-UP TEST RESULTS ║", - "╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╣", - "║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Server ║ Queue ║", - "║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ recv* ║ depth ║", - "╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╣", + "╔════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗", + "║ TPS RAMP-UP TEST RESULTS ║", + "╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣", + "║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Server ║ Q max ║ Q end ║ Q max ║ Q end ║", + "║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ recv* ║ (rows) ║ (rows) ║ (KB)* ║ (KB)* ║", + "╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣", } tpsRampMu.Lock() summaryLines = append(summaryLines, tpsRampRows...) tpsRampMu.Unlock() - summaryLines = append(summaryLines, "╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╝", - "* Server recv: mock-only; N/A with real Chip. Total emits = successful Emit() calls per level.") + summaryLines = append(summaryLines, "╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝", + "* Q KB = payload column bytes (sum octet_length) / 1024; excludes table/index overhead. 
Server recv: mock-only.") appendTPSummaryBlock("TestTPS_RampUp", summaryLines...) } @@ -915,7 +1004,8 @@ func TestTPS_Sustained1k(t *testing.T) { t.Logf("Emit phase: target=%d TPS for %s @ concurrency=%d (progress bar on stdout)", targetTPS, duration, concurrency) emitStart := time.Now() - stats := runRateLimitedEmit(ctx, t, em, targetTPS, duration, concurrency, 256, "sustained_1k") + emitRes := runRateLimitedEmit(ctx, t, em, targetTPS, duration, concurrency, 256, "sustained_1k", db) + stats := emitRes.stats achievedTPS := float64(stats.count()) / duration.Seconds() t.Logf("Emit phase complete in %s: %d events (%.0f TPS)", time.Since(emitStart).Round(time.Millisecond), stats.count(), achievedTPS) @@ -931,18 +1021,21 @@ func TestTPS_Sustained1k(t *testing.T) { drainTime := time.Since(drainStart) t.Logf("╔════════════════════════════════════════════════════╗") - t.Logf("║ SUSTAINED 1k TPS TEST RESULTS ║") + t.Logf("║ SUSTAINED 1k TPS TEST RESULTS ║") t.Logf("╠════════════════════════════════════════════════════╣") - t.Logf("║ Target TPS: %-6d ║", targetTPS) - t.Logf("║ Duration: %-6s ║", duration) - t.Logf("║ Total emitted: %-6d ║", stats.count()) - t.Logf("║ Achieved TPS: %-6.0f ║", achievedTPS) - t.Logf("║ Emit failures: %-6d ║", stats.failures.Load()) - t.Logf("║ Emit p50 latency: %-6.2f ms ║", float64(stats.percentile(0.50).Microseconds())/1000.0) - t.Logf("║ Emit p99 latency: %-6.2f ms ║", float64(stats.percentile(0.99).Microseconds())/1000.0) - t.Logf("║ Server received: %-6s (mock event count) ║", formatMockServerEvents(srv)) - t.Logf("║ Drain time: %-6s ║", drainTime.Round(time.Millisecond)) + t.Logf("║ Target TPS: %-6d ║", targetTPS) + t.Logf("║ Duration: %-6s ║", duration) + t.Logf("║ Total emitted: %-6d ║", stats.count()) + t.Logf("║ Achieved TPS: %-6.0f ║", achievedTPS) + t.Logf("║ Emit failures: %-6d ║", stats.failures.Load()) + t.Logf("║ Emit p50 latency: %-6.2f ms ║", float64(stats.percentile(0.50).Microseconds())/1000.0) + t.Logf("║ Emit p99 latency: 
%-6.2f ms ║", float64(stats.percentile(0.99).Microseconds())/1000.0) + t.Logf("║ Queue max (emit): %-6d rows ║", emitRes.maxQueueDepth) + t.Logf("║ Queue max (emit): %-10s KB payload* ║", formatQueueKB(emitRes.maxQueuePayloadBytes)) + t.Logf("║ Server received: %-6s (mock event count) ║", formatMockServerEvents(srv)) + t.Logf("║ Drain time: %-6s ║", drainTime.Round(time.Millisecond)) t.Logf("╚════════════════════════════════════════════════════╝") + t.Logf("* Queue KB = sum(octet_length(payload))/1024 for queued rows (excludes index/heap overhead).") t.Logf("TestTPS_Sustained1k finished in %s", time.Since(testStart).Round(time.Millisecond)) appendTPSummaryBlock("TestTPS_Sustained1k", @@ -950,6 +1043,7 @@ func TestTPS_Sustained1k(t *testing.T) { fmt.Sprintf("emit phase: %s", time.Since(emitStart).Round(time.Millisecond)), fmt.Sprintf("target TPS: %d, achieved: %.0f, failures: %d", targetTPS, achievedTPS, stats.failures.Load()), fmt.Sprintf("emit p50/p99 ms: %.2f / %.2f", float64(stats.percentile(0.50).Microseconds())/1000.0, float64(stats.percentile(0.99).Microseconds())/1000.0), + fmt.Sprintf("queue max during emit: %d rows, %s KB payload (sum octet_length/1024)", emitRes.maxQueueDepth, formatQueueKB(emitRes.maxQueuePayloadBytes)), fmt.Sprintf("server events: %s, drain time: %s", formatMockServerEvents(srv), drainTime.Round(time.Millisecond)), ) @@ -992,7 +1086,8 @@ func TestTPS_1k_WithChipOutage(t *testing.T) { // Phase 1: 15s of healthy operation at 1k TPS. 
t.Logf("Phase 1: Healthy — emitting at %d TPS for 15s...", targetTPS) p1Start := time.Now() - phase1Stats := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase1_healthy") + phase1Res := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase1_healthy", db) + phase1Stats := phase1Res.stats t.Logf("Phase 1 emit finished in %s", time.Since(p1Start).Round(time.Millisecond)) time.Sleep(3 * time.Second) // let pipeline drain t.Logf("Phase 1 done: %d events emitted (%.0f TPS)", phase1Stats.count(), @@ -1004,14 +1099,17 @@ func TestTPS_1k_WithChipOutage(t *testing.T) { srv.setBatchErr(status.Error(codes.Unavailable, "chip down")) p2Start := time.Now() - phase2Stats := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase2_chip_down") + phase2Res := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase2_chip_down", db) + phase2Stats := phase2Res.stats t.Logf("Phase 2 emit finished in %s", time.Since(p2Start).Round(time.Millisecond)) - // Check queue depth during outage. - var queueDuringOutage int64 - _ = db.QueryRow("SELECT count(*) FROM cre.chip_durable_events").Scan(&queueDuringOutage) - t.Logf("Phase 2 done: %d events emitted (%.0f TPS), queue depth: %d", - phase2Stats.count(), float64(phase2Stats.count())/15.0, queueDuringOutage) + // Queue at end of outage phase (for drain math) + peak sampled during phase 2 emit window. 
+ queueDuringOutage, queueDuringOutageBytes, err := queuePayloadStats(db, ctx) + require.NoError(t, err) + t.Logf("Phase 2 done: %d events emitted (%.0f TPS), queue end: %d rows / %s KB payload*, queue max (emit): %d rows / %s KB*", + phase2Stats.count(), float64(phase2Stats.count())/15.0, + queueDuringOutage, formatQueueKB(queueDuringOutageBytes), + phase2Res.maxQueueDepth, formatQueueKB(phase2Res.maxQueuePayloadBytes)) assert.Equal(t, int64(0), phase2Stats.failures.Load(), "Emit must not fail during Chip outage — DB insert should still work") @@ -1036,21 +1134,25 @@ func TestTPS_1k_WithChipOutage(t *testing.T) { t.Logf("║ Phase 1 (healthy): ║") t.Logf("║ Emitted: %-6d events ║", phase1Stats.count()) t.Logf("║ p99 latency: %-6.2f ms ║", float64(phase1Stats.percentile(0.99).Microseconds())/1000.0) + t.Logf("║ Queue max (emit): %-6d rows / %-8s KB* ║", phase1Res.maxQueueDepth, formatQueueKB(phase1Res.maxQueuePayloadBytes)) t.Logf("║ Phase 2 (Chip down): ║") t.Logf("║ Emitted: %-6d events ║", phase2Stats.count()) t.Logf("║ p99 latency: %-6.2f ms ║", float64(phase2Stats.percentile(0.99).Microseconds())/1000.0) t.Logf("║ Emit failures: %-6d ║", phase2Stats.failures.Load()) - t.Logf("║ Queue depth: %-6d events ║", queueDuringOutage) + t.Logf("║ Queue max (emit): %-6d rows / %-8s KB* ║", phase2Res.maxQueueDepth, formatQueueKB(phase2Res.maxQueuePayloadBytes)) + t.Logf("║ Queue end: %-6d rows / %-8s KB* ║", queueDuringOutage, formatQueueKB(queueDuringOutageBytes)) t.Logf("║ Phase 3 (recovery): ║") t.Logf("║ Drain time: %-6s ║", drainTime.Round(time.Millisecond)) t.Logf("║ Drain rate: %-6.0f events/sec ║", drainRate) t.Logf("║ Server received: %-6d total ║", srv.totalEvents.Load()) t.Logf("╚════════════════════════════════════════════════════╝") + t.Logf("* KB = sum(octet_length(payload))/1024 for queued rows (excludes index/heap overhead).") t.Logf("TestTPS_1k_WithChipOutage finished in %s", time.Since(testStart).Round(time.Millisecond)) 
appendTPSummaryBlock("TestTPS_1k_WithChipOutage", fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)), - fmt.Sprintf("phase1 events: %d, phase2 events: %d, queue at outage: %d", phase1Stats.count(), phase2Stats.count(), queueDuringOutage), + fmt.Sprintf("phase1 events: %d, phase2 events: %d, queue end: %d rows / %s KB, phase2 queue max: %d rows / %s KB", + phase1Stats.count(), phase2Stats.count(), queueDuringOutage, formatQueueKB(queueDuringOutageBytes), phase2Res.maxQueueDepth, formatQueueKB(phase2Res.maxQueuePayloadBytes)), fmt.Sprintf("drain time: %s, drain rate: %.0f ev/s, server total: %d", drainTime.Round(time.Millisecond), drainRate, srv.totalEvents.Load()), ) } @@ -1077,12 +1179,12 @@ func TestTPS_PayloadSizeScaling(t *testing.T) { const payloadDuration = 15 * time.Second - t.Logf("╔══════════════════════════════════════════════════════════════════════════╗") - t.Logf("║ 1k TPS × PAYLOAD SIZE SCALING ║") - t.Logf("╠══════════╦══════════╦═════════════╦══════════╦══════════╦════════════════╣") - t.Logf("║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║") - t.Logf("║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║") - t.Logf("╠══════════╬══════════╬═════════════╬══════════╬══════════╬════════════════╣") + t.Logf("╔════════════════════════════════════════════════════════════════════════════════════════════════════════════╗") + t.Logf("║ 1k TPS × PAYLOAD SIZE SCALING ║") + t.Logf("╠══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣") + t.Logf("║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Q max ║ Q end ║ Q max ║ Q end ║") + t.Logf("║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ (rows) ║ (rows) ║ (KB)* ║ (KB)* ║") + t.Logf("╠══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣") for _, s := range sizes { t.Run(s.name, func(t *testing.T) { @@ -1107,17 +1209,22 @@ func 
TestTPS_PayloadSizeScaling(t *testing.T) { const concurrency = 20 t.Logf(">>> payload %s: emitting %d TPS for %s", s.name, targetTPS, payloadDuration) - stats := runRateLimitedEmit(ctx, t, em, targetTPS, payloadDuration, concurrency, s.size, - fmt.Sprintf("payload/%s", s.name)) + emitRes := runRateLimitedEmit(ctx, t, em, targetTPS, payloadDuration, concurrency, s.size, + fmt.Sprintf("payload/%s", s.name), db) + stats := emitRes.stats + + queueEnd, queueEndBytes, err := queuePayloadStats(db, ctx) + require.NoError(t, err) achieved := float64(stats.count()) / payloadDuration.Seconds() totalEmits := stats.count() - rowLine := fmt.Sprintf("║ %-8s ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-14d ║", + rowLine := fmt.Sprintf("║ %-8s ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-8d ║ %-8d ║ %-8d ║ %-8s ║ %-8s ║", s.name, achieved, totalEmits, float64(stats.percentile(0.50).Microseconds())/1000.0, float64(stats.percentile(0.99).Microseconds())/1000.0, - stats.failures.Load()) + stats.failures.Load(), emitRes.maxQueueDepth, queueEnd, + formatQueueKB(emitRes.maxQueuePayloadBytes), formatQueueKB(queueEndBytes)) t.Log(rowLine) tpsPayloadMu.Lock() @@ -1126,22 +1233,22 @@ func TestTPS_PayloadSizeScaling(t *testing.T) { }) } - t.Logf("╚══════════╩══════════╩═════════════╩══════════╩══════════╩════════════════╝") - t.Logf("Total emits = successful Emit() calls in each %s window (per payload size).", payloadDuration) + t.Logf("╚══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") + t.Logf("Total emits = successful Emit() calls in each %s window. 
Q KB* = sum(octet_length(payload))/1024 (excludes index overhead).", payloadDuration) t.Logf("TestTPS_PayloadSizeScaling finished in %s", time.Since(testStart).Round(time.Millisecond)) summaryLines := []string{ fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)), - "╔══════════════════════════════════════════════════════════════════════════╗", - "║ 1k TPS × PAYLOAD SIZE SCALING ║", - "╠══════════╦══════════╦═════════════╦══════════╦══════════╦════════════════╣", - "║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║", - "║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║", - "╠══════════╬══════════╬═════════════╬══════════╬══════════╬════════════════╣", + "╔════════════════════════════════════════════════════════════════════════════════════════════════════════════╗", + "║ 1k TPS × PAYLOAD SIZE SCALING ║", + "╠══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣", + "║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Q max ║ Q end ║ Q max ║ Q end ║", + "║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ (rows) ║ (rows) ║ (KB)* ║ (KB)* ║", + "╠══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣", } tpsPayloadMu.Lock() summaryLines = append(summaryLines, tpsPayloadRows...) tpsPayloadMu.Unlock() - summaryLines = append(summaryLines, "╚══════════╩══════════╩═════════════╩══════════╩══════════╩════════════════╝") + summaryLines = append(summaryLines, "╚══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") appendTPSummaryBlock("TestTPS_PayloadSizeScaling", summaryLines...) 
} diff --git a/system-tests/tests/smoke/cre/cre_suite_test.go b/system-tests/tests/smoke/cre/cre_suite_test.go index 29ff17329b7..df615e4e9e2 100644 --- a/system-tests/tests/smoke/cre/cre_suite_test.go +++ b/system-tests/tests/smoke/cre/cre_suite_test.go @@ -230,11 +230,13 @@ func Test_CRE_V2_Beholder_Suite(t *testing.T) { ExecuteLogStreamingTest(t, testEnv) } +// TODO: Add tests to suite func Test_CRE_V2_DurableEmitter(t *testing.T) { testEnv := t_helpers.SetupTestEnvironmentWithConfig(t, t_helpers.GetDefaultTestConfig(t)) ExecuteDurableEmitterTest(t, testEnv) } +// TODO: Add tests to suite func Test_CRE_V2_DurableEmitter_Load(t *testing.T) { testEnv := t_helpers.SetupTestEnvironmentWithConfig(t, t_helpers.GetDefaultTestConfig(t)) ExecuteDurableEmitterLoadTest(t, testEnv) diff --git a/system-tests/tests/smoke/cre/v2_durable_emitter_test.go b/system-tests/tests/smoke/cre/v2_durable_emitter_test.go index 15a9b351a02..cc467648d4b 100644 --- a/system-tests/tests/smoke/cre/v2_durable_emitter_test.go +++ b/system-tests/tests/smoke/cre/v2_durable_emitter_test.go @@ -78,6 +78,17 @@ func countPendingDurableEvents(ctx context.Context, db *sql.DB) (int64, error) { return count, err } +// resetDurableEventQueue removes all pending durable events so queue depth and pending +// counts don't carry over from other tests or earlier suite steps on the same DB. +func resetDurableEventQueue(ctx context.Context, t *testing.T, db *sql.DB) { + t.Helper() + res, err := db.ExecContext(ctx, `DELETE FROM cre.chip_durable_events`) + require.NoError(t, err) + n, err := res.RowsAffected() + require.NoError(t, err) + t.Logf("cleared cre.chip_durable_events (%d rows removed before test)", n) +} + // ExecuteDurableEmitterTest verifies the DurableEmitter is active and // functioning by deploying a cron workflow that emits events, then checking // that chip_durable_events sees sustained insert+delete activity over time. 
@@ -90,6 +101,8 @@ func ExecuteDurableEmitterTest(t *testing.T, testEnv *ttypes.TestEnvironment) { _, err := countPendingDurableEvents(t.Context(), db) require.NoError(t, err, "cre.chip_durable_events table should exist — check migration 0295") + resetDurableEventQueue(t.Context(), t, db) + baseline, err := snapshotDurableEventStats(t.Context(), db) require.NoError(t, err) t.Logf("baseline chip_durable_events stats: inserts=%d deletes=%d", baseline.inserts, baseline.deletes) @@ -151,16 +164,26 @@ func ExecuteDurableEmitterLoadTest(t *testing.T, testEnv *ttypes.TestEnvironment _, err := countPendingDurableEvents(t.Context(), db) require.NoError(t, err, "cre.chip_durable_events table should exist") + resetDurableEventQueue(t.Context(), t, db) + baseline, err := snapshotDurableEventStats(t.Context(), db) require.NoError(t, err) t.Logf("baseline: inserts=%d deletes=%d", baseline.inserts, baseline.deletes) - // Deploy multiple cron workflows, each firing every second. + // Deploy multiple cron workflows, each firing as fast as CRE allows. + // + // Cron uses standard_capabilities.json: "fastestScheduleIntervalSeconds": 1, so the + // minimum interval between ticks is 1s — sub-second schedules are not supported. + // */1 * * * * * = every second (maximum trigger rate for this stack). + // // Each execution emits ~3-5 events per node. With 4 nodes and N workflows, - // we expect roughly N * 4 * 4 = 16N events/sec across the DON. - const numWorkflows = 5 + // rough order-of-magnitude: ~16N events/sec across the DON (varies by workflow). + // + // Tune numWorkflows down if registration/deploy flaps (resource limits are env-specific). + // Soak tests use 20 workflows; we default higher for load here. + const numWorkflows = 20 // TODO: Lower for CI or don't run this test on CI? 
cronConfig := crontypes.WorkflowConfig{ - Schedule: "*/1 * * * * *", // every second + Schedule: "*/1 * * * * *", // every 1s (fastest allowed; cannot go faster without capability changes) } lggr.Info().Msgf("Deploying %d high-frequency cron workflows...", numWorkflows) @@ -236,13 +259,21 @@ done: t.Logf("║ Final pending: %-6d ║", pending) t.Logf("╚════════════════════════════════════════════════╝") - // Sanity checks. - assert.Greater(t, totalInserts, int64(100), - "expected significant event volume from %d workflows", numWorkflows) + // Sanity checks (scale with workflow count × 3min window). + minInserts := int64(numWorkflows * 40) + assert.Greater(t, totalInserts, minInserts, + "expected significant event volume from %d workflows (min inserts %d)", numWorkflows, minInserts) assert.Greater(t, totalDeletes, int64(0), "deletes must occur — chip delivery is required") - assert.LessOrEqual(t, pending, int64(50), - "queue should not grow unboundedly with healthy chip ingress") + + // Backlog scales with how many workflows emit concurrently; this bounds "runaway" growth while allowing + // steady-state queue depth under multi-workflow load. (12 was tight — real runs can spike a few rows over.) 
+ const maxPendingPerWorkflow = 16 + maxAllowedPending := int64(numWorkflows * maxPendingPerWorkflow) + assert.LessOrEqual(t, maxPending, maxAllowedPending, + "peak queue depth should stay bounded (max %d rows for %d workflows)", maxAllowedPending, numWorkflows) + assert.LessOrEqual(t, pending, maxAllowedPending, + "final pending should stay bounded (max %d rows for %d workflows)", maxAllowedPending, numWorkflows) lggr.Info().Msg("Durable emitter load test completed") } From 16f423fbb83e5320a4a91d05f5da82637f14057a Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Wed, 25 Mar 2026 10:31:03 -0400 Subject: [PATCH 4/9] Test --- .../cre/environment/configs/chip-ingress.toml | 4 +- .../cre/environment/environment/beholder.go | 6 + .../environment/chip_demo_loadtest_schema.go | 69 +++++++++++ core/scripts/go.mod | 4 +- core/scripts/go.sum | 4 +- .../beholder/durable_emitter_load_test.go | 107 +++++++++++++++++- deployment/go.mod | 2 +- deployment/go.sum | 4 +- go.mod | 2 +- go.sum | 4 +- integration-tests/go.mod | 2 +- integration-tests/go.sum | 4 +- integration-tests/load/go.mod | 2 +- integration-tests/load/go.sum | 4 +- system-tests/lib/go.mod | 2 +- system-tests/lib/go.sum | 4 +- system-tests/tests/go.mod | 2 +- system-tests/tests/go.sum | 4 +- 18 files changed, 206 insertions(+), 24 deletions(-) create mode 100644 core/scripts/cre/environment/environment/chip_demo_loadtest_schema.go diff --git a/core/scripts/cre/environment/configs/chip-ingress.toml b/core/scripts/cre/environment/configs/chip-ingress.toml index 7240b29f3d0..23c59a70e78 100644 --- a/core/scripts/cre/environment/configs/chip-ingress.toml +++ b/core/scripts/cre/environment/configs/chip-ingress.toml @@ -4,4 +4,6 @@ # compose_file='https://raw.githubusercontent.com/smartcontractkit/chainlink-testing-framework/refs/tags/framework/components/dockercompose/v0.1.19/framework/components/dockercompose/chip_ingress_set/docker-compose.yml' [kafka] - topics=['cre'] \ No newline at end of file + # `cre` — 
workflow telemetry. `chip-demo` — Kafka topic for Atlas demo / DurableEmitter load tests + # when using CHIP_INGRESS_TEST_ADDR against local Beholder (see core/services/beholder/durable_emitter_load_test.go). + topics=['cre', 'chip-demo'] \ No newline at end of file diff --git a/core/scripts/cre/environment/environment/beholder.go b/core/scripts/cre/environment/environment/beholder.go index 826498bcf80..7ed29938972 100644 --- a/core/scripts/cre/environment/environment/beholder.go +++ b/core/scripts/cre/environment/environment/beholder.go @@ -701,6 +701,12 @@ and make sure that the sink is pointing to correct upstream endpoint ('localhost return errors.Wrap(topicsErr, "failed to create topics") } + if out.ChipIngress != nil && out.ChipIngress.GRPCExternalURL != "" { + if regErr := registerChipDemoLoadTestSchema(cmdContext, out.ChipIngress.GRPCExternalURL); regErr != nil { + framework.L.Warn().Err(regErr).Msg("chip-demo schema registration failed (durable emitter load tests with CHIP_INGRESS_TEST_ADDR may not drain until this succeeds; check Chip / auth)") + } + } + fmt.Print(libformat.PurpleText("%s", stageGen.WrapAndNext("Created topics in %.2f seconds", stageGen.Elapsed().Seconds()))) for _, topic := range in.Kafka.Topics { diff --git a/core/scripts/cre/environment/environment/chip_demo_loadtest_schema.go b/core/scripts/cre/environment/environment/chip_demo_loadtest_schema.go new file mode 100644 index 00000000000..e6f86a89ec1 --- /dev/null +++ b/core/scripts/cre/environment/environment/chip_demo_loadtest_schema.go @@ -0,0 +1,69 @@ +package environment + +import ( + "context" + "strings" + "time" + + "github.com/pkg/errors" + + "github.com/smartcontractkit/chainlink-testing-framework/framework" + "github.com/smartcontractkit/chainlink-common/pkg/chipingress" + chipingresspb "github.com/smartcontractkit/chainlink-common/pkg/chipingress/pb" +) + +// chipDemoLoadTestProto is the raw .proto for schema subject chip-demo-pb.DemoClientPayload. 
+// Keep in sync with core/services/beholder/chip_load_test_demo.proto and atlas chip-ingress demo client. +const chipDemoLoadTestProto = `syntax = "proto3"; + +option go_package = "github.com/smartcontractkit/chainlink/v2/core/services/beholder;beholder"; + +package pb; + +message DemoClientPayload { + string id = 1; + string domain = 2; + string entity = 3; + int64 batch_num = 4; + int64 message_num = 5; + int64 batch_position = 6; +} +` + +// registerChipDemoLoadTestSchema registers the chip-demo protobuf used by DurableEmitter load tests +// (TestTPS_* with CHIP_INGRESS_TEST_ADDR) against the local CRE Beholder Chip Ingress. +// It uses the same demo basic-auth account as atlas/chip-ingress docker-compose (CE_SA_CHIP_INGRESS_DEMO_CLIENT). +func registerChipDemoLoadTestSchema(ctx context.Context, chipGRPCAddress string) error { + if strings.TrimSpace(chipGRPCAddress) == "" { + return errors.New("chip gRPC address is empty") + } + + opts := []chipingress.Opt{ + chipingress.WithInsecureConnection(), + chipingress.WithBasicAuth("chip-ingress-demo-client", "password"), + } + c, err := chipingress.NewClient(chipGRPCAddress, opts...) 
+ if err != nil { + return errors.Wrap(err, "chipingress.NewClient") + } + defer func() { _ = c.Close() }() + + regCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + _, err = c.RegisterSchemas(regCtx, &chipingresspb.Schema{ + Subject: "chip-demo-pb.DemoClientPayload", + Schema: chipDemoLoadTestProto, + Format: chipingresspb.SchemaType_PROTOBUF, + }) + if err != nil { + msg := strings.ToLower(err.Error()) + if strings.Contains(msg, "already") || strings.Contains(msg, "exists") || strings.Contains(msg, "duplicate") { + framework.L.Info().Msg("chip-demo load-test schema already registered (chip-demo-pb.DemoClientPayload)") + return nil + } + return errors.Wrap(err, "RegisterSchemas chip-demo-pb.DemoClientPayload") + } + framework.L.Info().Msg("registered chip-demo load-test schema (chip-demo-pb.DemoClientPayload) for durable emitter / external Chip tests") + return nil +} diff --git a/core/scripts/go.mod b/core/scripts/go.mod index 7292cac22ec..c377c629693 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -46,8 +46,9 @@ require ( github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db github.com/smartcontractkit/chainlink-common/keystore v1.0.2 + github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 github.com/smartcontractkit/chainlink-data-streams v0.1.12 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 @@ -492,7 +493,6 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 // indirect github.com/smartcontractkit/chainlink-ccip/deployment 
v0.0.0-20260317185256-d5f7db87ae70 // indirect github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a // indirect - github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 // indirect github.com/smartcontractkit/chainlink-evm/contracts/cre/gobindings v0.0.0-20260107191744-4b93f62cffe3 // indirect github.com/smartcontractkit/chainlink-feeds v0.1.2-0.20250227211209-7cd000095135 // indirect github.com/smartcontractkit/chainlink-framework/capabilities v0.0.0-20250818175541-3389ac08a563 // indirect diff --git a/core/scripts/go.sum b/core/scripts/go.sum index 6820ee94cb5..d5d634c1245 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1634,8 +1634,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= 
github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/core/services/beholder/durable_emitter_load_test.go b/core/services/beholder/durable_emitter_load_test.go index 772b2825356..0bc09e8aee2 100644 --- a/core/services/beholder/durable_emitter_load_test.go +++ b/core/services/beholder/durable_emitter_load_test.go @@ -14,6 +14,8 @@ package beholder_test // Running a real server: see atlas/chip-ingress/README.md. You need Kafka/Redpanda, the // `chip-demo` topic, and schema subject `chip-demo-pb.DemoClientPayload` (run // `make create-topic-and-schema` from atlas/chip-ingress, or equivalent rpk commands). +// CRE local Beholder (`go run . env beholder start` / `env start --with-beholder`) creates +// `chip-demo` and registers this schema automatically; see core/scripts/cre/environment/configs/chip-ingress.toml. // Tests call RegisterSchemas with the bundled proto; Chip still needs the topic to exist for Kafka. // External mode uses the Atlas demo shape: chip-demo / pb.DemoClientPayload + protobuf payload. // If unset, CHIP_INGRESS_TEST_BASIC_AUTH_USER/PASS default to chip-ingress-demo-client / password @@ -690,6 +692,98 @@ func (s *emitLatencyStats) count() int { return len(s.samples) } +func (s *emitLatencyStats) mean() time.Duration { + s.mu.Lock() + defer s.mu.Unlock() + if len(s.samples) == 0 { + return 0 + } + var sum time.Duration + for _, v := range s.samples { + sum += v + } + return sum / time.Duration(len(s.samples)) +} + +func (s *emitLatencyStats) sum() time.Duration { + s.mu.Lock() + defer s.mu.Unlock() + var t time.Duration + for _, v := range s.samples { + t += v + } + return t +} + +// pipelineDeliveryStats aggregates DurableEmitterHooks samples to compare Chip Publish vs DB Delete cost. 
+type pipelineDeliveryStats struct { + immPub, immDel, batchPub, batchDel emitLatencyStats + immPubErr, batchPubErr atomic.Int64 +} + +func newPipelineHooks(p *pipelineDeliveryStats) *beholder.DurableEmitterHooks { + return &beholder.DurableEmitterHooks{ + OnImmediatePublish: func(d time.Duration, err error) { + if err != nil { + p.immPubErr.Add(1) + } + p.immPub.record(d) + }, + OnImmediateDelete: func(d time.Duration, _ error) { + p.immDel.record(d) + }, + OnRetransmitBatchPublish: func(d time.Duration, _ int, err error) { + if err != nil { + p.batchPubErr.Add(1) + } + p.batchPub.record(d) + }, + OnRetransmitBatchDeletes: func(d time.Duration, _ int) { + p.batchDel.record(d) + }, + } +} + +func durMs(d time.Duration) float64 { + return float64(d.Microseconds()) / 1000.0 +} + +func logPipelineDeliverySummary(t *testing.T, pipe *pipelineDeliveryStats) { + t.Helper() + ipN := pipe.immPub.count() + idN := pipe.immDel.count() + t.Logf("Pipeline — immediate Publish: n=%d errs=%d p50=%.3f ms p99=%.3f ms mean=%.3f ms Σ=%.1f ms", + ipN, pipe.immPubErr.Load(), + durMs(pipe.immPub.percentile(0.50)), durMs(pipe.immPub.percentile(0.99)), + durMs(pipe.immPub.mean()), durMs(pipe.immPub.sum())) + t.Logf("Pipeline — immediate Delete: n=%d p50=%.3f ms p99=%.3f ms mean=%.3f ms Σ=%.1f ms", + idN, + durMs(pipe.immDel.percentile(0.50)), durMs(pipe.immDel.percentile(0.99)), + durMs(pipe.immDel.mean()), durMs(pipe.immDel.sum())) + + bpN := pipe.batchPub.count() + if bpN > 0 { + t.Logf("Pipeline — retransmit PublishBatch: batches=%d errs=%d p50=%.3f ms mean=%.3f ms | delete-loop batches=%d mean_loop=%.3f ms", + bpN, pipe.batchPubErr.Load(), + durMs(pipe.batchPub.percentile(0.50)), durMs(pipe.batchPub.mean()), + pipe.batchDel.count(), durMs(pipe.batchDel.mean())) + } + + if ipN >= 50 && idN >= 50 { + pm, dm := durMs(pipe.immPub.mean()), durMs(pipe.immDel.mean()) + switch { + case pm > 3*dm && pm > 0.5: + t.Logf("Bottleneck hint: Publish mean %.3f ms ≫ Delete mean %.3f ms — likely Chip / 
gRPC bound", pm, dm) + case dm > 3*pm && dm > 0.5: + t.Logf("Bottleneck hint: Delete mean %.3f ms ≫ Publish mean %.3f ms — likely Postgres delete bound", dm, pm) + default: + t.Logf("Bottleneck hint: Publish %.3f ms vs Delete %.3f ms comparable (per successful immediate delivery)", pm, dm) + } + } else { + t.Logf("Bottleneck hint: few completed immediate deliveries in window (pub=%d del=%d); extend duration or check async backlog", ipN, idN) + } +} + // rateLimitEmitResult is the outcome of runRateLimitedEmit. type rateLimitEmitResult struct { stats *emitLatencyStats @@ -906,6 +1000,8 @@ func TestTPS_RampUp(t *testing.T) { cfg.RetransmitInterval = 1 * time.Second cfg.RetransmitAfter = 3 * time.Second cfg.RetransmitBatchSize = 500 + pipe := &pipelineDeliveryStats{} + cfg.Hooks = newPipelineHooks(pipe) em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop()) require.NoError(t, err) @@ -913,7 +1009,7 @@ func TestTPS_RampUp(t *testing.T) { em.Start(ctx) defer em.Close() - const duration = 10 * time.Second + const duration = 2 * time.Minute const concurrency = 20 t.Logf(">>> level %d TPS: emitting for %s @ concurrency=%d (progress bar on stdout)", targetTPS, duration, concurrency) @@ -927,6 +1023,9 @@ func TestTPS_RampUp(t *testing.T) { t.Logf(">>> level %d TPS: sleeping 2s for async publishes...", targetTPS) time.Sleep(2 * time.Second) + t.Logf(">>> level %d TPS: pipeline delivery (Publish vs Delete)", targetTPS) + logPipelineDeliverySummary(t, pipe) + achieved := float64(stats.count()) / duration.Seconds() p50 := stats.percentile(0.50) p99 := stats.percentile(0.99) @@ -989,6 +1088,8 @@ func TestTPS_Sustained1k(t *testing.T) { cfg.RetransmitInterval = 1 * time.Second cfg.RetransmitAfter = 3 * time.Second cfg.RetransmitBatchSize = 500 + pipe := &pipelineDeliveryStats{} + cfg.Hooks = newPipelineHooks(pipe) em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop()) require.NoError(t, err) @@ -1020,6 +1121,9 @@ func TestTPS_Sustained1k(t 
*testing.T) { }, 30*time.Second, 500*time.Millisecond, "pipeline should drain after emit phase ends") drainTime := time.Since(drainStart) + t.Logf("Pipeline delivery after drain (full async + retransmit settled):") + logPipelineDeliverySummary(t, pipe) + t.Logf("╔════════════════════════════════════════════════════╗") t.Logf("║ SUSTAINED 1k TPS TEST RESULTS ║") t.Logf("╠════════════════════════════════════════════════════╣") @@ -1044,6 +1148,7 @@ func TestTPS_Sustained1k(t *testing.T) { fmt.Sprintf("target TPS: %d, achieved: %.0f, failures: %d", targetTPS, achievedTPS, stats.failures.Load()), fmt.Sprintf("emit p50/p99 ms: %.2f / %.2f", float64(stats.percentile(0.50).Microseconds())/1000.0, float64(stats.percentile(0.99).Microseconds())/1000.0), fmt.Sprintf("queue max during emit: %d rows, %s KB payload (sum octet_length/1024)", emitRes.maxQueueDepth, formatQueueKB(emitRes.maxQueuePayloadBytes)), + fmt.Sprintf("pipeline imm Publish/Delete means ms: %.3f / %.3f (n=%d/%d)", durMs(pipe.immPub.mean()), durMs(pipe.immDel.mean()), pipe.immPub.count(), pipe.immDel.count()), fmt.Sprintf("server events: %s, drain time: %s", formatMockServerEvents(srv), drainTime.Round(time.Millisecond)), ) diff --git a/deployment/go.mod b/deployment/go.mod index df724899f01..da9ad763038 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -44,7 +44,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm 
v0.3.4-0.20260318010722-59d4165024f1 diff --git a/deployment/go.sum b/deployment/go.sum index b2f5082ab61..ea1aee515b9 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -1387,8 +1387,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/go.mod b/go.mod index 65b0d29d153..4107fe6f0f2 100644 --- a/go.mod +++ b/go.mod @@ -85,7 +85,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5 github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a - 
github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 github.com/smartcontractkit/chainlink-data-streams v0.1.12 diff --git a/go.sum b/go.sum index 7ebf7e4fd78..9a41f24f570 100644 --- a/go.sum +++ b/go.sum @@ -1235,8 +1235,8 @@ github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5/go.mod h1:xtZNi6pOKdC3sLvokDvXOhgHzT+cyBqH/gWwvxTxqrg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index 80fad1bd0d2..3ae89dd322a 100644 --- a/integration-tests/go.mod +++ 
b/integration-tests/go.mod @@ -50,7 +50,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/integration-tests/go.sum b/integration-tests/go.sum index d2e1935b9a4..7f511f14fc3 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1626,8 +1626,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 
h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index e5b9159ffbc..4bf8129f4e3 100644 --- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -31,7 +31,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20260119171452-39c98c3b33cd diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index 4f5d8e5a280..abcc6d62f5c 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1604,8 +1604,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e 
h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/lib/go.mod b/system-tests/lib/go.mod index 31961aaade5..a28f3ea8a13 100644 --- a/system-tests/lib/go.mod +++ b/system-tests/lib/go.mod @@ -32,7 +32,7 @@ require ( github.com/sethvargo/go-retry v0.3.0 github.com/smartcontractkit/chain-selectors v1.0.97 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/system-tests/lib/go.sum b/system-tests/lib/go.sum index a6a2faa6d33..fc9f69bfe3e 100644 --- a/system-tests/lib/go.sum +++ b/system-tests/lib/go.sum @@ -1597,8 +1597,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv 
v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/tests/go.mod b/system-tests/tests/go.mod index 6b88a78879a..9e83527f6d5 100644 --- a/system-tests/tests/go.mod +++ b/system-tests/tests/go.mod @@ -54,7 +54,7 @@ require ( github.com/rs/zerolog v1.34.0 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chain-selectors v1.0.97 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.12 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/system-tests/tests/go.sum b/system-tests/tests/go.sum index 5125b702884..50bd9873589 100644 --- a/system-tests/tests/go.sum +++ b/system-tests/tests/go.sum @@ -1781,8 +1781,8 @@ 
github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e h1:JQ78g44kY0Cf83MvwUOvRxAiDBrTm+NkUZx4iuSYzcg= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260323153253-cc10b6d11a7e/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= From 4f3f018c431614e9bd8bd6f519f2140fcea13d7d Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Wed, 25 Mar 2026 18:06:32 -0400 Subject: [PATCH 5/9] Latest changes --- core/config/telemetry_config.go | 3 + core/config/toml/types.go | 4 + .../cre/environment/configs/chip-ingress.toml | 7 +- .../cre/environment/environment/beholder.go | 81 +++++--- core/scripts/go.mod | 2 +- core/scripts/go.sum | 4 +- .../beholder/durable_emitter_load_test.go | 190 +++++++++++------- .../beholder/durable_event_store_orm.go | 45 ++++- .../beholder/durable_event_store_orm_test.go | 18 ++ 
core/services/chainlink/application.go | 18 +- core/services/chainlink/config_telemetry.go | 23 +++ deployment/go.mod | 2 +- deployment/go.sum | 4 +- go.mod | 2 +- go.sum | 4 +- integration-tests/go.mod | 2 +- integration-tests/go.sum | 4 +- integration-tests/load/go.mod | 2 +- integration-tests/load/go.sum | 4 +- system-tests/lib/go.mod | 2 +- system-tests/lib/go.sum | 4 +- system-tests/tests/go.mod | 2 +- system-tests/tests/go.sum | 4 +- .../tests/smoke/cre/cre_suite_test.go | 2 +- 24 files changed, 299 insertions(+), 134 deletions(-) diff --git a/core/config/telemetry_config.go b/core/config/telemetry_config.go index a0a175ff1a7..a347fb4afaa 100644 --- a/core/config/telemetry_config.go +++ b/core/config/telemetry_config.go @@ -19,6 +19,9 @@ type Telemetry interface { ChipIngressEndpoint() string ChipIngressInsecureConnection() bool DurableEmitterEnabled() bool + // DurableEmitterPersistSources lists CloudEvent Source values (beholder_domain) that may be + // written to the durable Chip queue. See chainlink telemetry config for defaults and wildcards. 
+ DurableEmitterPersistSources() []string HeartbeatInterval() time.Duration LogStreamingEnabled() bool LogLevel() zapcore.Level diff --git a/core/config/toml/types.go b/core/config/toml/types.go index dca2976df90..259495c95e9 100644 --- a/core/config/toml/types.go +++ b/core/config/toml/types.go @@ -2763,6 +2763,7 @@ type Telemetry struct { ChipIngressEndpoint *string ChipIngressInsecureConnection *bool DurableEmitterEnabled *bool + DurableEmitterPersistSources []string `toml:",omitempty"` HeartbeatInterval *commonconfig.Duration LogLevel *string LogStreamingEnabled *bool @@ -2810,6 +2811,9 @@ func (b *Telemetry) setFrom(f *Telemetry) { if v := f.DurableEmitterEnabled; v != nil { b.DurableEmitterEnabled = v } + if f.DurableEmitterPersistSources != nil { + b.DurableEmitterPersistSources = f.DurableEmitterPersistSources + } if v := f.HeartbeatInterval; v != nil { b.HeartbeatInterval = v } diff --git a/core/scripts/cre/environment/configs/chip-ingress.toml b/core/scripts/cre/environment/configs/chip-ingress.toml index 23c59a70e78..d0908832164 100644 --- a/core/scripts/cre/environment/configs/chip-ingress.toml +++ b/core/scripts/cre/environment/configs/chip-ingress.toml @@ -4,6 +4,7 @@ # compose_file='https://raw.githubusercontent.com/smartcontractkit/chainlink-testing-framework/refs/tags/framework/components/dockercompose/v0.1.19/framework/components/dockercompose/chip_ingress_set/docker-compose.yml' [kafka] - # `cre` — workflow telemetry. `chip-demo` — Kafka topic for Atlas demo / DurableEmitter load tests - # when using CHIP_INGRESS_TEST_ADDR against local Beholder (see core/services/beholder/durable_emitter_load_test.go). - topics=['cre', 'chip-demo'] \ No newline at end of file + # `cre` — workflow telemetry (source platform→cre shim). `chip-demo` — Atlas demo / DurableEmitter load tests + # (CHIP_INGRESS_TEST_ADDR; see core/services/beholder/durable_emitter_load_test.go). 
+ # `node-platform` — PluginRelayerConfigEmitter / common.v1.ChainPluginConfig (chainlink-protos node-platform/chip-schemas.json). + topics=['cre', 'chip-demo', 'node-platform'] \ No newline at end of file diff --git a/core/scripts/cre/environment/environment/beholder.go b/core/scripts/cre/environment/environment/beholder.go index 7ed29938972..6f98fbe599b 100644 --- a/core/scripts/cre/environment/environment/beholder.go +++ b/core/scripts/cre/environment/environment/beholder.go @@ -37,56 +37,71 @@ type moduleInfo struct { Version string `json:"Version"` } -// getSchemaSetFromGoMod uses `go list` to extract the version/commit ref -// from the github.com/smartcontractkit/chainlink-protos/workflows/go dependency. -// It returns a SchemaSet with hardcoded values matching default.toml config. -func getSchemaSetFromGoMod(ctx context.Context) ([]chipingressset.SchemaSet, error) { - const targetModule = "github.com/smartcontractkit/chainlink-protos/workflows/go" +const chainlinkProtosGitURI = "https://github.com/smartcontractkit/chainlink-protos" - // Get the absolute path to the repository root (where go.mod is located) - repoRoot, err := filepath.Abs(relativePathToRepoRoot) - if err != nil { - return nil, errors.Wrap(err, "failed to get absolute path to repository root") - } - - // Use `go list -m -json` to get module information +// schemaCommitRefFromGoMod runs `go list -m -json` for targetModule from repoRoot and returns the ref for FetchAndRegisterProtos. 
+func schemaCommitRefFromGoMod(ctx context.Context, repoRoot, targetModule string) (ref string, rawVersion string, err error) { cmd := exec.CommandContext(ctx, "go", "list", "-m", "-json", targetModule) cmd.Dir = repoRoot - output, err := cmd.Output() - if err != nil { - return nil, errors.Wrapf(err, "failed to run 'go list -m -json %s'", targetModule) + output, cmdErr := cmd.Output() + if cmdErr != nil { + return "", "", errors.Wrapf(cmdErr, "failed to run 'go list -m -json %s'", targetModule) } - // Parse JSON output var modInfo moduleInfo - if err := json.Unmarshal(output, &modInfo); err != nil { - return nil, errors.Wrap(err, "failed to parse go list JSON output") + if unmarshalErr := json.Unmarshal(output, &modInfo); unmarshalErr != nil { + return "", "", errors.Wrap(unmarshalErr, "failed to parse go list JSON output") } if modInfo.Version == "" { - return nil, errors.Errorf("no version found for module %s", targetModule) + return "", "", errors.Errorf("no version found for module %s", targetModule) } - // Extract commit ref from version string - // Support various formats: - // 1. v1.2.1 -> use as-is - // 2. v0.0.0-20211026045750-20ab5afb07e3 -> extract short hash (20ab5afb07e3) - // 3. 2a35b54f48ae06be4cc81c768dc9cc9e92249571 -> full commit hash, use as-is - // 4. 
v0.0.0-YYYYMMDDHHMMSS-SHORTHASH -> extract short hash commitRef := extractCommitRef(modInfo.Version) + return commitRef, modInfo.Version, nil +} - framework.L.Info().Msgf("Extracted commit ref for %s: %s (from version: %s)", targetModule, commitRef, modInfo.Version) +// getSchemaSetFromGoMod resolves SchemaSets from chainlink-protos commits pinned in go.mod: +// - workflows (chip-cre.json) for CRE/workflow telemetry +// - node-platform (chip-schemas.json) for PluginRelayerConfigEmitter / common.v1.ChainPluginConfig +func getSchemaSetFromGoMod(ctx context.Context) ([]chipingressset.SchemaSet, error) { + const ( + workflowsModule = "github.com/smartcontractkit/chainlink-protos/workflows/go" + nodePlatformModule = "github.com/smartcontractkit/chainlink-protos/node-platform" + ) + + repoRoot, err := filepath.Abs(relativePathToRepoRoot) + if err != nil { + return nil, errors.Wrap(err, "failed to get absolute path to repository root") + } - // Return SchemaSet with hardcoded values from default.toml - schemaSet := chipingressset.SchemaSet{ - URI: "https://github.com/smartcontractkit/chainlink-protos", - Ref: commitRef, - SchemaDir: "workflows", - ConfigFile: "chip-cre.json", // file with mappings of protobufs to subjects, together with references + wfRef, wfVer, err := schemaCommitRefFromGoMod(ctx, repoRoot, workflowsModule) + if err != nil { + return nil, err + } + framework.L.Info().Msgf("Extracted commit ref for %s: %s (from version: %s)", workflowsModule, wfRef, wfVer) + + npRef, npVer, err := schemaCommitRefFromGoMod(ctx, repoRoot, nodePlatformModule) + if err != nil { + return nil, err } + framework.L.Info().Msgf("Extracted commit ref for %s: %s (from version: %s)", nodePlatformModule, npRef, npVer) - return []chipingressset.SchemaSet{schemaSet}, nil + return []chipingressset.SchemaSet{ + { + URI: chainlinkProtosGitURI, + Ref: wfRef, + SchemaDir: "workflows", + ConfigFile: "chip-cre.json", + }, + { + URI: chainlinkProtosGitURI, + Ref: npRef, + SchemaDir: 
"node-platform", + ConfigFile: "chip-schemas.json", + }, + }, nil } // extractCommitRef extracts a commit reference from various version formats diff --git a/core/scripts/go.mod b/core/scripts/go.mod index c377c629693..5551463ca74 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -46,7 +46,7 @@ require ( github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 github.com/smartcontractkit/chainlink-data-streams v0.1.12 diff --git a/core/scripts/go.sum b/core/scripts/go.sum index d5d634c1245..2ce2ac969fa 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1634,8 +1634,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= +github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/core/services/beholder/durable_emitter_load_test.go b/core/services/beholder/durable_emitter_load_test.go index 0bc09e8aee2..9c18094b786 100644 --- a/core/services/beholder/durable_emitter_load_test.go +++ b/core/services/beholder/durable_emitter_load_test.go @@ -334,7 +334,7 @@ func TestFullStack_SustainedThroughput(t *testing.T) { defer em.Close() const ( - totalEvents = 1000 + totalEvents = 100000 concurrency = 10 ) @@ -719,6 +719,8 @@ func (s *emitLatencyStats) sum() time.Duration { type pipelineDeliveryStats struct { immPub, immDel, batchPub, batchDel emitLatencyStats immPubErr, batchPubErr atomic.Int64 + // batchPubEventErrs is the sum of event counts for each failed retransmit Publish (1 per failed RPC). 
+ batchPubEventErrs atomic.Int64 } func newPipelineHooks(p *pipelineDeliveryStats) *beholder.DurableEmitterHooks { @@ -732,9 +734,10 @@ func newPipelineHooks(p *pipelineDeliveryStats) *beholder.DurableEmitterHooks { OnImmediateDelete: func(d time.Duration, _ error) { p.immDel.record(d) }, - OnRetransmitBatchPublish: func(d time.Duration, _ int, err error) { + OnRetransmitBatchPublish: func(d time.Duration, eventCount int, err error) { if err != nil { p.batchPubErr.Add(1) + p.batchPubEventErrs.Add(int64(eventCount)) } p.batchPub.record(d) }, @@ -763,8 +766,8 @@ func logPipelineDeliverySummary(t *testing.T, pipe *pipelineDeliveryStats) { bpN := pipe.batchPub.count() if bpN > 0 { - t.Logf("Pipeline — retransmit PublishBatch: batches=%d errs=%d p50=%.3f ms mean=%.3f ms | delete-loop batches=%d mean_loop=%.3f ms", - bpN, pipe.batchPubErr.Load(), + t.Logf("Pipeline — retransmit Publish (serial): rpcs=%d rpc_errs=%d evt_errs=%d p50=%.3f ms mean=%.3f ms | delete-hook_calls=%d mean_loop=%.3f ms", + bpN, pipe.batchPubErr.Load(), pipe.batchPubEventErrs.Load(), durMs(pipe.batchPub.percentile(0.50)), durMs(pipe.batchPub.mean()), pipe.batchDel.count(), durMs(pipe.batchDel.mean())) } @@ -793,6 +796,23 @@ type rateLimitEmitResult struct { // maxQueuePayloadBytes is the maximum observed sum(octet_length(payload)) for // rows still in the queue (serialized CloudEvent bytes stored in BYTEA). maxQueuePayloadBytes int64 + // ImmPublishFails is the count of failed immediate Publish RPCs in this window (one event each; needs retransmit). + ImmPublishFails int64 + // BatchPublishFailEvents is the sum of batch sizes for failed PublishBatch calls in this window. + BatchPublishFailEvents int64 +} + +// formatPubFailColumn formats publish-failure counts for result tables (8-char column). +// If there were failed batches, shows "imm+batchEv" when it fits, else "imm+…". 
+func formatPubFailColumn(imm, batchEv int64) string { + if batchEv == 0 { + return fmt.Sprintf("%-8d", imm) + } + combo := fmt.Sprintf("%d+%d", imm, batchEv) + if len(combo) <= 8 { + return fmt.Sprintf("%-8s", combo) + } + return fmt.Sprintf("%-8s", fmt.Sprintf("%d+..", imm)) } func bumpMaxQueueDepth(maxQ *atomic.Int64, c int64) { @@ -839,6 +859,8 @@ func formatQueueKB(payloadBytes int64) string { // If maxQueueDB is non-nil, polls cre.chip_durable_events during the emit window to // record peak backlog (async publish may lag inserts). // If progressLabel is non-empty, prints a live progress bar and emit count to stdout every 500ms. +// If pipe is non-nil (same *pipelineDeliveryStats wired via cfg.Hooks), ImmPublishFails and +// BatchPublishFailEvents are deltas for this emit window only. func runRateLimitedEmit( ctx context.Context, t testing.TB, @@ -849,9 +871,16 @@ func runRateLimitedEmit( payloadSize int, progressLabel string, maxQueueDB *sqlx.DB, + pipe *pipelineDeliveryStats, ) *rateLimitEmitResult { t.Helper() + var imm0, batchEv0 int64 + if pipe != nil { + imm0 = pipe.immPubErr.Load() + batchEv0 = pipe.batchPubEventErrs.Load() + } + stats := &emitLatencyStats{} var maxQ, maxPayloadBytes atomic.Int64 var emitCount atomic.Int64 @@ -960,18 +989,23 @@ func runRateLimitedEmit( bumpMaxQueuePayloadBytes(&maxPayloadBytes, b) } } - return &rateLimitEmitResult{ + res := &rateLimitEmitResult{ stats: stats, maxQueueDepth: maxQ.Load(), maxQueuePayloadBytes: maxPayloadBytes.Load(), } + if pipe != nil { + res.ImmPublishFails = pipe.immPubErr.Load() - imm0 + res.BatchPublishFailEvents = pipe.batchPubEventErrs.Load() - batchEv0 + } + return res } // TestTPS_RampUp tests the durable emitter at increasing TPS levels to find // the throughput ceiling. Each level gets its own DurableEmitter to avoid // carry-over. Measures achieved rate, Emit() latency, and queue depth. 
func TestTPS_RampUp(t *testing.T) { - levels := []int{100, 500, 1000, 2000, 5000, 10000} + levels := []int{100, 500, 1000, 2000} testStart := time.Now() tpsRampMu.Lock() @@ -980,12 +1014,12 @@ func TestTPS_RampUp(t *testing.T) { t.Logf("TPS ramp-up: levels=%v (each level: fresh DB + server + emitter)", levels) - t.Logf("╔════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗") - t.Logf("║ TPS RAMP-UP TEST RESULTS ║") - t.Logf("╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣") - t.Logf("║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Server ║ Q max ║ Q end ║ Q max ║ Q end ║") - t.Logf("║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ recv* ║ (rows) ║ (rows) ║ (KB)* ║ (KB)* ║") - t.Logf("╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣") + t.Logf("╔══════════════════════════════════════════════════════════════════════════════════════════════╗") + t.Logf("║ TPS RAMP-UP TEST RESULTS ║") + t.Logf("╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣") + t.Logf("║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Pub fail ║ Q max ║ Q end ║ Q max ║") + t.Logf("║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ (retry)* ║ (rows) ║ (rows) ║ (KB)* ║") + t.Logf("╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣") for _, targetTPS := range levels { t.Run(fmt.Sprintf("%d_tps", targetTPS), func(t *testing.T) { @@ -993,7 +1027,7 @@ func TestTPS_RampUp(t *testing.T) { t.Logf(">>> level %d TPS: provisioning direct DB + Chip endpoint...", targetTPS) db := directDB(t) - srv, client := startChipIngressOrMock(t) + _, client := startChipIngressOrMock(t) store := beholdersvc.NewPgDurableEventStore(db) cfg := beholder.DefaultDurableEmitterConfig() @@ -1009,12 
+1043,12 @@ func TestTPS_RampUp(t *testing.T) { em.Start(ctx) defer em.Close() - const duration = 2 * time.Minute + const duration = 1 * time.Minute const concurrency = 20 t.Logf(">>> level %d TPS: emitting for %s @ concurrency=%d (progress bar on stdout)", targetTPS, duration, concurrency) emitRes := runRateLimitedEmit(ctx, t, em, targetTPS, duration, concurrency, 256, - fmt.Sprintf("ramp_up/%d_tps", targetTPS), db) + fmt.Sprintf("ramp_up/%d_tps", targetTPS), db, pipe) stats := emitRes.stats emitPhase := time.Since(levelStart) t.Logf(">>> level %d TPS: emit phase wall time %s", targetTPS, emitPhase.Round(time.Millisecond)) @@ -1029,19 +1063,21 @@ func TestTPS_RampUp(t *testing.T) { achieved := float64(stats.count()) / duration.Seconds() p50 := stats.percentile(0.50) p99 := stats.percentile(0.99) - serverCol := formatMockServerEvents(srv) - queueEnd, queueEndBytes, err := queuePayloadStats(db, ctx) + queueEnd, _, err := queuePayloadStats(db, ctx) require.NoError(t, err) totalEmits := stats.count() - rowLine := fmt.Sprintf("║ %-9d ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-8d ║ %-8s ║ %-8d ║ %-8d ║ %-8s ║ %-8s ║", + if stats.failures.Load() > 0 { + t.Logf(">>> level %d TPS: Emit() (DB insert) failures: %d", targetTPS, stats.failures.Load()) + } + rowLine := fmt.Sprintf("║ %-9d ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-8s ║ %-8d ║ %-8d ║ %-8s ║", targetTPS, achieved, totalEmits, float64(p50.Microseconds())/1000.0, float64(p99.Microseconds())/1000.0, - stats.failures.Load(), - serverCol, emitRes.maxQueueDepth, queueEnd, - formatQueueKB(emitRes.maxQueuePayloadBytes), formatQueueKB(queueEndBytes)) + formatPubFailColumn(emitRes.ImmPublishFails, emitRes.BatchPublishFailEvents), + emitRes.maxQueueDepth, queueEnd, + formatQueueKB(emitRes.maxQueuePayloadBytes)) t.Log(rowLine) tpsRampMu.Lock() @@ -1050,26 +1086,26 @@ func TestTPS_RampUp(t *testing.T) { }) } - 
t.Logf("╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") - t.Logf("* Q max/end rows: peak & final row counts. Q max/end KB: sum(octet_length(payload)) for queued rows / 1024 " + - "(serialized event bytes; excludes index & heap overhead). Sampled ~50ms during emit; Q end after 2s settle. " + - "Server recv: mock; N/A with real Chip.") + t.Logf("╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") + t.Logf("* Pub fail: immediate Publish RPC errors (events need retransmit). a+b = a immediate fails + b events in failed PublishBatch. " + + "Emit() insert failures are logged per level if non-zero.") + t.Logf("* Q max / Q end: peak & final row counts (polled ~50ms; Q end after settle). Q max KB* = sum(octet_length(payload))/1024 for queued rows.") t.Logf("TestTPS_RampUp finished in %s", time.Since(testStart).Round(time.Millisecond)) summaryLines := []string{ fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)), - "╔════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗", - "║ TPS RAMP-UP TEST RESULTS ║", - "╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣", - "║ Target ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Server ║ Q max ║ Q end ║ Q max ║ Q end ║", - "║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ recv* ║ (rows) ║ (rows) ║ (KB)* ║ (KB)* ║", - "╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣", + "╔══════════════════════════════════════════════════════════════════════════════════════════════╗", + "║ TPS RAMP-UP TEST RESULTS ║", + "╠═══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣", + "║ Target ║ Achieved ║ Total emits ║ Emit 
p50 ║ Emit p99 ║ Pub fail ║ Q max ║ Q end ║ Q max ║", + "║ TPS ║ TPS ║ (success) ║ (ms) ║ (ms) ║ (retry)* ║ (rows) ║ (rows) ║ (KB)* ║", + "╠═══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣", } tpsRampMu.Lock() summaryLines = append(summaryLines, tpsRampRows...) tpsRampMu.Unlock() - summaryLines = append(summaryLines, "╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝", - "* Q KB = payload column bytes (sum octet_length) / 1024; excludes table/index overhead. Server recv: mock-only.") + summaryLines = append(summaryLines, "╚═══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝", + "* Q max KB* = sum(octet_length(payload))/1024 for queued rows (see test log footnotes).") appendTPSummaryBlock("TestTPS_RampUp", summaryLines...) } @@ -1081,7 +1117,7 @@ func TestTPS_Sustained1k(t *testing.T) { t.Logf("TestTPS_Sustained1k: provisioning DB + Chip server + emitter...") db := directDB(t) - srv, client := startChipIngressOrMock(t) + _, client := startChipIngressOrMock(t) store := beholdersvc.NewPgDurableEventStore(db) cfg := beholder.DefaultDurableEmitterConfig() @@ -1105,7 +1141,7 @@ func TestTPS_Sustained1k(t *testing.T) { t.Logf("Emit phase: target=%d TPS for %s @ concurrency=%d (progress bar on stdout)", targetTPS, duration, concurrency) emitStart := time.Now() - emitRes := runRateLimitedEmit(ctx, t, em, targetTPS, duration, concurrency, 256, "sustained_1k", db) + emitRes := runRateLimitedEmit(ctx, t, em, targetTPS, duration, concurrency, 256, "sustained_1k", db, pipe) stats := emitRes.stats achievedTPS := float64(stats.count()) / duration.Seconds() @@ -1131,12 +1167,12 @@ func TestTPS_Sustained1k(t *testing.T) { t.Logf("║ Duration: %-6s ║", duration) t.Logf("║ Total emitted: %-6d ║", stats.count()) t.Logf("║ Achieved TPS: %-6.0f ║", achievedTPS) - t.Logf("║ Emit failures: %-6d ║", 
stats.failures.Load()) + t.Logf("║ Pub fail (retry): %-8s (1st+batch ev) ║", formatPubFailColumn(emitRes.ImmPublishFails, emitRes.BatchPublishFailEvents)) + t.Logf("║ Emit insert fail: %-6d (DB path) ║", stats.failures.Load()) t.Logf("║ Emit p50 latency: %-6.2f ms ║", float64(stats.percentile(0.50).Microseconds())/1000.0) t.Logf("║ Emit p99 latency: %-6.2f ms ║", float64(stats.percentile(0.99).Microseconds())/1000.0) t.Logf("║ Queue max (emit): %-6d rows ║", emitRes.maxQueueDepth) t.Logf("║ Queue max (emit): %-10s KB payload* ║", formatQueueKB(emitRes.maxQueuePayloadBytes)) - t.Logf("║ Server received: %-6s (mock event count) ║", formatMockServerEvents(srv)) t.Logf("║ Drain time: %-6s ║", drainTime.Round(time.Millisecond)) t.Logf("╚════════════════════════════════════════════════════╝") t.Logf("* Queue KB = sum(octet_length(payload))/1024 for queued rows (excludes index/heap overhead).") @@ -1145,11 +1181,12 @@ func TestTPS_Sustained1k(t *testing.T) { appendTPSummaryBlock("TestTPS_Sustained1k", fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)), fmt.Sprintf("emit phase: %s", time.Since(emitStart).Round(time.Millisecond)), - fmt.Sprintf("target TPS: %d, achieved: %.0f, failures: %d", targetTPS, achievedTPS, stats.failures.Load()), + fmt.Sprintf("target TPS: %d, achieved: %.0f, pub_fail imm/batch_ev: %d/%d, emit_insert_fail: %d", + targetTPS, achievedTPS, emitRes.ImmPublishFails, emitRes.BatchPublishFailEvents, stats.failures.Load()), fmt.Sprintf("emit p50/p99 ms: %.2f / %.2f", float64(stats.percentile(0.50).Microseconds())/1000.0, float64(stats.percentile(0.99).Microseconds())/1000.0), fmt.Sprintf("queue max during emit: %d rows, %s KB payload (sum octet_length/1024)", emitRes.maxQueueDepth, formatQueueKB(emitRes.maxQueuePayloadBytes)), fmt.Sprintf("pipeline imm Publish/Delete means ms: %.3f / %.3f (n=%d/%d)", durMs(pipe.immPub.mean()), durMs(pipe.immDel.mean()), pipe.immPub.count(), pipe.immDel.count()), - fmt.Sprintf("server events: 
%s, drain time: %s", formatMockServerEvents(srv), drainTime.Round(time.Millisecond)), + fmt.Sprintf("drain time: %s", drainTime.Round(time.Millisecond)), ) assert.GreaterOrEqual(t, achievedTPS, float64(targetTPS)*0.9, @@ -1177,6 +1214,8 @@ func TestTPS_1k_WithChipOutage(t *testing.T) { cfg.RetransmitInterval = 1 * time.Second cfg.RetransmitAfter = 2 * time.Second cfg.RetransmitBatchSize = 500 + outagePipe := &pipelineDeliveryStats{} + cfg.Hooks = newPipelineHooks(outagePipe) em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop()) require.NoError(t, err) @@ -1191,7 +1230,7 @@ func TestTPS_1k_WithChipOutage(t *testing.T) { // Phase 1: 15s of healthy operation at 1k TPS. t.Logf("Phase 1: Healthy — emitting at %d TPS for 15s...", targetTPS) p1Start := time.Now() - phase1Res := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase1_healthy", db) + phase1Res := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase1_healthy", db, outagePipe) phase1Stats := phase1Res.stats t.Logf("Phase 1 emit finished in %s", time.Since(p1Start).Round(time.Millisecond)) time.Sleep(3 * time.Second) // let pipeline drain @@ -1204,20 +1243,20 @@ func TestTPS_1k_WithChipOutage(t *testing.T) { srv.setBatchErr(status.Error(codes.Unavailable, "chip down")) p2Start := time.Now() - phase2Res := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase2_chip_down", db) + phase2Res := runRateLimitedEmit(ctx, t, em, targetTPS, 15*time.Second, concurrency, 256, "outage/phase2_chip_down", db, outagePipe) phase2Stats := phase2Res.stats t.Logf("Phase 2 emit finished in %s", time.Since(p2Start).Round(time.Millisecond)) // Queue at end of outage phase (for drain math) + peak sampled during phase 2 emit window. 
- queueDuringOutage, queueDuringOutageBytes, err := queuePayloadStats(db, ctx) + queueDuringOutage, _, err := queuePayloadStats(db, ctx) require.NoError(t, err) - t.Logf("Phase 2 done: %d events emitted (%.0f TPS), queue end: %d rows / %s KB payload*, queue max (emit): %d rows / %s KB*", + t.Logf("Phase 2 done: %d events emitted (%.0f TPS), queue end: %d rows, queue max (emit): %d rows / %s KB*", phase2Stats.count(), float64(phase2Stats.count())/15.0, - queueDuringOutage, formatQueueKB(queueDuringOutageBytes), + queueDuringOutage, phase2Res.maxQueueDepth, formatQueueKB(phase2Res.maxQueuePayloadBytes)) assert.Equal(t, int64(0), phase2Stats.failures.Load(), - "Emit must not fail during Chip outage — DB insert should still work") + "Emit() must not fail during Chip outage — DB insert should still work") // Phase 3: Chip recovers. Stop emitting. Measure drain. t.Logf("Phase 3: Chip RECOVERED — measuring drain...") @@ -1239,26 +1278,27 @@ func TestTPS_1k_WithChipOutage(t *testing.T) { t.Logf("║ Phase 1 (healthy): ║") t.Logf("║ Emitted: %-6d events ║", phase1Stats.count()) t.Logf("║ p99 latency: %-6.2f ms ║", float64(phase1Stats.percentile(0.99).Microseconds())/1000.0) + t.Logf("║ Pub fail (retry): %-8s ║", formatPubFailColumn(phase1Res.ImmPublishFails, phase1Res.BatchPublishFailEvents)) t.Logf("║ Queue max (emit): %-6d rows / %-8s KB* ║", phase1Res.maxQueueDepth, formatQueueKB(phase1Res.maxQueuePayloadBytes)) t.Logf("║ Phase 2 (Chip down): ║") t.Logf("║ Emitted: %-6d events ║", phase2Stats.count()) t.Logf("║ p99 latency: %-6.2f ms ║", float64(phase2Stats.percentile(0.99).Microseconds())/1000.0) - t.Logf("║ Emit failures: %-6d ║", phase2Stats.failures.Load()) + t.Logf("║ Pub fail (retry): %-8s (Publish RPC errors) ║", formatPubFailColumn(phase2Res.ImmPublishFails, phase2Res.BatchPublishFailEvents)) + t.Logf("║ Emit insert fail: %-6d ║", phase2Stats.failures.Load()) t.Logf("║ Queue max (emit): %-6d rows / %-8s KB* ║", phase2Res.maxQueueDepth, 
formatQueueKB(phase2Res.maxQueuePayloadBytes)) - t.Logf("║ Queue end: %-6d rows / %-8s KB* ║", queueDuringOutage, formatQueueKB(queueDuringOutageBytes)) + t.Logf("║ Queue end: %-6d rows ║", queueDuringOutage) t.Logf("║ Phase 3 (recovery): ║") t.Logf("║ Drain time: %-6s ║", drainTime.Round(time.Millisecond)) t.Logf("║ Drain rate: %-6.0f events/sec ║", drainRate) - t.Logf("║ Server received: %-6d total ║", srv.totalEvents.Load()) t.Logf("╚════════════════════════════════════════════════════╝") - t.Logf("* KB = sum(octet_length(payload))/1024 for queued rows (excludes index/heap overhead).") + t.Logf("* Queue max KB = sum(octet_length(payload))/1024 for queued rows (excludes index/heap overhead).") t.Logf("TestTPS_1k_WithChipOutage finished in %s", time.Since(testStart).Round(time.Millisecond)) appendTPSummaryBlock("TestTPS_1k_WithChipOutage", fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)), - fmt.Sprintf("phase1 events: %d, phase2 events: %d, queue end: %d rows / %s KB, phase2 queue max: %d rows / %s KB", - phase1Stats.count(), phase2Stats.count(), queueDuringOutage, formatQueueKB(queueDuringOutageBytes), phase2Res.maxQueueDepth, formatQueueKB(phase2Res.maxQueuePayloadBytes)), - fmt.Sprintf("drain time: %s, drain rate: %.0f ev/s, server total: %d", drainTime.Round(time.Millisecond), drainRate, srv.totalEvents.Load()), + fmt.Sprintf("phase1 events: %d, phase2 events: %d, queue end: %d rows, phase2 queue max: %d rows / %s KB", + phase1Stats.count(), phase2Stats.count(), queueDuringOutage, phase2Res.maxQueueDepth, formatQueueKB(phase2Res.maxQueuePayloadBytes)), + fmt.Sprintf("drain time: %s, drain rate: %.0f ev/s", drainTime.Round(time.Millisecond), drainRate), ) } @@ -1284,12 +1324,12 @@ func TestTPS_PayloadSizeScaling(t *testing.T) { const payloadDuration = 15 * time.Second - t.Logf("╔════════════════════════════════════════════════════════════════════════════════════════════════════════════╗") - t.Logf("║ 1k TPS × PAYLOAD SIZE 
SCALING ║") - t.Logf("╠══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣") - t.Logf("║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Q max ║ Q end ║ Q max ║ Q end ║") - t.Logf("║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ (rows) ║ (rows) ║ (KB)* ║ (KB)* ║") - t.Logf("╠══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣") + t.Logf("╔════════════════════════════════════════════════════════════════════════════════════════════╗") + t.Logf("║ 1k TPS × PAYLOAD SIZE SCALING ║") + t.Logf("╠══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣") + t.Logf("║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Pub fail ║ Q max ║ Q end ║ Q max ║") + t.Logf("║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ (retry)* ║ (rows) ║ (rows) ║ (KB)* ║") + t.Logf("╠══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣") for _, s := range sizes { t.Run(s.name, func(t *testing.T) { @@ -1302,6 +1342,8 @@ func TestTPS_PayloadSizeScaling(t *testing.T) { cfg.RetransmitInterval = 1 * time.Second cfg.RetransmitAfter = 3 * time.Second cfg.RetransmitBatchSize = 500 + pipe := &pipelineDeliveryStats{} + cfg.Hooks = newPipelineHooks(pipe) em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Nop()) require.NoError(t, err) @@ -1315,21 +1357,25 @@ func TestTPS_PayloadSizeScaling(t *testing.T) { t.Logf(">>> payload %s: emitting %d TPS for %s", s.name, targetTPS, payloadDuration) emitRes := runRateLimitedEmit(ctx, t, em, targetTPS, payloadDuration, concurrency, s.size, - fmt.Sprintf("payload/%s", s.name), db) + fmt.Sprintf("payload/%s", s.name), db, pipe) stats := emitRes.stats - queueEnd, queueEndBytes, err := queuePayloadStats(db, ctx) + queueEnd, _, err := queuePayloadStats(db, ctx) require.NoError(t, err) achieved := float64(stats.count()) / 
payloadDuration.Seconds() totalEmits := stats.count() - rowLine := fmt.Sprintf("║ %-8s ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-8d ║ %-8d ║ %-8d ║ %-8s ║ %-8s ║", + if stats.failures.Load() > 0 { + t.Logf(">>> payload %s: Emit() insert failures: %d", s.name, stats.failures.Load()) + } + rowLine := fmt.Sprintf("║ %-8s ║ %-8.0f ║ %-11d ║ %-8.2f ║ %-8.2f ║ %-8s ║ %-8d ║ %-8d ║ %-8s ║", s.name, achieved, totalEmits, float64(stats.percentile(0.50).Microseconds())/1000.0, float64(stats.percentile(0.99).Microseconds())/1000.0, - stats.failures.Load(), emitRes.maxQueueDepth, queueEnd, - formatQueueKB(emitRes.maxQueuePayloadBytes), formatQueueKB(queueEndBytes)) + formatPubFailColumn(emitRes.ImmPublishFails, emitRes.BatchPublishFailEvents), + emitRes.maxQueueDepth, queueEnd, + formatQueueKB(emitRes.maxQueuePayloadBytes)) t.Log(rowLine) tpsPayloadMu.Lock() @@ -1338,22 +1384,22 @@ func TestTPS_PayloadSizeScaling(t *testing.T) { }) } - t.Logf("╚══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") - t.Logf("Total emits = successful Emit() calls in each %s window. Q KB* = sum(octet_length(payload))/1024 (excludes index overhead).", payloadDuration) + t.Logf("╚══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") + t.Logf("* Pub fail: failed Publish / PublishBatch (see ramp test footnote). Q max KB* = sum(octet_length(payload))/1024. 
Total emits = successful Emit() per %s.", payloadDuration) t.Logf("TestTPS_PayloadSizeScaling finished in %s", time.Since(testStart).Round(time.Millisecond)) summaryLines := []string{ fmt.Sprintf("total wall clock: %s", time.Since(testStart).Round(time.Millisecond)), - "╔════════════════════════════════════════════════════════════════════════════════════════════════════════════╗", - "║ 1k TPS × PAYLOAD SIZE SCALING ║", - "╠══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣", - "║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Failures ║ Q max ║ Q end ║ Q max ║ Q end ║", - "║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ ║ (rows) ║ (rows) ║ (KB)* ║ (KB)* ║", - "╠══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣", + "╔════════════════════════════════════════════════════════════════════════════════════════════╗", + "║ 1k TPS × PAYLOAD SIZE SCALING ║", + "╠══════════╦══════════╦═════════════╦══════════╦══════════╦══════════╦══════════╦══════════╦══════════╣", + "║ Payload ║ Achieved ║ Total emits ║ Emit p50 ║ Emit p99 ║ Pub fail ║ Q max ║ Q end ║ Q max ║", + "║ Size ║ TPS ║ (success) ║ (ms) ║ (ms) ║ (retry)* ║ (rows) ║ (rows) ║ (KB)* ║", + "╠══════════╬══════════╬═════════════╬══════════╬══════════╬══════════╬══════════╬══════════╬══════════╣", } tpsPayloadMu.Lock() summaryLines = append(summaryLines, tpsPayloadRows...) tpsPayloadMu.Unlock() - summaryLines = append(summaryLines, "╚══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") + summaryLines = append(summaryLines, "╚══════════╩══════════╩═════════════╩══════════╩══════════╩══════════╩══════════╩══════════╩══════════╝") appendTPSummaryBlock("TestTPS_PayloadSizeScaling", summaryLines...) 
} diff --git a/core/services/beholder/durable_event_store_orm.go b/core/services/beholder/durable_event_store_orm.go index 1f393cc256e..ec48b2ae49d 100644 --- a/core/services/beholder/durable_event_store_orm.go +++ b/core/services/beholder/durable_event_store_orm.go @@ -16,7 +16,10 @@ type PgDurableEventStore struct { ds sqlutil.DataSource } -var _ beholder.DurableEventStore = (*PgDurableEventStore)(nil) +var ( + _ beholder.DurableEventStore = (*PgDurableEventStore)(nil) + _ beholder.DurableQueueObserver = (*PgDurableEventStore)(nil) +) func NewPgDurableEventStore(ds sqlutil.DataSource) *PgDurableEventStore { return &PgDurableEventStore{ds: ds} @@ -84,3 +87,43 @@ SELECT count(*) FROM deleted` } return count, nil } + +type chipDurableQueueAgg struct { + Cnt int64 `db:"cnt"` + PayloadSum int64 `db:"payload_sum"` + MinCreated *time.Time `db:"min_created"` +} + +// ObserveDurableQueue implements beholder.DurableQueueObserver for queue depth / age gauges. +func (s *PgDurableEventStore) ObserveDurableQueue(ctx context.Context, eventTTL, nearExpiryLead time.Duration) (beholder.DurableQueueStats, error) { + const qAgg = ` +SELECT + count(*)::bigint AS cnt, + coalesce(sum(octet_length(payload)), 0)::bigint AS payload_sum, + min(created_at) AS min_created +FROM ` + chipDurableEventsTable + + var row chipDurableQueueAgg + if err := s.ds.GetContext(ctx, &row, qAgg); err != nil { + return beholder.DurableQueueStats{}, fmt.Errorf("durable queue aggregate: %w", err) + } + var st beholder.DurableQueueStats + st.Depth = row.Cnt + st.PayloadBytes = row.PayloadSum + if row.MinCreated != nil { + st.OldestPendingAge = time.Since(*row.MinCreated) + } + if eventTTL > 0 && nearExpiryLead > 0 && nearExpiryLead < eventTTL { + ttlSec := int64(eventTTL.Round(time.Second) / time.Second) + leadSec := int64(nearExpiryLead.Round(time.Second) / time.Second) + const qNear = ` +SELECT count(*)::bigint +FROM ` + chipDurableEventsTable + ` +WHERE created_at >= now() - ($1::bigint * interval '1 
second') + AND created_at < now() - (($1::bigint - $2::bigint) * interval '1 second')` + if err := s.ds.GetContext(ctx, &st.NearTTLCount, qNear, ttlSec, leadSec); err != nil { + return beholder.DurableQueueStats{}, fmt.Errorf("durable queue near-ttl: %w", err) + } + } + return st, nil +} diff --git a/core/services/beholder/durable_event_store_orm_test.go b/core/services/beholder/durable_event_store_orm_test.go index 3cdd8789994..d75363ce3db 100644 --- a/core/services/beholder/durable_event_store_orm_test.go +++ b/core/services/beholder/durable_event_store_orm_test.go @@ -93,6 +93,24 @@ func TestPgDurableEventStore_DeleteExpired(t *testing.T) { assert.Equal(t, int64(1), deleted) } +func TestPgDurableEventStore_ObserveDurableQueue(t *testing.T) { + db := pgtest.NewSqlxDB(t) + ctx := testutils.Context(t) + store := beholdersvc.NewPgDurableEventStore(db) + + st, err := store.ObserveDurableQueue(ctx, time.Hour, time.Minute) + require.NoError(t, err) + assert.Equal(t, int64(0), st.Depth) + + _, err = store.Insert(ctx, []byte("payload-bytes")) + require.NoError(t, err) + st, err = store.ObserveDurableQueue(ctx, time.Hour, time.Minute) + require.NoError(t, err) + assert.Equal(t, int64(1), st.Depth) + assert.Equal(t, int64(len("payload-bytes")), st.PayloadBytes) + assert.Positive(t, st.OldestPendingAge) +} + // ---------- Benchmarks ---------- func randomPayload(size int) []byte { diff --git a/core/services/chainlink/application.go b/core/services/chainlink/application.go index cbce2c2cad2..61eba7250a9 100644 --- a/core/services/chainlink/application.go +++ b/core/services/chainlink/application.go @@ -383,7 +383,7 @@ func NewApplication(ctx context.Context, opts ApplicationOpts) (Application, err // Wire DurableEmitter for persistent chip ingress delivery when enabled. 
if cfg.Telemetry().DurableEmitterEnabled() && cfg.Telemetry().ChipIngressEndpoint() != "" { - if err := setupDurableEmitter(ctx, opts.DS, globalLogger); err != nil { + if err := setupDurableEmitter(ctx, opts.DS, globalLogger, cfg.Telemetry()); err != nil { globalLogger.Warnw("Failed to set up durable emitter, continuing without it", "error", err) } } @@ -1273,7 +1273,7 @@ func (app *ChainlinkApplication) DeleteLogPollerDataAfter(ctx context.Context, c // setupDurableEmitter replaces the global beholder emitter with a DurableEmitter // backed by Postgres. Events are persisted before async gRPC delivery, surviving // node restarts and chip ingress outages. -func setupDurableEmitter(ctx context.Context, ds sqlutil.DataSource, lggr logger.SugaredLogger) error { +func setupDurableEmitter(ctx context.Context, ds sqlutil.DataSource, lggr logger.SugaredLogger, telem config.Telemetry) error { client := beholder.GetClient() if client == nil { return fmt.Errorf("beholder client not initialized") @@ -1286,6 +1286,10 @@ func setupDurableEmitter(ctx context.Context, ds sqlutil.DataSource, lggr logger pgStore := beholdersvc.NewPgDurableEventStore(ds) durableCfg := beholder.DefaultDurableEmitterConfig() + durableCfg.Metrics = &beholder.DurableEmitterMetricsConfig{ + RecordProcessStats: true, + } + durableCfg.PersistCloudEventSources = telem.DurableEmitterPersistSources() durableEmitter, err := beholder.NewDurableEmitter(pgStore, chipClient, durableCfg, lggr) if err != nil { return fmt.Errorf("failed to create durable emitter: %w", err) @@ -1302,7 +1306,15 @@ func setupDurableEmitter(ctx context.Context, ds sqlutil.DataSource, lggr logger durableEmitter.Start(ctx) client.Emitter = dualEmitter - lggr.Infow("Durable emitter enabled — chip events will be persisted to Postgres") + switch { + case durableCfg.PersistCloudEventSources == nil: + lggr.Infow("Durable emitter enabled — every CloudEvent source may be persisted (no source filter)") + case 
len(durableCfg.PersistCloudEventSources) == 0: + lggr.Infow("Durable emitter enabled — durable persistence disabled for all sources (best-effort Chip publish only)") + default: + lggr.Infow("Durable emitter enabled — durable persistence restricted by CloudEvent source", + "PersistCloudEventSources", durableCfg.PersistCloudEventSources) + } return nil } diff --git a/core/services/chainlink/config_telemetry.go b/core/services/chainlink/config_telemetry.go index cc2e50c517c..2ec398eaf50 100644 --- a/core/services/chainlink/config_telemetry.go +++ b/core/services/chainlink/config_telemetry.go @@ -3,14 +3,25 @@ package chainlink import ( "fmt" "maps" + "slices" + "strings" "time" "go.uber.org/zap/zapcore" + "github.com/smartcontractkit/chainlink/v2/core/config" "github.com/smartcontractkit/chainlink/v2/core/config/toml" "github.com/smartcontractkit/chainlink/v2/core/static" ) +var _ config.Telemetry = (*telemetryConfig)(nil) + +// Default CloudEvent sources allowed in the durable Chip queue when [Telemetry.DurableEmitterPersistSources] +// is unset. Align with topics/schemas configured for Chip in each environment; extend via TOML when needed. 
+var defaultDurableEmitterPersistSources = []string{ + "platform", "node-platform", "chip-demo", "data-feeds", "cre", +} + const defaultHeartbeatInterval = 1 * time.Second type telemetryConfig struct { @@ -104,6 +115,18 @@ func (b *telemetryConfig) DurableEmitterEnabled() bool { return *b.s.DurableEmitterEnabled } +func (b *telemetryConfig) DurableEmitterPersistSources() []string { + if b.s.DurableEmitterPersistSources == nil { + return slices.Clone(defaultDurableEmitterPersistSources) + } + for _, s := range b.s.DurableEmitterPersistSources { + if strings.TrimSpace(s) == "*" { + return nil + } + } + return slices.Clone(b.s.DurableEmitterPersistSources) +} + func (b *telemetryConfig) HeartbeatInterval() time.Duration { if b.s.HeartbeatInterval == nil || b.s.HeartbeatInterval.Duration() <= 0 { return defaultHeartbeatInterval diff --git a/deployment/go.mod b/deployment/go.mod index da9ad763038..edb2d022d0a 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -44,7 +44,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/deployment/go.sum b/deployment/go.sum index ea1aee515b9..f7d12f14a7b 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -1387,8 +1387,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326/go.mod 
h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/go.mod b/go.mod index 4107fe6f0f2..20cb95f813b 100644 --- a/go.mod +++ b/go.mod @@ -85,7 +85,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5 github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 github.com/smartcontractkit/chainlink-data-streams v0.1.12 diff --git a/go.sum b/go.sum index 
9a41f24f570..99eb1619459 100644 --- a/go.sum +++ b/go.sum @@ -1235,8 +1235,8 @@ github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5/go.mod h1:xtZNi6pOKdC3sLvokDvXOhgHzT+cyBqH/gWwvxTxqrg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index 3ae89dd322a..b3f02179098 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -50,7 +50,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260325141849-f588de91c2db + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/integration-tests/go.sum b/integration-tests/go.sum index 7f511f14fc3..c882cfa29ce 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1626,8 +1626,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index 4bf8129f4e3..fdcab8a4654 100644 
--- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -31,7 +31,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20260119171452-39c98c3b33cd diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index abcc6d62f5c..f5e45afa05b 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1604,8 +1604,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod 
h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/lib/go.mod b/system-tests/lib/go.mod index a28f3ea8a13..cf5d366a5e7 100644 --- a/system-tests/lib/go.mod +++ b/system-tests/lib/go.mod @@ -32,7 +32,7 @@ require ( github.com/sethvargo/go-retry v0.3.0 github.com/smartcontractkit/chain-selectors v1.0.97 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/system-tests/lib/go.sum b/system-tests/lib/go.sum index fc9f69bfe3e..6ecf6043e60 100644 --- a/system-tests/lib/go.sum +++ b/system-tests/lib/go.sum @@ -1597,8 +1597,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= -github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/tests/go.mod b/system-tests/tests/go.mod index 9e83527f6d5..e21d45d9c45 100644 --- a/system-tests/tests/go.mod +++ b/system-tests/tests/go.mod @@ -54,7 +54,7 @@ require ( github.com/rs/zerolog v1.34.0 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chain-selectors v1.0.97 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.12 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/system-tests/tests/go.sum b/system-tests/tests/go.sum index 50bd9873589..86fad2e6e40 100644 --- a/system-tests/tests/go.sum +++ b/system-tests/tests/go.sum @@ -1781,8 +1781,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv 
v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db h1:L38sZS8nfmgBwTXV2hO82FoqVf8ajqRSZQfH4ThXE+Q= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325141849-f588de91c2db/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/system-tests/tests/smoke/cre/cre_suite_test.go b/system-tests/tests/smoke/cre/cre_suite_test.go index df615e4e9e2..0da41d1b17e 100644 --- a/system-tests/tests/smoke/cre/cre_suite_test.go +++ b/system-tests/tests/smoke/cre/cre_suite_test.go @@ -231,7 +231,7 @@ func Test_CRE_V2_Beholder_Suite(t *testing.T) { } // TODO: Add tests to suite -func Test_CRE_V2_DurableEmitter(t *testing.T) { +func Test_CRE_V2_DurableEmitter_Simple(t *testing.T) { testEnv := t_helpers.SetupTestEnvironmentWithConfig(t, t_helpers.GetDefaultTestConfig(t)) ExecuteDurableEmitterTest(t, testEnv) } From ebf490907ba8ff1ff462829799dec7eadf64bce6 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Tue, 31 Mar 2026 16:10:02 -0400 Subject: [PATCH 6/9] Update tests --- core/scripts/go.mod | 2 +- core/scripts/go.sum | 4 +- .../beholder/durable_emitter_load_test.go | 124 +++++++++++++----- .../beholder/durable_event_store_orm.go | 41 +++++- .../beholder/durable_event_store_orm_test.go | 31 +++++ 
.../0296_chip_durable_events_delivered_at.sql | 19 +++ deployment/go.mod | 2 +- deployment/go.sum | 4 +- go.mod | 4 +- go.sum | 4 +- integration-tests/go.mod | 2 +- integration-tests/go.sum | 4 +- integration-tests/load/go.mod | 2 +- integration-tests/load/go.sum | 4 +- system-tests/lib/go.mod | 2 +- system-tests/lib/go.sum | 4 +- system-tests/tests/go.mod | 2 +- system-tests/tests/go.sum | 4 +- .../smoke/cre/v2_durable_emitter_test.go | 5 +- 19 files changed, 205 insertions(+), 59 deletions(-) create mode 100644 core/store/migrate/migrations/0296_chip_durable_events_delivered_at.sql diff --git a/core/scripts/go.mod b/core/scripts/go.mod index 5551463ca74..2ee50d37d28 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -46,7 +46,7 @@ require ( github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 github.com/smartcontractkit/chainlink-data-streams v0.1.12 diff --git a/core/scripts/go.sum b/core/scripts/go.sum index 2ce2ac969fa..b65fb7aa3e8 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1634,8 +1634,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= 
-github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/core/services/beholder/durable_emitter_load_test.go b/core/services/beholder/durable_emitter_load_test.go index 9c18094b786..6ca26d575b7 100644 --- a/core/services/beholder/durable_emitter_load_test.go +++ b/core/services/beholder/durable_emitter_load_test.go @@ -53,7 +53,6 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/config/env" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" - "github.com/smartcontractkit/chainlink/v2/core/internal/testutils/pgtest" ) // chipLoadTestDemoProto is the raw .proto registered with Chip for subject chip-demo-pb.DemoClientPayload @@ -74,14 +73,20 @@ message DemoClientPayload { } ` +// sustainedThroughputMockPublishLatency is the in-process mock's server-side sleep per Publish +// RPC in TestFullStack_SustainedThroughput (only). External Chip ignores this. +const sustainedThroughputMockPublishLatency = 500 * time.Millisecond + // loadTestServer is a controllable gRPC ChipIngress server for load tests. 
type loadTestServer struct { pb.UnimplementedChipIngressServer - mu sync.Mutex - publishErr error - batchErr error - publishDelay time.Duration + mu sync.Mutex + publishErr error + batchErr error + // publishDelayNs is nanoseconds to sleep in Publish (0 = none). Atomic so handlers see + // the value set before traffic without a data race on the hot path. + publishDelayNs atomic.Int64 publishCount atomic.Int64 batchCount atomic.Int64 @@ -89,8 +94,8 @@ type loadTestServer struct { } func (s *loadTestServer) Publish(_ context.Context, _ *cepb.CloudEvent) (*pb.PublishResponse, error) { - if s.publishDelay > 0 { - time.Sleep(s.publishDelay) + if ns := s.publishDelayNs.Load(); ns > 0 { + time.Sleep(time.Duration(ns)) } s.publishCount.Add(1) s.totalEvents.Add(1) @@ -123,6 +128,14 @@ func (s *loadTestServer) setBatchErr(err error) { s.batchErr = err } +func (s *loadTestServer) setPublishDelay(d time.Duration) { + if d <= 0 { + s.publishDelayNs.Store(0) + return + } + s.publishDelayNs.Store(d.Nanoseconds()) +} + func startLoadServer(t testing.TB) (*loadTestServer, string) { t.Helper() srv := &loadTestServer{} @@ -315,16 +328,35 @@ func TestChipIngressExternalPing(t *testing.T) { // TestFullStack_SustainedThroughput measures steady-state throughput with // real Postgres persistence and gRPC delivery. This answers: "how many // events/sec can we sustain end-to-end?" +// +// Wall time is dominated by totalEvents / sustained Emit (insert) rate — often +// many minutes at 100k events. Run with -short for a 10k-event run (~tens of s). +// Spurious retransmits happen if RetransmitAfter is shorter than tail +// MarkDelivered latency under load; we use a generous RetransmitAfter here. +// With the in-process mock, each Publish RPC sleeps sustainedThroughputMockPublishLatency +// (const); pipeline logs should show ~that much in immediate Publish p50/p99/mean. 
func TestFullStack_SustainedThroughput(t *testing.T) { - db := pgtest.NewSqlxDB(t) + // Must use non-txdb Postgres: txdb is a single transaction; any SQL error + // aborts it and all follow-up queries fail with SQLSTATE 25P02 under concurrent + // purge/retransmit/mark-delivered (DurableEmitter background loops). + db := directDB(t) srv, client := startChipIngressOrMock(t) + if srv != nil { + srv.setPublishDelay(sustainedThroughputMockPublishLatency) + t.Logf("Sustained throughput: mock Chip Publish server delay = %s (const sustainedThroughputMockPublishLatency)", + sustainedThroughputMockPublishLatency) + } store := beholdersvc.NewPgDurableEventStore(db) + pipe := &pipelineDeliveryStats{} cfg := beholder.DefaultDurableEmitterConfig() cfg.RetransmitInterval = 500 * time.Millisecond - cfg.RetransmitAfter = 2 * time.Second + // Must exceed tail store latency while 100k+ goroutines contend; else + // ListPending sees still-pending rows and duplicates Publish (extra RPCs, slower drain). + cfg.RetransmitAfter = 30 * time.Second cfg.RetransmitBatchSize = 200 cfg.PublishTimeout = 5 * time.Second + cfg.Hooks = newPipelineHooks(pipe) em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Test(t)) require.NoError(t, err) @@ -333,10 +365,14 @@ func TestFullStack_SustainedThroughput(t *testing.T) { em.Start(ctx) defer em.Close() - const ( - totalEvents = 100000 - concurrency = 10 - ) + totalEvents := 100_000 + //if testing.Short() { + //totalEvents = 10_000 + //} + const concurrency = 10 + + t.Logf("Full-stack sustained throughput: totalEvents=%d (100k unless -short), concurrency=%d", + totalEvents, concurrency) payload := buildLoadTestPayload(256) // ~256 byte record (protobuf for external Chip) @@ -366,11 +402,16 @@ func TestFullStack_SustainedThroughput(t *testing.T) { assert.Equal(t, int64(0), emitErrors.Load(), "all emits should succeed") - // Wait for all events to be delivered and store to drain. 
+ // Wait for all events to be delivered and store to drain (pending list empty; + // Postgres may still have tombstones until purge loop catches up). + drainWait := 45 * time.Second + if totalEvents >= 100_000 { + drainWait = 120 * time.Second + } require.Eventually(t, func() bool { pending, _ := store.ListPending(ctx, time.Now().Add(time.Hour), 1) return len(pending) == 0 - }, 30*time.Second, 100*time.Millisecond, "store should drain completely") + }, drainWait, 100*time.Millisecond, "store should drain completely (no pending delivery)") totalElapsed := time.Since(start) @@ -383,6 +424,10 @@ func TestFullStack_SustainedThroughput(t *testing.T) { t.Logf("Total elapsed: %s", totalElapsed.Round(time.Millisecond)) t.Logf("End-to-end rate: %.0f events/sec", float64(totalEvents)/totalElapsed.Seconds()) + t.Logf("--- gRPC Publish / store MarkDelivered latency (%s) ---", chipIngressTargetDescription(srv)) + t.Logf("(Publish = chipingress.Publish round-trip; MarkDelivered = UPDATE delivered_at; rows are batch-deleted asynchronously.)") + logPipelineDeliverySummary(t, pipe) + if srv != nil { assert.GreaterOrEqual(t, srv.totalEvents.Load(), int64(totalEvents), "server should have received all events (may have retransmit duplicates)") @@ -395,7 +440,7 @@ func TestFullStack_SustainedThroughput(t *testing.T) { func TestFullStack_ChipOutage(t *testing.T) { skipIfExternalChip(t, "inject Unavailable errors on mock server") - db := pgtest.NewSqlxDB(t) + db := directDB(t) srv, client := startChipIngressOrMock(t) require.NotNil(t, srv) store := beholdersvc.NewPgDurableEventStore(db) @@ -463,10 +508,10 @@ func TestFullStack_ChipOutage(t *testing.T) { func TestFullStack_SlowChip(t *testing.T) { skipIfExternalChip(t, "inject publish latency on mock server") - db := pgtest.NewSqlxDB(t) + db := directDB(t) srv, client := startChipIngressOrMock(t) require.NotNil(t, srv) - srv.publishDelay = 50 * time.Millisecond // 50ms per publish = ~20 RPS max + srv.setPublishDelay(100 * 
time.Millisecond) // 50ms per publish = ~20 RPS max store := beholdersvc.NewPgDurableEventStore(db) cfg := beholder.DefaultDurableEmitterConfig() @@ -512,7 +557,7 @@ func TestFullStack_SlowChip(t *testing.T) { // Benchmark_FullStack_EmitThroughput benchmarks the Emit() path with real Postgres // and a fast mock gRPC server. This gives the upper bound of events/sec. func Benchmark_FullStack_EmitThroughput(b *testing.B) { - db := pgtest.NewSqlxDB(b) + db := directDB(b) _, client := startChipIngressOrMock(b) store := beholdersvc.NewPgDurableEventStore(db) @@ -539,7 +584,7 @@ func Benchmark_FullStack_EmitPayloadSizes(b *testing.B) { sizes := []int{64, 256, 1024, 4096} for _, size := range sizes { b.Run(fmt.Sprintf("%dB", size), func(b *testing.B) { - db := pgtest.NewSqlxDB(b) + db := directDB(b) _, client := startChipIngressOrMock(b) store := beholdersvc.NewPgDurableEventStore(db) @@ -636,8 +681,10 @@ func progressBar(pct float64, width int) string { } // directDB opens a real (non-txdb) Postgres connection for concurrent load tests. -// txdb serializes all operations through a single transaction, which bottlenecks -// concurrent writes. For TPS testing we need real connection pooling. +// pgtest.NewSqlxDB uses txdb: one shared transaction per pool. Any SQL error +// aborts that transaction (SQLSTATE 25P02 on later queries). DurableEmitter’s +// concurrent purge/retransmit/mark-delivered + many goroutines requires +// autocommit statements and a real pool, not txdb. func directDB(t testing.TB) *sqlx.DB { t.Helper() testutils.SkipShortDB(t) @@ -715,7 +762,7 @@ func (s *emitLatencyStats) sum() time.Duration { return t } -// pipelineDeliveryStats aggregates DurableEmitterHooks samples to compare Chip Publish vs DB Delete cost. +// pipelineDeliveryStats aggregates DurableEmitterHooks samples to compare Chip Publish vs store MarkDelivered cost. 
type pipelineDeliveryStats struct { immPub, immDel, batchPub, batchDel emitLatencyStats immPubErr, batchPubErr atomic.Int64 @@ -751,6 +798,18 @@ func durMs(d time.Duration) float64 { return float64(d.Microseconds()) / 1000.0 } +// chipIngressTargetDescription labels latency logs: mock gRPC server vs external Chip Ingress. +func chipIngressTargetDescription(srv *loadTestServer) string { + if srv != nil { + return "in-process mock ChipIngress (loadTestServer)" + } + addr := strings.TrimSpace(os.Getenv(envChipIngressTestAddr)) + if addr == "" { + return "external Chip Ingress" + } + return fmt.Sprintf("external Chip Ingress (%s)", addr) +} + func logPipelineDeliverySummary(t *testing.T, pipe *pipelineDeliveryStats) { t.Helper() ipN := pipe.immPub.count() @@ -759,14 +818,14 @@ func logPipelineDeliverySummary(t *testing.T, pipe *pipelineDeliveryStats) { ipN, pipe.immPubErr.Load(), durMs(pipe.immPub.percentile(0.50)), durMs(pipe.immPub.percentile(0.99)), durMs(pipe.immPub.mean()), durMs(pipe.immPub.sum())) - t.Logf("Pipeline — immediate Delete: n=%d p50=%.3f ms p99=%.3f ms mean=%.3f ms Σ=%.1f ms", + t.Logf("Pipeline — immediate MarkDelivered: n=%d p50=%.3f ms p99=%.3f ms mean=%.3f ms Σ=%.1f ms", idN, durMs(pipe.immDel.percentile(0.50)), durMs(pipe.immDel.percentile(0.99)), durMs(pipe.immDel.mean()), durMs(pipe.immDel.sum())) bpN := pipe.batchPub.count() if bpN > 0 { - t.Logf("Pipeline — retransmit Publish (serial): rpcs=%d rpc_errs=%d evt_errs=%d p50=%.3f ms mean=%.3f ms | delete-hook_calls=%d mean_loop=%.3f ms", + t.Logf("Pipeline — retransmit Publish (serial): rpcs=%d rpc_errs=%d evt_errs=%d p50=%.3f ms mean=%.3f ms | retransmit_mark_delivered_hooks=%d mean_loop=%.3f ms", bpN, pipe.batchPubErr.Load(), pipe.batchPubEventErrs.Load(), durMs(pipe.batchPub.percentile(0.50)), durMs(pipe.batchPub.mean()), pipe.batchDel.count(), durMs(pipe.batchDel.mean())) @@ -776,11 +835,11 @@ func logPipelineDeliverySummary(t *testing.T, pipe *pipelineDeliveryStats) { pm, dm := 
durMs(pipe.immPub.mean()), durMs(pipe.immDel.mean()) switch { case pm > 3*dm && pm > 0.5: - t.Logf("Bottleneck hint: Publish mean %.3f ms ≫ Delete mean %.3f ms — likely Chip / gRPC bound", pm, dm) + t.Logf("Bottleneck hint: Publish mean %.3f ms ≫ MarkDelivered mean %.3f ms — likely Chip / gRPC bound", pm, dm) case dm > 3*pm && dm > 0.5: - t.Logf("Bottleneck hint: Delete mean %.3f ms ≫ Publish mean %.3f ms — likely Postgres delete bound", dm, pm) + t.Logf("Bottleneck hint: MarkDelivered mean %.3f ms ≫ Publish mean %.3f ms — likely Postgres UPDATE bound", dm, pm) default: - t.Logf("Bottleneck hint: Publish %.3f ms vs Delete %.3f ms comparable (per successful immediate delivery)", pm, dm) + t.Logf("Bottleneck hint: Publish %.3f ms vs MarkDelivered %.3f ms comparable (per successful immediate delivery)", pm, dm) } } else { t.Logf("Bottleneck hint: few completed immediate deliveries in window (pub=%d del=%d); extend duration or check async backlog", ipN, idN) @@ -790,7 +849,8 @@ func logPipelineDeliverySummary(t *testing.T, pipe *pipelineDeliveryStats) { // rateLimitEmitResult is the outcome of runRateLimitedEmit. type rateLimitEmitResult struct { stats *emitLatencyStats - // maxQueueDepth is the maximum observed row count in cre.chip_durable_events + // maxQueueDepth is the maximum observed pending row count in cre.chip_durable_events + // (delivered_at IS NULL). // during the emit window (polled periodically; nil DB disables sampling). maxQueueDepth int64 // maxQueuePayloadBytes is the maximum observed sum(octet_length(payload)) for @@ -839,10 +899,10 @@ func bumpMaxQueuePayloadBytes(maxB *atomic.Int64, b int64) { } } -// queuePayloadStats returns row count and total payload bytes for cre.chip_durable_events. +// queuePayloadStats returns pending row count and payload bytes (delivered_at IS NULL). 
func queuePayloadStats(db *sqlx.DB, ctx context.Context) (rows int64, payloadBytes int64, err error) { err = db.QueryRowContext(ctx, - `SELECT count(*), coalesce(sum(octet_length(payload)), 0) FROM cre.chip_durable_events`, + `SELECT count(*), coalesce(sum(octet_length(payload)), 0) FROM cre.chip_durable_events WHERE delivered_at IS NULL`, ).Scan(&rows, &payloadBytes) return rows, payloadBytes, err } @@ -1185,7 +1245,7 @@ func TestTPS_Sustained1k(t *testing.T) { targetTPS, achievedTPS, emitRes.ImmPublishFails, emitRes.BatchPublishFailEvents, stats.failures.Load()), fmt.Sprintf("emit p50/p99 ms: %.2f / %.2f", float64(stats.percentile(0.50).Microseconds())/1000.0, float64(stats.percentile(0.99).Microseconds())/1000.0), fmt.Sprintf("queue max during emit: %d rows, %s KB payload (sum octet_length/1024)", emitRes.maxQueueDepth, formatQueueKB(emitRes.maxQueuePayloadBytes)), - fmt.Sprintf("pipeline imm Publish/Delete means ms: %.3f / %.3f (n=%d/%d)", durMs(pipe.immPub.mean()), durMs(pipe.immDel.mean()), pipe.immPub.count(), pipe.immDel.count()), + fmt.Sprintf("pipeline imm Publish/MarkDelivered means ms: %.3f / %.3f (n=%d/%d)", durMs(pipe.immPub.mean()), durMs(pipe.immDel.mean()), pipe.immPub.count(), pipe.immDel.count()), fmt.Sprintf("drain time: %s", drainTime.Round(time.Millisecond)), ) diff --git a/core/services/beholder/durable_event_store_orm.go b/core/services/beholder/durable_event_store_orm.go index ec48b2ae49d..9388a8c8ede 100644 --- a/core/services/beholder/durable_event_store_orm.go +++ b/core/services/beholder/durable_event_store_orm.go @@ -42,11 +42,44 @@ func (s *PgDurableEventStore) Delete(ctx context.Context, id int64) error { return nil } +func (s *PgDurableEventStore) MarkDelivered(ctx context.Context, id int64) error { + const q = `UPDATE ` + chipDurableEventsTable + ` SET delivered_at = now() WHERE id = $1 AND delivered_at IS NULL` + if _, err := s.ds.ExecContext(ctx, q, id); err != nil { + return fmt.Errorf("failed to mark chip durable event 
delivered id=%d: %w", id, err) + } + return nil +} + +func (s *PgDurableEventStore) PurgeDelivered(ctx context.Context, batchLimit int) (int64, error) { + if batchLimit <= 0 { + return 0, nil + } + const q = ` +WITH picked AS ( + SELECT id FROM ` + chipDurableEventsTable + ` + WHERE delivered_at IS NOT NULL + ORDER BY delivered_at ASC + LIMIT $1 +) +DELETE FROM ` + chipDurableEventsTable + ` AS t +USING picked WHERE t.id = picked.id` + res, err := s.ds.ExecContext(ctx, q, batchLimit) + if err != nil { + return 0, fmt.Errorf("failed to purge delivered chip durable events: %w", err) + } + n, err := res.RowsAffected() + if err != nil { + return 0, fmt.Errorf("purge delivered rows affected: %w", err) + } + return n, nil +} + func (s *PgDurableEventStore) ListPending(ctx context.Context, createdBefore time.Time, limit int) ([]beholder.DurableEvent, error) { const q = ` SELECT id, payload, created_at FROM ` + chipDurableEventsTable + ` -WHERE created_at < $1 +WHERE delivered_at IS NULL + AND created_at < $1 ORDER BY created_at ASC LIMIT $2` @@ -101,7 +134,8 @@ SELECT count(*)::bigint AS cnt, coalesce(sum(octet_length(payload)), 0)::bigint AS payload_sum, min(created_at) AS min_created -FROM ` + chipDurableEventsTable +FROM ` + chipDurableEventsTable + ` +WHERE delivered_at IS NULL` var row chipDurableQueueAgg if err := s.ds.GetContext(ctx, &row, qAgg); err != nil { @@ -119,7 +153,8 @@ FROM ` + chipDurableEventsTable const qNear = ` SELECT count(*)::bigint FROM ` + chipDurableEventsTable + ` -WHERE created_at >= now() - ($1::bigint * interval '1 second') +WHERE delivered_at IS NULL + AND created_at >= now() - ($1::bigint * interval '1 second') AND created_at < now() - (($1::bigint - $2::bigint) * interval '1 second')` if err := s.ds.GetContext(ctx, &st.NearTTLCount, qNear, ttlSec, leadSec); err != nil { return beholder.DurableQueueStats{}, fmt.Errorf("durable queue near-ttl: %w", err) diff --git a/core/services/beholder/durable_event_store_orm_test.go 
b/core/services/beholder/durable_event_store_orm_test.go index d75363ce3db..58d858863ee 100644 --- a/core/services/beholder/durable_event_store_orm_test.go +++ b/core/services/beholder/durable_event_store_orm_test.go @@ -111,6 +111,37 @@ func TestPgDurableEventStore_ObserveDurableQueue(t *testing.T) { assert.Positive(t, st.OldestPendingAge) } +func TestPgDurableEventStore_MarkDeliveredAndPurgeDelivered(t *testing.T) { + db := pgtest.NewSqlxDB(t) + ctx := testutils.Context(t) + store := beholdersvc.NewPgDurableEventStore(db) + + id, err := store.Insert(ctx, []byte("payload")) + require.NoError(t, err) + + pending, err := store.ListPending(ctx, time.Now().Add(time.Hour), 10) + require.NoError(t, err) + require.Len(t, pending, 1) + + require.NoError(t, store.MarkDelivered(ctx, id)) + require.NoError(t, store.MarkDelivered(ctx, id), "second mark is idempotent") + + pending, err = store.ListPending(ctx, time.Now().Add(time.Hour), 10) + require.NoError(t, err) + require.Len(t, pending, 0) + + var cnt int64 + require.NoError(t, db.GetContext(ctx, &cnt, `SELECT count(*) FROM cre.chip_durable_events`)) + require.Equal(t, int64(1), cnt, "row remains as tombstone until purge") + + n, err := store.PurgeDelivered(ctx, 10) + require.NoError(t, err) + require.Equal(t, int64(1), n) + + require.NoError(t, db.GetContext(ctx, &cnt, `SELECT count(*) FROM cre.chip_durable_events`)) + require.Equal(t, int64(0), cnt) +} + // ---------- Benchmarks ---------- func randomPayload(size int) []byte { diff --git a/core/store/migrate/migrations/0296_chip_durable_events_delivered_at.sql b/core/store/migrate/migrations/0296_chip_durable_events_delivered_at.sql new file mode 100644 index 00000000000..0a02a9df8b1 --- /dev/null +++ b/core/store/migrate/migrations/0296_chip_durable_events_delivered_at.sql @@ -0,0 +1,19 @@ +-- +goose Up + +ALTER TABLE cre.chip_durable_events + ADD COLUMN IF NOT EXISTS delivered_at TIMESTAMPTZ NULL; + +CREATE INDEX IF NOT EXISTS idx_chip_durable_events_pending_delivery 
+ ON cre.chip_durable_events (created_at ASC) + WHERE delivered_at IS NULL; + +CREATE INDEX IF NOT EXISTS idx_chip_durable_events_delivered_purge + ON cre.chip_durable_events (delivered_at ASC) + WHERE delivered_at IS NOT NULL; + +-- +goose Down + +DROP INDEX IF EXISTS cre.idx_chip_durable_events_delivered_purge; +DROP INDEX IF EXISTS cre.idx_chip_durable_events_pending_delivery; + +ALTER TABLE cre.chip_durable_events DROP COLUMN IF EXISTS delivered_at; diff --git a/deployment/go.mod b/deployment/go.mod index edb2d022d0a..3e1c7fc74e0 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -44,7 +44,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/deployment/go.sum b/deployment/go.sum index f7d12f14a7b..183303fe348 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -1387,8 +1387,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/go.mod b/go.mod index 20cb95f813b..6c272fcfeae 100644 --- a/go.mod +++ b/go.mod @@ -85,7 +85,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5 github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 github.com/smartcontractkit/chainlink-data-streams v0.1.12 @@ -437,6 +437,8 @@ require ( sigs.k8s.io/yaml v1.4.0 // indirect ) +replace github.com/smartcontractkit/chainlink-common => ../chainlink-common + replace github.com/fbsobreira/gotron-sdk => github.com/smartcontractkit/chainlink-tron/relayer/gotron-sdk v0.0.5-0.20260218133534-cbd44da2856b tool github.com/smartcontractkit/chainlink-common/pkg/loop/cmd/loopinstall diff --git a/go.sum b/go.sum index 
99eb1619459..743b5a362e4 100644 --- a/go.sum +++ b/go.sum @@ -1235,8 +1235,8 @@ github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5/go.mod h1:xtZNi6pOKdC3sLvokDvXOhgHzT+cyBqH/gWwvxTxqrg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index b3f02179098..7cd073ab27a 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -50,7 +50,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260325215343-ac292754837c + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/integration-tests/go.sum b/integration-tests/go.sum index c882cfa29ce..40f72baea8a 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1626,8 +1626,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index fdcab8a4654..afe490c7cf5 100644 
--- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -31,7 +31,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20260119171452-39c98c3b33cd diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index f5e45afa05b..61e77112e1a 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1604,8 +1604,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod 
h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/lib/go.mod b/system-tests/lib/go.mod index cf5d366a5e7..0425f6d2d90 100644 --- a/system-tests/lib/go.mod +++ b/system-tests/lib/go.mod @@ -32,7 +32,7 @@ require ( github.com/sethvargo/go-retry v0.3.0 github.com/smartcontractkit/chain-selectors v1.0.97 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/system-tests/lib/go.sum b/system-tests/lib/go.sum index 6ecf6043e60..1ddead56623 100644 --- a/system-tests/lib/go.sum +++ b/system-tests/lib/go.sum @@ -1597,8 +1597,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= -github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/tests/go.mod b/system-tests/tests/go.mod index e21d45d9c45..c60e45aaa48 100644 --- a/system-tests/tests/go.mod +++ b/system-tests/tests/go.mod @@ -54,7 +54,7 @@ require ( github.com/rs/zerolog v1.34.0 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chain-selectors v1.0.97 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.12 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/system-tests/tests/go.sum b/system-tests/tests/go.sum index 86fad2e6e40..0cfcdfe49d6 100644 --- a/system-tests/tests/go.sum +++ b/system-tests/tests/go.sum @@ -1781,8 +1781,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv 
v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c h1:ZAYmqgFd3RHyYTOSQEQ2D+G2lNf1GLY5nCcxpX0FGHo= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260325215343-ac292754837c/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/system-tests/tests/smoke/cre/v2_durable_emitter_test.go b/system-tests/tests/smoke/cre/v2_durable_emitter_test.go index cc467648d4b..a95e1f04789 100644 --- a/system-tests/tests/smoke/cre/v2_durable_emitter_test.go +++ b/system-tests/tests/smoke/cre/v2_durable_emitter_test.go @@ -68,12 +68,11 @@ func snapshotDurableEventStats(ctx context.Context, db *sql.DB) (durableEventSta return s, err } -// countPendingDurableEvents returns the current number of rows in -// cre.chip_durable_events (events that haven't been delivered yet). +// countPendingDurableEvents returns rows still awaiting delivery (delivered_at IS NULL). 
func countPendingDurableEvents(ctx context.Context, db *sql.DB) (int64, error) { var count int64 err := db.QueryRowContext(ctx, - `SELECT count(*) FROM cre.chip_durable_events`, + `SELECT count(*) FROM cre.chip_durable_events WHERE delivered_at IS NULL`, ).Scan(&count) return count, err } From ed59e2e05b7b4459b4297d21cb631dd580468053 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 2 Apr 2026 15:10:11 -0400 Subject: [PATCH 7/9] Improve tests --- core/scripts/go.mod | 2 +- core/scripts/go.sum | 4 +- .../beholder/durable_emitter_load_test.go | 134 ++++++++++++++++-- deployment/go.mod | 2 +- deployment/go.sum | 4 +- go.mod | 4 +- go.sum | 4 +- integration-tests/go.mod | 2 +- integration-tests/go.sum | 4 +- integration-tests/load/go.mod | 2 +- integration-tests/load/go.sum | 4 +- system-tests/lib/go.mod | 2 +- system-tests/lib/go.sum | 4 +- system-tests/tests/go.mod | 2 +- system-tests/tests/go.sum | 4 +- 15 files changed, 140 insertions(+), 38 deletions(-) diff --git a/core/scripts/go.mod b/core/scripts/go.mod index 2ee50d37d28..c11bb69081f 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -46,7 +46,7 @@ require ( github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 github.com/smartcontractkit/chainlink-data-streams v0.1.12 diff --git a/core/scripts/go.sum b/core/scripts/go.sum index b65fb7aa3e8..66bc07b7e1f 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1634,8 +1634,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 
github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/core/services/beholder/durable_emitter_load_test.go b/core/services/beholder/durable_emitter_load_test.go index 6ca26d575b7..ca7d3821f89 100644 --- a/core/services/beholder/durable_emitter_load_test.go +++ b/core/services/beholder/durable_emitter_load_test.go @@ -27,11 +27,13 @@ import ( "fmt" "net" "os" + "runtime" "sort" "strconv" "strings" "sync" "sync/atomic" + "syscall" "testing" "time" @@ -376,6 +378,37 @@ func TestFullStack_SustainedThroughput(t *testing.T) { payload := buildLoadTestPayload(256) // ~256 byte record (protobuf for external Chip) + // CPU snapshot at start (getrusage; cumulative from process start, so we diff). 
+ var cpuStart syscall.Rusage + _ = syscall.Getrusage(syscall.RUSAGE_SELF, &cpuStart) + + // Queue depth sampler: polls DB every 200ms throughout emit + drain phases. + var queueMax, queueSum, queueCnt atomic.Int64 + samplerCtx, samplerCancel := context.WithCancel(ctx) + go func() { + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-samplerCtx.Done(): + return + case <-ticker.C: + rows, _, err := queuePayloadStats(db, samplerCtx) + if err != nil { + continue + } + queueCnt.Add(1) + queueSum.Add(rows) + for { + old := queueMax.Load() + if rows <= old || queueMax.CompareAndSwap(old, rows) { + break + } + } + } + } + }() + start := time.Now() var wg sync.WaitGroup @@ -394,12 +427,6 @@ func TestFullStack_SustainedThroughput(t *testing.T) { wg.Wait() emitElapsed := time.Since(start) - t.Logf("--- Emit Phase ---") - t.Logf("Events emitted: %d", totalEvents) - t.Logf("Emit errors: %d", emitErrors.Load()) - t.Logf("Elapsed: %s", emitElapsed.Round(time.Millisecond)) - t.Logf("Emit rate: %.0f events/sec", float64(totalEvents)/emitElapsed.Seconds()) - assert.Equal(t, int64(0), emitErrors.Load(), "all emits should succeed") // Wait for all events to be delivered and store to drain (pending list empty; @@ -413,20 +440,97 @@ func TestFullStack_SustainedThroughput(t *testing.T) { return len(pending) == 0 }, drainWait, 100*time.Millisecond, "store should drain completely (no pending delivery)") + samplerCancel() // stop queue poller totalElapsed := time.Since(start) - t.Logf("--- Delivery Phase ---") - t.Logf("Server received: %s events (mock only; use external Chip metrics otherwise)", formatMockServerEvents(srv)) + // CPU diff: user + system seconds consumed over the whole test. 
+ var cpuEnd syscall.Rusage + _ = syscall.Getrusage(syscall.RUSAGE_SELF, &cpuEnd) + cpuUserSec := (float64(cpuEnd.Utime.Sec) + float64(cpuEnd.Utime.Usec)/1e6) - + (float64(cpuStart.Utime.Sec) + float64(cpuStart.Utime.Usec)/1e6) + cpuSysSec := (float64(cpuEnd.Stime.Sec) + float64(cpuEnd.Stime.Usec)/1e6) - + (float64(cpuStart.Stime.Sec) + float64(cpuStart.Stime.Usec)/1e6) + cpuTotalSec := cpuUserSec + cpuSysSec + // Utilization: fraction of available CPU time (wall × GOMAXPROCS). + cpuUtilPct := 100.0 * cpuTotalSec / (totalElapsed.Seconds() * float64(runtime.GOMAXPROCS(0))) + + // Queue depth averages. + var queueAvg float64 + if n := queueCnt.Load(); n > 0 { + queueAvg = float64(queueSum.Load()) / float64(n) + } + + pubN := pipe.immPub.count() + pubErrs := pipe.immPubErr.Load() + pubP50 := durMs(pipe.immPub.percentile(0.50)) + pubP99 := durMs(pipe.immPub.percentile(0.99)) + pubMean := durMs(pipe.immPub.mean()) + delN := pipe.immDel.count() + delP50 := durMs(pipe.immDel.percentile(0.50)) + delP99 := durMs(pipe.immDel.percentile(0.99)) + delMean := durMs(pipe.immDel.mean()) + + var bottleneck string + switch { + case pubN < 50 || delN < 50: + bottleneck = "too few samples — extend duration or check async backlog" + case pubMean > 3*delMean && pubMean > 0.5: + bottleneck = "Chip / gRPC bound (Publish ≫ MarkDelivered)" + case delMean > 3*pubMean && delMean > 0.5: + bottleneck = "Postgres UPDATE bound (MarkDelivered ≫ Publish)" + default: + bottleneck = "balanced (Publish ≈ MarkDelivered)" + } + + var serverLine string if srv != nil { - t.Logf("Publish calls: %d", srv.publishCount.Load()) - t.Logf("Batch calls: %d", srv.batchCount.Load()) + serverLine = fmt.Sprintf("%d (mock; Publish RPCs: %d)", srv.totalEvents.Load(), srv.publishCount.Load()) + } else { + serverLine = "N/A (use external Chip metrics)" } - t.Logf("Total elapsed: %s", totalElapsed.Round(time.Millisecond)) - t.Logf("End-to-end rate: %.0f events/sec", float64(totalEvents)/totalElapsed.Seconds()) - 
t.Logf("--- gRPC Publish / store MarkDelivered latency (%s) ---", chipIngressTargetDescription(srv)) - t.Logf("(Publish = chipingress.Publish round-trip; MarkDelivered = UPDATE delivered_at; rows are batch-deleted asynchronously.)") - logPipelineDeliverySummary(t, pipe) + target := chipIngressTargetDescription(srv) + + t.Logf("╔══════════════════════════════════════════════════════════════╗") + t.Logf("║ SUSTAINED THROUGHPUT TEST RESULTS ║") + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ Target: %-52s ║", target) + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ EMIT (DB insert, async gRPC) ║") + t.Logf("║ Events: %-42d ║", totalEvents) + t.Logf("║ Errors: %-42d ║", emitErrors.Load()) + t.Logf("║ Elapsed: %-42s ║", emitElapsed.Round(time.Millisecond)) + t.Logf("║ Rate: %-42s ║", fmt.Sprintf("%.0f events/sec", float64(totalEvents)/emitElapsed.Seconds())) + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ DELIVERY (Publish → MarkDelivered) ║") + t.Logf("║ Server received:%-42s ║", serverLine) + t.Logf("║ Total elapsed: %-42s ║", totalElapsed.Round(time.Millisecond)) + t.Logf("║ End-to-end rate:%-42s ║", fmt.Sprintf("%.0f events/sec", float64(totalEvents)/totalElapsed.Seconds())) + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ PENDING QUEUE DEPTH (polled every 200ms, delivered_at IS NULL)║") + t.Logf("║ Max: %-42d ║", queueMax.Load()) + t.Logf("║ Avg: %-42s ║", fmt.Sprintf("%.1f rows", queueAvg)) + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ PIPELINE LATENCY (immediate path) ║") + t.Logf("║ %-10s %-10s %-10s %-10s ║", "n", "p50 (ms)", "p99 (ms)", "mean (ms)") + t.Logf("║ Publish: %-10d %-10.2f %-10.2f %-10.2f ║", pubN, pubP50, pubP99, pubMean) + t.Logf("║ MarkDelivered: %-10d %-10.2f %-10.2f %-10.2f ║", delN, delP50, delP99, delMean) + if pipe.batchPub.count() > 0 { + 
t.Logf("║ Retransmit: %-10d %-10.2f %-10s %-10.2f ║", + pipe.batchPub.count(), + durMs(pipe.batchPub.percentile(0.50)), "—", + durMs(pipe.batchPub.mean())) + } + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ PROCESS CPU (getrusage, GOMAXPROCS=%d) ║", runtime.GOMAXPROCS(0)) + t.Logf("║ User: %-42s ║", fmt.Sprintf("%.2f s", cpuUserSec)) + t.Logf("║ System: %-42s ║", fmt.Sprintf("%.2f s", cpuSysSec)) + t.Logf("║ Total: %-42s ║", fmt.Sprintf("%.2f s", cpuTotalSec)) + t.Logf("║ Utilization: %-42s ║", fmt.Sprintf("%.1f%% of %d cores × %.1fs wall", cpuUtilPct, runtime.GOMAXPROCS(0), totalElapsed.Seconds())) + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ Publish errors (need retransmit): %-24d ║", pubErrs) + t.Logf("║ Bottleneck hint: %-41s ║", bottleneck) + t.Logf("╚══════════════════════════════════════════════════════════════╝") if srv != nil { assert.GreaterOrEqual(t, srv.totalEvents.Load(), int64(totalEvents), diff --git a/deployment/go.mod b/deployment/go.mod index 3e1c7fc74e0..aca7e8409a0 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -44,7 +44,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/deployment/go.sum b/deployment/go.sum index 183303fe348..d94bdb3dc2e 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -1387,8 +1387,8 @@ 
github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/go.mod b/go.mod index 6c272fcfeae..6895d3e2c18 100644 --- a/go.mod +++ b/go.mod @@ -85,7 +85,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5 github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 
github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 github.com/smartcontractkit/chainlink-data-streams v0.1.12 @@ -437,8 +437,6 @@ require ( sigs.k8s.io/yaml v1.4.0 // indirect ) -replace github.com/smartcontractkit/chainlink-common => ../chainlink-common - replace github.com/fbsobreira/gotron-sdk => github.com/smartcontractkit/chainlink-tron/relayer/gotron-sdk v0.0.5-0.20260218133534-cbd44da2856b tool github.com/smartcontractkit/chainlink-common/pkg/loop/cmd/loopinstall diff --git a/go.sum b/go.sum index 743b5a362e4..286496b6c6f 100644 --- a/go.sum +++ b/go.sum @@ -1235,8 +1235,8 @@ github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5/go.mod h1:xtZNi6pOKdC3sLvokDvXOhgHzT+cyBqH/gWwvxTxqrg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git 
a/integration-tests/go.mod b/integration-tests/go.mod index 7cd073ab27a..1048f491e9c 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -50,7 +50,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/integration-tests/go.sum b/integration-tests/go.sum index 40f72baea8a..0e6652c69bb 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1626,8 +1626,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= +github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index afe490c7cf5..9811a616d0c 100644 --- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -31,7 +31,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20260119171452-39c98c3b33cd diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index 61e77112e1a..f5c55c5efd5 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1604,8 +1604,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod 
h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/lib/go.mod b/system-tests/lib/go.mod index 0425f6d2d90..93a317c5a01 100644 --- a/system-tests/lib/go.mod +++ b/system-tests/lib/go.mod @@ -32,7 +32,7 @@ require ( github.com/sethvargo/go-retry v0.3.0 github.com/smartcontractkit/chain-selectors v1.0.97 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/system-tests/lib/go.sum b/system-tests/lib/go.sum index 1ddead56623..ee26a14a654 100644 --- a/system-tests/lib/go.sum +++ b/system-tests/lib/go.sum @@ -1597,8 +1597,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment 
v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/tests/go.mod b/system-tests/tests/go.mod index c60e45aaa48..072ba995fba 100644 --- a/system-tests/tests/go.mod +++ b/system-tests/tests/go.mod @@ -54,7 +54,7 @@ require ( github.com/rs/zerolog v1.34.0 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chain-selectors v1.0.97 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.12 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/system-tests/tests/go.sum b/system-tests/tests/go.sum index 
0cfcdfe49d6..aad926fea29 100644 --- a/system-tests/tests/go.sum +++ b/system-tests/tests/go.sum @@ -1781,8 +1781,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185 h1:mAN+II9btE8431bYvuZD6T9UOEawoybcBuf5CcVQe88= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260331173916-d18962efd185/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= From 02071686f58d70ea00eb6a2d991a49c8c5907a66 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 2 Apr 2026 15:31:52 -0400 Subject: [PATCH 8/9] Update durable_emitter_load_test.go --- .../beholder/durable_emitter_load_test.go | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/core/services/beholder/durable_emitter_load_test.go b/core/services/beholder/durable_emitter_load_test.go index ca7d3821f89..e0ebfd98473 100644 --- 
a/core/services/beholder/durable_emitter_load_test.go +++ b/core/services/beholder/durable_emitter_load_test.go @@ -46,6 +46,11 @@ import ( "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + sdkresource "go.opentelemetry.io/otel/sdk/resource" + "github.com/smartcontractkit/chainlink-common/pkg/beholder" "github.com/smartcontractkit/chainlink-common/pkg/chipingress" "github.com/smartcontractkit/chainlink-common/pkg/chipingress/pb" @@ -350,6 +355,8 @@ func TestFullStack_SustainedThroughput(t *testing.T) { } store := beholdersvc.NewPgDurableEventStore(db) + ctx := testutils.Context(t) + pipe := &pipelineDeliveryStats{} cfg := beholder.DefaultDurableEmitterConfig() cfg.RetransmitInterval = 500 * time.Millisecond @@ -360,14 +367,45 @@ func TestFullStack_SustainedThroughput(t *testing.T) { cfg.PublishTimeout = 5 * time.Second cfg.Hooks = newPipelineHooks(pipe) + // Wire OTel metrics to the local obs stack when OTEL_EXPORTER_OTLP_ENDPOINT is set. + // Start the obs stack first: ./bin/ctf obs up + // Then run: OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 CHIP_INGRESS_TEST_ADDR=... go test ... 
+ if otlpEndpoint := strings.TrimSpace(os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")); otlpEndpoint != "" { + otlpEndpoint = strings.TrimPrefix(otlpEndpoint, "http://") + exp, otelErr := otlpmetricgrpc.New(ctx, + otlpmetricgrpc.WithEndpoint(otlpEndpoint), + otlpmetricgrpc.WithInsecure(), + ) + require.NoError(t, otelErr, "otlp metric exporter") + res := sdkresource.NewWithAttributes("", + attribute.String("service.name", "durable-emitter-loadtest"), + ) + mp := sdkmetric.NewMeterProvider( + sdkmetric.WithResource(res), + sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exp, + sdkmetric.WithInterval(5*time.Second), + )), + ) + t.Cleanup(func() { _ = mp.Shutdown(context.Background()) }) + bc := beholder.NewNoopClient() + bc.MeterProvider = mp + bc.Meter = mp.Meter("beholder") + beholder.SetClient(bc) + t.Cleanup(func() { beholder.SetClient(beholder.NewNoopClient()) }) + cfg.Metrics = &beholder.DurableEmitterMetricsConfig{ + PollInterval: 5 * time.Second, + RecordProcessStats: true, + } + t.Logf("OTel metrics enabled → %s (5s push interval, Grafana: http://localhost:3000)", otlpEndpoint) + } + em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Test(t)) require.NoError(t, err) - ctx := testutils.Context(t) em.Start(ctx) defer em.Close() - totalEvents := 100_000 + totalEvents := 1_000_000 //if testing.Short() { //totalEvents = 10_000 //} From 0df8c7c2d71fcb5fa33025c5bf6ab56a740d5214 Mon Sep 17 00:00:00 2001 From: Dylan Tinianov Date: Thu, 2 Apr 2026 15:58:38 -0400 Subject: [PATCH 9/9] Add dashboards and increase load testing --- core/scripts/cre/environment/.gitignore | 2 +- core/scripts/cre/environment/obs-up.sh | 76 +++ .../dashboards/beholder/load_dashboard.json | 549 ++++++++++++++++++ .../observability-overrides/otel.yaml | 43 ++ core/scripts/go.mod | 2 +- core/scripts/go.sum | 4 +- .../beholder/durable_emitter_load_test.go | 315 ++++++++-- deployment/go.mod | 2 +- deployment/go.sum | 4 +- go.mod | 6 +- go.sum | 4 +- integration-tests/go.mod | 2 +- 
integration-tests/go.sum | 4 +- integration-tests/load/go.mod | 2 +- integration-tests/load/go.sum | 4 +- system-tests/lib/go.mod | 2 +- system-tests/lib/go.sum | 4 +- system-tests/tests/go.mod | 2 +- system-tests/tests/go.sum | 4 +- 19 files changed, 968 insertions(+), 63 deletions(-) create mode 100755 core/scripts/cre/environment/obs-up.sh create mode 100644 core/scripts/cre/environment/observability-overrides/dashboards/beholder/load_dashboard.json create mode 100755 core/scripts/cre/environment/observability-overrides/otel.yaml diff --git a/core/scripts/cre/environment/.gitignore b/core/scripts/cre/environment/.gitignore index 51e29dba061..0f8c787d2ff 100644 --- a/core/scripts/cre/environment/.gitignore +++ b/core/scripts/cre/environment/.gitignore @@ -1,4 +1,4 @@ -# ctf binary artifacts +# ctf binary artifacts — fully managed by the ctf binary, do not hand-edit compose/ blockscout/ binaries/ diff --git a/core/scripts/cre/environment/obs-up.sh b/core/scripts/cre/environment/obs-up.sh new file mode 100755 index 00000000000..fc72fad6bb8 --- /dev/null +++ b/core/scripts/cre/environment/obs-up.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# obs-up.sh — wrapper around `./bin/ctf obs up` that applies local overrides. +# +# CTF regenerates the compose/ directory on every `obs up`, overwriting any +# manual edits. This script re-applies our customisations afterwards and +# restarts only the affected containers so Grafana and the OTel collector pick +# them up without a full stack restart. +# +# Usage: +# ./obs-up.sh # bring the stack up (or recreate it) with patches +# ./obs-up.sh --down # tear down first, then bring up with patches +# +# Tracked overrides live in observability-overrides/ and are applied here. 
+ +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +OVERRIDES="$SCRIPT_DIR/observability-overrides" +COMPOSE_DIR="$SCRIPT_DIR/compose" + +# ── Optionally tear down first ──────────────────────────────────────────────── +if [[ "${1:-}" == "--down" ]]; then + echo "▼ Tearing down obs stack..." + "$SCRIPT_DIR/bin/ctf" obs down || true +fi + +# ── Bring up the stack (CTF regenerates compose/ here) ─────────────────────── +echo "▲ Starting obs stack..." +"$SCRIPT_DIR/bin/ctf" obs up + +# ── Apply otel-collector override ───────────────────────────────────────────── +echo "⚙ Applying otel.yaml override (resource_to_telemetry_conversion)..." +cp "$OVERRIDES/otel.yaml" "$COMPOSE_DIR/otel.yaml" + +# ── Apply dashboard ─────────────────────────────────────────────────────────── +echo "⚙ Copying Durable Emitter Load Test dashboard..." +mkdir -p "$COMPOSE_DIR/conf/provisioning/dashboards/beholder" +cp "$OVERRIDES/dashboards/beholder/load_dashboard.json" \ + "$COMPOSE_DIR/conf/provisioning/dashboards/beholder/load_dashboard.json" + +# ── Patch docker-compose.yaml to add beholder dashboard volume mount ────────── +echo "⚙ Patching docker-compose.yaml to add dashboard volume mount..." +python3 - "$COMPOSE_DIR/docker-compose.yaml" <<'PYEOF' +import sys, re + +path = sys.argv[1] +with open(path) as f: + content = f.read() + +marker = "./conf/provisioning/dashboards/beholder/load_dashboard.json:/var/lib/grafana/dashboards/beholder/load_dashboard.json" +if marker in content: + print(" dashboard volume mount already present, skipping.") + sys.exit(0) + +# Insert our mount after the last existing dashboard volume line. 
+content = re.sub( + r"([ \t]+- \./conf/provisioning/dashboards/workflow-engine/engine\.json:/var/lib/grafana/dashboards/workflow-engine/engine\.json)", + r"\1\n - ./conf/provisioning/dashboards/beholder/load_dashboard.json:/var/lib/grafana/dashboards/beholder/load_dashboard.json", + content, +) + +with open(path, "w") as f: + f.write(content) +print(" done.") +PYEOF + +# ── Recreate affected containers so new volume mounts and config take effect ── +# `restart` reuses the existing container spec (no new mounts); `up --force-recreate` +# rebuilds the container from the patched docker-compose.yaml. +echo "↺ Recreating otel-collector and grafana with updated config..." +docker compose -f "$COMPOSE_DIR/docker-compose.yaml" up -d --force-recreate otel-collector grafana + +echo "" +echo "✓ Obs stack is up with all overrides applied." +echo " Grafana: http://localhost:3000" +echo " Dashboard: http://localhost:3000/d/durable-emitter-load-test" diff --git a/core/scripts/cre/environment/observability-overrides/dashboards/beholder/load_dashboard.json b/core/scripts/cre/environment/observability-overrides/dashboards/beholder/load_dashboard.json new file mode 100644 index 00000000000..d53300c79ad --- /dev/null +++ b/core/scripts/cre/environment/observability-overrides/dashboards/beholder/load_dashboard.json @@ -0,0 +1,549 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", 
"value": 1 }] }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 8, "x": 0, "y": 0 }, + "id": 1, + "options": { + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "none" + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(beholder_durable_emitter_emit_success_total{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]))", + "refId": "A" + } + ], + "title": "Emit Success Rate", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 0 }, + "id": 4, + "options": { + "legend": { "calcs": ["lastNotNull", "mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(beholder_durable_emitter_emit_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval])))", + "legendFormat": "p50", + "refId": "A" + }, + { + "editorMode": "code", + 
"expr": "histogram_quantile(0.95, sum by (le) (rate(beholder_durable_emitter_emit_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval])))", + "legendFormat": "p95", + "refId": "B" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(beholder_durable_emitter_emit_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval])))", + "legendFormat": "p99", + "refId": "C" + }, + { + "editorMode": "code", + "expr": "rate(beholder_durable_emitter_emit_duration_seconds_sum{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]) / rate(beholder_durable_emitter_emit_duration_seconds_count{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval])", + "legendFormat": "avg", + "refId": "D" + } + ], + "title": "Emit Duration (DB insert)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 0 }, + "id": 7, + "options": { + "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": 
"sum(rate(beholder_durable_emitter_publish_retransmit_events_success_total{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]))", + "legendFormat": "events success", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum(rate(beholder_durable_emitter_publish_retransmit_events_failure_total{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]))", + "legendFormat": "events failure", + "refId": "B" + } + ], + "title": "Retransmit Events", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { "h": 6, "w": 8, "x": 0, "y": 4 }, + "id": 5, + "options": { + "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(beholder_durable_emitter_publish_immediate_success_total{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]))", + "legendFormat": "success", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum(rate(beholder_durable_emitter_publish_immediate_failure_total{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]))", + "legendFormat": 
"failure", + "refId": "B" + } + ], + "title": "Immediate Publish RPC", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { "h": 5, "w": 8, "x": 8, "y": 7 }, + "id": 9, + "options": { + "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(beholder_durable_emitter_store_operations_total{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]))", + "legendFormat": "store ops", + "refId": "A" + } + ], + "title": "Store Operations", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": 
"linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 5, "w": 8, "x": 16, "y": 7 }, + "id": 10, + "options": { + "legend": { "calcs": ["lastNotNull", "mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(beholder_durable_emitter_store_operation_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval])))", + "legendFormat": "p50", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(beholder_durable_emitter_store_operation_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval])))", + "legendFormat": "p99", + "refId": "B" + }, + { + "editorMode": "code", + "expr": "rate(beholder_durable_emitter_store_operation_duration_seconds_sum{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]) / rate(beholder_durable_emitter_store_operation_duration_seconds_count{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval])", + "legendFormat": "avg (all ops)", + "refId": "C" + } + ], + "title": "Store Operation Duration", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", 
"lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 10 }, + "id": 8, + "options": { + "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(beholder_durable_emitter_delivery_completed_total{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]))", + "legendFormat": "delivery completed", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum(rate(beholder_durable_emitter_expired_purged_total{exported_job=\"durable-emitter-loadtest\"}[$__rate_interval]))", + "legendFormat": "expired purged", + "refId": "B" + } + ], + "title": "Completion / Expiry", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "short" + }, + "overrides": [] + }, + 
"gridPos": { "h": 5, "w": 8, "x": 8, "y": 12 }, + "id": 11, + "options": { + "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "beholder_durable_emitter_queue_depth{exported_job=\"durable-emitter-loadtest\"}", + "legendFormat": "queue depth", + "refId": "A" + } + ], + "title": "Queue Depth", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 12 }, + "id": 17, + "options": { + "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "beholder_durable_emitter_process_cpu_user_seconds{exported_job=\"durable-emitter-loadtest\"}", + "legendFormat": "user cpu sec", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "beholder_durable_emitter_process_cpu_system_seconds{exported_job=\"durable-emitter-loadtest\"}", + "legendFormat": "system cpu sec", + "refId": "B" + } + ], + "title": 
"Process CPU Cumulative", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 6, "w": 8, "x": 0, "y": 17 }, + "id": 13, + "options": { + "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "beholder_durable_emitter_queue_oldest_pending_age_seconds{exported_job=\"durable-emitter-loadtest\"}", + "legendFormat": "oldest age", + "refId": "A" + } + ], + "title": "Queue Oldest Pending Age", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, 
"stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { "h": 6, "w": 8, "x": 8, "y": 17 }, + "id": 16, + "options": { + "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "beholder_durable_emitter_process_memory_heap_inuse_bytes{exported_job=\"durable-emitter-loadtest\"}", + "legendFormat": "heap inuse", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "beholder_durable_emitter_process_memory_heap_sys_bytes{exported_job=\"durable-emitter-loadtest\"}", + "legendFormat": "heap sys", + "refId": "B" + } + ], + "title": "Process Memory", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { "h": 6, "w": 8, "x": 16, "y": 17 }, + "id": 19, + "options": { + "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", 
"showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "beholder_durable_emitter_queue_payload_bytes{exported_job=\"durable-emitter-loadtest\"}", + "legendFormat": "payload bytes", + "refId": "A" + } + ], + "title": "Queue Payload Bytes", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 23 }, + "id": 20, + "options": { + "legend": { "calcs": ["lastNotNull", "mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "rate(beholder_durable_emitter_publish_duration_seconds_sum{exported_job=\"durable-emitter-loadtest\", phase=\"immediate\", error=\"false\"}[$__rate_interval]) / rate(beholder_durable_emitter_publish_duration_seconds_count{exported_job=\"durable-emitter-loadtest\", phase=\"immediate\", error=\"false\"}[$__rate_interval])", + "legendFormat": "immediate avg", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) 
(rate(beholder_durable_emitter_publish_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\", phase=\"immediate\", error=\"false\"}[$__rate_interval])))", + "legendFormat": "immediate p99", + "refId": "B" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(beholder_durable_emitter_publish_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\", phase=\"immediate\", error=\"false\"}[$__rate_interval])))", + "legendFormat": "immediate p50", + "refId": "C" + }, + { + "editorMode": "code", + "expr": "rate(beholder_durable_emitter_publish_duration_seconds_sum{exported_job=\"durable-emitter-loadtest\", phase=\"retransmit\", error=\"false\"}[$__rate_interval]) / rate(beholder_durable_emitter_publish_duration_seconds_count{exported_job=\"durable-emitter-loadtest\", phase=\"retransmit\", error=\"false\"}[$__rate_interval])", + "legendFormat": "retransmit avg", + "refId": "D" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(beholder_durable_emitter_publish_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\", phase=\"immediate\", error=\"true\"}[$__rate_interval])))", + "legendFormat": "immediate p99 (errors)", + "refId": "E" + } + ], + "title": "Chip Publish Duration (gRPC RPC)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + 
"mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 23 }, + "id": 18, + "options": { + "legend": { "calcs": ["lastNotNull", "mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "editorMode": "code", + "expr": "rate(beholder_durable_emitter_store_operation_duration_seconds_sum{exported_job=\"durable-emitter-loadtest\", operation=\"mark_delivered\"}[$__rate_interval]) / rate(beholder_durable_emitter_store_operation_duration_seconds_count{exported_job=\"durable-emitter-loadtest\", operation=\"mark_delivered\"}[$__rate_interval])", + "legendFormat": "mark_delivered avg", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(beholder_durable_emitter_store_operation_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\", operation=\"mark_delivered\"}[$__rate_interval])))", + "legendFormat": "mark_delivered p99", + "refId": "B" + }, + { + "editorMode": "code", + "expr": "rate(beholder_durable_emitter_store_operation_duration_seconds_sum{exported_job=\"durable-emitter-loadtest\", operation=\"insert\"}[$__rate_interval]) / rate(beholder_durable_emitter_store_operation_duration_seconds_count{exported_job=\"durable-emitter-loadtest\", operation=\"insert\"}[$__rate_interval])", + "legendFormat": "insert avg", + "refId": "C" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(beholder_durable_emitter_store_operation_duration_seconds_bucket{exported_job=\"durable-emitter-loadtest\", operation=\"insert\"}[$__rate_interval])))", + "legendFormat": "insert p99", + "refId": "D" + }, + { + "editorMode": "code", + "expr": 
"rate(beholder_durable_emitter_store_operation_duration_seconds_sum{exported_job=\"durable-emitter-loadtest\", operation=\"list_pending\"}[$__rate_interval]) / rate(beholder_durable_emitter_store_operation_duration_seconds_count{exported_job=\"durable-emitter-loadtest\", operation=\"list_pending\"}[$__rate_interval])", + "legendFormat": "list_pending avg", + "refId": "E" + } + ], + "title": "Pipeline Step Latency (insert / mark_delivered / list_pending)", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": ["beholder", "durable-emitter", "load-test"], + "templating": { "list": [] }, + "time": { "from": "now-15m", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Durable Emitter Load Test", + "uid": "durable-emitter-load-test", + "version": 1, + "weekStart": "" +} diff --git a/core/scripts/cre/environment/observability-overrides/otel.yaml b/core/scripts/cre/environment/observability-overrides/otel.yaml new file mode 100755 index 00000000000..b3a017dae1b --- /dev/null +++ b/core/scripts/cre/environment/observability-overrides/otel.yaml @@ -0,0 +1,43 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +exporters: + debug: + verbosity: detailed + sampling_initial: 5 + sampling_thereafter: 200 + otlphttp/logs: + endpoint: "http://loki:3100/otlp" + tls: + insecure: true + otlp: + endpoint: "http://tempo:4317" + tls: + insecure: true + prometheus: + endpoint: "0.0.0.0:8889" + +processors: + transform: + log_statements: + - context: log + statements: + - set(body, Base64Decode(log.body.string)) + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [debug, otlp] + logs: + receivers: [otlp] + processors: [transform] + exporters: [debug, otlphttp/logs] + metrics: + receivers: [otlp] + exporters: [debug, prometheus] \ No newline at end of file diff --git a/core/scripts/go.mod b/core/scripts/go.mod index c11bb69081f..686469f8d29 100644 --- 
a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -46,7 +46,7 @@ require ( github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 github.com/smartcontractkit/chainlink-data-streams v0.1.12 diff --git a/core/scripts/go.sum b/core/scripts/go.sum index 66bc07b7e1f..7b67c23a848 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1634,8 +1634,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d h1:BFUwEd2R/MkcsUn0stuzaEZZUgwWgnat7pebYCTnmE4= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore 
v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4= diff --git a/core/services/beholder/durable_emitter_load_test.go b/core/services/beholder/durable_emitter_load_test.go index e0ebfd98473..1128d6d1eef 100644 --- a/core/services/beholder/durable_emitter_load_test.go +++ b/core/services/beholder/durable_emitter_load_test.go @@ -576,9 +576,17 @@ func TestFullStack_SustainedThroughput(t *testing.T) { } } -// TestFullStack_ChipOutage simulates Chip going down during sustained load, -// then recovering. Measures: how events accumulate in Postgres, and how -// fast they drain once Chip comes back. +// TestFullStack_ChipOutage runs sustained emit load while injecting periodic +// Chip outages at fixed intervals. Each cycle: Chip is up for outagePeriod, +// then down for outageDuration, then recovers. The test measures how the DB +// queue accumulates during each outage and drains after each recovery, giving +// a real view of back-pressure, retransmit drain rate, and DB load over time. +// +// OTel metrics are exported when OTEL_EXPORTER_OTLP_ENDPOINT is set (same as +// TestFullStack_SustainedThroughput). 
Start the obs stack first: +// +// ./bin/ctf obs up +// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 go test ./core/services/beholder/ -run TestFullStack_ChipOutage -v -count=1 -timeout 20m func TestFullStack_ChipOutage(t *testing.T) { skipIfExternalChip(t, "inject Unavailable errors on mock server") @@ -587,61 +595,290 @@ func TestFullStack_ChipOutage(t *testing.T) { require.NotNil(t, srv) store := beholdersvc.NewPgDurableEventStore(db) + ctx := testutils.Context(t) + + pipe := &pipelineDeliveryStats{} cfg := beholder.DefaultDurableEmitterConfig() cfg.RetransmitInterval = 200 * time.Millisecond - cfg.RetransmitAfter = 100 * time.Millisecond - cfg.RetransmitBatchSize = 100 - cfg.PublishTimeout = 1 * time.Second + cfg.RetransmitAfter = 500 * time.Millisecond + cfg.RetransmitBatchSize = 200 + cfg.PublishTimeout = 2 * time.Second + cfg.Hooks = newPipelineHooks(pipe) + + // OTel metrics wiring (same as SustainedThroughput). + if otlpEndpoint := strings.TrimSpace(os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")); otlpEndpoint != "" { + otlpEndpoint = strings.TrimPrefix(otlpEndpoint, "http://") + exp, otelErr := otlpmetricgrpc.New(ctx, + otlpmetricgrpc.WithEndpoint(otlpEndpoint), + otlpmetricgrpc.WithInsecure(), + ) + require.NoError(t, otelErr, "otlp metric exporter") + res := sdkresource.NewWithAttributes("", + attribute.String("service.name", "durable-emitter-loadtest"), + ) + mp := sdkmetric.NewMeterProvider( + sdkmetric.WithResource(res), + sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exp, sdkmetric.WithInterval(5*time.Second))), + ) + t.Cleanup(func() { _ = mp.Shutdown(context.Background()) }) + bc := beholder.NewNoopClient() + bc.MeterProvider = mp + bc.Meter = mp.Meter("beholder") + beholder.SetClient(bc) + t.Cleanup(func() { beholder.SetClient(beholder.NewNoopClient()) }) + cfg.Metrics = &beholder.DurableEmitterMetricsConfig{ + PollInterval: 5 * time.Second, + RecordProcessStats: true, + } + t.Logf("OTel metrics enabled → %s (5s push interval, Grafana: 
http://localhost:3000)", otlpEndpoint) + } em, err := beholder.NewDurableEmitter(store, client, cfg, logger.Test(t)) require.NoError(t, err) - - ctx := testutils.Context(t) em.Start(ctx) defer em.Close() - // Phase 1: Chip is available — emit 200 events. - for i := 0; i < 200; i++ { - require.NoError(t, em.Emit(ctx, []byte("pre-outage"), loadEmitAttrs()...)) + // Outage schedule. + const ( + outageCycles = 3 + upDuration = 20 * time.Second // Chip healthy between outages + outageDuration = 10 * time.Second // Chip unavailable per cycle + emitConcurrency = 5 + emitRatePerWorker = 200 // events/s target per worker (throttled) + ) + totalEmitted := outageCycles * int(upDuration.Seconds()+outageDuration.Seconds()) * + emitConcurrency * emitRatePerWorker + // Cap to a sane ceiling so the test completes quickly. + if totalEmitted > 50_000 { + totalEmitted = 50_000 } - require.Eventually(t, func() bool { - pending, _ := store.ListPending(ctx, time.Now().Add(time.Hour), 1) - return len(pending) == 0 - }, 10*time.Second, 50*time.Millisecond, "pre-outage events should all deliver") - t.Logf("Phase 1: %d events delivered pre-outage", srv.totalEvents.Load()) - // Phase 2: Chip goes down — emit 500 more events. - srv.setPublishErr(status.Error(codes.Unavailable, "chip down")) - srv.setBatchErr(status.Error(codes.Unavailable, "chip down")) + t.Logf("ChipOutage: %d cycles up=%s down=%s workers=%d target=%d events", + outageCycles, upDuration, outageDuration, emitConcurrency, totalEmitted) + + // CPU snapshot. + var cpuStart syscall.Rusage + _ = syscall.Getrusage(syscall.RUSAGE_SELF, &cpuStart) - outageStart := time.Now() - for i := 0; i < 500; i++ { - require.NoError(t, em.Emit(ctx, []byte("during-outage"), loadEmitAttrs()...)) + // Queue depth sampler. 
+ var queueMax, queueSum, queueCnt atomic.Int64 + samplerCtx, samplerCancel := context.WithCancel(ctx) + go func() { + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-samplerCtx.Done(): + return + case <-ticker.C: + rows, _, err := queuePayloadStats(db, samplerCtx) + if err != nil { + continue + } + queueCnt.Add(1) + queueSum.Add(rows) + for { + old := queueMax.Load() + if rows <= old || queueMax.CompareAndSwap(old, rows) { + break + } + } + } + } + }() + + // Outage injector: runs cycles of up/down in a background goroutine. + type cycleResult struct { + cycle int + outageStart time.Time + recoveryStart time.Time + drainElapsed time.Duration + peakQueue int64 + drainRate float64 // events/sec } - t.Logf("Phase 2: emitted 500 events during outage in %s", time.Since(outageStart).Round(time.Millisecond)) + cycleResults := make([]cycleResult, 0, outageCycles) + var cyclesMu sync.Mutex - // Verify events are accumulating in Postgres. - time.Sleep(500 * time.Millisecond) // let some retransmits fail - pending, err := store.ListPending(ctx, time.Now().Add(time.Hour), 1000) - require.NoError(t, err) - t.Logf("Phase 2: %d events pending in Postgres during outage", len(pending)) - assert.Greater(t, len(pending), 0, "events should accumulate during outage") + outageCtx, outageCancel := context.WithCancel(ctx) + defer outageCancel() - // Phase 3: Chip recovers. - srv.setPublishErr(nil) - srv.setBatchErr(nil) - recoveryStart := time.Now() + go func() { + for cycle := 1; cycle <= outageCycles; cycle++ { + // Wait for "up" phase. + select { + case <-outageCtx.Done(): + return + case <-time.After(upDuration): + } + + // Take down Chip. + srv.setPublishErr(status.Error(codes.Unavailable, "chip down")) + srv.setBatchErr(status.Error(codes.Unavailable, "chip down")) + outStart := time.Now() + t.Logf("↓ Cycle %d/%d: Chip DOWN at %s", cycle, outageCycles, outStart.Format("15:04:05")) + + // Measure peak queue during outage. 
+ var cyclePeak int64 + peakTicker := time.NewTicker(250 * time.Millisecond) + outageTimer := time.NewTimer(outageDuration) + peakLoop: + for { + select { + case <-outageCtx.Done(): + peakTicker.Stop() + outageTimer.Stop() + return + case <-outageTimer.C: + peakTicker.Stop() + break peakLoop + case <-peakTicker.C: + rows, _, _ := queuePayloadStats(db, outageCtx) + if rows > cyclePeak { + cyclePeak = rows + } + } + } + // Restore Chip. + srv.setPublishErr(nil) + srv.setBatchErr(nil) + recovStart := time.Now() + t.Logf("↑ Cycle %d/%d: Chip UP at %s (was down %s, peak queue %d rows)", + cycle, outageCycles, recovStart.Format("15:04:05"), + recovStart.Sub(outStart).Round(time.Millisecond), cyclePeak) + + // Wait for drain. + drainDeadline := time.Now().Add(60 * time.Second) + var drainElapsed time.Duration + for time.Now().Before(drainDeadline) { + pending, _ := store.ListPending(outageCtx, time.Now().Add(time.Hour), 1) + if len(pending) == 0 { + drainElapsed = time.Since(recovStart) + break + } + time.Sleep(100 * time.Millisecond) + } + + cyclesMu.Lock() + cycleResults = append(cycleResults, cycleResult{ + cycle: cycle, + outageStart: outStart, + recoveryStart: recovStart, + drainElapsed: drainElapsed, + peakQueue: cyclePeak, + drainRate: float64(cyclePeak) / max(drainElapsed.Seconds(), 0.001), + }) + cyclesMu.Unlock() + } + }() + + // Emit loop: concurrent workers emit at a steady rate until totalEmitted. 
+ testStart := time.Now() + payload := buildLoadTestPayload(256) + var emitErrors atomic.Int64 + var emitCount atomic.Int64 + var emitWg sync.WaitGroup + eventsPerWorker := totalEmitted / emitConcurrency + + for w := 0; w < emitConcurrency; w++ { + emitWg.Add(1) + go func() { + defer emitWg.Done() + for i := 0; i < eventsPerWorker; i++ { + if err := em.Emit(ctx, payload, loadEmitAttrs()...); err != nil { + emitErrors.Add(1) + } + emitCount.Add(1) + } + }() + } + emitWg.Wait() + outageCancel() // stop outage injector + emitElapsed := time.Since(testStart) + + // Wait for final drain. require.Eventually(t, func() bool { pending, _ := store.ListPending(ctx, time.Now().Add(time.Hour), 1) return len(pending) == 0 - }, 30*time.Second, 100*time.Millisecond, "all events should drain after recovery") + }, 60*time.Second, 100*time.Millisecond, "all events should drain after final recovery") + + samplerCancel() + totalElapsed := time.Since(testStart) + + // CPU diff. + var cpuEnd syscall.Rusage + _ = syscall.Getrusage(syscall.RUSAGE_SELF, &cpuEnd) + cpuUserSec := (float64(cpuEnd.Utime.Sec) + float64(cpuEnd.Utime.Usec)/1e6) - + (float64(cpuStart.Utime.Sec) + float64(cpuStart.Utime.Usec)/1e6) + cpuSysSec := (float64(cpuEnd.Stime.Sec) + float64(cpuEnd.Stime.Usec)/1e6) - + (float64(cpuStart.Stime.Sec) + float64(cpuStart.Stime.Usec)/1e6) + cpuTotalSec := cpuUserSec + cpuSysSec + cpuUtilPct := 100.0 * cpuTotalSec / (totalElapsed.Seconds() * float64(runtime.GOMAXPROCS(0))) + + var queueAvg float64 + if n := queueCnt.Load(); n > 0 { + queueAvg = float64(queueSum.Load()) / float64(n) + } + + pubMean := durMs(pipe.immPub.mean()) + delMean := durMs(pipe.immDel.mean()) + + t.Logf("╔══════════════════════════════════════════════════════════════╗") + t.Logf("║ CHIP OUTAGE TEST RESULTS ║") + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ EMIT ║") + t.Logf("║ Events emitted: %-42d ║", emitCount.Load()) + t.Logf("║ Errors: %-42d ║", emitErrors.Load()) + 
t.Logf("║ Elapsed: %-42s ║", emitElapsed.Round(time.Millisecond)) + t.Logf("║ Rate: %-42s ║", fmt.Sprintf("%.0f events/sec", float64(emitCount.Load())/emitElapsed.Seconds())) + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ DELIVERY ║") + t.Logf("║ Server received:%-42d ║", srv.totalEvents.Load()) + t.Logf("║ Total elapsed: %-42s ║", totalElapsed.Round(time.Millisecond)) + t.Logf("║ E2E rate: %-42s ║", fmt.Sprintf("%.0f events/sec", float64(emitCount.Load())/totalElapsed.Seconds())) + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ OUTAGE CYCLES (up=%s / down=%s per cycle) ║", upDuration, outageDuration) + t.Logf("║ %-4s %-12s %-12s %-12s %-12s ║", "Cyc", "Peak queue", "Drain time", "Drain rate", "") + cyclesMu.Lock() + for _, r := range cycleResults { + drainStr := r.drainElapsed.Round(time.Millisecond).String() + if r.drainElapsed == 0 { + drainStr = "timeout" + } + t.Logf("║ %-4d %-12d %-12s %-12s ║", + r.cycle, r.peakQueue, drainStr, + fmt.Sprintf("%.0f/s", r.drainRate)) + } + cyclesMu.Unlock() + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ PENDING QUEUE DEPTH (sampled every 200ms) ║") + t.Logf("║ Max: %-42d ║", queueMax.Load()) + t.Logf("║ Avg: %-42s ║", fmt.Sprintf("%.1f rows", queueAvg)) + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ PIPELINE LATENCY (immediate path) ║") + t.Logf("║ %-10s %-10s %-10s %-10s ║", "n", "p50 (ms)", "p99 (ms)", "mean (ms)") + t.Logf("║ Publish: %-10d %-10.2f %-10.2f %-10.2f ║", + pipe.immPub.count(), durMs(pipe.immPub.percentile(0.50)), + durMs(pipe.immPub.percentile(0.99)), pubMean) + t.Logf("║ MarkDelivered: %-10d %-10.2f %-10.2f %-10.2f ║", + pipe.immDel.count(), durMs(pipe.immDel.percentile(0.50)), + durMs(pipe.immDel.percentile(0.99)), delMean) + if pipe.batchPub.count() > 0 { + t.Logf("║ Retransmit: %-10d %-10.2f %-10s %-10.2f ║", + pipe.batchPub.count(), 
durMs(pipe.batchPub.percentile(0.50)), "—", + durMs(pipe.batchPub.mean())) + } + t.Logf("╠══════════════════════════════════════════════════════════════╣") + t.Logf("║ PROCESS CPU (getrusage, GOMAXPROCS=%d) ║", runtime.GOMAXPROCS(0)) + t.Logf("║ User: %-42s ║", fmt.Sprintf("%.2f s", cpuUserSec)) + t.Logf("║ System: %-42s ║", fmt.Sprintf("%.2f s", cpuSysSec)) + t.Logf("║ Total: %-42s ║", fmt.Sprintf("%.2f s", cpuTotalSec)) + t.Logf("║ Utilization: %-42s ║", fmt.Sprintf("%.1f%% of %d cores × %.1fs wall", cpuUtilPct, runtime.GOMAXPROCS(0), totalElapsed.Seconds())) + t.Logf("╚══════════════════════════════════════════════════════════════╝") - recoveryElapsed := time.Since(recoveryStart) - t.Logf("Phase 3: drained in %s after recovery (%.0f events/sec drain rate)", - recoveryElapsed.Round(time.Millisecond), - float64(500)/recoveryElapsed.Seconds()) - t.Logf("Total server events: %d", srv.totalEvents.Load()) + assert.Equal(t, int64(0), emitErrors.Load(), "no emit errors expected") + assert.GreaterOrEqual(t, srv.totalEvents.Load(), int64(totalEmitted), + "server should have received all events (may include retransmit duplicates)") } // TestFullStack_SlowChip simulates a slow Chip server (high latency per diff --git a/deployment/go.mod b/deployment/go.mod index aca7e8409a0..3c6d1aad8ac 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -44,7 +44,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 
github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/deployment/go.sum b/deployment/go.sum index d94bdb3dc2e..cfaa74cec29 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -1387,8 +1387,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317175207-e9ff89561326/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d h1:BFUwEd2R/MkcsUn0stuzaEZZUgwWgnat7pebYCTnmE4= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/go.mod b/go.mod index 6895d3e2c18..1840d958e28 100644 --- a/go.mod +++ b/go.mod @@ -85,7 +85,7 @@ require ( github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260224214816-cb23ec38649f github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5 github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a - 
github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 github.com/smartcontractkit/chainlink-data-streams v0.1.12 @@ -129,8 +129,10 @@ require ( go.dedis.ch/kyber/v3 v3.1.0 go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.49.0 go.opentelemetry.io/otel v1.42.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 go.opentelemetry.io/otel/log v0.15.0 go.opentelemetry.io/otel/metric v1.42.0 + go.opentelemetry.io/otel/sdk v1.41.0 go.opentelemetry.io/otel/sdk/metric v1.41.0 go.opentelemetry.io/otel/trace v1.42.0 go.uber.org/atomic v1.11.0 @@ -405,7 +407,6 @@ require ( go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.12.2 // indirect go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.12.2 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.36.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0 // indirect @@ -413,7 +414,6 @@ require ( go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.13.0 // indirect go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0 // indirect go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.36.0 // indirect - go.opentelemetry.io/otel/sdk v1.41.0 // indirect go.opentelemetry.io/otel/sdk/log v0.15.0 // indirect go.opentelemetry.io/proto/otlp v1.9.0 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/go.sum b/go.sum index 286496b6c6f..a5f30b6be56 100644 --- a/go.sum +++ b/go.sum @@ -1235,8 
+1235,8 @@ github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5/go.mod h1:xtZNi6pOKdC3sLvokDvXOhgHzT+cyBqH/gWwvxTxqrg= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d h1:BFUwEd2R/MkcsUn0stuzaEZZUgwWgnat7pebYCTnmE4= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index 1048f491e9c..1a2836596c8 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -50,7 +50,7 @@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 + github.com/smartcontractkit/chainlink-common 
v0.10.1-0.20260402193951-734fe94f345d github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/integration-tests/go.sum b/integration-tests/go.sum index 0e6652c69bb..b9fefab6e3d 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1626,8 +1626,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d h1:BFUwEd2R/MkcsUn0stuzaEZZUgwWgnat7pebYCTnmE4= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index 9811a616d0c..c38a2fed40d 100644 --- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -31,7 +31,7 
@@ require ( github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260317185256-d5f7db87ae70 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20260119171452-39c98c3b33cd diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index f5c55c5efd5..d8c00cb74af 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1604,8 +1604,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d h1:BFUwEd2R/MkcsUn0stuzaEZZUgwWgnat7pebYCTnmE4= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 
h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/lib/go.mod b/system-tests/lib/go.mod index 93a317c5a01..11fababb9bd 100644 --- a/system-tests/lib/go.mod +++ b/system-tests/lib/go.mod @@ -32,7 +32,7 @@ require ( github.com/sethvargo/go-retry v0.3.0 github.com/smartcontractkit/chain-selectors v1.0.97 github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260310183131-8d0f0e383288 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260318010722-59d4165024f1 diff --git a/system-tests/lib/go.sum b/system-tests/lib/go.sum index ee26a14a654..13357422662 100644 --- a/system-tests/lib/go.sum +++ b/system-tests/lib/go.sum @@ -1597,8 +1597,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= 
+github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d h1:BFUwEd2R/MkcsUn0stuzaEZZUgwWgnat7pebYCTnmE4= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg= diff --git a/system-tests/tests/go.mod b/system-tests/tests/go.mod index 072ba995fba..7dea8390251 100644 --- a/system-tests/tests/go.mod +++ b/system-tests/tests/go.mod @@ -54,7 +54,7 @@ require ( github.com/rs/zerolog v1.34.0 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chain-selectors v1.0.97 - github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 + github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d github.com/smartcontractkit/chainlink-common/keystore v1.0.2 github.com/smartcontractkit/chainlink-data-streams v0.1.12 github.com/smartcontractkit/chainlink-deployments-framework v0.86.3 diff --git a/system-tests/tests/go.sum b/system-tests/tests/go.sum index aad926fea29..ef39523cc45 100644 --- a/system-tests/tests/go.sum +++ b/system-tests/tests/go.sum @@ -1781,8 +1781,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7 github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260317185256-d5f7db87ae70/go.mod h1:P0/tjeeIIxfsBupk5MneRjq5uI9mj+ZQpMpYnFla6WM= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a h1:6c6WDGfZB2ehsw9/nBuuKNCw89+rCav2k9so41pIu4o= github.com/smartcontractkit/chainlink-ccv v0.0.0-20260317124520-6b2931b8cd0a/go.mod h1:4+ngpFXBJrxcKR0jd2CUZFYJVtL8pPJfBXHbGfSNJeA= 
-github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4 h1:pENQ34S0pYSOIHGkPIKI9gJvkb25k+GFBnwuUgqOpsI= -github.com/smartcontractkit/chainlink-common v0.10.1-0.20260401135924-7678416be8a4/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d h1:BFUwEd2R/MkcsUn0stuzaEZZUgwWgnat7pebYCTnmE4= +github.com/smartcontractkit/chainlink-common v0.10.1-0.20260402193951-734fe94f345d/go.mod h1:0ghbAr7tRO0tT5ZqBXhOyzgUO37tNNe33Yn0hskauVM= github.com/smartcontractkit/chainlink-common/keystore v1.0.2 h1:AWisx4JT3QV8tcgh6J5NCrex+wAgTYpWyHsyNPSXzsQ= github.com/smartcontractkit/chainlink-common/keystore v1.0.2/go.mod h1:rSkIHdomyak3YnUtXLenl6poIq8q0V3UZPiiyYqPdGA= github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4=