From 6163b6ad4582b5341e37907e9e7ecee32340c061 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 27 Feb 2026 16:22:17 +0200
Subject: [PATCH 01/28] 1st commit

---
 pkg/beholder/chip_ingress_batch_emitter.go    | 148 ++++++++
 .../chip_ingress_batch_emitter_test.go        | 351 ++++++++++++++++++
 pkg/beholder/chip_ingress_emitter_worker.go   | 118 ++++++
 pkg/beholder/client.go                        |  18 +-
 pkg/beholder/config.go                        |  15 +-
 pkg/beholder/config_test.go                   |   2 +-
 pkg/beholder/dual_source_emitter.go           |  19 +-
 7 files changed, 649 insertions(+), 22 deletions(-)
 create mode 100644 pkg/beholder/chip_ingress_batch_emitter.go
 create mode 100644 pkg/beholder/chip_ingress_batch_emitter_test.go
 create mode 100644 pkg/beholder/chip_ingress_emitter_worker.go

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
new file mode 100644
index 0000000000..37949069cf
--- /dev/null
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -0,0 +1,148 @@
+package beholder
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
+	"github.com/smartcontractkit/chainlink-common/pkg/logger"
+	"github.com/smartcontractkit/chainlink-common/pkg/services"
+	"github.com/smartcontractkit/chainlink-common/pkg/timeutil"
+)
+
+// ChipIngressBatchEmitter buffers events per (domain, entity) and flushes them
+// via chipingress.Client.PublishBatch on a periodic interval.
+// It satisfies the Emitter interface so it can be used as a drop-in replacement
+// for ChipIngressEmitter.
+type ChipIngressBatchEmitter struct {
+	services.Service
+	eng *services.Engine
+
+	client chipingress.Client
+
+	workers      map[string]*chipIngressEmitterWorker
+	workersMutex sync.RWMutex
+
+	bufferSize   uint
+	maxBatchSize uint
+	sendInterval time.Duration
+	sendTimeout  time.Duration
+}
+
+// NewChipIngressBatchEmitter creates a batch emitter backed by the given chipingress client.
+// Call Start() to begin health monitoring, and Close() to stop all workers.
+func NewChipIngressBatchEmitter(client chipingress.Client, lggr logger.Logger, cfg Config) (*ChipIngressBatchEmitter, error) {
+	if client == nil {
+		return nil, fmt.Errorf("chip ingress client is nil")
+	}
+
+	bufferSize := cfg.ChipIngressBufferSize
+	if bufferSize == 0 {
+		bufferSize = 100
+	}
+	maxBatchSize := cfg.ChipIngressMaxBatchSize
+	if maxBatchSize == 0 {
+		maxBatchSize = 50
+	}
+	sendInterval := cfg.ChipIngressSendInterval
+	if sendInterval == 0 {
+		sendInterval = 500 * time.Millisecond
+	}
+	sendTimeout := cfg.ChipIngressSendTimeout
+	if sendTimeout == 0 {
+		sendTimeout = 10 * time.Second
+	}
+
+	e := &ChipIngressBatchEmitter{
+		client:       client,
+		workers:      make(map[string]*chipIngressEmitterWorker),
+		bufferSize:   bufferSize,
+		maxBatchSize: maxBatchSize,
+		sendInterval: sendInterval,
+		sendTimeout:  sendTimeout,
+	}
+
+	e.Service, e.eng = services.Config{
+		Name:  "ChipIngressBatchEmitter",
+		Start: e.start,
+	}.NewServiceEngine(lggr)
+
+	return e, nil
+}
+
+func (e *ChipIngressBatchEmitter) start(_ context.Context) error {
+	return nil
+}
+
+// Emit extracts (domain, entity) from the attributes, routes the event to the
+// appropriate per-(domain, entity) worker, and returns immediately.
+// If the worker's buffer is full, the event is dropped and a warning is logged.
+func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
+	domain, entity, err := ExtractSourceAndType(attrKVs...)
+	if err != nil {
+		return err
+	}
+
+	attributes := newAttributes(attrKVs...)
+
+	worker := e.findOrCreateWorker(domain, entity)
+
+	payload := emitterPayload{
+		body:       body,
+		attributes: attributes,
+		domain:     domain,
+		entity:     entity,
+	}
+
+	select {
+	case worker.ch <- payload:
+		worker.dropCount.Store(0)
+	case <-ctx.Done():
+		return ctx.Err()
+	default:
+		worker.logBufferFullWithExpBackoff(payload)
+	}
+
+	return nil
+}
+
+// findOrCreateWorker returns the worker for the given (domain, entity) pair,
+// creating one with a new buffered channel and GoTick flush loop if it doesn't exist.
+func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chipIngressEmitterWorker {
+	workerKey := fmt.Sprintf("%s_%s", domain, entity)
+
+	e.workersMutex.RLock()
+	worker, found := e.workers[workerKey]
+	e.workersMutex.RUnlock()
+
+	if found {
+		return worker
+	}
+
+	e.workersMutex.Lock()
+	defer e.workersMutex.Unlock()
+
+	// Double-check after acquiring write lock
+	if worker, found = e.workers[workerKey]; found {
+		return worker
+	}
+
+	worker = newChipIngressEmitterWorker(
+		e.client,
+		make(chan emitterPayload, e.bufferSize),
+		domain,
+		entity,
+		e.maxBatchSize,
+		e.sendTimeout,
+		e.eng,
+	)
+
+	e.eng.GoTick(timeutil.NewTicker(func() time.Duration {
+		return e.sendInterval
+	}), worker.Send)
+
+	e.workers[workerKey] = worker
+	return worker
+}
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
new file mode 100644
index 0000000000..02a6a22789
--- /dev/null
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -0,0 +1,351 @@
+package beholder_test
+
+import (
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/mock"
+	"github.com/stretchr/testify/require"
+
+	"github.com/smartcontractkit/chainlink-common/pkg/beholder"
+	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
+	"github.com/smartcontractkit/chainlink-common/pkg/chipingress/mocks"
+	"github.com/smartcontractkit/chainlink-common/pkg/logger"
+)
+
+func newTestConfig() beholder.Config {
+	return beholder.Config{
+		ChipIngressBufferSize:   10,
+		ChipIngressMaxBatchSize: 5,
+		ChipIngressSendInterval: 50 * time.Millisecond,
+		ChipIngressSendTimeout:  5 * time.Second,
+	}
+}
+
+func newTestLogger(t *testing.T) logger.Logger {
+	t.Helper()
+	lggr, err := logger.New()
+	require.NoError(t, err)
+	return lggr
+}
+
+func TestNewChipIngressBatchEmitter(t *testing.T) {
+	t.Run("happy path", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), newTestConfig())
+		require.NoError(t, err)
+		assert.NotNil(t, emitter)
+	})
+
+	t.Run("returns error when client is nil", func(t *testing.T) {
+		emitter, err := beholder.NewChipIngressBatchEmitter(nil, newTestLogger(t), newTestConfig())
+		assert.Error(t, err)
+		assert.Nil(t, emitter)
+	})
+}
+
+func TestChipIngressBatchEmitter_Emit(t *testing.T) {
+	t.Run("returns error when domain/entity missing", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), newTestConfig())
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+		defer emitter.Close() //nolint:errcheck
+
+		err = emitter.Emit(t.Context(), []byte("test"), "bad_key", "bad_value")
+		assert.Error(t, err)
+	})
+
+	t.Run("enqueues and does not call PublishBatch immediately", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), newTestConfig())
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+		defer emitter.Close() //nolint:errcheck
+
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "test-domain",
+			beholder.AttrKeyEntity, "test-entity",
+		)
+		require.NoError(t, err)
+
+		// PublishBatch should NOT have been called yet (event is just buffered)
+		clientMock.AssertNotCalled(t, "PublishBatch", mock.Anything, mock.Anything)
+	})
+}
+
+func TestChipIngressBatchEmitter_BatchAssembly(t *testing.T) {
+	t.Run("events are batched and sent via PublishBatch", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		var receivedBatches []*chipingress.CloudEventBatch
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				receivedBatches = append(receivedBatches, batch)
+			}).
+			Return(nil, nil)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		// Wait for flush to occur
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return len(receivedBatches) > 0
+		}, 2*time.Second, 10*time.Millisecond)
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+
+		totalEvents := 0
+		for _, batch := range receivedBatches {
+			totalEvents += len(batch.Events)
+		}
+		assert.Equal(t, 3, totalEvents)
+	})
+}
+
+func TestChipIngressBatchEmitter_MaxBatchSize(t *testing.T) {
+	t.Run("batch is capped at maxBatchSize", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		var batchSizes []int
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				batchSizes = append(batchSizes, len(batch.Events))
+			}).
+			Return(nil, nil)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressBufferSize = 20
+		cfg.ChipIngressMaxBatchSize = 3
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 7; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		// Wait for all events to be flushed
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			total := 0
+			for _, s := range batchSizes {
+				total += s
+			}
+			return total >= 7
+		}, 2*time.Second, 10*time.Millisecond)
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		for _, size := range batchSizes {
+			assert.LessOrEqual(t, size, 3, "batch size should not exceed maxBatchSize")
+		}
+	})
+}
+
+func TestChipIngressBatchEmitter_PerDomainEntityIsolation(t *testing.T) {
+	t.Run("separate workers for different domain/entity pairs", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		domainEntitySeen := make(map[string]int)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				for _, event := range batch.Events {
+					key := event.Source + "/" + event.Type
+					domainEntitySeen[key] += 1
+				}
+			}).
+			Return(nil, nil)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		// Emit events for two different domain/entity pairs
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("workflow"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "WorkflowEvent",
+			)
+			require.NoError(t, err)
+		}
+		for i := 0; i < 2; i++ {
+			err = emitter.Emit(t.Context(), []byte("bridge"),
+				beholder.AttrKeyDomain, "data-feeds",
+				beholder.AttrKeyEntity, "BridgeStatus",
+			)
+			require.NoError(t, err)
+		}
+
+		// Wait for both to be flushed
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return domainEntitySeen["platform/WorkflowEvent"] >= 3 &&
+				domainEntitySeen["data-feeds/BridgeStatus"] >= 2
+		}, 2*time.Second, 10*time.Millisecond)
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		assert.Equal(t, 3, domainEntitySeen["platform/WorkflowEvent"])
+		assert.Equal(t, 2, domainEntitySeen["data-feeds/BridgeStatus"])
+	})
+}
+
+func TestChipIngressBatchEmitter_BufferFull(t *testing.T) {
+	t.Run("events are dropped when buffer is full", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		// Block PublishBatch so the buffer fills up
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, nil).
+			Maybe()
+
+		cfg := newTestConfig()
+		cfg.ChipIngressBufferSize = 3
+		cfg.ChipIngressSendInterval = 10 * time.Second // very long interval to prevent flushing
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+		defer emitter.Close() //nolint:errcheck
+
+		// Fill the buffer (3 events)
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		// This should not error (it drops silently), but the event won't be delivered
+		err = emitter.Emit(t.Context(), []byte("dropped"),
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		assert.NoError(t, err)
+	})
+}
+
+func TestChipIngressBatchEmitter_Lifecycle(t *testing.T) {
+	t.Run("start and close cleanly", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, nil).
+			Maybe()
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), newTestConfig())
+		require.NoError(t, err)
+
+		require.NoError(t, emitter.Start(t.Context()))
+
+		// Emit a few events to create workers
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		// Close should not hang or error
+		require.NoError(t, emitter.Close())
+	})
+}
+
+func TestChipIngressBatchEmitter_CloudEventFormat(t *testing.T) {
+	t.Run("CloudEvents have correct source, type, and data", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		var receivedBatch *chipingress.CloudEventBatch
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				receivedBatch = args.Get(1).(*chipingress.CloudEventBatch)
+			}).
+			Return(nil, nil)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		err = emitter.Emit(t.Context(), []byte("test-payload"),
+			beholder.AttrKeyDomain, "my-domain",
+			beholder.AttrKeyEntity, "my-entity",
+		)
+		require.NoError(t, err)
+
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return receivedBatch != nil
+		}, 2*time.Second, 10*time.Millisecond)
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		require.Len(t, receivedBatch.Events, 1)
+
+		event := receivedBatch.Events[0]
+		assert.Equal(t, "my-domain", event.Source)
+		assert.Equal(t, "my-entity", event.Type)
+		assert.NotEmpty(t, event.Id)
+	})
+}
diff --git a/pkg/beholder/chip_ingress_emitter_worker.go b/pkg/beholder/chip_ingress_emitter_worker.go
new file mode 100644
index 0000000000..6b9c61759e
--- /dev/null
+++ b/pkg/beholder/chip_ingress_emitter_worker.go
@@ -0,0 +1,118 @@
+package beholder
+
+import (
+	"context"
+	"fmt"
+	"sync/atomic"
+	"time"
+
+	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
+	"github.com/smartcontractkit/chainlink-common/pkg/logger"
+)
+
+// emitterPayload holds a single event to be batched and sent via chip ingress.
+type emitterPayload struct {
+	body       []byte
+	attributes map[string]any
+	domain     string
+	entity     string
+}
+
+// chipIngressEmitterWorker buffers events for a single (domain, entity) pair
+// and flushes them via PublishBatch on a periodic interval.
+type chipIngressEmitterWorker struct {
+	client       chipingress.Client
+	ch           chan emitterPayload
+	domain       string
+	entity       string
+	maxBatchSize uint
+	sendTimeout  time.Duration
+	lggr         logger.Logger
+	dropCount    atomic.Uint32
+}
+
+func newChipIngressEmitterWorker(
+	client chipingress.Client,
+	ch chan emitterPayload,
+	domain string,
+	entity string,
+	maxBatchSize uint,
+	sendTimeout time.Duration,
+	lggr logger.Logger,
+) *chipIngressEmitterWorker {
+	return &chipIngressEmitterWorker{
+		client:       client,
+		ch:           ch,
+		domain:       domain,
+		entity:       entity,
+		maxBatchSize: maxBatchSize,
+		sendTimeout:  sendTimeout,
+		lggr:         logger.Named(lggr, "ChipIngressEmitterWorker"),
+	}
+}
+
+// Send drains the channel and sends a batch. Called periodically by GoTick.
+func (w *chipIngressEmitterWorker) Send(ctx context.Context) {
+	if len(w.ch) == 0 {
+		return
+	}
+
+	batch := w.buildBatch()
+	if batch == nil || len(batch.Events) == 0 {
+		return
+	}
+
+	ctx, cancel := context.WithTimeout(ctx, w.sendTimeout)
+	defer cancel()
+
+	_, err := w.client.PublishBatch(ctx, batch)
+	if err != nil {
+		w.lggr.Warnf("could not send batch via chip ingress: %v", err)
+		return
+	}
+}
+
+// buildBatch drains the channel up to maxBatchSize and converts payloads to a CloudEventBatch.
+func (w *chipIngressEmitterWorker) buildBatch() *chipingress.CloudEventBatch {
+	var events []chipingress.CloudEvent
+
+	for len(w.ch) > 0 && len(events) < int(w.maxBatchSize) { // #nosec G115
+		payload := <-w.ch
+		event, err := w.payloadToEvent(payload)
+		if err != nil {
+			w.lggr.Warnf("failed to build CloudEvent, dropping: %v", err)
+			continue
+		}
+		events = append(events, event)
+	}
+
+	if len(events) == 0 {
+		return nil
+	}
+
+	batch, err := chipingress.EventsToBatch(events)
+	if err != nil {
+		w.lggr.Warnf("failed to convert events to batch: %v", err)
+		return nil
+	}
+
+	return batch
+}
+
+func (w *chipIngressEmitterWorker) payloadToEvent(payload emitterPayload) (chipingress.CloudEvent, error) {
+	event, err := chipingress.NewEvent(payload.domain, payload.entity, payload.body, payload.attributes)
+	if err != nil {
+		return chipingress.CloudEvent{}, fmt.Errorf("failed to create CloudEvent: %w", err)
+	}
+	return event, nil
+}
+
+// logBufferFullWithExpBackoff logs at 1, 2, 4, 8, 16, 32, 64, 100, 200, 300, ...
+// to avoid flooding logs when the buffer is persistently full.
+func (w *chipIngressEmitterWorker) logBufferFullWithExpBackoff(payload emitterPayload) {
+	count := w.dropCount.Add(1)
+	if count > 0 && (count%100 == 0 || count&(count-1) == 0) {
+		w.lggr.Warnf("chip ingress emitter buffer full, dropping event (domain=%s, entity=%s, droppedCount=%d)",
+			payload.domain, payload.entity, count)
+	}
+}
diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index 328d877c49..f10c01ae29 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"time"
 
+	ccllogger "github.com/smartcontractkit/chainlink-common/pkg/logger"
 	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
@@ -187,7 +188,7 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 	// This will eventually be removed in favor of chip-ingress emitter
 	// and logs will be sent via OTLP using the regular Logger instead of calling Emit
 	emitter := NewMessageEmitter(messageLogger)
-
+	var batchEmitter *ChipIngressBatchEmitter
 	var chipIngressClient chipingress.Client = &chipingress.NoopClient{}
 	// if chip ingress is enabled, create dual source emitter that sends to both otel collector and chip ingress
 	// eventually we will remove the dual source emitter and just use chip ingress
@@ -223,18 +224,27 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 			return nil, err
 		}
 
-		chipIngressEmitter, err := NewChipIngressEmitter(chipIngressClient)
+		lggr, lErr := ccllogger.New()
+		if lErr != nil {
+			return nil, fmt.Errorf("failed to create logger for chip ingress batch emitter: %w", lErr)
+		}
+		batchEmitter, err = NewChipIngressBatchEmitter(chipIngressClient, lggr, cfg)
 		if err != nil {
-			return nil, fmt.Errorf("failed to create chip ingress emitter: %w", err)
+			return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
+		}
+		if err = batchEmitter.Start(context.Background()); err != nil {
+			return nil, fmt.Errorf("failed to start chip ingress batch emitter: %w", err)
 		}
 
-		emitter, err = NewDualSourceEmitter(chipIngressEmitter, emitter)
+		emitter, err = NewDualSourceEmitter(batchEmitter, emitter)
 		if err != nil {
 			return nil, fmt.Errorf("failed to create dual source emitter: %w", err)
 		}
 	}
 
 	onClose := func() (err error) {
+		// batchEmitter is closed via DualSourceEmitter.Close() -> chipIngressEmitter.Close(),
+		// which is called by Client.Close() -> c.Emitter.Close() before OnClose runs.
 		for _, provider := range []shutdowner{messageLoggerProvider, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider} {
 			err = errors.Join(err, provider.Shutdown(context.Background()))
 		}
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index f3e4561b68..506dce9d0e 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -44,6 +44,12 @@ type Config struct {
 	ChipIngressEmitterGRPCEndpoint string
 	ChipIngressInsecureConnection  bool // Disables TLS for Chip Ingress Emitter
 
+	// Chip Ingress Batch Emitter
+	ChipIngressBufferSize   uint          // Per-worker channel buffer size (default 100)
+	ChipIngressMaxBatchSize uint          // Max events per PublishBatch call (default 50)
+	ChipIngressSendInterval time.Duration // Flush interval per worker (default 500ms). Zero disables batching.
+	ChipIngressSendTimeout  time.Duration // Timeout per PublishBatch call (default 10s)
+
 	// OTel Log
 	LogExportTimeout      time.Duration
 	LogExportInterval     time.Duration
@@ -131,8 +137,13 @@ func DefaultConfig() Config {
 		LogMaxQueueSize:       2048,
 		LogBatchProcessor:     true,
 		LogStreamingEnabled:   true, // Enable logs streaming by default
-		LogLevel:              zapcore.InfoLevel,
-		LogCompressor:         "gzip",
+		LogLevel:      zapcore.InfoLevel,
+		LogCompressor: "gzip",
+		// Chip Ingress Batch Emitter
+		ChipIngressBufferSize:   100,
+		ChipIngressMaxBatchSize: 50,
+		ChipIngressSendInterval: 500 * time.Millisecond,
+		ChipIngressSendTimeout:  10 * time.Second,
 		// Auth (defaults to static auth mode with TTL=0)
 		AuthHeadersTTL: 0,
 	}
diff --git a/pkg/beholder/config_test.go b/pkg/beholder/config_test.go
index ae156db9c9..9ecdd2280e 100644
--- a/pkg/beholder/config_test.go
+++ b/pkg/beholder/config_test.go
@@ -67,6 +67,6 @@ func ExampleConfig() {
 	}
 	fmt.Printf("%+v\n", *config.LogRetryConfig)
 	// Output:
-	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
+	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
 	// {InitialInterval:5s MaxInterval:30s MaxElapsedTime:1m0s}
 }
diff --git a/pkg/beholder/dual_source_emitter.go b/pkg/beholder/dual_source_emitter.go
index 5167efaece..d34dbb614e 100644
--- a/pkg/beholder/dual_source_emitter.go
+++ b/pkg/beholder/dual_source_emitter.go
@@ -62,22 +62,11 @@ func (d *DualSourceEmitter) Emit(ctx context.Context, body []byte, attrKVs ...an
 		return err
 	}
 
-	// Emit via chip ingress async
-	if err := d.wg.TryAdd(1); err != nil {
-		return err
+	// Emit via chip ingress. When backed by ChipIngressBatchEmitter this is
+	// non-blocking (just a channel send), so no goroutine wrapper is needed.
+	if err := d.chipIngressEmitter.Emit(ctx, body, attrKVs...); err != nil {
+		d.log.Infof("failed to emit to chip ingress: %v", err)
 	}
-	go func(ctx context.Context) {
-		defer d.wg.Done()
-		var cancel context.CancelFunc
-		ctx, cancel = d.stopCh.Ctx(ctx)
-		defer cancel()
-
-		if err := d.chipIngressEmitter.Emit(ctx, body, attrKVs...); err != nil {
-			// If the chip ingress emitter fails, we ONLY log the error
-			// because we still want to send the data to the OTLP collector and not cause disruption
-			d.log.Infof("failed to emit to chip ingress: %v", err)
-		}
-	}(context.WithoutCancel(ctx))
 
 	return nil
 }

From 5ec2dec06bf822abfa161fd01e2a77d499265bfb Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 27 Feb 2026 16:39:52 +0200
Subject: [PATCH 02/28] Amend chip_ingress_batch_emitter_test.go

---
 .../chip_ingress_batch_emitter_test.go        | 112 ++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
index 02a6a22789..483d55e86f 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -1,6 +1,7 @@
 package beholder_test
 
 import (
+	"context"
 	"sync"
 	"testing"
 	"time"
@@ -349,3 +350,114 @@ func TestChipIngressBatchEmitter_CloudEventFormat(t *testing.T) {
 		assert.NotEmpty(t, event.Id)
 	})
 }
+
+func TestChipIngressBatchEmitter_PublishBatchError(t *testing.T) {
+	t.Run("PublishBatch error is handled gracefully", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		callCount := 0
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(_ mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				callCount++
+			}).
+			Return(nil, assert.AnError)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return callCount > 0
+		}, 2*time.Second, 10*time.Millisecond)
+
+		require.NoError(t, emitter.Close())
+	})
+}
+
+func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
+	t.Run("Emit returns context error when context is cancelled", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressBufferSize = 1
+		cfg.ChipIngressSendInterval = 10 * time.Second
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+		defer emitter.Close() //nolint:errcheck
+
+		// Fill the buffer so the next Emit will block on channel send
+		err = emitter.Emit(t.Context(), []byte("fill"),
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		require.NoError(t, err)
+
+		ctx, cancel := context.WithCancel(t.Context())
+		cancel()
+
+		err = emitter.Emit(ctx, []byte("should-fail"),
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		assert.ErrorIs(t, err, context.Canceled)
+	})
+}
+
+func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
+	t.Run("zero config uses sane defaults", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		var receivedBatch *chipingress.CloudEventBatch
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				receivedBatch = args.Get(1).(*chipingress.CloudEventBatch)
+			}).
+			Return(nil, nil)
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), beholder.Config{})
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		require.NoError(t, err)
+
+		// Default send interval is 500ms; wait for flush
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return receivedBatch != nil
+		}, 3*time.Second, 50*time.Millisecond)
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		require.Len(t, receivedBatch.Events, 1)
+	})
+}

From ab95437bc72bb8786a75897171bae02ebd062317 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 27 Feb 2026 20:59:21 +0200
Subject: [PATCH 03/28] Fix comment

---
 pkg/beholder/config.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 506dce9d0e..a19b76ca1e 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -47,7 +47,7 @@ type Config struct {
 	// Chip Ingress Batch Emitter
 	ChipIngressBufferSize   uint          // Per-worker channel buffer size (default 100)
 	ChipIngressMaxBatchSize uint          // Max events per PublishBatch call (default 50)
-	ChipIngressSendInterval time.Duration // Flush interval per worker (default 500ms). Zero disables batching.
+	ChipIngressSendInterval time.Duration // Flush interval per worker (default 500ms when zero or unset)
 	ChipIngressSendTimeout  time.Duration // Timeout per PublishBatch call (default 10s)
 
 	// OTel Log

From f31d3e8a3f615f92402e7eb2de812e165eade2ec Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Mon, 2 Mar 2026 11:30:52 +0200
Subject: [PATCH 04/28] batchEmitter -> batchEmitterService

---
 pkg/beholder/client.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index f10c01ae29..4cdb05c404 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -188,7 +188,7 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 	// This will eventually be removed in favor of chip-ingress emitter
 	// and logs will be sent via OTLP using the regular Logger instead of calling Emit
 	emitter := NewMessageEmitter(messageLogger)
-	var batchEmitter *ChipIngressBatchEmitter
+	var batchEmitterService *ChipIngressBatchEmitter
 	var chipIngressClient chipingress.Client = &chipingress.NoopClient{}
 	// if chip ingress is enabled, create dual source emitter that sends to both otel collector and chip ingress
 	// eventually we will remove the dual source emitter and just use chip ingress
@@ -228,22 +228,22 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 		if lErr != nil {
 			return nil, fmt.Errorf("failed to create logger for chip ingress batch emitter: %w", lErr)
 		}
-		batchEmitter, err = NewChipIngressBatchEmitter(chipIngressClient, lggr, cfg)
+		batchEmitterService, err = NewChipIngressBatchEmitter(chipIngressClient, lggr, cfg)
 		if err != nil {
 			return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
 		}
-		if err = batchEmitter.Start(context.Background()); err != nil {
+		if err = batchEmitterService.Start(context.Background()); err != nil {
 			return nil, fmt.Errorf("failed to start chip ingress batch emitter: %w", err)
 		}
 
-		emitter, err = NewDualSourceEmitter(batchEmitter, emitter)
+		emitter, err = NewDualSourceEmitter(batchEmitterService, emitter)
 		if err != nil {
 			return nil, fmt.Errorf("failed to create dual source emitter: %w", err)
 		}
 	}
 
 	onClose := func() (err error) {
-		// batchEmitter is closed via DualSourceEmitter.Close() -> chipIngressEmitter.Close(),
+		// batchEmitterService is closed via DualSourceEmitter.Close() -> chipIngressEmitter.Close(),
 		// which is called by Client.Close() -> c.Emitter.Close() before OnClose runs.
 		for _, provider := range []shutdowner{messageLoggerProvider, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider} {
 			err = errors.Join(err, provider.Shutdown(context.Background()))

From ba186b773de0cc5f91f209578ded9d9c2d44375e Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Mon, 2 Mar 2026 13:18:18 +0200
Subject: [PATCH 05/28] Add feature flag

---
 pkg/beholder/client.go      | 31 ++++++++++++++++++++-----------
 pkg/beholder/config.go      | 12 +++++++-----
 pkg/beholder/config_test.go |  2 +-
 pkg/loop/config.go          | 15 +++++++++++----
 pkg/loop/config_test.go     | 11 +++++++----
 pkg/loop/server.go          |  1 +
 6 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index 4cdb05c404..e6e7c435e2 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -224,19 +224,28 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 			return nil, err
 		}
 
-		lggr, lErr := ccllogger.New()
-		if lErr != nil {
-			return nil, fmt.Errorf("failed to create logger for chip ingress batch emitter: %w", lErr)
-		}
-		batchEmitterService, err = NewChipIngressBatchEmitter(chipIngressClient, lggr, cfg)
-		if err != nil {
-			return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
-		}
-		if err = batchEmitterService.Start(context.Background()); err != nil {
-			return nil, fmt.Errorf("failed to start chip ingress batch emitter: %w", err)
+		var chipIngressEmitter Emitter
+		if cfg.ChipIngressBatchEmitterEnabled {
+			lggr, lErr := ccllogger.New()
+			if lErr != nil {
+				return nil, fmt.Errorf("failed to create logger for chip ingress batch emitter: %w", lErr)
+			}
+			batchEmitterService, err = NewChipIngressBatchEmitter(chipIngressClient, lggr, cfg)
+			if err != nil {
+				return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
+			}
+			if err = batchEmitterService.Start(context.Background()); err != nil {
+				return nil, fmt.Errorf("failed to start chip ingress batch emitter: %w", err)
+			}
+			chipIngressEmitter = batchEmitterService
+		} else {
+			chipIngressEmitter, err = NewChipIngressEmitter(chipIngressClient)
+			if err != nil {
+				return nil, fmt.Errorf("failed to create chip ingress emitter: %w", err)
+			}
 		}
 
-		emitter, err = NewDualSourceEmitter(batchEmitterService, emitter)
+		emitter, err = NewDualSourceEmitter(chipIngressEmitter, emitter)
 		if err != nil {
 			return nil, fmt.Errorf("failed to create dual source emitter: %w", err)
 		}
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index a19b76ca1e..06b877279e 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -45,10 +45,11 @@ type Config struct {
 	ChipIngressInsecureConnection  bool // Disables TLS for Chip Ingress Emitter
 
 	// Chip Ingress Batch Emitter
-	ChipIngressBufferSize   uint          // Per-worker channel buffer size (default 100)
-	ChipIngressMaxBatchSize uint          // Max events per PublishBatch call (default 50)
-	ChipIngressSendInterval time.Duration // Flush interval per worker (default 500ms when zero or unset)
-	ChipIngressSendTimeout  time.Duration // Timeout per PublishBatch call (default 10s)
+	ChipIngressBatchEmitterEnabled bool          // When true (default), use batch emitter; when false, use legacy per-event emitter
+	ChipIngressBufferSize          uint          // Per-worker channel buffer size (default 100)
+	ChipIngressMaxBatchSize        uint          // Max events per PublishBatch call (default 50)
+	ChipIngressSendInterval        time.Duration // Flush interval per worker (default 500ms when zero or unset)
+	ChipIngressSendTimeout         time.Duration // Timeout per PublishBatch call (default 10s)
 
 	// OTel Log
 	LogExportTimeout      time.Duration
@@ -140,7 +141,8 @@ func DefaultConfig() Config {
 		LogLevel:      zapcore.InfoLevel,
 		LogCompressor: "gzip",
 		// Chip Ingress Batch Emitter
-		ChipIngressBufferSize:   100,
+		ChipIngressBatchEmitterEnabled: false,
+		ChipIngressBufferSize:          100,
 		ChipIngressMaxBatchSize: 50,
 		ChipIngressSendInterval: 500 * time.Millisecond,
 		ChipIngressSendTimeout:  10 * time.Second,
diff --git a/pkg/beholder/config_test.go b/pkg/beholder/config_test.go
index 9ecdd2280e..024ba805d2 100644
--- a/pkg/beholder/config_test.go
+++ b/pkg/beholder/config_test.go
@@ -67,6 +67,6 @@ func ExampleConfig() {
 	}
 	fmt.Printf("%+v\n", *config.LogRetryConfig)
 	// Output:
-	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
+	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
 	// {InitialInterval:5s MaxInterval:30s MaxElapsedTime:1m0s}
 }
diff --git a/pkg/loop/config.go b/pkg/loop/config.go
index 19e1ab7f10..1a0f71e26d 100644
--- a/pkg/loop/config.go
+++ b/pkg/loop/config.go
@@ -77,8 +77,9 @@ const (
 	envTelemetryMetricCompressor          = "CL_TELEMETRY_METRIC_COMPRESSOR"
 	envTelemetryLogCompressor             = "CL_TELEMETRY_LOG_COMPRESSOR"
 
-	envChipIngressEndpoint           = "CL_CHIP_INGRESS_ENDPOINT"
-	envChipIngressInsecureConnection = "CL_CHIP_INGRESS_INSECURE_CONNECTION"
+	envChipIngressEndpoint             = "CL_CHIP_INGRESS_ENDPOINT"
+	envChipIngressInsecureConnection   = "CL_CHIP_INGRESS_INSECURE_CONNECTION"
+	envChipIngressBatchEmitterEnabled  = "CL_CHIP_INGRESS_BATCH_EMITTER_ENABLED"
 
 	envCRESettings        = cresettings.EnvNameSettings
 	envCRESettingsDefault = cresettings.EnvNameSettingsDefault
@@ -148,8 +149,9 @@ type EnvConfig struct {
 	TelemetryMetricCompressor          string
 	TelemetryLogCompressor             string
 
-	ChipIngressEndpoint           string
-	ChipIngressInsecureConnection bool
+	ChipIngressEndpoint            string
+	ChipIngressInsecureConnection  bool
+	ChipIngressBatchEmitterEnabled bool
 
 	CRESettings        string
 	CRESettingsDefault string
@@ -234,6 +236,7 @@ func (e *EnvConfig) AsCmdEnv() (env []string) {
 
 	add(envChipIngressEndpoint, e.ChipIngressEndpoint)
 	add(envChipIngressInsecureConnection, strconv.FormatBool(e.ChipIngressInsecureConnection))
+	add(envChipIngressBatchEmitterEnabled, strconv.FormatBool(e.ChipIngressBatchEmitterEnabled))
 
 	if e.CRESettings != "" {
 		add(envCRESettings, e.CRESettings)
@@ -449,6 +452,10 @@ func (e *EnvConfig) parse() error {
 		if err != nil {
 			return fmt.Errorf("failed to parse %s: %w", envChipIngressInsecureConnection, err)
 		}
+		e.ChipIngressBatchEmitterEnabled, err = getBool(envChipIngressBatchEmitterEnabled)
+		if err != nil {
+			return fmt.Errorf("failed to parse %s: %w", envChipIngressBatchEmitterEnabled, err)
+		}
 	}
 
 	e.CRESettings = os.Getenv(envCRESettings)
diff --git a/pkg/loop/config_test.go b/pkg/loop/config_test.go
index a0b5ef83d9..5207dbbeaa 100644
--- a/pkg/loop/config_test.go
+++ b/pkg/loop/config_test.go
@@ -78,8 +78,9 @@ func TestEnvConfig_parse(t *testing.T) {
 				envTelemetryEmitterMaxQueueSize:       "1000",
 				envTelemetryLogStreamingEnabled:       "false",
 
-				envChipIngressEndpoint:           "chip-ingress.example.com:50051",
-				envChipIngressInsecureConnection: "true",
+				envChipIngressEndpoint:            "chip-ingress.example.com:50051",
+				envChipIngressInsecureConnection:  "true",
+				envChipIngressBatchEmitterEnabled: "false",
 
 				envCRESettings:        `{"global":{}}`,
 				envCRESettingsDefault: `{"foo":"bar"}`,
@@ -182,8 +183,9 @@ var envCfgFull = EnvConfig{
 	TelemetryEmitterMaxQueueSize:       1000,
 	TelemetryLogStreamingEnabled:       false,
 
-	ChipIngressEndpoint:           "chip-ingress.example.com:50051",
-	ChipIngressInsecureConnection: true,
+	ChipIngressEndpoint:            "chip-ingress.example.com:50051",
+	ChipIngressInsecureConnection:  true,
+	ChipIngressBatchEmitterEnabled: false,
 
 	CRESettings:        `{"global":{}}`,
 	CRESettingsDefault: `{"foo":"bar"}`,
@@ -239,6 +241,7 @@ func TestEnvConfig_AsCmdEnv(t *testing.T) {
 	// Assert ChipIngress environment variables
 	assert.Equal(t, "chip-ingress.example.com:50051", got[envChipIngressEndpoint])
 	assert.Equal(t, "true", got[envChipIngressInsecureConnection])
+	assert.Equal(t, "false", got[envChipIngressBatchEmitterEnabled])
 
 	assert.Equal(t, `{"global":{}}`, got[envCRESettings])
 	assert.Equal(t, `{"foo":"bar"}`, got[envCRESettingsDefault])
diff --git a/pkg/loop/server.go b/pkg/loop/server.go
index ae61c9d6c4..b36bc30a04 100644
--- a/pkg/loop/server.go
+++ b/pkg/loop/server.go
@@ -173,6 +173,7 @@ func (s *Server) start(opts ...ServerOpt) error {
 			ChipIngressEmitterEnabled:      s.EnvConfig.ChipIngressEndpoint != "",
 			ChipIngressEmitterGRPCEndpoint: s.EnvConfig.ChipIngressEndpoint,
 			ChipIngressInsecureConnection:  s.EnvConfig.ChipIngressInsecureConnection,
+			ChipIngressBatchEmitterEnabled: s.EnvConfig.ChipIngressBatchEmitterEnabled,
 			MetricCompressor:               s.EnvConfig.TelemetryMetricCompressor,
 		}
 

From dc9e23570efb7ad62de40c3018813ce7e57e22e9 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Mon, 2 Mar 2026 13:47:23 +0200
Subject: [PATCH 06/28] Make logger last parameter

---
 pkg/beholder/chip_ingress_batch_emitter.go    |  2 +-
 .../chip_ingress_batch_emitter_test.go        | 26 +++++++++----------
 pkg/beholder/client.go                        |  2 +-
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index 37949069cf..b8657d1b17 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -33,7 +33,7 @@ type ChipIngressBatchEmitter struct {
 
 // NewChipIngressBatchEmitter creates a batch emitter backed by the given chipingress client.
 // Call Start() to begin health monitoring, and Close() to stop all workers.
-func NewChipIngressBatchEmitter(client chipingress.Client, lggr logger.Logger, cfg Config) (*ChipIngressBatchEmitter, error) {
+func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logger.Logger) (*ChipIngressBatchEmitter, error) {
 	if client == nil {
 		return nil, fmt.Errorf("chip ingress client is nil")
 	}
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
index 483d55e86f..ec2ac01373 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -35,13 +35,13 @@ func newTestLogger(t *testing.T) logger.Logger {
 func TestNewChipIngressBatchEmitter(t *testing.T) {
 	t.Run("happy path", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), newTestConfig())
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
 		require.NoError(t, err)
 		assert.NotNil(t, emitter)
 	})
 
 	t.Run("returns error when client is nil", func(t *testing.T) {
-		emitter, err := beholder.NewChipIngressBatchEmitter(nil, newTestLogger(t), newTestConfig())
+		emitter, err := beholder.NewChipIngressBatchEmitter(nil, newTestConfig(), newTestLogger(t))
 		assert.Error(t, err)
 		assert.Nil(t, emitter)
 	})
@@ -50,7 +50,7 @@ func TestNewChipIngressBatchEmitter(t *testing.T) {
 func TestChipIngressBatchEmitter_Emit(t *testing.T) {
 	t.Run("returns error when domain/entity missing", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), newTestConfig())
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 		defer emitter.Close() //nolint:errcheck
@@ -61,7 +61,7 @@ func TestChipIngressBatchEmitter_Emit(t *testing.T) {
 
 	t.Run("enqueues and does not call PublishBatch immediately", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), newTestConfig())
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 		defer emitter.Close() //nolint:errcheck
@@ -96,7 +96,7 @@ func TestChipIngressBatchEmitter_BatchAssembly(t *testing.T) {
 		cfg := newTestConfig()
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
@@ -149,7 +149,7 @@ func TestChipIngressBatchEmitter_MaxBatchSize(t *testing.T) {
 		cfg.ChipIngressMaxBatchSize = 3
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
@@ -204,7 +204,7 @@ func TestChipIngressBatchEmitter_PerDomainEntityIsolation(t *testing.T) {
 		cfg := newTestConfig()
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
@@ -254,7 +254,7 @@ func TestChipIngressBatchEmitter_BufferFull(t *testing.T) {
 		cfg.ChipIngressBufferSize = 3
 		cfg.ChipIngressSendInterval = 10 * time.Second // very long interval to prevent flushing
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 		defer emitter.Close() //nolint:errcheck
@@ -285,7 +285,7 @@ func TestChipIngressBatchEmitter_Lifecycle(t *testing.T) {
 			Return(nil, nil).
 			Maybe()
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), newTestConfig())
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
 		require.NoError(t, err)
 
 		require.NoError(t, emitter.Start(t.Context()))
@@ -322,7 +322,7 @@ func TestChipIngressBatchEmitter_CloudEventFormat(t *testing.T) {
 		cfg := newTestConfig()
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
@@ -369,7 +369,7 @@ func TestChipIngressBatchEmitter_PublishBatchError(t *testing.T) {
 		cfg := newTestConfig()
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
@@ -399,7 +399,7 @@ func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
 		cfg.ChipIngressBufferSize = 1
 		cfg.ChipIngressSendInterval = 10 * time.Second
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), cfg)
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 		defer emitter.Close() //nolint:errcheck
@@ -437,7 +437,7 @@ func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
 			}).
 			Return(nil, nil)
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestLogger(t), beholder.Config{})
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, beholder.Config{}, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index e6e7c435e2..eaa268223f 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -230,7 +230,7 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 			if lErr != nil {
 				return nil, fmt.Errorf("failed to create logger for chip ingress batch emitter: %w", lErr)
 			}
-			batchEmitterService, err = NewChipIngressBatchEmitter(chipIngressClient, lggr, cfg)
+			batchEmitterService, err = NewChipIngressBatchEmitter(chipIngressClient, cfg, lggr)
 			if err != nil {
 				return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
 			}

From 38c7ca34dc8425b9c4cf1884b946279a8ee97519 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Mon, 2 Mar 2026 14:06:11 +0200
Subject: [PATCH 07/28] Omit ChipIngressBatchEmitter.start

---
 pkg/beholder/chip_ingress_batch_emitter.go | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index b8657d1b17..5240f75099 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -65,16 +65,12 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	}
 
 	e.Service, e.eng = services.Config{
-		Name:  "ChipIngressBatchEmitter",
-		Start: e.start,
+		Name: "ChipIngressBatchEmitter",
 	}.NewServiceEngine(lggr)
 
 	return e, nil
 }
 
-func (e *ChipIngressBatchEmitter) start(_ context.Context) error {
-	return nil
-}
 
 // Emit extracts (domain, entity) from the attributes, routes the event to the
 // appropriate per-(domain, entity) worker, and returns immediately.

From 616ac1e4bffb0ad7a24bed9a4e600d873a92484d Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Wed, 4 Mar 2026 13:32:33 +0200
Subject: [PATCH 08/28] Add retries and drain

---
 pkg/beholder/chip_ingress_batch_emitter.go    |  98 ++++-
 .../chip_ingress_batch_emitter_test.go        | 404 ++++++++++++++++++
 pkg/beholder/chip_ingress_emitter_worker.go   | 144 ++++++-
 pkg/beholder/config.go                        |  27 +-
 pkg/beholder/config_test.go                   |   2 +-
 5 files changed, 652 insertions(+), 23 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index 5240f75099..de41f01b5f 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -6,10 +6,12 @@ import (
 	"sync"
 	"time"
 
+	"go.opentelemetry.io/otel"
+	otelmetric "go.opentelemetry.io/otel/metric"
+
 	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
 	"github.com/smartcontractkit/chainlink-common/pkg/logger"
 	"github.com/smartcontractkit/chainlink-common/pkg/services"
-	"github.com/smartcontractkit/chainlink-common/pkg/timeutil"
 )
 
 // ChipIngressBatchEmitter buffers events per (domain, entity) and flushes them
@@ -29,6 +31,18 @@ type ChipIngressBatchEmitter struct {
 	maxBatchSize uint
 	sendInterval time.Duration
 	sendTimeout  time.Duration
+	retryCfg     *RetryConfig
+	drainTimeout time.Duration
+
+	metrics batchEmitterMetrics
+}
+
+type batchEmitterMetrics struct {
+	eventsSent      otelmetric.Int64Counter
+	eventsDropped   otelmetric.Int64Counter
+	batchRetries    otelmetric.Int64Counter
+	batchFailures   otelmetric.Int64Counter
+	eventsDrained   otelmetric.Int64Counter
 }
 
 // NewChipIngressBatchEmitter creates a batch emitter backed by the given chipingress client.
@@ -54,6 +68,16 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	if sendTimeout == 0 {
 		sendTimeout = 10 * time.Second
 	}
+	drainTimeout := cfg.ChipIngressDrainTimeout
+	if drainTimeout == 0 {
+		drainTimeout = 5 * time.Second
+	}
+
+	meter := otel.Meter("beholder/chip_ingress_batch_emitter")
+	metrics, err := newBatchEmitterMetrics(meter)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create batch emitter metrics: %w", err)
+	}
 
 	e := &ChipIngressBatchEmitter{
 		client:       client,
@@ -62,6 +86,9 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 		maxBatchSize: maxBatchSize,
 		sendInterval: sendInterval,
 		sendTimeout:  sendTimeout,
+		retryCfg:     cfg.ChipIngressRetryConfig,
+		drainTimeout: drainTimeout,
+		metrics:      metrics,
 	}
 
 	e.Service, e.eng = services.Config{
@@ -71,7 +98,6 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	return e, nil
 }
 
-
 // Emit extracts (domain, entity) from the attributes, routes the event to the
 // appropriate per-(domain, entity) worker, and returns immediately.
 // If the worker's buffer is full, the event is dropped and a warning is logged.
@@ -105,7 +131,7 @@ func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs
 }
 
 // findOrCreateWorker returns the worker for the given (domain, entity) pair,
-// creating one with a new buffered channel and GoTick flush loop if it doesn't exist.
+// creating one with a new buffered channel and flush goroutine if it doesn't exist.
 func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chipIngressEmitterWorker {
 	workerKey := fmt.Sprintf("%s_%s", domain, entity)
 
@@ -132,13 +158,73 @@ func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chi
 		entity,
 		e.maxBatchSize,
 		e.sendTimeout,
+		e.retryCfg,
+		e.metrics,
 		e.eng,
 	)
 
-	e.eng.GoTick(timeutil.NewTicker(func() time.Duration {
-		return e.sendInterval
-	}), worker.Send)
+	sendInterval := e.sendInterval
+	drainTimeout := e.drainTimeout
+	e.eng.Go(func(ctx context.Context) {
+		ticker := time.NewTicker(sendInterval)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-ticker.C:
+				worker.Send(ctx)
+			case <-ctx.Done():
+				worker.drain(drainTimeout)
+				return
+			}
+		}
+	})
 
 	e.workers[workerKey] = worker
 	return worker
 }
+
+func newBatchEmitterMetrics(meter otelmetric.Meter) (batchEmitterMetrics, error) {
+	eventsSent, err := meter.Int64Counter("chip_ingress.events_sent",
+		otelmetric.WithDescription("Total events successfully sent via PublishBatch"),
+		otelmetric.WithUnit("{event}"))
+	if err != nil {
+		return batchEmitterMetrics{}, err
+	}
+
+	eventsDropped, err := meter.Int64Counter("chip_ingress.events_dropped",
+		otelmetric.WithDescription("Total events dropped (buffer full or retries exhausted)"),
+		otelmetric.WithUnit("{event}"))
+	if err != nil {
+		return batchEmitterMetrics{}, err
+	}
+
+	batchRetries, err := meter.Int64Counter("chip_ingress.batch_retries",
+		otelmetric.WithDescription("Total PublishBatch retry attempts"),
+		otelmetric.WithUnit("{attempt}"))
+	if err != nil {
+		return batchEmitterMetrics{}, err
+	}
+
+	batchFailures, err := meter.Int64Counter("chip_ingress.batch_failures",
+		otelmetric.WithDescription("Total batches that failed after all retries"),
+		otelmetric.WithUnit("{batch}"))
+	if err != nil {
+		return batchEmitterMetrics{}, err
+	}
+
+	eventsDrained, err := meter.Int64Counter("chip_ingress.events_drained",
+		otelmetric.WithDescription("Total events flushed during graceful shutdown"),
+		otelmetric.WithUnit("{event}"))
+	if err != nil {
+		return batchEmitterMetrics{}, err
+	}
+
+	return batchEmitterMetrics{
+		eventsSent:    eventsSent,
+		eventsDropped: eventsDropped,
+		batchRetries:  batchRetries,
+		batchFailures: batchFailures,
+		eventsDrained: eventsDrained,
+	}, nil
+}
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
index ec2ac01373..98f6808752 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -61,6 +61,11 @@ func TestChipIngressBatchEmitter_Emit(t *testing.T) {
 
 	t.Run("enqueues and does not call PublishBatch immediately", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, nil).
+			Maybe()
+
 		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
@@ -394,6 +399,10 @@ func TestChipIngressBatchEmitter_PublishBatchError(t *testing.T) {
 func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
 	t.Run("Emit returns context error when context is cancelled", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, nil).
+			Maybe()
 
 		cfg := newTestConfig()
 		cfg.ChipIngressBufferSize = 1
@@ -461,3 +470,398 @@ func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
 		require.Len(t, receivedBatch.Events, 1)
 	})
 }
+
+func newRetryTestConfig() beholder.Config {
+	return beholder.Config{
+		ChipIngressBufferSize:   10,
+		ChipIngressMaxBatchSize: 5,
+		ChipIngressSendInterval: 50 * time.Millisecond,
+		ChipIngressSendTimeout:  1 * time.Second,
+		ChipIngressRetryConfig: &beholder.RetryConfig{
+			InitialInterval: 10 * time.Millisecond,
+			MaxInterval:     50 * time.Millisecond,
+			MaxElapsedTime:  500 * time.Millisecond,
+		},
+		ChipIngressDrainTimeout: 5 * time.Second,
+	}
+}
+
+func TestChipIngressBatchEmitter_RetrySuccess(t *testing.T) {
+	t.Run("succeeds after transient failure", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		callCount := 0
+		totalEventsSent := 0
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				callCount++
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				if callCount <= 2 {
+					return // first 2 calls fail
+				}
+				totalEventsSent += len(batch.Events)
+			}).
+			Return(nil, assert.AnError).Times(2)
+
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				callCount++
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				totalEventsSent += len(batch.Events)
+			}).
+			Return(nil, nil)
+
+		cfg := newRetryTestConfig()
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return totalEventsSent >= 3
+		}, 5*time.Second, 10*time.Millisecond)
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		assert.GreaterOrEqual(t, callCount, 3, "should have retried at least twice before succeeding")
+		assert.Equal(t, 3, totalEventsSent)
+	})
+}
+
+func TestChipIngressBatchEmitter_RetryExhaustion(t *testing.T) {
+	t.Run("drops events after retries exhausted", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		callCount := 0
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(_ mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				callCount++
+			}).
+			Return(nil, assert.AnError)
+
+		cfg := newRetryTestConfig()
+		cfg.ChipIngressRetryConfig = &beholder.RetryConfig{
+			InitialInterval: 10 * time.Millisecond,
+			MaxInterval:     20 * time.Millisecond,
+			MaxElapsedTime:  100 * time.Millisecond,
+		}
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		// Wait long enough for retries to exhaust
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return callCount >= 2
+		}, 3*time.Second, 10*time.Millisecond)
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		assert.GreaterOrEqual(t, callCount, 2, "should have attempted at least 2 times before exhausting retries")
+	})
+}
+
+func TestChipIngressBatchEmitter_GracefulDrain(t *testing.T) {
+	t.Run("flushes buffered events on close", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		totalEventsSent := 0
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				totalEventsSent += len(batch.Events)
+			}).
+			Return(nil, nil)
+
+		cfg := beholder.Config{
+			ChipIngressBufferSize:   20,
+			ChipIngressMaxBatchSize: 10,
+			ChipIngressSendInterval: 1 * time.Hour, // very long interval — events won't flush via tick
+			ChipIngressSendTimeout:  5 * time.Second,
+			ChipIngressDrainTimeout: 5 * time.Second,
+		}
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 5; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		// Events are buffered but no tick has fired. Close should drain them.
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		assert.Equal(t, 5, totalEventsSent, "all buffered events should be drained on close")
+	})
+}
+
+func TestChipIngressBatchEmitter_DrainMultipleDomains(t *testing.T) {
+	t.Run("drains events from all workers on close", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		domainEntitySent := make(map[string]int)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				for _, event := range batch.Events {
+					key := event.Source + "/" + event.Type
+					domainEntitySent[key] += 1
+				}
+			}).
+			Return(nil, nil)
+
+		cfg := beholder.Config{
+			ChipIngressBufferSize:   20,
+			ChipIngressMaxBatchSize: 10,
+			ChipIngressSendInterval: 1 * time.Hour,
+			ChipIngressSendTimeout:  5 * time.Second,
+			ChipIngressDrainTimeout: 5 * time.Second,
+		}
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("workflow"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "WorkflowEvent",
+			)
+			require.NoError(t, err)
+		}
+		for i := 0; i < 2; i++ {
+			err = emitter.Emit(t.Context(), []byte("bridge"),
+				beholder.AttrKeyDomain, "data-feeds",
+				beholder.AttrKeyEntity, "BridgeStatus",
+			)
+			require.NoError(t, err)
+		}
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		assert.Equal(t, 3, domainEntitySent["platform/WorkflowEvent"])
+		assert.Equal(t, 2, domainEntitySent["data-feeds/BridgeStatus"])
+	})
+}
+
+func TestChipIngressBatchEmitter_DrainPublishBatchFailure(t *testing.T) {
+	t.Run("drain continues attempting batches after failure", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		callCount := 0
+		totalEventsSent := 0
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				callCount++
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				if callCount == 1 {
+					return // first call fails
+				}
+				totalEventsSent += len(batch.Events)
+			}).
+			Return(nil, assert.AnError).Once()
+
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				callCount++
+				batch := args.Get(1).(*chipingress.CloudEventBatch)
+				totalEventsSent += len(batch.Events)
+			}).
+			Return(nil, nil)
+
+		cfg := beholder.Config{
+			ChipIngressBufferSize:   20,
+			ChipIngressMaxBatchSize: 3,
+			ChipIngressSendInterval: 1 * time.Hour,
+			ChipIngressSendTimeout:  5 * time.Second,
+			ChipIngressDrainTimeout: 5 * time.Second,
+		}
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		// Emit 6 events with maxBatchSize=3 => 2 batches during drain
+		for i := 0; i < 6; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		require.NoError(t, emitter.Close())
+
+		mu.Lock()
+		defer mu.Unlock()
+		assert.GreaterOrEqual(t, callCount, 2, "drain should have attempted at least 2 batches")
+		assert.Equal(t, 3, totalEventsSent, "second batch should have succeeded despite first batch failure")
+	})
+}
+
+func TestChipIngressBatchEmitter_DrainTimeout(t *testing.T) {
+	t.Run("close returns promptly when drain timeout expires", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(args mock.Arguments) {
+				ctx := args.Get(0).(context.Context)
+				<-ctx.Done() // simulate a slow server that only returns on context cancellation
+			}).
+			Return(nil, context.DeadlineExceeded).
+			Maybe()
+
+		cfg := beholder.Config{
+			ChipIngressBufferSize:   20,
+			ChipIngressMaxBatchSize: 10,
+			ChipIngressSendInterval: 1 * time.Hour,
+			ChipIngressSendTimeout:  10 * time.Second,
+			ChipIngressDrainTimeout: 200 * time.Millisecond,
+		}
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 5; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		closeDone := make(chan error, 1)
+		go func() {
+			closeDone <- emitter.Close()
+		}()
+
+		select {
+		case err := <-closeDone:
+			assert.NoError(t, err, "close should not error")
+		case <-time.After(5 * time.Second):
+			t.Fatal("Close() did not return within 5s; drain timeout is not working")
+		}
+	})
+}
+
+func TestChipIngressBatchEmitter_RetryShutdownDuringBackoff(t *testing.T) {
+	t.Run("shutdown during retry backoff completes promptly", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		var mu sync.Mutex
+		callCount := 0
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(_ mock.Arguments) {
+				mu.Lock()
+				defer mu.Unlock()
+				callCount++
+			}).
+			Return(nil, assert.AnError)
+
+		cfg := beholder.Config{
+			ChipIngressBufferSize:   10,
+			ChipIngressMaxBatchSize: 5,
+			ChipIngressSendInterval: 50 * time.Millisecond,
+			ChipIngressSendTimeout:  1 * time.Second,
+			ChipIngressRetryConfig: &beholder.RetryConfig{
+				InitialInterval: 10 * time.Second, // very long backoff
+				MaxInterval:     30 * time.Second,
+				MaxElapsedTime:  1 * time.Minute,
+			},
+			ChipIngressDrainTimeout: 5 * time.Second,
+		}
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		for i := 0; i < 3; i++ {
+			err = emitter.Emit(t.Context(), []byte("body"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+			require.NoError(t, err)
+		}
+
+		// Wait for at least one PublishBatch attempt (which will fail and enter 10s backoff)
+		assert.Eventually(t, func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return callCount >= 1
+		}, 2*time.Second, 10*time.Millisecond)
+
+		// Close while the worker is sleeping in the 10s retry backoff
+		closeDone := make(chan error, 1)
+		go func() {
+			closeDone <- emitter.Close()
+		}()
+
+		select {
+		case err := <-closeDone:
+			assert.NoError(t, err, "close should not error")
+		case <-time.After(3 * time.Second):
+			t.Fatal("Close() did not return within 3s; timer.Stop() during retry backoff is not working")
+		}
+	})
+}
diff --git a/pkg/beholder/chip_ingress_emitter_worker.go b/pkg/beholder/chip_ingress_emitter_worker.go
index 6b9c61759e..2df4ab130b 100644
--- a/pkg/beholder/chip_ingress_emitter_worker.go
+++ b/pkg/beholder/chip_ingress_emitter_worker.go
@@ -3,9 +3,13 @@ package beholder
 import (
 	"context"
 	"fmt"
+	"math/rand/v2"
 	"sync/atomic"
 	"time"
 
+	"go.opentelemetry.io/otel/attribute"
+	otelmetric "go.opentelemetry.io/otel/metric"
+
 	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
 	"github.com/smartcontractkit/chainlink-common/pkg/logger"
 )
@@ -29,6 +33,12 @@ type chipIngressEmitterWorker struct {
 	sendTimeout  time.Duration
 	lggr         logger.Logger
 	dropCount    atomic.Uint32
+
+	retryInitialInterval time.Duration
+	retryMaxInterval     time.Duration
+	retryMaxElapsed      time.Duration
+
+	metrics batchEmitterMetrics
 }
 
 func newChipIngressEmitterWorker(
@@ -38,9 +48,11 @@ func newChipIngressEmitterWorker(
 	entity string,
 	maxBatchSize uint,
 	sendTimeout time.Duration,
+	retryCfg *RetryConfig,
+	metrics batchEmitterMetrics,
 	lggr logger.Logger,
 ) *chipIngressEmitterWorker {
-	return &chipIngressEmitterWorker{
+	w := &chipIngressEmitterWorker{
 		client:       client,
 		ch:           ch,
 		domain:       domain,
@@ -48,10 +60,18 @@ func newChipIngressEmitterWorker(
 		maxBatchSize: maxBatchSize,
 		sendTimeout:  sendTimeout,
 		lggr:         logger.Named(lggr, "ChipIngressEmitterWorker"),
+		metrics:      metrics,
 	}
+	if retryCfg != nil && retryCfg.Enabled() {
+		w.retryInitialInterval = retryCfg.InitialInterval
+		w.retryMaxInterval = retryCfg.MaxInterval
+		w.retryMaxElapsed = retryCfg.MaxElapsedTime
+	}
+	return w
 }
 
-// Send drains the channel and sends a batch. Called periodically by GoTick.
+// Send drains the channel and sends a batch with retry on failure.
+// Called periodically by the tick loop.
 func (w *chipIngressEmitterWorker) Send(ctx context.Context) {
 	if len(w.ch) == 0 {
 		return
@@ -62,25 +82,84 @@ func (w *chipIngressEmitterWorker) Send(ctx context.Context) {
 		return
 	}
 
-	ctx, cancel := context.WithTimeout(ctx, w.sendTimeout)
+	if w.retryMaxElapsed == 0 {
+		w.publishOnce(ctx, batch)
+		return
+	}
+
+	backoff := w.retryInitialInterval
+	deadline := time.Now().Add(w.retryMaxElapsed)
+
+	batchSize := int64(len(batch.Events))
+	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
+
+	for attempt := 0; ; attempt++ {
+		sendCtx, cancel := context.WithTimeout(ctx, w.sendTimeout)
+		_, err := w.client.PublishBatch(sendCtx, batch)
+		cancel()
+
+		if err == nil {
+			w.metrics.eventsSent.Add(context.Background(), batchSize, metricAttrs)
+			return
+		}
+
+		w.lggr.Warnf("PublishBatch failed (attempt %d, domain=%s, entity=%s): %v",
+			attempt+1, w.domain, w.entity, err)
+		w.metrics.batchRetries.Add(context.Background(), 1, metricAttrs)
+
+		if time.Now().Add(backoff).After(deadline) {
+			w.lggr.Warnf("PublishBatch retries exhausted, dropping %d events (domain=%s, entity=%s)",
+				len(batch.Events), w.domain, w.entity)
+			w.metrics.batchFailures.Add(context.Background(), 1, metricAttrs)
+			w.metrics.eventsDropped.Add(context.Background(), batchSize, metricAttrs)
+			return
+		}
+
+		timer := time.NewTimer(backoff)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			w.lggr.Warnf("context cancelled during retry, dropping %d events (domain=%s, entity=%s)",
+				len(batch.Events), w.domain, w.entity)
+			w.metrics.eventsDropped.Add(context.Background(), batchSize, metricAttrs)
+			return
+		case <-timer.C:
+		}
+
+		backoff = min(backoff*2, w.retryMaxInterval)
+		jitter := time.Duration(rand.Int64N(int64(backoff) / 5)) //nolint:gosec
+		backoff += jitter
+	}
+}
+
+func (w *chipIngressEmitterWorker) publishOnce(ctx context.Context, batch *chipingress.CloudEventBatch) {
+	sendCtx, cancel := context.WithTimeout(ctx, w.sendTimeout)
 	defer cancel()
 
-	_, err := w.client.PublishBatch(ctx, batch)
+	batchSize := int64(len(batch.Events))
+	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
+	_, err := w.client.PublishBatch(sendCtx, batch)
 	if err != nil {
-		w.lggr.Warnf("could not send batch via chip ingress: %v", err)
+		w.lggr.Warnf("could not send batch via chip ingress (domain=%s, entity=%s): %v",
+			w.domain, w.entity, err)
+		w.metrics.batchFailures.Add(context.Background(), 1, metricAttrs)
+		w.metrics.eventsDropped.Add(context.Background(), batchSize, metricAttrs)
 		return
 	}
+	w.metrics.eventsSent.Add(context.Background(), batchSize, metricAttrs)
 }
 
 // buildBatch drains the channel up to maxBatchSize and converts payloads to a CloudEventBatch.
 func (w *chipIngressEmitterWorker) buildBatch() *chipingress.CloudEventBatch {
 	var events []chipingress.CloudEvent
+	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
 
 	for len(w.ch) > 0 && len(events) < int(w.maxBatchSize) { // #nosec G115
 		payload := <-w.ch
 		event, err := w.payloadToEvent(payload)
 		if err != nil {
 			w.lggr.Warnf("failed to build CloudEvent, dropping: %v", err)
+			w.metrics.eventsDropped.Add(context.Background(), 1, metricAttrs)
 			continue
 		}
 		events = append(events, event)
@@ -93,6 +172,7 @@ func (w *chipIngressEmitterWorker) buildBatch() *chipingress.CloudEventBatch {
 	batch, err := chipingress.EventsToBatch(events)
 	if err != nil {
 		w.lggr.Warnf("failed to convert events to batch: %v", err)
+		w.metrics.eventsDropped.Add(context.Background(), int64(len(events)), metricAttrs)
 		return nil
 	}
 
@@ -107,12 +187,66 @@ func (w *chipIngressEmitterWorker) payloadToEvent(payload emitterPayload) (chipi
 	return event, nil
 }
 
+// drain flushes all remaining buffered events before shutdown.
+// Uses a fresh context with the given timeout (independent of the cancelled parent).
+// Continues attempting subsequent batches even if one fails.
+func (w *chipIngressEmitterWorker) drain(timeout time.Duration) {
+	if len(w.ch) == 0 {
+		return
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+
+	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
+	w.lggr.Infof("draining %d buffered events (domain=%s, entity=%s)", len(w.ch), w.domain, w.entity)
+
+	for len(w.ch) > 0 {
+		if ctx.Err() != nil {
+			remaining := len(w.ch)
+			if remaining > 0 {
+				w.lggr.Warnf("drain timeout exceeded, dropping %d remaining events (domain=%s, entity=%s)",
+					remaining, w.domain, w.entity)
+				w.metrics.eventsDropped.Add(context.Background(), int64(remaining), metricAttrs)
+			}
+			return
+		}
+
+		batch := w.buildBatch()
+		if batch == nil || len(batch.Events) == 0 {
+			break
+		}
+
+		batchSize := int64(len(batch.Events))
+		sendCtx, sendCancel := context.WithTimeout(ctx, w.sendTimeout)
+		_, err := w.client.PublishBatch(sendCtx, batch)
+		sendCancel()
+
+		if err != nil {
+			w.lggr.Warnf("drain PublishBatch failed, dropping %d events (domain=%s, entity=%s): %v",
+				len(batch.Events), w.domain, w.entity, err)
+			w.metrics.eventsDropped.Add(context.Background(), batchSize, metricAttrs)
+			continue
+		}
+
+		w.metrics.eventsDrained.Add(context.Background(), batchSize, metricAttrs)
+	}
+}
+
 // logBufferFullWithExpBackoff logs at 1, 2, 4, 8, 16, 32, 64, 100, 200, 300, ...
 // to avoid flooding logs when the buffer is persistently full.
 func (w *chipIngressEmitterWorker) logBufferFullWithExpBackoff(payload emitterPayload) {
+	w.metrics.eventsDropped.Add(context.Background(), 1, otelmetric.WithAttributeSet(w.metricAttributes()))
 	count := w.dropCount.Add(1)
 	if count > 0 && (count%100 == 0 || count&(count-1) == 0) {
 		w.lggr.Warnf("chip ingress emitter buffer full, dropping event (domain=%s, entity=%s, droppedCount=%d)",
 			payload.domain, payload.entity, count)
 	}
 }
+
+func (w *chipIngressEmitterWorker) metricAttributes() attribute.Set {
+	return attribute.NewSet(
+		attribute.String("domain", w.domain),
+		attribute.String("entity", w.entity),
+	)
+}
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 06b877279e..27dd6e8f0c 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -45,11 +45,13 @@ type Config struct {
 	ChipIngressInsecureConnection  bool // Disables TLS for Chip Ingress Emitter
 
 	// Chip Ingress Batch Emitter
-	ChipIngressBatchEmitterEnabled bool          // When true (default), use batch emitter; when false, use legacy per-event emitter
-	ChipIngressBufferSize          uint          // Per-worker channel buffer size (default 100)
-	ChipIngressMaxBatchSize        uint          // Max events per PublishBatch call (default 50)
-	ChipIngressSendInterval        time.Duration // Flush interval per worker (default 500ms when zero or unset)
-	ChipIngressSendTimeout         time.Duration // Timeout per PublishBatch call (default 10s)
+	ChipIngressBatchEmitterEnabled  bool          // When true (default), use batch emitter; when false, use legacy per-event emitter
+	ChipIngressBufferSize           uint          // Per-worker channel buffer size (default 100)
+	ChipIngressMaxBatchSize         uint          // Max events per PublishBatch call (default 50)
+	ChipIngressSendInterval         time.Duration // Flush interval per worker (default 500ms when zero or unset)
+	ChipIngressSendTimeout          time.Duration // Timeout per PublishBatch call (default 10s)
+	ChipIngressRetryConfig          *RetryConfig  // Retry config for failed PublishBatch calls (defaults match OTel SDK: 5s/30s/1m)
+	ChipIngressDrainTimeout         time.Duration // Max time to flush remaining events on shutdown (default 5s)
 
 	// OTel Log
 	LogExportTimeout      time.Duration
@@ -143,9 +145,11 @@ func DefaultConfig() Config {
 		// Chip Ingress Batch Emitter
 		ChipIngressBatchEmitterEnabled: false,
 		ChipIngressBufferSize:          100,
-		ChipIngressMaxBatchSize: 50,
-		ChipIngressSendInterval: 500 * time.Millisecond,
-		ChipIngressSendTimeout:  10 * time.Second,
+		ChipIngressMaxBatchSize:        50,
+		ChipIngressSendInterval:        500 * time.Millisecond,
+		ChipIngressSendTimeout:         10 * time.Second,
+		ChipIngressRetryConfig:         defaultRetryConfig.Copy(),
+		ChipIngressDrainTimeout:        5 * time.Second,
 		// Auth (defaults to static auth mode with TTL=0)
 		AuthHeadersTTL: 0,
 	}
@@ -157,9 +161,10 @@ func TestDefaultConfig() Config {
 	config.EmitterBatchProcessor = false
 	config.LogBatchProcessor = false
 	// Retries are disabled for testing
-	config.LogRetryConfig.MaxElapsedTime = 0    // Retry is disabled
-	config.TraceRetryConfig.MaxElapsedTime = 0  // Retry is disabled
-	config.MetricRetryConfig.MaxElapsedTime = 0 // Retry is disabled
+	config.LogRetryConfig.MaxElapsedTime = 0          // Retry is disabled
+	config.TraceRetryConfig.MaxElapsedTime = 0        // Retry is disabled
+	config.MetricRetryConfig.MaxElapsedTime = 0       // Retry is disabled
+	config.ChipIngressRetryConfig.MaxElapsedTime = 0  // Retry is disabled
 	// Auth disabled for testing (TTL=0 means static auth mode)
 	config.AuthHeadersTTL = 0
 	return config
diff --git a/pkg/beholder/config_test.go b/pkg/beholder/config_test.go
index 024ba805d2..f98f77bc60 100644
--- a/pkg/beholder/config_test.go
+++ b/pkg/beholder/config_test.go
@@ -67,6 +67,6 @@ func ExampleConfig() {
 	}
 	fmt.Printf("%+v\n", *config.LogRetryConfig)
 	// Output:
-	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
+	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressRetryConfig:<nil> ChipIngressDrainTimeout:0s LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
 	// {InitialInterval:5s MaxInterval:30s MaxElapsedTime:1m0s}
 }

From c57a9b048ddb5e0dcb3af1c71091bf657c1241ec Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Wed, 4 Mar 2026 13:53:09 +0200
Subject: [PATCH 09/28] Use idiomatic select

---
 pkg/beholder/chip_ingress_emitter_worker.go | 22 +++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/pkg/beholder/chip_ingress_emitter_worker.go b/pkg/beholder/chip_ingress_emitter_worker.go
index 2df4ab130b..f706de6fc7 100644
--- a/pkg/beholder/chip_ingress_emitter_worker.go
+++ b/pkg/beholder/chip_ingress_emitter_worker.go
@@ -154,15 +154,21 @@ func (w *chipIngressEmitterWorker) buildBatch() *chipingress.CloudEventBatch {
 	var events []chipingress.CloudEvent
 	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
 
-	for len(w.ch) > 0 && len(events) < int(w.maxBatchSize) { // #nosec G115
-		payload := <-w.ch
-		event, err := w.payloadToEvent(payload)
-		if err != nil {
-			w.lggr.Warnf("failed to build CloudEvent, dropping: %v", err)
-			w.metrics.eventsDropped.Add(context.Background(), 1, metricAttrs)
-			continue
+	max := int(w.maxBatchSize) // #nosec G115
+drain:
+	for len(events) < max {
+		select {
+		case payload := <-w.ch:
+			event, err := w.payloadToEvent(payload)
+			if err != nil {
+				w.lggr.Warnf("failed to build CloudEvent, dropping: %v", err)
+				w.metrics.eventsDropped.Add(context.Background(), 1, metricAttrs)
+				continue
+			}
+			events = append(events, event)
+		default:
+			break drain
 		}
-		events = append(events, event)
 	}
 
 	if len(events) == 0 {

From b55aafd2e5a2f3c3f0418573786d665424eb842b Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Wed, 4 Mar 2026 16:10:39 +0200
Subject: [PATCH 10/28] Various fixes

---
 pkg/beholder/chip_ingress_batch_emitter.go    | 46 ++++++++--------
 .../chip_ingress_batch_emitter_test.go        | 22 ++++++++
 pkg/beholder/chip_ingress_emitter_worker.go   | 54 +++++++++----------
 pkg/beholder/client.go                        | 10 +++-
 pkg/beholder/config.go                        |  2 +-
 5 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index de41f01b5f..f2e92c1ff8 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -101,33 +101,37 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 // Emit extracts (domain, entity) from the attributes, routes the event to the
 // appropriate per-(domain, entity) worker, and returns immediately.
 // If the worker's buffer is full, the event is dropped and a warning is logged.
+// Returns an error if the emitter has been closed.
 func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
-	domain, entity, err := ExtractSourceAndType(attrKVs...)
-	if err != nil {
-		return err
-	}
+	return e.eng.IfNotStopped(func() error {
+		domain, entity, err := ExtractSourceAndType(attrKVs...)
+		if err != nil {
+			return err
+		}
 
-	attributes := newAttributes(attrKVs...)
+		attributes := newAttributes(attrKVs...)
 
-	worker := e.findOrCreateWorker(domain, entity)
+		worker := e.findOrCreateWorker(domain, entity)
 
-	payload := emitterPayload{
-		body:       body,
-		attributes: attributes,
-		domain:     domain,
-		entity:     entity,
-	}
+		payload := emitterPayload{
+			body:       body,
+			attributes: attributes,
+			domain:     domain,
+			entity:     entity,
+		}
 
-	select {
-	case worker.ch <- payload:
-		worker.dropCount.Store(0)
-	case <-ctx.Done():
-		return ctx.Err()
-	default:
-		worker.logBufferFullWithExpBackoff(payload)
-	}
+		select {
+		case worker.ch <- payload:
+			// Intentionally racy with logBufferFullWithExpBackoff — only affects log frequency, not correctness.
+			worker.dropCount.Store(0)
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+			worker.logBufferFullWithExpBackoff(payload)
+		}
 
-	return nil
+		return nil
+	})
 }
 
 // findOrCreateWorker returns the worker for the given (domain, entity) pair,
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
index 98f6808752..f827a04398 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -29,6 +29,7 @@ func newTestLogger(t *testing.T) logger.Logger {
 	t.Helper()
 	lggr, err := logger.New()
 	require.NoError(t, err)
+	t.Cleanup(func() { _ = lggr.Sync() })
 	return lggr
 }
 
@@ -865,3 +866,24 @@ func TestChipIngressBatchEmitter_RetryShutdownDuringBackoff(t *testing.T) {
 		}
 	})
 }
+
+func TestChipIngressBatchEmitter_EmitAfterClose(t *testing.T) {
+	t.Run("Emit after Close returns error", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, nil).
+			Maybe()
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+		require.NoError(t, emitter.Close())
+
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		assert.Error(t, err, "Emit after Close should return an error")
+	})
+}
diff --git a/pkg/beholder/chip_ingress_emitter_worker.go b/pkg/beholder/chip_ingress_emitter_worker.go
index f706de6fc7..87eab18117 100644
--- a/pkg/beholder/chip_ingress_emitter_worker.go
+++ b/pkg/beholder/chip_ingress_emitter_worker.go
@@ -38,7 +38,8 @@ type chipIngressEmitterWorker struct {
 	retryMaxInterval     time.Duration
 	retryMaxElapsed      time.Duration
 
-	metrics batchEmitterMetrics
+	metrics     batchEmitterMetrics
+	metricAttrs otelmetric.MeasurementOption
 }
 
 func newChipIngressEmitterWorker(
@@ -61,6 +62,10 @@ func newChipIngressEmitterWorker(
 		sendTimeout:  sendTimeout,
 		lggr:         logger.Named(lggr, "ChipIngressEmitterWorker"),
 		metrics:      metrics,
+		metricAttrs: otelmetric.WithAttributeSet(attribute.NewSet(
+			attribute.String("domain", domain),
+			attribute.String("entity", entity),
+		)),
 	}
 	if retryCfg != nil && retryCfg.Enabled() {
 		w.retryInitialInterval = retryCfg.InitialInterval
@@ -91,27 +96,25 @@ func (w *chipIngressEmitterWorker) Send(ctx context.Context) {
 	deadline := time.Now().Add(w.retryMaxElapsed)
 
 	batchSize := int64(len(batch.Events))
-	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
-
 	for attempt := 0; ; attempt++ {
 		sendCtx, cancel := context.WithTimeout(ctx, w.sendTimeout)
 		_, err := w.client.PublishBatch(sendCtx, batch)
 		cancel()
 
 		if err == nil {
-			w.metrics.eventsSent.Add(context.Background(), batchSize, metricAttrs)
+			w.metrics.eventsSent.Add(context.Background(), batchSize, w.metricAttrs)
 			return
 		}
 
 		w.lggr.Warnf("PublishBatch failed (attempt %d, domain=%s, entity=%s): %v",
 			attempt+1, w.domain, w.entity, err)
-		w.metrics.batchRetries.Add(context.Background(), 1, metricAttrs)
+		w.metrics.batchRetries.Add(context.Background(), 1, w.metricAttrs)
 
 		if time.Now().Add(backoff).After(deadline) {
 			w.lggr.Warnf("PublishBatch retries exhausted, dropping %d events (domain=%s, entity=%s)",
 				len(batch.Events), w.domain, w.entity)
-			w.metrics.batchFailures.Add(context.Background(), 1, metricAttrs)
-			w.metrics.eventsDropped.Add(context.Background(), batchSize, metricAttrs)
+			w.metrics.batchFailures.Add(context.Background(), 1, w.metricAttrs)
+			w.metrics.eventsDropped.Add(context.Background(), batchSize, w.metricAttrs)
 			return
 		}
 
@@ -121,14 +124,14 @@ func (w *chipIngressEmitterWorker) Send(ctx context.Context) {
 			timer.Stop()
 			w.lggr.Warnf("context cancelled during retry, dropping %d events (domain=%s, entity=%s)",
 				len(batch.Events), w.domain, w.entity)
-			w.metrics.eventsDropped.Add(context.Background(), batchSize, metricAttrs)
+			w.metrics.eventsDropped.Add(context.Background(), batchSize, w.metricAttrs)
 			return
 		case <-timer.C:
 		}
 
-		backoff = min(backoff*2, w.retryMaxInterval)
-		jitter := time.Duration(rand.Int64N(int64(backoff) / 5)) //nolint:gosec
-		backoff += jitter
+		next := backoff * 2
+		jitter := time.Duration(rand.Int64N(int64(next)/5 + 1)) //nolint:gosec
+		backoff = min(next+jitter, w.retryMaxInterval)
 	}
 }
 
@@ -137,22 +140,20 @@ func (w *chipIngressEmitterWorker) publishOnce(ctx context.Context, batch *chipi
 	defer cancel()
 
 	batchSize := int64(len(batch.Events))
-	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
 	_, err := w.client.PublishBatch(sendCtx, batch)
 	if err != nil {
 		w.lggr.Warnf("could not send batch via chip ingress (domain=%s, entity=%s): %v",
 			w.domain, w.entity, err)
-		w.metrics.batchFailures.Add(context.Background(), 1, metricAttrs)
-		w.metrics.eventsDropped.Add(context.Background(), batchSize, metricAttrs)
+		w.metrics.batchFailures.Add(context.Background(), 1, w.metricAttrs)
+		w.metrics.eventsDropped.Add(context.Background(), batchSize, w.metricAttrs)
 		return
 	}
-	w.metrics.eventsSent.Add(context.Background(), batchSize, metricAttrs)
+	w.metrics.eventsSent.Add(context.Background(), batchSize, w.metricAttrs)
 }
 
 // buildBatch drains the channel up to maxBatchSize and converts payloads to a CloudEventBatch.
 func (w *chipIngressEmitterWorker) buildBatch() *chipingress.CloudEventBatch {
 	var events []chipingress.CloudEvent
-	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
 
 	max := int(w.maxBatchSize) // #nosec G115
 drain:
@@ -162,7 +163,7 @@ drain:
 			event, err := w.payloadToEvent(payload)
 			if err != nil {
 				w.lggr.Warnf("failed to build CloudEvent, dropping: %v", err)
-				w.metrics.eventsDropped.Add(context.Background(), 1, metricAttrs)
+				w.metrics.eventsDropped.Add(context.Background(), 1, w.metricAttrs)
 				continue
 			}
 			events = append(events, event)
@@ -178,7 +179,7 @@ drain:
 	batch, err := chipingress.EventsToBatch(events)
 	if err != nil {
 		w.lggr.Warnf("failed to convert events to batch: %v", err)
-		w.metrics.eventsDropped.Add(context.Background(), int64(len(events)), metricAttrs)
+		w.metrics.eventsDropped.Add(context.Background(), int64(len(events)), w.metricAttrs)
 		return nil
 	}
 
@@ -204,7 +205,6 @@ func (w *chipIngressEmitterWorker) drain(timeout time.Duration) {
 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	defer cancel()
 
-	metricAttrs := otelmetric.WithAttributeSet(w.metricAttributes())
 	w.lggr.Infof("draining %d buffered events (domain=%s, entity=%s)", len(w.ch), w.domain, w.entity)
 
 	for len(w.ch) > 0 {
@@ -213,7 +213,7 @@ func (w *chipIngressEmitterWorker) drain(timeout time.Duration) {
 			if remaining > 0 {
 				w.lggr.Warnf("drain timeout exceeded, dropping %d remaining events (domain=%s, entity=%s)",
 					remaining, w.domain, w.entity)
-				w.metrics.eventsDropped.Add(context.Background(), int64(remaining), metricAttrs)
+				w.metrics.eventsDropped.Add(context.Background(), int64(remaining), w.metricAttrs)
 			}
 			return
 		}
@@ -231,28 +231,22 @@ func (w *chipIngressEmitterWorker) drain(timeout time.Duration) {
 		if err != nil {
 			w.lggr.Warnf("drain PublishBatch failed, dropping %d events (domain=%s, entity=%s): %v",
 				len(batch.Events), w.domain, w.entity, err)
-			w.metrics.eventsDropped.Add(context.Background(), batchSize, metricAttrs)
+			w.metrics.eventsDropped.Add(context.Background(), batchSize, w.metricAttrs)
 			continue
 		}
 
-		w.metrics.eventsDrained.Add(context.Background(), batchSize, metricAttrs)
+		w.metrics.eventsDrained.Add(context.Background(), batchSize, w.metricAttrs)
 	}
 }
 
 // logBufferFullWithExpBackoff logs at 1, 2, 4, 8, 16, 32, 64, 100, 200, 300, ...
 // to avoid flooding logs when the buffer is persistently full.
+// dropCount is intentionally racy with Emit's Store(0) — this only affects log frequency, not correctness.
 func (w *chipIngressEmitterWorker) logBufferFullWithExpBackoff(payload emitterPayload) {
-	w.metrics.eventsDropped.Add(context.Background(), 1, otelmetric.WithAttributeSet(w.metricAttributes()))
+	w.metrics.eventsDropped.Add(context.Background(), 1, w.metricAttrs)
 	count := w.dropCount.Add(1)
 	if count > 0 && (count%100 == 0 || count&(count-1) == 0) {
 		w.lggr.Warnf("chip ingress emitter buffer full, dropping event (domain=%s, entity=%s, droppedCount=%d)",
 			payload.domain, payload.entity, count)
 	}
 }
-
-func (w *chipIngressEmitterWorker) metricAttributes() attribute.Set {
-	return attribute.NewSet(
-		attribute.String("domain", w.domain),
-		attribute.String("entity", w.entity),
-	)
-}
diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index eaa268223f..e042368f20 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -226,6 +226,8 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 
 		var chipIngressEmitter Emitter
 		if cfg.ChipIngressBatchEmitterEnabled {
+			// TODO: accept a logger from the caller instead of creating a new root logger,
+			// so batch emitter logs respect the node's logging configuration.
 			lggr, lErr := ccllogger.New()
 			if lErr != nil {
 				return nil, fmt.Errorf("failed to create logger for chip ingress batch emitter: %w", lErr)
@@ -247,13 +249,17 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 
 		emitter, err = NewDualSourceEmitter(chipIngressEmitter, emitter)
 		if err != nil {
+			if batchEmitterService != nil {
+				_ = batchEmitterService.Close()
+			}
 			return nil, fmt.Errorf("failed to create dual source emitter: %w", err)
 		}
 	}
 
 	onClose := func() (err error) {
-		// batchEmitterService is closed via DualSourceEmitter.Close() -> chipIngressEmitter.Close(),
-		// which is called by Client.Close() -> c.Emitter.Close() before OnClose runs.
+		if batchEmitterService != nil {
+			err = errors.Join(err, batchEmitterService.Close())
+		}
 		for _, provider := range []shutdowner{messageLoggerProvider, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider} {
 			err = errors.Join(err, provider.Shutdown(context.Background()))
 		}
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 27dd6e8f0c..6f9af30d91 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -45,7 +45,7 @@ type Config struct {
 	ChipIngressInsecureConnection  bool // Disables TLS for Chip Ingress Emitter
 
 	// Chip Ingress Batch Emitter
-	ChipIngressBatchEmitterEnabled  bool          // When true (default), use batch emitter; when false, use legacy per-event emitter
+	ChipIngressBatchEmitterEnabled  bool          // When true, use batch emitter; when false (default), use legacy per-event emitter
 	ChipIngressBufferSize           uint          // Per-worker channel buffer size (default 100)
 	ChipIngressMaxBatchSize         uint          // Max events per PublishBatch call (default 50)
 	ChipIngressSendInterval         time.Duration // Flush interval per worker (default 500ms when zero or unset)

From 58a8dd93d12bd8a93e6a701f624b2d6a3d39793b Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Wed, 4 Mar 2026 20:54:58 +0200
Subject: [PATCH 11/28] Update defaults

---
 pkg/beholder/config.go | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 6f9af30d91..3d8758b677 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -50,7 +50,7 @@ type Config struct {
 	ChipIngressMaxBatchSize         uint          // Max events per PublishBatch call (default 50)
 	ChipIngressSendInterval         time.Duration // Flush interval per worker (default 500ms when zero or unset)
 	ChipIngressSendTimeout          time.Duration // Timeout per PublishBatch call (default 10s)
-	ChipIngressRetryConfig          *RetryConfig  // Retry config for failed PublishBatch calls (defaults match OTel SDK: 5s/30s/1m)
+	ChipIngressRetryConfig          *RetryConfig  // Retry config for failed PublishBatch calls (defaults: 500ms/5s/30s)
 	ChipIngressDrainTimeout         time.Duration // Max time to flush remaining events on shutdown (default 5s)
 
 	// OTel Log
@@ -99,6 +99,14 @@ var defaultRetryConfig = RetryConfig{
 	MaxElapsedTime:  1 * time.Minute, // Retry is enabled
 }
 
+// Retry defaults for the chip ingress batch emitter: faster backoff than OTel
+// to avoid buffer pressure on the small per-worker channel.
+var defaultChipIngressRetryConfig = RetryConfig{
+	InitialInterval: 500 * time.Millisecond,
+	MaxInterval:     5 * time.Second,
+	MaxElapsedTime:  5 * time.Second,
+}
+
 const (
 	defaultPackageName = "beholder"
 )
@@ -148,7 +156,7 @@ func DefaultConfig() Config {
 		ChipIngressMaxBatchSize:        50,
 		ChipIngressSendInterval:        500 * time.Millisecond,
 		ChipIngressSendTimeout:         10 * time.Second,
-		ChipIngressRetryConfig:         defaultRetryConfig.Copy(),
+		ChipIngressRetryConfig:         defaultChipIngressRetryConfig.Copy(),
 		ChipIngressDrainTimeout:        5 * time.Second,
 		// Auth (defaults to static auth mode with TTL=0)
 		AuthHeadersTTL: 0,

From 7871a2c844aba78269380f7dd8bdd5f2b3aac80c Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Wed, 4 Mar 2026 21:45:46 +0200
Subject: [PATCH 12/28] Change default timeout to 5s

---
 pkg/beholder/chip_ingress_batch_emitter.go | 2 +-
 pkg/beholder/config.go                     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index f2e92c1ff8..27b8d727e6 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -66,7 +66,7 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	}
 	sendTimeout := cfg.ChipIngressSendTimeout
 	if sendTimeout == 0 {
-		sendTimeout = 10 * time.Second
+		sendTimeout = 5 * time.Second
 	}
 	drainTimeout := cfg.ChipIngressDrainTimeout
 	if drainTimeout == 0 {
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 3d8758b677..0b8b6129a5 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -49,7 +49,7 @@ type Config struct {
 	ChipIngressBufferSize           uint          // Per-worker channel buffer size (default 100)
 	ChipIngressMaxBatchSize         uint          // Max events per PublishBatch call (default 50)
 	ChipIngressSendInterval         time.Duration // Flush interval per worker (default 500ms when zero or unset)
-	ChipIngressSendTimeout          time.Duration // Timeout per PublishBatch call (default 10s)
+	ChipIngressSendTimeout          time.Duration // Timeout per PublishBatch call (default 5s)
 	ChipIngressRetryConfig          *RetryConfig  // Retry config for failed PublishBatch calls (defaults: 500ms/5s/30s)
 	ChipIngressDrainTimeout         time.Duration // Max time to flush remaining events on shutdown (default 5s)
 
@@ -155,7 +155,7 @@ func DefaultConfig() Config {
 		ChipIngressBufferSize:          100,
 		ChipIngressMaxBatchSize:        50,
 		ChipIngressSendInterval:        500 * time.Millisecond,
-		ChipIngressSendTimeout:         10 * time.Second,
+		ChipIngressSendTimeout:         5 * time.Second,
 		ChipIngressRetryConfig:         defaultChipIngressRetryConfig.Copy(),
 		ChipIngressDrainTimeout:        5 * time.Second,
 		// Auth (defaults to static auth mode with TTL=0)

From d7f42dca74544375ba8329f7c3c459c67922fa42 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Thu, 5 Mar 2026 13:36:06 +0200
Subject: [PATCH 13/28] Remove retries, add max workers

---
 pkg/beholder/chip_ingress_batch_emitter.go    |  42 ++--
 .../chip_ingress_batch_emitter_test.go        | 202 +++---------------
 pkg/beholder/chip_ingress_emitter_worker.go   |  65 +-----
 pkg/beholder/config.go                        |  32 ++-
 pkg/beholder/config_test.go                   |   2 +-
 5 files changed, 71 insertions(+), 272 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index 27b8d727e6..8c0a3d4f93 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -29,20 +29,19 @@ type ChipIngressBatchEmitter struct {
 
 	bufferSize   uint
 	maxBatchSize uint
+	maxWorkers   int
 	sendInterval time.Duration
 	sendTimeout  time.Duration
-	retryCfg     *RetryConfig
 	drainTimeout time.Duration
 
 	metrics batchEmitterMetrics
 }
 
 type batchEmitterMetrics struct {
-	eventsSent      otelmetric.Int64Counter
-	eventsDropped   otelmetric.Int64Counter
-	batchRetries    otelmetric.Int64Counter
-	batchFailures   otelmetric.Int64Counter
-	eventsDrained   otelmetric.Int64Counter
+	eventsSent    otelmetric.Int64Counter
+	eventsDropped otelmetric.Int64Counter
+	batchFailures otelmetric.Int64Counter
+	eventsDrained otelmetric.Int64Counter
 }
 
 // NewChipIngressBatchEmitter creates a batch emitter backed by the given chipingress client.
@@ -60,6 +59,10 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	if maxBatchSize == 0 {
 		maxBatchSize = 50
 	}
+	maxWorkers := cfg.ChipIngressMaxWorkers
+	if maxWorkers == 0 {
+		maxWorkers = defaultMaxWorkers
+	}
 	sendInterval := cfg.ChipIngressSendInterval
 	if sendInterval == 0 {
 		sendInterval = 500 * time.Millisecond
@@ -84,9 +87,9 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 		workers:      make(map[string]*chipIngressEmitterWorker),
 		bufferSize:   bufferSize,
 		maxBatchSize: maxBatchSize,
+		maxWorkers:   maxWorkers,
 		sendInterval: sendInterval,
 		sendTimeout:  sendTimeout,
-		retryCfg:     cfg.ChipIngressRetryConfig,
 		drainTimeout: drainTimeout,
 		metrics:      metrics,
 	}
@@ -112,6 +115,9 @@ func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs
 		attributes := newAttributes(attrKVs...)
 
 		worker := e.findOrCreateWorker(domain, entity)
+		if worker == nil {
+			return nil
+		}
 
 		payload := emitterPayload{
 			body:       body,
@@ -137,7 +143,7 @@ func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs
 // findOrCreateWorker returns the worker for the given (domain, entity) pair,
 // creating one with a new buffered channel and flush goroutine if it doesn't exist.
 func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chipIngressEmitterWorker {
-	workerKey := fmt.Sprintf("%s_%s", domain, entity)
+	workerKey := domain + "/" + entity
 
 	e.workersMutex.RLock()
 	worker, found := e.workers[workerKey]
@@ -150,11 +156,16 @@ func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chi
 	e.workersMutex.Lock()
 	defer e.workersMutex.Unlock()
 
-	// Double-check after acquiring write lock
 	if worker, found = e.workers[workerKey]; found {
 		return worker
 	}
 
+	if len(e.workers) >= e.maxWorkers {
+		e.eng.Warnf("chip ingress batch emitter: max workers (%d) reached, dropping event for %s", e.maxWorkers, workerKey)
+		e.metrics.eventsDropped.Add(context.Background(), 1)
+		return nil
+	}
+
 	worker = newChipIngressEmitterWorker(
 		e.client,
 		make(chan emitterPayload, e.bufferSize),
@@ -162,7 +173,6 @@ func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chi
 		entity,
 		e.maxBatchSize,
 		e.sendTimeout,
-		e.retryCfg,
 		e.metrics,
 		e.eng,
 	)
@@ -197,21 +207,14 @@ func newBatchEmitterMetrics(meter otelmetric.Meter) (batchEmitterMetrics, error)
 	}
 
 	eventsDropped, err := meter.Int64Counter("chip_ingress.events_dropped",
-		otelmetric.WithDescription("Total events dropped (buffer full or retries exhausted)"),
+		otelmetric.WithDescription("Total events dropped (buffer full or send failure)"),
 		otelmetric.WithUnit("{event}"))
 	if err != nil {
 		return batchEmitterMetrics{}, err
 	}
 
-	batchRetries, err := meter.Int64Counter("chip_ingress.batch_retries",
-		otelmetric.WithDescription("Total PublishBatch retry attempts"),
-		otelmetric.WithUnit("{attempt}"))
-	if err != nil {
-		return batchEmitterMetrics{}, err
-	}
-
 	batchFailures, err := meter.Int64Counter("chip_ingress.batch_failures",
-		otelmetric.WithDescription("Total batches that failed after all retries"),
+		otelmetric.WithDescription("Total PublishBatch calls that failed"),
 		otelmetric.WithUnit("{batch}"))
 	if err != nil {
 		return batchEmitterMetrics{}, err
@@ -227,7 +230,6 @@ func newBatchEmitterMetrics(meter otelmetric.Meter) (batchEmitterMetrics, error)
 	return batchEmitterMetrics{
 		eventsSent:    eventsSent,
 		eventsDropped: eventsDropped,
-		batchRetries:  batchRetries,
 		batchFailures: batchFailures,
 		eventsDrained: eventsDrained,
 	}, nil
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
index f827a04398..142a5edd82 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -472,130 +472,6 @@ func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
 	})
 }
 
-func newRetryTestConfig() beholder.Config {
-	return beholder.Config{
-		ChipIngressBufferSize:   10,
-		ChipIngressMaxBatchSize: 5,
-		ChipIngressSendInterval: 50 * time.Millisecond,
-		ChipIngressSendTimeout:  1 * time.Second,
-		ChipIngressRetryConfig: &beholder.RetryConfig{
-			InitialInterval: 10 * time.Millisecond,
-			MaxInterval:     50 * time.Millisecond,
-			MaxElapsedTime:  500 * time.Millisecond,
-		},
-		ChipIngressDrainTimeout: 5 * time.Second,
-	}
-}
-
-func TestChipIngressBatchEmitter_RetrySuccess(t *testing.T) {
-	t.Run("succeeds after transient failure", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		callCount := 0
-		totalEventsSent := 0
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				callCount++
-				batch := args.Get(1).(*chipingress.CloudEventBatch)
-				if callCount <= 2 {
-					return // first 2 calls fail
-				}
-				totalEventsSent += len(batch.Events)
-			}).
-			Return(nil, assert.AnError).Times(2)
-
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				callCount++
-				batch := args.Get(1).(*chipingress.CloudEventBatch)
-				totalEventsSent += len(batch.Events)
-			}).
-			Return(nil, nil)
-
-		cfg := newRetryTestConfig()
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		for i := 0; i < 3; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
-
-		assert.Eventually(t, func() bool {
-			mu.Lock()
-			defer mu.Unlock()
-			return totalEventsSent >= 3
-		}, 5*time.Second, 10*time.Millisecond)
-
-		require.NoError(t, emitter.Close())
-
-		mu.Lock()
-		defer mu.Unlock()
-		assert.GreaterOrEqual(t, callCount, 3, "should have retried at least twice before succeeding")
-		assert.Equal(t, 3, totalEventsSent)
-	})
-}
-
-func TestChipIngressBatchEmitter_RetryExhaustion(t *testing.T) {
-	t.Run("drops events after retries exhausted", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		callCount := 0
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(_ mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				callCount++
-			}).
-			Return(nil, assert.AnError)
-
-		cfg := newRetryTestConfig()
-		cfg.ChipIngressRetryConfig = &beholder.RetryConfig{
-			InitialInterval: 10 * time.Millisecond,
-			MaxInterval:     20 * time.Millisecond,
-			MaxElapsedTime:  100 * time.Millisecond,
-		}
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		for i := 0; i < 3; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
-
-		// Wait long enough for retries to exhaust
-		assert.Eventually(t, func() bool {
-			mu.Lock()
-			defer mu.Unlock()
-			return callCount >= 2
-		}, 3*time.Second, 10*time.Millisecond)
-
-		require.NoError(t, emitter.Close())
-
-		mu.Lock()
-		defer mu.Unlock()
-		assert.GreaterOrEqual(t, callCount, 2, "should have attempted at least 2 times before exhausting retries")
-	})
-}
-
 func TestChipIngressBatchEmitter_GracefulDrain(t *testing.T) {
 	t.Run("flushes buffered events on close", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
@@ -805,65 +681,49 @@ func TestChipIngressBatchEmitter_DrainTimeout(t *testing.T) {
 	})
 }
 
-func TestChipIngressBatchEmitter_RetryShutdownDuringBackoff(t *testing.T) {
-	t.Run("shutdown during retry backoff completes promptly", func(t *testing.T) {
+func TestChipIngressBatchEmitter_MaxWorkersCap(t *testing.T) {
+	t.Run("drops events when max workers reached", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		callCount := 0
 		clientMock.
 			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(_ mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				callCount++
-			}).
-			Return(nil, assert.AnError)
+			Return(nil, nil).
+			Maybe()
 
-		cfg := beholder.Config{
-			ChipIngressBufferSize:   10,
-			ChipIngressMaxBatchSize: 5,
-			ChipIngressSendInterval: 50 * time.Millisecond,
-			ChipIngressSendTimeout:  1 * time.Second,
-			ChipIngressRetryConfig: &beholder.RetryConfig{
-				InitialInterval: 10 * time.Second, // very long backoff
-				MaxInterval:     30 * time.Second,
-				MaxElapsedTime:  1 * time.Minute,
-			},
-			ChipIngressDrainTimeout: 5 * time.Second,
-		}
+		cfg := newTestConfig()
+		cfg.ChipIngressMaxWorkers = 2
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
 		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
+		defer emitter.Close() //nolint:errcheck
 
-		for i := 0; i < 3; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
+		// Create 2 workers (at the cap)
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "domain-1",
+			beholder.AttrKeyEntity, "entity-1",
+		)
+		require.NoError(t, err)
 
-		// Wait for at least one PublishBatch attempt (which will fail and enter 10s backoff)
-		assert.Eventually(t, func() bool {
-			mu.Lock()
-			defer mu.Unlock()
-			return callCount >= 1
-		}, 2*time.Second, 10*time.Millisecond)
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "domain-2",
+			beholder.AttrKeyEntity, "entity-2",
+		)
+		require.NoError(t, err)
 
-		// Close while the worker is sleeping in the 10s retry backoff
-		closeDone := make(chan error, 1)
-		go func() {
-			closeDone <- emitter.Close()
-		}()
+		// 3rd unique pair should be silently dropped (no error, no worker created)
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "domain-3",
+			beholder.AttrKeyEntity, "entity-3",
+		)
+		assert.NoError(t, err, "Emit should not error when max workers is reached")
 
-		select {
-		case err := <-closeDone:
-			assert.NoError(t, err, "close should not error")
-		case <-time.After(3 * time.Second):
-			t.Fatal("Close() did not return within 3s; timer.Stop() during retry backoff is not working")
-		}
+		// Existing workers should still accept events
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "domain-1",
+			beholder.AttrKeyEntity, "entity-1",
+		)
+		assert.NoError(t, err, "Emit to existing worker should still work")
 	})
 }
 
diff --git a/pkg/beholder/chip_ingress_emitter_worker.go b/pkg/beholder/chip_ingress_emitter_worker.go
index 87eab18117..b2bd7b3101 100644
--- a/pkg/beholder/chip_ingress_emitter_worker.go
+++ b/pkg/beholder/chip_ingress_emitter_worker.go
@@ -3,7 +3,6 @@ package beholder
 import (
 	"context"
 	"fmt"
-	"math/rand/v2"
 	"sync/atomic"
 	"time"
 
@@ -24,6 +23,8 @@ type emitterPayload struct {
 
 // chipIngressEmitterWorker buffers events for a single (domain, entity) pair
 // and flushes them via PublishBatch on a periodic interval.
+// Transport-level retries (UNAVAILABLE, RESOURCE_EXHAUSTED) are handled by the
+// gRPC client's built-in retry policy; no application-level retry is needed.
 type chipIngressEmitterWorker struct {
 	client       chipingress.Client
 	ch           chan emitterPayload
@@ -34,10 +35,6 @@ type chipIngressEmitterWorker struct {
 	lggr         logger.Logger
 	dropCount    atomic.Uint32
 
-	retryInitialInterval time.Duration
-	retryMaxInterval     time.Duration
-	retryMaxElapsed      time.Duration
-
 	metrics     batchEmitterMetrics
 	metricAttrs otelmetric.MeasurementOption
 }
@@ -49,11 +46,10 @@ func newChipIngressEmitterWorker(
 	entity string,
 	maxBatchSize uint,
 	sendTimeout time.Duration,
-	retryCfg *RetryConfig,
 	metrics batchEmitterMetrics,
 	lggr logger.Logger,
 ) *chipIngressEmitterWorker {
-	w := &chipIngressEmitterWorker{
+	return &chipIngressEmitterWorker{
 		client:       client,
 		ch:           ch,
 		domain:       domain,
@@ -67,15 +63,9 @@ func newChipIngressEmitterWorker(
 			attribute.String("entity", entity),
 		)),
 	}
-	if retryCfg != nil && retryCfg.Enabled() {
-		w.retryInitialInterval = retryCfg.InitialInterval
-		w.retryMaxInterval = retryCfg.MaxInterval
-		w.retryMaxElapsed = retryCfg.MaxElapsedTime
-	}
-	return w
 }
 
-// Send drains the channel and sends a batch with retry on failure.
+// Send drains the channel and sends a batch.
 // Called periodically by the tick loop.
 func (w *chipIngressEmitterWorker) Send(ctx context.Context) {
 	if len(w.ch) == 0 {
@@ -87,52 +77,7 @@ func (w *chipIngressEmitterWorker) Send(ctx context.Context) {
 		return
 	}
 
-	if w.retryMaxElapsed == 0 {
-		w.publishOnce(ctx, batch)
-		return
-	}
-
-	backoff := w.retryInitialInterval
-	deadline := time.Now().Add(w.retryMaxElapsed)
-
-	batchSize := int64(len(batch.Events))
-	for attempt := 0; ; attempt++ {
-		sendCtx, cancel := context.WithTimeout(ctx, w.sendTimeout)
-		_, err := w.client.PublishBatch(sendCtx, batch)
-		cancel()
-
-		if err == nil {
-			w.metrics.eventsSent.Add(context.Background(), batchSize, w.metricAttrs)
-			return
-		}
-
-		w.lggr.Warnf("PublishBatch failed (attempt %d, domain=%s, entity=%s): %v",
-			attempt+1, w.domain, w.entity, err)
-		w.metrics.batchRetries.Add(context.Background(), 1, w.metricAttrs)
-
-		if time.Now().Add(backoff).After(deadline) {
-			w.lggr.Warnf("PublishBatch retries exhausted, dropping %d events (domain=%s, entity=%s)",
-				len(batch.Events), w.domain, w.entity)
-			w.metrics.batchFailures.Add(context.Background(), 1, w.metricAttrs)
-			w.metrics.eventsDropped.Add(context.Background(), batchSize, w.metricAttrs)
-			return
-		}
-
-		timer := time.NewTimer(backoff)
-		select {
-		case <-ctx.Done():
-			timer.Stop()
-			w.lggr.Warnf("context cancelled during retry, dropping %d events (domain=%s, entity=%s)",
-				len(batch.Events), w.domain, w.entity)
-			w.metrics.eventsDropped.Add(context.Background(), batchSize, w.metricAttrs)
-			return
-		case <-timer.C:
-		}
-
-		next := backoff * 2
-		jitter := time.Duration(rand.Int64N(int64(next)/5 + 1)) //nolint:gosec
-		backoff = min(next+jitter, w.retryMaxInterval)
-	}
+	w.publishOnce(ctx, batch)
 }
 
 func (w *chipIngressEmitterWorker) publishOnce(ctx context.Context, batch *chipingress.CloudEventBatch) {
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 0b8b6129a5..9d80177560 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -45,13 +45,13 @@ type Config struct {
 	ChipIngressInsecureConnection  bool // Disables TLS for Chip Ingress Emitter
 
 	// Chip Ingress Batch Emitter
-	ChipIngressBatchEmitterEnabled  bool          // When true, use batch emitter; when false (default), use legacy per-event emitter
-	ChipIngressBufferSize           uint          // Per-worker channel buffer size (default 100)
-	ChipIngressMaxBatchSize         uint          // Max events per PublishBatch call (default 50)
-	ChipIngressSendInterval         time.Duration // Flush interval per worker (default 500ms when zero or unset)
-	ChipIngressSendTimeout          time.Duration // Timeout per PublishBatch call (default 5s)
-	ChipIngressRetryConfig          *RetryConfig  // Retry config for failed PublishBatch calls (defaults: 500ms/5s/30s)
-	ChipIngressDrainTimeout         time.Duration // Max time to flush remaining events on shutdown (default 5s)
+	ChipIngressBatchEmitterEnabled bool          // When true, use batch emitter; when false (default), use legacy per-event emitter
+	ChipIngressBufferSize          uint          // Per-worker channel buffer size (default 100)
+	ChipIngressMaxBatchSize        uint          // Max events per PublishBatch call (default 50)
+	ChipIngressMaxWorkers          int           // Max concurrent (domain, entity) workers (default 100)
+	ChipIngressSendInterval        time.Duration // Flush interval per worker (default 500ms when zero or unset)
+	ChipIngressSendTimeout         time.Duration // Timeout per PublishBatch call (default 5s)
+	ChipIngressDrainTimeout        time.Duration // Max time to flush remaining events on shutdown (default 5s)
 
 	// OTel Log
 	LogExportTimeout      time.Duration
@@ -99,16 +99,9 @@ var defaultRetryConfig = RetryConfig{
 	MaxElapsedTime:  1 * time.Minute, // Retry is enabled
 }
 
-// Retry defaults for the chip ingress batch emitter: faster backoff than OTel
-// to avoid buffer pressure on the small per-worker channel.
-var defaultChipIngressRetryConfig = RetryConfig{
-	InitialInterval: 500 * time.Millisecond,
-	MaxInterval:     5 * time.Second,
-	MaxElapsedTime:  5 * time.Second,
-}
-
 const (
 	defaultPackageName = "beholder"
+	defaultMaxWorkers  = 100
 )
 
 var defaultOtelAttributes = []attribute.KeyValue{
@@ -154,9 +147,9 @@ func DefaultConfig() Config {
 		ChipIngressBatchEmitterEnabled: false,
 		ChipIngressBufferSize:          100,
 		ChipIngressMaxBatchSize:        50,
+		ChipIngressMaxWorkers:          defaultMaxWorkers,
 		ChipIngressSendInterval:        500 * time.Millisecond,
 		ChipIngressSendTimeout:         5 * time.Second,
-		ChipIngressRetryConfig:         defaultChipIngressRetryConfig.Copy(),
 		ChipIngressDrainTimeout:        5 * time.Second,
 		// Auth (defaults to static auth mode with TTL=0)
 		AuthHeadersTTL: 0,
@@ -169,10 +162,9 @@ func TestDefaultConfig() Config {
 	config.EmitterBatchProcessor = false
 	config.LogBatchProcessor = false
 	// Retries are disabled for testing
-	config.LogRetryConfig.MaxElapsedTime = 0          // Retry is disabled
-	config.TraceRetryConfig.MaxElapsedTime = 0        // Retry is disabled
-	config.MetricRetryConfig.MaxElapsedTime = 0       // Retry is disabled
-	config.ChipIngressRetryConfig.MaxElapsedTime = 0  // Retry is disabled
+	config.LogRetryConfig.MaxElapsedTime = 0    // Retry is disabled
+	config.TraceRetryConfig.MaxElapsedTime = 0  // Retry is disabled
+	config.MetricRetryConfig.MaxElapsedTime = 0 // Retry is disabled
 	// Auth disabled for testing (TTL=0 means static auth mode)
 	config.AuthHeadersTTL = 0
 	return config
diff --git a/pkg/beholder/config_test.go b/pkg/beholder/config_test.go
index f98f77bc60..5a83c71178 100644
--- a/pkg/beholder/config_test.go
+++ b/pkg/beholder/config_test.go
@@ -67,6 +67,6 @@ func ExampleConfig() {
 	}
 	fmt.Printf("%+v\n", *config.LogRetryConfig)
 	// Output:
-	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressRetryConfig:<nil> ChipIngressDrainTimeout:0s LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
+	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressMaxWorkers:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressDrainTimeout:0s LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
 	// {InitialInterval:5s MaxInterval:30s MaxElapsedTime:1m0s}
 }

From af757a12bb50afa02a812326b4e85695e373d466 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Thu, 5 Mar 2026 13:36:33 +0200
Subject: [PATCH 14/28] Fix draining, remove unused

---
 pkg/beholder/client.go              | 13 ++++++-------
 pkg/beholder/dual_source_emitter.go |  6 ------
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index e042368f20..9a5fdf9305 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -257,9 +257,6 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 	}
 
 	onClose := func() (err error) {
-		if batchEmitterService != nil {
-			err = errors.Join(err, batchEmitterService.Close())
-		}
 		for _, provider := range []shutdowner{messageLoggerProvider, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider} {
 			err = errors.Join(err, provider.Shutdown(context.Background()))
 		}
@@ -268,17 +265,19 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 	return &Client{cfg, logger, tracer, meter, emitter, chipIngressClient, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider, signer, onClose}, nil
 }
 
-// Closes all providers, flushes all data and stops all background processes
+// Closes all providers, flushes all data and stops all background processes.
+// Order matters: emitter is closed first so the batch emitter can drain
+// buffered events while the gRPC connection is still alive.
 func (c Client) Close() (err error) {
-	if c.Chip != nil {
-		err = errors.Join(err, c.Chip.Close())
-	}
 	if c.Emitter != nil {
 		err = errors.Join(err, c.Emitter.Close())
 	}
 	if c.OnClose != nil {
 		err = errors.Join(err, c.OnClose())
 	}
+	if c.Chip != nil {
+		err = errors.Join(err, c.Chip.Close())
+	}
 	return
 }
 
diff --git a/pkg/beholder/dual_source_emitter.go b/pkg/beholder/dual_source_emitter.go
index d34dbb614e..0501e5c529 100644
--- a/pkg/beholder/dual_source_emitter.go
+++ b/pkg/beholder/dual_source_emitter.go
@@ -7,7 +7,6 @@ import (
 	"sync/atomic"
 
 	"github.com/smartcontractkit/chainlink-common/pkg/logger"
-	"github.com/smartcontractkit/chainlink-common/pkg/services"
 )
 
 // DualSourceEmitter emits both to chip ingress and to the otel collector
@@ -18,8 +17,6 @@ type DualSourceEmitter struct {
 	chipIngressEmitter   Emitter
 	otelCollectorEmitter Emitter
 	log                  logger.Logger
-	stopCh               services.StopChan
-	wg                   services.WaitGroup
 	closed               atomic.Bool
 }
 
@@ -42,7 +39,6 @@ func NewDualSourceEmitter(chipIngressEmitter Emitter, otelCollectorEmitter Emitt
 		chipIngressEmitter:   chipIngressEmitter,
 		otelCollectorEmitter: otelCollectorEmitter,
 		log:                  logger,
-		stopCh:               make(services.StopChan),
 	}, nil
 }
 
@@ -50,8 +46,6 @@ func (d *DualSourceEmitter) Close() error {
 	if wasClosed := d.closed.Swap(true); wasClosed {
 		return errors.New("already closed")
 	}
-	close(d.stopCh)
-	d.wg.Wait()
 	return errors.Join(d.chipIngressEmitter.Close(), d.otelCollectorEmitter.Close())
 }
 

From 32ebc25478fedfbda902f4f53baa61a67ae9b806 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Thu, 5 Mar 2026 14:32:46 +0200
Subject: [PATCH 15/28] Change defaults

---
 pkg/beholder/chip_ingress_batch_emitter.go      | 10 +++++-----
 pkg/beholder/chip_ingress_batch_emitter_test.go |  2 +-
 pkg/beholder/config.go                          | 10 +++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index 8c0a3d4f93..d526d1a1d3 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -53,11 +53,11 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 
 	bufferSize := cfg.ChipIngressBufferSize
 	if bufferSize == 0 {
-		bufferSize = 100
+		bufferSize = 1000
 	}
 	maxBatchSize := cfg.ChipIngressMaxBatchSize
 	if maxBatchSize == 0 {
-		maxBatchSize = 50
+		maxBatchSize = 500
 	}
 	maxWorkers := cfg.ChipIngressMaxWorkers
 	if maxWorkers == 0 {
@@ -65,15 +65,15 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	}
 	sendInterval := cfg.ChipIngressSendInterval
 	if sendInterval == 0 {
-		sendInterval = 500 * time.Millisecond
+		sendInterval = 100 * time.Millisecond
 	}
 	sendTimeout := cfg.ChipIngressSendTimeout
 	if sendTimeout == 0 {
-		sendTimeout = 5 * time.Second
+		sendTimeout = 3 * time.Second
 	}
 	drainTimeout := cfg.ChipIngressDrainTimeout
 	if drainTimeout == 0 {
-		drainTimeout = 5 * time.Second
+		drainTimeout = 10 * time.Second
 	}
 
 	meter := otel.Meter("beholder/chip_ingress_batch_emitter")
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
index 142a5edd82..b2fe7211bf 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -457,7 +457,7 @@ func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
 		)
 		require.NoError(t, err)
 
-		// Default send interval is 500ms; wait for flush
+		// Default send interval is 100ms; wait for flush
 		assert.Eventually(t, func() bool {
 			mu.Lock()
 			defer mu.Unlock()
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 9d80177560..3f371799df 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -145,12 +145,12 @@ func DefaultConfig() Config {
 		LogCompressor: "gzip",
 		// Chip Ingress Batch Emitter
 		ChipIngressBatchEmitterEnabled: false,
-		ChipIngressBufferSize:          100,
-		ChipIngressMaxBatchSize:        50,
+		ChipIngressBufferSize:          1000,
+		ChipIngressMaxBatchSize:        500,
 		ChipIngressMaxWorkers:          defaultMaxWorkers,
-		ChipIngressSendInterval:        500 * time.Millisecond,
-		ChipIngressSendTimeout:         5 * time.Second,
-		ChipIngressDrainTimeout:        5 * time.Second,
+		ChipIngressSendInterval:        100 * time.Millisecond,
+		ChipIngressSendTimeout:         3 * time.Second,
+		ChipIngressDrainTimeout:        10 * time.Second,
 		// Auth (defaults to static auth mode with TTL=0)
 		AuthHeadersTTL: 0,
 	}

From d45812681bd5753d902e7abddb2070788e3d34a2 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 6 Mar 2026 15:55:29 +0200
Subject: [PATCH 16/28] Use batch emitter client

---
 go.mod                                        |   2 +
 go.sum                                        |   2 -
 pkg/beholder/chip_ingress_batch_emitter.go    | 179 ++++++----
 .../chip_ingress_batch_emitter_test.go        | 326 +++++++++++++++---
 pkg/beholder/chip_ingress_emitter_worker.go   | 180 +---------
 pkg/beholder/config.go                        |  17 +-
 pkg/beholder/config_test.go                   |   2 +-
 pkg/chipingress/batch/client.go               |   4 +-
 pkg/chipingress/batch/client_test.go          |   7 +-
 9 files changed, 407 insertions(+), 312 deletions(-)

diff --git a/go.mod b/go.mod
index eb9c45471f..85fb5f3969 100644
--- a/go.mod
+++ b/go.mod
@@ -157,3 +157,5 @@ require (
 	google.golang.org/genproto/googleapis/api v0.0.0-20251029180050-ab9386a59fda // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 )
+
+replace github.com/smartcontractkit/chainlink-common/pkg/chipingress => ./pkg/chipingress
diff --git a/go.sum b/go.sum
index bfa8dbdd0a..58c5db722d 100644
--- a/go.sum
+++ b/go.sum
@@ -328,8 +328,6 @@ github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMB
 github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
 github.com/smartcontractkit/chain-selectors v1.0.89 h1:L9oWZGqQXWyTPnC6ODXgu3b0DFyLmJ9eHv+uJrE9IZY=
 github.com/smartcontractkit/chain-selectors v1.0.89/go.mod h1:qy7whtgG5g+7z0jt0nRyii9bLND9m15NZTzuQPkMZ5w=
-github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9oqASnkS03RE1HQwYQQxrO4l46O5JSzxqLgg=
-github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10/go.mod h1:oiDa54M0FwxevWwyAX773lwdWvFYYlYHHQV1LQ5HpWY=
 github.com/smartcontractkit/chainlink-protos/billing/go v0.0.0-20251024234028-0988426d98f4 h1:GCzrxDWn3b7jFfEA+WiYRi8CKoegsayiDoJBCjYkneE=
 github.com/smartcontractkit/chainlink-protos/billing/go v0.0.0-20251024234028-0988426d98f4/go.mod h1:HHGeDUpAsPa0pmOx7wrByCitjQ0mbUxf0R9v+g67uCA=
 github.com/smartcontractkit/chainlink-protos/cre/go v0.0.0-20260226130359-963f935e0396 h1:03tbcwjyIEjvHba1IWOj1sfThwebm2XNzyFHSuZtlWc=
diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index d526d1a1d3..17b8479431 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -10,12 +10,15 @@ import (
 	otelmetric "go.opentelemetry.io/otel/metric"
 
 	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
+	"github.com/smartcontractkit/chainlink-common/pkg/chipingress/batch"
 	"github.com/smartcontractkit/chainlink-common/pkg/logger"
 	"github.com/smartcontractkit/chainlink-common/pkg/services"
 )
 
 // ChipIngressBatchEmitter buffers events per (domain, entity) and flushes them
 // via chipingress.Client.PublishBatch on a periodic interval.
+// Each (domain, entity) pair gets its own batch.Client, providing per-entity
+// isolation and independent concurrency scaling.
 // It satisfies the Emitter interface so it can be used as a drop-in replacement
 // for ChipIngressEmitter.
 type ChipIngressBatchEmitter struct {
@@ -27,12 +30,13 @@ type ChipIngressBatchEmitter struct {
 	workers      map[string]*chipIngressEmitterWorker
 	workersMutex sync.RWMutex
 
-	bufferSize   uint
-	maxBatchSize uint
-	maxWorkers   int
-	sendInterval time.Duration
-	sendTimeout  time.Duration
-	drainTimeout time.Duration
+	bufferSize         int
+	maxBatchSize       int
+	maxWorkers         int
+	maxConcurrentSends int
+	sendInterval       time.Duration
+	sendTimeout        time.Duration
+	drainTimeout       time.Duration
 
 	metrics batchEmitterMetrics
 }
@@ -40,8 +44,6 @@ type ChipIngressBatchEmitter struct {
 type batchEmitterMetrics struct {
 	eventsSent    otelmetric.Int64Counter
 	eventsDropped otelmetric.Int64Counter
-	batchFailures otelmetric.Int64Counter
-	eventsDrained otelmetric.Int64Counter
 }
 
 // NewChipIngressBatchEmitter creates a batch emitter backed by the given chipingress client.
@@ -51,11 +53,11 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 		return nil, fmt.Errorf("chip ingress client is nil")
 	}
 
-	bufferSize := cfg.ChipIngressBufferSize
+	bufferSize := int(cfg.ChipIngressBufferSize)
 	if bufferSize == 0 {
 		bufferSize = 1000
 	}
-	maxBatchSize := cfg.ChipIngressMaxBatchSize
+	maxBatchSize := int(cfg.ChipIngressMaxBatchSize)
 	if maxBatchSize == 0 {
 		maxBatchSize = 500
 	}
@@ -63,6 +65,10 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	if maxWorkers == 0 {
 		maxWorkers = defaultMaxWorkers
 	}
+	maxConcurrentSends := cfg.ChipIngressMaxConcurrentSends
+	if maxConcurrentSends == 0 {
+		maxConcurrentSends = defaultMaxConcurrentSends
+	}
 	sendInterval := cfg.ChipIngressSendInterval
 	if sendInterval == 0 {
 		sendInterval = 100 * time.Millisecond
@@ -83,15 +89,16 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	}
 
 	e := &ChipIngressBatchEmitter{
-		client:       client,
-		workers:      make(map[string]*chipIngressEmitterWorker),
-		bufferSize:   bufferSize,
-		maxBatchSize: maxBatchSize,
-		maxWorkers:   maxWorkers,
-		sendInterval: sendInterval,
-		sendTimeout:  sendTimeout,
-		drainTimeout: drainTimeout,
-		metrics:      metrics,
+		client:             client,
+		workers:            make(map[string]*chipIngressEmitterWorker),
+		bufferSize:         bufferSize,
+		maxBatchSize:       maxBatchSize,
+		maxWorkers:         maxWorkers,
+		maxConcurrentSends: maxConcurrentSends,
+		sendInterval:       sendInterval,
+		sendTimeout:        sendTimeout,
+		drainTimeout:       drainTimeout,
+		metrics:            metrics,
 	}
 
 	e.Service, e.eng = services.Config{
@@ -102,10 +109,36 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 }
 
 // Emit extracts (domain, entity) from the attributes, routes the event to the
-// appropriate per-(domain, entity) worker, and returns immediately.
-// If the worker's buffer is full, the event is dropped and a warning is logged.
-// Returns an error if the emitter has been closed.
+// appropriate per-(domain, entity) worker's batch.Client, and returns immediately.
+// If the worker's buffer is full, the event is dropped and a metric is bumped.
+// Returns an error if the emitter has been closed or the caller's context is cancelled.
 func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
+	return e.emitInternal(ctx, body, nil, attrKVs...)
+}
+
+// EmitWithCallback works like Emit but accepts a callback that is invoked once
+// the event's fate is determined: nil on successful PublishBatch delivery, or a
+// non-nil error on send failure or buffer-full drop.
+//
+// Contract:
+//   - If EmitWithCallback returns a non-nil error, the callback will NOT be invoked.
+//     The caller should handle the error from the return value.
+//   - If EmitWithCallback returns nil, the callback is guaranteed to be invoked
+//     exactly once — either asynchronously (after the batch is sent) or
+//     synchronously (if the event was dropped at enqueue time).
+//
+// Callers can use this for "synchronous" emission:
+//
+//	done := make(chan error, 1)
+//	if err := emitter.EmitWithCallback(ctx, body, func(err error) { done <- err }, attrKVs...); err != nil {
+//	    return err // callback will not fire
+//	}
+//	err := <-done // safe — callback will fire exactly once
+func (e *ChipIngressBatchEmitter) EmitWithCallback(ctx context.Context, body []byte, callback func(error), attrKVs ...any) error {
+	return e.emitInternal(ctx, body, callback, attrKVs...)
+}
+
+func (e *ChipIngressBatchEmitter) emitInternal(ctx context.Context, body []byte, callback func(error), attrKVs ...any) error {
 	return e.eng.IfNotStopped(func() error {
 		domain, entity, err := ExtractSourceAndType(attrKVs...)
 		if err != nil {
@@ -116,24 +149,40 @@ func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs
 
 		worker := e.findOrCreateWorker(domain, entity)
 		if worker == nil {
+			if callback != nil {
+				callback(fmt.Errorf("max workers reached, event dropped"))
+			}
 			return nil
 		}
 
-		payload := emitterPayload{
-			body:       body,
-			attributes: attributes,
-			domain:     domain,
-			entity:     entity,
+		event, err := chipingress.NewEvent(domain, entity, body, attributes)
+		if err != nil {
+			return fmt.Errorf("failed to create CloudEvent: %w", err)
+		}
+		eventPb, err := chipingress.EventToProto(event)
+		if err != nil {
+			return fmt.Errorf("failed to convert to proto: %w", err)
+		}
+
+		if err := ctx.Err(); err != nil {
+			return err
 		}
 
-		select {
-		case worker.ch <- payload:
-			// Intentionally racy with logBufferFullWithExpBackoff — only affects log frequency, not correctness.
-			worker.dropCount.Store(0)
-		case <-ctx.Done():
-			return ctx.Err()
-		default:
-			worker.logBufferFullWithExpBackoff(payload)
+		queueErr := worker.batchClient.QueueMessage(eventPb, func(sendErr error) {
+			if sendErr != nil {
+				e.metrics.eventsDropped.Add(context.Background(), 1, worker.metricAttrs)
+			} else {
+				e.metrics.eventsSent.Add(context.Background(), 1, worker.metricAttrs)
+			}
+			if callback != nil {
+				callback(sendErr)
+			}
+		})
+		if queueErr != nil {
+			e.metrics.eventsDropped.Add(context.Background(), 1, worker.metricAttrs)
+			if callback != nil {
+				callback(queueErr)
+			}
 		}
 
 		return nil
@@ -141,9 +190,9 @@ func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs
 }
 
 // findOrCreateWorker returns the worker for the given (domain, entity) pair,
-// creating one with a new buffered channel and flush goroutine if it doesn't exist.
+// creating one backed by a new batch.Client if it doesn't exist.
 func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chipIngressEmitterWorker {
-	workerKey := domain + "/" + entity
+	workerKey := domain + "\x00" + entity
 
 	e.workersMutex.RLock()
 	worker, found := e.workers[workerKey]
@@ -166,32 +215,26 @@ func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chi
 		return nil
 	}
 
-	worker = newChipIngressEmitterWorker(
-		e.client,
-		make(chan emitterPayload, e.bufferSize),
-		domain,
-		entity,
-		e.maxBatchSize,
-		e.sendTimeout,
-		e.metrics,
-		e.eng,
+	batchClient, err := batch.NewBatchClient(e.client,
+		batch.WithBatchSize(e.maxBatchSize),
+		batch.WithMessageBuffer(e.bufferSize),
+		batch.WithBatchInterval(e.sendInterval),
+		batch.WithMaxPublishTimeout(e.sendTimeout),
+		batch.WithShutdownTimeout(e.drainTimeout),
+		batch.WithMaxConcurrentSends(e.maxConcurrentSends),
+		batch.WithEventClone(false),
 	)
+	if err != nil {
+		e.eng.Errorf("chip ingress batch emitter: failed to create batch client for %s: %v", workerKey, err)
+		return nil
+	}
+
+	worker = newChipIngressEmitterWorker(batchClient, domain, entity)
 
-	sendInterval := e.sendInterval
-	drainTimeout := e.drainTimeout
 	e.eng.Go(func(ctx context.Context) {
-		ticker := time.NewTicker(sendInterval)
-		defer ticker.Stop()
-
-		for {
-			select {
-			case <-ticker.C:
-				worker.Send(ctx)
-			case <-ctx.Done():
-				worker.drain(drainTimeout)
-				return
-			}
-		}
+		worker.batchClient.Start(ctx)
+		<-ctx.Done()
+		worker.batchClient.Stop()
 	})
 
 	e.workers[workerKey] = worker
@@ -213,24 +256,8 @@ func newBatchEmitterMetrics(meter otelmetric.Meter) (batchEmitterMetrics, error)
 		return batchEmitterMetrics{}, err
 	}
 
-	batchFailures, err := meter.Int64Counter("chip_ingress.batch_failures",
-		otelmetric.WithDescription("Total PublishBatch calls that failed"),
-		otelmetric.WithUnit("{batch}"))
-	if err != nil {
-		return batchEmitterMetrics{}, err
-	}
-
-	eventsDrained, err := meter.Int64Counter("chip_ingress.events_drained",
-		otelmetric.WithDescription("Total events flushed during graceful shutdown"),
-		otelmetric.WithUnit("{event}"))
-	if err != nil {
-		return batchEmitterMetrics{}, err
-	}
-
 	return batchEmitterMetrics{
 		eventsSent:    eventsSent,
 		eventsDropped: eventsDropped,
-		batchFailures: batchFailures,
-		eventsDrained: eventsDrained,
 	}, nil
 }
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
index b2fe7211bf..0e51b808e6 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -18,10 +18,12 @@ import (
 
 func newTestConfig() beholder.Config {
 	return beholder.Config{
-		ChipIngressBufferSize:   10,
-		ChipIngressMaxBatchSize: 5,
-		ChipIngressSendInterval: 50 * time.Millisecond,
-		ChipIngressSendTimeout:  5 * time.Second,
+		ChipIngressBufferSize:         10,
+		ChipIngressMaxBatchSize:       5,
+		ChipIngressMaxConcurrentSends: 3,
+		ChipIngressSendInterval:       50 * time.Millisecond,
+		ChipIngressSendTimeout:        5 * time.Second,
+		ChipIngressDrainTimeout:       5 * time.Second,
 	}
 }
 
@@ -67,7 +69,11 @@ func TestChipIngressBatchEmitter_Emit(t *testing.T) {
 			Return(nil, nil).
 			Maybe()
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 10 * time.Second
+		cfg.ChipIngressMaxBatchSize = 100
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 		defer emitter.Close() //nolint:errcheck
@@ -78,7 +84,8 @@ func TestChipIngressBatchEmitter_Emit(t *testing.T) {
 		)
 		require.NoError(t, err)
 
-		// PublishBatch should NOT have been called yet (event is just buffered)
+		// With a large batch size and long interval, PublishBatch should not fire yet
+		time.Sleep(100 * time.Millisecond)
 		clientMock.AssertNotCalled(t, "PublishBatch", mock.Anything, mock.Anything)
 	})
 }
@@ -250,36 +257,56 @@ func TestChipIngressBatchEmitter_PerDomainEntityIsolation(t *testing.T) {
 func TestChipIngressBatchEmitter_BufferFull(t *testing.T) {
 	t.Run("events are dropped when buffer is full", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
-		// Block PublishBatch so the buffer fills up
+
+		// Block PublishBatch so the batcher's send pipeline backs up,
+		// eventually filling the message buffer channel.
+		sendBlocked := make(chan struct{})
+		firstCallSignal := make(chan struct{}, 1)
 		clientMock.
 			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(_ mock.Arguments) {
+				select {
+				case firstCallSignal <- struct{}{}:
+				default:
+				}
+				<-sendBlocked
+			}).
 			Return(nil, nil).
 			Maybe()
 
 		cfg := newTestConfig()
-		cfg.ChipIngressBufferSize = 3
-		cfg.ChipIngressSendInterval = 10 * time.Second // very long interval to prevent flushing
+		cfg.ChipIngressBufferSize = 2
+		cfg.ChipIngressMaxBatchSize = 1
+		cfg.ChipIngressMaxConcurrentSends = 1
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+		cfg.ChipIngressDrainTimeout = 200 * time.Millisecond
 
 		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
+		defer close(sendBlocked)
 		defer emitter.Close() //nolint:errcheck
 
-		// Fill the buffer (3 events)
-		for i := 0; i < 3; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
+		// First event triggers a send that blocks, exhausting the semaphore
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		require.NoError(t, err)
+
+		// Wait for the batcher to process the event and block on PublishBatch
+		<-firstCallSignal
+		// Give batcher time to read the next event and block on the semaphore
+		time.Sleep(100 * time.Millisecond)
+
+		// Flood the buffer — Emit should never return an error
+		for i := 0; i < 10; i++ {
+			err = emitter.Emit(t.Context(), []byte("overflow"),
 				beholder.AttrKeyDomain, "platform",
 				beholder.AttrKeyEntity, "TestEvent",
 			)
-			require.NoError(t, err)
+			assert.NoError(t, err, "Emit should not return error even when dropping events")
 		}
-
-		// This should not error (it drops silently), but the event won't be delivered
-		err = emitter.Emit(t.Context(), []byte("dropped"),
-			beholder.AttrKeyDomain, "platform",
-			beholder.AttrKeyEntity, "TestEvent",
-		)
-		assert.NoError(t, err)
 	})
 }
 
@@ -414,13 +441,6 @@ func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
 		require.NoError(t, emitter.Start(t.Context()))
 		defer emitter.Close() //nolint:errcheck
 
-		// Fill the buffer so the next Emit will block on channel send
-		err = emitter.Emit(t.Context(), []byte("fill"),
-			beholder.AttrKeyDomain, "platform",
-			beholder.AttrKeyEntity, "TestEvent",
-		)
-		require.NoError(t, err)
-
 		ctx, cancel := context.WithCancel(t.Context())
 		cancel()
 
@@ -489,11 +509,12 @@ func TestChipIngressBatchEmitter_GracefulDrain(t *testing.T) {
 			Return(nil, nil)
 
 		cfg := beholder.Config{
-			ChipIngressBufferSize:   20,
-			ChipIngressMaxBatchSize: 10,
-			ChipIngressSendInterval: 1 * time.Hour, // very long interval — events won't flush via tick
-			ChipIngressSendTimeout:  5 * time.Second,
-			ChipIngressDrainTimeout: 5 * time.Second,
+			ChipIngressBufferSize:         20,
+			ChipIngressMaxBatchSize:       10,
+			ChipIngressMaxConcurrentSends: 3,
+			ChipIngressSendInterval:       1 * time.Hour,
+			ChipIngressSendTimeout:        5 * time.Second,
+			ChipIngressDrainTimeout:       5 * time.Second,
 		}
 
 		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
@@ -508,6 +529,9 @@ func TestChipIngressBatchEmitter_GracefulDrain(t *testing.T) {
 			require.NoError(t, err)
 		}
 
+		// Give the batcher time to read events from the channel into its internal batch
+		time.Sleep(50 * time.Millisecond)
+
 		// Events are buffered but no tick has fired. Close should drain them.
 		require.NoError(t, emitter.Close())
 
@@ -537,11 +561,12 @@ func TestChipIngressBatchEmitter_DrainMultipleDomains(t *testing.T) {
 			Return(nil, nil)
 
 		cfg := beholder.Config{
-			ChipIngressBufferSize:   20,
-			ChipIngressMaxBatchSize: 10,
-			ChipIngressSendInterval: 1 * time.Hour,
-			ChipIngressSendTimeout:  5 * time.Second,
-			ChipIngressDrainTimeout: 5 * time.Second,
+			ChipIngressBufferSize:         20,
+			ChipIngressMaxBatchSize:       10,
+			ChipIngressMaxConcurrentSends: 3,
+			ChipIngressSendInterval:       1 * time.Hour,
+			ChipIngressSendTimeout:        5 * time.Second,
+			ChipIngressDrainTimeout:       5 * time.Second,
 		}
 
 		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
@@ -563,6 +588,9 @@ func TestChipIngressBatchEmitter_DrainMultipleDomains(t *testing.T) {
 			require.NoError(t, err)
 		}
 
+		// Give batchers time to read events from their channels
+		time.Sleep(50 * time.Millisecond)
+
 		require.NoError(t, emitter.Close())
 
 		mu.Lock()
@@ -573,7 +601,7 @@ func TestChipIngressBatchEmitter_DrainMultipleDomains(t *testing.T) {
 }
 
 func TestChipIngressBatchEmitter_DrainPublishBatchFailure(t *testing.T) {
-	t.Run("drain continues attempting batches after failure", func(t *testing.T) {
+	t.Run("handles batch failure and continues sending", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
 
 		var mu sync.Mutex
@@ -605,18 +633,19 @@ func TestChipIngressBatchEmitter_DrainPublishBatchFailure(t *testing.T) {
 			Return(nil, nil)
 
 		cfg := beholder.Config{
-			ChipIngressBufferSize:   20,
-			ChipIngressMaxBatchSize: 3,
-			ChipIngressSendInterval: 1 * time.Hour,
-			ChipIngressSendTimeout:  5 * time.Second,
-			ChipIngressDrainTimeout: 5 * time.Second,
+			ChipIngressBufferSize:         20,
+			ChipIngressMaxBatchSize:       3,
+			ChipIngressMaxConcurrentSends: 3,
+			ChipIngressSendInterval:       1 * time.Hour,
+			ChipIngressSendTimeout:        5 * time.Second,
+			ChipIngressDrainTimeout:       5 * time.Second,
 		}
 
 		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
-		// Emit 6 events with maxBatchSize=3 => 2 batches during drain
+		// Emit 6 events with maxBatchSize=3 => 2 batches via size trigger
 		for i := 0; i < 6; i++ {
 			err = emitter.Emit(t.Context(), []byte("body"),
 				beholder.AttrKeyDomain, "platform",
@@ -625,11 +654,14 @@ func TestChipIngressBatchEmitter_DrainPublishBatchFailure(t *testing.T) {
 			require.NoError(t, err)
 		}
 
+		// Give the batcher time to read events and trigger size-based sends
+		time.Sleep(100 * time.Millisecond)
+
 		require.NoError(t, emitter.Close())
 
 		mu.Lock()
 		defer mu.Unlock()
-		assert.GreaterOrEqual(t, callCount, 2, "drain should have attempted at least 2 batches")
+		assert.GreaterOrEqual(t, callCount, 2, "should have attempted at least 2 batches")
 		assert.Equal(t, 3, totalEventsSent, "second batch should have succeeded despite first batch failure")
 	})
 }
@@ -648,11 +680,12 @@ func TestChipIngressBatchEmitter_DrainTimeout(t *testing.T) {
 			Maybe()
 
 		cfg := beholder.Config{
-			ChipIngressBufferSize:   20,
-			ChipIngressMaxBatchSize: 10,
-			ChipIngressSendInterval: 1 * time.Hour,
-			ChipIngressSendTimeout:  10 * time.Second,
-			ChipIngressDrainTimeout: 200 * time.Millisecond,
+			ChipIngressBufferSize:         20,
+			ChipIngressMaxBatchSize:       10,
+			ChipIngressMaxConcurrentSends: 3,
+			ChipIngressSendInterval:       1 * time.Hour,
+			ChipIngressSendTimeout:        10 * time.Second,
+			ChipIngressDrainTimeout:       200 * time.Millisecond,
 		}
 
 		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
@@ -667,6 +700,9 @@ func TestChipIngressBatchEmitter_DrainTimeout(t *testing.T) {
 			require.NoError(t, err)
 		}
 
+		// Give the batcher time to read events
+		time.Sleep(50 * time.Millisecond)
+
 		closeDone := make(chan error, 1)
 		go func() {
 			closeDone <- emitter.Close()
@@ -747,3 +783,193 @@ func TestChipIngressBatchEmitter_EmitAfterClose(t *testing.T) {
 		assert.Error(t, err, "Emit after Close should return an error")
 	})
 }
+
+func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
+	t.Run("callback receives nil on successful send", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, nil)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		done := make(chan error, 1)
+		err = emitter.EmitWithCallback(t.Context(), []byte("body"), func(sendErr error) {
+			done <- sendErr
+		},
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		require.NoError(t, err)
+
+		select {
+		case sendErr := <-done:
+			assert.NoError(t, sendErr, "callback should receive nil on success")
+		case <-time.After(3 * time.Second):
+			t.Fatal("callback was not invoked within timeout")
+		}
+
+		require.NoError(t, emitter.Close())
+	})
+
+	t.Run("callback receives error on PublishBatch failure", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, assert.AnError)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		done := make(chan error, 1)
+		err = emitter.EmitWithCallback(t.Context(), []byte("body"), func(sendErr error) {
+			done <- sendErr
+		},
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		require.NoError(t, err)
+
+		select {
+		case sendErr := <-done:
+			assert.Error(t, sendErr, "callback should receive error on failure")
+		case <-time.After(3 * time.Second):
+			t.Fatal("callback was not invoked within timeout")
+		}
+
+		require.NoError(t, emitter.Close())
+	})
+
+	t.Run("callback receives error when buffer is full", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+
+		sendBlocked := make(chan struct{})
+		firstCallSignal := make(chan struct{}, 1)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Run(func(_ mock.Arguments) {
+				select {
+				case firstCallSignal <- struct{}{}:
+				default:
+				}
+				<-sendBlocked
+			}).
+			Return(nil, nil).
+			Maybe()
+
+		cfg := newTestConfig()
+		cfg.ChipIngressBufferSize = 2
+		cfg.ChipIngressMaxBatchSize = 1
+		cfg.ChipIngressMaxConcurrentSends = 1
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+		cfg.ChipIngressDrainTimeout = 200 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+		defer close(sendBlocked)
+		defer emitter.Close() //nolint:errcheck
+
+		// First event triggers a send that blocks, exhausting the semaphore
+		err = emitter.Emit(t.Context(), []byte("body"),
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		require.NoError(t, err)
+
+		// Wait for PublishBatch to be called and blocked
+		<-firstCallSignal
+		// Give batcher time to read the next event and block on the semaphore
+		time.Sleep(100 * time.Millisecond)
+
+		// Flood the buffer (size 2) so it becomes full
+		for i := 0; i < 10; i++ {
+			_ = emitter.Emit(t.Context(), []byte("filler"),
+				beholder.AttrKeyDomain, "platform",
+				beholder.AttrKeyEntity, "TestEvent",
+			)
+		}
+
+		// Buffer is full — callback should be invoked synchronously with an error
+		dropped := make(chan error, 1)
+		err = emitter.EmitWithCallback(t.Context(), []byte("overflow"), func(sendErr error) {
+			dropped <- sendErr
+		},
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		assert.NoError(t, err, "Emit should not return an error even when dropping")
+
+		select {
+		case dropErr := <-dropped:
+			assert.Error(t, dropErr, "callback should receive an error when buffer is full")
+		case <-time.After(time.Second):
+			t.Fatal("callback was not invoked for dropped event")
+		}
+	})
+
+	t.Run("synchronous emit pattern works end-to-end", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, nil)
+
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		done := make(chan error, 1)
+		err = emitter.EmitWithCallback(t.Context(), []byte("sync-body"), func(sendErr error) {
+			done <- sendErr
+		},
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		require.NoError(t, err)
+
+		// Block until the event is actually sent — the "synchronous" pattern
+		select {
+		case sendErr := <-done:
+			assert.NoError(t, sendErr)
+		case <-time.After(3 * time.Second):
+			t.Fatal("synchronous emit did not complete within timeout")
+		}
+
+		require.NoError(t, emitter.Close())
+	})
+
+	t.Run("nil callback behaves like Emit", func(t *testing.T) {
+		clientMock := mocks.NewClient(t)
+		clientMock.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(nil, nil).
+			Maybe()
+
+		cfg := newTestConfig()
+		cfg.ChipIngressSendInterval = 50 * time.Millisecond
+
+		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		require.NoError(t, err)
+		require.NoError(t, emitter.Start(t.Context()))
+
+		err = emitter.EmitWithCallback(t.Context(), []byte("body"), nil,
+			beholder.AttrKeyDomain, "platform",
+			beholder.AttrKeyEntity, "TestEvent",
+		)
+		assert.NoError(t, err)
+
+		require.NoError(t, emitter.Close())
+	})
+}
diff --git a/pkg/beholder/chip_ingress_emitter_worker.go b/pkg/beholder/chip_ingress_emitter_worker.go
index b2bd7b3101..005b2c3aa5 100644
--- a/pkg/beholder/chip_ingress_emitter_worker.go
+++ b/pkg/beholder/chip_ingress_emitter_worker.go
@@ -1,197 +1,29 @@
 package beholder
 
 import (
-	"context"
-	"fmt"
-	"sync/atomic"
-	"time"
-
 	"go.opentelemetry.io/otel/attribute"
 	otelmetric "go.opentelemetry.io/otel/metric"
 
-	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
-	"github.com/smartcontractkit/chainlink-common/pkg/logger"
+	"github.com/smartcontractkit/chainlink-common/pkg/chipingress/batch"
 )
 
-// emitterPayload holds a single event to be batched and sent via chip ingress.
-type emitterPayload struct {
-	body       []byte
-	attributes map[string]any
-	domain     string
-	entity     string
-}
-
-// chipIngressEmitterWorker buffers events for a single (domain, entity) pair
-// and flushes them via PublishBatch on a periodic interval.
-// Transport-level retries (UNAVAILABLE, RESOURCE_EXHAUSTED) are handled by the
-// gRPC client's built-in retry policy; no application-level retry is needed.
+// chipIngressEmitterWorker wraps a batch.Client for a single (domain, entity) pair.
+// The batch.Client handles buffering, periodic flushing, concurrent sends, and graceful shutdown.
 type chipIngressEmitterWorker struct {
-	client       chipingress.Client
-	ch           chan emitterPayload
-	domain       string
-	entity       string
-	maxBatchSize uint
-	sendTimeout  time.Duration
-	lggr         logger.Logger
-	dropCount    atomic.Uint32
-
-	metrics     batchEmitterMetrics
+	batchClient *batch.Client
 	metricAttrs otelmetric.MeasurementOption
 }
 
 func newChipIngressEmitterWorker(
-	client chipingress.Client,
-	ch chan emitterPayload,
+	batchClient *batch.Client,
 	domain string,
 	entity string,
-	maxBatchSize uint,
-	sendTimeout time.Duration,
-	metrics batchEmitterMetrics,
-	lggr logger.Logger,
 ) *chipIngressEmitterWorker {
 	return &chipIngressEmitterWorker{
-		client:       client,
-		ch:           ch,
-		domain:       domain,
-		entity:       entity,
-		maxBatchSize: maxBatchSize,
-		sendTimeout:  sendTimeout,
-		lggr:         logger.Named(lggr, "ChipIngressEmitterWorker"),
-		metrics:      metrics,
+		batchClient: batchClient,
 		metricAttrs: otelmetric.WithAttributeSet(attribute.NewSet(
 			attribute.String("domain", domain),
 			attribute.String("entity", entity),
 		)),
 	}
 }
-
-// Send drains the channel and sends a batch.
-// Called periodically by the tick loop.
-func (w *chipIngressEmitterWorker) Send(ctx context.Context) {
-	if len(w.ch) == 0 {
-		return
-	}
-
-	batch := w.buildBatch()
-	if batch == nil || len(batch.Events) == 0 {
-		return
-	}
-
-	w.publishOnce(ctx, batch)
-}
-
-func (w *chipIngressEmitterWorker) publishOnce(ctx context.Context, batch *chipingress.CloudEventBatch) {
-	sendCtx, cancel := context.WithTimeout(ctx, w.sendTimeout)
-	defer cancel()
-
-	batchSize := int64(len(batch.Events))
-	_, err := w.client.PublishBatch(sendCtx, batch)
-	if err != nil {
-		w.lggr.Warnf("could not send batch via chip ingress (domain=%s, entity=%s): %v",
-			w.domain, w.entity, err)
-		w.metrics.batchFailures.Add(context.Background(), 1, w.metricAttrs)
-		w.metrics.eventsDropped.Add(context.Background(), batchSize, w.metricAttrs)
-		return
-	}
-	w.metrics.eventsSent.Add(context.Background(), batchSize, w.metricAttrs)
-}
-
-// buildBatch drains the channel up to maxBatchSize and converts payloads to a CloudEventBatch.
-func (w *chipIngressEmitterWorker) buildBatch() *chipingress.CloudEventBatch {
-	var events []chipingress.CloudEvent
-
-	max := int(w.maxBatchSize) // #nosec G115
-drain:
-	for len(events) < max {
-		select {
-		case payload := <-w.ch:
-			event, err := w.payloadToEvent(payload)
-			if err != nil {
-				w.lggr.Warnf("failed to build CloudEvent, dropping: %v", err)
-				w.metrics.eventsDropped.Add(context.Background(), 1, w.metricAttrs)
-				continue
-			}
-			events = append(events, event)
-		default:
-			break drain
-		}
-	}
-
-	if len(events) == 0 {
-		return nil
-	}
-
-	batch, err := chipingress.EventsToBatch(events)
-	if err != nil {
-		w.lggr.Warnf("failed to convert events to batch: %v", err)
-		w.metrics.eventsDropped.Add(context.Background(), int64(len(events)), w.metricAttrs)
-		return nil
-	}
-
-	return batch
-}
-
-func (w *chipIngressEmitterWorker) payloadToEvent(payload emitterPayload) (chipingress.CloudEvent, error) {
-	event, err := chipingress.NewEvent(payload.domain, payload.entity, payload.body, payload.attributes)
-	if err != nil {
-		return chipingress.CloudEvent{}, fmt.Errorf("failed to create CloudEvent: %w", err)
-	}
-	return event, nil
-}
-
-// drain flushes all remaining buffered events before shutdown.
-// Uses a fresh context with the given timeout (independent of the cancelled parent).
-// Continues attempting subsequent batches even if one fails.
-func (w *chipIngressEmitterWorker) drain(timeout time.Duration) {
-	if len(w.ch) == 0 {
-		return
-	}
-
-	ctx, cancel := context.WithTimeout(context.Background(), timeout)
-	defer cancel()
-
-	w.lggr.Infof("draining %d buffered events (domain=%s, entity=%s)", len(w.ch), w.domain, w.entity)
-
-	for len(w.ch) > 0 {
-		if ctx.Err() != nil {
-			remaining := len(w.ch)
-			if remaining > 0 {
-				w.lggr.Warnf("drain timeout exceeded, dropping %d remaining events (domain=%s, entity=%s)",
-					remaining, w.domain, w.entity)
-				w.metrics.eventsDropped.Add(context.Background(), int64(remaining), w.metricAttrs)
-			}
-			return
-		}
-
-		batch := w.buildBatch()
-		if batch == nil || len(batch.Events) == 0 {
-			break
-		}
-
-		batchSize := int64(len(batch.Events))
-		sendCtx, sendCancel := context.WithTimeout(ctx, w.sendTimeout)
-		_, err := w.client.PublishBatch(sendCtx, batch)
-		sendCancel()
-
-		if err != nil {
-			w.lggr.Warnf("drain PublishBatch failed, dropping %d events (domain=%s, entity=%s): %v",
-				len(batch.Events), w.domain, w.entity, err)
-			w.metrics.eventsDropped.Add(context.Background(), batchSize, w.metricAttrs)
-			continue
-		}
-
-		w.metrics.eventsDrained.Add(context.Background(), batchSize, w.metricAttrs)
-	}
-}
-
-// logBufferFullWithExpBackoff logs at 1, 2, 4, 8, 16, 32, 64, 100, 200, 300, ...
-// to avoid flooding logs when the buffer is persistently full.
-// dropCount is intentionally racy with Emit's Store(0) — this only affects log frequency, not correctness.
-func (w *chipIngressEmitterWorker) logBufferFullWithExpBackoff(payload emitterPayload) {
-	w.metrics.eventsDropped.Add(context.Background(), 1, w.metricAttrs)
-	count := w.dropCount.Add(1)
-	if count > 0 && (count%100 == 0 || count&(count-1) == 0) {
-		w.lggr.Warnf("chip ingress emitter buffer full, dropping event (domain=%s, entity=%s, droppedCount=%d)",
-			payload.domain, payload.entity, count)
-	}
-}
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 3f371799df..8bcf7c3a4b 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -46,12 +46,13 @@ type Config struct {
 
 	// Chip Ingress Batch Emitter
 	ChipIngressBatchEmitterEnabled bool          // When true, use batch emitter; when false (default), use legacy per-event emitter
-	ChipIngressBufferSize          uint          // Per-worker channel buffer size (default 100)
-	ChipIngressMaxBatchSize        uint          // Max events per PublishBatch call (default 50)
+	ChipIngressBufferSize          uint          // Per-worker message buffer size (default 1000)
+	ChipIngressMaxBatchSize        uint          // Max events per PublishBatch call (default 500)
 	ChipIngressMaxWorkers          int           // Max concurrent (domain, entity) workers (default 100)
-	ChipIngressSendInterval        time.Duration // Flush interval per worker (default 500ms when zero or unset)
-	ChipIngressSendTimeout         time.Duration // Timeout per PublishBatch call (default 5s)
-	ChipIngressDrainTimeout        time.Duration // Max time to flush remaining events on shutdown (default 5s)
+	ChipIngressSendInterval        time.Duration // Flush interval per worker (default 100ms)
+	ChipIngressSendTimeout         time.Duration // Timeout per PublishBatch call (default 3s)
+	ChipIngressDrainTimeout        time.Duration // Max time to flush remaining events on shutdown (default 10s)
+	ChipIngressMaxConcurrentSends  int           // Max concurrent PublishBatch calls per worker (default 3)
 
 	// OTel Log
 	LogExportTimeout      time.Duration
@@ -100,8 +101,9 @@ var defaultRetryConfig = RetryConfig{
 }
 
 const (
-	defaultPackageName = "beholder"
-	defaultMaxWorkers  = 100
+	defaultPackageName        = "beholder"
+	defaultMaxWorkers         = 100
+	defaultMaxConcurrentSends = 3
 )
 
 var defaultOtelAttributes = []attribute.KeyValue{
@@ -151,6 +153,7 @@ func DefaultConfig() Config {
 		ChipIngressSendInterval:        100 * time.Millisecond,
 		ChipIngressSendTimeout:         3 * time.Second,
 		ChipIngressDrainTimeout:        10 * time.Second,
+		ChipIngressMaxConcurrentSends:  defaultMaxConcurrentSends,
 		// Auth (defaults to static auth mode with TTL=0)
 		AuthHeadersTTL: 0,
 	}
diff --git a/pkg/beholder/config_test.go b/pkg/beholder/config_test.go
index 5a83c71178..c96b6187c2 100644
--- a/pkg/beholder/config_test.go
+++ b/pkg/beholder/config_test.go
@@ -67,6 +67,6 @@ func ExampleConfig() {
 	}
 	fmt.Printf("%+v\n", *config.LogRetryConfig)
 	// Output:
-	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressMaxWorkers:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressDrainTimeout:0s LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
+	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressMaxWorkers:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressDrainTimeout:0s ChipIngressMaxConcurrentSends:0 LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
 	// {InitialInterval:5s MaxInterval:30s MaxElapsedTime:1m0s}
 }
diff --git a/pkg/chipingress/batch/client.go b/pkg/chipingress/batch/client.go
index e02704b26b..af97c0684b 100644
--- a/pkg/chipingress/batch/client.go
+++ b/pkg/chipingress/batch/client.go
@@ -110,7 +110,9 @@ func (b *Client) Start(ctx context.Context) {
 // Forcibly shutdowns down after timeout if not completed.
 func (b *Client) Stop() {
 	b.shutdownOnce.Do(func() {
-		ctx, cancel := b.stopCh.CtxWithTimeout(b.shutdownTimeout)
+		// Use a standalone timeout context so the shutdown wait isn't cancelled
+		// by close(b.stopCh) below.
+		ctx, cancel := context.WithTimeout(context.Background(), b.shutdownTimeout)
 		defer cancel()
 
 		if b.cancelBatcher != nil {
diff --git a/pkg/chipingress/batch/client_test.go b/pkg/chipingress/batch/client_test.go
index 7f8c356fb3..dab751e06c 100644
--- a/pkg/chipingress/batch/client_test.go
+++ b/pkg/chipingress/batch/client_test.go
@@ -837,6 +837,11 @@ func TestStop(t *testing.T) {
 
 	t.Run("QueueMessage returns error after Stop", func(t *testing.T) {
 		mockClient := mocks.NewClient(t)
+		mockClient.
+			On("PublishBatch", mock.Anything, mock.Anything).
+			Return(&chipingress.PublishResponse{}, nil).
+			Maybe()
+
 		client, err := NewBatchClient(mockClient, WithBatchSize(10))
 		require.NoError(t, err)
 
@@ -853,7 +858,7 @@ func TestStop(t *testing.T) {
 		}, nil)
 		require.NoError(t, err)
 
-		// Stop the client
+		// Stop the client — drains any buffered messages
 		client.Stop()
 
 		// Queue message after stop - should fail

From 314b0f55bae988269b6e1ebeb753645e74be32eb Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 6 Mar 2026 16:27:39 +0200
Subject: [PATCH 17/28] Increase defaultMaxConcurrentSends 3->10

---
 pkg/beholder/chip_ingress_emitter_worker.go | 2 --
 pkg/beholder/config.go                      | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/pkg/beholder/chip_ingress_emitter_worker.go b/pkg/beholder/chip_ingress_emitter_worker.go
index 005b2c3aa5..5b964bca3c 100644
--- a/pkg/beholder/chip_ingress_emitter_worker.go
+++ b/pkg/beholder/chip_ingress_emitter_worker.go
@@ -7,8 +7,6 @@ import (
 	"github.com/smartcontractkit/chainlink-common/pkg/chipingress/batch"
 )
 
-// chipIngressEmitterWorker wraps a batch.Client for a single (domain, entity) pair.
-// The batch.Client handles buffering, periodic flushing, concurrent sends, and graceful shutdown.
 type chipIngressEmitterWorker struct {
 	batchClient *batch.Client
 	metricAttrs otelmetric.MeasurementOption
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 8bcf7c3a4b..5f6dafe51c 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -103,7 +103,7 @@ var defaultRetryConfig = RetryConfig{
 const (
 	defaultPackageName        = "beholder"
 	defaultMaxWorkers         = 100
-	defaultMaxConcurrentSends = 3
+	defaultMaxConcurrentSends = 10
 )
 
 var defaultOtelAttributes = []attribute.KeyValue{

From 689ff493d78515d37c3c37cf4fa429df43ff88d6 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 6 Mar 2026 16:55:04 +0200
Subject: [PATCH 18/28] Use application logger

---
 pkg/beholder/client.go      | 11 ++++-------
 pkg/beholder/config.go      |  5 ++++-
 pkg/beholder/config_test.go |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index 9a5fdf9305..454bc89d54 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -8,7 +8,7 @@ import (
 	"io"
 	"time"
 
-	ccllogger "github.com/smartcontractkit/chainlink-common/pkg/logger"
+
 	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
@@ -226,13 +226,10 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 
 		var chipIngressEmitter Emitter
 		if cfg.ChipIngressBatchEmitterEnabled {
-			// TODO: accept a logger from the caller instead of creating a new root logger,
-			// so batch emitter logs respect the node's logging configuration.
-			lggr, lErr := ccllogger.New()
-			if lErr != nil {
-				return nil, fmt.Errorf("failed to create logger for chip ingress batch emitter: %w", lErr)
+			if cfg.ChipIngressLogger == nil {
+				return nil, fmt.Errorf("ChipIngressLogger is required when ChipIngressBatchEmitterEnabled is true")
 			}
-			batchEmitterService, err = NewChipIngressBatchEmitter(chipIngressClient, cfg, lggr)
+			batchEmitterService, err = NewChipIngressBatchEmitter(chipIngressClient, cfg, cfg.ChipIngressLogger)
 			if err != nil {
 				return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
 			}
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index 5f6dafe51c..bfce03c7cf 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -7,6 +7,8 @@ import (
 	"go.opentelemetry.io/otel/sdk/metric"
 	"go.opentelemetry.io/otel/sdk/trace"
 	"go.uber.org/zap/zapcore"
+
+	"github.com/smartcontractkit/chainlink-common/pkg/logger"
 )
 
 type Config struct {
@@ -52,7 +54,8 @@ type Config struct {
 	ChipIngressSendInterval        time.Duration // Flush interval per worker (default 100ms)
 	ChipIngressSendTimeout         time.Duration // Timeout per PublishBatch call (default 3s)
 	ChipIngressDrainTimeout        time.Duration // Max time to flush remaining events on shutdown (default 10s)
-	ChipIngressMaxConcurrentSends  int           // Max concurrent PublishBatch calls per worker (default 3)
+	ChipIngressMaxConcurrentSends  int           // Max concurrent PublishBatch calls per worker (default 10)
+	ChipIngressLogger              logger.Logger // Required when ChipIngressBatchEmitterEnabled is true
 
 	// OTel Log
 	LogExportTimeout      time.Duration
diff --git a/pkg/beholder/config_test.go b/pkg/beholder/config_test.go
index c96b6187c2..50410d0a39 100644
--- a/pkg/beholder/config_test.go
+++ b/pkg/beholder/config_test.go
@@ -67,6 +67,6 @@ func ExampleConfig() {
 	}
 	fmt.Printf("%+v\n", *config.LogRetryConfig)
 	// Output:
-	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressMaxWorkers:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressDrainTimeout:0s ChipIngressMaxConcurrentSends:0 LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
+	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressMaxWorkers:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressDrainTimeout:0s ChipIngressMaxConcurrentSends:0 ChipIngressLogger:<nil> LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
 	// {InitialInterval:5s MaxInterval:30s MaxElapsedTime:1m0s}
 }

From 86ca238054ad5b521abe2079d256fabd12b94faa Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 6 Mar 2026 16:58:04 +0200
Subject: [PATCH 19/28] Small fixes

---
 pkg/beholder/chip_ingress_batch_emitter.go | 8 ++++++--
 pkg/beholder/client.go                     | 1 -
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index 17b8479431..b9e280120a 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -7,6 +7,7 @@ import (
 	"time"
 
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
 	otelmetric "go.opentelemetry.io/otel/metric"
 
 	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
@@ -210,8 +211,11 @@ func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chi
 	}
 
 	if len(e.workers) >= e.maxWorkers {
-		e.eng.Warnf("chip ingress batch emitter: max workers (%d) reached, dropping event for %s", e.maxWorkers, workerKey)
-		e.metrics.eventsDropped.Add(context.Background(), 1)
+		e.eng.Warnf("chip ingress batch emitter: max workers (%d) reached, dropping event for %s/%s", e.maxWorkers, domain, entity)
+		e.metrics.eventsDropped.Add(context.Background(), 1, otelmetric.WithAttributeSet(attribute.NewSet(
+			attribute.String("domain", domain),
+			attribute.String("entity", entity),
+		)))
 		return nil
 	}
 
diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index 454bc89d54..04ef19910e 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -8,7 +8,6 @@ import (
 	"io"
 	"time"
 
-
 	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"

From ac45d121befe7324db5a6f80f8523e90e75a011d Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 6 Mar 2026 23:24:58 +0200
Subject: [PATCH 20/28] Amend comment

---
 pkg/beholder/chip_ingress_batch_emitter.go | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index b9e280120a..68b30e9192 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -109,10 +109,9 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	return e, nil
 }
 
-// Emit extracts (domain, entity) from the attributes, routes the event to the
-// appropriate per-(domain, entity) worker's batch.Client, and returns immediately.
-// If the worker's buffer is full, the event is dropped and a metric is bumped.
-// Returns an error if the emitter has been closed or the caller's context is cancelled.
+// Emit queues an event for batched delivery. It returns immediately without blocking.
+// If the worker's buffer is full, the event is silently dropped (metric bumped).
+// Returns an error only if the emitter is closed or the context is cancelled.
 func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
 	return e.emitInternal(ctx, body, nil, attrKVs...)
 }

From ffad1edfd7f83892b81fd8a8547a70bb21b9ce93 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Mon, 9 Mar 2026 18:54:40 +0200
Subject: [PATCH 21/28] Add missing logger

---
 pkg/loop/server.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/loop/server.go b/pkg/loop/server.go
index b36bc30a04..28472ac2a3 100644
--- a/pkg/loop/server.go
+++ b/pkg/loop/server.go
@@ -174,6 +174,7 @@ func (s *Server) start(opts ...ServerOpt) error {
 			ChipIngressEmitterGRPCEndpoint: s.EnvConfig.ChipIngressEndpoint,
 			ChipIngressInsecureConnection:  s.EnvConfig.ChipIngressInsecureConnection,
 			ChipIngressBatchEmitterEnabled: s.EnvConfig.ChipIngressBatchEmitterEnabled,
+			ChipIngressLogger:              s.Logger,
 			MetricCompressor:               s.EnvConfig.TelemetryMetricCompressor,
 		}
 

From 71db03e805503655f63d69d2df3b2ad6073af166 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Thu, 12 Mar 2026 16:03:00 +0200
Subject: [PATCH 22/28] Fix regression

---
 pkg/beholder/client.go                   |  2 +-
 pkg/beholder/dual_source_emitter.go      | 36 ++++++++++---
 pkg/beholder/dual_source_emitter_test.go | 64 ++++++++++++++++++++++--
 3 files changed, 89 insertions(+), 13 deletions(-)

diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index 04ef19910e..83f61c3a0e 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -243,7 +243,7 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 			}
 		}
 
-		emitter, err = NewDualSourceEmitter(chipIngressEmitter, emitter)
+		emitter, err = NewDualSourceEmitter(chipIngressEmitter, emitter, cfg.ChipIngressBatchEmitterEnabled)
 		if err != nil {
 			if batchEmitterService != nil {
 				_ = batchEmitterService.Close()
diff --git a/pkg/beholder/dual_source_emitter.go b/pkg/beholder/dual_source_emitter.go
index 0501e5c529..7d4cdc7748 100644
--- a/pkg/beholder/dual_source_emitter.go
+++ b/pkg/beholder/dual_source_emitter.go
@@ -7,6 +7,7 @@ import (
 	"sync/atomic"
 
 	"github.com/smartcontractkit/chainlink-common/pkg/logger"
+	"github.com/smartcontractkit/chainlink-common/pkg/services"
 )
 
 // DualSourceEmitter emits both to chip ingress and to the otel collector
@@ -17,10 +18,13 @@ type DualSourceEmitter struct {
 	chipIngressEmitter   Emitter
 	otelCollectorEmitter Emitter
 	log                  logger.Logger
+	nonBlockingEmitter   bool
+	stopCh               services.StopChan
+	wg                   services.WaitGroup
 	closed               atomic.Bool
 }
 
-func NewDualSourceEmitter(chipIngressEmitter Emitter, otelCollectorEmitter Emitter) (Emitter, error) {
+func NewDualSourceEmitter(chipIngressEmitter Emitter, otelCollectorEmitter Emitter, nonBlockingChipIngress bool) (Emitter, error) {
 
 	if chipIngressEmitter == nil {
 		return nil, fmt.Errorf("chip ingress emitter is nil")
@@ -39,6 +43,8 @@ func NewDualSourceEmitter(chipIngressEmitter Emitter, otelCollectorEmitter Emitt
 		chipIngressEmitter:   chipIngressEmitter,
 		otelCollectorEmitter: otelCollectorEmitter,
 		log:                  logger,
+		nonBlockingEmitter:   nonBlockingChipIngress,
+		stopCh:               make(services.StopChan),
 	}, nil
 }
 
@@ -46,20 +52,36 @@ func (d *DualSourceEmitter) Close() error {
 	if wasClosed := d.closed.Swap(true); wasClosed {
 		return errors.New("already closed")
 	}
+	close(d.stopCh)
+	d.wg.Wait()
 	return errors.Join(d.chipIngressEmitter.Close(), d.otelCollectorEmitter.Close())
 }
 
 func (d *DualSourceEmitter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
-
-	// Emit via OTLP first
 	if err := d.otelCollectorEmitter.Emit(ctx, body, attrKVs...); err != nil {
 		return err
 	}
 
-	// Emit via chip ingress. When backed by ChipIngressBatchEmitter this is
-	// non-blocking (just a channel send), so no goroutine wrapper is needed.
-	if err := d.chipIngressEmitter.Emit(ctx, body, attrKVs...); err != nil {
-		d.log.Infof("failed to emit to chip ingress: %v", err)
+	if d.nonBlockingEmitter {
+		if err := d.chipIngressEmitter.Emit(ctx, body, attrKVs...); err != nil {
+			d.log.Infof("failed to emit to chip ingress: %v", err)
+		}
+	} else {
+		// Legacy ChipIngressEmitter.Emit is a synchronous gRPC call;
+		// fire-and-forget via goroutine to avoid blocking the caller.
+		if err := d.wg.TryAdd(1); err != nil {
+			return err
+		}
+		go func(ctx context.Context) {
+			defer d.wg.Done()
+			var cancel context.CancelFunc
+			ctx, cancel = d.stopCh.Ctx(ctx)
+			defer cancel()
+
+			if err := d.chipIngressEmitter.Emit(ctx, body, attrKVs...); err != nil {
+				d.log.Infof("failed to emit to chip ingress: %v", err)
+			}
+		}(context.WithoutCancel(ctx))
 	}
 
 	return nil
diff --git a/pkg/beholder/dual_source_emitter_test.go b/pkg/beholder/dual_source_emitter_test.go
index 3429ab1648..314d82348f 100644
--- a/pkg/beholder/dual_source_emitter_test.go
+++ b/pkg/beholder/dual_source_emitter_test.go
@@ -3,7 +3,9 @@ package beholder_test
 import (
 	"context"
 	"fmt"
+	"sync/atomic"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -18,7 +20,7 @@ func TestNewDualSourceEmitter(t *testing.T) {
 		chipEmitter := &mockEmitter{}
 		otelEmitter := &mockEmitter{}
 
-		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, otelEmitter)
+		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, otelEmitter, false)
 		require.NoError(t, err)
 
 		assert.NotNil(t, emitter)
@@ -29,7 +31,7 @@ func TestNewDualSourceEmitter(t *testing.T) {
 	t.Run("nil chip ingress emitter", func(t *testing.T) {
 
 		otelEmitter := &mockEmitter{}
-		emitter, err := beholder.NewDualSourceEmitter(nil, otelEmitter)
+		emitter, err := beholder.NewDualSourceEmitter(nil, otelEmitter, false)
 
 		assert.Error(t, err)
 		assert.Nil(t, emitter)
@@ -39,7 +41,7 @@ func TestNewDualSourceEmitter(t *testing.T) {
 	t.Run("nil otel collector emitter", func(t *testing.T) {
 
 		chipEmitter := &mockEmitter{}
-		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, nil)
+		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, nil, false)
 
 		assert.Error(t, err)
 		assert.Nil(t, emitter)
@@ -51,7 +53,7 @@ func TestDualSourceEmitterEmit(t *testing.T) {
 		chipEmitter := &mockEmitter{}
 		otelEmitter := &mockEmitter{}
 
-		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, otelEmitter)
+		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, otelEmitter, false)
 		require.NoError(t, err)
 
 		err = emitter.Emit(t.Context(), []byte("test message"), "key", "value")
@@ -67,7 +69,7 @@ func TestDualSourceEmitterEmit(t *testing.T) {
 			},
 		}
 
-		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, otelEmitter)
+		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, otelEmitter, false)
 		require.NoError(t, err)
 
 		err = emitter.Emit(t.Context(), []byte("test message"))
@@ -76,6 +78,58 @@ func TestDualSourceEmitterEmit(t *testing.T) {
 	})
 }
 
+func TestDualSourceEmitterBlockingBehavior(t *testing.T) {
+	t.Run("legacy mode does not block caller", func(t *testing.T) {
+		var chipCalled atomic.Bool
+		chipEmitter := &mockEmitter{
+			emitFunc: func(ctx context.Context, body []byte, attrKVs ...any) error {
+				time.Sleep(200 * time.Millisecond)
+				chipCalled.Store(true)
+				return nil
+			},
+		}
+		otelEmitter := &mockEmitter{}
+
+		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, otelEmitter, false)
+		require.NoError(t, err)
+
+		start := time.Now()
+		err = emitter.Emit(t.Context(), []byte("test"))
+		elapsed := time.Since(start)
+
+		assert.NoError(t, err)
+		assert.Less(t, elapsed, 100*time.Millisecond,
+			"Emit should return immediately; chip ingress runs in a goroutine")
+		assert.False(t, chipCalled.Load(),
+			"chip ingress emit should still be in-flight when Emit returns")
+
+		require.NoError(t, emitter.Close())
+		assert.True(t, chipCalled.Load(),
+			"Close should wait for the in-flight chip ingress emit to finish")
+	})
+
+	t.Run("non-blocking mode emits inline", func(t *testing.T) {
+		var chipCalled atomic.Bool
+		chipEmitter := &mockEmitter{
+			emitFunc: func(ctx context.Context, body []byte, attrKVs ...any) error {
+				chipCalled.Store(true)
+				return nil
+			},
+		}
+		otelEmitter := &mockEmitter{}
+
+		emitter, err := beholder.NewDualSourceEmitter(chipEmitter, otelEmitter, true)
+		require.NoError(t, err)
+
+		err = emitter.Emit(t.Context(), []byte("test"))
+		assert.NoError(t, err)
+		assert.True(t, chipCalled.Load(),
+			"chip ingress emit should complete before Emit returns")
+
+		require.NoError(t, emitter.Close())
+	})
+}
+
 // Mock emitter for testing
 type mockEmitter struct {
 	emitFunc func(ctx context.Context, body []byte, attrKVs ...any) error

From a66147b8a5696885bab32a0d015034b0a6153a23 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Fri, 13 Mar 2026 00:06:32 +0200
Subject: [PATCH 23/28] Switch to single client

---
 pkg/beholder/chip_ingress_batch_emitter.go    | 169 ++--
 .../chip_ingress_batch_emitter_test.go        | 782 +++---------------
 pkg/beholder/chip_ingress_emitter_worker.go   |  27 -
 pkg/beholder/config.go                        |   9 +-
 pkg/beholder/config_test.go                   |   2 +-
 5 files changed, 172 insertions(+), 817 deletions(-)
 delete mode 100644 pkg/beholder/chip_ingress_emitter_worker.go

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index 68b30e9192..2c28c11513 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -16,30 +16,16 @@ import (
 	"github.com/smartcontractkit/chainlink-common/pkg/services"
 )
 
-// ChipIngressBatchEmitter buffers events per (domain, entity) and flushes them
-// via chipingress.Client.PublishBatch on a periodic interval.
-// Each (domain, entity) pair gets its own batch.Client, providing per-entity
-// isolation and independent concurrency scaling.
-// It satisfies the Emitter interface so it can be used as a drop-in replacement
-// for ChipIngressEmitter.
+// ChipIngressBatchEmitter batches events and sends them via chipingress.Client.PublishBatch.
+// It implements the Emitter interface.
 type ChipIngressBatchEmitter struct {
 	services.Service
 	eng *services.Engine
 
-	client chipingress.Client
+	batchClient *batch.Client
 
-	workers      map[string]*chipIngressEmitterWorker
-	workersMutex sync.RWMutex
-
-	bufferSize         int
-	maxBatchSize       int
-	maxWorkers         int
-	maxConcurrentSends int
-	sendInterval       time.Duration
-	sendTimeout        time.Duration
-	drainTimeout       time.Duration
-
-	metrics batchEmitterMetrics
+	metricAttrsCache sync.Map // map[string]otelmetric.MeasurementOption
+	metrics          batchEmitterMetrics
 }
 
 type batchEmitterMetrics struct {
@@ -48,7 +34,6 @@ type batchEmitterMetrics struct {
 }
 
 // NewChipIngressBatchEmitter creates a batch emitter backed by the given chipingress client.
-// Call Start() to begin health monitoring, and Close() to stop all workers.
 func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logger.Logger) (*ChipIngressBatchEmitter, error) {
 	if client == nil {
 		return nil, fmt.Errorf("chip ingress client is nil")
@@ -62,10 +47,6 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 	if maxBatchSize == 0 {
 		maxBatchSize = 500
 	}
-	maxWorkers := cfg.ChipIngressMaxWorkers
-	if maxWorkers == 0 {
-		maxWorkers = defaultMaxWorkers
-	}
 	maxConcurrentSends := cfg.ChipIngressMaxConcurrentSends
 	if maxConcurrentSends == 0 {
 		maxConcurrentSends = defaultMaxConcurrentSends
@@ -89,51 +70,49 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 		return nil, fmt.Errorf("failed to create batch emitter metrics: %w", err)
 	}
 
+	batchClient, err := batch.NewBatchClient(client,
+		batch.WithBatchSize(maxBatchSize),
+		batch.WithMessageBuffer(bufferSize),
+		batch.WithBatchInterval(sendInterval),
+		batch.WithMaxPublishTimeout(sendTimeout),
+		batch.WithShutdownTimeout(drainTimeout),
+		batch.WithMaxConcurrentSends(maxConcurrentSends),
+		batch.WithEventClone(false),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create batch client: %w", err)
+	}
+
 	e := &ChipIngressBatchEmitter{
-		client:             client,
-		workers:            make(map[string]*chipIngressEmitterWorker),
-		bufferSize:         bufferSize,
-		maxBatchSize:       maxBatchSize,
-		maxWorkers:         maxWorkers,
-		maxConcurrentSends: maxConcurrentSends,
-		sendInterval:       sendInterval,
-		sendTimeout:        sendTimeout,
-		drainTimeout:       drainTimeout,
-		metrics:            metrics,
+		batchClient: batchClient,
+		metrics:     metrics,
 	}
 
 	e.Service, e.eng = services.Config{
 		Name: "ChipIngressBatchEmitter",
 	}.NewServiceEngine(lggr)
 
+	e.eng.Go(func(ctx context.Context) {
+		batchClient.Start(ctx)
+		<-ctx.Done()
+		batchClient.Stop()
+	})
+
 	return e, nil
 }
 
-// Emit queues an event for batched delivery. It returns immediately without blocking.
-// If the worker's buffer is full, the event is silently dropped (metric bumped).
-// Returns an error only if the emitter is closed or the context is cancelled.
+// Emit queues an event for batched delivery without blocking.
+// Returns an error if the emitter is stopped or the context is cancelled.
+// If the buffer is full, the event is silently dropped.
 func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
 	return e.emitInternal(ctx, body, nil, attrKVs...)
 }
 
-// EmitWithCallback works like Emit but accepts a callback that is invoked once
-// the event's fate is determined: nil on successful PublishBatch delivery, or a
-// non-nil error on send failure or buffer-full drop.
-//
-// Contract:
-//   - If EmitWithCallback returns a non-nil error, the callback will NOT be invoked.
-//     The caller should handle the error from the return value.
-//   - If EmitWithCallback returns nil, the callback is guaranteed to be invoked
-//     exactly once — either asynchronously (after the batch is sent) or
-//     synchronously (if the event was dropped at enqueue time).
+// EmitWithCallback works like Emit but invokes callback once the event's fate
+// is determined (nil on success, non-nil on failure or buffer-full drop).
 //
-// Callers can use this for "synchronous" emission:
-//
-//	done := make(chan error, 1)
-//	if err := emitter.EmitWithCallback(ctx, body, func(err error) { done <- err }, attrKVs...); err != nil {
-//	    return err // callback will not fire
-//	}
-//	err := <-done // safe — callback will fire exactly once
+// If EmitWithCallback returns a non-nil error, the callback will NOT be invoked.
+// If it returns nil, the callback is guaranteed to fire exactly once.
 func (e *ChipIngressBatchEmitter) EmitWithCallback(ctx context.Context, body []byte, callback func(error), attrKVs ...any) error {
 	return e.emitInternal(ctx, body, callback, attrKVs...)
 }
@@ -147,14 +126,6 @@ func (e *ChipIngressBatchEmitter) emitInternal(ctx context.Context, body []byte,
 
 		attributes := newAttributes(attrKVs...)
 
-		worker := e.findOrCreateWorker(domain, entity)
-		if worker == nil {
-			if callback != nil {
-				callback(fmt.Errorf("max workers reached, event dropped"))
-			}
-			return nil
-		}
-
 		event, err := chipingress.NewEvent(domain, entity, body, attributes)
 		if err != nil {
 			return fmt.Errorf("failed to create CloudEvent: %w", err)
@@ -168,18 +139,20 @@ func (e *ChipIngressBatchEmitter) emitInternal(ctx context.Context, body []byte,
 			return err
 		}
 
-		queueErr := worker.batchClient.QueueMessage(eventPb, func(sendErr error) {
+		metricAttrs := e.metricAttrsFor(domain, entity)
+
+		queueErr := e.batchClient.QueueMessage(eventPb, func(sendErr error) {
 			if sendErr != nil {
-				e.metrics.eventsDropped.Add(context.Background(), 1, worker.metricAttrs)
+				e.metrics.eventsDropped.Add(context.Background(), 1, metricAttrs)
 			} else {
-				e.metrics.eventsSent.Add(context.Background(), 1, worker.metricAttrs)
+				e.metrics.eventsSent.Add(context.Background(), 1, metricAttrs)
 			}
 			if callback != nil {
 				callback(sendErr)
 			}
 		})
 		if queueErr != nil {
-			e.metrics.eventsDropped.Add(context.Background(), 1, worker.metricAttrs)
+			e.metrics.eventsDropped.Add(context.Background(), 1, metricAttrs)
 			if callback != nil {
 				callback(queueErr)
 			}
@@ -189,59 +162,17 @@ func (e *ChipIngressBatchEmitter) emitInternal(ctx context.Context, body []byte,
 	})
 }
 
-// findOrCreateWorker returns the worker for the given (domain, entity) pair,
-// creating one backed by a new batch.Client if it doesn't exist.
-func (e *ChipIngressBatchEmitter) findOrCreateWorker(domain, entity string) *chipIngressEmitterWorker {
-	workerKey := domain + "\x00" + entity
-
-	e.workersMutex.RLock()
-	worker, found := e.workers[workerKey]
-	e.workersMutex.RUnlock()
-
-	if found {
-		return worker
-	}
-
-	e.workersMutex.Lock()
-	defer e.workersMutex.Unlock()
-
-	if worker, found = e.workers[workerKey]; found {
-		return worker
-	}
-
-	if len(e.workers) >= e.maxWorkers {
-		e.eng.Warnf("chip ingress batch emitter: max workers (%d) reached, dropping event for %s/%s", e.maxWorkers, domain, entity)
-		e.metrics.eventsDropped.Add(context.Background(), 1, otelmetric.WithAttributeSet(attribute.NewSet(
-			attribute.String("domain", domain),
-			attribute.String("entity", entity),
-		)))
-		return nil
-	}
-
-	batchClient, err := batch.NewBatchClient(e.client,
-		batch.WithBatchSize(e.maxBatchSize),
-		batch.WithMessageBuffer(e.bufferSize),
-		batch.WithBatchInterval(e.sendInterval),
-		batch.WithMaxPublishTimeout(e.sendTimeout),
-		batch.WithShutdownTimeout(e.drainTimeout),
-		batch.WithMaxConcurrentSends(e.maxConcurrentSends),
-		batch.WithEventClone(false),
-	)
-	if err != nil {
-		e.eng.Errorf("chip ingress batch emitter: failed to create batch client for %s: %v", workerKey, err)
-		return nil
-	}
-
-	worker = newChipIngressEmitterWorker(batchClient, domain, entity)
-
-	e.eng.Go(func(ctx context.Context) {
-		worker.batchClient.Start(ctx)
-		<-ctx.Done()
-		worker.batchClient.Stop()
-	})
-
-	e.workers[workerKey] = worker
-	return worker
+func (e *ChipIngressBatchEmitter) metricAttrsFor(domain, entity string) otelmetric.MeasurementOption {
+	key := domain + "\x00" + entity
+	if v, ok := e.metricAttrsCache.Load(key); ok {
+		return v.(otelmetric.MeasurementOption)
+	}
+	attrs := otelmetric.WithAttributeSet(attribute.NewSet(
+		attribute.String("domain", domain),
+		attribute.String("entity", entity),
+	))
+	v, _ := e.metricAttrsCache.LoadOrStore(key, attrs)
+	return v.(otelmetric.MeasurementOption)
 }
 
 func newBatchEmitterMetrics(meter otelmetric.Meter) (batchEmitterMetrics, error) {
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_test.go
index 0e51b808e6..45cd6a2dfe 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_test.go
@@ -62,35 +62,6 @@ func TestChipIngressBatchEmitter_Emit(t *testing.T) {
 		assert.Error(t, err)
 	})
 
-	t.Run("enqueues and does not call PublishBatch immediately", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Return(nil, nil).
-			Maybe()
-
-		cfg := newTestConfig()
-		cfg.ChipIngressSendInterval = 10 * time.Second
-		cfg.ChipIngressMaxBatchSize = 100
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-		defer emitter.Close() //nolint:errcheck
-
-		err = emitter.Emit(t.Context(), []byte("body"),
-			beholder.AttrKeyDomain, "test-domain",
-			beholder.AttrKeyEntity, "test-entity",
-		)
-		require.NoError(t, err)
-
-		// With a large batch size and long interval, PublishBatch should not fire yet
-		time.Sleep(100 * time.Millisecond)
-		clientMock.AssertNotCalled(t, "PublishBatch", mock.Anything, mock.Anything)
-	})
-}
-
-func TestChipIngressBatchEmitter_BatchAssembly(t *testing.T) {
 	t.Run("events are batched and sent via PublishBatch", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
 
@@ -121,7 +92,6 @@ func TestChipIngressBatchEmitter_BatchAssembly(t *testing.T) {
 			require.NoError(t, err)
 		}
 
-		// Wait for flush to occur
 		assert.Eventually(t, func() bool {
 			mu.Lock()
 			defer mu.Unlock()
@@ -141,651 +111,173 @@ func TestChipIngressBatchEmitter_BatchAssembly(t *testing.T) {
 	})
 }
 
-func TestChipIngressBatchEmitter_MaxBatchSize(t *testing.T) {
-	t.Run("batch is capped at maxBatchSize", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		var batchSizes []int
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				batch := args.Get(1).(*chipingress.CloudEventBatch)
-				batchSizes = append(batchSizes, len(batch.Events))
-			}).
-			Return(nil, nil)
-
-		cfg := newTestConfig()
-		cfg.ChipIngressBufferSize = 20
-		cfg.ChipIngressMaxBatchSize = 3
-		cfg.ChipIngressSendInterval = 50 * time.Millisecond
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		for i := 0; i < 7; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
+func TestChipIngressBatchEmitter_CloudEventFormat(t *testing.T) {
+	clientMock := mocks.NewClient(t)
 
-		// Wait for all events to be flushed
-		assert.Eventually(t, func() bool {
+	var mu sync.Mutex
+	var receivedBatch *chipingress.CloudEventBatch
+	clientMock.
+		On("PublishBatch", mock.Anything, mock.Anything).
+		Run(func(args mock.Arguments) {
 			mu.Lock()
 			defer mu.Unlock()
-			total := 0
-			for _, s := range batchSizes {
-				total += s
-			}
-			return total >= 7
-		}, 2*time.Second, 10*time.Millisecond)
+			receivedBatch = args.Get(1).(*chipingress.CloudEventBatch)
+		}).
+		Return(nil, nil)
 
-		require.NoError(t, emitter.Close())
+	cfg := newTestConfig()
+	cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		mu.Lock()
-		defer mu.Unlock()
-		for _, size := range batchSizes {
-			assert.LessOrEqual(t, size, 3, "batch size should not exceed maxBatchSize")
-		}
-	})
-}
-
-func TestChipIngressBatchEmitter_PerDomainEntityIsolation(t *testing.T) {
-	t.Run("separate workers for different domain/entity pairs", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		domainEntitySeen := make(map[string]int)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				batch := args.Get(1).(*chipingress.CloudEventBatch)
-				for _, event := range batch.Events {
-					key := event.Source + "/" + event.Type
-					domainEntitySeen[key] += 1
-				}
-			}).
-			Return(nil, nil)
-
-		cfg := newTestConfig()
-		cfg.ChipIngressSendInterval = 50 * time.Millisecond
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		// Emit events for two different domain/entity pairs
-		for i := 0; i < 3; i++ {
-			err = emitter.Emit(t.Context(), []byte("workflow"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "WorkflowEvent",
-			)
-			require.NoError(t, err)
-		}
-		for i := 0; i < 2; i++ {
-			err = emitter.Emit(t.Context(), []byte("bridge"),
-				beholder.AttrKeyDomain, "data-feeds",
-				beholder.AttrKeyEntity, "BridgeStatus",
-			)
-			require.NoError(t, err)
-		}
-
-		// Wait for both to be flushed
-		assert.Eventually(t, func() bool {
-			mu.Lock()
-			defer mu.Unlock()
-			return domainEntitySeen["platform/WorkflowEvent"] >= 3 &&
-				domainEntitySeen["data-feeds/BridgeStatus"] >= 2
-		}, 2*time.Second, 10*time.Millisecond)
+	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+	require.NoError(t, err)
+	require.NoError(t, emitter.Start(t.Context()))
 
-		require.NoError(t, emitter.Close())
+	err = emitter.Emit(t.Context(), []byte("test-payload"),
+		beholder.AttrKeyDomain, "my-domain",
+		beholder.AttrKeyEntity, "my-entity",
+	)
+	require.NoError(t, err)
 
+	assert.Eventually(t, func() bool {
 		mu.Lock()
 		defer mu.Unlock()
-		assert.Equal(t, 3, domainEntitySeen["platform/WorkflowEvent"])
-		assert.Equal(t, 2, domainEntitySeen["data-feeds/BridgeStatus"])
-	})
-}
-
-func TestChipIngressBatchEmitter_BufferFull(t *testing.T) {
-	t.Run("events are dropped when buffer is full", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		// Block PublishBatch so the batcher's send pipeline backs up,
-		// eventually filling the message buffer channel.
-		sendBlocked := make(chan struct{})
-		firstCallSignal := make(chan struct{}, 1)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(_ mock.Arguments) {
-				select {
-				case firstCallSignal <- struct{}{}:
-				default:
-				}
-				<-sendBlocked
-			}).
-			Return(nil, nil).
-			Maybe()
-
-		cfg := newTestConfig()
-		cfg.ChipIngressBufferSize = 2
-		cfg.ChipIngressMaxBatchSize = 1
-		cfg.ChipIngressMaxConcurrentSends = 1
-		cfg.ChipIngressSendInterval = 50 * time.Millisecond
-		cfg.ChipIngressDrainTimeout = 200 * time.Millisecond
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-		defer close(sendBlocked)
-		defer emitter.Close() //nolint:errcheck
-
-		// First event triggers a send that blocks, exhausting the semaphore
-		err = emitter.Emit(t.Context(), []byte("body"),
-			beholder.AttrKeyDomain, "platform",
-			beholder.AttrKeyEntity, "TestEvent",
-		)
-		require.NoError(t, err)
-
-		// Wait for the batcher to process the event and block on PublishBatch
-		<-firstCallSignal
-		// Give batcher time to read the next event and block on the semaphore
-		time.Sleep(100 * time.Millisecond)
-
-		// Flood the buffer — Emit should never return an error
-		for i := 0; i < 10; i++ {
-			err = emitter.Emit(t.Context(), []byte("overflow"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			assert.NoError(t, err, "Emit should not return error even when dropping events")
-		}
-	})
-}
-
-func TestChipIngressBatchEmitter_Lifecycle(t *testing.T) {
-	t.Run("start and close cleanly", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Return(nil, nil).
-			Maybe()
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
-		require.NoError(t, err)
+		return receivedBatch != nil
+	}, 2*time.Second, 10*time.Millisecond)
 
-		require.NoError(t, emitter.Start(t.Context()))
+	require.NoError(t, emitter.Close())
 
-		// Emit a few events to create workers
-		for i := 0; i < 3; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
+	mu.Lock()
+	defer mu.Unlock()
+	require.Len(t, receivedBatch.Events, 1)
 
-		// Close should not hang or error
-		require.NoError(t, emitter.Close())
-	})
-}
-
-func TestChipIngressBatchEmitter_CloudEventFormat(t *testing.T) {
-	t.Run("CloudEvents have correct source, type, and data", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		var receivedBatch *chipingress.CloudEventBatch
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				receivedBatch = args.Get(1).(*chipingress.CloudEventBatch)
-			}).
-			Return(nil, nil)
-
-		cfg := newTestConfig()
-		cfg.ChipIngressSendInterval = 50 * time.Millisecond
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		err = emitter.Emit(t.Context(), []byte("test-payload"),
-			beholder.AttrKeyDomain, "my-domain",
-			beholder.AttrKeyEntity, "my-entity",
-		)
-		require.NoError(t, err)
-
-		assert.Eventually(t, func() bool {
-			mu.Lock()
-			defer mu.Unlock()
-			return receivedBatch != nil
-		}, 2*time.Second, 10*time.Millisecond)
-
-		require.NoError(t, emitter.Close())
-
-		mu.Lock()
-		defer mu.Unlock()
-		require.Len(t, receivedBatch.Events, 1)
-
-		event := receivedBatch.Events[0]
-		assert.Equal(t, "my-domain", event.Source)
-		assert.Equal(t, "my-entity", event.Type)
-		assert.NotEmpty(t, event.Id)
-	})
+	event := receivedBatch.Events[0]
+	assert.Equal(t, "my-domain", event.Source)
+	assert.Equal(t, "my-entity", event.Type)
+	assert.NotEmpty(t, event.Id)
 }
 
 func TestChipIngressBatchEmitter_PublishBatchError(t *testing.T) {
-	t.Run("PublishBatch error is handled gracefully", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
+	clientMock := mocks.NewClient(t)
 
-		var mu sync.Mutex
-		callCount := 0
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(_ mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				callCount++
-			}).
-			Return(nil, assert.AnError)
-
-		cfg := newTestConfig()
-		cfg.ChipIngressSendInterval = 50 * time.Millisecond
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		for i := 0; i < 3; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
-
-		assert.Eventually(t, func() bool {
+	var mu sync.Mutex
+	callCount := 0
+	clientMock.
+		On("PublishBatch", mock.Anything, mock.Anything).
+		Run(func(_ mock.Arguments) {
 			mu.Lock()
 			defer mu.Unlock()
-			return callCount > 0
-		}, 2*time.Second, 10*time.Millisecond)
+			callCount++
+		}).
+		Return(nil, assert.AnError)
 
-		require.NoError(t, emitter.Close())
-	})
-}
-
-func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
-	t.Run("Emit returns context error when context is cancelled", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Return(nil, nil).
-			Maybe()
-
-		cfg := newTestConfig()
-		cfg.ChipIngressBufferSize = 1
-		cfg.ChipIngressSendInterval = 10 * time.Second
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-		defer emitter.Close() //nolint:errcheck
+	cfg := newTestConfig()
+	cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		ctx, cancel := context.WithCancel(t.Context())
-		cancel()
-
-		err = emitter.Emit(ctx, []byte("should-fail"),
-			beholder.AttrKeyDomain, "platform",
-			beholder.AttrKeyEntity, "TestEvent",
-		)
-		assert.ErrorIs(t, err, context.Canceled)
-	})
-}
-
-func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
-	t.Run("zero config uses sane defaults", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		var receivedBatch *chipingress.CloudEventBatch
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				receivedBatch = args.Get(1).(*chipingress.CloudEventBatch)
-			}).
-			Return(nil, nil)
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, beholder.Config{}, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
+	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+	require.NoError(t, err)
+	require.NoError(t, emitter.Start(t.Context()))
 
+	for i := 0; i < 3; i++ {
 		err = emitter.Emit(t.Context(), []byte("body"),
 			beholder.AttrKeyDomain, "platform",
 			beholder.AttrKeyEntity, "TestEvent",
 		)
 		require.NoError(t, err)
+	}
 
-		// Default send interval is 100ms; wait for flush
-		assert.Eventually(t, func() bool {
-			mu.Lock()
-			defer mu.Unlock()
-			return receivedBatch != nil
-		}, 3*time.Second, 50*time.Millisecond)
-
-		require.NoError(t, emitter.Close())
-
+	assert.Eventually(t, func() bool {
 		mu.Lock()
 		defer mu.Unlock()
-		require.Len(t, receivedBatch.Events, 1)
-	})
-}
-
-func TestChipIngressBatchEmitter_GracefulDrain(t *testing.T) {
-	t.Run("flushes buffered events on close", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		totalEventsSent := 0
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				batch := args.Get(1).(*chipingress.CloudEventBatch)
-				totalEventsSent += len(batch.Events)
-			}).
-			Return(nil, nil)
-
-		cfg := beholder.Config{
-			ChipIngressBufferSize:         20,
-			ChipIngressMaxBatchSize:       10,
-			ChipIngressMaxConcurrentSends: 3,
-			ChipIngressSendInterval:       1 * time.Hour,
-			ChipIngressSendTimeout:        5 * time.Second,
-			ChipIngressDrainTimeout:       5 * time.Second,
-		}
+		return callCount > 0
+	}, 2*time.Second, 10*time.Millisecond)
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		for i := 0; i < 5; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
-
-		// Give the batcher time to read events from the channel into its internal batch
-		time.Sleep(50 * time.Millisecond)
-
-		// Events are buffered but no tick has fired. Close should drain them.
-		require.NoError(t, emitter.Close())
-
-		mu.Lock()
-		defer mu.Unlock()
-		assert.Equal(t, 5, totalEventsSent, "all buffered events should be drained on close")
-	})
+	require.NoError(t, emitter.Close())
 }
 
-func TestChipIngressBatchEmitter_DrainMultipleDomains(t *testing.T) {
-	t.Run("drains events from all workers on close", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		domainEntitySent := make(map[string]int)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				batch := args.Get(1).(*chipingress.CloudEventBatch)
-				for _, event := range batch.Events {
-					key := event.Source + "/" + event.Type
-					domainEntitySent[key] += 1
-				}
-			}).
-			Return(nil, nil)
-
-		cfg := beholder.Config{
-			ChipIngressBufferSize:         20,
-			ChipIngressMaxBatchSize:       10,
-			ChipIngressMaxConcurrentSends: 3,
-			ChipIngressSendInterval:       1 * time.Hour,
-			ChipIngressSendTimeout:        5 * time.Second,
-			ChipIngressDrainTimeout:       5 * time.Second,
-		}
+func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
+	clientMock := mocks.NewClient(t)
+	clientMock.
+		On("PublishBatch", mock.Anything, mock.Anything).
+		Return(nil, nil).
+		Maybe()
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
+	cfg := newTestConfig()
+	cfg.ChipIngressBufferSize = 1
+	cfg.ChipIngressSendInterval = 10 * time.Second
 
-		for i := 0; i < 3; i++ {
-			err = emitter.Emit(t.Context(), []byte("workflow"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "WorkflowEvent",
-			)
-			require.NoError(t, err)
-		}
-		for i := 0; i < 2; i++ {
-			err = emitter.Emit(t.Context(), []byte("bridge"),
-				beholder.AttrKeyDomain, "data-feeds",
-				beholder.AttrKeyEntity, "BridgeStatus",
-			)
-			require.NoError(t, err)
-		}
+	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+	require.NoError(t, err)
+	require.NoError(t, emitter.Start(t.Context()))
+	defer emitter.Close() //nolint:errcheck
 
-		// Give batchers time to read events from their channels
-		time.Sleep(50 * time.Millisecond)
+	ctx, cancel := context.WithCancel(t.Context())
+	cancel()
 
-		require.NoError(t, emitter.Close())
-
-		mu.Lock()
-		defer mu.Unlock()
-		assert.Equal(t, 3, domainEntitySent["platform/WorkflowEvent"])
-		assert.Equal(t, 2, domainEntitySent["data-feeds/BridgeStatus"])
-	})
+	err = emitter.Emit(ctx, []byte("should-fail"),
+		beholder.AttrKeyDomain, "platform",
+		beholder.AttrKeyEntity, "TestEvent",
+	)
+	assert.ErrorIs(t, err, context.Canceled)
 }
 
-func TestChipIngressBatchEmitter_DrainPublishBatchFailure(t *testing.T) {
-	t.Run("handles batch failure and continues sending", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		var mu sync.Mutex
-		callCount := 0
-		totalEventsSent := 0
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				callCount++
-				batch := args.Get(1).(*chipingress.CloudEventBatch)
-				if callCount == 1 {
-					return // first call fails
-				}
-				totalEventsSent += len(batch.Events)
-			}).
-			Return(nil, assert.AnError).Once()
-
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				mu.Lock()
-				defer mu.Unlock()
-				callCount++
-				batch := args.Get(1).(*chipingress.CloudEventBatch)
-				totalEventsSent += len(batch.Events)
-			}).
-			Return(nil, nil)
-
-		cfg := beholder.Config{
-			ChipIngressBufferSize:         20,
-			ChipIngressMaxBatchSize:       3,
-			ChipIngressMaxConcurrentSends: 3,
-			ChipIngressSendInterval:       1 * time.Hour,
-			ChipIngressSendTimeout:        5 * time.Second,
-			ChipIngressDrainTimeout:       5 * time.Second,
-		}
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
+func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
+	clientMock := mocks.NewClient(t)
 
-		// Emit 6 events with maxBatchSize=3 => 2 batches via size trigger
-		for i := 0; i < 6; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
+	var mu sync.Mutex
+	var receivedBatch *chipingress.CloudEventBatch
+	clientMock.
+		On("PublishBatch", mock.Anything, mock.Anything).
+		Run(func(args mock.Arguments) {
+			mu.Lock()
+			defer mu.Unlock()
+			receivedBatch = args.Get(1).(*chipingress.CloudEventBatch)
+		}).
+		Return(nil, nil)
 
-		// Give the batcher time to read events and trigger size-based sends
-		time.Sleep(100 * time.Millisecond)
+	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, beholder.Config{}, newTestLogger(t))
+	require.NoError(t, err)
+	require.NoError(t, emitter.Start(t.Context()))
 
-		require.NoError(t, emitter.Close())
+	err = emitter.Emit(t.Context(), []byte("body"),
+		beholder.AttrKeyDomain, "platform",
+		beholder.AttrKeyEntity, "TestEvent",
+	)
+	require.NoError(t, err)
 
+	assert.Eventually(t, func() bool {
 		mu.Lock()
 		defer mu.Unlock()
-		assert.GreaterOrEqual(t, callCount, 2, "should have attempted at least 2 batches")
-		assert.Equal(t, 3, totalEventsSent, "second batch should have succeeded despite first batch failure")
-	})
-}
-
-func TestChipIngressBatchEmitter_DrainTimeout(t *testing.T) {
-	t.Run("close returns promptly when drain timeout expires", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Run(func(args mock.Arguments) {
-				ctx := args.Get(0).(context.Context)
-				<-ctx.Done() // simulate a slow server that only returns on context cancellation
-			}).
-			Return(nil, context.DeadlineExceeded).
-			Maybe()
-
-		cfg := beholder.Config{
-			ChipIngressBufferSize:         20,
-			ChipIngressMaxBatchSize:       10,
-			ChipIngressMaxConcurrentSends: 3,
-			ChipIngressSendInterval:       1 * time.Hour,
-			ChipIngressSendTimeout:        10 * time.Second,
-			ChipIngressDrainTimeout:       200 * time.Millisecond,
-		}
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		for i := 0; i < 5; i++ {
-			err = emitter.Emit(t.Context(), []byte("body"),
-				beholder.AttrKeyDomain, "platform",
-				beholder.AttrKeyEntity, "TestEvent",
-			)
-			require.NoError(t, err)
-		}
-
-		// Give the batcher time to read events
-		time.Sleep(50 * time.Millisecond)
-
-		closeDone := make(chan error, 1)
-		go func() {
-			closeDone <- emitter.Close()
-		}()
-
-		select {
-		case err := <-closeDone:
-			assert.NoError(t, err, "close should not error")
-		case <-time.After(5 * time.Second):
-			t.Fatal("Close() did not return within 5s; drain timeout is not working")
-		}
-	})
-}
-
-func TestChipIngressBatchEmitter_MaxWorkersCap(t *testing.T) {
-	t.Run("drops events when max workers reached", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Return(nil, nil).
-			Maybe()
-
-		cfg := newTestConfig()
-		cfg.ChipIngressMaxWorkers = 2
-		cfg.ChipIngressSendInterval = 50 * time.Millisecond
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-		defer emitter.Close() //nolint:errcheck
-
-		// Create 2 workers (at the cap)
-		err = emitter.Emit(t.Context(), []byte("body"),
-			beholder.AttrKeyDomain, "domain-1",
-			beholder.AttrKeyEntity, "entity-1",
-		)
-		require.NoError(t, err)
+		return receivedBatch != nil
+	}, 3*time.Second, 50*time.Millisecond)
 
-		err = emitter.Emit(t.Context(), []byte("body"),
-			beholder.AttrKeyDomain, "domain-2",
-			beholder.AttrKeyEntity, "entity-2",
-		)
-		require.NoError(t, err)
+	require.NoError(t, emitter.Close())
 
-		// 3rd unique pair should be silently dropped (no error, no worker created)
-		err = emitter.Emit(t.Context(), []byte("body"),
-			beholder.AttrKeyDomain, "domain-3",
-			beholder.AttrKeyEntity, "entity-3",
-		)
-		assert.NoError(t, err, "Emit should not error when max workers is reached")
-
-		// Existing workers should still accept events
-		err = emitter.Emit(t.Context(), []byte("body"),
-			beholder.AttrKeyDomain, "domain-1",
-			beholder.AttrKeyEntity, "entity-1",
-		)
-		assert.NoError(t, err, "Emit to existing worker should still work")
-	})
+	mu.Lock()
+	defer mu.Unlock()
+	require.Len(t, receivedBatch.Events, 1)
 }
 
 func TestChipIngressBatchEmitter_EmitAfterClose(t *testing.T) {
-	t.Run("Emit after Close returns error", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Return(nil, nil).
-			Maybe()
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-		require.NoError(t, emitter.Close())
+	clientMock := mocks.NewClient(t)
+	clientMock.
+		On("PublishBatch", mock.Anything, mock.Anything).
+		Return(nil, nil).
+		Maybe()
 
-		err = emitter.Emit(t.Context(), []byte("body"),
-			beholder.AttrKeyDomain, "platform",
-			beholder.AttrKeyEntity, "TestEvent",
-		)
-		assert.Error(t, err, "Emit after Close should return an error")
-	})
+	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
+	require.NoError(t, err)
+	require.NoError(t, emitter.Start(t.Context()))
+	require.NoError(t, emitter.Close())
+
+	err = emitter.Emit(t.Context(), []byte("body"),
+		beholder.AttrKeyDomain, "platform",
+		beholder.AttrKeyEntity, "TestEvent",
+	)
+	assert.Error(t, err)
 }
 
 func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
-	t.Run("callback receives nil on successful send", func(t *testing.T) {
+	t.Run("callback receives nil on success", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
 		clientMock.
 			On("PublishBatch", mock.Anything, mock.Anything).
@@ -809,7 +301,7 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 
 		select {
 		case sendErr := <-done:
-			assert.NoError(t, sendErr, "callback should receive nil on success")
+			assert.NoError(t, sendErr)
 		case <-time.After(3 * time.Second):
 			t.Fatal("callback was not invoked within timeout")
 		}
@@ -841,7 +333,7 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 
 		select {
 		case sendErr := <-done:
-			assert.Error(t, sendErr, "callback should receive error on failure")
+			assert.Error(t, sendErr)
 		case <-time.After(3 * time.Second):
 			t.Fatal("callback was not invoked within timeout")
 		}
@@ -879,19 +371,15 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 		defer close(sendBlocked)
 		defer emitter.Close() //nolint:errcheck
 
-		// First event triggers a send that blocks, exhausting the semaphore
 		err = emitter.Emit(t.Context(), []byte("body"),
 			beholder.AttrKeyDomain, "platform",
 			beholder.AttrKeyEntity, "TestEvent",
 		)
 		require.NoError(t, err)
 
-		// Wait for PublishBatch to be called and blocked
 		<-firstCallSignal
-		// Give batcher time to read the next event and block on the semaphore
 		time.Sleep(100 * time.Millisecond)
 
-		// Flood the buffer (size 2) so it becomes full
 		for i := 0; i < 10; i++ {
 			_ = emitter.Emit(t.Context(), []byte("filler"),
 				beholder.AttrKeyDomain, "platform",
@@ -899,7 +387,6 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 			)
 		}
 
-		// Buffer is full — callback should be invoked synchronously with an error
 		dropped := make(chan error, 1)
 		err = emitter.EmitWithCallback(t.Context(), []byte("overflow"), func(sendErr error) {
 			dropped <- sendErr
@@ -907,49 +394,16 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 			beholder.AttrKeyDomain, "platform",
 			beholder.AttrKeyEntity, "TestEvent",
 		)
-		assert.NoError(t, err, "Emit should not return an error even when dropping")
+		assert.NoError(t, err)
 
 		select {
 		case dropErr := <-dropped:
-			assert.Error(t, dropErr, "callback should receive an error when buffer is full")
+			assert.Error(t, dropErr)
 		case <-time.After(time.Second):
 			t.Fatal("callback was not invoked for dropped event")
 		}
 	})
 
-	t.Run("synchronous emit pattern works end-to-end", func(t *testing.T) {
-		clientMock := mocks.NewClient(t)
-		clientMock.
-			On("PublishBatch", mock.Anything, mock.Anything).
-			Return(nil, nil)
-
-		cfg := newTestConfig()
-		cfg.ChipIngressSendInterval = 50 * time.Millisecond
-
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
-		require.NoError(t, err)
-		require.NoError(t, emitter.Start(t.Context()))
-
-		done := make(chan error, 1)
-		err = emitter.EmitWithCallback(t.Context(), []byte("sync-body"), func(sendErr error) {
-			done <- sendErr
-		},
-			beholder.AttrKeyDomain, "platform",
-			beholder.AttrKeyEntity, "TestEvent",
-		)
-		require.NoError(t, err)
-
-		// Block until the event is actually sent — the "synchronous" pattern
-		select {
-		case sendErr := <-done:
-			assert.NoError(t, sendErr)
-		case <-time.After(3 * time.Second):
-			t.Fatal("synchronous emit did not complete within timeout")
-		}
-
-		require.NoError(t, emitter.Close())
-	})
-
 	t.Run("nil callback behaves like Emit", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
 		clientMock.
diff --git a/pkg/beholder/chip_ingress_emitter_worker.go b/pkg/beholder/chip_ingress_emitter_worker.go
deleted file mode 100644
index 5b964bca3c..0000000000
--- a/pkg/beholder/chip_ingress_emitter_worker.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package beholder
-
-import (
-	"go.opentelemetry.io/otel/attribute"
-	otelmetric "go.opentelemetry.io/otel/metric"
-
-	"github.com/smartcontractkit/chainlink-common/pkg/chipingress/batch"
-)
-
-type chipIngressEmitterWorker struct {
-	batchClient *batch.Client
-	metricAttrs otelmetric.MeasurementOption
-}
-
-func newChipIngressEmitterWorker(
-	batchClient *batch.Client,
-	domain string,
-	entity string,
-) *chipIngressEmitterWorker {
-	return &chipIngressEmitterWorker{
-		batchClient: batchClient,
-		metricAttrs: otelmetric.WithAttributeSet(attribute.NewSet(
-			attribute.String("domain", domain),
-			attribute.String("entity", entity),
-		)),
-	}
-}
diff --git a/pkg/beholder/config.go b/pkg/beholder/config.go
index bfce03c7cf..deeed041fd 100644
--- a/pkg/beholder/config.go
+++ b/pkg/beholder/config.go
@@ -48,13 +48,12 @@ type Config struct {
 
 	// Chip Ingress Batch Emitter
 	ChipIngressBatchEmitterEnabled bool          // When true, use batch emitter; when false (default), use legacy per-event emitter
-	ChipIngressBufferSize          uint          // Per-worker message buffer size (default 1000)
+	ChipIngressBufferSize          uint          // Message buffer size (default 1000)
 	ChipIngressMaxBatchSize        uint          // Max events per PublishBatch call (default 500)
-	ChipIngressMaxWorkers          int           // Max concurrent (domain, entity) workers (default 100)
-	ChipIngressSendInterval        time.Duration // Flush interval per worker (default 100ms)
+	ChipIngressSendInterval        time.Duration // Flush interval (default 100ms)
 	ChipIngressSendTimeout         time.Duration // Timeout per PublishBatch call (default 3s)
 	ChipIngressDrainTimeout        time.Duration // Max time to flush remaining events on shutdown (default 10s)
-	ChipIngressMaxConcurrentSends  int           // Max concurrent PublishBatch calls per worker (default 10)
+	ChipIngressMaxConcurrentSends  int           // Max concurrent PublishBatch calls (default 10)
 	ChipIngressLogger              logger.Logger // Required when ChipIngressBatchEmitterEnabled is true
 
 	// OTel Log
@@ -105,7 +104,6 @@ var defaultRetryConfig = RetryConfig{
 
 const (
 	defaultPackageName        = "beholder"
-	defaultMaxWorkers         = 100
 	defaultMaxConcurrentSends = 10
 )
 
@@ -152,7 +150,6 @@ func DefaultConfig() Config {
 		ChipIngressBatchEmitterEnabled: false,
 		ChipIngressBufferSize:          1000,
 		ChipIngressMaxBatchSize:        500,
-		ChipIngressMaxWorkers:          defaultMaxWorkers,
 		ChipIngressSendInterval:        100 * time.Millisecond,
 		ChipIngressSendTimeout:         3 * time.Second,
 		ChipIngressDrainTimeout:        10 * time.Second,
diff --git a/pkg/beholder/config_test.go b/pkg/beholder/config_test.go
index 50410d0a39..80bef78d3a 100644
--- a/pkg/beholder/config_test.go
+++ b/pkg/beholder/config_test.go
@@ -67,6 +67,6 @@ func ExampleConfig() {
 	}
 	fmt.Printf("%+v\n", *config.LogRetryConfig)
 	// Output:
-	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressMaxWorkers:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressDrainTimeout:0s ChipIngressMaxConcurrentSends:0 ChipIngressLogger:<nil> LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
+	// {InsecureConnection:true CACertFile: OtelExporterGRPCEndpoint:localhost:4317 OtelExporterHTTPEndpoint:localhost:4318 ResourceAttributes:[{Key:package_name Value:{vtype:4 numeric:0 stringly:beholder slice:<nil>}} {Key:sender Value:{vtype:4 numeric:0 stringly:beholderclient slice:<nil>}}] EmitterExportTimeout:1s EmitterExportInterval:1s EmitterExportMaxBatchSize:512 EmitterMaxQueueSize:2048 EmitterBatchProcessor:true TraceSampleRatio:1 TraceBatchTimeout:1s TraceSpanExporter:<nil> TraceRetryConfig:<nil> TraceCompressor:gzip MetricReaderInterval:1s MetricRetryConfig:<nil> MetricViews:[] MetricCompressor:gzip ChipIngressEmitterEnabled:false ChipIngressEmitterGRPCEndpoint: ChipIngressInsecureConnection:false ChipIngressBatchEmitterEnabled:false ChipIngressBufferSize:0 ChipIngressMaxBatchSize:0 ChipIngressSendInterval:0s ChipIngressSendTimeout:0s ChipIngressDrainTimeout:0s ChipIngressMaxConcurrentSends:0 ChipIngressLogger:<nil> LogExportTimeout:1s LogExportInterval:1s LogExportMaxBatchSize:512 LogMaxQueueSize:2048 LogBatchProcessor:true LogRetryConfig:<nil> LogStreamingEnabled:false LogLevel:info LogCompressor:gzip AuthHeaders:map[] AuthHeadersTTL:0s AuthKeySigner:<nil> AuthPublicKeyHex:}
 	// {InitialInterval:5s MaxInterval:30s MaxElapsedTime:1m0s}
 }

From 16aa7091d0e4eb590e18bab89e95feb98b51142f Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Mon, 16 Mar 2026 19:49:03 +0200
Subject: [PATCH 24/28] Use caller ctx

---
 pkg/beholder/chip_ingress_batch_emitter.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter.go
index 2c28c11513..c3f21173e0 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter.go
@@ -143,16 +143,16 @@ func (e *ChipIngressBatchEmitter) emitInternal(ctx context.Context, body []byte,
 
 		queueErr := e.batchClient.QueueMessage(eventPb, func(sendErr error) {
 			if sendErr != nil {
-				e.metrics.eventsDropped.Add(context.Background(), 1, metricAttrs)
+				e.metrics.eventsDropped.Add(ctx, 1, metricAttrs)
 			} else {
-				e.metrics.eventsSent.Add(context.Background(), 1, metricAttrs)
+				e.metrics.eventsSent.Add(ctx, 1, metricAttrs)
 			}
 			if callback != nil {
 				callback(sendErr)
 			}
 		})
 		if queueErr != nil {
-			e.metrics.eventsDropped.Add(context.Background(), 1, metricAttrs)
+			e.metrics.eventsDropped.Add(ctx, 1, metricAttrs)
 			if callback != nil {
 				callback(queueErr)
 			}

From 3048466c40b88c7d351777f8023ff35bf2887bfc Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Wed, 18 Mar 2026 15:23:53 +0200
Subject: [PATCH 25/28] Add batch emitter in services

---
 pkg/beholder/client.go                 |  33 +++++---
 pkg/beholder/emit_only_adapter.go      |  18 +++++
 pkg/beholder/emit_only_adapter_test.go |  54 +++++++++++++
 pkg/beholder/httpclient.go             |   2 +-
 pkg/beholder/managed_services_test.go  | 102 +++++++++++++++++++++++++
 pkg/beholder/noop.go                   |   2 +-
 6 files changed, 198 insertions(+), 13 deletions(-)
 create mode 100644 pkg/beholder/emit_only_adapter.go
 create mode 100644 pkg/beholder/emit_only_adapter_test.go
 create mode 100644 pkg/beholder/managed_services_test.go

diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index 83f61c3a0e..2f695f758d 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -26,6 +26,7 @@ import (
 	"google.golang.org/grpc/credentials/insecure"
 
 	"github.com/smartcontractkit/chainlink-common/pkg/chipingress"
+	"github.com/smartcontractkit/chainlink-common/pkg/services"
 )
 
 const defaultGRPCCompressor = "gzip"
@@ -60,6 +61,9 @@ type Client struct {
 
 	// OnClose
 	OnClose func() error
+
+	// managedServices holds services started/stopped by the application, not by the Client.
+	managedServices []services.Service
 }
 
 // NewClient creates a new Client with initialized OpenTelemetry components
@@ -232,10 +236,7 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 			if err != nil {
 				return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
 			}
-			if err = batchEmitterService.Start(context.Background()); err != nil {
-				return nil, fmt.Errorf("failed to start chip ingress batch emitter: %w", err)
-			}
-			chipIngressEmitter = batchEmitterService
+			chipIngressEmitter = &emitOnlyAdapter{batchEmitterService}
 		} else {
 			chipIngressEmitter, err = NewChipIngressEmitter(chipIngressClient)
 			if err != nil {
@@ -245,9 +246,6 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 
 		emitter, err = NewDualSourceEmitter(chipIngressEmitter, emitter, cfg.ChipIngressBatchEmitterEnabled)
 		if err != nil {
-			if batchEmitterService != nil {
-				_ = batchEmitterService.Close()
-			}
 			return nil, fmt.Errorf("failed to create dual source emitter: %w", err)
 		}
 	}
@@ -258,12 +256,25 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 		}
 		return
 	}
-	return &Client{cfg, logger, tracer, meter, emitter, chipIngressClient, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider, signer, onClose}, nil
+	var managed []services.Service
+	if batchEmitterService != nil {
+		managed = append(managed, batchEmitterService)
+	}
+
+	return &Client{cfg, logger, tracer, meter, emitter, chipIngressClient, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider, signer, onClose, managed}, nil
+}
+
+// ManagedServices returns services whose lifecycle is owned by the application
+// (start, stop, health). Returns nil when the receiver is nil or has none.
+func (c *Client) ManagedServices() []services.Service {
+	if c == nil {
+		return nil
+	}
+	return c.managedServices
 }
 
-// Closes all providers, flushes all data and stops all background processes.
-// Order matters: emitter is closed first so the batch emitter can drain
-// buffered events while the gRPC connection is still alive.
+// Close shuts down OTel providers and the chip ingress connection.
+// It does NOT close managed services — those are closed by the application.
 func (c Client) Close() (err error) {
 	if c.Emitter != nil {
 		err = errors.Join(err, c.Emitter.Close())
diff --git a/pkg/beholder/emit_only_adapter.go b/pkg/beholder/emit_only_adapter.go
new file mode 100644
index 0000000000..3b2ac3d736
--- /dev/null
+++ b/pkg/beholder/emit_only_adapter.go
@@ -0,0 +1,18 @@
+package beholder
+
+import "context"
+
+// emitOnlyAdapter wraps a ChipIngressBatchEmitter as an Emitter but with a
+// no-op Close. This decouples emission (used by DualSourceEmitter) from
+// lifecycle management (owned by the application's service list).
+type emitOnlyAdapter struct {
+	e *ChipIngressBatchEmitter
+}
+
+func (a *emitOnlyAdapter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
+	return a.e.Emit(ctx, body, attrKVs...)
+}
+
+func (a *emitOnlyAdapter) Close() error {
+	return nil // lifecycle is managed externally
+}
diff --git a/pkg/beholder/emit_only_adapter_test.go b/pkg/beholder/emit_only_adapter_test.go
new file mode 100644
index 0000000000..c81c168cc7
--- /dev/null
+++ b/pkg/beholder/emit_only_adapter_test.go
@@ -0,0 +1,54 @@
+package beholder_test
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/smartcontractkit/chainlink-common/pkg/beholder"
+)
+
+func TestEmitOnlyAdapter_CloseIsNoOp(t *testing.T) {
+	// Create a client with batch enabled, get the emitter, and verify
+	// that closing the client (which closes the DualSourceEmitter which
+	// closes the emitOnlyAdapter) does not close the batch emitter service.
+	lggr := newTestLogger(t)
+
+	client, err := beholder.NewClient(beholder.Config{
+		OtelExporterGRPCEndpoint:       "localhost:4317",
+		ChipIngressEmitterEnabled:      true,
+		ChipIngressEmitterGRPCEndpoint: "localhost:9090",
+		ChipIngressInsecureConnection:  true,
+		ChipIngressBatchEmitterEnabled: true,
+		ChipIngressLogger:              lggr,
+		ChipIngressBufferSize:          10,
+		ChipIngressMaxBatchSize:        5,
+		ChipIngressSendInterval:        50 * time.Millisecond,
+		ChipIngressSendTimeout:         1 * time.Second,
+		ChipIngressDrainTimeout:        1 * time.Second,
+	})
+	require.NoError(t, err)
+
+	managed := client.ManagedServices()
+	require.Len(t, managed, 1)
+	svc := managed[0]
+
+	// Close the client (which closes the DualSourceEmitter → emitOnlyAdapter.Close → no-op).
+	// OTel provider flush may fail (no real endpoint) — we only care that it doesn't
+	// close the batch emitter service.
+	_ = client.Close()
+
+	// The batch emitter service should still be startable (not already closed)
+	err = svc.Start(t.Context())
+	require.NoError(t, err)
+
+	// And closable
+	err = svc.Close()
+	require.NoError(t, err)
+
+	// Second close should error (StopOnce)
+	err = svc.Close()
+	assert.Error(t, err, "second close should fail")
+}
diff --git a/pkg/beholder/httpclient.go b/pkg/beholder/httpclient.go
index 693f581452..7bea69a424 100644
--- a/pkg/beholder/httpclient.go
+++ b/pkg/beholder/httpclient.go
@@ -187,7 +187,7 @@ func NewHTTPClient(cfg Config, otlploghttpNew otlploghttpFactory) (*Client, erro
 		return
 	}
 	// HTTP client doesn't currently support rotating auth, so lazySigner is always nil
-	return &Client{cfg, logger, tracer, meter, emitter, nil, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider, nil, onClose}, nil
+	return &Client{cfg, logger, tracer, meter, emitter, nil, loggerProvider, tracerProvider, meterProvider, messageLoggerProvider, nil, onClose, nil}, nil
 }
 
 func newHTTPTracerProvider(config Config, resource *sdkresource.Resource, tlsConfig *tls.Config) (*sdktrace.TracerProvider, error) {
diff --git a/pkg/beholder/managed_services_test.go b/pkg/beholder/managed_services_test.go
new file mode 100644
index 0000000000..7feaff6e9b
--- /dev/null
+++ b/pkg/beholder/managed_services_test.go
@@ -0,0 +1,102 @@
+package beholder_test
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/smartcontractkit/chainlink-common/pkg/beholder"
+)
+
+func TestManagedServices_NilReceiver(t *testing.T) {
+	var c *beholder.Client
+	assert.Nil(t, c.ManagedServices())
+}
+
+func TestManagedServices_NoopClient(t *testing.T) {
+	c := beholder.NewNoopClient()
+	assert.Empty(t, c.ManagedServices())
+}
+
+func TestManagedServices_BatchDisabled(t *testing.T) {
+	client, err := beholder.NewClient(beholder.Config{
+		OtelExporterGRPCEndpoint:       "localhost:4317",
+		ChipIngressEmitterEnabled:      true,
+		ChipIngressEmitterGRPCEndpoint: "localhost:9090",
+		ChipIngressInsecureConnection:  true,
+		ChipIngressBatchEmitterEnabled: false,
+	})
+	require.NoError(t, err)
+	defer func() { _ = client.Close() }()
+
+	assert.Empty(t, client.ManagedServices())
+}
+
+func TestManagedServices_BatchEnabled(t *testing.T) {
+	lggr := newTestLogger(t)
+
+	client, err := beholder.NewClient(beholder.Config{
+		OtelExporterGRPCEndpoint:       "localhost:4317",
+		ChipIngressEmitterEnabled:      true,
+		ChipIngressEmitterGRPCEndpoint: "localhost:9090",
+		ChipIngressInsecureConnection:  true,
+		ChipIngressBatchEmitterEnabled: true,
+		ChipIngressLogger:              lggr,
+		ChipIngressBufferSize:          10,
+		ChipIngressMaxBatchSize:        5,
+		ChipIngressSendInterval:        50 * time.Millisecond,
+		ChipIngressSendTimeout:         1 * time.Second,
+		ChipIngressDrainTimeout:        1 * time.Second,
+	})
+	require.NoError(t, err)
+
+	managed := client.ManagedServices()
+	require.Len(t, managed, 1)
+	assert.Equal(t, "ChipIngressBatchEmitter", managed[0].Name())
+
+	// Service should be startable (not already started)
+	err = managed[0].Start(context.Background())
+	require.NoError(t, err)
+
+	err = managed[0].Close()
+	require.NoError(t, err)
+
+	// Client.Close may return OTel flush errors (no real endpoint) — that's fine.
+	// The key assertion is that closing the client does NOT close the batch emitter
+	// (emitOnlyAdapter.Close is a no-op).
+	_ = client.Close()
+}
+
+func TestManagedServices_BatchEmitterNotAutoStarted(t *testing.T) {
+	lggr := newTestLogger(t)
+
+	client, err := beholder.NewClient(beholder.Config{
+		OtelExporterGRPCEndpoint:       "localhost:4317",
+		ChipIngressEmitterEnabled:      true,
+		ChipIngressEmitterGRPCEndpoint: "localhost:9090",
+		ChipIngressInsecureConnection:  true,
+		ChipIngressBatchEmitterEnabled: true,
+		ChipIngressLogger:              lggr,
+		ChipIngressBufferSize:          10,
+		ChipIngressMaxBatchSize:        5,
+		ChipIngressSendInterval:        50 * time.Millisecond,
+		ChipIngressSendTimeout:         1 * time.Second,
+		ChipIngressDrainTimeout:        1 * time.Second,
+	})
+	require.NoError(t, err)
+
+	managed := client.ManagedServices()
+	require.Len(t, managed, 1)
+
+	// The service should not be ready yet (not started)
+	err = managed[0].Ready()
+	assert.Error(t, err, "service should not be ready before Start()")
+
+	// Start, verify ready, then close
+	require.NoError(t, managed[0].Start(context.Background()))
+	require.NoError(t, managed[0].Ready())
+	require.NoError(t, managed[0].Close())
+}
diff --git a/pkg/beholder/noop.go b/pkg/beholder/noop.go
index 67580ec15c..1ed0a0144a 100644
--- a/pkg/beholder/noop.go
+++ b/pkg/beholder/noop.go
@@ -39,7 +39,7 @@ func NewNoopClient() *Client {
 	// ChipIngress
 	chipClient := &chipingress.NoopClient{}
 
-	return &Client{cfg, logger, tracer, meter, messageEmitter, chipClient, loggerProvider, tracerProvider, meterProvider, loggerProvider, nil, noopOnClose}
+	return &Client{cfg, logger, tracer, meter, messageEmitter, chipClient, loggerProvider, tracerProvider, meterProvider, loggerProvider, nil, noopOnClose, nil}
 }
 
 // NewStdoutClient creates a new Client with exporters which send telemetry data to standard output

From 7257a19a5c701084f98558ac10a48a0d25c01d8e Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Wed, 18 Mar 2026 15:38:50 +0200
Subject: [PATCH 26/28] BatchEmitter + Service

---
 ... => chip_ingress_batch_emitter_service.go} | 20 ++++-----
 ...hip_ingress_batch_emitter_service_test.go} | 42 +++++++++----------
 pkg/beholder/client.go                        |  4 +-
 pkg/beholder/emit_only_adapter.go             |  4 +-
 pkg/beholder/managed_services_test.go         |  2 +-
 5 files changed, 36 insertions(+), 36 deletions(-)
 rename pkg/beholder/{chip_ingress_batch_emitter.go => chip_ingress_batch_emitter_service.go} (83%)
 rename pkg/beholder/{chip_ingress_batch_emitter_test.go => chip_ingress_batch_emitter_service_test.go} (84%)

diff --git a/pkg/beholder/chip_ingress_batch_emitter.go b/pkg/beholder/chip_ingress_batch_emitter_service.go
similarity index 83%
rename from pkg/beholder/chip_ingress_batch_emitter.go
rename to pkg/beholder/chip_ingress_batch_emitter_service.go
index c3f21173e0..427941e3f5 100644
--- a/pkg/beholder/chip_ingress_batch_emitter.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_service.go
@@ -16,9 +16,9 @@ import (
 	"github.com/smartcontractkit/chainlink-common/pkg/services"
 )
 
-// ChipIngressBatchEmitter batches events and sends them via chipingress.Client.PublishBatch.
+// ChipIngressBatchEmitterService batches events and sends them via chipingress.Client.PublishBatch.
 // It implements the Emitter interface.
-type ChipIngressBatchEmitter struct {
+type ChipIngressBatchEmitterService struct {
 	services.Service
 	eng *services.Engine
 
@@ -33,8 +33,8 @@ type batchEmitterMetrics struct {
 	eventsDropped otelmetric.Int64Counter
 }
 
-// NewChipIngressBatchEmitter creates a batch emitter backed by the given chipingress client.
-func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logger.Logger) (*ChipIngressBatchEmitter, error) {
+// NewChipIngressBatchEmitterService creates a batch emitter service backed by the given chipingress client.
+func NewChipIngressBatchEmitterService(client chipingress.Client, cfg Config, lggr logger.Logger) (*ChipIngressBatchEmitterService, error) {
 	if client == nil {
 		return nil, fmt.Errorf("chip ingress client is nil")
 	}
@@ -83,13 +83,13 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 		return nil, fmt.Errorf("failed to create batch client: %w", err)
 	}
 
-	e := &ChipIngressBatchEmitter{
+	e := &ChipIngressBatchEmitterService{
 		batchClient: batchClient,
 		metrics:     metrics,
 	}
 
 	e.Service, e.eng = services.Config{
-		Name: "ChipIngressBatchEmitter",
+		Name: "ChipIngressBatchEmitterService",
 	}.NewServiceEngine(lggr)
 
 	e.eng.Go(func(ctx context.Context) {
@@ -104,7 +104,7 @@ func NewChipIngressBatchEmitter(client chipingress.Client, cfg Config, lggr logg
 // Emit queues an event for batched delivery without blocking.
 // Returns an error if the emitter is stopped or the context is cancelled.
 // If the buffer is full, the event is silently dropped.
-func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
+func (e *ChipIngressBatchEmitterService) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
 	return e.emitInternal(ctx, body, nil, attrKVs...)
 }
 
@@ -113,11 +113,11 @@ func (e *ChipIngressBatchEmitter) Emit(ctx context.Context, body []byte, attrKVs
 //
 // If EmitWithCallback returns a non-nil error, the callback will NOT be invoked.
 // If it returns nil, the callback is guaranteed to fire exactly once.
-func (e *ChipIngressBatchEmitter) EmitWithCallback(ctx context.Context, body []byte, callback func(error), attrKVs ...any) error {
+func (e *ChipIngressBatchEmitterService) EmitWithCallback(ctx context.Context, body []byte, callback func(error), attrKVs ...any) error {
 	return e.emitInternal(ctx, body, callback, attrKVs...)
 }
 
-func (e *ChipIngressBatchEmitter) emitInternal(ctx context.Context, body []byte, callback func(error), attrKVs ...any) error {
+func (e *ChipIngressBatchEmitterService) emitInternal(ctx context.Context, body []byte, callback func(error), attrKVs ...any) error {
 	return e.eng.IfNotStopped(func() error {
 		domain, entity, err := ExtractSourceAndType(attrKVs...)
 		if err != nil {
@@ -162,7 +162,7 @@ func (e *ChipIngressBatchEmitter) emitInternal(ctx context.Context, body []byte,
 	})
 }
 
-func (e *ChipIngressBatchEmitter) metricAttrsFor(domain, entity string) otelmetric.MeasurementOption {
+func (e *ChipIngressBatchEmitterService) metricAttrsFor(domain, entity string) otelmetric.MeasurementOption {
 	key := domain + "\x00" + entity
 	if v, ok := e.metricAttrsCache.Load(key); ok {
 		return v.(otelmetric.MeasurementOption)
diff --git a/pkg/beholder/chip_ingress_batch_emitter_test.go b/pkg/beholder/chip_ingress_batch_emitter_service_test.go
similarity index 84%
rename from pkg/beholder/chip_ingress_batch_emitter_test.go
rename to pkg/beholder/chip_ingress_batch_emitter_service_test.go
index 45cd6a2dfe..988647a56d 100644
--- a/pkg/beholder/chip_ingress_batch_emitter_test.go
+++ b/pkg/beholder/chip_ingress_batch_emitter_service_test.go
@@ -35,25 +35,25 @@ func newTestLogger(t *testing.T) logger.Logger {
 	return lggr
 }
 
-func TestNewChipIngressBatchEmitter(t *testing.T) {
+func TestNewChipIngressBatchEmitterService(t *testing.T) {
 	t.Run("happy path", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
+		emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, newTestConfig(), newTestLogger(t))
 		require.NoError(t, err)
 		assert.NotNil(t, emitter)
 	})
 
 	t.Run("returns error when client is nil", func(t *testing.T) {
-		emitter, err := beholder.NewChipIngressBatchEmitter(nil, newTestConfig(), newTestLogger(t))
+		emitter, err := beholder.NewChipIngressBatchEmitterService(nil, newTestConfig(), newTestLogger(t))
 		assert.Error(t, err)
 		assert.Nil(t, emitter)
 	})
 }
 
-func TestChipIngressBatchEmitter_Emit(t *testing.T) {
+func TestChipIngressBatchEmitterService_Emit(t *testing.T) {
 	t.Run("returns error when domain/entity missing", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
+		emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, newTestConfig(), newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 		defer emitter.Close() //nolint:errcheck
@@ -80,7 +80,7 @@ func TestChipIngressBatchEmitter_Emit(t *testing.T) {
 		cfg := newTestConfig()
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
@@ -111,7 +111,7 @@ func TestChipIngressBatchEmitter_Emit(t *testing.T) {
 	})
 }
 
-func TestChipIngressBatchEmitter_CloudEventFormat(t *testing.T) {
+func TestChipIngressBatchEmitterService_CloudEventFormat(t *testing.T) {
 	clientMock := mocks.NewClient(t)
 
 	var mu sync.Mutex
@@ -128,7 +128,7 @@ func TestChipIngressBatchEmitter_CloudEventFormat(t *testing.T) {
 	cfg := newTestConfig()
 	cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+	emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, cfg, newTestLogger(t))
 	require.NoError(t, err)
 	require.NoError(t, emitter.Start(t.Context()))
 
@@ -156,7 +156,7 @@ func TestChipIngressBatchEmitter_CloudEventFormat(t *testing.T) {
 	assert.NotEmpty(t, event.Id)
 }
 
-func TestChipIngressBatchEmitter_PublishBatchError(t *testing.T) {
+func TestChipIngressBatchEmitterService_PublishBatchError(t *testing.T) {
 	clientMock := mocks.NewClient(t)
 
 	var mu sync.Mutex
@@ -173,7 +173,7 @@ func TestChipIngressBatchEmitter_PublishBatchError(t *testing.T) {
 	cfg := newTestConfig()
 	cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+	emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, cfg, newTestLogger(t))
 	require.NoError(t, err)
 	require.NoError(t, emitter.Start(t.Context()))
 
@@ -194,7 +194,7 @@ func TestChipIngressBatchEmitter_PublishBatchError(t *testing.T) {
 	require.NoError(t, emitter.Close())
 }
 
-func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
+func TestChipIngressBatchEmitterService_ContextCancellation(t *testing.T) {
 	clientMock := mocks.NewClient(t)
 	clientMock.
 		On("PublishBatch", mock.Anything, mock.Anything).
@@ -205,7 +205,7 @@ func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
 	cfg.ChipIngressBufferSize = 1
 	cfg.ChipIngressSendInterval = 10 * time.Second
 
-	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+	emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, cfg, newTestLogger(t))
 	require.NoError(t, err)
 	require.NoError(t, emitter.Start(t.Context()))
 	defer emitter.Close() //nolint:errcheck
@@ -220,7 +220,7 @@ func TestChipIngressBatchEmitter_ContextCancellation(t *testing.T) {
 	assert.ErrorIs(t, err, context.Canceled)
 }
 
-func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
+func TestChipIngressBatchEmitterService_DefaultConfig(t *testing.T) {
 	clientMock := mocks.NewClient(t)
 
 	var mu sync.Mutex
@@ -234,7 +234,7 @@ func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
 		}).
 		Return(nil, nil)
 
-	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, beholder.Config{}, newTestLogger(t))
+	emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, beholder.Config{}, newTestLogger(t))
 	require.NoError(t, err)
 	require.NoError(t, emitter.Start(t.Context()))
 
@@ -257,14 +257,14 @@ func TestChipIngressBatchEmitter_DefaultConfig(t *testing.T) {
 	require.Len(t, receivedBatch.Events, 1)
 }
 
-func TestChipIngressBatchEmitter_EmitAfterClose(t *testing.T) {
+func TestChipIngressBatchEmitterService_EmitAfterClose(t *testing.T) {
 	clientMock := mocks.NewClient(t)
 	clientMock.
 		On("PublishBatch", mock.Anything, mock.Anything).
 		Return(nil, nil).
 		Maybe()
 
-	emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, newTestConfig(), newTestLogger(t))
+	emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, newTestConfig(), newTestLogger(t))
 	require.NoError(t, err)
 	require.NoError(t, emitter.Start(t.Context()))
 	require.NoError(t, emitter.Close())
@@ -276,7 +276,7 @@ func TestChipIngressBatchEmitter_EmitAfterClose(t *testing.T) {
 	assert.Error(t, err)
 }
 
-func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
+func TestChipIngressBatchEmitterService_EmitWithCallback(t *testing.T) {
 	t.Run("callback receives nil on success", func(t *testing.T) {
 		clientMock := mocks.NewClient(t)
 		clientMock.
@@ -286,7 +286,7 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 		cfg := newTestConfig()
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
@@ -318,7 +318,7 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 		cfg := newTestConfig()
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
@@ -365,7 +365,7 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 		cfg.ChipIngressDrainTimeout = 200 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 		defer close(sendBlocked)
@@ -414,7 +414,7 @@ func TestChipIngressBatchEmitter_EmitWithCallback(t *testing.T) {
 		cfg := newTestConfig()
 		cfg.ChipIngressSendInterval = 50 * time.Millisecond
 
-		emitter, err := beholder.NewChipIngressBatchEmitter(clientMock, cfg, newTestLogger(t))
+		emitter, err := beholder.NewChipIngressBatchEmitterService(clientMock, cfg, newTestLogger(t))
 		require.NoError(t, err)
 		require.NoError(t, emitter.Start(t.Context()))
 
diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index 2f695f758d..c254f7a097 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -191,7 +191,7 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 	// This will eventually be removed in favor of chip-ingress emitter
 	// and logs will be sent via OTLP using the regular Logger instead of calling Emit
 	emitter := NewMessageEmitter(messageLogger)
-	var batchEmitterService *ChipIngressBatchEmitter
+	var batchEmitterService *ChipIngressBatchEmitterService
 	var chipIngressClient chipingress.Client = &chipingress.NoopClient{}
 	// if chip ingress is enabled, create dual source emitter that sends to both otel collector and chip ingress
 	// eventually we will remove the dual source emitter and just use chip ingress
@@ -232,7 +232,7 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 			if cfg.ChipIngressLogger == nil {
 				return nil, fmt.Errorf("ChipIngressLogger is required when ChipIngressBatchEmitterEnabled is true")
 			}
-			batchEmitterService, err = NewChipIngressBatchEmitter(chipIngressClient, cfg, cfg.ChipIngressLogger)
+			batchEmitterService, err = NewChipIngressBatchEmitterService(chipIngressClient, cfg, cfg.ChipIngressLogger)
 			if err != nil {
 				return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
 			}
diff --git a/pkg/beholder/emit_only_adapter.go b/pkg/beholder/emit_only_adapter.go
index 3b2ac3d736..52da40b4e6 100644
--- a/pkg/beholder/emit_only_adapter.go
+++ b/pkg/beholder/emit_only_adapter.go
@@ -2,11 +2,11 @@ package beholder
 
 import "context"
 
-// emitOnlyAdapter wraps a ChipIngressBatchEmitter as an Emitter but with a
+// emitOnlyAdapter wraps a ChipIngressBatchEmitterService as an Emitter but with a
 // no-op Close. This decouples emission (used by DualSourceEmitter) from
 // lifecycle management (owned by the application's service list).
 type emitOnlyAdapter struct {
-	e *ChipIngressBatchEmitter
+	e *ChipIngressBatchEmitterService
 }
 
 func (a *emitOnlyAdapter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
diff --git a/pkg/beholder/managed_services_test.go b/pkg/beholder/managed_services_test.go
index 7feaff6e9b..38727382fe 100644
--- a/pkg/beholder/managed_services_test.go
+++ b/pkg/beholder/managed_services_test.go
@@ -55,7 +55,7 @@ func TestManagedServices_BatchEnabled(t *testing.T) {
 
 	managed := client.ManagedServices()
 	require.Len(t, managed, 1)
-	assert.Equal(t, "ChipIngressBatchEmitter", managed[0].Name())
+	assert.Equal(t, "ChipIngressBatchEmitterService", managed[0].Name())
 
 	// Service should be startable (not already started)
 	err = managed[0].Start(context.Background())

From e82a57edc6e041db5ec273883d94bd9b819ae401 Mon Sep 17 00:00:00 2001
From: Thomas Kaliakos <thomas.kaliakos@smartcontract.com>
Date: Wed, 18 Mar 2026 22:52:45 +0200
Subject: [PATCH 27/28] remove emit_only_adapter - amend loop server

---
 pkg/beholder/client.go                 |  5 ++-
 pkg/beholder/emit_only_adapter.go      | 18 ---------
 pkg/beholder/emit_only_adapter_test.go | 54 --------------------------
 pkg/beholder/managed_services_test.go  | 11 ++++--
 pkg/loop/server.go                     | 11 ++++++
 5 files changed, 21 insertions(+), 78 deletions(-)
 delete mode 100644 pkg/beholder/emit_only_adapter.go
 delete mode 100644 pkg/beholder/emit_only_adapter_test.go

diff --git a/pkg/beholder/client.go b/pkg/beholder/client.go
index c254f7a097..aec93bf046 100644
--- a/pkg/beholder/client.go
+++ b/pkg/beholder/client.go
@@ -236,7 +236,7 @@ func NewGRPCClient(cfg Config, otlploggrpcNew otlploggrpcFactory) (*Client, erro
 			if err != nil {
 				return nil, fmt.Errorf("failed to create chip ingress batch emitter: %w", err)
 			}
-			chipIngressEmitter = &emitOnlyAdapter{batchEmitterService}
+			chipIngressEmitter = batchEmitterService
 		} else {
 			chipIngressEmitter, err = NewChipIngressEmitter(chipIngressClient)
 			if err != nil {
@@ -274,7 +274,8 @@ func (c *Client) ManagedServices() []services.Service {
 }
 
 // Close shuts down OTel providers and the chip ingress connection.
-// It does NOT close managed services — those are closed by the application.
+// The batch emitter service may already have been closed by the application;
+// the duplicate Close returns an "already stopped" error which is harmless.
 func (c Client) Close() (err error) {
 	if c.Emitter != nil {
 		err = errors.Join(err, c.Emitter.Close())
diff --git a/pkg/beholder/emit_only_adapter.go b/pkg/beholder/emit_only_adapter.go
deleted file mode 100644
index 52da40b4e6..0000000000
--- a/pkg/beholder/emit_only_adapter.go
+++ /dev/null
@@ -1,18 +0,0 @@
-package beholder
-
-import "context"
-
-// emitOnlyAdapter wraps a ChipIngressBatchEmitterService as an Emitter but with a
-// no-op Close. This decouples emission (used by DualSourceEmitter) from
-// lifecycle management (owned by the application's service list).
-type emitOnlyAdapter struct {
-	e *ChipIngressBatchEmitterService
-}
-
-func (a *emitOnlyAdapter) Emit(ctx context.Context, body []byte, attrKVs ...any) error {
-	return a.e.Emit(ctx, body, attrKVs...)
-}
-
-func (a *emitOnlyAdapter) Close() error {
-	return nil // lifecycle is managed externally
-}
diff --git a/pkg/beholder/emit_only_adapter_test.go b/pkg/beholder/emit_only_adapter_test.go
deleted file mode 100644
index c81c168cc7..0000000000
--- a/pkg/beholder/emit_only_adapter_test.go
+++ /dev/null
@@ -1,54 +0,0 @@
-package beholder_test
-
-import (
-	"testing"
-	"time"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-
-	"github.com/smartcontractkit/chainlink-common/pkg/beholder"
-)
-
-func TestEmitOnlyAdapter_CloseIsNoOp(t *testing.T) {
-	// Create a client with batch enabled, get the emitter, and verify
-	// that closing the client (which closes the DualSourceEmitter which
-	// closes the emitOnlyAdapter) does not close the batch emitter service.
-	lggr := newTestLogger(t)
-
-	client, err := beholder.NewClient(beholder.Config{
-		OtelExporterGRPCEndpoint:       "localhost:4317",
-		ChipIngressEmitterEnabled:      true,
-		ChipIngressEmitterGRPCEndpoint: "localhost:9090",
-		ChipIngressInsecureConnection:  true,
-		ChipIngressBatchEmitterEnabled: true,
-		ChipIngressLogger:              lggr,
-		ChipIngressBufferSize:          10,
-		ChipIngressMaxBatchSize:        5,
-		ChipIngressSendInterval:        50 * time.Millisecond,
-		ChipIngressSendTimeout:         1 * time.Second,
-		ChipIngressDrainTimeout:        1 * time.Second,
-	})
-	require.NoError(t, err)
-
-	managed := client.ManagedServices()
-	require.Len(t, managed, 1)
-	svc := managed[0]
-
-	// Close the client (which closes the DualSourceEmitter → emitOnlyAdapter.Close → no-op).
-	// OTel provider flush may fail (no real endpoint) — we only care that it doesn't
-	// close the batch emitter service.
-	_ = client.Close()
-
-	// The batch emitter service should still be startable (not already closed)
-	err = svc.Start(t.Context())
-	require.NoError(t, err)
-
-	// And closable
-	err = svc.Close()
-	require.NoError(t, err)
-
-	// Second close should error (StopOnce)
-	err = svc.Close()
-	assert.Error(t, err, "second close should fail")
-}
diff --git a/pkg/beholder/managed_services_test.go b/pkg/beholder/managed_services_test.go
index 38727382fe..476cd4dc44 100644
--- a/pkg/beholder/managed_services_test.go
+++ b/pkg/beholder/managed_services_test.go
@@ -8,6 +8,8 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
+	"github.com/smartcontractkit/chainlink-common/pkg/services"
+
 	"github.com/smartcontractkit/chainlink-common/pkg/beholder"
 )
 
@@ -64,10 +66,11 @@ func TestManagedServices_BatchEnabled(t *testing.T) {
 	err = managed[0].Close()
 	require.NoError(t, err)
 
-	// Client.Close may return OTel flush errors (no real endpoint) — that's fine.
-	// The key assertion is that closing the client does NOT close the batch emitter
-	// (emitOnlyAdapter.Close is a no-op).
-	_ = client.Close()
+	// Client.Close calls DualSourceEmitter.Close which calls the batch emitter's
+	// Close a second time. StopOnce returns "already stopped" — harmless.
+	err = client.Close()
+	require.Error(t, err)
+	assert.ErrorIs(t, err, services.ErrAlreadyStopped)
 }
 
 func TestManagedServices_BatchEmitterNotAutoStarted(t *testing.T) {
diff --git a/pkg/loop/server.go b/pkg/loop/server.go
index 28472ac2a3..3fa83da4c9 100644
--- a/pkg/loop/server.go
+++ b/pkg/loop/server.go
@@ -94,6 +94,7 @@ type Server struct {
 	webServer       *webServer
 	checker         *services.HealthChecker
 	LimitsFactory   limits.Factory
+	managedServices []services.Service
 }
 
 func newServer(loggerName string) (*Server, error) {
@@ -214,6 +215,13 @@ func (s *Server) start(opts ...ServerOpt) error {
 		beholder.SetClient(beholderClient)
 		beholder.SetGlobalOtelProviders()
 
+		for _, svc := range beholderClient.ManagedServices() {
+			if err := svc.Start(ctx); err != nil {
+				return fmt.Errorf("failed to start beholder managed service %s: %w", svc.Name(), err)
+			}
+			s.managedServices = append(s.managedServices, svc)
+		}
+
 		if beholderCfg.LogStreamingEnabled {
 			otelLogger, err := NewOtelLogger(beholderClient.Logger, beholderCfg.LogLevel)
 			if err != nil {
@@ -285,6 +293,9 @@ func (s *Server) Register(c services.HealthReporter) error { return s.checker.Re
 
 // Stop closes resources and flushes logs.
 func (s *Server) Stop() {
+	for i := len(s.managedServices) - 1; i >= 0; i-- {
+		s.Logger.ErrorIfFn(s.managedServices[i].Close, "Failed to close managed service")
+	}
 	if s.dbStatsReporter != nil {
 		s.dbStatsReporter.Stop()
 	}

From 2c481d0ccf65a17433ed1f2c6328141e94f327ff Mon Sep 17 00:00:00 2001
From: Pavel <177363085+pkcll@users.noreply.github.com>
Date: Wed, 25 Mar 2026 23:22:36 -0400
Subject: [PATCH 28/28] test(beholder): close writer client in global test

---
 pkg/beholder/global_test.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pkg/beholder/global_test.go b/pkg/beholder/global_test.go
index df57d0a41b..0867609f78 100644
--- a/pkg/beholder/global_test.go
+++ b/pkg/beholder/global_test.go
@@ -77,6 +77,9 @@ func TestClient_SetGlobalOtelProviders(t *testing.T) {
 	var b strings.Builder
 	client, err := beholder.NewWriterClient(&b)
 	require.NoError(t, err)
+	defer func() {
+		require.NoError(t, client.Close())
+	}()
 	// Set global Otel Client
 	beholder.SetClient(client)