diff --git a/docs/api.yaml b/docs/api.yaml index 782999a20..cbbbf824f 100644 --- a/docs/api.yaml +++ b/docs/api.yaml @@ -270,6 +270,9 @@ types: - name: contract_address type: string description: Address of the main contract on this blockchain + - name: confirmation_delay_secs + type: integer + description: Per-chain reorg-protection window in seconds; events are buffered for this duration before being committed. 0 disables the gate. See nitronode/docs/reorg-fix.md. - balance_entry: description: Balance for a specific asset diff --git a/docs/protocol/security-and-limitations.md b/docs/protocol/security-and-limitations.md index 36cbbad15..611a795a5 100644 --- a/docs/protocol/security-and-limitations.md +++ b/docs/protocol/security-and-limitations.md @@ -66,6 +66,7 @@ In the current protocol version, participants MUST trust nodes for: - **Off-chain transfer routing** — when a user sends funds off-chain to another party, the node must countersign both the sender's state (decreasing their allocation) and the receiver's credit state (increasing theirs); the on-chain contract cannot enforce atomicity between two independent channel updates. A malicious node could apply the sender's state while withholding the receiver's credit, capturing the transferred funds. Users must trust the node to faithfully execute both legs of every off-chain transfer. - **Asset-symbol equivalence** — the node operator controls which chain-specific tokens are configured under a single unified asset symbol. The protocol treats all tokens sharing a symbol as fully fungible 1:1 representations of the same asset, so off-chain credit denominated in that asset can be redeemed from any of those token inventories regardless of which one originally backed it (the validator binds unchanneled credit to the chain/token chosen at first channel creation, enforcing only that the asset symbol matches). This is intended behaviour that enables cross-chain redemption. 
Operators MUST therefore configure only economically equivalent (1:1 redeemable) tokens under one symbol; grouping non-equivalent tokens (e.g. a test token and production USDC) under the same symbol would let credit sourced from the cheap inventory be redeemed against the valuable one. Token equivalence cannot be verified programmatically and is an operator configuration responsibility. - **Signature validator registry** — the node operator controls which additional signature validators are registered on the ChannelHub contract. A malicious or compromised node could register a validator that approves forged user signatures, then use it to create channels or close them without the user's knowledge. A 1-day activation delay (`VALIDATOR_ACTIVATION_DELAY`) creates an observable window before any newly registered validator can be used. Users MUST monitor the `ValidatorRegistered` event on the ChannelHub contract and SHOULD revoke all ERC20 approvals granted to ChannelHub immediately upon detecting an unexpected registration. Once registered, a validator cannot be deactivated — the 1-day window is the entire response budget. Users SHOULD avoid granting large standing ERC20 approvals to ChannelHub to cap worst-case exposure. +- **Chain reorg depth** — the node credits off-chain balances after observing on-chain events. To bound reorg risk, each chain has a `confirmation_delay_secs` window before events are committed; events whose block is reorged out within that window are discarded. When the configured delay is set below the chain's hard finality time, a residual risk remains: a deeper reorg can leave the off-chain state with no on-chain backing. Operators MUST set `confirmation_delay_secs` to at least the chain's finality time when this residual exposure is unacceptable. See [Reorg-Protection Confirmation Gate](../../nitronode/docs/reorg-fix.md). 
Participants do not need to trust nodes for: diff --git a/nitronode/README.md b/nitronode/README.md index cb505b00b..632abad99 100644 --- a/nitronode/README.md +++ b/nitronode/README.md @@ -19,6 +19,7 @@ Nitronode is built with a modular architecture: - **RPC Server**: WebSocket-based JSON-RPC server handling client requests. - **Blockchain Listeners**: Monitors on-chain events from Nitrolite `ChannelHub` contracts across multiple chains. +- **Confirmation Gate**: Per-chain reorg-protection buffer between the listener and event handlers. Delays event delivery by `confirmation_delay_secs` so that events whose blocks are reorged out before the window elapses are dropped instead of committed. See [docs/reorg-fix.md](docs/reorg-fix.md). - **Event Handlers**: Processes blockchain events to update internal channel and user states. - **Storage Layer**: - **Database Store**: Persistent storage for channels, states, and transactions (supports SQLite and PostgreSQL). @@ -53,6 +54,7 @@ blockchains: id: 80002 contract_address: "0x9d1E88627884e066B81A02d69BCB2437a520534C" block_step: 1000 + confirmation_delay_secs: 10 # reorg-protection window; 0 disables. See docs/reorg-fix.md. - name: base_sepolia id: 84532 @@ -128,6 +130,7 @@ docker run -p 7824:7824 -e NITRONODE_SIGNER_KEY=... nitronode nitronode/ ├── api/ # JSON-RPC request handlers ├── config/ # Default configurations and migrations +├── docs/ # Component design notes (e.g. 
reorg-fix.md) ├── event_handlers/ # Logic for reacting to blockchain events ├── metrics/ # Prometheus telemetry implementation ├── store/ # Persistence layer (SQL and Memory) @@ -157,6 +160,7 @@ The following protocol operations are fully specified in [protocol-description.m - [Nitrolite Protocol Overview](../protocol-description.md) - [Communication Flows](../docs/communication_flows/) - [API Reference](../docs/api.yaml) +- [Reorg-Protection Confirmation Gate](docs/reorg-fix.md) ## License diff --git a/nitronode/api/node_v1/utils.go b/nitronode/api/node_v1/utils.go index 96209281c..c49919843 100644 --- a/nitronode/api/node_v1/utils.go +++ b/nitronode/api/node_v1/utils.go @@ -10,9 +10,10 @@ import ( func mapBlockchainV1(blockchain core.Blockchain) rpc.BlockchainInfoV1 { return rpc.BlockchainInfoV1{ - Name: blockchain.Name, - BlockchainID: strconv.FormatUint(blockchain.ID, 10), - ChannelHubAddress: blockchain.ChannelHubAddress, + Name: blockchain.Name, + BlockchainID: strconv.FormatUint(blockchain.ID, 10), + ChannelHubAddress: blockchain.ChannelHubAddress, + ConfirmationDelaySecs: blockchain.ConfirmationDelaySecs, } } diff --git a/nitronode/api/rpc_router.go b/nitronode/api/rpc_router.go index be748051e..69822d9c5 100644 --- a/nitronode/api/rpc_router.go +++ b/nitronode/api/rpc_router.go @@ -23,9 +23,9 @@ type RPCRouter struct { } type RPCRouterConfig struct { - NodeVersion string - MinChallenge uint32 - MaxChallenge uint32 + NodeVersion string + MinChallenge uint32 + MaxChallenge uint32 MaxParticipants int MaxSessionDataLen int MaxSessionKeyIDs int diff --git a/nitronode/config/migrations/postgres/20260608000000_add_block_hash_to_contract_events.sql b/nitronode/config/migrations/postgres/20260608000000_add_block_hash_to_contract_events.sql new file mode 100644 index 000000000..5dc48625f --- /dev/null +++ b/nitronode/config/migrations/postgres/20260608000000_add_block_hash_to_contract_events.sql @@ -0,0 +1,7 @@ +-- +goose Up + +ALTER TABLE contract_events ADD COLUMN 
block_hash CHAR(66) NOT NULL DEFAULT ''; + +-- +goose Down + +ALTER TABLE contract_events DROP COLUMN block_hash; diff --git a/nitronode/docs/reorg-fix.md b/nitronode/docs/reorg-fix.md new file mode 100644 index 000000000..142710aca --- /dev/null +++ b/nitronode/docs/reorg-fix.md @@ -0,0 +1,411 @@ +# Reorg Attack Fix — Confirmation Window Specification + +## 1. Risk + +The Nitronode event listener credits a user's off-chain balance the moment it observes a deposit event on-chain. If the block containing that deposit is subsequently removed from the canonical chain (a "reorganisation"), the off-chain credit persists while the on-chain deposit no longer exists. Because the credited balance can be transferred to a receiver before the node has any way to detect the reorg, the node ends up honoring an off-chain state transition that is permanently unbacked. + +The worst-case outcome is a net loss of node liquidity equal to the sum of all deposit amounts that were credited during a reorg window and successfully drained to attacker-controlled receivers before the reorg was detected. There is no recovery path for the node once a signed receiver state exists. + +This risk is meaningful on any chain where head-level reorgs occur naturally or can be induced. On modern fast-finality chains (BNB, Polygon post-Rio, Avalanche) the residual probability is very low. On Ethereum L1, depth-1 reorgs are routine and cryptoeconomic finality takes ~12.8 minutes. + +--- + +## 2. Solution Overview + +A **per-chain confirmation window** is introduced between raw event delivery and handler invocation. When the listener observes any event on chain C: + +- It does **not** invoke the handler immediately. +- It waits for `confirmation_delay_secs` seconds (configured per chain in `blockchains.yaml`). +- If no reorg of the event's block occurs during that window, the handler is invoked normally. 
+- If the event's block is reorged out (`removed: true` log arrives), the pending invocation is cancelled with no side effects. +- If the reorged transaction is re-included (the same event appears again), the confirmation window restarts from zero. + +The delay applies uniformly to **all** events, not only deposit-class ones. Selective gating would require the component to understand event semantics and introduce ordering hazards when events for different channels arrive interleaved — for example, a deposit event and a challenge event on separate channels could fire their handlers out of original arrival order if only the deposit is delayed. Uniform delay preserves the relative order of all events as they arrived from the chain while adding a single, predictable latency layer. + +### 2.1 Residual risk and the finality trade-off + +The confirmation window eliminates the reorg risk only when `confirmation_delay_secs` is set to or above the chain's cryptoeconomic finality time. For the representative values in §3: + +- **Ethereum at 780s (~13 min):** matches Casper FFG hard finality. Reorging past this point requires ≥1/3 of total stake to be slashed. No residual risk. +- **Polygon at 10s, BNB at 5s:** exceeds the empirical reorg tail depth. Residual risk is negligible but not cryptoeconomically eliminated. +- **Ethereum at 36s (3 blocks, "quick" finality):** P(reorg depth ≥ 4) ≈ 10⁻⁵–10⁻⁶ per event. Residual risk is real. + +When `confirmation_delay_secs` is set *below* the chain's finality time, **this specification acknowledges a residual risk**: it is possible — with low but non-zero probability — that an event passes the gate, the reactor commits it to the database, and the block containing that event is subsequently reorged out by a reorg deeper than the gate window. + +When this occurs, the committed state (balance credit, channel open) has no corresponding on-chain event in the canonical chain. 
If the transaction is re-mined in the new canonical block, the reactor's idempotency guard (§6.6) handles the re-delivery cleanly. If it is not re-mined, the DB retains stale state that can only be partially corrected on the next node restart via the reconciliation walk (§4.4). There is no automated rollback; the exposure scales with the deposit value and is bounded by the probability of deep reorgs on the target chain. + +Operators who cannot accept this residual exposure should set `confirmation_delay_secs` to the chain's hard-finality time (Ethereum: 780s; Polygon: `finalized` tag resolves to ~5s; L2s: `finalized` maps to L1 Casper FFG at ~13 min). The gate's detection mechanisms (§6.5, §6.6) provide observability when the residual-risk scenario occurs. + +--- + +## 3. Configuration + +A new `confirmation_delay_secs` field is added per chain in `blockchains.yaml`. Representative values: + +```yaml +chains: + - id: 1 # Ethereum mainnet + confirmation_delay_secs: 780 # ~13 min — Casper FFG hard finality + - id: 137 # Polygon PoS (post-Heimdall v2 / Rio) + confirmation_delay_secs: 10 # 5 blocks × ~2s; empirical reorg tail is sub-10s + - id: 56 # BNB Smart Chain + confirmation_delay_secs: 5 # fast-finality, ~3-4 blocks + - id: 42161 # Arbitrum One + confirmation_delay_secs: 120 # L2 `safe` tag (L1-posted batch), ~1-2 min + - id: 8453 # Base + confirmation_delay_secs: 120 # same L2 `safe` semantics +``` + +`confirmation_delay_secs: 0` disables the gate — events are processed immediately. Appropriate for BFT single-slot chains where the node operator accepts the negligible residual risk, or for chains using a finality-tag subscription rather than a block-count gate. + +--- + +## 4. Confirmation Window Behavior + +### 4.1 Normal path + +When a log `E` arrives (without `Removed: true`): + +1. 
Record the event in the live-entry map under `(txHash, logIndex)` with its `blockHash` as the tombstone discriminator, and append it to the FIFO drain queue with its block timestamp as `arrivedAt`. +2. The gate's drain goroutine (single shared timer per gate; see §6.3) treats the entry as eligible once `arrivedAt + confirmation_delay_secs` has elapsed. +3. When the entry matures, invoke the event handler. + +### 4.2 Reorg path + +If a log with `Removed: true` arrives for the same `(txHash, blockHash, logIndex)` before the entry matures: + +- Cancel the pending entry by deleting it from the `pending` map; the shared timer is left untouched and the tombstoned queue entry is skipped when it reaches the head (§6.3). +- Do not invoke the handler — no state change occurs. +- The listener remains active. When the same transaction is re-included, its event will be delivered again (without `Removed: true`) and the gate starts a fresh window under the new block's hash. + +### 4.3 Out-of-order delivery + +The re-added event (no `Removed: true`, new block) may arrive at the listener before the corresponding `Removed: true` log for the old block. When this happens, the gate **replaces** the pending entry for `(txHash, logIndex)` with the new one and restarts the confirmation window under the new block's hash: + +- On the non-removed re-add, overwrite `pending[(txHash, logIndex)]` with the new `blockHash` and append the new event to the queue tail with a fresh `arrivedAt`. The earlier queue entry remains in place as a tombstone — its `blockHash` no longer matches `pending`, so the drain goroutine silently skips it when it reaches the head. +- The subsequent `Removed: true` log for the OLD block carries the old `blockHash` and therefore matches neither `pending` (whose value is now the new block's hash) nor any `forwardedSet` record. It performs a no-op. + +The tombstone-map design replaces the prior slice-scan approach: every live operation is O(1), and exactly one event per `(txHash, logIndex)` is forwarded — the latest re-mining. 
+ +- On a `Removed: true` log for a key that **has no live `pending` entry and no `forwardedSet` record**: no-op. The event either belongs to a block that was already replaced by a later re-add (handled above), or it is a stale removal from a fork the gate has no record of. + +> Repeated reorgs of the same transaction are theoretically possible but imply a chain-level consensus failure. The gate's replace/restart cycle handles each naturally; no special cap is needed. + +### 4.4 Startup and reconciliation + +#### Prerequisites + +Before the reconciliation logic described below can function, `block_hash` must be added as a column to `contract_events` and to the `core.BlockchainEvent` struct. The value is available in `types.Log.BlockHash` at the time the gate calls the reactor. Without this column, reorg detection in steps 2–4 is not possible. + +**Why `block_hash` is the minimal required addition — and why alternatives fail:** + +The reconciliation walk needs to answer one question per stored block: "is this specific block still in the canonical chain?" The definitive answer combines the stored hash with an `eth_getBlockByNumber(storedBlockNumber)` lookup — the canonical chain has exactly one block at each height, and comparing its hash to the stored hash tells us whether the stored block is still canonical. Without the stored hash, two alternatives were evaluated and both fail: + +- **`block_number` alone is insufficient.** After a reorg, a *different* block can occupy the same height. Calling `eth_getBlockByNumber(storedBlockNumber)` always returns a block — but it may be a new block from the reorged fork. Without the original hash there is no way to tell whether the block returned is the one the reactor processed. + +- **`transaction_hash` via `eth_getTransactionReceipt` is insufficient.** A block can be reorged out even if every one of its transactions was re-mined in a new block at the same height. 
In that case all receipt lookups return `blockNumber` matching the stored value, but the original block is gone and the stored DB state no longer corresponds to the canonical chain. Additionally, the backward walk (step 3) must traverse every stored *block* in descending order; rows in `contract_events` only exist for blocks that contained a `ChannelHub` event. A reorg that diverged entirely within a gap — blocks with no relevant events — is invisible to a tx-receipt-based walk. + +Note that `eth_getBlockByHash(storedHash)` alone is **not** suitable as the canonicality check: a node may still have the orphan side-chain header cached locally and return it successfully, so a non-null response does not prove the block is in the canonical chain. The check must use `eth_getBlockByNumber` so the response is by definition the current canonical block at that height. + +`block_hash` is a single `CHAR(66)` column. Its addition enables exact, O(1)-per-step canonicality checks and is the only approach that handles all reorg scenarios correctly. + +#### Definition: latest processed block + +The **latest processed block** for a chain is the highest block number at which the reactor successfully committed at least one event to the database — identical to the listener's existing startup cursor (`MAX(block_number)` in `contract_events` for this `blockchain_id` and contract address, computed by `GetLatestContractEventBlockNumber`). This is distinct from the highest block the listener ever *saw*: the listener may have seen many blocks that contained no relevant events and therefore left no `contract_events` rows. + +#### Reconciliation steps + +On startup, for each chain, after the `block_hash` migration has been applied: + +1. Query `contract_events` for the latest committed event: `latestBlockNum = MAX(block_number)`, `latestBlockHash = block_hash` at that row. If no rows exist, start the scan from the chain's configured genesis / start block and skip to step 5. +2. 
Call `eth_getBlockByNumber(latestBlockNum)` on the chain's RPC and compare the returned block's hash against `latestBlockHash`. + - **Hash matches** → the stored block is the current canonical block at that height; no reorg above it. Proceed to step 4. + - **Hash differs** → a different block now occupies that height; the stored block has been reorged out. Proceed to step 3. + - **`ethereum.NotFound`** (RPC has no canonical block at that number, e.g. the height was pruned) → treat as reorged-out and proceed to step 3 rather than failing startup. +3. **Common-ancestor walk using stored block hashes:** query `contract_events` for the next-older distinct `block_hash` (the highest `block_number` strictly below the current candidate). Repeat step 2 with this (number, hash) pair. Continue until a stored block is confirmed canonical, or until no older stored hash exists. This height is the **common ancestor**. + + > **Why walk stored hashes, not block numbers?** In normal operation most blocks contain no `ChannelHub` events, so `contract_events` has no row for them. A block-number walk would find nothing to compare at event-gap heights and could miss a reorg that occurred entirely within such a gap. Walking by stored block hashes ensures every comparison is against a block the reactor actually processed. + + If the walk exhausts stored rows without finding a canonical one **and** no older row exists (`prevNum == 0` with `prevHash == ""`), the listener resumes from the *original* latest stored block number. The orphaned hash is discarded; `eth_getLogs` is a canonical-chain range query, so canonical-replacement logs between that height and the current tip are re-fetched normally. The empty-store case (`latestNum == 0`) continues to skip historical replay and tracks the chain from the live subscription. + +4. Set the scan start to `commonAncestorBlockNum`. 
Events between `commonAncestorBlockNum` and `latestBlockNum` that came from the reorged fork are still present in the DB. The reactor has no rollback mechanism for those rows — the re-scan below will re-apply canonical events over them where the transaction was re-mined (idempotent), and leave the orphaned DB state in place where the transaction was not re-mined (residual risk; see §2.1). State-setting operations (`UpdateChannel`, `RefreshUserEnforcedBalance`) will overwrite with canonical values for re-mined events; rows from dropped transactions remain as stale data with no automated cleanup. +5. Start the event scan from `commonAncestorBlockNum` (or genesis if step 1 found no rows). Replayed events are routed **per-event by block age**: + - Events whose block timestamp is **older than `confirmation_delay_secs`** are routed directly to the reactor, bypassing the gate. Their block is past the reorg window — `eth_getLogs` returned them as canonical, and any reorg that could displace them would exceed the configured finality bound. There is no incremental reorg risk to guard against, and routing them through the gate would only add latency. + - Events whose block timestamp is **younger than `confirmation_delay_secs`** are routed through the gate, the same path live events take. The common-ancestor walk only confirms the *starting* block is canonical; replay can fetch logs from blocks all the way up to the current chain tip, some of which are still inside the reorg window. Forwarding those directly to the reactor would re-introduce the very double-spend window the gate was built to close. + + The `Listener` accepts two handlers (`eventHandler` for live events and recent historical events, `historicalEventHandler` for mature historical events) and makes the per-event routing decision from `eventLog.BlockTimestamp`. 
To guarantee that field is populated regardless of the RPC provider's behavior, the listener calls `ensureBlockTimestamp` once per event, which uses `eventLog.BlockTimestamp` when present and falls back to `HeaderByHash` otherwise (at most one fetch per block regardless of event count). + When `confirmation_delay_secs` is `0` the gate is disabled and every historical event is routed to `historicalEventHandler`. On an `ensureBlockTimestamp` failure the Listener falls back to `eventHandler` (the gate) — the conservative choice that preserves the reorg-protection invariant at the cost of a small delay. +6. The reactor is idempotent for replayed events: `HandleHomeChannelCreated` has an explicit early-return guard when the channel is already open; `HandleHomeChannelCheckpointed` and `RefreshUserEnforcedBalance` use set-semantics (not accumulation) and recompute from the latest DB state. Before opening a transaction, `HandleEvent` calls `IsContractEventProcessed`; if the event is already committed, it returns `nil` immediately with no DB transaction opened. If `IsContractEventProcessed` returns an error, `HandleEvent` returns the wrapped error; the listener unsubscribes and the process restarts (per the lifecycle closure in §6.8), re-fetching the same range via the DB cursor so the pre-check retries. For events that pass the pre-check, `StoreContractEvent` is called last inside the DB transaction and enforces a unique constraint on `(transaction_hash, log_index, blockchain_id)` as a final backstop. +7. Historical log queries (`eth_getLogs`) return only canonical chain events — there are no `Removed: true` signals during replay, and only replayed events still inside the confirmation window flow through the gate; mature replayed events bypass it (step 5). 
Removal signals from the live WebSocket subscription that arrive during the replay phase are buffered in the listener's `currentCh` and reach the gate only after the historical replay phase completes; if they cancel a re-mined event that has already been forwarded by the live path, the post-gate reorg detection in §6.5 logs them. +8. When `confirmation_delay_secs == 0`, the listener drops `Removed:true` live logs at the Phase 2 boundary because there is no downstream gate to consume them; the reactor never receives `Removed:true` logs in either mode. + +--- + +## 5. Scope + +The delay applies to **all** events emitted by the `ChannelHub` contract on a given chain. No filtering by event type is performed inside the gate. + +> **Note:** `ChannelCreated` (`handleHomeChannelCreated`) calls `RefreshUserEnforcedBalance`. Verify whether the initial channel state carries a non-zero deposit; if it does, the uniform delay already protects it — no special casing is needed. + +--- + +## 6. Implementation Notes + +### 6.1 Component placement and wiring + +The `ConfirmationGate` is a thin in-memory component that sits between the raw log stream (`listener.go`) and the `ChannelHubReactor`. + +**Existing wiring** (`nitronode/main.go:127-129`): + +```go +reactor := evm.NewChannelHubReactor(b.ID, ...) +l := evm.NewListener(..., reactor.HandleEvent, ...) +``` + +The listener accepts a handler of type `HandleEvent func(ctx context.Context, eventLog types.Log) error`. The gate exposes the same signature and is inserted between the two: + +```go +reactor := evm.NewChannelHubReactor(b.ID, ...) +var liveHandler evm.HandleEvent +if confirmationDelay > 0 { + gate, err := evm.NewConfirmationGate(confirmationDelay, b.ID, reactor.HandleEvent, logger) + if err != nil { /* fatal */ } + gate.Start(ctx) + liveHandler = gate.HandleEvent +} else { + liveHandler = reactor.HandleEvent +} +l := evm.NewListener(..., liveHandler, reactor.HandleEvent, ...) 
+``` + +The constructor returns an error for `delay <= 0`; the wiring layer is responsible for skipping gate construction when the operator configured `confirmation_delay_secs: 0` and routing live events straight to the reactor. + +The reactor itself does not change. All the listener's existing logic — subscription management, cursor tracking, reconnection, historical replay — is unaffected. + +**Handling `Removed: true` logs:** currently `listener.go:289-294` skips removed logs before they reach the handler. This skip must be moved: the listener should forward removed logs to `gate.HandleEvent` (they still carry the `Removed` flag on `types.Log`), and the gate alone decides whether to cancel a pending entry or ignore the signal. The reactor never sees a `Removed: true` log. + +### 6.2 Event identity for queue keying + +The Listener delivers events in strict block order, so the FIFO queue is naturally ordered by arrival time. Two distinct keys identify events at different layers of the design: + +- **`(txHash, logIndex)` — the live-entry key, used as the tombstone-map (`pending`) key.** On a non-removed arrival, the Pusher sets `pending[ek] = eventLog.BlockHash` (overwriting any prior value) and appends to the queue tail. On a `Removed: true` arrival, the Pusher checks `pending[ek]` and cancels (deletes from `pending`) if the stored `blockHash` matches the removed log's. A stale removal for an OLD block whose `pending` value has already been overwritten by a newer re-add will not match and falls through to the `forwardedSet` lookup (§6.5). Both operations are O(1) map lookups; the queue body is never scanned. +- **`(txHash, blockHash, logIndex)` — the post-gate detection key (`forwardedKey`), used to index `forwardedSet`.** When the drain goroutine forwards an event, it inserts this triple into `forwardedSet` so a later `Removed: true` for the same exact occurrence can be matched and the post-gate reorg WARN emitted. 
Including `blockHash` ensures a stale removal for an already-replaced fork cannot cause a spurious WARN against a different re-mining. + +`blockHash` is excluded from the live-entry key so that a re-mining of the same tx overwrites the original `pending` value regardless of which block it landed in. `blockHash` is included in the post-gate detection key so that the WARN matches the specific occurrence that was forwarded. + +A single transaction can emit multiple events for the same `txHash` (e.g., two `ChannelDeposited` logs in a batch open). `logIndex` disambiguates these; it is unique per log within a block and is present in both the live event and its corresponding `Removed: true` log. + +`blockHash` is also used by: + +- The post-gate reorg detection map (`forwardedSet`, §6.5) — keyed by `(txHash, blockHash, logIndex)` to identify which specific occurrence was forwarded, with the FIFO `forwardedQueue` driving O(1) eviction. +- `StoreContractEvent` in the reactor — stored in `contract_events` for the reconciliation walk (§4.4). + +### 6.3 Timer-and-kick design + +**Data structure:** a FIFO queue of `(types.Log, arrivedAt time.Time)` paired with a `pending` tombstone map that is the source of truth for which queue entries are live. The queue is append-tail and pop-head only; stale entries are skipped at the head by comparing `pending[ek]` to the popped entry's `BlockHash`. Removal scans of the queue body are eliminated. 
+ +```go +type queueEntry struct { + log types.Log + arrivedAt time.Time +} + +type eventKey struct { // used as the tombstone-map key (re-add replaces prior entry) + txHash common.Hash + logIndex uint +} + +type forwardedKey struct { // post-gate detection key (full triple, written by drain goroutine, read on Removed) + txHash common.Hash + blockHash common.Hash + logIndex uint +} + +type forwardedExpiry struct { + key forwardedKey + forwardedAt time.Time +} + +type ConfirmationGate struct { + delay time.Duration + chainID uint64 + handler HandleEvent + logger log.Logger + + mu sync.Mutex + queue []queueEntry // protected by mu + pending map[eventKey]common.Hash // live (txHash, logIndex) -> blockHash; protected by mu + forwardedSet map[forwardedKey]time.Time // protected by mu; entries are kept for a small multiple of `delay` (see §6.5) + forwardedQueue []forwardedExpiry // FIFO of (key, forwardedAt) driving O(1) eviction; protected by mu + + kick chan struct{} // buffered 1, non-blocking sender + timer *time.Timer // created in Start(ctx); reset to the head entry's deadline +} +``` + +--- + +**Pusher path** (driven by the existing Listener; implements the `HandleEvent` signature) + +Receives `types.Log` from the Listener. On each event: + +- If `Removed: true` — under `mu`: if `pending[ek] == eventLog.BlockHash`, `delete(pending, ek)` (pre-gate cancel; the tombstoned queue entry is silently skipped when it reaches the head). Otherwise, if `forwardedSet[fk]` is set, emit the post-gate WARN (§6.5) and `delete(forwardedSet, fk)`; leave the corresponding `forwardedQueue` entry in place — it expires on its own and the eviction loop's value-check makes the early delete safe. Otherwise, emit a DEBUG "removal for unknown/stale event". +- Otherwise — under `mu`: set `pending[ek] = eventLog.BlockHash` (replacing any prior value for the same `(txHash, logIndex)`) and append `(log, arrivedAt)` to the queue tail. `arrivedAt` is the block timestamp (see §6.7). 
Release `mu` and send a non-blocking `kick` (`select { case g.kick <- struct{}{}: default: }`). + +No expiration check, no forwarding. Push only. + +--- + +**Drain goroutine** (single, started by `Start(ctx)`) + +A single timer drives forwarding; no idle wakeups. The timer is reset to the head entry's deadline; a 1-buffered `kick` channel coalesces wakeups from the Pusher when a new head deadline is sooner than the currently-armed timer (or when the queue was empty). + +```go +for { + select { + case <-ctx.Done(): + return + case <-g.kick: + case <-g.timer.C: + } + g.drainAndReschedule() +} +``` + +`drainAndReschedule`: + +1. Under `mu`: `now := time.Now()`. While the head entry is mature (`queue[0].arrivedAt + delay <= now`): + - Pop it. + - **Tombstone check:** if `pending[ek] != entry.log.BlockHash`, the live entry for that `(txHash, logIndex)` has been replaced by a re-add. Drop silently. Do **not** touch `pending[ek]` — it refers to the *new* live entry still in the queue. + - Otherwise: `delete(pending, ek)`; `forwardedSet[fk] = now`; `forwardedQueue = append(forwardedQueue, forwardedExpiry{fk, now})`. **These three writes happen before releasing `mu`** around the handler call, so a fast `Removed: true` arriving immediately after forwarding always sees the entry and emits the post-gate WARN. + - Release `mu`, call `handler`, re-acquire. +2. Evict aged-out `forwardedSet` entries (see §6.5). +3. Reset the timer to the new head's deadline, or leave it stopped if the queue is empty (the next `kick` will recompute). + +No event handling, no Listener awareness. Drain-and-forward only. 
+ +--- + +**Properties** + +| Property | Detail | +| --- | --- | +| Chain-agnostic | `confirmationDelay` is the only chain-specific input | +| Forward latency after window | Bounded by timer scheduling jitter; no fixed polling tick | +| Idle cost | None — no ticker; the goroutine blocks on `ctx.Done()`/`kick`/`timer.C` | +| Reorg within window | Pusher's tombstone delete cancels the entry; Reactor never sees the event | +| Reorg deeper than window | Rare; Reactor-level idempotency (§6.6) handles re-delivered events | +| Concurrency | Pusher and drain goroutine share `mu`; Reactor is called outside the lock | +| Shutdown | Drain goroutine exits on `ctx.Done()`; `defer g.timer.Stop()` cleans up the timer; entries still in queue are discarded (safe — they were never forwarded). `kick` is **not** closed — the Pusher may still be invoked by an in-flight listener event during shutdown, and the non-blocking send is safe whether the receiver is alive or gone. | + +### 6.4 Exposing `confirmation_delay_secs` via API + +Clients need to know the confirmation delay for each chain so they can display the correct waiting time to users after submitting a deposit. The best existing candidate is **`node.v1.GetConfig`**, which already returns a per-chain `BlockchainInfoV1` object. + +Files to update: + +- `pkg/rpc/types.go` — add `ConfirmationDelaySecs uint64` to `BlockchainInfoV1`. +- `nitronode/api/node_v1/utils.go` — populate the new field in `mapBlockchainV1` from the chain's loaded config. +- `pkg/core/types.go` (or wherever `core.Blockchain` is defined) — add `ConfirmationDelaySecs uint32` (the same type as `BlockchainConfig.ConfirmationDelaySecs`, which is assigned to it without conversion) so the value flows from `blockchains.yaml` through config loading into the API handler. + +No new endpoint is needed. The field appears alongside existing per-chain fields (contract addresses, asset list, block time) and is read-only from the client's perspective. 
+ +### 6.5 Post-gate reorg detection in the gate + +The `forwardedSet` membership map (paired with the `forwardedQueue` FIFO; both in the `ConfirmationGate` struct, §6.3) provides detection without any DB access. The **drain goroutine** writes to both each time it forwards an event; the **Pusher** reads `forwardedSet` when a `Removed: true` log arrives and finds no live entry in `pending`. + +When `Removed: true` arrives in the Pusher: + +- **`pending[ek] == eventLog.BlockHash`** → normal pre-gate removal; delete from `pending` and return. No log. +- **No pre-gate match, but `forwardedKey{txHash, blockHash, logIndex}` is in `forwardedSet`** → the event was already forwarded to the Reactor and its block has now been reorged out. Log at **`WARN`** with `txHash`, `blockHash`, `logIndex`, `chainID`. `delete(forwardedSet, fk)`. The corresponding `forwardedQueue` entry is left in place — it ages out on its own; the eviction loop's value-check (below) tolerates the early delete. +- **Match in neither** → log at `DEBUG` ("removal for unknown/stale event" — predates the current run or arrived after FIFO eviction). + +`forwardedSet` entries are kept for a small multiple of `delay` — long enough that any `Removed: true` for a forwarded event arrives while the entry is still present, short enough that the map remains bounded. The exact multiplier is an implementation choice (current value: see `recentMultiplier` in `confirmation_gate.go`; e.g. 2 or 3 work in practice). + +Eviction is performed in `drainAndReschedule` (the timer/kick goroutine), not in a separate sweep: + +- Pop the front of `forwardedQueue` while `now − forwardedAt > recentMultiplier × delay`. +- For each popped `forwardedExpiry{key, forwardedAt}`, **delete from `forwardedSet` only if `forwardedSet[key] == forwardedAt`**. 
The value check guards the rare re-forward case (same key forwarded a second time after the chain un-reorgs back to the original block and a fresh delay elapses): the older FIFO entry must not evict the newer set membership. It also makes the §6.5 early delete (post-gate WARN path) a safe no-op when the eviction loop later visits its `forwardedQueue` sibling. + +`forwardedAt` is the gate's wall-clock at forward time — not `BlockTimestamp` — so FIFO ordering is monotonic regardless of how `arrivedAt` was sourced. The map stays bounded because every forwarded key is evicted once it is older than `recentMultiplier × delay` (its size tracks the number of events forwarded in that window, not the number of reorgs); a short window suffices because post-gate reorgs are rare and `Removed: true` arrives within one or two block-times of the reorg. No separate cleanup goroutine is required. + +### 6.6 Reactor defense-in-depth: skip re-delivered events + +When a re-added event reaches the reactor (same tx re-mined in a new block after a reorg, confirmed by a fresh gate timer), the reactor attempts to process an event it has already committed. This guard converts what is currently a DB constraint-violation error and a full transaction rollback into a clean, explicit logged exit. + +**Important limitation:** this guard identifies events by `(txHash, logIndex, blockchainID)`, where `log_index` is a **block-level** index in go-ethereum — the position of this log among all logs in the entire block, across all transactions. If a transaction is re-mined in a new block where different transactions precede it, its logs receive different block-level `log_index` values. The new `(txHash, newLogIndex, blockchainID)` tuple does not match any committed row, so `IsContractEventProcessed` returns `false` and **the reorged event passes through this check**. In that case the reactor's business-logic idempotency is the actual guard (see below). This guard therefore only catches exact re-deliveries — cases where `log_index` is unchanged. 
+ +Add a new method to `ChannelHubReactorStore`: + +```go +// IsContractEventProcessed reports whether an event identified by +// (txHash, logIndex, blockchainID) has already been committed, +// regardless of which block it appeared in. +// NOTE: uses block-level logIndex — does not detect reorged events +// where the same tx re-mines with a different block-level log position. +IsContractEventProcessed(txHash string, logIndex uint32, blockchainID uint64) (bool, error) +``` + +At the top of `HandleEvent`, before entering `useStoreInTx`, call this method. If the event is already committed, log at **`INFO`** ("skipping re-delivered event, already committed") and return `nil` immediately. No transaction is opened; no state is touched. If `IsContractEventProcessed` itself returns an error, `HandleEvent` returns the wrapped error immediately; the listener unsubscribes and the process restarts (per the lifecycle closure in §6.8). On restart, the DB cursor re-fetches the same range and the pre-check retries. + +Reorged events that pass through this check are still neutralized by the reactor's **business-logic idempotency**: + +- `HandleHomeChannelCreated` has an explicit early-return when the channel is already open. +- `HandleHomeChannelCheckpointed` and `RefreshUserEnforcedBalance` use set-semantics (overwrite, not accumulate). +- The `StoreContractEvent` unique constraint on `(transaction_hash, log_index, blockchain_id)` remains as the final backstop for the case where `log_index` happens to be unchanged. + +The value of `IsContractEventProcessed` is therefore: + +1. **Noise reduction for exact re-deliveries** — converts a constraint-violation rollback (logged as an error by the gate's drain goroutine) into a clean INFO exit with no DB transaction opened. +2. 
**Correctness for the reconciliation walk (§4.4)** — when the node replays already-processed historical events on startup, every re-delivered event would otherwise produce a constraint-violation error and potentially stall the walk. This pre-check makes the reconciliation path viable. + +Together, §6.5 and §6.6 produce two complementary log signals: + +| Signal | Source | Level | Meaning | +| --- | --- | --- | --- | +| "post-gate reorg detected for event X" | Gate | WARN | Committed block was reorged; residual-risk scenario is active | +| "skipping re-delivered event X" | Reactor | INFO | Same tx re-mined at same block position; reactor correctly skips it | + +If the operator sees the WARN but never the INFO, either the transaction was not re-mined, or it was re-mined at a different block position (this check did not fire; business-logic idempotency handled it silently). + +#### Reorg-safe idempotency — separate task + +To make the idempotency check itself robust to reorged events regardless of block position, the idempotency key must be stable across re-mining. The block-level `log_index` is not stable; a **tx-relative log index** is. + +The tx-relative log index is the 0-based position of a log within its own transaction's emitted logs. It is invariant: the same transaction always emits the same logs in the same order, so its tx-relative indices never change across reorgs. The EVM guarantees that all logs of a transaction arrive consecutively in ascending block-level order, so the tx-relative index can be computed in-process as: + +``` +tx_log_index = l.Index - min(l.Index for all logs of l.TxHash in this block) +``` + +No RPC call is required — the minimum is established by the first log of each transaction seen in a block, which always arrives before subsequent logs of the same transaction. 
+ +Implementing this requires: + +- **DB migration**: add `tx_log_index` column to `contract_events`; replace the unique index `(transaction_hash, log_index, blockchain_id)` with `(transaction_hash, tx_log_index, blockchain_id)`. +- **`BlockchainEvent` struct**: add `TxLogIndex uint32` field. +- **Reactor**: maintain a small in-memory map `(blockHash, txHash) → minBlockLogIndex` to compute `tx_log_index` for each incoming event; evict entries when a new block is first seen. +- **`IsContractEventProcessed` and `StoreContractEvent`**: operate on `tx_log_index` instead of `log_index`. + +**This is a separate task.** It is not part of the current confirmation-gate scope. Until it is implemented, the reactor relies on business-logic idempotency for the reorged-different-position case, which is correct but not explicitly guarded at the storage layer. + +### 6.7 Source of `arrivedAt` and the listener's timestamp fallback + +The gate uses the **block timestamp** of each event as its `arrivedAt` reference rather than wall-clock time. This ensures that events replayed from historical blocks (whose timestamps are minutes or hours in the past) are forwarded immediately on the first drain, without waiting for the full confirmation delay to elapse again. + +#### Source of `arrivedAt` + +The gate reads `eventLog.BlockTimestamp` directly from the `types.Log` it receives. It performs no RPC, holds no timestamp cache, and depends on nothing other than the in-memory value on the log struct. The listener guarantees `BlockTimestamp` is non-zero before forwarding a non-removed event to the gate. If the gate ever observes a zero value (defense-in-depth for tests and edge cases), it falls back to `time.Now()` for that single event; the listener owns any operational warning. 
+ +#### Reliability and fallback + +`blockTimestamp` is part of the Ethereum execution JSON-RPC spec (execution-apis `receipt.yaml`, 2024) and is populated by current Geth (≥1.13.10), Erigon, Nethermind, Reth, Besu, recent `bnb-chain/bsc`, Bor, Arbitrum Nitro, and op-geth (Base, Optimism). It is **not** populated by Avalanche C-Chain (`ava-labs/libevm` does not define the field) and is unreliable on older `bsc-dataseed` nodes still in production rotation. + +Therefore the **listener** — not the gate — owns the fallback. Before forwarding a non-removed event to the gate (or to the reactor on the historical bypass), the listener calls `ensureBlockTimestamp`, which uses `eventLog.BlockTimestamp` when present and falls back to one `HeaderByHash(blockHash)` RPC otherwise. A single-entry cache keyed on `lastBlockHash` elides repeat fetches for consecutive events from the same block, which — because the listener delivers events in block order — is the only relevant case. `Removed: true` logs skip `ensureBlockTimestamp` entirely; the gate's cancel path never reads `BlockTimestamp`. +On `HeaderByHash` failure the listener logs a WARN and forwards the event through the gate anyway, where the zero-defense fallback above degrades the entry to a wall-clock delay rather than dropping it silently. + +--- + +### 6.8 Handler error semantics + +When a downstream handler invoked after the confirmation delay returns an error, the gate's `run` goroutine returns the error and the gate's lifecycle closure (passed to `Start`) is invoked with it. In `nitronode/main.go`, that closure calls `logger.Fatal` → process exit. The supervisor restarts the process; the next `Listen` invocation re-fetches the unstored event via the DB cursor in `findCommonAncestor` + Phase 1 reconciliation, restoring the pre-PR crash-restart-replay invariant. The gate does **not** retry handler errors in-process; this is intentional and matches pre-PR behavior. 
Events queued behind the failed event are dropped on teardown and re-fetched after restart. The gate's lifecycle (`Start(ctx, handleClosure)`) is identical to `Listener.Listen` and `BlockchainWorker.Start`; the listener does not know that its downstream handler may fail asynchronously — error propagation is handled by the supervisor (`main.go`), where it already is for the other two components. diff --git a/nitronode/main.go b/nitronode/main.go index 1735cbe0f..64475f370 100644 --- a/nitronode/main.go +++ b/nitronode/main.go @@ -119,9 +119,32 @@ func main() { return wrapInTx(func(s database.DatabaseStore) error { return h(s) }) } - reactor := evm.NewChannelHubReactor(b.ID, bb.StateSigner.PublicKey().Address().String(), eventHandlerService, bb.MemoryStore, useCHRStoreInTx) + reactor := evm.NewChannelHubReactor(b.ID, bb.StateSigner.PublicKey().Address().String(), eventHandlerService, bb.MemoryStore, useCHRStoreInTx, bb.DbStore) reactor.SetOnEventProcessed(bb.RuntimeMetrics.IncBlockchainEvent) - l := evm.NewListener(common.HexToAddress(b.ChannelHubAddress), client, b.ID, b.BlockStep, logger, reactor.HandleEvent, bb.DbStore) + + confirmationDelay := time.Duration(b.ConfirmationDelaySecs) * time.Second + var liveHandler evm.HandleEvent + if confirmationDelay > 0 { + gate, err := evm.NewConfirmationGate(confirmationDelay, b.ID, reactor.HandleEvent, logger) + if err != nil { + logger.Fatal("failed to create confirmation gate", "error", err, "blockchainID", b.ID) + } + gate.Start(blockchainCtx, func(err error) { + if err != nil { + logger.Fatal("confirmation gate stopped", "error", err, "blockchainID", b.ID) + } + }) + liveHandler = gate.HandleEvent + } else { + liveHandler = reactor.HandleEvent + } + + // Live events flow through the confirmation gate (when delay > 0) or directly to the + // reactor (when delay == 0). 
Historical events from eth_getLogs are routed per-event + // based on block age: events older than confirmationDelay go directly to the reactor + // (past the reorg window); recent events still flow through the live handler because + // their blocks may still be reorged. + l := evm.NewListener(common.HexToAddress(b.ChannelHubAddress), client, b.ID, b.BlockStep, confirmationDelay, logger, liveHandler, reactor.HandleEvent, bb.DbStore) l.Listen(blockchainCtx, func(err error) { if err != nil { logger.Fatal("blockchain listener stopped", "error", err, "blockchainID", b.ID) diff --git a/nitronode/store/database/contract_event.go b/nitronode/store/database/contract_event.go index cdc5af2db..5c6b8fa64 100644 --- a/nitronode/store/database/contract_event.go +++ b/nitronode/store/database/contract_event.go @@ -17,6 +17,7 @@ type ContractEvent struct { BlockchainID uint64 `gorm:"column:blockchain_id"` Name string `gorm:"column:name"` BlockNumber uint64 `gorm:"column:block_number"` + BlockHash string `gorm:"column:block_hash"` TransactionHash string `gorm:"column:transaction_hash"` LogIndex uint32 `gorm:"column:log_index"` CreatedAt time.Time `gorm:"column:created_at"` @@ -34,6 +35,7 @@ func (s *DBStore) StoreContractEvent(ev core.BlockchainEvent) error { BlockchainID: ev.BlockchainID, Name: ev.Name, BlockNumber: ev.BlockNumber, + BlockHash: ev.BlockHash, TransactionHash: strings.ToLower(ev.TransactionHash), LogIndex: ev.LogIndex, CreatedAt: time.Now(), @@ -55,11 +57,12 @@ func (s *DBStore) GetLatestContractEventBlockNumber(contractAddress string, bloc return blockNumber, nil } -// IsContractEventPresent checks whether a specific contract event has already been stored. -func (s *DBStore) IsContractEventPresent(blockchainID, blockNumber uint64, txHash string, logIndex uint32) (bool, error) { +// IsContractEventProcessed reports whether an event identified by (txHash, logIndex, blockchainID) +// has already been committed, regardless of which block it appeared in. 
+func (s *DBStore) IsContractEventProcessed(txHash string, logIndex uint32, blockchainID uint64) (bool, error) { var ev ContractEvent - err := s.db.Where("blockchain_id = ? AND block_number = ? AND transaction_hash = ? AND log_index = ?", - blockchainID, blockNumber, strings.ToLower(txHash), logIndex). + err := s.db.Where("transaction_hash = ? AND log_index = ? AND blockchain_id = ?", + strings.ToLower(txHash), logIndex, blockchainID). Take(&ev).Error if errors.Is(err, gorm.ErrRecordNotFound) { return false, nil @@ -69,3 +72,37 @@ func (s *DBStore) IsContractEventPresent(blockchainID, blockNumber uint64, txHas } return true, nil } + +// GetLatestContractEventBlockHashAndNumber returns the block_number and block_hash of the +// highest stored event for the given contract. Returns (0, "", nil) when no rows exist. +func (s *DBStore) GetLatestContractEventBlockHashAndNumber(contractAddress string, blockchainID uint64) (uint64, string, error) { + var ev ContractEvent + err := s.db.Where("blockchain_id = ? AND contract_address = ?", blockchainID, strings.ToLower(contractAddress)). + Order("block_number DESC"). + First(&ev).Error + if errors.Is(err, gorm.ErrRecordNotFound) { + return 0, "", nil + } + if err != nil { + return 0, "", err + } + return ev.BlockNumber, ev.BlockHash, nil +} + +// GetPreviousDistinctBlockHash returns the block_number and block_hash of the highest +// stored event whose block_number is strictly below belowBlockNumber. Returns (0, "", nil) +// when no such row exists (signals genesis fallback). +func (s *DBStore) GetPreviousDistinctBlockHash(contractAddress string, blockchainID uint64, belowBlockNumber uint64) (uint64, string, error) { + var ev ContractEvent + err := s.db.Where("blockchain_id = ? AND contract_address = ? AND block_number < ?", + blockchainID, strings.ToLower(contractAddress), belowBlockNumber). + Order("block_number DESC"). 
+ First(&ev).Error + if errors.Is(err, gorm.ErrRecordNotFound) { + return 0, "", nil + } + if err != nil { + return 0, "", err + } + return ev.BlockNumber, ev.BlockHash, nil +} diff --git a/nitronode/store/database/contract_event_test.go b/nitronode/store/database/contract_event_test.go index 11a28ab02..1e14e5127 100644 --- a/nitronode/store/database/contract_event_test.go +++ b/nitronode/store/database/contract_event_test.go @@ -82,7 +82,7 @@ func TestGetLatestContractEventBlockNumber(t *testing.T) { }) } -func TestIsContractEventPresent(t *testing.T) { +func TestIsContractEventProcessed(t *testing.T) { db, cleanup := SetupTestDB(t) defer cleanup() @@ -100,38 +100,32 @@ func TestIsContractEventPresent(t *testing.T) { require.NoError(t, store.StoreContractEvent(ev)) t.Run("existing event returns true", func(t *testing.T) { - present, err := store.IsContractEventPresent(1, 500, ev.TransactionHash, 3) + present, err := store.IsContractEventProcessed(ev.TransactionHash, 3, 1) require.NoError(t, err) assert.True(t, present) }) t.Run("case-insensitive txHash match", func(t *testing.T) { // Query with uppercase — stored value was lowercased by StoreContractEvent - present, err := store.IsContractEventPresent(1, 500, "0xABCDEF1234567890ABCDEF1234567890ABCDEF1234567890ABCDEF1234567890", 3) + present, err := store.IsContractEventProcessed("0xABCDEF1234567890ABCDEF1234567890ABCDEF1234567890ABCDEF1234567890", 3, 1) require.NoError(t, err) assert.True(t, present) }) - t.Run("wrong block number returns false", func(t *testing.T) { - present, err := store.IsContractEventPresent(1, 501, ev.TransactionHash, 3) - require.NoError(t, err) - assert.False(t, present) - }) - t.Run("wrong log index returns false", func(t *testing.T) { - present, err := store.IsContractEventPresent(1, 500, ev.TransactionHash, 4) + present, err := store.IsContractEventProcessed(ev.TransactionHash, 4, 1) require.NoError(t, err) assert.False(t, present) }) t.Run("wrong blockchain returns false", func(t 
*testing.T) { - present, err := store.IsContractEventPresent(2, 500, ev.TransactionHash, 3) + present, err := store.IsContractEventProcessed(ev.TransactionHash, 3, 2) require.NoError(t, err) assert.False(t, present) }) t.Run("wrong txHash returns false", func(t *testing.T) { - present, err := store.IsContractEventPresent(1, 500, "0x0000000000000000000000000000000000000000000000000000000000000000", 3) + present, err := store.IsContractEventProcessed("0x0000000000000000000000000000000000000000000000000000000000000000", 3, 1) require.NoError(t, err) assert.False(t, present) }) diff --git a/nitronode/store/database/interface.go b/nitronode/store/database/interface.go index 80099383d..b832e1c05 100644 --- a/nitronode/store/database/interface.go +++ b/nitronode/store/database/interface.go @@ -294,6 +294,18 @@ type DatabaseStore interface { // GetLatestContractEventBlockNumber returns the highest block number for a given contract. GetLatestContractEventBlockNumber(contractAddress string, blockchainID uint64) (lastBlock uint64, err error) - // IsContractEventPresent checks if a specific contract event has already been stored. - IsContractEventPresent(blockchainID, blockNumber uint64, txHash string, logIndex uint32) (isPresent bool, err error) + // IsContractEventProcessed reports whether an event identified by (txHash, logIndex, blockchainID) + // has already been committed, regardless of which block it appeared in. + // NOTE: uses block-level logIndex — does not detect reorged events where the same tx + // re-mines with a different block-level log position (see nitronode/docs/reorg-fix.md §6.6). + IsContractEventProcessed(txHash string, logIndex uint32, blockchainID uint64) (bool, error) + + // GetLatestContractEventBlockHashAndNumber returns the block_number and block_hash of + // the highest stored event for the given contract. Returns (0, "", nil) when no rows exist. 
+ GetLatestContractEventBlockHashAndNumber(contractAddress string, blockchainID uint64) (blockNumber uint64, blockHash string, err error) + + // GetPreviousDistinctBlockHash returns the block_number and block_hash of the highest + // stored event with block_number strictly below belowBlockNumber. Returns (0, "", nil) + // when no such row exists. + GetPreviousDistinctBlockHash(contractAddress string, blockchainID uint64, belowBlockNumber uint64) (blockNumber uint64, blockHash string, err error) } diff --git a/nitronode/store/memory/blockchain_config.go b/nitronode/store/memory/blockchain_config.go index 4b0a85035..c968c9a9e 100644 --- a/nitronode/store/memory/blockchain_config.go +++ b/nitronode/store/memory/blockchain_config.go @@ -43,6 +43,9 @@ type BlockchainConfig struct { ChannelHubAddress string `yaml:"channel_hub_address"` // ChannelHubSigValidators maps validator IDs to the addresses of signature validators for the ChannelHub contract on this blockchain ChannelHubSigValidators map[uint8]string `yaml:"channel_hub_sig_validators"` + // ConfirmationDelaySecs is the number of seconds to wait before processing an event. + // Set to 0 to process events immediately (disables the confirmation gate). + ConfirmationDelaySecs uint32 `yaml:"confirmation_delay_secs"` } // LoadEnabledBlockchains loads and validates blockchain configurations from a YAML file. 
diff --git a/nitronode/store/memory/memory_store.go b/nitronode/store/memory/memory_store.go index 02d813b42..1ee224beb 100644 --- a/nitronode/store/memory/memory_store.go +++ b/nitronode/store/memory/memory_store.go @@ -33,10 +33,11 @@ func NewMemoryStoreV1(assetsConfig AssetsConfig, blockchainsConfig map[uint64]Bl } blockchains = append(blockchains, core.Blockchain{ - ID: bc.ID, - Name: bc.Name, - ChannelHubAddress: bc.ChannelHubAddress, - BlockStep: bc.BlockStep, + ID: bc.ID, + Name: bc.Name, + ChannelHubAddress: bc.ChannelHubAddress, + BlockStep: bc.BlockStep, + ConfirmationDelaySecs: bc.ConfirmationDelaySecs, }) } slices.SortFunc(blockchains, func(a, b core.Blockchain) int { diff --git a/pkg/blockchain/evm/channel_hub_reactor.go b/pkg/blockchain/evm/channel_hub_reactor.go index 1ec7aaa4d..2ff230a85 100644 --- a/pkg/blockchain/evm/channel_hub_reactor.go +++ b/pkg/blockchain/evm/channel_hub_reactor.go @@ -112,6 +112,12 @@ type ChannelHubReactorStore interface { // StoreContractEvent persists a blockchain event to the database. StoreContractEvent(ev core.BlockchainEvent) error + + // IsContractEventProcessed reports whether an event identified by (txHash, logIndex, blockchainID) + // has already been committed, regardless of which block it appeared in. + // NOTE: uses block-level logIndex — does not detect reorged events where the same tx + // re-mines with a different block-level log position (see nitronode/docs/reorg-fix.md §6.6). 
+ IsContractEventProcessed(txHash string, logIndex uint32, blockchainID uint64) (bool, error) } var channelHubAbi *abi.ABI @@ -148,16 +154,18 @@ type ChannelHubReactor struct { nodeAddress string eventHandler core.ChannelHubEventHandler assetStore AssetStore + store ChannelHubReactorStore // non-transactional; used for the pre-check in HandleEvent useStoreInTx ChannelHubReactorStoreTxProvider onEventProcessed func(blockchainID uint64, success bool) } -func NewChannelHubReactor(blockchainID uint64, nodeAddress string, eventHandler core.ChannelHubEventHandler, assetStore AssetStore, useStoreInTx ChannelHubReactorStoreTxProvider) *ChannelHubReactor { +func NewChannelHubReactor(blockchainID uint64, nodeAddress string, eventHandler core.ChannelHubEventHandler, assetStore AssetStore, useStoreInTx ChannelHubReactorStoreTxProvider, store ChannelHubReactorStore) *ChannelHubReactor { return &ChannelHubReactor{ blockchainID: blockchainID, nodeAddress: nodeAddress, eventHandler: eventHandler, assetStore: assetStore, + store: store, useStoreInTx: useStoreInTx, } } @@ -167,7 +175,13 @@ func (r *ChannelHubReactor) SetOnEventProcessed(fn func(blockchainID uint64, suc r.onEventProcessed = fn } -func (r *ChannelHubReactor) HandleEvent(ctx context.Context, l types.Log) error { +func (r *ChannelHubReactor) HandleEvent(ctx context.Context, l types.Log) (err error) { + defer func() { + if r.onEventProcessed != nil { + r.onEventProcessed(r.blockchainID, err == nil) + } + }() + logger := log.FromContext(ctx) eventID := l.Topics[0] @@ -178,7 +192,22 @@ func (r *ChannelHubReactor) HandleEvent(ctx context.Context, l types.Log) error } logger.Debug("received event", "name", eventName, "blockNumber", l.BlockNumber, "txHash", l.TxHash.String(), "logIndex", l.Index) - err := r.useStoreInTx(func(store ChannelHubReactorStore) error { + // Pre-check: skip already-committed events without opening a transaction. 
+ // This converts the constraint-violation rollback path into a clean early exit and + // is required for the reconciliation walk (§4.4) to replay events without errors. + // Reorged events with a changed block-level logIndex pass through this check; + // they are handled by the reactor's business-logic idempotency (see nitronode/docs/reorg-fix.md §6.6). + processed, err := r.store.IsContractEventProcessed(l.TxHash.String(), uint32(l.Index), r.blockchainID) + if err != nil { + return errors.Wrap(err, "pre-check IsContractEventProcessed failed") + } + if processed { + logger.Info("skipping re-delivered event, already committed", + "event", eventName, "txHash", l.TxHash.String(), "logIndex", l.Index, "chainID", r.blockchainID) + return nil + } + + err = r.useStoreInTx(func(store ChannelHubReactorStore) error { var err error switch eventID { case channelHubAbi.Events["NodeBalanceUpdated"].ID: @@ -245,6 +274,7 @@ func (r *ChannelHubReactor) HandleEvent(ctx context.Context, l types.Log) error ContractAddress: l.Address.Hex(), TransactionHash: l.TxHash.String(), LogIndex: uint32(l.Index), + BlockHash: l.BlockHash.Hex(), }); err != nil { logger.Warn("error storing contract event", "error", err, "event", eventName, "blockNumber", l.BlockNumber, "txHash", l.TxHash.String(), "logIndex", l.Index) return errors.Wrap(err, "error storing contract event") @@ -253,9 +283,6 @@ func (r *ChannelHubReactor) HandleEvent(ctx context.Context, l types.Log) error logger.Info("processed event", "event", eventName, "blockNumber", l.BlockNumber, "txHash", l.TxHash.String(), "logIndex", l.Index) return nil }) - if r.onEventProcessed != nil { - r.onEventProcessed(r.blockchainID, err == nil) - } return err } diff --git a/pkg/blockchain/evm/channel_hub_reactor_test.go b/pkg/blockchain/evm/channel_hub_reactor_test.go index 62b6abd16..6893bf232 100644 --- a/pkg/blockchain/evm/channel_hub_reactor_test.go +++ b/pkg/blockchain/evm/channel_hub_reactor_test.go @@ -136,6 +136,11 @@ func (m 
*mockChannelHubStore) RecordTransaction(tx core.Transaction, application return args.Error(0) } +func (m *mockChannelHubStore) IsContractEventProcessed(txHash string, logIndex uint32, blockchainID uint64) (bool, error) { + args := m.Called(txHash, logIndex, blockchainID) + return args.Bool(0), args.Error(1) +} + // mockChannelHubEventHandler captures events dispatched by the reactor. type mockChannelHubEventHandler struct { mock.Mock @@ -248,11 +253,14 @@ func packNonIndexed(t *testing.T, eventName string, args ...interface{}) []byte } // newReactor creates a ChannelHubReactor wired to the provided mocks. +// Sets up a default IsContractEventProcessed expectation that returns (false, nil) +// so existing tests don't need to set it up individually. func newReactor(blockchainID uint64, nodeAddress string, handler *mockChannelHubEventHandler, assetStore *MockAssetStore, store *mockChannelHubStore) *ChannelHubReactor { + store.On("IsContractEventProcessed", mock.Anything, mock.Anything, mock.Anything).Return(false, nil) useStoreInTx := func(fn ChannelHubReactorStoreTxHandler) error { return fn(store) } - return NewChannelHubReactor(blockchainID, nodeAddress, handler, assetStore, useStoreInTx) + return NewChannelHubReactor(blockchainID, nodeAddress, handler, assetStore, useStoreInTx, store) } // expectStoreContractEvent sets up the mock expectation for StoreContractEvent. 
@@ -1039,6 +1047,72 @@ func TestChannelHubReactor_HandleEscrowDepositsPurged(t *testing.T) { store.AssertExpectations(t) } +func TestChannelHubReactor_HandleEvent_PreCheckError_ReturnsError(t *testing.T) { + blockchainID := uint64(1) + nodeAddr := "0x1111111111111111111111111111111111111111" + tokenAddr := common.HexToAddress("0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48") + amount := big.NewInt(1_000_000) + + logEntry := types.Log{ + Topics: []common.Hash{ + channelHubAbi.Events["NodeBalanceUpdated"].ID, + common.BytesToHash(tokenAddr.Bytes()), + }, + Data: packNonIndexed(t, "NodeBalanceUpdated", amount), + BlockNumber: 100, + TxHash: common.HexToHash("0xaabbcc"), + Index: 0, + } + + store := new(mockChannelHubStore) + handler := new(mockChannelHubEventHandler) + assetStore := new(MockAssetStore) + + // Pre-check returns an error — reactor must return it immediately. + store.On("IsContractEventProcessed", mock.Anything, mock.Anything, mock.Anything).Return(false, assert.AnError) + + useStoreInTx := func(fn ChannelHubReactorStoreTxHandler) error { return fn(store) } + reactor := NewChannelHubReactor(blockchainID, nodeAddr, handler, assetStore, useStoreInTx, store) + + err := reactor.HandleEvent(context.Background(), logEntry) + require.Error(t, err) + require.ErrorContains(t, err, "pre-check IsContractEventProcessed failed") + + // Neither business logic nor StoreContractEvent should be called. 
+ handler.AssertNotCalled(t, "HandleNodeBalanceUpdated", mock.Anything, mock.Anything, mock.Anything) + store.AssertNotCalled(t, "StoreContractEvent", mock.Anything) +} + +func TestChannelHubReactor_HandleEvent_AlreadyProcessed(t *testing.T) { + blockchainID := uint64(1) + nodeAddr := "0x1111111111111111111111111111111111111111" + txHash := common.HexToHash("0xaabbcc") + + logEntry := types.Log{ + Topics: []common.Hash{channelHubAbi.Events["NodeBalanceUpdated"].ID}, + BlockNumber: 100, + TxHash: txHash, + Index: 0, + } + + store := new(mockChannelHubStore) + handler := new(mockChannelHubEventHandler) + assetStore := new(MockAssetStore) + + // Pre-check returns true — event already committed. + store.On("IsContractEventProcessed", txHash.String(), uint32(0), blockchainID).Return(true, nil) + + useStoreInTx := func(fn ChannelHubReactorStoreTxHandler) error { return fn(store) } + reactor := NewChannelHubReactor(blockchainID, nodeAddr, handler, assetStore, useStoreInTx, store) + + err := reactor.HandleEvent(context.Background(), logEntry) + require.NoError(t, err) + + // Neither business logic nor StoreContractEvent should be called. + handler.AssertNotCalled(t, "HandleNodeBalanceUpdated", mock.Anything, mock.Anything, mock.Anything) + store.AssertNotCalled(t, "StoreContractEvent", mock.Anything) +} + func TestChannelHubReactor_UnknownEvent(t *testing.T) { blockchainID := uint64(1) nodeAddr := "0x1111111111111111111111111111111111111111" @@ -1119,4 +1193,28 @@ func TestChannelHubReactor_OnEventProcessedCallback(t *testing.T) { require.Error(t, err) assert.False(t, cbSuccess) }) + + t.Run("callback receives false on pre-check error", func(t *testing.T) { + store := new(mockChannelHubStore) + handler := new(mockChannelHubEventHandler) + assetStore := new(MockAssetStore) + + // Pre-check returns an error — deferred callback must still fire with success=false. 
+ store.On("IsContractEventProcessed", mock.Anything, mock.Anything, mock.Anything).Return(false, assert.AnError) + + useStoreInTx := func(fn ChannelHubReactorStoreTxHandler) error { return fn(store) } + reactor := NewChannelHubReactor(blockchainID, nodeAddr, handler, assetStore, useStoreInTx, store) + + var cbCalled bool + var cbSuccess bool + reactor.SetOnEventProcessed(func(_ uint64, success bool) { + cbCalled = true + cbSuccess = success + }) + + err := reactor.HandleEvent(context.Background(), logEntry) + require.Error(t, err) + assert.True(t, cbCalled, "callback must be invoked on pre-check error") + assert.False(t, cbSuccess) + }) } diff --git a/pkg/blockchain/evm/confirmation_gate.go b/pkg/blockchain/evm/confirmation_gate.go new file mode 100644 index 000000000..1a829716a --- /dev/null +++ b/pkg/blockchain/evm/confirmation_gate.go @@ -0,0 +1,305 @@ +package evm + +import ( + "context" + "errors" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/layer-3/nitrolite/pkg/log" +) + +// recentMultiplier controls how long forwardedSet entries are retained: +// (recentMultiplier × delay). This is the window during which a post-gate +// Removed:true can be matched against a previously forwarded event and emit +// the post-gate reorg WARN. +const recentMultiplier = 3 + +// queueEntry holds a pending event waiting for the confirmation delay to expire. +type queueEntry struct { + log types.Log + arrivedAt time.Time // derived from eventLog.BlockTimestamp; fallback time.Now() when zero +} + +// eventKey identifies an event by tx and log index; blockHash is intentionally excluded +// so that a reorg-replacement event (same tx, same index, different block) can match +// and cancel the original pending entry. 
+type eventKey struct {
+	txHash   common.Hash // hash of the transaction that emitted the log
+	logIndex uint        // the log's Index field, widened to uint
+}
+
+// forwardedKey identifies an event that has already been forwarded to the downstream
+// handler; blockHash is included so a Removed notification from a different block fork
+// does NOT falsely trigger post-gate reorg logic.
+type forwardedKey struct {
+	txHash    common.Hash // hash of the transaction that emitted the log
+	blockHash common.Hash // hash of the block the forwarded log was observed in
+	logIndex  uint        // the log's Index field, widened to uint
+}
+
+// forwardedExpiry pairs a forwardedKey with the wall-clock time at which the event
+// was forwarded, for O(1) FIFO eviction from forwardedSet.
+type forwardedExpiry struct {
+	key         forwardedKey
+	forwardedAt time.Time // same value that is stored in forwardedSet[key] at forward time
+}
+
+// ConfirmationGate buffers incoming events for a configurable delay before forwarding
+// them to a downstream handler, providing a window to cancel events that are reorged
+// out before the delay expires.
+//
+// The gate is pure in-memory: it reads arrival time from eventLog.BlockTimestamp and
+// performs no RPC. The caller (Listener) is responsible for ensuring BlockTimestamp
+// is populated before invoking HandleEvent.
+type ConfirmationGate struct {
+	delay   time.Duration // age (measured from queueEntry.arrivedAt) an entry must reach before it is forwarded
+	chainID uint64        // attached to the gate's log records
+	handler HandleEvent   // downstream callback invoked once per matured, non-cancelled event
+	logger  log.Logger    // named "confirmation-gate" by the constructor
+
+	// mu guards queue, pending, forwardedSet and forwardedQueue.
+	mu             sync.Mutex
+	queue          []queueEntry               // append-tail, pop-head
+	pending        map[eventKey]common.Hash   // live (txHash, logIndex) -> blockHash; source of truth for live entries
+	forwardedSet   map[forwardedKey]time.Time // key -> forwardedAt
+	forwardedQueue []forwardedExpiry          // FIFO of (key, forwardedAt) for O(1) eviction
+
+	kick  chan struct{} // buffered 1; non-blocking sends
+	timer *time.Timer   // created in Start(ctx)
+}
+
+// NewConfirmationGate creates a ConfirmationGate that holds events for delay before
+// forwarding them to handler. delay must be > 0; delay <= 0 returns an error
+// (the wiring layer is responsible for skipping gate construction when the operator
+// configured delay == 0).
+func NewConfirmationGate(
+	delay time.Duration,
+	chainID uint64,
+	handler HandleEvent,
+	logger log.Logger,
+) (*ConfirmationGate, error) {
+	if delay <= 0 {
+		return nil, errors.New("confirmation gate requires delay > 0")
+	}
+	// A nil handler would otherwise only surface as a nil-func panic inside the
+	// background goroutine, one full delay after the first event was queued.
+	// Reject it here, where the caller can still handle the error.
+	if handler == nil {
+		return nil, errors.New("confirmation gate requires a non-nil handler")
+	}
+	return &ConfirmationGate{
+		delay:          delay,
+		chainID:        chainID,
+		handler:        handler,
+		logger:         logger.WithName("confirmation-gate"),
+		pending:        make(map[eventKey]common.Hash),
+		forwardedSet:   make(map[forwardedKey]time.Time),
+		forwardedQueue: nil,
+		kick:           make(chan struct{}, 1),
+	}, nil
+}
+
+// Start launches the background goroutine that forwards matured entries to the
+// downstream handler.
+//
+// handleClosure is called exactly once, after that goroutine has exited. Its
+// argument is non-nil only when the downstream handler returned an error after
+// the confirmation delay; on context cancellation it receives nil. A nil
+// handleClosure is tolerated and simply not called.
+//
+// The timer is created here (tied to the goroutine's lifecycle) and stopped by
+// run when it returns.
+func (g *ConfirmationGate) Start(ctx context.Context, handleClosure func(err error)) {
+	// Create the timer already stopped and drained: nothing is scheduled until
+	// the first drain computes a real deadline. The one-hour duration is an
+	// arbitrary placeholder that never gets a chance to fire.
+	g.timer = time.NewTimer(time.Hour)
+	if !g.timer.Stop() {
+		<-g.timer.C
+	}
+
+	runCtx, cancel := context.WithCancel(ctx)
+
+	// A single goroutine owns the whole lifecycle: run, release the derived
+	// context, then report the outcome.
+	go func() {
+		err := g.run(runCtx)
+		cancel()
+		if handleClosure != nil {
+			handleClosure(err)
+		}
+	}()
+}
+
+// HandleEvent is the entry point called by the upstream Listener for each event.
+//
+// A non-removed event is queued and will be forwarded after the confirmation delay.
+// A removed event cancels its pending queue entry (pre-gate reorg) or — if the entry
+// was already forwarded — records a post-gate reorg warning.
+func (g *ConfirmationGate) HandleEvent(_ context.Context, eventLog types.Log) error {
+	// The context argument is ignored and every path below returns nil;
+	// downstream handler errors surface through the closure passed to Start.
+	ek := eventKey{txHash: eventLog.TxHash, logIndex: uint(eventLog.Index)}
+
+	if !eventLog.Removed {
+		// Derive arrival time from the event's block timestamp. The listener
+		// guarantees this is non-zero in steady state; the fallback is
+		// defense-in-depth for tests/edge cases. No log here — the listener
+		// owns the warning when it cannot ensure the timestamp.
+		var ts time.Time
+		if eventLog.BlockTimestamp != 0 {
+			ts = time.Unix(int64(eventLog.BlockTimestamp), 0)
+		} else {
+			ts = time.Now()
+		}
+
+		// Overwriting pending[ek] is what turns an older queue entry for the
+		// same (txHash, logIndex) into a tombstone: its blockHash no longer
+		// matches the live one and it is dropped when popped.
+		g.mu.Lock()
+		g.pending[ek] = eventLog.BlockHash
+		g.queue = append(g.queue, queueEntry{log: eventLog, arrivedAt: ts})
+		g.mu.Unlock()
+
+		// Non-blocking kick so the poller wakes up to (re)compute the timer
+		// even when it is currently sleeping on a far-future deadline.
+		select {
+		case g.kick <- struct{}{}:
+		default:
+		}
+		return nil
+	}
+
+	// eventLog.Removed == true: attempt pre-gate or post-gate cancellation.
+	fk := forwardedKey{txHash: eventLog.TxHash, blockHash: eventLog.BlockHash, logIndex: uint(eventLog.Index)}
+
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	// Pre-gate cancel: the live pending entry corresponds to this block.
+	// Delete from pending; the tombstoned queue entry is skipped on pop.
+	if liveBlockHash, ok := g.pending[ek]; ok && liveBlockHash == eventLog.BlockHash {
+		delete(g.pending, ek)
+		return nil
+	}
+
+	// Post-gate: the event has already been forwarded.
+	if _, ok := g.forwardedSet[fk]; ok {
+		g.logger.Warn("post-gate reorg detected",
+			"txHash", eventLog.TxHash.Hex(),
+			"blockHash", eventLog.BlockHash.Hex(),
+			"logIndex", eventLog.Index,
+			"chainID", g.chainID,
+		)
+		// Delete from the membership map; leave the forwardedQueue entry in
+		// place — it expires on its own. The eviction loop's value-check makes
+		// the later delete safe even if the same key is forwarded again.
+		delete(g.forwardedSet, fk)
+		return nil
+	}
+
+	// Neither pending under this blockHash nor recently forwarded: nothing to undo.
+	g.logger.Debug("removal for unknown/stale event",
+		"txHash", eventLog.TxHash.Hex(),
+		"blockHash", eventLog.BlockHash.Hex(),
+		"logIndex", eventLog.Index,
+		"chainID", g.chainID,
+	)
+	return nil
+}
+
+// run is the background goroutine that wakes on a kick, on the timer firing, or on
+// ctx cancellation. It forwards matured entries, evicts stale forwardedSet entries,
+// and reschedules the timer for the next head deadline. Returns a non-nil error if
+// the downstream handler failed; returns nil on clean shutdown.
+func (g *ConfirmationGate) run(ctx context.Context) error {
+	defer g.timer.Stop()
+	for {
+		// NOTE(review): when ctx is already cancelled AND a kick or timer tick
+		// is ready at the same time, select chooses among the ready cases at
+		// random, so one more drain (and handler call for any matured entry)
+		// can still happen after cancellation — confirm this is acceptable.
+		select {
+		case <-ctx.Done():
+			return nil
+		case <-g.kick:
+		case <-g.timer.C:
+		}
+		if err := g.drainAndReschedule(); err != nil {
+			return err
+		}
+	}
+}
+
+// drainAndReschedule forwards all queue entries whose confirmation delay has
+// elapsed, evicts forwardedSet entries older than (recentMultiplier × delay),
+// and resets the timer to the next head deadline. Returns a non-nil error if the
+// downstream handler failed; the caller (run) propagates it to the lifecycle closure.
+//
+// mu is held throughout except while the downstream handler runs. On the
+// handler-error path the function returns with mu released and without
+// rescheduling the timer.
+func (g *ConfirmationGate) drainAndReschedule() error {
+	g.mu.Lock()
+	// now is sampled once; maturity checks, forwardedAt stamps and eviction in
+	// this call all use the same instant, even if handler calls take a while.
+	now := time.Now()
+
+	// Step 1: drain matured head entries.
+	// Only the head is inspected, so entries leave strictly in arrival order.
+	for len(g.queue) > 0 && !g.queue[0].arrivedAt.Add(g.delay).After(now) {
+		entry := g.queue[0]
+		g.queue = g.queue[1:]
+
+		ek := eventKey{txHash: entry.log.TxHash, logIndex: uint(entry.log.Index)}
+
+		// Tombstone check: if the live pending entry no longer points at this
+		// blockHash, a reorg-replacement event has superseded it. Drop silently.
+		// Do NOT touch pending[ek] — it refers to the new live event (still in
+		// the queue) and deleting it would break the next tombstone check or the
+		// next Removed cancel.
+		liveBlockHash, ok := g.pending[ek]
+		if !ok || liveBlockHash != entry.log.BlockHash {
+			continue
+		}
+
+		// Forward: clear pending, insert into forwardedSet + forwardedQueue
+		// BEFORE releasing mu so that a fast Removed:true arriving immediately
+		// after the handler call still sees the entry and emits the post-gate WARN.
+		delete(g.pending, ek)
+		fk := forwardedKey{
+			txHash:    entry.log.TxHash,
+			blockHash: entry.log.BlockHash,
+			logIndex:  uint(entry.log.Index),
+		}
+		g.forwardedSet[fk] = now
+		g.forwardedQueue = append(g.forwardedQueue, forwardedExpiry{key: fk, forwardedAt: now})
+
+		// Release mu for the handler call so HandleEvent is not blocked on
+		// downstream processing.
+		g.mu.Unlock()
+
+		// The handler gets a fresh background context carrying the gate's logger.
+		evCtx := log.SetContextLogger(context.Background(), g.logger)
+		if err := g.handler(evCtx, entry.log); err != nil {
+			g.logger.Error("handler error after confirmation delay, stopping gate",
+				"error", err,
+				"chainID", g.chainID,
+			)
+			return err // mu already released before the handler call; no relock needed.
+		}
+
+		// Re-acquire before the loop condition reads g.queue again.
+		g.mu.Lock()
+	}
+
+	// Step 2: FIFO eviction of forwardedSet entries older than recentMultiplier × delay.
+	for len(g.forwardedQueue) > 0 && now.Sub(g.forwardedQueue[0].forwardedAt) > recentMultiplier*g.delay {
+		popped := g.forwardedQueue[0]
+		g.forwardedQueue = g.forwardedQueue[1:]
+
+		// Only delete from forwardedSet if the stored timestamp still equals
+		// the popped entry's timestamp. This guards the rare re-forward case
+		// (same key forwarded again after a chain un-reorg) so the older FIFO
+		// entry does not evict newer set membership. Tolerates the §2.4 Removed
+		// path having already deleted the entry (no-op).
+		if storedAt, ok := g.forwardedSet[popped.key]; ok && storedAt.Equal(popped.forwardedAt) {
+			delete(g.forwardedSet, popped.key)
+		}
+	}
+
+	// Step 3: reset timer to next head deadline using the standard drain pattern.
+	if !g.timer.Stop() {
+		select {
+		case <-g.timer.C:
+		default:
+		}
+	}
+	if len(g.queue) > 0 {
+		// A deadline already in the past yields a non-positive duration, which
+		// makes the timer fire immediately and triggers another drain.
+		g.timer.Reset(time.Until(g.queue[0].arrivedAt.Add(g.delay)))
+	}
+	// else: leave the timer stopped; the next kick recomputes.
+
+	g.mu.Unlock()
+	return nil
+}
diff --git a/pkg/blockchain/evm/confirmation_gate_test.go b/pkg/blockchain/evm/confirmation_gate_test.go
new file mode 100644
index 000000000..c95b7e6ce
--- /dev/null
+++ b/pkg/blockchain/evm/confirmation_gate_test.go
@@ -0,0 +1,840 @@
+package evm
+
+import (
+	"context"
+	"errors"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/layer-3/nitrolite/pkg/log"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// helpers
+
+// makeLog builds a types.Log with BlockTimestamp == 0. The gate then derives
+// arrivedAt from time.Now() at HandleEvent time, which gives sub-second
+// resolution. This is the appropriate helper for tests that use millisecond-scale
+// delays — BlockTimestamp itself is unix-seconds and would round-trip-truncate
+// any timestamp set by the test, causing arrivedAt to land up to 1s in the past
+// and the entry to mature immediately.
+//
+// Tests that explicitly exercise the BlockTimestamp-driven arrival path use
+// makeLogAt instead and pick durations large enough to tolerate second-resolution
+// truncation.
+func makeLog(txHash common.Hash, blockHash common.Hash, logIndex uint, removed bool) types.Log {
+	return types.Log{
+		TxHash:    txHash,
+		BlockHash: blockHash,
+		Index:     uint(logIndex),
+		Removed:   removed,
+	}
+}
+
+// makeLogAt builds a types.Log (its Removed flag is taken from the removed
+// argument) whose BlockTimestamp is set to the supplied wall-clock time. Used
+// for tests that want the gate to derive arrivedAt from a specific moment in
+// the past — must be paired with delays large enough (≥1s recommended) to
+// tolerate seconds-resolution truncation of BlockTimestamp.
+func makeLogAt(txHash common.Hash, blockHash common.Hash, logIndex uint, removed bool, ts time.Time) types.Log {
+	// Start from an empty log and fill in only the fields the gate reads.
+	var l types.Log
+	l.TxHash = txHash
+	l.BlockHash = blockHash
+	l.Index = logIndex
+	l.Removed = removed
+	l.BlockTimestamp = uint64(ts.Unix()) // unix seconds: sub-second precision is dropped
+	return l
+}
+
+// newGate constructs a ConfirmationGate for chain ID 1 with a no-op logger and
+// fails the calling test immediately if construction returns an error.
+func newGate(t *testing.T, delay time.Duration, handler HandleEvent) *ConfirmationGate {
+	t.Helper()
+	gate, err := NewConfirmationGate(delay, 1, handler, log.NewNoopLogger())
+	require.NoError(t, err)
+	return gate
+}
+
+// T0: constructor rejects non-positive delay (operator-facing delay==0 is handled
+// by wiring in main.go which skips constructing the gate).
+func TestConfirmationGate_Constructor_RejectsNonPositiveDelay(t *testing.T) {
+	t.Parallel()
+
+	noop := func(_ context.Context, _ types.Log) error { return nil }
+
+	// Zero first, then a negative value — both must be refused with a nil gate.
+	for _, badDelay := range []time.Duration{0, -1 * time.Second} {
+		gate, err := NewConfirmationGate(badDelay, 1, noop, log.NewNoopLogger())
+		require.Error(t, err)
+		assert.Nil(t, gate)
+	}
+}
+
+// T2: normal event is queued and delivered after the delay.
+func TestConfirmationGate_NormalPath(t *testing.T) {
+	t.Parallel()
+
+	var (
+		callCount    atomic.Int32
+		mu           sync.Mutex // guards deliveredLog
+		deliveredLog types.Log
+	)
+
+	// Record the log before bumping the counter so that a reader who observes
+	// callCount > 0 is guaranteed to see the stored log.
+	handler := func(_ context.Context, l types.Log) error {
+		mu.Lock()
+		deliveredLog = l
+		mu.Unlock()
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 5*time.Millisecond, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x02")
+	bh := common.HexToHash("0xBB")
+	ev := makeLog(tx, bh, 0, false)
+
+	require.NoError(t, g.HandleEvent(context.Background(), ev))
+
+	// should NOT be called within 1 ms
+	time.Sleep(1 * time.Millisecond)
+	assert.Equal(t, int32(0), callCount.Load(), "handler must not be called before delay expires")
+
+	// should be called within 500 ms total
+	require.Eventually(t,
+		func() bool { return callCount.Load() > 0 },
+		500*time.Millisecond, 1*time.Millisecond,
+		"handler not called within timeout",
+	)
+
+	assert.Equal(t, int32(1), callCount.Load())
+	mu.Lock()
+	assert.Equal(t, ev.TxHash, deliveredLog.TxHash)
+	assert.Equal(t, ev.Index, deliveredLog.Index)
+	mu.Unlock()
+}
+
+// T3: a Removed event for a queued entry cancels it before forwarding.
+func TestConfirmationGate_ReorgCancel(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 10*time.Millisecond, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x03")
+	bh := common.HexToHash("0xCC")
+
+	// Queue the event, then cancel it with a Removed log for the same block.
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, 0, false)))
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, 0, true)))
+
+	// Sleep past the delay: the tombstoned entry must be dropped, not forwarded.
+	time.Sleep(20 * time.Millisecond)
+	assert.Equal(t, int32(0), callCount.Load(), "handler must never be called after reorg cancel")
+}
+
+// T4: a re-delivered event (same tx/logIndex, different blockHash) replaces the original
+// in the pending map; the late-arriving Removed for the old blockHash is a no-op (live
+// pending hash no longer matches); the new event is forwarded once.
+func TestConfirmationGate_OutOfOrder(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 10*time.Millisecond, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x04")
+	bhOld := common.HexToHash("0xAA")
+	bhNew := common.HexToHash("0xBB")
+
+	// Event A: original block — queued under (tx, 0).
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bhOld, 0, false)))
+	// Event B: re-mined in new block — replaces pending[ek] = bhNew. The queued A entry
+	// becomes a tombstone (its blockHash no longer matches pending[ek]).
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bhNew, 0, false)))
+	// Removed for old block: pending[ek] is bhNew, not bhOld; no forwarded entry yet;
+	// no-op (debug log).
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bhOld, 0, true)))
+
+	require.Eventually(t,
+		func() bool { return callCount.Load() > 0 },
+		500*time.Millisecond, 5*time.Millisecond,
+		"handler not called within timeout — event B was not forwarded",
+	)
+
+	// Only B should have been forwarded (A was tombstoned and silently dropped).
+	assert.Equal(t, int32(1), callCount.Load())
+}
+
+// T5: post-gate reorg — Removed arrives after the event was already forwarded.
+// Verify handler is called, Removed is handled gracefully (no panic/error).
+func TestConfirmationGate_PostGateReorg(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 2*time.Millisecond, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x05")
+	bh := common.HexToHash("0xDD")
+	ev := makeLog(tx, bh, 0, false)
+
+	require.NoError(t, g.HandleEvent(context.Background(), ev))
+
+	// Wait until forwarded.
+	require.Eventually(t,
+		func() bool { return callCount.Load() > 0 },
+		500*time.Millisecond, 1*time.Millisecond,
+		"handler not called within timeout",
+	)
+	assert.Equal(t, int32(1), callCount.Load())
+
+	// Post-gate Removed — should not panic or return error.
+	// WARN log "post-gate reorg detected" is emitted internally (manually observable).
+	err := g.HandleEvent(context.Background(), makeLog(tx, bh, 0, true))
+	assert.NoError(t, err)
+
+	// Handler should still have been called exactly once.
+	assert.Equal(t, int32(1), callCount.Load())
+}
+
+// T6: Removed for a completely unknown event — no error, no handler call.
+func TestConfirmationGate_UnknownRemoval(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 10*time.Millisecond, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x06")
+	bh := common.HexToHash("0xEE")
+
+	// Nothing was ever queued for (tx, 0): the removal must be accepted silently.
+	err := g.HandleEvent(context.Background(), makeLog(tx, bh, 0, true))
+	assert.NoError(t, err)
+
+	// Sleep past the delay to prove the removal did not enqueue anything.
+	time.Sleep(20 * time.Millisecond)
+	assert.Equal(t, int32(0), callCount.Load())
+}
+
+// T7: BlockTimestamp far in the past → event is immediately mature and forwarded fast.
+func TestConfirmationGate_BlockTimestampBypass(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 10*time.Millisecond, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x07")
+	bh := common.HexToHash("0xFF")
+
+	// Block timestamp 30 seconds ago — arrivedAt + 10ms is far in the past, so the
+	// entry is matured the moment the drain loop runs.
+	require.NoError(t, g.HandleEvent(context.Background(), makeLogAt(tx, bh, 0, false, time.Now().Add(-30*time.Second))))
+
+	require.Eventually(t,
+		func() bool { return callCount.Load() > 0 },
+		500*time.Millisecond, 1*time.Millisecond,
+		"handler not called within timeout",
+	)
+	assert.Equal(t, int32(1), callCount.Load())
+}
+
+// T8: partial elapsed delay — BlockTimestamp 2 seconds in the past with delay=5s.
+//
+// Because BlockTimestamp is unix-seconds, the .Unix() conversion floors to the
+// nearest whole second. In the worst case the gate sees arrivedAt up to 1s
+// further in the past than the wall-clock target — so the actual remaining
+// delay is in [2s, 3s]. Sleeping 500ms is safely inside that "not yet" window
+// regardless of where the subsecond boundary landed.
+func TestConfirmationGate_BlockTimestampPartialDelay(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 5*time.Second, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x08")
+	bh := common.HexToHash("0x08")
+
+	require.NoError(t, g.HandleEvent(context.Background(), makeLogAt(tx, bh, 0, false, time.Now().Add(-2*time.Second))))
+
+	// Not called after 500 ms (worst-case remaining is ≥2s).
+	time.Sleep(500 * time.Millisecond)
+	assert.Equal(t, int32(0), callCount.Load(), "handler must not be called before remaining delay expires")
+
+	// Called within 7s total.
+	require.Eventually(t,
+		func() bool { return callCount.Load() > 0 },
+		7*time.Second, 50*time.Millisecond,
+		"handler not called within timeout",
+	)
+	assert.Equal(t, int32(1), callCount.Load())
+}
+
+// T9 (reframed): BlockTimestamp == 0 falls back to time.Now() — the full delay
+// must elapse. No log is emitted from the gate side (the listener owns any WARN
+// for a missing timestamp).
+func TestConfirmationGate_BlockTimestampZeroFallback(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 10*time.Millisecond, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x09")
+	bh := common.HexToHash("0x09")
+
+	// makeLog produces BlockTimestamp == 0 → gate falls back to time.Now().
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, 0, false)))
+
+	// Not called immediately (fell back to current time, full delay required).
+	time.Sleep(1 * time.Millisecond)
+	assert.Equal(t, int32(0), callCount.Load(), "handler must not be called before delay expires")
+
+	// Called within 500 ms.
+	require.Eventually(t,
+		func() bool { return callCount.Load() > 0 },
+		500*time.Millisecond, 1*time.Millisecond,
+		"handler not called within timeout",
+	)
+	assert.Equal(t, int32(1), callCount.Load())
+}
+
+// T11: cancelling the context prevents queued events from being forwarded.
+func TestConfirmationGate_Shutdown(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	g := newGate(t, 50*time.Millisecond, handler)
+	ctx, cancel := context.WithCancel(t.Context())
+	g.Start(ctx, func(error) {})
+
+	for i := range 3 {
+		// Build the hashes from raw bytes. The previous form,
+		// HexToHash(string(rune(0x20+i))), fed non-hex characters (" ", "!", `"`)
+		// into a hex parser, so the tx hashes were not the distinct values
+		// the test intends.
+		tx := common.BytesToHash([]byte{byte(0x20 + i)})
+		bh := common.BytesToHash([]byte{byte(0x30 + i)})
+		require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, uint(i), false)))
+	}
+
+	// Cancel before delay expires.
+	cancel()
+
+	time.Sleep(100 * time.Millisecond)
+	assert.Equal(t, int32(0), callCount.Load(), "no events must be forwarded after context cancellation")
+}
+
+// T12: forwardedSet entries are evicted after recentMultiplier × delay.
+// Behavior under test: after eviction, a Removed for the same (tx, blockHash, idx)
+// must fall through to the DEBUG path — no panic, no error.
+func TestConfirmationGate_ForwardedSetEviction(t *testing.T) {
+	t.Parallel()
+
+	var callCount atomic.Int32
+	handler := func(_ context.Context, _ types.Log) error {
+		callCount.Add(1)
+		return nil
+	}
+
+	delay := 5 * time.Millisecond
+	g := newGate(t, delay, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x12")
+	bh := common.HexToHash("0x12")
+
+	// Enqueue and wait for forward.
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, 0, false)))
+	require.Eventually(t,
+		func() bool { return callCount.Load() > 0 },
+		500*time.Millisecond, 1*time.Millisecond,
+		"handler not called within timeout",
+	)
+
+	// At this point forwardedSet contains the entry.
+	g.mu.Lock()
+	_, present := g.forwardedSet[forwardedKey{txHash: tx, blockHash: bh, logIndex: 0}]
+	g.mu.Unlock()
+	assert.True(t, present, "forwardedSet must contain the entry immediately after forwarding")
+
+	// Wait well past recentMultiplier × delay, then enqueue another event to trigger
+	// the eviction path inside drainAndReschedule.
+	time.Sleep(time.Duration(recentMultiplier+1) * delay)
+
+	tx2 := common.HexToHash("0x13")
+	bh2 := common.HexToHash("0x13")
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx2, bh2, 0, false)))
+
+	// Wait for tx2 to forward; the eviction loop also runs.
+	require.Eventually(t,
+		func() bool { return callCount.Load() >= 2 },
+		500*time.Millisecond, 1*time.Millisecond,
+		"second handler invocation timed out",
+	)
+
+	g.mu.Lock()
+	_, presentAfter := g.forwardedSet[forwardedKey{txHash: tx, blockHash: bh, logIndex: 0}]
+	g.mu.Unlock()
+	assert.False(t, presentAfter, "old forwardedSet entry must be evicted after recentMultiplier × delay")
+
+	// A second Removed for the original event — falls through to DEBUG (not found).
+	// No panic, no error.
+	err := g.HandleEvent(context.Background(), makeLog(tx, bh, 0, true))
+	assert.NoError(t, err)
+}
+
+// T13: multiple events are all delivered, preserving queue order.
+func TestConfirmationGate_MultipleEvents_Ordering(t *testing.T) {
+	t.Parallel()
+
+	var mu sync.Mutex // guards delivered
+	var delivered []common.Hash
+
+	// Record tx hashes in the order the gate forwards them.
+	handler := func(_ context.Context, l types.Log) error {
+		mu.Lock()
+		delivered = append(delivered, l.TxHash)
+		mu.Unlock()
+		return nil
+	}
+
+	g := newGate(t, 5*time.Millisecond, handler)
+	g.Start(t.Context(), func(error) {})
+
+	txHashes := []common.Hash{
+		common.HexToHash("0xA1"),
+		common.HexToHash("0xA2"),
+		common.HexToHash("0xA3"),
+	}
+	// All three logs share one block. The literal must be valid hex: the
+	// previous "0xBLOCK" contains non-hex characters, which HexToHash does not
+	// report and silently mis-decodes.
+	bh := common.HexToHash("0xB10C")
+
+	for i, tx := range txHashes {
+		require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, uint(i), false)))
+	}
+
+	// Wait for all 3 events to be delivered. The manual loop is kept so the
+	// failure message can report how many events actually arrived.
+	deadline := time.After(500 * time.Millisecond)
+	for {
+		mu.Lock()
+		n := len(delivered)
+		mu.Unlock()
+		if n >= 3 {
+			break
+		}
+		select {
+		case <-deadline:
+			t.Fatalf("only %d/3 events delivered within timeout", n)
+		default:
+			time.Sleep(1 * time.Millisecond)
+		}
+	}
+
+	mu.Lock()
+	defer mu.Unlock()
+	require.Len(t, delivered, 3)
+	assert.Equal(t, txHashes[0], delivered[0])
+	assert.Equal(t, txHashes[1], delivered[1])
+	assert.Equal(t, txHashes[2], delivered[2])
+}
+
+// New: tombstone-skip — a non-removed re-add with a different blockHash supersedes
+// the queued entry. When the original entry's deadline arrives, the gate notices
+// the tombstone (pending[ek] != entry.log.BlockHash) and silently drops it.
+// Only the new entry's forward is observed.
+func TestConfirmationGate_TombstoneSkip(t *testing.T) {
+	t.Parallel()
+
+	var mu sync.Mutex           // guards delivered
+	var delivered []common.Hash // blockHashes seen by handler
+
+	handler := func(_ context.Context, l types.Log) error {
+		mu.Lock()
+		delivered = append(delivered, l.BlockHash)
+		mu.Unlock()
+		return nil
+	}
+
+	delay := 30 * time.Millisecond
+	g := newGate(t, delay, handler)
+	g.Start(t.Context(), func(error) {})
+
+	tx := common.HexToHash("0x20")
+	bhA := common.HexToHash("0xAAA")
+	bhB := common.HexToHash("0xBBB")
+
+	// Enqueue event for blockHashA.
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bhA, 0, false)))
+	// Before the delay elapses, send a non-removed re-add with blockHashB — same (tx, idx).
+	// The gate replaces pending[ek] = bhB and appends a new queue entry; the bhA entry
+	// becomes a tombstone.
+	require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bhB, 0, false)))
+
+	// Wait past the delay.
+	require.Eventually(t,
+		func() bool {
+			mu.Lock()
+			defer mu.Unlock()
+			return len(delivered) >= 1
+		},
+		500*time.Millisecond, 2*time.Millisecond,
+		"handler not called within timeout — event B was not forwarded",
+	)
+
+	// Allow extra time to ensure bhA does not slip through later (it shouldn't —
+	// it's tombstoned and dropped silently on pop).
+	time.Sleep(50 * time.Millisecond)
+
+	mu.Lock()
+	defer mu.Unlock()
+	require.Len(t, delivered, 1, "exactly one forward expected (the bhB entry)")
+	assert.Equal(t, bhB, delivered[0], "the bhB entry must be the one forwarded")
+}
+
+// New: FIFO eviction with early-delete tolerance. After forwarding, a Removed:true
+// arrives and removes the forwardedSet entry while emitting the post-gate WARN.
+// Later, the FIFO eviction loop pops the corresponding forwardedQueue entry — the
+// set entry is already gone. The eviction must not panic and must not double-invoke
+// the handler.
+func TestConfirmationGate_FIFOEviction_ToleratesEarlyDelete(t *testing.T) { + t.Parallel() + + var callCount atomic.Int32 + handler := func(_ context.Context, _ types.Log) error { + callCount.Add(1) + return nil + } + + delay := 5 * time.Millisecond + g := newGate(t, delay, handler) + g.Start(t.Context(), func(error) {}) + + tx := common.HexToHash("0x30") + bh := common.HexToHash("0x30") + + // Forward an event. + require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, 0, false))) + deadline := time.After(500 * time.Millisecond) + for callCount.Load() == 0 { + select { + case <-deadline: + t.Fatal("handler not called within timeout") + default: + time.Sleep(1 * time.Millisecond) + } + } + + // Confirm the forwardedSet entry exists. + fk := forwardedKey{txHash: tx, blockHash: bh, logIndex: 0} + g.mu.Lock() + _, presentBefore := g.forwardedSet[fk] + queueLen := len(g.forwardedQueue) + g.mu.Unlock() + require.True(t, presentBefore, "forwardedSet must contain the entry immediately after forwarding") + require.Equal(t, 1, queueLen, "forwardedQueue must contain one entry") + + // Send Removed:true — gate emits post-gate WARN and deletes the entry from forwardedSet + // (but leaves the forwardedQueue entry in place; it will expire on its own). + require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, 0, true))) + + g.mu.Lock() + _, presentAfterRemoved := g.forwardedSet[fk] + g.mu.Unlock() + require.False(t, presentAfterRemoved, "forwardedSet entry must be deleted by the post-gate WARN path") + + // Wait well past recentMultiplier × delay, then kick the drain loop with a new event + // so eviction runs and pops the orphaned forwardedQueue entry. + time.Sleep(time.Duration(recentMultiplier+1) * delay) + + tx2 := common.HexToHash("0x31") + bh2 := common.HexToHash("0x31") + require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx2, bh2, 0, false))) + + // Wait for the second forward. 
+ deadline = time.After(500 * time.Millisecond) + for callCount.Load() < 2 { + select { + case <-deadline: + t.Fatal("second handler invocation timed out") + default: + time.Sleep(1 * time.Millisecond) + } + } + + // Handler called exactly twice (once per forward; no double-action from eviction). + assert.Equal(t, int32(2), callCount.Load()) + + // The orphaned forwardedQueue entry must have been popped during eviction. + g.mu.Lock() + // After tx2 is forwarded, the queue should have exactly one entry (tx2's). + finalQueueLen := len(g.forwardedQueue) + g.mu.Unlock() + assert.Equal(t, 1, finalQueueLen, "orphan forwardedQueue entry must have been evicted") +} + +// New: timer reschedule — enqueue a single event and do NOT send any further kicks. +// The handler must be invoked when the timer fires. +func TestConfirmationGate_TimerReschedule(t *testing.T) { + t.Parallel() + + var callCount atomic.Int32 + handler := func(_ context.Context, _ types.Log) error { + callCount.Add(1) + return nil + } + + g := newGate(t, 20*time.Millisecond, handler) + g.Start(t.Context(), func(error) {}) + + tx := common.HexToHash("0x40") + bh := common.HexToHash("0x40") + require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, 0, false))) + + // No further HandleEvent calls. Wait for the timer to fire. + deadline := time.After(500 * time.Millisecond) + for callCount.Load() == 0 { + select { + case <-deadline: + t.Fatal("handler not called via timer fire alone") + default: + time.Sleep(2 * time.Millisecond) + } + } + assert.Equal(t, int32(1), callCount.Load()) +} + +// New: kick during a pending timer must NOT extend the original timer's deadline. +// Event A is enqueued first (timer arms for A's deadline). Before A matures we +// enqueue B, whose deadline is LATER because it arrives later. A must still fire at its original +// deadline; the kick reschedules the timer to the head (A's) deadline, which is unchanged. 
+func TestConfirmationGate_KickDuringPendingTimer(t *testing.T) { + t.Parallel() + + var mu sync.Mutex + var deliveredOrder []common.Hash + firstFiredAt := make(chan time.Time, 1) + + handler := func(_ context.Context, l types.Log) error { + mu.Lock() + deliveredOrder = append(deliveredOrder, l.TxHash) + isFirst := len(deliveredOrder) == 1 + mu.Unlock() + if isFirst { + select { + case firstFiredAt <- time.Now(): + default: + } + } + return nil + } + + delay := 100 * time.Millisecond + g := newGate(t, delay, handler) + g.Start(t.Context(), func(error) {}) + + txA := common.HexToHash("0x50") + bhA := common.HexToHash("0x50") + txB := common.HexToHash("0x51") + bhB := common.HexToHash("0x51") + + // Event A: BlockTimestamp == 0 → gate uses time.Now() at HandleEvent time as arrivedAt. + enqueueA := time.Now() + require.NoError(t, g.HandleEvent(context.Background(), makeLog(txA, bhA, 0, false))) + + // Sleep 70ms (shorter than delay), then enqueue B. The kick wakes the drain loop; A is not yet + // mature; the timer must be reset to A's deadline. B's deadline is later than + // A's because its arrivedAt is later (HandleEvent uses time.Now() when + // BlockTimestamp == 0). + time.Sleep(70 * time.Millisecond) + require.NoError(t, g.HandleEvent(context.Background(), makeLog(txB, bhB, 0, false))) + + // Wait for A to fire. + select { + case firedAt := <-firstFiredAt: + // A's expected deadline was enqueueA + delay. Firing should occur no + // earlier than ~that moment and not be delayed by B's later deadline. + elapsed := firedAt.Sub(enqueueA) + // Allow generous slack but ensure A did not get pushed to B's deadline + // (B's deadline is enqueueA + ~70ms + delay = enqueueA + ~170ms, above the 160ms bound). 
+ assert.GreaterOrEqual(t, elapsed, 90*time.Millisecond, "A fired before its deadline") + assert.Less(t, elapsed, 160*time.Millisecond, "A's deadline was extended by B's kick") + case <-time.After(1 * time.Second): + t.Fatal("A did not fire within timeout") + } + + mu.Lock() + defer mu.Unlock() + require.GreaterOrEqual(t, len(deliveredOrder), 1) + assert.Equal(t, txA, deliveredOrder[0], "A must fire first (queue order preserved)") +} + +// New: shutdown with non-empty queue — cancel the gate's context, assert the +// goroutine exits quickly and no handler is invoked. +func TestConfirmationGate_ShutdownWithNonEmptyQueue(t *testing.T) { + t.Parallel() + + var callCount atomic.Int32 + handlerEntered := make(chan struct{}, 4) + handler := func(_ context.Context, _ types.Log) error { + callCount.Add(1) + select { + case handlerEntered <- struct{}{}: + default: + } + return nil + } + + g := newGate(t, 200*time.Millisecond, handler) + ctx, cancel := context.WithCancel(t.Context()) + g.Start(ctx, func(error) {}) + + // Enqueue multiple events far in the future, each with a distinct tx hash and block hash. + for i := range 4 { + tx := common.BytesToHash([]byte{byte(0x60 + i)}) + bh := common.BytesToHash([]byte{byte(0x70 + i)}) + require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, uint(i), false))) + } + + // Cancel and assert the gate's goroutine exits within a short window. + cancel() + + // Give the goroutine time to observe ctx.Done. + time.Sleep(50 * time.Millisecond) + + // Even if we wait far longer than the delay would otherwise require, no handler call. 
+ time.Sleep(300 * time.Millisecond) + assert.Equal(t, int32(0), callCount.Load(), "no handler invocations expected after shutdown") + + select { + case <-handlerEntered: + t.Fatal("handler was invoked after shutdown") + default: + } +} + +// TestConfirmationGate_HandlerErrorPropagatesFatal: when the downstream handler +// returns an error after the confirmation delay, the gate's lifecycle closure +// receives the sentinel error exactly once and the run goroutine exits. +// Subsequent events that mature must NOT invoke the handler again. +func TestConfirmationGate_HandlerErrorPropagatesFatal(t *testing.T) { + t.Parallel() + + sentinelErr := errors.New("handler sentinel error") + var handlerCalls atomic.Int64 + handler := func(_ context.Context, _ types.Log) error { + handlerCalls.Add(1) + return sentinelErr + } + + delay := 50 * time.Millisecond + g := newGate(t, delay, handler) + + closureCh := make(chan error, 2) // size 2 to catch a buggy double-invocation + g.Start(t.Context(), func(err error) { closureCh <- err }) + + tx := common.HexToHash("0xF1") + bh := common.HexToHash("0xF1") + require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx, bh, 0, false))) + + // The closure must be invoked once with the sentinel error. + select { + case err := <-closureCh: + assert.Equal(t, sentinelErr, err, "closure must receive the sentinel error") + case <-time.After(delay + 200*time.Millisecond): + t.Fatal("closure was not invoked within timeout") + } + + // A second invocation must not occur — the run goroutine has exited. + select { + case extra := <-closureCh: + t.Fatalf("unexpected second closure invocation: %v", extra) + case <-time.After(50 * time.Millisecond): + // correct: no second invocation + } + + // Enqueue a second event after the failure. The goroutine has exited; even + // if the kick is queued in the buffered channel it will never be drained. 
+ tx2 := common.HexToHash("0xF2") + bh2 := common.HexToHash("0xF2") + require.NoError(t, g.HandleEvent(context.Background(), makeLog(tx2, bh2, 0, false))) + + // Wait past the delay; the handler must NOT be called a second time. + time.Sleep(delay + 100*time.Millisecond) + assert.Equal(t, int64(1), handlerCalls.Load(), "handler must be invoked exactly once across gate lifetime") +} diff --git a/pkg/blockchain/evm/interface.go b/pkg/blockchain/evm/interface.go index 23e5bf639..936107393 100644 --- a/pkg/blockchain/evm/interface.go +++ b/pkg/blockchain/evm/interface.go @@ -5,17 +5,27 @@ import ( ethereum "github.com/ethereum/go-ethereum" "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" ) type HandleEvent func(ctx context.Context, eventLog types.Log) error -// ContractEventGetter is used by Listener for resumption and deduplication. +// ContractEventGetter is used by Listener for resumption, deduplication, and +// reconciliation-walk queries. type ContractEventGetter interface { // GetLatestContractEventBlockNumber returns the block to resume from (0 = start fresh). GetLatestContractEventBlockNumber(contractAddress string, blockchainID uint64) (lastBlock uint64, err error) - // IsContractEventPresent checks whether a specific event was already processed. - IsContractEventPresent(blockchainID, blockNumber uint64, txHash string, logIndex uint32) (isPresent bool, err error) + // IsContractEventProcessed reports whether an event identified by (txHash, logIndex, blockchainID) + // has already been committed, regardless of which block it appeared in. + IsContractEventProcessed(txHash string, logIndex uint32, blockchainID uint64) (bool, error) + // GetLatestContractEventBlockHashAndNumber returns the block_number and block_hash of + // the highest stored event. Returns (0, "", nil) when no rows exist. 
+ GetLatestContractEventBlockHashAndNumber(contractAddress string, blockchainID uint64) (blockNumber uint64, blockHash string, err error) + // GetPreviousDistinctBlockHash returns the block_number and block_hash of the highest + // stored event with block_number strictly below belowBlockNumber. Returns (0, "", nil) + // when no such row exists (genesis fallback). + GetPreviousDistinctBlockHash(contractAddress string, blockchainID uint64, belowBlockNumber uint64) (blockNumber uint64, blockHash string, err error) } type AssetStore interface { @@ -35,4 +45,9 @@ type AssetStore interface { type EVMClient interface { ethereum.ChainStateReader bind.ContractBackend + // HeaderByHash is used by the gate's block-timestamp fetcher and by the + // Listener's age-based routing of Phase 1 events. It returns whatever header + // the node has for the given hash (which may be a side-chain header) — it is + // NOT suitable for canonicality checks; use HeaderByNumber for that. + HeaderByHash(ctx context.Context, hash common.Hash) (*types.Header, error) } diff --git a/pkg/blockchain/evm/listener.go b/pkg/blockchain/evm/listener.go index 1e2a013d7..51c35f5ac 100644 --- a/pkg/blockchain/evm/listener.go +++ b/pkg/blockchain/evm/listener.go @@ -9,7 +9,6 @@ import ( "time" "github.com/ethereum/go-ethereum" - "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/layer-3/nitrolite/pkg/log" @@ -25,26 +24,50 @@ const ( // deduplicated delivery even across restarts. Cancel the context passed to Listen // for graceful shutdown. 
type Listener struct { - contractAddress common.Address - client bind.ContractBackend - blockchainID uint64 - blockStep uint64 // max blocks per FilterLogs call during reconciliation - logger log.Logger - handleEvent HandleEvent - eventGetter ContractEventGetter + contractAddress common.Address + client EVMClient + blockchainID uint64 + blockStep uint64 // max blocks per FilterLogs call during reconciliation + confirmationDelay time.Duration // routing threshold for Phase 1 events; 0 disables age-based routing + logger log.Logger + handleEvent HandleEvent // live events and recent historical events; typically the ConfirmationGate + handleHistoricalEvent HandleEvent // historical events older than confirmationDelay; typically the reactor directly + eventGetter ContractEventGetter + + // Single-entry block-timestamp cache for ensureBlockTimestamp. The listener's + // processEvents loop is strictly serial (Phase 1 drains before Phase 2, each + // phase processes one event at a time), so these fields require no mutex. + lastBlockHash common.Hash + lastBlockTimestamp time.Time } // NewListener creates a Listener. blockStep controls how many blocks are fetched // per RPC call during historical reconciliation. -func NewListener(contractAddress common.Address, client bind.ContractBackend, blockchainID uint64, blockStep uint64, logger log.Logger, eventHandler HandleEvent, eventGetter ContractEventGetter) *Listener { +// +// confirmationDelay controls per-event routing for Phase 1 (historical) events: +// - When 0: every historical event is routed to historicalEventHandler. +// - When > 0: each event's block timestamp is fetched via HeaderByHash. Events older +// than confirmationDelay are routed to historicalEventHandler (their block is past +// the reorg window, so they are safe to forward directly). 
Events younger than +// confirmationDelay are routed to eventHandler so they pass through the gate — +// historical replay reaching very recent blocks is no safer than live delivery +// and the gate must still protect against reorgs of those blocks. +// +// Live (Phase 2) events always flow to eventHandler. +// +// eventHandler is typically the ConfirmationGate; historicalEventHandler is typically +// the reactor directly. The two handlers may be the same function when no gate is in use. +func NewListener(contractAddress common.Address, client EVMClient, blockchainID uint64, blockStep uint64, confirmationDelay time.Duration, logger log.Logger, eventHandler HandleEvent, historicalEventHandler HandleEvent, eventGetter ContractEventGetter) *Listener { return &Listener{ - contractAddress: contractAddress, - client: client, - blockchainID: blockchainID, - blockStep: blockStep, - logger: logger.WithName("evm"), - handleEvent: eventHandler, - eventGetter: eventGetter, + contractAddress: contractAddress, + client: client, + blockchainID: blockchainID, + blockStep: blockStep, + confirmationDelay: confirmationDelay, + logger: logger.WithName("evm"), + handleEvent: eventHandler, + handleHistoricalEvent: historicalEventHandler, + eventGetter: eventGetter, } } @@ -106,9 +129,9 @@ func (l *Listener) logBackOff(count uint64, originator string) (time.Duration, b // On subscription failure it retries with exponential backoff. Returns non-nil only // when the handler or the event-presence check fails. 
func (l *Listener) listenEvents(ctx context.Context) error { - lastBlock, err := l.eventGetter.GetLatestContractEventBlockNumber(l.contractAddress.String(), l.blockchainID) + lastBlock, err := findCommonAncestor(ctx, l.client, l.eventGetter, l.contractAddress.String(), l.blockchainID, l.logger) if err != nil { - return fmt.Errorf("failed to get latest processed block: %w", err) + return fmt.Errorf("failed to find common ancestor: %w", err) } var backOffCount atomic.Uint64 @@ -186,10 +209,15 @@ func (l *Listener) listenEvents(ctx context.Context) error { // processEvents runs two sequential phases: historical (historicalCh until closed), // then live (currentCh until ctx or subscription death). In each phase the first -// events are checked via IsContractEventPresent; once a non-present event is found +// events are checked via IsContractEventProcessed; once a non-present event is found // the check is skipped for the rest of that phase (events are strictly ordered). // Returns nil on subscription loss (reconnect), non-nil on handler/check failure. // +// Both the listener (here) and the reactor (channel_hub_reactor.go) call +// IsContractEventProcessed, so both share a dependency on DB availability. A +// transient Postgres hiccup at either call site surfaces the error, unsubscribes, +// and restarts the process — consistent behavior across the pipeline. +// // Listener ordering & idempotency invariant // ----------------------------------------- // Downstream handlers (and any code reasoning about the relative arrival order @@ -204,10 +232,13 @@ func (l *Listener) listenEvents(ctx context.Context) error { // reconcileBlockRange + live subscription preserve chain order within each // phase. // -// 2. Idempotent resume. On restart, IsContractEventPresent gates the first +// 2. Idempotent resume. On restart, IsContractEventProcessed gates the first // event of each phase: events already persisted in a prior run are skipped // rather than reprocessed. 
Once a non-present event is seen the check is // dropped for the remainder of the phase (safe because of guarantee 1). +// The dedup check identifies events by (txHash, logIndex, blockchainID); +// reorged events with a re-shuffled block-level log index are not detected +// here and rely on reactor business-logic idempotency. // // 3. Cursor advances only on handler success. lastBlock is updated on each // live event, but a non-nil return from handleEvent unsubscribes and @@ -215,11 +246,18 @@ func (l *Listener) listenEvents(ctx context.Context) error { // failed event; the next Listen invocation re-fetches from the same // cursor. Transient handler failures retry instead of silently dropping. // -// 4. Reorged-out logs are discarded. Live deliveries with Removed=true are -// dropped. A reorg that fully removes a ChannelChallenged log also -// removes the matching on-chain status transition to DISPUTED, so the -// contract's Path-1 (challenge-timeout) close cannot subsequently fire -// for the same channel. +// 4. Reorged-out logs are routed by delay configuration. +// When confirmationDelay > 0, live deliveries with Removed=true are +// forwarded to the handler (ConfirmationGate) so the gate can cancel +// any pending confirmation timer for that event; the gate filters them +// before forwarding confirmed events to the reactor. When +// confirmationDelay == 0, there is no gate to consume the removal +// signal, so the listener drops Removed=true logs at the Phase 2 +// boundary — matching pre-PR behavior. In both modes the reactor +// never sees Removed=true logs directly. The lastBlock cursor and +// IsContractEventProcessed dedup check are skipped for Removed=true +// events so neither the resume cursor nor the idempotency guard is +// corrupted by a reorg signal. 
// // A consequence used by the nitronode event handlers: for any channel that // closes via Path-1 (challenge-timeout, ChannelHub Closed-from-DISPUTED), @@ -250,7 +288,7 @@ func (l *Listener) processEvents( break } if !historicalCheckDone { - present, err := l.eventGetter.IsContractEventPresent(l.blockchainID, eventLog.BlockNumber, eventLog.TxHash.Hex(), uint32(eventLog.Index)) + present, err := l.eventGetter.IsContractEventProcessed(eventLog.TxHash.Hex(), uint32(eventLog.Index), l.blockchainID) if err != nil { eventSubscription.Unsubscribe() return fmt.Errorf("failed to check historical event presence: %w", err) @@ -263,7 +301,22 @@ func (l *Listener) processEvents( } l.logger.Debug("received historical event", "blockchainID", l.blockchainID, "contractAddress", l.contractAddress.String(), "blockNumber", eventLog.BlockNumber, "logIndex", eventLog.Index) evCtx := log.SetContextLogger(context.Background(), l.logger) - if err := l.handleEvent(evCtx, eventLog); err != nil { + eventLog, err := l.ensureBlockTimestamp(ctx, eventLog) + if err != nil { + l.logger.Warn("failed to ensure block timestamp for historical event, routing through gate", + "error", err, + "blockchainID", l.blockchainID, + "blockNumber", eventLog.BlockNumber, + "blockHash", eventLog.BlockHash.Hex(), + ) + if err := l.handleEvent(evCtx, eventLog); err != nil { + eventSubscription.Unsubscribe() + return err + } + continue + } + handler := l.routeHistoricalEvent(eventLog) + if err := handler(evCtx, eventLog); err != nil { eventSubscription.Unsubscribe() return err } @@ -287,27 +340,47 @@ func (l *Listener) processEvents( eventSubscription.Unsubscribe() return nil case eventLog := <-currentCh: - // During a chain reorganization geth re-delivers orphaned logs with - // Removed: true. Skip them to avoid applying phantom state changes. 
- if eventLog.Removed { - l.logger.Warn("skipping removed log from reorg", "blockchainID", l.blockchainID, "blockNumber", eventLog.BlockNumber, "logIndex", eventLog.Index, "txHash", eventLog.TxHash.Hex()) + if eventLog.Removed && l.confirmationDelay == 0 { + l.logger.Warn("dropping Removed=true live event on no-gate path", + "blockchainID", l.blockchainID, + "contractAddress", l.contractAddress.String(), + "blockNumber", eventLog.BlockNumber, + "blockHash", eventLog.BlockHash.Hex(), + "txHash", eventLog.TxHash.Hex(), + "logIndex", eventLog.Index, + ) continue } - *lastBlock = eventLog.BlockNumber - if !currentCheckDone { - present, err := l.eventGetter.IsContractEventPresent(l.blockchainID, eventLog.BlockNumber, eventLog.TxHash.Hex(), uint32(eventLog.Index)) - if err != nil { - eventSubscription.Unsubscribe() - return fmt.Errorf("failed to check current event presence: %w", err) - } - if present { - l.logger.Debug("skipping already present current event", "blockchainID", l.blockchainID, "blockNumber", eventLog.BlockNumber, "logIndex", eventLog.Index) - continue + if !eventLog.Removed { + *lastBlock = eventLog.BlockNumber + if !currentCheckDone { + present, err := l.eventGetter.IsContractEventProcessed(eventLog.TxHash.Hex(), uint32(eventLog.Index), l.blockchainID) + if err != nil { + eventSubscription.Unsubscribe() + return fmt.Errorf("failed to check current event presence: %w", err) + } + if present { + l.logger.Debug("skipping already present current event", "blockchainID", l.blockchainID, "blockNumber", eventLog.BlockNumber, "logIndex", eventLog.Index) + continue + } + currentCheckDone = true } - currentCheckDone = true + l.logger.Debug("received current event", "blockchainID", l.blockchainID, "contractAddress", l.contractAddress.String(), "blockNumber", eventLog.BlockNumber, "logIndex", eventLog.Index) } - l.logger.Debug("received current event", "blockchainID", l.blockchainID, "contractAddress", l.contractAddress.String(), "blockNumber", eventLog.BlockNumber, 
"logIndex", eventLog.Index) evCtx := log.SetContextLogger(context.Background(), l.logger) + if !eventLog.Removed { + ensured, err := l.ensureBlockTimestamp(ctx, eventLog) + if err != nil { + l.logger.Warn("failed to ensure block timestamp for current event, routing through gate", + "error", err, + "blockchainID", l.blockchainID, + "blockNumber", eventLog.BlockNumber, + "blockHash", eventLog.BlockHash.Hex(), + ) + } else { + eventLog = ensured + } + } if err := l.handleEvent(evCtx, eventLog); err != nil { eventSubscription.Unsubscribe() return err @@ -397,11 +470,68 @@ func (l *Listener) reconcileBlockRange( } } -// TODO: the current reorg handling (skipping Removed logs) prevents new damage but -// does not undo side effects from the original delivery if it was already processed. -// A more robust approach is a confirmation buffer: hold live logs in memory keyed by -// block number, only apply them after N confirmations (new blocks on top), and discard -// any log that arrives with Removed: true while still in the buffer. This adds N blocks -// of latency (~12s × N on mainnet) but guarantees that only finalized events reach the -// handler. On L2s where reorgs are near-zero, the latency trade-off may not be worth it, -// so this should be configurable per chain. +// ensureBlockTimestamp returns eventLog with BlockTimestamp guaranteed non-zero. +// +// Most EVM chains and providers populate BlockTimestamp in the JSON-RPC response, +// in which case eventLog is returned unchanged. For chains/providers that do NOT +// populate it (notably Avalanche C-Chain via ava-labs/libevm, and older BSC +// dataseed nodes), this method fetches the block header via HeaderByHash and +// populates the field on the local-stack copy of types.Log. +// +// Single-entry cache (lastBlockHash) elides repeat fetches for consecutive events +// from the same block — the only relevant case because the listener delivers events +// in block order. 
+// +// On HeaderByHash failure, returns the original eventLog and the error. Callers +// decide whether to fall back to the gate (which is the conservative behavior; +// see live-path and routeHistoricalEvent below). +func (l *Listener) ensureBlockTimestamp(ctx context.Context, eventLog types.Log) (types.Log, error) { + if eventLog.BlockTimestamp != 0 { + return eventLog, nil + } + + if eventLog.BlockHash == l.lastBlockHash && !l.lastBlockTimestamp.IsZero() { + eventLog.BlockTimestamp = uint64(l.lastBlockTimestamp.Unix()) + return eventLog, nil + } + + headerCtx, cancel := context.WithTimeout(ctx, rpcRequestTimeout) + defer cancel() + header, err := l.client.HeaderByHash(headerCtx, eventLog.BlockHash) + if err != nil { + return eventLog, err + } + + blockTime := time.Unix(int64(header.Time), 0) + l.lastBlockHash = eventLog.BlockHash + l.lastBlockTimestamp = blockTime + eventLog.BlockTimestamp = header.Time + return eventLog, nil +} + +// routeHistoricalEvent chooses the handler for a Phase 1 event based on the age of +// its block. Events whose block timestamp is older than confirmationDelay are routed +// to handleHistoricalEvent (they are past the reorg window and safe to forward +// directly). Recent events — whose blocks may still be reorged — are routed to +// handleEvent so they pass through the gate. When confirmationDelay is zero, every +// event is routed to handleHistoricalEvent. +// +// Reads eventLog.BlockTimestamp directly — callers are expected to have invoked +// ensureBlockTimestamp first. Defense-in-depth: if BlockTimestamp is zero (caller +// failed to ensure it), route through handleEvent (the gate) as the conservative +// choice. 
+func (l *Listener) routeHistoricalEvent(eventLog types.Log) HandleEvent { + if l.confirmationDelay == 0 { + return l.handleHistoricalEvent + } + + if eventLog.BlockTimestamp == 0 { + return l.handleEvent + } + + blockTime := time.Unix(int64(eventLog.BlockTimestamp), 0) + if time.Since(blockTime) < l.confirmationDelay { + return l.handleEvent + } + return l.handleHistoricalEvent +} diff --git a/pkg/blockchain/evm/listener_test.go b/pkg/blockchain/evm/listener_test.go index 8b339dc36..57305e67f 100644 --- a/pkg/blockchain/evm/listener_test.go +++ b/pkg/blockchain/evm/listener_test.go @@ -45,7 +45,7 @@ func TestNewListener(t *testing.T) { addr := common.HexToAddress("0x123") eventGetter := new(MockContractEventGetter) - l := NewListener(addr, mockClient, 1, 100, logger, nil, eventGetter) + l := NewListener(addr, mockClient, 1, 100, 0, logger, nil, nil, eventGetter) require.NotNil(t, l) assert.Equal(t, addr, l.contractAddress) assert.Equal(t, uint64(1), l.blockchainID) @@ -59,7 +59,8 @@ func TestListener_Listen_CurrentEvents(t *testing.T) { addr := common.HexToAddress("0x123") eventGetter := new(MockContractEventGetter) - eventGetter.On("GetLatestContractEventBlockNumber", addr.String(), uint64(1)).Return(uint64(0), nil) + // No stored events → findCommonAncestor returns 0 immediately (genesis). 
+ eventGetter.On("GetLatestContractEventBlockHashAndNumber", addr.String(), uint64(1)).Return(uint64(0), "", nil) ctx, cancel := context.WithCancel(context.Background()) t.Cleanup(cancel) @@ -72,7 +73,7 @@ func TestListener_Listen_CurrentEvents(t *testing.T) { return nil } - listener := NewListener(addr, mockClient, 1, 100, logger, handleEvent, eventGetter) + listener := NewListener(addr, mockClient, 1, 100, 0, logger, handleEvent, handleEvent, eventGetter) // Mock SubscribeFilterLogs sub := &MockSubscription{ @@ -80,17 +81,17 @@ func TestListener_Listen_CurrentEvents(t *testing.T) { unsub: func() {}, } - // Mock SubscribeFilterLogs: send a log immediately + // Mock SubscribeFilterLogs: send a log immediately. BlockTimestamp is set so + // the listener's ensureBlockTimestamp short-circuits and does not call HeaderByHash. mockClient.On("SubscribeFilterLogs", mock.Anything, mock.Anything, mock.Anything). Run(func(args mock.Arguments) { ch := args.Get(2).(chan<- types.Log) - // Send a log immediately - ch <- types.Log{BlockNumber: 10, Index: 1} + ch <- types.Log{BlockNumber: 10, Index: 1, BlockTimestamp: uint64(time.Now().Unix())} }). Return(sub, nil) - // The first current event will trigger IsContractEventPresent check - eventGetter.On("IsContractEventPresent", uint64(1), uint64(10), mock.Anything, uint32(1)).Return(false, nil) + // The first current event will trigger IsContractEventProcessed check + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(1), uint64(1)).Return(false, nil) go listener.Listen(ctx, func(err error) {}) @@ -109,7 +110,7 @@ func TestListener_ReconcileBlockRange(t *testing.T) { addr := common.HexToAddress("0x123") eventGetter := new(MockContractEventGetter) - listener := NewListener(addr, mockClient, 1, 10, logger, nil, eventGetter) + listener := NewListener(addr, mockClient, 1, 10, 0, logger, nil, nil, eventGetter) // Setup FilterLogs mock // We expect a range fetch. start=100, step=10 -> end=110. current=120. 
@@ -154,13 +155,17 @@ func TestListener_Listen_HistoricalAndCurrent(t *testing.T) { logger := log.NewNoopLogger() addr := common.HexToAddress("0x123") - // Start from block 100 + // Start from block 100, canonical: the reconciler will compute its hash via HeaderByNumber(100) + // and compare against the stored hash. We construct a deterministic Header so we can pre-compute + // the hash and feed it back as the stored value. + canonicalAt100 := &types.Header{Number: big.NewInt(100), Difficulty: big.NewInt(1)} + blockHash100 := canonicalAt100.Hash() eventGetter := new(MockContractEventGetter) - eventGetter.On("GetLatestContractEventBlockNumber", addr.String(), uint64(1)).Return(uint64(100), nil) + eventGetter.On("GetLatestContractEventBlockHashAndNumber", addr.String(), uint64(1)).Return(uint64(100), blockHash100.Hex(), nil) // Historical event at block 105 is not present - eventGetter.On("IsContractEventPresent", uint64(1), uint64(105), mock.Anything, uint32(0)).Return(false, nil) + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil) // Current event at block 111 — after historical is done, first current event triggers check - eventGetter.On("IsContractEventPresent", uint64(1), uint64(111), mock.Anything, uint32(0)).Return(false, nil) + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil) ctx, cancel := context.WithCancel(context.Background()) t.Cleanup(cancel) @@ -182,14 +187,20 @@ func TestListener_Listen_HistoricalAndCurrent(t *testing.T) { return nil } - listener := NewListener(addr, mockClient, 1, 10, logger, handleEvent, eventGetter) + listener := NewListener(addr, mockClient, 1, 10, 0, logger, handleEvent, handleEvent, eventGetter) - // Mock HeaderByNumber (current tip is 110) + // findCommonAncestor: HeaderByNumber(100) returns the same header we hashed above, + // so the stored hash matches and block 100 is confirmed canonical. 
+ mockClient.On("HeaderByNumber", mock.Anything, mock.MatchedBy(func(n *big.Int) bool { + return n != nil && n.Cmp(big.NewInt(100)) == 0 + })).Return(canonicalAt100, nil) + + // Mock HeaderByNumber(nil) for the chain-tip lookup (current tip is 110). currentHeader := &types.Header{Number: big.NewInt(110)} mockClient.On("HeaderByNumber", mock.Anything, (*big.Int)(nil)).Return(currentHeader, nil) - // Mock FilterLogs (100-110) - histLogs := []types.Log{{BlockNumber: 105, Index: 0}} + // Mock FilterLogs (100-110). BlockTimestamp is set so ensureBlockTimestamp short-circuits. + histLogs := []types.Log{{BlockNumber: 105, Index: 0, BlockTimestamp: uint64(time.Now().Unix())}} mockClient.On("FilterLogs", mock.Anything, mock.Anything).Return(histLogs, nil) // Mock SubscribeFilterLogs @@ -197,8 +208,7 @@ func TestListener_Listen_HistoricalAndCurrent(t *testing.T) { mockClient.On("SubscribeFilterLogs", mock.Anything, mock.Anything, mock.Anything). Run(func(args mock.Arguments) { ch := args.Get(2).(chan<- types.Log) - // Send a current log - ch <- types.Log{BlockNumber: 111, Index: 0} + ch <- types.Log{BlockNumber: 111, Index: 0, BlockTimestamp: uint64(time.Now().Unix())} }). Return(sub, nil) @@ -224,22 +234,24 @@ func TestProcessEvents_DedupSkipsPresent(t *testing.T) { return nil } - listener := NewListener(addr, new(MockEVMClient), 1, 10, logger, handleEvent, eventGetter) + listener := NewListener(addr, new(MockEVMClient), 1, 10, 0, logger, handleEvent, handleEvent, eventGetter) // Historical: 3 events. First 2 are present (skipped), 3rd is not (handled). - // After the 3rd, the check should stop — no IsContractEventPresent call for events 4+. + // After the 3rd, the check should stop — no IsContractEventProcessed call for events 4+. + // BlockTimestamp is set so ensureBlockTimestamp short-circuits. 
+ ts := uint64(time.Now().Unix()) historicalCh := make(chan types.Log, 5) - historicalCh <- types.Log{BlockNumber: 100, Index: 0, TxHash: common.HexToHash("0xaa")} - historicalCh <- types.Log{BlockNumber: 101, Index: 0, TxHash: common.HexToHash("0xbb")} - historicalCh <- types.Log{BlockNumber: 102, Index: 0, TxHash: common.HexToHash("0xcc")} - historicalCh <- types.Log{BlockNumber: 103, Index: 0, TxHash: common.HexToHash("0xdd")} - historicalCh <- types.Log{BlockNumber: 104, Index: 0, TxHash: common.HexToHash("0xee")} + historicalCh <- types.Log{BlockNumber: 100, Index: 0, TxHash: common.HexToHash("0xaa"), BlockTimestamp: ts} + historicalCh <- types.Log{BlockNumber: 101, Index: 0, TxHash: common.HexToHash("0xbb"), BlockTimestamp: ts} + historicalCh <- types.Log{BlockNumber: 102, Index: 0, TxHash: common.HexToHash("0xcc"), BlockTimestamp: ts} + historicalCh <- types.Log{BlockNumber: 103, Index: 0, TxHash: common.HexToHash("0xdd"), BlockTimestamp: ts} + historicalCh <- types.Log{BlockNumber: 104, Index: 0, TxHash: common.HexToHash("0xee"), BlockTimestamp: ts} close(historicalCh) // First two are present, third is not - eventGetter.On("IsContractEventPresent", uint64(1), uint64(100), mock.Anything, uint32(0)).Return(true, nil).Once() - eventGetter.On("IsContractEventPresent", uint64(1), uint64(101), mock.Anything, uint32(0)).Return(true, nil).Once() - eventGetter.On("IsContractEventPresent", uint64(1), uint64(102), mock.Anything, uint32(0)).Return(false, nil).Once() + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(true, nil).Once() + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(true, nil).Once() + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil).Once() // No mock for 103, 104 — if called, mock will panic, proving the check stopped sub := &MockSubscription{errChan: make(chan error)} @@ -275,13 +287,14 @@ func 
TestProcessEvents_SubscriptionErrorDuringPhase1(t *testing.T) { return nil } - listener := NewListener(addr, new(MockEVMClient), 1, 10, logger, handleEvent, eventGetter) + listener := NewListener(addr, new(MockEVMClient), 1, 10, 0, logger, handleEvent, handleEvent, eventGetter) - // Historical channel with events that will block (not closed yet) + // Historical channel with events that will block (not closed yet). BlockTimestamp + // is set so ensureBlockTimestamp short-circuits. historicalCh := make(chan types.Log, 2) - historicalCh <- types.Log{BlockNumber: 100, Index: 0, TxHash: common.HexToHash("0xaa")} + historicalCh <- types.Log{BlockNumber: 100, Index: 0, TxHash: common.HexToHash("0xaa"), BlockTimestamp: uint64(time.Now().Unix())} - eventGetter.On("IsContractEventPresent", uint64(1), uint64(100), mock.Anything, uint32(0)).Return(false, nil) + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil) // Subscription that will error shortly subErrCh := make(chan error, 1) @@ -303,6 +316,301 @@ func TestProcessEvents_SubscriptionErrorDuringPhase1(t *testing.T) { assert.Equal(t, []uint64{100}, handledBlocks) } +// TestListener_PhaseHandlerRouting verifies the age-based routing of Phase 1 events: +// - Historical events older than confirmationDelay → handleHistoricalEvent (direct, gate bypass) +// - Historical events younger than confirmationDelay → handleEvent (through gate; still in reorg window) +// - Live (Phase 2) events → handleEvent (always) +// - HeaderByHash fetch failures → handleEvent (conservative fallback) +// +// See nitronode/docs/reorg-fix.md §4.4 step 5. 
+func TestListener_PhaseHandlerRouting(t *testing.T) { + t.Parallel() + logger := log.NewNoopLogger() + addr := common.HexToAddress("0x123") + confirmationDelay := 60 * time.Second + + mockClient := new(MockEVMClient) + eventGetter := new(MockContractEventGetter) + + var ( + mu sync.Mutex + historicalLogs []types.Log + liveLogs []types.Log + ) + historicalHandler := func(_ context.Context, l types.Log) error { + mu.Lock() + defer mu.Unlock() + historicalLogs = append(historicalLogs, l) + return nil + } + liveHandler := func(_ context.Context, l types.Log) error { + mu.Lock() + defer mu.Unlock() + liveLogs = append(liveLogs, l) + return nil + } + + listener := NewListener(addr, mockClient, 1, 10, confirmationDelay, logger, liveHandler, historicalHandler, eventGetter) + + // Old historical event (block timestamp 10 minutes ago) — should bypass the gate. + oldHash := common.HexToHash("0xa1") + oldLog := types.Log{BlockNumber: 100, Index: 0, TxHash: common.HexToHash("0xaaa"), BlockHash: oldHash} + oldHeader := &types.Header{Number: big.NewInt(100), Time: uint64(time.Now().Add(-10 * time.Minute).Unix())} + mockClient.On("HeaderByHash", mock.Anything, oldHash).Return(oldHeader, nil).Once() + + // Recent historical event (block timestamp 5 seconds ago) — should flow through the gate. + recentHash := common.HexToHash("0xa2") + recentLog := types.Log{BlockNumber: 101, Index: 0, TxHash: common.HexToHash("0xbbb"), BlockHash: recentHash} + recentHeader := &types.Header{Number: big.NewInt(101), Time: uint64(time.Now().Add(-5 * time.Second).Unix())} + mockClient.On("HeaderByHash", mock.Anything, recentHash).Return(recentHeader, nil).Once() + + // Historical event whose HeaderByHash fetch fails — should fall back to the gate. 
+ failHash := common.HexToHash("0xa3") + failLog := types.Log{BlockNumber: 102, Index: 0, TxHash: common.HexToHash("0xccc"), BlockHash: failHash} + mockClient.On("HeaderByHash", mock.Anything, failHash).Return(nil, fmt.Errorf("rpc failure")).Once() + + // Live event — always to liveHandler regardless of age. BlockTimestamp is set + // so ensureBlockTimestamp short-circuits on the Phase 2 path (avoiding a + // HeaderByHash call we'd otherwise have to mock). + currentLog := types.Log{BlockNumber: 200, Index: 0, TxHash: common.HexToHash("0xddd"), BlockHash: common.HexToHash("0xb1"), BlockTimestamp: uint64(time.Now().Unix())} + + historicalCh := make(chan types.Log, 3) + historicalCh <- oldLog + historicalCh <- recentLog + historicalCh <- failLog + close(historicalCh) + + currentCh := make(chan types.Log, 1) + currentCh <- currentLog + + // Only the first historical event triggers IsContractEventProcessed (then the check is dropped for the phase); + // the first live event triggers it again for Phase 2. 
+ eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil).Once() + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil).Once() + + sub := &MockSubscription{errChan: make(chan error, 1), unsub: func() {}} + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(100 * time.Millisecond) + cancel() + }() + + var lastBlock uint64 + err := listener.processEvents(ctx, sub, historicalCh, currentCh, &lastBlock) + require.NoError(t, err) + + mu.Lock() + defer mu.Unlock() + require.Len(t, historicalLogs, 1, "only the old historical event should bypass the gate") + assert.Equal(t, uint64(100), historicalLogs[0].BlockNumber) + require.Len(t, liveLogs, 3, "recent + fallback historical events plus the live event must reach the live handler") + assert.Equal(t, uint64(101), liveLogs[0].BlockNumber, "recent historical event routed through the gate") + assert.Equal(t, uint64(102), liveLogs[1].BlockNumber, "HeaderByHash-failed historical event routed through the gate (conservative fallback)") + assert.Equal(t, uint64(200), liveLogs[2].BlockNumber, "live event always routed to the gate") + + mockClient.AssertExpectations(t) + eventGetter.AssertExpectations(t) +} + +// TestListener_PhaseHandlerRouting_DelayZero verifies that when confirmationDelay is 0, +// every historical event is routed to handleHistoricalEvent without any HeaderByHash +// fetch — preserving the legacy bypass for gate-disabled chains. 
+func TestListener_PhaseHandlerRouting_DelayZero(t *testing.T) { + t.Parallel() + logger := log.NewNoopLogger() + addr := common.HexToAddress("0x123") + + mockClient := new(MockEVMClient) + eventGetter := new(MockContractEventGetter) + + var ( + mu sync.Mutex + historicalLogs []types.Log + ) + historicalHandler := func(_ context.Context, l types.Log) error { + mu.Lock() + defer mu.Unlock() + historicalLogs = append(historicalLogs, l) + return nil + } + liveHandler := func(_ context.Context, _ types.Log) error { + t.Fatal("live handler must not be called when delay is 0 and only Phase 1 events are present") + return nil + } + + listener := NewListener(addr, mockClient, 1, 10, 0, logger, liveHandler, historicalHandler, eventGetter) + + // BlockTimestamp populated by the upstream RPC — ensureBlockTimestamp short-circuits + // and routeHistoricalEvent routes directly to historicalHandler because delay == 0. + histLog := types.Log{BlockNumber: 100, Index: 0, TxHash: common.HexToHash("0xaaa"), BlockHash: common.HexToHash("0xa1"), BlockTimestamp: uint64(time.Now().Unix())} + historicalCh := make(chan types.Log, 1) + historicalCh <- histLog + close(historicalCh) + currentCh := make(chan types.Log) + + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil).Once() + + sub := &MockSubscription{errChan: make(chan error, 1), unsub: func() {}} + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + var lastBlock uint64 + err := listener.processEvents(ctx, sub, historicalCh, currentCh, &lastBlock) + require.NoError(t, err) + + mu.Lock() + defer mu.Unlock() + require.Len(t, historicalLogs, 1) + assert.Equal(t, uint64(100), historicalLogs[0].BlockNumber) + + // HeaderByHash must NOT have been called — the upstream RPC populated BlockTimestamp, + // so ensureBlockTimestamp short-circuits. 
+ mockClient.AssertNotCalled(t, "HeaderByHash") +} + +func TestListener_RemovedLog_ForwardedToHandler(t *testing.T) { + t.Parallel() + + t.Run("WithGate", func(t *testing.T) { + t.Parallel() + logger := log.NewNoopLogger() + addr := common.HexToAddress("0x123") + eventGetter := new(MockContractEventGetter) + + // Track which logs reached handleEvent. + var handledLogs []types.Log + handleEvent := func(ctx context.Context, eventLog types.Log) error { + handledLogs = append(handledLogs, eventLog) + return nil + } + + // confirmationDelay > 0: the gate is active; Removed=true logs MUST be forwarded. + const delay = 30 * time.Second + listener := NewListener(addr, new(MockEVMClient), 1, 10, delay, logger, handleEvent, handleEvent, eventGetter) + + // No historical events. + historicalCh := make(chan types.Log) + close(historicalCh) + + currentCh := make(chan types.Log, 2) + + // Event 1: non-Removed at block 10 — triggers IsContractEventProcessed check, + // advances lastBlock, sets currentCheckDone = true. BlockTimestamp is set so + // ensureBlockTimestamp short-circuits. + normalLog := types.Log{BlockNumber: 10, Index: 0, TxHash: common.HexToHash("0xabc"), BlockTimestamp: uint64(time.Now().Unix())} + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil).Once() + + // Event 2: Removed=true at block 11 — must NOT advance lastBlock, must NOT call + // IsContractEventProcessed, but MUST reach handleEvent (gate needs the removal signal). + removedLog := types.Log{BlockNumber: 11, Index: 0, TxHash: common.HexToHash("0xdef"), Removed: true} + + currentCh <- normalLog + currentCh <- removedLog + + sub := &MockSubscription{errChan: make(chan error, 1), unsub: func() {}} + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + // Give processEvents enough time to drain both buffered events, then cancel. 
+ time.Sleep(100 * time.Millisecond) + cancel() + }() + + var lastBlock uint64 + err := listener.processEvents(ctx, sub, historicalCh, currentCh, &lastBlock) + require.NoError(t, err) + + // Both events must have reached handleEvent. + require.Len(t, handledLogs, 2, "handleEvent must be called for both the normal and the Removed event when gate is active") + + // Verify first call was the normal log and second was the removed log. + assert.Equal(t, uint64(10), handledLogs[0].BlockNumber) + assert.False(t, handledLogs[0].Removed) + assert.Equal(t, uint64(11), handledLogs[1].BlockNumber) + assert.True(t, handledLogs[1].Removed) + + // lastBlock must NOT have advanced past the normal event's block. + assert.Equal(t, uint64(10), lastBlock, "lastBlock must not be advanced by a Removed=true event") + + // IsContractEventProcessed must have been called exactly once (for the normal log only). + eventGetter.AssertNumberOfCalls(t, "IsContractEventProcessed", 1) + eventGetter.AssertExpectations(t) + }) + + t.Run("NoGate", func(t *testing.T) { + t.Parallel() + logger := log.NewNoopLogger() + addr := common.HexToAddress("0x123") + eventGetter := new(MockContractEventGetter) + + // Track which logs reached handleEvent. + var handledLogs []types.Log + handleEvent := func(ctx context.Context, eventLog types.Log) error { + handledLogs = append(handledLogs, eventLog) + return nil + } + + // confirmationDelay == 0: no gate; Removed=true logs must be dropped at Phase 2 boundary. + listener := NewListener(addr, new(MockEVMClient), 1, 10, 0, logger, handleEvent, handleEvent, eventGetter) + + // No historical events. + historicalCh := make(chan types.Log) + close(historicalCh) + + currentCh := make(chan types.Log, 3) + + // Event 1: non-Removed at block 10 — advances lastBlock, triggers dedup check. + // BlockTimestamp is set so ensureBlockTimestamp short-circuits. 
+ normalLog := types.Log{BlockNumber: 10, Index: 0, TxHash: common.HexToHash("0xabc"), BlockTimestamp: uint64(time.Now().Unix())} + eventGetter.On("IsContractEventProcessed", mock.Anything, uint32(0), uint64(1)).Return(false, nil).Once() + + // Event 2: Removed=true at block 11 — must be dropped; must NOT reach handleEvent, + // must NOT advance lastBlock. + removedLog := types.Log{BlockNumber: 11, Index: 0, TxHash: common.HexToHash("0xdef"), Removed: true} + + // Event 3: another non-Removed at block 12 — must flow normally after the dropped removal. + // BlockTimestamp is set so ensureBlockTimestamp short-circuits; TxHash must be valid hex (HexToHash silently maps invalid input to the zero hash). + followupLog := types.Log{BlockNumber: 12, Index: 1, TxHash: common.HexToHash("0xfed"), BlockTimestamp: uint64(time.Now().Unix())} + + currentCh <- normalLog + currentCh <- removedLog + currentCh <- followupLog + + sub := &MockSubscription{errChan: make(chan error, 1), unsub: func() {}} + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + // Give processEvents enough time to drain all three buffered events, then cancel. + time.Sleep(100 * time.Millisecond) + cancel() + }() + + var lastBlock uint64 + err := listener.processEvents(ctx, sub, historicalCh, currentCh, &lastBlock) + require.NoError(t, err) + + // Only the two non-Removed events must have reached handleEvent. + require.Len(t, handledLogs, 2, "handleEvent must NOT be called for Removed=true when no gate is active") + assert.Equal(t, uint64(10), handledLogs[0].BlockNumber) + assert.False(t, handledLogs[0].Removed) + assert.Equal(t, uint64(12), handledLogs[1].BlockNumber) + assert.False(t, handledLogs[1].Removed) + + // lastBlock must reflect the last non-Removed event, not the removed one. + assert.Equal(t, uint64(12), lastBlock, "lastBlock must not be advanced by a Removed=true event") + + // IsContractEventProcessed must have been called exactly once (for the first normal log only; + // the follow-up log skips the check because currentCheckDone is already true). 
+ eventGetter.AssertNumberOfCalls(t, "IsContractEventProcessed", 1) + eventGetter.AssertExpectations(t) + }) +} + func TestReconcileBlockRange_ContextCancellation(t *testing.T) { t.Parallel() mockClient := new(MockEVMClient) @@ -310,7 +618,7 @@ func TestReconcileBlockRange_ContextCancellation(t *testing.T) { addr := common.HexToAddress("0x123") eventGetter := new(MockContractEventGetter) - listener := NewListener(addr, mockClient, 1, 10, logger, nil, eventGetter) + listener := NewListener(addr, mockClient, 1, 10, 0, logger, nil, nil, eventGetter) ctx, cancel := context.WithCancel(context.Background()) @@ -338,3 +646,113 @@ func TestReconcileBlockRange_ContextCancellation(t *testing.T) { assert.LessOrEqual(t, len(received), 1) mockClient.AssertNumberOfCalls(t, "FilterLogs", 1) } + +// TestEnsureBlockTimestamp_Populated: when BlockTimestamp is already set on the +// incoming log, ensureBlockTimestamp returns the log unchanged and does not call +// HeaderByHash. We prove the latter by leaving the mock unconfigured — any call +// would panic. +func TestEnsureBlockTimestamp_Populated(t *testing.T) { + t.Parallel() + mockClient := new(MockEVMClient) + logger := log.NewNoopLogger() + addr := common.HexToAddress("0x123") + eventGetter := new(MockContractEventGetter) + + listener := NewListener(addr, mockClient, 1, 10, 0, logger, nil, nil, eventGetter) + + originalTs := uint64(1700000000) + eventLog := types.Log{ + BlockNumber: 100, + BlockHash: common.HexToHash("0xabc"), + BlockTimestamp: originalTs, + } + + got, err := listener.ensureBlockTimestamp(context.Background(), eventLog) + require.NoError(t, err) + assert.Equal(t, originalTs, got.BlockTimestamp, "BlockTimestamp must be returned unchanged") + assert.Equal(t, eventLog.BlockHash, got.BlockHash) + mockClient.AssertNotCalled(t, "HeaderByHash") +} + +// TestEnsureBlockTimestamp_Fetch: when BlockTimestamp == 0, ensureBlockTimestamp +// calls HeaderByHash exactly once and populates BlockTimestamp from header.Time. 
+func TestEnsureBlockTimestamp_Fetch(t *testing.T) { + t.Parallel() + mockClient := new(MockEVMClient) + logger := log.NewNoopLogger() + addr := common.HexToAddress("0x123") + eventGetter := new(MockContractEventGetter) + + listener := NewListener(addr, mockClient, 1, 10, 0, logger, nil, nil, eventGetter) + + bh := common.HexToHash("0xabc") + headerTime := uint64(1700000000) + header := &types.Header{Number: big.NewInt(100), Time: headerTime} + mockClient.On("HeaderByHash", mock.Anything, bh).Return(header, nil).Once() + + eventLog := types.Log{BlockNumber: 100, BlockHash: bh} + + got, err := listener.ensureBlockTimestamp(context.Background(), eventLog) + require.NoError(t, err) + assert.Equal(t, headerTime, got.BlockTimestamp, "BlockTimestamp must be populated from header.Time") + mockClient.AssertExpectations(t) +} + +// TestEnsureBlockTimestamp_CacheHit: two consecutive events with the same BlockHash +// (both with BlockTimestamp == 0) must trigger exactly one HeaderByHash call. The +// second call reads from the single-entry cache. +func TestEnsureBlockTimestamp_CacheHit(t *testing.T) { + t.Parallel() + mockClient := new(MockEVMClient) + logger := log.NewNoopLogger() + addr := common.HexToAddress("0x123") + eventGetter := new(MockContractEventGetter) + + listener := NewListener(addr, mockClient, 1, 10, 0, logger, nil, nil, eventGetter) + + bh := common.HexToHash("0xabc") + headerTime := uint64(1700000000) + header := &types.Header{Number: big.NewInt(100), Time: headerTime} + // Set up exactly ONE HeaderByHash expectation; a second call would find the .Once() + // expectation exhausted and make the mock fail at call time (AssertNumberOfCalls below also pins the count). 
+ mockClient.On("HeaderByHash", mock.Anything, bh).Return(header, nil).Once() + + first := types.Log{BlockNumber: 100, BlockHash: bh, Index: 0} + second := types.Log{BlockNumber: 100, BlockHash: bh, Index: 1} + + got1, err := listener.ensureBlockTimestamp(context.Background(), first) + require.NoError(t, err) + assert.Equal(t, headerTime, got1.BlockTimestamp) + + got2, err := listener.ensureBlockTimestamp(context.Background(), second) + require.NoError(t, err) + assert.Equal(t, headerTime, got2.BlockTimestamp) + + mockClient.AssertNumberOfCalls(t, "HeaderByHash", 1) + mockClient.AssertExpectations(t) +} + +// TestEnsureBlockTimestamp_FetchError: when HeaderByHash returns an error, +// ensureBlockTimestamp returns the original (unmutated) eventLog and the error. +// The caller decides whether to fall back to the gate. +func TestEnsureBlockTimestamp_FetchError(t *testing.T) { + t.Parallel() + mockClient := new(MockEVMClient) + logger := log.NewNoopLogger() + addr := common.HexToAddress("0x123") + eventGetter := new(MockContractEventGetter) + + listener := NewListener(addr, mockClient, 1, 10, 0, logger, nil, nil, eventGetter) + + bh := common.HexToHash("0xabc") + mockClient.On("HeaderByHash", mock.Anything, bh).Return(nil, fmt.Errorf("rpc failure")).Once() + + eventLog := types.Log{BlockNumber: 100, BlockHash: bh} + + got, err := listener.ensureBlockTimestamp(context.Background(), eventLog) + require.Error(t, err) + // On error, BlockTimestamp remains at the input value (0). + assert.Equal(t, uint64(0), got.BlockTimestamp) + assert.Equal(t, bh, got.BlockHash) + mockClient.AssertExpectations(t) +} diff --git a/pkg/blockchain/evm/mock_test.go b/pkg/blockchain/evm/mock_test.go index b96784f80..b81388a8f 100644 --- a/pkg/blockchain/evm/mock_test.go +++ b/pkg/blockchain/evm/mock_test.go @@ -120,6 +120,14 @@ func (m *MockEVMClient) SubscribeFilterLogs(ctx context.Context, query ethereum. 
return args.Get(0).(ethereum.Subscription), args.Error(1) } +func (m *MockEVMClient) HeaderByHash(ctx context.Context, hash common.Hash) (*types.Header, error) { + args := m.Called(ctx, hash) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*types.Header), args.Error(1) +} + // MockContractEventGetter implements ContractEventGetter interface type MockContractEventGetter struct { mock.Mock @@ -130,11 +138,21 @@ func (m *MockContractEventGetter) GetLatestContractEventBlockNumber(contractAddr return args.Get(0).(uint64), args.Error(1) } -func (m *MockContractEventGetter) IsContractEventPresent(blockchainID, blockNumber uint64, txHash string, logIndex uint32) (bool, error) { - args := m.Called(blockchainID, blockNumber, txHash, logIndex) +func (m *MockContractEventGetter) IsContractEventProcessed(txHash string, logIndex uint32, blockchainID uint64) (bool, error) { + args := m.Called(txHash, logIndex, blockchainID) return args.Bool(0), args.Error(1) } +func (m *MockContractEventGetter) GetLatestContractEventBlockHashAndNumber(contractAddress string, blockchainID uint64) (uint64, string, error) { + args := m.Called(contractAddress, blockchainID) + return args.Get(0).(uint64), args.String(1), args.Error(2) +} + +func (m *MockContractEventGetter) GetPreviousDistinctBlockHash(contractAddress string, blockchainID uint64, belowBlockNumber uint64) (uint64, string, error) { + args := m.Called(contractAddress, blockchainID, belowBlockNumber) + return args.Get(0).(uint64), args.String(1), args.Error(2) +} + // MockAssetStore implements AssetStore interface type MockAssetStore struct { mock.Mock diff --git a/pkg/blockchain/evm/reconciler.go b/pkg/blockchain/evm/reconciler.go new file mode 100644 index 000000000..aedbc80bf --- /dev/null +++ b/pkg/blockchain/evm/reconciler.go @@ -0,0 +1,128 @@ +package evm + +import ( + "context" + "errors" + "fmt" + "math/big" + + "github.com/ethereum/go-ethereum" + "github.com/ethereum/go-ethereum/common" + 
"github.com/layer-3/nitrolite/pkg/log" +) + +// findCommonAncestor determines the last block in the canonical chain that the +// node has already processed. It walks stored block hashes backward until it +// finds a stored hash that matches the canonical chain's hash at that height, +// then returns that block number as the safe replay start point. +// +// Returns 0 only when no stored events exist (empty store). When every stored +// block has been reorged out but a latest row exists, returns that row's block +// number so the caller can replay canonical logs from that height via eth_getLogs. +func findCommonAncestor( + ctx context.Context, + client EVMClient, + getter ContractEventGetter, + contractAddress string, + blockchainID uint64, + logger log.Logger, +) (uint64, error) { + latestNum, latestHash, err := getter.GetLatestContractEventBlockHashAndNumber(contractAddress, blockchainID) + if err != nil { + return 0, fmt.Errorf("get latest contract event block hash: %w", err) + } + if latestHash == "" { + // No stored events (latestNum=0) or pre-migration row with no hash (latestNum>0). + // Either way, treat latestNum as the safe canonical resume point. + return latestNum, nil + } + + blockNum, blockHash := latestNum, latestHash + + for { + if ctx.Err() != nil { + return 0, ctx.Err() + } + + canonical, err := isStoredBlockCanonical(ctx, client, blockNum, common.HexToHash(blockHash)) + if err != nil { + return 0, fmt.Errorf("check canonicality of block %d (%s): %w", blockNum, blockHash, err) + } + + if canonical { + logger.Info("reconciliation: found common ancestor", + "blockchainID", blockchainID, + "blockNumber", blockNum, + "blockHash", blockHash, + ) + return blockNum, nil + } + + // Block was reorged out — walk to the next-older stored block. 
+ logger.Info("reconciliation: block reorged, walking backward", + "blockchainID", blockchainID, + "blockNumber", blockNum, + "blockHash", blockHash, + ) + prevNum, prevHash, err := getter.GetPreviousDistinctBlockHash(contractAddress, blockchainID, blockNum) + if err != nil { + return 0, fmt.Errorf("get previous distinct block hash below %d: %w", blockNum, err) + } + if prevHash == "" { + if prevNum == 0 { + // All stored event blocks have been reorged out and no older stored + // row exists. Resume from the orphaned latest stored block: eth_getLogs + // is a canonical-chain range query, so the canonical replacement logs + // between latestNum and the current tip will be re-fetched. The orphaned + // hash is irrelevant — only the height drives the range query. NOTE(review): blockNum here is the oldest orphaned stored block, so canonical logs at heights in [blockNum, latestNum) are not re-fetched when resuming from latestNum — confirm this is acceptable. + logger.Info("reconciliation: all stored blocks reorged, resuming from orphaned latest", + "blockchainID", blockchainID, + "blockNumber", latestNum, + ) + return latestNum, nil + } + // Pre-migration row mid-walk (prevNum > 0, no hash recorded): trust it. + logger.Info("reconciliation: reached pre-migration boundary", + "blockchainID", blockchainID, + "blockNumber", prevNum, + ) + return prevNum, nil + } + + blockNum = prevNum + blockHash = prevHash + } +} + +// isStoredBlockCanonical reports whether the block currently occupying blockNum +// in the canonical chain has the given storedHash. It uses HeaderByNumber rather +// than HeaderByHash because the two answer different questions: +// +// - HeaderByHash returns any header the node has indexed, including orphan +// side-chain headers still cached locally. A successful return does NOT prove +// the block is in the canonical chain. A reorged-out hash may also come back +// as ethereum.NotFound depending on the backend's pruning policy — +// conflating those two outcomes with a single boolean is unsafe. +// +// - HeaderByNumber returns the block currently occupying that height in the +// canonical chain. 
Comparing its hash to the stored hash is definitive: equal +// means the stored block is canonical, different means it has been reorged +// out. +// +// ethereum.NotFound from HeaderByNumber (e.g. the chain has pruned the height or +// has not yet produced a block at that height) is treated as "not canonical" +// rather than a fatal error, so the caller walks backward instead of crashing +// the listener on startup. +func isStoredBlockCanonical(ctx context.Context, client EVMClient, blockNum uint64, storedHash common.Hash) (bool, error) { + header, err := client.HeaderByNumber(ctx, new(big.Int).SetUint64(blockNum)) + if err != nil { + if errors.Is(err, ethereum.NotFound) { + return false, nil + } + return false, err + } + if header == nil { + return false, nil + } + return header.Hash() == storedHash, nil +} diff --git a/pkg/blockchain/evm/reconciler_test.go b/pkg/blockchain/evm/reconciler_test.go new file mode 100644 index 000000000..f9d709be6 --- /dev/null +++ b/pkg/blockchain/evm/reconciler_test.go @@ -0,0 +1,235 @@ +package evm + +import ( + "context" + "errors" + "math/big" + "testing" + + ethereum "github.com/ethereum/go-ethereum" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/layer-3/nitrolite/pkg/log" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +const ( + testContract = "0x1234567890abcdef1234567890abcdef12345678" + testBlockchainID = uint64(1) +) + +func newTestLogger() log.Logger { + return log.NewNoopLogger() +} + +// makeHeader builds a Header with a deterministic (and unique-per-seed) hash for +// the given block number. Two calls with different seeds produce headers whose +// Hash() values differ, which lets canonicality tests distinguish "this stored +// block is canonical" (same seed) from "this stored block was reorged out" +// (different seed at the same number). 
+func makeHeader(blockNum int64, seed int64) *types.Header { + return &types.Header{ + Number: big.NewInt(blockNum), + Difficulty: big.NewInt(seed), + } +} + +func bigEqual(want *big.Int) interface{} { + return mock.MatchedBy(func(got *big.Int) bool { return got != nil && got.Cmp(want) == 0 }) +} + +// TestFindCommonAncestor_NoStoredEvents verifies that when no contract events exist, +// findCommonAncestor returns 0 (genesis fallback). +func TestFindCommonAncestor_NoStoredEvents(t *testing.T) { + t.Parallel() + + client := new(MockEVMClient) + getter := new(MockContractEventGetter) + + getter.On("GetLatestContractEventBlockHashAndNumber", testContract, testBlockchainID). + Return(uint64(0), "", nil) + + result, err := findCommonAncestor(context.Background(), client, getter, testContract, testBlockchainID, newTestLogger()) + require.NoError(t, err) + assert.Equal(t, uint64(0), result) + client.AssertNotCalled(t, "HeaderByNumber") + client.AssertNotCalled(t, "HeaderByHash") +} + +// TestFindCommonAncestor_LatestBlockCanonical verifies that when the latest stored block +// is still canonical, findCommonAncestor returns that block number with no backward walk. +func TestFindCommonAncestor_LatestBlockCanonical(t *testing.T) { + t.Parallel() + + client := new(MockEVMClient) + getter := new(MockContractEventGetter) + + header := makeHeader(500, 1) + storedHash := header.Hash() + + getter.On("GetLatestContractEventBlockHashAndNumber", testContract, testBlockchainID). 
+ Return(uint64(500), storedHash.Hex(), nil) + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(500))).Return(header, nil) + + result, err := findCommonAncestor(context.Background(), client, getter, testContract, testBlockchainID, newTestLogger()) + require.NoError(t, err) + assert.Equal(t, uint64(500), result) + getter.AssertNotCalled(t, "GetPreviousDistinctBlockHash") +} + +// TestFindCommonAncestor_SingleReorgDepth verifies that when the latest stored block has +// been reorged out (canonical chain has a different block at that height), findCommonAncestor +// walks back one step and returns the previous canonical block. +func TestFindCommonAncestor_SingleReorgDepth(t *testing.T) { + t.Parallel() + + client := new(MockEVMClient) + getter := new(MockContractEventGetter) + + // Block 200 was reorged: stored hash came from a now-orphan block; canonical chain + // has a different block at the same height. + storedAt200 := makeHeader(200, 1) + canonicalAt200 := makeHeader(200, 2) + require.NotEqual(t, storedAt200.Hash(), canonicalAt200.Hash()) + + // Block 190 is canonical. + headerAt190 := makeHeader(190, 1) + storedAt190 := headerAt190.Hash() + + getter.On("GetLatestContractEventBlockHashAndNumber", testContract, testBlockchainID). + Return(uint64(200), storedAt200.Hash().Hex(), nil) + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(200))).Return(canonicalAt200, nil) + getter.On("GetPreviousDistinctBlockHash", testContract, testBlockchainID, uint64(200)). + Return(uint64(190), storedAt190.Hex(), nil) + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(190))).Return(headerAt190, nil) + + result, err := findCommonAncestor(context.Background(), client, getter, testContract, testBlockchainID, newTestLogger()) + require.NoError(t, err) + assert.Equal(t, uint64(190), result) +} + +// TestFindCommonAncestor_NotFoundTreatedAsReorg verifies that when HeaderByNumber returns +// ethereum.NotFound (e.g. 
the RPC backend has pruned that height, or no canonical block +// exists at that number yet), the walk continues backward instead of crashing the listener. +// Regression guard: the old HeaderByHash path treated NotFound +// as a fatal startup error. +func TestFindCommonAncestor_NotFoundTreatedAsReorg(t *testing.T) { + t.Parallel() + + client := new(MockEVMClient) + getter := new(MockContractEventGetter) + + storedAt200 := common.HexToHash("0xreorged200") // NOTE(review): placeholder is not valid hex, so it presumably decodes to the zero hash — harmless here because HeaderByNumber(200) is mocked to return NotFound; confirm. + headerAt190 := makeHeader(190, 1) + + getter.On("GetLatestContractEventBlockHashAndNumber", testContract, testBlockchainID). + Return(uint64(200), storedAt200.Hex(), nil) + // HeaderByNumber(200) returns NotFound — must NOT be treated as fatal. + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(200))).Return(nil, ethereum.NotFound) + + getter.On("GetPreviousDistinctBlockHash", testContract, testBlockchainID, uint64(200)). + Return(uint64(190), headerAt190.Hash().Hex(), nil) + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(190))).Return(headerAt190, nil) + + result, err := findCommonAncestor(context.Background(), client, getter, testContract, testBlockchainID, newTestLogger()) + require.NoError(t, err) + assert.Equal(t, uint64(190), result) +} + +// TestFindCommonAncestor_AllStoredReorged_ResumesFromOrphanedLatest verifies that when all +// stored blocks have been reorged out (canonical hashes differ at every stored height) and +// no older stored row exists, findCommonAncestor returns the original latestBlockNum so the +// caller can replay canonical logs from that height via eth_getLogs.
+func TestFindCommonAncestor_AllStoredReorged_ResumesFromOrphanedLatest(t *testing.T) { + t.Parallel() + + client := new(MockEVMClient) + getter := new(MockContractEventGetter) + + storedAt300 := makeHeader(300, 1).Hash() + storedAt200 := makeHeader(200, 1).Hash() + canonicalAt300 := makeHeader(300, 2) + canonicalAt200 := makeHeader(200, 2) + + getter.On("GetLatestContractEventBlockHashAndNumber", testContract, testBlockchainID). + Return(uint64(300), storedAt300.Hex(), nil) + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(300))).Return(canonicalAt300, nil) + getter.On("GetPreviousDistinctBlockHash", testContract, testBlockchainID, uint64(300)). + Return(uint64(200), storedAt200.Hex(), nil) + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(200))).Return(canonicalAt200, nil) + getter.On("GetPreviousDistinctBlockHash", testContract, testBlockchainID, uint64(200)). + Return(uint64(0), "", nil) + + result, err := findCommonAncestor(context.Background(), client, getter, testContract, testBlockchainID, newTestLogger()) + require.NoError(t, err) + // Returns the original latestBlockNum (300), not 0: the caller uses eth_getLogs from + // that height to re-fetch canonical replacement logs. + assert.Equal(t, uint64(300), result) +} + +// TestFindCommonAncestor_PreMigrationLatestRow verifies that when the latest stored row has +// an empty block_hash (pre-migration row), findCommonAncestor returns that block number +// without making any RPC call, treating the row as canonical. +func TestFindCommonAncestor_PreMigrationLatestRow(t *testing.T) { + t.Parallel() + + client := new(MockEVMClient) + getter := new(MockContractEventGetter) + + // blockNum=450 but blockHash="" — pre-migration row. + getter.On("GetLatestContractEventBlockHashAndNumber", testContract, testBlockchainID). 
+ Return(uint64(450), "", nil) + + result, err := findCommonAncestor(context.Background(), client, getter, testContract, testBlockchainID, newTestLogger()) + require.NoError(t, err) + assert.Equal(t, uint64(450), result) + client.AssertNotCalled(t, "HeaderByNumber") +} + +// TestFindCommonAncestor_PreMigrationMidWalk verifies that when a pre-migration row (empty +// block_hash) is encountered during the backward walk, the walk stops and returns that +// block number rather than making an RPC call with a zero hash. +func TestFindCommonAncestor_PreMigrationMidWalk(t *testing.T) { + t.Parallel() + + client := new(MockEVMClient) + getter := new(MockContractEventGetter) + + storedAt300 := makeHeader(300, 1).Hash() + canonicalAt300 := makeHeader(300, 2) + + getter.On("GetLatestContractEventBlockHashAndNumber", testContract, testBlockchainID). + Return(uint64(300), storedAt300.Hex(), nil) + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(300))).Return(canonicalAt300, nil) + + // Walk backward hits a pre-migration row with empty hash at block 250. + getter.On("GetPreviousDistinctBlockHash", testContract, testBlockchainID, uint64(300)). + Return(uint64(250), "", nil) + + result, err := findCommonAncestor(context.Background(), client, getter, testContract, testBlockchainID, newTestLogger()) + require.NoError(t, err) + assert.Equal(t, uint64(250), result) + // HeaderByNumber must NOT be called for the zero-hash pre-migration row. + client.AssertNumberOfCalls(t, "HeaderByNumber", 1) +} + +// TestFindCommonAncestor_RPCError verifies that non-NotFound RPC errors are propagated. +func TestFindCommonAncestor_RPCError(t *testing.T) { + t.Parallel() + + client := new(MockEVMClient) + getter := new(MockContractEventGetter) + + blockHash := common.HexToHash("0xfailhash") + getter.On("GetLatestContractEventBlockHashAndNumber", testContract, testBlockchainID). 
+ Return(uint64(100), blockHash.Hex(), nil) + + client.On("HeaderByNumber", mock.Anything, bigEqual(big.NewInt(100))). + Return(nil, errors.New("rpc timeout")) + + _, err := findCommonAncestor(context.Background(), client, getter, testContract, testBlockchainID, newTestLogger()) + require.Error(t, err) + assert.Contains(t, err.Error(), "rpc timeout") +} diff --git a/pkg/core/README.md b/pkg/core/README.md index 466b885df..ceb12aff4 100644 --- a/pkg/core/README.md +++ b/pkg/core/README.md @@ -52,7 +52,15 @@ The `Client` interface abstracts the communication with the `ChannelsHub` smart ### Listener Interface -The `Listener` allows applications to react to on-chain state changes by registering handlers for events like `HomeChannelCreatedEvent` or `EscrowDepositFinalizedEvent`. +The `Listener` exposes events via a **two-handler model**. A `liveHandler` receives live events plus any historical events still within the reorg window, while a `historicalEventHandler` receives mature historical events past the configured `confirmationDelay`. Per-event routing is decided by the listener itself: it compares `eventLog.BlockTimestamp` against `confirmationDelay` to choose which handler an event flows into. This makes the listener delay-aware rather than pushing that decision down to consumers. + +The typical `liveHandler` is the **`ConfirmationGate`**, which implements the reorg-protection window. The gate buffers each event for `confirmation_delay_secs` before forwarding it to the reactor; if the event's block is reorged out within that window, the gate silently drops it instead of committing it downstream. With the gate in place, the reactor only ever sees events whose blocks have survived the configured confirmation window. + +To make this work, the listener owns timestamp population. 
**`ensureBlockTimestamp`** guarantees `BlockTimestamp` is set on every non-removed event before it is forwarded: it uses `eventLog.BlockTimestamp` directly when present, and otherwise falls back to a cached `HeaderByHash` lookup. The gate relies on this to compute each event's `arrivedAt` correctly. **`Removed: true`** logs are handled at the listener boundary, and the handling depends on whether a gate is configured: in the live (Phase 2) path with a gate, removed logs are forwarded so the gate can cancel a pending timer; with no gate configured (`confirmation_delay_secs == 0`), the listener drops removed logs at Phase 2 and the reactor never sees them. Historical (Phase 1) replays use `eth_getLogs`, which never emits removals, so that path is simpler by construction. + +On startup, the listener reconciles against possible reorgs that happened while the node was down. **`findCommonAncestor`** walks stored block hashes backward to locate a still-canonical resume point. If every stored block has been reorged out, it returns the orphaned-latest height so `eth_getLogs` re-fetches the canonical replacement logs from that height up to the current tip; the orphan hash itself is discarded — only the height matters because `eth_getLogs` is a canonical-chain range query. + +See [`nitronode/docs/reorg-fix.md`](../../nitronode/docs/reorg-fix.md) for the full design.
### State Advancer diff --git a/pkg/core/event.go b/pkg/core/event.go index 2b6988a5e..c0e8aa131 100644 --- a/pkg/core/event.go +++ b/pkg/core/event.go @@ -87,4 +87,5 @@ type BlockchainEvent struct { BlockNumber uint64 `json:"block_number"` TransactionHash string `json:"transaction_hash"` LogIndex uint32 `json:"log_index"` + BlockHash string `json:"block_hash"` } diff --git a/pkg/core/types.go b/pkg/core/types.go index 57f949bae..0d4d68978 100644 --- a/pkg/core/types.go +++ b/pkg/core/types.go @@ -1094,6 +1094,7 @@ type Blockchain struct { ID uint64 `json:"id"` // Blockchain network ID ChannelHubAddress string `json:"channel_hub_address"` // Address of the ChannelHub contract on this blockchain BlockStep uint64 `json:"block_step"` // Number of blocks between each channel update + ConfirmationDelaySecs uint32 `json:"confirmation_delay_secs"` // Seconds to wait before processing an event (0 = immediate) } // Asset represents information about a supported asset diff --git a/pkg/rpc/types.go b/pkg/rpc/types.go index 9ad75a804..a1bb5ce7a 100644 --- a/pkg/rpc/types.go +++ b/pkg/rpc/types.go @@ -264,6 +264,9 @@ type BlockchainInfoV1 struct { BlockchainID string `json:"blockchain_id"` // ChannelHubAddress is the contract address on this network ChannelHubAddress string `json:"channel_hub_address"` + // ConfirmationDelaySecs is the number of seconds the node waits before crediting a deposit event. + // Zero means the gate is disabled and events are processed immediately. 
+ ConfirmationDelaySecs uint32 `json:"confirmation_delay_secs"` } // ============================================================================ diff --git a/sdk/go/utils.go b/sdk/go/utils.go index d3dbebaf3..8a5daba08 100644 --- a/sdk/go/utils.go +++ b/sdk/go/utils.go @@ -30,6 +30,7 @@ func transformNodeConfig(resp rpc.NodeV1GetConfigResponse) (*core.NodeConfig, er ID: blockchainID, ChannelHubAddress: info.ChannelHubAddress, BlockStep: 0, // Not provided in RPC response + ConfirmationDelaySecs: info.ConfirmationDelaySecs, }) } diff --git a/sdk/go/utils_test.go b/sdk/go/utils_test.go index 021a47d6c..1d650e95f 100644 --- a/sdk/go/utils_test.go +++ b/sdk/go/utils_test.go @@ -19,9 +19,10 @@ func TestTransformNodeConfig(t *testing.T) { SupportedSigValidators: []core.ChannelSignerType{core.ChannelSignerType_SessionKey}, Blockchains: []rpc.BlockchainInfoV1{ { - Name: "Polygon", - BlockchainID: "137", - ChannelHubAddress: "0xHubAddress", + Name: "Polygon", + BlockchainID: "137", + ChannelHubAddress: "0xHubAddress", + ConfirmationDelaySecs: 10, }, }, } @@ -35,6 +36,7 @@ func TestTransformNodeConfig(t *testing.T) { assert.Len(t, config.Blockchains, 1) assert.Equal(t, uint64(137), config.Blockchains[0].ID) assert.Equal(t, "Polygon", config.Blockchains[0].Name) + assert.Equal(t, uint32(10), config.Blockchains[0].ConfirmationDelaySecs) // Test error case rpcResp.Blockchains[0].BlockchainID = "invalid" diff --git a/sdk/ts/src/core/types.ts b/sdk/ts/src/core/types.ts index 69a8b62a7..c7d18ad8d 100644 --- a/sdk/ts/src/core/types.ts +++ b/sdk/ts/src/core/types.ts @@ -166,6 +166,7 @@ export interface Blockchain { id: bigint; // uint64 channelHubAddress: Address; blockStep: bigint; // uint64 + confirmationDelaySecs: number; // seconds; 0 means gate is disabled } export interface Token { diff --git a/sdk/ts/src/rpc/types.ts b/sdk/ts/src/rpc/types.ts index 70890b5c7..8db60945d 100644 --- a/sdk/ts/src/rpc/types.ts +++ b/sdk/ts/src/rpc/types.ts @@ -195,6 +195,8 @@ export interface 
BlockchainInfoV1 { blockchain_id: string; // uint64 as string /** Channel hub contract address on this network */ channel_hub_address: Address; + /** Seconds the node waits before crediting a deposit event; 0 means gate is disabled */ + confirmation_delay_secs?: number; } // ============================================================================ diff --git a/sdk/ts/src/utils.ts b/sdk/ts/src/utils.ts index a836891f8..83df72f90 100644 --- a/sdk/ts/src/utils.ts +++ b/sdk/ts/src/utils.ts @@ -42,6 +42,7 @@ export function transformNodeConfig(resp: API.NodeV1GetConfigResponse): core.Nod id: BigInt(info.blockchain_id), channelHubAddress: info.channel_hub_address as Address, blockStep: 0n, // Not provided in RPC response + confirmationDelaySecs: info.confirmation_delay_secs ?? 0, })); return { diff --git a/sdk/ts/test/unit/__snapshots__/public-api-drift.test.ts.snap b/sdk/ts/test/unit/__snapshots__/public-api-drift.test.ts.snap index 7f61abd57..3c41f70bd 100644 --- a/sdk/ts/test/unit/__snapshots__/public-api-drift.test.ts.snap +++ b/sdk/ts/test/unit/__snapshots__/public-api-drift.test.ts.snap @@ -506,6 +506,7 @@ exports[`SDK public runtime API drift guard keeps root TypeScript public API sig "properties": [ "blockStep: bigint", "channelHubAddress: Address", + "confirmationDelaySecs: number", "id: bigint", "name: string", ], @@ -548,6 +549,7 @@ exports[`SDK public runtime API drift guard keeps root TypeScript public API sig "properties": [ "blockchain_id: string", "channel_hub_address: Address", + "confirmation_delay_secs: number", "name: string", ], "signatures": [],