From 34bd70dbb91877d6d9fe30e2c861549facfd2078 Mon Sep 17 00:00:00 2001 From: corey Date: Tue, 9 Jun 2026 17:20:45 +0800 Subject: [PATCH] fix(derivation): clamp deriveForce skipNumber to batch tip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scenario-C dispatch (lastBlockNumber missing locally + !l2Grew) is decided in derivationBlock BEFORE reactors are quiesced; the localLatest passed into deriveForce is read AFTER StopReactorsBeforeReorg. In the window between the dispatch decision and the Stop, blocksync can backfill past the batch tip. When that happens, skipNumber >= rollupData.lastBlockNumber: the existing loop short-circuits every block via the `Number <= skipNumber` continue, and the function returns header(skipNumber) — a block past the batch. Upstream verifyBatchRoots and tagAdvancer.advanceSafe then run against that wrong header: roots compared against post-batch state (false stateException) and safe head pushed past the actual batch tip. With the clamp, if the race materialises, the result degrades to the same outcome scenario A would have produced once P2P caught up — verifyBatchRoots sees header(lastBlockNumber), advanceSafe pins safe to the correct batch tip. Targets PR #966 review (CodeRabbit comment r3340287543). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- node/derivation/derivation.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 4fa9db2af..068f255f5 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -905,6 +905,32 @@ func (d *Derivation) deriveForce(rollupData *BatchInfo, skipNumber uint64) (*eth return nil, fmt.Errorf("invalid firstBlockNumber 0 for batch %d", rollupData.batchIndex) } + // Race short-circuit: scenario C dispatch is decided before reactors are + // quiesced (HeaderByNumber check at derivationBlock vs StopReactors inside + // withReactorsQuiesced), so blocksync can backfill past lastBlockNumber in + // that small window. When that happens, skipNumber (= localLatest read + // after Stop) ends up >= the batch tip. Without this guard the loop below + // would `continue` on every block, return header(skipNumber) — a block + // past the batch — and then verifyBatchRoots / advanceSafe upstream would + // run against the wrong header (false stateException + safe head pushed + // past the batch). Returning header(lastBlockNumber) collapses this case + // to the same outcome scenario A would have produced if the dispatch had + // caught the now-present batch tip. 
+ if skipNumber >= rollupData.lastBlockNumber { + lastHeader, err := d.l2Client.HeaderByNumber(d.ctx, big.NewInt(int64(rollupData.lastBlockNumber))) + if err != nil { + return nil, fmt.Errorf("read batch tip at %d: %w", rollupData.lastBlockNumber, err) + } + if lastHeader == nil { + return nil, fmt.Errorf("batch tip at %d missing", rollupData.lastBlockNumber) + } + d.logger.Info("deriveForce: P2P caught up past batch tip during scenario-C dispatch window; no-op write", + "batchIndex", rollupData.batchIndex, + "lastBlockNumber", rollupData.lastBlockNumber, + "skipNumber", skipNumber) + return lastHeader, nil + } + // Anchor: parent of the first block we will WRITE must exist locally. // scenario B (skipNumber==0): firstNum-1. // scenario C: max(firstNum-1, skipNumber).