From f1b5870650f7cb4e692eab01576f0b52f9f526f6 Mon Sep 17 00:00:00 2001 From: chengwenxi <22697326+chengwenxi@users.noreply.github.com> Date: Wed, 10 Jun 2026 09:05:44 +0800 Subject: [PATCH 1/2] fix(l1sequencer): fail-fast on verifier startup when L1 unreachable (F-03/F-01/F-13) NewSequencerVerifier treated an initial syncHistory() failure as a soft error (Logger.Error only). When L1 RPC is unreachable at boot, history stays empty, upgrade.SetUpgradeBlockHeight is never called, and the global UpgradeBlockHeight is left at its -1 sentinel. Booting with UpgradeBlockHeight=-1 is unsafe: - F-01: IsUpgraded() returns false for every height, so the PBFT state machine can run past the true upgrade height. - F-03: on a block-synced fullnode restart, the handshake's sequencer-mode replay exemption (replay.go: `IsUpgraded(storeHeight+1)`) fails -> ErrAppBlockHeightTooHigh; and the consensus ctor's reconstructLastCommit guard (state.go:192) runs against a nil commit -> panic. Make startup fail-fast: NewSequencerVerifier now returns an error when the initial sync fails or returns empty history (UpgradeBlockHeight still < 0), and main.go propagates it so the node exits at boot rather than booting into the unsafe -1 state. The operator's supervisor restarts the node once L1 is reachable. 
--- node/cmd/node/main.go | 5 ++++- node/l1sequencer/verifier.go | 21 ++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 9296c4ed7..f9fbbbf08 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -328,7 +328,10 @@ func initL1SequencerComponents( if err != nil { return nil, nil, nil, fmt.Errorf("failed to create L1Sequencer caller: %w", err) } - verifier = l1sequencer.NewSequencerVerifier(caller, logger) + verifier, err = l1sequencer.NewSequencerVerifier(caller, logger) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to initialize sequencer verifier: %w", err) + } logger.Info("Sequencer verifier initialized", "contract", contractAddr.Hex()) } else { return nil, nil, nil, fmt.Errorf("L1 Sequencer contract address is required, check l1.sequencerContract configuration") diff --git a/node/l1sequencer/verifier.go b/node/l1sequencer/verifier.go index b12be781d..454e797a7 100644 --- a/node/l1sequencer/verifier.go +++ b/node/l1sequencer/verifier.go @@ -46,7 +46,17 @@ type SequencerVerifier struct { // NewSequencerVerifier creates a new SequencerVerifier, loads the full sequencer // history from L1 (finalized), and starts a background refresh goroutine. // Call Stop to terminate the background loop. -func NewSequencerVerifier(caller *bindings.L1SequencerCaller, logger tmlog.Logger) *SequencerVerifier { +// +// Startup is fail-fast: if the initial syncHistory() fails or returns empty +// history, the global upgrade.UpgradeBlockHeight is never set and stays at its +// -1 sentinel. Running with UpgradeBlockHeight=-1 is unsafe: IsUpgraded() +// returns false for every height, so the PBFT state machine can run past the +// true upgrade height, and on a block-synced fullnode restart the handshake's +// sequencer-mode replay exemption fails (ErrAppBlockHeightTooHigh) while +// reconstructLastCommit runs against a nil commit and panics. 
We therefore +// refuse to start rather than boot into that state; the operator's supervisor +// restarts us once L1 is reachable. +func NewSequencerVerifier(caller *bindings.L1SequencerCaller, logger tmlog.Logger) (*SequencerVerifier, error) { ctx, cancel := context.WithCancel(context.Background()) v := &SequencerVerifier{ caller: caller, @@ -54,11 +64,16 @@ func NewSequencerVerifier(caller *bindings.L1SequencerCaller, logger tmlog.Logge cancel: cancel, } if err := v.syncHistory(); err != nil { - v.logger.Error("Failed to load sequencer history from L1", "err", err) + cancel() + return nil, fmt.Errorf("refusing to start with UpgradeBlockHeight=-1: initial sequencer history sync from L1 failed: %w", err) + } + if upgrade.UpgradeBlockHeight < 0 { + cancel() + return nil, fmt.Errorf("refusing to start with UpgradeBlockHeight=-1: L1 returned empty sequencer history; upgrade height unknown") } v.logCurrentState() go v.refreshLoop(ctx) - return v + return v, nil } // logCurrentState prints a one-line snapshot of the loaded contract state at From 4f2760b6ab76697812de4393469abc3a36be67f7 Mon Sep 17 00:00:00 2001 From: chengwenxi <22697326+chengwenxi@users.noreply.github.com> Date: Wed, 10 Jun 2026 10:17:03 +0800 Subject: [PATCH 2/2] fix(node): resolve golangci-lint errors across node package The CI Lint job runs golangci-lint over the whole node/ package, not just the PR diff, and was failing on pre-existing issues. 
Fix all of them so the check passes: - gofmt: struct field alignment / formatting (verifier.go, signer.go, ha_service.go) - errcheck: check or explicitly ignore deferred Close/Cancel/SetDeadline return values (block_fsm.go, ha_service.go, enclave_signer.go) - gosec G112: set ReadHeaderTimeout on http.Server (main.go metrics server, hakeeper/rpc/server.go) to avoid Slowloris - gosec G301: tighten Raft storage dir perms 0o755 -> 0o750 (ha_service.go) - misspell: cancelled -> canceled, initialised -> initialized (verifier.go, derivation.go, ha_service.go) No functional change intended beyond the two gosec hardening items above: both HTTP servers now enforce a 10s ReadHeaderTimeout, and a newly created Raft storage dir gets mode 0o750 instead of 0o755. `make lint` (go run build/lint.go) now exits 0. --- node/cmd/node/main.go | 2 +- node/derivation/derivation.go | 2 +- node/hakeeper/block_fsm.go | 4 ++-- node/hakeeper/ha_service.go | 22 +++++++++++----------- node/hakeeper/rpc/server.go | 6 ++++-- node/l1sequencer/enclave_signer.go | 4 ++-- node/l1sequencer/signer.go | 1 - node/l1sequencer/verifier.go | 4 ++-- 8 files changed, 23 insertions(+), 22 deletions(-) diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index f9fbbbf08..00aecc2b8 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -380,7 +380,7 @@ func startMetricsServer(addr string, logger tmlog.Logger) { } mux := http.NewServeMux() mux.Handle("/metrics", promhttp.Handler()) - srv := &http.Server{Addr: addr, Handler: mux} + srv := &http.Server{Addr: addr, Handler: mux, ReadHeaderTimeout: 10 * time.Second} go func() { if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { logger.Error("metrics server", "addr", addr, "err", err) diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 068f255f5..f7e8ef2a2 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -878,7 +878,7 @@ func (d *Derivation) withReactorsQuiesced(ctx context.Context, batchIndex uint64 return err } defer func() { - // Use background context so a cancelled parent ctx doesn't + // Use background context so a canceled parent ctx 
doesn't // prevent reactor restart. height := preWrite if cur, readErr := d.l2Client.BlockNumber(context.Background()); readErr == nil { diff --git a/node/hakeeper/block_fsm.go b/node/hakeeper/block_fsm.go index 2a97ee212..d299df5c4 100644 --- a/node/hakeeper/block_fsm.go +++ b/node/hakeeper/block_fsm.go @@ -153,7 +153,7 @@ func (f *BlockFSM) Snapshot() (raft.FSMSnapshot, error) { // Reads the 8-byte appliedHeight from the snapshot. Does NOT call onApplied -- // geth state must be recovered independently (Fullnode P2P sync). func (f *BlockFSM) Restore(rc io.ReadCloser) error { - defer rc.Close() + defer func() { _ = rc.Close() }() data, err := io.ReadAll(rc) if err != nil { @@ -191,7 +191,7 @@ func (s *blockSnapshot) Persist(sink raft.SnapshotSink) error { var buf [8]byte binary.BigEndian.PutUint64(buf[:], s.height) if _, err := sink.Write(buf[:]); err != nil { - sink.Cancel() + _ = sink.Cancel() return fmt.Errorf("blockSnapshot.Persist: write failed: %w", err) } return sink.Close() diff --git a/node/hakeeper/ha_service.go b/node/hakeeper/ha_service.go index 5c1002859..fd3953a52 100644 --- a/node/hakeeper/ha_service.go +++ b/node/hakeeper/ha_service.go @@ -30,13 +30,13 @@ const ( // HAService implements the SequencerHA interface from tendermint/sequencer. // It also satisfies rpc.ConsensusAdapter so it can be passed directly to the RPC server. type HAService struct { - logger tmlog.Logger - cfg *Config + logger tmlog.Logger + cfg *Config advertisedAddr string // resolved once in New(), used throughout - fsm *BlockFSM - rpcServer *hakeeperrpc.Server + fsm *BlockFSM + rpcServer *hakeeperrpc.Server - // Raft internals (initialised in Start) + // Raft internals (initialized in Start) r *raft.Raft transport *raft.NetworkTransport @@ -69,7 +69,7 @@ func (h *HAService) SetOnBlockApplied(fn func(*types.BlockV2) error) { // ── SequencerHA interface ──────────────────────────────────────────────────── -// Start initialises Raft and the management RPC server. 
+// Start initializes Raft and the management RPC server. // Called by StateV2.OnStart() at upgrade height. func (h *HAService) Start() error { if err := h.initRaft(); err != nil { @@ -271,7 +271,7 @@ func (h *HAService) Addr() string { return h.advertisedAddr } // initRaft creates the Raft instance. Called once from Start(). // On failure, all opened resources are cleaned up via a single deferred closure. func (h *HAService) initRaft() (retErr error) { - if err := os.MkdirAll(h.cfg.StorageDir, 0o755); err != nil { + if err := os.MkdirAll(h.cfg.StorageDir, 0o750); err != nil { return fmt.Errorf("mkdir %q: %w", h.cfg.StorageDir, err) } @@ -287,13 +287,13 @@ func (h *HAService) initRaft() (retErr error) { r.Shutdown() } if transport != nil { - transport.Close() + _ = transport.Close() } if stableStore != nil { - stableStore.Close() + _ = stableStore.Close() } if logStore != nil { - logStore.Close() + _ = logStore.Close() } } }() @@ -364,7 +364,7 @@ func (h *HAService) initRaft() (retErr error) { h.r = r h.transport = transport - h.logger.Info("hakeeper: raft initialised", "bind", bindAddr) + h.logger.Info("hakeeper: raft initialized", "bind", bindAddr) return nil } diff --git a/node/hakeeper/rpc/server.go b/node/hakeeper/rpc/server.go index 90cc3bc33..3cea4ee9b 100644 --- a/node/hakeeper/rpc/server.go +++ b/node/hakeeper/rpc/server.go @@ -4,6 +4,7 @@ import ( "fmt" "net/http" "sync" + "time" ethrpc "github.com/morph-l2/go-ethereum/rpc" "github.com/pkg/errors" @@ -40,8 +41,9 @@ func New(log log.Logger, listenAddr string, listenPort int, cons ConsensusAdapte addr := fmt.Sprintf("%s:%d", listenAddr, listenPort) httpSrv := &http.Server{ - Addr: addr, - Handler: mux, + Addr: addr, + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, } return &Server{ diff --git a/node/l1sequencer/enclave_signer.go b/node/l1sequencer/enclave_signer.go index e83a70fca..53d575440 100644 --- a/node/l1sequencer/enclave_signer.go +++ b/node/l1sequencer/enclave_signer.go @@ -165,7 +165,7 @@ 
func (s *EnclaveSigner) probe() error { if err := conn.SetDeadline(time.Now().Add(requestTimeout)); err != nil { return fmt.Errorf("set deadline: %w", err) } - defer conn.SetDeadline(time.Time{}) // clear on exit so signOnce can manage its own deadline + defer func() { _ = conn.SetDeadline(time.Time{}) }() // clear on exit so signOnce can manage its own deadline if _, err := conn.Write([]byte{opGetPubkey}); err != nil { return fmt.Errorf("write GetPubkey: %w", err) @@ -220,7 +220,7 @@ func (s *EnclaveSigner) signOnce(conn net.Conn, data []byte) ([]byte, error) { if err := conn.SetDeadline(time.Now().Add(requestTimeout)); err != nil { return nil, err } - defer conn.SetDeadline(time.Time{}) + defer func() { _ = conn.SetDeadline(time.Time{}) }() req := make([]byte, 1+hashLen) req[0] = opSign diff --git a/node/l1sequencer/signer.go b/node/l1sequencer/signer.go index 4ad851304..e99726e7a 100644 --- a/node/l1sequencer/signer.go +++ b/node/l1sequencer/signer.go @@ -53,4 +53,3 @@ func (s *LocalSigner) Sign(data []byte) ([]byte, error) { func (s *LocalSigner) Address() common.Address { return s.address } - diff --git a/node/l1sequencer/verifier.go b/node/l1sequencer/verifier.go index 454e797a7..07da0c809 100644 --- a/node/l1sequencer/verifier.go +++ b/node/l1sequencer/verifier.go @@ -34,7 +34,7 @@ type sequencerCursor struct { // History is loaded from L1 at construction and refreshed every 5 minutes. // All L1 reads use the finalized block tag to avoid ingesting reorged data. type SequencerVerifier struct { - mu sync.Mutex + mu sync.Mutex history []bindings.L1SequencerHistoryRecord cursor sequencerCursor @@ -142,7 +142,7 @@ func (c *SequencerVerifier) syncHistory() error { return nil } -// refreshLoop polls L1 until ctx is cancelled. +// refreshLoop polls L1 until ctx is canceled. // Uses exponential backoff (10s -> 20s -> ... -> 5min) while history is empty, // then switches to the normal 5-minute interval once loaded. 
func (c *SequencerVerifier) refreshLoop(ctx context.Context) {