From 6c40d1f52437c6f4dd953df0fe3d3e21973208ab Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 4 Jun 2026 15:48:02 +0800 Subject: [PATCH 1/9] feat(node): single metrics endpoint + layer1 verify-mode tendermint skip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Layer1 verify mode: skip tendermint setup for fullnodes (derivation alone reconstructs the chain from L1 batches/blobs). Sequencer mode (signer != nil) still starts tendermint as before. - Single Prometheus endpoint owned by main.go, sourced from tmCfg.Instrumentation.PrometheusListenAddr (config.toml). Tendermint's own listener is force-disabled so the node never exposes two /metrics URLs serving the same DefaultGatherer set. - Drop --metrics-server-enable / --metrics-hostname / --metrics-port flags and the Config.Metrics* fields; ops only needs config.toml's instrumentation block now. - DefaultConfig.Confirmations: rpc.SafeBlockNumber -> 10. Pairs with the always-on L1 reorg detector (SPEC-005 §4.7.6) so a deeper reorg is still recoverable, while keeping derivation lag low. - Drop the dead L1Confirmations branch in derivation.SetCliContext (sync still owns that flag); DerivationConfirmations remains the only override path for derivation. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- go.work.sum | 1 + node/cmd/node/main.go | 58 +++++++++++++++++++++++++++++++---- node/derivation/config.go | 27 +++++----------- node/derivation/derivation.go | 12 ++------ node/derivation/metrics.go | 15 --------- node/flags/flags.go | 24 --------------- 6 files changed, 63 insertions(+), 74 deletions(-) diff --git a/go.work.sum b/go.work.sum index ef22362fd..7279f6a49 100644 --- a/go.work.sum +++ b/go.work.sum @@ -539,6 +539,7 @@ github.com/esimonov/ifshort v1.0.4 h1:6SID4yGWfRae/M7hkVDVVyppy8q/v9OuxNdmjLQStB github.com/esimonov/ifshort v1.0.4/go.mod h1:Pe8zjlRrJ80+q2CxHLfEOfTwxCZ4O+MuhcHcfgNWTk0= github.com/ethereum/c-kzg-4844/bindings/go v0.0.0-20230126171313-363c7d7593b4 h1:B2mpK+MNqgPqk2/KNi1LbqwtZDy5F7iy0mynQiBr8VA= github.com/ethereum/c-kzg-4844/bindings/go v0.0.0-20230126171313-363c7d7593b4/go.mod h1:y4GA2JbAUama1S4QwYjC2hefgGLU8Ul0GMtL/ADMF1c= +github.com/ethereum/go-ethereum v1.10.26 h1:i/7d9RBBwiXCEuyduBQzJw/mKmnvzsN14jqBmytw72s= github.com/ethereum/go-ethereum v1.10.26/go.mod h1:EYFyF19u3ezGLD4RqOkLq+ZCXzYbLoNDdZlMt7kyKFg= github.com/ettle/strcase v0.1.1 h1:htFueZyVeE1XNnMEfbqp5r67qAN/4r6ya1ysq8Q+Zcw= github.com/ettle/strcase v0.1.1/go.mod h1:hzDLsPC7/lwKyBOywSHEP89nt2pDgdy+No1NBA9o9VY= diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 2f6206b5a..97def645d 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -3,6 +3,7 @@ package main import ( "context" "fmt" + "net/http" "os" "os/signal" "path/filepath" @@ -13,6 +14,7 @@ import ( "github.com/morph-l2/go-ethereum/common" "github.com/morph-l2/go-ethereum/crypto" "github.com/morph-l2/go-ethereum/ethclient" + "github.com/prometheus/client_golang/prometheus/promhttp" tmlog "github.com/tendermint/tendermint/libs/log" tmnode "github.com/tendermint/tendermint/node" "github.com/tendermint/tendermint/privval" @@ -116,6 +118,13 @@ func L2NodeMain(ctx *cli.Context) error { if err != nil { return err } + // The node owns a single 
Prometheus HTTP endpoint (see metrics-server + // block below) backed by prometheus.DefaultGatherer. Tendermint's + // metrics still register to the default registry and are served from + // that endpoint, but tendermint must not bind its own listener — that + // would produce two endpoints (one tm-only on :26660, one node-wide) + // returning the same metrics. + tmCfg.Instrumentation.Prometheus = false tmVal := privval.LoadOrGenFilePV(tmCfg.PrivValidatorKeyFile(), tmCfg.PrivValidatorStateFile()) pubKey, _ := tmVal.GetPubKey() @@ -143,13 +152,25 @@ func L2NodeMain(ctx *cli.Context) error { return err } - if isMockSequencer { + // ========== Derivation config (loaded early to drive the layer1 branch below) ========== + derivationCfg := derivation.DefaultConfig() + if err := derivationCfg.SetCliContext(ctx); err != nil { + return fmt.Errorf("derivation set cli context error: %v", err) + } + + switch { + case isMockSequencer: ms, err = mock.NewSequencer(executor) if err != nil { return err } go ms.Start() - } else { + case signer == nil && derivationCfg.VerifyMode == derivation.VerifyModeLayer1: + // Fullnode in layer1 verify mode: derivation alone reconstructs the + // chain from L1 batches/blobs without local consensus. Skipping + // tendermint avoids running a no-op replica behind derivation. + nodeConfig.Logger.Info("layer1 verify mode: tendermint not started") + default: // Convert typed nil (*HAService)(nil) to untyped nil interface to avoid // Go's nil interface gotcha: a typed nil satisfies (ha != nil) checks. var ha tmsequencer.SequencerHA @@ -172,10 +193,6 @@ func L2NodeMain(ctx *cli.Context) error { // is both redundant (it would re-fetch L1 batches it produced) and // unsafe (deriveForce would risk a self-reorg on transient divergence). 
if signer == nil { - derivationCfg := derivation.DefaultConfig() - if err := derivationCfg.SetCliContext(ctx); err != nil { - return fmt.Errorf("derivation set cli context error: %v", err) - } rollup, err := bindings.NewRollup(derivationCfg.RollupContractAddress, l1Client) if err != nil { return fmt.Errorf("NewRollup error: %v", err) @@ -190,6 +207,14 @@ func L2NodeMain(ctx *cli.Context) error { nodeConfig.Logger.Info("derivation skipped: sequencer mode") } + // ========== Single metrics endpoint ========== + // All components (tendermint consensus/p2p/state/proxy, derivation, + // sync, executor) register to prometheus.DefaultRegisterer. We serve + // DefaultGatherer here so every verify-mode / sequencer-mode produces + // exactly one metrics endpoint sourced from config.toml's + // instrumentation.prometheus_listen_addr. + startMetricsServer(tmCfg.Instrumentation.PrometheusListenAddr, nodeConfig.Logger) + interruptChannel := make(chan os.Signal, 1) signal.Notify(interruptChannel, []os.Signal{ os.Interrupt, @@ -360,6 +385,27 @@ func initL1SequencerComponents( return tracker, verifier, signer, nil } +// startMetricsServer launches a single /metrics HTTP endpoint backed by +// prometheus.DefaultGatherer. addr is read from +// tmCfg.Instrumentation.PrometheusListenAddr; an empty value disables the +// endpoint. ListenAndServe failures are logged but do not crash the node — +// metrics are observability, not a control-plane dependency. 
+func startMetricsServer(addr string, logger tmlog.Logger) { + if addr == "" { + logger.Info("metrics server disabled (instrumentation.prometheus_listen_addr is empty)") + return + } + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.Handler()) + srv := &http.Server{Addr: addr, Handler: mux} + go func() { + if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + logger.Error("metrics server", "addr", addr, "err", err) + } + }() + logger.Info("metrics server started", "addr", addr) +} + func homeDir(ctx *cli.Context) (string, error) { home := ctx.GlobalString(flags.Home.Name) if home == "" { diff --git a/node/derivation/config.go b/node/derivation/config.go index c70845478..7648a8484 100644 --- a/node/derivation/config.go +++ b/node/derivation/config.go @@ -72,24 +72,18 @@ type Config struct { FetchBlockRange uint64 `json:"fetch_block_range"` VerifyMode string `json:"verify_mode"` ReorgCheckDepth uint64 `json:"reorg_check_depth"` - MetricsPort uint64 `json:"metrics_port"` - MetricsHostname string `json:"metrics_hostname"` - MetricsServerEnable bool `json:"metrics_server_enable"` } func DefaultConfig() *Config { return &Config{ L1: &types.L1Config{ - // Default to L1 safe (~1 epoch / ~6 min lag) rather than finalized - // (~2 epochs / ~13 min lag). L1 safe blocks can theoretically be - // reorg'd if a Casper FFG slashing condition fires, so this default - // is paired with always-on L1 reorg detection (SPEC-005 §4.7.6 in - // reorg.go) which rewinds the derivation cursor and resets the tag - // advancer when an L1 hash mismatch is observed. Operators wanting - // strict no-reorg-possible reads can still set - // --derivation.confirmations=-3 (rpc.FinalizedBlockNumber) or - // --l1.confirmations=-3 to revert to the previous behavior. 
- Confirmations: rpc.SafeBlockNumber, + // Fixed-depth (latest-N) confirmations rather than the SafeBlockNumber + // tag: 10 blocks (~2 min on mainnet) keeps lag low, and the always-on + // L1 reorg detector (SPEC-005 §4.7.6 in reorg.go) rewinds the + // derivation cursor on hash mismatch so a deeper reorg is recoverable. + // Operators wanting strict no-reorg-possible reads can still set + // --derivation.confirmations=-3 (rpc.FinalizedBlockNumber). + Confirmations: 10, }, PollInterval: DefaultPollInterval, LogProgressInterval: DefaultLogProgressInterval, @@ -102,9 +96,6 @@ func DefaultConfig() *Config { func (c *Config) SetCliContext(ctx *cli.Context) error { c.L1.Addr = ctx.GlobalString(flags.L1NodeAddr.Name) - if ctx.GlobalIsSet(flags.L1Confirmations.Name) { - c.L1.Confirmations = rpc.BlockNumber(ctx.GlobalInt64(flags.L1Confirmations.Name)) - } // The current setting priority is greater than Env L1Confirmations if ctx.GlobalIsSet(flags.DerivationConfirmations.Name) { c.L1.Confirmations = rpc.BlockNumber(ctx.GlobalInt64(flags.DerivationConfirmations.Name)) @@ -198,9 +189,5 @@ func (c *Config) SetCliContext(ctx *cli.Context) error { c.L2.EngineAddr = l2EngineAddr c.L2.JwtSecret = secret - c.MetricsServerEnable = ctx.GlobalBool(flags.MetricsServerEnable.Name) - c.MetricsHostname = ctx.GlobalString(flags.MetricsHostname.Name) - c.MetricsPort = ctx.GlobalUint64(flags.MetricsPort.Name) - return nil } diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 818d5467a..5d50c5193 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -119,16 +119,10 @@ func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, } ctx, cancel := context.WithCancel(ctx) logger = logger.With("module", "derivation") + // Metrics register to prometheus.DefaultRegisterer; the HTTP endpoint + // itself is started once at the top level (cmd/node/main.go) so every + // verify-mode and sequencer-mode produces exactly one 
/metrics URL. metrics := PrometheusMetrics("morphnode") - if cfg.MetricsServerEnable { - go func() { - _, err := metrics.Serve(cfg.MetricsHostname, cfg.MetricsPort) - if err != nil { - panic(fmt.Errorf("metrics server start error:%v", err)) - } - }() - logger.Info("metrics server enabled", "host", cfg.MetricsHostname, "port", cfg.MetricsPort) - } baseHttp := NewBasicHTTPClient(cfg.BeaconRpc, logger) l1BeaconClient := NewL1BeaconClient(baseHttp) diff --git a/node/derivation/metrics.go b/node/derivation/metrics.go index 285525157..dce79ca13 100644 --- a/node/derivation/metrics.go +++ b/node/derivation/metrics.go @@ -1,14 +1,9 @@ package derivation import ( - "net" - "net/http" - "strconv" - "github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics/prometheus" stdprometheus "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" ) const ( @@ -181,13 +176,3 @@ func (m *Metrics) IncL1ReorgReset() { func (m *Metrics) IncTagInvariantViolation() { m.TagInvariantViolationTotal.Add(1) } - -func (m *Metrics) Serve(hostname string, port uint64) (*http.Server, error) { - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.Handler()) - srv := new(http.Server) - srv.Addr = net.JoinHostPort(hostname, strconv.FormatUint(port, 10)) - srv.Handler = mux - err := srv.ListenAndServe() - return srv, err -} diff --git a/node/flags/flags.go b/node/flags/flags.go index 124353034..5f8b955ff 100644 --- a/node/flags/flags.go +++ b/node/flags/flags.go @@ -334,25 +334,6 @@ var ( Usage: "Compress determines if the rotated log files should be compressed using gzip. The default is not to perform compression. 
It is used only when log.filename is provided.", EnvVar: prefixEnvVar("LOG_COMPRESS"), } - - // metrics - MetricsServerEnable = cli.BoolFlag{ - Name: "metrics-server-enable", - Usage: "Whether or not to run the embedded metrics server", - EnvVar: prefixEnvVar("METRICS_SERVER_ENABLE"), - } - MetricsHostname = cli.StringFlag{ - Name: "metrics-hostname", - Usage: "The hostname of the metrics server", - Value: "0.0.0.0", - EnvVar: prefixEnvVar("METRICS_HOSTNAME"), - } - MetricsPort = cli.Uint64Flag{ - Name: "metrics-port", - Usage: "The port of the metrics server", - Value: 26660, - EnvVar: prefixEnvVar("METRICS_PORT"), - } ) var Flags = []cli.Flag{ @@ -420,9 +401,4 @@ var Flags = []cli.Flag{ LogFileMaxSize, LogFileMaxAge, LogCompress, - - // metrics - MetricsServerEnable, - MetricsPort, - MetricsHostname, } From 7170598b48faf37d3a9dd010d34f747e2704545c Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 4 Jun 2026 16:06:43 +0800 Subject: [PATCH 2/9] refactor(node): drop signer==nil guard on layer1 verify-mode branch verify-mode is a derivation knob; combining it with a sequencer signer is a misconfig, not a state to defend against. Letting the layer1 branch match unconditionally surfaces the mistake as "tendermint never started" instead of silently ignoring the flag and pretending nothing's wrong. Co-Authored-By: Claude Opus 4.7 (1M context) --- node/cmd/node/main.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 97def645d..84fe3a530 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -165,10 +165,12 @@ func L2NodeMain(ctx *cli.Context) error { return err } go ms.Start() - case signer == nil && derivationCfg.VerifyMode == derivation.VerifyModeLayer1: - // Fullnode in layer1 verify mode: derivation alone reconstructs the - // chain from L1 batches/blobs without local consensus. Skipping - // tendermint avoids running a no-op replica behind derivation. 
+ case derivationCfg.VerifyMode == derivation.VerifyModeLayer1: + // Layer1 verify mode: derivation alone reconstructs the chain from + // L1 batches/blobs, no local consensus. The flag is a derivation + // knob, so combining it with a sequencer signer is a misconfig that + // surfaces here as "tendermint never started" rather than being + // silently ignored. nodeConfig.Logger.Info("layer1 verify mode: tendermint not started") default: // Convert typed nil (*HAService)(nil) to untyped nil interface to avoid From 27823adb680601d6cb13badc3cc5b6eeee41101a Mon Sep 17 00:00:00 2001 From: "corey.zhang" Date: Thu, 4 Jun 2026 16:41:00 +0800 Subject: [PATCH 3/9] chore(ops): add L1_SEQUENCER_CONTRACT env var to all devnet nodes All nodes (node-0/1/2/3, sentry-node-0/1) now require the MORPH_NODE_L1_SEQUENCER_CONTRACT environment variable to initialize the L1 sequencer verifier component. This is required by the new sequencer verification logic in PR 974, which uses the L1Sequencer contract to verify block proposer identity during consensus. Tested: All nodes start successfully and metrics endpoints are working. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ops/docker/docker-compose-4nodes.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ops/docker/docker-compose-4nodes.yml b/ops/docker/docker-compose-4nodes.yml index 9b3934d14..cb7de2473 100644 --- a/ops/docker/docker-compose-4nodes.yml +++ b/ops/docker/docker-compose-4nodes.yml @@ -241,6 +241,7 @@ services: - MORPH_NODE_ROLLUP_ADDRESS=${MORPH_ROLLUP:-0x6900000000000000000000000000000000000010} - MORPH_NODE_SYNC_START_HEIGHT=${MORPH_NODE_SYNC_START_HEIGHT:-1} - MORPH_NODE_UPGRADE_BATCH_TIME=${BATCH_UPGRADE_TIME} + - MORPH_NODE_L1_SEQUENCER_CONTRACT=${L1_SEQUENCER_CONTRACT} volumes: - ".devnet/node0:${NODE_DATA_DIR}" - "${PWD}/jwt-secret.txt:${JWT_SECRET_PATH}" @@ -271,6 +272,7 @@ services: - MORPH_NODE_ROLLUP_ADDRESS=${MORPH_ROLLUP:-0x6900000000000000000000000000000000000010} - MORPH_NODE_SYNC_START_HEIGHT=${MORPH_NODE_SYNC_START_HEIGHT:-1} - MORPH_NODE_UPGRADE_BATCH_TIME=${BATCH_UPGRADE_TIME} + - MORPH_NODE_L1_SEQUENCER_CONTRACT=${L1_SEQUENCER_CONTRACT} volumes: - ".devnet/node1:${NODE_DATA_DIR}" - "${PWD}/jwt-secret.txt:${JWT_SECRET_PATH}" @@ -302,6 +304,7 @@ services: - MORPH_NODE_ROLLUP_ADDRESS=${MORPH_ROLLUP:-0x6900000000000000000000000000000000000010} - MORPH_NODE_SYNC_START_HEIGHT=${MORPH_NODE_SYNC_START_HEIGHT:-1} - MORPH_NODE_UPGRADE_BATCH_TIME=${BATCH_UPGRADE_TIME} + - MORPH_NODE_L1_SEQUENCER_CONTRACT=${L1_SEQUENCER_CONTRACT} volumes: - ".devnet/node2:${NODE_DATA_DIR}" - "${PWD}/jwt-secret.txt:${JWT_SECRET_PATH}" @@ -333,6 +336,7 @@ services: - MORPH_NODE_ROLLUP_ADDRESS=${MORPH_ROLLUP:-0x6900000000000000000000000000000000000010} - MORPH_NODE_SYNC_START_HEIGHT=${MORPH_NODE_SYNC_START_HEIGHT:-1} - MORPH_NODE_UPGRADE_BATCH_TIME=${BATCH_UPGRADE_TIME} + - MORPH_NODE_L1_SEQUENCER_CONTRACT=${L1_SEQUENCER_CONTRACT} volumes: - ".devnet/node3:${NODE_DATA_DIR}" - "${PWD}/jwt-secret.txt:${JWT_SECRET_PATH}" @@ -388,6 +392,7 @@ services: - MORPH_NODE_L1_CONFIRMATIONS=0 - 
MORPH_NODE_ROLLUP_ADDRESS=${MORPH_ROLLUP:-0x6900000000000000000000000000000000000010} - MORPH_NODE_SYNC_START_HEIGHT=${MORPH_NODE_SYNC_START_HEIGHT:-1} + - MORPH_NODE_L1_SEQUENCER_CONTRACT=${L1_SEQUENCER_CONTRACT} volumes: - ".devnet/node4:${NODE_DATA_DIR}" - "${PWD}/jwt-secret.txt:${JWT_SECRET_PATH}" @@ -443,6 +448,7 @@ services: - MORPH_NODE_ROLLUP_ADDRESS=${MORPH_ROLLUP:-0x6900000000000000000000000000000000000010} - MORPH_NODE_SYNC_START_HEIGHT=${MORPH_NODE_SYNC_START_HEIGHT:-1} - MORPH_NODE_DERIVATION_VERIFY_MODE=layer1 + - MORPH_NODE_L1_SEQUENCER_CONTRACT=${L1_SEQUENCER_CONTRACT} volumes: - ".devnet/node5:${NODE_DATA_DIR}" - "${PWD}/jwt-secret.txt:${JWT_SECRET_PATH}" From b0c5d28131ab321e63707ecd57ff0356c5eddca2 Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 4 Jun 2026 17:04:00 +0800 Subject: [PATCH 4/9] chore(derivation): const DefaultConfirmations + bump fetch range + baseHeight default - Extract Confirmations=10 as a named const so the value lives next to the comment that explains it instead of being a magic number in DefaultConfig. - Bump DefaultFetchBlockRange 100 -> 500 on the derivation path (sync.DefaultFetchBlockRange stays 100). The derivation eth_getLogs filter is just CommitBatch events, so a 500-block window stays small on the wire while cutting round-trips 5x during catch-up. - Default baseHeight to the current L2 head when unset. baseHeight is the height below which stateRoot checks are skipped; pinning it to the live head means a fresh node only verifies blocks it derives from this point forward, instead of re-checking historical blocks against an empty rollup cursor. - Drop the unused L1ChainID flag (no consumer in morphnode; the ops/l2-genesis copy is a separate package). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- node/derivation/config.go | 15 ++++++++++++--- node/derivation/derivation.go | 13 +++++++++++++ node/flags/flags.go | 7 ------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/node/derivation/config.go b/node/derivation/config.go index 7648a8484..64cde744d 100644 --- a/node/derivation/config.go +++ b/node/derivation/config.go @@ -21,8 +21,12 @@ import ( ) const ( - // DefaultFetchBlockRange is the number of blocks that we collect in a single eth_getLogs query. - DefaultFetchBlockRange = uint64(100) + // DefaultFetchBlockRange is the number of blocks that we collect in a + // single eth_getLogs query. 500 (vs sync.DefaultFetchBlockRange=100) + // trades RPC latency budget for fewer round-trips on the derivation + // path, where each query is only a CommitBatch event filter and the + // response stays small even at 500 blocks. + DefaultFetchBlockRange = uint64(500) // DefaultPollInterval is the frequency at which we query for new L1 messages. DefaultPollInterval = time.Second * 15 @@ -43,6 +47,11 @@ const ( // distance" rule of thumb and provides safety margin if Confirmations is // configured below finalized. DefaultReorgCheckDepth = uint64(64) + + // DefaultConfirmations: rationale lives on the L1.Confirmations field + // in DefaultConfig() — fixed-depth (latest-N) paired with the SPEC-005 + // §4.7.6 reorg detector, not a chain tag. + DefaultConfirmations = rpc.BlockNumber(10) ) // validateAndDefaultVerifyMode normalises an empty VerifyMode to the default @@ -83,7 +92,7 @@ func DefaultConfig() *Config { // derivation cursor on hash mismatch so a deeper reorg is recoverable. // Operators wanting strict no-reorg-possible reads can still set // --derivation.confirmations=-3 (rpc.FinalizedBlockNumber). 
- Confirmations: 10, + Confirmations: DefaultConfirmations, }, PollInterval: DefaultPollInterval, LogProgressInterval: DefaultLogProgressInterval, diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 5d50c5193..7147ae97e 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -170,6 +170,19 @@ func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, logger.Info("derivation startHeight defaulted to latest L1 confirmed block", "height", blockNumber, "confirmations", d.confirmations) d.startHeight = blockNumber } + // First-run baseHeight default: baseHeight is the L2 height below which + // stateRoot checks are skipped (snapshot-imported nodes set this to the + // snapshot height). When unset, pin to the current L2 head so derivation + // only verifies blocks it actually produces from this point forward — + // otherwise it would re-verify historical blocks against an empty rollup + // cursor and fail. + if d.baseHeight == 0 { + l2Number, err := d.l2Client.BlockNumber(ctx) + if err != nil { + return nil, fmt.Errorf("failed to fetch l2 block number: %w", err) + } + d.baseHeight = l2Number + } return d, nil } diff --git a/node/flags/flags.go b/node/flags/flags.go index 5f8b955ff..488295338 100644 --- a/node/flags/flags.go +++ b/node/flags/flags.go @@ -70,12 +70,6 @@ var ( EnvVar: prefixEnvVar("L1_ETH_BEACON_RPC"), } - L1ChainID = cli.Uint64Flag{ - Name: "l1.chain-id", - Usage: "L1 Chain ID", - EnvVar: prefixEnvVar("L1_CHAIN_ID"), - } - L1Confirmations = cli.Int64Flag{ Name: "l1.confirmations", Usage: "Number of confirmations on L1 needed for finalization", @@ -339,7 +333,6 @@ var ( var Flags = []cli.Flag{ Home, L1NodeAddr, - L1ChainID, L1Confirmations, L2EthAddr, L2EngineAddr, From e1c486f71a88999abfbbbe42fab15e8ed6604770 Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 4 Jun 2026 17:18:05 +0800 Subject: [PATCH 5/9] fix(node): preserve tendermint metrics on the unified /metrics endpoint 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tmCfg.Instrumentation.Prometheus is overloaded inside tendermint: DefaultMetricsProvider returns NopMetrics() when it's false, so cs/p2p/sm/proxy collectors never register to prometheus.DefaultRegisterer. The previous "Prometheus=false" approach silently dropped every tendermint_consensus/p2p/state/proxy series from our unified endpoint — not just the duplicate :26660 binding it was meant to suppress. Switch to clearing PrometheusListenAddr instead. node.OnStart's HTTP-bind guard (`Prometheus && addr != ""`) trips on the empty addr, but DefaultMetricsProvider keeps the real Prometheus collectors. Capture the operator's configured addr first so our top-level startMetricsServer still serves on it. Co-Authored-By: Claude Opus 4.7 (1M context) --- node/cmd/node/main.go | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 84fe3a530..ecb32c1ac 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -118,13 +118,24 @@ func L2NodeMain(ctx *cli.Context) error { if err != nil { return err } - // The node owns a single Prometheus HTTP endpoint (see metrics-server - // block below) backed by prometheus.DefaultGatherer. Tendermint's - // metrics still register to the default registry and are served from - // that endpoint, but tendermint must not bind its own listener — that - // would produce two endpoints (one tm-only on :26660, one node-wide) - // returning the same metrics. - tmCfg.Instrumentation.Prometheus = false + // Hand the listen address from tmCfg to our top-level metrics server + // (see metrics-server block below) so the node exposes exactly one + // /metrics endpoint backed by prometheus.DefaultGatherer. 
+ // + // Subtlety: tendermint's Instrumentation.Prometheus flag is overloaded — + // it gates BOTH (a) whether DefaultMetricsProvider returns real + // PrometheusMetrics or NopMetrics and (b) whether node.OnStart binds + // :26660. Setting it to false would silently drop all + // tendermint_consensus/p2p/state/proxy series from our unified endpoint, + // because Nop collectors never register to DefaultRegisterer. Instead, + // keep Prometheus=true (so collectors register) and clear the listen + // address (so node.OnStart's `Prometheus && addr != ""` guard skips the + // HTTP bind). + metricsAddr := "" + if tmCfg.Instrumentation.Prometheus { + metricsAddr = tmCfg.Instrumentation.PrometheusListenAddr + } + tmCfg.Instrumentation.PrometheusListenAddr = "" tmVal := privval.LoadOrGenFilePV(tmCfg.PrivValidatorKeyFile(), tmCfg.PrivValidatorStateFile()) pubKey, _ := tmVal.GetPubKey() @@ -214,8 +225,10 @@ func L2NodeMain(ctx *cli.Context) error { // sync, executor) register to prometheus.DefaultRegisterer. We serve // DefaultGatherer here so every verify-mode / sequencer-mode produces // exactly one metrics endpoint sourced from config.toml's - // instrumentation.prometheus_listen_addr. - startMetricsServer(tmCfg.Instrumentation.PrometheusListenAddr, nodeConfig.Logger) + // instrumentation.prometheus_listen_addr. metricsAddr was captured + // before clearing tmCfg.Instrumentation.PrometheusListenAddr above — + // see the rationale comment there for why the redirect is needed. + startMetricsServer(metricsAddr, nodeConfig.Logger) interruptChannel := make(chan os.Signal, 1) signal.Notify(interruptChannel, []os.Signal{ From d2050ba7077ea2883fe648cb2682421b647e23da Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 4 Jun 2026 17:23:22 +0800 Subject: [PATCH 6/9] refactor(node): only start own metrics server in layer1 mode Walk back the global tmCfg.Instrumentation override. 
Local / sequencer / mock paths inherit tendermint's :26660 listener exactly as upstream configured it; only the layer1 fullnode path (where tmnode never starts) brings up its own promhttp listener on the same address from config.toml. Trades the "single mechanism in all modes" symmetry for a much smaller blast radius: no risk of subtly altering what tendermint registers to DefaultRegisterer in the modes that actually run consensus. Co-Authored-By: Claude Opus 4.7 (1M context) --- node/cmd/node/main.go | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index ecb32c1ac..47293df3b 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -118,24 +118,6 @@ func L2NodeMain(ctx *cli.Context) error { if err != nil { return err } - // Hand the listen address from tmCfg to our top-level metrics server - // (see metrics-server block below) so the node exposes exactly one - // /metrics endpoint backed by prometheus.DefaultGatherer. - // - // Subtlety: tendermint's Instrumentation.Prometheus flag is overloaded — - // it gates BOTH (a) whether DefaultMetricsProvider returns real - // PrometheusMetrics or NopMetrics and (b) whether node.OnStart binds - // :26660. Setting it to false would silently drop all - // tendermint_consensus/p2p/state/proxy series from our unified endpoint, - // because Nop collectors never register to DefaultRegisterer. Instead, - // keep Prometheus=true (so collectors register) and clear the listen - // address (so node.OnStart's `Prometheus && addr != ""` guard skips the - // HTTP bind). 
- metricsAddr := "" - if tmCfg.Instrumentation.Prometheus { - metricsAddr = tmCfg.Instrumentation.PrometheusListenAddr - } - tmCfg.Instrumentation.PrometheusListenAddr = "" tmVal := privval.LoadOrGenFilePV(tmCfg.PrivValidatorKeyFile(), tmCfg.PrivValidatorStateFile()) pubKey, _ := tmVal.GetPubKey() @@ -183,6 +165,14 @@ func L2NodeMain(ctx *cli.Context) error { // surfaces here as "tendermint never started" rather than being // silently ignored. nodeConfig.Logger.Info("layer1 verify mode: tendermint not started") + // Without tmnode, no /metrics endpoint exists — every other mode + // inherits one from tendermint via config.toml's + // instrumentation.prometheus_listen_addr. Mirror that behaviour so + // derivation/sync metrics stay scrape-able. Tendermint's own series + // are absent here by design (no consensus running). + if tmCfg.Instrumentation.Prometheus { + startMetricsServer(tmCfg.Instrumentation.PrometheusListenAddr, nodeConfig.Logger) + } default: // Convert typed nil (*HAService)(nil) to untyped nil interface to avoid // Go's nil interface gotcha: a typed nil satisfies (ha != nil) checks. @@ -220,16 +210,6 @@ func L2NodeMain(ctx *cli.Context) error { nodeConfig.Logger.Info("derivation skipped: sequencer mode") } - // ========== Single metrics endpoint ========== - // All components (tendermint consensus/p2p/state/proxy, derivation, - // sync, executor) register to prometheus.DefaultRegisterer. We serve - // DefaultGatherer here so every verify-mode / sequencer-mode produces - // exactly one metrics endpoint sourced from config.toml's - // instrumentation.prometheus_listen_addr. metricsAddr was captured - // before clearing tmCfg.Instrumentation.PrometheusListenAddr above — - // see the rationale comment there for why the redirect is needed. 
- startMetricsServer(metricsAddr, nodeConfig.Logger) - interruptChannel := make(chan os.Signal, 1) signal.Notify(interruptChannel, []os.Signal{ os.Interrupt, From ab758da59da8fede9dabb7da9e2cd0a1a319262e Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 4 Jun 2026 17:25:42 +0800 Subject: [PATCH 7/9] refactor(node): drop Prometheus-flag guard around layer1 metrics server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In layer1 mode tendermint never starts, so its Instrumentation.Prometheus flag has no semantic anchor — it gates a listener that doesn't exist here. Let startMetricsServer's empty-addr fast-path be the single off switch. Operators who want metrics off either leave the addr empty or clear it explicitly; they shouldn't have to also flip a tendermint flag that's otherwise irrelevant to this code path. Co-Authored-By: Claude Opus 4.7 (1M context) --- node/cmd/node/main.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 47293df3b..4e461b807 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -169,10 +169,11 @@ func L2NodeMain(ctx *cli.Context) error { // inherits one from tendermint via config.toml's // instrumentation.prometheus_listen_addr. Mirror that behaviour so // derivation/sync metrics stay scrape-able. Tendermint's own series - // are absent here by design (no consensus running). - if tmCfg.Instrumentation.Prometheus { - startMetricsServer(tmCfg.Instrumentation.PrometheusListenAddr, nodeConfig.Logger) - } + // are absent here by design (no consensus running). The + // Instrumentation.Prometheus toggle is intentionally ignored — + // it gates a tendermint listener that doesn't exist on this path; + // startMetricsServer treats an empty addr as the off-switch. 
+ startMetricsServer(tmCfg.Instrumentation.PrometheusListenAddr, nodeConfig.Logger) default: // Convert typed nil (*HAService)(nil) to untyped nil interface to avoid // Go's nil interface gotcha: a typed nil satisfies (ha != nil) checks. From 9183220519285514d7aa31fce52c7e111059ebc0 Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 4 Jun 2026 17:27:46 +0800 Subject: [PATCH 8/9] chore(node): trim layer1 case comments Co-Authored-By: Claude Opus 4.7 (1M context) --- node/cmd/node/main.go | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 4e461b807..188449974 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -159,20 +159,9 @@ func L2NodeMain(ctx *cli.Context) error { } go ms.Start() case derivationCfg.VerifyMode == derivation.VerifyModeLayer1: - // Layer1 verify mode: derivation alone reconstructs the chain from - // L1 batches/blobs, no local consensus. The flag is a derivation - // knob, so combining it with a sequencer signer is a misconfig that - // surfaces here as "tendermint never started" rather than being - // silently ignored. nodeConfig.Logger.Info("layer1 verify mode: tendermint not started") - // Without tmnode, no /metrics endpoint exists — every other mode - // inherits one from tendermint via config.toml's - // instrumentation.prometheus_listen_addr. Mirror that behaviour so - // derivation/sync metrics stay scrape-able. Tendermint's own series - // are absent here by design (no consensus running). The - // Instrumentation.Prometheus toggle is intentionally ignored — - // it gates a tendermint listener that doesn't exist on this path; - // startMetricsServer treats an empty addr as the off-switch. + // Modes that run tendermint inherit /metrics from it; layer1 has to bring + // its own listener up on the same address. 
startMetricsServer(tmCfg.Instrumentation.PrometheusListenAddr, nodeConfig.Logger) default: // Convert typed nil (*HAService)(nil) to untyped nil interface to avoid From 69ac4e5378b1a78f9f4e20eb86ec7568c83ec567 Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 4 Jun 2026 17:28:47 +0800 Subject: [PATCH 9/9] chore(node): drop startMetricsServer doc comment Co-Authored-By: Claude Opus 4.7 (1M context) --- node/cmd/node/main.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 188449974..689e1884f 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -370,11 +370,6 @@ func initL1SequencerComponents( return tracker, verifier, signer, nil } -// startMetricsServer launches a single /metrics HTTP endpoint backed by -// prometheus.DefaultGatherer. addr is read from -// tmCfg.Instrumentation.PrometheusListenAddr; an empty value disables the -// endpoint. ListenAndServe failures are logged but do not crash the node — -// metrics are observability, not a control-plane dependency. func startMetricsServer(addr string, logger tmlog.Logger) { if addr == "" { logger.Info("metrics server disabled (instrumentation.prometheus_listen_addr is empty)")