Skip to content

Commit 9e47304

Browse files
committed
make evm_execution more robust
1 parent afcd6bd commit 9e47304

File tree

21 files changed

+873
-219
lines changed

21 files changed

+873
-219
lines changed

.mockery.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,11 @@ packages:
6363
dir: ./test/mocks
6464
pkgname: mocks
6565
filename: da.go
66-
github.com/evstack/ev-node/pkg/da/types:
67-
interfaces:
6866
Verifier:
6967
config:
7068
dir: ./test/mocks
7169
pkgname: mocks
72-
filename: da_verifier.go
70+
filename: da.go
7371
github.com/evstack/ev-node/pkg/da/jsonrpc:
7472
interfaces:
7573
BlobModule:

block/internal/executing/executor.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package executing
33
import (
44
"bytes"
55
"context"
6+
"crypto/sha256"
67
"errors"
78
"fmt"
89
"sync"
@@ -25,6 +26,15 @@ import (
2526
"github.com/evstack/ev-node/types"
2627
)
2728

29+
// payloadResumer is an optional interface that EVM execution clients can implement
30+
// to support resuming in-progress payload builds after crashes.
31+
// This is defined locally to avoid coupling the core interface to EVM-specific concepts.
32+
type payloadResumer interface {
33+
// ResumePayload resumes an in-progress payload build using a stored payloadID.
34+
// This allows crash recovery without creating sibling blocks.
35+
ResumePayload(ctx context.Context, payloadID []byte) (stateRoot []byte, err error)
36+
}
37+
2838
// Executor handles block production, transaction processing, and state management
2939
type Executor struct {
3040
// Core components
@@ -369,6 +379,26 @@ func (e *Executor) produceBlock() error {
369379
if err = batch.SaveBlockData(header, data, &types.Signature{}); err != nil {
370380
return fmt.Errorf("failed to save block data: %w", err)
371381
}
382+
383+
// Save ExecMeta with Stage="started" for crash recovery and idempotent execution
384+
execMeta := &store.ExecMeta{
385+
Height: newHeight,
386+
Timestamp: header.Time().Unix(),
387+
Stage: store.ExecStageStarted,
388+
UpdatedAtUnix: time.Now().Unix(),
389+
}
390+
// Compute tx hash for sanity checks on retry
391+
if len(data.Txs) > 0 {
392+
h := sha256.New()
393+
for _, tx := range data.Txs {
394+
h.Write(tx)
395+
}
396+
execMeta.TxHash = h.Sum(nil)
397+
}
398+
if err = batch.SaveExecMeta(execMeta); err != nil {
399+
return fmt.Errorf("failed to save exec meta: %w", err)
400+
}
401+
372402
if err = batch.Commit(); err != nil {
373403
return fmt.Errorf("failed to commit early save batch: %w", err)
374404
}
@@ -422,6 +452,18 @@ func (e *Executor) produceBlock() error {
422452
return fmt.Errorf("failed to update state: %w", err)
423453
}
424454

455+
// Update ExecMeta to Stage="promoted" after successful execution
456+
execMeta := &store.ExecMeta{
457+
Height: newHeight,
458+
Timestamp: header.Time().Unix(),
459+
StateRoot: newState.AppHash,
460+
Stage: store.ExecStagePromoted,
461+
UpdatedAtUnix: time.Now().Unix(),
462+
}
463+
if err := batch.SaveExecMeta(execMeta); err != nil {
464+
return fmt.Errorf("failed to update exec meta to promoted: %w", err)
465+
}
466+
425467
if err := batch.Commit(); err != nil {
426468
return fmt.Errorf("failed to commit batch: %w", err)
427469
}
@@ -624,8 +666,63 @@ func (e *Executor) signHeader(header types.Header) (types.Signature, error) {
624666
}
625667

626668
// executeTxsWithRetry executes transactions with retry logic.
669+
// It first checks ExecMeta for idempotent execution - if a block was already promoted
670+
// at this height, it returns the stored StateRoot instead of rebuilding.
671+
// If a payloadID exists (started but not promoted), it attempts to resume the payload
672+
// using the payloadResumer interface if the execution client implements it.
627673
// NOTE: the function retries the execution client call regardless of the error. Some execution client errors are irrecoverable and will eventually halt the node, as expected.
628674
func (e *Executor) executeTxsWithRetry(ctx context.Context, rawTxs [][]byte, header types.Header, currentState types.State) ([]byte, error) {
675+
height := header.Height()
676+
677+
// Task 3.1: Check ExecMeta for idempotent execution
678+
// If we already have a promoted block at this height, return the stored StateRoot
679+
execMeta, err := e.store.GetExecMeta(ctx, height)
680+
if err == nil && execMeta != nil {
681+
if execMeta.Stage == store.ExecStagePromoted && len(execMeta.StateRoot) > 0 {
682+
e.logger.Info().
683+
Uint64("height", height).
684+
Str("stage", execMeta.Stage).
685+
Msg("executeTxsWithRetry: reusing already-promoted execution (idempotent)")
686+
return execMeta.StateRoot, nil
687+
}
688+
689+
// Task 3.3: If we have a started execution with a payloadID, try to resume
690+
// This handles crash recovery where we got a payloadID but didn't complete the build
691+
if execMeta.Stage == store.ExecStageStarted && len(execMeta.PayloadID) > 0 {
692+
e.logger.Info().
693+
Uint64("height", height).
694+
Str("stage", execMeta.Stage).
695+
Msg("executeTxsWithRetry: found in-progress execution with payloadID, attempting resume")
696+
697+
// Check if the executor implements payloadResumer (EVM-specific)
698+
if resumer, ok := e.exec.(payloadResumer); ok {
699+
stateRoot, err := resumer.ResumePayload(ctx, execMeta.PayloadID)
700+
if err == nil {
701+
e.logger.Info().
702+
Uint64("height", height).
703+
Msg("executeTxsWithRetry: successfully resumed payload")
704+
return stateRoot, nil
705+
}
706+
// Resume failed - log and fall through to normal execution
707+
// The EL-level idempotency check will handle if the block was already built
708+
e.logger.Warn().Err(err).
709+
Uint64("height", height).
710+
Msg("executeTxsWithRetry: failed to resume payload, falling back to normal execution")
711+
} else {
712+
e.logger.Debug().
713+
Uint64("height", height).
714+
Msg("executeTxsWithRetry: executor does not support PayloadResumer, using normal execution")
715+
}
716+
} else if execMeta.Stage == store.ExecStageStarted {
717+
// Started but no payloadID - log and proceed normally
718+
// The EL-level idempotency check in ExecuteTxs will handle reusing the block
719+
e.logger.Debug().
720+
Uint64("height", height).
721+
Str("stage", execMeta.Stage).
722+
Msg("executeTxsWithRetry: found in-progress execution without payloadID, will attempt EL-level idempotency")
723+
}
724+
}
725+
629726
for attempt := 1; attempt <= common.MaxRetriesBeforeHalt; attempt++ {
630727
newAppHash, _, err := e.exec.ExecuteTxs(ctx, rawTxs, header.Height(), header.Time(), currentState.AppHash)
631728
if err != nil {

block/internal/executing/executor_logic_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,13 @@ func TestExecutor_executeTxsWithRetry(t *testing.T) {
297297
mockExec := testmocks.NewMockExecutor(t)
298298
tt.setupMock(mockExec)
299299

300+
// Use an in-memory store for the test (needed for GetExecMeta)
301+
ds := sync.MutexWrap(datastore.NewMapDatastore())
302+
memStore := store.New(ds)
303+
300304
e := &Executor{
301305
exec: mockExec,
306+
store: memStore,
302307
ctx: execCtx,
303308
logger: zerolog.Nop(),
304309
}

buf.gen.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
version: v2
2-
clean: true
32

43
plugins:
54
- remote: buf.build/protocolbuffers/go

0 commit comments

Comments
 (0)