@@ -3,6 +3,7 @@ package executing
33import (
44 "bytes"
55 "context"
6+ "crypto/sha256"
67 "errors"
78 "fmt"
89 "sync"
@@ -25,6 +26,15 @@ import (
2526 "github.com/evstack/ev-node/types"
2627)
2728
29+ // payloadResumer is an optional interface that EVM execution clients can implement
30+ // to support resuming in-progress payload builds after crashes. Support is detected at runtime with a type assertion on the execution client.
31+ // This is defined locally to avoid coupling the core interface to EVM-specific concepts.
32+ type payloadResumer interface {
33+ // ResumePayload resumes an in-progress payload build using a stored payloadID.
34+ // This allows crash recovery without creating sibling blocks. It returns the state root of the resumed build; a non-nil error means the resume failed and the caller falls back to normal execution.
35+ ResumePayload (ctx context.Context , payloadID []byte ) (stateRoot []byte , err error )
36+ }
37+
2838// Executor handles block production, transaction processing, and state management
2939type Executor struct {
3040 // Core components
@@ -369,6 +379,26 @@ func (e *Executor) produceBlock() error {
369379 if err = batch .SaveBlockData (header , data , & types.Signature {}); err != nil {
370380 return fmt .Errorf ("failed to save block data: %w" , err )
371381 }
382+
383+ // Save ExecMeta with Stage="started" for crash recovery and idempotent execution
384+ execMeta := & store.ExecMeta {
385+ Height : newHeight ,
386+ Timestamp : header .Time ().Unix (),
387+ Stage : store .ExecStageStarted ,
388+ UpdatedAtUnix : time .Now ().Unix (),
389+ }
390+ // Compute tx hash for sanity checks on retry
391+ if len (data .Txs ) > 0 {
392+ h := sha256 .New ()
393+ for _ , tx := range data .Txs {
394+ h .Write (tx )
395+ }
396+ execMeta .TxHash = h .Sum (nil )
397+ }
398+ if err = batch .SaveExecMeta (execMeta ); err != nil {
399+ return fmt .Errorf ("failed to save exec meta: %w" , err )
400+ }
401+
372402 if err = batch .Commit (); err != nil {
373403 return fmt .Errorf ("failed to commit early save batch: %w" , err )
374404 }
@@ -422,6 +452,18 @@ func (e *Executor) produceBlock() error {
422452 return fmt .Errorf ("failed to update state: %w" , err )
423453 }
424454
455+ // Update ExecMeta to Stage="promoted" after successful execution
456+ execMeta := & store.ExecMeta {
457+ Height : newHeight ,
458+ Timestamp : header .Time ().Unix (),
459+ StateRoot : newState .AppHash ,
460+ Stage : store .ExecStagePromoted ,
461+ UpdatedAtUnix : time .Now ().Unix (),
462+ }
463+ if err := batch .SaveExecMeta (execMeta ); err != nil {
464+ return fmt .Errorf ("failed to update exec meta to promoted: %w" , err )
465+ }
466+
425467 if err := batch .Commit (); err != nil {
426468 return fmt .Errorf ("failed to commit batch: %w" , err )
427469 }
@@ -624,8 +666,63 @@ func (e *Executor) signHeader(header types.Header) (types.Signature, error) {
624666}
625667
626668// executeTxsWithRetry executes transactions with retry logic.
669+ // It first checks ExecMeta for idempotent execution - if a block was already built
670+ // at this height, it returns the stored StateRoot instead of rebuilding.
671+ // If a payloadID exists (started but not promoted), it attempts to resume the payload
672+ // using the payloadResumer interface if the execution client implements it.
627673// NOTE: the function retries the execution client call regardless of the error. Some execution client errors are irrecoverable and will eventually halt the node, as expected.
628674func (e * Executor ) executeTxsWithRetry (ctx context.Context , rawTxs [][]byte , header types.Header , currentState types.State ) ([]byte , error ) {
675+ height := header .Height ()
676+
677+ // Task 3.1: Check ExecMeta for idempotent execution
678+ // If we already have a promoted block at this height, return the stored StateRoot
679+ execMeta , err := e .store .GetExecMeta (ctx , height )
680+ if err == nil && execMeta != nil {
681+ if execMeta .Stage == store .ExecStagePromoted && len (execMeta .StateRoot ) > 0 {
682+ e .logger .Info ().
683+ Uint64 ("height" , height ).
684+ Str ("stage" , execMeta .Stage ).
685+ Msg ("executeTxsWithRetry: reusing already-promoted execution (idempotent)" )
686+ return execMeta .StateRoot , nil
687+ }
688+
689+ // Task 3.3: If we have a started execution with a payloadID, try to resume
690+ // This handles crash recovery where we got a payloadID but didn't complete the build
691+ if execMeta .Stage == store .ExecStageStarted && len (execMeta .PayloadID ) > 0 {
692+ e .logger .Info ().
693+ Uint64 ("height" , height ).
694+ Str ("stage" , execMeta .Stage ).
695+ Msg ("executeTxsWithRetry: found in-progress execution with payloadID, attempting resume" )
696+
697+ // Check if the executor implements payloadResumer (EVM-specific)
698+ if resumer , ok := e .exec .(payloadResumer ); ok {
699+ stateRoot , err := resumer .ResumePayload (ctx , execMeta .PayloadID )
700+ if err == nil {
701+ e .logger .Info ().
702+ Uint64 ("height" , height ).
703+ Msg ("executeTxsWithRetry: successfully resumed payload" )
704+ return stateRoot , nil
705+ }
706+ // Resume failed - log and fall through to normal execution
707+ // The EL-level idempotency check will handle if the block was already built
708+ e .logger .Warn ().Err (err ).
709+ Uint64 ("height" , height ).
710+ Msg ("executeTxsWithRetry: failed to resume payload, falling back to normal execution" )
711+ } else {
712+ e .logger .Debug ().
713+ Uint64 ("height" , height ).
714+ Msg ("executeTxsWithRetry: executor does not support PayloadResumer, using normal execution" )
715+ }
716+ } else if execMeta .Stage == store .ExecStageStarted {
717+ // Started but no payloadID - log and proceed normally
718+ // The EL-level idempotency check in ExecuteTxs will handle reusing the block
719+ e .logger .Debug ().
720+ Uint64 ("height" , height ).
721+ Str ("stage" , execMeta .Stage ).
722+ Msg ("executeTxsWithRetry: found in-progress execution without payloadID, will attempt EL-level idempotency" )
723+ }
724+ }
725+
629726 for attempt := 1 ; attempt <= common .MaxRetriesBeforeHalt ; attempt ++ {
630727 newAppHash , _ , err := e .exec .ExecuteTxs (ctx , rawTxs , header .Height (), header .Time (), currentState .AppHash )
631728 if err != nil {
0 commit comments