Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions sei-cosmos/storev2/rootmulti/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,11 @@ func NewStore(
ctx := context.Background()
scStore := composite.NewCompositeCommitStore(ctx, scDir, scConfig)
if err := scStore.CleanupCrashArtifacts(); err != nil {
panic(err)
if commonerrors.IsFileLockError(err) {
logger.Error("non-fatal: failed to acquire file lock for cleanup", "err", err)
} else {
Comment on lines +97 to +99
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Fail fast instead of ignoring cleanup lock failure

This branch now treats cleanup lock acquisition errors as non-fatal and continues startup, but in the current startup path LoadVersionAndUpgrade drops rs.scStore.LoadVersion(..., false) errors (store.go lines 440-441). In lock-contention scenarios, this can let the node proceed without a successfully opened SC backend instead of stopping early. Until downstream error propagation is fixed, lock acquisition failure here should remain fatal.

Useful? React with 👍 / 👎.

panic(err)
}
}
store := &Store{
scStore: scStore,
Expand Down Expand Up @@ -434,7 +438,7 @@ func (rs *Store) LoadVersionAndUpgrade(version int64, upgrades *types.StoreUpgra
}
rs.scStore.Initialize(initialStores)
if _, err := rs.scStore.LoadVersion(version, false); err != nil {
return nil
return err
}

storesKeysForDeletion := make(map[types.StoreKey]struct{})
Expand Down
42 changes: 16 additions & 26 deletions sei-db/common/errors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,34 @@ package errors

import (
"errors"
"strings"
)

var (
ErrKeyEmpty = errors.New("key empty")
ErrRecordNotFound = errors.New("record not found")
ErrStartAfterEnd = errors.New("start key after end key")
ErrorExportDone = errors.New("export is complete")
ErrNotFound = errors.New("not found")
ErrKeyEmpty = errors.New("key empty")
ErrRecordNotFound = errors.New("record not found")
ErrStartAfterEnd = errors.New("start key after end key")
ErrorExportDone = errors.New("export is complete")
ErrNotFound = errors.New("not found")
ErrFileLockUnavailable = errors.New("file lock unavailable")
)

// IsNotFound returns true if the error represents a "not found" condition.
func IsNotFound(err error) bool {
return errors.Is(err, ErrNotFound)
}

// IsFileLockError returns true if the error is due to a file lock
// that could not be acquired (e.g. held by another process).
func IsFileLockError(err error) bool {
return errors.Is(err, ErrFileLockUnavailable)
}

// Join returns an error that wraps the given errors.
// Any nil error values are discarded.
// Join returns nil if errs contains no non-nil values.
// The error formats as the concatenation of the strings obtained
// by calling the Error method of each element of errs, with a newline
// between each string.
// Unlike the previous string-concatenation implementation, this delegates
// to stdlib errors.Join so that wrapped sentinels remain detectable via
// errors.Is / errors.As.
func Join(errs ...error) error {
var errStrs []string
numErrs := 0
for _, err := range errs {
if err != nil {
numErrs++
if err.Error() != "" {
errStrs = append(errStrs, err.Error())
}
}
}

if numErrs <= 0 {
return nil
}

return errors.New(strings.Join(errStrs, "\n"))

return errors.Join(errs...)
}
2 changes: 1 addition & 1 deletion sei-db/state_db/sc/composite/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func (cs *CompositeCommitStore) LoadVersion(targetVersion int64, readOnly bool)
if cs.evmCommitter != nil {
evmStore, err := cs.evmCommitter.LoadVersion(targetVersion, true)
if err != nil {
logger.Info("FlatKV unavailable for readonly load, EVM data will not be served",
logger.Error("FlatKV unavailable for readonly load, EVM data will not be served",
"version", targetVersion, "err", err)
} else {
newStore.evmCommitter = evmStore
Expand Down
9 changes: 8 additions & 1 deletion sei-db/state_db/sc/flatkv/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package flatkv

import (
"encoding/binary"
"errors"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -185,12 +186,18 @@ func removeTmpDirs(dir string) error {
if err != nil {
return err
}
var errs []error
for _, e := range entries {
name := e.Name()
if e.IsDir() && (strings.HasSuffix(name, tmpSuffix) || strings.HasSuffix(name, removingSuffix)) {
_ = os.RemoveAll(filepath.Join(dir, name))
if err := os.RemoveAll(filepath.Join(dir, name)); err != nil {
errs = append(errs, fmt.Errorf("remove tmp dir %s: %w", name, err))
}
}
}
if len(errs) > 0 {
return errors.Join(errs...)
}
return nil
}

Expand Down
26 changes: 24 additions & 2 deletions sei-db/state_db/sc/flatkv/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"path/filepath"
"time"

commonerrors "github.com/sei-protocol/sei-chain/sei-db/common/errors"
"github.com/sei-protocol/sei-chain/sei-db/common/metrics"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/pebbledb"
seidbtypes "github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
Expand Down Expand Up @@ -216,8 +217,19 @@ func (s *CommitStore) LoadVersion(targetVersion int64, readOnly bool) (_ Store,
}

// loadVersionReadOnly creates an isolated, read-only CommitStore at the
// requested version.
// requested version. If the writer lock has not yet been acquired (e.g. the
// store was freshly constructed), CleanupOrphanedReadOnlyDirs is called
// lazily to acquire it and clean up any leftover directories. When the lock
// is acquired lazily, ownership is transferred to the returned clone so that
// closing the clone releases it; this prevents leaking the lock when the
// caller never explicitly closes the parent store.
func (s *CommitStore) loadVersionReadOnly(targetVersion int64) (_ Store, retErr error) {
lazyLock := s.fileLock == nil
if lazyLock {
if err := s.CleanupOrphanedReadOnlyDirs(); err != nil {
return nil, fmt.Errorf("loadVersionReadOnly: pre-init cleanup: %w", err)
}
}
ro := NewCommitStore(s.ctx, s.dbDir, s.config)

workDir, err := os.MkdirTemp(ro.flatkvDir(), readOnlyDirPrefix)
Expand All @@ -226,6 +238,13 @@ func (s *CommitStore) loadVersionReadOnly(targetVersion int64) (_ Store, retErr
}
ro.readOnlyWorkDir = workDir

// Transfer the lazily-acquired lock to the clone so that ro.Close()
// releases it, preventing a leak when the parent is never closed.
if lazyLock && s.fileLock != nil {
ro.fileLock = s.fileLock
s.fileLock = nil
}

defer func() {
if retErr != nil {
if closeErr := ro.Close(); closeErr != nil {
Expand Down Expand Up @@ -385,10 +404,13 @@ func (s *CommitStore) acquireFileLock(dir string) error {
}
locked, err := fl.TryLock()
if err != nil {
if errors.Is(err, filelock.ErrLocked) {
return fmt.Errorf("%w: %v", commonerrors.ErrFileLockUnavailable, err)
}
return fmt.Errorf("acquire file lock: %w", err)
}
if !locked {
return fmt.Errorf("acquire file lock: already held by another process (%s)", lockPath)
return fmt.Errorf("%w: held by another process (%s)", commonerrors.ErrFileLockUnavailable, lockPath)
}
s.fileLock = fl
return nil
Expand Down
14 changes: 11 additions & 3 deletions sei-db/state_db/sc/flatkv/store_lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ func (s *CommitStore) Close() error {
}

if s.readOnlyWorkDir != "" {
_ = os.RemoveAll(s.readOnlyWorkDir)
if rmErr := os.RemoveAll(s.readOnlyWorkDir); rmErr != nil {
err = errors.Join(err, fmt.Errorf("remove readonly workdir: %w", rmErr))
}
}

if err != nil {
Expand Down Expand Up @@ -110,14 +112,20 @@ func (s *CommitStore) CleanupOrphanedReadOnlyDirs() error {

entries, err := os.ReadDir(dir)
if err != nil {
return nil
return fmt.Errorf("read flatkv dir: %w", err)
}
var errs []error
for _, e := range entries {
if e.IsDir() && strings.HasPrefix(e.Name(), readOnlyDirPrefix) {
logger.Info("removing orphaned readonly dir", "dir", e.Name())
_ = os.RemoveAll(filepath.Join(dir, e.Name()))
if err := os.RemoveAll(filepath.Join(dir, e.Name())); err != nil {
errs = append(errs, fmt.Errorf("remove orphaned dir %s: %w", e.Name(), err))
}
}
}
if len(errs) > 0 {
return errors.Join(errs...)
}
return nil
}

Expand Down
4 changes: 3 additions & 1 deletion sei-db/state_db/sc/flatkv/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/stretchr/testify/require"

commonerrors "github.com/sei-protocol/sei-chain/sei-db/common/errors"
"github.com/sei-protocol/sei-chain/sei-db/common/evm"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/pebbledb"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
Expand Down Expand Up @@ -956,5 +957,6 @@ func TestCleanupOrphanedReadOnlyDirsHoldsWriterLock(t *testing.T) {

err := s2.CleanupOrphanedReadOnlyDirs()
require.Error(t, err)
require.ErrorContains(t, err, "acquire file lock")
require.ErrorIs(t, err, commonerrors.ErrFileLockUnavailable)
require.ErrorContains(t, err, "file already locked")
}
Loading