Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion go/logic/applier.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,32 @@ func (this *Applier) AttemptInstantDDL() error {
return err
}
// We don't need a trx, because for instant DDL the SQL mode doesn't matter.
_, err := this.db.Exec(query)
return retryOnLockWaitTimeout(func() error {
_, err := this.db.Exec(query)
return err
}, this.migrationContext.Log)
}

// retryOnLockWaitTimeout invokes operation up to maxRetries (5) times, retrying
// only when it fails with a MySQL lock wait timeout (error number 1205). A nil
// result or any other error is returned right away. Before each retry it logs
// the attempt number and calls RetrySleepFn with a linearly growing duration
// (5s, 10s, 15s, 20s). If every attempt hits the timeout, the error from the
// last attempt is returned. It is used for instant DDL attempts, where the
// statement may be blocked by a long-running transaction.
func retryOnLockWaitTimeout(operation func() error, logger base.Logger) error {
	const maxRetries = 5
	var lastErr error
	for attempt := 1; attempt <= maxRetries; attempt++ {
		if attempt > 1 {
			logger.Infof("Retrying after lock wait timeout (attempt %d/%d)", attempt, maxRetries)
			// Back off a further 5 seconds for every attempt that has already failed.
			RetrySleepFn(time.Duration(attempt-1) * 5 * time.Second)
		}
		if lastErr = operation(); lastErr == nil {
			return nil
		}
		// Only a MySQL error carrying number 1205 is worth another attempt.
		var mysqlErr *drivermysql.MySQLError
		isLockWaitTimeout := errors.As(lastErr, &mysqlErr) && mysqlErr.Number == 1205
		if !isLockWaitTimeout {
			return lastErr
		}
	}
	return lastErr
}

Expand Down
68 changes: 68 additions & 0 deletions go/logic/applier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ package logic
import (
"context"
gosql "database/sql"
"errors"
"strings"
"testing"
"time"

drivermysql "github.com/go-sql-driver/mysql"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"

Expand Down Expand Up @@ -198,6 +201,71 @@ func TestApplierInstantDDL(t *testing.T) {
})
}

// TestRetryOnLockWaitTimeout checks how many times retryOnLockWaitTimeout calls
// its operation, and which error it returns, for each kind of outcome: immediate
// success, success after lock wait timeouts, a non-retryable MySQL error, a
// non-MySQL error, and lock wait timeouts on every attempt.
func TestRetryOnLockWaitTimeout(t *testing.T) {
	// Replace RetrySleepFn with a no-op for the duration of the test, and
	// restore the previous value afterwards.
	savedSleepFn := RetrySleepFn
	RetrySleepFn = func(time.Duration) {}
	defer func() { RetrySleepFn = savedSleepFn }()

	logger := base.NewMigrationContext().Log

	lockWaitTimeoutErr := &drivermysql.MySQLError{Number: 1205, Message: "Lock wait timeout exceeded"}
	nonRetryableErr := &drivermysql.MySQLError{Number: 1845, Message: "ALGORITHM=INSTANT is not supported"}
	genericErr := errors.New("connection refused")

	testCases := []struct {
		name string
		// result returns the operation's outcome for the given 1-based call number.
		result    func(call int) error
		wantErr   error
		wantCalls int
	}{
		{
			name:      "success on first attempt",
			result:    func(int) error { return nil },
			wantErr:   nil,
			wantCalls: 1,
		},
		{
			name: "retry on lock wait timeout then succeed",
			result: func(call int) error {
				if call < 3 {
					return lockWaitTimeoutErr
				}
				return nil
			},
			wantErr:   nil,
			wantCalls: 3,
		},
		{
			name:      "non-retryable error returns immediately",
			result:    func(int) error { return nonRetryableErr },
			wantErr:   nonRetryableErr,
			wantCalls: 1,
		},
		{
			name:      "non-mysql error returns immediately",
			result:    func(int) error { return genericErr },
			wantErr:   genericErr,
			wantCalls: 1,
		},
		{
			name:      "exhausts all retries",
			result:    func(int) error { return lockWaitTimeoutErr },
			wantErr:   lockWaitTimeoutErr,
			wantCalls: 5,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			calls := 0
			err := retryOnLockWaitTimeout(func() error {
				calls++
				return tc.result(calls)
			}, logger)
			if tc.wantErr == nil {
				require.NoError(t, err)
			} else {
				require.ErrorIs(t, err, tc.wantErr)
			}
			require.Equal(t, tc.wantCalls, calls)
		})
	}
}

type ApplierTestSuite struct {
suite.Suite

Expand Down
Loading