Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .devcontainer/start-mysql.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ nohup mariadbd \
--port="${PORT}" \
--skip-name-resolve \
--log-error="${LOG_FILE}" \
--innodb-print-all-deadlocks=ON \
>/dev/null 2>&1 &

# Wait for MySQL to be ready
Expand Down
93 changes: 93 additions & 0 deletions database/deadlock_debug.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package database

import (
"strings"

"gorm.io/gorm"
)

// PrintDeadlockInfo returns the "LATEST DETECTED DEADLOCK" section of the
// InnoDB status report so callers can log it when a deadlock error is detected.
// Despite its name it does not print anything itself.
//
// It returns an empty string when the dialect is not "mysql", when the status
// query fails or yields no rows, when the Status column is missing or not
// textual, or when the report contains no deadlock section.
func PrintDeadlockInfo(db *gorm.DB) string {
	if db.Dialector.Name() != "mysql" {
		return ""
	}

	// Fetch the full InnoDB status report.
	var results []map[string]interface{}
	err := db.Raw("SHOW ENGINE INNODB STATUS").Scan(&results).Error
	if err != nil || len(results) == 0 {
		return ""
	}

	// Depending on the driver's scan type for the column, the value may be
	// handed back as raw bytes instead of a string; accept both rather than
	// silently discarding the report.
	var status string
	switch v := results[0]["Status"].(type) {
	case string:
		status = v
	case []byte:
		status = string(v)
	default:
		return ""
	}

	start := strings.Index(status, "LATEST DETECTED DEADLOCK")
	if start < 0 {
		return ""
	}
	section := status[start:]

	// Cut the section at the separator that introduces the TRANSACTIONS
	// header; if that marker is absent, return everything after the deadlock
	// header.
	if end := strings.Index(section, "--------\nTRANSACTIONS"); end > 0 {
		return section[:end]
	}
	return section
}

// EnableDeadlockLogging turns on the innodb_print_all_deadlocks global
// variable by issuing a SET GLOBAL statement. For any dialect other than
// "mysql" it does nothing and returns nil.
//
// The statement uses SET GLOBAL, so the value is expected to last only until
// the server restarts.
func EnableDeadlockLogging(db *gorm.DB) error {
	if db.Dialector.Name() == "mysql" {
		return db.Exec("SET GLOBAL innodb_print_all_deadlocks = ON").Error
	}
	return nil
}

// CheckDeadlockLoggingEnabled reports whether the innodb_print_all_deadlocks
// server variable is currently "ON" (compared case-insensitively).
//
// For any dialect other than "mysql" it returns (false, nil). A query error is
// returned as-is together with false.
func CheckDeadlockLoggingEnabled(db *gorm.DB) (bool, error) {
	if db.Dialector.Name() != "mysql" {
		return false, nil
	}

	// SHOW VARIABLES yields (Variable_name, Value) rows; only Value is used.
	var result struct {
		VariableName string `gorm:"column:Variable_name"`
		Value        string `gorm:"column:Value"`
	}
	if err := db.Raw("SHOW VARIABLES LIKE 'innodb_print_all_deadlocks'").Scan(&result).Error; err != nil {
		return false, err
	}

	// EqualFold compares case-insensitively without allocating a lowered copy.
	return strings.EqualFold(result.Value, "ON"), nil
}

// GetDataLockWaits returns every row of performance_schema.data_lock_waits as
// a slice of column-name to value maps. For any dialect other than "mysql" it
// returns (nil, nil).
//
// NOTE(review): this was previously documented as requiring MySQL 8.0.30+ or
// MariaDB 10.5+; confirm which server versions actually expose this table.
func GetDataLockWaits(db *gorm.DB) ([]map[string]interface{}, error) {
	if db.Dialector.Name() != "mysql" {
		return nil, nil
	}

	const query = "SELECT * FROM performance_schema.data_lock_waits"

	var rows []map[string]interface{}
	tx := db.Raw(query).Scan(&rows)
	return rows, tx.Error
}

// GetLockWaitTransactions returns rows from
// performance_schema.events_transactions_current whose STATE is 'ACTIVE' and
// whose AUTOCOMMIT is 'NO', as a slice of column-name to value maps. For any
// dialect other than "mysql" it returns (nil, nil).
//
// Note that the query itself has no lock-wait predicate: it selects active
// explicit transactions, which callers can correlate with lock-wait data.
//
// NOTE(review): this was previously documented as requiring MySQL 8.0.30+ or
// MariaDB 10.5+; confirm against the servers actually in use.
func GetLockWaitTransactions(db *gorm.DB) ([]map[string]interface{}, error) {
	if db.Dialector.Name() != "mysql" {
		return nil, nil
	}

	var rows []map[string]interface{}
	tx := db.Raw(`
SELECT * FROM performance_schema.events_transactions_current
WHERE STATE = 'ACTIVE'
AND AUTOCOMMIT = 'NO'
`).Scan(&rows)
	return rows, tx.Error
}
136 changes: 130 additions & 6 deletions handler/dataprep/remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/rclone/rclone/fs"
"github.com/rjNemo/underscore"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)

type RemoveRequest struct {
Expand Down Expand Up @@ -53,12 +54,135 @@ func (DefaultHandler) RemovePreparationHandler(ctx context.Context, db *gorm.DB,
}

err = database.DoRetry(ctx, func() error {
return db.Transaction(func(db *gorm.DB) error {
// Use Select to control deletion order and avoid circular cascade deadlocks.
// GORM v1.31+ handles this by deleting associations in specified order,
// preventing Postgres deadlocks from multiple cascade paths to Files table.
// See: https://github.com/data-preservation-programs/singularity/pull/583
return db.Select("Wallets", "SourceStorages", "OutputStorages").Delete(&preparation).Error
return db.Transaction(func(tx *gorm.DB) error {
// Explicitly delete child records to avoid CASCADE deadlocks with concurrent operations.
// We materialize IDs first to avoid nested subqueries that cause MySQL deadlocks.

// Step 1: Get all attachment IDs for this preparation
var attachmentIDs []model.SourceAttachmentID
err := tx.Table("source_attachments").Select("id").
Where("preparation_id = ?", preparation.ID).
Find(&attachmentIDs).Error
if err != nil {
return errors.WithStack(err)
}

if len(attachmentIDs) == 0 {
// No attachments, just delete the preparation
return tx.Select("Wallets", "SourceStorages", "OutputStorages").Delete(&preparation).Error
}

// Step 2: Get all car IDs
var carIDs []model.CarID
err = tx.Table("cars").Select("id").
Where("preparation_id = ?", preparation.ID).
Find(&carIDs).Error
if err != nil {
return errors.WithStack(err)
}

// Step 3: Get all job IDs
var jobIDs []model.JobID
err = tx.Table("jobs").Select("id").
Where("attachment_id IN ?", attachmentIDs).
Find(&jobIDs).Error
if err != nil {
return errors.WithStack(err)
}

// Step 4: Get all file IDs
var fileIDs []model.FileID
err = tx.Table("files").Select("id").
Where("attachment_id IN ?", attachmentIDs).
Find(&fileIDs).Error
if err != nil {
return errors.WithStack(err)
}

// Step 5: Try to lock all jobs with SKIP LOCKED to detect concurrent activity
// This prevents deadlock with concurrent job updates
if len(jobIDs) > 0 {
var lockedJobs []model.Job
err = tx.Clauses(clause.Locking{
Strength: "UPDATE",
Options: "SKIP LOCKED",
}).Select("id").
Where("id IN ?", jobIDs).
Find(&lockedJobs).Error
if err != nil {
return errors.WithStack(err)
}

// If we couldn't lock all jobs, some are being used by concurrent transactions
if len(lockedJobs) < len(jobIDs) {
return errors.Wrapf(handlererror.ErrInvalidParameter,
"preparation %s has jobs in use by concurrent operations (%d/%d locked)",
preparation.Name, len(lockedJobs), len(jobIDs))
}
}

// Now delete in leaf-to-root order using materialized IDs:

// 1. Delete car_blocks (leaf node)
if len(carIDs) > 0 {
err = tx.Where("car_id IN ?", carIDs).Delete(&model.CarBlock{}).Error
if err != nil {
return errors.WithStack(err)
}
}

// 2. Delete cars
if len(carIDs) > 0 {
err = tx.Where("id IN ?", carIDs).Delete(&model.Car{}).Error
if err != nil {
return errors.WithStack(err)
}
}

// 3. Delete file_ranges (from jobs)
if len(jobIDs) > 0 {
err = tx.Where("job_id IN ?", jobIDs).Delete(&model.FileRange{}).Error
if err != nil {
return errors.WithStack(err)
}
}

// 4. Delete file_ranges (from files)
if len(fileIDs) > 0 {
err = tx.Where("file_id IN ?", fileIDs).Delete(&model.FileRange{}).Error
if err != nil {
return errors.WithStack(err)
}
}

// 5. Delete files (before directories to avoid circular cascade)
if len(fileIDs) > 0 {
err = tx.Where("id IN ?", fileIDs).Delete(&model.File{}).Error
if err != nil {
return errors.WithStack(err)
}
}

// 6. Delete directories
err = tx.Where("attachment_id IN ?", attachmentIDs).Delete(&model.Directory{}).Error
if err != nil {
return errors.WithStack(err)
}

// 7. Delete jobs
if len(jobIDs) > 0 {
err = tx.Where("id IN ?", jobIDs).Delete(&model.Job{}).Error
if err != nil {
return errors.WithStack(err)
}
}

// 8. Now delete the preparation itself, which will cascade to:
// - wallet_assignments (many2many, small table)
// - source_attachments (now empty, no more cascades)
// - output_attachments (many2many, small table)
// These cascades are safe because we've already deleted all the heavy child tables.
return tx.Select("Wallets", "SourceStorages", "OutputStorages").Delete(&preparation).Error
})
})
if err != nil {
Expand Down
Loading