@@ -63,6 +63,8 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
public static final String DISK_CHECK_MIN_GAP_KEY = "hdds.datanode.disk.check.min.gap";
public static final String DISK_CHECK_TIMEOUT_KEY = "hdds.datanode.disk.check.timeout";
public static final String DISK_CHECK_SLIDING_WINDOW_TIMEOUT_KEY = "hdds.datanode.disk.check.sliding.window.timeout";
public static final String DISK_CHECK_RETRY_GAP_KEY = "hdds.datanode.disk.check.retry.gap";
public static final String DISK_CHECK_RETRY_ATTEMPTS = "hdds.datanode.disk.check.retry.attempts";

// Minimum space should be left on volume.
// Ex: If volume has 1000GB and minFreeSpace is configured as 10GB,
@@ -104,6 +106,9 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
static final Duration DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT =
Duration.ofMinutes(PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT).plus(DISK_CHECK_TIMEOUT_DEFAULT);

static final Duration DISK_CHECK_RETRY_GAP_DEFAULT = Duration.ofMinutes(1);
static final int DISK_CHECK_RETRY_ATTEMPTS_DEFAULT = 2;

static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = true;
static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024;
static final int ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT = 64;
@@ -373,6 +378,14 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
)
private boolean isDiskCheckEnabled = true;

@Config(key = "hdds.datanode.rocksdb.disk.check.io.test.enabled",
defaultValue = "true",
type = ConfigType.BOOLEAN,
tags = {DATANODE},
description = "The configuration to enable or disable RocksDb disk IO checks."
)
private boolean isRocksDbDiskCheckEnabled = true;

@Config(key = "hdds.datanode.disk.check.io.failures.tolerated",
defaultValue = "1",
type = ConfigType.INT,
@@ -430,6 +443,25 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
)
private Duration diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT;

@Config(key = DISK_CHECK_RETRY_GAP_KEY,
defaultValue = "1m",
Reviewer: should it be at least 15m?

Author: No, we don't want a long gap between two checks, since the goal here is to ignore transient errors while opening RocksDb. If the gap between two consecutive open attempts is too long, it is no longer about transient errors. Also, a 15-minute gap between the two open attempts would exceed the 10-minute disk check timeout. On an error opening RocksDb, we ideally want the thread to sleep for a very short time, try to open RocksDb a second time, and only if that also fails mark it as an actual failure to open RocksDb.

Reviewer: should we also increase the 10-minute timeout then? In case of a 3rd retry we'd spend 2 of the 10 minutes sleeping, plus the time for the actual processing...

Author: Two checks should be more than sufficient to definitively declare that we are failing to open RocksDb. After two checks, the sliding window should decide whether further checks are required on that volume. If we allowed more than 2 checks, the timeout for each disk check would also have to become dynamic, and the 10-minute threshold might be too small. On the other hand, we would like to know within 10 minutes whether a disk is unhealthy, since lengthening this check pushes future checks further out.
type = ConfigType.TIME,
tags = {DATANODE},
description = "Time to wait between retries of disk checks."
+ " To ignore transient issues, the RocksDb instance on a disk is validated multiple times before"
+ " declaring failure. This configuration defines the time to wait between the retry attempts."
+ " Unit could be defined with postfix (ns,ms,s,m,h,d)."
)
private Duration diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT;

@Config(key = DISK_CHECK_RETRY_ATTEMPTS,
defaultValue = "2",
type = ConfigType.INT,
tags = {DATANODE},
description = "Number of retry attempts for opening RocksDb before declaring failure."
)
private int diskCheckRetryAttempts = DISK_CHECK_RETRY_ATTEMPTS_DEFAULT;
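For reference, the two new keys are ordinary Ozone properties and can be overridden like any other; a sketch of setting them in ozone-site.xml (the values here are illustrative, the defaults are 1m and 2):

```xml
<!-- Illustrative overrides; defaults are 1m and 2. -->
<property>
  <name>hdds.datanode.disk.check.retry.gap</name>
  <value>30s</value>
</property>
<property>
  <name>hdds.datanode.disk.check.retry.attempts</name>
  <value>2</value>
</property>
```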

@Config(key = "hdds.datanode.chunk.data.validation.check",
defaultValue = "false",
type = ConfigType.BOOLEAN,
@@ -709,6 +741,25 @@ public void validate() {
diskCheckSlidingWindowTimeout = defaultTimeout;
}

if (diskCheckRetryAttempts <= 0) {
LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}",
DISK_CHECK_RETRY_ATTEMPTS, diskCheckRetryAttempts, DISK_CHECK_RETRY_ATTEMPTS_DEFAULT);
diskCheckRetryAttempts = DISK_CHECK_RETRY_ATTEMPTS_DEFAULT;
}

if (diskCheckRetryGap.isNegative()) {
LOG.warn("{} must not be negative and was set to {}. Defaulting to {}",
DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_RETRY_GAP_DEFAULT);
diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT;
}

if (diskCheckRetryGap.compareTo(diskCheckTimeout.dividedBy(diskCheckRetryAttempts)) > 0) {
LOG.warn("{} was set to {}. It must not exceed {} / {}, which is {}. Defaulting to {}",
DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_TIMEOUT_KEY, diskCheckRetryAttempts,
diskCheckTimeout.dividedBy(diskCheckRetryAttempts), DISK_CHECK_RETRY_GAP_DEFAULT);
diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT;
}
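The last rule above ties the three settings together: the total sleep budget of the retries must fit inside a single disk-check timeout, i.e. retryGap <= diskCheckTimeout / retryAttempts. A standalone sketch of that arithmetic with java.time (class and method names here are illustrative, not from Ozone):

```java
import java.time.Duration;

// Illustrative rendering of the validate() rule: the retry gap must not
// exceed diskCheckTimeout / retryAttempts, so the sleeps between attempts
// still fit within one disk check timeout.
public class RetryGapRule {
  static boolean gapFitsTimeout(Duration retryGap, Duration diskCheckTimeout, int retryAttempts) {
    return retryGap.compareTo(diskCheckTimeout.dividedBy(retryAttempts)) <= 0;
  }

  public static void main(String[] args) {
    Duration timeout = Duration.ofMinutes(10); // hdds.datanode.disk.check.timeout default
    // The default gap of 1m with 2 attempts fits: 1m <= 10m / 2 = 5m.
    System.out.println(gapFitsTimeout(Duration.ofMinutes(1), timeout, 2));  // true
    // The 15m gap floated in review would not: 15m > 5m.
    System.out.println(gapFitsTimeout(Duration.ofMinutes(15), timeout, 2)); // false
  }
}
```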

if (blockDeleteCommandWorkerInterval.isNegative()) {
LOG.warn(BLOCK_DELETE_COMMAND_WORKER_INTERVAL +
" must be greater than zero and was set to {}. Defaulting to {}",
Expand Down Expand Up @@ -924,10 +975,18 @@ public Duration getDiskCheckTimeout() {
return diskCheckTimeout;
}

public Duration getDiskCheckRetryGap() {
return diskCheckRetryGap;
}

public void setDiskCheckTimeout(Duration duration) {
diskCheckTimeout = duration;
}

public int getDiskCheckRetryAttempts() {
return diskCheckRetryAttempts;
}

public void setDiskCheckEnabled(boolean diskCheckEnabled) {
isDiskCheckEnabled = diskCheckEnabled;
}
@@ -936,6 +995,10 @@ public boolean isDiskCheckEnabled() {
return isDiskCheckEnabled;
}

public boolean isRocksDbDiskCheckEnabled() {
return isRocksDbDiskCheckEnabled;
}

public Duration getDiskCheckSlidingWindowTimeout() {
return diskCheckSlidingWindowTimeout;
}
@@ -25,6 +25,7 @@
import jakarta.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ConcurrentSkipListSet;
@@ -306,20 +307,34 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)

@VisibleForTesting
public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException {
if (!getDiskCheckEnabled()) {
if (!(getDiskCheckEnabled() && getDatanodeConfig().isRocksDbDiskCheckEnabled())) {
return VolumeCheckResult.HEALTHY;
}

try (ManagedOptions managedOptions = new ManagedOptions();
ManagedRocksDB ignored = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) {
// Do nothing. Only check if rocksdb is accessible.
LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}.", dbFile, getStorageDir());
} catch (Exception e) {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("Check of database for volume " + this + " interrupted.");
// We attempt to open RocksDb a configurable number of times to ignore
// transient errors and to confirm that we actually cannot open it.
final int maxAttempts = getDatanodeConfig().getDiskCheckRetryAttempts();
final Duration maxRetryGap = getDatanodeConfig().getDiskCheckRetryGap();
for (int attempt = 0; attempt < maxAttempts; attempt++) {
try (ManagedOptions managedOptions = new ManagedOptions();
Reviewer: Could this one be created outside of the for loop? Do we need to call managedOptions.close() for cleanup?

ManagedRocksDB ignored =
ManagedRocksDB.openAsSecondary(managedOptions, dbFile.toString(), getTmpDir().getPath())) {
// Do nothing. Only check if rocksdb is accessible.
LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}.", dbFile, getStorageDir());
Reviewer: info?

break;
} catch (Exception e) {
Reviewer: Are there exceptions that indicate a permanent problem with opening, vs. transient ones?

if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("Check of database for volume " + this + " interrupted.");
}

if (attempt == maxAttempts - 1) {
Reviewer: Basically here it retries for any failure, not just transient errors. Do we know which errors are transient and could be recovered by retry, and which cannot?

Author: Yes, the idea is that if you can't open RocksDb twice, the problem should definitely be counted as an error. There is no clear list of the possible transient errors, so this check is purely defensive. From the RocksDb docs and comments, it appears that we may fail to open the database in read-only mode when any of these happen:

  1. The CURRENT file is being renamed
  2. SST files are deleted, or during some states of compaction
  3. The WAL's physical size on disk exceeds the size declared in the MANIFEST

LOG.error("Could not open Volume DB located at {}", dbFile, e);
getIoTestSlidingWindow().add();
} else {
LOG.warn("Could not open Volume DB located at {}", dbFile, e);
Thread.sleep(maxRetryGap.toMillis());
Reviewer: Do we want to track thread interruption for this sleep?

}
}
LOG.warn("Could not open Volume DB located at {}", dbFile, e);
getIoTestSlidingWindow().add();
}

if (getIoTestSlidingWindow().isExceeded()) {
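Stripped of the RocksDB and sliding-window details, the retry flow in checkDbHealth can be sketched as a small standalone pattern (all names here are illustrative, not from the Ozone codebase): run an action up to maxAttempts times, sleep retryGap between attempts, and report failure only when the final attempt also throws.

```java
import java.time.Duration;
import java.util.concurrent.Callable;

// Minimal sketch of the retry-with-gap pattern used by the disk check.
public class RetrySketch {
  static boolean succeedsWithinRetries(Callable<Void> action, int maxAttempts, Duration retryGap)
      throws InterruptedException {
    for (int attempt = 0; attempt < maxAttempts; attempt++) {
      try {
        action.call();
        return true;                       // attempt succeeded; stop retrying
      } catch (Exception e) {
        if (attempt == maxAttempts - 1) {
          return false;                    // last attempt failed: a real failure
        }
        Thread.sleep(retryGap.toMillis()); // wait out a possible transient error
      }
    }
    return false;                          // unreachable for maxAttempts > 0
  }

  public static void main(String[] args) throws InterruptedException {
    // Fails twice, then succeeds; with 3 attempts this counts as success.
    int[] calls = {0};
    Callable<Void> flaky = () -> {
      if (calls[0]++ < 2) {
        throw new RuntimeException("transient");
      }
      return null;
    };
    System.out.println(succeedsWithinRetries(flaky, 3, Duration.ofMillis(1))); // true
  }
}
```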
@@ -75,6 +75,14 @@ public static ManagedRocksDB openReadOnly(
);
}

public static ManagedRocksDB openAsSecondary(
final ManagedOptions options,
final String dbPath,
final String secondaryDbLogFilePath)
throws RocksDBException {
return new ManagedRocksDB(RocksDB.openAsSecondary(options, dbPath, secondaryDbLogFilePath));
Reviewer: When we open RocksDb as secondary vs. read-only, do we open up a use case for modifying data from the secondary instance vs. the read-only instance?

Author: Secondary and read-only instances are both read-only in behavior; neither allows writes. So we should be safe here.

}

public static ManagedRocksDB open(
final DBOptions options, final String path,
final List<ColumnFamilyDescriptor> columnFamilyDescriptors,