Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ public final class ContainerBalancerConfiguration {
"data node is very high")
private boolean triggerDuEnable = false;

// Opt-in balancer switch (off by default). When enabled, the balancer is allowed to
// consider containers in the non-standard states listed in the description below.
@Config(key = "hdds.container.balancer.include.non.standard.containers", type = ConfigType.BOOLEAN,
defaultValue = "false", tags = {ConfigTag.BALANCER},
description = "Whether to include containers in non-standard states, such as " +
"OVER_REPLICATED CLOSED/QUASI_CLOSED and HEALTHY QUASI_CLOSED containers.")
private boolean includeNonStandardContainers = false;

/**
* Gets the threshold value for Container Balancer.
*
Expand Down Expand Up @@ -432,6 +438,24 @@ public void setExcludeNodes(String excludeNodes) {
this.excludeNodes = excludeNodes;
}

/**
* Get the includeNonStandardContainers value for Container Balancer.
*
* @return the boolean value of includeNonStandardContainers
*/
public Boolean getIncludeNonStandardContainers() {
  // The backing field is a primitive; it is auto-boxed to match the declared return type.
  return this.includeNonStandardContainers;
}

/**
* Set the includeNonStandardContainers value for Container Balancer.
*
* @param enable the boolean value to be set to includeNonStandardContainers
*/
public void setIncludeNonStandardContainers(boolean enable) {
  // Plain assignment; no validation is needed for a boolean flag.
  this.includeNonStandardContainers = enable;
}

@Override
public String toString() {
return String.format("Container Balancer Configuration values:%n" +
Expand Down Expand Up @@ -478,7 +502,9 @@ public String toString() {
"Datanodes Specified to be Balanced",
includeNodes.equals("") ? "None" : includeNodes,
"Datanodes Excluded from Balancing",
excludeNodes.equals("") ? "None" : excludeNodes);
excludeNodes.equals("") ? "None" : excludeNodes,
"Whether to include non-standard containers for balancing",
includeNonStandardContainers);
}

public ContainerBalancerConfigurationProto.Builder toProtobufBuilder() {
Expand All @@ -500,7 +526,8 @@ public ContainerBalancerConfigurationProto.Builder toProtobufBuilder() {
.setExcludeDatanodes(excludeNodes)
.setMoveNetworkTopologyEnable(networkTopologyEnable)
.setTriggerDuBeforeMoveEnable(triggerDuEnable)
.setMoveReplicationTimeout(moveReplicationTimeout);
.setMoveReplicationTimeout(moveReplicationTimeout)
.setIncludeNonStandardContainers(includeNonStandardContainers);
return builder;
}

Expand Down Expand Up @@ -555,6 +582,9 @@ static ContainerBalancerConfiguration fromProtobuf(
if (proto.hasMoveReplicationTimeout()) {
config.setMoveReplicationTimeout(proto.getMoveReplicationTimeout());
}
if (proto.hasIncludeNonStandardContainers()) {
config.setIncludeNonStandardContainers(proto.getIncludeNonStandardContainers());
}
return config;
}
}
1 change: 1 addition & 0 deletions hadoop-hdds/interface-client/src/main/proto/hdds.proto
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ message ContainerBalancerConfigurationProto {
optional int32 nextIterationIndex = 19;
optional int64 moveReplicationTimeout = 20;
optional string includeContainers = 21;
optional bool includeNonStandardContainers = 22;
}

message TransferLeadershipRequestProto {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,12 @@ private Comparator<ContainerID> orderContainersByUsedBytes() {
* 3. Container size should be closer to 5GB.
* 4. Container must not be in the configured exclude containers list.
* 5. Container should be closed.
* <p>
* If {@link ContainerBalancerConfiguration#getIncludeNonStandardContainers()}
* returns true, the following non-standard containers are also eligible:
* <ul>
* <li>OVER_REPLICATED CLOSED containers (subject to minimum CLOSED replica checks)</li>
* <li>OVER_REPLICATED and HEALTHY QUASI_CLOSED containers (subject to all QUASI_CLOSED
* replica checks and non-empty source replicas)</li>
* </ul>
* @param node DatanodeDetails for which to find candidate containers.
* @return true if the container should be excluded, else false
*/
Expand Down Expand Up @@ -190,9 +196,15 @@ public boolean shouldBeExcluded(ContainerID containerID,
return true;
}

return !isContainerClosed(container, node, replicas) ||
!isContainerHealthyForMove(container, replicas) ||
isContainerReplicatingOrDeleting(containerID);
if (balancerConfiguration.getIncludeNonStandardContainers()) {
return !isContainerClosedRelaxed(container, node, replicas) ||
!isContainerHealthyForMoveRelaxed(container, replicas) ||
isContainerReplicatingOrDeleting(containerID);
} else {
return !isContainerClosed(container, node, replicas) ||
!isContainerHealthyForMove(container, replicas) ||
isContainerReplicatingOrDeleting(containerID);
}
}

/**
Expand Down Expand Up @@ -242,6 +254,95 @@ private boolean isContainerHealthyForMove(ContainerInfo container, Set<Container
return true;
}

/**
* Relaxed version of isContainerClosed used when includeNonStandardContainers is enabled.
* <p>
* - CLOSED container, CLOSED replica: if replication health is OVER_REPLICATED, requires
* minimum CLOSED replicas; otherwise allows.
* - CLOSED container, QUASI_CLOSED replica: requires minimum CLOSED replicas and a non-empty
* replica on the source datanode.
* - QUASI_CLOSED container: all replicas must be QUASI_CLOSED; replica on source must be
* non-empty.
*
* @param container container to check
* @param datanodeDetails datanode on which a replica of the container is present
* @param replicas all replicas of the container
* @return true if container and replica are eligible for balancing, else false
*/
private boolean isContainerClosedRelaxed(ContainerInfo container,
DatanodeDetails datanodeDetails,
Set<ContainerReplica> replicas) {
ContainerHealthResult.HealthState replicationHealth =
replicationManager.getContainerReplicationHealth(container, replicas).getHealthState();
HddsProtos.LifeCycleState containerState = container.getState();
// Find the specific replica on this datanode
ContainerReplica targetReplica = replicas.stream()
.filter(r -> r.getDatanodeDetails().equals(datanodeDetails))
.findFirst()
.orElse(null);
if (targetReplica == null) {
return false;
}
ContainerReplicaProto.State replicaState = targetReplica.getState();

// Case 1: Container is CLOSED
if (containerState == HddsProtos.LifeCycleState.CLOSED) {
if (replicaState == ContainerReplicaProto.State.CLOSED) {
if (replicationHealth == ContainerHealthResult.HealthState.OVER_REPLICATED) {
return hasMinClosedReplicas(container, replicas);
}
return true;
}

Comment thread
sarvekshayr marked this conversation as resolved.
if (replicaState == ContainerReplicaProto.State.QUASI_CLOSED) {
if (!hasMinClosedReplicas(container, replicas)) {
return false;
}
return !targetReplica.isEmpty();
}
return false;
}

// Case 2: Container is QUASI_CLOSED
if (containerState == HddsProtos.LifeCycleState.QUASI_CLOSED) {
boolean allReplicasQuasiClosed = replicas.stream()
.allMatch(r -> r.getState() == ContainerReplicaProto.State.QUASI_CLOSED);
if (!allReplicasQuasiClosed) {
return false;
}
return !targetReplica.isEmpty();
}
return false;
}

private static boolean hasMinClosedReplicas(ContainerInfo container, Set<ContainerReplica> replicas) {
  // Tally the replicas that report the CLOSED state.
  long closedReplicas = 0;
  for (ContainerReplica replica : replicas) {
    if (replica.getState() == ContainerReplicaProto.State.CLOSED) {
      closedReplicas++;
    }
  }
  // Enough CLOSED replicas means at least as many as the replication config requires.
  return closedReplicas >= container.getReplicationConfig().getRequiredNodes();
}

/**
* Relaxed version of isContainerHealthyForMove used when includeNonStandardContainers is enabled.
* <p>
* - OVER_REPLICATED CLOSED and QUASI_CLOSED containers are allowed.
*
* @param container container to check
* @param replicas the container's replicas
* @return false if it should not be moved, true otherwise
*/
private boolean isContainerHealthyForMoveRelaxed(ContainerInfo container, Set<ContainerReplica> replicas) {
  ContainerHealthResult.HealthState health =
      replicationManager.getContainerReplicationHealth(container, replicas).getHealthState();

  // Only HEALTHY and OVER_REPLICATED containers may be moved in relaxed mode.
  boolean movable = health == ContainerHealthResult.HealthState.HEALTHY
      || health == ContainerHealthResult.HealthState.OVER_REPLICATED;
  if (!movable) {
    LOG.debug("Excluding container {} with replicas {} as its health is {}.", container, replicas, health);
  }
  return movable;
}

private boolean breaksMaxSizeToMoveLimit(ContainerID containerID,
long usedBytes,
long sizeMovedAlready) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ public ContainerBalancerTask(StorageContainerManager scm,
this.moveManager.setMoveTimeout(config.getMoveTimeout().toMillis());
this.moveManager.setReplicationTimeout(
config.getMoveReplicationTimeout().toMillis());
this.moveManager.setIncludeNonStandardContainers(
config.getIncludeNonStandardContainers());
this.delayStart = delayStart;
this.ozoneConfiguration = scm.getConfiguration();
this.containerBalancer = containerBalancer;
Expand Down
Loading