Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
Expand Down Expand Up @@ -101,6 +103,9 @@ assert getRegion().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID || isFailed()
setNextState(TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE);
break;
case TRUNCATE_REGION_MAKE_OFFLINE:
// Recovery snapshots release the region lock between states, so re-check that the
// original region still exists before creating an unassign TRSP for it.
checkOnline(env, getRegion());
addChildProcedure(createUnAssignProcedures(env));
setNextState(TruncateRegionState.TRUNCATE_REGION_REMOVE);
break;
Expand Down Expand Up @@ -132,8 +137,7 @@ assert getRegion().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID || isFailed()
}

private void createRegionOnFileSystem(final MasterProcedureEnv env) throws IOException {
RegionStateNode regionNode =
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
RegionStateNode regionNode = getExistingRegionStateNode(env);
regionNode.lock();
try {
final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
Expand All @@ -146,8 +150,7 @@ private void createRegionOnFileSystem(final MasterProcedureEnv env) throws IOExc
}

private void deleteRegionFromFileSystem(final MasterProcedureEnv env) throws IOException {
RegionStateNode regionNode =
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
RegionStateNode regionNode = getExistingRegionStateNode(env);
regionNode.lock();
try {
final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
Expand Down Expand Up @@ -178,9 +181,12 @@ protected void rollbackState(final MasterProcedureEnv env, final TruncateRegionS
case TRUNCATE_REGION_MAKE_OFFLINE:
RegionStateNode regionNode =
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
if (regionNode == null) {
// Region was unassigned by state TRUNCATE_REGION_MAKE_OFFLINE.
// So Assign it back
if (
regionNode != null
&& regionNode.isInState(State.CLOSED, State.OFFLINE, State.ABNORMALLY_CLOSED)
) {
// Region was unassigned by state TRUNCATE_REGION_MAKE_OFFLINE. Assign it back only if it
// still exists.
addChildProcedure(createAssignProcedures(env));
}
return;
Expand All @@ -190,6 +196,16 @@ protected void rollbackState(final MasterProcedureEnv env, final TruncateRegionS
}
}

private RegionStateNode getExistingRegionStateNode(final MasterProcedureEnv env)
throws UnknownRegionException {
RegionStateNode regionNode =
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
if (regionNode == null) {
throw new UnknownRegionException("No RegionState found for " + getRegion().getEncodedName());
}
return regionNode;
}

@Override
protected void completionCleanup(final MasterProcedureEnv env) {
releaseSyncLatch();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@

import static org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil.insertData;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseIOException;
Expand All @@ -29,6 +34,7 @@
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
Expand Down Expand Up @@ -199,6 +205,113 @@ public void testRecoverySnapshotAndRestore() throws Exception {
UTIL.getAdmin().deleteTable(restoredTableName);
}

@Test
public void testMakeOfflineFailsIfRegionWasMergedAwayDuringRecoverySnapshotGap()
throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
final String[] families = new String[] { "f1", "f2" };
final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();

final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") };
MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families);

MasterProcedureEnv environment = procExec.getEnvironment();
RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream()
.filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString()))
.min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get();
TestableTruncateRegionProcedure procedure =
new TestableTruncateRegionProcedure(environment, regionToTruncate);

mergeRegionContainingTarget(procExec, tableName, regionToTruncate);
assertNull(
environment.getAssignmentManager().getRegionStates().getRegionStateNode(regionToTruncate));

procedure.executeStateForTest(environment, TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE);

assertTrue("Truncate should fail once the merged-away parent region is missing",
procedure.isFailed());
assertNull("MAKE_OFFLINE must not recreate the removed RegionStateNode",
environment.getAssignmentManager().getRegionStates().getRegionStateNode(regionToTruncate));
assertFalse("MAKE_OFFLINE must not schedule child procedures after the region disappears",
procedure.hasPendingChildrenForTest());
}

@Test
public void testRollbackDoesNotRecreateRegionMergedAwayDuringRecoverySnapshotGap()
throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
final String[] families = new String[] { "f1", "f2" };
final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();

final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") };
MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families);

MasterProcedureEnv environment = procExec.getEnvironment();
RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream()
.filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString()))
.min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get();
TestableTruncateRegionProcedure procedure =
new TestableTruncateRegionProcedure(environment, regionToTruncate);

mergeRegionContainingTarget(procExec, tableName, regionToTruncate);
assertNull(
environment.getAssignmentManager().getRegionStates().getRegionStateNode(regionToTruncate));

procedure.rollbackStateForTest(environment, TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE);

assertNull("Rollback must not recreate the removed RegionStateNode",
environment.getAssignmentManager().getRegionStates().getRegionStateNode(regionToTruncate));
assertFalse("Rollback must not schedule assign children for a removed region",
procedure.hasPendingChildrenForTest());
}

private void mergeRegionContainingTarget(ProcedureExecutor<MasterProcedureEnv> procExec,
TableName tableName, RegionInfo regionToTruncate) throws Exception {
List<RegionInfo> regions = UTIL.getAdmin().getRegions(tableName).stream()
.sorted(RegionInfo.COMPARATOR).collect(Collectors.toList());
RegionInfo adjacentRegionToMerge =
regions.stream().filter(region -> !region.equals(regionToTruncate)).findFirst()
.orElseThrow(() -> new AssertionError("No adjacent region available for merge"));
long mergeProcId =
procExec.submitProcedure(new MergeTableRegionsProcedure(procExec.getEnvironment(),
new RegionInfo[] { regionToTruncate, adjacentRegionToMerge }, false));
ProcedureTestingUtility.waitProcedure(procExec, mergeProcId);
ProcedureTestingUtility.assertProcNotFailed(procExec, mergeProcId);
}

public static class TestableTruncateRegionProcedure extends TruncateRegionProcedure {
private boolean childScheduled;

public TestableTruncateRegionProcedure() {
super();
}

public TestableTruncateRegionProcedure(MasterProcedureEnv env, RegionInfo region)
throws HBaseIOException {
super(env, region);
}

public Flow executeStateForTest(MasterProcedureEnv env, TruncateRegionState state)
throws InterruptedException {
return super.executeFromState(env, state);
}

public void rollbackStateForTest(MasterProcedureEnv env, TruncateRegionState state)
throws IOException {
super.rollbackState(env, state);
}

@Override
protected <T extends Procedure<MasterProcedureEnv>> void addChildProcedure(T... subProcedure) {
childScheduled = true;
super.addChildProcedure(subProcedure);
}

public boolean hasPendingChildrenForTest() {
return childScheduled;
}
}

public static class FailingTruncateRegionProcedure extends TruncateRegionProcedure {
private boolean failOnce = false;

Expand Down