Search in sources :

Example 1 with MoveResult

use of org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult in project ozone by apache.

the class TestReplicationManager method testDnBecameUnhealthyWhenMoving.

/**
 * Verifies that a move fails when the target or the source datanode
 * becomes unhealthy while the move is in progress.
 *
 * <p>Phase 1: the target is marked STALE right after the move is requested;
 * the returned future must complete with
 * {@code REPLICATION_FAIL_NODE_UNHEALTHY}.
 *
 * <p>Phase 2: the target is healthy again and receives the replica, then the
 * source is marked STALE; the returned future must complete with
 * {@code DELETION_FAIL_NODE_UNHEALTHY}.
 */
@Test
public void testDnBecameUnhealthyWhenMoving() throws IOException, NodeNotFoundException, InterruptedException, ExecutionException {
    final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
    final ContainerID id = container.containerID();
    // Three healthy CLOSED replicas; the first one sits on the move source.
    final ContainerReplica srcReplica = addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    // An additional healthy node without a replica is the move target.
    final DatanodeDetails target = addNode(new NodeStatus(IN_SERVICE, HEALTHY));
    final DatanodeDetails source = srcReplica.getDatanodeDetails();

    // Phase 1: target turns STALE before the replication step is processed.
    CompletableFuture<MoveResult> cf = replicationManager.move(id, new MoveDataNodePair(source, target));
    Assert.assertTrue(scmLogs.getOutput().contains("receive a move request about container"));
    nodeManager.setNodeStatus(target, new NodeStatus(IN_SERVICE, STALE));
    replicationManager.processAll();
    eventQueue.processAll(1000);
    // Check completion first so that get() can never block the test, then
    // use assertEquals so a failure reports the actual MoveResult.
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY, cf.get());

    // Phase 2: restore the target, let the replica land on it, then make the
    // source STALE before the deletion step is processed.
    nodeManager.setNodeStatus(target, new NodeStatus(IN_SERVICE, HEALTHY));
    cf = replicationManager.move(id, new MoveDataNodePair(source, target));
    addReplicaToDn(container, target, CLOSED);
    replicationManager.processAll();
    eventQueue.processAll(1000);
    nodeManager.setNodeStatus(source, new NodeStatus(IN_SERVICE, STALE));
    replicationManager.processAll();
    eventQueue.processAll(1000);
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.DELETION_FAIL_NODE_UNHEALTHY, cf.get());
}
Also used : MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test)

Example 2 with MoveResult

use of org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult in project ozone by apache.

the class TestReplicationManager method testMovePrerequisites.

/**
 * Verifies that {@code move} rejects a request up front (the returned future
 * is already done) whenever one of its prerequisites is not satisfied.
 *
 * <p>Cases covered, in order: replication manager not running, container not
 * CLOSED, node not HEALTHY, node not IN_SERVICE, replica already present on
 * the target, no replica on the source, container has an inflight deletion,
 * container has an inflight replication.
 */
@Test
public void testMovePrerequisites() throws IOException, NodeNotFoundException, InterruptedException, ExecutionException, InvalidStateTransitionException {
    // Fixture: an OPEN container with three CLOSED replicas on healthy nodes,
    // plus one healthy node (emptyNode) that holds no replica.
    final ContainerInfo container = createContainer(LifeCycleState.OPEN);
    final ContainerID id = container.containerID();
    final ContainerReplica replica1 = addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    final ContainerReplica replica2 = addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    final DatanodeDetails emptyNode = addNode(new NodeStatus(IN_SERVICE, HEALTHY));
    final ContainerReplica replica3 = addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    final DatanodeDetails node1 = replica1.getDatanodeDetails();
    CompletableFuture<MoveResult> cf;

    // A move requested while the replication manager is stopped must be
    // rejected. The short sleeps presumably give stop()/start() time to take
    // effect.
    replicationManager.stop();
    Thread.sleep(100L);
    cf = replicationManager.move(id, new MoveDataNodePair(node1, emptyNode));
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.FAIL_NOT_RUNNING, cf.get());
    replicationManager.start();
    Thread.sleep(100L);

    // container is still OPEN, i.e. not CLOSED
    cf = replicationManager.move(id, new MoveDataNodePair(node1, emptyNode));
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED, cf.get());
    // open -> closing
    containerStateManager.updateContainerState(id.getProtobuf(), LifeCycleEvent.FINALIZE);
    cf = replicationManager.move(id, new MoveDataNodePair(node1, emptyNode));
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED, cf.get());
    // closing -> quasi_closed
    containerStateManager.updateContainerState(id.getProtobuf(), LifeCycleEvent.QUASI_CLOSE);
    cf = replicationManager.move(id, new MoveDataNodePair(node1, emptyNode));
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED, cf.get());
    // quasi_closed -> closed; from here on the container state is valid
    containerStateManager.updateContainerState(id.getProtobuf(), LifeCycleEvent.FORCE_CLOSE);
    Assert.assertEquals(LifeCycleState.CLOSED, containerStateManager.getContainer(id).getState());

    // Node is not in healthy state: checked with the unhealthy node both as
    // target and as source of the move.
    for (HddsProtos.NodeState state : HddsProtos.NodeState.values()) {
        if (state != HEALTHY) {
            nodeManager.setNodeStatus(emptyNode, new NodeStatus(IN_SERVICE, state));
            cf = replicationManager.move(id, new MoveDataNodePair(node1, emptyNode));
            Assert.assertTrue(cf.isDone());
            Assert.assertEquals(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY, cf.get());
            cf = replicationManager.move(id, new MoveDataNodePair(emptyNode, node1));
            Assert.assertTrue(cf.isDone());
            Assert.assertEquals(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY, cf.get());
        }
    }
    nodeManager.setNodeStatus(emptyNode, new NodeStatus(IN_SERVICE, HEALTHY));

    // Node is not in IN_SERVICE state: again checked in both directions.
    for (HddsProtos.NodeOperationalState state : HddsProtos.NodeOperationalState.values()) {
        if (state != IN_SERVICE) {
            nodeManager.setNodeStatus(emptyNode, new NodeStatus(state, HEALTHY));
            cf = replicationManager.move(id, new MoveDataNodePair(node1, emptyNode));
            Assert.assertTrue(cf.isDone());
            Assert.assertEquals(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE, cf.get());
            cf = replicationManager.move(id, new MoveDataNodePair(emptyNode, node1));
            Assert.assertTrue(cf.isDone());
            Assert.assertEquals(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE, cf.get());
        }
    }
    nodeManager.setNodeStatus(emptyNode, new NodeStatus(IN_SERVICE, HEALTHY));

    // container already exists in target datanode
    cf = replicationManager.move(id, new MoveDataNodePair(node1, replica2.getDatanodeDetails()));
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.REPLICATION_FAIL_EXIST_IN_TARGET, cf.get());

    // container does not exist in source datanode (emptyNode holds no replica)
    cf = replicationManager.move(id, new MoveDataNodePair(emptyNode, emptyNode));
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.REPLICATION_FAIL_NOT_EXIST_IN_SOURCE, cf.get());

    // make the container over-replicated (a fourth replica) to test the
    // case that the container is in inflightDeletion
    final ContainerReplica extraReplica = addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), State.CLOSED);
    replicationManager.processAll();
    // waiting for inflightDeletion generation
    eventQueue.processAll(1000);
    cf = replicationManager.move(id, new MoveDataNodePair(node1, emptyNode));
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.REPLICATION_FAIL_INFLIGHT_DELETION, cf.get());
    resetReplicationManager();

    // make the replica num be 2 to test the case
    // that container is in inflightReplication
    containerStateManager.removeContainerReplica(id, extraReplica);
    containerStateManager.removeContainerReplica(id, replica3);
    // replication manager should generate inflightReplication
    replicationManager.processAll();
    // waiting for inflightReplication generation
    eventQueue.processAll(1000);
    cf = replicationManager.move(id, new MoveDataNodePair(node1, emptyNode));
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.REPLICATION_FAIL_INFLIGHT_REPLICATION, cf.get());
}
Also used : MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test)

Example 3 with MoveResult

use of org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult in project ozone by apache.

the class TestReplicationManager method testMove.

/**
 * Verifies the successful path of a move: when all prerequisites are
 * satisfied, a replicate command is sent to the target, then a delete command
 * is sent to the source once the target holds a replica, and the returned
 * future completes with {@code COMPLETED} after the source replica is gone.
 */
@Test
public void testMove() throws IOException, NodeNotFoundException, InterruptedException, ExecutionException {
    final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
    final ContainerID id = container.containerID();
    // Three healthy CLOSED replicas; the first one sits on the move source.
    final ContainerReplica srcReplica = addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    // An additional healthy node without a replica is the move target.
    final DatanodeDetails target = addNode(new NodeStatus(IN_SERVICE, HEALTHY));
    final DatanodeDetails source = srcReplica.getDatanodeDetails();

    final CompletableFuture<MoveResult> cf = replicationManager.move(id, new MoveDataNodePair(source, target));
    Assert.assertTrue(scmLogs.getOutput().contains("receive a move request about container"));
    // Brief pause before checking that the replicate command reached the
    // target; presumably the command is delivered asynchronously.
    Thread.sleep(100L);
    Assert.assertTrue(datanodeCommandHandler.received(SCMCommandProto.Type.replicateContainerCommand, target));
    Assert.assertEquals(1, datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.replicateContainerCommand));

    // Simulate the replication finishing: the target now holds a replica.
    addReplicaToDn(container, target, CLOSED);
    replicationManager.processAll();
    eventQueue.processAll(1000);
    Assert.assertTrue(datanodeCommandHandler.received(SCMCommandProto.Type.deleteContainerCommand, source));
    Assert.assertEquals(1, datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.deleteContainerCommand));

    // Simulate the deletion finishing: the source replica disappears.
    containerStateManager.removeContainerReplica(id, srcReplica);
    replicationManager.processAll();
    eventQueue.processAll(1000);
    // Check completion first so that get() can never block the test, then
    // use assertEquals so a failure reports the actual MoveResult.
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.COMPLETED, cf.get());
}
Also used : MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test)

Example 4 with MoveResult

use of org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult in project ozone by apache.

the class TestReplicationManager method testMoveNotDeleteSrcIfPolicyNotSatisfied.

/**
 * Verifies that the replication manager does not delete the source replica
 * of a move when doing so would leave the placement policy unsatisfied; the
 * returned future must complete with {@code DELETE_FAIL_POLICY}.
 */
@Test
public void testMoveNotDeleteSrcIfPolicyNotSatisfied() throws IOException, NodeNotFoundException, InterruptedException, ExecutionException {
    final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
    final ContainerID id = container.containerID();
    // Three healthy CLOSED replicas: the move source, one that will be lost
    // later in the test, and a third one that stays untouched.
    final ContainerReplica srcReplica = addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    final ContainerReplica lostReplica = addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
    // An additional healthy node without a replica is the move target.
    final DatanodeDetails target = addNode(new NodeStatus(IN_SERVICE, HEALTHY));
    final DatanodeDetails source = srcReplica.getDatanodeDetails();

    final CompletableFuture<MoveResult> cf = replicationManager.move(id, new MoveDataNodePair(source, target));
    Assert.assertTrue(scmLogs.getOutput().contains("receive a move request about container"));
    // Brief pause before checking that the replicate command reached the
    // target; presumably the command is delivered asynchronously.
    Thread.sleep(100L);
    Assert.assertTrue(datanodeCommandHandler.received(SCMCommandProto.Type.replicateContainerCommand, target));
    Assert.assertEquals(1, datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.replicateContainerCommand));

    // replicate container to the target
    addReplicaToDn(container, target, CLOSED);
    // now, replication succeeds, but one of the other replicas is lost,
    // so there are only three replicas in total and the replication manager
    // should not delete the replica on the source
    containerStateManager.removeContainerReplica(id, lostReplica);
    replicationManager.processAll();
    eventQueue.processAll(1000);
    Assert.assertFalse(datanodeCommandHandler.received(SCMCommandProto.Type.deleteContainerCommand, source));
    // Check completion first so that get() can never block the test, then
    // use assertEquals so a failure reports the actual MoveResult.
    Assert.assertTrue(cf.isDone());
    Assert.assertEquals(MoveResult.DELETE_FAIL_POLICY, cf.get());
}
Also used : MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test)

Aggregations

DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)4 MockDatanodeDetails.createDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails)4 MockDatanodeDetails.randomDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails)4 MoveResult (org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult)4 MoveDataNodePair (org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair)4 NodeStatus (org.apache.hadoop.hdds.scm.node.NodeStatus)4 Test (org.junit.Test)4 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)1