Search in sources :

Example 1 with HEALTHY

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY in project ozone by apache.

the class TestReplicationManager method additionalReplicaScheduledWhenMisReplicated.

@Test
public void additionalReplicaScheduledWhenMisReplicated() throws IOException {
    final ContainerInfo container = getContainer(LifeCycleState.CLOSED);
    container.setUsedBytes(100);
    final ContainerID id = container.containerID();
    final UUID originNodeId = UUID.randomUUID();
    final ContainerReplica replicaOne = getReplicas(id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails());
    final ContainerReplica replicaTwo = getReplicas(id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails());
    final ContainerReplica replicaThree = getReplicas(id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails());
    containerStateManager.addContainer(container.getProtobuf());
    containerStateManager.updateContainerReplica(id, replicaOne);
    containerStateManager.updateContainerReplica(id, replicaTwo);
    containerStateManager.updateContainerReplica(id, replicaThree);
    // Ensure a mis-replicated status is returned for any containers in this
    // test where there are 3 replicas. When there are 2 or 4 replicas
    // the status returned will be healthy.
    Mockito.when(containerPlacementPolicy.validateContainerPlacement(Mockito.argThat(list -> list.size() == 3), Mockito.anyInt())).thenAnswer(invocation -> {
        return new ContainerPlacementStatusDefault(1, 2, 3);
    });
    int currentReplicateCommandCount = datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.replicateContainerCommand);
    final long currentBytesToReplicate = replicationManager.getMetrics().getNumReplicationBytesTotal();
    replicationManager.processAll();
    eventQueue.processAll(1000);
    // At this stage, due to the mocked calls to validateContainerPlacement
    // the policy will not be satisfied, and replication will be triggered.
    Assert.assertEquals(currentReplicateCommandCount + 1, datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.replicateContainerCommand));
    Assert.assertEquals(currentReplicateCommandCount + 1, replicationManager.getMetrics().getNumReplicationCmdsSent());
    Assert.assertEquals(currentBytesToReplicate + 100, replicationManager.getMetrics().getNumReplicationBytesTotal());
    Assert.assertEquals(1, replicationManager.getInflightReplication().size());
    Assert.assertEquals(1, replicationManager.getMetrics().getInflightReplication());
    ReplicationManagerReport report = replicationManager.getContainerReport();
    Assert.assertEquals(1, report.getStat(LifeCycleState.CLOSED));
    Assert.assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.MIS_REPLICATED));
    // Now make it so that all containers seem mis-replicated no matter how
    // many replicas. This will test replicas are not scheduled if the new
    // replica does not fix the mis-replication.
    Mockito.when(containerPlacementPolicy.validateContainerPlacement(Mockito.anyList(), Mockito.anyInt())).thenAnswer(invocation -> {
        return new ContainerPlacementStatusDefault(1, 2, 3);
    });
    currentReplicateCommandCount = datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.replicateContainerCommand);
    replicationManager.processAll();
    eventQueue.processAll(1000);
    // At this stage, due to the mocked calls to validateContainerPlacement
    // the mis-replicated racks will not have improved, so expect to see nothing
    // scheduled.
    Assert.assertEquals(currentReplicateCommandCount, datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.replicateContainerCommand));
    Assert.assertEquals(currentReplicateCommandCount, replicationManager.getMetrics().getNumReplicationCmdsSent());
    Assert.assertEquals(1, replicationManager.getInflightReplication().size());
    Assert.assertEquals(1, replicationManager.getMetrics().getInflightReplication());
}
Also used : HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) TimeoutException(java.util.concurrent.TimeoutException) STALE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE) DBStore(org.apache.hadoop.hdds.utils.db.DBStore) SCMCommandProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto) HddsTestUtils.getContainer(org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) After(org.junit.After) Map(java.util.Map) SCMHAManager(org.apache.hadoop.hdds.scm.ha.SCMHAManager) HddsConfigKeys(org.apache.hadoop.hdds.HddsConfigKeys) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) FileUtil(org.apache.hadoop.fs.FileUtil) DBStoreBuilder(org.apache.hadoop.hdds.utils.db.DBStoreBuilder) Longs(com.google.common.primitives.Longs) Set(java.util.Set) UUID(java.util.UUID) IN_SERVICE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE) Instant(java.time.Instant) HddsTestUtils.getReplicas(org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas) Collectors(java.util.stream.Collectors) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) ZoneId(java.time.ZoneId) PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) List(java.util.List) MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) ReplicationManagerConfiguration(org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration) SCMServiceManager(org.apache.hadoop.hdds.scm.ha.SCMServiceManager) Optional(java.util.Optional) DECOMMISSIONED(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) IntStream(java.util.stream.IntStream) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) MockSCMHAManager(org.apache.hadoop.hdds.scm.ha.MockSCMHAManager) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DECOMMISSIONING(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING) Before(org.junit.Before) LifeCycleEvent(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent) HEALTHY(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY) State(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) FileUtils(org.apache.commons.io.FileUtils) EventHandler(org.apache.hadoop.hdds.server.events.EventHandler) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) SCMEvents(org.apache.hadoop.hdds.scm.events.SCMEvents) PipelineManager(org.apache.hadoop.hdds.scm.pipeline.PipelineManager) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) IN_MAINTENANCE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE) Mockito(org.mockito.Mockito) SCMDBDefinition(org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition) CLOSED(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED) SCMDBTransactionBufferImpl(org.apache.hadoop.hdds.scm.metadata.SCMDBTransactionBufferImpl) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) Assert(org.junit.Assert) TestClock(org.apache.ozone.test.TestClock) UUID(java.util.UUID) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) Test(org.junit.Test)

Example 2 with HEALTHY

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY in project ozone by apache.

the class TestReplicationManager method testOverReplicatedClosedContainerWithDecomAndMaint.

/**
 * When a CLOSED container is over replicated, ReplicationManager
 * deletes the excess replicas. While choosing the replica for deletion
 * ReplicationManager should not attempt to remove a DECOMMISSION or
 * MAINTENANCE replica.
 */
@Test
public void testOverReplicatedClosedContainerWithDecomAndMaint() throws IOException {
    final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    addReplica(container, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED);
    addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    final int currentDeleteCommandCount = datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.deleteContainerCommand);
    replicationManager.processAll();
    eventQueue.processAll(1000);
    Assert.assertEquals(currentDeleteCommandCount + 2, datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.deleteContainerCommand));
    Assert.assertEquals(currentDeleteCommandCount + 2, replicationManager.getMetrics().getNumDeletionCmdsSent());
    Assert.assertEquals(1, replicationManager.getInflightDeletion().size());
    Assert.assertEquals(1, replicationManager.getMetrics().getInflightDeletion());
    // Get the DECOM and Maint replica and ensure none of them are scheduled
    // for removal
    Set<ContainerReplica> decom = containerStateManager.getContainerReplicas(container.containerID()).stream().filter(r -> r.getDatanodeDetails().getPersistedOpState() != IN_SERVICE).collect(Collectors.toSet());
    for (ContainerReplica r : decom) {
        Assert.assertFalse(datanodeCommandHandler.received(SCMCommandProto.Type.deleteContainerCommand, r.getDatanodeDetails()));
    }
    assertOverReplicatedCount(1);
}
Also used : HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) TimeoutException(java.util.concurrent.TimeoutException) STALE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE) DBStore(org.apache.hadoop.hdds.utils.db.DBStore) SCMCommandProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto) HddsTestUtils.getContainer(org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) After(org.junit.After) Map(java.util.Map) SCMHAManager(org.apache.hadoop.hdds.scm.ha.SCMHAManager) HddsConfigKeys(org.apache.hadoop.hdds.HddsConfigKeys) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) FileUtil(org.apache.hadoop.fs.FileUtil) DBStoreBuilder(org.apache.hadoop.hdds.utils.db.DBStoreBuilder) Longs(com.google.common.primitives.Longs) Set(java.util.Set) UUID(java.util.UUID) IN_SERVICE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE) Instant(java.time.Instant) HddsTestUtils.getReplicas(org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas) Collectors(java.util.stream.Collectors) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) ZoneId(java.time.ZoneId) PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) List(java.util.List) MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) ReplicationManagerConfiguration(org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration) SCMServiceManager(org.apache.hadoop.hdds.scm.ha.SCMServiceManager) Optional(java.util.Optional) DECOMMISSIONED(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) IntStream(java.util.stream.IntStream) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) MockSCMHAManager(org.apache.hadoop.hdds.scm.ha.MockSCMHAManager) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DECOMMISSIONING(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING) Before(org.junit.Before) LifeCycleEvent(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent) HEALTHY(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY) State(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) FileUtils(org.apache.commons.io.FileUtils) EventHandler(org.apache.hadoop.hdds.server.events.EventHandler) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) SCMEvents(org.apache.hadoop.hdds.scm.events.SCMEvents) PipelineManager(org.apache.hadoop.hdds.scm.pipeline.PipelineManager) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) IN_MAINTENANCE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE) Mockito(org.mockito.Mockito) SCMDBDefinition(org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition) CLOSED(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED) SCMDBTransactionBufferImpl(org.apache.hadoop.hdds.scm.metadata.SCMDBTransactionBufferImpl) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) Assert(org.junit.Assert) TestClock(org.apache.ozone.test.TestClock) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test)

Aggregations

Longs (com.google.common.primitives.Longs)2 File (java.io.File)2 IOException (java.io.IOException)2 Instant (java.time.Instant)2 ZoneId (java.time.ZoneId)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Optional (java.util.Optional)2 Set (java.util.Set)2 UUID (java.util.UUID)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 ExecutionException (java.util.concurrent.ExecutionException)2 TimeUnit (java.util.concurrent.TimeUnit)2 TimeoutException (java.util.concurrent.TimeoutException)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Collectors (java.util.stream.Collectors)2 IntStream (java.util.stream.IntStream)2 FileUtils (org.apache.commons.io.FileUtils)2