Search in sources :

Example 1 with IN_MAINTENANCE

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE in project ozone by apache.

the class TestReplicationManager method testOverReplicatedClosedContainerWithDecomAndMaint.

/**
 * When a CLOSED container is over replicated, ReplicationManager
 * deletes the excess replicas. While choosing the replica for deletion
 * ReplicationManager should not attempt to remove a DECOMMISSION or
 * MAINTENANCE replica.
 */
@Test
public void testOverReplicatedClosedContainerWithDecomAndMaint() throws IOException {
    final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    addReplica(container, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED);
    addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    addReplica(container, NodeStatus.inServiceHealthy(), CLOSED);
    final int currentDeleteCommandCount = datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.deleteContainerCommand);
    replicationManager.processAll();
    eventQueue.processAll(1000);
    Assert.assertEquals(currentDeleteCommandCount + 2, datanodeCommandHandler.getInvocationCount(SCMCommandProto.Type.deleteContainerCommand));
    Assert.assertEquals(currentDeleteCommandCount + 2, replicationManager.getMetrics().getNumDeletionCmdsSent());
    Assert.assertEquals(1, replicationManager.getInflightDeletion().size());
    Assert.assertEquals(1, replicationManager.getMetrics().getInflightDeletion());
    // Get the DECOM and Maint replica and ensure none of them are scheduled
    // for removal
    Set<ContainerReplica> decom = containerStateManager.getContainerReplicas(container.containerID()).stream().filter(r -> r.getDatanodeDetails().getPersistedOpState() != IN_SERVICE).collect(Collectors.toSet());
    for (ContainerReplica r : decom) {
        Assert.assertFalse(datanodeCommandHandler.received(SCMCommandProto.Type.deleteContainerCommand, r.getDatanodeDetails()));
    }
    assertOverReplicatedCount(1);
}
Also used : HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) TimeoutException(java.util.concurrent.TimeoutException) STALE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE) DBStore(org.apache.hadoop.hdds.utils.db.DBStore) SCMCommandProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto) HddsTestUtils.getContainer(org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) After(org.junit.After) Map(java.util.Map) SCMHAManager(org.apache.hadoop.hdds.scm.ha.SCMHAManager) HddsConfigKeys(org.apache.hadoop.hdds.HddsConfigKeys) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) FileUtil(org.apache.hadoop.fs.FileUtil) DBStoreBuilder(org.apache.hadoop.hdds.utils.db.DBStoreBuilder) Longs(com.google.common.primitives.Longs) Set(java.util.Set) UUID(java.util.UUID) IN_SERVICE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE) Instant(java.time.Instant) HddsTestUtils.getReplicas(org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas) Collectors(java.util.stream.Collectors) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) ZoneId(java.time.ZoneId) PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) List(java.util.List) MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) ReplicationManagerConfiguration(org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration) SCMServiceManager(org.apache.hadoop.hdds.scm.ha.SCMServiceManager) Optional(java.util.Optional) DECOMMISSIONED(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) IntStream(java.util.stream.IntStream) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) MockSCMHAManager(org.apache.hadoop.hdds.scm.ha.MockSCMHAManager) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DECOMMISSIONING(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING) Before(org.junit.Before) LifeCycleEvent(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent) HEALTHY(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY) State(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) FileUtils(org.apache.commons.io.FileUtils) EventHandler(org.apache.hadoop.hdds.server.events.EventHandler) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) SCMEvents(org.apache.hadoop.hdds.scm.events.SCMEvents) PipelineManager(org.apache.hadoop.hdds.scm.pipeline.PipelineManager) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) IN_MAINTENANCE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE) Mockito(org.mockito.Mockito) SCMDBDefinition(org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition) CLOSED(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED) SCMDBTransactionBufferImpl(org.apache.hadoop.hdds.scm.metadata.SCMDBTransactionBufferImpl) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) Assert(org.junit.Assert) TestClock(org.apache.ozone.test.TestClock) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test)

Example 2 with IN_MAINTENANCE

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE in project ozone by apache.

the class TestDecommissionAndMaintenance method testMaintenanceEndsAutomaticallyAtTimeout.

@Test
public // and new replicas created.
void testMaintenanceEndsAutomaticallyAtTimeout() throws Exception {
    // Generate some data on the empty cluster to create some containers
    generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);
    ContainerInfo container = waitForAndReturnContainer();
    DatanodeDetails dn = getOneDNHostingReplica(getContainerReplicas(container));
    scmClient.startMaintenanceNodes(Arrays.asList(getDNHostAndPort(dn)), 0);
    waitForDnToReachPersistedOpState(dn, IN_MAINTENANCE);
    long newEndTime = System.currentTimeMillis() / 1000 + 5;
    // Update the maintenance end time via NM manually. As the current
    // decommission interface only allows us to specify hours from now as the
    // end time, that is not really suitable for a test like this.
    nm.setNodeOperationalState(dn, IN_MAINTENANCE, newEndTime);
    waitForDnToReachOpState(dn, IN_SERVICE);
    waitForDnToReachPersistedOpState(dn, IN_SERVICE);
    // Put the node back into maintenance and then stop it and wait for it to
    // go dead
    scmClient.startMaintenanceNodes(Arrays.asList(getDNHostAndPort(dn)), 0);
    waitForDnToReachPersistedOpState(dn, IN_MAINTENANCE);
    cluster.shutdownHddsDatanode(dn);
    waitForDnToReachHealthState(dn, DEAD);
    newEndTime = System.currentTimeMillis() / 1000 + 5;
    nm.setNodeOperationalState(dn, IN_MAINTENANCE, newEndTime);
    waitForDnToReachOpState(dn, IN_SERVICE);
    // Ensure there are 3 replicas not including the dead node, indicating a new
    // replica was created
    GenericTestUtils.waitFor(() -> getContainerReplicas(container).stream().filter(r -> !r.getDatanodeDetails().equals(dn)).count() == 3, 200, 30000);
}
Also used : ScmConfigKeys(org.apache.hadoop.hdds.scm.ScmConfigKeys) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Arrays(java.util.Arrays) OZONE_SCM_STALENODE_INTERVAL(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL) LoggerFactory(org.slf4j.LoggerFactory) HDDS_HEARTBEAT_INTERVAL(org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL) TimeoutException(java.util.concurrent.TimeoutException) MiniOzoneCluster(org.apache.hadoop.ozone.MiniOzoneCluster) ContainerReplica(org.apache.hadoop.hdds.scm.container.ContainerReplica) OZONE_SCM_HA_ENABLE_KEY(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY) After(org.junit.After) Duration(java.time.Duration) Assert.fail(org.junit.Assert.fail) AfterClass(org.junit.AfterClass) ENTERING_MAINTENANCE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE) HDDS_CONTAINER_REPORT_INTERVAL(org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL) OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL) Set(java.util.Set) IN_SERVICE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE) Collectors(java.util.stream.Collectors) ReplicationType(org.apache.hadoop.hdds.client.ReplicationType) List(java.util.List) ReplicationManagerConfiguration(org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration) TestCase.assertTrue(junit.framework.TestCase.assertTrue) HDDS_PIPELINE_REPORT_INTERVAL(org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL) DECOMMISSIONED(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) MiniOzoneClusterProvider(org.apache.hadoop.ozone.MiniOzoneClusterProvider) TestCase.assertEquals(junit.framework.TestCase.assertEquals) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) NodeManager(org.apache.hadoop.hdds.scm.node.NodeManager) BeforeClass(org.junit.BeforeClass) ContainerNotFoundException(org.apache.hadoop.hdds.scm.container.ContainerNotFoundException) OZONE_SCM_DEADNODE_INTERVAL(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL) ArrayList(java.util.ArrayList) DEAD(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) OzoneBucket(org.apache.hadoop.ozone.client.OzoneBucket) ContainerManager(org.apache.hadoop.hdds.scm.container.ContainerManager) OZONE_SCM_DATANODE_ADMIN_MONITOR_INTERVAL(org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_ADMIN_MONITOR_INTERVAL) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) DECOMMISSIONING(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING) TestDataUtil(org.apache.hadoop.ozone.TestDataUtil) HDDS_COMMAND_STATUS_REPORT_INTERVAL(org.apache.hadoop.hdds.HddsConfigKeys.HDDS_COMMAND_STATUS_REPORT_INTERVAL) Before(org.junit.Before) HEALTHY(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) ContainerOperationClient(org.apache.hadoop.hdds.scm.cli.ContainerOperationClient) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test) IOException(java.io.IOException) PipelineManager(org.apache.hadoop.hdds.scm.pipeline.PipelineManager) ReplicationFactor(org.apache.hadoop.hdds.client.ReplicationFactor) ContainerReplicaCount(org.apache.hadoop.hdds.scm.container.ContainerReplicaCount) HDDS_NODE_REPORT_INTERVAL(org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL) TimeUnit(java.util.concurrent.TimeUnit) IN_MAINTENANCE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE) SECONDS(java.util.concurrent.TimeUnit.SECONDS) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) Test(org.junit.Test)

Aggregations

IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Set (java.util.Set)2 TimeUnit (java.util.concurrent.TimeUnit)2 TimeoutException (java.util.concurrent.TimeoutException)2 Collectors (java.util.stream.Collectors)2 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)2 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)2 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)2 DECOMMISSIONED (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED)2 DECOMMISSIONING (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING)2 IN_MAINTENANCE (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE)2 IN_SERVICE (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE)2 HEALTHY (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY)2 ReplicationManagerConfiguration (org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration)2 NodeStatus (org.apache.hadoop.hdds.scm.node.NodeStatus)2 NodeNotFoundException (org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException)2 PipelineManager (org.apache.hadoop.hdds.scm.pipeline.PipelineManager)2 GenericTestUtils (org.apache.ozone.test.GenericTestUtils)2