Search in sources :

Example 66 with ContainerReplica

use of org.apache.hadoop.hdds.scm.container.ContainerReplica in project ozone by apache.

the class TestDecommissionAndMaintenance method testSingleNodeWithOpenPipelineCanGotoMaintenance.

/**
 * Sends one datanode hosting a replica of a container on an OPEN pipeline
 * into maintenance and checks that the container keeps 3 replicas, both
 * while the node is in maintenance and after it has gone dead. The node is
 * then restarted (expected to stay IN_MAINTENANCE), stopped again,
 * recommissioned while dead, and restarted once more, after which it is
 * expected to end up IN_SERVICE.
 */
@Test
public void testSingleNodeWithOpenPipelineCanGotoMaintenance() throws Exception {
    // Write some keys so the (initially empty) cluster creates containers.
    generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);

    // Pick a container and confirm the pipeline it belongs to is OPEN.
    final ContainerInfo trackedContainer = waitForAndReturnContainer();
    final Pipeline containerPipeline = pm.getPipeline(trackedContainer.getPipelineID());
    assertEquals(Pipeline.PipelineState.OPEN, containerPipeline.getPipelineState());

    // Choose one datanode that holds a replica and put it into maintenance.
    final DatanodeDetails maintenanceDn =
        getOneDNHostingReplica(getContainerReplicas(trackedContainer));
    scmClient.startMaintenanceNodes(Arrays.asList(getDNHostAndPort(maintenanceDn)), 0);
    waitForDnToReachOpState(maintenanceDn, IN_MAINTENANCE);
    waitForDnToReachPersistedOpState(maintenanceDn, IN_MAINTENANCE);

    // Maintenance must not trigger replication, so all 3 replicas should
    // still be reported.
    assertEquals(3, cm.getContainerReplicas(trackedContainer.containerID()).size());

    // Shut the maintenance node down and wait until it is marked DEAD.
    cluster.shutdownHddsDatanode(maintenanceDn);
    waitForDnToReachHealthState(maintenanceDn, DEAD);

    // Replicas of a dead maintenance node are not purged, so the count is
    // expected to remain at 3.
    assertEquals(3, cm.getContainerReplicas(trackedContainer.containerID()).size());

    // Bring the node back; it should come up HEALTHY and still be
    // IN_MAINTENANCE.
    cluster.restartHddsDatanode(maintenanceDn, true);
    final DatanodeDetails restartedDn = nm.getNodeByUuid(maintenanceDn.getUuid().toString());
    waitForDnToReachHealthState(restartedDn, HEALTHY);
    waitForDnToReachPersistedOpState(restartedDn, IN_MAINTENANCE);

    // Stop the node again and wait for it to be declared DEAD.
    final int maintenanceDnIndex = cluster.getHddsDatanodeIndex(maintenanceDn);
    cluster.shutdownHddsDatanode(maintenanceDnIndex);
    waitForDnToReachHealthState(maintenanceDn, DEAD);

    // With the node down and dead, recommission it in SCM.
    scmClient.recommissionNodes(Arrays.asList(getDNHostAndPort(maintenanceDn)));

    // Restart the node. This is not its first registration since SCM
    // started, so it reports its own operational state; where that differs
    // from SCM's view, SCM's state should win and the node should be updated
    // to IN_SERVICE.
    cluster.restartHddsDatanode(maintenanceDnIndex, true);
    final DatanodeDetails recommissionedDn = nm.getNodeByUuid(maintenanceDn.getUuid().toString());
    waitForDnToReachHealthState(recommissionedDn, HEALTHY);
    waitForDnToReachOpState(recommissionedDn, IN_SERVICE);
    waitForDnToReachPersistedOpState(maintenanceDn, IN_SERVICE);
}
Also used : ContainerReplica(org.apache.hadoop.hdds.scm.container.ContainerReplica) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) Test(org.junit.Test)

Example 67 with ContainerReplica

use of org.apache.hadoop.hdds.scm.container.ContainerReplica in project ozone by apache.

the class TestDecommissionAndMaintenance method testEnteringMaintenanceNodeCompletesAfterSCMRestart.

/**
 * Starts maintenance on every datanode hosting a replica of one container,
 * restarts SCM while those nodes are ENTERING_MAINTENANCE, and checks that
 * they still reach IN_MAINTENANCE afterwards and that the tracked container
 * ends up with at least 5 replicas.
 */
@Test
public void testEnteringMaintenanceNodeCompletesAfterSCMRestart() throws Exception {
    // Stop the Replication Manager to ensure no containers are replicated.
    stopReplicationManager();

    // Write some keys so the (initially empty) cluster creates containers.
    generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);

    // Pick a container and collect every datanode that hosts one of its
    // replicas.
    final ContainerInfo trackedContainer = waitForAndReturnContainer();
    final List<DatanodeDetails> maintenanceNodes = new ArrayList<>();
    for (ContainerReplica replica : getContainerReplicas(trackedContainer)) {
        maintenanceNodes.add(replica.getDatanodeDetails());
    }

    // Request maintenance for all of those datanodes at once.
    scmClient.startMaintenanceNodes(
        maintenanceNodes.stream()
            .map(node -> getDNHostAndPort(node))
            .collect(Collectors.toList()),
        0);

    // Every selected datanode should reach ENTERING_MAINTENANCE.
    for (DatanodeDetails node : maintenanceNodes) {
        waitForDnToReachPersistedOpState(node, ENTERING_MAINTENANCE);
    }

    // Restart SCM and refresh the manager references used by this test.
    cluster.restartStorageContainerManager(true);
    setManagers();

    // Look the same datanodes up again (by UUID) through the node manager.
    final List<DatanodeDetails> reRegisteredNodes = new ArrayList<>();
    for (DatanodeDetails node : maintenanceNodes) {
        reRegisteredNodes.add(nm.getNodeByUuid(node.getUuid().toString()));
    }

    // All of them should complete the transition to IN_MAINTENANCE.
    for (DatanodeDetails node : reRegisteredNodes) {
        waitForDnToReachOpState(node, IN_MAINTENANCE);
    }

    // The tracked container is expected to have 5-6 replicas by now.
    final Set<ContainerReplica> replicasAfterRestart =
        cm.getContainerReplicas(trackedContainer.containerID());
    assertTrue(replicasAfterRestart.size() >= 5);
}
Also used : ContainerReplica(org.apache.hadoop.hdds.scm.container.ContainerReplica) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Aggregations

ContainerReplica (org.apache.hadoop.hdds.scm.container.ContainerReplica)67 Test (org.junit.Test)53 ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo)52 ContainerReplicaCount (org.apache.hadoop.hdds.scm.container.ContainerReplicaCount)36 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)15 ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID)9 ContainerManager (org.apache.hadoop.hdds.scm.container.ContainerManager)7 ArrayList (java.util.ArrayList)6 HashSet (java.util.HashSet)6 UUID (java.util.UUID)6 MockDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails)5 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)5 IOException (java.io.IOException)4 Map (java.util.Map)4 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)4 ContainerPlacementStatusDefault (org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault)4 File (java.io.File)3 Collection (java.util.Collection)3 HashMap (java.util.HashMap)3 List (java.util.List)3