Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE in project ozone by apache.
From the class TestReplicationManager, method testOverReplicatedClosedContainerWithDecomAndMaint.
/**
 * Verifies that, when trimming an over replicated CLOSED container,
 * ReplicationManager never picks a replica that lives on a DECOMMISSIONED
 * or IN_MAINTENANCE node.
 *
 * <p>The container receives five replicas created with
 * {@code NodeStatus.inServiceHealthy()}, one on a decommissioned node and
 * one on a node in maintenance. After a single ReplicationManager pass the
 * test expects two additional delete commands, none of which may be
 * addressed to a datanode whose persisted operational state is not
 * IN_SERVICE.
 */
@Test
public void testOverReplicatedClosedContainerWithDecomAndMaint() throws IOException {
  final ContainerInfo closedContainer = createContainer(LifeCycleState.CLOSED);

  // Replica order is kept as: in-service, decommissioned, maintenance,
  // followed by four more in-service replicas.
  addReplica(closedContainer, NodeStatus.inServiceHealthy(), CLOSED);
  addReplica(closedContainer, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED);
  addReplica(closedContainer, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
  for (int i = 0; i < 4; i++) {
    addReplica(closedContainer, NodeStatus.inServiceHealthy(), CLOSED);
  }

  // Baseline taken before the ReplicationManager run so that only the
  // commands produced by this test are counted.
  final int deletesBefore = datanodeCommandHandler.getInvocationCount(
      SCMCommandProto.Type.deleteContainerCommand);
  final int expectedDeletes = deletesBefore + 2;

  replicationManager.processAll();
  eventQueue.processAll(1000);

  Assert.assertEquals(expectedDeletes,
      datanodeCommandHandler.getInvocationCount(
          SCMCommandProto.Type.deleteContainerCommand));
  Assert.assertEquals(expectedDeletes,
      replicationManager.getMetrics().getNumDeletionCmdsSent());
  Assert.assertEquals(1, replicationManager.getInflightDeletion().size());
  Assert.assertEquals(1,
      replicationManager.getMetrics().getInflightDeletion());

  // Walk every replica of the container; any replica whose node is not
  // IN_SERVICE (the DECOM and Maint ones) must not have been sent a delete.
  for (ContainerReplica replica
      : containerStateManager.getContainerReplicas(
          closedContainer.containerID())) {
    if (replica.getDatanodeDetails().getPersistedOpState() == IN_SERVICE) {
      continue;
    }
    Assert.assertFalse(datanodeCommandHandler.received(
        SCMCommandProto.Type.deleteContainerCommand,
        replica.getDatanodeDetails()));
  }

  assertOverReplicatedCount(1);
}
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE in project ozone by apache.
From the class TestDecommissionAndMaintenance, method testMaintenanceEndsAutomaticallyAtTimeout.
/**
 * Checks that a datanode placed into maintenance goes back to IN_SERVICE
 * by itself once its maintenance end time has passed.
 *
 * <p>The scenario is exercised twice on the same datanode: first while the
 * node is running, then after the node has been shut down and is reported
 * DEAD. In the second case the test also waits until three replicas of the
 * container exist on nodes other than the dead one, which indicates that a
 * new replica was created.
 */
@Test
public void testMaintenanceEndsAutomaticallyAtTimeout() throws Exception {
  // Generate some data on the empty cluster so that containers exist.
  generateData(20, "key", ReplicationFactor.THREE, ReplicationType.RATIS);

  ContainerInfo targetContainer = waitForAndReturnContainer();
  DatanodeDetails maintenanceNode =
      getOneDNHostingReplica(getContainerReplicas(targetContainer));

  // Round one: the node stays up while it is in maintenance.
  scmClient.startMaintenanceNodes(
      Arrays.asList(getDNHostAndPort(maintenanceNode)), 0);
  waitForDnToReachPersistedOpState(maintenanceNode, IN_MAINTENANCE);

  final long firstEndTime = (System.currentTimeMillis() / 1000) + 5;
  // The end time is overridden directly through NM. The decommission
  // interface only accepts an end time expressed in hours from now, which
  // is not really suitable for a test like this.
  nm.setNodeOperationalState(maintenanceNode, IN_MAINTENANCE, firstEndTime);
  waitForDnToReachOpState(maintenanceNode, IN_SERVICE);
  waitForDnToReachPersistedOpState(maintenanceNode, IN_SERVICE);

  // Round two: put the node back into maintenance, stop it and wait for
  // it to go dead before letting the maintenance window expire.
  scmClient.startMaintenanceNodes(
      Arrays.asList(getDNHostAndPort(maintenanceNode)), 0);
  waitForDnToReachPersistedOpState(maintenanceNode, IN_MAINTENANCE);
  cluster.shutdownHddsDatanode(maintenanceNode);
  waitForDnToReachHealthState(maintenanceNode, DEAD);

  final long secondEndTime = (System.currentTimeMillis() / 1000) + 5;
  nm.setNodeOperationalState(maintenanceNode, IN_MAINTENANCE, secondEndTime);
  waitForDnToReachOpState(maintenanceNode, IN_SERVICE);

  // Ensure there are 3 replicas not counting the dead node, indicating a
  // new replica was created.
  GenericTestUtils.waitFor(
      () -> getContainerReplicas(targetContainer).stream()
          .filter(replica ->
              !replica.getDatanodeDetails().equals(maintenanceNode))
          .count() == 3,
      200, 30000);
}
Aggregations