Search in sources :

Example 1 with ContainerPlacementStatusDefault

use of org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault in project ozone by apache.

the class SCMCommonPlacementPolicy method validateContainerPlacement.

/**
 * Default placement validation, covering both rack aware and non rack aware
 * policies by comparing the number of racks in use against the number of
 * racks required.
 * <p>
 * Policies that must validate more than racks (eg node groups, HDFS like
 * upgrade domains) should override this method in the sub class. Rack aware
 * sub classes are expected to override the default getRequiredRackCount and
 * getNetworkTopology methods.
 *
 * @param dns List of datanodes holding a replica of the container
 * @param replicas The expected number of replicas
 * @return ContainerPlacementStatus indicating if the placement policy is
 *         met or not. Note this only considers the rack count and not the
 *         number of replicas.
 */
@Override
public ContainerPlacementStatus validateContainerPlacement(List<DatanodeDetails> dns, int replicas) {
    final NetworkTopology topology = nodeManager.getClusterNetworkTopologyMap();
    final int policyRackCount = getRequiredRackCount(replicas);
    final boolean rackCheckNeeded = topology != null && replicas != 1 && policyRackCount != 1;
    if (!rackCheckNeeded) {
        // With no topology, a single replica or a single required rack,
        // placement is satisfied as soon as at least one DN holds a replica.
        return dns.size() > 0 ? validPlacement : invalidPlacement;
    }
    // A topology is available, so work out whether enough racks are used.
    // The leaf nodes are all at max level, so the number of nodes one level
    // above the leaves is the rack count.
    final int rackLevel = topology.getMaxLevel() - 1;
    final int totalRacks = topology.getNumOfNodes(rackLevel);
    final int usedRacks = (int) dns.stream()
            .map(dn -> topology.getAncestor(dn, 1))
            .distinct()
            .count();
    // Never demand more racks than there are replicas to spread over them.
    final int neededRacks = Math.min(replicas, policyRackCount);
    return new ContainerPlacementStatusDefault(usedRacks, neededRacks, totalRacks);
}
Also used : NetworkTopology(org.apache.hadoop.hdds.scm.net.NetworkTopology) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault)

Example 2 with ContainerPlacementStatusDefault

use of org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault in project ozone by apache.

the class TestContainerHealthTaskRecordGenerator method testCorrectRecordsGenerated.

/**
 * Walks one container through healthy, over-replicated, under- plus
 * mis-replicated and missing states, checking the unhealthy records
 * generated for each.
 */
@Test
public void testCorrectRecordsGenerated() {
    final long timestamp = 1234567L;

    // Healthy: three CLOSED replicas, so nothing should be reported.
    final Set<ContainerReplica> healthyReplicas = generateReplicas(container, CLOSED, CLOSED, CLOSED);
    final ContainerHealthStatus healthy = new ContainerHealthStatus(container, healthyReplicas, placementPolicy);
    final List<UnhealthyContainers> healthyRecords =
            ContainerHealthTask.ContainerHealthRecords.generateUnhealthyRecords(healthy, timestamp);
    assertEquals(0, healthyRecords.size());

    // Over-replicated: five replicas should yield a single over replicated record.
    final Set<ContainerReplica> extraReplicas = generateReplicas(container, CLOSED, CLOSED, CLOSED, CLOSED, CLOSED);
    final ContainerHealthStatus overReplicated = new ContainerHealthStatus(container, extraReplicas, placementPolicy);
    final List<UnhealthyContainers> overRecords =
            ContainerHealthTask.ContainerHealthRecords.generateUnhealthyRecords(overReplicated, timestamp);
    assertEquals(1, overRecords.size());
    final UnhealthyContainers over = overRecords.get(0);
    assertEquals(UnHealthyContainerStates.OVER_REPLICATED.toString(), over.getContainerState());
    assertEquals(3, over.getExpectedReplicaCount().intValue());
    assertEquals(5, over.getActualReplicaCount().intValue());
    assertEquals(-2, over.getReplicaDelta().intValue());

    // Under and mis-replicated: two replicas with a placement status
    // constructed as (1, 2, 5) should yield two records, one per state.
    final Set<ContainerReplica> fewReplicas = generateReplicas(container, CLOSED, CLOSED);
    when(placementPolicy.validateContainerPlacement(Mockito.anyList(), Mockito.anyInt()))
            .thenReturn(new ContainerPlacementStatusDefault(1, 2, 5));
    final ContainerHealthStatus underAndMis = new ContainerHealthStatus(container, fewReplicas, placementPolicy);
    final List<UnhealthyContainers> underAndMisRecords =
            ContainerHealthTask.ContainerHealthRecords.generateUnhealthyRecords(underAndMis, timestamp);
    assertEquals(2, underAndMisRecords.size());

    final UnhealthyContainers mis = findRecordForState(underAndMisRecords, UnHealthyContainerStates.MIS_REPLICATED);
    assertEquals(UnHealthyContainerStates.MIS_REPLICATED.toString(), mis.getContainerState());
    assertEquals(2, mis.getExpectedReplicaCount().intValue());
    assertEquals(1, mis.getActualReplicaCount().intValue());
    assertEquals(1, mis.getReplicaDelta().intValue());
    assertNotNull(mis.getReason());

    final UnhealthyContainers under = findRecordForState(underAndMisRecords, UnHealthyContainerStates.UNDER_REPLICATED);
    assertEquals(UnHealthyContainerStates.UNDER_REPLICATED.toString(), under.getContainerState());
    assertEquals(3, under.getExpectedReplicaCount().intValue());
    assertEquals(2, under.getActualReplicaCount().intValue());
    assertEquals(1, under.getReplicaDelta().intValue());

    // Missing: with no replicas at all only a single missing record is
    // expected, even though the placement status still reports mis-replication.
    fewReplicas.clear();
    when(placementPolicy.validateContainerPlacement(Mockito.anyList(), Mockito.anyInt()))
            .thenReturn(new ContainerPlacementStatusDefault(1, 2, 5));
    final ContainerHealthStatus missing = new ContainerHealthStatus(container, fewReplicas, placementPolicy);
    final List<UnhealthyContainers> missingRecords =
            ContainerHealthTask.ContainerHealthRecords.generateUnhealthyRecords(missing, timestamp);
    assertEquals(1, missingRecords.size());
    final UnhealthyContainers gone = missingRecords.get(0);
    assertEquals(UnHealthyContainerStates.MISSING.toString(), gone.getContainerState());
    assertEquals(3, gone.getExpectedReplicaCount().intValue());
    assertEquals(0, gone.getActualReplicaCount().intValue());
    assertEquals(3, gone.getReplicaDelta().intValue());
}
Also used : ContainerReplica(org.apache.hadoop.hdds.scm.container.ContainerReplica) UnhealthyContainers(org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) Test(org.junit.Test)

Example 3 with ContainerPlacementStatusDefault

use of org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault in project ozone by apache.

the class TestContainerHealthTaskRecordGenerator method testMisReplicatedRecordRetainedAndUpdated.

/**
 * Checks that an existing mis-replicated record is kept and refreshed while
 * the container is still mis-replicated, and dropped once placement is
 * satisfied. Records of other states must not be retained.
 */
@Test
public void testMisReplicatedRecordRetainedAndUpdated() {
    // Mis-replicated container: three replicas, with a placement status
    // constructed as (2, 3, 5).
    final Set<ContainerReplica> replicas = generateReplicas(container, CLOSED, CLOSED, CLOSED);
    when(placementPolicy.validateContainerPlacement(Mockito.anyList(), Mockito.anyInt()))
            .thenReturn(new ContainerPlacementStatusDefault(2, 3, 5));
    final ContainerHealthStatus misPlaced = new ContainerHealthStatus(container, replicas, placementPolicy);
    final UnhealthyContainersRecord misRecord = misReplicatedRecord();

    final boolean retained = ContainerHealthTask.ContainerHealthRecords.retainOrUpdateRecord(misPlaced, misRecord);
    assertTrue(retained);
    // The retained record should now carry an actual count of 2 and a delta of 1.
    assertEquals(2, misRecord.getActualReplicaCount().intValue());
    assertEquals(1, misRecord.getReplicaDelta().intValue());
    assertNotNull(misRecord.getReason());

    // Missing / Under / Over replicated records should not be retained.
    assertFalse(ContainerHealthTask.ContainerHealthRecords.retainOrUpdateRecord(misPlaced, missingRecord()));
    assertFalse(ContainerHealthTask.ContainerHealthRecords.retainOrUpdateRecord(misPlaced, underReplicatedRecord()));
    assertFalse(ContainerHealthTask.ContainerHealthRecords.retainOrUpdateRecord(misPlaced, overReplicatedRecord()));

    // Container is now placed OK (status constructed as (3, 3, 5)), so the
    // record should be removed.
    when(placementPolicy.validateContainerPlacement(Mockito.anyList(), Mockito.anyInt()))
            .thenReturn(new ContainerPlacementStatusDefault(3, 3, 5));
    final ContainerHealthStatus wellPlaced = new ContainerHealthStatus(container, replicas, placementPolicy);
    assertFalse(ContainerHealthTask.ContainerHealthRecords.retainOrUpdateRecord(wellPlaced, misRecord));
}
Also used : ContainerReplica(org.apache.hadoop.hdds.scm.container.ContainerReplica) UnhealthyContainersRecord(org.hadoop.ozone.recon.schema.tables.records.UnhealthyContainersRecord) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) Test(org.junit.Test)

Example 4 with ContainerPlacementStatusDefault

use of org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault in project ozone by apache.

the class TestContainerHealthTaskRecordGenerator method setup.

/**
 * Builds the mocked container and placement policy shared by the tests: a
 * Ratis THREE container with id 123456, and a policy that by default returns
 * a placement status constructed as (1, 1, 1).
 */
@Before
public void setup() {
    final long containerId = 123456L;
    final ContainerPlacementStatusDefault defaultPlacement = new ContainerPlacementStatusDefault(1, 1, 1);

    placementPolicy = mock(PlacementPolicy.class);
    container = mock(ContainerInfo.class);

    when(container.getReplicationConfig())
            .thenReturn(RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE));
    when(container.containerID()).thenReturn(ContainerID.valueOf(containerId));
    when(container.getContainerID()).thenReturn(containerId);
    when(placementPolicy.validateContainerPlacement(Mockito.anyList(), Mockito.anyInt()))
            .thenReturn(defaultPlacement);
}
Also used : PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) Before(org.junit.Before)

Example 5 with ContainerPlacementStatusDefault

use of org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault in project ozone by apache.

the class TestReplicationManager method additionalReplicaScheduledWhenMisReplicated.

/**
 * Checks that one extra replica is scheduled for a mis-replicated container,
 * and that nothing further is scheduled when an additional replica would not
 * improve the placement.
 */
@Test
public void additionalReplicaScheduledWhenMisReplicated() throws IOException {
    final SCMCommandProto.Type replicateCmd = SCMCommandProto.Type.replicateContainerCommand;

    final ContainerInfo container = getContainer(LifeCycleState.CLOSED);
    container.setUsedBytes(100);
    final ContainerID id = container.containerID();
    final UUID originNodeId = UUID.randomUUID();
    final ContainerReplica first = getReplicas(id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails());
    final ContainerReplica second = getReplicas(id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails());
    final ContainerReplica third = getReplicas(id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails());
    containerStateManager.addContainer(container.getProtobuf());
    containerStateManager.updateContainerReplica(id, first);
    containerStateManager.updateContainerReplica(id, second);
    containerStateManager.updateContainerReplica(id, third);

    // Report a mis-replicated status only when exactly 3 replicas are being
    // validated. Any other replica count falls through to the stubbing set
    // up elsewhere in the fixture (expected to report a healthy placement).
    Mockito.when(containerPlacementPolicy.validateContainerPlacement(
            Mockito.argThat(list -> list.size() == 3), Mockito.anyInt()))
            .thenAnswer(invocation -> new ContainerPlacementStatusDefault(1, 2, 3));

    final int sentBeforeFirstPass = datanodeCommandHandler.getInvocationCount(replicateCmd);
    final long bytesBeforeFirstPass = replicationManager.getMetrics().getNumReplicationBytesTotal();
    replicationManager.processAll();
    eventQueue.processAll(1000);

    // The mocked validateContainerPlacement says the policy is not satisfied,
    // so exactly one replication should have been triggered.
    Assert.assertEquals(sentBeforeFirstPass + 1, datanodeCommandHandler.getInvocationCount(replicateCmd));
    Assert.assertEquals(sentBeforeFirstPass + 1, replicationManager.getMetrics().getNumReplicationCmdsSent());
    Assert.assertEquals(bytesBeforeFirstPass + 100, replicationManager.getMetrics().getNumReplicationBytesTotal());
    Assert.assertEquals(1, replicationManager.getInflightReplication().size());
    Assert.assertEquals(1, replicationManager.getMetrics().getInflightReplication());
    final ReplicationManagerReport report = replicationManager.getContainerReport();
    Assert.assertEquals(1, report.getStat(LifeCycleState.CLOSED));
    Assert.assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.MIS_REPLICATED));

    // Now report mis-replication regardless of the replica count. This checks
    // that no replica is scheduled when the new replica would not fix the
    // mis-replication.
    Mockito.when(containerPlacementPolicy.validateContainerPlacement(Mockito.anyList(), Mockito.anyInt()))
            .thenAnswer(invocation -> new ContainerPlacementStatusDefault(1, 2, 3));

    final int sentBeforeSecondPass = datanodeCommandHandler.getInvocationCount(replicateCmd);
    replicationManager.processAll();
    eventQueue.processAll(1000);

    // The mocked placement shows no improvement in the racks used, so nothing
    // new should be scheduled and the inflight count stays at one.
    Assert.assertEquals(sentBeforeSecondPass, datanodeCommandHandler.getInvocationCount(replicateCmd));
    Assert.assertEquals(sentBeforeSecondPass, replicationManager.getMetrics().getNumReplicationCmdsSent());
    Assert.assertEquals(1, replicationManager.getInflightReplication().size());
    Assert.assertEquals(1, replicationManager.getMetrics().getInflightReplication());
}
Also used : HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) TimeoutException(java.util.concurrent.TimeoutException) STALE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE) DBStore(org.apache.hadoop.hdds.utils.db.DBStore) SCMCommandProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto) HddsTestUtils.getContainer(org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) After(org.junit.After) Map(java.util.Map) SCMHAManager(org.apache.hadoop.hdds.scm.ha.SCMHAManager) HddsConfigKeys(org.apache.hadoop.hdds.HddsConfigKeys) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) FileUtil(org.apache.hadoop.fs.FileUtil) DBStoreBuilder(org.apache.hadoop.hdds.utils.db.DBStoreBuilder) Longs(com.google.common.primitives.Longs) Set(java.util.Set) UUID(java.util.UUID) IN_SERVICE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE) Instant(java.time.Instant) HddsTestUtils.getReplicas(org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas) Collectors(java.util.stream.Collectors) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) ZoneId(java.time.ZoneId) PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) List(java.util.List) MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) ReplicationManagerConfiguration(org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration) SCMServiceManager(org.apache.hadoop.hdds.scm.ha.SCMServiceManager) Optional(java.util.Optional) 
DECOMMISSIONED(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) IntStream(java.util.stream.IntStream) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) MockSCMHAManager(org.apache.hadoop.hdds.scm.ha.MockSCMHAManager) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DECOMMISSIONING(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING) Before(org.junit.Before) LifeCycleEvent(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent) HEALTHY(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY) State(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) FileUtils(org.apache.commons.io.FileUtils) EventHandler(org.apache.hadoop.hdds.server.events.EventHandler) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) SCMEvents(org.apache.hadoop.hdds.scm.events.SCMEvents) PipelineManager(org.apache.hadoop.hdds.scm.pipeline.PipelineManager) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) IN_MAINTENANCE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE) Mockito(org.mockito.Mockito) 
SCMDBDefinition(org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition) CLOSED(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED) SCMDBTransactionBufferImpl(org.apache.hadoop.hdds.scm.metadata.SCMDBTransactionBufferImpl) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) Assert(org.junit.Assert) TestClock(org.apache.ozone.test.TestClock) UUID(java.util.UUID) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) Test(org.junit.Test)

Aggregations

ContainerPlacementStatusDefault (org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault)12 Test (org.junit.Test)9 PlacementPolicy (org.apache.hadoop.hdds.scm.PlacementPolicy)7 Before (org.junit.Before)7 Longs (com.google.common.primitives.Longs)5 File (java.io.File)5 IOException (java.io.IOException)5 Instant (java.time.Instant)5 ZoneId (java.time.ZoneId)5 ArrayList (java.util.ArrayList)5 HashMap (java.util.HashMap)5 List (java.util.List)5 Map (java.util.Map)5 Optional (java.util.Optional)5 Set (java.util.Set)5 UUID (java.util.UUID)5 CompletableFuture (java.util.concurrent.CompletableFuture)5 ExecutionException (java.util.concurrent.ExecutionException)5 TimeUnit (java.util.concurrent.TimeUnit)5 TimeoutException (java.util.concurrent.TimeoutException)5