Example 66 with ContainerID

use of org.apache.hadoop.hdds.scm.container.ContainerID in project ozone by apache.

the class ContainerBalancerSelectionCriteria method getCandidateContainers.

/**
 * Gets containers that are suitable for moving based on the following
 * required criteria:
 * 1. Container must not be undergoing replication.
 * 2. Container must not already be selected for balancing.
 * 3. Container size should be closer to 5GB.
 * 4. Container must not be in the configured exclude containers list.
 * 5. Container should be closed.
 *
 * @param node DatanodeDetails for which to find candidate containers.
 * @return NavigableSet of candidate containers that satisfy the criteria.
 */
public NavigableSet<ContainerID> getCandidateContainers(DatanodeDetails node) {
    NavigableSet<ContainerID> containerIDSet = new TreeSet<>(orderContainersByUsedBytes().reversed());
    try {
        containerIDSet.addAll(nodeManager.getContainers(node));
    } catch (NodeNotFoundException e) {
        LOG.warn("Could not find Datanode {} while selecting candidate " + "containers for Container Balancer.", node.toString(), e);
        return containerIDSet;
    }
    if (excludeContainers != null) {
        containerIDSet.removeAll(excludeContainers);
    }
    if (selectedContainers != null) {
        containerIDSet.removeAll(selectedContainers);
    }
    // remove containers that are not closed
    containerIDSet.removeIf(containerID -> {
        try {
            return containerManager.getContainer(containerID).getState() != HddsProtos.LifeCycleState.CLOSED;
        } catch (ContainerNotFoundException e) {
            LOG.warn("Could not retrieve ContainerInfo for container {} for " + "checking LifecycleState in ContainerBalancer. Excluding this " + "container.", containerID.toString(), e);
            return true;
        }
    });
    // If moving a container out would drop the source datanode's utilization
    // below lowerLimit, that container cannot be a candidate and is removed
    // from the candidate set.
    containerIDSet.removeIf(c -> {
        ContainerInfo cInfo;
        try {
            cInfo = containerManager.getContainer(c);
        } catch (ContainerNotFoundException e) {
            LOG.warn("Could not find container {} when " + "be matched with a move target", c);
            // remove this not found container
            return true;
        }
        return !findSourceStrategy.canSizeLeaveSource(node, cInfo.getUsedBytes());
    });
    containerIDSet.removeIf(this::isContainerReplicatingOrDeleting);
    return containerIDSet;
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) TreeSet(java.util.TreeSet) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo) ContainerNotFoundException(org.apache.hadoop.hdds.scm.container.ContainerNotFoundException)
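
The ordering trick at the top of getCandidateContainers is worth calling out: the TreeSet is built on a reversed used-bytes comparator, so iteration yields the largest containers first. Below is a minimal, JDK-only sketch of the same pattern; the usedBytes map and the plain Long ids are hypothetical stand-ins for the real ContainerManager lookups, not part of the Ozone API.

import java.util.Comparator;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeSet;

public class DescendingBySizeSketch {
    public static void main(String[] args) {
        // Hypothetical container-id -> used-bytes mapping standing in for
        // ContainerManager/ContainerInfo lookups.
        Map<Long, Long> usedBytes = Map.of(
                1L, 3_000_000_000L,
                2L, 5_000_000_000L,
                3L, 1_000_000_000L);

        // Order by used bytes with an id tie-breaker, then reverse so the
        // largest container comes first, the same idea as
        // orderContainersByUsedBytes().reversed() above. The tie-breaker
        // matters: a TreeSet drops elements its comparator considers equal.
        Comparator<Long> byUsedBytes = Comparator
                .comparingLong((Long id) -> usedBytes.get(id))
                .thenComparingLong(Long::longValue);
        NavigableSet<Long> candidates = new TreeSet<>(byUsedBytes.reversed());
        candidates.addAll(usedBytes.keySet());

        // Prints 2, 1, 3: containers in descending order of used bytes.
        candidates.forEach(System.out::println);
    }
}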

Example 67 with ContainerID

use of org.apache.hadoop.hdds.scm.container.ContainerID in project ozone by apache.

the class ContainerBalancer method moveContainer.

/**
 * Asks {@link ReplicationManager} to move the specified container from
 * source to target.
 *
 * @param source the source datanode
 * @param moveSelection the selected container to move and target datanode
 * @return false if an exception occurred, the move completed
 * exceptionally, or the move completed with a result other than
 * ReplicationManager.MoveResult.COMPLETED. Returns true if the move
 * completed with MoveResult.COMPLETED or the move is not yet done.
 */
private boolean moveContainer(DatanodeDetails source, ContainerMoveSelection moveSelection) {
    ContainerID container = moveSelection.getContainerID();
    CompletableFuture<ReplicationManager.MoveResult> future;
    try {
        future = replicationManager.move(container, source, moveSelection.getTargetNode());
    } catch (ContainerNotFoundException e) {
        LOG.warn("Could not find Container {} for container move", container, e);
        return false;
    } catch (NodeNotFoundException e) {
        LOG.warn("Container move failed for container {}", container, e);
        return false;
    }
    if (future.isDone()) {
        if (future.isCompletedExceptionally()) {
            LOG.info("Container move for container {} from source {} to target {}" + "completed exceptionally", container.toString(), source.getUuidString(), moveSelection.getTargetNode().getUuidString());
            return false;
        } else {
            ReplicationManager.MoveResult result = future.join();
            moveSelectionToFutureMap.put(moveSelection, future);
            return result == ReplicationManager.MoveResult.COMPLETED;
        }
    } else {
        moveSelectionToFutureMap.put(moveSelection, future);
        return true;
    }
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) ReplicationManager(org.apache.hadoop.hdds.scm.container.ReplicationManager) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) ContainerNotFoundException(org.apache.hadoop.hdds.scm.container.ContainerNotFoundException)
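
The branching on future.isDone() above is a general CompletableFuture pattern: the move request may already be finished when the call returns (for example, if it was rejected immediately) or still be in flight. A small JDK-only sketch of the same decision tree follows; submitMove and MoveResult are hypothetical stand-ins for ReplicationManager.move and its result type.

import java.util.concurrent.CompletableFuture;

public class MoveFuturePatternSketch {

    enum MoveResult { COMPLETED, FAIL_NOT_RUNNING }

    // Hypothetical stand-in for ReplicationManager.move(); it may return an
    // already-completed future or one that completes later.
    static CompletableFuture<MoveResult> submitMove(long containerId) {
        return CompletableFuture.completedFuture(MoveResult.COMPLETED);
    }

    static boolean startMove(long containerId) {
        CompletableFuture<MoveResult> future = submitMove(containerId);
        if (future.isDone()) {
            if (future.isCompletedExceptionally()) {
                // Synchronous failure: report it and give up on this move.
                return false;
            }
            // join() cannot block here because the future is already done.
            return future.join() == MoveResult.COMPLETED;
        }
        // Still in flight: keep the future for later inspection (as the
        // moveSelectionToFutureMap does above) and treat the move as accepted.
        return true;
    }

    public static void main(String[] args) {
        System.out.println(startMove(42L)); // prints true in this sketch
    }
}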

Example 68 with ContainerID

use of org.apache.hadoop.hdds.scm.container.ContainerID in project ozone by apache.

the class ContainerStateMap method intersectSets.

/**
 * Calculates the intersection between sets and returns a new set.
 *
 * @param smaller - First Set
 * @param bigger - Second Set
 * @return resultSet which is the intersection of these two sets.
 */
private NavigableSet<ContainerID> intersectSets(final NavigableSet<ContainerID> smaller, final NavigableSet<ContainerID> bigger) {
    Preconditions.checkState(smaller.size() <= bigger.size(), "This function assumes the first set is smaller than or equal to the " + "second set.");
    final NavigableSet<ContainerID> resultSet = new TreeSet<>();
    for (ContainerID id : smaller) {
        if (bigger.contains(id)) {
            resultSet.add(id);
        }
    }
    return resultSet;
}
Also used : ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) TreeSet(java.util.TreeSet)
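
For comparison, the same intersection can be expressed by copying the smaller set and calling retainAll; like the loop above, this leaves both inputs unmodified and keeps the work proportional to the smaller set. This is only an alternative sketch with plain Long ids, not how ContainerStateMap is written.

import java.util.NavigableSet;
import java.util.Set;
import java.util.TreeSet;

public class IntersectSketch {

    static NavigableSet<Long> intersect(Set<Long> smaller, Set<Long> bigger) {
        // Copy first so neither input set is modified, then retain only the
        // elements that are also present in the bigger set.
        NavigableSet<Long> result = new TreeSet<>(smaller);
        result.retainAll(bigger);
        return result;
    }

    public static void main(String[] args) {
        NavigableSet<Long> a = new TreeSet<>(Set.of(1L, 2L, 3L));
        NavigableSet<Long> b = new TreeSet<>(Set.of(2L, 3L, 4L, 5L));
        System.out.println(intersect(a, b)); // prints [2, 3]
    }
}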

Example 69 with ContainerID

use of org.apache.hadoop.hdds.scm.container.ContainerID in project ozone by apache.

the class AbstractFindTargetGreedy method containerMoveSatisfiesPlacementPolicy.

/**
 * Checks if container being present in target instead of source satisfies
 * the placement policy.
 * @param containerID Container to be moved from source to target
 * @param replicas Set of replicas of the given container
 * @param source Source datanode for container move
 * @param target Target datanode for container move
 * @return true if placement policy is satisfied, otherwise false
 */
private boolean containerMoveSatisfiesPlacementPolicy(ContainerID containerID, Set<ContainerReplica> replicas, DatanodeDetails source, DatanodeDetails target) {
    ContainerInfo containerInfo;
    try {
        containerInfo = containerManager.getContainer(containerID);
    } catch (ContainerNotFoundException e) {
        logger.warn("Could not get Container {} from Container Manager while " + "checking if container move satisfies placement policy in " + "Container Balancer.", containerID.toString(), e);
        return false;
    }
    List<DatanodeDetails> replicaList = replicas.stream().map(ContainerReplica::getDatanodeDetails).filter(datanodeDetails -> !datanodeDetails.equals(source)).collect(Collectors.toList());
    replicaList.add(target);
    ContainerPlacementStatus placementStatus = placementPolicy.validateContainerPlacement(replicaList, containerInfo.getReplicationConfig().getRequiredNodes());
    return placementStatus.isPolicySatisfied();
}
Also used : NodeManager(org.apache.hadoop.hdds.scm.node.NodeManager) Logger(org.slf4j.Logger) ContainerNotFoundException(org.apache.hadoop.hdds.scm.container.ContainerNotFoundException) Collection(java.util.Collection) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) Set(java.util.Set) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) DatanodeUsageInfo(org.apache.hadoop.hdds.scm.node.DatanodeUsageInfo) HashMap(java.util.HashMap) UUID(java.util.UUID) ContainerPlacementStatus(org.apache.hadoop.hdds.scm.ContainerPlacementStatus) Collectors(java.util.stream.Collectors) PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) List(java.util.List) ContainerReplica(org.apache.hadoop.hdds.scm.container.ContainerReplica) Map(java.util.Map) ContainerManager(org.apache.hadoop.hdds.scm.container.ContainerManager) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ContainerInfo(org.apache.hadoop.hdds.scm.container.ContainerInfo)
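
The heart of this check is the replica-list substitution: drop the source node from the current replica set, append the target, and ask the placement policy whether the hypothetical layout is still valid. The sketch below shows just that substitution with a toy two-rack rule; the Node record and spansTwoRacks are illustrative stand-ins, not DatanodeDetails or the real PlacementPolicy.

import java.util.List;
import java.util.stream.Collectors;

public class PlacementSubstitutionSketch {

    // Hypothetical node type standing in for DatanodeDetails.
    record Node(String id, String rack) { }

    // Toy policy standing in for PlacementPolicy.validateContainerPlacement:
    // require the replicas to span at least two racks.
    static boolean spansTwoRacks(List<Node> replicas) {
        return replicas.stream().map(Node::rack).distinct().count() >= 2;
    }

    static boolean moveSatisfiesPolicy(List<Node> replicas, Node source, Node target) {
        // Same substitution as above: remove the source replica, add the target.
        List<Node> afterMove = replicas.stream()
                .filter(node -> !node.equals(source))
                .collect(Collectors.toList());
        afterMove.add(target);
        return spansTwoRacks(afterMove);
    }

    public static void main(String[] args) {
        Node a = new Node("dn-a", "rack-1");
        Node b = new Node("dn-b", "rack-2");
        Node c = new Node("dn-c", "rack-1");
        // Moving the replica on dn-b to dn-c would leave everything on rack-1.
        System.out.println(moveSatisfiesPolicy(List.of(a, b), b, c)); // false
    }
}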

Example 70 with ContainerID

use of org.apache.hadoop.hdds.scm.container.ContainerID in project ozone by apache.

the class TestFailoverWithSCMHA method testMoveFailover.

@Test
public void testMoveFailover() throws Exception {
    SCMClientConfig scmClientConfig = conf.getObject(SCMClientConfig.class);
    scmClientConfig.setRetryCount(1);
    scmClientConfig.setRetryInterval(100);
    scmClientConfig.setMaxRetryTimeout(1500);
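    // Note (assumption, not from the original source): with retryInterval and
    // maxRetryTimeout configured, the effective retry count appears to be
    // derived as maxRetryTimeout / retryInterval (1500 / 100 = 15), which is
    // why the assertion below expects 15 even though retryCount was set to 1.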
    Assert.assertEquals(scmClientConfig.getRetryCount(), 15);
    conf.setFromObject(scmClientConfig);
    StorageContainerManager scm = getLeader(cluster);
    Assert.assertNotNull(scm);
    final ContainerID id = getContainer(HddsProtos.LifeCycleState.CLOSED).containerID();
    DatanodeDetails dn1 = randomDatanodeDetails();
    DatanodeDetails dn2 = randomDatanodeDetails();
    // Here we only want to verify that the new leader picks up the same
    // inflight move after failover, so there is no need to create a real
    // container or datanodes; just mock them, bypassing all the pre-checks.
    scm.getReplicationManager().getMoveScheduler().startMove(id.getProtobuf(), (new MoveDataNodePair(dn1, dn2)).getProtobufMessage(CURRENT_VERSION));
    SCMBlockLocationFailoverProxyProvider failoverProxyProvider = new SCMBlockLocationFailoverProxyProvider(conf);
    failoverProxyProvider.changeCurrentProxy(scm.getSCMNodeId());
    ScmBlockLocationProtocolClientSideTranslatorPB scmBlockLocationClient = new ScmBlockLocationProtocolClientSideTranslatorPB(failoverProxyProvider);
    GenericTestUtils.setLogLevel(SCMBlockLocationFailoverProxyProvider.LOG, Level.DEBUG);
    GenericTestUtils.LogCapturer logCapture = GenericTestUtils.LogCapturer.captureLogs(SCMBlockLocationFailoverProxyProvider.LOG);
    ScmBlockLocationProtocol scmBlockLocationProtocol = TracingUtil.createProxy(scmBlockLocationClient, ScmBlockLocationProtocol.class, conf);
    scmBlockLocationProtocol.getScmInfo();
    Assert.assertTrue(logCapture.getOutput().contains("Performing failover to suggested leader"));
    scm = getLeader(cluster);
    Assert.assertNotNull(scm);
    // after switching to the new leader successfully, the new leader should
    // see the same inflight move
    Map<ContainerID, MoveDataNodePair> inflightMove = scm.getReplicationManager().getMoveScheduler().getInflightMove();
    Assert.assertTrue(inflightMove.containsKey(id));
    MoveDataNodePair mp = inflightMove.get(id);
    Assert.assertTrue(dn2.equals(mp.getTgt()));
    Assert.assertTrue(dn1.equals(mp.getSrc()));
    // complete move in the new leader
    scm.getReplicationManager().getMoveScheduler().completeMove(id.getProtobuf());
    SCMContainerLocationFailoverProxyProvider proxyProvider = new SCMContainerLocationFailoverProxyProvider(conf, null);
    GenericTestUtils.setLogLevel(SCMContainerLocationFailoverProxyProvider.LOG, Level.DEBUG);
    logCapture = GenericTestUtils.LogCapturer.captureLogs(SCMContainerLocationFailoverProxyProvider.LOG);
    proxyProvider.changeCurrentProxy(scm.getSCMNodeId());
    StorageContainerLocationProtocol scmContainerClient = TracingUtil.createProxy(new StorageContainerLocationProtocolClientSideTranslatorPB(proxyProvider), StorageContainerLocationProtocol.class, conf);
    scmContainerClient.allocateContainer(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.ONE, "ozone");
    Assert.assertTrue(logCapture.getOutput().contains("Performing failover to suggested leader"));
    // after switching to the new leader successfully, the new leader should
    // see the same inflight move map, which should no longer contain
    // that container.
    scm = getLeader(cluster);
    Assert.assertNotNull(scm);
    inflightMove = scm.getReplicationManager().getMoveScheduler().getInflightMove();
    Assert.assertFalse(inflightMove.containsKey(id));
}
Also used : SCMBlockLocationFailoverProxyProvider(org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) SCMClientConfig(org.apache.hadoop.hdds.scm.proxy.SCMClientConfig) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) SCMContainerLocationFailoverProxyProvider(org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider) ScmBlockLocationProtocolClientSideTranslatorPB(org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB) ScmBlockLocationProtocol(org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol) MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) StorageContainerLocationProtocol(org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol) StorageContainerLocationProtocolClientSideTranslatorPB(org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB) Test(org.junit.jupiter.api.Test)

Aggregations

ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID) 70
Test (org.junit.Test) 36
ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo) 28
DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails) 26
IOException (java.io.IOException) 17
UUID (java.util.UUID) 16
Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline) 16
ContainerWithPipeline (org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) 14
ContainerNotFoundException (org.apache.hadoop.hdds.scm.container.ContainerNotFoundException) 12
ArrayList (java.util.ArrayList) 11
Map (java.util.Map) 11
ContainerReplica (org.apache.hadoop.hdds.scm.container.ContainerReplica) 11
HashMap (java.util.HashMap) 10
List (java.util.List) 9
HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos) 9
ContainerManager (org.apache.hadoop.hdds.scm.container.ContainerManager) 8
TreeSet (java.util.TreeSet) 7
PipelineID (org.apache.hadoop.hdds.scm.pipeline.PipelineID) 7
StorageContainerManager (org.apache.hadoop.hdds.scm.server.StorageContainerManager) 7
OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration) 6