Search in sources :

Example 1 with Replicate

use of org.apache.hadoop.hdds.scm.metadata.Replicate in project ozone by apache.

the class ReplicationManager method handleUnderReplicatedContainer.

/**
 * If the given container is under replicated, identify a new set of
 * datanode(s) to replicate the container using PlacementPolicy
 * and send replicate container command to the identified datanode(s).
 *
 * <p>Replication sources are the CLOSED or QUASI_CLOSED replicas that sit on
 * healthy nodes and have no delete in flight, ordered by descending
 * sequence id. Nodes that already hold a replica, or that are the target of
 * an in-flight replication, are excluded when choosing new targets.
 * Commands are only sent if the container is under replicated or if the
 * newly selected nodes reduce the mis-replication count.
 *
 * <p>{@link IOException} and {@link IllegalStateException} raised while
 * processing are logged at WARN level and not propagated.
 *
 * @param container ContainerInfo
 * @param replicaSet An instance of ContainerReplicaCount, containing the
 *                   current replica count and inflight adds and deletes
 * @param placementStatus placement status supplied by the caller; used to
 *                        decide whether the container needs any handling
 */
private void handleUnderReplicatedContainer(final ContainerInfo container, final ContainerReplicaCount replicaSet, final ContainerPlacementStatus placementStatus) {
    LOG.debug("Handling under-replicated container: {}", container);
    Set<ContainerReplica> replicas = replicaSet.getReplica();
    try {
        if (replicaSet.isSufficientlyReplicated() && placementStatus.isPolicySatisfied()) {
            LOG.info("The container {} with replicas {} is sufficiently " + "replicated and is not mis-replicated", container.getContainerID(), replicaSet);
            return;
        }
        int repDelta = replicaSet.additionalReplicaNeeded();
        final ContainerID id = container.containerID();
        final List<DatanodeDetails> deletionInFlight = inflightDeletion.getOrDefault(id, Collections.emptyList()).stream().map(action -> action.datanode).collect(Collectors.toList());
        final List<DatanodeDetails> replicationInFlight = inflightReplication.getOrDefault(id, Collections.emptyList()).stream().map(action -> action.datanode).collect(Collectors.toList());
        // Candidate sources: closed/quasi-closed replicas on healthy nodes that
        // are not about to be deleted, highest sequence id first.
        final List<DatanodeDetails> source = replicas.stream().filter(r -> r.getState() == State.QUASI_CLOSED || r.getState() == State.CLOSED).filter(r -> getNodeStatus(r.getDatanodeDetails()).isHealthy()).filter(r -> !deletionInFlight.contains(r.getDatanodeDetails())).sorted((r1, r2) -> r2.getSequenceId().compareTo(r1.getSequenceId())).map(ContainerReplica::getDatanodeDetails).collect(Collectors.toList());
        if (source.isEmpty()) {
            LOG.warn("Cannot replicate container {}, no healthy replica found.", container.containerID());
            return;
        }
        final int replicationFactor = container.getReplicationConfig().getRequiredNodes();
        // Want to check if the container is mis-replicated after considering
        // inflight add and delete.
        // Create a new list from source (healthy replicas minus pending delete)
        List<DatanodeDetails> targetReplicas = new ArrayList<>(source);
        // Then add any pending additions
        targetReplicas.addAll(replicationInFlight);
        final ContainerPlacementStatus inFlightplacementStatus = containerPlacement.validateContainerPlacement(targetReplicas, replicationFactor);
        final int misRepDelta = inFlightplacementStatus.misReplicationCount();
        // Schedule enough replicas to cover whichever shortfall is larger.
        final int replicasNeeded = Math.max(repDelta, misRepDelta);
        if (replicasNeeded <= 0) {
            LOG.debug("Container {} meets replication requirement with " + "inflight replicas", id);
            return;
        }
        // We should ensure that the target datanode has enough space
        // for a complete container to be created, but since the container
        // size may be changed smaller than origin, we should be defensive.
        final long dataSizeRequired = Math.max(container.getUsedBytes(), currentContainerSize);
        final List<DatanodeDetails> excludeList = replicas.stream().map(ContainerReplica::getDatanodeDetails).collect(Collectors.toList());
        excludeList.addAll(replicationInFlight);
        final List<DatanodeDetails> selectedDatanodes = containerPlacement.chooseDatanodes(excludeList, null, replicasNeeded, 0, dataSizeRequired);
        if (repDelta > 0) {
            LOG.info("Container {} is under replicated. Expected replica count" + " is {}, but found {}.", id, replicationFactor, replicationFactor - repDelta);
        }
        int newMisRepDelta = misRepDelta;
        if (misRepDelta > 0) {
            // Report the reason from the same status object that produced
            // misRepDelta, so the message matches the decision being taken.
            LOG.info("Container: {}. {}", id, inFlightplacementStatus.misReplicatedReason());
            // Check if the new target nodes (original plus newly selected nodes)
            // makes the placement policy valid.
            targetReplicas.addAll(selectedDatanodes);
            newMisRepDelta = containerPlacement.validateContainerPlacement(targetReplicas, replicationFactor).misReplicationCount();
        }
        if (repDelta > 0 || newMisRepDelta < misRepDelta) {
            // Only create new replicas if we are missing a replicas or
            // the number of pending mis-replication has improved. No point in
            // creating new replicas for mis-replicated containers unless it
            // improves things.
            for (DatanodeDetails datanode : selectedDatanodes) {
                sendReplicateCommand(container, datanode, source);
            }
        } else {
            LOG.warn("Container {} is mis-replicated, requiring {} additional " + "replicas. After selecting new nodes, mis-replication has not " + "improved. No additional replicas will be scheduled", id, misRepDelta);
        }
    } catch (IOException | IllegalStateException ex) {
        LOG.warn("Exception while replicating container {}.", container.getContainerID(), ex);
    }
}
Also used : ConfigGroup(org.apache.hadoop.hdds.conf.ConfigGroup) ScmConfigKeys(org.apache.hadoop.hdds.scm.ScmConfigKeys) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) DeleteContainerCommand(org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand) LoggerFactory(org.slf4j.LoggerFactory) ConfigurationSource(org.apache.hadoop.hdds.conf.ConfigurationSource) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) Duration(java.time.Duration) Map(java.util.Map) SCMHAManager(org.apache.hadoop.hdds.scm.ha.SCMHAManager) ReplicateContainerCommand(org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand) HddsConfigKeys(org.apache.hadoop.hdds.HddsConfigKeys) ConfigType(org.apache.hadoop.hdds.conf.ConfigType) Predicate(java.util.function.Predicate) MOVE(org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType.MOVE) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ExitUtil(org.apache.hadoop.util.ExitUtil) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) ContainerPlacementStatus(org.apache.hadoop.hdds.scm.ContainerPlacementStatus) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) List(java.util.List) StorageUnit(org.apache.hadoop.hdds.conf.StorageUnit) PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) Config(org.apache.hadoop.hdds.conf.Config) MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) SCMServiceManager(org.apache.hadoop.hdds.scm.ha.SCMServiceManager) SCMHAInvocationHandler(org.apache.hadoop.hdds.scm.ha.SCMHAInvocationHandler) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) Proxy(java.lang.reflect.Proxy) NodeManager(org.apache.hadoop.hdds.scm.node.NodeManager) HealthState(org.apache.hadoop.hdds.scm.container.ReplicationManagerReport.HealthState) 
CURRENT_VERSION(org.apache.hadoop.ozone.ClientVersions.CURRENT_VERSION) Preconditions(org.apache.ratis.util.Preconditions) Replicate(org.apache.hadoop.hdds.scm.metadata.Replicate) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) CompletableFuture(java.util.concurrent.CompletableFuture) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) SCMRatisServer(org.apache.hadoop.hdds.scm.ha.SCMRatisServer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) NotLeaderException(org.apache.ratis.protocol.exceptions.NotLeaderException) SCMService(org.apache.hadoop.hdds.scm.ha.SCMService) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) CloseContainerCommand(org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand) GeneratedMessage(com.google.protobuf.GeneratedMessage) LinkedList(java.util.LinkedList) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) DBTransactionBuffer(org.apache.hadoop.hdds.scm.metadata.DBTransactionBuffer) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) ReentrantLock(java.util.concurrent.locks.ReentrantLock) State(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) IOException(java.io.IOException) SCMEvents(org.apache.hadoop.hdds.scm.events.SCMEvents) NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) Lock(java.util.concurrent.locks.Lock) OZONE(org.apache.hadoop.hdds.conf.ConfigTag.OZONE) Table(org.apache.hadoop.hdds.utils.db.Table) SCM(org.apache.hadoop.hdds.conf.ConfigTag.SCM) Clock(java.time.Clock) ReplicationManagerMetrics(org.apache.hadoop.hdds.scm.container.replication.ReplicationManagerMetrics) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) TableIterator(org.apache.hadoop.hdds.utils.db.TableIterator) Collections(java.util.Collections) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ContainerPlacementStatus(org.apache.hadoop.hdds.scm.ContainerPlacementStatus)

Example 2 with Replicate

use of org.apache.hadoop.hdds.scm.metadata.Replicate in project ozone by apache.

the class ReplicationManager method handleUnstableContainer.

/**
 * Handles unstable container.
 * A container is inconsistent if any of the replica state doesn't
 * match the container state. We have to take appropriate action
 * based on state of the replica.
 *
 * <p>Replicas in OPEN or CLOSING state are sent a close command; replicas in
 * QUASI_CLOSED state whose sequence id equals the container's are sent a
 * force-close command. Of the mismatching replicas that remain, at most one
 * is sent a delete command per invocation.
 *
 * @param container ContainerInfo
 * @param replicas Set of ContainerReplicas
 */
private void handleUnstableContainer(final ContainerInfo container, final Set<ContainerReplica> replicas) {
    // Find unhealthy replicas
    List<ContainerReplica> unhealthyReplicas = replicas.stream().filter(r -> !compareState(container.getState(), r.getState())).collect(Collectors.toList());
    Iterator<ContainerReplica> iterator = unhealthyReplicas.iterator();
    while (iterator.hasNext()) {
        final ContainerReplica replica = iterator.next();
        final State state = replica.getState();
        if (state == State.OPEN || state == State.CLOSING) {
            sendCloseCommand(container, replica.getDatanodeDetails(), false);
            iterator.remove();
        } else if (state == State.QUASI_CLOSED) {
            // Send force close command if the BCSID matches.
            // The replica's sequence id is a boxed value, so compare with
            // equals(): '==' would be a reference comparison whenever the
            // container side is boxed as well.
            if (replica.getSequenceId().equals(container.getSequenceId())) {
                sendCloseCommand(container, replica.getDatanodeDetails(), true);
                iterator.remove();
            }
        }
    }
    // Now we are left with the replicas which are either unhealthy or
    // the BCSID doesn't match. These replicas should be deleted.
    /*
     * If we have unhealthy replicas we go under replicated and then
     * replicate the healthy copy.
     *
     * We also make sure that we delete only one unhealthy replica at a time.
     *
     * If there are two unhealthy replica:
     *  - Delete first unhealthy replica
     *  - Re-replicate the healthy copy
     *  - Delete second unhealthy replica
     *  - Re-replicate the healthy copy
     *
     * Note: Only one action will be executed in a single ReplicationMonitor
     *       iteration. So to complete all the above actions we need four
     *       ReplicationMonitor iterations.
     */
    unhealthyReplicas.stream().findFirst().ifPresent(replica -> sendDeleteCommand(container, replica.getDatanodeDetails(), false));
}
Also used : ConfigGroup(org.apache.hadoop.hdds.conf.ConfigGroup) ScmConfigKeys(org.apache.hadoop.hdds.scm.ScmConfigKeys) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) DeleteContainerCommand(org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand) LoggerFactory(org.slf4j.LoggerFactory) ConfigurationSource(org.apache.hadoop.hdds.conf.ConfigurationSource) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) Duration(java.time.Duration) Map(java.util.Map) SCMHAManager(org.apache.hadoop.hdds.scm.ha.SCMHAManager) ReplicateContainerCommand(org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand) HddsConfigKeys(org.apache.hadoop.hdds.HddsConfigKeys) ConfigType(org.apache.hadoop.hdds.conf.ConfigType) Predicate(java.util.function.Predicate) MOVE(org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType.MOVE) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ExitUtil(org.apache.hadoop.util.ExitUtil) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) ContainerPlacementStatus(org.apache.hadoop.hdds.scm.ContainerPlacementStatus) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) List(java.util.List) StorageUnit(org.apache.hadoop.hdds.conf.StorageUnit) PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) Config(org.apache.hadoop.hdds.conf.Config) MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) SCMServiceManager(org.apache.hadoop.hdds.scm.ha.SCMServiceManager) SCMHAInvocationHandler(org.apache.hadoop.hdds.scm.ha.SCMHAInvocationHandler) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) Proxy(java.lang.reflect.Proxy) NodeManager(org.apache.hadoop.hdds.scm.node.NodeManager) HealthState(org.apache.hadoop.hdds.scm.container.ReplicationManagerReport.HealthState) 
CURRENT_VERSION(org.apache.hadoop.ozone.ClientVersions.CURRENT_VERSION) Preconditions(org.apache.ratis.util.Preconditions) Replicate(org.apache.hadoop.hdds.scm.metadata.Replicate) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) CompletableFuture(java.util.concurrent.CompletableFuture) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) SCMRatisServer(org.apache.hadoop.hdds.scm.ha.SCMRatisServer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) NotLeaderException(org.apache.ratis.protocol.exceptions.NotLeaderException) SCMService(org.apache.hadoop.hdds.scm.ha.SCMService) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) CloseContainerCommand(org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand) GeneratedMessage(com.google.protobuf.GeneratedMessage) LinkedList(java.util.LinkedList) StorageContainerManager(org.apache.hadoop.hdds.scm.server.StorageContainerManager) DBTransactionBuffer(org.apache.hadoop.hdds.scm.metadata.DBTransactionBuffer) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) ReentrantLock(java.util.concurrent.locks.ReentrantLock) State(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) IOException(java.io.IOException) SCMEvents(org.apache.hadoop.hdds.scm.events.SCMEvents) NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) Lock(java.util.concurrent.locks.Lock) OZONE(org.apache.hadoop.hdds.conf.ConfigTag.OZONE) Table(org.apache.hadoop.hdds.utils.db.Table) SCM(org.apache.hadoop.hdds.conf.ConfigTag.SCM) Clock(java.time.Clock) ReplicationManagerMetrics(org.apache.hadoop.hdds.scm.container.replication.ReplicationManagerMetrics) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) TableIterator(org.apache.hadoop.hdds.utils.db.TableIterator) Collections(java.util.Collections) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand) HealthState(org.apache.hadoop.hdds.scm.container.ReplicationManagerReport.HealthState) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) State(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State) NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 GeneratedMessage (com.google.protobuf.GeneratedMessage)2 IOException (java.io.IOException)2 Proxy (java.lang.reflect.Proxy)2 Clock (java.time.Clock)2 Duration (java.time.Duration)2 ArrayList (java.util.ArrayList)2 Collections (java.util.Collections)2 Comparator (java.util.Comparator)2 HashSet (java.util.HashSet)2 Iterator (java.util.Iterator)2 LinkedHashMap (java.util.LinkedHashMap)2 LinkedList (java.util.LinkedList)2 List (java.util.List)2 Map (java.util.Map)2 Set (java.util.Set)2 UUID (java.util.UUID)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 TimeUnit (java.util.concurrent.TimeUnit)2