Search in sources :

Example 1 with CommandForDatanode

use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.

the class PipelineReportHandler method processPipelineReport.

/**
 * Handles a single pipeline report received from a datanode.
 *
 * <p>When the pipeline manager does not know the reported pipeline and this
 * SCM is the leader, a ClosePipelineCommand is fired back to the reporting
 * datanode and processing stops. Otherwise the report is applied to the
 * pipeline, a healthy pipeline in the ALLOCATED state is opened, and an
 * OPEN_PIPELINE event is fired for a healthy pipeline when the availability
 * check is enabled and SCM is in safe mode.
 *
 * @param report    the pipeline report to process
 * @param dn        the datanode that sent the report
 * @param publisher publisher used to fire follow-up events
 * @throws IOException if one of the underlying pipeline manager or SCM
 *                     context calls fails
 */
protected void processPipelineReport(PipelineReport report, DatanodeDetails dn, EventPublisher publisher) throws IOException {
    final PipelineID reportedId = PipelineID.getFromProtobuf(report.getPipelineID());
    Pipeline reportedPipeline;
    try {
        reportedPipeline = pipelineManager.getPipeline(reportedId);
    } catch (PipelineNotFoundException notFound) {
        // Only the leader tells the datanode to close a pipeline SCM does not know.
        if (!scmContext.isLeader()) {
            return;
        }
        LOGGER.info("Reported pipeline {} is not found", reportedId);
        final SCMCommand<?> closeCommand = new ClosePipelineCommand(reportedId);
        closeCommand.setTerm(scmContext.getTermOfLeader());
        publisher.fireEvent(SCMEvents.DATANODE_COMMAND, new CommandForDatanode<>(dn.getUuid(), closeCommand));
        return;
    }

    setReportedDatanode(reportedPipeline, dn);
    setPipelineLeaderId(report, reportedPipeline, dn);

    final boolean stillAllocated = reportedPipeline.getPipelineState() == Pipeline.PipelineState.ALLOCATED;
    if (stillAllocated) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Pipeline {} {} reported by {}", reportedPipeline.getReplicationConfig(), reportedPipeline.getId(), dn);
        }
        // An allocated pipeline is opened only once it reports itself healthy.
        if (reportedPipeline.isHealthy()) {
            pipelineManager.openPipeline(reportedId);
        }
    }

    // Health is checked first so the safe-mode lookup only happens for healthy pipelines.
    if (reportedPipeline.isHealthy() && pipelineAvailabilityCheck && scmSafeModeManager.getInSafeMode()) {
        publisher.fireEvent(SCMEvents.OPEN_PIPELINE, reportedPipeline);
    }
}
Also used : CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) ClosePipelineCommand(org.apache.hadoop.ozone.protocol.commands.ClosePipelineCommand) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand)

Example 2 with CommandForDatanode

use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.

the class PipelineActionHandler method processPipelineAction.

/**
 * Handles one PipelineAction reported by a datanode.
 *
 * <p>A CLOSE action closes the referenced pipeline through the pipeline
 * manager; any other action is only logged as an error. If the pipeline is
 * not known, a ClosePipelineCommand is fired back to the reporting datanode,
 * unless the leader term cannot be obtained because this SCM is not the
 * leader. Other I/O failures are logged and not rethrown.
 *
 * @param datanode       the datanode that sent the PipelineAction
 * @param pipelineAction the PipelineAction to process
 * @param publisher      EventPublisher used to fire the close command event
 */
private void processPipelineAction(final DatanodeDetails datanode, final PipelineAction pipelineAction, final EventPublisher publisher) {
    final ClosePipelineInfo closeInfo = pipelineAction.getClosePipeline();
    final PipelineAction.Action requestedAction = pipelineAction.getAction();
    final PipelineID pipelineId = PipelineID.getFromProtobuf(closeInfo.getPipelineID());
    try {
        LOG.info("Received pipeline action {} for {} from datanode {}. " + "Reason : {}", requestedAction, pipelineId, datanode.getUuidString(), closeInfo.getDetailedReason());
        if (requestedAction != PipelineAction.Action.CLOSE) {
            LOG.error("unknown pipeline action:{}", requestedAction);
            return;
        }
        pipelineManager.closePipeline(pipelineManager.getPipeline(pipelineId), false);
    } catch (PipelineNotFoundException notFound) {
        LOG.warn("Pipeline action {} received for unknown pipeline {}, " + "firing close pipeline event.", requestedAction, pipelineId);
        final SCMCommand<?> closeCommand = new ClosePipelineCommand(pipelineId);
        try {
            closeCommand.setTerm(scmContext.getTermOfLeader());
        } catch (NotLeaderException notLeader) {
            // Without a leader term the command is not sent at all.
            LOG.warn("Skip sending ClosePipelineCommand for pipeline {}," + " since not leader SCM.", pipelineId);
            return;
        }
        publisher.fireEvent(SCMEvents.DATANODE_COMMAND, new CommandForDatanode<>(datanode.getUuid(), closeCommand));
    } catch (IOException ioFailure) {
        LOG.error("Could not execute pipeline action={} pipeline={}", requestedAction, pipelineId, ioFailure);
    }
}
Also used : CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) NotLeaderException(org.apache.ratis.protocol.exceptions.NotLeaderException) ClosePipelineInfo(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ClosePipelineInfo) ClosePipelineCommand(org.apache.hadoop.ozone.protocol.commands.ClosePipelineCommand) IOException(java.io.IOException) PipelineAction(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineAction) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand)

Example 3 with CommandForDatanode

use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.

the class TestSCMNodeManager method testProcessLayoutVersionLowerMlv.

/**
 * Verifies that a datanode reporting a metadata layout version one below
 * SCM's own gets exactly one DATANODE_COMMAND event, addressed to that
 * datanode and carrying a finalizeNewLayoutVersionCommand.
 */
@Test
public void testProcessLayoutVersionLowerMlv() throws IOException {
    OzoneConfiguration conf = new OzoneConfiguration();
    SCMStorageConfig storageConfig = mock(SCMStorageConfig.class);
    when(storageConfig.getClusterID()).thenReturn("xyz111");
    EventPublisher publisher = mock(EventPublisher.class);
    HDDSLayoutVersionManager layoutVersionManager = new HDDSLayoutVersionManager(storageConfig.getLayoutVersion());
    SCMNodeManager nodeManager = new SCMNodeManager(conf, storageConfig, publisher, new NetworkTopologyImpl(conf), SCMContext.emptyContext(), layoutVersionManager);

    // Registering the datanode must announce it exactly once.
    DatanodeDetails registeredNode = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager);
    verify(publisher, times(1)).fireEvent(NEW_NODE, registeredNode);

    // Report a metadata layout version one behind SCM, with the software version at SCM's MLV.
    int scmMetadataVersion = nodeManager.getLayoutVersionManager().getMetadataLayoutVersion();
    LayoutVersionProto laggingLayout = LayoutVersionProto.newBuilder()
            .setMetadataLayoutVersion(scmMetadataVersion - 1)
            .setSoftwareLayoutVersion(scmMetadataVersion)
            .build();
    nodeManager.processLayoutVersionReport(registeredNode, laggingLayout);

    ArgumentCaptor<CommandForDatanode> commandCaptor = ArgumentCaptor.forClass(CommandForDatanode.class);
    verify(publisher, times(1)).fireEvent(Mockito.eq(DATANODE_COMMAND), commandCaptor.capture());
    CommandForDatanode sentCommand = commandCaptor.getValue();
    assertTrue(sentCommand.getDatanodeId().equals(registeredNode.getUuid()));
    assertTrue(sentCommand.getCommand().getType().equals(finalizeNewLayoutVersionCommand));
}
Also used : NetworkTopologyImpl(org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) SCMStorageConfig(org.apache.hadoop.hdds.scm.server.SCMStorageConfig) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) MockDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) HDDSLayoutVersionManager(org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager) Test(org.junit.Test)

Example 4 with CommandForDatanode

use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.

the class TestReplicationManager method testUnderReplicatedQuasiClosedContainerWithUnhealthyReplica.

/**
 * Exercises ReplicationManager on a QUASI_CLOSED container that is under
 * replicated and also holds an UNHEALTHY replica.
 *
 * The test drives three ReplicationManager iterations:
 *
 * 1. A replicate command is expected; the datanode that received it is then
 *    registered as holding a new QUASI_CLOSED replica.
 *
 * 2. A delete command for the unhealthy replica is expected; that replica is
 *    then removed from the container state.
 *
 * 3. With the unhealthy replica gone, a second replicate command is
 *    expected and the in-flight deletion is expected to be cleared.
 *
 * Command counts, metrics and the container report are asserted after each
 * iteration.
 */
@Test
public void testUnderReplicatedQuasiClosedContainerWithUnhealthyReplica() throws IOException, InterruptedException, TimeoutException {
    final SCMCommandProto.Type replicateType = SCMCommandProto.Type.replicateContainerCommand;
    final SCMCommandProto.Type deleteType = SCMCommandProto.Type.deleteContainerCommand;

    final ContainerInfo quasiClosedContainer = getContainer(LifeCycleState.QUASI_CLOSED);
    quasiClosedContainer.setUsedBytes(99);
    final ContainerID containerId = quasiClosedContainer.containerID();
    final UUID originNodeId = UUID.randomUUID();
    final ContainerReplica quasiClosedReplica = getReplicas(containerId, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails());
    final ContainerReplica unhealthyReplica = getReplicas(containerId, State.UNHEALTHY, 1000L, originNodeId, randomDatanodeDetails());
    containerStateManager.addContainer(quasiClosedContainer.getProtobuf());
    containerStateManager.updateContainerReplica(containerId, quasiClosedReplica);
    containerStateManager.updateContainerReplica(containerId, unhealthyReplica);

    // Baselines, so the assertions below are relative to whatever was recorded before.
    final int replicateCountBefore = datanodeCommandHandler.getInvocationCount(replicateType);
    final int deleteCountBefore = datanodeCommandHandler.getInvocationCount(deleteType);
    final long deletionBytesBefore = replicationManager.getMetrics().getNumDeletionBytesTotal();

    // First iteration: wait until one more replicate command has been received.
    replicationManager.processAll();
    GenericTestUtils.waitFor(() -> datanodeCommandHandler.getInvocationCount(replicateType) == (replicateCountBefore + 1), 50, 5000);
    Optional<CommandForDatanode> firstReplicateCommand = datanodeCommandHandler.getReceivedCommands()
            .stream()
            .filter(received -> received.getCommand().getType().equals(replicateType))
            .findFirst();
    Assert.assertTrue(firstReplicateCommand.isPresent());

    // Register a replica on the datanode the replicate command was addressed to.
    DatanodeDetails commandRecipient = createDatanodeDetails(firstReplicateCommand.get().getDatanodeId());
    ContainerReplica replicaOnRecipient = getReplicas(containerId, State.QUASI_CLOSED, 1000L, originNodeId, commandRecipient);
    containerStateManager.updateContainerReplica(containerId, replicaOnRecipient);
    ReplicationManagerReport report = replicationManager.getContainerReport();
    Assert.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED));
    Assert.assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
    Assert.assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
    Assert.assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNHEALTHY));

    /*
     * The new replica has been reported to SCM; in the next
     * ReplicationManager iteration the unhealthy replica should be deleted.
     */
    replicationManager.processAll();
    eventQueue.processAll(1000);
    final int expectedDeleteCount = deleteCountBefore + 1;
    Assert.assertEquals(expectedDeleteCount, datanodeCommandHandler.getInvocationCount(deleteType));
    // The delete command must target the datanode holding the unhealthy replica.
    Assert.assertTrue(datanodeCommandHandler.received(deleteType, unhealthyReplica.getDatanodeDetails()));
    Assert.assertEquals(expectedDeleteCount, replicationManager.getMetrics().getNumDeletionCmdsSent());
    Assert.assertEquals(deletionBytesBefore + 99, replicationManager.getMetrics().getNumDeletionBytesTotal());
    Assert.assertEquals(1, replicationManager.getInflightDeletion().size());
    Assert.assertEquals(1, replicationManager.getMetrics().getInflightDeletion());

    containerStateManager.removeContainerReplica(containerId, unhealthyReplica);
    final long deletionsCompletedBefore = replicationManager.getMetrics().getNumDeletionCmdsCompleted();
    report = replicationManager.getContainerReport();
    Assert.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED));
    Assert.assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
    Assert.assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
    Assert.assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.UNHEALTHY));

    /*
     * The unhealthy replica has been removed; the next ReplicationManager
     * iteration should re-replicate the container because it is under
     * replicated again.
     */
    replicationManager.processAll();
    eventQueue.processAll(1000);
    Assert.assertEquals(0, replicationManager.getInflightDeletion().size());
    Assert.assertEquals(0, replicationManager.getMetrics().getInflightDeletion());
    Assert.assertEquals(deletionsCompletedBefore + 1, replicationManager.getMetrics().getNumDeletionCmdsCompleted());
    final int expectedReplicateCount = replicateCountBefore + 2;
    Assert.assertEquals(expectedReplicateCount, datanodeCommandHandler.getInvocationCount(replicateType));
    Assert.assertEquals(expectedReplicateCount, replicationManager.getMetrics().getNumReplicationCmdsSent());
    Assert.assertEquals(1, replicationManager.getInflightReplication().size());
    Assert.assertEquals(1, replicationManager.getMetrics().getInflightReplication());
    report = replicationManager.getContainerReport();
    Assert.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED));
    Assert.assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
    Assert.assertEquals(1, report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
    Assert.assertEquals(0, report.getStat(ReplicationManagerReport.HealthState.UNHEALTHY));
}
Also used : HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) TimeoutException(java.util.concurrent.TimeoutException) STALE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE) DBStore(org.apache.hadoop.hdds.utils.db.DBStore) SCMCommandProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto) HddsTestUtils.getContainer(org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) After(org.junit.After) Map(java.util.Map) SCMHAManager(org.apache.hadoop.hdds.scm.ha.SCMHAManager) HddsConfigKeys(org.apache.hadoop.hdds.HddsConfigKeys) ContainerPlacementStatusDefault(org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault) FileUtil(org.apache.hadoop.fs.FileUtil) DBStoreBuilder(org.apache.hadoop.hdds.utils.db.DBStoreBuilder) Longs(com.google.common.primitives.Longs) Set(java.util.Set) UUID(java.util.UUID) IN_SERVICE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE) Instant(java.time.Instant) HddsTestUtils.getReplicas(org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas) Collectors(java.util.stream.Collectors) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) ZoneId(java.time.ZoneId) PlacementPolicy(org.apache.hadoop.hdds.scm.PlacementPolicy) List(java.util.List) MoveDataNodePair(org.apache.hadoop.hdds.scm.container.common.helpers.MoveDataNodePair) ReplicationManagerConfiguration(org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration) SCMServiceManager(org.apache.hadoop.hdds.scm.ha.SCMServiceManager) Optional(java.util.Optional) 
DECOMMISSIONED(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) IntStream(java.util.stream.IntStream) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) MoveResult(org.apache.hadoop.hdds.scm.container.ReplicationManager.MoveResult) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) MockSCMHAManager(org.apache.hadoop.hdds.scm.ha.MockSCMHAManager) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DECOMMISSIONING(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING) Before(org.junit.Before) LifeCycleEvent(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent) HEALTHY(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY) State(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) FileUtils(org.apache.commons.io.FileUtils) EventHandler(org.apache.hadoop.hdds.server.events.EventHandler) Test(org.junit.Test) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) SCMEvents(org.apache.hadoop.hdds.scm.events.SCMEvents) PipelineManager(org.apache.hadoop.hdds.scm.pipeline.PipelineManager) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) IN_MAINTENANCE(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE) Mockito(org.mockito.Mockito) 
SCMDBDefinition(org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition) CLOSED(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED) SCMDBTransactionBufferImpl(org.apache.hadoop.hdds.scm.metadata.SCMDBTransactionBufferImpl) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) Assert(org.junit.Assert) TestClock(org.apache.ozone.test.TestClock) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) UUID(java.util.UUID) Test(org.junit.Test)

Example 5 with CommandForDatanode

use of org.apache.hadoop.ozone.protocol.commands.CommandForDatanode in project ozone by apache.

the class RatisPipelineProvider method close.

/**
 * Sends a close command for the given pipeline to each of its datanodes.
 *
 * <p>One ClosePipelineCommand is created for the pipeline, stamped with the
 * current leader term, then wrapped per datanode and fired as a
 * DATANODE_COMMAND event.
 *
 * @param pipeline the pipeline whose datanodes receive the close command
 * @throws NotLeaderException if the leader term cannot be obtained because
 *                            this SCM is not the leader
 */
@Override
public void close(Pipeline pipeline) throws NotLeaderException {
    final ClosePipelineCommand command = new ClosePipelineCommand(pipeline.getId());
    // The term is resolved once, before any event is fired, so a non-leader sends nothing.
    command.setTerm(scmContext.getTermOfLeader());
    pipeline.getNodes().forEach(datanode -> {
        final CommandForDatanode<?> addressedCommand = new CommandForDatanode<>(datanode.getUuid(), command);
        LOG.info("Send pipeline:{} close command to datanode {}", pipeline.getId(), addressedCommand.getDatanodeId());
        eventPublisher.fireEvent(SCMEvents.DATANODE_COMMAND, addressedCommand);
    });
}
Also used : CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) ClosePipelineCommand(org.apache.hadoop.ozone.protocol.commands.ClosePipelineCommand)

Aggregations

CommandForDatanode (org.apache.hadoop.ozone.protocol.commands.CommandForDatanode)6 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)3 EventPublisher (org.apache.hadoop.hdds.server.events.EventPublisher)3 ClosePipelineCommand (org.apache.hadoop.ozone.protocol.commands.ClosePipelineCommand)3 Test (org.junit.Test)3 IOException (java.io.IOException)2 UUID (java.util.UUID)2 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)2 MockDatanodeDetails.createDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails)2 MockDatanodeDetails.randomDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails)2 SCMCommand (org.apache.hadoop.ozone.protocol.commands.SCMCommand)2 Longs (com.google.common.primitives.Longs)1 File (java.io.File)1 Field (java.lang.reflect.Field)1 Instant (java.time.Instant)1 ZoneId (java.time.ZoneId)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1