Search in sources :

Example 1 with SetNodeOperationalStateCommand

use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.

the class HeartbeatEndpointTask method processResponse.

/**
 * Walks through every command carried by an SCM heartbeat response and acts
 * on it.
 *
 * A reregisterCommand switches the endpoint state from HEARTBEAT to REGISTER
 * (and is only logged when the endpoint is in any other state). Every other
 * recognised command type is decoded from its protobuf form, given the term
 * of the enclosing command proto when one is present, and passed to
 * {@code context.addCommand}. A delete-blocks command is only passed on when
 * it actually lists blocks to delete.
 *
 * @param response - SCMHeartbeat response.
 * @param datanodeDetails - details of the datanode the response is expected
 *                        to be addressed to; its UUID is compared
 *                        case-insensitively with the UUID in the response.
 * @throws IllegalArgumentException if the response contains a command type
 *                                  that is not handled here.
 */
private void processResponse(SCMHeartbeatResponseProto response, final DatanodeDetailsProto datanodeDetails) {
    // Verify the response is indeed for this datanode.
    final boolean addressedToThisNode = response.getDatanodeUUID().equalsIgnoreCase(datanodeDetails.getUuid());
    Preconditions.checkState(addressedToThisNode, "Unexpected datanode ID in the response.");
    for (SCMCommandProto cmdProto : response.getCommandsList()) {
        switch (cmdProto.getCommandType()) {
            case reregisterCommand: {
                // Re-registration is only meaningful while heartbeating.
                if (rpcEndpoint.getState() != EndPointStates.HEARTBEAT) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Illegal state {} found, expecting {}.", rpcEndpoint.getState().name(), EndPointStates.HEARTBEAT);
                    }
                    break;
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received SCM notification to register." + " Interrupt HEARTBEAT and transit to REGISTER state.");
                }
                rpcEndpoint.setState(EndPointStates.REGISTER);
                break;
            }
            case deleteBlocksCommand: {
                DeleteBlocksCommand deleteBlocks = DeleteBlocksCommand.getFromProtobuf(cmdProto.getDeleteBlocksCommandProto());
                if (cmdProto.hasTerm()) {
                    deleteBlocks.setTerm(cmdProto.getTerm());
                }
                // A command without any block to delete is dropped.
                if (!deleteBlocks.blocksTobeDeleted().isEmpty()) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(DeletedContainerBlocksSummary.getFrom(deleteBlocks.blocksTobeDeleted()).toString());
                    }
                    this.context.addCommand(deleteBlocks);
                }
                break;
            }
            case closeContainerCommand: {
                CloseContainerCommand closeCmd = CloseContainerCommand.getFromProtobuf(cmdProto.getCloseContainerCommandProto());
                if (cmdProto.hasTerm()) {
                    closeCmd.setTerm(cmdProto.getTerm());
                }
                // Unlike the other commands, a close request may carry a token.
                if (cmdProto.hasEncodedToken()) {
                    closeCmd.setEncodedToken(cmdProto.getEncodedToken());
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received SCM container close request for container {}", closeCmd.getContainerID());
                }
                this.context.addCommand(closeCmd);
                break;
            }
            case replicateContainerCommand: {
                ReplicateContainerCommand replicateCmd = ReplicateContainerCommand.getFromProtobuf(cmdProto.getReplicateContainerCommandProto());
                if (cmdProto.hasTerm()) {
                    replicateCmd.setTerm(cmdProto.getTerm());
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received SCM container replicate request for container {}", replicateCmd.getContainerID());
                }
                this.context.addCommand(replicateCmd);
                break;
            }
            case deleteContainerCommand: {
                DeleteContainerCommand deleteCmd = DeleteContainerCommand.getFromProtobuf(cmdProto.getDeleteContainerCommandProto());
                if (cmdProto.hasTerm()) {
                    deleteCmd.setTerm(cmdProto.getTerm());
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received SCM delete container request for container {}", deleteCmd.getContainerID());
                }
                this.context.addCommand(deleteCmd);
                break;
            }
            case createPipelineCommand: {
                CreatePipelineCommand createCmd = CreatePipelineCommand.getFromProtobuf(cmdProto.getCreatePipelineCommandProto());
                if (cmdProto.hasTerm()) {
                    createCmd.setTerm(cmdProto.getTerm());
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received SCM create pipeline request {}", createCmd.getPipelineID());
                }
                this.context.addCommand(createCmd);
                break;
            }
            case closePipelineCommand: {
                ClosePipelineCommand closePipelineCmd = ClosePipelineCommand.getFromProtobuf(cmdProto.getClosePipelineCommandProto());
                if (cmdProto.hasTerm()) {
                    closePipelineCmd.setTerm(cmdProto.getTerm());
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received SCM close pipeline request {}", closePipelineCmd.getPipelineID());
                }
                this.context.addCommand(closePipelineCmd);
                break;
            }
            case setNodeOperationalStateCommand: {
                SetNodeOperationalStateCommand opStateCmd = SetNodeOperationalStateCommand.getFromProtobuf(cmdProto.getSetNodeOperationalStateCommandProto());
                if (cmdProto.hasTerm()) {
                    opStateCmd.setTerm(cmdProto.getTerm());
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received SCM set operational state command. State: {} " + "Expiry: {}", opStateCmd.getOpState(), opStateCmd.getStateExpiryEpochSeconds());
                }
                this.context.addCommand(opStateCmd);
                break;
            }
            case finalizeNewLayoutVersionCommand: {
                FinalizeNewLayoutVersionCommand finalizeCmd = FinalizeNewLayoutVersionCommand.getFromProtobuf(cmdProto.getFinalizeNewLayoutVersionCommandProto());
                if (cmdProto.hasTerm()) {
                    finalizeCmd.setTerm(cmdProto.getTerm());
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received SCM finalize command {}", finalizeCmd.getId());
                }
                this.context.addCommand(finalizeCmd);
                break;
            }
            case refreshVolumeUsageInfo: {
                RefreshVolumeUsageCommand refreshCmd = RefreshVolumeUsageCommand.getFromProtobuf(cmdProto.getRefreshVolumeUsageCommandProto());
                if (cmdProto.hasTerm()) {
                    refreshCmd.setTerm(cmdProto.getTerm());
                }
                this.context.addCommand(refreshCmd);
                break;
            }
            default:
                throw new IllegalArgumentException("Unknown response : " + cmdProto.getCommandType().name());
        }
    }
}
Also used : RefreshVolumeUsageCommand(org.apache.hadoop.ozone.protocol.commands.RefreshVolumeUsageCommand) SCMCommandProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto) ReplicateContainerCommand(org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand) ClosePipelineCommand(org.apache.hadoop.ozone.protocol.commands.ClosePipelineCommand) CloseContainerCommand(org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand) SetNodeOperationalStateCommand(org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand) DeleteBlocksCommand(org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand) CreatePipelineCommand(org.apache.hadoop.ozone.protocol.commands.CreatePipelineCommand) FinalizeNewLayoutVersionCommand(org.apache.hadoop.ozone.protocol.commands.FinalizeNewLayoutVersionCommand) DeleteContainerCommand(org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand)

Example 2 with SetNodeOperationalStateCommand

use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.

the class SetNodeOperationalStateCommandHandler method handle.

/**
 * Applies a set-node-operational-state command to this datanode.
 *
 * The operational state and its expiry carried by the command are written
 * into the DatanodeDetails obtained from the context's parent and then
 * persisted. Commands of any other type are logged and skipped. The
 * invocation counter is incremented for every call; elapsed time is only
 * accumulated for commands that were actually handled.
 *
 * @param command - SCM Command
 * @param container - Ozone Container.
 * @param context - Current Context.
 * @param connectionManager - The SCMs that we are talking to.
 */
@Override
public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) {
    final long begin = Time.monotonicNow();
    invocationCount.incrementAndGet();
    if (command.getType() != Type.setNodeOperationalStateCommand) {
        LOG.warn("Skipping handling command, expected command " + "type {} but found {}", Type.setNodeOperationalStateCommand, command.getType());
        return;
    }
    final SetNodeOperationalStateCommand opStateCommand = (SetNodeOperationalStateCommand) command;
    final StorageContainerDatanodeProtocolProtos.SetNodeOperationalStateCommandProto opStateProto = opStateCommand.getProto();
    final DatanodeDetails datanode = context.getParent().getDatanodeDetails();
    // The in-memory details are updated first, then written out.
    datanode.setPersistedOpState(opStateProto.getNodeOperationalState());
    datanode.setPersistedOpStateExpiryEpochSec(opStateCommand.getStateExpiryEpochSeconds());
    try {
        persistDatanodeDetails(datanode);
    } catch (IOException ioe) {
        // TODO - this should probably be raised, but it will break the command
        // handler interface.
        LOG.error("Failed to persist the datanode state", ioe);
    }
    final long elapsed = Time.monotonicNow() - begin;
    totalTime.addAndGet(elapsed);
}
Also used : SetNodeOperationalStateCommand(org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) IOException(java.io.IOException) StorageContainerDatanodeProtocolProtos(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos)

Example 3 with SetNodeOperationalStateCommand

use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.

the class TestSCMNodeManager method testSetNodeOpStateAndCommandFired.

/**
 * For leader SCM, ensure that a change to the operationalState of a node
 * fires a SCMCommand of type SetNodeOperationalStateCommand.
 *
 * For follower SCM, no SetNodeOperationalStateCommand should be fired, yet
 * operationalState of the node will be updated according to the heartbeat.
 *
 * @throws IOException propagated from the node manager under test.
 * @throws NodeNotFoundException if the registered datanode cannot be found.
 * @throws AuthenticationException propagated from node manager creation.
 */
@Test
public void testSetNodeOpStateAndCommandFired() throws IOException, NodeNotFoundException, AuthenticationException {
    final int interval = 100;
    OzoneConfiguration conf = getConf();
    conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, interval, MILLISECONDS);
    // If factor 1 pipelines are auto created, registering the new node will
    // trigger a pipeline creation command which may interfere with command
    // checking in this test.
    conf.setBoolean(OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE, false);
    try (SCMNodeManager nodeManager = createNodeManager(conf)) {
        DatanodeDetails dn = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager);
        LayoutVersionManager versionManager = nodeManager.getLayoutVersionManager();
        final LayoutVersionProto layoutInfo = toLayoutVersionProto(versionManager.getMetadataLayoutVersion(), versionManager.getSoftwareLayoutVersion());
        long expiry = System.currentTimeMillis() / 1000 + 1000;
        nodeManager.setNodeOperationalState(dn, HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE, expiry);
        // If found mismatch, leader SCM fires a SetNodeOperationalStateCommand
        // to update the opState persisted in Datanode.
        scm.getScmContext().updateLeaderAndTerm(true, 1);
        List<SCMCommand> commands = nodeManager.processHeartbeat(dn, layoutInfo);
        // Check the size before touching element 0 so that an empty list is
        // reported as an assertion failure, not an IndexOutOfBoundsException.
        assertEquals(1, commands.size());
        assertEquals(SetNodeOperationalStateCommand.class, commands.get(0).getClass());
        // If found mismatch, follower SCM update its own opState according
        // to the heartbeat, and no SCMCommand will be fired.
        scm.getScmContext().updateLeaderAndTerm(false, 2);
        commands = nodeManager.processHeartbeat(dn, layoutInfo);
        assertEquals(0, commands.size());
        NodeStatus scmStatus = nodeManager.getNodeStatus(dn);
        // Separate assertions so a failure names the mismatching field and
        // shows both the expected and the actual value.
        assertEquals(dn.getPersistedOpState(), scmStatus.getOperationalState());
        assertEquals(dn.getPersistedOpStateExpiryEpochSec(), scmStatus.getOpStateExpiryEpochSeconds());
    }
}
Also used : UpgradeUtils.toLayoutVersionProto(org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.toLayoutVersionProto) LayoutVersionProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto) SetNodeOperationalStateCommand(org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) MockDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails) MockDatanodeDetails.createDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) LayoutVersionManager(org.apache.hadoop.ozone.upgrade.LayoutVersionManager) HDDSLayoutVersionManager(org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand) Test(org.junit.Test)

Example 4 with SetNodeOperationalStateCommand

use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.

the class TestReconNodeManager method testReconNodeDB.

/**
 * Exercises a ReconNodeManager backed by the node table: registers a random
 * datanode, checks which queued commands come back from a heartbeat, checks
 * that the operational state reported in a heartbeat is reflected in the
 * stored details and node status, and finally verifies that a freshly
 * constructed manager over the same table still knows the node.
 *
 * @throws IOException propagated from the node manager or table.
 * @throws NodeNotFoundException if the registered node cannot be looked up.
 */
@Test
public void testReconNodeDB() throws IOException, NodeNotFoundException {
    ReconStorageConfig storageConfig = new ReconStorageConfig(conf, new ReconUtils());
    EventQueue events = new EventQueue();
    NetworkTopology topology = new NetworkTopologyImpl(conf);
    Table<UUID, DatanodeDetails> nodes = ReconSCMDBDefinition.NODES.getTable(store);
    ReconNodeManager nodeManager = new ReconNodeManager(conf, storageConfig, events, topology, nodes, versionManager);
    ReconNewNodeHandler newNodeHandler = new ReconNewNodeHandler(nodeManager);
    assertTrue(nodeManager.getAllNodes().isEmpty());

    // Register a random datanode.
    DatanodeDetails randomDn = randomDatanodeDetails();
    String dnUuid = randomDn.getUuidString();
    nodeManager.register(randomDn, null, null);
    newNodeHandler.onMessage(nodeManager.getNodeByUuid(dnUuid), null);
    assertEquals(1, nodeManager.getAllNodes().size());
    assertNotNull(nodeManager.getNodeByUuid(dnUuid));

    // Commands that reach the queue without going through the onMessage
    // interface must be filtered out instead of being handed to the DN on
    // its next heartbeat.
    // Recon must never return this one ...
    nodeManager.addDatanodeCommand(
        randomDn.getUuid(),
        new SetNodeOperationalStateCommand(1234, DECOMMISSIONING, 0));
    // ... whereas this one is expected back.
    nodeManager.addDatanodeCommand(
        randomDn.getUuid(),
        new ReregisterCommand());

    // OperationalState sanity check
    final DatanodeDetails storedDn = nodeManager.getNodeByUuid(randomDn.getUuidString());
    assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE, storedDn.getPersistedOpState());
    assertEquals(
        storedDn.getPersistedOpState(),
        nodeManager.getNodeStatus(storedDn).getOperationalState());
    assertEquals(
        storedDn.getPersistedOpStateExpiryEpochSec(),
        nodeManager.getNodeStatus(storedDn).getOpStateExpiryEpochSeconds());

    // Processing the heartbeat must drop the illegal command.
    List<SCMCommand> heartbeatCmds = nodeManager.processHeartbeat(randomDn, defaultLayoutVersionProto());
    assertEquals(1, heartbeatCmds.size());
    assertEquals(SCMCommandProto.Type.reregisterCommand, heartbeatCmds.get(0).getType());

    // Now feed a DECOMMISSIONED heartbeat of the same DN
    randomDn.setPersistedOpState(HddsProtos.NodeOperationalState.DECOMMISSIONED);
    randomDn.setPersistedOpStateExpiryEpochSec(12345L);
    nodeManager.processHeartbeat(randomDn, defaultLayoutVersionProto());

    // Check both persistedOpState and NodeStatus#operationalState
    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED, storedDn.getPersistedOpState());
    assertEquals(
        storedDn.getPersistedOpState(),
        nodeManager.getNodeStatus(storedDn).getOperationalState());
    assertEquals(12345L, storedDn.getPersistedOpStateExpiryEpochSec());
    assertEquals(
        storedDn.getPersistedOpStateExpiryEpochSec(),
        nodeManager.getNodeStatus(storedDn).getOpStateExpiryEpochSeconds());

    // Close the DB, and recreate the instance of Recon Node Manager.
    events.close();
    nodeManager.close();
    nodeManager = new ReconNodeManager(conf, storageConfig, events, topology, nodes, versionManager);

    // Verify that the node information was persisted and loaded back.
    assertEquals(1, nodeManager.getAllNodes().size());
    assertNotNull(nodeManager.getNodeByUuid(randomDn.getUuidString()));
}
Also used : ReconUtils(org.apache.hadoop.ozone.recon.ReconUtils) SetNodeOperationalStateCommand(org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) NetworkTopologyImpl(org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl) ReregisterCommand(org.apache.hadoop.ozone.protocol.commands.ReregisterCommand) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) NetworkTopology(org.apache.hadoop.hdds.scm.net.NetworkTopology) UUID(java.util.UUID) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand) Test(org.junit.Test)

Example 5 with SetNodeOperationalStateCommand

use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.

the class SCMNodeManager method updateDatanodeOpState.

/**
 * Reconciles the operational state reported in a datanode heartbeat with
 * the state held in SCM. This method should only be called when processing
 * the heartbeat.
 *
 * On leader SCM, the information stored in SCM is the source of truth for a
 * registered node. If the operational state or expiry reported in the
 * heartbeat do not match those stored in SCM, a command carrying the
 * leader's term is queued to update the state persisted on the datanode.
 * If the leader's term cannot be obtained because this SCM is no longer
 * leader, the command is skipped and the method returns immediately.
 *
 * On follower SCM, the datanode's heartbeat is how the follower learns the
 * latest operational state or expiry. On a mismatch the follower simply
 * updates its own stored state to the reported values.
 *
 * Finally, the persisted operational state and expiry of the
 * DatanodeDetails stored in SCM are set to the values reported in the
 * heartbeat.
 *
 * @param reportedDn The DatanodeDetails taken from the node heartbeat.
 * @throws NodeNotFoundException declared by this method; presumably raised
 *         when the reported node is not known to SCM (thrown by the lookups
 *         this method calls - confirm against their declarations).
 */
protected void updateDatanodeOpState(DatanodeDetails reportedDn) throws NodeNotFoundException {
    final NodeStatus storedStatus = getNodeStatus(reportedDn);
    if (opStateDiffers(reportedDn, storedStatus)) {
        if (!scmContext.isLeader()) {
            // Follower: adopt whatever the datanode reported.
            LOG.info("Update the operationalState saved in follower SCM " + "for {} as the reported value does not " + "match the value stored in SCM ({}, {})", reportedDn, storedStatus.getOperationalState(), storedStatus.getOpStateExpiryEpochSeconds());
            setNodeOperationalState(reportedDn, reportedDn.getPersistedOpState(), reportedDn.getPersistedOpStateExpiryEpochSec());
        } else {
            // Leader: push the SCM-side state down to the datanode.
            LOG.info("Scheduling a command to update the operationalState " + "persisted on {} as the reported value does not " + "match the value stored in SCM ({}, {})", reportedDn, storedStatus.getOperationalState(), storedStatus.getOpStateExpiryEpochSeconds());
            try {
                SCMCommand<?> opStateCommand = new SetNodeOperationalStateCommand(Time.monotonicNow(), storedStatus.getOperationalState(), storedStatus.getOpStateExpiryEpochSeconds());
                opStateCommand.setTerm(scmContext.getTermOfLeader());
                addDatanodeCommand(reportedDn.getUuid(), opStateCommand);
            } catch (NotLeaderException nle) {
                LOG.warn("Skip sending SetNodeOperationalStateCommand," + " since current SCM is not leader.", nle);
                return;
            }
        }
    }
    // Mirror the reported persisted state onto the DatanodeDetails SCM holds.
    final DatanodeDetails storedDn = nodeStateManager.getNode(reportedDn);
    storedDn.setPersistedOpStateExpiryEpochSec(reportedDn.getPersistedOpStateExpiryEpochSec());
    storedDn.setPersistedOpState(reportedDn.getPersistedOpState());
}
Also used : NotLeaderException(org.apache.ratis.protocol.exceptions.NotLeaderException) SetNodeOperationalStateCommand(org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails)

Aggregations

SetNodeOperationalStateCommand (org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand)5 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)4 MockDatanodeDetails.randomDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails)2 SCMCommand (org.apache.hadoop.ozone.protocol.commands.SCMCommand)2 Test (org.junit.Test)2 IOException (java.io.IOException)1 UUID (java.util.UUID)1 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)1 MockDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails)1 MockDatanodeDetails.createDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails.createDatanodeDetails)1 StorageContainerDatanodeProtocolProtos (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos)1 LayoutVersionProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto)1 SCMCommandProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto)1 NetworkTopology (org.apache.hadoop.hdds.scm.net.NetworkTopology)1 NetworkTopologyImpl (org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl)1 EventQueue (org.apache.hadoop.hdds.server.events.EventQueue)1 HDDSLayoutVersionManager (org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager)1 UpgradeUtils.toLayoutVersionProto (org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.toLayoutVersionProto)1 CloseContainerCommand (org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand)1 ClosePipelineCommand (org.apache.hadoop.ozone.protocol.commands.ClosePipelineCommand)1