Search in sources :

Example 1 with ReregisterCommand

Use of org.apache.hadoop.ozone.protocol.commands.ReregisterCommand in the Apache Ozone project.

Class ReconNodeManager, method processHeartbeat.

/**
 * Processes a heartbeat from a datanode and returns the commands to send back.
 *
 * <p>If {@code needUpdate} reports true for this datanode, the only command
 * returned is a {@link ReregisterCommand}. Otherwise the heartbeat is handed
 * to the superclass implementation and the resulting commands are reduced to
 * those whose type is contained in {@code ALLOWED_COMMANDS}. On both paths the
 * datanode's entry in {@code datanodeHeartbeatMap} is set to the current time.
 *
 * @param datanodeDetails - details of the datanode that sent the heartbeat.
 * @param layoutInfo - Layout Version Proto
 * @return commands to be returned to the datanode in the heartbeat response.
 */
@Override
public List<SCMCommand> processHeartbeat(DatanodeDetails datanodeDetails, LayoutVersionProto layoutInfo) {
    List<SCMCommand> cmds = new ArrayList<>();
    long currentTime = Time.now();
    if (needUpdate(datanodeDetails, currentTime)) {
        cmds.add(new ReregisterCommand());
        // Parameterized logging: the message is only assembled when INFO is enabled.
        LOG.info("Sending ReregisterCommand() for {}", datanodeDetails.getHostName());
        // Record the heartbeat only after needUpdate() has been evaluated.
        datanodeHeartbeatMap.put(datanodeDetails.getUuid(), Time.now());
        return cmds;
    }
    // Update heartbeat map with current time
    datanodeHeartbeatMap.put(datanodeDetails.getUuid(), Time.now());
    cmds.addAll(super.processHeartbeat(datanodeDetails, layoutInfo));
    // Drop every command whose type is not listed in ALLOWED_COMMANDS.
    return cmds.stream().filter(c -> ALLOWED_COMMANDS.contains(c.getType())).collect(toList());
}
Also used : OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) Type(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) SCMStorageConfig(org.apache.hadoop.hdds.scm.server.SCMStorageConfig) NodeReportProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) SCMContext(org.apache.hadoop.hdds.scm.ha.SCMContext) ArrayList(java.util.ArrayList) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) EventPublisher(org.apache.hadoop.hdds.server.events.EventPublisher) Map(java.util.Map) PipelineReportsProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto) NetworkTopology(org.apache.hadoop.hdds.scm.net.NetworkTopology) ImmutableSet(com.google.common.collect.ImmutableSet) Logger(org.slf4j.Logger) LayoutVersionProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto) Set(java.util.Set) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) RegisteredCommand(org.apache.hadoop.ozone.protocol.commands.RegisteredCommand) IOException(java.io.IOException) UUID(java.util.UUID) CommandForDatanode(org.apache.hadoop.ozone.protocol.commands.CommandForDatanode) ReregisterCommand(org.apache.hadoop.ozone.protocol.commands.ReregisterCommand) Type.reregisterCommand(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type.reregisterCommand) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) VersionResponse(org.apache.hadoop.ozone.protocol.VersionResponse) Table(org.apache.hadoop.hdds.utils.db.Table) HddsServerUtil(org.apache.hadoop.hdds.utils.HddsServerUtil) 
SCMNodeManager(org.apache.hadoop.hdds.scm.node.SCMNodeManager) HDDSLayoutVersionManager(org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager) Time(org.apache.hadoop.util.Time) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TableIterator(org.apache.hadoop.hdds.utils.db.TableIterator) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand) SCMVersionRequestProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto) ReregisterCommand(org.apache.hadoop.ozone.protocol.commands.ReregisterCommand) ArrayList(java.util.ArrayList) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand)

Example 2 with ReregisterCommand

Use of org.apache.hadoop.ozone.protocol.commands.ReregisterCommand in the Apache Ozone project.

Class SCMDatanodeHeartbeatDispatcher, method dispatch.

/**
 * Handles a single datanode heartbeat.
 *
 * <p>For a datanode that the node manager does not report as registered, a
 * {@link ReregisterCommand} is queued and the datanode's command queue is
 * returned; none of the reports carried by the heartbeat are dispatched.
 * For a registered datanode, the layout version is processed, the heartbeat
 * is passed to the node manager, and one event is fired through the event
 * publisher for each report or action present in the heartbeat.
 *
 * @param heartbeat heartbeat to be dispatched.
 *
 * @return list of SCMCommand
 */
public List<SCMCommand> dispatch(SCMHeartbeatRequestProto heartbeat) {
    final DatanodeDetails datanode = DatanodeDetails.getFromProtoBuf(heartbeat.getDatanodeDetails());

    // Unregistered nodes are only asked to re-register.
    if (!nodeManager.isNodeRegistered(datanode)) {
        LOG.info("SCM received heartbeat from an unregistered datanode {}. " + "Asking datanode to re-register.", datanode);
        final UUID datanodeId = datanode.getUuid();
        nodeManager.addDatanodeCommand(datanodeId, new ReregisterCommand());
        return nodeManager.getCommandQueue(datanodeId);
    }

    // Backward compatibility: a heartbeat that carries no layout version is
    // treated as being at INITIAL_VERSION so old Datanodes can still talk to SCM.
    final LayoutVersionProto layoutVersion = heartbeat.hasDataNodeLayoutVersion()
            ? heartbeat.getDataNodeLayoutVersion()
            : toLayoutVersionProto(INITIAL_VERSION.layoutVersion(), INITIAL_VERSION.layoutVersion());

    LOG.debug("Processing DataNode Layout Report.");
    nodeManager.processLayoutVersionReport(datanode, layoutVersion);

    // should we dispatch heartbeat through eventPublisher?
    final List<SCMCommand> result = nodeManager.processHeartbeat(datanode, layoutVersion);

    if (heartbeat.hasNodeReport()) {
        LOG.debug("Dispatching Node Report.");
        eventPublisher.fireEvent(NODE_REPORT, new NodeReportFromDatanode(datanode, heartbeat.getNodeReport()));
    }

    if (heartbeat.hasContainerReport()) {
        LOG.debug("Dispatching Container Report.");
        eventPublisher.fireEvent(CONTAINER_REPORT, new ContainerReportFromDatanode(datanode, heartbeat.getContainerReport()));
    }

    // One event per incremental container report, in list order.
    final List<IncrementalContainerReportProto> incrementalReports = heartbeat.getIncrementalContainerReportList();
    if (!incrementalReports.isEmpty()) {
        LOG.debug("Dispatching ICRs.");
        for (IncrementalContainerReportProto report : incrementalReports) {
            eventPublisher.fireEvent(INCREMENTAL_CONTAINER_REPORT, new IncrementalContainerReportFromDatanode(datanode, report));
        }
    }

    if (heartbeat.hasContainerActions()) {
        LOG.debug("Dispatching Container Actions.");
        eventPublisher.fireEvent(CONTAINER_ACTIONS, new ContainerActionsFromDatanode(datanode, heartbeat.getContainerActions()));
    }

    if (heartbeat.hasPipelineReports()) {
        LOG.debug("Dispatching Pipeline Report.");
        eventPublisher.fireEvent(PIPELINE_REPORT, new PipelineReportFromDatanode(datanode, heartbeat.getPipelineReports()));
    }

    if (heartbeat.hasPipelineActions()) {
        LOG.debug("Dispatching Pipeline Actions.");
        eventPublisher.fireEvent(PIPELINE_ACTIONS, new PipelineActionsFromDatanode(datanode, heartbeat.getPipelineActions()));
    }

    // One event per command status report, in list order.
    if (heartbeat.getCommandStatusReportsCount() != 0) {
        LOG.debug("Dispatching Command Status Report.");
        for (CommandStatusReportsProto statusReport : heartbeat.getCommandStatusReportsList()) {
            eventPublisher.fireEvent(CMD_STATUS_REPORT, new CommandStatusReportFromDatanode(datanode, statusReport));
        }
    }

    return result;
}
Also used : UpgradeUtils.toLayoutVersionProto(org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.toLayoutVersionProto) LayoutVersionProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto) IncrementalContainerReportProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.IncrementalContainerReportProto) ReregisterCommand(org.apache.hadoop.ozone.protocol.commands.ReregisterCommand) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) CommandStatusReportsProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto) UUID(java.util.UUID) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand)

Example 3 with ReregisterCommand

Use of org.apache.hadoop.ozone.protocol.commands.ReregisterCommand in the Apache Ozone project.

Class TestReconNodeManager, method testReconNodeDB.

/**
 * Registers a random datanode with a ReconNodeManager, checks which queued
 * commands come back from a heartbeat, checks that the operational state
 * carried by a later heartbeat is reflected on the stored node, and finally
 * checks that the node is still known to a newly constructed manager built
 * on the same node table.
 */
@Test
public void testReconNodeDB() throws IOException, NodeNotFoundException {
    ReconStorageConfig storageConfig = new ReconStorageConfig(conf, new ReconUtils());
    EventQueue events = new EventQueue();
    NetworkTopology topology = new NetworkTopologyImpl(conf);
    Table<UUID, DatanodeDetails> nodeDb = ReconSCMDBDefinition.NODES.getTable(store);
    ReconNodeManager nodeManager = new ReconNodeManager(conf, storageConfig, events, topology, nodeDb, versionManager);
    ReconNewNodeHandler newNodeHandler = new ReconNewNodeHandler(nodeManager);

    // A fresh manager starts out with no nodes.
    assertTrue(nodeManager.getAllNodes().isEmpty());

    DatanodeDetails datanode = randomDatanodeDetails();
    String datanodeUuid = datanode.getUuidString();

    // Register the datanode and notify the new-node handler about it.
    nodeManager.register(datanode, null, null);
    newNodeHandler.onMessage(nodeManager.getNodeByUuid(datanodeUuid), null);
    assertEquals(1, nodeManager.getAllNodes().size());
    assertNotNull(nodeManager.getNodeByUuid(datanodeUuid));

    // Queue two commands directly on the node manager. Only the second one is
    // expected to be handed back to the datanode on its next heartbeat.
    // Expected to be dropped by Recon:
    nodeManager.addDatanodeCommand(datanode.getUuid(), new SetNodeOperationalStateCommand(1234, DECOMMISSIONING, 0));
    // Expected to be returned:
    nodeManager.addDatanodeCommand(datanode.getUuid(), new ReregisterCommand());

    // Sanity check of the operational state before any heartbeat is processed.
    final DatanodeDetails registeredNode = nodeManager.getNodeByUuid(datanode.getUuidString());
    assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE, registeredNode.getPersistedOpState());
    assertEquals(registeredNode.getPersistedOpState(), nodeManager.getNodeStatus(registeredNode).getOperationalState());
    assertEquals(registeredNode.getPersistedOpStateExpiryEpochSec(), nodeManager.getNodeStatus(registeredNode).getOpStateExpiryEpochSeconds());

    // The heartbeat must return exactly one command: the re-register command.
    List<SCMCommand> commands = nodeManager.processHeartbeat(datanode, defaultLayoutVersionProto());
    assertEquals(1, commands.size());
    assertEquals(SCMCommandProto.Type.reregisterCommand, commands.get(0).getType());

    // Heartbeat again from the same DN, this time reporting DECOMMISSIONED.
    datanode.setPersistedOpState(HddsProtos.NodeOperationalState.DECOMMISSIONED);
    datanode.setPersistedOpStateExpiryEpochSec(12345L);
    nodeManager.processHeartbeat(datanode, defaultLayoutVersionProto());

    // Both persistedOpState and NodeStatus#operationalState must reflect it.
    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED, registeredNode.getPersistedOpState());
    assertEquals(registeredNode.getPersistedOpState(), nodeManager.getNodeStatus(registeredNode).getOperationalState());
    assertEquals(12345L, registeredNode.getPersistedOpStateExpiryEpochSec());
    assertEquals(registeredNode.getPersistedOpStateExpiryEpochSec(), nodeManager.getNodeStatus(registeredNode).getOpStateExpiryEpochSeconds());

    // Close the event queue and the manager, then build a new manager
    // instance from the same arguments.
    events.close();
    nodeManager.close();
    nodeManager = new ReconNodeManager(conf, storageConfig, events, topology, nodeDb, versionManager);

    // The node must still be known to the new instance.
    assertEquals(1, nodeManager.getAllNodes().size());
    assertNotNull(nodeManager.getNodeByUuid(datanode.getUuidString()));
}
Also used : ReconUtils(org.apache.hadoop.ozone.recon.ReconUtils) SetNodeOperationalStateCommand(org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand) EventQueue(org.apache.hadoop.hdds.server.events.EventQueue) NetworkTopologyImpl(org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl) ReregisterCommand(org.apache.hadoop.ozone.protocol.commands.ReregisterCommand) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) NetworkTopology(org.apache.hadoop.hdds.scm.net.NetworkTopology) UUID(java.util.UUID) SCMCommand(org.apache.hadoop.ozone.protocol.commands.SCMCommand) Test(org.junit.Test)

Aggregations

UUID (java.util.UUID)3 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)3 ReregisterCommand (org.apache.hadoop.ozone.protocol.commands.ReregisterCommand)3 SCMCommand (org.apache.hadoop.ozone.protocol.commands.SCMCommand)3 LayoutVersionProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto)2 NetworkTopology (org.apache.hadoop.hdds.scm.net.NetworkTopology)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Collectors.toList (java.util.stream.Collectors.toList)1 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)1 MockDatanodeDetails.randomDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails)1 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)1 CommandStatusReportsProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto)1 IncrementalContainerReportProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.IncrementalContainerReportProto)1