use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.
the class HeartbeatEndpointTask method processResponse.
/**
 * Dispatches every command contained in an SCM heartbeat response.
 *
 * A re-register request switches the RPC endpoint from HEARTBEAT to REGISTER;
 * every other known command is rebuilt from its protobuf form, given the
 * term carried by the response (when one is present) and added to the
 * state context. A delete-blocks command with nothing to delete is dropped.
 *
 * @param response - SCMHeartbeat response.
 * @param datanodeDetails - details of this datanode; the UUID in the response
 *                        must match its UUID (case-insensitively).
 * @throws IllegalArgumentException if the response carries a command type
 *                                  that this method does not handle.
 */
private void processResponse(SCMHeartbeatResponseProto response, final DatanodeDetailsProto datanodeDetails) {
  // Verify the response is indeed for this datanode.
  Preconditions.checkState(
      response.getDatanodeUUID().equalsIgnoreCase(datanodeDetails.getUuid()),
      "Unexpected datanode ID in the response.");

  for (SCMCommandProto cmdProto : response.getCommandsList()) {
    switch (cmdProto.getCommandType()) {
    case reregisterCommand: {
      // Only a heartbeating endpoint may be pushed back to registration.
      if (rpcEndpoint.getState() != EndPointStates.HEARTBEAT) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Illegal state {} found, expecting {}.",
              rpcEndpoint.getState().name(), EndPointStates.HEARTBEAT);
        }
        break;
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Received SCM notification to register."
            + " Interrupt HEARTBEAT and transit to REGISTER state.");
      }
      rpcEndpoint.setState(EndPointStates.REGISTER);
      break;
    }
    case deleteBlocksCommand: {
      DeleteBlocksCommand cmd = DeleteBlocksCommand.getFromProtobuf(
          cmdProto.getDeleteBlocksCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      // Nothing to delete: do not queue an empty command.
      if (cmd.blocksTobeDeleted().isEmpty()) {
        break;
      }
      // The summary is only built when debug logging is on.
      if (LOG.isDebugEnabled()) {
        LOG.debug(DeletedContainerBlocksSummary
            .getFrom(cmd.blocksTobeDeleted()).toString());
      }
      context.addCommand(cmd);
      break;
    }
    case closeContainerCommand: {
      CloseContainerCommand cmd = CloseContainerCommand.getFromProtobuf(
          cmdProto.getCloseContainerCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      if (cmdProto.hasEncodedToken()) {
        cmd.setEncodedToken(cmdProto.getEncodedToken());
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Received SCM container close request for container {}",
            cmd.getContainerID());
      }
      context.addCommand(cmd);
      break;
    }
    case replicateContainerCommand: {
      ReplicateContainerCommand cmd = ReplicateContainerCommand.getFromProtobuf(
          cmdProto.getReplicateContainerCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Received SCM container replicate request for container {}",
            cmd.getContainerID());
      }
      context.addCommand(cmd);
      break;
    }
    case deleteContainerCommand: {
      DeleteContainerCommand cmd = DeleteContainerCommand.getFromProtobuf(
          cmdProto.getDeleteContainerCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Received SCM delete container request for container {}",
            cmd.getContainerID());
      }
      context.addCommand(cmd);
      break;
    }
    case createPipelineCommand: {
      CreatePipelineCommand cmd = CreatePipelineCommand.getFromProtobuf(
          cmdProto.getCreatePipelineCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Received SCM create pipeline request {}",
            cmd.getPipelineID());
      }
      context.addCommand(cmd);
      break;
    }
    case closePipelineCommand: {
      ClosePipelineCommand cmd = ClosePipelineCommand.getFromProtobuf(
          cmdProto.getClosePipelineCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Received SCM close pipeline request {}",
            cmd.getPipelineID());
      }
      context.addCommand(cmd);
      break;
    }
    case setNodeOperationalStateCommand: {
      SetNodeOperationalStateCommand cmd =
          SetNodeOperationalStateCommand.getFromProtobuf(
              cmdProto.getSetNodeOperationalStateCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Received SCM set operational state command. State: {} "
            + "Expiry: {}", cmd.getOpState(), cmd.getStateExpiryEpochSeconds());
      }
      context.addCommand(cmd);
      break;
    }
    case finalizeNewLayoutVersionCommand: {
      FinalizeNewLayoutVersionCommand cmd =
          FinalizeNewLayoutVersionCommand.getFromProtobuf(
              cmdProto.getFinalizeNewLayoutVersionCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Received SCM finalize command {}", cmd.getId());
      }
      context.addCommand(cmd);
      break;
    }
    case refreshVolumeUsageInfo: {
      RefreshVolumeUsageCommand cmd = RefreshVolumeUsageCommand.getFromProtobuf(
          cmdProto.getRefreshVolumeUsageCommandProto());
      if (cmdProto.hasTerm()) {
        cmd.setTerm(cmdProto.getTerm());
      }
      context.addCommand(cmd);
      break;
    }
    default:
      throw new IllegalArgumentException("Unknown response : "
          + cmdProto.getCommandType().name());
    }
  }
}
use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.
the class SetNodeOperationalStateCommandHandler method handle.
/**
 * Applies a set-node-operational-state command to this datanode.
 *
 * The operational state and its expiry are copied from the command onto the
 * datanode details obtained from the context's parent and then persisted.
 * A persistence failure is logged but not propagated. Every call is counted
 * as an invocation; elapsed time is only accumulated when the command is of
 * the expected type.
 *
 * @param command - SCM Command
 * @param container - Ozone Container.
 * @param context - Current Context.
 * @param connectionManager - The SCMs that we are talking to.
 */
@Override
public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) {
  final long begin = Time.monotonicNow();
  invocationCount.incrementAndGet();

  // Ignore anything that is not addressed to this handler.
  if (command.getType() != Type.setNodeOperationalStateCommand) {
    LOG.warn("Skipping handling command, expected command "
        + "type {} but found {}",
        Type.setNodeOperationalStateCommand, command.getType());
    return;
  }

  final SetNodeOperationalStateCommand opStateCmd =
      (SetNodeOperationalStateCommand) command;
  final StorageContainerDatanodeProtocolProtos
      .SetNodeOperationalStateCommandProto opStateProto = opStateCmd.getProto();

  final DatanodeDetails localDn = context.getParent().getDatanodeDetails();
  localDn.setPersistedOpState(opStateProto.getNodeOperationalState());
  localDn.setPersistedOpStateExpiryEpochSec(
      opStateCmd.getStateExpiryEpochSeconds());

  try {
    persistDatanodeDetails(localDn);
  } catch (IOException ioe) {
    LOG.error("Failed to persist the datanode state", ioe);
    // TODO - this should probably be raised, but it will break the command
    // handler interface.
  }

  totalTime.addAndGet(Time.monotonicNow() - begin);
}
use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.
the class TestSCMNodeManager method testSetNodeOpStateAndCommandFired.
/**
 * For leader SCM, ensure that a change to the operationalState of a node
 * fires a SCMCommand of type SetNodeOperationalStateCommand.
 *
 * For follower SCM, no SetNodeOperationalStateCommand should be fired, yet
 * operationalState of the node will be updated according to the heartbeat.
 */
@Test
public void testSetNodeOpStateAndCommandFired() throws IOException, NodeNotFoundException, AuthenticationException {
  final int interval = 100;
  OzoneConfiguration conf = getConf();
  conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, interval, MILLISECONDS);
  // If factor 1 pipelines are auto created, registering the new node will
  // trigger a pipeline creation command which may interfere with command
  // checking in this test.
  conf.setBoolean(OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE, false);
  try (SCMNodeManager nodeManager = createNodeManager(conf)) {
    DatanodeDetails dn = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager);
    LayoutVersionManager versionManager = nodeManager.getLayoutVersionManager();
    final LayoutVersionProto layoutInfo = toLayoutVersionProto(
        versionManager.getMetadataLayoutVersion(),
        versionManager.getSoftwareLayoutVersion());
    long expiry = System.currentTimeMillis() / 1000 + 1000;
    nodeManager.setNodeOperationalState(dn,
        HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE, expiry);

    // If found mismatch, leader SCM fires a SetNodeOperationalStateCommand
    // to update the opState persisted in Datanode.
    scm.getScmContext().updateLeaderAndTerm(true, 1);
    List<SCMCommand> commands = nodeManager.processHeartbeat(dn, layoutInfo);
    // Check the size before indexing so that an unexpectedly empty list is
    // reported as an assertion failure, not an IndexOutOfBoundsException.
    assertEquals(1, commands.size());
    assertEquals(SetNodeOperationalStateCommand.class,
        commands.get(0).getClass());

    // If found mismatch, follower SCM update its own opState according
    // to the heartbeat, and no SCMCommand will be fired.
    scm.getScmContext().updateLeaderAndTerm(false, 2);
    commands = nodeManager.processHeartbeat(dn, layoutInfo);
    assertEquals(0, commands.size());

    // Assert each field separately so a failure names the mismatching value.
    NodeStatus scmStatus = nodeManager.getNodeStatus(dn);
    assertEquals(dn.getPersistedOpState(), scmStatus.getOperationalState());
    assertEquals(dn.getPersistedOpStateExpiryEpochSec(),
        scmStatus.getOpStateExpiryEpochSeconds());
  }
}
use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.
the class TestReconNodeManager method testReconNodeDB.
/**
 * Exercises ReconNodeManager end to end: registers a datanode, checks that
 * a queued SetNodeOperationalStateCommand is not returned on heartbeat while
 * a ReregisterCommand is, checks that operational state reported in a
 * heartbeat is reflected in the stored node and its NodeStatus, and finally
 * checks that the node is still known after the manager is re-created on
 * the same node table.
 */
@Test
public void testReconNodeDB() throws IOException, NodeNotFoundException {
  ReconStorageConfig reconStorageConfig =
      new ReconStorageConfig(conf, new ReconUtils());
  EventQueue events = new EventQueue();
  NetworkTopology topology = new NetworkTopologyImpl(conf);
  Table<UUID, DatanodeDetails> nodesTable =
      ReconSCMDBDefinition.NODES.getTable(store);
  ReconNodeManager nodeManager = new ReconNodeManager(conf,
      reconStorageConfig, events, topology, nodesTable, versionManager);
  ReconNewNodeHandler newNodeHandler = new ReconNewNodeHandler(nodeManager);
  assertTrue(nodeManager.getAllNodes().isEmpty());

  DatanodeDetails dn = randomDatanodeDetails();
  String dnUuid = dn.getUuidString();

  // Register a random datanode.
  nodeManager.register(dn, null, null);
  newNodeHandler.onMessage(nodeManager.getNodeByUuid(dnUuid), null);
  assertEquals(1, nodeManager.getAllNodes().size());
  assertNotNull(nodeManager.getNodeByUuid(dnUuid));

  // If any commands are added to the eventQueue without using the onMessage
  // interface, then they should be filtered out and not returned to the DN
  // when it heartbeats.
  // This command should never be returned by Recon
  nodeManager.addDatanodeCommand(dn.getUuid(),
      new SetNodeOperationalStateCommand(1234, DECOMMISSIONING, 0));
  // This one should be returned
  nodeManager.addDatanodeCommand(dn.getUuid(), new ReregisterCommand());

  // OperationalState sanity check
  final DatanodeDetails storedDn = nodeManager.getNodeByUuid(dnUuid);
  assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
      storedDn.getPersistedOpState());
  assertEquals(storedDn.getPersistedOpState(),
      nodeManager.getNodeStatus(storedDn).getOperationalState());
  assertEquals(storedDn.getPersistedOpStateExpiryEpochSec(),
      nodeManager.getNodeStatus(storedDn).getOpStateExpiryEpochSeconds());

  // Upon processing the heartbeat, the illegal command should be filtered out
  List<SCMCommand> heartbeatCmds =
      nodeManager.processHeartbeat(dn, defaultLayoutVersionProto());
  assertEquals(1, heartbeatCmds.size());
  assertEquals(SCMCommandProto.Type.reregisterCommand,
      heartbeatCmds.get(0).getType());

  // Now feed a DECOMMISSIONED heartbeat of the same DN
  dn.setPersistedOpState(HddsProtos.NodeOperationalState.DECOMMISSIONED);
  dn.setPersistedOpStateExpiryEpochSec(12345L);
  nodeManager.processHeartbeat(dn, defaultLayoutVersionProto());

  // Check both persistedOpState and NodeStatus#operationalState
  assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED,
      storedDn.getPersistedOpState());
  assertEquals(storedDn.getPersistedOpState(),
      nodeManager.getNodeStatus(storedDn).getOperationalState());
  assertEquals(12345L, storedDn.getPersistedOpStateExpiryEpochSec());
  assertEquals(storedDn.getPersistedOpStateExpiryEpochSec(),
      nodeManager.getNodeStatus(storedDn).getOpStateExpiryEpochSeconds());

  // Close the DB, and recreate the instance of Recon Node Manager.
  events.close();
  nodeManager.close();
  ReconNodeManager reloadedManager = new ReconNodeManager(conf,
      reconStorageConfig, events, topology, nodesTable, versionManager);

  // Verify that the node information was persisted and loaded back.
  assertEquals(1, reloadedManager.getAllNodes().size());
  assertNotNull(reloadedManager.getNodeByUuid(dnUuid));
}
use of org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand in project ozone by apache.
the class SCMNodeManager method updateDatanodeOpState.
/**
 * Reconciles the operational state reported in a datanode heartbeat with
 * the state held by SCM. This method should only be called when processing
 * the heartbeat.
 *
 * On leader SCM, the information stored in SCM is the source of truth for a
 * registered node. If the operational state or expiry reported in the
 * heartbeat do not match those stored in SCM, a command is queued to update
 * the state persisted on the datanode. If a NotLeaderException is raised
 * while preparing that command, a warning is logged and the method returns
 * without touching the DatanodeDetails stored in SCM.
 *
 * On follower SCM, the datanode's heartbeat is the source of the latest
 * operational state and expiry. If they do not match those stored in SCM,
 * the state in the follower SCM is updated accordingly.
 *
 * In every other case the DatanodeDetails stored in SCM are finally updated
 * to carry the persisted state and expiry reported in the heartbeat.
 *
 * @param reportedDn The DatanodeDetails taken from the node heartbeat.
 * @throws NodeNotFoundException declared by this method; presumably raised
 *         when the reported node is not known to SCM (not visible here).
 */
protected void updateDatanodeOpState(DatanodeDetails reportedDn) throws NodeNotFoundException {
  final NodeStatus storedStatus = getNodeStatus(reportedDn);

  if (opStateDiffers(reportedDn, storedStatus)) {
    if (!scmContext.isLeader()) {
      // Follower: adopt what the datanode reports.
      LOG.info("Update the operationalState saved in follower SCM "
          + "for {} as the reported value does not "
          + "match the value stored in SCM ({}, {})",
          reportedDn, storedStatus.getOperationalState(),
          storedStatus.getOpStateExpiryEpochSeconds());
      setNodeOperationalState(reportedDn,
          reportedDn.getPersistedOpState(),
          reportedDn.getPersistedOpStateExpiryEpochSec());
    } else {
      // Leader: push the SCM-side state back to the datanode.
      LOG.info("Scheduling a command to update the operationalState "
          + "persisted on {} as the reported value does not "
          + "match the value stored in SCM ({}, {})",
          reportedDn, storedStatus.getOperationalState(),
          storedStatus.getOpStateExpiryEpochSeconds());
      try {
        SCMCommand<?> syncCmd = new SetNodeOperationalStateCommand(
            Time.monotonicNow(),
            storedStatus.getOperationalState(),
            storedStatus.getOpStateExpiryEpochSeconds());
        syncCmd.setTerm(scmContext.getTermOfLeader());
        addDatanodeCommand(reportedDn.getUuid(), syncCmd);
      } catch (NotLeaderException nle) {
        LOG.warn("Skip sending SetNodeOperationalStateCommand,"
            + " since current SCM is not leader.", nle);
        return;
      }
    }
  }

  // Keep the SCM copy of the datanode details in line with the heartbeat.
  final DatanodeDetails storedDn = nodeStateManager.getNode(reportedDn);
  storedDn.setPersistedOpStateExpiryEpochSec(
      reportedDn.getPersistedOpStateExpiryEpochSec());
  storedDn.setPersistedOpState(reportedDn.getPersistedOpState());
}
Aggregations