Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.
The class FSNamesystem, method handleHeartbeat:
/**
* The given node has reported in. This method should:
* 1) Record the heartbeat, so the datanode isn't timed out
* 2) Adjust usage stats for future block allocation
*
 * If a substantial amount of time has passed since the last datanode
 * heartbeat, then request an immediate block report.
 *
 * @return a heartbeat response containing any commands for the datanode
* @throws IOException
*/
HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
    StorageReport[] reports, long cacheCapacity, long cacheUsed,
    int xceiverCount, int xmitsInProgress, int failedVolumes,
    VolumeFailureSummary volumeFailureSummary,
    boolean requestFullBlockReportLease,
    @Nonnull SlowPeerReports slowPeers) throws IOException {
  readLock();
  try {
    // get datanode commands
    final int maxTransfer = blockManager.getMaxReplicationStreams()
        - xmitsInProgress;
    DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
        nodeReg, reports, getBlockPoolId(), cacheCapacity, cacheUsed,
        xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary,
        slowPeers);
    long blockReportLeaseId = 0;
    if (requestFullBlockReportLease) {
      blockReportLeaseId = blockManager.requestBlockReportLeaseId(nodeReg);
    }
    // create ha status
    final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
        haContext.getState().getServiceState(),
        getFSImage().getCorrectLastAppliedOrWrittenTxId());
    return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo,
        blockReportLeaseId);
  } finally {
    readUnlock("handleHeartbeat");
  }
}
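For context, this method is reached through the NameNode's RPC server. A minimal sketch of that delegation, assuming the usual NameNodeRpcServer wiring (the checkNNStartup and verifyRequest pre-checks follow the conventional NameNode RPC pattern and are not verified against this exact source tree):

// Sketch: the DatanodeProtocol entry point on the NameNode delegating to
// FSNamesystem.handleHeartbeat. Note that the xmitsInProgress/xceiverCount
// ordering differs between the protocol and the namesystem method.
@Override // DatanodeProtocol
public HeartbeatResponse sendHeartbeat(DatanodeRegistration nodeReg,
    StorageReport[] reports, long cacheCapacity, long cacheUsed,
    int xmitsInProgress, int xceiverCount, int failedVolumes,
    VolumeFailureSummary volumeFailureSummary,
    boolean requestFullBlockReportLease,
    @Nonnull SlowPeerReports slowPeers) throws IOException {
  checkNNStartup();       // fail fast if the NameNode is still starting up
  verifyRequest(nodeReg); // reject heartbeats from unregistered datanodes
  return namesystem.handleHeartbeat(nodeReg, reports, cacheCapacity,
      cacheUsed, xceiverCount, xmitsInProgress, failedVolumes,
      volumeFailureSummary, requestFullBlockReportLease, slowPeers);
}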
Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.
The class DatanodeProtocolServerSideTranslatorPB, method sendHeartbeat:
@Override
public HeartbeatResponseProto sendHeartbeat(RpcController controller,
    HeartbeatRequestProto request) throws ServiceException {
  HeartbeatResponse response;
  try {
    final StorageReport[] report =
        PBHelperClient.convertStorageReports(request.getReportsList());
    VolumeFailureSummary volumeFailureSummary =
        request.hasVolumeFailureSummary()
            ? PBHelper.convertVolumeFailureSummary(
                request.getVolumeFailureSummary())
            : null;
    response = impl.sendHeartbeat(PBHelper.convert(request.getRegistration()),
        report, request.getCacheCapacity(), request.getCacheUsed(),
        request.getXmitsInProgress(), request.getXceiverCount(),
        request.getFailedVolumes(), volumeFailureSummary,
        request.getRequestFullBlockReportLease(),
        PBHelper.convertSlowPeerInfo(request.getSlowPeersList()));
  } catch (IOException e) {
    throw new ServiceException(e);
  }
  HeartbeatResponseProto.Builder builder = HeartbeatResponseProto.newBuilder();
  DatanodeCommand[] cmds = response.getCommands();
  if (cmds != null) {
    for (int i = 0; i < cmds.length; i++) {
      if (cmds[i] != null) {
        builder.addCmds(PBHelper.convert(cmds[i]));
      }
    }
  }
  builder.setHaStatus(PBHelper.convert(response.getNameNodeHaState()));
  RollingUpgradeStatus rollingUpdateStatus = response.getRollingUpdateStatus();
  if (rollingUpdateStatus != null) {
    // V2 is always set for newer datanodes.
    // To be compatible with older datanodes, V1 is set to null
    // if the RU was finalized.
    RollingUpgradeStatusProto rus =
        PBHelperClient.convertRollingUpgradeStatus(rollingUpdateStatus);
    builder.setRollingUpgradeStatusV2(rus);
    if (!rollingUpdateStatus.isFinalized()) {
      builder.setRollingUpgradeStatus(rus);
    }
  }
  builder.setFullBlockReportLeaseId(response.getFullBlockReportLeaseId());
  return builder.build();
}
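The DataNode side performs the inverse conversion in DatanodeProtocolClientSideTranslatorPB. As a rough sketch of just the response-decoding half, mirroring the encoding above (the helper name decodeResponse is hypothetical; the converter calls are modeled on the server-side code shown here):

// Hypothetical helper illustrating how a HeartbeatResponseProto is turned
// back into the Java-side HeartbeatResponse on the DataNode.
private HeartbeatResponse decodeResponse(HeartbeatResponseProto resp)
    throws IOException {
  DatanodeCommand[] cmds = new DatanodeCommand[resp.getCmdsCount()];
  int index = 0;
  for (DatanodeCommandProto p : resp.getCmdsList()) {
    cmds[index++] = PBHelper.convert(p); // proto -> DatanodeCommand
  }
  // Prefer the V2 rolling upgrade status when present (newer NameNodes
  // always set it); fall back to V1 for compatibility with older peers.
  RollingUpgradeStatus rollingUpgradeStatus = null;
  if (resp.hasRollingUpgradeStatusV2()) {
    rollingUpgradeStatus = PBHelperClient.convertRollingUpgradeStatus(
        resp.getRollingUpgradeStatusV2());
  } else if (resp.hasRollingUpgradeStatus()) {
    rollingUpgradeStatus = PBHelperClient.convertRollingUpgradeStatus(
        resp.getRollingUpgradeStatus());
  }
  return new HeartbeatResponse(cmds,
      PBHelper.convert(resp.getHaStatus()),
      rollingUpgradeStatus,
      resp.getFullBlockReportLeaseId());
}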
Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.
The class BPServiceActor, method sendHeartBeat:
HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease)
    throws IOException {
  scheduler.scheduleNextHeartbeat();
  StorageReport[] reports =
      dn.getFSDataset().getStorageReports(bpos.getBlockPoolId());
  if (LOG.isDebugEnabled()) {
    LOG.debug("Sending heartbeat with " + reports.length
        + " storage reports from service actor: " + this);
  }
  final long now = monotonicNow();
  scheduler.updateLastHeartbeatTime(now);
  VolumeFailureSummary volumeFailureSummary =
      dn.getFSDataset().getVolumeFailureSummary();
  int numFailedVolumes = volumeFailureSummary != null
      ? volumeFailureSummary.getFailedStorageLocations().length
      : 0;
  final boolean slowPeersReportDue = scheduler.isSlowPeersReportDue(now);
  final SlowPeerReports slowPeers =
      slowPeersReportDue && dn.getPeerMetrics() != null
          ? SlowPeerReports.create(dn.getPeerMetrics().getOutliers())
          : SlowPeerReports.EMPTY_REPORT;
  HeartbeatResponse response = bpNamenode.sendHeartbeat(bpRegistration,
      reports,
      dn.getFSDataset().getCacheCapacity(),
      dn.getFSDataset().getCacheUsed(),
      dn.getXmitsInProgress(),
      dn.getXceiverCount(),
      numFailedVolumes,
      volumeFailureSummary,
      requestBlockReportLease,
      slowPeers);
  if (slowPeersReportDue) {
    // If the report was due and successfully sent, schedule the next one.
    scheduler.scheduleNextSlowPeerReport();
  }
  return response;
}
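sendHeartBeat is driven by the actor's main offerService loop. A condensed sketch of how the returned HeartbeatResponse might be consumed there, heavily abbreviated (updateActorStatesFromHeartbeat and processCommand are assumptions based on the BPServiceActor/BPOfferService split):

// Condensed sketch of the heartbeat branch inside
// BPServiceActor.offerService()'s main loop.
while (shouldRun()) {
  final long startTime = scheduler.monotonicNow();
  if (scheduler.isHeartbeatDue(startTime)) {
    boolean requestBlockReportLease = (fullBlockReportLeaseId == 0)
        && scheduler.isBlockReportDue(startTime);
    HeartbeatResponse resp = sendHeartBeat(requestBlockReportLease);
    if (resp.getFullBlockReportLeaseId() != 0) {
      fullBlockReportLeaseId = resp.getFullBlockReportLeaseId();
    }
    // Let the shared BPOfferService react to the NameNode's HA state
    // before acting on any commands it returned.
    bpos.updateActorStatesFromHeartbeat(resp, bpRegistration);
    if (!processCommand(resp.getCommands())) {
      continue; // re-registration was requested; restart the loop
    }
  }
  // ... block report, cache report, and sleep until the next event ...
}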
Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.
The class TestDataNodeLifeline, method testSendLifelineIfHeartbeatBlocked:
@Test
public void testSendLifelineIfHeartbeatBlocked() throws Exception {
  // Run the test for the duration of sending 10 lifeline RPC messages.
  int numLifelines = 10;
  CountDownLatch lifelinesSent = new CountDownLatch(numLifelines);
  // Intercept heartbeat to inject an artificial delay, until all expected
  // lifeline RPC messages have been sent.
  doAnswer(new LatchAwaitingAnswer<HeartbeatResponse>(lifelinesSent))
      .when(namenode).sendHeartbeat(any(DatanodeRegistration.class),
          any(StorageReport[].class), anyLong(), anyLong(), anyInt(),
          anyInt(), anyInt(), any(VolumeFailureSummary.class),
          anyBoolean(), any(SlowPeerReports.class));
  // Intercept lifeline to trigger latch count-down on each call.
  doAnswer(new LatchCountingAnswer<Void>(lifelinesSent))
      .when(lifelineNamenode).sendLifeline(any(DatanodeRegistration.class),
          any(StorageReport[].class), anyLong(), anyLong(), anyInt(),
          anyInt(), anyInt(), any(VolumeFailureSummary.class));
  // While waiting on the latch for the expected number of lifeline messages,
  // poll the NameNode's view of the DataNode. Thanks to the lifeline, we
  // expect that the DataNode always stays alive, and never goes stale or dead.
  while (!lifelinesSent.await(1, SECONDS)) {
    assertEquals("Expect DataNode to be kept alive by lifeline.", 1,
        namesystem.getNumLiveDataNodes());
    assertEquals("Expect DataNode not marked dead due to lifeline.", 0,
        namesystem.getNumDeadDataNodes());
    assertEquals("Expect DataNode not marked stale due to lifeline.", 0,
        namesystem.getNumStaleDataNodes());
  }
  // Verify that we did in fact call the lifeline RPC.
  verify(lifelineNamenode, atLeastOnce()).sendLifeline(
      any(DatanodeRegistration.class), any(StorageReport[].class),
      anyLong(), anyLong(), anyInt(), anyInt(), anyInt(),
      any(VolumeFailureSummary.class));
  // Also verify lifeline calls through metrics. We expect at least
  // numLifelines, guaranteed by waiting on the latch. There is a small
  // possibility of extra lifeline calls depending on timing, so we allow
  // slack in the assertion.
  assertTrue("Expect metrics to count at least " + numLifelines + " calls.",
      getLongCounter("LifelinesNumOps", getMetrics(metrics.name()))
          >= numLifelines);
}
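Both lifeline tests depend on two Answer helpers that this excerpt does not show. A minimal sketch of what they plausibly look like, assuming Mockito's Answer and InvocationOnMock types (the class names come from the test; the bodies are illustrative, not the actual test source):

// Illustrative sketch: counts down the latch each time the stubbed RPC runs.
private static final class LatchCountingAnswer<T> implements Answer<T> {
  private final CountDownLatch latch;

  LatchCountingAnswer(CountDownLatch latch) {
    this.latch = latch;
  }

  @Override
  @SuppressWarnings("unchecked")
  public T answer(InvocationOnMock invocation) throws Throwable {
    T result = (T) invocation.callRealMethod();
    latch.countDown();
    return result;
  }
}

// Illustrative sketch: blocks the stubbed RPC until the latch reaches zero,
// simulating a heartbeat that stays stuck while lifelines keep arriving.
private static final class LatchAwaitingAnswer<T> implements Answer<T> {
  private final CountDownLatch latch;

  LatchAwaitingAnswer(CountDownLatch latch) {
    this.latch = latch;
  }

  @Override
  @SuppressWarnings("unchecked")
  public T answer(InvocationOnMock invocation) throws Throwable {
    latch.await();
    return (T) invocation.callRealMethod();
  }
}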
Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.
The class TestDataNodeLifeline, method testNoLifelineSentIfHeartbeatsOnTime:
@Test
public void testNoLifelineSentIfHeartbeatsOnTime() throws Exception {
  // Run the test for the duration of sending 10 heartbeat RPC messages.
  int numHeartbeats = 10;
  CountDownLatch heartbeatsSent = new CountDownLatch(numHeartbeats);
  // Intercept heartbeat to trigger latch count-down on each call.
  doAnswer(new LatchCountingAnswer<HeartbeatResponse>(heartbeatsSent))
      .when(namenode).sendHeartbeat(any(DatanodeRegistration.class),
          any(StorageReport[].class), anyLong(), anyLong(), anyInt(),
          anyInt(), anyInt(), any(VolumeFailureSummary.class),
          anyBoolean(), any(SlowPeerReports.class));
  // While waiting on the latch for the expected number of heartbeat messages,
  // poll the NameNode's view of the DataNode. With heartbeats arriving on
  // time, we expect that the DataNode always stays alive, and never goes
  // stale or dead.
  while (!heartbeatsSent.await(1, SECONDS)) {
    assertEquals("Expect DataNode to be kept alive by heartbeats.", 1,
        namesystem.getNumLiveDataNodes());
    assertEquals("Expect DataNode not marked dead.", 0,
        namesystem.getNumDeadDataNodes());
    assertEquals("Expect DataNode not marked stale.", 0,
        namesystem.getNumStaleDataNodes());
  }
  // Verify that we did not call the lifeline RPC.
  verify(lifelineNamenode, never()).sendLifeline(
      any(DatanodeRegistration.class), any(StorageReport[].class),
      anyLong(), anyLong(), anyInt(), anyInt(), anyInt(),
      any(VolumeFailureSummary.class));
  // Also verify no lifeline calls through metrics.
  assertEquals("Expect metrics to count no lifeline calls.", 0,
      getLongCounter("LifelinesNumOps", getMetrics(metrics.name())));
}
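The behavior this second test pins down comes from the lifeline sender's scheduling: a lifeline is only sent when heartbeats have not gotten through recently. A rough sketch of that decision with hypothetical names (the real logic lives in BPServiceActor's Scheduler and LifelineSender and is not reproduced here):

// Hypothetical sketch of the skip-lifeline decision. Names are illustrative,
// not the actual BPServiceActor.Scheduler API.
boolean isLifelineDue(long nowMs, long lastHeartbeatTimeMs,
    long lifelineIntervalMs) {
  // Every successful heartbeat refreshes lastHeartbeatTimeMs, so on-time
  // heartbeats keep pushing the lifeline deadline into the future and no
  // lifeline is ever sent, which is exactly what the test above asserts.
  return nowMs - lastHeartbeatTimeMs >= lifelineIntervalMs;
}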