
Example 1 with HeartbeatResponse

Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.

Class FSNamesystem, method handleHeartbeat.

/**
   * The given node has reported in.  This method should:
   * 1) Record the heartbeat, so the datanode isn't timed out
   * 2) Adjust usage stats for future block allocation
   *
   * If a substantial amount of time passed since the last datanode
   * heartbeat then request an immediate block report.
   *
   * @return an array of datanode commands
   * @throws IOException
   */
HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, StorageReport[] reports, long cacheCapacity, long cacheUsed, int xceiverCount, int xmitsInProgress, int failedVolumes, VolumeFailureSummary volumeFailureSummary, boolean requestFullBlockReportLease, @Nonnull SlowPeerReports slowPeers) throws IOException {
    readLock();
    try {
        //get datanode commands
        final int maxTransfer = blockManager.getMaxReplicationStreams() - xmitsInProgress;
        DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(nodeReg, reports, getBlockPoolId(), cacheCapacity, cacheUsed, xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary, slowPeers);
        long blockReportLeaseId = 0;
        if (requestFullBlockReportLease) {
            blockReportLeaseId = blockManager.requestBlockReportLeaseId(nodeReg);
        }
        //create ha status
        final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(haContext.getState().getServiceState(), getFSImage().getCorrectLastAppliedOrWrittenTxId());
        return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo, blockReportLeaseId);
    } finally {
        readUnlock("handleHeartbeat");
    }
}
Also used : HeartbeatResponse(org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse) DatanodeCommand(org.apache.hadoop.hdfs.server.protocol.DatanodeCommand) NNHAStatusHeartbeat(org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat)
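
The four values passed to the HeartbeatResponse constructor above come back out through getters on the receiving side (see Examples 2 and 3). Below is a minimal sketch of that round trip, assuming the constructor's third parameter is a RollingUpgradeStatus (null here for brevity) and that NNHAStatusHeartbeat is built from an HAServiceState plus a transaction id, as in the code above; the class name is invented for illustration.

import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeStatus;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;

public class HeartbeatResponseRoundTrip {
    public static void main(String[] args) {
        // Mirror the constructor call in handleHeartbeat: commands, HA state,
        // rolling upgrade status (none here) and the block report lease id.
        DatanodeCommand[] cmds = new DatanodeCommand[0];
        NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 42L);
        HeartbeatResponse response =
            new HeartbeatResponse(cmds, haState, (RollingUpgradeStatus) null, 7L);
        // The datanode-facing side reads the same fields back through getters,
        // exactly as the translator in Example 2 does.
        System.out.println("commands: " + response.getCommands().length);
        System.out.println("ha state: " + response.getNameNodeHaState());
        System.out.println("lease id: " + response.getFullBlockReportLeaseId());
    }
}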

Example 2 with HeartbeatResponse

Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.

Class DatanodeProtocolServerSideTranslatorPB, method sendHeartbeat.

@Override
public HeartbeatResponseProto sendHeartbeat(RpcController controller, HeartbeatRequestProto request) throws ServiceException {
    HeartbeatResponse response;
    try {
        final StorageReport[] report = PBHelperClient.convertStorageReports(request.getReportsList());
        VolumeFailureSummary volumeFailureSummary = request.hasVolumeFailureSummary() ? PBHelper.convertVolumeFailureSummary(request.getVolumeFailureSummary()) : null;
        response = impl.sendHeartbeat(PBHelper.convert(request.getRegistration()), report, request.getCacheCapacity(), request.getCacheUsed(), request.getXmitsInProgress(), request.getXceiverCount(), request.getFailedVolumes(), volumeFailureSummary, request.getRequestFullBlockReportLease(), PBHelper.convertSlowPeerInfo(request.getSlowPeersList()));
    } catch (IOException e) {
        throw new ServiceException(e);
    }
    HeartbeatResponseProto.Builder builder = HeartbeatResponseProto.newBuilder();
    DatanodeCommand[] cmds = response.getCommands();
    if (cmds != null) {
        for (int i = 0; i < cmds.length; i++) {
            if (cmds[i] != null) {
                builder.addCmds(PBHelper.convert(cmds[i]));
            }
        }
    }
    builder.setHaStatus(PBHelper.convert(response.getNameNodeHaState()));
    RollingUpgradeStatus rollingUpdateStatus = response.getRollingUpdateStatus();
    if (rollingUpdateStatus != null) {
        // V2 is always set for newer datanodes.
        // To be compatible with older datanodes, V1 is set to null
        //  if the RU was finalized.
        RollingUpgradeStatusProto rus = PBHelperClient.convertRollingUpgradeStatus(rollingUpdateStatus);
        builder.setRollingUpgradeStatusV2(rus);
        if (!rollingUpdateStatus.isFinalized()) {
            builder.setRollingUpgradeStatus(rus);
        }
    }
    builder.setFullBlockReportLeaseId(response.getFullBlockReportLeaseId());
    return builder.build();
}
Also used : HeartbeatResponse(org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse) RollingUpgradeStatusProto(org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RollingUpgradeStatusProto) RollingUpgradeStatus(org.apache.hadoop.hdfs.protocol.RollingUpgradeStatus) StorageReport(org.apache.hadoop.hdfs.server.protocol.StorageReport) IOException(java.io.IOException) VolumeFailureSummary(org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary) DatanodeCommand(org.apache.hadoop.hdfs.server.protocol.DatanodeCommand) ServiceException(com.google.protobuf.ServiceException) HeartbeatResponseProto(org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.HeartbeatResponseProto)
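
The V1/V2 handling at the end is the compatibility-sensitive part of this translator. Pulled out on its own as a sketch (the helper and its class name are invented; the builder and PBHelperClient calls are the ones used above):

import org.apache.hadoop.hdfs.protocol.RollingUpgradeStatus;
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.HeartbeatResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RollingUpgradeStatusProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;

final class RollingUpgradeFieldsSketch {
    // Hypothetical helper illustrating the compatibility rule from the method above.
    static void setRollingUpgradeFields(HeartbeatResponseProto.Builder builder,
            RollingUpgradeStatus status) {
        if (status == null) {
            // No rolling upgrade status known; leave both fields unset.
            return;
        }
        RollingUpgradeStatusProto rus = PBHelperClient.convertRollingUpgradeStatus(status);
        // Newer datanodes always read the V2 field.
        builder.setRollingUpgradeStatusV2(rus);
        // Older datanodes read the V1 field; leaving it unset tells them the
        // rolling upgrade has been finalized.
        if (!status.isFinalized()) {
            builder.setRollingUpgradeStatus(rus);
        }
    }
}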

Example 3 with HeartbeatResponse

Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.

Class BPServiceActor, method sendHeartBeat.

HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) throws IOException {
    scheduler.scheduleNextHeartbeat();
    StorageReport[] reports = dn.getFSDataset().getStorageReports(bpos.getBlockPoolId());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Sending heartbeat with " + reports.length + " storage reports from service actor: " + this);
    }
    final long now = monotonicNow();
    scheduler.updateLastHeartbeatTime(now);
    VolumeFailureSummary volumeFailureSummary = dn.getFSDataset().getVolumeFailureSummary();
    int numFailedVolumes = volumeFailureSummary != null ? volumeFailureSummary.getFailedStorageLocations().length : 0;
    final boolean slowPeersReportDue = scheduler.isSlowPeersReportDue(now);
    final SlowPeerReports slowPeers = slowPeersReportDue && dn.getPeerMetrics() != null ? SlowPeerReports.create(dn.getPeerMetrics().getOutliers()) : SlowPeerReports.EMPTY_REPORT;
    HeartbeatResponse response = bpNamenode.sendHeartbeat(bpRegistration, reports, dn.getFSDataset().getCacheCapacity(), dn.getFSDataset().getCacheUsed(), dn.getXmitsInProgress(), dn.getXceiverCount(), numFailedVolumes, volumeFailureSummary, requestBlockReportLease, slowPeers);
    if (slowPeersReportDue) {
        // If the report was due and successfully sent, schedule the next one.
        scheduler.scheduleNextSlowPeerReport();
    }
    return response;
}
Also used : HeartbeatResponse(org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse) StorageReport(org.apache.hadoop.hdfs.server.protocol.StorageReport) SlowPeerReports(org.apache.hadoop.hdfs.server.protocol.SlowPeerReports) VolumeFailureSummary(org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary)
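
The slowPeers expression above folds two conditions together: a report is attached only when one is due and the DataNode's peer metrics are enabled. The same decision as a small standalone sketch (the class and method names are invented, and it assumes SlowPeerReports.create accepts the outlier map produced by the peer metrics, as in the call above):

import java.util.Map;
import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports;

final class SlowPeerReportSketch {
    // Only attach a report when one is due and outlier data is available;
    // otherwise send the shared empty report.
    static SlowPeerReports buildSlowPeerReports(boolean reportDue, Map<String, Double> outliers) {
        if (reportDue && outliers != null) {
            return SlowPeerReports.create(outliers);
        }
        return SlowPeerReports.EMPTY_REPORT;
    }
}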

Example 4 with HeartbeatResponse

Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.

Class TestDataNodeLifeline, method testSendLifelineIfHeartbeatBlocked.

@Test
public void testSendLifelineIfHeartbeatBlocked() throws Exception {
    // Run the test for the duration of sending 10 lifeline RPC messages.
    int numLifelines = 10;
    CountDownLatch lifelinesSent = new CountDownLatch(numLifelines);
    // Intercept heartbeat to inject an artificial delay, until all expected
    // lifeline RPC messages have been sent.
    doAnswer(new LatchAwaitingAnswer<HeartbeatResponse>(lifelinesSent)).when(namenode).sendHeartbeat(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class), anyBoolean(), any(SlowPeerReports.class));
    // Intercept lifeline to trigger latch count-down on each call.
    doAnswer(new LatchCountingAnswer<Void>(lifelinesSent)).when(lifelineNamenode).sendLifeline(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class));
    // While waiting for all lifeline messages to be sent, repeatedly check
    // that the DataNode always stays alive, and never goes stale or dead.
    while (!lifelinesSent.await(1, SECONDS)) {
        assertEquals("Expect DataNode to be kept alive by lifeline.", 1, namesystem.getNumLiveDataNodes());
        assertEquals("Expect DataNode not marked dead due to lifeline.", 0, namesystem.getNumDeadDataNodes());
        assertEquals("Expect DataNode not marked stale due to lifeline.", 0, namesystem.getNumStaleDataNodes());
    }
    // Verify that we did in fact call the lifeline RPC.
    verify(lifelineNamenode, atLeastOnce()).sendLifeline(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class));
    // Also verify lifeline call through metrics.  We expect at least
    // numLifelines, guaranteed by waiting on the latch.  There is a small
    // possibility of extra lifeline calls depending on timing, so we allow
    // slack in the assertion.
    assertTrue("Expect metrics to count at least " + numLifelines + " calls.", getLongCounter("LifelinesNumOps", getMetrics(metrics.name())) >= numLifelines);
}
Also used : HeartbeatResponse(org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse) DatanodeRegistration(org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) SlowPeerReports(org.apache.hadoop.hdfs.server.protocol.SlowPeerReports) CountDownLatch(java.util.concurrent.CountDownLatch) VolumeFailureSummary(org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary) Test(org.junit.Test)
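
The LatchAwaitingAnswer and LatchCountingAnswer helpers used above are defined elsewhere in TestDataNodeLifeline and are not shown in this snippet. A plausible sketch of what they do, assuming both delegate to the real method and differ only in when they touch the latch:

import java.util.concurrent.CountDownLatch;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

// Sketches only; the real helpers in TestDataNodeLifeline may differ in detail.
class LatchAwaitingAnswer<T> implements Answer<T> {
    private final CountDownLatch latch;

    LatchAwaitingAnswer(CountDownLatch latch) {
        this.latch = latch;
    }

    @Override
    @SuppressWarnings("unchecked")
    public T answer(InvocationOnMock invocation) throws Throwable {
        // Block the intercepted heartbeat until the expected lifelines are sent...
        latch.await();
        // ...then let the real call go through.
        return (T) invocation.callRealMethod();
    }
}

class LatchCountingAnswer<T> implements Answer<T> {
    private final CountDownLatch latch;

    LatchCountingAnswer(CountDownLatch latch) {
        this.latch = latch;
    }

    @Override
    @SuppressWarnings("unchecked")
    public T answer(InvocationOnMock invocation) throws Throwable {
        // Perform the real call first, then record that it happened.
        T result = (T) invocation.callRealMethod();
        latch.countDown();
        return result;
    }
}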

Example 5 with HeartbeatResponse

Use of org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse in project hadoop by apache.

Class TestDataNodeLifeline, method testNoLifelineSentIfHeartbeatsOnTime.

@Test
public void testNoLifelineSentIfHeartbeatsOnTime() throws Exception {
    // Run the test for the duration of sending 10 heartbeat RPC messages.
    int numHeartbeats = 10;
    CountDownLatch heartbeatsSent = new CountDownLatch(numHeartbeats);
    // Intercept heartbeat to trigger latch count-down on each call.
    doAnswer(new LatchCountingAnswer<HeartbeatResponse>(heartbeatsSent)).when(namenode).sendHeartbeat(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class), anyBoolean(), any(SlowPeerReports.class));
    // While waiting for all heartbeats to be sent, repeatedly check that the
    // DataNode stays alive, and never goes stale or dead.
    while (!heartbeatsSent.await(1, SECONDS)) {
        assertEquals("Expect DataNode to be kept alive by lifeline.", 1, namesystem.getNumLiveDataNodes());
        assertEquals("Expect DataNode not marked dead due to lifeline.", 0, namesystem.getNumDeadDataNodes());
        assertEquals("Expect DataNode not marked stale due to lifeline.", 0, namesystem.getNumStaleDataNodes());
    }
    // Verify that we did not call the lifeline RPC.
    verify(lifelineNamenode, never()).sendLifeline(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class));
    // Also verify no lifeline calls through metrics.
    assertEquals("Expect metrics to count no lifeline calls.", 0, getLongCounter("LifelinesNumOps", getMetrics(metrics.name())));
}
Also used : HeartbeatResponse(org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse) DatanodeRegistration(org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) SlowPeerReports(org.apache.hadoop.hdfs.server.protocol.SlowPeerReports) CountDownLatch(java.util.concurrent.CountDownLatch) VolumeFailureSummary(org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary) Test(org.junit.Test)
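
Both tests lean on the same polling idiom: CountDownLatch.await(1, SECONDS) returns false until the latch reaches zero, so the loop body re-checks the liveness invariants roughly once per second until the expected number of RPC calls has been observed. In isolation, with plain JDK types only (the helper name is invented):

import java.util.concurrent.CountDownLatch;
import static java.util.concurrent.TimeUnit.SECONDS;

final class AwaitAndAssertSketch {
    // Re-run the supplied assertions once per second until the latch is released.
    static void pollUntilReleased(CountDownLatch latch, Runnable assertions)
            throws InterruptedException {
        while (!latch.await(1, SECONDS)) {
            assertions.run();
        }
    }
}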

Aggregations

HeartbeatResponse (org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse) 11
SlowPeerReports (org.apache.hadoop.hdfs.server.protocol.SlowPeerReports) 7
VolumeFailureSummary (org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary) 7
DatanodeCommand (org.apache.hadoop.hdfs.server.protocol.DatanodeCommand) 6
DatanodeRegistration (org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) 5
NNHAStatusHeartbeat (org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat) 5
IOException (java.io.IOException) 4
InetSocketAddress (java.net.InetSocketAddress) 3
DatanodeProtocolClientSideTranslatorPB (org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB) 3
NamespaceInfo (org.apache.hadoop.hdfs.server.protocol.NamespaceInfo) 3
StorageReport (org.apache.hadoop.hdfs.server.protocol.StorageReport) 3
Test (org.junit.Test) 3
InvocationOnMock (org.mockito.invocation.InvocationOnMock) 3
ServiceException (com.google.protobuf.ServiceException) 2
File (java.io.File) 2
ArrayList (java.util.ArrayList) 2
CountDownLatch (java.util.concurrent.CountDownLatch) 2
RollingUpgradeStatus (org.apache.hadoop.hdfs.protocol.RollingUpgradeStatus) 2
HeartbeatResponseProto (org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.HeartbeatResponseProto) 2
EOFException (java.io.EOFException) 1