Use of org.apache.hadoop.hdfs.server.protocol.DatanodeCommand in project hadoop by apache.
The class FSNamesystem, method handleHeartbeat:
/**
 * The given node has reported in. This method should:
 * 1) Record the heartbeat, so the datanode isn't timed out
 * 2) Adjust usage stats for future block allocation
 *
 * If a substantial amount of time has passed since the last datanode
 * heartbeat, request an immediate block report.
 *
 * @return an array of datanode commands
 * @throws IOException
 */
HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
    StorageReport[] reports, long cacheCapacity, long cacheUsed,
    int xceiverCount, int xmitsInProgress, int failedVolumes,
    VolumeFailureSummary volumeFailureSummary,
    boolean requestFullBlockReportLease,
    @Nonnull SlowPeerReports slowPeers) throws IOException {
  readLock();
  try {
    // get datanode commands
    final int maxTransfer = blockManager.getMaxReplicationStreams()
        - xmitsInProgress;
    DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
        nodeReg, reports, getBlockPoolId(), cacheCapacity, cacheUsed,
        xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary,
        slowPeers);
    long blockReportLeaseId = 0;
    if (requestFullBlockReportLease) {
      blockReportLeaseId = blockManager.requestBlockReportLeaseId(nodeReg);
    }
    // create ha status
    final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
        haContext.getState().getServiceState(),
        getFSImage().getCorrectLastAppliedOrWrittenTxId());
    return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo,
        blockReportLeaseId);
  } finally {
    readUnlock("handleHeartbeat");
  }
}
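For context, here is a minimal sketch (not code from the project) of how a datanode-side consumer might dispatch the DatanodeCommand array carried by the returned HeartbeatResponse. getCommands(), getAction(), and the DNA_* constants all appear in the snippets on this page; the handleTransfer and handleInvalidate helpers are hypothetical placeholders.

// Hypothetical dispatcher for the commands in a HeartbeatResponse.
void dispatch(HeartbeatResponse resp) throws IOException {
  DatanodeCommand[] cmds = resp.getCommands();
  if (cmds == null) {
    return;
  }
  for (DatanodeCommand cmd : cmds) {
    switch (cmd.getAction()) {
      case DatanodeProtocol.DNA_TRANSFER:
        // replicate the listed blocks to other datanodes
        handleTransfer((BlockCommand) cmd);
        break;
      case DatanodeProtocol.DNA_INVALIDATE:
        // delete the listed blocks locally
        handleInvalidate((BlockCommand) cmd);
        break;
      default:
        // other command types omitted from this sketch
        break;
    }
  }
}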
Use of org.apache.hadoop.hdfs.server.protocol.DatanodeCommand in project hadoop by apache.
The class DatanodeProtocolClientSideTranslatorPB, method sendHeartbeat:
@Override
public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration,
    StorageReport[] reports, long cacheCapacity, long cacheUsed,
    int xmitsInProgress, int xceiverCount, int failedVolumes,
    VolumeFailureSummary volumeFailureSummary,
    boolean requestFullBlockReportLease,
    @Nonnull SlowPeerReports slowPeers) throws IOException {
  HeartbeatRequestProto.Builder builder = HeartbeatRequestProto.newBuilder()
      .setRegistration(PBHelper.convert(registration))
      .setXmitsInProgress(xmitsInProgress)
      .setXceiverCount(xceiverCount)
      .setFailedVolumes(failedVolumes)
      .setRequestFullBlockReportLease(requestFullBlockReportLease);
  builder.addAllReports(PBHelperClient.convertStorageReports(reports));
  if (cacheCapacity != 0) {
    builder.setCacheCapacity(cacheCapacity);
  }
  if (cacheUsed != 0) {
    builder.setCacheUsed(cacheUsed);
  }
  if (volumeFailureSummary != null) {
    builder.setVolumeFailureSummary(
        PBHelper.convertVolumeFailureSummary(volumeFailureSummary));
  }
  if (slowPeers.haveSlowPeers()) {
    builder.addAllSlowPeers(PBHelper.convertSlowPeerInfo(slowPeers));
  }
  HeartbeatResponseProto resp;
  try {
    resp = rpcProxy.sendHeartbeat(NULL_CONTROLLER, builder.build());
  } catch (ServiceException se) {
    throw ProtobufHelper.getRemoteException(se);
  }
  DatanodeCommand[] cmds = new DatanodeCommand[resp.getCmdsList().size()];
  int index = 0;
  for (DatanodeCommandProto p : resp.getCmdsList()) {
    cmds[index] = PBHelper.convert(p);
    index++;
  }
  RollingUpgradeStatus rollingUpdateStatus = null;
  // Use v2 semantics if available.
  if (resp.hasRollingUpgradeStatusV2()) {
    rollingUpdateStatus = PBHelperClient.convert(resp.getRollingUpgradeStatusV2());
  } else if (resp.hasRollingUpgradeStatus()) {
    rollingUpdateStatus = PBHelperClient.convert(resp.getRollingUpgradeStatus());
  }
  return new HeartbeatResponse(cmds, PBHelper.convert(resp.getHaStatus()),
      rollingUpdateStatus, resp.getFullBlockReportLeaseId());
}
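A hedged usage sketch of this translator from datanode code follows. The translator instance, registration, and storage reports are assumed to be in scope; SlowPeerReports.EMPTY_REPORT is assumed to be the conventional value for a node with no slow-peer observations, and the numeric arguments are illustrative.

// Illustrative call mirroring the signature above (note the argument order:
// xmitsInProgress before xceiverCount on this client-side method).
HeartbeatResponse resp = translator.sendHeartbeat(
    registration,                   // DatanodeRegistration
    reports,                        // StorageReport[], one per storage
    0L, 0L,                         // cacheCapacity, cacheUsed: no cache configured
    4,                              // xmitsInProgress
    16,                             // xceiverCount
    0,                              // failedVolumes
    null,                           // volumeFailureSummary: none to report
    false,                          // requestFullBlockReportLease
    SlowPeerReports.EMPTY_REPORT);  // assumed empty-report constant
for (DatanodeCommand cmd : resp.getCommands()) {
  // act on each NameNode command
}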
Use of org.apache.hadoop.hdfs.server.protocol.DatanodeCommand in project hadoop by apache.
The class DatanodeProtocolServerSideTranslatorPB, method blockReport:
@Override
public BlockReportResponseProto blockReport(RpcController controller,
    BlockReportRequestProto request) throws ServiceException {
  DatanodeCommand cmd = null;
  StorageBlockReport[] report = new StorageBlockReport[request.getReportsCount()];
  int index = 0;
  for (StorageBlockReportProto s : request.getReportsList()) {
    final BlockListAsLongs blocks;
    if (s.hasNumberOfBlocks()) {
      // new style buffer based reports
      int num = (int) s.getNumberOfBlocks();
      Preconditions.checkState(s.getBlocksCount() == 0,
          "cannot send both blocks list and buffers");
      blocks = BlockListAsLongs.decodeBuffers(num, s.getBlocksBuffersList(),
          maxDataLength);
    } else {
      blocks = BlockListAsLongs.decodeLongs(s.getBlocksList(), maxDataLength);
    }
    report[index++] = new StorageBlockReport(
        PBHelperClient.convert(s.getStorage()), blocks);
  }
  try {
    cmd = impl.blockReport(PBHelper.convert(request.getRegistration()),
        request.getBlockPoolId(), report,
        request.hasContext() ? PBHelper.convert(request.getContext()) : null);
  } catch (IOException e) {
    throw new ServiceException(e);
  }
  BlockReportResponseProto.Builder builder =
      BlockReportResponseProto.newBuilder();
  if (cmd != null) {
    builder.setCmd(PBHelper.convert(cmd));
  }
  return builder.build();
}
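The try/catch here follows the standard shape of every server-side translator method: the protobuf service interface can only throw ServiceException, so checked IOExceptions from the NameNode implementation are wrapped. A minimal sketch of that shape, with hypothetical proto and converter names:

// Generic server-side translator shape (all names here are illustrative).
@Override
public SomeResponseProto someCall(RpcController controller, SomeRequestProto req)
    throws ServiceException {
  try {
    // convert the request, do the real work (which may throw IOException),
    // and convert the result back into a proto message
    return toProto(impl.someCall(fromProto(req)));
  } catch (IOException e) {
    throw new ServiceException(e); // the only exception the RPC surface allows
  }
}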
Use of org.apache.hadoop.hdfs.server.protocol.DatanodeCommand in project hadoop by apache.
The class BPServiceActor, method offerService:
/**
* Main loop for each BP thread. Run until shutdown,
* forever calling remote NameNode functions.
*/
private void offerService() throws Exception {
  LOG.info("For namenode " + nnAddr + " using"
      + " BLOCKREPORT_INTERVAL of " + dnConf.blockReportInterval + "msec"
      + " CACHEREPORT_INTERVAL of " + dnConf.cacheReportInterval + "msec"
      + " Initial delay: " + dnConf.initialBlockReportDelayMs + "msec"
      + "; heartBeatInterval=" + dnConf.heartBeatInterval
      + (lifelineSender != null
          ? "; lifelineIntervalMs=" + dnConf.getLifelineIntervalMs() : ""));
  long fullBlockReportLeaseId = 0;
  while (shouldRun()) {
    try {
      final long startTime = scheduler.monotonicNow();

      //
      // Every so often, send heartbeat or block-report
      //
      final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime);
      HeartbeatResponse resp = null;
      if (sendHeartbeat) {
        //
        // All heartbeat messages include following info:
        // -- Datanode name
        // -- data transfer port
        // -- Total capacity
        // -- Bytes remaining
        //
        boolean requestBlockReportLease = (fullBlockReportLeaseId == 0)
            && scheduler.isBlockReportDue(startTime);
        if (!dn.areHeartbeatsDisabledForTests()) {
          resp = sendHeartBeat(requestBlockReportLease);
          assert resp != null;
          if (resp.getFullBlockReportLeaseId() != 0) {
            if (fullBlockReportLeaseId != 0) {
              LOG.warn(nnAddr + " sent back a full block report lease "
                  + "ID of 0x"
                  + Long.toHexString(resp.getFullBlockReportLeaseId())
                  + ", but we already have a lease ID of 0x"
                  + Long.toHexString(fullBlockReportLeaseId) + ". "
                  + "Overwriting old lease ID.");
            }
            fullBlockReportLeaseId = resp.getFullBlockReportLeaseId();
          }
          dn.getMetrics().addHeartbeat(scheduler.monotonicNow() - startTime);

          // If the state of this NN has changed (eg STANDBY->ACTIVE)
          // then let the BPOfferService update itself.
          //
          // Important that this happens before processCommand below,
          // since the first heartbeat to a new active might have commands
          // that we should actually process.
          bpos.updateActorStatesFromHeartbeat(this, resp.getNameNodeHaState());
          state = resp.getNameNodeHaState().getState();
          if (state == HAServiceState.ACTIVE) {
            handleRollingUpgradeStatus(resp);
          }

          long startProcessCommands = monotonicNow();
          if (!processCommand(resp.getCommands()))
            continue;
          long endProcessCommands = monotonicNow();
          if (endProcessCommands - startProcessCommands > 2000) {
            LOG.info("Took " + (endProcessCommands - startProcessCommands)
                + "ms to process " + resp.getCommands().length
                + " commands from NN");
          }
        }
      }
      if (ibrManager.sendImmediately() || sendHeartbeat) {
        ibrManager.sendIBRs(bpNamenode, bpRegistration,
            bpos.getBlockPoolId(), dn.getMetrics());
      }

      List<DatanodeCommand> cmds = null;
      boolean forceFullBr = scheduler.forceFullBlockReport.getAndSet(false);
      if (forceFullBr) {
        LOG.info("Forcing a full block report to " + nnAddr);
      }
      if ((fullBlockReportLeaseId != 0) || forceFullBr) {
        cmds = blockReport(fullBlockReportLeaseId);
        fullBlockReportLeaseId = 0;
      }
      processCommand(cmds == null ? null
          : cmds.toArray(new DatanodeCommand[cmds.size()]));

      if (!dn.areCacheReportsDisabledForTests()) {
        DatanodeCommand cmd = cacheReport();
        processCommand(new DatanodeCommand[] { cmd });
      }

      if (sendHeartbeat) {
        dn.getMetrics().addHeartbeatTotal(scheduler.monotonicNow() - startTime);
      }

      // There is no work to do; sleep until heartbeat timer elapses,
      // or work arrives, and then iterate again.
      ibrManager.waitTillNextIBR(scheduler.getHeartbeatWaitTime());
    } catch (RemoteException re) {
      String reClass = re.getClassName();
      if (UnregisteredNodeException.class.getName().equals(reClass)
          || DisallowedDatanodeException.class.getName().equals(reClass)
          || IncorrectVersionException.class.getName().equals(reClass)) {
        LOG.warn(this + " is shutting down", re);
        shouldServiceRun = false;
        return;
      }
      LOG.warn("RemoteException in offerService", re);
      sleepAfterException();
    } catch (IOException e) {
      LOG.warn("IOException in offerService", e);
      sleepAfterException();
    }
    processQueueMessages();
  }
  // while (shouldRun())
}
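Stripped of logging and metrics, the full-block-report lease handshake in this loop is a small state machine: the actor requests a lease only when a report is due and it holds none, and spends the lease (resetting it to zero) once the report is sent. A condensed, illustrative sketch using only the calls shown above:

// Condensed sketch of the lease cycle driving full block reports.
long leaseId = 0;
while (shouldRun()) {
  long now = scheduler.monotonicNow();
  if (scheduler.isHeartbeatDue(now)) {
    boolean wantLease = (leaseId == 0) && scheduler.isBlockReportDue(now);
    HeartbeatResponse resp = sendHeartBeat(wantLease);
    if (resp.getFullBlockReportLeaseId() != 0) {
      leaseId = resp.getFullBlockReportLeaseId(); // lease granted by the NameNode
    }
  }
  if (leaseId != 0) {
    blockReport(leaseId); // send the full report under the lease...
    leaseId = 0;          // ...then treat the lease as spent
  }
}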
Use of org.apache.hadoop.hdfs.server.protocol.DatanodeCommand in project hadoop by apache.
The class TestHeartbeatHandling, method testHeartbeat:
/**
 * Tests that
 * {@link FSNamesystem#handleHeartbeat}
 * picks up replication and/or invalidation requests and observes the
 * max limits.
 */
@Test
public void testHeartbeat() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  try {
    cluster.waitActive();
    final FSNamesystem namesystem = cluster.getNamesystem();
    final HeartbeatManager hm = namesystem.getBlockManager()
        .getDatanodeManager().getHeartbeatManager();
    final String poolId = namesystem.getBlockPoolId();
    final DatanodeRegistration nodeReg = InternalDataNodeTestUtils
        .getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
    final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg);
    final String storageID = DatanodeStorage.generateUuid();
    dd.updateStorage(new DatanodeStorage(storageID));

    final int REMAINING_BLOCKS = 1;
    final int MAX_REPLICATE_LIMIT = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2);
    final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT;
    final int MAX_INVALIDATE_BLOCKS = 2 * MAX_INVALIDATE_LIMIT + REMAINING_BLOCKS;
    final int MAX_REPLICATE_BLOCKS = 2 * MAX_REPLICATE_LIMIT + REMAINING_BLOCKS;
    final DatanodeStorageInfo[] ONE_TARGET = { dd.getStorageInfo(storageID) };

    try {
      namesystem.writeLock();
      synchronized (hm) {
        for (int i = 0; i < MAX_REPLICATE_BLOCKS; i++) {
          dd.addBlockToBeReplicated(
              new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP), ONE_TARGET);
        }
        DatanodeCommand[] cmds = NameNodeAdapter
            .sendHeartBeat(nodeReg, dd, namesystem).getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);

        ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
        for (int i = 0; i < MAX_INVALIDATE_BLOCKS; i++) {
          blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP));
        }
        dd.addBlocksToBeInvalidated(blockList);

        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
        assertEquals(2, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
        assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);

        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
        assertEquals(2, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
        assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);

        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
        assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);

        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
        assertEquals(0, cmds.length);
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}
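The block counts are chosen so that each queue drains over exactly three commands at its own limit: 2 * limit + REMAINING_BLOCKS pending items yield limit, limit, and then the single remaining block. A worked example of that arithmetic with the default replication limit of 2:

// Worked example of the drain arithmetic the test relies on (limit = 2).
int limit = 2;               // MAX_REPLICATE_LIMIT
int pending = 2 * limit + 1; // MAX_REPLICATE_BLOCKS = 5
int commands = 0;
while (pending > 0) {
  int sent = Math.min(limit, pending); // blocks carried by one BlockCommand
  pending -= sent;
  commands++;
}
// commands == 3: two full commands of 2 blocks each, then one with the remainder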