Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.
The class DatanodeLifelineProtocolServerSideTranslatorPB, method sendLifeline.
@Override
public LifelineResponseProto sendLifeline(RpcController controller,
    HeartbeatRequestProto request) throws ServiceException {
  try {
    final StorageReport[] report =
        PBHelperClient.convertStorageReports(request.getReportsList());
    VolumeFailureSummary volumeFailureSummary =
        request.hasVolumeFailureSummary()
            ? PBHelper.convertVolumeFailureSummary(request.getVolumeFailureSummary())
            : null;
    impl.sendLifeline(PBHelper.convert(request.getRegistration()), report,
        request.getCacheCapacity(), request.getCacheUsed(),
        request.getXmitsInProgress(), request.getXceiverCount(),
        request.getFailedVolumes(), volumeFailureSummary);
    return VOID_LIFELINE_RESPONSE_PROTO;
  } catch (IOException e) {
    throw new ServiceException(e);
  }
}
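For orientation, a StorageReport bundles per-volume statistics for one DatanodeStorage; the translator above simply unpacks a list of them from the protobuf request. Below is a minimal sketch, not taken from the Hadoop sources, of building a single report on the DataNode side before it is packed into the HeartbeatRequestProto handled above. The storage ID and byte counts are invented; the constructor argument order follows the seven-argument form used in the test snippets further down.

// Hypothetical example: storage ID and sizes are made-up values.
DatanodeStorage storage = new DatanodeStorage("DS-example-0001",
    DatanodeStorage.State.NORMAL, StorageType.DISK);
StorageReport report = new StorageReport(storage,
    false,                       // failed
    100L * 1024 * 1024 * 1024,   // capacity
    10L * 1024 * 1024 * 1024,    // dfsUsed
    80L * 1024 * 1024 * 1024,    // remaining
    10L * 1024 * 1024 * 1024,    // blockPoolUsed
    0L);                         // nonDfsUsed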
Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.
The class DatanodeDescriptor, method updateHeartbeatState.
/**
 * Process datanode heartbeat or stats initialization.
 */
public void updateHeartbeatState(StorageReport[] reports, long cacheCapacity,
    long cacheUsed, int xceiverCount, int volFailures,
    VolumeFailureSummary volumeFailureSummary) {
  long totalCapacity = 0;
  long totalRemaining = 0;
  long totalBlockPoolUsed = 0;
  long totalDfsUsed = 0;
  long totalNonDfsUsed = 0;
  Set<DatanodeStorageInfo> failedStorageInfos = null;
  // Decide if we should check for any missing StorageReport and mark it as
  // failed. There are different scenarios.
  // 1. When DN is running, a storage failed. Given the current DN
  //    implementation doesn't add recovered storage back to its storage list
  //    until DN restart, we can assume volFailures won't decrease
  //    during the current DN registration session.
  //    When volFailures == this.volumeFailures, it implies there is no
  //    state change. No need to check for failed storage. This is an
  //    optimization. Recent versions of the DataNode report a
  //    VolumeFailureSummary containing the date/time of the last volume
  //    failure. If that's available, then we check that instead for greater
  //    accuracy.
  // 2. After DN restarts, volFailures might not increase and it is possible
  //    we still have new failed storage. For example, admins reduce
  //    available storages in configuration. Another corner case
  //    is that the failed volumes might change after restart: a) there
  //    is one good storage A and one failed storage B, so there is
  //    one element in storageReports and that is A. b) A failed. c) Before
  //    DN sends HB to NN to indicate A has failed, DN restarts. d) After DN
  //    restarts, storageReports has one element which is B.
  final boolean checkFailedStorages;
  if (volumeFailureSummary != null && this.volumeFailureSummary != null) {
    checkFailedStorages = volumeFailureSummary.getLastVolumeFailureDate() >
        this.volumeFailureSummary.getLastVolumeFailureDate();
  } else {
    checkFailedStorages = (volFailures > this.volumeFailures) ||
        !heartbeatedSinceRegistration;
  }
  if (checkFailedStorages) {
    if (this.volumeFailures != volFailures) {
      LOG.info("Number of failed storages changes from {} to {}",
          this.volumeFailures, volFailures);
    }
    synchronized (storageMap) {
      failedStorageInfos = new HashSet<>(storageMap.values());
    }
  }
  setCacheCapacity(cacheCapacity);
  setCacheUsed(cacheUsed);
  setXceiverCount(xceiverCount);
  setLastUpdate(Time.now());
  setLastUpdateMonotonic(Time.monotonicNow());
  this.volumeFailures = volFailures;
  this.volumeFailureSummary = volumeFailureSummary;
  for (StorageReport report : reports) {
    DatanodeStorageInfo storage = updateStorage(report.getStorage());
    if (checkFailedStorages) {
      failedStorageInfos.remove(storage);
    }
    storage.receivedHeartbeat(report);
    totalCapacity += report.getCapacity();
    totalRemaining += report.getRemaining();
    totalBlockPoolUsed += report.getBlockPoolUsed();
    totalDfsUsed += report.getDfsUsed();
    totalNonDfsUsed += report.getNonDfsUsed();
  }
  rollBlocksScheduled(getLastUpdateMonotonic());
  // Update total metrics for the node.
  setCapacity(totalCapacity);
  setRemaining(totalRemaining);
  setBlockPoolUsed(totalBlockPoolUsed);
  setDfsUsed(totalDfsUsed);
  setNonDfsUsed(totalNonDfsUsed);
  if (checkFailedStorages) {
    updateFailedStorage(failedStorageInfos);
  }
  long storageMapSize;
  synchronized (storageMap) {
    storageMapSize = storageMap.size();
  }
  if (storageMapSize != reports.length) {
    pruneStorageMap(reports);
  }
}
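The comment block above carries the key decision: only re-scan the storage map for failures when something actually changed. The same decision, pulled out as a standalone helper for readability, is sketched below; this is illustrative code, not part of the Hadoop sources, and it only relies on the getLastVolumeFailureDate() accessor seen above.

// A standalone sketch (not Hadoop code) of the failed-storage check decision:
// with summaries on both sides, compare last-failure timestamps; otherwise fall
// back to the failed-volume count or the first heartbeat since registration.
static boolean shouldCheckFailedStorages(VolumeFailureSummary reported,
    VolumeFailureSummary current, int reportedVolFailures,
    int currentVolFailures, boolean heartbeatedSinceRegistration) {
  if (reported != null && current != null) {
    return reported.getLastVolumeFailureDate() >
        current.getLastVolumeFailureDate();
  }
  return reportedVolFailures > currentVolFailures
      || !heartbeatedSinceRegistration;
}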
Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.
The class TestBlockManager, method testBlockManagerMachinesArray.
@Test
public void testBlockManagerMachinesArray() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  cluster.waitActive();
  BlockManager blockManager = cluster.getNamesystem().getBlockManager();
  FileSystem fs = cluster.getFileSystem();
  final Path filePath = new Path("/tmp.txt");
  final long fileLen = 1L;
  DFSTestUtil.createFile(fs, filePath, fileLen, (short) 3, 1L);
  ArrayList<DataNode> datanodes = cluster.getDataNodes();
  assertEquals(4, datanodes.size());
  FSNamesystem ns = cluster.getNamesystem();
  // Get the block.
  final String bpid = cluster.getNamesystem().getBlockPoolId();
  File storageDir = cluster.getInstanceStorageDir(0, 0);
  File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
  assertTrue("Data directory does not exist", dataDir.exists());
  BlockInfo blockInfo = blockManager.blocksMap.getBlocks().iterator().next();
  ExtendedBlock blk = new ExtendedBlock(bpid, blockInfo.getBlockId(),
      blockInfo.getNumBytes(), blockInfo.getGenerationStamp());
  DatanodeDescriptor failedStorageDataNode =
      blockManager.getStoredBlock(blockInfo).getDatanode(0);
  DatanodeDescriptor corruptStorageDataNode =
      blockManager.getStoredBlock(blockInfo).getDatanode(1);
  // Report the storage holding the block on the first datanode as FAILED.
  ArrayList<StorageReport> reports = new ArrayList<StorageReport>();
  for (int i = 0; i < failedStorageDataNode.getStorageInfos().length; i++) {
    DatanodeStorageInfo storageInfo = failedStorageDataNode.getStorageInfos()[i];
    DatanodeStorage dns = new DatanodeStorage(storageInfo.getStorageID(),
        DatanodeStorage.State.FAILED, storageInfo.getStorageType());
    Iterator<BlockInfo> blockIterator = storageInfo.getBlockIterator();
    while (blockIterator.hasNext()) {
      BlockInfo blockInfo1 = blockIterator.next();
      if (blockInfo1.equals(blockInfo)) {
        StorageReport report = new StorageReport(dns, true,
            storageInfo.getCapacity(), storageInfo.getDfsUsed(),
            storageInfo.getRemaining(), storageInfo.getBlockPoolUsed(), 0L);
        reports.add(report);
        break;
      }
    }
  }
  failedStorageDataNode.updateHeartbeat(
      reports.toArray(StorageReport.EMPTY_ARRAY), 0L, 0L, 0, 0, null);
  ns.writeLock();
  // Find the storage on the second datanode that holds the block and mark
  // the replica there as corrupt.
  DatanodeStorageInfo corruptStorageInfo = null;
  boolean found = false;
  for (int i = 0; i < corruptStorageDataNode.getStorageInfos().length && !found;
      i++) {
    corruptStorageInfo = corruptStorageDataNode.getStorageInfos()[i];
    Iterator<BlockInfo> blockIterator = corruptStorageInfo.getBlockIterator();
    while (blockIterator.hasNext()) {
      BlockInfo blockInfo1 = blockIterator.next();
      if (blockInfo1.equals(blockInfo)) {
        found = true;
        break;
      }
    }
  }
  blockManager.findAndMarkBlockAsCorrupt(blk, corruptStorageDataNode,
      corruptStorageInfo.getStorageID(), CorruptReplicasMap.Reason.ANY.toString());
  ns.writeUnlock();
  BlockInfo[] blockInfos = new BlockInfo[] { blockInfo };
  ns.readLock();
  LocatedBlocks locatedBlocks = blockManager.createLocatedBlocks(blockInfos,
      3L, false, 0L, 3L, false, false, null, null);
  assertEquals("Located blocks should exclude corrupt replicas and failed storages",
      1, locatedBlocks.getLocatedBlocks().size());
  ns.readUnlock();
}
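A possible follow-up check, sketched below and not part of the original test, would inspect the machines array of the surviving located block directly: after the FAILED storage report and the corrupt-replica marking, neither of the two affected datanodes should appear among the returned locations. The assertions assume the usual static JUnit imports.

// Sketch only: verify the location array excludes the affected datanodes.
DatanodeInfo[] machines =
    locatedBlocks.getLocatedBlocks().get(0).getLocations();
for (DatanodeInfo machine : machines) {
  assertFalse(machine.equals(failedStorageDataNode));
  assertFalse(machine.equals(corruptStorageDataNode));
}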
Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.
The class TestNameNodePrunesMissingStorages, method runTest.
private static void runTest(final String testCaseName, final boolean createFiles,
    final int numInitialStorages, final int expectedStoragesAfterTest)
    throws IOException {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1)
        .storagesPerDatanode(numInitialStorages)
        .build();
    cluster.waitActive();
    final DataNode dn0 = cluster.getDataNodes().get(0);
    // Ensure NN knows about the storage.
    final DatanodeID dnId = dn0.getDatanodeId();
    final DatanodeDescriptor dnDescriptor = cluster.getNamesystem()
        .getBlockManager().getDatanodeManager().getDatanode(dnId);
    assertThat(dnDescriptor.getStorageInfos().length, is(numInitialStorages));
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    final DatanodeRegistration dnReg = dn0.getDNRegistrationForBP(bpid);
    DataNodeTestUtils.triggerBlockReport(dn0);
    if (createFiles) {
      final Path path = new Path("/", testCaseName);
      DFSTestUtil.createFile(cluster.getFileSystem(), path, 1024, (short) 1,
          0x1BAD5EED);
      DataNodeTestUtils.triggerBlockReport(dn0);
    }
    // Generate a fake StorageReport that is missing one storage.
    final StorageReport[] reports = dn0.getFSDataset().getStorageReports(bpid);
    final StorageReport[] prunedReports =
        new StorageReport[numInitialStorages - 1];
    System.arraycopy(reports, 0, prunedReports, 0, prunedReports.length);
    // Stop the DataNode and send fake heartbeat with missing storage.
    cluster.stopDataNode(0);
    cluster.getNameNodeRpc().sendHeartbeat(dnReg, prunedReports, 0L, 0L, 0, 0, 0,
        null, true, SlowPeerReports.EMPTY_REPORT);
    // Check that the missing storage was pruned.
    assertThat(dnDescriptor.getStorageInfos().length,
        is(expectedStoragesAfterTest));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
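The helper is driven by individual test cases that vary whether files exist on the missing storage. The invocations below are hypothetical; the method names and parameter values are illustrative and not copied from the Hadoop test class. The intent they sketch: with no files written, the storage missing from the pruned report holds no blocks and should be dropped; with files present, a missing storage may still hold replicas and should survive the heartbeat.

// Hypothetical invocations of runTest (names and values are illustrative).
@Test
public void testUnusedStorageIsPruned() throws IOException {
  runTest("testUnusedStorageIsPruned", false, 3, 2);
}

@Test
public void testStorageWithBlocksIsNotPruned() throws IOException {
  runTest("testStorageWithBlocksIsNotPruned", true, 3, 3);
}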
Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.
The class TestFsDatasetCache, method setHeartbeatResponse.
private static void setHeartbeatResponse(DatanodeCommand[] cmds)
    throws IOException {
  NNHAStatusHeartbeat ha = new NNHAStatusHeartbeat(HAServiceState.ACTIVE,
      fsImage.getLastAppliedOrWrittenTxId());
  HeartbeatResponse response = new HeartbeatResponse(cmds, ha, null,
      ThreadLocalRandom.current().nextLong() | 1L);
  doReturn(response).when(spyNN).sendHeartbeat(
      (DatanodeRegistration) any(),
      (StorageReport[]) any(),
      anyLong(), anyLong(), anyInt(), anyInt(), anyInt(),
      (VolumeFailureSummary) any(),
      anyBoolean(),
      any(SlowPeerReports.class));
}
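Because the Mockito stub matches any arguments, every subsequent heartbeat sent to the spied NameNode returns the prepared response with the given commands. A hedged usage sketch: stub an empty command array and then force a heartbeat so the DataNode picks up the stubbed response. Here dn is a stand-in for the DataNode under test, and DataNodeTestUtils.triggerHeartbeat is assumed to be available in the test scope.

// Sketch only: clear outstanding commands, then trigger a heartbeat.
setHeartbeatResponse(new DatanodeCommand[0]);
DataNodeTestUtils.triggerHeartbeat(dn);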