Example 11 with StorageReport

Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.

From the class DatanodeLifelineProtocolServerSideTranslatorPB, method sendLifeline:

@Override
public LifelineResponseProto sendLifeline(RpcController controller, HeartbeatRequestProto request) throws ServiceException {
    try {
        final StorageReport[] report = PBHelperClient.convertStorageReports(request.getReportsList());
        VolumeFailureSummary volumeFailureSummary = request.hasVolumeFailureSummary() ? PBHelper.convertVolumeFailureSummary(request.getVolumeFailureSummary()) : null;
        impl.sendLifeline(PBHelper.convert(request.getRegistration()), report, request.getCacheCapacity(), request.getCacheUsed(), request.getXmitsInProgress(), request.getXceiverCount(), request.getFailedVolumes(), volumeFailureSummary);
        return VOID_LIFELINE_RESPONSE_PROTO;
    } catch (IOException e) {
        throw new ServiceException(e);
    }
}
Also used: ServiceException (com.google.protobuf.ServiceException), StorageReport (org.apache.hadoop.hdfs.server.protocol.StorageReport), IOException (java.io.IOException), VolumeFailureSummary (org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary)
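
The translator's job here is purely mechanical: unpack the protobuf request into the Java-side protocol types, then delegate to impl. For orientation, below is a minimal sketch (not Hadoop source) of constructing the kind of StorageReport[] that impl.sendLifeline receives once that conversion is done. The storage ID and sizes are invented; the StorageReport constructor argument order is the one visible in Example 13 below, and DatanodeStorage.State.NORMAL and StorageType.DISK are assumed to be available in this Hadoop version.

import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;

public class StorageReportSketch {
    // Builds a single healthy report; all values are illustrative only.
    public static StorageReport[] exampleReports() {
        DatanodeStorage storage = new DatanodeStorage(
                // hypothetical storage ID
                "DS-0fe25c5c-example",
                DatanodeStorage.State.NORMAL,
                StorageType.DISK);
        long gib = 1024L * 1024 * 1024;
        // Argument order: storage, failed, capacity, dfsUsed, remaining,
        // blockPoolUsed, nonDfsUsed (see the constructor call in Example 13).
        StorageReport report = new StorageReport(
                storage, false, 4 * gib, 1 * gib, 3 * gib, 1 * gib, 0L);
        return new StorageReport[] { report };
    }
}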

Example 12 with StorageReport

Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.

From the class DatanodeDescriptor, method updateHeartbeatState:

/**
   * Process a datanode heartbeat or stats initialization.
   */
public void updateHeartbeatState(StorageReport[] reports, long cacheCapacity, long cacheUsed, int xceiverCount, int volFailures, VolumeFailureSummary volumeFailureSummary) {
    long totalCapacity = 0;
    long totalRemaining = 0;
    long totalBlockPoolUsed = 0;
    long totalDfsUsed = 0;
    long totalNonDfsUsed = 0;
    Set<DatanodeStorageInfo> failedStorageInfos = null;
    // Decide if we should check for any missing StorageReport and mark it as
    // failed. There are different scenarios.
    // 1. When DN is running, a storage failed. Given the current DN
    //    implementation doesn't add recovered storage back to its storage list
    //    until DN restart, we can assume volFailures won't decrease
    //    during the current DN registration session.
    //    When volFailures == this.volumeFailures, it implies there is no
    //    state change. No need to check for failed storage. This is an
    //    optimization.  Recent versions of the DataNode report a
    //    VolumeFailureSummary containing the date/time of the last volume
    //    failure.  If that's available, then we check that instead for greater
    //    accuracy.
    // 2. After DN restarts, volFailures might not increase and it is possible
    //    we still have new failed storage. For example, admins reduce
    //    available storages in configuration. Another corner case
    //    is the failed volumes might change after restart; a) there
    //    is one good storage A, one failed storage B, so there is
    //    one element in storageReports and that is A. b) A failed. c) Before
    //    DN sends HB to NN to indicate A has failed, DN restarts. d) After DN
    //    restarts, storageReports has one element which is B.
    final boolean checkFailedStorages;
    if (volumeFailureSummary != null && this.volumeFailureSummary != null) {
        checkFailedStorages = volumeFailureSummary.getLastVolumeFailureDate() > this.volumeFailureSummary.getLastVolumeFailureDate();
    } else {
        checkFailedStorages = (volFailures > this.volumeFailures) || !heartbeatedSinceRegistration;
    }
    if (checkFailedStorages) {
        if (this.volumeFailures != volFailures) {
            LOG.info("Number of failed storages changes from {} to {}", this.volumeFailures, volFailures);
        }
        synchronized (storageMap) {
            failedStorageInfos = new HashSet<>(storageMap.values());
        }
    }
    setCacheCapacity(cacheCapacity);
    setCacheUsed(cacheUsed);
    setXceiverCount(xceiverCount);
    setLastUpdate(Time.now());
    setLastUpdateMonotonic(Time.monotonicNow());
    this.volumeFailures = volFailures;
    this.volumeFailureSummary = volumeFailureSummary;
    for (StorageReport report : reports) {
        DatanodeStorageInfo storage = updateStorage(report.getStorage());
        if (checkFailedStorages) {
            failedStorageInfos.remove(storage);
        }
        storage.receivedHeartbeat(report);
        totalCapacity += report.getCapacity();
        totalRemaining += report.getRemaining();
        totalBlockPoolUsed += report.getBlockPoolUsed();
        totalDfsUsed += report.getDfsUsed();
        totalNonDfsUsed += report.getNonDfsUsed();
    }
    rollBlocksScheduled(getLastUpdateMonotonic());
    // Update total metrics for the node.
    setCapacity(totalCapacity);
    setRemaining(totalRemaining);
    setBlockPoolUsed(totalBlockPoolUsed);
    setDfsUsed(totalDfsUsed);
    setNonDfsUsed(totalNonDfsUsed);
    if (checkFailedStorages) {
        updateFailedStorage(failedStorageInfos);
    }
    long storageMapSize;
    synchronized (storageMap) {
        storageMapSize = storageMap.size();
    }
    if (storageMapSize != reports.length) {
        pruneStorageMap(reports);
    }
}
Also used: StorageReport (org.apache.hadoop.hdfs.server.protocol.StorageReport)
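
The pivotal decision above is checkFailedStorages. Restated in isolation, purely for illustration (this helper is not part of DatanodeDescriptor): prefer the VolumeFailureSummary timestamps when both the incoming heartbeat and the descriptor carry one; otherwise fall back to the failed-volume count and the first-heartbeat-since-registration flag.

import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;

class FailedStorageCheckSketch {
    // Mirrors the branch in updateHeartbeatState above; extracted here
    // purely for illustration.
    static boolean shouldCheckFailedStorages(
            VolumeFailureSummary incoming, VolumeFailureSummary current,
            int volFailures, int knownVolFailures,
            boolean heartbeatedSinceRegistration) {
        if (incoming != null && current != null) {
            // Both sides report a summary: a newer last-failure timestamp
            // means a volume failed since the previous heartbeat.
            return incoming.getLastVolumeFailureDate()
                    > current.getLastVolumeFailureDate();
        }
        // Fallback: the failed-volume count grew, or this is the first
        // heartbeat since (re-)registration and state must be re-checked.
        return volFailures > knownVolFailures || !heartbeatedSinceRegistration;
    }
}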

Example 13 with StorageReport

Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.

From the class TestBlockManager, method testBlockManagerMachinesArray:

@Test
public void testBlockManagerMachinesArray() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    cluster.waitActive();
    BlockManager blockManager = cluster.getNamesystem().getBlockManager();
    FileSystem fs = cluster.getFileSystem();
    final Path filePath = new Path("/tmp.txt");
    final long fileLen = 1L;
    DFSTestUtil.createFile(fs, filePath, fileLen, (short) 3, 1L);
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    assertEquals(4, datanodes.size());
    FSNamesystem ns = cluster.getNamesystem();
    // get the block
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getInstanceStorageDir(0, 0);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    assertTrue("Data directory does not exist", dataDir.exists());
    BlockInfo blockInfo = blockManager.blocksMap.getBlocks().iterator().next();
    ExtendedBlock blk = new ExtendedBlock(bpid, blockInfo.getBlockId(), blockInfo.getNumBytes(), blockInfo.getGenerationStamp());
    DatanodeDescriptor failedStorageDataNode = blockManager.getStoredBlock(blockInfo).getDatanode(0);
    DatanodeDescriptor corruptStorageDataNode = blockManager.getStoredBlock(blockInfo).getDatanode(1);
    ArrayList<StorageReport> reports = new ArrayList<StorageReport>();
    for (int i = 0; i < failedStorageDataNode.getStorageInfos().length; i++) {
        DatanodeStorageInfo storageInfo = failedStorageDataNode.getStorageInfos()[i];
        DatanodeStorage dns = new DatanodeStorage(storageInfo.getStorageID(), DatanodeStorage.State.FAILED, storageInfo.getStorageType());
        // Obtain the block iterator once: calling getBlockIterator() inside
        // the loop condition would fetch a fresh iterator and restart the
        // scan on every pass.
        Iterator<BlockInfo> blockIterator = storageInfo.getBlockIterator();
        while (blockIterator.hasNext()) {
            BlockInfo blockInfo1 = blockIterator.next();
            if (blockInfo1.equals(blockInfo)) {
                StorageReport report = new StorageReport(dns, true, storageInfo.getCapacity(), storageInfo.getDfsUsed(), storageInfo.getRemaining(), storageInfo.getBlockPoolUsed(), 0L);
                reports.add(report);
                break;
            }
        }
    }
    failedStorageDataNode.updateHeartbeat(reports.toArray(StorageReport.EMPTY_ARRAY), 0L, 0L, 0, 0, null);
    ns.writeLock();
    DatanodeStorageInfo corruptStorageInfo = null;
    for (int i = 0; i < corruptStorageDataNode.getStorageInfos().length; i++) {
        corruptStorageInfo = corruptStorageDataNode.getStorageInfos()[i];
        // Same fix: hold one iterator rather than re-creating it per pass.
        Iterator<BlockInfo> corruptBlockIterator = corruptStorageInfo.getBlockIterator();
        while (corruptBlockIterator.hasNext()) {
            BlockInfo blockInfo1 = corruptBlockIterator.next();
            if (blockInfo1.equals(blockInfo)) {
                break;
            }
        }
    }
    blockManager.findAndMarkBlockAsCorrupt(blk, corruptStorageDataNode, corruptStorageInfo.getStorageID(), CorruptReplicasMap.Reason.ANY.toString());
    ns.writeUnlock();
    BlockInfo[] blockInfos = new BlockInfo[] { blockInfo };
    ns.readLock();
    LocatedBlocks locatedBlocks = blockManager.createLocatedBlocks(blockInfos, 3L, false, 0L, 3L, false, false, null, null);
    assertTrue("Located Blocks should exclude corrupt" + "replicas and failed storages", locatedBlocks.getLocatedBlocks().size() == 1);
    ns.readUnlock();
}
Also used: Path (org.apache.hadoop.fs.Path), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), MetricsRecordBuilder (org.apache.hadoop.metrics2.MetricsRecordBuilder), LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks), ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock), ArrayList (java.util.ArrayList), Iterator (java.util.Iterator), StorageReport (org.apache.hadoop.hdfs.server.protocol.StorageReport), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), ReceivedDeletedBlockInfo (org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo), FileSystem (org.apache.hadoop.fs.FileSystem), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), DatanodeStorage (org.apache.hadoop.hdfs.server.protocol.DatanodeStorage), TestINodeFile (org.apache.hadoop.hdfs.server.namenode.TestINodeFile), INodeFile (org.apache.hadoop.hdfs.server.namenode.INodeFile), File (java.io.File), FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem), Test (org.junit.Test)
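
Two details in this test are easy to miss. First, reports.toArray(StorageReport.EMPTY_ARRAY) reuses the shared zero-length constant rather than allocating a new StorageReport[0] on every call. Second, the heartbeat carrying the DatanodeStorage.State.FAILED report goes through the same updateHeartbeatState path shown in Example 12, which is what should drop the failed storage from the block's locations; together with findAndMarkBlockAsCorrupt on the second replica, that leaves the single location the final assertion expects.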

Example 14 with StorageReport

Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.

From the class TestNameNodePrunesMissingStorages, method runTest:

private static void runTest(final String testCaseName, final boolean createFiles, final int numInitialStorages, final int expectedStoragesAfterTest) throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).storagesPerDatanode(numInitialStorages).build();
        cluster.waitActive();
        final DataNode dn0 = cluster.getDataNodes().get(0);
        // Ensure NN knows about the storage.
        final DatanodeID dnId = dn0.getDatanodeId();
        final DatanodeDescriptor dnDescriptor = cluster.getNamesystem().getBlockManager().getDatanodeManager().getDatanode(dnId);
        assertThat(dnDescriptor.getStorageInfos().length, is(numInitialStorages));
        final String bpid = cluster.getNamesystem().getBlockPoolId();
        final DatanodeRegistration dnReg = dn0.getDNRegistrationForBP(bpid);
        DataNodeTestUtils.triggerBlockReport(dn0);
        if (createFiles) {
            final Path path = new Path("/", testCaseName);
            DFSTestUtil.createFile(cluster.getFileSystem(), path, 1024, (short) 1, 0x1BAD5EED);
            DataNodeTestUtils.triggerBlockReport(dn0);
        }
        // Generate a fake StorageReport that is missing one storage.
        final StorageReport[] reports = dn0.getFSDataset().getStorageReports(bpid);
        final StorageReport[] prunedReports = new StorageReport[numInitialStorages - 1];
        System.arraycopy(reports, 0, prunedReports, 0, prunedReports.length);
        // Stop the DataNode and send fake heartbeat with missing storage.
        cluster.stopDataNode(0);
        cluster.getNameNodeRpc().sendHeartbeat(dnReg, prunedReports, 0L, 0L, 0, 0, 0, null, true, SlowPeerReports.EMPTY_REPORT);
        // Check that the missing storage was pruned.
        assertThat(dnDescriptor.getStorageInfos().length, is(expectedStoragesAfterTest));
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID), DatanodeRegistration (org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), StorageReport (org.apache.hadoop.hdfs.server.protocol.StorageReport)
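
This test drives the pruning branch at the end of updateHeartbeatState in Example 12: the fabricated heartbeat carries one report fewer than the descriptor's storage map, so storageMapSize != reports.length holds and pruneStorageMap(reports) runs. Whether the missing storage is actually removed depends on whether it still holds blocks, which is presumably why the test is parameterized by createFiles and expectedStoragesAfterTest.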

Example 15 with StorageReport

Use of org.apache.hadoop.hdfs.server.protocol.StorageReport in project hadoop by apache.

From the class TestFsDatasetCache, method setHeartbeatResponse:

private static void setHeartbeatResponse(DatanodeCommand[] cmds) throws IOException {
    NNHAStatusHeartbeat ha = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, fsImage.getLastAppliedOrWrittenTxId());
    HeartbeatResponse response = new HeartbeatResponse(cmds, ha, null, ThreadLocalRandom.current().nextLong() | 1L);
    doReturn(response).when(spyNN).sendHeartbeat((DatanodeRegistration) any(), (StorageReport[]) any(), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), (VolumeFailureSummary) any(), anyBoolean(), any(SlowPeerReports.class));
}
Also used: HeartbeatResponse (org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse), NNHAStatusHeartbeat (org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat), StorageReport (org.apache.hadoop.hdfs.server.protocol.StorageReport), SlowPeerReports (org.apache.hadoop.hdfs.server.protocol.SlowPeerReports)
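
Because every parameter of sendHeartbeat is stubbed with a Mockito matcher, any heartbeat the DataNode sends receives this canned response, letting the test inject arbitrary DatanodeCommand[] (cache and uncache commands, in this suite) without a live NameNode. The | 1L forces the randomly generated last constructor argument, which appears to be the full block report lease ID in this signature, to be odd and therefore nonzero.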

Aggregations

StorageReport (org.apache.hadoop.hdfs.server.protocol.StorageReport): 16
DatanodeStorage (org.apache.hadoop.hdfs.server.protocol.DatanodeStorage): 5
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 4
VolumeFailureSummary (org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary): 4
IOException (java.io.IOException): 3
Configuration (org.apache.hadoop.conf.Configuration): 3
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 3
DatanodeRegistration (org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration): 3
HeartbeatResponse (org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse): 3
SlowPeerReports (org.apache.hadoop.hdfs.server.protocol.SlowPeerReports): 3
Test (org.junit.Test): 3
ServiceException (com.google.protobuf.ServiceException): 2
ArrayList (java.util.ArrayList): 2
Path (org.apache.hadoop.fs.Path): 2
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 2
DatanodeCommand (org.apache.hadoop.hdfs.server.protocol.DatanodeCommand): 2
DatanodeStorageReport (org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport): 2
ReceivedDeletedBlockInfo (org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo): 2
File (java.io.File): 1
ClosedChannelException (java.nio.channels.ClosedChannelException): 1