
Example 41 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

the class TestFsck method testFsckMissingECFile.

@Test(timeout = 300000)
public void testFsckMissingECFile() throws Exception {
    DistributedFileSystem fs = null;
    int dataBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumDataUnits();
    int parityBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumParityUnits();
    int cellSize = StripedFileTestUtil.getDefaultECPolicy().getCellSize();
    int totalSize = dataBlocks + parityBlocks;
    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, StripedFileTestUtil.getDefaultECPolicy().getName());
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(totalSize).build();
    fs = cluster.getFileSystem();
    // create file
    Path ecDirPath = new Path("/striped");
    fs.mkdir(ecDirPath, FsPermission.getDirDefault());
    fs.getClient().setErasureCodingPolicy(ecDirPath.toString(), StripedFileTestUtil.getDefaultECPolicy().getName());
    Path file = new Path(ecDirPath, "missing");
    final int length = cellSize * dataBlocks;
    final byte[] bytes = StripedFileTestUtil.generateBytes(length);
    DFSTestUtil.writeFile(fs, file, bytes);
    // make an unrecoverable ec file with missing blocks
    ArrayList<DataNode> dns = cluster.getDataNodes();
    DatanodeID dnId;
    for (int i = 0; i < parityBlocks + 1; i++) {
        dnId = dns.get(i).getDatanodeId();
        cluster.stopDataNode(dnId.getXferAddr());
        cluster.setDataNodeDead(dnId);
    }
    waitForUnrecoverableBlockGroup(conf);
    String outStr = runFsck(conf, 1, true, "/", "-files", "-blocks", "-locations");
    assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
    assertTrue(outStr.contains("Live_repl=" + (dataBlocks - 1)));
    assertTrue(outStr.contains("Under-erasure-coded block groups:\t0"));
    outStr = runFsck(conf, -1, true, "/", "-list-corruptfileblocks");
    assertTrue(outStr.contains("has 1 CORRUPT files"));
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Matchers.anyString(org.mockito.Matchers.anyString) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Test(org.junit.Test)
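
The essential move in this example is taking more DataNodes out of service than the erasure coding policy has parity units, so the block group becomes unrecoverable and fsck flags the file as corrupt. Below is a minimal sketch of just that step, assuming an already-running MiniDFSCluster; the class and method names (EcBlockGroupKiller, killDataNodes) are illustrative and not part of the test above.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

public class EcBlockGroupKiller {

    /**
     * Stops the first {@code count} DataNodes and marks them dead on the
     * NameNode. For a striped file, count = parityBlocks + 1 leaves too few
     * internal blocks to reconstruct the block group.
     */
    static void killDataNodes(MiniDFSCluster cluster, int count) throws IOException {
        List<DataNode> dns = cluster.getDataNodes();
        for (int i = 0; i < count; i++) {
            DatanodeID dnId = dns.get(i).getDatanodeId();
            // Shut down the DataNode process ...
            cluster.stopDataNode(dnId.getXferAddr());
            // ... and make the NameNode treat it as dead right away.
            cluster.setDataNodeDead(dnId);
        }
    }
}

After this step, a run of fsck with -files -blocks -locations should report the CORRUPT status checked by the assertions above.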

Example 42 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

the class TestListCorruptFileBlocks method testMaxCorruptFiles.

/**
   * Test if NN.listCorruptFiles() returns the right number of results.
   * The corrupt blocks are detected by the BlockPoolSliceScanner.
   * Also, test that DFS.listCorruptFileBlocks can make multiple successive
   * calls.
   */
@Test(timeout = 300000)
public void testMaxCorruptFiles() throws Exception {
    MiniDFSCluster cluster = null;
    try {
        Configuration conf = new HdfsConfiguration();
        // datanode sends block reports
        conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000);
        cluster = new MiniDFSCluster.Builder(conf).build();
        FileSystem fs = cluster.getFileSystem();
        final int maxCorruptFileBlocks = FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED;
        // create maxCorruptFileBlocks * 3 files with one block each
        DFSTestUtil util = new DFSTestUtil.Builder().setName("testMaxCorruptFiles").setNumFiles(maxCorruptFileBlocks * 3).setMaxLevels(1).setMaxSize(512).build();
        util.createFiles(fs, "/srcdat2", (short) 1);
        util.waitReplication(fs, "/srcdat2", (short) 1);
        // verify that there are no bad blocks.
        final NameNode namenode = cluster.getNameNode();
        Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
        assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting none.", badFiles.size() == 0);
        // Now deliberately remove blocks from all files
        final String bpid = cluster.getNamesystem().getBlockPoolId();
        for (int i = 0; i < 4; i++) {
            for (int j = 0; j <= 1; j++) {
                File storageDir = cluster.getInstanceStorageDir(i, j);
                File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
                LOG.info("Removing files from " + data_dir);
                List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
                if (metadataFiles == null)
                    continue;
                for (File metadataFile : metadataFiles) {
                    File blockFile = Block.metaToBlockFile(metadataFile);
                    assertTrue("Cannot remove file.", blockFile.delete());
                    assertTrue("Cannot remove file.", metadataFile.delete());
                }
            }
        }
        // Run the directoryScanner to update the DataNode's volumeMap
        DataNode dn = cluster.getDataNodes().get(0);
        DataNodeTestUtils.runDirectoryScanner(dn);
        // Occasionally the BlockPoolSliceScanner can run before we have removed
        // the blocks. Restart the Datanode to trigger the scanner into running
        // once more.
        LOG.info("Restarting Datanode to trigger BlockPoolSliceScanner");
        cluster.restartDataNodes();
        cluster.waitActive();
        badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
        while (badFiles.size() < maxCorruptFileBlocks) {
            LOG.info("# of corrupt files is: " + badFiles.size());
            Thread.sleep(10000);
            badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
        }
        badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
        LOG.info("Namenode has bad files. " + badFiles.size());
        assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting " + maxCorruptFileBlocks + ".", badFiles.size() == maxCorruptFileBlocks);
        CorruptFileBlockIterator iter = (CorruptFileBlockIterator) fs.listCorruptFileBlocks(new Path("/srcdat2"));
        int corruptPaths = countPaths(iter);
        assertTrue("Expected more than " + maxCorruptFileBlocks + " corrupt file blocks but got " + corruptPaths, corruptPaths > maxCorruptFileBlocks);
        assertTrue("Iterator should have made more than 1 call but made " + iter.getCallsMade(), iter.getCallsMade() > 1);
        util.cleanup(fs, "/srcdat2");
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DFSTestUtil(org.apache.hadoop.hdfs.DFSTestUtil) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) CorruptFileBlockIterator(org.apache.hadoop.hdfs.client.impl.CorruptFileBlockIterator) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) Test(org.junit.Test)
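
The corruption step in the middle of this test is reusable on its own: delete every finalized block file and its .meta companion from a DataNode's storage directories, then run the directory scanner so the DataNode reconciles its in-memory volume map with what is actually left on disk. A condensed sketch of that step, assuming a running MiniDFSCluster; the helper name deleteAllFinalizedBlocks is illustrative.

import java.io.File;
import java.util.List;

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;

public class BlockFileCleaner {

    /** Deletes all finalized block and metadata files on the DataNode at dnIndex. */
    static void deleteAllFinalizedBlocks(MiniDFSCluster cluster, int dnIndex) throws Exception {
        String bpid = cluster.getNamesystem().getBlockPoolId();
        // A MiniDFSCluster DataNode has two storage directories by default.
        for (int volume = 0; volume < 2; volume++) {
            File storageDir = cluster.getInstanceStorageDir(dnIndex, volume);
            File finalizedDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
            List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(finalizedDir);
            if (metadataFiles == null) {
                continue;
            }
            for (File metadataFile : metadataFiles) {
                File blockFile = Block.metaToBlockFile(metadataFile);
                blockFile.delete();
                metadataFile.delete();
            }
        }
        // Force the directory scanner to notice the missing replicas.
        DataNode dn = cluster.getDataNodes().get(dnIndex);
        DataNodeTestUtils.runDirectoryScanner(dn);
    }
}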

Example 43 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

the class TestNameNodeMXBean method testDecommissioningNodes.

@Test(timeout = 120000)
public void testDecommissioningNodes() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 30);
    MiniDFSCluster cluster = null;
    HostsFileWriter hostsFileWriter = new HostsFileWriter();
    hostsFileWriter.initialize(conf, "temp/TestNameNodeMXBean");
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
        cluster.waitActive();
        FSNamesystem fsn = cluster.getNameNode().namesystem;
        MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
        ObjectName mxbeanName = new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
        List<String> hosts = new ArrayList<>();
        for (DataNode dn : cluster.getDataNodes()) {
            hosts.add(dn.getDisplayName());
        }
        hostsFileWriter.initIncludeHosts(hosts.toArray(new String[hosts.size()]));
        fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
        // 1. Verify Live nodes
        String liveNodesInfo = (String) (mbs.getAttribute(mxbeanName, "LiveNodes"));
        Map<String, Map<String, Object>> liveNodes = (Map<String, Map<String, Object>>) JSON.parse(liveNodesInfo);
        assertEquals(fsn.getLiveNodes(), liveNodesInfo);
        assertEquals(fsn.getNumLiveDataNodes(), liveNodes.size());
        for (Map<String, Object> liveNode : liveNodes.values()) {
            assertTrue(liveNode.containsKey("lastContact"));
            assertTrue(liveNode.containsKey("xferaddr"));
        }
        // Add the 1st DataNode to Decommission list
        hostsFileWriter.initExcludeHost(cluster.getDataNodes().get(0).getDisplayName());
        fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
        // Wait for the DecommissionManager to complete refresh nodes
        GenericTestUtils.waitFor(new Supplier<Boolean>() {

            @Override
            public Boolean get() {
                try {
                    String decomNodesInfo = (String) (mbs.getAttribute(mxbeanName, "DecomNodes"));
                    Map<String, Map<String, Object>> decomNodes = (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
                    if (decomNodes.size() > 0) {
                        return true;
                    }
                } catch (Exception e) {
                    return false;
                }
                return false;
            }
        }, 1000, 60000);
        // 2. Verify Decommission InProgress nodes
        String decomNodesInfo = (String) (mbs.getAttribute(mxbeanName, "DecomNodes"));
        Map<String, Map<String, Object>> decomNodes = (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
        assertEquals(fsn.getDecomNodes(), decomNodesInfo);
        assertEquals(fsn.getNumDecommissioningDataNodes(), decomNodes.size());
        assertEquals(0, fsn.getNumDecomLiveDataNodes());
        assertEquals(0, fsn.getNumDecomDeadDataNodes());
        // Wait for the DecommissionManager to complete check
        GenericTestUtils.waitFor(new Supplier<Boolean>() {

            @Override
            public Boolean get() {
                if (fsn.getNumDecomLiveDataNodes() == 1) {
                    return true;
                }
                return false;
            }
        }, 1000, 60000);
        // 3. Verify Decommissioned nodes
        decomNodesInfo = (String) (mbs.getAttribute(mxbeanName, "DecomNodes"));
        decomNodes = (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
        assertEquals(0, decomNodes.size());
        assertEquals(fsn.getDecomNodes(), decomNodesInfo);
        assertEquals(1, fsn.getNumDecomLiveDataNodes());
        assertEquals(0, fsn.getNumDecomDeadDataNodes());
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
        hostsFileWriter.cleanup();
    }
}
Also used : MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) HostsFileWriter(org.apache.hadoop.hdfs.util.HostsFileWriter) ArrayList(java.util.ArrayList) BindException(java.net.BindException) IOException(java.io.IOException) ObjectName(javax.management.ObjectName) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Map(java.util.Map) HashMap(java.util.HashMap) MBeanServer(javax.management.MBeanServer) Test(org.junit.Test)
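
All three verification phases above read the same NameNodeInfo MXBean from the platform MBeanServer. That lookup can be factored into a tiny helper; a sketch follows, where the method name readNameNodeInfoAttribute is illustrative.

import java.lang.management.ManagementFactory;

import javax.management.MBeanServer;
import javax.management.ObjectName;

public class NameNodeMXBeanQuery {

    /**
     * Reads a String-valued attribute such as "LiveNodes", "DeadNodes" or
     * "DecomNodes" from the in-process NameNode's NameNodeInfo MXBean.
     * The returned value is a JSON document keyed by datanode name.
     */
    static String readNameNodeInfoAttribute(String attribute) throws Exception {
        MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
        ObjectName mxbeanName = new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
        return (String) mbs.getAttribute(mxbeanName, attribute);
    }
}

Polling this inside GenericTestUtils.waitFor, as the test does, avoids racing the DecommissionManager's refresh cycle.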

Example 44 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

the class DFSTestUtil method addBlockToFile.

/**
   * Adds a block or a striped block group to a file.
   * This method only manipulates NameNode
   * states of the file and the block without injecting data to DataNode.
   * It does, however, mimic block reports.
   * You should disable periodic heartbeats before using this.
   * @param isStripedBlock whether the added block is a striped block
   * @param dataNodes list of DataNodes to host the block or striped block group
   * @param previous Previous block in the file
   * @param numStripes Number of stripes in each block group
   * @param len block size for the added block when it is not striped
   * @return The added block or block group
   */
public static Block addBlockToFile(boolean isStripedBlock, List<DataNode> dataNodes, DistributedFileSystem fs, FSNamesystem ns, String file, INodeFile fileNode, String clientName, ExtendedBlock previous, int numStripes, int len) throws Exception {
    fs.getClient().namenode.addBlock(file, clientName, previous, null, fileNode.getId(), null, null);
    final BlockInfo lastBlock = fileNode.getLastBlock();
    final int groupSize = fileNode.getPreferredBlockReplication();
    assert dataNodes.size() >= groupSize;
    // 1. RECEIVING_BLOCK IBR
    for (int i = 0; i < groupSize; i++) {
        DataNode dn = dataNodes.get(i);
        final Block block = new Block(lastBlock.getBlockId() + i, 0, lastBlock.getGenerationStamp());
        DatanodeStorage storage = new DatanodeStorage(UUID.randomUUID().toString());
        StorageReceivedDeletedBlocks[] reports = DFSTestUtil.makeReportForReceivedBlock(block, ReceivedDeletedBlockInfo.BlockStatus.RECEIVING_BLOCK, storage);
        for (StorageReceivedDeletedBlocks report : reports) {
            ns.processIncrementalBlockReport(dn.getDatanodeId(), report);
        }
    }
    final ErasureCodingPolicy ecPolicy = fs.getErasureCodingPolicy(new Path(file));
    // 2. RECEIVED_BLOCK IBR
    long blockSize = isStripedBlock ? numStripes * ecPolicy.getCellSize() : len;
    for (int i = 0; i < groupSize; i++) {
        DataNode dn = dataNodes.get(i);
        final Block block = new Block(lastBlock.getBlockId() + i, blockSize, lastBlock.getGenerationStamp());
        DatanodeStorage storage = new DatanodeStorage(UUID.randomUUID().toString());
        StorageReceivedDeletedBlocks[] reports = DFSTestUtil.makeReportForReceivedBlock(block, ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, storage);
        for (StorageReceivedDeletedBlocks report : reports) {
            ns.processIncrementalBlockReport(dn.getDatanodeId(), report);
        }
    }
    long bytes = isStripedBlock ? numStripes * ecPolicy.getCellSize() * ecPolicy.getNumDataUnits() : len;
    lastBlock.setNumBytes(bytes);
    return lastBlock;
}
Also used : Path(org.apache.hadoop.fs.Path) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) ReceivedDeletedBlockInfo(org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) DatanodeStorage(org.apache.hadoop.hdfs.server.protocol.DatanodeStorage) ErasureCodingPolicy(org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy) Block(org.apache.hadoop.hdfs.protocol.Block) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) StorageReceivedDeletedBlocks(org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks)
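
The two loops in addBlockToFile replay the incremental block report (IBR) handshake a real DataNode performs: a RECEIVING_BLOCK report when the replica is opened and a RECEIVED_BLOCK report once it is finalized. Here is a stripped-down sketch of one such report, built from the same helpers the method uses; the class and method names (IbrHelper, reportReplica) are illustrative.

import java.util.UUID;

import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;

public class IbrHelper {

    /** Sends one incremental block report with the given status for a block hosted on dn. */
    static void reportReplica(FSNamesystem ns, DataNode dn, Block block,
            ReceivedDeletedBlockInfo.BlockStatus status) throws Exception {
        // A fresh storage ID is enough for the NameNode to track the replica.
        DatanodeStorage storage = new DatanodeStorage(UUID.randomUUID().toString());
        StorageReceivedDeletedBlocks[] reports =
            DFSTestUtil.makeReportForReceivedBlock(block, status, storage);
        for (StorageReceivedDeletedBlocks report : reports) {
            ns.processIncrementalBlockReport(dn.getDatanodeId(), report);
        }
    }
}

Calling it first with RECEIVING_BLOCK and a zero-length Block, then with RECEIVED_BLOCK and the final length, reproduces the two phases above for a single replica.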

Example 45 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

the class TestLeaseRecovery2 method hardLeaseRecoveryRestartHelper.

public void hardLeaseRecoveryRestartHelper(boolean doRename, int size) throws Exception {
    if (size < 0) {
        size = AppendTestUtil.nextInt(FILE_SIZE + 1);
    }
    //create a file
    String fileStr = "/hardLeaseRecovery";
    AppendTestUtil.LOG.info("filestr=" + fileStr);
    Path filePath = new Path(fileStr);
    FSDataOutputStream stm = dfs.create(filePath, true, BUF_SIZE, REPLICATION_NUM, BLOCK_SIZE);
    assertTrue(dfs.dfs.exists(fileStr));
    // write bytes into the file.
    AppendTestUtil.LOG.info("size=" + size);
    stm.write(buffer, 0, size);
    String originalLeaseHolder = NameNodeAdapter.getLeaseHolderForPath(cluster.getNameNode(), fileStr);
    assertFalse("original lease holder should not be the NN", originalLeaseHolder.equals(HdfsServerConstants.NAMENODE_LEASE_HOLDER));
    // hflush file
    AppendTestUtil.LOG.info("hflush");
    stm.hflush();
    // check visible length
    final HdfsDataInputStream in = (HdfsDataInputStream) dfs.open(filePath);
    Assert.assertEquals(size, in.getVisibleLength());
    in.close();
    if (doRename) {
        fileStr += ".renamed";
        Path renamedPath = new Path(fileStr);
        assertTrue(dfs.rename(filePath, renamedPath));
        filePath = renamedPath;
    }
    // kill the lease renewal thread
    AppendTestUtil.LOG.info("leasechecker.interruptAndJoin()");
    dfs.dfs.getLeaseRenewer().interruptAndJoin();
    // Make sure the DNs don't send a heartbeat for a while, so the blocks
    // won't actually get completed during lease recovery.
    for (DataNode dn : cluster.getDataNodes()) {
        DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true);
    }
    // set the hard limit to be 1 second 
    cluster.setLeasePeriod(LONG_LEASE_PERIOD, SHORT_LEASE_PERIOD);
    // Make sure lease recovery begins.
    final String path = fileStr;
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            return HdfsServerConstants.NAMENODE_LEASE_HOLDER.equals(NameNodeAdapter.getLeaseHolderForPath(cluster.getNameNode(), path));
        }
    }, (int) SHORT_LEASE_PERIOD, (int) SHORT_LEASE_PERIOD * 10);
    // Normally, the in-progress edit log would be finalized by
    // FSEditLog#endCurrentLogSegment.  For testing purposes, we
    // disable that here.
    FSEditLog spyLog = spy(cluster.getNameNode().getFSImage().getEditLog());
    doNothing().when(spyLog).endCurrentLogSegment(Mockito.anyBoolean());
    DFSTestUtil.setEditLogForTesting(cluster.getNamesystem(), spyLog);
    cluster.restartNameNode(false);
    checkLease(fileStr, size);
    // Let the DNs send heartbeats again.
    for (DataNode dn : cluster.getDataNodes()) {
        DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, false);
    }
    cluster.waitActive();
    // set the hard limit to be 1 second, to initiate lease recovery. 
    cluster.setLeasePeriod(LONG_LEASE_PERIOD, SHORT_LEASE_PERIOD);
    // wait for lease recovery to complete
    LocatedBlocks locatedBlocks;
    do {
        Thread.sleep(SHORT_LEASE_PERIOD);
        locatedBlocks = dfs.dfs.getLocatedBlocks(fileStr, 0L, size);
    } while (locatedBlocks.isUnderConstruction());
    assertEquals(size, locatedBlocks.getFileLength());
    // make sure that the client can't write data anymore.
    try {
        stm.write('b');
        stm.hflush();
        fail("Should not be able to flush after we've lost the lease");
    } catch (IOException e) {
        LOG.info("Expceted exception on write/hflush", e);
    }
    try {
        stm.close();
        fail("Should not be able to close after we've lost the lease");
    } catch (IOException e) {
        LOG.info("Expected exception on close", e);
    }
    // verify data
    AppendTestUtil.LOG.info("File size is good. Now validating sizes from datanodes...");
    AppendTestUtil.checkFullFile(dfs, filePath, size, buffer, fileStr);
}
Also used : Path(org.apache.hadoop.fs.Path) FSEditLog(org.apache.hadoop.hdfs.server.namenode.FSEditLog) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) HdfsDataInputStream(org.apache.hadoop.hdfs.client.HdfsDataInputStream)
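
The setup that drives this test reduces to three moves: stop the client's lease renewer, silence DataNode heartbeats so the open blocks cannot be completed, and shrink the hard lease limit so the NameNode claims the lease itself. A compact sketch of that setup follows, under the assumption that it lives in the org.apache.hadoop.hdfs test package (like TestLeaseRecovery2) so it can reach the same package-level client hooks; the helper name forceHardLeaseRecovery is illustrative.

package org.apache.hadoop.hdfs;

import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;

public class LeaseRecoveryHelper {

    /** Pushes an open file toward hard-limit lease recovery by the NameNode. */
    static void forceHardLeaseRecovery(MiniDFSCluster cluster, DistributedFileSystem dfs,
            long softLimitMs, long hardLimitMs) throws Exception {
        // Stop the client from renewing its lease.
        dfs.getClient().getLeaseRenewer().interruptAndJoin();
        // Without heartbeats the DataNodes cannot complete the open blocks,
        // so recovery must go through the NameNode's hard-limit path.
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true);
        }
        // A short hard limit makes the NameNode reassign the lease to itself
        // (HdfsServerConstants.NAMENODE_LEASE_HOLDER) almost immediately.
        cluster.setLeasePeriod(softLimitMs, hardLimitMs);
    }
}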

Aggregations

DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode) 165
Test (org.junit.Test) 110
Path (org.apache.hadoop.fs.Path) 78
Configuration (org.apache.hadoop.conf.Configuration) 60
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster) 47
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration) 37
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo) 37
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock) 35
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock) 29
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 28
FileSystem (org.apache.hadoop.fs.FileSystem) 27
IOException (java.io.IOException) 24
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem) 20
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks) 20
ArrayList (java.util.ArrayList) 17
DiskBalancerDataNode (org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerDataNode) 17
File (java.io.File) 15
FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem) 14
DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) 13
FsDatasetSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) 12