
Example 56 with DataNode

Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

From the class TestDFSAdmin, method testGetDatanodeInfo.

@Test(timeout = 30000)
public void testGetDatanodeInfo() throws Exception {
    redirectStream();
    final DFSAdmin dfsAdmin = new DFSAdmin(conf);
    for (int i = 0; i < cluster.getDataNodes().size(); i++) {
        resetStream();
        final DataNode dn = cluster.getDataNodes().get(i);
        final String addr = String.format("%s:%d", dn.getXferAddress().getHostString(), dn.getIpcPort());
        final int ret = ToolRunner.run(dfsAdmin, new String[] { "-getDatanodeInfo", addr });
        assertEquals(0, ret);
        /* collect outputs */
        final List<String> outs = Lists.newArrayList();
        scanIntoList(out, outs);
        /* verify results */
        assertEquals("One line per DataNode like: Uptime: XXX, Software version: x.y.z," + " Config version: core-x.y.z,hdfs-x", 1, outs.size());
        assertThat(outs.get(0), is(allOf(containsString("Uptime:"), containsString("Software version"), containsString("Config version"))));
    }
}
Also used : DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Test(org.junit.Test)
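
As a quick companion, here is a minimal standalone sketch of the same DFSAdmin/ToolRunner invocation outside a test harness. It is not taken from the Hadoop test suite; the class name and the DataNode address below are assumptions, and the address must be replaced with a real dfs.datanode.ipc.address endpoint.

import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.util.ToolRunner;

public class GetDatanodeInfoSketch {
    public static void main(String[] args) throws Exception {
        // Assumption: a DataNode answers IPC at 127.0.0.1:9867 (adjust to your
        // cluster's dfs.datanode.ipc.address).
        final DFSAdmin dfsAdmin = new DFSAdmin(new HdfsConfiguration());
        // Equivalent to the shell command: hdfs dfsadmin -getDatanodeInfo 127.0.0.1:9867
        final int ret = ToolRunner.run(dfsAdmin, new String[] { "-getDatanodeInfo", "127.0.0.1:9867" });
        // Exit code 0 means the DataNode responded; DFSAdmin prints its uptime,
        // software version and config version to stdout.
        System.out.println("exit code: " + ret);
    }
}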

Example 57 with DataNode

Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

From the class TestDFSAdmin, method testReportCommand.

@Test(timeout = 120000)
public void testReportCommand() throws Exception {
    redirectStream();
    /* init conf */
    final Configuration dfsConf = new HdfsConfiguration();
    dfsConf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 500); // 0.5s
    dfsConf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
    final Path baseDir = new Path(PathUtils.getTestDir(getClass()).getAbsolutePath(), GenericTestUtils.getMethodName());
    dfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.toString());
    final int numDn = 3;
    /* init cluster */
    try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(dfsConf).numDataNodes(numDn).build()) {
        miniCluster.waitActive();
        assertEquals(numDn, miniCluster.getDataNodes().size());
        /* local vars */
        final DFSAdmin dfsAdmin = new DFSAdmin(dfsConf);
        final DFSClient client = miniCluster.getFileSystem().getClient();
        /* run and verify report command */
        resetStream();
        assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
        verifyNodesAndCorruptBlocks(numDn, numDn, 0, client);
        /* shut down one DN */
        final List<DataNode> datanodes = miniCluster.getDataNodes();
        final DataNode last = datanodes.get(datanodes.size() - 1);
        last.shutdown();
        miniCluster.setDataNodeDead(last.getDatanodeId());
        /* run and verify report command */
        assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
        verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, client);
        /* corrupt one block */
        final short replFactor = 1;
        final long fileLength = 512L;
        final FileSystem fs = miniCluster.getFileSystem();
        final Path file = new Path(baseDir, "/corrupted");
        DFSTestUtil.createFile(fs, file, fileLength, replFactor, 12345L);
        DFSTestUtil.waitReplication(fs, file, replFactor);
        final ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file);
        final int blockFilesCorrupted = miniCluster.corruptBlockOnDataNodes(block);
        assertEquals("Fail to corrupt all replicas for block " + block, replFactor, blockFilesCorrupted);
        try {
            IOUtils.copyBytes(fs.open(file), new IOUtils.NullOutputStream(), conf, true);
            fail("Should have failed to read the file with corrupted blocks.");
        } catch (ChecksumException ignored) {
        // expected exception reading corrupt blocks
        }
        /*
         * Increase the replication factor; this should trigger a transfer request.
         * The receiving datanode fails the checksum check and reports the corrupt
         * replica to the namenode.
         */
        fs.setReplication(file, (short) (replFactor + 1));
        /* get block details and check if the block is corrupt */
        GenericTestUtils.waitFor(new Supplier<Boolean>() {
            @Override
            public Boolean get() {
                LocatedBlocks blocks = null;
                try {
                    miniCluster.triggerBlockReports();
                    blocks = client.getNamenode().getBlockLocations(file.toString(), 0, Long.MAX_VALUE);
                } catch (IOException e) {
                    return false;
                }
                return blocks != null && blocks.get(0).isCorrupt();
            }
        }, 1000, 60000);
        BlockManagerTestUtil.updateState(miniCluster.getNameNode().getNamesystem().getBlockManager());
        /* run and verify report command */
        resetStream();
        assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
        verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, client);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DFSClient(org.apache.hadoop.hdfs.DFSClient) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) ChecksumException(org.apache.hadoop.fs.ChecksumException) StrBuilder(org.apache.commons.lang.text.StrBuilder) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) IOException(java.io.IOException) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) IOUtils(org.apache.hadoop.io.IOUtils) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.junit.Test)
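
For orientation, a trimmed standalone sketch of the core pattern follows: bring up an in-process MiniDFSCluster and run dfsadmin -report against it via ToolRunner. The block-corruption and dead-node steps of the full test are omitted; the class name and DataNode count are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.util.ToolRunner;

public class ReportCommandSketch {
    public static void main(String[] args) throws Exception {
        final Configuration conf = new HdfsConfiguration();
        // Start an in-process cluster with three DataNodes; the builder wires
        // fs.defaultFS in conf to the mini cluster's NameNode.
        try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build()) {
            cluster.waitActive();
            // Same as running: hdfs dfsadmin -report
            final int ret = ToolRunner.run(new DFSAdmin(conf), new String[] { "-report" });
            System.out.println("dfsadmin -report exit code: " + ret);
        }
    }
}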

Example 58 with DataNode

Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

From the class TestDecommission, method testRecommission.

/**
   * Test that over-replicated blocks are deleted on recommission.
   */
@Test(timeout = 120000)
public void testRecommission() throws Exception {
    final int numDatanodes = 6;
    try {
        LOG.info("Starting test testRecommission");
        startCluster(1, numDatanodes);
        final Path file1 = new Path("testDecommission.dat");
        final int replicas = numDatanodes - 1;
        ArrayList<DatanodeInfo> decommissionedNodes = Lists.newArrayList();
        final FileSystem fileSys = getCluster().getFileSystem();
        // Write a file to n-1 datanodes
        writeFile(fileSys, file1, replicas);
        // Decommission one of the datanodes with a replica
        BlockLocation loc = fileSys.getFileBlockLocations(file1, 0, 1)[0];
        assertEquals("Unexpected number of replicas from getFileBlockLocations", replicas, loc.getHosts().length);
        final String toDecomHost = loc.getNames()[0];
        String toDecomUuid = null;
        for (DataNode d : getCluster().getDataNodes()) {
            if (d.getDatanodeId().getXferAddr().equals(toDecomHost)) {
                toDecomUuid = d.getDatanodeId().getDatanodeUuid();
                break;
            }
        }
        assertNotNull("Could not find a dn with the block!", toDecomUuid);
        final DatanodeInfo decomNode = takeNodeOutofService(0, toDecomUuid, 0, decommissionedNodes, AdminStates.DECOMMISSIONED);
        decommissionedNodes.add(decomNode);
        final BlockManager blockManager = getCluster().getNamesystem().getBlockManager();
        final DatanodeManager datanodeManager = blockManager.getDatanodeManager();
        BlockManagerTestUtil.recheckDecommissionState(datanodeManager);
        // Ensure decommissioned datanode is not automatically shutdown
        DFSClient client = getDfsClient(0);
        assertEquals("All datanodes must be alive", numDatanodes, client.datanodeReport(DatanodeReportType.LIVE).length);
        // wait for the block to be replicated
        final ExtendedBlock b = DFSTestUtil.getFirstBlock(fileSys, file1);
        final String uuid = toDecomUuid;
        GenericTestUtils.waitFor(new Supplier<Boolean>() {
            @Override
            public Boolean get() {
                BlockInfo info = blockManager.getStoredBlock(b.getLocalBlock());
                int count = 0;
                StringBuilder sb = new StringBuilder("Replica locations: ");
                for (int i = 0; i < info.numNodes(); i++) {
                    DatanodeDescriptor dn = info.getDatanode(i);
                    sb.append(dn + ", ");
                    if (!dn.getDatanodeUuid().equals(uuid)) {
                        count++;
                    }
                }
                LOG.info(sb.toString());
                LOG.info("Count: " + count);
                return count == replicas;
            }
        }, 500, 30000);
        // redecommission and wait for over-replication to be fixed
        putNodeInService(0, decomNode);
        BlockManagerTestUtil.recheckDecommissionState(datanodeManager);
        DFSTestUtil.waitForReplication(getCluster(), b, 1, replicas, 0);
        cleanupFile(fileSys, file1);
    } finally {
        shutdownCluster();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) BlockLocation(org.apache.hadoop.fs.BlockLocation) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.junit.Test)
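
The test relies on GenericTestUtils.waitFor(...) to poll until the block has enough replicas outside the decommissioned node. The sketch below is a dependency-free illustration of that poll-until-true idea; the helper name and the interval/timeout values are illustrative, not taken from Hadoop.

import java.util.function.BooleanSupplier;

public class WaitForSketch {
    /** Poll check every intervalMs until it returns true or timeoutMs elapses. */
    static boolean waitFor(BooleanSupplier check, long intervalMs, long timeoutMs)
            throws InterruptedException {
        final long deadline = System.currentTimeMillis() + timeoutMs;
        while (System.currentTimeMillis() < deadline) {
            if (check.getAsBoolean()) {
                return true;          // condition met, e.g. "block is fully re-replicated"
            }
            Thread.sleep(intervalMs); // back off before re-checking
        }
        return false;                 // timed out without the condition becoming true
    }

    public static void main(String[] args) throws InterruptedException {
        final long start = System.currentTimeMillis();
        // Toy condition: becomes true after roughly 1.5 seconds.
        final boolean ok = waitFor(() -> System.currentTimeMillis() - start > 1500, 500, 30000);
        System.out.println("condition reached: " + ok);
    }
}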

Example 59 with DataNode

Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

From the class TestDecommissionWithStriped, method testDecommissionWithURBlockForSameBlockGroup.

@Test(timeout = 120000)
public void testDecommissionWithURBlockForSameBlockGroup() throws Exception {
    LOG.info("Starting test testDecommissionWithURBlocksForSameBlockGroup");
    final Path ecFile = new Path(ecDir, "testDecommissionWithCorruptBlocks");
    int writeBytes = cellSize * dataBlocks * 2;
    writeStripedFile(dfs, ecFile, writeBytes);
    Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks());
    final List<DatanodeInfo> decommisionNodes = new ArrayList<DatanodeInfo>();
    LocatedBlock lb = dfs.getClient().getLocatedBlocks(ecFile.toString(), 0).get(0);
    DatanodeInfo[] dnLocs = lb.getLocations();
    assertEquals(dataBlocks + parityBlocks, dnLocs.length);
    int decommNodeIndex = dataBlocks - 1;
    int stopNodeIndex = 1;
    // add the nodes which will be decommissioning
    decommisionNodes.add(dnLocs[decommNodeIndex]);
    // stop excess dns to avoid immediate reconstruction.
    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
    List<DataNodeProperties> stoppedDns = new ArrayList<>();
    for (DatanodeInfo liveDn : info) {
        boolean usedNode = false;
        for (DatanodeInfo datanodeInfo : dnLocs) {
            if (liveDn.getXferAddr().equals(datanodeInfo.getXferAddr())) {
                usedNode = true;
                break;
            }
        }
        if (!usedNode) {
            DataNode dn = cluster.getDataNode(liveDn.getIpcPort());
            stoppedDns.add(cluster.stopDataNode(liveDn.getXferAddr()));
            cluster.setDataNodeDead(dn.getDatanodeId());
            LOG.info("stop datanode " + dn.getDatanodeId().getHostName());
        }
    }
    DataNode dn = cluster.getDataNode(dnLocs[stopNodeIndex].getIpcPort());
    cluster.stopDataNode(dnLocs[stopNodeIndex].getXferAddr());
    cluster.setDataNodeDead(dn.getDatanodeId());
    numDNs = numDNs - 1;
    // Decommission node in a new thread. Verify that node is decommissioned.
    final CountDownLatch decomStarted = new CountDownLatch(0);
    Thread decomTh = new Thread() {
        public void run() {
            try {
                decomStarted.countDown();
                decommissionNode(0, decommisionNodes, AdminStates.DECOMMISSIONED);
            } catch (Exception e) {
                LOG.error("Exception while decommissioning", e);
                Assert.fail("Shouldn't throw exception!");
            }
        }
    };
    int deadDecomissioned = fsn.getNumDecomDeadDataNodes();
    int liveDecomissioned = fsn.getNumDecomLiveDataNodes();
    decomTh.start();
    decomStarted.await(5, TimeUnit.SECONDS);
    // grace period to trigger decommissioning call
    Thread.sleep(3000);
    // start datanode so that decommissioning live node will be finished
    for (DataNodeProperties dnp : stoppedDns) {
        cluster.restartDataNode(dnp);
        LOG.info("Restarts stopped datanode:{} to trigger block reconstruction", dnp.datanode);
    }
    cluster.waitActive();
    LOG.info("Waiting to finish decommissioning node:{}", decommisionNodes);
    // waiting 20secs to finish decommission
    decomTh.join(20000);
    LOG.info("Finished decommissioning node:{}", decommisionNodes);
    assertEquals(deadDecomissioned, fsn.getNumDecomDeadDataNodes());
    assertEquals(liveDecomissioned + decommisionNodes.size(), fsn.getNumDecomLiveDataNodes());
    // Ensure decommissioned datanode is not automatically shutdown
    DFSClient client = getDfsClient(cluster.getNameNode(0), conf);
    assertEquals("All datanodes must be alive", numDNs, client.datanodeReport(DatanodeReportType.LIVE).length);
    assertNull(checkFile(dfs, ecFile, 9, decommisionNodes, numDNs));
    StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommisionNodes, null, blockGroupSize);
    cleanupFile(dfs, ecFile);
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) ArrayList(java.util.ArrayList) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) CountDownLatch(java.util.concurrent.CountDownLatch) IOException(java.io.IOException) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Test(org.junit.Test)
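
A recurring building block in this test is stopping a DataNode, marking it dead, and later restarting it from its saved DataNodeProperties. The standalone sketch below isolates just that stop/restart cycle on a small MiniDFSCluster; the class name, the three-node size, and the choice of the last DataNode are illustrative, and the erasure-coding setup of the full test is omitted.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

public class StopRestartDataNodeSketch {
    public static void main(String[] args) throws Exception {
        final Configuration conf = new HdfsConfiguration();
        try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build()) {
            cluster.waitActive();
            // Pick the last DataNode, stop it, and tell the cluster to treat it as dead.
            final List<DataNode> dns = cluster.getDataNodes();
            final DataNode dn = dns.get(dns.size() - 1);
            final DataNodeProperties props = cluster.stopDataNode(dn.getDatanodeId().getXferAddr());
            cluster.setDataNodeDead(dn.getDatanodeId());
            // ... exercise the cluster while the node is down ...
            // Restart the same DataNode from its saved properties and wait for it to rejoin.
            cluster.restartDataNode(props);
            cluster.waitActive();
        }
    }
}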

Example 60 with DataNode

Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

From the class TestDistributedFileSystem, method testLocatedFileStatusStorageIdsTypes.

@Test(timeout = 120000)
public void testLocatedFileStatusStorageIdsTypes() throws Exception {
    final Configuration conf = getTestConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    try {
        final DistributedFileSystem fs = cluster.getFileSystem();
        final Path testFile = new Path("/testListLocatedStatus");
        final int blockSize = 4096;
        final int numBlocks = 10;
        // Create a test file
        final int repl = 2;
        DFSTestUtil.createFile(fs, testFile, blockSize, numBlocks * blockSize, blockSize, (short) repl, 0xADDED);
        DFSTestUtil.waitForReplication(fs, testFile, (short) repl, 30000);
        // Get the listing
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(testFile);
        assertTrue("Expected file to be present", it.hasNext());
        LocatedFileStatus stat = it.next();
        BlockLocation[] locs = stat.getBlockLocations();
        assertEquals("Unexpected number of locations", numBlocks, locs.length);
        Set<String> dnStorageIds = new HashSet<>();
        for (DataNode d : cluster.getDataNodes()) {
            try (FsDatasetSpi.FsVolumeReferences volumes = d.getFSDataset().getFsVolumeReferences()) {
                for (FsVolumeSpi vol : volumes) {
                    dnStorageIds.add(vol.getStorageID());
                }
            }
        }
        for (BlockLocation loc : locs) {
            String[] ids = loc.getStorageIds();
            // Run it through a set to deduplicate, since there should be no dupes
            Set<String> storageIds = new HashSet<>();
            Collections.addAll(storageIds, ids);
            assertEquals("Unexpected num storage ids", repl, storageIds.size());
            // Make sure these are all valid storage IDs
            assertTrue("Unknown storage IDs found!", dnStorageIds.containsAll(storageIds));
            // Check storage types are the default, since we didn't set any
            StorageType[] types = loc.getStorageTypes();
            assertEquals("Unexpected num storage types", repl, types.length);
            for (StorageType t : types) {
                assertEquals("Unexpected storage type", StorageType.DEFAULT, t);
            }
        }
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) StorageType(org.apache.hadoop.fs.StorageType) Configuration(org.apache.hadoop.conf.Configuration) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) BlockLocation(org.apache.hadoop.fs.BlockLocation) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) HashSet(java.util.HashSet) Test(org.junit.Test)
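
The storage IDs and storage types checked in this test are also reachable through the public FileSystem API outside MiniDFSCluster. The sketch below assumes that fs.defaultFS in the loaded configuration points at a running HDFS and that the path /testListLocatedStatus exists; both, along with the class name, are assumptions made for illustration.

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.StorageType;

public class StorageIdsSketch {
    public static void main(String[] args) throws Exception {
        // Assumption: fs.defaultFS points at a running HDFS and the path exists.
        final FileSystem fs = FileSystem.get(new Configuration());
        final RemoteIterator<LocatedFileStatus> it =
                fs.listLocatedStatus(new Path("/testListLocatedStatus"));
        while (it.hasNext()) {
            final LocatedFileStatus stat = it.next();
            for (BlockLocation loc : stat.getBlockLocations()) {
                // One storage ID and one storage type per replica of this block.
                System.out.println("storage ids:   " + Arrays.toString(loc.getStorageIds()));
                final StorageType[] types = loc.getStorageTypes();
                System.out.println("storage types: " + Arrays.toString(types));
            }
        }
    }
}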

Aggregations

DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 165
Test (org.junit.Test): 110
Path (org.apache.hadoop.fs.Path): 78
Configuration (org.apache.hadoop.conf.Configuration): 60
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 47
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 37
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 37
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 35
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 29
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 28
FileSystem (org.apache.hadoop.fs.FileSystem): 27
IOException (java.io.IOException): 24
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 20
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 20
ArrayList (java.util.ArrayList): 17
DiskBalancerDataNode (org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerDataNode): 17
File (java.io.File): 15
FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem): 14
DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor): 13
FsDatasetSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi): 12