
Example 6 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestDecommissioningStatus method testDecommissionStatusAfterDNRestart.

/**
   * Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
   * as dead before decommission has completed. That will allow DN to resume
   * the replication process after it rejoins the cluster.
   */
@Test(timeout = 120000)
public void testDecommissionStatusAfterDNRestart() throws Exception {
    DistributedFileSystem fileSys = (DistributedFileSystem) cluster.getFileSystem();
    // Create a file with one block. That block has one replica.
    Path f = new Path("decommission.dat");
    DFSTestUtil.createFile(fileSys, f, fileSize, fileSize, fileSize, (short) 1, seed);
    // Find the DN that owns the only replica.
    RemoteIterator<LocatedFileStatus> fileList = fileSys.listLocatedStatus(f);
    BlockLocation[] blockLocations = fileList.next().getBlockLocations();
    String dnName = blockLocations[0].getNames()[0];
    // Decommission the DN.
    FSNamesystem fsn = cluster.getNamesystem();
    final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
    decommissionNode(dnName);
    dm.refreshNodes(conf);
    // Stop the DN when decommission is in progress.
    // Given that DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is set to 1, and
    // given the size of the block, the decommission will take much longer
    // than the test timeout to complete. So when stopDataNode is called,
    // decommission should still be in progress.
    DataNodeProperties dataNodeProperties = cluster.stopDataNode(dnName);
    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    while (true) {
        dm.fetchDatanodes(null, dead, false);
        if (dead.size() == 1) {
            break;
        }
        Thread.sleep(1000);
    }
    // Force removal of the dead node's blocks.
    BlockManagerTestUtil.checkHeartbeat(fsn.getBlockManager());
    // Force DatanodeManager to check decommission state.
    BlockManagerTestUtil.recheckDecommissionState(dm);
    // Verify that the DN remains in DECOMMISSION_INPROGRESS state.
    assertTrue("the node should be DECOMMISSION_IN_PROGRESSS", dead.get(0).isDecommissionInProgress());
    // Check DatanodeManager#getDecommissioningNodes; make sure it returns
    // the node as decommissioning even though it is dead.
    List<DatanodeDescriptor> decomlist = dm.getDecommissioningNodes();
    assertTrue("The node should be be decommissioning", decomlist.size() == 1);
    // Delete the under-replicated file, which should let the 
    // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
    AdminStatesBaseTest.cleanupFile(fileSys, f);
    BlockManagerTestUtil.recheckDecommissionState(dm);
    assertTrue("the node should be decommissioned", dead.get(0).isDecommissioned());
    // Add the node back
    cluster.restartDataNode(dataNodeProperties, true);
    cluster.waitActive();
    // Call refreshNodes on FSNamesystem with empty exclude file.
    // This will remove the datanodes from decommissioning list and
    // make them available again.
    hostsFileWriter.initExcludeHost("");
    dm.refreshNodes(conf);
}
Also used : Path(org.apache.hadoop.fs.Path) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) BlockLocation(org.apache.hadoop.fs.BlockLocation) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) Test(org.junit.Test) AdminStatesBaseTest(org.apache.hadoop.hdfs.AdminStatesBaseTest)
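
The decommissionNode(dnName) call above (Example 7 below uses it as well) is a helper defined elsewhere in TestDecommissioningStatus. A plausible sketch of what such a helper does, reconstructed only from the calls visible in these tests; hostsFileWriter is the same HostsFileWriter field the tests use, and the body shown is an assumption, not the verbatim Hadoop source:

// Sketch only, inside the test class: record the datanode's transfer
// address in the exclude file. The callers above follow this with
// dm.refreshNodes(conf) so the namenode re-reads the file and starts
// decommissioning the node.
private void decommissionNode(String dnName) throws IOException {
    hostsFileWriter.initExcludeHost(dnName);
}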

Example 7 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestDecommissioningStatus method testDecommissionDeadDN.

/**
   * Verify the support for decommissioning a datanode that is already dead.
   * Under this scenario the datanode should immediately be marked as
   * DECOMMISSIONED
   */
@Test(timeout = 120000)
public void testDecommissionDeadDN() throws Exception {
    Logger log = Logger.getLogger(DecommissionManager.class);
    log.setLevel(Level.DEBUG);
    DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
    String dnName = dnID.getXferAddr();
    DataNodeProperties stoppedDN = cluster.stopDataNode(0);
    DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(), false, 30000);
    FSNamesystem fsn = cluster.getNamesystem();
    final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
    DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
    decommissionNode(dnName);
    dm.refreshNodes(conf);
    BlockManagerTestUtil.recheckDecommissionState(dm);
    assertTrue(dnDescriptor.isDecommissioned());
    // Add the node back
    cluster.restartDataNode(stoppedDN, true);
    cluster.waitActive();
    // Call refreshNodes on FSNamesystem with empty exclude file to remove the
    // datanode from decommissioning list and make it available again.
    hostsFileWriter.initExcludeHost("");
    dm.refreshNodes(conf);
}
Also used : DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) Logger(org.apache.log4j.Logger) Test(org.junit.Test) AdminStatesBaseTest(org.apache.hadoop.hdfs.AdminStatesBaseTest)
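
Every example on this page follows the same round trip: MiniDFSCluster#stopDataNode returns a DataNodeProperties handle that captures the stopped node's DataNode instance, configuration, and ports, and MiniDFSCluster#restartDataNode brings the same node back. A minimal sketch of the pattern; the DataNodeBounceSketch wrapper is hypothetical, while the MiniDFSCluster calls are the ones used throughout these tests:

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;

class DataNodeBounceSketch {

    // Stops datanode 0, then restarts the same node. Passing true for
    // keepPort makes the restarted node reuse its original ports, so it
    // rejoins the namenode under the same identity.
    static void bounceDataNode(MiniDFSCluster cluster) throws Exception {
        DataNodeProperties props = cluster.stopDataNode(0);
        // ... exercise namenode behavior while the node is down ...
        cluster.restartDataNode(props, true);
        cluster.waitActive();
    }
}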

Example 8 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestDecommissionWithStriped method testDecommissionWithURBlockForSameBlockGroup.

@Test(timeout = 120000)
public void testDecommissionWithURBlockForSameBlockGroup() throws Exception {
    LOG.info("Starting test testDecommissionWithURBlocksForSameBlockGroup");
    final Path ecFile = new Path(ecDir, "testDecommissionWithCorruptBlocks");
    int writeBytes = cellSize * dataBlocks * 2;
    writeStripedFile(dfs, ecFile, writeBytes);
    Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks());
    final List<DatanodeInfo> decommisionNodes = new ArrayList<DatanodeInfo>();
    LocatedBlock lb = dfs.getClient().getLocatedBlocks(ecFile.toString(), 0).get(0);
    DatanodeInfo[] dnLocs = lb.getLocations();
    assertEquals(dataBlocks + parityBlocks, dnLocs.length);
    int decommNodeIndex = dataBlocks - 1;
    int stopNodeIndex = 1;
    // add the nodes which will be decommissioning
    decommisionNodes.add(dnLocs[decommNodeIndex]);
    // stop excess dns to avoid immediate reconstruction.
    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
    List<DataNodeProperties> stoppedDns = new ArrayList<>();
    for (DatanodeInfo liveDn : info) {
        boolean usedNode = false;
        for (DatanodeInfo datanodeInfo : dnLocs) {
            if (liveDn.getXferAddr().equals(datanodeInfo.getXferAddr())) {
                usedNode = true;
                break;
            }
        }
        if (!usedNode) {
            DataNode dn = cluster.getDataNode(liveDn.getIpcPort());
            stoppedDns.add(cluster.stopDataNode(liveDn.getXferAddr()));
            cluster.setDataNodeDead(dn.getDatanodeId());
            LOG.info("stop datanode " + dn.getDatanodeId().getHostName());
        }
    }
    DataNode dn = cluster.getDataNode(dnLocs[stopNodeIndex].getIpcPort());
    cluster.stopDataNode(dnLocs[stopNodeIndex].getXferAddr());
    cluster.setDataNodeDead(dn.getDatanodeId());
    numDNs = numDNs - 1;
    // Decommission node in a new thread. Verify that node is decommissioned.
    // A count of 1 makes the latch meaningful: the decommission thread
    // counts it down once it has started, and the await below blocks
    // until then (a count of 0 would make await a no-op).
    final CountDownLatch decomStarted = new CountDownLatch(1);
    Thread decomTh = new Thread() {

        @Override
        public void run() {
            try {
                decomStarted.countDown();
                decommissionNode(0, decommisionNodes, AdminStates.DECOMMISSIONED);
            } catch (Exception e) {
                LOG.error("Exception while decommissioning", e);
                Assert.fail("Shouldn't throw exception!");
            }
        }
    };
    int deadDecomissioned = fsn.getNumDecomDeadDataNodes();
    int liveDecomissioned = fsn.getNumDecomLiveDataNodes();
    decomTh.start();
    decomStarted.await(5, TimeUnit.SECONDS);
    // grace period to trigger decommissioning call
    Thread.sleep(3000);
    // start datanode so that decommissioning live node will be finished
    for (DataNodeProperties dnp : stoppedDns) {
        cluster.restartDataNode(dnp);
        LOG.info("Restarts stopped datanode:{} to trigger block reconstruction", dnp.datanode);
    }
    cluster.waitActive();
    LOG.info("Waiting to finish decommissioning node:{}", decommisionNodes);
    // Wait up to 20 seconds for decommission to finish.
    decomTh.join(20000);
    LOG.info("Finished decommissioning node:{}", decommisionNodes);
    assertEquals(deadDecomissioned, fsn.getNumDecomDeadDataNodes());
    assertEquals(liveDecomissioned + decommisionNodes.size(), fsn.getNumDecomLiveDataNodes());
    // Ensure decommissioned datanode is not automatically shutdown
    DFSClient client = getDfsClient(cluster.getNameNode(0), conf);
    assertEquals("All datanodes must be alive", numDNs, client.datanodeReport(DatanodeReportType.LIVE).length);
    assertNull(checkFile(dfs, ecFile, 9, decommisionNodes, numDNs));
    StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommisionNodes, null, blockGroupSize);
    cleanupFile(dfs, ecFile);
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) ArrayList(java.util.ArrayList) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) CountDownLatch(java.util.concurrent.CountDownLatch) IOException(java.io.IOException) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Test(org.junit.Test)
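
The loop above that stops every live datanode not hosting the block group is self-contained enough to extract. A sketch of it as a standalone helper, under the assumption of the same test context (the helper name and the throws clause are assumptions; DatanodeReportType here is org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

class StopUnusedDataNodesSketch {

    // Stops every live datanode that does not host part of the block
    // group described by dnLocs, and marks each one dead so the namenode
    // cannot schedule reconstruction onto it. Returns the handles needed
    // to restart the stopped nodes later.
    static List<DataNodeProperties> stopUnusedDataNodes(
            MiniDFSCluster cluster, DFSClient client, DatanodeInfo[] dnLocs)
            throws Exception {
        List<DataNodeProperties> stopped = new ArrayList<>();
        for (DatanodeInfo liveDn : client.datanodeReport(DatanodeReportType.LIVE)) {
            boolean usedNode = false;
            for (DatanodeInfo loc : dnLocs) {
                if (liveDn.getXferAddr().equals(loc.getXferAddr())) {
                    usedNode = true;
                    break;
                }
            }
            if (!usedNode) {
                DataNode dn = cluster.getDataNode(liveDn.getIpcPort());
                stopped.add(cluster.stopDataNode(liveDn.getXferAddr()));
                cluster.setDataNodeDead(dn.getDatanodeId());
            }
        }
        return stopped;
    }
}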

Example 9 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestDataTransferKeepalive method testSlowReader.

/**
   * Test for the case where the client begins to read a long block, but doesn't
   * read bytes off the stream quickly. The datanode should time out sending the
   * chunks and the transceiver should die, even if it has a long keepalive.
   */
@Test(timeout = 300000)
public void testSlowReader() throws Exception {
    // Set a client socket cache expiry time much longer than 
    // the datanode-side expiration time.
    final long CLIENT_EXPIRY_MS = 600000L;
    Configuration clientConf = new Configuration(conf);
    clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
    clientConf.set(DFS_CLIENT_CONTEXT, "testSlowReader");
    DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(cluster.getURI(), clientConf);
    // Restart the DN with a shorter write timeout.
    DataNodeProperties props = cluster.stopDataNode(0);
    props.conf.setInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY, WRITE_TIMEOUT);
    props.conf.setInt(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, 120000);
    assertTrue(cluster.restartDataNode(props, true));
    dn = cluster.getDataNodes().get(0);
    // Wait for heartbeats to avoid a startup race where we
    // try to write the block while the DN is still starting.
    cluster.triggerHeartbeats();
    DFSTestUtil.createFile(fs, TEST_FILE, 1024 * 1024 * 8L, (short) 1, 0L);
    FSDataInputStream stm = fs.open(TEST_FILE);
    stm.read();
    assertXceiverCount(1);
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            // Wait for the xceiver to exit.
            return getXceiverCountWithoutServer() == 0;
        }
    }, 500, 50000);
    IOUtils.closeStream(stm);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Test(org.junit.Test)
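
Since the Supplier used by GenericTestUtils.waitFor has a single abstract method, the anonymous class above can be replaced by a lambda on Java 8+. An equivalent form of the same call, polling every 500 ms for up to 50 s:

// Equivalent lambda form of the waitFor call above (Java 8+).
GenericTestUtils.waitFor(() -> getXceiverCountWithoutServer() == 0, 500, 50000);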

Example 10 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestDataNodeMultipleRegistrations method testDNWithInvalidStorageWithHA.

@Test
public void testDNWithInvalidStorageWithHA() throws Exception {
    MiniDFSNNTopology top = new MiniDFSNNTopology()
        .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
            .addNN(new MiniDFSNNTopology.NNConf("nn0").setClusterId("cluster-1"))
            .addNN(new MiniDFSNNTopology.NNConf("nn1").setClusterId("cluster-1")));
    top.setFederation(true);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(top).numDataNodes(0).build();
    try {
        cluster.startDataNodes(conf, 1, true, null, null);
        // Give the datanode time to finish initialization.
        Thread.sleep(10000);
        DataNode dn = cluster.getDataNodes().get(0);
        assertTrue("Datanode should be running", dn.isDatanodeUp());
        assertEquals("BPOfferService should be running", 1, dn.getAllBpOs().size());
        DataNodeProperties dnProp = cluster.stopDataNode(0);
        cluster.getNameNode(0).stop();
        cluster.getNameNode(1).stop();
        Configuration nn1 = cluster.getConfiguration(0);
        Configuration nn2 = cluster.getConfiguration(1);
        // setting up invalid cluster
        StartupOption.FORMAT.setClusterId("cluster-2");
        DFSTestUtil.formatNameNode(nn1);
        MiniDFSCluster.copyNameDirs(FSNamesystem.getNamespaceDirs(nn1), FSNamesystem.getNamespaceDirs(nn2), nn2);
        cluster.restartNameNode(0, false);
        cluster.restartNameNode(1, false);
        cluster.restartDataNode(dnProp);
        // Give the restarted datanode time to finish initialization.
        Thread.sleep(10000);
        dn = cluster.getDataNodes().get(0);
        assertFalse("Datanode should have shutdown as only service failed", dn.isDatanodeUp());
    } finally {
        cluster.shutdown();
    }
}
Also used : MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) MiniDFSNNTopology(org.apache.hadoop.hdfs.MiniDFSNNTopology) Test(org.junit.Test)
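
The fixed Thread.sleep(10000) calls above make the test slow and can still race; where a concrete condition is available, polling for it is more robust. A sketch using GenericTestUtils.waitFor from org.apache.hadoop.test, meant to stand in for the first sleep inside the same test method (the 500 ms / 10 s polling parameters are an assumption):

// Poll every 500 ms, for up to 10 s, until the datanode reports itself
// up, instead of sleeping for a fixed 10 seconds.
final DataNode dn0 = cluster.getDataNodes().get(0);
GenericTestUtils.waitFor(() -> dn0.isDatanodeUp(), 500, 10000);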

Aggregations

DataNodeProperties (org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties): 24
Test (org.junit.Test): 21
Path (org.apache.hadoop.fs.Path): 12
Configuration (org.apache.hadoop.conf.Configuration): 11
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 7
FileSystem (org.apache.hadoop.fs.FileSystem): 6
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 6
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 6
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 5
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 3
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 3
File (java.io.File): 2
IOException (java.io.IOException): 2
OutputStream (java.io.OutputStream): 2
ArrayList (java.util.ArrayList): 2
BlockLocation (org.apache.hadoop.fs.BlockLocation): 2
AdminStatesBaseTest (org.apache.hadoop.hdfs.AdminStatesBaseTest): 2
DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID): 2
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 2
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 2