
Example 11 with DataNodeProperties

Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

From the class TestDecommissionWithStriped, method testDecommissionWithURBlockForSameBlockGroup.

@Test(timeout = 120000)
public void testDecommissionWithURBlockForSameBlockGroup() throws Exception {
    LOG.info("Starting test testDecommissionWithURBlocksForSameBlockGroup");
    final Path ecFile = new Path(ecDir, "testDecommissionWithCorruptBlocks");
    int writeBytes = cellSize * dataBlocks * 2;
    writeStripedFile(dfs, ecFile, writeBytes);
    Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks());
    final List<DatanodeInfo> decommisionNodes = new ArrayList<DatanodeInfo>();
    LocatedBlock lb = dfs.getClient().getLocatedBlocks(ecFile.toString(), 0).get(0);
    DatanodeInfo[] dnLocs = lb.getLocations();
    assertEquals(dataBlocks + parityBlocks, dnLocs.length);
    int decommNodeIndex = dataBlocks - 1;
    int stopNodeIndex = 1;
    // add the nodes which will be decommissioning
    decommisionNodes.add(dnLocs[decommNodeIndex]);
    // stop excess dns to avoid immediate reconstruction.
    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
    List<DataNodeProperties> stoppedDns = new ArrayList<>();
    for (DatanodeInfo liveDn : info) {
        boolean usedNode = false;
        for (DatanodeInfo datanodeInfo : dnLocs) {
            if (liveDn.getXferAddr().equals(datanodeInfo.getXferAddr())) {
                usedNode = true;
                break;
            }
        }
        if (!usedNode) {
            DataNode dn = cluster.getDataNode(liveDn.getIpcPort());
            stoppedDns.add(cluster.stopDataNode(liveDn.getXferAddr()));
            cluster.setDataNodeDead(dn.getDatanodeId());
            LOG.info("stop datanode " + dn.getDatanodeId().getHostName());
        }
    }
    DataNode dn = cluster.getDataNode(dnLocs[stopNodeIndex].getIpcPort());
    cluster.stopDataNode(dnLocs[stopNodeIndex].getXferAddr());
    cluster.setDataNodeDead(dn.getDatanodeId());
    numDNs = numDNs - 1;
    // Decommission node in a new thread. Verify that node is decommissioned.
    final CountDownLatch decomStarted = new CountDownLatch(0);
    Thread decomTh = new Thread() {
        public void run() {
            try {
                decomStarted.countDown();
                decommissionNode(0, decommisionNodes, AdminStates.DECOMMISSIONED);
            } catch (Exception e) {
                LOG.error("Exception while decommissioning", e);
                Assert.fail("Shouldn't throw exception!");
            }
        }
    };
    int deadDecomissioned = fsn.getNumDecomDeadDataNodes();
    int liveDecomissioned = fsn.getNumDecomLiveDataNodes();
    decomTh.start();
    decomStarted.await(5, TimeUnit.SECONDS);
    // grace period to trigger decommissioning call
    Thread.sleep(3000);
    // start datanode so that decommissioning live node will be finished
    for (DataNodeProperties dnp : stoppedDns) {
        cluster.restartDataNode(dnp);
        LOG.info("Restarts stopped datanode:{} to trigger block reconstruction", dnp.datanode);
    }
    cluster.waitActive();
    LOG.info("Waiting to finish decommissioning node:{}", decommisionNodes);
    // waiting 20secs to finish decommission
    decomTh.join(20000);
    LOG.info("Finished decommissioning node:{}", decommisionNodes);
    assertEquals(deadDecomissioned, fsn.getNumDecomDeadDataNodes());
    assertEquals(liveDecomissioned + decommisionNodes.size(), fsn.getNumDecomLiveDataNodes());
    // Ensure decommissioned datanode is not automatically shutdown
    DFSClient client = getDfsClient(cluster.getNameNode(0), conf);
    assertEquals("All datanodes must be alive", numDNs, client.datanodeReport(DatanodeReportType.LIVE).length);
    assertNull(checkFile(dfs, ecFile, 9, decommisionNodes, numDNs));
    StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommisionNodes, null, blockGroupSize);
    cleanupFile(dfs, ecFile);
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) ArrayList(java.util.ArrayList) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) CountDownLatch(java.util.concurrent.CountDownLatch) IOException(java.io.IOException) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Test(org.junit.Test)
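
The example above combines several MiniDFSCluster operations around DataNodeProperties: stopping the unused datanodes so reconstruction cannot start early, marking them dead on the NameNode, and later restarting them from the saved handles. As a minimal sketch of that core lifecycle (not part of the original test; the class and method names are illustrative):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;

public class StopRestartSketch {

    // Stop the first `count` datanodes; stopDataNode returns a DataNodeProperties
    // handle that keeps the node's configuration and storage so the same node can
    // be brought back later. Stopping index 0 repeatedly works because the cluster
    // removes a stopped datanode from its internal list.
    static List<DataNodeProperties> stopDataNodes(MiniDFSCluster cluster, int count) {
        List<DataNodeProperties> stopped = new ArrayList<>();
        for (int i = 0; i < count; i++) {
            stopped.add(cluster.stopDataNode(0));
        }
        return stopped;
    }

    // Restart the previously stopped datanodes from their saved handles and wait
    // for the cluster to settle, for example to let block reconstruction finish.
    static void restartStoppedDataNodes(MiniDFSCluster cluster, List<DataNodeProperties> stopped)
            throws Exception {
        for (DataNodeProperties dnp : stopped) {
            cluster.restartDataNode(dnp);
        }
        cluster.waitActive();
    }
}

The repeated stopDataNode(0) idiom is the same one Example 14 below uses when it shuts down all three datanodes before restarting the NameNode.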

Example 12 with DataNodeProperties

Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hbase by apache.

From the class TestAsyncLogRolling, method testLogRollOnDatanodeDeath.

@Test(timeout = 180000)
public void testLogRollOnDatanodeDeath() throws IOException, InterruptedException {
    dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), 3, true, null, null);
    tableName = getName();
    Table table = createTestTable(tableName);
    TEST_UTIL.waitUntilAllRegionsAssigned(table.getName());
    doPut(table, 1);
    server = TEST_UTIL.getRSForFirstRegionInTable(table.getName());
    HRegionInfo hri = server.getOnlineRegions(table.getName()).get(0).getRegionInfo();
    AsyncFSWAL wal = (AsyncFSWAL) server.getWAL(hri);
    int numRolledLogFiles = AsyncFSWALProvider.getNumRolledLogFiles(wal);
    DatanodeInfo[] dnInfos = wal.getPipeline();
    DataNodeProperties dnProp = TEST_UTIL.getDFSCluster().stopDataNode(dnInfos[0].getName());
    TEST_UTIL.getDFSCluster().restartDataNode(dnProp);
    doPut(table, 2);
    assertEquals(numRolledLogFiles + 1, AsyncFSWALProvider.getNumRolledLogFiles(wal));
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) Table(org.apache.hadoop.hbase.client.Table) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) Test(org.junit.Test)

Example 13 with DataNodeProperties

Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

From the class TestNameNodeMetadataConsistency, method testGenerationStampInFuture.

/**
   * This test creates a file and modifies the block generation stamp to a
   * number that the name node has not seen yet. It then asserts that the
   * name node moves into safe mode while it is in startup mode.
   */
@Test
public void testGenerationStampInFuture() throws Exception {
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    OutputStream ostream = fs.create(filePath1);
    ostream.write(TEST_DATA_IN_FUTURE.getBytes());
    ostream.close();
    // Re-write the Generation Stamp to a Generation Stamp in future.
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath1);
    final long genStamp = block.getGenerationStamp();
    final int datanodeIndex = 0;
    cluster.changeGenStampOfBlock(datanodeIndex, block, genStamp + 1);
    // stop the data node so that it won't remove the block
    final DataNodeProperties dnProps = cluster.stopDataNode(datanodeIndex);
    // Simulate Namenode forgetting a Block
    cluster.restartNameNode(true);
    cluster.getNameNode().getNamesystem().writeLock();
    BlockInfo bInfo = cluster.getNameNode().getNamesystem().getBlockManager().getStoredBlock(block.getLocalBlock());
    bInfo.delete();
    cluster.getNameNode().getNamesystem().getBlockManager().removeBlock(bInfo);
    cluster.getNameNode().getNamesystem().writeUnlock();
    // we also need to tell block manager that we are in the startup path
    BlockManagerTestUtil.setStartupSafeModeForTest(cluster.getNameNode().getNamesystem().getBlockManager());
    cluster.restartDataNode(dnProps);
    waitForNumBytes(TEST_DATA_IN_FUTURE.length());
    // Make sure that we find all written bytes in future block
    assertEquals(TEST_DATA_IN_FUTURE.length(), cluster.getNameNode().getBytesWithFutureGenerationStamps());
    // Assert safemode reason
    assertTrue(cluster.getNameNode().getNamesystem().getSafeModeTip().contains("Name node detected blocks with generation stamps in future"));
}
Also used : DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStream(java.io.OutputStream) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) Test(org.junit.Test)
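
A detail worth calling out in this test is the ordering around the NameNode restart: the datanode is stopped (so it cannot delete the future-genstamp block) before the NameNode is restarted and made to forget the block, and it is only brought back once the block manager is in its startup safe-mode path, so the datanode's block report is what reintroduces the block. A minimal sketch of that ordering, assuming a single-NameNode MiniDFSCluster (the helper name is illustrative, not from the original test):

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;

public class RestartOrderingSketch {

    // Stop a datanode, restart the NameNode, then bring the datanode back so
    // that its block report arrives while the NameNode is still starting up.
    static void bounceAroundNameNodeRestart(MiniDFSCluster cluster, int dnIndex)
            throws Exception {
        // The returned handle preserves the datanode's configuration and storage.
        DataNodeProperties dnProps = cluster.stopDataNode(dnIndex);
        // Restart the NameNode; passing true waits for it to come back up.
        cluster.restartNameNode(true);
        // Restart the datanode from the saved handle and wait for the cluster.
        cluster.restartDataNode(dnProps);
        cluster.waitActive();
    }
}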

Example 14 with DataNodeProperties

Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

From the class TestSafeMode, method testInitializeReplQueuesEarly.

/**
   * Test that the NN initializes its under-replicated blocks queue
   * before it is ready to exit safemode (HDFS-1476)
   */
@Test(timeout = 45000)
public void testInitializeReplQueuesEarly() throws Exception {
    LOG.info("Starting testInitializeReplQueuesEarly");
    // Spray the blocks around the cluster when we add DNs instead of
    // concentrating all blocks on the first node.
    BlockManagerTestUtil.setWritingPrefersLocalNode(cluster.getNamesystem().getBlockManager(), false);
    cluster.startDataNodes(conf, 2, true, StartupOption.REGULAR, null);
    cluster.waitActive();
    LOG.info("Creating files");
    DFSTestUtil.createFile(fs, TEST_PATH, 15 * BLOCK_SIZE, (short) 1, 1L);
    LOG.info("Stopping all DataNodes");
    List<DataNodeProperties> dnprops = Lists.newLinkedList();
    dnprops.add(cluster.stopDataNode(0));
    dnprops.add(cluster.stopDataNode(0));
    dnprops.add(cluster.stopDataNode(0));
    cluster.getConfiguration(0).setFloat(DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 1f / 15f);
    LOG.info("Restarting NameNode");
    cluster.restartNameNode();
    final NameNode nn = cluster.getNameNode();
    String status = nn.getNamesystem().getSafemode();
    assertEquals("Safe mode is ON. The reported blocks 0 needs additional " + "14 blocks to reach the threshold 0.9990 of total blocks 15." + NEWLINE + "The number of live datanodes 0 has reached the minimum number 0. " + "Safe mode will be turned off automatically once the thresholds " + "have been reached.", status);
    assertFalse("Mis-replicated block queues should not be initialized " + "until threshold is crossed", NameNodeAdapter.safeModeInitializedReplQueues(nn));
    LOG.info("Restarting one DataNode");
    cluster.restartDataNode(dnprops.remove(0));
    // Wait for block reports from all attached storages of
    // the restarted DN to come in.
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            return getLongCounter("StorageBlockReportOps", getMetrics(NN_METRICS)) == cluster.getStoragesPerDatanode();
        }
    }, 10, 10000);
    final long safe = NameNodeAdapter.getSafeModeSafeBlocks(nn);
    assertTrue("Expected first block report to make some blocks safe.", safe > 0);
    assertTrue("Did not expect first block report to make all blocks safe.", safe < 15);
    assertTrue(NameNodeAdapter.safeModeInitializedReplQueues(nn));
    // Ensure that UnderReplicatedBlocks goes up to 15 - safe. Misreplicated
    // blocks are processed asynchronously so this may take a few seconds.
    // Failure here will manifest as a test timeout.
    BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
    long underReplicatedBlocks = nn.getNamesystem().getUnderReplicatedBlocks();
    while (underReplicatedBlocks != (15 - safe)) {
        LOG.info("UnderReplicatedBlocks expected=" + (15 - safe) + ", actual=" + underReplicatedBlocks);
        Thread.sleep(100);
        BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
        underReplicatedBlocks = nn.getNamesystem().getUnderReplicatedBlocks();
    }
    cluster.restartDataNodes();
}
Also used : NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) Test(org.junit.Test)

Example 15 with DataNodeProperties

Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

From the class TestRBWBlockInvalidation, method testRWRInvalidation.

/**
   * Regression test for HDFS-4799, a case where, upon restart, if there
   * were RWR replicas with out-of-date genstamps, the NN could accidentally
   * delete good replicas instead of the bad replicas.
   */
@Test(timeout = 120000)
public void testRWRInvalidation() throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Set the deletion policy to be randomized rather than the default.
    // The default is based on disk space, which isn't controllable
    // in the context of the test, whereas a random one is more accurate
    // to what is seen in real clusters (nodes have random amounts of free
    // space)
    conf.setClass(DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY, RandomDeleterPolicy.class, BlockPlacementPolicy.class);
    // Speed up the test a bit with faster heartbeats.
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
    int numFiles = 10;
    // Test with a bunch of separate files, since otherwise the test may
    // fail just due to "good luck", even if a bug is present.
    List<Path> testPaths = Lists.newArrayList();
    for (int i = 0; i < numFiles; i++) {
        testPaths.add(new Path("/test" + i));
    }
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    try {
        List<FSDataOutputStream> streams = Lists.newArrayList();
        try {
            // Open the test files and write some data to each
            for (Path path : testPaths) {
                FSDataOutputStream out = cluster.getFileSystem().create(path, (short) 2);
                streams.add(out);
                out.writeBytes("old gs data\n");
                out.hflush();
            }
            for (Path path : testPaths) {
                DFSTestUtil.waitReplication(cluster.getFileSystem(), path, (short) 2);
            }
            // Shutdown one of the nodes in the pipeline
            DataNodeProperties oldGenstampNode = cluster.stopDataNode(0);
            // Write some more data and flush again. This data will only
            // be in the latter genstamp copy of the blocks.
            for (int i = 0; i < streams.size(); i++) {
                Path path = testPaths.get(i);
                FSDataOutputStream out = streams.get(i);
                out.writeBytes("new gs data\n");
                out.hflush();
                // Set replication so that only one node is necessary for this block,
                // and close it.
                cluster.getFileSystem().setReplication(path, (short) 1);
                out.close();
            }
            for (Path path : testPaths) {
                DFSTestUtil.waitReplication(cluster.getFileSystem(), path, (short) 1);
            }
            // Upon restart, there will be two replicas, one with an old genstamp
            // and one current copy. This test wants to ensure that the old genstamp
            // copy is the one that is deleted.
            LOG.info("=========================== restarting cluster");
            DataNodeProperties otherNode = cluster.stopDataNode(0);
            cluster.restartNameNode();
            // Restart the datanode with the corrupt replica first.
            cluster.restartDataNode(oldGenstampNode);
            cluster.waitActive();
            // Then the other node
            cluster.restartDataNode(otherNode);
            cluster.waitActive();
            // Compute and send invalidations, waiting until they're fully processed.
            cluster.getNameNode().getNamesystem().getBlockManager().computeInvalidateWork(2);
            cluster.triggerHeartbeats();
            HATestUtil.waitForDNDeletions(cluster);
            cluster.triggerDeletionReports();
            waitForNumTotalBlocks(cluster, numFiles);
            // Make sure we can still read the blocks.
            for (Path path : testPaths) {
                String ret = DFSTestUtil.readFile(cluster.getFileSystem(), path);
                assertEquals("old gs data\n" + "new gs data\n", ret);
            }
        } finally {
            IOUtils.cleanup(LOG, streams.toArray(new Closeable[0]));
        }
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) Closeable(java.io.Closeable) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Test(org.junit.Test)
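
Across these examples the DataNodeProperties handle is obtained either by index (cluster.stopDataNode(0)) or by datanode name / transfer address (cluster.stopDataNode(dnInfos[0].getName())), and is later handed back to restartDataNode. The sketch below shows the by-address variant for bouncing the node that serves a particular replica; the two-argument restartDataNode overload with a keep-port flag is an assumption about the MiniDFSCluster API in these Hadoop versions, so treat it as illustrative rather than authoritative:

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class BounceByAddressSketch {

    // Stop the datanode identified by its transfer address (as reported in a
    // located block or a WAL pipeline) and restart it from the saved handle.
    static void bounceDataNode(MiniDFSCluster cluster, DatanodeInfo target)
            throws Exception {
        DataNodeProperties dnProp = cluster.stopDataNode(target.getXferAddr());
        // Restart from the saved handle; the boolean asks the cluster to keep the
        // node on its original port (assumed overload, see note above).
        cluster.restartDataNode(dnProp, true);
        cluster.waitActive();
    }
}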

Aggregations

DataNodeProperties (org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties): 24 usages
Test (org.junit.Test): 21 usages
Path (org.apache.hadoop.fs.Path): 12 usages
Configuration (org.apache.hadoop.conf.Configuration): 11 usages
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 7 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 6 usages
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 6 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 5 usages
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 3 usages
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 3 usages
File (java.io.File): 2 usages
IOException (java.io.IOException): 2 usages
OutputStream (java.io.OutputStream): 2 usages
ArrayList (java.util.ArrayList): 2 usages
BlockLocation (org.apache.hadoop.fs.BlockLocation): 2 usages
AdminStatesBaseTest (org.apache.hadoop.hdfs.AdminStatesBaseTest): 2 usages
DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID): 2 usages
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 2 usages
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 2 usages