Example 16 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestNodeCount method testNodeCount.

@Test(timeout = 60000)
public void testNodeCount() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    // avoid invalidation by startup delay in order to make test non-transient
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_KEY, 60);
    // reduce intervals to make test execution time shorter
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
    // start a mini dfs cluster of 2 nodes
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR).build();
    try {
        final FSNamesystem namesystem = cluster.getNamesystem();
        final BlockManager bm = namesystem.getBlockManager();
        final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
        final FileSystem fs = cluster.getFileSystem();
        // populate the cluster with a single-block file
        final Path FILE_PATH = new Path("/testfile");
        DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
        DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
        ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
        // keep a copy of all datanode descriptors
        final DatanodeDescriptor[] datanodes = hm.getDatanodes();
        // start two new nodes
        cluster.startDataNodes(conf, 2, true, null, null);
        cluster.waitActive();
        // bring down first datanode
        DatanodeDescriptor datanode = datanodes[0];
        DataNodeProperties dnprop = cluster.stopDataNode(datanode.getXferAddr());
        // make sure that NN detects that the datanode is down
        BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), datanode.getXferAddr());
        // the block will be replicated
        DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
        // restart the first datanode
        cluster.restartDataNode(dnprop);
        cluster.waitActive();
        // check that the excess replica is detected
        initializeTimeout(TIMEOUT);
        while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() == 0) {
            checkTimeout("excess replicas not detected");
        }
        // find a non-excess node
        DatanodeDescriptor nonExcessDN = null;
        for (DatanodeStorageInfo storage : bm.blocksMap.getStorages(block.getLocalBlock())) {
            final DatanodeDescriptor dn = storage.getDatanodeDescriptor();
            final BlockInfo info = new BlockInfoContiguous(block.getLocalBlock(), (short) 0);
            if (!bm.isExcess(dn, info)) {
                nonExcessDN = dn;
                break;
            }
        }
        assertNotNull(nonExcessDN);
        // bring down the non-excess datanode
        dnprop = cluster.stopDataNode(nonExcessDN.getXferAddr());
        // make sure that NN detects that the datanode is down
        BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), nonExcessDN.getXferAddr());
        // The block should be replicated
        initializeTimeout(TIMEOUT);
        while (countNodes(block.getLocalBlock(), namesystem).liveReplicas() != REPLICATION_FACTOR) {
            checkTimeout("live replica count not correct", 1000);
        }
        // restart the non-excess datanode
        cluster.restartDataNode(dnprop);
        cluster.waitActive();
        // check that both excess replicas are detected
        initializeTimeout(TIMEOUT);
        while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() != 2) {
            checkTimeout("excess replica count not equal to 2");
        }
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Configuration(org.apache.hadoop.conf.Configuration) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
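
The stop/restart pattern above is the common thread of all of these examples: stopDataNode returns a DataNodeProperties handle that captures the stopped DataNode's configuration and ports, and restartDataNode later consumes it to bring the same node back. A minimal sketch of just that round trip, assuming an already built and running cluster (not taken verbatim from the Hadoop sources):

// Stop a DataNode by index (an overload taking the transfer address also exists)
// and keep the returned handle.
DataNodeProperties dnprop = cluster.stopDataNode(0);
// ... exercise NameNode behavior (replication, invalidation) while the node is down ...
// Bring the same DataNode back with its saved configuration.
assertTrue(cluster.restartDataNode(dnprop));
cluster.waitActive();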

Example 17 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestOverReplicatedBlocks method testProcesOverReplicateBlock.

/** Test that processOverReplicatedBlock can handle corrupt replicas.
   * It makes sure that corrupt replicas are not treated as valid ones,
   * which would otherwise let the NN delete valid replicas while
   * keeping corrupt ones.
   */
@Test
public void testProcesOverReplicateBlock() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
    conf.set(DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    FileSystem fs = cluster.getFileSystem();
    try {
        final Path fileName = new Path("/foo1");
        DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
        DFSTestUtil.waitReplication(fs, fileName, (short) 3);
        // corrupt the block on datanode 0
        ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
        cluster.corruptReplica(0, block);
        DataNodeProperties dnProps = cluster.stopDataNode(0);
        // remove block scanner log to trigger block scanning
        File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
                cluster.getInstanceStorageDir(0, 0),
                cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
                "scanner.cursor");
        // wait up to one minute for the deletion to succeed
        for (int i = 0; !scanCursor.delete(); i++) {
            assertTrue("Could not delete " + scanCursor.getAbsolutePath() + " in one minute", i < 60);
            try {
                Thread.sleep(1000);
            } catch (InterruptedException ignored) {
            }
        }
        // restart the datanode so the corrupt replica will be detected
        cluster.restartDataNode(dnProps);
        DFSTestUtil.waitReplication(fs, fileName, (short) 2);
        String blockPoolId = cluster.getNamesystem().getBlockPoolId();
        final DatanodeID corruptDataNode = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(2), blockPoolId);
        final FSNamesystem namesystem = cluster.getNamesystem();
        final BlockManager bm = namesystem.getBlockManager();
        final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
        try {
            namesystem.writeLock();
            synchronized (hm) {
                // set each live datanode's remaining space to 0
                // so they will be chosen for deletion when over-replication occurs
                String corruptMachineName = corruptDataNode.getXferAddr();
                for (DatanodeDescriptor datanode : hm.getDatanodes()) {
                    if (!corruptMachineName.equals(datanode.getXferAddr())) {
                        datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
                        datanode.updateHeartbeat(BlockManagerTestUtil.getStorageReportsForDatanode(datanode), 0L, 0L, 0, 0, null);
                    }
                }
                // decrease the replication factor to 1
                NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short) 1);
                // the corrupt replica won't be chosen as the excess one;
                // without 4910 the number of live replicas would be 0 and the block would be lost
                assertEquals(1, bm.countNodes(bm.getStoredBlock(block.getLocalBlock())).liveReplicas());
            }
        } finally {
            namesystem.writeUnlock();
        }
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Configuration(org.apache.hadoop.conf.Configuration) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) FileSystem(org.apache.hadoop.fs.FileSystem) File(java.io.File) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
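
Example 17 lowers the replication factor through the test-only NameNodeAdapter while holding the namesystem write lock. For comparison, outside of test code the same effect is achieved through the public FileSystem API, after which the NameNode schedules excess replicas for deletion on its own; a brief sketch:

// setReplication returns true if the file exists and the change was accepted.
boolean changed = fs.setReplication(new Path("/foo1"), (short) 1);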

Example 18 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestProcessCorruptBlocks method testByAddingAnExtraDataNode.

/**
   * The corrupt block has to be removed when the number of valid replicas
   * matches the replication factor for the file. In this test, that
   * condition is achieved by increasing the number of good replicas by
   * replicating onto a new DataNode.
   * The test strategy:
   *   Bring up a cluster with 3 DataNodes
   *   Create a file with replication factor 3
   *   Corrupt one replica of a block of the file
   *   Verify that there are still 2 good replicas and 1 corrupt replica
   *     (the corrupt replica should not be removed since the number of good
   *      replicas (2) is less than the replication factor (3))
   *   Start a new DataNode
   *   Verify that a new replica is created and the corrupt replica is
   *   removed.
   */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
    conf.set(DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    FileSystem fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);
    try {
        final Path fileName = new Path("/foo1");
        DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
        DFSTestUtil.waitReplication(fs, fileName, (short) 3);
        ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
        corruptBlock(cluster, fs, fileName, 0, block);
        DFSTestUtil.waitReplication(fs, fileName, (short) 2);
        assertEquals(2, countReplicas(namesystem, block).liveReplicas());
        assertEquals(1, countReplicas(namesystem, block).corruptReplicas());
        cluster.restartDataNode(dnPropsFourth);
        DFSTestUtil.waitReplication(fs, fileName, (short) 3);
        assertEquals(3, countReplicas(namesystem, block).liveReplicas());
        assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Configuration(org.apache.hadoop.conf.Configuration) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) FileSystem(org.apache.hadoop.fs.FileSystem) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Test(org.junit.Test)
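
The countReplicas helper in Example 18 is defined in the test class itself and not shown here. Judging from the BlockManager calls in Example 17, a plausible implementation (an assumption, not the exact Hadoop source) simply delegates to the BlockManager's replica accounting:

// Hypothetical helper: look up the stored block and count its replica states.
private static NumberReplicas countReplicas(FSNamesystem namesystem, ExtendedBlock block) {
    BlockManager bm = namesystem.getBlockManager();
    return bm.countNodes(bm.getStoredBlock(block.getLocalBlock()));
}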

Example 19 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestPendingCorruptDnMessages method testChangedStorageId.

@Test(timeout = 60000)
public void testChangedStorageId() throws IOException, URISyntaxException, InterruptedException, TimeoutException {
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).nnTopology(MiniDFSNNTopology.simpleHATopology()).build();
    try {
        cluster.transitionToActive(0);
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        OutputStream out = fs.create(filePath);
        out.write("foo bar baz".getBytes());
        out.close();
        HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0), cluster.getNameNode(1));
        // Change the gen stamp of the block on datanode to go back in time (gen
        // stamps start at 1000)
        ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
        cluster.changeGenStampOfBlock(0, block, 900);
        // Run the directory scanner to update the DataNode's volumeMap
        DataNodeTestUtils.runDirectoryScanner(cluster.getDataNodes().get(0));
        // Stop the DN so the replica with the changed gen stamp will be reported
        // when this DN starts up.
        DataNodeProperties dnProps = cluster.stopDataNode(0);
        // Restart the namenode so that when the DN comes up it will see an initial
        // block report.
        cluster.restartNameNode(1, false);
        assertTrue(cluster.restartDataNode(dnProps, true));
        // Wait until the standby NN queues up the corrupt block in the pending DN
        // message queue.
        GenericTestUtils.waitFor(new Supplier<Boolean>() {

            @Override
            public Boolean get() {
                return cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount() == 1;
            }
        }, 1000, 30000);
        final String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
        assertNotNull(oldStorageId);
        // Reformat/restart the DN.
        assertTrue(wipeAndRestartDn(cluster, 0));
        GenericTestUtils.waitFor(new Supplier<Boolean>() {

            @Override
            public Boolean get() {
                final String newStorageId = getRegisteredDatanodeUid(cluster, 1);
                return newStorageId != null && !newStorageId.equals(oldStorageId);
            }
        }, 1000, 30000);
        assertEquals(0, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
        // Now try to fail over.
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
    } finally {
        cluster.shutdown();
    }
}
Also used : MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStream(java.io.OutputStream) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Test(org.junit.Test)
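
The anonymous Supplier blocks in Example 19 predate Java 8. With lambdas, the same polling wait reads more compactly while keeping the GenericTestUtils.waitFor signature (check every 1000 ms, fail after 30000 ms); a sketch:

GenericTestUtils.waitFor(
    () -> cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount() == 1,
    1000, 30000);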

Example 20 with DataNodeProperties

use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.

the class TestStandbyIsHot method testDatanodeRestarts.

/**
   * Regression test for HDFS-2795:
   *  - Start an HA cluster with a DN.
   *  - Write several blocks to the FS with replication 1.
   *  - Shut down the DN.
   *  - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
   *  - Restart the DN.
   * In the bug, the standby node would only very slowly notice the blocks
   * returning to the cluster.
   */
@Test(timeout = 60000)
public void testDatanodeRestarts() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
    // We read from the standby to watch block locations
    HAUtil.setAllowStandbyReads(conf, true);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0);
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).build();
    try {
        NameNode nn0 = cluster.getNameNode(0);
        NameNode nn1 = cluster.getNameNode(1);
        cluster.transitionToActive(0);
        // Create 5 blocks.
        DFSTestUtil.createFile(cluster.getFileSystem(0), TEST_FILE_PATH, 5 * 1024, (short) 1, 1L);
        HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
        // Stop the DN.
        DataNode dn = cluster.getDataNodes().get(0);
        String dnName = dn.getDatanodeId().getXferAddr();
        DataNodeProperties dnProps = cluster.stopDataNode(0);
        // Make sure both NNs register it as dead.
        BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName);
        BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName);
        BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
        BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
        assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks());
        // The SBN will not have any blocks in its neededReplication queue
        // since the SBN doesn't process replication.
        assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
        LocatedBlocks locs = nn1.getRpcServer().getBlockLocations(TEST_FILE, 0, 1);
        assertEquals("Standby should have registered that the block has no replicas", 0, locs.get(0).getLocations().length);
        cluster.restartDataNode(dnProps);
        // Wait for both NNs to re-register the DN.
        cluster.waitActive(0);
        cluster.waitActive(1);
        cluster.waitFirstBRCompleted(0, 10000);
        cluster.waitFirstBRCompleted(1, 10000);
        BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
        BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
        assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks());
        assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
        locs = nn1.getRpcServer().getBlockLocations(TEST_FILE, 0, 1);
        assertEquals("Standby should have registered that the block has replicas again", 1, locs.get(0).getLocations().length);
    } finally {
        cluster.shutdown();
    }
}
Also used : NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) Test(org.junit.Test)
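
Examples 19 and 20 both run against an HA NameNode pair, and their setup reduces to a few builder calls. A minimal sketch of the skeleton they share, using the same test utilities shown above:

// Two NameNodes in a single nameservice, plus one DataNode.
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(1)
    .build();
cluster.transitionToActive(0); // NN0 serves requests; NN1 stays standby
// A client configured to fail over between the two NameNodes.
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);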

Aggregations

DataNodeProperties (org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties)24 Test (org.junit.Test)21 Path (org.apache.hadoop.fs.Path)12 Configuration (org.apache.hadoop.conf.Configuration)11 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)7 FileSystem (org.apache.hadoop.fs.FileSystem)6 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)6 ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock)6 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)5 LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)3 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)3 File (java.io.File)2 IOException (java.io.IOException)2 OutputStream (java.io.OutputStream)2 ArrayList (java.util.ArrayList)2 BlockLocation (org.apache.hadoop.fs.BlockLocation)2 AdminStatesBaseTest (org.apache.hadoop.hdfs.AdminStatesBaseTest)2 DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID)2 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)2 LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock)2