Example 56 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache: class TestOverReplicatedBlocks, method testChooseReplicaToDelete.

/**
   * Verifies that the replica chosen for deletion is on the node with the
   * oldest heartbeat, once that heartbeat is older than the tolerable
   * heartbeat interval.
   * The test creates a file with several blocks and replication 4; the last
   * DN is configured to send heartbeats rarely.
   * 
   * The test waits until the tolerable heartbeat interval expires, then
   * reduces the replication of the file. All replica deletions should be
   * scheduled on the last node. No replicas are actually deleted, since the
   * last DN does not send heartbeats.
   */
@Test
public void testChooseReplicaToDelete() throws Exception {
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try {
        Configuration conf = new HdfsConfiguration();
        conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, SMALL_BLOCK_SIZE);
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
        fs = cluster.getFileSystem();
        final FSNamesystem namesystem = cluster.getNamesystem();
        final BlockManager bm = namesystem.getBlockManager();
        conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 300);
        cluster.startDataNodes(conf, 1, true, null, null, null);
        DataNode lastDN = cluster.getDataNodes().get(3);
        DatanodeRegistration dnReg = InternalDataNodeTestUtils.getDNRegistrationForBP(lastDN, namesystem.getBlockPoolId());
        String lastDNid = dnReg.getDatanodeUuid();
        final Path fileName = new Path("/foo2");
        DFSTestUtil.createFile(fs, fileName, SMALL_FILE_LENGTH, (short) 4, 0L);
        DFSTestUtil.waitReplication(fs, fileName, (short) 4);
        // Wait for tolerable number of heartbeats plus one
        DatanodeDescriptor nodeInfo = null;
        long lastHeartbeat = 0;
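        // With the stock defaults (heartbeat interval 3 s, tolerate
        // multiplier 4) this evaluates to 3 * 1000 * (4 + 1) = 15,000 ms.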
        long waitTime = DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT * 1000 * (DFSConfigKeys.DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_DEFAULT + 1);
        do {
            nodeInfo = bm.getDatanodeManager().getDatanode(dnReg);
            lastHeartbeat = nodeInfo.getLastUpdateMonotonic();
        } while (monotonicNow() - lastHeartbeat < waitTime);
        fs.setReplication(fileName, (short) 3);
        BlockLocation[] locs = fs.getFileBlockLocations(fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
        // All replicas for deletion should be scheduled on lastDN.
        // And should not actually be deleted, because lastDN does not heartbeat.
        namesystem.readLock();
        final int dnBlocks = bm.getExcessSize4Testing(dnReg.getDatanodeUuid());
        assertEquals("Replicas on node " + lastDNid + " should have been deleted", SMALL_FILE_LENGTH / SMALL_BLOCK_SIZE, dnBlocks);
        namesystem.readUnlock();
        for (BlockLocation location : locs) assertEquals("Block should still have 4 replicas", 4, location.getNames().length);
    } finally {
        if (fs != null)
            fs.close();
        if (cluster != null)
            cluster.shutdown();
    }
}
Also used: org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.BlockLocation, org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path, org.apache.hadoop.hdfs.HdfsConfiguration, org.apache.hadoop.hdfs.MiniDFSCluster, org.apache.hadoop.hdfs.server.datanode.DataNode, org.apache.hadoop.hdfs.server.namenode.FSNamesystem, org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration, org.junit.Test
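
A note on the busy-wait loop above: Hadoop's test utilities provide GenericTestUtils.waitFor for this pattern, which bounds the wait instead of spinning forever if the heartbeat never goes stale. A minimal sketch, reusing the bm, dnReg, and waitTime variables from the test above (an illustration, not the project's code):

import org.apache.hadoop.test.GenericTestUtils;

// Poll every 100 ms until the DN's last heartbeat is older than waitTime;
// give up with a TimeoutException after 60 s.
GenericTestUtils.waitFor(
    () -> monotonicNow()
        - bm.getDatanodeManager().getDatanode(dnReg).getLastUpdateMonotonic()
        >= waitTime,
    100, 60000);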

Example 57 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache: class TestOverReplicatedBlocks, method testProcesOverReplicateBlock.

/** Tests that processOverReplicatedBlock handles corrupt replicas correctly.
   * It makes sure corrupt replicas are not treated as valid ones, which
   * would otherwise let the NN delete valid replicas while keeping
   * corrupt ones.
   */
@Test
public void testProcesOverReplicateBlock() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
    conf.set(DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    FileSystem fs = cluster.getFileSystem();
    try {
        final Path fileName = new Path("/foo1");
        DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
        DFSTestUtil.waitReplication(fs, fileName, (short) 3);
        // corrupt the block on datanode 0
        ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
        cluster.corruptReplica(0, block);
        DataNodeProperties dnProps = cluster.stopDataNode(0);
        // delete the block scanner's cursor file to trigger a fresh block scan
        File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(cluster.getInstanceStorageDir(0, 0), cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(), "scanner.cursor");
        // wait up to one minute for the deletion to succeed
        for (int i = 0; !scanCursor.delete(); i++) {
            assertTrue("Could not delete " + scanCursor.getAbsolutePath() + " in one minute", i < 60);
            try {
                Thread.sleep(1000);
            } catch (InterruptedException ignored) {
            }
        }
        // restart the datanode so the corrupt replica will be detected
        cluster.restartDataNode(dnProps);
        DFSTestUtil.waitReplication(fs, fileName, (short) 2);
        String blockPoolId = cluster.getNamesystem().getBlockPoolId();
        final DatanodeID corruptDataNode = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(2), blockPoolId);
        final FSNamesystem namesystem = cluster.getNamesystem();
        final BlockManager bm = namesystem.getBlockManager();
        final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
        try {
            namesystem.writeLock();
            synchronized (hm) {
                // set every live datanode's remaining space to 0 so their
                // replicas will be chosen for deletion when over-replication
                // is processed
                String corruptMachineName = corruptDataNode.getXferAddr();
                for (DatanodeDescriptor datanode : hm.getDatanodes()) {
                    if (!corruptMachineName.equals(datanode.getXferAddr())) {
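                        // setUtilizationForTesting(capacity, dfsUsed,
                        // remaining, blockPoolUsed): remaining == 0 makes
                        // this node a preferred target for excess-replica
                        // deletion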
                        datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
                        datanode.updateHeartbeat(BlockManagerTestUtil.getStorageReportsForDatanode(datanode), 0L, 0L, 0, 0, null);
                    }
                }
                // decrease the replication factor to 1; 
                NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short) 1);
                // the corrupt replica won't be chosen as the excess one;
                // without 4910, the number of live replicas would be 0 and the block would be lost
                assertEquals(1, bm.countNodes(bm.getStoredBlock(block.getLocalBlock())).liveReplicas());
            }
        } finally {
            namesystem.writeUnlock();
        }
    } finally {
        cluster.shutdown();
    }
}
Also used: java.io.File, org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path, org.apache.hadoop.hdfs.HdfsConfiguration, org.apache.hadoop.hdfs.MiniDFSCluster, org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties, org.apache.hadoop.hdfs.protocol.DatanodeID, org.apache.hadoop.hdfs.protocol.ExtendedBlock, org.apache.hadoop.hdfs.server.namenode.FSNamesystem, org.junit.Test
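
Tracing the replica arithmetic in this test: the file starts with three replicas on three DNs, and corrupting the replica on DN 0 leaves two valid copies, which is what waitReplication(fs, fileName, (short) 2) observes after the restart. The node fetched via cluster.getDataNodes().get(2) appears to be the restarted DN holding the corrupt replica (stopping DN 0 removes it from the list, restarting appends it at the end). Every other live node then gets its remaining space forced to 0, making those nodes the preferred deletion targets. Dropping the replication factor to 1 therefore tempts the NN to keep only the replica on the node with free space, i.e. the corrupt one; the final liveReplicas() == 1 assertion checks that a valid replica survives instead.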

Example 58 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache: class TestDatanodeManager, method testNumVersionsReportedCorrect.

/**
   * This test sends a random sequence of node registrations and node removals
   * to the DatanodeManager (of nodes with different IDs and versions), and
   * checks that the DatanodeManager keeps a correct count of different software
   * versions at all times.
   */
@Test
public void testNumVersionsReportedCorrect() throws IOException {
    //Create the DatanodeManager which will be tested
    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
    Mockito.when(fsn.hasWriteLock()).thenReturn(true);
    DatanodeManager dm = mockDatanodeManager(fsn, new Configuration());
    //Seed the RNG with a known value so test failures are easier to reproduce
    Random rng = new Random();
    int seed = rng.nextInt();
    rng = new Random(seed);
    LOG.info("Using seed " + seed + " for testing");
    //A map from each storage ID to the DN registration it was registered with
    HashMap<String, DatanodeRegistration> sIdToDnReg = new HashMap<String, DatanodeRegistration>();
    for (int i = 0; i < NUM_ITERATIONS; ++i) {
        //On roughly every third iteration, remove a random node (if any are registered)
        if (rng.nextBoolean() && i % 3 == 0 && sIdToDnReg.size() != 0) {
            //Pick a random node.
            int randomIndex = rng.nextInt() % sIdToDnReg.size();
            //Iterate to that random position 
            Iterator<Map.Entry<String, DatanodeRegistration>> it = sIdToDnReg.entrySet().iterator();
            for (int j = 0; j < randomIndex - 1; ++j) {
                it.next();
            }
            DatanodeRegistration toRemove = it.next().getValue();
            LOG.info("Removing node " + toRemove.getDatanodeUuid() + " ip " + toRemove.getXferAddr() + " version : " + toRemove.getSoftwareVersion());
            //Remove that random node
            dm.removeDatanode(toRemove);
            it.remove();
        } else {
            // Otherwise register a node; it may be a new one or one seen before
            //Pick a random storageID to register.
            String storageID = "someStorageID" + rng.nextInt(5000);
            DatanodeRegistration dr = Mockito.mock(DatanodeRegistration.class);
            Mockito.when(dr.getDatanodeUuid()).thenReturn(storageID);
            //If this storageID had already been registered before
            if (sIdToDnReg.containsKey(storageID)) {
                dr = sIdToDnReg.get(storageID);
                //Half the time, change the IP address
                if (rng.nextBoolean()) {
                    dr.setIpAddr(dr.getIpAddr() + "newIP");
                }
            } else {
                //This storageID has never been registered
                //Ensure IP address is unique to storageID
                String ip = "someIP" + storageID;
                Mockito.when(dr.getIpAddr()).thenReturn(ip);
                Mockito.when(dr.getXferAddr()).thenReturn(ip + ":9000");
                Mockito.when(dr.getXferPort()).thenReturn(9000);
            }
            //Pick a random version to register with
            Mockito.when(dr.getSoftwareVersion()).thenReturn("version" + rng.nextInt(5));
            LOG.info("Registering node storageID: " + dr.getDatanodeUuid() + ", version: " + dr.getSoftwareVersion() + ", IP address: " + dr.getXferAddr());
            //Register this random node
            dm.registerDatanode(dr);
            sIdToDnReg.put(storageID, dr);
        }
        //Verify DatanodeManager still has the right count
        Map<String, Integer> mapToCheck = dm.getDatanodesSoftwareVersions();
        // Decrement the count for each registered node's version; after all
        // nodes are accounted for, mapToCheck should be empty
        for (Entry<String, DatanodeRegistration> it : sIdToDnReg.entrySet()) {
            String ver = it.getValue().getSoftwareVersion();
            if (!mapToCheck.containsKey(ver)) {
                throw new AssertionError("The correct number of datanodes of a " + "version was not found on iteration " + i);
            }
            mapToCheck.put(ver, mapToCheck.get(ver) - 1);
            if (mapToCheck.get(ver) == 0) {
                mapToCheck.remove(ver);
            }
        }
        for (Entry<String, Integer> entry : mapToCheck.entrySet()) {
            LOG.info("Still in map: " + entry.getKey() + " has " + entry.getValue());
        }
        assertEquals("The map of version counts returned by DatanodeManager was" + " not what it was expected to be on iteration " + i, 0, mapToCheck.size());
    }
}
Also used: java.util.HashMap, java.util.Map.Entry, java.util.Random, org.apache.hadoop.conf.Configuration, org.apache.hadoop.hdfs.server.namenode.FSNamesystem, org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration, org.junit.Test
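
The verification loop at the end of each iteration is a decrement-to-zero multiset comparison. The same idiom extracted as a standalone sketch (a hypothetical helper, not a Hadoop API):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Verify that reported holds exactly the version counts implied by
// registeredVersions (one entry per registered node): decrement each
// reported count and require the map to drain to empty.
static void assertVersionCounts(Map<String, Integer> reported, List<String> registeredVersions) {
    Map<String, Integer> remaining = new HashMap<>(reported);
    for (String ver : registeredVersions) {
        Integer count = remaining.get(ver);
        if (count == null) {
            throw new AssertionError("no count left for version " + ver);
        }
        if (count == 1) {
            remaining.remove(ver);
        } else {
            remaining.put(ver, count - 1);
        }
    }
    if (!remaining.isEmpty()) {
        throw new AssertionError("unexpected extra counts: " + remaining);
    }
}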

Example 59 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache: class TestDatanodeManager, method testRejectUnresolvedDatanodes.

@Test(timeout = 100000)
public void testRejectUnresolvedDatanodes() throws IOException {
    //Create the DatanodeManager which will be tested
    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
    Mockito.when(fsn.hasWriteLock()).thenReturn(true);
    Configuration conf = new Configuration();
    //Set configuration property for rejecting unresolved topology mapping
    conf.setBoolean(DFSConfigKeys.DFS_REJECT_UNRESOLVED_DN_TOPOLOGY_MAPPING_KEY, true);
    //set TestDatanodeManager.MyResolver to be used for topology resolving
    conf.setClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, TestDatanodeManager.MyResolver.class, DNSToSwitchMapping.class);
    //create DatanodeManager
    DatanodeManager dm = mockDatanodeManager(fsn, conf);
    //storageID to register.
    String storageID = "someStorageID-123";
    DatanodeRegistration dr = Mockito.mock(DatanodeRegistration.class);
    Mockito.when(dr.getDatanodeUuid()).thenReturn(storageID);
    try {
        //Register this node
        dm.registerDatanode(dr);
        Assert.fail("Expected an UnresolvedTopologyException");
    } catch (UnresolvedTopologyException ute) {
        LOG.info("Expected - topology is not resolved and " + "registration is rejected.");
    } catch (Exception e) {
        Assert.fail("Expected an UnresolvedTopologyException");
    }
}
Also used: java.io.IOException, java.net.URISyntaxException, org.apache.hadoop.conf.Configuration, org.apache.hadoop.hdfs.server.namenode.FSNamesystem, org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration, org.junit.Test
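
The test plugs in TestDatanodeManager.MyResolver, whose body this listing does not show. As an illustration only (an assumption, not the project's actual class), a DNSToSwitchMapping that never resolves any host could look like this; with DFS_REJECT_UNRESOLVED_DN_TOPOLOGY_MAPPING_KEY set to true, an unresolved (null or empty) mapping should make registerDatanode throw UnresolvedTopologyException:

import java.util.List;
import org.apache.hadoop.net.DNSToSwitchMapping;

// Hypothetical resolver that fails to resolve every host, so the
// DatanodeManager treats each registration's topology mapping as unresolved.
public static final class MyResolver implements DNSToSwitchMapping {
    @Override
    public List<String> resolve(List<String> names) {
        return null; // "unresolved" for every queried host
    }

    @Override
    public void reloadCachedMappings() {
    }

    @Override
    public void reloadCachedMappings(List<String> names) {
    }
}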

Example 60 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache: class TestHeartbeatHandling, method testHeartbeat.

/**
   * Tests whether
   * {@link FSNamesystem#handleHeartbeat}
   * can pick up replication and/or invalidation requests and observes the
   * per-heartbeat limits on each
   */
@Test
public void testHeartbeat() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
        cluster.waitActive();
        final FSNamesystem namesystem = cluster.getNamesystem();
        final HeartbeatManager hm = namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager();
        final String poolId = namesystem.getBlockPoolId();
        final DatanodeRegistration nodeReg = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
        final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg);
        final String storageID = DatanodeStorage.generateUuid();
        dd.updateStorage(new DatanodeStorage(storageID));
        final int REMAINING_BLOCKS = 1;
        final int MAX_REPLICATE_LIMIT = conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2);
        final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT;
        final int MAX_INVALIDATE_BLOCKS = 2 * MAX_INVALIDATE_LIMIT + REMAINING_BLOCKS;
        final int MAX_REPLICATE_BLOCKS = 2 * MAX_REPLICATE_LIMIT + REMAINING_BLOCKS;
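        // Each kind queues 2 * limit + 1 items, and a heartbeat returns at
        // most limit items per command type, so each queue drains over three
        // heartbeats as: limit, limit, 1.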
        final DatanodeStorageInfo[] ONE_TARGET = { dd.getStorageInfo(storageID) };
        try {
            namesystem.writeLock();
            synchronized (hm) {
                for (int i = 0; i < MAX_REPLICATE_BLOCKS; i++) {
                    dd.addBlockToBeReplicated(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP), ONE_TARGET);
                }
                DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
                assertEquals(1, cmds.length);
                assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
                assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);
                ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
                for (int i = 0; i < MAX_INVALIDATE_BLOCKS; i++) {
                    blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP));
                }
                dd.addBlocksToBeInvalidated(blockList);
                cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
                assertEquals(2, cmds.length);
                assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
                assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);
                assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
                assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);
                cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
                assertEquals(2, cmds.length);
                assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
                assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);
                assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
                assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);
                cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
                assertEquals(1, cmds.length);
                assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
                assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);
                cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
                assertEquals(0, cmds.length);
            }
        } finally {
            namesystem.writeUnlock();
        }
    } finally {
        cluster.shutdown();
    }
}
Also used: java.util.ArrayList, org.apache.hadoop.conf.Configuration, org.apache.hadoop.hdfs.HdfsConfiguration, org.apache.hadoop.hdfs.MiniDFSCluster, org.apache.hadoop.hdfs.protocol.Block, org.apache.hadoop.hdfs.server.namenode.FSNamesystem, org.apache.hadoop.hdfs.server.protocol.BlockCommand, org.apache.hadoop.hdfs.server.protocol.DatanodeCommand, org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration, org.apache.hadoop.hdfs.server.protocol.DatanodeStorage, org.junit.Test
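
Working through the assertions above with the stock defaults (assuming a replication max-streams default of 2 and DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT of 1000): the test queues 2 * 2 + 1 = 5 replications and 2 * 1000 + 1 = 2001 invalidations, and each heartbeat returns at most one command per type, capped at the respective limit:

heartbeat 1: DNA_TRANSFER with 2 blocks (only replications are queued so far)
heartbeat 2: DNA_TRANSFER with 2 blocks, DNA_INVALIDATE with 1000 blocks
heartbeat 3: DNA_TRANSFER with 1 block, DNA_INVALIDATE with 1000 blocks
heartbeat 4: DNA_INVALIDATE with the 1 remaining block
heartbeat 5: no commands; both queues are drained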

Aggregations

FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem): 77 uses
Test (org.junit.Test): 59 uses
Path (org.apache.hadoop.fs.Path): 51 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 41 uses
Configuration (org.apache.hadoop.conf.Configuration): 37 uses
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 27 uses
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 25 uses
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 23 uses
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 19 uses
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 14 uses
ArrayList (java.util.ArrayList): 12 uses
DatanodeRegistration (org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration): 12 uses
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 9 uses
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 7 uses
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 7 uses
DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID): 6 uses
BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager): 6 uses
File (java.io.File): 5 uses
IOException (java.io.IOException): 5 uses
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 5 uses