Example 21 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

The class TestBlocksWithNotEnoughRacks, method testCorruptBlockRereplicatedAcrossRacks.

/*
   * Test that a block that is re-replicated because one of its
   * replicas is found to be corrupt is re-replicated across racks.
   */
@Test
public void testCorruptBlockRereplicatedAcrossRacks() throws Exception {
    Configuration conf = getConf();
    short REPLICATION_FACTOR = 2;
    int fileLen = 512;
    final Path filePath = new Path("/testFile");
    // Datanodes are spread across two racks
    String[] racks = { "/rack1", "/rack1", "/rack2", "/rack2" };
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(racks.length).racks(racks).build();
    final FSNamesystem ns = cluster.getNameNode().getNamesystem();
    try {
        // Create a file with one block with a replication factor of 2
        final FileSystem fs = cluster.getFileSystem();
        DFSTestUtil.createFile(fs, filePath, fileLen, REPLICATION_FACTOR, 1L);
        final byte[] fileContent = DFSTestUtil.readFileAsBytes(fs, filePath);
        ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
        DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
        // Corrupt a replica of the block
        int dnToCorrupt = DFSTestUtil.firstDnWithBlock(cluster, b);
        cluster.corruptReplica(dnToCorrupt, b);
        // Restart the datanode so blocks are re-scanned, and the corrupt
        // block is detected.
        cluster.restartDataNode(dnToCorrupt);
        // Wait for the namenode to notice the corrupt replica
        DFSTestUtil.waitCorruptReplicas(fs, ns, filePath, b, 1);
        // The rack policy is still respected
        DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
        // Ensure all replicas are valid (the corrupt replica may not
        // have been cleaned up yet).
        for (int i = 0; i < racks.length; i++) {
            byte[] blockContent = cluster.readBlockOnDataNodeAsBytes(i, b);
            if (blockContent != null && i != dnToCorrupt) {
                assertArrayEquals("Corrupt replica", fileContent, blockContent);
            }
        }
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
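The getConf() helper called at the top of the test is not shown in this snippet. A minimal sketch of what it plausibly does, assuming it only needs to shorten the heartbeat and replication-check intervals so re-replication happens quickly; the exact set of keys the real TestBlocksWithNotEnoughRacks helper configures is an assumption:

// Hedged sketch of the getConf() helper; key names are from DFSConfigKeys,
// but the real helper may set additional keys.
private static Configuration getConf() {
    Configuration conf = new HdfsConfiguration();
    // Heartbeat every second so the NN learns of replica state quickly.
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L);
    // Scan for under-replicated blocks every second.
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1L);
    return conf;
}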

Example 22 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

The class TestBlockManager, method testBlockReportQueueing.

@Test
public void testBlockReportQueueing() throws Exception {
    Configuration conf = new HdfsConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
        cluster.waitActive();
        final FSNamesystem fsn = cluster.getNamesystem();
        final BlockManager bm = fsn.getBlockManager();
        final ExecutorService executor = Executors.newCachedThreadPool();
        final CyclicBarrier startBarrier = new CyclicBarrier(2);
        final CountDownLatch endLatch = new CountDownLatch(3);
        final CountDownLatch doneLatch = new CountDownLatch(1);
        // create a task intended to block while processing, thus causing
        // the queue to back up; simulates how a full block report (BR) is processed.
        FutureTask<?> blockingOp = new FutureTask<Void>(new Callable<Void>() {

            @Override
            public Void call() throws IOException {
                bm.runBlockOp(new Callable<Void>() {

                    @Override
                    public Void call() throws InterruptedException, BrokenBarrierException {
                        // use a barrier to control the blocking.
                        startBarrier.await();
                        endLatch.countDown();
                        return null;
                    }
                });
                // signal that runBlockOp returned
                doneLatch.countDown();
                return null;
            }
        });
        // create an async task; simulates how an incremental block report (IBR) is processed.
        Callable<?> asyncOp = new Callable<Void>() {

            @Override
            public Void call() throws IOException {
                bm.enqueueBlockOp(new Runnable() {

                    @Override
                    public void run() {
                        // use the latch to signal if the op has run.
                        endLatch.countDown();
                    }
                });
                return null;
            }
        };
        // calling get forces its execution so we can test if it's blocked.
        Future<?> blockedFuture = executor.submit(blockingOp);
        boolean isBlocked = false;
        try {
            // give the future 1s to complete; it should time out because the
            // submitted op is still parked at the barrier.
            blockedFuture.get(1, TimeUnit.SECONDS);
        } catch (TimeoutException te) {
            isBlocked = true;
        }
        assertTrue(isBlocked);
        // should effectively return immediately since calls are queued.
        // however they should be backed up in the queue behind the blocking
        // operation.
        executor.submit(asyncOp).get(1, TimeUnit.SECONDS);
        executor.submit(asyncOp).get(1, TimeUnit.SECONDS);
        // check the async calls are queued, and first is still blocked.
        assertEquals(2, bm.getBlockOpQueueLength());
        assertFalse(blockedFuture.isDone());
        // unblock the queue, wait for last op to complete, check the blocked
        // call has returned
        startBarrier.await(1, TimeUnit.SECONDS);
        assertTrue(endLatch.await(1, TimeUnit.SECONDS));
        assertEquals(0, bm.getBlockOpQueueLength());
        assertTrue(doneLatch.await(1, TimeUnit.SECONDS));
    } finally {
        cluster.shutdown();
    }
}
Also used : MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) Callable(java.util.concurrent.Callable) CyclicBarrier(java.util.concurrent.CyclicBarrier) FutureTask(java.util.concurrent.FutureTask) ExecutorService(java.util.concurrent.ExecutorService) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)
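The blocking-detection idiom used above (submit the op, then call get with a short timeout and treat a TimeoutException as proof the op is still blocked) is plain java.util.concurrent and can be exercised in isolation. A self-contained sketch, independent of Hadoop:

import java.util.concurrent.*;

public class BlockedFutureDemo {
    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        CyclicBarrier barrier = new CyclicBarrier(2);
        // This task parks at the barrier until the main thread joins it.
        Future<?> f = executor.submit(() -> {
            barrier.await();
            return null;
        });
        boolean blocked = false;
        try {
            f.get(1, TimeUnit.SECONDS); // expected to time out
        } catch (TimeoutException te) {
            blocked = true;
        }
        System.out.println("blocked = " + blocked); // prints: blocked = true
        barrier.await(); // release the task
        f.get();         // now completes normally
        executor.shutdown();
    }
}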

Example 23 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

The class TestDatanodeManager, method HelperFunction.

/**
   * Helper function that tests the DatanodeManager's sortLocatedBlocks
   * function. We invoke this function with and without topology scripts.
   *
   * @param scriptFileName - script name or null
   *
   * @throws URISyntaxException
   * @throws IOException
   */
public void HelperFunction(String scriptFileName) throws URISyntaxException, IOException {
    // create the DatanodeManager which will be tested
    Configuration conf = new Configuration();
    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
    Mockito.when(fsn.hasWriteLock()).thenReturn(true);
    if (scriptFileName != null && !scriptFileName.isEmpty()) {
        URL shellScript = getClass().getResource(scriptFileName);
        Path resourcePath = Paths.get(shellScript.toURI());
        FileUtil.setExecutable(resourcePath.toFile(), true);
        conf.set(DFSConfigKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY, resourcePath.toString());
    }
    DatanodeManager dm = mockDatanodeManager(fsn, conf);
    // register 5 datanodes, each with different storage ID and type
    DatanodeInfo[] locs = new DatanodeInfo[5];
    String[] storageIDs = new String[5];
    StorageType[] storageTypes = new StorageType[] { StorageType.ARCHIVE, StorageType.DEFAULT, StorageType.DISK, StorageType.RAM_DISK, StorageType.SSD };
    for (int i = 0; i < 5; i++) {
        // register new datanode
        String uuid = "UUID-" + i;
        String ip = "IP-" + i;
        DatanodeRegistration dr = Mockito.mock(DatanodeRegistration.class);
        Mockito.when(dr.getDatanodeUuid()).thenReturn(uuid);
        Mockito.when(dr.getIpAddr()).thenReturn(ip);
        Mockito.when(dr.getXferAddr()).thenReturn(ip + ":9000");
        Mockito.when(dr.getXferPort()).thenReturn(9000);
        Mockito.when(dr.getSoftwareVersion()).thenReturn("version1");
        dm.registerDatanode(dr);
        // get location and storage information
        locs[i] = dm.getDatanode(uuid);
        storageIDs[i] = "storageID-" + i;
    }
    // set first 2 locations as decommissioned
    locs[0].setDecommissioned();
    locs[1].setDecommissioned();
    // create LocatedBlock with above locations
    ExtendedBlock b = new ExtendedBlock("somePoolID", 1234);
    LocatedBlock block = new LocatedBlock(b, locs, storageIDs, storageTypes);
    List<LocatedBlock> blocks = new ArrayList<>();
    blocks.add(block);
    final String targetIp = locs[4].getIpAddr();
    // sort block locations
    dm.sortLocatedBlocks(targetIp, blocks);
    // check that storage IDs/types are aligned with datanode locs
    DatanodeInfo[] sortedLocs = block.getLocations();
    storageIDs = block.getStorageIDs();
    storageTypes = block.getStorageTypes();
    assertThat(sortedLocs.length, is(5));
    assertThat(storageIDs.length, is(5));
    assertThat(storageTypes.length, is(5));
    for (int i = 0; i < sortedLocs.length; i++) {
        assertThat(((DatanodeInfoWithStorage) sortedLocs[i]).getStorageID(), is(storageIDs[i]));
        assertThat(((DatanodeInfoWithStorage) sortedLocs[i]).getStorageType(), is(storageTypes[i]));
    }
    // Ensure the local node is first.
    assertThat(sortedLocs[0].getIpAddr(), is(targetIp));
    // Ensure the two decommissioned DNs were moved to the end.
    assertThat(sortedLocs[sortedLocs.length - 1].getAdminState(), is(DatanodeInfo.AdminStates.DECOMMISSIONED));
    assertThat(sortedLocs[sortedLocs.length - 2].getAdminState(), is(DatanodeInfo.AdminStates.DECOMMISSIONED));
}
Also used : Path(java.nio.file.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) StorageType(org.apache.hadoop.fs.StorageType) Configuration(org.apache.hadoop.conf.Configuration) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) ArrayList(java.util.ArrayList) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) URL(java.net.URL) DatanodeRegistration(org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem)
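All three TestDatanodeManager examples call a mockDatanodeManager(fsn, conf) helper that this snippet does not show. A minimal sketch, assuming the package-private DatanodeManager constructor takes a BlockManager, a Namesystem, and a Configuration; the real helper also wires up block-report bookkeeping on the mocked BlockManager:

// Hedged sketch: compiles only inside the
// org.apache.hadoop.hdfs.server.blockmanagement package, since the
// DatanodeManager constructor is package-private.
private static DatanodeManager mockDatanodeManager(FSNamesystem fsn, Configuration conf) throws IOException {
    BlockManager bm = Mockito.mock(BlockManager.class);
    return new DatanodeManager(bm, fsn, conf);
}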

Example 24 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

The class TestDatanodeManager, method testNumVersionsCorrectAfterReregister.

/**
   * This test checks that if a node is re-registered with a new software
   * version after the heartbeat expiry interval but before the HeartbeatManager
   * has a chance to detect this and remove it, the node's version will still
   * be correctly decremented.
   */
@Test
public void testNumVersionsCorrectAfterReregister() throws IOException, InterruptedException {
    // Create the DatanodeManager which will be tested
    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
    Mockito.when(fsn.hasWriteLock()).thenReturn(true);
    Configuration conf = new Configuration();
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 0);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 10);
    DatanodeManager dm = mockDatanodeManager(fsn, conf);
    String storageID = "someStorageID1";
    String ip = "someIP" + storageID;
    // Register then reregister the same node but with a different version
    for (int i = 0; i <= 1; i++) {
        dm.registerDatanode(new DatanodeRegistration(new DatanodeID(ip, "", storageID, 9000, 0, 0, 0), null, null, "version" + i));
        if (i == 0) {
            Thread.sleep(25);
        }
    }
    // Verify DatanodeManager has the correct count
    Map<String, Integer> mapToCheck = dm.getDatanodesSoftwareVersions();
    assertNull("should be no more version0 nodes", mapToCheck.get("version0"));
    assertEquals("should be one version1 node", mapToCheck.get("version1").intValue(), 1);
}
Also used : DatanodeRegistration(org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) Configuration(org.apache.hadoop.conf.Configuration) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
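Why the 25 ms sleep is enough: DatanodeManager derives a heartbeat expiry window from the two keys the test sets, so the first registration has already expired by the time the node re-registers. A worked sketch, assuming the usual Hadoop expiry formula:

// Assumed formula from DatanodeManager:
//   expiry = 2 * dfs.namenode.heartbeat.recheck-interval (ms)
//          + 10 * 1000 * dfs.heartbeat.interval (s)
long heartbeatIntervalSec = 0; // DFS_HEARTBEAT_INTERVAL_KEY
long recheckIntervalMs = 10;   // DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY
long expiryMs = 2 * recheckIntervalMs + 10 * 1000 * heartbeatIntervalSec; // 20 ms
// Thread.sleep(25) exceeds the 20 ms window, so the first heartbeat has
// expired before the second registerDatanode call.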

Example 25 with FSNamesystem

Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

The class TestDatanodeManager, method testRemoveIncludedNode.

/**
   * Test whether removing a host from the includes list without adding it to
   * the excludes list will exclude it from data node reports.
   */
@Test
public void testRemoveIncludedNode() throws IOException {
    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
    // Set the write lock so that the DatanodeManager can start
    Mockito.when(fsn.hasWriteLock()).thenReturn(true);
    DatanodeManager dm = mockDatanodeManager(fsn, new Configuration());
    HostFileManager hm = new HostFileManager();
    HostSet noNodes = new HostSet();
    HostSet oneNode = new HostSet();
    HostSet twoNodes = new HostSet();
    DatanodeRegistration dr1 = new DatanodeRegistration(new DatanodeID("127.0.0.1", "127.0.0.1", "someStorageID-123", 12345, 12345, 12345, 12345), new StorageInfo(HdfsServerConstants.NodeType.DATA_NODE), new ExportedBlockKeys(), "test");
    DatanodeRegistration dr2 = new DatanodeRegistration(new DatanodeID("127.0.0.1", "127.0.0.1", "someStorageID-234", 23456, 23456, 23456, 23456), new StorageInfo(HdfsServerConstants.NodeType.DATA_NODE), new ExportedBlockKeys(), "test");
    twoNodes.add(entry("127.0.0.1:12345"));
    twoNodes.add(entry("127.0.0.1:23456"));
    oneNode.add(entry("127.0.0.1:23456"));
    hm.refresh(twoNodes, noNodes);
    Whitebox.setInternalState(dm, "hostConfigManager", hm);
    // Register two data nodes to simulate them coming up.
    // We need to add two nodes, because if we have only one node, removing it
    // will cause the includes list to be empty, which means all hosts will be
    // allowed.
    dm.registerDatanode(dr1);
    dm.registerDatanode(dr2);
    // Make sure that both nodes are reported
    List<DatanodeDescriptor> both = dm.getDatanodeListForReport(HdfsConstants.DatanodeReportType.ALL);
    // Sort the list so that we know which one is which
    Collections.sort(both);
    Assert.assertEquals("Incorrect number of hosts reported", 2, both.size());
    Assert.assertEquals("Unexpected host or host in unexpected position", "127.0.0.1:12345", both.get(0).getInfoAddr());
    Assert.assertEquals("Unexpected host or host in unexpected position", "127.0.0.1:23456", both.get(1).getInfoAddr());
    // Remove one node from includes, but do not add it to excludes.
    hm.refresh(oneNode, noNodes);
    // Make sure that only one node is still reported
    List<DatanodeDescriptor> onlyOne = dm.getDatanodeListForReport(HdfsConstants.DatanodeReportType.ALL);
    Assert.assertEquals("Incorrect number of hosts reported", 1, onlyOne.size());
    Assert.assertEquals("Unexpected host reported", "127.0.0.1:23456", onlyOne.get(0).getInfoAddr());
    // Remove all nodes from includes
    hm.refresh(noNodes, noNodes);
    // Check that both nodes are reported again
    List<DatanodeDescriptor> bothAgain = dm.getDatanodeListForReport(HdfsConstants.DatanodeReportType.ALL);
    // Sort the list so that we know which one is which
    Collections.sort(bothAgain);
    Assert.assertEquals("Incorrect number of hosts reported", 2, bothAgain.size());
    Assert.assertEquals("Unexpected host or host in unexpected position", "127.0.0.1:12345", bothAgain.get(0).getInfoAddr());
    Assert.assertEquals("Unexpected host or host in unexpected position", "127.0.0.1:23456", bothAgain.get(1).getInfoAddr());
}
Also used : DatanodeRegistration(org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) Configuration(org.apache.hadoop.conf.Configuration) StorageInfo(org.apache.hadoop.hdfs.server.common.StorageInfo) ExportedBlockKeys(org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
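The entry(...) helper used to populate the HostSets is not shown here. A hedged sketch, assuming it delegates to HostFileManager.parseEntry, whose leading arguments are used only for error reporting; the real test's version may differ:

// Hypothetical helper; argument names and the parseEntry signature
// are assumptions based on similar Hadoop test code.
private static InetSocketAddress entry(String hostPort) {
    return HostFileManager.parseEntry("dummy", "dummy", hostPort);
}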

Aggregations

FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem): 77 usages
Test (org.junit.Test): 59 usages
Path (org.apache.hadoop.fs.Path): 51 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 41 usages
Configuration (org.apache.hadoop.conf.Configuration): 37 usages
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 27 usages
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 25 usages
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 23 usages
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 19 usages
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 14 usages
ArrayList (java.util.ArrayList): 12 usages
DatanodeRegistration (org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration): 12 usages
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 9 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 7 usages
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 7 usages
DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID): 6 usages
BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager): 6 usages
File (java.io.File): 5 usages
IOException (java.io.IOException): 5 usages
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 5 usages