Example 36 with DatanodeInfo

use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

the class TestMaintenanceState method testWithNNAndDNRestart.

/**
   * Verify the following scenario.
   * a. Put a live node to maintenance => 1 maintenance, 2 live.
   * b. The maintenance node becomes dead => block map still has 1 maintenance,
   *    2 live.
   * c. Restart nn => block map only has 2 live => nn restores 3 live replicas.
   * d. Restart the maintenance dn => 1 maintenance, 3 live.
   * e. Take the node out of maintenance => over replication => 3 live.
   */
@Test(timeout = 360000)
public void testWithNNAndDNRestart() throws Exception {
    LOG.info("Starting testWithNNAndDNRestart");
    final int numNamenodes = 1;
    final int numDatanodes = 4;
    startCluster(numNamenodes, numDatanodes);
    final Path file = new Path("/testWithNNAndDNRestart.dat");
    final int replicas = 3;
    final FileSystem fileSys = getCluster().getFileSystem(0);
    FSNamesystem ns = getCluster().getNamesystem(0);
    writeFile(fileSys, file, replicas, 1);
    DatanodeInfo nodeOutofService = takeNodeOutofService(0, getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null, AdminStates.IN_MAINTENANCE);
    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1, nodeOutofService));
    DFSClient client = getDfsClient(0);
    assertEquals("All datanodes must be alive", numDatanodes, client.datanodeReport(DatanodeReportType.LIVE).length);
    MiniDFSCluster.DataNodeProperties dnProp = getCluster().stopDataNode(nodeOutofService.getXferAddr());
    DFSTestUtil.waitForDatanodeState(getCluster(), nodeOutofService.getDatanodeUuid(), false, 20000);
    assertEquals("maintenance node shouldn't be alive", numDatanodes - 1, client.datanodeReport(DatanodeReportType.LIVE).length);
    // Dead maintenance node's blocks should remain in block map.
    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1, nodeOutofService));
    // Restart nn; nn will restore 3 live replicas since it doesn't know
    // the maintenance node still has a replica.
    getCluster().restartNameNode(0);
    ns = getCluster().getNamesystem(0);
    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
    // Restart dn; nn now has 1 maintenance replica and 3 live replicas.
    getCluster().restartDataNode(dnProp, true);
    getCluster().waitActive();
    assertNull(checkWithRetry(ns, fileSys, file, replicas, nodeOutofService));
    // Put the node back in service; a redundant replica should be removed.
    putNodeInService(0, nodeOutofService.getDatanodeUuid());
    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
    cleanupFile(fileSys, file);
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) FileSystem(org.apache.hadoop.fs.FileSystem) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
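
The getFirstBlockFirstReplicaUuid helper called above belongs to TestMaintenanceState but is not reproduced on this page. A minimal sketch of what it plausibly does, assuming DFSClient.getLocatedBlocks is used to resolve the file's first block (the real helper may differ):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;

// Hypothetical sketch of the helper used by the tests on this page.
static String getFirstBlockFirstReplicaUuid(FileSystem fileSys, Path name) throws IOException {
    DFSClient client = ((DistributedFileSystem) fileSys).getClient();
    // Resolve the file's block locations and take the first replica of the first block.
    LocatedBlocks lbs = client.getLocatedBlocks(name.toUri().getPath(), 0);
    DatanodeInfo[] locations = lbs.get(0).getLocations();
    return locations.length > 0 ? locations[0].getDatanodeUuid() : null;
}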

Example 37 with DatanodeInfo

use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

the class TestMaintenanceState method testFileCloseAfterEnteringMaintenance.

@Test(timeout = 120000)
public void testFileCloseAfterEnteringMaintenance() throws Exception {
    LOG.info("Starting testFileCloseAfterEnteringMaintenance");
    int expirationInMs = 30 * 1000;
    int numDataNodes = 3;
    int numNameNodes = 1;
    getConf().setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, 2);
    startCluster(numNameNodes, numDataNodes);
    getCluster().waitActive();
    FSNamesystem fsn = getCluster().getNameNode().getNamesystem();
    List<String> hosts = new ArrayList<>();
    for (DataNode dn : getCluster().getDataNodes()) {
        hosts.add(dn.getDisplayName());
        putNodeInService(0, dn.getDatanodeUuid());
    }
    assertEquals(numDataNodes, fsn.getNumLiveDataNodes());
    Path openFile = new Path("/testClosingFileInMaintenance.dat");
    // Let's write 2 blocks of data to the openFile
    writeFile(getCluster().getFileSystem(), openFile, (short) 3);
    // Let's write some more data and keep the file open
    FSDataOutputStream fsDataOutputStream = getCluster().getFileSystem().append(openFile);
    byte[] bytes = new byte[1024];
    fsDataOutputStream.write(bytes);
    fsDataOutputStream.hsync();
    LocatedBlocks lbs = NameNodeAdapter.getBlockLocations(getCluster().getNameNode(0), openFile.toString(), 0, 3 * blockSize);
    DatanodeInfo[] dnInfos4LastBlock = lbs.getLastLocatedBlock().getLocations();
    // Request maintenance for DataNodes 1 and 2, which hold the last block.
    takeNodeOutofService(0, Lists.newArrayList(dnInfos4LastBlock[0].getDatanodeUuid(), dnInfos4LastBlock[1].getDatanodeUuid()), Time.now() + expirationInMs, null, null, AdminStates.ENTERING_MAINTENANCE);
    // Closing the file should succeed even when the
    // last block's nodes are entering maintenance.
    fsDataOutputStream.close();
    cleanupFile(getCluster().getFileSystem(), openFile);
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
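
For context, DatanodeInfo exposes the admin state this test drives through predicate methods; a small illustrative helper, not part of the test class:

import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

// Illustrative only: true while a node is entering or already in maintenance.
// Equivalent to comparing dn.getAdminState() against
// AdminStates.ENTERING_MAINTENANCE or AdminStates.IN_MAINTENANCE.
static boolean isEnteringOrInMaintenance(DatanodeInfo dn) {
    return dn.isEnteringMaintenance() || dn.isInMaintenance();
}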

Example 38 with DatanodeInfo

use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

the class TestMaintenanceState method testExpectedReplication.

private void testExpectedReplication(int replicationFactor, int expectedReplicasInRead) throws IOException {
    setup();
    startCluster(1, 5);
    final Path file = new Path("/testExpectedReplication.dat");
    final FileSystem fileSys = getCluster().getFileSystem(0);
    final FSNamesystem ns = getCluster().getNamesystem(0);
    writeFile(fileSys, file, replicationFactor, 1);
    DatanodeInfo nodeOutofService = takeNodeOutofService(0, getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null, AdminStates.IN_MAINTENANCE);
    // The block should be replicated to another datanode to meet
    // the expected replication count.
    assertNull(checkWithRetry(ns, fileSys, file, expectedReplicasInRead, nodeOutofService));
    cleanupFile(fileSys, file);
    teardown();
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) FileSystem(org.apache.hadoop.fs.FileSystem) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem)
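
The @Test wrapper that drives this private helper isn't shown here. A hypothetical driver follows; the (replicationFactor, expectedReplicasInRead) pairs are illustrative rather than copied from the real test, since the expectations depend on the configured maintenance replication minimum:

// Hypothetical driver; the value pairs below are assumptions.
@Test(timeout = 120000)
public void testExpectedReplications() throws IOException {
    testExpectedReplication(1, 1);
    testExpectedReplication(2, 2);
    testExpectedReplication(3, 3);
    // Five datanodes leave four candidates once one node is in maintenance.
    testExpectedReplication(4, 4);
}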

Example 39 with DatanodeInfo

use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

the class TestMaintenanceState method checkFile.

/*
  * Verify that the number of replicas is as expected for each block in
  * the given file.
  *
  * @return - null if no failure found, else an error message string.
  */
static String checkFile(FSNamesystem ns, FileSystem fileSys, Path name, int repl, DatanodeInfo expectedExcludedNode, DatanodeInfo expectedMaintenanceNode) throws IOException {
    // need a raw stream
    assertTrue("Not HDFS:" + fileSys.getUri(), fileSys instanceof DistributedFileSystem);
    HdfsDataInputStream dis = (HdfsDataInputStream) fileSys.open(name);
    BlockManager bm = ns.getBlockManager();
    Collection<LocatedBlock> dinfo = dis.getAllBlocks();
    String output;
    for (LocatedBlock blk : dinfo) {
        // for each block
        DatanodeInfo[] nodes = blk.getLocations();
        for (int j = 0; j < nodes.length; j++) {
            // for each replica
            if (expectedExcludedNode != null && nodes[j].equals(expectedExcludedNode)) {
                // The excluded node must not appear in LocatedBlock.
                output = "For block " + blk.getBlock() + " replica on " + nodes[j] + " found in LocatedBlock.";
                LOG.info(output);
                return output;
            } else if (nodes[j].isInMaintenance()) {
                // An IN_MAINTENANCE node must not appear in LocatedBlock.
                output = "For block " + blk.getBlock() + " replica on " + nodes[j] + " is in maintenance state but appears in LocatedBlock.";
                LOG.info(output);
                return output;
            }
        }
        if (repl != nodes.length) {
            output = "Wrong number of replicas for block " + blk.getBlock() + ": expected " + repl + ", got " + nodes.length + ", replicas: ";
            for (int j = 0; j < nodes.length; j++) {
                // list each replica's location
                output += nodes[j] + ", ";
            }
            output += "pending blocks: " + ns.getPendingReplicationBlocks() + ", ";
            output += "under-replicated blocks: " + ns.getUnderReplicatedBlocks() + ", ";
            if (expectedExcludedNode != null) {
                output += "excluded node: " + expectedExcludedNode;
            }
            LOG.info(output);
            return output;
        }
        // Verify it has the expected maintenance node
        Iterator<DatanodeStorageInfo> storageInfoIter = bm.getStorages(blk.getBlock().getLocalBlock()).iterator();
        List<DatanodeInfo> maintenanceNodes = new ArrayList<>();
        while (storageInfoIter.hasNext()) {
            DatanodeInfo node = storageInfoIter.next().getDatanodeDescriptor();
            if (node.isMaintenance()) {
                maintenanceNodes.add(node);
            }
        }
        if (expectedMaintenanceNode != null) {
            if (!maintenanceNodes.contains(expectedMaintenanceNode)) {
                output = "No maintenance replica on " + expectedMaintenanceNode;
                LOG.info(output);
                return output;
            }
        } else {
            if (!maintenanceNodes.isEmpty()) {
                output = "Has maintenance replica(s)";
                LOG.info(output);
                return output;
            }
        }
    }
    return null;
}
Also used : DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) ArrayList(java.util.ArrayList) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) HdfsDataInputStream(org.apache.hadoop.hdfs.client.HdfsDataInputStream)
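
The checkWithRetry calls in the earlier examples wrap this checkFile routine in a polling loop, since re-replication after a maintenance transition is asynchronous. A plausible sketch of such a wrapper (the real one isn't reproduced here; the 30-second budget and 500 ms poll interval are assumptions):

import org.apache.hadoop.util.Time;

// Hypothetical retry wrapper: poll checkFile until it reports success (null)
// or the deadline passes, returning the last error message on timeout.
// The maintenance node is expected both to be excluded from LocatedBlock
// and to still appear in the block map, so it is passed for both roles.
static String checkWithRetry(FSNamesystem ns, FileSystem fileSys, Path name, int repl, DatanodeInfo inMaintenanceNode) throws Exception {
    String result = null;
    final long deadline = Time.monotonicNow() + 30000;
    while (Time.monotonicNow() < deadline) {
        result = checkFile(ns, fileSys, name, repl, inMaintenanceNode, inMaintenanceNode);
        if (result == null) {
            return null;
        }
        Thread.sleep(500); // give pending replication work time to finish
    }
    return result;
}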

Example 40 with DatanodeInfo

use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

the class TestMaintenanceState method testTransitionToDecommission.

/**
   * Transition from IN_MAINTENANCE to DECOMMISSIONED.
   */
@Test(timeout = 360000)
public void testTransitionToDecommission() throws IOException {
    LOG.info("Starting testTransitionToDecommission");
    final int numNamenodes = 1;
    final int numDatanodes = 4;
    startCluster(numNamenodes, numDatanodes);
    final Path file = new Path("testTransitionToDecommission.dat");
    final int replicas = 3;
    FileSystem fileSys = getCluster().getFileSystem(0);
    FSNamesystem ns = getCluster().getNamesystem(0);
    writeFile(fileSys, file, replicas, 1);
    DatanodeInfo nodeOutofService = takeNodeOutofService(0, getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null, AdminStates.IN_MAINTENANCE);
    DFSClient client = getDfsClient(0);
    assertEquals("All datanodes must be alive", numDatanodes, client.datanodeReport(DatanodeReportType.LIVE).length);
    // Test 1: verify the replica in IN_MAINTENANCE state isn't in LocatedBlock.
    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1, nodeOutofService));
    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), 0, null, AdminStates.DECOMMISSIONED);
    // Test 2: after decommission completes, the replication count is
    // replicas + 1, which includes the decommissioned node.
    assertNull(checkWithRetry(ns, fileSys, file, replicas + 1, null));
    // Test 3: put the node back in service; the replication count should be restored.
    putNodeInService(0, nodeOutofService.getDatanodeUuid());
    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
    cleanupFile(fileSys, file);
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) FileSystem(org.apache.hadoop.fs.FileSystem) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
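
A quick way to observe the IN_MAINTENANCE => DECOMMISSIONED => NORMAL transitions this test exercises is to dump admin states from a datanode report. An illustrative fragment, not part of the original test:

// Illustrative only: log each datanode's admin state between test phases.
DFSClient reportClient = getDfsClient(0);
for (DatanodeInfo dn : reportClient.datanodeReport(DatanodeReportType.ALL)) {
    LOG.info(dn.getXferAddr() + " adminState=" + dn.getAdminState());
}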

Aggregations

DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 214
Test (org.junit.Test): 103
Path (org.apache.hadoop.fs.Path): 91
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 73
IOException (java.io.IOException): 47
FileSystem (org.apache.hadoop.fs.FileSystem): 44
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 43
ArrayList (java.util.ArrayList): 39
Configuration (org.apache.hadoop.conf.Configuration): 38
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 37
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 32
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 32
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 29
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 27
FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem): 25
InetSocketAddress (java.net.InetSocketAddress): 20
LocatedStripedBlock (org.apache.hadoop.hdfs.protocol.LocatedStripedBlock): 20
StorageType (org.apache.hadoop.fs.StorageType): 18
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 14
DatanodeInfoBuilder (org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder): 14