
Example 81 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache.

the class TestNamenodeCapacityReport method testXceiverCountInternal.

public void testXceiverCountInternal(int minMaintenanceR) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // retry once if close fails
    conf.setInt(HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_RETRIES_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY, minMaintenanceR);
    MiniDFSCluster cluster = null;
    final int nodes = 8;
    final int fileCount = 5;
    final short fileRepl = 3;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(nodes).build();
        cluster.waitActive();
        final FSNamesystem namesystem = cluster.getNamesystem();
        final DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
        List<DataNode> datanodes = cluster.getDataNodes();
        final DistributedFileSystem fs = cluster.getFileSystem();
        // trigger heartbeats in case not already sent
        triggerHeartbeats(datanodes);
        // check that all nodes are live and in service
        // xceiver server adds 1 to load
        int expectedTotalLoad = nodes;
        int expectedInServiceNodes = nodes;
        int expectedInServiceLoad = nodes;
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // shut down half the nodes and verify the live and in-service counts stay accurate
        for (int i = 0; i < nodes / 2; i++) {
            DataNode dn = datanodes.get(i);
            DatanodeDescriptor dnd = dnm.getDatanode(dn.getDatanodeId());
            dn.shutdown();
            DFSTestUtil.setDatanodeDead(dnd);
            BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
            // Admin operations on dead nodes won't impact nodesInService metrics.
            startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            expectedInServiceNodes--;
            assertEquals(expectedInServiceNodes, namesystem.getNumLiveDataNodes());
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
            stopDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
        }
        // restart the nodes to verify that counts are correct after
        // node re-registration 
        cluster.restartDataNodes();
        cluster.waitActive();
        datanodes = cluster.getDataNodes();
        expectedInServiceNodes = nodes;
        assertEquals(nodes, datanodes.size());
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // create streams and hsync to force datastreamers to start
        DFSOutputStream[] streams = new DFSOutputStream[fileCount];
        for (int i = 0; i < fileCount; i++) {
            streams[i] = (DFSOutputStream) fs.create(new Path("/f" + i), fileRepl).getWrappedStream();
            streams[i].write("1".getBytes());
            streams[i].hsync();
            // the load for writers is 2 because both the write xceiver & packet
            // responder threads are counted in the load
            expectedTotalLoad += 2 * fileRepl;
            expectedInServiceLoad += 2 * fileRepl;
        }
        // force nodes to send load update
        triggerHeartbeats(datanodes);
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // take the first fileRepl nodes out of service, subtract their xceiver count
        // from the expected load, and trigger a heartbeat to force a load update.
        for (int i = 0; i < fileRepl; i++) {
            expectedInServiceNodes--;
            DatanodeDescriptor dnd = dnm.getDatanode(datanodes.get(i).getDatanodeId());
            expectedInServiceLoad -= dnd.getXceiverCount();
            startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            DataNodeTestUtils.triggerHeartbeat(datanodes.get(i));
            Thread.sleep(100);
            checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }
        // close each stream and recompute the expected load based on whether
        // the nodes in its pipeline are still in service
        for (int i = 0; i < fileCount; i++) {
            int adminOps = 0;
            for (DatanodeInfo dni : streams[i].getPipeline()) {
                DatanodeDescriptor dnd = dnm.getDatanode(dni);
                expectedTotalLoad -= 2;
                if (!dnd.isInService()) {
                    adminOps++;
                } else {
                    expectedInServiceLoad -= 2;
                }
            }
            try {
                streams[i].close();
            } catch (IOException ioe) {
                // close can fail when every node in the pipeline is out of service;
                // rethrow only if at least one pipeline node was still in service
                if (adminOps < fileRepl) {
                    throw ioe;
                }
            }
            triggerHeartbeats(datanodes);
            // verify node count and loads 
            checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }
        // shutdown each node, verify node counts based on admin state
        for (int i = 0; i < nodes; i++) {
            DataNode dn = datanodes.get(i);
            dn.shutdown();
            // force it to appear dead so live count decreases
            DatanodeDescriptor dnDesc = dnm.getDatanode(dn.getDatanodeId());
            DFSTestUtil.setDatanodeDead(dnDesc);
            BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
            assertEquals(nodes - 1 - i, namesystem.getNumLiveDataNodes());
            // first few nodes are already out of service
            if (i >= fileRepl) {
                expectedInServiceNodes--;
            }
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
            // live nodes always report a load of 1; when no live nodes remain the average is 0
            double expectedXceiverAvg = (i == nodes - 1) ? 0.0 : 1.0;
            assertEquals((double) expectedXceiverAvg, getInServiceXceiverAverage(namesystem), EPSILON);
        }
        // final sanity check
        checkClusterHealth(0, namesystem, 0.0, 0, 0.0);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) IOException(java.io.IOException) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) DFSOutputStream(org.apache.hadoop.hdfs.DFSOutputStream)
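A minimal sketch of how this parameterized helper might be driven from JUnit test methods; the method names, timeout, and argument values below are illustrative assumptions rather than the project's code.

@Test(timeout = 60000)
public void testXceiverCount() throws Exception {
    // assumption: exercise the helper with no minimum maintenance replication
    testXceiverCountInternal(0);
}

@Test(timeout = 60000)
public void testXceiverCountWithMaintenanceReplicas() throws Exception {
    // assumption: exercise the helper requiring one live replica for blocks on
    // nodes entering maintenance
    testXceiverCountInternal(1);
}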

Example 82 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hbase by apache.

the class TestFanOutOneBlockAsyncDFSOutput method testConnectToDatanodeFailed.

@Test
public void testConnectToDatanodeFailed() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InterruptedException, NoSuchFieldException {
    Field xceiverServerDaemonField = DataNode.class.getDeclaredField("dataXceiverServer");
    xceiverServerDaemonField.setAccessible(true);
    Class<?> xceiverServerClass = Class.forName("org.apache.hadoop.hdfs.server.datanode.DataXceiverServer");
    Method numPeersMethod = xceiverServerClass.getDeclaredMethod("getNumPeers");
    numPeersMethod.setAccessible(true);
    // break one datanode
    TEST_UTIL.getDFSCluster().getDataNodes().get(0).shutdownDatanode(true);
    try {
        Path f = new Path("/test");
        EventLoop eventLoop = EVENT_LOOP_GROUP.next();
        try {
            FanOutOneBlockAsyncDFSOutputHelper.createOutput(FS, f, true, false, (short) 3, FS.getDefaultBlockSize(), eventLoop);
            fail("should fail with connection error");
        } catch (IOException e) {
            LOG.info("expected exception caught", e);
        }
        for (DataNode dn : TEST_UTIL.getDFSCluster().getDataNodes()) {
            Daemon daemon = (Daemon) xceiverServerDaemonField.get(dn);
            assertEquals(0, numPeersMethod.invoke(daemon.getRunnable()));
        }
    } finally {
        TEST_UTIL.getDFSCluster().restartDataNode(0);
        ensureAllDatanodeAlive();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Field(java.lang.reflect.Field) EventLoop(io.netty.channel.EventLoop) Daemon(org.apache.hadoop.util.Daemon) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Method(java.lang.reflect.Method) IOException(java.io.IOException) Test(org.junit.Test)
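The ensureAllDatanodeAlive() helper called in the finally block is not part of this excerpt. A minimal sketch of one way it could be written, assuming it only needs to block until the mini cluster reports all datanodes active again (an assumption, not the project's implementation):

private void ensureAllDatanodeAlive() throws IOException {
    // assumption: MiniDFSCluster.waitActive() blocks until the restarted
    // datanode has re-registered with the namenode
    TEST_UTIL.getDFSCluster().waitActive();
}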

Example 83 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hbase by apache.

the class TestLogRolling method testLogRollOnDatanodeDeath.

/**
   * Tests that logs are rolled upon detecting datanode death. Requires an HDFS jar with HDFS-826
   * & syncFs() support (HDFS-200).
   */
@Test
public void testLogRollOnDatanodeDeath() throws Exception {
    TEST_UTIL.ensureSomeRegionServersAvailable(2);
    assertTrue("This test requires WAL file replication set to 2.", fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()) == 2);
    LOG.info("Replication=" + fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
    this.server = cluster.getRegionServer(0);
    // Create the test table and open it
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(getName()));
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin.createTable(desc);
    Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
    server = TEST_UTIL.getRSForFirstRegionInTable(desc.getTableName());
    HRegionInfo region = server.getOnlineRegions(desc.getTableName()).get(0).getRegionInfo();
    final FSHLog log = (FSHLog) server.getWAL(region);
    final AtomicBoolean lowReplicationHookCalled = new AtomicBoolean(false);
    log.registerWALActionsListener(new WALActionsListener.Base() {

        @Override
        public void logRollRequested(boolean lowReplication) {
            if (lowReplication) {
                lowReplicationHookCalled.lazySet(true);
            }
        }
    });
    // increase the datanode count to ensure proper replication when we kill one
    // This function is synchronous; when it returns, the dfs cluster is active
    // We start 3 servers and then stop 2 to avoid a directory naming conflict
    // when we stop/start a namenode later, as mentioned in HBASE-5163
    List<DataNode> existingNodes = dfsCluster.getDataNodes();
    int numDataNodes = 3;
    dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), numDataNodes, true, null, null);
    List<DataNode> allNodes = dfsCluster.getDataNodes();
    for (int i = allNodes.size() - 1; i >= 0; i--) {
        if (existingNodes.contains(allNodes.get(i))) {
            dfsCluster.stopDataNode(i);
        }
    }
    assertTrue("DataNodes " + dfsCluster.getDataNodes().size() + " default replication " + fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()), dfsCluster.getDataNodes().size() >= fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()) + 1);
    writeData(table, 2);
    long curTime = System.currentTimeMillis();
    LOG.info("log.getCurrentFileName(): " + log.getCurrentFileName());
    long oldFilenum = AbstractFSWALProvider.extractFileNumFromWAL(log);
    assertTrue("Log should have a timestamp older than now", curTime > oldFilenum && oldFilenum != -1);
    assertTrue("The log shouldn't have rolled yet", oldFilenum == AbstractFSWALProvider.extractFileNumFromWAL(log));
    final DatanodeInfo[] pipeline = log.getPipeline();
    assertTrue(pipeline.length == fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
    // kill a datanode in the pipeline to force a log roll on the next sync()
    // This function is synchronous; when it returns, the node is killed.
    assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);
    // this write should succeed, but trigger a log roll
    writeData(table, 2);
    long newFilenum = AbstractFSWALProvider.extractFileNumFromWAL(log);
    assertTrue("Missing datanode should've triggered a log roll", newFilenum > oldFilenum && newFilenum > curTime);
    assertTrue("The log rolling hook should have been called with the low replication flag", lowReplicationHookCalled.get());
    // write some more log data (this should use a new hdfs_out)
    writeData(table, 3);
    assertTrue("The log should not roll again.", AbstractFSWALProvider.extractFileNumFromWAL(log) == newFilenum);
    // kill another datanode in the pipeline, so the replicas will be lower than
    // the configured value 2.
    assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);
    batchWriteAndWait(table, log, 3, false, 14000);
    int replication = log.getLogReplication();
    assertTrue("LowReplication Roller should've been disabled, current replication=" + replication, !log.isLowReplicationRollEnabled());
    dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);
    // Force roll writer. The new log file will have the default replications,
    // and the LowReplication Roller will be enabled.
    log.rollWriter(true);
    batchWriteAndWait(table, log, 13, true, 10000);
    replication = log.getLogReplication();
    assertTrue("New log file should have the default replication instead of " + replication, replication == fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
    assertTrue("LowReplication Roller should've been enabled", log.isLowReplicationRollEnabled());
}
Also used : DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) Table(org.apache.hadoop.hbase.client.Table) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Test(org.junit.Test)
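The writeData(...) helper used throughout the test is not shown in this excerpt. A minimal sketch of what it plausibly does, assuming it writes one small row to the catalog family and that org.apache.hadoop.hbase.client.Put and org.apache.hadoop.hbase.util.Bytes are imported (the row format is an assumption, not the project's code):

void writeData(Table table, int rownum) throws IOException {
    // assumption: a single put per call; the put also appends and syncs a WAL entry
    Put put = new Put(Bytes.toBytes("row" + String.format("%010d", rownum)));
    put.addColumn(HConstants.CATALOG_FAMILY, null, Bytes.toBytes(String.valueOf(rownum)));
    table.put(put);
}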

Example 84 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hbase by apache.

the class TestRegionFavoredNodes method testFavoredNodes.

@Test
public void testFavoredNodes() throws Exception {
    Assume.assumeTrue(createWithFavoredNode != null);
    // Get the addresses of the datanodes in the cluster.
    InetSocketAddress[] nodes = new InetSocketAddress[REGION_SERVERS];
    List<DataNode> datanodes = TEST_UTIL.getDFSCluster().getDataNodes();
    Method selfAddress;
    try {
        selfAddress = DataNode.class.getMethod("getSelfAddr");
    } catch (NoSuchMethodException ne) {
        selfAddress = DataNode.class.getMethod("getXferAddress");
    }
    for (int i = 0; i < REGION_SERVERS; i++) {
        nodes[i] = (InetSocketAddress) selfAddress.invoke(datanodes.get(i));
    }
    String[] nodeNames = new String[REGION_SERVERS];
    for (int i = 0; i < REGION_SERVERS; i++) {
        nodeNames[i] = nodes[i].getAddress().getHostAddress() + ":" + nodes[i].getPort();
    }
    // For each region on each region server, pick FAVORED_NODES_NUM datanode addresses and assign
    // them as favored nodes through the region server.
    for (int i = 0; i < REGION_SERVERS; i++) {
        HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(i);
        List<Region> regions = server.getOnlineRegions(TABLE_NAME);
        for (Region region : regions) {
            List<org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName> favoredNodes = new ArrayList<>(3);
            String encodedRegionName = region.getRegionInfo().getEncodedName();
            for (int j = 0; j < FAVORED_NODES_NUM; j++) {
                org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName.Builder b = org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName.newBuilder();
                b.setHostName(nodes[(i + j) % REGION_SERVERS].getAddress().getHostAddress());
                b.setPort(nodes[(i + j) % REGION_SERVERS].getPort());
                b.setStartCode(-1);
                favoredNodes.add(b.build());
            }
            server.updateRegionFavoredNodesMapping(encodedRegionName, favoredNodes);
        }
    }
    // load and flush the table several times so each region ends up with multiple store files.
    for (int i = 0; i < FLUSHES; i++) {
        TEST_UTIL.loadTable(table, COLUMN_FAMILY, false);
        TEST_UTIL.flush();
    }
    // For each region server, check the block locations of its store files and verify
    // they are consistent with the favored nodes for that region.
    for (int i = 0; i < REGION_SERVERS; i++) {
        HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(i);
        List<Region> regions = server.getOnlineRegions(TABLE_NAME);
        for (Region region : regions) {
            List<String> files = region.getStoreFileList(new byte[][] { COLUMN_FAMILY });
            for (String file : files) {
                FileStatus status = TEST_UTIL.getDFSCluster().getFileSystem().getFileStatus(new Path(new URI(file).getPath()));
                BlockLocation[] lbks = ((DistributedFileSystem) TEST_UTIL.getDFSCluster().getFileSystem()).getFileBlockLocations(status, 0, Long.MAX_VALUE);
                for (BlockLocation lbk : lbks) {
                    locations: for (String info : lbk.getNames()) {
                        for (int j = 0; j < FAVORED_NODES_NUM; j++) {
                            if (info.equals(nodeNames[(i + j) % REGION_SERVERS])) {
                                continue locations;
                            }
                        }
                        // This block was at a location that was not a favored location.
                        fail("Block location " + info + " not a favored node");
                    }
                }
            }
        }
    }
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) BlockLocation(org.apache.hadoop.fs.BlockLocation) URI(java.net.URI) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Path(org.apache.hadoop.fs.Path) Method(java.lang.reflect.Method) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Test(org.junit.Test)
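The createWithFavoredNode field checked by the Assume at the top of the test is obtained by reflection during class setup, which is not included here. A hedged sketch of how such a lookup might look, assuming a DistributedFileSystem.create(...) overload that accepts an InetSocketAddress[] of favored nodes; the exact signature is an assumption, and when the overload is missing the test is simply skipped:

private static Method createWithFavoredNode = null;

@BeforeClass
public static void lookupCreateWithFavoredNode() {
    try {
        // assumption: an overload taking favored-node addresses as the last parameter
        createWithFavoredNode = DistributedFileSystem.class.getDeclaredMethod("create",
            Path.class, FsPermission.class, boolean.class, int.class, short.class,
            long.class, Progressable.class, InetSocketAddress[].class);
    } catch (NoSuchMethodException e) {
        // no favored-node support in this HDFS client; testFavoredNodes will be skipped
        createWithFavoredNode = null;
    }
}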

Example 85 with DataNode

use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hbase by apache.

the class TestBlockReorder method getHostName.

/**
   * Returns the hostname of a datanode, using getDisplayName (hadoop 2) or getHostName (hadoop 1).
   */
private String getHostName(DataNode dn) throws InvocationTargetException, IllegalAccessException {
    Method m;
    try {
        m = DataNode.class.getMethod("getDisplayName");
    } catch (NoSuchMethodException e) {
        try {
            m = DataNode.class.getMethod("getHostName");
        } catch (NoSuchMethodException e1) {
            throw new RuntimeException(e1);
        }
    }
    String res = (String) m.invoke(dn);
    if (res.contains(":")) {
        return res.split(":")[0];
    } else {
        return res;
    }
}
Also used : DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) Method(java.lang.reflect.Method)
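A short usage sketch for the helper above, assuming a running MiniDFSCluster named cluster and a commons-logging LOG are available to the test, and that the caller handles the checked reflection exceptions:

// assumption: 'cluster' and 'LOG' are test fixtures, not part of the excerpt above
for (DataNode dn : cluster.getDataNodes()) {
    String host = getHostName(dn);
    LOG.info("Datanode " + dn.getDatanodeId() + " reports hostname " + host);
}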

Aggregations

DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)165 Test (org.junit.Test)110 Path (org.apache.hadoop.fs.Path)78 Configuration (org.apache.hadoop.conf.Configuration)60 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)47 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)37 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)37 ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock)35 LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock)29 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)28 FileSystem (org.apache.hadoop.fs.FileSystem)27 IOException (java.io.IOException)24 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)20 LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)20 ArrayList (java.util.ArrayList)17 DiskBalancerDataNode (org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerDataNode)17 File (java.io.File)15 FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem)14 DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor)13 FsDatasetSpi (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi)12