Example 11 with DFSOutputStream

use of org.apache.hadoop.hdfs.DFSOutputStream in project hadoop by apache.

the class TestAddBlock method testAddBlockUC.

/**
   * Test adding new blocks without closing the corresponding file
   */
@Test
public void testAddBlockUC() throws Exception {
    DistributedFileSystem fs = cluster.getFileSystem();
    final Path file1 = new Path("/file1");
    DFSTestUtil.createFile(fs, file1, BLOCKSIZE - 1, REPLICATION, 0L);
    FSDataOutputStream out = null;
    try {
        // append files without closing the streams
        out = fs.append(file1);
        String appendContent = "appending-content";
        out.writeBytes(appendContent);
        ((DFSOutputStream) out.getWrappedStream()).hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH));
        // restart NN
        cluster.restartNameNode(true);
        FSDirectory fsdir = cluster.getNamesystem().getFSDirectory();
        INodeFile fileNode = fsdir.getINode4Write(file1.toString()).asFile();
        BlockInfo[] fileBlocks = fileNode.getBlocks();
        assertEquals(2, fileBlocks.length);
        assertEquals(BLOCKSIZE, fileBlocks[0].getNumBytes());
        assertEquals(BlockUCState.COMPLETE, fileBlocks[0].getBlockUCState());
        assertEquals(appendContent.length() - 1, fileBlocks[1].getNumBytes());
        assertEquals(BlockUCState.UNDER_CONSTRUCTION, fileBlocks[1].getBlockUCState());
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), BlockInfo (org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), DFSOutputStream (org.apache.hadoop.hdfs.DFSOutputStream), Test (org.junit.Test)
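
For reference, here is a minimal, self-contained sketch of the pattern this test exercises: unwrapping the DFSOutputStream from an FSDataOutputStream and calling hsync with UPDATE_LENGTH so the NameNode records the new file length while the last block is still under construction. The path and configuration are hypothetical, and the cast assumes the FileSystem is backed by HDFS.

import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSOutputStream;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;

public class HsyncSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // assumes fs.defaultFS points at a running HDFS cluster
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/hsync-demo"); // hypothetical path
        try (FSDataOutputStream out = fs.create(path)) {
            out.writeBytes("some-content");
            // UPDATE_LENGTH flushes the data to the datanodes and also
            // updates the file length on the NameNode, which is why the
            // written bytes in the test survive a NameNode restart even
            // though the stream is never closed
            ((DFSOutputStream) out.getWrappedStream())
                .hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH));
            // ... keep writing; the last block stays UNDER_CONSTRUCTION
        }
    }
}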

Example 12 with DFSOutputStream

use of org.apache.hadoop.hdfs.DFSOutputStream in project hadoop by apache.

the class TestNamenodeCapacityReport method testXceiverCountInternal.

public void testXceiverCountInternal(int minMaintenanceR) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // retry one time, if close fails
    conf.setInt(HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_RETRIES_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY, minMaintenanceR);
    MiniDFSCluster cluster = null;
    final int nodes = 8;
    final int fileCount = 5;
    final short fileRepl = 3;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(nodes).build();
        cluster.waitActive();
        final FSNamesystem namesystem = cluster.getNamesystem();
        final DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
        List<DataNode> datanodes = cluster.getDataNodes();
        final DistributedFileSystem fs = cluster.getFileSystem();
        // trigger heartbeats in case not already sent
        triggerHeartbeats(datanodes);
        // check that all nodes are live and in service
        // xceiver server adds 1 to load
        int expectedTotalLoad = nodes;
        int expectedInServiceNodes = nodes;
        int expectedInServiceLoad = nodes;
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // Ensure counts are accurate.
        for (int i = 0; i < nodes / 2; i++) {
            DataNode dn = datanodes.get(i);
            DatanodeDescriptor dnd = dnm.getDatanode(dn.getDatanodeId());
            dn.shutdown();
            DFSTestUtil.setDatanodeDead(dnd);
            BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
            // Admin operations on dead nodes won't impact nodesInService metrics.
            startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            expectedInServiceNodes--;
            assertEquals(expectedInServiceNodes, namesystem.getNumLiveDataNodes());
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
            stopDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
        }
        // restart the nodes to verify that counts are correct after
        // node re-registration 
        cluster.restartDataNodes();
        cluster.waitActive();
        datanodes = cluster.getDataNodes();
        expectedInServiceNodes = nodes;
        assertEquals(nodes, datanodes.size());
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // create streams and hsync to force datastreamers to start
        DFSOutputStream[] streams = new DFSOutputStream[fileCount];
        for (int i = 0; i < fileCount; i++) {
            streams[i] = (DFSOutputStream) fs.create(new Path("/f" + i), fileRepl).getWrappedStream();
            streams[i].write("1".getBytes());
            streams[i].hsync();
            // the load for writers is 2 because both the write xceiver & packet
            // responder threads are counted in the load
            expectedTotalLoad += 2 * fileRepl;
            expectedInServiceLoad += 2 * fileRepl;
        }
        // force nodes to send load update
        triggerHeartbeats(datanodes);
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // decommission a few nodes and subtract their load from the
        // expected load; trigger heartbeats to force a load update
        for (int i = 0; i < fileRepl; i++) {
            expectedInServiceNodes--;
            DatanodeDescriptor dnd = dnm.getDatanode(datanodes.get(i).getDatanodeId());
            expectedInServiceLoad -= dnd.getXceiverCount();
            startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            DataNodeTestUtils.triggerHeartbeat(datanodes.get(i));
            Thread.sleep(100);
            checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }
        // close each stream, recalculating the expected load based on
        // whether the nodes in the pipeline are decommissioned
        for (int i = 0; i < fileCount; i++) {
            int adminOps = 0;
            for (DatanodeInfo dni : streams[i].getPipeline()) {
                DatanodeDescriptor dnd = dnm.getDatanode(dni);
                expectedTotalLoad -= 2;
                if (!dnd.isInService()) {
                    adminOps++;
                } else {
                    expectedInServiceLoad -= 2;
                }
            }
            try {
                streams[i].close();
            } catch (IOException ioe) {
                // known issue: close may fail when every node in the
                // pipeline is out of service; tolerated for now
                if (adminOps < fileRepl) {
                    throw ioe;
                }
            }
            triggerHeartbeats(datanodes);
            // verify node count and loads 
            checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }
        // shutdown each node, verify node counts based on admin state
        for (int i = 0; i < nodes; i++) {
            DataNode dn = datanodes.get(i);
            dn.shutdown();
            // force it to appear dead so live count decreases
            DatanodeDescriptor dnDesc = dnm.getDatanode(dn.getDatanodeId());
            DFSTestUtil.setDatanodeDead(dnDesc);
            BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
            assertEquals(nodes - 1 - i, namesystem.getNumLiveDataNodes());
            // first few nodes are already out of service
            if (i >= fileRepl) {
                expectedInServiceNodes--;
            }
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
            // live nodes always report a load of at least 1 (the xceiver
            // server); with no live nodes left, the average is 0
            double expectedXceiverAvg = (i == nodes - 1) ? 0.0 : 1.0;
            assertEquals((double) expectedXceiverAvg, getInServiceXceiverAverage(namesystem), EPSILON);
        }
        // final sanity check
        checkClusterHealth(0, namesystem, 0.0, 0, 0.0);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), IOException (java.io.IOException), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), DatanodeManager (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), DFSOutputStream (org.apache.hadoop.hdfs.DFSOutputStream)
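
A condensed sketch of the stream setup used above: writing a byte and calling hsync forces the DataStreamer to establish the write pipeline, after which the pipeline can be inspected. This assumes a running cluster; note that getPipeline() is a test-visibility hook on DFSOutputStream rather than a public API, and it returns null until the pipeline exists, which the hsync() guards against.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSOutputStream;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class PipelineSketch {
    // assumes `fs` is connected to a running cluster
    static void printPipeline(DistributedFileSystem fs) throws Exception {
        DFSOutputStream out = (DFSOutputStream) fs
            .create(new Path("/pipeline-demo"), (short) 3) // hypothetical path
            .getWrappedStream();
        out.write("1".getBytes());
        out.hsync(); // forces the DataStreamer to set up the write pipeline
        for (DatanodeInfo dn : out.getPipeline()) {
            // each pipeline node runs a write xceiver and a packet responder,
            // hence the load of 2 per replica counted in the test above
            System.out.println(dn.getXferAddr());
        }
        out.close();
    }
}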

Example 13 with DFSOutputStream

use of org.apache.hadoop.hdfs.DFSOutputStream in project hadoop by apache.

the class TestNamenodeCapacityReport method testVolumeSize.

/**
   * Verifies that the capacity and usage figures reported by the datanode
   * and the namenode (used, non-DFS used, remaining, and the derived
   * percentages) are consistent with each other and exclude the configured
   * reserved space.
   */
@Test
public void testVolumeSize() throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    // Reserve a fixed amount of space on each volume (dfs.datanode.du.reserved)
    long reserved = 10000;
    conf.setLong(DFSConfigKeys.DFS_DATANODE_DU_RESERVED_KEY, reserved);
    try {
        cluster = new MiniDFSCluster.Builder(conf).build();
        cluster.waitActive();
        final FSNamesystem namesystem = cluster.getNamesystem();
        final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
        // Ensure the data reported for each data node is right
        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
        dm.fetchDatanodes(live, dead, false);
        assertTrue(live.size() == 1);
        long used, remaining, configCapacity, nonDFSUsed, bpUsed;
        float percentUsed, percentRemaining, percentBpUsed;
        for (final DatanodeDescriptor datanode : live) {
            used = datanode.getDfsUsed();
            remaining = datanode.getRemaining();
            nonDFSUsed = datanode.getNonDfsUsed();
            configCapacity = datanode.getCapacity();
            percentUsed = datanode.getDfsUsedPercent();
            percentRemaining = datanode.getRemainingPercent();
            bpUsed = datanode.getBlockPoolUsed();
            percentBpUsed = datanode.getBlockPoolUsedPercent();
            LOG.info("Datanode configCapacity " + configCapacity + " used " + used + " non DFS used " + nonDFSUsed + " remaining " + remaining + " percentUsed " + percentUsed + " percentRemaining " + percentRemaining);
            // The local (ext) filesystem keeps its own ~5% reserve, which is
            // not considered here.
            assertTrue(configCapacity >= (used + remaining + nonDFSUsed));
            assertTrue(percentUsed == DFSUtilClient.getPercentUsed(used, configCapacity));
            assertTrue(percentRemaining == DFSUtilClient.getPercentRemaining(remaining, configCapacity));
            assertTrue(percentBpUsed == DFSUtilClient.getPercentUsed(bpUsed, configCapacity));
        }
        //
        // The datanode in the MiniDFSCluster creates two data directories,
        // and each data directory reports the capacity of the disk it lives
        // on, so the capacity the datanode reports is twice the disk
        // capacity of a single data directory.
        //
        // Multiply the disk capacity and reserved space by the number of
        // data directories to account for this.
        //
        final FsDatasetTestUtils utils = cluster.getFsDatasetTestUtils(0);
        int numOfDataDirs = utils.getDefaultNumOfDataDirs();
        long diskCapacity = numOfDataDirs * utils.getRawCapacity();
        reserved *= numOfDataDirs;
        configCapacity = namesystem.getCapacityTotal();
        used = namesystem.getCapacityUsed();
        nonDFSUsed = namesystem.getNonDfsUsedSpace();
        remaining = namesystem.getCapacityRemaining();
        percentUsed = namesystem.getPercentUsed();
        percentRemaining = namesystem.getPercentRemaining();
        bpUsed = namesystem.getBlockPoolUsedSpace();
        percentBpUsed = namesystem.getPercentBlockPoolUsed();
        LOG.info("Data node directory " + cluster.getDataDirectory());
        LOG.info("Name node diskCapacity " + diskCapacity + " configCapacity " + configCapacity + " reserved " + reserved + " used " + used + " remaining " + remaining + " nonDFSUsed " + nonDFSUsed + " percentUsed " + percentUsed + " percentRemaining " + percentRemaining + " bpUsed " + bpUsed + " percentBpUsed " + percentBpUsed);
        // Ensure new total capacity reported excludes the reserved space
        assertTrue(configCapacity == diskCapacity - reserved);
        // Ensure capacity accounts for used, remaining, and non-DFS used
        // space. The local (ext) filesystem keeps its own ~5% reserve, which
        // is not considered here.
        assertTrue(configCapacity >= (used + remaining + nonDFSUsed));
        // Ensure percent used is calculated based on used and present capacity
        assertTrue(percentUsed == DFSUtilClient.getPercentUsed(used, configCapacity));
        // Ensure percent block pool used is calculated based on bpUsed and present capacity
        assertTrue(percentBpUsed == DFSUtilClient.getPercentUsed(bpUsed, configCapacity));
        // Ensure percent remaining is calculated based on remaining and present capacity
        assertTrue(percentRemaining == ((float) remaining * 100.0f) / (float) configCapacity);
        // Also test non-DFS used accounting, where space reserved for
        // replicas under construction must be considered.
        final int fileCount = 5;
        final DistributedFileSystem fs = cluster.getFileSystem();
        // create streams and hsync to force datastreamers to start
        DFSOutputStream[] streams = new DFSOutputStream[fileCount];
        for (int i = 0; i < fileCount; i++) {
            streams[i] = (DFSOutputStream) fs.create(new Path("/f" + i)).getWrappedStream();
            streams[i].write("1".getBytes());
            streams[i].hsync();
        }
        triggerHeartbeats(cluster.getDataNodes());
        assertTrue(configCapacity > (namesystem.getCapacityUsed() + namesystem.getCapacityRemaining() + namesystem.getNonDfsUsedSpace()));
        // Non-DFS usage may have grown slightly due to test logs, so allow
        // up to 1MB of other files within this gap.
        assertTrue((namesystem.getCapacityUsed() + namesystem.getCapacityRemaining() + namesystem.getNonDfsUsedSpace() + fileCount * fs.getDefaultBlockSize()) - configCapacity < 1 * 1024 * 1024);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FsDatasetTestUtils (org.apache.hadoop.hdfs.server.datanode.FsDatasetTestUtils), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), ArrayList (java.util.ArrayList), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), DatanodeManager (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager), DFSOutputStream (org.apache.hadoop.hdfs.DFSOutputStream), Test (org.junit.Test)
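
To make the capacity arithmetic easier to follow, here is a worked sketch with made-up numbers. The invariants mirror the test's assertions; the percentage formula matches what DFSUtilClient.getPercentUsed computes for a positive capacity.

public class CapacityMathSketch {
    public static void main(String[] args) {
        // made-up figures for illustration only
        long rawCapacityPerDir = 200_000L; // cf. FsDatasetTestUtils.getRawCapacity()
        int numOfDataDirs = 2;             // MiniDFSCluster default per datanode
        long reserved = 10_000L * numOfDataDirs;
        long diskCapacity = rawCapacityPerDir * numOfDataDirs;

        // the reported capacity excludes the reserved space up front
        long configCapacity = diskCapacity - reserved; // 380_000

        // used + remaining + nonDfsUsed may fall short of configCapacity
        // because the local filesystem keeps a reserve that HDFS ignores
        long used = 120_000L, remaining = 240_000L, nonDfsUsed = 15_000L;
        assert configCapacity >= used + remaining + nonDfsUsed;

        // percentages are computed against configCapacity
        float percentUsed = used * 100.0f / configCapacity;           // ~31.6%
        float percentRemaining = remaining * 100.0f / configCapacity; // ~63.2%
        System.out.println(percentUsed + " / " + percentRemaining);
    }
}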

Example 14 with DFSOutputStream

use of org.apache.hadoop.hdfs.DFSOutputStream in project hadoop by apache.

the class TestLeaseRenewer method testManyDfsClientsWhereSomeNotOpen.

/**
   * Regression test for HDFS-2810. In this bug, the LeaseRenewer has handles
   * to several DFSClients with the same name, the first of which has no files
   * open. Previously, this was causing the lease to not get renewed.
   */
@Test
public void testManyDfsClientsWhereSomeNotOpen() throws Exception {
    // First DFSClient has no files open so doesn't renew leases.
    final DFSClient mockClient1 = createMockClient();
    Mockito.doReturn(false).when(mockClient1).renewLease();
    assertSame(renewer, LeaseRenewer.getInstance(FAKE_AUTHORITY, FAKE_UGI_A, mockClient1));
    // Set up a file so that we start renewing our lease.
    DFSOutputStream mockStream1 = Mockito.mock(DFSOutputStream.class);
    long fileId = 456L;
    renewer.put(fileId, mockStream1, mockClient1);
    // Second DFSClient does renew lease
    final DFSClient mockClient2 = createMockClient();
    Mockito.doReturn(true).when(mockClient2).renewLease();
    assertSame(renewer, LeaseRenewer.getInstance(FAKE_AUTHORITY, FAKE_UGI_A, mockClient2));
    // Set up a file so that we start renewing our lease.
    DFSOutputStream mockStream2 = Mockito.mock(DFSOutputStream.class);
    renewer.put(fileId, mockStream2, mockClient2);
    // Wait for lease to get renewed
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            try {
                Mockito.verify(mockClient1, Mockito.atLeastOnce()).renewLease();
                Mockito.verify(mockClient2, Mockito.atLeastOnce()).renewLease();
                return true;
            } catch (AssertionError err) {
                LeaseRenewer.LOG.warn("Not yet satisfied", err);
                return false;
            } catch (IOException e) {
                // should not throw!
                throw new RuntimeException(e);
            }
        }
    }, 100, 10000);
    renewer.closeFile(fileId, mockClient1);
    renewer.closeFile(fileId, mockClient2);
}
Also used: DFSClient (org.apache.hadoop.hdfs.DFSClient), IOException (java.io.IOException), DFSOutputStream (org.apache.hadoop.hdfs.DFSOutputStream), Test (org.junit.Test)
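
The mock-based test above maps onto the following condensed flow. The identifiers (authority, ugi, client, stream, fileId) are placeholders assumed to be set up elsewhere, and LeaseRenewer is an internal HDFS client class rather than a public API.

// one LeaseRenewer instance is shared per (NameNode authority, UGI) key, so
// multiple DFSClients for the same user and cluster share a renewal daemon
LeaseRenewer renewer = LeaseRenewer.getInstance(authority, ugi, client);

// registering an open file starts the daemon; while any registered client
// has files open, the daemon periodically calls client.renewLease().
// HDFS-2810: a client whose renewLease() returns false (no files open)
// must not prevent the other clients' leases from being renewed.
renewer.put(fileId, stream, client);

// deregistering the last open file lets the daemon wind down
renewer.closeFile(fileId, client);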

Example 15 with DFSOutputStream

use of org.apache.hadoop.hdfs.DFSOutputStream in project hadoop by apache.

the class TestLeaseRenewer method testThreadName.

@Test
public void testThreadName() throws Exception {
    DFSOutputStream mockStream = Mockito.mock(DFSOutputStream.class);
    long fileId = 789L;
    Assert.assertFalse("Renewer not initially running", renewer.isRunning());
    // Pretend to open a file
    renewer.put(fileId, mockStream, MOCK_DFSCLIENT);
    Assert.assertTrue("Renewer should have started running", renewer.isRunning());
    // Check the thread name is reasonable
    String threadName = renewer.getDaemonName();
    Assert.assertEquals("LeaseRenewer:myuser@hdfs://nn1/", threadName);
    // Pretend to close the file
    renewer.closeFile(fileId, MOCK_DFSCLIENT);
    renewer.setEmptyTime(Time.monotonicNow());
    // Should stop the renewer running within a few seconds
    long failTime = Time.monotonicNow() + 5000;
    while (renewer.isRunning() && Time.monotonicNow() < failTime) {
        Thread.sleep(50);
    }
    Assert.assertFalse(renewer.isRunning());
}
Also used: DFSOutputStream (org.apache.hadoop.hdfs.DFSOutputStream), Test (org.junit.Test)
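
In other words, the lifecycle being pinned down is roughly the following (setEmptyTime is a test hook; the thread-name format is taken from the assertion above):

renewer.put(fileId, stream, client);        // first open file: daemon starts,
                                            // named "LeaseRenewer:<user>@<nn-uri>"
renewer.closeFile(fileId, client);          // last open file closed
renewer.setEmptyTime(Time.monotonicNow());  // begin the empty grace period
// once the grace period elapses, isRunning() returns false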

Aggregations

DFSOutputStream (org.apache.hadoop.hdfs.DFSOutputStream): 20
Test (org.junit.Test): 16
Path (org.apache.hadoop.fs.Path): 15
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 12
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 9
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 9
FileSystem (org.apache.hadoop.fs.FileSystem): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 5
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4
IOException (java.io.IOException): 3
DFSClient (org.apache.hadoop.hdfs.DFSClient): 3
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 3
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 3
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 2
BlockInfo (org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo): 2
DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor): 2
DatanodeManager (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager): 2
NamenodeProtocols (org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols): 2
File (java.io.File): 1