
Example 71 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, in the class TestTracingShortCircuitLocalRead, method testShortCircuitTraceHooks.

@Test
public void testShortCircuitTraceHooks() throws IOException {
    assumeTrue(NativeCodeLoader.isNativeCodeLoaded());
    assumeNotWindows();
    conf = new Configuration();
    conf.set(TraceUtils.DEFAULT_HADOOP_TRACE_PREFIX + Tracer.SPAN_RECEIVER_CLASSES_KEY, SetSpanReceiver.class.getName());
    conf.set(TraceUtils.DEFAULT_HADOOP_TRACE_PREFIX + Tracer.SAMPLER_CLASSES_KEY, "AlwaysSampler");
    conf.setLong("dfs.blocksize", 100 * 1024);
    conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY, true);
    conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.SKIP_CHECKSUM_KEY, false);
    conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), "testShortCircuitTraceHooks._PORT.sock").getAbsolutePath());
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C");
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    dfs = cluster.getFileSystem();
    try {
        DFSTestUtil.createFile(dfs, TEST_PATH, TEST_LENGTH, (short) 1, 5678L);
        TraceScope ts = FsTracer.get(conf).newScope("testShortCircuitTraceHooks");
        FSDataInputStream stream = dfs.open(TEST_PATH);
        byte[] buf = new byte[TEST_LENGTH];
        IOUtils.readFully(stream, buf, 0, TEST_LENGTH);
        stream.close();
        ts.close();
        String[] expectedSpanNames = { "OpRequestShortCircuitAccessProto", "ShortCircuitShmRequestProto" };
        SetSpanReceiver.assertSpanNamesFound(expectedSpanNames);
    } finally {
        dfs.close();
        cluster.shutdown();
    }
}
Also used: MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), TraceScope (org.apache.htrace.core.TraceScope), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), File (java.io.File), Test (org.junit.Test)
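
This example drains the whole file sequentially with IOUtils.readFully. FSDataInputStream also adds positioned and seek-based reads on top of the plain stream; the following is a minimal sketch of those calls, assuming a hypothetical file of at least 128 bytes at /tmp/example.dat.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PositionedReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical path; any file reachable through the configured FileSystem works,
        // as long as it holds at least 128 bytes.
        Path path = new Path("/tmp/example.dat");
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(path)) {
            byte[] buf = new byte[128];
            // Positioned read: fills the buffer from offset 0 without moving the stream position.
            in.readFully(0L, buf);
            // Seek-based read: moves the stream position, then reads sequentially from there.
            in.seek(64L);
            int n = in.read(buf, 0, buf.length);
            System.out.println("read " + n + " bytes, position is now " + in.getPos());
        }
    }
}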

Example 72 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, in the class JobHistoryCopyService, method getPreviousJobHistoryFileStream.

public static FSDataInputStream getPreviousJobHistoryFileStream(Configuration conf, ApplicationAttemptId applicationAttemptId) throws IOException {
    FSDataInputStream in = null;
    Path historyFile = null;
    String jobId = TypeConverter.fromYarn(applicationAttemptId.getApplicationId()).toString();
    String jobhistoryDir = JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf, jobId);
    Path histDirPath = FileContext.getFileContext(conf).makeQualified(new Path(jobhistoryDir));
    FileContext fc = FileContext.getFileContext(histDirPath.toUri(), conf);
    // read the previous history file
    historyFile = fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile(histDirPath, jobId, (applicationAttemptId.getAttemptId() - 1)));
    LOG.info("History file is at " + historyFile);
    in = fc.open(historyFile);
    return in;
}
Also used: Path (org.apache.hadoop.fs.Path), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), FileContext (org.apache.hadoop.fs.FileContext)
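
getPreviousJobHistoryFileStream returns the stream it opened, so closing it is the caller's responsibility. Below is a minimal sketch of the same FileContext open pattern with try-with-resources handling the close, using a hypothetical path in place of the resolved history file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FileContextOpenSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileContext fc = FileContext.getFileContext(conf);
        // Hypothetical path standing in for the staged job history file.
        Path historyFile = fc.makeQualified(new Path("/tmp/previous-job-history.jhist"));
        // try-with-resources closes the stream even if the copy fails part way.
        try (FSDataInputStream in = fc.open(historyFile)) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        }
    }
}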

Example 73 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, in the class TestDFSClientRetries, method namenodeRestartTest.

public static void namenodeRestartTest(final Configuration conf, final boolean isWebHDFS) throws Exception {
    GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL);
    final List<Exception> exceptions = new ArrayList<Exception>();
    final Path dir = new Path("/testNamenodeRestart");
    if (isWebHDFS) {
        conf.setBoolean(HdfsClientConfigKeys.HttpClient.RETRY_POLICY_ENABLED_KEY, true);
    } else {
        conf.setBoolean(HdfsClientConfigKeys.Retry.POLICY_ENABLED_KEY, true);
    }
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1);
    conf.setInt(MiniDFSCluster.DFS_NAMENODE_SAFEMODE_EXTENSION_TESTING_KEY, 5000);
    final short numDatanodes = 3;
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes).build();
    try {
        cluster.waitActive();
        final DistributedFileSystem dfs = cluster.getFileSystem();
        final FileSystem fs = isWebHDFS ? WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsConstants.WEBHDFS_SCHEME) : dfs;
        final URI uri = dfs.getUri();
        assertTrue(HdfsUtils.isHealthy(uri));
        //create a file
        final long length = 1L << 20;
        final Path file1 = new Path(dir, "foo");
        DFSTestUtil.createFile(fs, file1, length, numDatanodes, 20120406L);
        //get file status
        final FileStatus s1 = fs.getFileStatus(file1);
        assertEquals(length, s1.getLen());
        //create file4, write some data but not close
        final Path file4 = new Path(dir, "file4");
        final FSDataOutputStream out4 = fs.create(file4, false, 4096, fs.getDefaultReplication(file4), 1024L, null);
        final byte[] bytes = new byte[1000];
        new Random().nextBytes(bytes);
        out4.write(bytes);
        out4.write(bytes);
        if (isWebHDFS) {
            // WebHDFS does not support hflush. To avoid DataNode communicating with
            // NN while we're shutting down NN, we call out4.close() to finish
            // writing the data
            out4.close();
        } else {
            out4.hflush();
        }
        //shutdown namenode
        assertTrue(HdfsUtils.isHealthy(uri));
        cluster.shutdownNameNode(0);
        assertFalse(HdfsUtils.isHealthy(uri));
        //namenode is down, continue writing file4 in a thread
        final Thread file4thread = new Thread(new Runnable() {

            @Override
            public void run() {
                try {
                    //write some more data and then close the file
                    if (!isWebHDFS) {
                        out4.write(bytes);
                        out4.write(bytes);
                        out4.write(bytes);
                        out4.close();
                    }
                } catch (Exception e) {
                    exceptions.add(e);
                }
            }
        });
        file4thread.start();
        //namenode is down, read the file in a thread
        final Thread reader = new Thread(new Runnable() {

            @Override
            public void run() {
                try {
                    //it should retry till namenode is up.
                    final FileSystem fs = createFsWithDifferentUsername(conf, isWebHDFS);
                    final FSDataInputStream in = fs.open(file1);
                    int count = 0;
                    for (; in.read() != -1; count++) ;
                    in.close();
                    assertEquals(s1.getLen(), count);
                } catch (Exception e) {
                    exceptions.add(e);
                }
            }
        });
        reader.start();
        //namenode is down, create another file in a thread
        final Path file3 = new Path(dir, "file");
        final Thread thread = new Thread(new Runnable() {

            @Override
            public void run() {
                try {
                    //it should retry till namenode is up.
                    final FileSystem fs = createFsWithDifferentUsername(conf, isWebHDFS);
                    DFSTestUtil.createFile(fs, file3, length, numDatanodes, 20120406L);
                } catch (Exception e) {
                    exceptions.add(e);
                }
            }
        });
        thread.start();
        //restart namenode in a new thread
        new Thread(new Runnable() {

            @Override
            public void run() {
                try {
                    //sleep, restart, and then wait active
                    TimeUnit.SECONDS.sleep(30);
                    assertFalse(HdfsUtils.isHealthy(uri));
                    cluster.restartNameNode(0, false);
                    cluster.waitActive();
                    assertTrue(HdfsUtils.isHealthy(uri));
                } catch (Exception e) {
                    exceptions.add(e);
                }
            }
        }).start();
        //namenode is down, it should retry until namenode is up again. 
        final FileStatus s2 = fs.getFileStatus(file1);
        assertEquals(s1, s2);
        //check file1 and file3
        thread.join();
        assertEmpty(exceptions);
        assertEquals(s1.getLen(), fs.getFileStatus(file3).getLen());
        assertEquals(fs.getFileChecksum(file1), fs.getFileChecksum(file3));
        reader.join();
        assertEmpty(exceptions);
        //check file4
        file4thread.join();
        assertEmpty(exceptions);
        {
            final FSDataInputStream in = fs.open(file4);
            int count = 0;
            for (int r; (r = in.read()) != -1; count++) {
                Assert.assertEquals(String.format("count=%d", count), bytes[count % bytes.length], (byte) r);
            }
            if (!isWebHDFS) {
                Assert.assertEquals(5 * bytes.length, count);
            } else {
                Assert.assertEquals(2 * bytes.length, count);
            }
            in.close();
        }
        //enter safe mode
        assertTrue(HdfsUtils.isHealthy(uri));
        dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
        assertFalse(HdfsUtils.isHealthy(uri));
        //leave safe mode in a new thread
        new Thread(new Runnable() {

            @Override
            public void run() {
                try {
                    //sleep and then leave safe mode
                    TimeUnit.SECONDS.sleep(30);
                    assertFalse(HdfsUtils.isHealthy(uri));
                    dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
                    assertTrue(HdfsUtils.isHealthy(uri));
                } catch (Exception e) {
                    exceptions.add(e);
                }
            }
        }).start();
        //namenode is in safe mode, create should retry until it leaves safe mode.
        final Path file2 = new Path(dir, "bar");
        DFSTestUtil.createFile(fs, file2, length, numDatanodes, 20120406L);
        assertEquals(fs.getFileChecksum(file1), fs.getFileChecksum(file2));
        assertTrue(HdfsUtils.isHealthy(uri));
        //make sure it won't retry on exceptions like FileNotFoundException
        final Path nonExisting = new Path(dir, "nonExisting");
        LOG.info("setPermission: " + nonExisting);
        try {
            fs.setPermission(nonExisting, new FsPermission((short) 0));
            fail();
        } catch (FileNotFoundException fnfe) {
            LOG.info("GOOD!", fnfe);
        }
        assertEmpty(exceptions);
    } finally {
        cluster.shutdown();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), HdfsFileStatus (org.apache.hadoop.hdfs.protocol.HdfsFileStatus), ArrayList (java.util.ArrayList), FileNotFoundException (java.io.FileNotFoundException), URI (java.net.URI), ThrowsException (org.mockito.internal.stubbing.answers.ThrowsException), ChecksumException (org.apache.hadoop.fs.ChecksumException), NotReplicatedYetException (org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException), SocketTimeoutException (java.net.SocketTimeoutException), IOException (java.io.IOException), RemoteException (org.apache.hadoop.ipc.RemoteException), Random (java.util.Random), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), FsPermission (org.apache.hadoop.fs.permission.FsPermission)
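
The reader threads above count bytes one at a time with in.read() and rely on the client retry policy to ride out the namenode restart. A standalone sketch of that byte-counting read, with the same retry switch the test flips and a hypothetical path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;

public class RetryingReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same client-side switch the test sets; reads retry while the namenode is unavailable.
        conf.setBoolean(HdfsClientConfigKeys.Retry.POLICY_ENABLED_KEY, true);
        Path file = new Path("/testNamenodeRestart/foo"); // hypothetical path
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(file)) {
            long count = 0;
            while (in.read() != -1) {
                count++;
            }
            System.out.println("read " + count + " bytes");
        }
    }
}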

Example 74 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, in the class TestDataTransferKeepalive, method testSlowReader.

/**
   * Test for the case where the client begins to read a long block, but doesn't
   * read bytes off the stream quickly. The datanode should time out sending the
   * chunks and the transceiver should die, even if it has a long keepalive.
   */
@Test(timeout = 300000)
public void testSlowReader() throws Exception {
    // Set a client socket cache expiry time much longer than 
    // the datanode-side expiration time.
    final long CLIENT_EXPIRY_MS = 600000L;
    Configuration clientConf = new Configuration(conf);
    clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
    clientConf.set(DFS_CLIENT_CONTEXT, "testSlowReader");
    DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(cluster.getURI(), clientConf);
    // Restart the DN with a shorter write timeout.
    DataNodeProperties props = cluster.stopDataNode(0);
    props.conf.setInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY, WRITE_TIMEOUT);
    props.conf.setInt(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, 120000);
    assertTrue(cluster.restartDataNode(props, true));
    dn = cluster.getDataNodes().get(0);
    // Wait for heartbeats to avoid a startup race where we
    // try to write the block while the DN is still starting.
    cluster.triggerHeartbeats();
    DFSTestUtil.createFile(fs, TEST_FILE, 1024 * 1024 * 8L, (short) 1, 0L);
    FSDataInputStream stm = fs.open(TEST_FILE);
    stm.read();
    assertXceiverCount(1);
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        public Boolean get() {
            // The DN should time out in sendChunks, forcing the xceiver to exit.
            return getXceiverCountWithoutServer() == 0;
        }
    }, 500, 50000);
    IOUtils.closeStream(stm);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), DataNodeProperties (org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Test (org.junit.Test)
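
The test reads a single byte, deliberately stalls, and then tears the stream down with IOUtils.closeStream, which ignores an IOException from a connection the datanode may already have dropped. A minimal sketch of that partial-read-then-quiet-close pattern, with a hypothetical path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class PartialReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/large-file.dat"); // hypothetical path
        FSDataInputStream in = null;
        try {
            in = FileSystem.get(conf).open(file);
            // Read only the first byte, leaving the rest of the block unread.
            int first = in.read();
            System.out.println("first byte: " + first);
        } finally {
            // Quiet close: ignores an IOException if the connection has already died.
            IOUtils.closeStream(in);
        }
    }
}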

Example 75 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, in the class TestDataTransferProtocol, method testOpWrite.

@Test
public void testOpWrite() throws IOException {
    int numDataNodes = 1;
    final long BLOCK_ID_FUDGE = 128;
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
    try {
        cluster.waitActive();
        String poolId = cluster.getNamesystem().getBlockPoolId();
        datanode = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
        dnAddr = NetUtils.createSocketAddr(datanode.getXferAddr());
        FileSystem fileSys = cluster.getFileSystem();
        /* Test writing to finalized replicas */
        Path file = new Path("dataprotocol.dat");
        DFSTestUtil.createFile(fileSys, file, 1L, (short) numDataNodes, 0L);
        // get the first blockid for the file
        ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(fileSys, file);
        // test PIPELINE_SETUP_CREATE on a finalized block
        testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_CREATE, 0L, "Cannot create an existing block", true);
        // test PIPELINE_DATA_STREAMING on a finalized block
        testWrite(firstBlock, BlockConstructionStage.DATA_STREAMING, 0L, "Unexpected stage", true);
        // test PIPELINE_SETUP_STREAMING_RECOVERY on an existing block
        long newGS = firstBlock.getGenerationStamp() + 1;
        testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_STREAMING_RECOVERY, newGS, "Cannot recover data streaming to a finalized replica", true);
        // test PIPELINE_SETUP_APPEND on an existing block
        newGS = firstBlock.getGenerationStamp() + 1;
        testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_APPEND, newGS, "Append to a finalized replica", false);
        firstBlock.setGenerationStamp(newGS);
        // test PIPELINE_SETUP_APPEND_RECOVERY on an existing block
        file = new Path("dataprotocol1.dat");
        DFSTestUtil.createFile(fileSys, file, 1L, (short) numDataNodes, 0L);
        firstBlock = DFSTestUtil.getFirstBlock(fileSys, file);
        newGS = firstBlock.getGenerationStamp() + 1;
        testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_APPEND_RECOVERY, newGS, "Recover appending to a finalized replica", false);
        // test PIPELINE_CLOSE_RECOVERY on an existing block
        file = new Path("dataprotocol2.dat");
        DFSTestUtil.createFile(fileSys, file, 1L, (short) numDataNodes, 0L);
        firstBlock = DFSTestUtil.getFirstBlock(fileSys, file);
        newGS = firstBlock.getGenerationStamp() + 1;
        testWrite(firstBlock, BlockConstructionStage.PIPELINE_CLOSE_RECOVERY, newGS, "Recover failed close to a finalized replica", false);
        firstBlock.setGenerationStamp(newGS);
        // Test writing to a new block. Don't choose the next sequential
        // block ID to avoid conflicting with IDs chosen by the NN.
        long newBlockId = firstBlock.getBlockId() + BLOCK_ID_FUDGE;
        ExtendedBlock newBlock = new ExtendedBlock(firstBlock.getBlockPoolId(), newBlockId, 0, firstBlock.getGenerationStamp());
        // test PIPELINE_SETUP_CREATE on a new block
        testWrite(newBlock, BlockConstructionStage.PIPELINE_SETUP_CREATE, 0L, "Create a new block", false);
        // test PIPELINE_SETUP_STREAMING_RECOVERY on a new block
        newGS = newBlock.getGenerationStamp() + 1;
        newBlock.setBlockId(newBlock.getBlockId() + 1);
        testWrite(newBlock, BlockConstructionStage.PIPELINE_SETUP_STREAMING_RECOVERY, newGS, "Recover a new block", true);
        // test PIPELINE_SETUP_APPEND on a new block
        newGS = newBlock.getGenerationStamp() + 1;
        testWrite(newBlock, BlockConstructionStage.PIPELINE_SETUP_APPEND, newGS, "Cannot append to a new block", true);
        // test PIPELINE_SETUP_APPEND_RECOVERY on a new block
        newBlock.setBlockId(newBlock.getBlockId() + 1);
        newGS = newBlock.getGenerationStamp() + 1;
        testWrite(newBlock, BlockConstructionStage.PIPELINE_SETUP_APPEND_RECOVERY, newGS, "Cannot append to a new block", true);
        /* Test writing to RBW replicas */
        Path file1 = new Path("dataprotocol1.dat");
        DFSTestUtil.createFile(fileSys, file1, 1L, (short) numDataNodes, 0L);
        DFSOutputStream out = (DFSOutputStream) (fileSys.append(file1).getWrappedStream());
        out.write(1);
        out.hflush();
        FSDataInputStream in = fileSys.open(file1);
        firstBlock = DFSTestUtil.getAllBlocks(in).get(0).getBlock();
        firstBlock.setNumBytes(2L);
        try {
            // test PIPELINE_SETUP_CREATE on a RBW block
            testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_CREATE, 0L, "Cannot create a RBW block", true);
            // test PIPELINE_SETUP_APPEND on an existing block
            newGS = firstBlock.getGenerationStamp() + 1;
            testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_APPEND, newGS, "Cannot append to a RBW replica", true);
            // test PIPELINE_SETUP_APPEND_RECOVERY on a RBW replica
            testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_APPEND_RECOVERY, newGS, "Recover append to a RBW replica", false);
            firstBlock.setGenerationStamp(newGS);
            // test PIPELINE_SETUP_STREAMING_RECOVERY on a RBW block
            file = new Path("dataprotocol2.dat");
            DFSTestUtil.createFile(fileSys, file, 1L, (short) numDataNodes, 0L);
            out = (DFSOutputStream) (fileSys.append(file).getWrappedStream());
            out.write(1);
            out.hflush();
            in = fileSys.open(file);
            firstBlock = DFSTestUtil.getAllBlocks(in).get(0).getBlock();
            firstBlock.setNumBytes(2L);
            newGS = firstBlock.getGenerationStamp() + 1;
            testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_STREAMING_RECOVERY, newGS, "Recover a RBW replica", false);
        } finally {
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
        }
    } finally {
        cluster.shutdown();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Builder (org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto.Builder), FileSystem (org.apache.hadoop.fs.FileSystem), ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Test (org.junit.Test)
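
This test inspects block metadata through DFSTestUtil.getAllBlocks, a test-only helper that works on the open FSDataInputStream. Outside of tests, block placement is usually queried through FileSystem.getFileBlockLocations instead; a minimal sketch with a hypothetical path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocationsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/dataprotocol.dat"); // hypothetical path
        try (FileSystem fs = FileSystem.get(conf)) {
            FileStatus status = fs.getFileStatus(file);
            // One BlockLocation per block, listing the hosts that hold each replica.
            for (BlockLocation loc : fs.getFileBlockLocations(status, 0, status.getLen())) {
                System.out.println(loc);
            }
        }
    }
}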

Aggregations

FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 431 usages
Path (org.apache.hadoop.fs.Path): 271 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 143 usages
Test (org.junit.Test): 135 usages
IOException (java.io.IOException): 125 usages
Configuration (org.apache.hadoop.conf.Configuration): 94 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 93 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 62 usages
InputStreamReader (java.io.InputStreamReader): 37 usages
BufferedReader (java.io.BufferedReader): 36 usages
FileNotFoundException (java.io.FileNotFoundException): 26 usages
IgfsPath (org.apache.ignite.igfs.IgfsPath): 26 usages
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 21 usages
ArrayList (java.util.ArrayList): 20 usages
Random (java.util.Random): 19 usages
EOFException (java.io.EOFException): 18 usages
HashMap (java.util.HashMap): 16 usages
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 15 usages
URI (java.net.URI): 14 usages
File (java.io.File): 13 usages
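
The aggregation shows FSDataInputStream most often appearing alongside Path, FileSystem, and the java.io reader classes (InputStreamReader, BufferedReader). A minimal sketch of that common text-reading combination, assuming a hypothetical UTF-8 file:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadLinesSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/lines.txt"); // hypothetical path
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(path);
             BufferedReader reader =
                 new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}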