
Example 76 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, from the class TestDataTransferProtocol, method readFile.

void readFile(FileSystem fs, Path path, int fileLen) throws IOException {
    byte[] arr = new byte[fileLen];
    // Read exactly fileLen bytes from the start of the file; try-with-resources
    // ensures the stream is closed even if the read fails.
    try (FSDataInputStream in = fs.open(path)) {
        in.readFully(arr);
    }
}
Also used: FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)
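
For comparison, FSDataInputStream also implements PositionedReadable, so a fixed-size block can be read at an absolute offset without moving the stream's current position. A minimal sketch under that assumption; the path /tmp/example.txt and the 16-byte buffer are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PositionalReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt");  // hypothetical file
        byte[] header = new byte[16];
        try (FSDataInputStream in = fs.open(path)) {
            // readFully(position, buffer) comes from PositionedReadable and does
            // not advance the stream, unlike the sequential readFully(buffer).
            in.readFully(0, header);
            System.out.println("position after positional read: " + in.getPos());
        }
    }
}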

Example 77 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, from the class TestCachingStrategy, method readHdfsFile.

static long readHdfsFile(FileSystem fs, Path p, long length, Boolean dropBehind) throws Exception {
    FSDataInputStream fis = null;
    long totalRead = 0;
    try {
        fis = fs.open(p);
        if (dropBehind != null) {
            fis.setDropBehind(dropBehind);
        }
        byte[] buf = new byte[8196];
        while (length > 0) {
            int amt = (length > buf.length) ? buf.length : (int) length;
            int ret = fis.read(buf, 0, amt);
            if (ret == -1) {
                return totalRead;
            }
            totalRead += ret;
            length -= ret;
        }
        // All requested bytes were read; return the total instead of falling
        // through to the "unreachable" throw below.
        return totalRead;
    } catch (IOException e) {
        LOG.error("ioexception", e);
    } finally {
        if (fis != null) {
            fis.close();
        }
    }
    throw new RuntimeException("unreachable");
}
Also used: FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), NativeIOException (org.apache.hadoop.io.nativeio.NativeIOException), IOException (java.io.IOException)
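
The setDropBehind call above is one of two per-stream cache hints on FSDataInputStream; setReadahead is the other. A minimal sketch of applying both defensively follows; the helper name and the 4 MB readahead value are hypothetical, and not every FileSystem implementation honors these hints.

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CacheHintSketch {
    static long drainWithHints(FileSystem fs, Path p) throws Exception {
        long total = 0;
        byte[] buf = new byte[8192];
        try (FSDataInputStream in = fs.open(p)) {
            try {
                in.setDropBehind(true);             // drop cached pages behind the reader
                in.setReadahead(4L * 1024 * 1024);  // hint 4 MB of readahead
            } catch (UnsupportedOperationException e) {
                // The underlying stream does not support cache hints; read normally.
            }
            int n;
            while ((n = in.read(buf)) != -1) {
                total += n;
            }
        }
        return total;
    }
}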

Example 78 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, from the class TestCachingStrategy, method testFadviseSkippedForSmallReads.

@Test(timeout = 120000)
public void testFadviseSkippedForSmallReads() throws Exception {
    // start a cluster
    LOG.info("testFadviseSkippedForSmallReads");
    tracker.clear();
    Configuration conf = new HdfsConfiguration();
    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_READS_KEY, true);
    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_KEY, true);
    MiniDFSCluster cluster = null;
    String TEST_PATH = "/test";
    int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
    FSDataInputStream fis = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        // create new file
        createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, null);
        // Since the DataNode was configured with drop-behind, and we didn't
        // specify any policy, we should have done drop-behind.
        ExtendedBlock block = cluster.getNameNode().getRpcServer().getBlockLocations(TEST_PATH, 0, Long.MAX_VALUE).get(0).getBlock();
        String fadvisedFileName = cluster.getBlockFile(0, block).getName();
        Stats stats = tracker.getStats(fadvisedFileName);
        stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
        stats.clear();
        stats.assertNotDroppedInRange(0, TEST_PATH_LEN);
        // read file
        fis = fs.open(new Path(TEST_PATH));
        byte[] buf = new byte[17];
        fis.readFully(4096, buf, 0, buf.length);
        // we should not have dropped anything because of the small read.
        stats = tracker.getStats(fadvisedFileName);
        stats.assertNotDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
    } finally {
        IOUtils.cleanup(null, fis);
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), FileSystem (org.apache.hadoop.fs.FileSystem), ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Test (org.junit.Test)
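
Stripped of the fadvise tracking, the harness in this test follows a common pattern: start a single-DataNode MiniDFSCluster (from the hadoop-hdfs test artifact), write a file, and read a small slice of it back at an offset. A minimal sketch with a hypothetical path and sizes:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class MiniClusterReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new HdfsConfiguration();
        MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        try {
            cluster.waitActive();
            FileSystem fs = cluster.getFileSystem();
            Path p = new Path("/sketch-test");          // hypothetical test file
            try (FSDataOutputStream out = fs.create(p)) {
                out.write(new byte[64 * 1024]);          // 64 KB of zeroes
            }
            byte[] buf = new byte[17];
            try (FSDataInputStream in = fs.open(p)) {
                in.readFully(4096, buf, 0, buf.length);  // small read at an offset
            }
        } finally {
            cluster.shutdown();
        }
    }
}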

Example 79 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, from the class TestLocalDistributedCacheManager, method testDuplicateDownload.

@Test
public void testDuplicateDownload() throws Exception {
    JobConf conf = new JobConf();
    conf.setClass("fs.mock.impl", MockFileSystem.class, FileSystem.class);
    URI mockBase = new URI("mock://test-nn1/");
    when(mockfs.getUri()).thenReturn(mockBase);
    Path working = new Path("mock://test-nn1/user/me/");
    when(mockfs.getWorkingDirectory()).thenReturn(working);
    when(mockfs.resolvePath(any(Path.class))).thenAnswer(new Answer<Path>() {

        @Override
        public Path answer(InvocationOnMock args) throws Throwable {
            return (Path) args.getArguments()[0];
        }
    });
    final URI file = new URI("mock://test-nn1/user/me/file.txt#link");
    final Path filePath = new Path(file);
    File link = new File("link");
    when(mockfs.getFileStatus(any(Path.class))).thenAnswer(new Answer<FileStatus>() {

        @Override
        public FileStatus answer(InvocationOnMock args) throws Throwable {
            Path p = (Path) args.getArguments()[0];
            if ("file.txt".equals(p.getName())) {
                return new FileStatus(201, false, 1, 500, 101, 101, FsPermission.getDefault(), "me", "me", filePath);
            } else {
                throw new FileNotFoundException(p + " not supported by mocking");
            }
        }
    });
    when(mockfs.getConf()).thenReturn(conf);
    final FSDataInputStream in = new FSDataInputStream(new MockInputStream("This is a test file\n".getBytes()));
    when(mockfs.open(any(Path.class), anyInt())).thenAnswer(new Answer<FSDataInputStream>() {

        @Override
        public FSDataInputStream answer(InvocationOnMock args) throws Throwable {
            Path src = (Path) args.getArguments()[0];
            if ("file.txt".equals(src.getName())) {
                return in;
            } else {
                throw new FileNotFoundException(src + " not supported by mocking");
            }
        }
    });
    DistributedCache.addCacheFile(file, conf);
    DistributedCache.addCacheFile(file, conf);
    conf.set(MRJobConfig.CACHE_FILE_TIMESTAMPS, "101,101");
    conf.set(MRJobConfig.CACHE_FILES_SIZES, "201,201");
    conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "false,false");
    conf.set(MRConfig.LOCAL_DIR, localDir.getAbsolutePath());
    LocalDistributedCacheManager manager = new LocalDistributedCacheManager();
    try {
        manager.setup(conf);
        assertTrue(link.exists());
    } finally {
        manager.close();
    }
    assertFalse(link.exists());
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), FileNotFoundException (java.io.FileNotFoundException), URI (java.net.URI), InvocationOnMock (org.mockito.invocation.InvocationOnMock), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), File (java.io.File), Test (org.junit.Test)
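
The MockInputStream wrapped by the FSDataInputStream above is not shown in this listing. Whatever it looks like, the FSDataInputStream constructor requires the wrapped InputStream to implement both Seekable and PositionedReadable. A minimal in-memory stream satisfying that contract might look like the following sketch; the class name is hypothetical.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;

class SeekableByteArrayInputStream extends ByteArrayInputStream
        implements Seekable, PositionedReadable {
    SeekableByteArrayInputStream(byte[] data) {
        super(data);
    }
    @Override
    public void seek(long newPos) {
        pos = (int) Math.min(newPos, count);  // pos and count are protected fields of ByteArrayInputStream
    }
    @Override
    public long getPos() {
        return pos;
    }
    @Override
    public boolean seekToNewSource(long targetPos) {
        return false;  // an in-memory buffer has no alternate replica
    }
    @Override
    public int read(long position, byte[] buffer, int offset, int length) {
        if (position >= count) {
            return -1;
        }
        int n = Math.min(length, count - (int) position);
        System.arraycopy(buf, (int) position, buffer, offset, n);
        return n;
    }
    @Override
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
        if (read(position, buffer, offset, length) < length) {
            throw new IOException("reached end of stream before reading fully");
        }
    }
    @Override
    public void readFully(long position, byte[] buffer) throws IOException {
        readFully(position, buffer, 0, buffer.length);
    }
}

With such a stream, new FSDataInputStream(new SeekableByteArrayInputStream("This is a test file\n".getBytes())) is enough to back the mocked FileSystem.open.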

Example 80 with FSDataInputStream

Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache, from the class MapTask, method getSplitDetails.

@SuppressWarnings("unchecked")
private <T> T getSplitDetails(Path file, long offset) throws IOException {
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = StringInterner.weakIntern(Text.readString(inFile));
    Class<T> cls;
    try {
        cls = (Class<T>) conf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(conf);
    Deserializer<T> deserializer = (Deserializer<T>) factory.getDeserializer(cls);
    deserializer.open(inFile);
    T split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    inFile.close();
    return split;
}
Also used: Deserializer (org.apache.hadoop.io.serializer.Deserializer), FileSystem (org.apache.hadoop.fs.FileSystem), RawLocalFileSystem (org.apache.hadoop.fs.RawLocalFileSystem), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory), IOException (java.io.IOException)
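
The core FSDataInputStream pattern in getSplitDetails is: seek to a known offset, read a length-prefixed value, and use getPos to account for the bytes consumed. A minimal sketch of just that bookkeeping, with a hypothetical file and offset:

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

public class SeekAndMeasureSketch {
    static String readStringAt(FileSystem fs, Path file, long offset) throws Exception {
        try (FSDataInputStream in = fs.open(file)) {
            in.seek(offset);                        // jump to the record's offset
            String value = Text.readString(in);     // vint length followed by UTF-8 bytes
            long consumed = in.getPos() - offset;   // bytes consumed by this read
            System.out.println("consumed " + consumed + " bytes");
            return value;
        }
    }
}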

Aggregations

FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 431 usages
Path (org.apache.hadoop.fs.Path): 271 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 143 usages
Test (org.junit.Test): 135 usages
IOException (java.io.IOException): 125 usages
Configuration (org.apache.hadoop.conf.Configuration): 94 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 93 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 62 usages
InputStreamReader (java.io.InputStreamReader): 37 usages
BufferedReader (java.io.BufferedReader): 36 usages
FileNotFoundException (java.io.FileNotFoundException): 26 usages
IgfsPath (org.apache.ignite.igfs.IgfsPath): 26 usages
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 21 usages
ArrayList (java.util.ArrayList): 20 usages
Random (java.util.Random): 19 usages
EOFException (java.io.EOFException): 18 usages
HashMap (java.util.HashMap): 16 usages
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 15 usages
URI (java.net.URI): 14 usages
File (java.io.File): 13 usages