Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class StreamInputFormat, method createRecordReader:
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit genericSplit,
    TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  String c = conf.get("stream.recordreader.class");
  if (c == null || c.indexOf("LineRecordReader") >= 0) {
    return super.createRecordReader(genericSplit, context);
  }
  // handling non-standard record reader (likely StreamXmlRecordReader)
  FileSplit split = (FileSplit) genericSplit;
  // LOG.info("getRecordReader start.....split=" + split);
  context.setStatus(split.toString());
  context.progress();
  // Open the file and seek to the start of the split
  FileSystem fs = split.getPath().getFileSystem(conf);
  FSDataInputStream in = fs.open(split.getPath());
  // Factory dispatch based on available params..
  Class readerClass;
  {
    readerClass = StreamUtil.goodClassOrNull(conf, c, null);
    if (readerClass == null) {
      throw new RuntimeException("Class not found: " + c);
    }
  }
  Constructor ctor;
  try {
    ctor = readerClass.getConstructor(new Class[] { FSDataInputStream.class,
        FileSplit.class, TaskAttemptContext.class, Configuration.class, FileSystem.class });
  } catch (NoSuchMethodException nsm) {
    throw new RuntimeException(nsm);
  }
  RecordReader<Text, Text> reader;
  try {
    reader = (RecordReader<Text, Text>) ctor.newInstance(new Object[] { in, split, context, conf, fs });
  } catch (Exception nsm) {
    throw new RuntimeException(nsm);
  }
  return reader;
}
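Whatever class is named in stream.recordreader.class must expose exactly the five-argument constructor that the factory above reflects over. The skeleton below is a hypothetical sketch of such a reader (the class name and the empty method bodies are illustrative, and it assumes the same imports the snippet above relies on); it is not the actual StreamXmlRecordReader.

// Hypothetical reader skeleton; only the constructor signature matters to the factory above.
public class MyStreamRecordReader extends RecordReader<Text, Text> {
  private final FSDataInputStream in;

  public MyStreamRecordReader(FSDataInputStream in, FileSplit split,
      TaskAttemptContext context, Configuration conf, FileSystem fs) throws IOException {
    this.in = in;
    // a real reader would seek to split.getStart() here and prepare to parse records
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) { }

  @Override
  public boolean nextKeyValue() throws IOException {
    // no records in this skeleton
    return false;
  }

  @Override
  public Text getCurrentKey() { return null; }

  @Override
  public Text getCurrentValue() { return null; }

  @Override
  public float getProgress() { return 0.0f; }

  @Override
  public void close() throws IOException {
    in.close();
  }
}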
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class FileSystemRMStateStore, method readFile:
private byte[] readFile(Path inputPath, long len) throws Exception {
  FSDataInputStream fsIn = null;
  try {
    fsIn = fs.open(inputPath);
    // state data will not be that "long"
    byte[] data = new byte[(int) len];
    fsIn.readFully(data);
    return data;
  } finally {
    IOUtils.cleanup(LOG, fsIn);
  }
}
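readFully(byte[]) blocks until the whole buffer has been filled (or throws EOFException), which is why the buffer can be sized up front from the known length. A minimal standalone sketch of the same pattern, with a purely illustrative path and the length taken from getFileStatus:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path inputPath = new Path("/tmp/rmstore/example-state");  // illustrative path
int len = (int) fs.getFileStatus(inputPath).getLen();
byte[] data = new byte[len];
try (FSDataInputStream in = fs.open(inputPath)) {
  in.readFully(data);  // fills the whole buffer or throws EOFException
}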
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class TestReadAndSeekPageBlobAfterWrite, method readRandomDataAndVerify:
/**
 * Read "size" bytes of data and verify that what was read and what was written
 * are the same.
 */
private void readRandomDataAndVerify(int size) throws AzureException, IOException {
  byte[] b = new byte[size];
  FSDataInputStream stream = fs.open(PATH);
  int bytesRead = stream.read(b);
  stream.close();
  assertEquals(bytesRead, size);
  // compare the data read to the data written
  assertTrue(comparePrefix(randomData, b, size));
}
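One caveat: a single read(byte[]) call is only guaranteed to return some bytes, not to fill the buffer, so the assertEquals here relies on the store delivering the full range in one call. A stricter sketch of the same check using the positioned readFully overload (reusing the test's fs, PATH, randomData and comparePrefix, which are assumed from the surrounding class):

byte[] b = new byte[size];
try (FSDataInputStream stream = fs.open(PATH)) {
  stream.readFully(0, b, 0, size);  // throws EOFException if fewer than size bytes are available
}
assertTrue(comparePrefix(randomData, b, size));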
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class TestReadAndSeekPageBlobAfterWrite, method writeAndReadOneFile:
/**
 * Write a total of numWrites * recordLength data to a file, read it back,
 * and check to make sure what was read is the same as what was written.
 * The syncInterval is the number of writes after which to call hflush to
 * force the data to storage.
 */
private void writeAndReadOneFile(int numWrites, int recordLength, int syncInterval) throws IOException {
  final int NUM_WRITES = numWrites;
  final int RECORD_LENGTH = recordLength;
  final int SYNC_INTERVAL = syncInterval;
  // A lower bound on the minimum time we think it will take to do
  // a write to Azure storage.
  final long MINIMUM_EXPECTED_TIME = 20;
  LOG.info("Writing " + NUM_WRITES * RECORD_LENGTH + " bytes to " + PATH.getName());
  FSDataOutputStream output = fs.create(PATH);
  int writesSinceHFlush = 0;
  try {
    // Do a flush and hflush to exercise case for empty write queue in PageBlobOutputStream,
    // to test concurrent execution gates.
    output.flush();
    output.hflush();
    for (int i = 0; i < NUM_WRITES; i++) {
      output.write(randomData, i * RECORD_LENGTH, RECORD_LENGTH);
      writesSinceHFlush++;
      output.flush();
      if ((i % SYNC_INTERVAL) == 0) {
        long start = Time.monotonicNow();
        output.hflush();
        writesSinceHFlush = 0;
        long end = Time.monotonicNow();
        // A true, round-trip synchronous flush to Azure must take
        // a significant amount of time or we are not syncing to storage correctly.
        LOG.debug("hflush duration = " + (end - start) + " msec.");
        assertTrue(String.format("hflush duration of %d, less than minimum expected of %d",
            end - start, MINIMUM_EXPECTED_TIME), end - start >= MINIMUM_EXPECTED_TIME);
      }
    }
  } finally {
    long start = Time.monotonicNow();
    output.close();
    long end = Time.monotonicNow();
    LOG.debug("close duration = " + (end - start) + " msec.");
    if (writesSinceHFlush > 0) {
      assertTrue(String.format("close duration with >= 1 pending write is %d, less than minimum expected of %d",
          end - start, MINIMUM_EXPECTED_TIME), end - start >= MINIMUM_EXPECTED_TIME);
    }
  }
  // Read the data back and check it.
  FSDataInputStream stream = fs.open(PATH);
  int SIZE = NUM_WRITES * RECORD_LENGTH;
  byte[] b = new byte[SIZE];
  try {
    stream.seek(0);
    stream.read(b, 0, SIZE);
    verifyReadRandomData(b, SIZE, 0, SIZE);
  } finally {
    stream.close();
  }
  // delete the file
  fs.delete(PATH, false);
}
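For orientation, an illustrative invocation of this helper; the parameter values below are made up for the example rather than taken from the actual test methods in this class.

writeAndReadOneFile(50, 1024, 10);  // numWrites, recordLength, syncInterval (illustrative values)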
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class TestReadAndSeekPageBlobAfterWrite, method testPageBlobSeekAndReadAfterWrite:
/**
 * Write data to a page blob, open it, seek, and then read a range of data.
 * Then compare that the data read from that range is the same as the data originally written.
 */
@Test
public void testPageBlobSeekAndReadAfterWrite() throws IOException {
  writeRandomData(PAGE_SIZE * MAX_PAGES);
  int recordSize = 100;
  byte[] b = new byte[recordSize];
  FSDataInputStream stream = fs.open(PATH);
  // Seek to a boundary around the middle of the 6th page
  int seekPosition = 5 * PAGE_SIZE + 250;
  stream.seek(seekPosition);
  // Read a record's worth of bytes and verify results
  int bytesRead = stream.read(b);
  verifyReadRandomData(b, bytesRead, seekPosition, recordSize);
  // Seek to another spot and read a record greater than a page
  seekPosition = 10 * PAGE_SIZE + 250;
  stream.seek(seekPosition);
  recordSize = 1000;
  b = new byte[recordSize];
  bytesRead = stream.read(b);
  verifyReadRandomData(b, bytesRead, seekPosition, recordSize);
  // Read the last 100 bytes of the file
  recordSize = 100;
  seekPosition = PAGE_SIZE * MAX_PAGES - recordSize;
  stream.seek(seekPosition);
  b = new byte[recordSize];
  bytesRead = stream.read(b);
  verifyReadRandomData(b, bytesRead, seekPosition, recordSize);
  // Read past the end of the file and we should get only partial data.
  recordSize = 100;
  seekPosition = PAGE_SIZE * MAX_PAGES - recordSize + 50;
  stream.seek(seekPosition);
  b = new byte[recordSize];
  bytesRead = stream.read(b);
  assertEquals(50, bytesRead);
  // compare last 50 bytes written with those read
  byte[] tail = Arrays.copyOfRange(randomData, seekPosition, randomData.length);
  assertTrue(comparePrefix(tail, b, 50));
}
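Since FSDataInputStream also implements PositionedReadable, each seek-then-read pair above could be written as a single positioned read that leaves the stream's own offset untouched. A brief sketch, reusing the test's fs, PATH and PAGE_SIZE:

byte[] b = new byte[100];
try (FSDataInputStream stream = fs.open(PATH)) {
  // read(position, buffer, offset, length) reads at an absolute offset
  // without moving the stream's current position.
  int bytesRead = stream.read(5 * PAGE_SIZE + 250, b, 0, b.length);
}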