Search in sources :

Example 1 with Reader

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Reader in project tez by apache.

From the class TestIFile, method readUsingIFileReader.

/**
 * Reads the data back through an {@code IFile.Reader} opened on
 * {@code outputPath} and verifies it matches {@code originalData}.
 *
 * @param originalData expected key/value pairs previously written to outputPath
 * @param codec compression codec the file was written with, or null for none
 * @throws IOException if the file cannot be opened or read
 */
private void readUsingIFileReader(List<KVPair> originalData, CompressionCodec codec) throws IOException {
    LOG.info("Read using IFile reader");
    IFile.Reader reader = new IFile.Reader(localFs, outputPath, codec, null, null, false, 0, -1);
    try {
        verifyData(reader, originalData);
    } finally {
        // Close even when verification throws so the file handle is not leaked.
        reader.close();
    }
}
Also used : Reader(org.apache.tez.runtime.library.common.sort.impl.IFile.Reader) Reader(org.apache.tez.runtime.library.common.sort.impl.IFile.Reader) InMemoryReader(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader)

Example 2 with Reader

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Reader in project tez by apache.

From the class TestIFile, method testReadToDisk.

@Test(timeout = 20000)
public void testReadToDisk() throws IOException {
    // A stream of nothing but zeroes has no valid IFile header, so
    // readToDisk must reject it with an IOException.
    byte[] zeroes = new byte[1000];
    Arrays.fill(zeroes, (byte) 0);
    try {
        IFile.Reader.readToDisk(new ByteArrayOutputStream(), new ByteArrayInputStream(zeroes), zeroes.length, false, 0);
        fail("Exception should have been thrown");
    } catch (IOException e) {
    }
    // Prepending a valid IFile header to the same zeroes must still fail,
    // this time specifically with a checksum mismatch.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    buffer.write(IFile.HEADER);
    buffer.write(zeroes);
    try {
        IFile.Reader.readToDisk(new ByteArrayOutputStream(), new ByteArrayInputStream(buffer.toByteArray()), zeroes.length, false, 0);
        fail("Exception should have been thrown");
    } catch (IOException e) {
        assertTrue(e instanceof ChecksumException);
    }
    // A well-formed file produced by IFile.Writer must round-trip: copy it
    // via readToDisk, then read the copy back and verify the records.
    List<KVPair> data = KVDataGen.generateTestData(true, 0);
    Writer writer = writeTestFile(false, false, data, codec);
    buffer.reset();
    IFile.Reader.readToDisk(buffer, localFs.open(outputPath), writer.getCompressedLength(), false, 0);
    byte[] copied = buffer.toByteArray();
    Reader reader = new Reader(new ByteArrayInputStream(copied), copied.length, codec, null, null, false, 0, 1024);
    verifyData(reader, data);
    reader.close();
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) ChecksumException(org.apache.hadoop.fs.ChecksumException) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Reader(org.apache.tez.runtime.library.common.sort.impl.IFile.Reader) InMemoryReader(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BoundedByteArrayOutputStream(org.apache.hadoop.io.BoundedByteArrayOutputStream) IOException(java.io.IOException) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) Test(org.junit.Test)

Example 3 with Reader

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Reader in project tez by apache.

From the class TestIFile, method testExceedMaxSize.

@Test(timeout = 5000)
// test overflow: IFile.Reader must reject keys/values larger than
// MAX_BUFFER_SIZE and must not over-allocate (double) when a record fits.
public void testExceedMaxSize() throws IOException {
    final int oldMaxBufferSize = IFile.Reader.MAX_BUFFER_SIZE;
    Text shortString = new Text("string");
    Text longString = new Text("A string of length 22.");
    assertEquals(22, longString.getLength());
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    IFile.Writer writer;
    IFile.Reader reader;
    FSDataOutputStream out;
    try {
        // Check Key length exceeding MAX_BUFFER_SIZE
        out = localFs.create(outputPath);
        writer = new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, false);
        writer.append(longString, shortString);
        writer.close();
        out.close();
        // Set this to a smaller value for testing
        IFile.Reader.MAX_BUFFER_SIZE = 16;
        reader = new IFile.Reader(localFs, outputPath, null, null, null, false, 0, -1);
        try {
            reader.nextRawKey(keyIn);
            Assert.fail("Expected IllegalArgumentException to be thrown");
        } catch (IllegalArgumentException e) {
            // expected: key is larger than the buffer cap
        }
        reader.close();
        // Check Value length exceeding MAX_BUFFER_SIZE
        out = localFs.create(outputPath);
        writer = new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, false);
        writer.append(shortString, longString);
        writer.close();
        out.close();
        // Set this to a smaller value for testing
        IFile.Reader.MAX_BUFFER_SIZE = 16;
        reader = new IFile.Reader(localFs, outputPath, null, null, null, false, 0, -1);
        try {
            reader.nextRawKey(keyIn);
            reader.nextRawValue(valIn);
            Assert.fail("Expected IllegalArgumentException to be thrown");
        } catch (IllegalArgumentException e) {
            // expected: value is larger than the buffer cap
        }
        reader.close();
        // Check Key length not getting doubled
        out = localFs.create(outputPath);
        writer = new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, false);
        writer.append(longString, shortString);
        writer.close();
        out.close();
        // Set this to a smaller value for testing
        IFile.Reader.MAX_BUFFER_SIZE = 32;
        reader = new IFile.Reader(localFs, outputPath, null, null, null, false, 0, -1);
        reader.nextRawKey(keyIn);
        assertEquals(longString.getLength() + 1, keyIn.getData().length);
        reader.close();
        // Check Value length not getting doubled
        out = localFs.create(outputPath);
        writer = new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, false);
        writer.append(shortString, longString);
        writer.close();
        out.close();
        // Set this to a smaller value for testing
        IFile.Reader.MAX_BUFFER_SIZE = 32;
        reader = new IFile.Reader(localFs, outputPath, null, null, null, false, 0, -1);
        reader.nextRawKey(keyIn);
        reader.nextRawValue(valIn);
        assertEquals(longString.getLength() + 1, valIn.getData().length);
        reader.close();
    } finally {
        // Restore the shared static even if an assertion above failed, so the
        // shrunken buffer cap cannot leak into other tests in this JVM.
        IFile.Reader.MAX_BUFFER_SIZE = oldMaxBufferSize;
    }
}
Also used : Reader(org.apache.tez.runtime.library.common.sort.impl.IFile.Reader) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) Text(org.apache.hadoop.io.Text) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) Test(org.junit.Test)

Example 4 with Reader

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Reader in project tez by apache.

From the class TestIFile, method testWritingEmptyKeyValues.

@Test(timeout = 5000)
// Write empty key value pairs and verify they read back as empty records.
public void testWritingEmptyKeyValues() throws IOException {
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, null, null, null, null, null);
    writer.append(key, value);
    writer.append(key, value);
    writer.append(key, value);
    writer.append(key, value);
    writer.close();
    IFile.Reader reader = new Reader(localFs, outputPath, null, null, null, false, -1, 1024);
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    int records = 0;
    while (reader.nextRawKey(keyIn)) {
        reader.nextRawValue(valIn);
        records++;
        // Use JUnit assertions rather than the Java `assert` keyword: asserts
        // are silently skipped unless the JVM runs with -ea, which would let
        // a regression here pass unnoticed.
        assertEquals("Expected empty key", 0, keyIn.getLength());
        assertEquals("Expected empty value", 0, valIn.getLength());
    }
    assertTrue("Number of records read does not match", (records == 4));
    reader.close();
}
Also used : Reader(org.apache.tez.runtime.library.common.sort.impl.IFile.Reader) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) Reader(org.apache.tez.runtime.library.common.sort.impl.IFile.Reader) InMemoryReader(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) Test(org.junit.Test)

Aggregations

Reader (org.apache.tez.runtime.library.common.sort.impl.IFile.Reader)4 InMemoryReader (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader)3 Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)3 Test (org.junit.Test)3 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)2 InMemoryWriter (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 ChecksumException (org.apache.hadoop.fs.ChecksumException)1 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)1 BoundedByteArrayOutputStream (org.apache.hadoop.io.BoundedByteArrayOutputStream)1 Text (org.apache.hadoop.io.Text)1 KVPair (org.apache.tez.runtime.library.testutils.KVDataGen.KVPair)1