Search in sources:

Example 1 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method writeTestFileUsingDataBuffer.

/**
 * Appends every pair of {@code data} to {@code writer} through reusable
 * {@link DataInputBuffer}s, closes the writer and logs its raw and
 * compressed lengths.
 *
 * @param writer     the writer to append to; it is closed before returning
 * @param repeatKeys when {@code true}, a key that compares equal to the key of
 *                   the preceding pair is written as {@code IFile.REPEAT_KEY}
 *                   instead of the key itself
 * @param data       the pairs to write, in iteration order
 * @return the same {@code writer} instance, already closed
 * @throws IOException if populating the buffers, appending or closing fails
 */
private Writer writeTestFileUsingDataBuffer(Writer writer, boolean repeatKeys, List<KVPair> data) throws IOException {
    final DataInputBuffer lastKeyBuf = new DataInputBuffer();
    final DataInputBuffer keyBuf = new DataInputBuffer();
    final DataInputBuffer valueBuf = new DataInputBuffer();

    for (KVPair pair : data) {
        // presumably serializes the pair's key/value into the two buffers -- helper is defined elsewhere
        populateData(pair, keyBuf, valueBuf);

        // lastKeyBuf is never null, so only the flag and the comparison decide
        final boolean sameAsLast = repeatKeys && BufferUtils.compare(keyBuf, lastKeyBuf) == 0;
        if (!sameAsLast) {
            writer.append(keyBuf, valueBuf);
        } else {
            writer.append(IFile.REPEAT_KEY, valueBuf);
        }

        // remember the key that was just handled for the next iteration
        lastKeyBuf.reset(keyBuf.getData(), 0, keyBuf.getLength());
    }

    writer.close();

    LOG.info("Uncompressed: " + writer.getRawLength());
    LOG.info("CompressedSize: " + writer.getCompressedLength());
    return writer;
}
Also used : DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair)

Example 2 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method testAppendValues.

/**
 * Test appendValues feature: writes one key/value pair with {@code append},
 * adds the remaining values for that key with {@code appendValues}, appends
 * one more pair under a different key, then reads the file back and verifies
 * it against the expected list of pairs.
 *
 * @throws IOException if writing or reading the file fails
 */
@Test(timeout = 5000)
public void testAppendValues() throws IOException {
    final Text sharedKey = new Text("key");
    final IntWritable sharedValue = new IntWritable(1);

    // expected content: five pairs sharing one key and one value object
    final List<KVPair> expected = new ArrayList<KVPair>();
    final List<IntWritable> valuesOnly = new ArrayList<IntWritable>();
    for (int count = 0; count != 5; ++count) {
        expected.add(new KVPair(sharedKey, sharedValue));
        valuesOnly.add(sharedValue);
    }

    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class, codec, null, null);

    // write first KV pair
    final KVPair first = expected.get(0);
    writer.append(first.getKey(), first.getvalue());

    // add the rest here
    final List<IntWritable> remainingValues = valuesOnly.subList(1, valuesOnly.size());
    writer.appendValues(remainingValues.iterator());

    // finish with a pair under a different key
    final Text lastKey = new Text("key3");
    final IntWritable lastVal = new IntWritable(10);
    expected.add(new KVPair(lastKey, lastVal));
    writer.append(lastKey, lastVal);
    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), expected, codec);
}
Also used : KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IntWritable(org.apache.hadoop.io.IntWritable) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) Test(org.junit.Test)

Example 3 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method testAppendValue.

/**
 * Test appendValue feature: for generated test data, a pair whose key
 * compares equal to the preceding pair's key is written with
 * {@code appendValue}; every other pair is written with {@code append}.
 * The file is then read back and verified against the generated data.
 *
 * @throws IOException if writing or reading the file fails
 */
@Test(timeout = 5000)
public void testAppendValue() throws IOException {
    final List<KVPair> expected = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class, codec, null, null);

    Text lastKey = null;
    for (KVPair pair : expected) {
        final Text currentKey = pair.getKey();
        // there is no previous key to compare against on the first iteration
        final boolean sameKeyAsBefore = lastKey != null && lastKey.compareTo(currentKey) == 0;
        if (!sameKeyAsBefore) {
            writer.append(currentKey, pair.getvalue());
        } else {
            writer.appendValue(pair.getvalue());
        }
        lastKey = currentKey;
    }
    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), expected, codec);
}
Also used : KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Text(org.apache.hadoop.io.Text) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) Test(org.junit.Test)

Example 4 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method testReadToDisk.

/**
 * Exercises {@code IFile.Reader.readToDisk} in three steps: a stream of
 * zeroes must raise an {@link IOException}; the same zeroes prefixed with
 * {@code IFile.HEADER} must raise a {@code ChecksumException}; and a file
 * produced by {@code writeTestFile} must be copied so that a {@code Reader}
 * over the copied bytes passes {@code verifyData}.
 *
 * @throws IOException if the valid-data step fails unexpectedly
 */
@Test(timeout = 20000)
public void testReadToDisk() throws IOException {
    // verify sending a stream of zeroes generates an error
    final byte[] zeroes = new byte[1000];
    Arrays.fill(zeroes, (byte) 0);
    final ByteArrayInputStream zeroStream = new ByteArrayInputStream(zeroes);
    try {
        IFile.Reader.readToDisk(new ByteArrayOutputStream(), zeroStream, zeroes.length, false, 0);
        fail("Exception should have been thrown");
    } catch (IOException expected) {
        // an IOException is the required outcome here; nothing further is checked
    }

    // verify sending same stream of zeroes with a valid IFile header still
    // generates an error
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    buffer.write(IFile.HEADER);
    buffer.write(zeroes);
    final ByteArrayInputStream headerThenZeroes = new ByteArrayInputStream(buffer.toByteArray());
    try {
        IFile.Reader.readToDisk(new ByteArrayOutputStream(), headerThenZeroes, zeroes.length, false, 0);
        fail("Exception should have been thrown");
    } catch (IOException e) {
        assertTrue(e instanceof ChecksumException);
    }

    // verify valid data is copied properly
    final List<KVPair> expectedData = KVDataGen.generateTestData(true, 0);
    final Writer written = writeTestFile(false, false, expectedData, codec);
    // reuse the buffer as the copy destination, discarding the header+zeroes content
    buffer.reset();
    IFile.Reader.readToDisk(buffer, localFs.open(outputPath), written.getCompressedLength(), false, 0);

    final byte[] copiedBytes = buffer.toByteArray();
    final ByteArrayInputStream copiedStream = new ByteArrayInputStream(copiedBytes);
    Reader reader = new Reader(copiedStream, copiedBytes.length, codec, null, null, false, 0, 1024);
    verifyData(reader, expectedData);
    reader.close();
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) ChecksumException(org.apache.hadoop.fs.ChecksumException) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Reader(org.apache.tez.runtime.library.common.sort.impl.IFile.Reader) InMemoryReader(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BoundedByteArrayOutputStream(org.apache.hadoop.io.BoundedByteArrayOutputStream) IOException(java.io.IOException) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) Test(org.junit.Test)

Example 5 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method testAppendValueWithDataInputBuffer.

/**
 * Test appendValue with DataInputBuffer: for generated test data, each pair
 * is loaded into reusable {@link DataInputBuffer}s; a pair whose key buffer
 * compares equal to the previously handled key is written with
 * {@code appendValue}, every other pair with {@code append}. The file is
 * then read back and verified against the generated data.
 *
 * @throws IOException if populating the buffers, writing or reading fails
 */
@Test(timeout = 5000)
public void testAppendValueWithDataInputBuffer() throws IOException {
    List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class, codec, null, null);
    final DataInputBuffer previousKey = new DataInputBuffer();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (KVPair kvp : data) {
        // presumably serializes the pair's key/value into the two buffers -- helper is defined elsewhere
        populateData(kvp, key, value);
        // previousKey is a final, always-constructed buffer, so no null check is needed
        if (BufferUtils.compare(key, previousKey) == 0) {
            writer.appendValue(value);
        } else {
            writer.append(key, value);
        }
        // Track the key just handled. The source buffer must be `key` (the one
        // populateData filled); the previous `k` is not declared in this method.
        previousKey.reset(key.getData(), 0, key.getLength());
    }
    writer.close();
    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}
Also used : DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) Test(org.junit.Test)

Aggregations

KVPair (org.apache.tez.runtime.library.testutils.KVDataGen.KVPair)10 Test (org.junit.Test)7 Text (org.apache.hadoop.io.Text)6 InMemoryWriter (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter)6 Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)5 IntWritable (org.apache.hadoop.io.IntWritable)4 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)3 ArrayList (java.util.ArrayList)2 BoundedByteArrayOutputStream (org.apache.hadoop.io.BoundedByteArrayOutputStream)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 Configuration (org.apache.hadoop.conf.Configuration)1 ChecksumException (org.apache.hadoop.fs.ChecksumException)1 SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory)1 TezSharedExecutor (org.apache.tez.common.TezSharedExecutor)1 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)1 Event (org.apache.tez.runtime.api.Event)1 OutputContext (org.apache.tez.runtime.api.OutputContext)1 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)1