Search in sources :

Example 21 with Writer

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.

the class TestIFile method testWritingEmptyKeyValues.

/**
 * Writes four empty key/value pairs through an {@code IFile.Writer} and reads
 * them back, checking that exactly four records are returned and that every
 * key and value read has length zero.
 *
 * @throws IOException if writing to or reading from the output path fails
 */
@Test(timeout = 5000)
public void testWritingEmptyKeyValues() throws IOException {
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, null, null, null, null, null);
    writer.append(key, value);
    writer.append(key, value);
    writer.append(key, value);
    writer.append(key, value);
    writer.close();
    IFile.Reader reader = new Reader(localFs, outputPath, null, null, null, false, -1, 1024);
    try {
        DataInputBuffer keyIn = new DataInputBuffer();
        DataInputBuffer valIn = new DataInputBuffer();
        int records = 0;
        while (reader.nextRawKey(keyIn)) {
            reader.nextRawValue(valIn);
            records++;
            // Use JUnit assertions rather than the `assert` keyword: the keyword
            // is a no-op unless the JVM is started with -ea, so the checks could
            // be skipped without anyone noticing.
            assertTrue("Key read back should be empty", keyIn.getLength() == 0);
            assertTrue("Value read back should be empty", valIn.getLength() == 0);
        }
        assertTrue("Number of records read does not match", (records == 4));
    } finally {
        // Close the reader even when one of the checks above fails.
        reader.close();
    }
}
Also used : Reader(org.apache.tez.runtime.library.common.sort.impl.IFile.Reader) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) InMemoryReader(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) Test(org.junit.Test)

Example 22 with Writer

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.

the class TestIFile method testWithDataBuffer.

/**
 * Runs a write-then-verify round trip over the supplied records for every
 * combination of the two boolean writer options (RLE and repeat keys), each
 * time first without a codec and then with {@code codec}. Files are written
 * through {@code writeTestFileUsingDataBuffer}.
 *
 * @param data the key/value pairs to write and then verify
 * @throws IOException if writing or reading a test file fails
 */
private void testWithDataBuffer(List<KVPair> data) throws IOException {
    final boolean[] onOff = { false, true };
    // Outer loop toggles RLE, inner loop toggles repeat keys, which gives the
    // order (off, off), (off, on), (on, off), (on, on).
    for (boolean rle : onOff) {
        for (boolean repeatKeys : onOff) {
            Writer uncompressed = writeTestFileUsingDataBuffer(rle, repeatKeys, data, null);
            readAndVerifyData(uncompressed.getRawLength(), uncompressed.getCompressedLength(), data, null);

            Writer compressed = writeTestFileUsingDataBuffer(rle, repeatKeys, data, codec);
            readAndVerifyData(compressed.getRawLength(), compressed.getCompressedLength(), data, codec);
        }
    }
}
Also used : Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter)

Example 23 with Writer

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.

the class TestIFile method testWriterAndReader.

/**
 * Runs a write-then-verify round trip over the supplied records for every
 * combination of the two boolean writer options (RLE and repeat keys), each
 * time first without a codec and then with {@code codec}. Files are written
 * through {@code writeTestFile}.
 *
 * @param data the key/value pairs to write and then verify
 * @throws IOException if writing or reading a test file fails
 */
private void testWriterAndReader(List<KVPair> data) throws IOException {
    // Bit 1 of the counter selects RLE and bit 0 selects repeat keys, so the
    // combinations run as (off, off), (off, on), (on, off), (on, on).
    for (int combo = 0; combo < 4; combo++) {
        boolean rle = (combo & 2) != 0;
        boolean repeatKeys = (combo & 1) != 0;

        Writer written = writeTestFile(rle, repeatKeys, data, null);
        readAndVerifyData(written.getRawLength(), written.getCompressedLength(), data, null);

        written = writeTestFile(rle, repeatKeys, data, codec);
        readAndVerifyData(written.getRawLength(), written.getCompressedLength(), data, codec);
    }
}
Also used : Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter)

Example 24 with Writer

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.

the class TestMRCombiner method testTop2RunNewCombiner.

/**
 * Configures {@code Top2NewReducer} as the combine class with the
 * "mapred.mapper.new-api" flag enabled, runs {@code MRCombiner} against a
 * mocked writer, and checks the combine input/output record counters.
 */
@Test
public void testTop2RunNewCombiner() throws IOException, InterruptedException {
    TezConfiguration tezConf = new TezConfiguration();
    setKeyAndValueClassTypes(tezConf);
    tezConf.setBoolean("mapred.mapper.new-api", true);
    tezConf.setClass(MRJobConfig.COMBINE_CLASS_ATTR, Top2NewReducer.class, Object.class);

    TaskContext context = getTaskContext(tezConf);
    MRCombiner mrCombiner = new MRCombiner(context);
    Writer mockWriter = Mockito.mock(Writer.class);
    mrCombiner.combine(new TezRawKeyValueIteratorTest(), mockWriter);

    // Read both counters before asserting on either of them.
    long combinedIn = context.getCounters()
        .findCounter(TaskCounter.COMBINE_INPUT_RECORDS)
        .getValue();
    long combinedOut = context.getCounters()
        .findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS)
        .getValue();
    assertEquals(6, combinedIn);
    assertEquals(5, combinedOut);
}
Also used : TaskContext(org.apache.tez.runtime.api.TaskContext) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 25 with Writer

use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.

the class TestMRCombiner method testTop2RunOldCombiner.

/**
 * Configures {@code Top2OldReducer} under the "mapred.combiner.class" key,
 * runs {@code MRCombiner} against a mocked writer, and checks the combine
 * input/output record counters.
 */
@Test
public void testTop2RunOldCombiner() throws IOException, InterruptedException {
    TezConfiguration tezConf = new TezConfiguration();
    setKeyAndValueClassTypes(tezConf);
    tezConf.setClass("mapred.combiner.class", Top2OldReducer.class, Object.class);

    TaskContext context = getTaskContext(tezConf);
    MRCombiner mrCombiner = new MRCombiner(context);
    Writer mockWriter = Mockito.mock(Writer.class);
    mrCombiner.combine(new TezRawKeyValueIteratorTest(), mockWriter);

    // Read both counters before asserting on either of them.
    long combinedIn = context.getCounters()
        .findCounter(TaskCounter.COMBINE_INPUT_RECORDS)
        .getValue();
    long combinedOut = context.getCounters()
        .findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS)
        .getValue();
    assertEquals(6, combinedIn);
    assertEquals(5, combinedOut);
}
Also used : TaskContext(org.apache.tez.runtime.api.TaskContext) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Aggregations

Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)25 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)12 Test (org.junit.Test)12 InMemoryWriter (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter)11 Path (org.apache.hadoop.fs.Path)8 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)6 ArrayList (java.util.ArrayList)5 Text (org.apache.hadoop.io.Text)5 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)5 TezSpillRecord (org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord)5 KVPair (org.apache.tez.runtime.library.testutils.KVDataGen.KVPair)5 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)4 TaskContext (org.apache.tez.runtime.api.TaskContext)4 IFile (org.apache.tez.runtime.library.common.sort.impl.IFile)4 IOException (java.io.IOException)3 IntWritable (org.apache.hadoop.io.IntWritable)3 Reader (org.apache.tez.runtime.library.common.sort.impl.IFile.Reader)3 DiskSegment (org.apache.tez.runtime.library.common.sort.impl.TezMerger.DiskSegment)3 Segment (org.apache.tez.runtime.library.common.sort.impl.TezMerger.Segment)3 TezRawKeyValueIterator (org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator)3