Search in sources :

Example 6 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestOnFileUnorderedKVOutput method testGeneratedDataMovementEvent.

/**
 * Writes generated key/value data through an {@link UnorderedKVOutput} and checks what
 * {@code close()} reports: the IO statistics (data size and items processed), the number
 * of events, and the contents of the {@link DataMovementEventPayloadProto} carried by the
 * second event (no empty partitions, path component, port and host).
 *
 * <p>The shared executor is shut down in a {@code finally} block so its resources are
 * released even when an assertion fails or an exception is thrown part-way through.
 *
 * <p>NOTE(review): {@code task}, {@code shufflePort} and {@code createOutputContext} are
 * defined outside this method; presumably they are fixture members of the test class.
 *
 * @throws Exception if any step of the output lifecycle fails
 */
@Test(timeout = 5000)
public void testGeneratedDataMovementEvent() throws Exception {
    Configuration conf = new Configuration();
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
    try {
        OutputContext outputContext = createOutputContext(conf, sharedExecutor);
        UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);
        List<Event> events = kvOutput.initialize();
        kvOutput.start();
        // Initialization must not emit any events.
        assertTrue(events != null && events.size() == 0);
        KeyValueWriter kvWriter = kvOutput.getWriter();
        List<KVPair> data = KVDataGen.generateTestData(true, 0);
        for (KVPair kvp : data) {
            kvWriter.write(kvp.getKey(), kvp.getvalue());
        }
        events = kvOutput.close();
        assertEquals(45, task.getTaskStatistics().getIOStatistics().values().iterator().next().getDataSize());
        assertEquals(5, task.getTaskStatistics().getIOStatistics().values().iterator().next().getItemsProcessed());
        assertTrue(events != null && events.size() == 2);
        // The composite data movement event is expected at index 1 of the close() events.
        CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
        assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
        DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
        assertFalse(shufflePayload.hasEmptyPartitions());
        assertEquals(outputContext.getUniqueIdentifier(), shufflePayload.getPathComponent());
        assertEquals(shufflePort, shufflePayload.getPort());
        assertEquals("localhost", shufflePayload.getHost());
    } finally {
        // Always release the executor, including when an assertion above fails.
        sharedExecutor.shutdownNow();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) Text(org.apache.hadoop.io.Text) OutputContext(org.apache.tez.runtime.api.OutputContext) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Event(org.apache.tez.runtime.api.Event) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 7 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method writeTestFile.

/**
 * Appends every pair in {@code data} to {@code writer}, closes the writer, logs its raw
 * and compressed lengths, and returns it.
 *
 * <p>When {@code repeatKeys} is set and a pair's key compares equal to the key of the
 * pair before it, {@link IFile#REPEAT_KEY} is appended in place of the key.
 *
 * @param writer the writer to append to; asserted to be non-null
 * @param repeatKeys whether consecutive equal keys are written as {@code IFile.REPEAT_KEY}
 * @param data the key/value pairs to write, in order
 * @return the same {@code writer}, after it has been closed
 * @throws IOException propagated from the writer
 */
private Writer writeTestFile(IFile.Writer writer, boolean repeatKeys, List<KVPair> data) throws IOException {
    assertNotNull(writer);
    Text lastSeenKey = null;
    for (KVPair pair : data) {
        // Only consult the previous key when repeat-key handling was requested.
        boolean sameAsPrevious = repeatKeys
            && lastSeenKey != null
            && lastSeenKey.compareTo(pair.getKey()) == 0;
        if (!sameAsPrevious) {
            writer.append(pair.getKey(), pair.getvalue());
        } else {
            // RLE is enabled in IFile when IFile.REPEAT_KEY is set
            writer.append(IFile.REPEAT_KEY, pair.getvalue());
        }
        lastSeenKey = pair.getKey();
    }
    writer.close();
    LOG.info("Uncompressed: " + writer.getRawLength());
    LOG.info("CompressedSize: " + writer.getCompressedLength());
    return writer;
}
Also used : KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Text(org.apache.hadoop.io.Text)

Example 8 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method testAppendKeyValues.

/**
 * Test appendKeyValues feature.
 *
 * <p>Writes five values under a single key with {@code appendKeyValues}, appends one
 * further pair with a different key, then reads the output back and verifies it against
 * the expected list of pairs.
 *
 * @throws IOException propagated from the writer or from verification
 */
@Test(timeout = 5000)
public void testAppendKeyValues() throws IOException {
    Text sharedKey = new Text("key");
    IntWritable sharedValue = new IntWritable(1);
    List<KVPair> expectedPairs = new ArrayList<KVPair>();
    List<IntWritable> valuesForSharedKey = new ArrayList<IntWritable>();
    for (int count = 5; count > 0; count--) {
        valuesForSharedKey.add(sharedValue);
        expectedPairs.add(new KVPair(sharedKey, sharedValue));
    }

    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class, codec, null, null);
    writer.appendKeyValues(expectedPairs.get(0).getKey(), valuesForSharedKey.iterator());

    // One trailing record with a distinct key, written through the plain append path.
    Text trailingKey = new Text("key3");
    IntWritable trailingValue = new IntWritable(10);
    writer.append(trailingKey, trailingValue);
    expectedPairs.add(new KVPair(trailingKey, trailingValue));
    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), expectedPairs, codec);
}
Also used : KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IntWritable(org.apache.hadoop.io.IntWritable) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) Test(org.junit.Test)

Example 9 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method verifyData.

/**
 * Data verification: reads every record from {@code reader}, deserializes it as a
 * {@link Text} key and {@link IntWritable} value, and compares it with the entry at the
 * same position in {@code data}. Finally checks that the number of records read equals
 * {@code data.size()}.
 *
 * @param reader source of the raw key/value records to verify
 * @param data the expected key/value pairs, in the order they should be read
 * @throws IOException propagated from the reader or the deserializers
 */
private void verifyData(Reader reader, List<KVPair> data) throws IOException {
    LOG.info("Data verification");
    Text readKey = new Text();
    IntWritable readValue = new IntWritable();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    Deserializer<Text> keyDeserializer;
    Deserializer<IntWritable> valDeserializer;
    SerializationFactory serializationFactory = new SerializationFactory(defaultConf);
    keyDeserializer = serializationFactory.getDeserializer(Text.class);
    valDeserializer = serializationFactory.getDeserializer(IntWritable.class);
    keyDeserializer.open(keyIn);
    valDeserializer.open(valIn);
    int numRecordsRead = 0;
    while (reader.nextRawKey(keyIn)) {
        // Fail with a descriptive assertion instead of an IndexOutOfBoundsException
        // from data.get(...) when the reader yields more records than expected.
        if (numRecordsRead >= data.size()) {
            throw new AssertionError("Expected: " + data.size() + " records, but found more");
        }
        reader.nextRawValue(valIn);
        readKey = keyDeserializer.deserialize(readKey);
        readValue = valDeserializer.deserialize(readValue);
        KVPair expected = data.get(numRecordsRead);
        assertEquals("Key does not match: Expected: " + expected.getKey() + ", Read: " + readKey, expected.getKey(), readKey);
        assertEquals("Value does not match: Expected: " + expected.getvalue() + ", Read: " + readValue, expected.getvalue(), readValue);
        numRecordsRead++;
    }
    // Catches the opposite case: fewer records read than expected.
    assertEquals("Expected: " + data.size() + " records, but found: " + numRecordsRead, data.size(), numRecordsRead);
    LOG.info("Found: " + numRecordsRead + " records");
}
Also used : DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) Text(org.apache.hadoop.io.Text) IntWritable(org.apache.hadoop.io.IntWritable)

Example 10 with KVPair

use of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in project tez by apache.

the class TestIFile method testInMemoryWriter.

/**
 * Test InMemoryWriter.
 *
 * <p>Writes the same generated data into one bounded in-memory buffer four times, once
 * per combination of the writer's RLE flag and the repeat-keys flag passed to the write
 * helper, and reads it back after each write. The buffer is reset between scenarios.
 *
 * @throws IOException propagated from the write or read helpers
 */
@Test(timeout = 5000)
public void testInMemoryWriter() throws IOException {
    List<KVPair> testData = KVDataGen.generateTestData(true, 10);
    BoundedByteArrayOutputStream buffer = new BoundedByteArrayOutputStream(1024 * 1024);

    // No RLE, No RepeatKeys, no compression
    InMemoryWriter plainWriter = new InMemoryWriter(buffer);
    writeTestFileUsingDataBuffer(plainWriter, false, testData);
    readUsingInMemoryReader(buffer.getBuffer(), testData);

    // No RLE, RepeatKeys, no compression
    buffer.reset();
    InMemoryWriter repeatKeysWriter = new InMemoryWriter(buffer);
    writeTestFileUsingDataBuffer(repeatKeysWriter, true, testData);
    readUsingInMemoryReader(buffer.getBuffer(), testData);

    // RLE, No RepeatKeys, no compression
    buffer.reset();
    InMemoryWriter rleWriter = new InMemoryWriter(buffer, true);
    writeTestFileUsingDataBuffer(rleWriter, false, testData);
    readUsingInMemoryReader(buffer.getBuffer(), testData);

    // RLE, RepeatKeys, no compression
    buffer.reset();
    InMemoryWriter rleRepeatKeysWriter = new InMemoryWriter(buffer, true);
    writeTestFileUsingDataBuffer(rleRepeatKeysWriter, true, testData);
    readUsingInMemoryReader(buffer.getBuffer(), testData);
}
Also used : InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) BoundedByteArrayOutputStream(org.apache.hadoop.io.BoundedByteArrayOutputStream) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Test(org.junit.Test)

Aggregations

KVPair (org.apache.tez.runtime.library.testutils.KVDataGen.KVPair)10 Test (org.junit.Test)7 Text (org.apache.hadoop.io.Text)6 InMemoryWriter (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter)6 Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)5 IntWritable (org.apache.hadoop.io.IntWritable)4 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)3 ArrayList (java.util.ArrayList)2 BoundedByteArrayOutputStream (org.apache.hadoop.io.BoundedByteArrayOutputStream)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 Configuration (org.apache.hadoop.conf.Configuration)1 ChecksumException (org.apache.hadoop.fs.ChecksumException)1 SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory)1 TezSharedExecutor (org.apache.tez.common.TezSharedExecutor)1 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)1 Event (org.apache.tez.runtime.api.Event)1 OutputContext (org.apache.tez.runtime.api.OutputContext)1 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)1