Usage of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in the Apache Tez project.
Class TestOnFileUnorderedKVOutput, method testGeneratedDataMovementEvent.
/**
 * Writes generated key/value data through an {@link UnorderedKVOutput} and checks the events
 * and IO statistics reported after the output is closed.
 *
 * <p>Asserts that initialization returns an empty event list, that closing returns two events
 * whose second entry is a {@link CompositeDataMovementEvent} starting at source index 0, and
 * that the decoded shuffle payload carries the expected path component, port and host.
 *
 * @throws Exception if creating, writing to, or closing the output fails
 */
@Test(timeout = 5000)
public void testGeneratedDataMovementEvent() throws Exception {
  Configuration conf = new Configuration();
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
  TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
  try {
    OutputContext outputContext = createOutputContext(conf, sharedExecutor);
    UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);
    List<Event> events = kvOutput.initialize();
    kvOutput.start();
    // Initialization itself must not emit any events.
    assertTrue(events != null && events.size() == 0);

    KeyValueWriter kvWriter = kvOutput.getWriter();
    List<KVPair> data = KVDataGen.generateTestData(true, 0);
    for (KVPair kvp : data) {
      kvWriter.write(kvp.getKey(), kvp.getvalue());
    }
    events = kvOutput.close();

    // Statistics are read from the first (only inspected) IO entry of the task.
    assertEquals(45, task.getTaskStatistics().getIOStatistics().values().iterator().next().getDataSize());
    assertEquals(5, task.getTaskStatistics().getIOStatistics().values().iterator().next().getItemsProcessed());

    assertTrue(events != null && events.size() == 2);
    // The data movement event is expected at index 1 of the returned list.
    CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
    assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
    DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
    assertFalse(shufflePayload.hasEmptyPartitions());
    assertEquals(outputContext.getUniqueIdentifier(), shufflePayload.getPathComponent());
    assertEquals(shufflePort, shufflePayload.getPort());
    assertEquals("localhost", shufflePayload.getHost());
  } finally {
    // Shut the executor down even when an assertion above fails, so it is never left running.
    sharedExecutor.shutdownNow();
  }
}
Usage of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in the Apache Tez project.
Class TestIFile, method writeTestFile.
/**
 * Appends every pair in {@code data} to {@code writer}, closes the writer and logs its raw and
 * compressed lengths.
 *
 * <p>When {@code repeatKeys} is set and a pair's key compares equal to the key of the pair
 * before it, {@code IFile.REPEAT_KEY} is appended in place of the key.
 *
 * @param writer writer to append to; must not be null
 * @param repeatKeys whether consecutive equal keys are written as {@code IFile.REPEAT_KEY}
 * @param data key/value pairs to write, in order
 * @return the same {@code writer}, after it has been closed
 * @throws IOException if appending to or closing the writer fails
 */
private Writer writeTestFile(IFile.Writer writer, boolean repeatKeys, List<KVPair> data) throws IOException {
  assertNotNull(writer);
  Text lastKey = null;
  for (KVPair pair : data) {
    Text currentKey = pair.getKey();
    boolean writeAsRepeat = repeatKeys && lastKey != null && lastKey.compareTo(currentKey) == 0;
    if (!writeAsRepeat) {
      writer.append(currentKey, pair.getvalue());
    } else {
      // RLE is enabled in IFile when IFile.REPEAT_KEY is set
      writer.append(IFile.REPEAT_KEY, pair.getvalue());
    }
    lastKey = currentKey;
  }
  writer.close();
  LOG.info("Uncompressed: " + writer.getRawLength());
  LOG.info("CompressedSize: " + writer.getCompressedLength());
  return writer;
}
Usage of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in the Apache Tez project.
Class TestIFile, method testAppendKeyValues.
/**
 * Test appendKeyValues feature.
 *
 * <p>Writes one key with five values through {@code appendKeyValues}, followed by one further
 * key/value pair through {@code append}, then reads the file back and verifies it against the
 * expected list of pairs.
 *
 * @throws IOException if writing or reading the file fails
 */
@Test(timeout = 5000)
public void testAppendKeyValues() throws IOException {
  Text sharedKey = new Text("key");
  IntWritable sharedValue = new IntWritable(1);

  // Expected records and the value list handed to appendKeyValues are built side by side.
  List<KVPair> expected = new ArrayList<KVPair>();
  List<IntWritable> valuesForKey = new ArrayList<IntWritable>();
  int written = 0;
  while (written < 5) {
    expected.add(new KVPair(sharedKey, sharedValue));
    valuesForKey.add(sharedValue);
    written++;
  }

  IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class, codec, null, null);
  Text firstKey = expected.get(0).getKey();
  writer.appendKeyValues(firstKey, valuesForKey.iterator());

  // A trailing record with a different key, written through the plain append path.
  Text trailingKey = new Text("key3");
  IntWritable trailingValue = new IntWritable(10);
  expected.add(new KVPair(trailingKey, trailingValue));
  writer.append(trailingKey, trailingValue);
  writer.close();

  long rawLength = writer.getRawLength();
  long compressedLength = writer.getCompressedLength();
  readAndVerifyData(rawLength, compressedLength, expected, codec);
}
Usage of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in the Apache Tez project.
Class TestIFile, method verifyData.
/**
 * Data verification: reads every record from {@code reader} and compares it, in order, with
 * the entries of {@code data}.
 *
 * <p>Keys are deserialized as {@link Text} and values as {@link IntWritable}. The n-th record
 * read must equal the n-th entry of {@code data}, and the total number of records read must
 * equal {@code data.size()}.
 *
 * @param reader source of the raw key/value records to verify
 * @param data expected key/value pairs, in the order they should be read
 * @throws IOException if reading or deserializing a record fails
 */
private void verifyData(Reader reader, List<KVPair> data) throws IOException {
  LOG.info("Data verification");
  Text readKey = new Text();
  IntWritable readValue = new IntWritable();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  Deserializer<Text> keyDeserializer;
  Deserializer<IntWritable> valDeserializer;
  SerializationFactory serializationFactory = new SerializationFactory(defaultConf);
  keyDeserializer = serializationFactory.getDeserializer(Text.class);
  valDeserializer = serializationFactory.getDeserializer(IntWritable.class);
  keyDeserializer.open(keyIn);
  valDeserializer.open(valIn);
  int numRecordsRead = 0;
  while (reader.nextRawKey(keyIn)) {
    if (numRecordsRead >= data.size()) {
      // Surplus records: fail with a diagnostic rather than an IndexOutOfBoundsException
      // from data.get() below.
      throw new AssertionError("Expected: " + data.size() + " records, but reader returned more");
    }
    reader.nextRawValue(valIn);
    readKey = keyDeserializer.deserialize(readKey);
    readValue = valDeserializer.deserialize(readValue);
    KVPair expected = data.get(numRecordsRead);
    assertEquals("Key does not match: Expected: " + expected.getKey() + ", Read: " + readKey, expected.getKey(), readKey);
    assertEquals("Value does not match: Expected: " + expected.getvalue() + ", Read: " + readValue, expected.getvalue(), readValue);
    numRecordsRead++;
  }
  // Catches the opposite case: the reader ran out before all expected records were seen.
  assertEquals("Expected: " + data.size() + " records, but found: " + numRecordsRead, data.size(), numRecordsRead);
  LOG.info("Found: " + numRecordsRead + " records");
}
Usage of org.apache.tez.runtime.library.testutils.KVDataGen.KVPair in the Apache Tez project.
Class TestIFile, method testInMemoryWriter.
/**
 * Test InMemoryWriter.
 *
 * <p>Writes the same generated data into one shared in-memory buffer four times, covering
 * every combination of RLE off/on and repeat-keys off/on (always without compression), and
 * reads the buffer back for verification after each write.
 *
 * @throws IOException if writing to or reading from the in-memory buffer fails
 */
@Test(timeout = 5000)
public void testInMemoryWriter() throws IOException {
  BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
  List<KVPair> data = KVDataGen.generateTestData(true, 10);

  boolean[] offThenOn = {false, true};
  boolean bufferUsed = false;
  // Order of combinations: (no RLE, no repeat), (no RLE, repeat), (RLE, no repeat), (RLE, repeat).
  for (boolean rle : offThenOn) {
    for (boolean repeatKeys : offThenOn) {
      if (bufferUsed) {
        // Only rewind the buffer once something has been written to it.
        bout.reset();
      }
      bufferUsed = true;

      InMemoryWriter writer;
      if (rle) {
        writer = new InMemoryWriter(bout, true);
      } else {
        writer = new InMemoryWriter(bout);
      }
      writeTestFileUsingDataBuffer(writer, repeatKeys, data);
      readUsingInMemoryReader(bout.getBuffer(), data);
    }
  }
}
Aggregations