use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.
the class TestShuffleUtils method testGenerateOnSpillEvent_With_FinalMerge.
/**
 * Checks the events emitted by {@code ShuffleUtils.generateEventOnSpill} when final merge is
 * enabled and the spill is reported as the last event.
 *
 * <p>Expected outcome: a {@link VertexManagerEvent} followed by a single
 * {@link CompositeDataMovementEvent} covering all physical outputs, whose payload carries no
 * spill id / last-event markers and flags five partitions as empty.
 *
 * @throws Exception if the index file cannot be created or the payload cannot be parsed
 */
@Test
public void testGenerateOnSpillEvent_With_FinalMerge() throws Exception {
  List<Event> events = Lists.newLinkedList();
  // NOTE(review): presumably this index file describes 10 partitions of which 5 are empty
  // (see the cardinality assertion at the end) - confirm against createIndexFile.
  Path indexFile = createIndexFile(10, false);
  boolean finalMergeEnabled = true;
  boolean isLastEvent = true;
  int spillId = 0;
  int physicalOutputs = 10;
  String pathComponent = "/attempt_x_y_0/file.out";
  String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);

  // normal code path where we do final merge all the time
  ShuffleUtils.generateEventOnSpill(events, finalMergeEnabled, isLastEvent, outputContext, spillId, new TezSpillRecord(indexFile, conf), physicalOutputs, true, pathComponent, null, false, auxiliaryService, TezCommonUtils.newBestCompressionDeflater());

  // Two events are expected: the vertex-manager event first, then the composite data movement event.
  // assertEquals is used (instead of assertTrue(a == b)) so a failure reports the actual value.
  Assert.assertEquals(2, events.size());
  Assert.assertTrue(events.get(0) instanceof VertexManagerEvent);
  Assert.assertTrue(events.get(1) instanceof CompositeDataMovementEvent);

  CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
  Assert.assertEquals(physicalOutputs, cdme.getCount());
  Assert.assertEquals(0, cdme.getSourceIndexStart());

  ShuffleUserPayloads.DataMovementEventPayloadProto dmeProto = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
  // With final merge, spill details should not be present
  Assert.assertFalse(dmeProto.hasSpillId());
  Assert.assertFalse(dmeProto.hasLastEvent() || dmeProto.getLastEvent());

  // The empty-partition bitmap travels compressed inside the payload; decode it before counting.
  byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(dmeProto.getEmptyPartitions());
  BitSet emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
  Assert.assertTrue("emptyPartitionBitSet cardinality (expecting 5) = " + emptyPartitionsBitSet.cardinality(), emptyPartitionsBitSet.cardinality() == 5);
}
use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.
the class TestDefaultSorter method testWithMultipleSpillsWithFinalMergeDisabled.
/**
 * Runs a {@link DefaultSorter} with final merge disabled and checks that the single batch of
 * events handed to {@code sendEvents} contains one {@link CompositeDataMovementEvent} per spill,
 * each pointing at the path component {@code UniqueID + "_" + <spill number>}.
 *
 * @throws IOException propagated from the sorter / payload parsing
 */
@Test(timeout = 60000)
@SuppressWarnings("unchecked")
public void testWithMultipleSpillsWithFinalMergeDisabled() throws IOException {
  OutputContext outputCtx = createTezOutputContext();

  // Sorter configuration: no final merge, sort-mb set to 4, index cache limit set to 1.
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 4);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 1);

  MemoryUpdateCallbackHandler memoryHandler = new MemoryUpdateCallbackHandler();
  outputCtx.requestInitialMemory(
      ExternalSorter.getInitialMemoryRequirement(conf, outputCtx.getTotalMemoryAvailableToTask()),
      memoryHandler);

  DefaultSorter sorter = new DefaultSorter(outputCtx, conf, 1, memoryHandler.getMemoryAssigned());
  writeData(sorter, 10000, 1000);
  int expectedSpills = sorter.getNumSpills();

  // Exactly one sendEvents() call is expected; grab the list that was passed to it.
  ArgumentCaptor<List> sentEventsCaptor = ArgumentCaptor.forClass(List.class);
  verify(outputCtx, times(1)).sendEvents(sentEventsCaptor.capture());
  List<Event> sentEvents = sentEventsCaptor.getValue();

  int observedSpills = 0;
  for (Event sent : sentEvents) {
    if (!(sent instanceof CompositeDataMovementEvent)) {
      // Only the data movement events describe spills.
      continue;
    }
    CompositeDataMovementEvent spillEvent = (CompositeDataMovementEvent) sent;
    ByteString rawPayload = ByteString.copyFrom(spillEvent.getUserPayload());
    ShuffleUserPayloads.DataMovementEventPayloadProto payload =
        ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(rawPayload);
    String expectedPath = UniqueID + "_" + observedSpills;
    assertTrue(payload.getPathComponent().equalsIgnoreCase(expectedPath));
    verifyOutputPermissions(payload.getPathComponent());
    observedSpills++;
  }

  assertTrue(observedSpills == expectedSpills);
  verifyCounters(sorter, outputCtx);
}
use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.
the class TestUnorderedPartitionedKVWriter method baseTestWithPipelinedTransfer.
/**
 * Shared driver for the pipelined-shuffle tests of {@code UnorderedPartitionedKVWriter}.
 *
 * <p>Writes {@code numRecords} (IntWritable, LongWritable) pairs through a writer configured with
 * final merge disabled and pipelined shuffle enabled, leaving out every record whose partition is
 * in {@code skippedPartitions}. It then verifies the events passed to
 * {@code OutputContext.sendEvents}, the task counters, the payload of the final event and the
 * existence and permissions of the spill files.
 *
 * @param numRecords number of records to generate before partition filtering
 * @param numPartitions number of output partitions
 * @param skippedPartitions partitions that must receive no data; may be {@code null}
 * @param shouldCompress forwarded to {@code createConfiguration}; also relaxes the check on
 *     physical output size
 * @throws IOException propagated from the writer or the local file system
 * @throws InterruptedException propagated from the writer
 */
@SuppressWarnings("unchecked")
private void baseTestWithPipelinedTransfer(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
  PartitionerForTest partitioner = new PartitionerForTest();
  ApplicationId appId = ApplicationId.newInstance(10000000, 1);
  TezCounters counters = new TezCounters();
  String uniqueId = UUID.randomUUID().toString();
  int dagId = 1;
  String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
  OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
  Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, -1);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);

  int numOutputs = numPartitions;
  long availableMemory = 2048;
  int numRecordsWritten = 0;
  UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
  int sizePerBuffer = kvWriter.sizePerBuffer;
  // IntW + LongW
  int sizePerRecord = 4 + 8;
  // Record + META_OVERHEAD
  int sizePerRecordWithOverhead = sizePerRecord + 12;

  // Write the records, remembering which partitions actually received data.
  BitSet partitionsWithData = new BitSet(numPartitions);
  IntWritable intWritable = new IntWritable();
  LongWritable longWritable = new LongWritable();
  for (int i = 0; i < numRecords; i++) {
    intWritable.set(i);
    longWritable.set(i);
    int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
    if (skippedPartitions != null && skippedPartitions.contains(partition)) {
      continue;
    }
    partitionsWithData.set(partition);
    kvWriter.write(intWritable, longWritable);
    numRecordsWritten++;
  }

  int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
  int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

  ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
  List<Event> lastEvents = kvWriter.close();
  if (numPartitions == 1) {
    assertEquals(false, kvWriter.skipBuffers);
  }
  // With pipelining, close() itself returns no events; they are observed via sendEvents() below.
  assertEquals(0, lastEvents.size());
  verify(outputContext, atLeast(numExpectedSpills)).sendEvents(eventCaptor.capture());

  List<List> capturedBatches = eventCaptor.getAllValues();
  int numOfCapturedEvents = capturedBatches.size();
  // Fail with a readable message instead of an IndexOutOfBoundsException from get(-1).
  assertTrue("Expected at least one sendEvents() call", numOfCapturedEvents > 0);
  lastEvents = capturedBatches.get(numOfCapturedEvents - 1);
  VertexManagerEvent vmEvent = (VertexManagerEvent) lastEvents.get(0);

  // Every batch but the last holds a single data movement event; the last batch is
  // prefixed with the vertex manager event.
  for (int i = 0; i < numOfCapturedEvents; i++) {
    List<Event> events = capturedBatches.get(i);
    if (i < numOfCapturedEvents - 1) {
      assertEquals(1, events.size());
      assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    } else {
      assertEquals(2, events.size());
      assertTrue(events.get(0) instanceof VertexManagerEvent);
      assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    }
  }
  verifyPartitionStats(vmEvent, partitionsWithData);
  verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
  assertNull(kvWriter.currentBuffer);
  assertEquals(0, kvWriter.availableBuffers.size());

  // Verify the counters
  TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
  TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
  TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
  TezCounter additionalSpillBytesWrittenCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
  assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
  assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
  long fileOutputBytes = fileOutputBytesCounter.getValue();
  if (numRecordsWritten > 0) {
    assertTrue(fileOutputBytes > 0);
    if (!shouldCompress) {
      assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
    }
  } else {
    assertEquals(0, fileOutputBytes);
  }
  // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
  assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
  long additionalSpillBytesWritten = additionalSpillBytesWrittenCounter.getValue();
  long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
  // No additional spill bytes written when final merge is disabled.
  assertEquals(0, additionalSpillBytesWritten);
  // No additional spills when final merge is disabled.
  assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead);
  // No additional spills when final merge is disabled.
  assertEquals(0, numAdditionalSpillsCounter.getValue());

  assertTrue(lastEvents.size() > 0);
  // Get the last event
  int index = lastEvents.size() - 1;
  assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
  CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
  assertEquals(0, cdme.getSourceIndexStart());
  assertEquals(numOutputs, cdme.getCount());
  DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
  // Ensure that this is the last event
  assertTrue(eventProto.getLastEvent());
  verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
  verify(outputContext, atLeast(1)).notifyProgress();

  // Verify if all spill files are available.
  TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
  if (numRecordsWritten > 0) {
    int numSpills = kvWriter.numSpills.get();
    for (int i = 0; i < numSpills; i++) {
      Path outputFile = taskOutput.getSpillFileForWrite(i, 10);
      Path indexFile = taskOutput.getSpillIndexFileForWrite(i, 10);
      assertTrue(localFs.exists(outputFile));
      assertTrue(localFs.exists(indexFile));
      // 0640 is an octal literal: owner rw, group r, others none.
      assertEquals("Incorrect output permissions", (short) 0640, localFs.getFileStatus(outputFile).getPermission().toShort());
      assertEquals("Incorrect index permissions", (short) 0640, localFs.getFileStatus(indexFile).getPermission().toShort());
    }
  }
}
use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.
the class TestOnFileSortedOutput method testWithSomeEmptyPartition.
/**
 * Writes random key/value pairs while leaving one partition index untouched, closes the sorted
 * output and checks that the second returned event carries the expected host, port and path
 * component in its shuffle payload.
 *
 * @throws Exception propagated from the output, the writer or payload parsing
 */
@Test(timeout = 5000)
public void testWithSomeEmptyPartition() throws Exception {
  // The scenario needs at least two partitions.
  partitions = Math.max(2, partitions);
  startSortedOutput(partitions);

  // Feed random data; a key/value pair is generated on every iteration, but it is only
  // written when the iteration does not map onto the partition index that is kept empty.
  int recordCount = 2 * partitions;
  for (int n = 0; n < recordCount; n++) {
    Text randomKey = new Text(new BigInteger(256, rnd).toString());
    Text randomValue = new Text(new BigInteger(256, rnd).toString());
    if (n % partitions == emptyPartitionIdx) {
      continue;
    }
    writer.write(randomKey, randomValue);
  }

  List<Event> closeEvents = sortedOutput.close();
  assertTrue(closeEvents != null && closeEvents.size() == 2);

  // The data movement event is the second of the two events.
  CompositeDataMovementEvent dataEvent = (CompositeDataMovementEvent) closeEvents.get(1);
  ByteString rawPayload = ByteString.copyFrom(dataEvent.getUserPayload());
  ShuffleUserPayloads.DataMovementEventPayloadProto shuffleInfo =
      ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(rawPayload);

  assertEquals(HOST, shuffleInfo.getHost());
  assertEquals(PORT, shuffleInfo.getPort());
  assertEquals(UniqueID, shuffleInfo.getPathComponent());
}
use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.
the class TestOnFileUnorderedKVOutput method testGeneratedDataMovementEvent.
/**
 * Writes generated test data through an {@code UnorderedKVOutput} with a single physical output
 * and verifies the reported IO statistics and the contents of the resulting
 * {@link CompositeDataMovementEvent} payload (source index, path component, host and port).
 *
 * @throws Exception propagated from the output, the writer or payload parsing
 */
@Test(timeout = 5000)
public void testGeneratedDataMovementEvent() throws Exception {
  Configuration conf = new Configuration();
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
  TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
  try {
    OutputContext outputContext = createOutputContext(conf, sharedExecutor);
    UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);

    // initialize() is expected to return an empty (non-null) event list.
    List<Event> events = kvOutput.initialize();
    kvOutput.start();
    assertTrue(events != null && events.size() == 0);

    KeyValueWriter kvWriter = kvOutput.getWriter();
    List<KVPair> data = KVDataGen.generateTestData(true, 0);
    for (KVPair kvp : data) {
      kvWriter.write(kvp.getKey(), kvp.getvalue());
    }
    events = kvOutput.close();

    // Statistics are read from the first IO statistics entry of the task.
    assertEquals(45, task.getTaskStatistics().getIOStatistics().values().iterator().next().getDataSize());
    assertEquals(5, task.getTaskStatistics().getIOStatistics().values().iterator().next().getItemsProcessed());

    // close() yields two events; the data movement event is the second one.
    assertTrue(events != null && events.size() == 2);
    CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
    assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
    DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
    assertFalse(shufflePayload.hasEmptyPartitions());
    assertEquals(outputContext.getUniqueIdentifier(), shufflePayload.getPathComponent());
    assertEquals(shufflePort, shufflePayload.getPort());
    assertEquals("localhost", shufflePayload.getHost());
  } finally {
    // Always release the executor, even when an assertion above fails.
    sharedExecutor.shutdownNow();
  }
}
Aggregations