use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestUnorderedPartitionedKVWriter method baseTestWithPipelinedTransfer.
@SuppressWarnings("unchecked")
private void baseTestWithPipelinedTransfer(int numRecords, int numPartitions,
    Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
  PartitionerForTest partitioner = new PartitionerForTest();
  ApplicationId appId = ApplicationId.newInstance(10000000, 1);
  TezCounters counters = new TezCounters();
  String uniqueId = UUID.randomUUID().toString();
  int dagId = 1;
  String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
      TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
  OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
  Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
      shouldCompress, -1);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);
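  // With the final merge disabled and pipelined shuffle enabled, each completed spill is expected to be
  // reported to consumers immediately via events, which is why close() further down returns no events of its own.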
  CompressionCodec codec = null;
  if (shouldCompress) {
    codec = new DefaultCodec();
    ((Configurable) codec).setConf(conf);
  }
  int numOutputs = numPartitions;
  long availableMemory = 2048;
  int numRecordsWritten = 0;
  UnorderedPartitionedKVWriter kvWriter =
      new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
  int sizePerBuffer = kvWriter.sizePerBuffer;
  // IntW + LongW
  int sizePerRecord = 4 + 8;
  // Record + META_OVERHEAD
  int sizePerRecordWithOverhead = sizePerRecord + 12;
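  // Each buffered record therefore occupies 24 bytes: 12 bytes of serialized key/value plus 12 bytes of metadata.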
  BitSet partitionsWithData = new BitSet(numPartitions);
  IntWritable intWritable = new IntWritable();
  LongWritable longWritable = new LongWritable();
  for (int i = 0; i < numRecords; i++) {
    intWritable.set(i);
    longWritable.set(i);
    int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
    if (skippedPartitions != null && skippedPartitions.contains(partition)) {
      continue;
    }
    partitionsWithData.set(partition);
    kvWriter.write(intWritable, longWritable);
    numRecordsWritten++;
  }
  int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
  int numExpectedSpills = numRecordsWritten / recordsPerBuffer;
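  // Each full buffer triggers a spill, so numExpectedSpills is used below as a lower bound on sendEvents() calls.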
  ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
  List<Event> lastEvents = kvWriter.close();
  if (numPartitions == 1) {
    assertEquals(false, kvWriter.skipBuffers);
  }
  // no events are sent to kvWriter upon close with pipelining
  assertTrue(lastEvents.size() == 0);
  verify(outputContext, atLeast(numExpectedSpills)).sendEvents(eventCaptor.capture());
  int numOfCapturedEvents = eventCaptor.getAllValues().size();
  lastEvents = eventCaptor.getAllValues().get(numOfCapturedEvents - 1);
  VertexManagerEvent VMEvent = (VertexManagerEvent) lastEvents.get(0);
  for (int i = 0; i < numOfCapturedEvents; i++) {
    List<Event> events = eventCaptor.getAllValues().get(i);
    if (i < numOfCapturedEvents - 1) {
      assertTrue(events.size() == 1);
      assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    } else {
      assertTrue(events.size() == 2);
      assertTrue(events.get(0) instanceof VertexManagerEvent);
      assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    }
  }
  verifyPartitionStats(VMEvent, partitionsWithData);
  verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class),
      any(String.class));
  assertNull(kvWriter.currentBuffer);
  assertEquals(0, kvWriter.availableBuffers.size());
  // Verify the counters
  TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
  TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
  TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
  TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
  assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
  assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
  long fileOutputBytes = fileOutputBytesCounter.getValue();
  if (numRecordsWritten > 0) {
    assertTrue(fileOutputBytes > 0);
    if (!shouldCompress) {
      assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
    }
  } else {
    assertEquals(0, fileOutputBytes);
  }
  // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
  assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
  long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
  long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
  // No additional spill bytes are written when the final merge is disabled.
  assertEquals(0, additionalSpillBytesWritten);
  // Nothing extra is read back either, so bytes read should match bytes written (both zero).
  assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
  // No additional spills when the final merge is disabled.
  assertEquals(0, numAdditionalSpillsCounter.getValue());
  assertTrue(lastEvents.size() > 0);
  // Get the last event
  int index = lastEvents.size() - 1;
  assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
  CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
  assertEquals(0, cdme.getSourceIndexStart());
  assertEquals(numOutputs, cdme.getCount());
  DataMovementEventPayloadProto eventProto =
      DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
  // Ensure that this is the last event
  assertTrue(eventProto.getLastEvent());
  verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
  verify(outputContext, atLeast(1)).notifyProgress();
  // Verify if all spill files are available.
  TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
  if (numRecordsWritten > 0) {
    int numSpills = kvWriter.numSpills.get();
    for (int i = 0; i < numSpills; i++) {
      Path outputFile = taskOutput.getSpillFileForWrite(i, 10);
      Path indexFile = taskOutput.getSpillIndexFileForWrite(i, 10);
      assertTrue(localFs.exists(outputFile));
      assertTrue(localFs.exists(indexFile));
      assertEquals("Incorrect output permissions", (short) 0640,
          localFs.getFileStatus(outputFile).getPermission().toShort());
      assertEquals("Incorrect index permissions", (short) 0640,
          localFs.getFileStatus(indexFile).getPermission().toShort());
    }
  } else {
    return;
  }
}
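A concrete test case drives this helper with specific record and partition counts. The invocation below is a minimal illustrative sketch; the parameter values are not taken from the project's test suite.

@Test(timeout = 10000)
public void testPipelinedTransferExample() throws IOException, InterruptedException {
  // Hypothetical values: 100 records over 10 partitions, no skipped partitions, no compression.
  baseTestWithPipelinedTransfer(100, 10, null, false);
}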
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestUnorderedPartitionedKVWriter method createMockOutputContext.
private OutputContext createMockOutputContext(TezCounters counters, ApplicationId appId,
    String uniqueId, String auxiliaryService) {
  OutputContext outputContext = mock(OutputContext.class);
  doReturn(counters).when(outputContext).getCounters();
  doReturn(appId).when(outputContext).getApplicationId();
  doReturn(1).when(outputContext).getDAGAttemptNumber();
  doReturn("dagName").when(outputContext).getDAGName();
  doReturn("destinationVertexName").when(outputContext).getDestinationVertexName();
  doReturn(1).when(outputContext).getOutputIndex();
  doReturn(1).when(outputContext).getTaskAttemptNumber();
  doReturn(1).when(outputContext).getTaskIndex();
  doReturn(1).when(outputContext).getTaskVertexIndex();
  doReturn("vertexName").when(outputContext).getTaskVertexName();
  doReturn(uniqueId).when(outputContext).getUniqueIdentifier();
  doAnswer(new Answer<ByteBuffer>() {
    @Override
    public ByteBuffer answer(InvocationOnMock invocation) throws Throwable {
      ByteBuffer portBuffer = ByteBuffer.allocate(4);
      portBuffer.mark();
      portBuffer.putInt(SHUFFLE_PORT);
      portBuffer.reset();
      return portBuffer;
    }
  }).when(outputContext).getServiceProviderMetaData(eq(auxiliaryService));
  Path outDirBase = new Path(TEST_ROOT_DIR, "outDir_" + uniqueId);
  String[] outDirs = new String[] { outDirBase.toString() };
  doReturn(outDirs).when(outputContext).getWorkDirs();
  return outputContext;
}
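The mark/putInt/reset sequence in the answer above leaves the buffer positioned at the start of the four bytes just written, so the consumer can read the shuffle port straight back with getInt(). A standalone sketch of the same pattern (variable names and the port value are illustrative):

ByteBuffer portBuffer = ByteBuffer.allocate(4);
portBuffer.mark();               // remember position 0
portBuffer.putInt(12345);        // write 4 bytes, advancing the position to 4
portBuffer.reset();              // rewind to the mark
int port = portBuffer.getInt();  // reads back 12345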
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestOnFileSortedOutput method testSortBufferSize.
@Test(timeout = 5000)
public void testSortBufferSize() throws Exception {
  OutputContext context = createTezOutputContext();
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 2048);
  UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
  doReturn(payLoad).when(context).getUserPayload();
  sortedOutput = new OrderedPartitionedKVOutput(context, partitions);
  try {
    // Memory limit checks are done in the sorter implementations; e.g. DefaultSorter does not support > 2 GB.
    sortedOutput.initialize();
    DefaultSorter sorter = new DefaultSorter(context, conf, 100, 3500 * 1024 * 1024L);
    fail();
  } catch (IllegalArgumentException e) {
    assertTrue(e.getMessage().contains(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB));
  }
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 0);
  payLoad = TezUtils.createUserPayloadFromConf(conf);
  doReturn(payLoad).when(context).getUserPayload();
  sortedOutput = new OrderedPartitionedKVOutput(context, partitions);
  try {
    sortedOutput.initialize();
    fail();
  } catch (IllegalArgumentException e) {
    assertTrue(e.getMessage().contains(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB));
  }
}
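By contrast, a sort buffer inside the allowed range should let initialization succeed. A minimal sketch along the same lines as the test above (the 100 MB figure is simply an illustrative in-range value):

conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 100);
UserPayload okPayload = TezUtils.createUserPayloadFromConf(conf);
doReturn(okPayload).when(context).getUserPayload();
sortedOutput = new OrderedPartitionedKVOutput(context, partitions);
// Expected to initialize without throwing IllegalArgumentException.
sortedOutput.initialize();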
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestOnFileSortedOutput method createTezOutputContext.
private OutputContext createTezOutputContext() throws IOException {
  String[] workingDirs = { workingDir.toString() };
  UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
  DataOutputBuffer serviceProviderMetaData = new DataOutputBuffer();
  serviceProviderMetaData.writeInt(PORT);
  TezCounters counters = new TezCounters();
  OutputStatisticsReporter reporter = mock(OutputStatisticsReporter.class);
  doAnswer(new Answer() {
    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
      outputSize.set((Long) invocation.getArguments()[0]);
      return null;
    }
  }).when(reporter).reportDataSize(anyLong());
  doAnswer(new Answer() {
    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
      numRecords.set((Long) invocation.getArguments()[0]);
      return null;
    }
  }).when(reporter).reportItemsProcessed(anyLong());
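  // The two stubs above capture the data size and record count reported by the output for later verification.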
  OutputContext context = mock(OutputContext.class);
  doReturn(counters).when(context).getCounters();
  doReturn(workingDirs).when(context).getWorkDirs();
  doReturn(payLoad).when(context).getUserPayload();
  doReturn(5 * 1024 * 1024L).when(context).getTotalMemoryAvailableToTask();
  doReturn(UniqueID).when(context).getUniqueIdentifier();
  doReturn("v1").when(context).getDestinationVertexName();
  doReturn(ByteBuffer.wrap(serviceProviderMetaData.getData())).when(context).getServiceProviderMetaData(
      conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
          TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT));
  doAnswer(new Answer() {
    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
      long requestedSize = (Long) invocation.getArguments()[0];
      MemoryUpdateCallbackHandler callback = (MemoryUpdateCallbackHandler) invocation.getArguments()[1];
      callback.memoryAssigned(requestedSize);
      return null;
    }
  }).when(context).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class));
  ExecutionContext executionContext = mock(ExecutionContext.class);
  doReturn(HOST).when(executionContext).getHostName();
  doReturn(reporter).when(context).getStatisticsReporter();
  doReturn(executionContext).when(context).getExecutionContext();
  return context;
}
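The requestInitialMemory stub above simply grants whatever amount the output asks for. Assuming MemoryUpdateCallbackHandler exposes the assigned value via getMemoryAssigned() (as in Tez's runtime library), the effect can be pictured roughly as follows; the 10 MB request is an illustrative value:

MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
context.requestInitialMemory(10 * 1024 * 1024L, handler);
// With the stubbed answer, the handler should report exactly the requested 10 MB.
assertEquals(10 * 1024 * 1024L, handler.getMemoryAssigned());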
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestOrderedPartitionedKVOutput2 method testClose.
@Test(timeout = 10000)
public void testClose() throws Exception {
  OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, workingDir);
  int numPartitions = 10;
  OrderedPartitionedKVOutput output = new OrderedPartitionedKVOutput(outputContext, numPartitions);
  output.initialize();
  output.start();
  output.close();
}
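Output#close() returns a List<Event>, so a slight variant of the final call could capture the result to check that closing an output with no records still yields a well-formed event list; the assertion is illustrative rather than taken from the project:

List<Event> events = output.close();
// Even with no records written, the returned event list should not be null.
assertNotNull(events);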