Use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
From the class TestUnorderedPartitionedKVWriter, method baseTestWithFinalMergeDisabled:
@SuppressWarnings("unchecked")
private void baseTestWithFinalMergeDisabled(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
PartitionerForTest partitioner = new PartitionerForTest();
ApplicationId appId = ApplicationId.newInstance(10000000, 1);
TezCounters counters = new TezCounters();
String uniqueId = UUID.randomUUID().toString();
int dagId = 1;
String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, -1);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, false);
CompressionCodec codec = null;
if (shouldCompress) {
codec = new DefaultCodec();
((Configurable) codec).setConf(conf);
}
int numOutputs = numPartitions;
long availableMemory = 2048;
int numRecordsWritten = 0;
UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
int sizePerBuffer = kvWriter.sizePerBuffer;
// IntW + LongW
int sizePerRecord = 4 + 8;
// Record + META_OVERHEAD
int sizePerRecordWithOverhead = sizePerRecord + 12;
BitSet partitionsWithData = new BitSet(numPartitions);
IntWritable intWritable = new IntWritable();
LongWritable longWritable = new LongWritable();
for (int i = 0; i < numRecords; i++) {
intWritable.set(i);
longWritable.set(i);
int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
if (skippedPartitions != null && skippedPartitions.contains(partition)) {
continue;
}
partitionsWithData.set(partition);
kvWriter.write(intWritable, longWritable);
numRecordsWritten++;
}
int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
int numExpectedSpills = numRecordsWritten / recordsPerBuffer;
ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
List<Event> lastEvents = kvWriter.close();
if (numPartitions == 1) {
assertEquals(true, kvWriter.skipBuffers);
}
// Max events sent are spills + one VM event. If there are no spills, at least the
// empty partitions are reported in a final event.
int spills = Math.max(1, kvWriter.numSpills.get());
// spills + VMEvent
assertEquals((spills + 1), lastEvents.size());
verify(outputContext, atMost(0)).sendEvents(eventCaptor.capture());
for (int i = 0; i < lastEvents.size(); i++) {
Event event = lastEvents.get(i);
if (event instanceof VertexManagerEvent) {
// Verify the partition stats reported via the VertexManagerEvent.
if (numRecordsWritten > 0) {
verifyPartitionStats(((VertexManagerEvent) event), partitionsWithData);
}
}
}
verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
assertNull(kvWriter.currentBuffer);
assertEquals(0, kvWriter.availableBuffers.size());
// Verify the counters
TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
if (outputRecordsCounter.getValue() > 0) {
assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
} else {
assertEquals(0, outputBytesWithOverheadCounter.getValue());
}
long fileOutputBytes = fileOutputBytesCounter.getValue();
if (numRecordsWritten > 0) {
assertTrue(fileOutputBytes > 0);
if (!shouldCompress) {
assertTrue("fileOutputBytes=" + fileOutputBytes + ", outputRecordBytes=" + outputRecordBytesCounter.getValue(), fileOutputBytes > outputRecordBytesCounter.getValue());
}
} else {
assertEquals(0, fileOutputBytes);
}
// Due to multiple threads, buffers could be merged in chunks in scheduleSpill.
assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
// No additional spill bytes written when final merge is disabled.
assertEquals(additionalSpillBytesWritten, 0);
// Bytes read from additional spills should match bytes written (both zero here).
assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
// No additional spills when final merge is disabled.
assertEquals(numAdditionalSpillsCounter.getValue(), 0);
assertTrue(lastEvents.size() > 0);
// Get the last event
int index = lastEvents.size() - 1;
assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
assertEquals(0, cdme.getSourceIndexStart());
assertEquals(numOutputs, cdme.getCount());
DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
if (outputRecordsCounter.getValue() > 0) {
// Ensure that this is the last event
assertTrue(eventProto.getLastEvent());
}
// Verify that all path components carry a spill id when final merge is disabled.
Pattern mergePathComponentPattern = Pattern.compile("(.*)(_\\d+)");
for (Event event : lastEvents) {
if (!(event instanceof CompositeDataMovementEvent)) {
continue;
}
cdme = (CompositeDataMovementEvent) event;
eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
assertEquals(false, eventProto.getPipelined());
if (eventProto.hasPathComponent()) {
// When final merge is disabled, the path component should carry a _spillId suffix.
Matcher matcher = mergePathComponentPattern.matcher(eventProto.getPathComponent());
assertTrue("spill id should be present in path component " + eventProto.getPathComponent(), matcher.matches());
assertEquals(2, matcher.groupCount());
assertEquals(uniqueId, matcher.group(1));
assertTrue("spill id should be present in path component", matcher.group(2) != null);
Path outputPath = new Path(outputContext.getWorkDirs()[0], "output/" + eventProto.getPathComponent() + "/" + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING);
Path indexPath = outputPath.suffix(Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
assertEquals("Incorrect output permissions", (short) 0640, localFs.getFileStatus(outputPath).getPermission().toShort());
assertEquals("Incorrect index permissions", (short) 0640, localFs.getFileStatus(indexPath).getPermission().toShort());
} else {
assertEquals(0, eventProto.getSpillId());
if (outputRecordsCounter.getValue() > 0) {
assertEquals(true, eventProto.getLastEvent());
} else {
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
BitSet emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
assertEquals(numPartitions, emptyPartitionBits.cardinality());
}
}
}
verify(outputContext, atLeast(1)).notifyProgress();
// Verify that all spill files are available.
TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
if (numRecordsWritten > 0) {
int numSpills = kvWriter.numSpills.get();
for (int i = 0; i < numSpills; i++) {
assertTrue(localFs.exists(taskOutput.getSpillFileForWrite(i, 10)));
assertTrue(localFs.exists(taskOutput.getSpillIndexFileForWrite(i, 10)));
}
} else {
return;
}
}
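The helper above is fully parameterized, so individual tests reduce to thin wrappers around it. A minimal sketch of hypothetical callers (the method names and argument values below are illustrative, not the actual tests defined in TestUnorderedPartitionedKVWriter):
// Hypothetical callers of baseTestWithFinalMergeDisabled; names and values are illustrative.
@Test(timeout = 10000)
public void testFinalMergeDisabledSinglePartition() throws IOException, InterruptedException {
  // A single partition exercises the skipBuffers path asserted in the helper.
  baseTestWithFinalMergeDisabled(10, 1, null, false);
}

@Test(timeout = 10000)
public void testFinalMergeDisabledMultipleSpillsWithCompression() throws IOException, InterruptedException {
  // Enough records to overflow sizePerBuffer several times and force multiple spills.
  baseTestWithFinalMergeDisabled(50000, 10, null, true);
}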
Use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
From the class TestOnFileUnorderedKVOutput, method testWithPipelinedShuffle:
@Test(timeout = 30000)
@SuppressWarnings("unchecked")
public void testWithPipelinedShuffle() throws Exception {
Configuration conf = new Configuration();
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 1);
TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
OutputContext outputContext = createOutputContext(conf, sharedExecutor);
UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);
List<Event> events = null;
events = kvOutput.initialize();
kvOutput.start();
assertTrue(events != null && events.size() == 0);
KeyValueWriter kvWriter = kvOutput.getWriter();
for (int i = 0; i < 500; i++) {
kvWriter.write(new Text(RandomStringUtils.randomAscii(10000)), new IntWritable(i));
}
events = kvOutput.close();
// When pipelining is on, all events are sent out by the writer itself.
assertTrue(events != null && events.size() == 0);
// Ensure that the data movement events were sent via the outputContext.
ArgumentCaptor<List> eventsCaptor = ArgumentCaptor.forClass(List.class);
verify(outputContext, atLeast(1)).sendEvents(eventsCaptor.capture());
events = eventsCaptor.getValue();
CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
assertTrue(shufflePayload.hasLastEvent());
assertFalse(shufflePayload.hasEmptyPartitions());
assertEquals(shufflePort, shufflePayload.getPort());
assertEquals("localhost", shufflePayload.getHost());
sharedExecutor.shutdownNow();
}
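Because the writer pushes events itself under pipelined shuffle, each spill results in its own sendEvents call; a hedged sketch (reusing the mocked outputContext and captor from the test above) of walking every captured batch instead of only the last one:
// Sketch: inspect every event batch the writer sent, not just the last captured value.
for (List<Event> batch : eventsCaptor.getAllValues()) {
  for (Event e : batch) {
    if (e instanceof CompositeDataMovementEvent) {
      DataMovementEventPayloadProto payload = DataMovementEventPayloadProto.parseFrom(
          ByteString.copyFrom(((CompositeDataMovementEvent) e).getUserPayload()));
      // With pipelined shuffle enabled, each payload is expected to carry the pipelined flag.
      assertTrue(payload.getPipelined());
    }
  }
}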
Use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
From the class TestOnFileUnorderedKVOutput, method createOutputContext:
private OutputContext createOutputContext(Configuration conf, TezSharedExecutor sharedExecutor) throws IOException {
int appAttemptNumber = 1;
TezUmbilical tezUmbilical = mock(TezUmbilical.class);
String dagName = "currentDAG";
String taskVertexName = "currentVertex";
String destinationVertexName = "destinationVertex";
TezDAGID dagID = TezDAGID.getInstance("2000", 1, 1);
TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
TezTaskID taskID = TezTaskID.getInstance(vertexID, 1);
TezTaskAttemptID taskAttemptID = TezTaskAttemptID.getInstance(taskID, 1);
UserPayload userPayload = TezUtils.createUserPayloadFromConf(conf);
TaskSpec mockSpec = mock(TaskSpec.class);
when(mockSpec.getInputs()).thenReturn(Collections.singletonList(mock(InputSpec.class)));
when(mockSpec.getOutputs()).thenReturn(Collections.singletonList(mock(OutputSpec.class)));
task = new LogicalIOProcessorRuntimeTask(mockSpec, appAttemptNumber, new Configuration(), new String[] { "/" }, tezUmbilical, null, null, null, null, "", null, 1024, false, new DefaultHadoopShim(), sharedExecutor);
LogicalIOProcessorRuntimeTask runtimeTask = spy(task);
Map<String, String> auxEnv = new HashMap<String, String>();
ByteBuffer bb = ByteBuffer.allocate(4);
bb.putInt(shufflePort);
bb.position(0);
AuxiliaryServiceHelper.setServiceDataIntoEnv(conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT), bb, auxEnv);
OutputDescriptor outputDescriptor = mock(OutputDescriptor.class);
when(outputDescriptor.getClassName()).thenReturn("OutputDescriptor");
OutputContext realOutputContext = new TezOutputContextImpl(conf, new String[] { workDir.toString() }, appAttemptNumber, tezUmbilical, dagName, taskVertexName, destinationVertexName, -1, taskAttemptID, 0, userPayload, runtimeTask, null, auxEnv, new MemoryDistributor(1, 1, conf), outputDescriptor, null, new ExecutionContextImpl("localhost"), 2048, new TezSharedExecutor(defaultConf));
verify(runtimeTask, times(1)).addAndGetTezCounter(destinationVertexName);
verify(runtimeTask, times(1)).getTaskStatistics();
// Verify that the output stats object was created.
Assert.assertTrue(task.getTaskStatistics().getIOStatistics().containsKey(destinationVertexName));
OutputContext outputContext = spy(realOutputContext);
doAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) throws Throwable {
long requestedSize = (Long) invocation.getArguments()[0];
MemoryUpdateCallbackHandler callback = (MemoryUpdateCallbackHandler) invocation.getArguments()[1];
callback.memoryAssigned(requestedSize);
return null;
}
}).when(outputContext).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class));
return outputContext;
}
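The shuffle port is round-tripped through the task environment via AuxiliaryServiceHelper; a minimal sketch, assuming the same serviceId and auxEnv map built above, of how the consumer side reads it back:
// Sketch: reading the shuffle port back out of the auxiliary service data placed in auxEnv.
String serviceId = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
    TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
ByteBuffer serviceData = AuxiliaryServiceHelper.getServiceDataFromEnv(serviceId, auxEnv);
int publishedPort = serviceData.getInt();  // the same int written with bb.putInt(shufflePort)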
Use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
From the class TestOnFileSortedOutput, method _testPipelinedShuffle:
private void _testPipelinedShuffle(String sorterName) throws Exception {
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 3);
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, sorterName);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);
OutputContext context = createTezOutputContext();
UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
doReturn(payLoad).when(context).getUserPayload();
sortedOutput = new OrderedPartitionedKVOutput(context, partitions);
sortedOutput.initialize();
sortedOutput.start();
assertFalse(sortedOutput.finalMergeEnabled);
assertTrue(sortedOutput.pipelinedShuffle);
}
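The sorter name is the only variable here, so callers are one-liners; a hedged sketch of hypothetical callers, assuming "LEGACY" and "PIPELINED" are the values accepted by TEZ_RUNTIME_SORTER_CLASS:
// Hypothetical callers; "LEGACY" and "PIPELINED" are the assumed sorter names.
@Test(timeout = 5000)
public void testPipelinedShuffleWithLegacySorter() throws Exception {
  _testPipelinedShuffle("LEGACY");
}

@Test(timeout = 5000)
public void testPipelinedShuffleWithPipelinedSorter() throws Exception {
  _testPipelinedShuffle("PIPELINED");
}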
Use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
From the class TestOnFileSortedOutput, method startSortedOutput:
private void startSortedOutput(int partitions) throws Exception {
OutputContext context = createTezOutputContext();
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 4);
UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
doReturn(payLoad).when(context).getUserPayload();
sortedOutput = new OrderedPartitionedKVOutput(context, partitions);
sortedOutput.initialize();
sortedOutput.start();
writer = sortedOutput.getWriter();
}
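A test built on startSortedOutput would follow it with writes and a close to collect the data movement events; a minimal sketch, assuming Text keys and values (the actual key/value classes come from the conf set up elsewhere in the test class):
// Sketch of a typical follow-on; key/value types are assumptions and must match the conf.
startSortedOutput(2);
for (int i = 0; i < 100; i++) {
  writer.write(new Text("key_" + i), new Text("value_" + i));
}
List<Event> events = sortedOutput.close();  // close() returns the generated events
assertTrue(events.size() > 0);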