use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestShuffleVertexManager method testLargeDataSize.
/**
 * Exercises the vertex manager with very large reported source output sizes.
 * <p>
 * Phase one reports 5000 bytes from a source task and verifies that
 * {@code reconfigureVertex} is never invoked and that all 4 managed tasks are scheduled once
 * the third source vertex is configured.
 * <p>
 * Phase two reports two outputs of {@code Long.MAX_VALUE >> 1} bytes each and verifies that the
 * accumulated output size is tracked exactly while the vertex is reconfigured to 2 tasks.
 *
 * @throws IOException propagated from the test helpers that build vertex manager events
 */
@Test(timeout = 5000)
public void testLargeDataSize() throws IOException {
  Configuration conf = new Configuration();
  ShuffleVertexManagerBase manager;
  final String mockSrcVertexId1 = "Vertex1";
  final String mockSrcVertexId2 = "Vertex2";
  final String mockSrcVertexId3 = "Vertex3";
  final String mockManagedVertexId = "Vertex4";
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
  final VertexManagerPluginContext mockContext = createVertexManagerContext(mockSrcVertexId1, 2, mockSrcVertexId2, 2, mockSrcVertexId3, 2, mockManagedVertexId, 4, scheduledTasks, newEdgeManagers);
  VertexManagerEvent vmEvent = getVertexManagerEvent(null, 5000L, mockSrcVertexId1);

  // Phase one: parallelism must not change because of the large data size.
  manager = createManager(conf, mockContext, 0.1f, 0.1f);
  // Tez notified of reconfig
  verify(mockContext, times(1)).vertexReconfigurationPlanned();
  manager.onVertexStarted(emptyCompletions);
  // nothing scheduled yet: all 4 managed tasks are still pending
  Assert.assertEquals(4, manager.pendingTasks.size());
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED));
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0));
  verify(mockContext, times(0)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  verify(mockContext, times(0)).doneReconfiguringVertex();
  // configuring the last source vertex triggers scheduling
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED));
  Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
  verify(mockContext, times(0)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  // reconfig done
  verify(mockContext, times(1)).doneReconfiguringVertex();
  // all tasks scheduled
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(4, scheduledTasks.size());
  // TODO TEZ-1714 locking verify(mockContext, times(2)).vertexManagerDone(); // notified after scheduling all tasks
  Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(5000L, manager.completedSourceTasksOutputSize);
  scheduledTasks.clear();

  // Phase two: ensure long overflow does not cause a mistaken reduction.
  // Previously an overflow could occur when (output size * number of tasks) for a single
  // vertex exceeded Long.MAX_VALUE.
  manager = createManager(conf, mockContext, true, (long) (Long.MAX_VALUE / 1.5), 1.0f, 1.0f);
  manager.onVertexStarted(emptyCompletions);
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED));
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
  // task completion from non-bipartite stage does nothing
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId3, 0));
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

  // First task of source 1 completes (zero output)
  vmEvent = getVertexManagerEvent(null, 0L, mockSrcVertexId1);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(0L, manager.completedSourceTasksOutputSize);

  // Second task of source 1 completes (zero output)
  vmEvent = getVertexManagerEvent(null, 0L, mockSrcVertexId1);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 1));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(0L, manager.completedSourceTasksOutputSize);

  // First task of source 2 completes with half of Long.MAX_VALUE
  vmEvent = getVertexManagerEvent(null, Long.MAX_VALUE >> 1, mockSrcVertexId2);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(3, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(Long.MAX_VALUE >> 1, manager.completedSourceTasksOutputSize);

  // Second task of source 2 completes with another half of Long.MAX_VALUE
  vmEvent = getVertexManagerEvent(null, Long.MAX_VALUE >> 1, mockSrcVertexId2);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 1));
  // Auto-reduce is triggered exactly once, down to 2 tasks
  verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap());
  Assert.assertEquals(2, newEdgeManagers.size());
  // all tasks scheduled
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(2, scheduledTasks.size());
  Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(0)));
  Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(1)));
  Assert.assertEquals(4, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(4, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(Long.MAX_VALUE >> 1 << 1, manager.completedSourceTasksOutputSize);

  // NOTE(review): the re-stubbing and clear() below are not followed by any further checks in
  // this method; they look like leftovers from a removed follow-up scenario - confirm before
  // deleting.
  when(mockContext.getVertexNumTasks(mockSrcVertexId1)).thenReturn(2);
  when(mockContext.getVertexNumTasks(mockSrcVertexId2)).thenReturn(2);
  when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(4);
  scheduledTasks.clear();
}
use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestShuffleVertexManagerBase method test_Tez1649_with_scatter_gather_edges.
/**
 * Tasks should be scheduled only when all source vertices are configured completely.
 * <p>
 * The managed vertex R2 has three scatter-gather inputs (R1, M2, M3). The first scenario
 * completes source tasks while R1 is still unconfigured and verifies that nothing is scheduled
 * until R1 reports {@code CONFIGURED}. The second scenario repeats the setup with R1 and M2
 * reporting zero tasks.
 *
 * @throws IOException propagated from the test helpers that build vertex manager events
 */
@Test(timeout = 5000)
public void test_Tez1649_with_scatter_gather_edges() throws IOException {
  Configuration conf = new Configuration();
  ShuffleVertexManagerBase manager;
  HashMap<String, EdgeProperty> mockInputVertices_R2 = new HashMap<String, EdgeProperty>();
  String r1 = "R1";
  EdgeProperty eProp1 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  String m2 = "M2";
  EdgeProperty eProp2 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  String m3 = "M3";
  EdgeProperty eProp3 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  final String mockManagedVertexId_R2 = "R2";
  mockInputVertices_R2.put(r1, eProp1);
  mockInputVertices_R2.put(m2, eProp2);
  mockInputVertices_R2.put(m3, eProp3);
  final VertexManagerPluginContext mockContext_R2 = mock(VertexManagerPluginContext.class);
  when(mockContext_R2.getInputVertexEdgeProperties()).thenReturn(mockInputVertices_R2);
  when(mockContext_R2.getVertexName()).thenReturn(mockManagedVertexId_R2);
  when(mockContext_R2.getVertexNumTasks(mockManagedVertexId_R2)).thenReturn(3);
  when(mockContext_R2.getVertexNumTasks(r1)).thenReturn(3);
  when(mockContext_R2.getVertexNumTasks(m2)).thenReturn(3);
  when(mockContext_R2.getVertexNumTasks(m3)).thenReturn(3);
  VertexManagerEvent vmEvent = getVertexManagerEvent(null, 50L, r1);

  // check initialization
  manager = createManager(conf, mockContext_R2, 0.001f, 0.001f);
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  doAnswer(new ScheduledTasksAnswer(scheduledTasks)).when(mockContext_R2).scheduleTasks(anyList());
  manager.onVertexStarted(emptyCompletions);
  Assert.assertTrue(manager.bipartiteSources == 3);
  // only m2 and m3 are configured at this point; r1 is deliberately left unconfigured
  manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED));
  manager.onVertexManagerEventReceived(vmEvent);
  // no tasks scheduled; only the tasks of the two configured sources are counted (3 + 3)
  Assert.assertEquals(3, manager.pendingTasks.size());
  Assert.assertEquals(6, manager.totalNumBipartiteSourceTasks);
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

  // Send events for all tasks of m3.
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m3, 0));
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m3, 1));
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m3, 2));
  // no tasks scheduled
  Assert.assertEquals(3, manager.pendingTasks.size());
  Assert.assertEquals(6, manager.totalNumBipartiteSourceTasks);

  // Send events for m2. But still we need to wait for at least 1 event from r1.
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m2, 0));
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m2, 1));
  // no tasks scheduled
  Assert.assertEquals(3, manager.pendingTasks.size());
  Assert.assertEquals(6, manager.totalNumBipartiteSourceTasks);

  // we need to wait for at least 1 event from r1 to make sure all vertices cross min threshold
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(r1, 0));
  // still no tasks scheduled, because r1 has not been configured yet
  Assert.assertEquals(3, manager.pendingTasks.size());
  Assert.assertEquals(6, manager.totalNumBipartiteSourceTasks);
  // Ensure that reconfigureVertex is not called for R2.
  verify(mockContext_R2, times(0)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());

  // ShuffleVertexManager's updatePendingTasks relies on getVertexNumTasks. Setting this for test
  when(mockContext_R2.getVertexNumTasks(mockManagedVertexId_R2)).thenReturn(1);
  // complete configuration of r1 triggers the scheduling
  manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED));
  Assert.assertEquals(9, manager.totalNumBipartiteSourceTasks);
  verify(mockContext_R2, times(1)).reconfigureVertex(eq(1), any(VertexLocationHint.class), anyMap());
  // all tasks scheduled
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(1, scheduledTasks.size());

  // try with zero task vertices
  scheduledTasks.clear();
  when(mockContext_R2.getInputVertexEdgeProperties()).thenReturn(mockInputVertices_R2);
  when(mockContext_R2.getVertexName()).thenReturn(mockManagedVertexId_R2);
  when(mockContext_R2.getVertexNumTasks(mockManagedVertexId_R2)).thenReturn(3);
  when(mockContext_R2.getVertexNumTasks(r1)).thenReturn(0);
  when(mockContext_R2.getVertexNumTasks(m2)).thenReturn(0);
  when(mockContext_R2.getVertexNumTasks(m3)).thenReturn(3);
  manager = createManager(conf, mockContext_R2, 0.001f, 0.001f);
  manager.onVertexStarted(emptyCompletions);
  // no tasks scheduled
  Assert.assertEquals(3, manager.pendingTasks.size());
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
  // All three sources report CONFIGURED; r1 and m2 are stubbed with zero tasks, so only the
  // 3 tasks of m3 are counted.
  manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED));
  Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m3, 0));
  // all tasks scheduled
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(3, scheduledTasks.size());
}
use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestShuffleVertexManagerBase method testTez978.
/**
 * Delay determining parallelism until enough data has been received.
 * <p>
 * The first scenario feeds two 1-byte events and checks that
 * {@code determineParallelismAndApply} returns {@code false}, then feeds a 160 * MB event and
 * checks that parallelism is reconfigured to 2. The second scenario uses 20 tasks per source
 * vertex with a max fraction of 0.2 and checks that a second reconfiguration happens only after
 * the final source task completion of the sequence.
 *
 * @throws IOException propagated from the test helpers that build vertex manager events
 */
@Test(timeout = 5000)
public void testTez978() throws IOException {
  Configuration conf = new Configuration();
  ShuffleVertexManagerBase manager;
  final String mockSrcVertexId1 = "Vertex1";
  final String mockSrcVertexId2 = "Vertex2";
  final String mockSrcVertexId3 = "Vertex3";
  final String mockManagedVertexId = "Vertex4";
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  final VertexManagerPluginContext mockContext = createVertexManagerContext(mockSrcVertexId1, 2, mockSrcVertexId2, 2, mockSrcVertexId3, 2, mockManagedVertexId, 4, scheduledTasks, null);
  // min/max fraction of 0.01/0.75 would ensure that we hit determineParallelism code path on receiving first event itself.
  manager = createManager(conf, mockContext, 0.01f, 0.75f);
  manager.onVertexStarted(emptyCompletions);
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED));
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

  // First task in src1 completed with small payload
  VertexManagerEvent vmEvent = getVertexManagerEvent(null, 1L, mockSrcVertexId1);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
  Assert.assertFalse(manager.determineParallelismAndApply(0f));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(1L, manager.completedSourceTasksOutputSize);

  // First task in src2 completed with small payload
  vmEvent = getVertexManagerEvent(null, 1L, mockSrcVertexId2);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0));
  // Still overall data gathered has not reached threshold; So, ensure parallelism can be determined later
  Assert.assertFalse(manager.determineParallelismAndApply(0.25f));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(2, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(2L, manager.completedSourceTasksOutputSize);

  // A further src2 event with a larger payload triggers determining parallelism
  vmEvent = getVertexManagerEvent(null, 160 * MB, mockSrcVertexId2);
  manager.onVertexManagerEventReceived(vmEvent);
  // ensure parallelism is determined
  Assert.assertTrue(manager.determineParallelismAndApply(0.25f));
  verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap());
  // src2 task 0 is reported as completed a second time; the completed-task count asserted
  // below stays at 2
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0));
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(2, scheduledTasks.size());
  Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(3, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(160 * MB + 2, manager.completedSourceTasksOutputSize);

  // Test for max fraction. Min fraction is just instruction to framework, but honor max fraction
  when(mockContext.getVertexNumTasks(mockSrcVertexId1)).thenReturn(20);
  when(mockContext.getVertexNumTasks(mockSrcVertexId2)).thenReturn(20);
  when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(40);
  scheduledTasks.clear();
  // min/max fraction of 0.0/0.2
  manager = createManager(conf, mockContext, 0.0f, 0.2f);
  // reconfigureVertex has been invoked exactly once so far (by the first scenario above)
  verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  manager.onVertexStarted(emptyCompletions);
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED));
  // no tasks scheduled
  Assert.assertEquals(40, manager.pendingTasks.size());
  Assert.assertEquals(40, manager.totalNumBipartiteSourceTasks);
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

  // send 8 events with payload size as 10MB
  for (int i = 0; i < 8; i++) {
    // small payload - create new event each time or it will be ignored (from same task)
    manager.onVertexManagerEventReceived(getVertexManagerEvent(null, 10 * MB, mockSrcVertexId1));
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, i));
    // should not change parallelism
    verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  }
  for (int i = 0; i < 3; i++) {
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, i));
    // still no additional reconfiguration
    verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  }
  // Since max threshold (40 * 0.2 = 8) is met, vertex manager should determine parallelism
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 8));
  // parallelism updated: a second reconfigureVertex call in total
  verify(mockContext, times(2)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  // check exact update value - both reconfigurations (first and second scenario) requested
  // 2 tasks
  verify(mockContext, times(2)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap());
}
use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestShuffleVertexManagerUtils method getVertexManagerEvent.
/**
 * Builds a {@link VertexManagerEvent} whose payload is a serialized
 * {@code VertexManagerEventPayloadProto}.
 * <p>
 * Each call consumes one value of the {@code taskId} counter for the producer attempt
 * identifier, so successive events are attributed to different tasks.
 *
 * @param partitionSizes per-partition sizes; when non-null the reported output size is
 *        {@code estimatedUncompressedSum(partitionSizes)} and partition statistics are
 *        attached to the payload; may be {@code null}
 * @param uncompressedTotalSize output size to report; used only when {@code partitionSizes}
 *        is {@code null}
 * @param vertexName vertex name passed to {@code VertexManagerEvent.create} and recorded in
 *        the producer attempt identifier
 * @param reportDetailedStats when {@code true} (and {@code partitionSizes} is non-null) the
 *        detailed partition stats are attached; otherwise the compressed bitmap stats are
 * @return the event, with its producer attempt identifier set
 * @throws IOException if serializing the partition statistics fails
 */
VertexManagerEvent getVertexManagerEvent(long[] partitionSizes, long uncompressedTotalSize, String vertexName, boolean reportDetailedStats) throws IOException {
  VertexManagerEventPayloadProto.Builder payloadBuilder = VertexManagerEventPayloadProto.newBuilder();
  if (partitionSizes == null) {
    payloadBuilder.setOutputSize(uncompressedTotalSize);
  } else {
    // Use partition sizes to compute the total size.
    payloadBuilder.setOutputSize(estimatedUncompressedSum(partitionSizes));
    if (reportDetailedStats) {
      payloadBuilder.setDetailedPartitionStats(ShuffleUtils.getDetailedPartitionStatsForPhysicalOutput(partitionSizes));
    } else {
      // The bitmap form is only built when it is actually attached to the payload.
      RoaringBitmap partitionStats = ShuffleUtils.getPartitionStatsForPhysicalOutput(partitionSizes);
      DataOutputBuffer dout = new DataOutputBuffer();
      partitionStats.serialize(dout);
      // NOTE(review): getData() is passed without getLength(); presumably the consumer
      // tolerates trailing bytes of the backing array - confirm.
      ByteString partitionStatsBytes = TezCommonUtils.compressByteArrayToByteString(dout.getData());
      payloadBuilder.setPartitionStats(partitionStatsBytes);
    }
  }
  ByteBuffer payload = payloadBuilder.build().toByteString().asReadOnlyByteBuffer();
  TaskAttemptIdentifierImpl taId = new TaskAttemptIdentifierImpl("dag", vertexName, TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, taskId++), 0));
  VertexManagerEvent vmEvent = VertexManagerEvent.create(vertexName, payload);
  vmEvent.setProducerAttemptIdentifier(taId);
  return vmEvent;
}
use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestUnorderedPartitionedKVWriter method baseTestWithPipelinedTransfer.
/**
 * Writes {@code numRecords} int/long records through an unordered partitioned writer with
 * pipelined shuffle enabled and final merge disabled, then verifies the events sent through
 * the output context, the task counters, and the spill files on the local file system.
 *
 * @param numRecords number of records to generate
 * @param numPartitions number of output partitions
 * @param skippedPartitions partitions whose records are not written; may be {@code null}
 * @param shouldCompress passed to {@code createConfiguration}; when {@code false} the physical
 *        output is additionally expected to be larger than the raw record bytes
 * @throws IOException propagated from the writer or the local file system
 * @throws InterruptedException propagated from the writer
 */
@SuppressWarnings("unchecked")
private void baseTestWithPipelinedTransfer(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
  PartitionerForTest partitioner = new PartitionerForTest();
  ApplicationId appId = ApplicationId.newInstance(10000000, 1);
  TezCounters counters = new TezCounters();
  String uniqueId = UUID.randomUUID().toString();
  int dagId = 1;
  String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
  OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
  Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, -1);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);

  int numOutputs = numPartitions;
  long availableMemory = 2048;
  int numRecordsWritten = 0;
  UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
  int sizePerBuffer = kvWriter.sizePerBuffer;
  // IntW + LongW
  int sizePerRecord = 4 + 8;
  // Record + META_OVERHEAD
  int sizePerRecordWithOverhead = sizePerRecord + 12;
  BitSet partitionsWithData = new BitSet(numPartitions);
  IntWritable intWritable = new IntWritable();
  LongWritable longWritable = new LongWritable();
  for (int i = 0; i < numRecords; i++) {
    intWritable.set(i);
    longWritable.set(i);
    int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
    if (skippedPartitions != null && skippedPartitions.contains(partition)) {
      continue;
    }
    partitionsWithData.set(partition);
    kvWriter.write(intWritable, longWritable);
    numRecordsWritten++;
  }
  int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
  int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

  ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
  List<Event> lastEvents = kvWriter.close();
  if (numPartitions == 1) {
    assertEquals(false, kvWriter.skipBuffers);
  }
  // no events are returned by kvWriter.close() with pipelining; they go through sendEvents
  assertEquals(0, lastEvents.size());
  verify(outputContext, atLeast(numExpectedSpills)).sendEvents(eventCaptor.capture());
  List<List> capturedBatches = eventCaptor.getAllValues();
  int numOfCapturedEvents = capturedBatches.size();
  // guard the index arithmetic below so an empty capture fails as an assertion
  assertTrue(numOfCapturedEvents > 0);
  for (int i = 0; i < numOfCapturedEvents; i++) {
    List<Event> events = capturedBatches.get(i);
    if (i < numOfCapturedEvents - 1) {
      // every batch but the last carries a single data movement event
      assertEquals(1, events.size());
      assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    } else {
      // the last batch carries the vertex manager event followed by a data movement event
      assertEquals(2, events.size());
      assertTrue(events.get(0) instanceof VertexManagerEvent);
      assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    }
  }
  // cast only after the instanceof checks above have passed
  lastEvents = capturedBatches.get(numOfCapturedEvents - 1);
  VertexManagerEvent vmEvent = (VertexManagerEvent) lastEvents.get(0);
  verifyPartitionStats(vmEvent, partitionsWithData);
  verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
  assertNull(kvWriter.currentBuffer);
  assertEquals(0, kvWriter.availableBuffers.size());

  // Verify the counters
  TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
  TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
  TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
  TezCounter additionalSpillBytesWrittenCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
  assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
  assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
  long fileOutputBytes = fileOutputBytesCounter.getValue();
  if (numRecordsWritten > 0) {
    assertTrue(fileOutputBytes > 0);
    if (!shouldCompress) {
      assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
    }
  } else {
    assertEquals(0, fileOutputBytes);
  }
  // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
  assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
  long additionalSpillBytesWritten = additionalSpillBytesWrittenCounter.getValue();
  long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
  // No additional spill bytes written when final merge is disabled.
  assertEquals(0, additionalSpillBytesWritten);
  // Bytes read must match bytes written (both zero) when final merge is disabled.
  assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead);
  // No additional spills when final merge is disabled.
  assertEquals(0, numAdditionalSpillsCounter.getValue());

  assertTrue(lastEvents.size() > 0);
  // Get the last event
  int index = lastEvents.size() - 1;
  assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
  CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
  assertEquals(0, cdme.getSourceIndexStart());
  assertEquals(numOutputs, cdme.getCount());
  DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
  // Ensure that this is the last event
  assertTrue(eventProto.getLastEvent());
  verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
  verify(outputContext, atLeast(1)).notifyProgress();

  // Verify if all spill files are available.
  TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
  if (numRecordsWritten > 0) {
    int numSpills = kvWriter.numSpills.get();
    for (int i = 0; i < numSpills; i++) {
      Path outputFile = taskOutput.getSpillFileForWrite(i, 10);
      Path indexFile = taskOutput.getSpillIndexFileForWrite(i, 10);
      assertTrue(localFs.exists(outputFile));
      assertTrue(localFs.exists(indexFile));
      assertEquals("Incorrect output permissions", (short) 0640, localFs.getFileStatus(outputFile).getPermission().toShort());
      assertEquals("Incorrect index permissions", (short) 0640, localFs.getFileStatus(indexFile).getPermission().toShort());
    }
  }
}
Aggregations