use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestOnFileSortedOutput method baseTest.
/**
 * Writes a random batch of keys, each with a list of values, through the sorted output, closes
 * it, and checks the two events returned by {@code close()}: the VertexManagerEvent payload at
 * index 0 and the CompositeDataMovementEvent payload at index 1.
 */
@Test(timeout = 5000)
public void baseTest() throws Exception {
  startSortedOutput(partitions);

  // Expected counter value: the current count plus one for every value written below.
  long expectedRecords = numRecords.get();

  // NOTE: both loop bounds are re-drawn from rnd on every condition check (floored at 1 for
  // keys and 2 for values), so each loop stops as soon as a draw is not above the index.
  int keyIdx = 0;
  while (keyIdx < Math.max(1, rnd.nextInt(50))) {
    Text key = new Text(new BigInteger(256, rnd).toString());
    // Raw type kept on purpose: the parameter types of writer.write are not visible here.
    LinkedList valuesForKey = new LinkedList();
    int valueIdx = 0;
    while (valueIdx < Math.max(2, rnd.nextInt(10))) {
      expectedRecords++;
      valuesForKey.add(new Text(new BigInteger(256, rnd).toString()));
      valueIdx++;
    }
    writer.write(key, valuesForKey);
    keyIdx++;
  }

  List<Event> closeEvents = sortedOutput.close();
  assertTrue(closeEvents != null && closeEvents.size() == 2);
  assertEquals(expectedRecords, numRecords.get());

  // Second event carries the data-movement (shuffle) payload.
  CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) closeEvents.get(1);
  ShuffleUserPayloads.DataMovementEventPayloadProto payload =
      ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(
          ByteString.copyFrom(dmEvent.getUserPayload()));

  // First event carries the vertex-manager payload with the partition statistics.
  VertexManagerEvent vmEvent = (VertexManagerEvent) closeEvents.get(0);
  ShuffleUserPayloads.VertexManagerEventPayloadProto vmPayload =
      ShuffleUserPayloads.VertexManagerEventPayloadProto.parseFrom(
          ByteString.copyFrom(vmEvent.getUserPayload()));

  // Precise reporting must populate the detailed stats; otherwise the regular stats field.
  boolean hasExpectedStats = reportPartitionStats.isPrecise()
      ? vmPayload.hasDetailedPartitionStats()
      : vmPayload.hasPartitionStats();
  assertTrue(hasExpectedStats);

  assertEquals(HOST, payload.getHost());
  assertEquals(PORT, payload.getPort());
  assertEquals(UniqueID, payload.getPathComponent());
}
use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestOrderedPartitionedKVOutput2 method testNonStartedOutput.
/**
 * Initializes an OrderedPartitionedKVOutput and closes it right away, then verifies that
 * {@code close()} still returns a VertexManagerEvent followed by a CompositeDataMovementEvent
 * whose payload marks every partition as empty.
 */
@Test(timeout = 5000)
public void testNonStartedOutput() throws IOException {
  final int numPartitions = 10;
  OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, workingDir);
  OrderedPartitionedKVOutput output =
      new OrderedPartitionedKVOutput(outputContext, numPartitions);
  output.initialize();

  List<Event> events = output.close();
  assertEquals(2, events.size());

  Event firstEvent = events.get(0);
  assertTrue(firstEvent instanceof VertexManagerEvent);
  Event secondEvent = events.get(1);
  assertTrue(secondEvent instanceof CompositeDataMovementEvent);

  // Decode the shuffle payload attached to the data-movement event.
  CompositeDataMovementEvent dataMovementEvent = (CompositeDataMovementEvent) secondEvent;
  ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload =
      ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(
          ByteString.copyFrom(dataMovementEvent.getUserPayload()));
  assertTrue(shufflePayload.hasEmptyPartitions());

  // The empty-partition set is stored compressed; decompress it and rebuild the BitSet.
  BitSet emptyPartitions = TezUtilsInternal.fromByteArray(
      TezCommonUtils.decompressByteStringToByteArray(shufflePayload.getEmptyPartitions()));

  // Every one of the partitions must be flagged as empty.
  assertEquals(numPartitions, emptyPartitions.cardinality());
  for (int partition = 0; partition < numPartitions; partition++) {
    assertTrue(emptyPartitions.get(partition));
  }
}
use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestShuffleVertexManagerBase method testVMEventFirst.
/**
 * Test vmEvent and vertexStatusUpdate before started: a VertexManagerEvent and a vertex state
 * update delivered before {@code onVertexStarted} must not be counted until
 * {@code onVertexStarted} is invoked.
 */
@Test(timeout = 5000)
public void testVMEventFirst() throws IOException {
  Configuration conf = new Configuration();
  final String srcVertexA = "Vertex1";
  final String srcVertexB = "Vertex2";
  final String srcVertexC = "Vertex3";
  final String managedVertex = "Vertex4";
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  final VertexManagerPluginContext mockContext = createVertexManagerContext(
      srcVertexA, 2,
      srcVertexB, 2,
      srcVertexC, 2,
      managedVertex, 4,
      scheduledTasks, null);
  VertexManagerEvent earlyEvent = getVertexManagerEvent(null, 1L, "Vertex");
  ShuffleVertexManagerBase manager = createManager(conf, mockContext, 0.01f, 0.75f);

  // Nothing has been scheduled yet: all four tasks are still pending.
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

  // Deliver a state update and the event before the vertex has been started.
  TezTaskAttemptID attemptId =
      TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_0");
  earlyEvent.setProducerAttemptIdentifier(
      new TaskAttemptIdentifierImpl("dag", srcVertexA, attemptId));
  manager.onVertexStateUpdated(new VertexStateUpdate(srcVertexA, VertexState.CONFIGURED));
  manager.onVertexManagerEventReceived(earlyEvent);

  // The event is not counted while the vertex is not started ...
  Assert.assertEquals(0, manager.numVertexManagerEventsReceived);

  // ... and is counted once the vertex starts.
  manager.onVertexStarted(emptyCompletions);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
}
use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestShuffleVertexManagerBase method testAutoParallelism.
/**
 * Drives a manager through source task completions and VertexManagerEvents and verifies that
 * the managed vertex is reconfigured exactly once, from 4 pending tasks down to 2, and that
 * the edge manager installed by the reconfiguration routes events as expected.
 */
@Test(timeout = 5000)
public void testAutoParallelism() throws Exception {
  Configuration conf = new Configuration();
  ShuffleVertexManagerBase manager;
  final String mockSrcVertexId1 = "Vertex1";
  final String mockSrcVertexId2 = "Vertex2";
  final String mockSrcVertexId3 = "Vertex3";
  final String mockManagedVertexId = "Vertex4";
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
  final VertexManagerPluginContext mockContext = createVertexManagerContext(mockSrcVertexId1, 2, mockSrcVertexId2, 2, mockSrcVertexId3, 2, mockManagedVertexId, 4, scheduledTasks, newEdgeManagers);
  // parallelism changed due to small data size
  manager = createManager(conf, mockContext, 0.5f, 0.5f);
  manager.onVertexStarted(emptyCompletions);
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED));
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
  // task completion from non-bipartite stage does nothing
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId3, 0));
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
  VertexManagerEvent vmEvent = getVertexManagerEvent(null, 50 * MB, mockSrcVertexId1);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(50 * MB, manager.completedSourceTasksOutputSize);
  // ignore duplicate completion
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(50 * MB, manager.completedSourceTasksOutputSize);
  vmEvent = getVertexManagerEvent(null, 50 * MB, mockSrcVertexId2);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 1));
  // managedVertex tasks reduced: exactly one reconfigure call overall, and it asked for 2 tasks
  verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap());
  Assert.assertEquals(2, newEdgeManagers.size());
  // TODO improve tests for parallelism
  // all tasks scheduled
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(2, scheduledTasks.size());
  // Integer.valueOf replaces the deprecated Integer(int) constructor; contains() uses equals()
  Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(0)));
  Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(1)));
  Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(2, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(100 * MB, manager.completedSourceTasksOutputSize);
  // more completions don't cause recalculation of parallelism
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0));
  verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  Assert.assertEquals(2, newEdgeManagers.size());
  EdgeManagerPluginOnDemand edgeManager = (EdgeManagerPluginOnDemand) newEdgeManagers.values().iterator().next();
  // 4 source task outputs - same as original number of partitions
  Assert.assertEquals(4, edgeManager.getNumSourceTaskPhysicalOutputs(0));
  // 4 destination task inputs - 2 source tasks * 2 merged partitions
  Assert.assertEquals(4, edgeManager.getNumDestinationTaskPhysicalInputs(0));
  EdgeManagerPluginOnDemand.EventRouteMetadata routeMetadata = edgeManager.routeDataMovementEventToDestination(1, 1, 0);
  Assert.assertEquals(1, routeMetadata.getNumEvents());
  Assert.assertEquals(3, routeMetadata.getTargetIndices()[0]);
  routeMetadata = edgeManager.routeDataMovementEventToDestination(0, 2, 1);
  Assert.assertEquals(1, routeMetadata.getNumEvents());
  Assert.assertEquals(0, routeMetadata.getTargetIndices()[0]);
  routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(1, 0);
  Assert.assertEquals(2, routeMetadata.getNumEvents());
  Assert.assertEquals(2, routeMetadata.getTargetIndices()[0]);
  Assert.assertEquals(3, routeMetadata.getTargetIndices()[1]);
  routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(1, 1);
  Assert.assertEquals(2, routeMetadata.getNumEvents());
  Assert.assertEquals(2, routeMetadata.getTargetIndices()[0]);
  Assert.assertEquals(3, routeMetadata.getTargetIndices()[1]);
}
use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.
the class TestShuffleVertexManagerBase method testPartitionStats.
/**
 * Test partition stats: sends a VertexManagerEvent carrying per-partition sizes and checks the
 * per-index stats the manager reports, first for an event built with the last helper argument
 * {@code false} and then, on a fresh manager, with it {@code true} (detailed partition stats).
 * In both passes a second event from another attempt of the same task must change nothing.
 */
@Test(timeout = 5000)
public void testPartitionStats() throws IOException {
  Configuration conf = new Configuration();
  ShuffleVertexManagerBase manager;
  final String mockSrcVertexId1 = "Vertex1";
  final String mockSrcVertexId2 = "Vertex2";
  final String mockSrcVertexId3 = "Vertex3";
  final String mockManagedVertexId = "Vertex4";
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  final VertexManagerPluginContext mockContext = createVertexManagerContext(mockSrcVertexId1, 2, mockSrcVertexId2, 2, mockSrcVertexId3, 2, mockManagedVertexId, 4, scheduledTasks, null);
  // {5,9,12,18} in bitmap
  // Upper-case L suffix: a lower-case l is easily misread as the digit 1.
  final long MB = 1024L * 1024L;
  long[] sizes = new long[] { 0L, 1 * MB, 964 * MB, 48 * MB };
  VertexManagerEvent vmEvent = getVertexManagerEvent(sizes, 0, "Vertex", false);
  manager = createManager(conf, mockContext, 0.01f, 0.75f);
  manager.onVertexStarted(emptyCompletions);
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
  TezTaskAttemptID taId1 = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_0");
  vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", mockSrcVertexId1, taId1));
  manager.onVertexManagerEventReceived(vmEvent);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
  // 0 MB bucket
  Assert.assertEquals(0, manager.getCurrentlyKnownStatsAtIndex(0));
  // 1 MB bucket
  Assert.assertEquals(1, manager.getCurrentlyKnownStatsAtIndex(1));
  // 100 MB bucket
  Assert.assertEquals(100, manager.getCurrentlyKnownStatsAtIndex(2));
  // 10 MB bucket
  Assert.assertEquals(10, manager.getCurrentlyKnownStatsAtIndex(3));
  // sending again from a different attempt of the same task has no impact
  TezTaskAttemptID taId2 = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_1");
  vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", mockSrcVertexId1, taId2));
  manager.onVertexManagerEventReceived(vmEvent);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
  // 0 MB bucket
  Assert.assertEquals(0, manager.getCurrentlyKnownStatsAtIndex(0));
  // 1 MB bucket
  Assert.assertEquals(1, manager.getCurrentlyKnownStatsAtIndex(1));
  // 100 MB bucket
  Assert.assertEquals(100, manager.getCurrentlyKnownStatsAtIndex(2));
  // 10 MB bucket
  Assert.assertEquals(10, manager.getCurrentlyKnownStatsAtIndex(3));
  // Testing for detailed partition stats: start over with a fresh manager
  vmEvent = getVertexManagerEvent(sizes, 0, "Vertex", true);
  manager = createManager(conf, mockContext, 0.01f, 0.75f);
  manager.onVertexStarted(emptyCompletions);
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
  taId1 = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_0");
  vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", mockSrcVertexId1, taId1));
  manager.onVertexManagerEventReceived(vmEvent);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
  // with detailed stats the reported values match the sizes in MB
  Assert.assertEquals(0, manager.getCurrentlyKnownStatsAtIndex(0));
  Assert.assertEquals(1, manager.getCurrentlyKnownStatsAtIndex(1));
  Assert.assertEquals(964, manager.getCurrentlyKnownStatsAtIndex(2));
  Assert.assertEquals(48, manager.getCurrentlyKnownStatsAtIndex(3));
  // sending again from a different attempt of the same task has no impact
  taId2 = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_1");
  vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", mockSrcVertexId1, taId2));
  manager.onVertexManagerEventReceived(vmEvent);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(0, manager.getCurrentlyKnownStatsAtIndex(0));
  Assert.assertEquals(1, manager.getCurrentlyKnownStatsAtIndex(1));
  Assert.assertEquals(964, manager.getCurrentlyKnownStatsAtIndex(2));
  Assert.assertEquals(48, manager.getCurrentlyKnownStatsAtIndex(3));
}
Aggregations