use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.
the class TestFairShuffleVertexManager method testReduceSchedulingWithPartitionStats.
/**
 * Runs the shared partition-stats scheduling scenario with
 * {@code FairRoutingType.REDUCE_PARALLELISM} and then checks, for every source task,
 * how composite data-movement events and source-task-failed events are routed to
 * destination task 0.
 */
@Test(timeout = 5000)
public void testReduceSchedulingWithPartitionStats() throws Exception {
  final int sourceTaskCount = 300;
  final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
  final long[] partitionStats = { MB, 2 * MB, 5 * MB };

  testSchedulingWithPartitionStats(FairRoutingType.REDUCE_PARALLELISM, sourceTaskCount,
      partitionStats, 2, 2, 2, newEdgeManagers);

  EdgeManagerPluginOnDemand edgeManager =
      (EdgeManagerPluginOnDemand) newEdgeManagers.values().iterator().next();

  // The first destination task fetches two partitions from all source tasks.
  // Thus the # of inputs == # of source tasks * 2 merged partitions
  Assert.assertEquals(sourceTaskCount * 2, edgeManager.getNumDestinationTaskPhysicalInputs(0));

  for (int src = 0; src < sourceTaskCount; src++) {
    // Each source task is expected to occupy two consecutive input slots on destination 0.
    final int firstSlot = src * 2;

    // Composite data-movement event routing.
    EdgeManagerPluginOnDemand.CompositeEventRouteMetadata compositeRoute =
        edgeManager.routeCompositeDataMovementEventToDestination(src, 0);
    Assert.assertEquals(2, compositeRoute.getCount());
    Assert.assertEquals(0, compositeRoute.getSource());
    Assert.assertEquals(firstSlot, compositeRoute.getTarget());

    // Source-task-failed event routing.
    EdgeManagerPluginOnDemand.EventRouteMetadata failedRoute =
        edgeManager.routeInputSourceTaskFailedEventToDestination(src, 0);
    Assert.assertEquals(2, failedRoute.getNumEvents());
    Assert.assertArrayEquals(new int[] { firstSlot, firstSlot + 1 },
        failedRoute.getTargetIndices());
  }
}
use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.
the class TestShuffleVertexManagerBase method testAutoParallelism.
/**
 * Drives a {@code ShuffleVertexManagerBase} through source-task completions and
 * vertex-manager events, and verifies that the managed vertex is reconfigured once
 * (from 4 tasks to 2), that the pending tasks are then scheduled, and that the
 * installed on-demand edge manager routes events to the expected input indices.
 */
@Test(timeout = 5000)
public void testAutoParallelism() throws Exception {
  Configuration conf = new Configuration();
  ShuffleVertexManagerBase manager;
  final String mockSrcVertexId1 = "Vertex1";
  final String mockSrcVertexId2 = "Vertex2";
  final String mockSrcVertexId3 = "Vertex3";
  final String mockManagedVertexId = "Vertex4";
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
  final VertexManagerPluginContext mockContext = createVertexManagerContext(
      mockSrcVertexId1, 2, mockSrcVertexId2, 2, mockSrcVertexId3, 2,
      mockManagedVertexId, 4, scheduledTasks, newEdgeManagers);

  // parallelism changed due to small data size
  manager = createManager(conf, mockContext, 0.5f, 0.5f);
  manager.onVertexStarted(emptyCompletions);
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED));
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);

  // task completion from non-bipartite stage does nothing
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId3, 0));
  // no tasks scheduled
  Assert.assertEquals(4, manager.pendingTasks.size());
  Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

  // first completion on Vertex1, reporting 50 MB of output
  VertexManagerEvent vmEvent = getVertexManagerEvent(null, 50 * MB, mockSrcVertexId1);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(50 * MB, manager.completedSourceTasksOutputSize);

  // ignore duplicate completion
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
  Assert.assertEquals(4, manager.pendingTasks.size());
  // no tasks scheduled
  Assert.assertEquals(0, scheduledTasks.size());
  Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(50 * MB, manager.completedSourceTasksOutputSize);

  // completion on Vertex2, reporting another 50 MB of output
  vmEvent = getVertexManagerEvent(null, 50 * MB, mockSrcVertexId2);
  manager.onVertexManagerEventReceived(vmEvent);
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 1));
  // managedVertex tasks reduced
  verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap());
  Assert.assertEquals(2, newEdgeManagers.size());
  // TODO improve tests for parallelism
  // all tasks scheduled
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(2, scheduledTasks.size());
  // Integer.valueOf replaces the deprecated Integer(int) constructor; List.contains
  // compares with equals(), so the lookup result is unchanged.
  Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(0)));
  Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(1)));
  Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted);
  Assert.assertEquals(2, manager.numVertexManagerEventsReceived);
  Assert.assertEquals(100 * MB, manager.completedSourceTasksOutputSize);

  // more completions dont cause recalculation of parallelism
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0));
  verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
  Assert.assertEquals(2, newEdgeManagers.size());

  EdgeManagerPluginOnDemand edgeManager =
      (EdgeManagerPluginOnDemand) newEdgeManagers.values().iterator().next();
  // 4 source task outputs - same as original number of partitions
  Assert.assertEquals(4, edgeManager.getNumSourceTaskPhysicalOutputs(0));
  // 4 destination task inputs - 2 source tasks * 2 merged partitions
  Assert.assertEquals(4, edgeManager.getNumDestinationTaskPhysicalInputs(0));

  // data-movement event routing
  EdgeManagerPluginOnDemand.EventRouteMetadata routeMetadata =
      edgeManager.routeDataMovementEventToDestination(1, 1, 0);
  Assert.assertEquals(1, routeMetadata.getNumEvents());
  Assert.assertEquals(3, routeMetadata.getTargetIndices()[0]);
  routeMetadata = edgeManager.routeDataMovementEventToDestination(0, 2, 1);
  Assert.assertEquals(1, routeMetadata.getNumEvents());
  Assert.assertEquals(0, routeMetadata.getTargetIndices()[0]);

  // source-task-failed event routing: source task 1 maps to input indices 2 and 3
  // on both destination tasks
  routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(1, 0);
  Assert.assertEquals(2, routeMetadata.getNumEvents());
  Assert.assertEquals(2, routeMetadata.getTargetIndices()[0]);
  Assert.assertEquals(3, routeMetadata.getTargetIndices()[1]);
  routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(1, 1);
  Assert.assertEquals(2, routeMetadata.getNumEvents());
  Assert.assertEquals(2, routeMetadata.getTargetIndices()[0]);
  Assert.assertEquals(3, routeMetadata.getTargetIndices()[1]);
}
use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.
the class TestVertexImpl method testSetCustomEdgeManager.
/**
 * Reconfigures vertex5 with a custom edge manager descriptor for its edge from vertex3
 * and verifies that the edge then uses an {@code EdgeManagerForTest} instance carrying
 * the newly supplied user payload.
 */
@Test(timeout = 5000)
public void testSetCustomEdgeManager() throws Exception {
  // Vertex5 linked to v3 (v3 src, v5 dest)
  VertexImpl v5 = vertices.get("vertex5");
  v5.vertexReconfigurationPlanned();
  initAllVertices(VertexState.INITED);

  // Before reconfiguration the edge manager on e4 carries the original edge payload.
  Edge edge = edges.get("e4");
  EdgeManagerPlugin em = edge.getEdgeManager();
  EdgeManagerForTest originalEm = (EdgeManagerForTest) em;
  assertTrue(Arrays.equals(edgePayload,
      originalEm.getEdgeManagerContext().getUserPayload().deepCopyAsArray()));

  // Encode with an explicit charset so the payload bytes do not depend on the
  // platform default encoding.
  UserPayload userPayload = UserPayload.create(
      ByteBuffer.wrap("foo".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
  EdgeManagerPluginDescriptor edgeManagerDescriptor =
      EdgeManagerPluginDescriptor.create(EdgeManagerForTest.class.getName());
  edgeManagerDescriptor.setUserPayload(userPayload);
  EdgeProperty edgeProp = EdgeProperty.create(edgeManagerDescriptor, DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In"));

  Vertex v3 = vertices.get("vertex3");
  Map<String, EdgeProperty> edgeManagerDescriptors =
      Collections.singletonMap(v3.getName(), edgeProp);
  v5.reconfigureVertex(v5.getTotalTasks() - 1, null, edgeManagerDescriptors);
  v5.doneReconfiguringVertex();

  // v5 is already declared as VertexImpl, so no cast is needed to reach sourceVertices.
  EdgeManagerPlugin modifiedEdgeManager = v5.sourceVertices.get(v3).getEdgeManager();
  Assert.assertNotNull(modifiedEdgeManager);
  assertTrue(modifiedEdgeManager instanceof EdgeManagerForTest);
  // Ensure initialize() is called with the correct payload
  assertTrue(Arrays.equals(userPayload.deepCopyAsArray(),
      ((EdgeManagerForTest) modifiedEdgeManager).getUserPayload().deepCopyAsArray()));
}
use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.
the class TestFairShuffleVertexManager method testFairSchedulingWithPartitionStats.
/**
 * Runs the shared partition-stats scheduling scenario with
 * {@code FairRoutingType.FAIR_PARALLELISM} and then checks the number of physical inputs
 * and the event routing for destination tasks 0, 1 and 2.
 */
@Test(timeout = 5000)
public void testFairSchedulingWithPartitionStats() throws Exception {
  final int sourceTaskCount = 300;
  final int halfSourceTaskCount = sourceTaskCount / 2;
  final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
  final long[] partitionStats = { MB, 2 * MB, 5 * MB };

  testSchedulingWithPartitionStats(FairRoutingType.FAIR_PARALLELISM, sourceTaskCount,
      partitionStats, 2, 3, 2, newEdgeManagers);

  // Get the first edgeManager which is SCATTER_GATHER.
  EdgeManagerPluginOnDemand edgeManager =
      (EdgeManagerPluginOnDemand) newEdgeManagers.values().iterator().next();

  // The first destination task fetches two partitions from all source tasks.
  // Thus the # of inputs == # of source tasks * 2 merged partitions
  Assert.assertEquals(sourceTaskCount * 2, edgeManager.getNumDestinationTaskPhysicalInputs(0));
  for (int src = 0; src < sourceTaskCount; src++) {
    // Each source task is expected to occupy two consecutive input slots on destination 0.
    final int firstSlot = src * 2;

    EdgeManagerPluginOnDemand.CompositeEventRouteMetadata compositeRoute =
        edgeManager.routeCompositeDataMovementEventToDestination(src, 0);
    Assert.assertEquals(2, compositeRoute.getCount());
    Assert.assertEquals(0, compositeRoute.getSource());
    Assert.assertEquals(firstSlot, compositeRoute.getTarget());

    EdgeManagerPluginOnDemand.EventRouteMetadata failedRoute =
        edgeManager.routeInputSourceTaskFailedEventToDestination(src, 0);
    Assert.assertEquals(2, failedRoute.getNumEvents());
    Assert.assertArrayEquals(new int[] { firstSlot, firstSlot + 1 },
        failedRoute.getTargetIndices());
  }

  // The 2nd destination task fetches one partition from the first half of
  // source tasks. Only source task 0 is probed here.
  Assert.assertEquals(halfSourceTaskCount, edgeManager.getNumDestinationTaskPhysicalInputs(1));
  EdgeManagerPluginOnDemand.CompositeEventRouteMetadata secondDestComposite =
      edgeManager.routeCompositeDataMovementEventToDestination(0, 1);
  Assert.assertEquals(1, secondDestComposite.getCount());
  Assert.assertEquals(2, secondDestComposite.getSource());
  Assert.assertEquals(0, secondDestComposite.getTarget());
  EdgeManagerPluginOnDemand.EventRouteMetadata secondDestFailed =
      edgeManager.routeInputSourceTaskFailedEventToDestination(0, 1);
  Assert.assertEquals(1, secondDestFailed.getNumEvents());
  Assert.assertEquals(0, secondDestFailed.getTargetIndices()[0]);

  // The 3rd destination task fetches one partition from 2nd half of
  // source tasks.
  Assert.assertEquals(halfSourceTaskCount, edgeManager.getNumDestinationTaskPhysicalInputs(2));
  for (int src = halfSourceTaskCount; src < sourceTaskCount; src++) {
    // Input slots on destination 2 are numbered relative to the start of the second half.
    final int slot = src - halfSourceTaskCount;

    EdgeManagerPluginOnDemand.CompositeEventRouteMetadata compositeRoute =
        edgeManager.routeCompositeDataMovementEventToDestination(src, 2);
    Assert.assertEquals(1, compositeRoute.getCount());
    Assert.assertEquals(2, compositeRoute.getSource());
    Assert.assertEquals(slot, compositeRoute.getTarget());

    EdgeManagerPluginOnDemand.EventRouteMetadata failedRoute =
        edgeManager.routeInputSourceTaskFailedEventToDestination(src, 2);
    Assert.assertEquals(1, failedRoute.getNumEvents());
    Assert.assertEquals(slot, failedRoute.getTargetIndices()[0]);
  }
}
use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.
the class TestFairShuffleVertexManager method testOverflow.
/**
 * Runs the shared partition-stats scheduling scenario with
 * {@code FairRoutingType.FAIR_PARALLELISM} using 30000 source tasks and a large third
 * partition. This method makes no assertions of its own; any verification happens inside
 * {@code testSchedulingWithPartitionStats}.
 */
@Test(timeout = 500000)
public void testOverflow() throws Exception {
  final int sourceTaskCount = 30000;
  final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>();

  // Per-partition sizes, expressed as multiples of MB.
  final int firstPartitionMb = 1;
  final int secondPartitionMb = 2;
  final int thirdPartitionMb = 500;
  final long[] partitionStats = {
      firstPartitionMb * MB, secondPartitionMb * MB, thirdPartitionMb * MB
  };

  final int totalPartitionMb = firstPartitionMb + secondPartitionMb + thirdPartitionMb;
  final int expectedDestinationTasks = totalPartitionMb * sourceTaskCount / 1000;

  testSchedulingWithPartitionStats(FairRoutingType.FAIR_PARALLELISM, sourceTaskCount,
      partitionStats, 1000, expectedDestinationTasks, 3, newEdgeManagers);
}
Aggregations