Search in sources :

Example 1 with EdgeManagerPlugin

use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.

the class TestFairShuffleVertexManager method testReduceSchedulingWithPartitionStats.

/**
 * Runs the shared partition-stats scheduling scenario with
 * {@code FairRoutingType.REDUCE_PARALLELISM} and then verifies how the first
 * registered edge manager routes events from every source task to
 * destination task 0.
 */
@Test(timeout = 5000)
public void testReduceSchedulingWithPartitionStats() throws Exception {
    final int sourceTaskCount = 300;
    final long[] statsPerPartition = { MB, 2 * MB, 5 * MB };
    final Map<String, EdgeManagerPlugin> createdEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
    testSchedulingWithPartitionStats(FairRoutingType.REDUCE_PARALLELISM, sourceTaskCount, statsPerPartition, 2, 2, 2, createdEdgeManagers);

    final EdgeManagerPluginOnDemand router = (EdgeManagerPluginOnDemand) createdEdgeManagers.values().iterator().next();

    // Destination task 0 pulls two merged partitions from every source task,
    // so its physical input count is (number of source tasks * 2).
    Assert.assertEquals(sourceTaskCount * 2, router.getNumDestinationTaskPhysicalInputs(0));

    for (int src = 0; src < sourceTaskCount; src++) {
        // Each source task occupies two consecutive input slots of destination 0.
        final int firstSlot = src * 2;

        // Composite data movement event: one route covering both partitions.
        EdgeManagerPluginOnDemand.CompositeEventRouteMetadata compositeRoute = router.routeCompositeDataMovementEventToDestination(src, 0);
        Assert.assertEquals(2, compositeRoute.getCount());
        Assert.assertEquals(0, compositeRoute.getSource());
        Assert.assertEquals(firstSlot, compositeRoute.getTarget());

        // Source-task-failed event: both input slots of this source are targeted.
        EdgeManagerPluginOnDemand.EventRouteMetadata failedRoute = router.routeInputSourceTaskFailedEventToDestination(src, 0);
        Assert.assertEquals(2, failedRoute.getNumEvents());
        Assert.assertArrayEquals(new int[] { firstSlot, firstSlot + 1 }, failedRoute.getTargetIndices());
    }
}
Also used : HashMap(java.util.HashMap) EdgeManagerPluginOnDemand(org.apache.tez.dag.api.EdgeManagerPluginOnDemand) EdgeManagerPlugin(org.apache.tez.dag.api.EdgeManagerPlugin) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) Test(org.junit.Test)

Example 2 with EdgeManagerPlugin

use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.

the class TestShuffleVertexManagerBase method testAutoParallelism.

/**
 * Drives a shuffle vertex manager through source-task completions and
 * vertex-manager events, then verifies that the managed vertex is
 * reconfigured exactly once (to parallelism 2), that all remaining tasks are
 * scheduled, and that the newly installed edge manager routes events as
 * expected.
 */
@Test(timeout = 5000)
public void testAutoParallelism() throws Exception {
    Configuration conf = new Configuration();
    ShuffleVertexManagerBase manager;
    final String mockSrcVertexId1 = "Vertex1";
    final String mockSrcVertexId2 = "Vertex2";
    final String mockSrcVertexId3 = "Vertex3";
    final String mockManagedVertexId = "Vertex4";
    final List<Integer> scheduledTasks = Lists.newLinkedList();
    final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
    // NOTE(review): the numeric arguments look like per-vertex task counts
    // (2 for each source vertex, 4 for the managed vertex) - confirm against
    // createVertexManagerContext.
    final VertexManagerPluginContext mockContext = createVertexManagerContext(mockSrcVertexId1, 2, mockSrcVertexId2, 2, mockSrcVertexId3, 2, mockManagedVertexId, 4, scheduledTasks, newEdgeManagers);
    // parallelism changed due to small data size
    manager = createManager(conf, mockContext, 0.5f, 0.5f);
    manager.onVertexStarted(emptyCompletions);
    manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
    manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED));
    manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED));
    // no tasks scheduled
    Assert.assertEquals(4, manager.pendingTasks.size());
    Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
    // task completion from non-bipartite stage does nothing
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId3, 0));
    // no tasks scheduled
    Assert.assertEquals(4, manager.pendingTasks.size());
    Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
    Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
    VertexManagerEvent vmEvent = getVertexManagerEvent(null, 50 * MB, mockSrcVertexId1);
    manager.onVertexManagerEventReceived(vmEvent);
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
    Assert.assertEquals(4, manager.pendingTasks.size());
    // no tasks scheduled
    Assert.assertEquals(0, scheduledTasks.size());
    Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
    Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
    Assert.assertEquals(50 * MB, manager.completedSourceTasksOutputSize);
    // ignore duplicate completion
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
    Assert.assertEquals(4, manager.pendingTasks.size());
    // no tasks scheduled
    Assert.assertEquals(0, scheduledTasks.size());
    Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
    Assert.assertEquals(50 * MB, manager.completedSourceTasksOutputSize);
    vmEvent = getVertexManagerEvent(null, 50 * MB, mockSrcVertexId2);
    manager.onVertexManagerEventReceived(vmEvent);
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 1));
    // managedVertex tasks reduced
    verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
    verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap());
    Assert.assertEquals(2, newEdgeManagers.size());
    // TODO improve tests for parallelism
    // all tasks scheduled
    Assert.assertEquals(0, manager.pendingTasks.size());
    Assert.assertEquals(2, scheduledTasks.size());
    // Integer.valueOf replaces the deprecated Integer(int) constructor; the
    // explicit boxing keeps List.contains(Object) unambiguous.
    Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(0)));
    Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(1)));
    Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted);
    Assert.assertEquals(2, manager.numVertexManagerEventsReceived);
    Assert.assertEquals(100 * MB, manager.completedSourceTasksOutputSize);
    // more completions don't cause recalculation of parallelism
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0));
    verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
    Assert.assertEquals(2, newEdgeManagers.size());
    EdgeManagerPluginOnDemand edgeManager = (EdgeManagerPluginOnDemand) newEdgeManagers.values().iterator().next();
    // 4 source task outputs - same as original number of partitions
    Assert.assertEquals(4, edgeManager.getNumSourceTaskPhysicalOutputs(0));
    // 4 destination task inputs - 2 source tasks * 2 merged partitions
    Assert.assertEquals(4, edgeManager.getNumDestinationTaskPhysicalInputs(0));
    EdgeManagerPluginOnDemand.EventRouteMetadata routeMetadata = edgeManager.routeDataMovementEventToDestination(1, 1, 0);
    Assert.assertEquals(1, routeMetadata.getNumEvents());
    Assert.assertEquals(3, routeMetadata.getTargetIndices()[0]);
    routeMetadata = edgeManager.routeDataMovementEventToDestination(0, 2, 1);
    Assert.assertEquals(1, routeMetadata.getNumEvents());
    Assert.assertEquals(0, routeMetadata.getTargetIndices()[0]);
    // A failed source task 1 maps to the same two input slots (2 and 3) for
    // both destination tasks.
    routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(1, 0);
    Assert.assertEquals(2, routeMetadata.getNumEvents());
    Assert.assertEquals(2, routeMetadata.getTargetIndices()[0]);
    Assert.assertEquals(3, routeMetadata.getTargetIndices()[1]);
    routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(1, 1);
    Assert.assertEquals(2, routeMetadata.getNumEvents());
    Assert.assertEquals(2, routeMetadata.getTargetIndices()[0]);
    Assert.assertEquals(3, routeMetadata.getTargetIndices()[1]);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext) HashMap(java.util.HashMap) EdgeManagerPluginOnDemand(org.apache.tez.dag.api.EdgeManagerPluginOnDemand) EdgeManagerPlugin(org.apache.tez.dag.api.EdgeManagerPlugin) VertexStateUpdate(org.apache.tez.dag.api.event.VertexStateUpdate) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) Test(org.junit.Test)

Example 3 with EdgeManagerPlugin

use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.

the class TestVertexImpl method testSetCustomEdgeManager.

/**
 * Reconfigures vertex5 with a new edge property for its edge from vertex3 and
 * verifies that the resulting edge manager is an {@code EdgeManagerForTest}
 * that exposes the newly supplied user payload.
 */
@Test(timeout = 5000)
public void testSetCustomEdgeManager() throws Exception {
    // Vertex5 linked to v3 (v3 src, v5 dest)
    VertexImpl v5 = vertices.get("vertex5");
    v5.vertexReconfigurationPlanned();
    initAllVertices(VertexState.INITED);
    Edge edge = edges.get("e4");
    EdgeManagerPlugin em = edge.getEdgeManager();
    EdgeManagerForTest originalEm = (EdgeManagerForTest) em;
    // Before reconfiguration the edge manager still carries the original payload.
    Assert.assertArrayEquals(edgePayload, originalEm.getEdgeManagerContext().getUserPayload().deepCopyAsArray());
    // Explicit charset keeps the payload bytes independent of the platform default.
    UserPayload userPayload = UserPayload.create(ByteBuffer.wrap("foo".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    EdgeManagerPluginDescriptor edgeManagerDescriptor = EdgeManagerPluginDescriptor.create(EdgeManagerForTest.class.getName());
    edgeManagerDescriptor.setUserPayload(userPayload);
    EdgeProperty edgeProp = EdgeProperty.create(edgeManagerDescriptor, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In"));
    Vertex v3 = vertices.get("vertex3");
    Map<String, EdgeProperty> edgeManagerDescriptors = Collections.singletonMap(v3.getName(), edgeProp);
    // Drop one task and swap in the new edge property keyed by the source vertex name.
    v5.reconfigureVertex(v5.getTotalTasks() - 1, null, edgeManagerDescriptors);
    v5.doneReconfiguringVertex();
    EdgeManagerPlugin modifiedEdgeManager = v5.sourceVertices.get(v3).getEdgeManager();
    Assert.assertNotNull(modifiedEdgeManager);
    assertTrue(modifiedEdgeManager instanceof EdgeManagerForTest);
    // Ensure initialize() is called with the correct payload
    Assert.assertArrayEquals(userPayload.deepCopyAsArray(), ((EdgeManagerForTest) modifiedEdgeManager).getUserPayload().deepCopyAsArray());
}
Also used : Vertex(org.apache.tez.dag.app.dag.Vertex) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) UserPayload(org.apache.tez.dag.api.UserPayload) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) ByteString(com.google.protobuf.ByteString) EdgeManagerPlugin(org.apache.tez.dag.api.EdgeManagerPlugin) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest) VertexManagerPluginForTest(org.apache.tez.test.VertexManagerPluginForTest) Test(org.junit.Test) GraceShuffleVertexManagerForTest(org.apache.tez.test.GraceShuffleVertexManagerForTest) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 4 with EdgeManagerPlugin

use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.

the class TestFairShuffleVertexManager method testFairSchedulingWithPartitionStats.

/**
 * Runs the shared partition-stats scheduling scenario with
 * {@code FairRoutingType.FAIR_PARALLELISM} and then verifies the routing the
 * first registered edge manager produces for destination tasks 0, 1 and 2.
 */
@Test(timeout = 5000)
public void testFairSchedulingWithPartitionStats() throws Exception {
    final int sourceTaskCount = 300;
    final int halfOfSources = sourceTaskCount / 2;
    final long[] statsPerPartition = { MB, 2 * MB, 5 * MB };
    final Map<String, EdgeManagerPlugin> createdEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
    testSchedulingWithPartitionStats(FairRoutingType.FAIR_PARALLELISM, sourceTaskCount, statsPerPartition, 2, 3, 2, createdEdgeManagers);

    // Get the first edgeManager which is SCATTER_GATHER.
    final EdgeManagerPluginOnDemand router = (EdgeManagerPluginOnDemand) createdEdgeManagers.values().iterator().next();

    // Destination task 0 pulls two merged partitions from every source task,
    // so its physical input count is (number of source tasks * 2).
    Assert.assertEquals(sourceTaskCount * 2, router.getNumDestinationTaskPhysicalInputs(0));
    for (int src = 0; src < sourceTaskCount; src++) {
        final int firstSlot = src * 2;

        EdgeManagerPluginOnDemand.CompositeEventRouteMetadata compositeRoute = router.routeCompositeDataMovementEventToDestination(src, 0);
        Assert.assertEquals(2, compositeRoute.getCount());
        Assert.assertEquals(0, compositeRoute.getSource());
        Assert.assertEquals(firstSlot, compositeRoute.getTarget());

        EdgeManagerPluginOnDemand.EventRouteMetadata failedRoute = router.routeInputSourceTaskFailedEventToDestination(src, 0);
        Assert.assertEquals(2, failedRoute.getNumEvents());
        Assert.assertArrayEquals(new int[] { firstSlot, firstSlot + 1 }, failedRoute.getTargetIndices());
    }

    // Destination task 1 pulls one partition from the first half of the
    // source tasks; only source task 0 is probed here.
    Assert.assertEquals(halfOfSources, router.getNumDestinationTaskPhysicalInputs(1));
    EdgeManagerPluginOnDemand.CompositeEventRouteMetadata secondDestComposite = router.routeCompositeDataMovementEventToDestination(0, 1);
    Assert.assertEquals(1, secondDestComposite.getCount());
    Assert.assertEquals(2, secondDestComposite.getSource());
    Assert.assertEquals(0, secondDestComposite.getTarget());
    EdgeManagerPluginOnDemand.EventRouteMetadata secondDestFailed = router.routeInputSourceTaskFailedEventToDestination(0, 1);
    Assert.assertEquals(1, secondDestFailed.getNumEvents());
    Assert.assertEquals(0, secondDestFailed.getTargetIndices()[0]);

    // Destination task 2 pulls one partition from the second half of the
    // source tasks; input slots are numbered relative to the start of that half.
    Assert.assertEquals(halfOfSources, router.getNumDestinationTaskPhysicalInputs(2));
    for (int src = halfOfSources; src < sourceTaskCount; src++) {
        final int slot = src - halfOfSources;

        EdgeManagerPluginOnDemand.CompositeEventRouteMetadata compositeRoute = router.routeCompositeDataMovementEventToDestination(src, 2);
        Assert.assertEquals(1, compositeRoute.getCount());
        Assert.assertEquals(2, compositeRoute.getSource());
        Assert.assertEquals(slot, compositeRoute.getTarget());

        EdgeManagerPluginOnDemand.EventRouteMetadata failedRoute = router.routeInputSourceTaskFailedEventToDestination(src, 2);
        Assert.assertEquals(1, failedRoute.getNumEvents());
        Assert.assertEquals(slot, failedRoute.getTargetIndices()[0]);
    }
}
Also used : HashMap(java.util.HashMap) EdgeManagerPluginOnDemand(org.apache.tez.dag.api.EdgeManagerPluginOnDemand) EdgeManagerPlugin(org.apache.tez.dag.api.EdgeManagerPlugin) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) Test(org.junit.Test)

Example 5 with EdgeManagerPlugin

use of org.apache.tez.dag.api.EdgeManagerPlugin in project tez by apache.

the class TestFairShuffleVertexManager method testOverflow.

/**
 * Runs the shared partition-stats scheduling scenario with
 * {@code FairRoutingType.FAIR_PARALLELISM} on 30000 source tasks and
 * partitions of 1 MB, 2 MB and 500 MB. The expected destination task count
 * is derived from the total partition size, the source task count and the
 * same divisor (1000) that is passed to the helper.
 */
@Test(timeout = 500000)
public void testOverflow() throws Exception {
    final int sourceTaskCount = 30000;
    final int smallPartitionMb = 1;
    final int mediumPartitionMb = 2;
    final int largePartitionMb = 500;
    // NOTE(review): 1000 is forwarded unchanged as the helper's fourth
    // argument and reused as the divisor below; its exact meaning is defined
    // by testSchedulingWithPartitionStats - confirm there.
    final int divisor = 1000;
    final int totalPartitionMb = smallPartitionMb + mediumPartitionMb + largePartitionMb;
    final int expectedDestinationTasks = totalPartitionMb * sourceTaskCount / divisor;
    final long[] statsPerPartition = { smallPartitionMb * MB, mediumPartitionMb * MB, largePartitionMb * MB };
    final Map<String, EdgeManagerPlugin> createdEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
    testSchedulingWithPartitionStats(FairRoutingType.FAIR_PARALLELISM, sourceTaskCount, statsPerPartition, divisor, expectedDestinationTasks, 3, createdEdgeManagers);
}
Also used : HashMap(java.util.HashMap) EdgeManagerPlugin(org.apache.tez.dag.api.EdgeManagerPlugin) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) Test(org.junit.Test)

Aggregations

EdgeManagerPlugin (org.apache.tez.dag.api.EdgeManagerPlugin)5 Test (org.junit.Test)5 HashMap (java.util.HashMap)4 VertexLocationHint (org.apache.tez.dag.api.VertexLocationHint)4 EdgeManagerPluginOnDemand (org.apache.tez.dag.api.EdgeManagerPluginOnDemand)3 ByteString (com.google.protobuf.ByteString)1 Configuration (org.apache.hadoop.conf.Configuration)1 EdgeManagerPluginDescriptor (org.apache.tez.dag.api.EdgeManagerPluginDescriptor)1 EdgeProperty (org.apache.tez.dag.api.EdgeProperty)1 UserPayload (org.apache.tez.dag.api.UserPayload)1 VertexManagerPluginContext (org.apache.tez.dag.api.VertexManagerPluginContext)1 VertexStateUpdate (org.apache.tez.dag.api.event.VertexStateUpdate)1 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)1 Vertex (org.apache.tez.dag.app.dag.Vertex)1 VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent)1 EdgeManagerForTest (org.apache.tez.test.EdgeManagerForTest)1 GraceShuffleVertexManagerForTest (org.apache.tez.test.GraceShuffleVertexManagerForTest)1 VertexManagerPluginForTest (org.apache.tez.test.VertexManagerPluginForTest)1