Search in sources :

Example 6 with ScheduleTaskRequest

use of org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest in project tez by apache.

the class FairShuffleVertexManager method getTasksToSchedule.

/**
 * Selects pending tasks to schedule, given the current progress of the source vertices.
 *
 * <p>Tasks that are selected are removed from {@code pendingTasks}; tasks that are skipped
 * remain pending.
 *
 * @param completedSourceAttempt the source task attempt that just completed; may be {@code null},
 *     in which case no per-source filtering is applied
 * @return the schedule requests, or {@code null} when the computed number of tasks to schedule
 *     is not positive
 */
@Override
List<ScheduleTaskRequest> getTasksToSchedule(TaskAttemptIdentifier completedSourceAttempt) {
    float completedFraction = getMinSourceVertexCompletedTaskFraction();
    int remaining = getNumOfTasksToScheduleAndLog(completedFraction);
    if (remaining <= 0) {
        return null;
    }

    // When every pending task is going to be scheduled there is no point in filtering.
    boolean scheduleAll = (remaining == pendingTasks.size());
    List<ScheduleTaskRequest> requests = Lists.newArrayListWithCapacity(remaining);
    Iterator<PendingTaskInfo> pending = pendingTasks.iterator();

    // Resolve which source task finished and the bookkeeping of its vertex, if known.
    FairSourceVertexInfo srcInfo = null;
    int srcTaskId = 0;
    if (completedSourceAttempt != null) {
        srcTaskId = completedSourceAttempt.getTaskIdentifier().getIdentifier();
        String srcVertexName = completedSourceAttempt.getTaskIdentifier().getVertexIdentifier().getName();
        srcInfo = (FairSourceVertexInfo) getSourceVertexInfo(srcVertexName);
    }

    while (pending.hasNext() && remaining > 0) {
        Integer taskIndex = pending.next().getIndex();
        // Only filter when not scheduling everything, auto parallelism is enabled and the
        // completed source vertex has destination input properties; otherwise the filter
        // is skipped.
        boolean filterBySource = !scheduleAll
            && config.isAutoParallelismEnabled()
            && srcInfo != null
            && srcInfo.getDestinationInputsProperties().size() > 0;
        if (filterBySource) {
            DestinationTaskInputsProperty inputs = srcInfo.getDestinationInputsProperties().get(taskIndex);
            if (!inputs.isSourceTaskInRange(srcTaskId)) {
                // This destination task does not read from the completed source task.
                LOG.debug("completedSourceTaskIndex {} and taskIndex {} don't " + "connect.", srcTaskId, taskIndex);
                continue;
            }
        }
        requests.add(ScheduleTaskRequest.create(taskIndex, null));
        pending.remove();
        remaining--;
    }
    return requests;
}
Also used : BigInteger(java.math.BigInteger) ScheduleTaskRequest(org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest)

Example 7 with ScheduleTaskRequest

use of org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest in project tez by apache.

the class FairCartesianProductVertexManager method scheduleTasksDependOnCompletion.

/**
 * Schedules every not-yet-scheduled task whose source chunks are all completed, looking only at
 * the task combinations that involve chunks touched by the given completed source attempt.
 *
 * @param attempt the source task attempt that completed
 */
private void scheduleTasksDependOnCompletion(TaskAttemptIdentifier attempt) {
    if (parallelism == 0) {
        return;
    }

    int taskId = attempt.getTaskIdentifier().getIdentifier();
    String vertex = attempt.getTaskIdentifier().getVertexIdentifier().getName();
    SrcVertex srcVertex = srcVerticesByName.get(vertex);
    Source src = srcVertex.source;

    List<ScheduleTaskRequest> toSchedule = new ArrayList<>();
    CartesianProductCombination combination = new CartesianProductCombination(numChunksPerSrc, src.position);

    // Map the partitions of the completed task onto the range of chunks they belong to.
    grouper.init(srcVertex.numTask * numPartitions, src.numChunk);
    int firstChunk = grouper.getGroupId(taskId * numPartitions);
    int lastChunk = grouper.getGroupId(taskId * numPartitions + numPartitions - 1);

    for (int chunkId = firstChunk; chunkId <= lastChunk; chunkId++) {
        combination.firstTaskWithFixedChunk(chunkId);
        do {
            List<Integer> chunkIds = combination.getCombination();
            if (!scheduledTasks.contains(combination.getTaskId())) {
                // A task is ready only when the chunk of every source it reads is completed;
                // start with the source that triggered this call, then check the others.
                boolean ready = src.isChunkCompleted(chunkIds.get(src.position));
                int srcId = 0;
                while (ready && srcId < chunkIds.size()) {
                    if (srcId != src.position) {
                        ready = sourcesByName.get(sourceList.get(srcId)).isChunkCompleted(chunkIds.get(srcId));
                    }
                    srcId++;
                }
                if (ready) {
                    toSchedule.add(ScheduleTaskRequest.create(combination.getTaskId(), null));
                    scheduledTasks.add(combination.getTaskId());
                }
            }
        } while (combination.nextTaskWithFixedChunk());
    }

    if (!toSchedule.isEmpty()) {
        getContext().scheduleTasks(toSchedule);
    }
}
Also used : ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) ScheduleTaskRequest(org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest)

Example 8 with ScheduleTaskRequest

use of org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest in project tez by apache.

the class TestVertexImpl method testVertexGetTAAttemptsObsoletion.

/**
 * Checks that, after a source attempt reports an input failure, a destination attempt asking for
 * events receives only the INPUT_FAILED event, and that data movement events sent afterwards by
 * the failed attempt are not added to the on-demand route events.
 */
@Test(timeout = 5000)
public void testVertexGetTAAttemptsObsoletion() throws Exception {
    initAllVertices(VertexState.INITED);
    VertexImpl v1 = vertices.get("vertex1");
    startVertex(v1);
    VertexImpl v2 = vertices.get("vertex2");
    startVertex(v2);
    VertexImpl v3 = vertices.get("vertex3");
    VertexImpl v4 = vertices.get("vertex4");

    // Schedule every task of v4 so that edge routing begins.
    List<ScheduleTaskRequest> allTasks = new LinkedList<ScheduleTaskRequest>();
    int taskIndex = 0;
    while (taskIndex < v4.getTotalTasks()) {
        allTasks.add(ScheduleTaskRequest.create(taskIndex, null));
        ++taskIndex;
    }
    v4.scheduleTasks(allTasks);
    Assert.assertEquals(VertexState.RUNNING, v4.getState());
    Assert.assertEquals(1, v4.sourceVertices.size());
    Edge e = v4.sourceVertices.get(v3);

    TezTaskAttemptID v3TaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(v3.getVertexId(), 0), 0);
    TezTaskAttemptID v4TaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(v4.getVertexId(), 0), 0);

    // Route 11 data movement events produced by the v3 attempt into v4.
    final int initialEventCount = 11;
    for (int sent = 0; sent < initialEventCount; ++sent) {
        EventMetaData sourceMeta =
            new EventMetaData(EventProducerConsumerType.OUTPUT, v3.getName(), v3.getName(), v3TaId);
        TezEvent dataMovement = new TezEvent(DataMovementEvent.create(0, null), sourceMeta);
        v4.handle(new VertexEventRouteEvent(v4.getVertexId(), Collections.singletonList(dataMovement)));
    }
    dispatcher.await();
    // All of them must be held as on-demand route events.
    Assert.assertEquals(initialEventCount, v4.getOnDemandRouteEvents().size());

    // Swap in a mocked on-demand edge manager that routes everything to a fixed target.
    TaskAttemptEventInfo eventInfo;
    EdgeManagerPluginOnDemand mockPlugin = mock(EdgeManagerPluginOnDemand.class);
    EventRouteMetadata mockRoute = EventRouteMetadata.create(1, new int[] { 0 });
    e.edgeManager = mockPlugin;
    when(mockPlugin.routeInputSourceTaskFailedEventToDestination(anyInt(), anyInt())).thenReturn(mockRoute);
    when(mockPlugin.routeDataMovementEventToDestination(anyInt(), anyInt(), anyInt())).thenReturn(mockRoute);

    // Report an input failure for the same v3 attempt.
    EventMetaData failedMeta =
        new EventMetaData(EventProducerConsumerType.OUTPUT, v3.getName(), v3.getName(), v3TaId);
    TezEvent inputFailed = new TezEvent(InputFailedEvent.create(0, 0), failedMeta);
    v4.handle(new VertexEventRouteEvent(v4.getVertexId(), Collections.singletonList(inputFailed)));

    // Ask for events with a large enough buffer: only the input failed event comes back,
    // the data movement events are obsolete.
    int fromEventId = 0;
    eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 0, 100);
    fromEventId = eventInfo.getNextFromEventId();
    Assert.assertEquals(12, fromEventId);
    Assert.assertEquals(1, eventInfo.getEvents().size());
    Assert.assertEquals(EventType.INPUT_FAILED_EVENT, eventInfo.getEvents().get(0).getEventType());

    // Let the failed attempt send three more data movement events.
    for (int sent = 0; sent < 3; ++sent) {
        EventMetaData lateMeta =
            new EventMetaData(EventProducerConsumerType.OUTPUT, v3.getName(), v3.getName(), v3TaId);
        TezEvent lateDataMovement = new TezEvent(DataMovementEvent.create(0, null), lateMeta);
        v4.handle(new VertexEventRouteEvent(v4.getVertexId(), Collections.singletonList(lateDataMovement)));
    }
    dispatcher.await();

    // Still 11 events + 1 INPUT_FAILED_EVENT: events sent later by the failed attempt
    // should not be available.
    Assert.assertEquals(12, v4.getOnDemandRouteEvents().size());
    fromEventId = 0;
    eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 0, 100);
    Assert.assertEquals(EventType.INPUT_FAILED_EVENT, eventInfo.getEvents().get(0).getEventType());
}
Also used : VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext) EdgeManagerPluginOnDemand(org.apache.tez.dag.api.EdgeManagerPluginOnDemand) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) LinkedList(java.util.LinkedList) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) PlanTaskLocationHint(org.apache.tez.dag.api.records.DAGProtos.PlanTaskLocationHint) ScheduleTaskRequest(org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) EventRouteMetadata(org.apache.tez.dag.api.EdgeManagerPluginOnDemand.EventRouteMetadata) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) VertexManagerPluginForTest(org.apache.tez.test.VertexManagerPluginForTest) Test(org.junit.Test) GraceShuffleVertexManagerForTest(org.apache.tez.test.GraceShuffleVertexManagerForTest) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest)

Example 9 with ScheduleTaskRequest

use of org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest in project tez by apache.

the class InputReadyVertexManager method handleSourceTaskFinished.

/**
 * Records that a source task finished and schedules the tasks of this vertex that have become
 * ready as a result.
 *
 * <p>Sources connected through a non ONE_TO_ONE edge must have all of their tasks finished before
 * anything is scheduled. When there are no ONE_TO_ONE edges, all tasks are started together;
 * otherwise a task is started once all of its ONE_TO_ONE source tasks are done.
 *
 * @param vertex name of the source vertex the finished task belongs to
 * @param taskId index of the finished task within that source vertex
 */
void handleSourceTaskFinished(String vertex, Integer taskId) {
    SourceVertexInfo srcInfo = srcVertexInfo.get(vertex);
    int finishedIndex = taskId.intValue();
    if (srcInfo.taskIsFinished[finishedIndex] == null) {
        // not a duplicate completion
        srcInfo.taskIsFinished[finishedIndex] = Boolean.TRUE;
        srcInfo.numFinishedTasks++;
        if (srcInfo.edgeProperty.getDataMovementType() == DataMovementType.ONE_TO_ONE) {
            oneToOneSrcTasksDoneCount[finishedIndex]++;
            // keep the latest container that completed as the location hint
            // After there is standard data size info available then use it
            oneToOneLocationHints[finishedIndex] = TaskLocationHint.createTaskLocationHint(vertex, taskId);
        }
    }

    // Check if current source task's vertex is completed.
    if (srcInfo.edgeProperty.getDataMovementType() != DataMovementType.ONE_TO_ONE
        && srcInfo.numTasks != srcInfo.numFinishedTasks) {
        // we depend on all tasks to finish. So nothing to do now.
        return;
    }

    // The task that just finished may allow us to schedule, but only if every source with a
    // full dependency (non ONE_TO_ONE edge) has finished all of its tasks.
    for (SourceVertexInfo vInfo : srcVertexInfo.values()) {
        if (vInfo.edgeProperty.getDataMovementType() != DataMovementType.ONE_TO_ONE
            && vInfo.numTasks != vInfo.numFinishedTasks) {
            return;
        }
    }

    // all source vertices with full dependencies are done
    List<ScheduleTaskRequest> tasksToStart;
    if (numOneToOneEdges == 0) {
        // no 1-1 dependency. Start all tasks
        // NOTE(review): this branch is also reached on a duplicate completion once all sources
        // are done, so every task is requested again - confirm the scheduler tolerates that.
        int numTasks = taskIsStarted.length;
        LOG.info("Starting all " + numTasks + " tasks for vertex: " + getContext().getVertexName());
        tasksToStart = Lists.newArrayListWithCapacity(numTasks);
        for (int i = 0; i < numTasks; ++i) {
            taskIsStarted[i] = true;
            tasksToStart.add(ScheduleTaskRequest.create(i, null));
        }
    } else {
        // start only the ready 1-1 tasks
        tasksToStart = Lists.newLinkedList();
        for (int i = 0; i < taskIsStarted.length; ++i) {
            if (!taskIsStarted[i] && oneToOneSrcTasksDoneCount[i] == numOneToOneEdges) {
                taskIsStarted[i] = true;
                // May be null when no location hint was recorded for this index.
                TaskLocationHint locationHint = oneToOneLocationHints[i];
                LOG.info("Starting task " + i + " for vertex: " + getContext().getVertexName() + " with location: " + ((locationHint != null) ? locationHint.getAffinitizedTask() : "null"));
                tasksToStart.add(ScheduleTaskRequest.create(i, locationHint));
            }
        }
    }

    if (!tasksToStart.isEmpty()) {
        getContext().scheduleTasks(tasksToStart);
    }
}
Also used : TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) ScheduleTaskRequest(org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint)

Example 10 with ScheduleTaskRequest

use of org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest in project tez by apache.

the class ShuffleVertexManager method getTasksToSchedule.

/**
 * Builds the list of tasks to schedule from the overall source progress, taking tasks from the
 * front of {@code pendingTasks} and removing them from it.
 *
 * @param completedSourceAttempt part of the base class signature used by other
 *     VertexManagerPlugins; it is not used here
 * @return the schedule requests, or {@code null} when the computed number of tasks to schedule
 *     is not positive
 */
@Override
List<ScheduleTaskRequest> getTasksToSchedule(TaskAttemptIdentifier completedSourceAttempt) {
    float completedFraction = getMinSourceVertexCompletedTaskFraction();
    int remaining = getNumOfTasksToScheduleAndLog(completedFraction);
    if (remaining <= 0) {
        return null;
    }

    List<ScheduleTaskRequest> requests = Lists.newArrayListWithCapacity(remaining);
    for (; remaining > 0 && !pendingTasks.isEmpty(); remaining--) {
        // Pop the head of the pending list and turn it into a request.
        Integer taskIndex = pendingTasks.remove(0).getIndex();
        requests.add(ScheduleTaskRequest.create(taskIndex, null));
    }
    return requests;
}
Also used : BigInteger(java.math.BigInteger) ScheduleTaskRequest(org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest)

Aggregations

ScheduleTaskRequest (org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest)13 TaskLocationHint (org.apache.tez.dag.api.TaskLocationHint)6 VertexLocationHint (org.apache.tez.dag.api.VertexLocationHint)6 Test (org.junit.Test)6 VertexManagerPluginContext (org.apache.tez.dag.api.VertexManagerPluginContext)5 LinkedList (java.util.LinkedList)4 PlanTaskLocationHint (org.apache.tez.dag.api.records.DAGProtos.PlanTaskLocationHint)4 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)4 VertexEventRouteEvent (org.apache.tez.dag.app.dag.event.VertexEventRouteEvent)4 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)4 EventMetaData (org.apache.tez.runtime.api.impl.EventMetaData)4 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)4 EdgeManagerForTest (org.apache.tez.test.EdgeManagerForTest)4 GraceShuffleVertexManagerForTest (org.apache.tez.test.GraceShuffleVertexManagerForTest)4 VertexManagerPluginForTest (org.apache.tez.test.VertexManagerPluginForTest)4 EdgeManagerPluginOnDemand (org.apache.tez.dag.api.EdgeManagerPluginOnDemand)3 EventRouteMetadata (org.apache.tez.dag.api.EdgeManagerPluginOnDemand.EventRouteMetadata)3 VertexStateUpdate (org.apache.tez.dag.api.event.VertexStateUpdate)3 TaskAttemptEventInfo (org.apache.tez.dag.app.TaskAttemptEventInfo)3 BigInteger (java.math.BigInteger)2