Use of org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted in project tez by apache.
Class VertexImpl, method checkTasksForCompletion.
/**
 * Decides whether the vertex is done once a task has completed (triggered by task_complete).
 *
 * <p>While fewer tasks have completed than the vertex holds, the current internal state is
 * returned unchanged. Once every task has completed, final statistics are gathered and the
 * vertex either proceeds towards SUCCEEDED (possibly via commit) or is finished with its
 * termination cause.
 *
 * <p>The vertex may succeed when all tasks succeeded, or when every task either succeeded or
 * failed and the failed share does not exceed {@code maxFailuresPercent}; in both cases only
 * if no termination cause is registered. In the tolerated-failure case, for each FAILED task
 * the attempt with the highest id is reported to every target vertex as SUCCEEDED, together
 * with the events produced by {@code Edge.generateEmptyEventsForAttempt}.
 *
 * @param vertex the vertex whose task counters are evaluated
 * @return the state produced by finishing/committing the vertex, or its current internal
 *         state when tasks are still outstanding
 * @throws TezUncheckedException wrapping any exception raised while generating or
 *         dispatching the substitute events for a failed task
 */
static VertexState checkTasksForCompletion(final VertexImpl vertex) {
  // One line per invocation makes it cheap to count completions for a vertex;
  // grepping and counting attempts while accounting for re-tries is time consuming.
  LOG.info("Task Completion: " + constructCheckTasksForCompletionLog(vertex));

  final int knownTasks = vertex.tasks.size();

  // More completions than tasks indicates broken accounting; log it but keep going.
  if (vertex.completedTaskCount > knownTasks) {
    LOG.error("task completion accounting issue: completedTaskCount > nTasks:" + constructCheckTasksForCompletionLog(vertex));
  }

  // Vertex not finished yet: report the current state.
  if (vertex.completedTaskCount != knownTasks) {
    return vertex.getInternalState();
  }

  // Finished - gather stats.
  vertex.finalStatistics = vertex.constructStatistics();

  final boolean allSucceeded = vertex.succeededTaskCount == vertex.numTasks;
  final boolean everyTaskSucceededOrFailed =
      vertex.succeededTaskCount + vertex.failedTaskCount == vertex.numTasks;
  final boolean failuresTolerated = everyTaskSucceededOrFailed
      && (vertex.failedTaskCount * 100 <= vertex.maxFailuresPercent * vertex.numTasks);

  // Success requires an acceptable outcome AND no registered termination cause.
  if (!(allSucceeded || failuresTolerated) || vertex.terminationCause != null) {
    return finishWithTerminationCause(vertex);
  }

  if (allSucceeded) {
    LOG.info("All tasks have succeeded, vertex:" + vertex.logIdentifier);
  } else {
    LOG.info("All tasks in the vertex " + vertex.logIdentifier + " have completed and the percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
    vertex.addDiagnostic("Vertex succeeded as percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
    vertex.logSuccessDiagnostics = true;

    for (Task candidate : vertex.tasks.values()) {
      if (candidate.getState().equals(TaskState.FAILED)) {
        // Pick the attempt with the highest id; it is the one marked as successful.
        TezTaskAttemptID newestAttempt = null;
        for (TezTaskAttemptID attemptId : candidate.getAttempts().keySet()) {
          if (newestAttempt == null || attemptId.getId() > newestAttempt.getId()) {
            newestAttempt = attemptId;
          }
        }
        LOG.info("Succeeding failed task attempt:" + newestAttempt);

        for (Map.Entry<Vertex, Edge> target : vertex.targetVertices.entrySet()) {
          Vertex downstream = target.getKey();
          Edge outgoingEdge = target.getValue();
          try {
            List<TezEvent> emptyEvents = outgoingEdge.generateEmptyEventsForAttempt(newestAttempt);
            // Downstream vertices need a SUCCEEDED completion event for each failed task
            // so that their num bipartite count is correct.
            VertexEventTaskAttemptCompleted succeededEvent =
                new VertexEventTaskAttemptCompleted(newestAttempt, TaskAttemptStateInternal.SUCCEEDED);
            // Completion notification goes out first, then the routed events.
            vertex.eventHandler.handle(
                new VertexEventSourceTaskAttemptCompleted(downstream.getVertexId(), succeededEvent));
            vertex.eventHandler.handle(
                new VertexEventRouteEvent(downstream.getVertexId(), emptyEvents));
          } catch (Exception e) {
            throw new TezUncheckedException(e);
          }
        }
      }
    }
  }

  // Start the commit only if outputs are committed at vertex level and nobody claimed
  // the commit yet; getAndSet(true) marks it as claimed.
  if (vertex.commitVertexOutputs && !vertex.committed.getAndSet(true)) {
    return commitOrFinish(vertex);
  }
  // Otherwise just finish because no vertex committing is needed.
  return vertex.finished(VertexState.SUCCEEDED);
}
Use of org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted in project tez by apache.
Class TestVertexImpl, method testVertexWithInitializerParallelismSetTo0.
/**
 * Runs a two-vertex DAG in which vertex1 uses a {@code RootInitializerSettingParallelismTo0}
 * initializer. An event addressed from vertex2 to vertex1 is routed before vertex2 runs;
 * vertex2 is then driven to SUCCEEDED, the initializer is released, and vertex1 is expected
 * to reach SUCCEEDED as well.
 */
@Test(timeout = 10000)
public void testVertexWithInitializerParallelismSetTo0() throws InterruptedException, TezException {
  useCustomInitializer = true;
  RootInitializerSettingParallelismTo0 initializer = new RootInitializerSettingParallelismTo0(null);
  customInitializer = initializer;

  setupPreDagCreation();
  dagPlan = createDAGPlanWithInitializer0Tasks(RootInitializerSettingParallelismTo0.class.getName());
  setupPostDagCreation();

  VertexImpl v1 = vertices.get("vertex1");
  VertexImpl v2 = vertices.get("vertex2");
  initVertex(v2);

  // Identify task 0 / attempt 0 of vertex2 as the producer of the event sent to vertex1.
  TezTaskID sourceTaskId = TezTaskID.getInstance(v2.getVertexId(), 0);
  TezTaskAttemptID sourceAttemptId = TezTaskAttemptID.getInstance(sourceTaskId, 0);
  EventMetaData sourceInfo =
      new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex2", "vertex1", sourceAttemptId);
  List<TezEvent> routedEvents = new LinkedList<>();
  routedEvents.add(new TezEvent(DataMovementEvent.create(null), sourceInfo));
  v1.handle(new VertexEventRouteEvent(v1.getVertexId(), routedEvents));

  startVertex(v2);
  dispatcher.await();

  // Complete the single attempt and task of vertex2.
  v2.handle(new VertexEventTaskAttemptCompleted(sourceAttemptId, TaskAttemptStateInternal.SUCCEEDED));
  v2.handle(new VertexEventTaskCompleted(sourceTaskId, TaskState.SUCCEEDED));
  dispatcher.await();
  Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());

  // The initializer thread may not have started yet, so keep calling go() until vertex1
  // has left its initialization states.
  while (v1.getState() == VertexState.INITIALIZING || v1.getState() == VertexState.INITED) {
    initializer.go();
    Thread.sleep(10);
  }

  // Poll until vertex1 succeeds; the @Test timeout bounds the wait.
  while (v1.getState() != VertexState.SUCCEEDED) {
    Thread.sleep(10);
  }
  Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
}
Use of org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted in project tez by apache.
Class TestVertexImpl, method testInputInitializerEventNoDirectConnection.
/**
 * Sends an {@code InputInitializerEvent} from a vertex1 task to the "input1" initializer of
 * vertex3 and checks that the event stays pending in the initializer wrapper until the
 * vertex1 tasks have succeeded, after which the initializer receives it and vertex3 reaches
 * RUNNING.
 */
@Test(timeout = 10000)
public void testInputInitializerEventNoDirectConnection() throws Exception {
  useCustomInitializer = true;
  EventHandlingRootInputInitializer initializer = new EventHandlingRootInputInitializer(null);
  customInitializer = initializer;

  setupPreDagCreation();
  dagPlan = createDAGPlanWithRunningInitializer4();
  setupPostDagCreation();

  VertexImplWithRunningInputInitializer v1 =
      (VertexImplWithRunningInputInitializer) vertices.get("vertex1");
  VertexImplWithRunningInputInitializer v2 =
      (VertexImplWithRunningInputInitializer) vertices.get("vertex2");
  VertexImplWithRunningInputInitializer v3 =
      (VertexImplWithRunningInputInitializer) vertices.get("vertex3");

  initVertex(v1);
  startVertex(v1);
  dispatcher.await();

  // Starting vertex1 should trigger the downstream vertices.
  Assert.assertEquals(VertexState.RUNNING, v1.getState());
  Assert.assertEquals(VertexState.RUNNING, v2.getState());
  Assert.assertEquals(VertexState.INITIALIZING, v3.getState());

  // Generate an event from v1 to v3's InputInitializer, attributed to task 0 / attempt 0,
  // the single task that exists in vertex1.
  InputInitializerEvent initializerEvent = InputInitializerEvent.create("vertex3", "input1", null);
  TezTaskID firstTaskId = TezTaskID.getInstance(v1.getVertexId(), 0);
  TezTaskAttemptID firstAttemptId = TezTaskAttemptID.getInstance(firstTaskId, 0);
  EventMetaData sourceInfo =
      new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex1", "vertex3", firstAttemptId);
  TezEvent wrappedEvent = new TezEvent(initializerEvent, sourceInfo);
  dispatcher.getEventHandler().handle(
      new VertexEventRouteEvent(v1.getVertexId(), Collections.singletonList(wrappedEvent)));
  dispatcher.await();

  // Events should not be cached in the vertex, since the initializer is running.
  Assert.assertEquals(0, v3.pendingInitializerEvents.size());

  // Instead they should be cached by the initializer wrapper, because the source tasks
  // have not succeeded yet.
  RootInputInitializerManager.InitializerWrapper initializerWrapper =
      v3.rootInputInitializerManager.getInitializerWrapper("input1");
  Assert.assertEquals(1, initializerWrapper.getFirstSuccessfulAttemptMap().size());
  Assert.assertEquals(1, initializerWrapper.getPendingEvents().get(v1.getName()).size());

  // Get all tasks of vertex1 to succeed (attempt 0 of each).
  for (TezTaskID completedTaskId : v1.getTasks().keySet()) {
    TezTaskAttemptID completedAttemptId = TezTaskAttemptID.getInstance(completedTaskId, 0);
    v1.handle(new VertexEventTaskAttemptCompleted(completedAttemptId, TaskAttemptStateInternal.SUCCEEDED));
    v1.handle(new VertexEventTaskCompleted(completedTaskId, TaskState.SUCCEEDED));
    dispatcher.await();
    v1.stateChangeNotifier.taskSucceeded(v1.getName(), completedTaskId, completedAttemptId.getId());
  }
  dispatcher.await();

  // The initializer should now run and process the events; poll until vertex3 is RUNNING
  // (the @Test timeout bounds the wait).
  while (v3.getState() != VertexState.RUNNING) {
    Thread.sleep(10);
  }
  Assert.assertEquals(VertexState.RUNNING, v3.getState());

  // The pending queue for vertex1 must have been drained and delivered.
  Assert.assertEquals(1, initializerWrapper.getFirstSuccessfulAttemptMap().size());
  Assert.assertEquals(0, initializerWrapper.getPendingEvents().get(v1.getName()).size());
  Assert.assertTrue(initializer.eventReceived.get());

  // Three state updates are expected, in this order.
  Assert.assertEquals(3, initializer.stateUpdates.size());
  Assert.assertEquals(org.apache.tez.dag.api.event.VertexState.CONFIGURED,
      initializer.stateUpdates.get(0).getVertexState());
  Assert.assertEquals(org.apache.tez.dag.api.event.VertexState.RUNNING,
      initializer.stateUpdates.get(1).getVertexState());
  Assert.assertEquals(org.apache.tez.dag.api.event.VertexState.SUCCEEDED,
      initializer.stateUpdates.get(2).getVertexState());
}
Use of org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted in project tez by apache.
Class TestVertexImpl, method testInputInitializerEventsMultipleSources.
/**
 * Feeds the "input1" initializer of vertex3 with events from two source vertices: one event
 * from vertex1 and three from vertex2 (one from task 0, two from task 1). Verifies that the
 * events of each source stay pending until that source's tasks have succeeded, and that all
 * four payloads reach the initializer before vertex3 is RUNNING.
 */
@Test(timeout = 10000)
public void testInputInitializerEventsMultipleSources() throws Exception {
  useCustomInitializer = true;
  EventHandlingRootInputInitializer initializer = new EventHandlingRootInputInitializer(null);
  customInitializer = initializer;
  initializer.setNumExpectedEvents(4);

  setupPreDagCreation();
  dagPlan = createDAGPlanWithRunningInitializer4();
  setupPostDagCreation();

  VertexImplWithRunningInputInitializer v1 =
      (VertexImplWithRunningInputInitializer) vertices.get("vertex1");
  VertexImplWithRunningInputInitializer v2 =
      (VertexImplWithRunningInputInitializer) vertices.get("vertex2");
  VertexImplWithRunningInputInitializer v3 =
      (VertexImplWithRunningInputInitializer) vertices.get("vertex3");

  initVertex(v1);
  startVertex(v1);
  dispatcher.await();

  // Starting vertex1 should trigger the downstream vertices.
  Assert.assertEquals(VertexState.RUNNING, v1.getState());
  Assert.assertEquals(VertexState.RUNNING, v2.getState());
  Assert.assertEquals(VertexState.INITIALIZING, v3.getState());

  // Every payload sent is recorded here and ticked off at the end.
  List<ByteBuffer> expectedPayloads = new LinkedList<>();

  // Generate an event from v1 to v3's InputInitializer. Payload ints: (1, 0, 0).
  ByteBuffer v1Payload = ByteBuffer.allocate(12).putInt(0, 1).putInt(4, 0).putInt(8, 0);
  expectedPayloads.add(v1Payload);
  InputInitializerEvent v1InitializerEvent =
      InputInitializerEvent.create("vertex3", "input1", v1Payload);

  // Attribute it to task 0 / attempt 0, the single task that exists in vertex1.
  TezTaskID v1TaskId = TezTaskID.getInstance(v1.getVertexId(), 0);
  TezTaskAttemptID v1AttemptId = TezTaskAttemptID.getInstance(v1TaskId, 0);
  EventMetaData v1SourceInfo =
      new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex1", "vertex3", v1AttemptId);
  TezEvent v1Wrapped = new TezEvent(v1InitializerEvent, v1SourceInfo);
  dispatcher.getEventHandler().handle(
      new VertexEventRouteEvent(v1.getVertexId(), Collections.singletonList(v1Wrapped)));
  dispatcher.await();

  // Events should not be cached in the vertex, since the initializer is running.
  Assert.assertEquals(0, v3.pendingInitializerEvents.size());

  // Instead they should be cached by the initializer wrapper, because the source tasks
  // have not succeeded yet.
  RootInputInitializerManager.InitializerWrapper initializerWrapper =
      v3.rootInputInitializerManager.getInitializerWrapper("input1");
  Assert.assertEquals(1, initializerWrapper.getFirstSuccessfulAttemptMap().size());
  Assert.assertEquals(1, initializerWrapper.getPendingEvents().get(v1.getName()).size());

  // Get all tasks of vertex1 to succeed (attempt 0 of each).
  for (TezTaskID doneTaskId : v1.getTasks().keySet()) {
    TezTaskAttemptID doneAttemptId = TezTaskAttemptID.getInstance(doneTaskId, 0);
    v1.handle(new VertexEventTaskAttemptCompleted(doneAttemptId, TaskAttemptStateInternal.SUCCEEDED));
    v1.handle(new VertexEventTaskCompleted(doneTaskId, TaskState.SUCCEEDED));
    dispatcher.await();
    v1.stateChangeNotifier.taskSucceeded(v1.getName(), doneTaskId, doneAttemptId.getId());
  }
  dispatcher.await();

  // The vertex1 event has been delivered to the initializer.
  Assert.assertEquals(1, initializer.initializerEvents.size());

  // The remainder of the test is written on the assumption that vertex2 has two tasks.
  Assert.assertEquals(2, v2.getTotalTasks());

  // Generate events from v2 to v3's initializer: 1 from task 0, 2 from task 1.
  // Payload ints: (2, task index, event index within the task).
  for (Task v2Task : v2.getTasks().values()) {
    TezTaskID v2TaskId = v2Task.getTaskId();
    TezTaskAttemptID v2AttemptId = TezTaskAttemptID.getInstance(v2TaskId, 0);
    int eventsForThisTask = v2TaskId.getId() + 1;
    for (int seq = 0; seq < eventsForThisTask; seq++) {
      ByteBuffer v2Payload =
          ByteBuffer.allocate(12).putInt(0, 2).putInt(4, v2TaskId.getId()).putInt(8, seq);
      expectedPayloads.add(v2Payload);
      InputInitializerEvent v2InitializerEvent =
          InputInitializerEvent.create("vertex3", "input1", v2Payload);
      EventMetaData v2SourceInfo =
          new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex2", "vertex3", v2AttemptId);
      TezEvent v2Wrapped = new TezEvent(v2InitializerEvent, v2SourceInfo);
      dispatcher.getEventHandler().handle(
          new VertexEventRouteEvent(v2.getVertexId(), Collections.singletonList(v2Wrapped)));
      dispatcher.await();
    }
  }

  // Validate queueing of these events: only v2 has pending events, and there are 3 of them.
  Assert.assertEquals(1, initializerWrapper.getPendingEvents().keySet().size());
  Assert.assertEquals(3, initializerWrapper.getPendingEvents().get(v2.getName()).size());

  // Get all tasks of vertex2 to succeed (attempt 0 of each).
  for (TezTaskID doneTaskId : v2.getTasks().keySet()) {
    TezTaskAttemptID doneAttemptId = TezTaskAttemptID.getInstance(doneTaskId, 0);
    v2.handle(new VertexEventTaskAttemptCompleted(doneAttemptId, TaskAttemptStateInternal.SUCCEEDED));
    v2.handle(new VertexEventTaskCompleted(doneTaskId, TaskState.SUCCEEDED));
    dispatcher.await();
    v2.stateChangeNotifier.taskSucceeded(v2.getName(), doneTaskId, doneAttemptId.getId());
  }
  dispatcher.await();

  // The initializer should now run and process the events; poll until vertex3 is RUNNING
  // (the @Test timeout bounds the wait).
  while (v3.getState() != VertexState.RUNNING) {
    Thread.sleep(10);
  }
  Assert.assertEquals(VertexState.RUNNING, v3.getState());

  Assert.assertEquals(4, initializer.initializerEvents.size());
  Assert.assertTrue(initializer.initComplete.get());
  Assert.assertEquals(2, initializerWrapper.getFirstSuccessfulAttemptMap().size());
  Assert.assertEquals(0, initializerWrapper.getPendingEvents().get(v1.getName()).size());

  // Every payload that was sent must have been received.
  for (InputInitializerEvent received : initializer.initializerEvents) {
    expectedPayloads.remove(received.getUserPayload());
  }
  Assert.assertEquals(0, expectedPayloads.size());
}
Use of org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted in project tez by apache.
Class TestVertexImpl, method testSourceTaskAttemptCompletionEvents.
/**
 * Verifies that vertex6 counts only SUCCEEDED source task attempt completions.
 *
 * <p>vertex4 and vertex5 each report two tasks. In each vertex one task has a FAILED first
 * attempt followed by a SUCCEEDED second attempt, and the other task succeeds on its first
 * attempt. That is six attempt completions in total, four of them SUCCEEDED, so
 * {@code numSuccessSourceAttemptCompletions} on vertex6 is expected to be 4 while vertex6
 * itself stays RUNNING.
 */
@Test(timeout = 5000)
public void testSourceTaskAttemptCompletionEvents() {
  LOG.info("Testing testSourceTaskAttemptCompletionEvents");
  initAllVertices(VertexState.INITED);
  VertexImpl v4 = vertices.get("vertex4");
  VertexImpl v5 = vertices.get("vertex5");
  VertexImpl v6 = vertices.get("vertex6");
  startVertex(vertices.get("vertex1"));
  startVertex(vertices.get("vertex2"));
  dispatcher.await();
  LOG.info("Verifying v6 state " + v6.getState());
  Assert.assertEquals(VertexState.RUNNING, v6.getState());

  TezTaskID t1_v4 = TezTaskID.getInstance(v4.getVertexId(), 0);
  TezTaskID t2_v4 = TezTaskID.getInstance(v4.getVertexId(), 1);
  TezTaskID t1_v5 = TezTaskID.getInstance(v5.getVertexId(), 0);
  TezTaskID t2_v5 = TezTaskID.getInstance(v5.getVertexId(), 1);

  // Naming: ta<N>_t<M>_v<K> is the N-th attempt of the M-th task of vertex K.
  // Attempt ids are zero-based, so a first attempt has id 0 and a second attempt id 1.
  // The second attempts must not reuse id 0, otherwise the very same attempt would be
  // reported first as FAILED and then as SUCCEEDED.
  TezTaskAttemptID ta1_t1_v4 = TezTaskAttemptID.getInstance(t1_v4, 0);
  TezTaskAttemptID ta2_t1_v4 = TezTaskAttemptID.getInstance(t1_v4, 1);
  TezTaskAttemptID ta1_t2_v4 = TezTaskAttemptID.getInstance(t2_v4, 0);
  TezTaskAttemptID ta1_t1_v5 = TezTaskAttemptID.getInstance(t1_v5, 0);
  TezTaskAttemptID ta1_t2_v5 = TezTaskAttemptID.getInstance(t2_v5, 0);
  TezTaskAttemptID ta2_t2_v5 = TezTaskAttemptID.getInstance(t2_v5, 1);

  // vertex4: task 1 fails once and then succeeds, task 2 succeeds directly.
  v4.handle(new VertexEventTaskAttemptCompleted(ta1_t1_v4, TaskAttemptStateInternal.FAILED));
  v4.handle(new VertexEventTaskAttemptCompleted(ta2_t1_v4, TaskAttemptStateInternal.SUCCEEDED));
  v4.handle(new VertexEventTaskAttemptCompleted(ta1_t2_v4, TaskAttemptStateInternal.SUCCEEDED));

  // vertex5: task 1 succeeds directly, task 2 fails once and then succeeds.
  v5.handle(new VertexEventTaskAttemptCompleted(ta1_t1_v5, TaskAttemptStateInternal.SUCCEEDED));
  v5.handle(new VertexEventTaskAttemptCompleted(ta1_t2_v5, TaskAttemptStateInternal.FAILED));
  v5.handle(new VertexEventTaskAttemptCompleted(ta2_t2_v5, TaskAttemptStateInternal.SUCCEEDED));

  // Report all four tasks as succeeded.
  v4.handle(new VertexEventTaskCompleted(t1_v4, TaskState.SUCCEEDED));
  v4.handle(new VertexEventTaskCompleted(t2_v4, TaskState.SUCCEEDED));
  v5.handle(new VertexEventTaskCompleted(t1_v5, TaskState.SUCCEEDED));
  v5.handle(new VertexEventTaskCompleted(t2_v5, TaskState.SUCCEEDED));
  dispatcher.await();

  Assert.assertEquals(VertexState.SUCCEEDED, v4.getState());
  Assert.assertEquals(VertexState.SUCCEEDED, v5.getState());
  Assert.assertEquals(VertexState.RUNNING, v6.getState());
  // Only the four SUCCEEDED attempt completions are counted; the two FAILED ones are not.
  Assert.assertEquals(4, v6.numSuccessSourceAttemptCompletions);
}
Aggregations