use of org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData in project tez by apache.
the class DAGImpl method vertexSucceeded.
private boolean vertexSucceeded(Vertex vertex) {
numSuccessfulVertices++;
boolean recoveryFailed = false;
if (!commitAllOutputsOnSuccess && isCommittable()) {
// committing successful outputs immediately. check for shared outputs
List<VertexGroupInfo> groupsList = vertexGroupInfo.get(vertex.getName());
if (groupsList != null) {
List<VertexGroupInfo> commitList = Lists.newArrayListWithCapacity(groupsList.size());
for (VertexGroupInfo groupInfo : groupsList) {
groupInfo.successfulMembers++;
if (groupInfo.groupMembers.size() == groupInfo.successfulMembers && !groupInfo.outputs.isEmpty()) {
// group has outputs and all vertex members are done
LOG.info("All members of group: " + groupInfo.groupName + " are succeeded. Commiting outputs");
commitList.add(groupInfo);
}
}
for (VertexGroupInfo groupInfo : commitList) {
if (recoveryData != null && recoveryData.isVertexGroupCommitted(groupInfo.groupName)) {
LOG.info("VertexGroup was already committed as per recovery" + " data, groupName=" + groupInfo.groupName);
for (String vertexName : groupInfo.groupMembers) {
VertexRecoveryData vertexRecoveryData = recoveryData.getVertexRecoveryData(getVertex(vertexName).getVertexId());
Preconditions.checkArgument(vertexRecoveryData != null, "Vertex Group has been committed" + ", but no VertexRecoveryData found for its vertex " + vertexName);
VertexFinishedEvent vertexFinishedEvent = vertexRecoveryData.getVertexFinishedEvent();
Preconditions.checkArgument(vertexFinishedEvent != null, "Vertex Group has been committed" + ", but no VertexFinishedEvent found in its vertex " + vertexName);
Preconditions.checkArgument(vertexFinishedEvent.getState() == VertexState.SUCCEEDED, "Vertex Group has been committed, but unexpected vertex state of its vertex " + vertexName + ", vertexstate=" + vertexFinishedEvent.getState());
}
continue;
}
groupInfo.commitStarted = true;
final Vertex v = getVertex(groupInfo.groupMembers.iterator().next());
try {
Collection<TezVertexID> vertexIds = getVertexIds(groupInfo.groupMembers);
appContext.getHistoryHandler().handleCriticalEvent(new DAGHistoryEvent(getID(), new VertexGroupCommitStartedEvent(dagId, groupInfo.groupName, vertexIds, clock.getTime())));
} catch (IOException e) {
LOG.error("Failed to send commit recovery event to handler", e);
recoveryFailed = true;
}
if (!recoveryFailed) {
for (final String outputName : groupInfo.outputs) {
OutputKey outputKey = new OutputKey(outputName, groupInfo.groupName, true);
CommitCallback groupCommitCallback = new CommitCallback(outputKey);
CallableEvent groupCommitCallableEvent = new CallableEvent(groupCommitCallback) {
public Void call() throws Exception {
OutputCommitter committer = v.getOutputCommitters().get(outputName);
LOG.info("Committing output: " + outputName);
commitOutput(committer);
return null;
}
};
ListenableFuture<Void> groupCommitFuture = appContext.getExecService().submit(groupCommitCallableEvent);
Futures.addCallback(groupCommitFuture, groupCommitCallableEvent.getCallback());
commitFutures.put(outputKey, groupCommitFuture);
}
}
}
}
}
if (recoveryFailed) {
LOG.info("Recovery failure occurred during commit");
enactKill(DAGTerminationCause.RECOVERY_FAILURE, VertexTerminationCause.COMMIT_FAILURE);
}
return !recoveryFailed;
}
use of org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData in project tez by apache.
the class TestDAGRecovery method testVertexRecoverFromStart.
/**
* RecoveryEvents:
* DAG: DAGInitedEvent -> DAGStartedEvent
* V1: VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent
*
* V1 skip initialization.
*/
@Test(timeout = 5000)
public void testVertexRecoverFromStart() {
initMockDAGRecoveryDataForVertex();
List<TezEvent> inputGeneratedTezEvents = new ArrayList<TezEvent>();
VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id, "vertex1", 0L, v1InitedTime, v1NumTask, "", null, inputGeneratedTezEvents, null);
VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id, 0L, v1NumTask, null, null, null, true);
VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
VertexRecoveryData vertexRecoveryData = new VertexRecoveryData(v1InitedEvent, v1ReconfigureDoneEvent, v1StartedEvent, null, new HashMap<TezTaskID, TaskRecoveryData>(), false);
doReturn(vertexRecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
DAGEventRecoverEvent recoveryEvent = new DAGEventRecoverEvent(dagId, dagRecoveryData);
dag.handle(recoveryEvent);
dispatcher.await();
VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
assertEquals(DAGState.RUNNING, dag.getState());
// v1 skip initialization
assertEquals(VertexState.RUNNING, v1.getState());
assertEquals(v1InitedTime, v1.initedTime);
assertEquals(v1StartedTime, v1.startedTime);
assertEquals(v1NumTask, v1.getTotalTasks());
assertEquals(VertexState.RUNNING, v2.getState());
assertEquals(VertexState.RUNNING, v3.getState());
}
use of org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData in project tez by apache.
the class TestDAGRecovery method initMockDAGRecoveryDataForTask.
// ///////////////////////////// Task ////////////////////////////////////////////////////////////
private void initMockDAGRecoveryDataForTask() {
List<TezEvent> inputGeneratedTezEvents = new ArrayList<TezEvent>();
VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id, "vertex1", 0L, v1InitedTime, v1NumTask, "", null, inputGeneratedTezEvents, null);
Map<String, InputSpecUpdate> rootInputSpecs = new HashMap<String, InputSpecUpdate>();
VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id, 0L, v1NumTask, null, null, rootInputSpecs, true);
VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
VertexRecoveryData v1RecoveryData = new VertexRecoveryData(v1InitedEvent, v1ReconfigureDoneEvent, v1StartedEvent, null, new HashMap<TezTaskID, TaskRecoveryData>(), false);
DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagId, dagInitedTime, "user", "dagName", null);
DAGStartedEvent dagStartedEvent = new DAGStartedEvent(dagId, dagStartedTime, "user", "dagName");
doReturn(v1RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
doReturn(dagInitedEvent).when(dagRecoveryData).getDAGInitializedEvent();
doReturn(dagStartedEvent).when(dagRecoveryData).getDAGStartedEvent();
}
use of org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData in project tez by apache.
the class TestRecoveryParser method testRecoveryData.
@Test(timeout = 5000)
public void testRecoveryData() throws IOException {
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
AppContext appContext = mock(AppContext.class);
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
when(appContext.getClock()).thenReturn(new SystemClock());
when(mockDAGImpl.getID()).thenReturn(dagID);
when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
when(appContext.getApplicationID()).thenReturn(appId);
RecoveryService rService = new RecoveryService(appContext);
Configuration conf = new Configuration();
conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
rService.init(conf);
rService.start();
DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
// DAG DAGSubmittedEvent -> DAGInitializedEvent -> DAGStartedEvent
rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagID, 100L, "user", "dagName", null);
DAGStartedEvent dagStartedEvent = new DAGStartedEvent(dagID, 0L, "user", "dagName");
rService.handle(new DAGHistoryEvent(dagID, dagInitedEvent));
rService.handle(new DAGHistoryEvent(dagID, dagStartedEvent));
// 3 vertices of this dag: v0, v1, v2
TezVertexID v0Id = TezVertexID.getInstance(dagID, 0);
TezVertexID v1Id = TezVertexID.getInstance(dagID, 1);
TezVertexID v2Id = TezVertexID.getInstance(dagID, 2);
// v0 VertexInitializedEvent
VertexInitializedEvent v0InitedEvent = new VertexInitializedEvent(v0Id, "v0", 200L, 400L, 2, null, null, null, null);
rService.handle(new DAGHistoryEvent(dagID, v0InitedEvent));
// v1 VertexFinishedEvent(KILLED)
VertexFinishedEvent v1FinishedEvent = new VertexFinishedEvent(v1Id, "v1", 2, 300L, 400L, 500L, 600L, 700L, VertexState.KILLED, "", null, null, null, null);
rService.handle(new DAGHistoryEvent(dagID, v1FinishedEvent));
// v2 VertexInitializedEvent -> VertexStartedEvent
List<TezEvent> initGeneratedEvents = Lists.newArrayList(new TezEvent(DataMovementEvent.create(ByteBuffer.wrap(new byte[0])), null));
VertexInitializedEvent v2InitedEvent = new VertexInitializedEvent(v2Id, "v2", 200L, 300L, 2, null, null, initGeneratedEvents, null);
VertexStartedEvent v2StartedEvent = new VertexStartedEvent(v2Id, 0L, 0L);
rService.handle(new DAGHistoryEvent(dagID, v2InitedEvent));
rService.handle(new DAGHistoryEvent(dagID, v2StartedEvent));
// 3 tasks of v2
TezTaskID t0v2Id = TezTaskID.getInstance(v2Id, 0);
TezTaskID t1v2Id = TezTaskID.getInstance(v2Id, 1);
TezTaskID t2v2Id = TezTaskID.getInstance(v2Id, 2);
// t0v2 TaskStartedEvent
TaskStartedEvent t0v2StartedEvent = new TaskStartedEvent(t0v2Id, "v2", 400L, 5000L);
rService.handle(new DAGHistoryEvent(dagID, t0v2StartedEvent));
// t1v2 TaskFinishedEvent
TaskFinishedEvent t1v2FinishedEvent = new TaskFinishedEvent(t1v2Id, "v1", 0L, 0L, null, TaskState.KILLED, "", null, 4);
rService.handle(new DAGHistoryEvent(dagID, t1v2FinishedEvent));
// t2v2 TaskStartedEvent -> TaskFinishedEvent
TaskStartedEvent t2v2StartedEvent = new TaskStartedEvent(t2v2Id, "v2", 400L, 500L);
rService.handle(new DAGHistoryEvent(dagID, t2v2StartedEvent));
TaskFinishedEvent t2v2FinishedEvent = new TaskFinishedEvent(t2v2Id, "v1", 0L, 0L, null, TaskState.SUCCEEDED, "", null, 4);
rService.handle(new DAGHistoryEvent(dagID, t2v2FinishedEvent));
// attempts under t0v2
ContainerId containerId = ContainerId.newInstance(appAttemptId, 1);
NodeId nodeId = NodeId.newInstance("localhost", 9999);
TezTaskAttemptID ta0t0v2Id = TezTaskAttemptID.getInstance(t0v2Id, 0);
TaskAttemptStartedEvent ta0t0v2StartedEvent = new TaskAttemptStartedEvent(ta0t0v2Id, "v1", 0L, containerId, nodeId, "", "", "");
rService.handle(new DAGHistoryEvent(dagID, ta0t0v2StartedEvent));
// attempts under t2v2
TezTaskAttemptID ta0t2v2Id = TezTaskAttemptID.getInstance(t2v2Id, 0);
TaskAttemptStartedEvent ta0t2v2StartedEvent = new TaskAttemptStartedEvent(ta0t2v2Id, "v1", 500L, containerId, nodeId, "", "", "");
rService.handle(new DAGHistoryEvent(dagID, ta0t2v2StartedEvent));
TaskAttemptFinishedEvent ta0t2v2FinishedEvent = new TaskAttemptFinishedEvent(ta0t2v2Id, "v1", 500L, 600L, TaskAttemptState.SUCCEEDED, null, null, "", null, null, null, 0L, null, 0L, null, null, null, null, null);
rService.handle(new DAGHistoryEvent(dagID, ta0t2v2FinishedEvent));
rService.stop();
DAGRecoveryData dagData = parser.parseRecoveryData();
assertFalse(dagData.nonRecoverable);
// There's no equals method for the history event, so here only verify the init/start/finish time of each event for simplicity
assertEquals(dagInitedEvent.getInitTime(), dagData.getDAGInitializedEvent().getInitTime());
assertEquals(dagStartedEvent.getStartTime(), dagData.getDAGStartedEvent().getStartTime());
assertNull(dagData.getDAGFinishedEvent());
VertexRecoveryData v0Data = dagData.getVertexRecoveryData(v0Id);
VertexRecoveryData v1Data = dagData.getVertexRecoveryData(v1Id);
VertexRecoveryData v2Data = dagData.getVertexRecoveryData(v2Id);
assertNotNull(v0Data);
assertNotNull(v1Data);
assertNotNull(v2Data);
assertEquals(v0InitedEvent.getInitedTime(), v0Data.getVertexInitedEvent().getInitedTime());
assertNull(v0Data.getVertexStartedEvent());
assertNull(v1Data.getVertexInitedEvent());
assertEquals(v1FinishedEvent.getFinishTime(), v1Data.getVertexFinishedEvent().getFinishTime());
assertEquals(v2InitedEvent.getInitedTime(), v2Data.getVertexInitedEvent().getInitedTime());
assertEquals(v2StartedEvent.getStartTime(), v2Data.getVertexStartedEvent().getStartTime());
TaskRecoveryData t0v2Data = dagData.getTaskRecoveryData(t0v2Id);
TaskRecoveryData t1v2Data = dagData.getTaskRecoveryData(t1v2Id);
TaskRecoveryData t2v2Data = dagData.getTaskRecoveryData(t2v2Id);
assertNotNull(t0v2Data);
assertNotNull(t1v2Data);
assertNotNull(t2v2Data);
assertEquals(t0v2StartedEvent.getStartTime(), t0v2Data.getTaskStartedEvent().getStartTime());
assertNull(t0v2Data.getTaskFinishedEvent());
assertEquals(t1v2FinishedEvent.getFinishTime(), t1v2Data.getTaskFinishedEvent().getFinishTime());
assertNull(t1v2Data.getTaskStartedEvent());
assertEquals(t2v2StartedEvent.getStartTime(), t2v2Data.getTaskStartedEvent().getStartTime());
assertEquals(t2v2FinishedEvent.getFinishTime(), t2v2Data.getTaskFinishedEvent().getFinishTime());
TaskAttemptRecoveryData ta0t0v2Data = dagData.getTaskAttemptRecoveryData(ta0t0v2Id);
TaskAttemptRecoveryData ta0t2v2Data = dagData.getTaskAttemptRecoveryData(ta0t2v2Id);
assertNotNull(ta0t0v2Data);
assertNotNull(ta0t2v2Data);
assertEquals(ta0t0v2StartedEvent.getStartTime(), ta0t0v2Data.getTaskAttemptStartedEvent().getStartTime());
assertNull(ta0t0v2Data.getTaskAttemptFinishedEvent());
assertEquals(ta0t2v2StartedEvent.getStartTime(), ta0t2v2Data.getTaskAttemptStartedEvent().getStartTime());
assertEquals(ta0t2v2FinishedEvent.getFinishTime(), ta0t2v2Data.getTaskAttemptFinishedEvent().getFinishTime());
}
use of org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData in project tez by apache.
the class TestDAGRecovery method initMockDAGRecoveryDataForTaskAttempt.
// ///////////////////////////// TaskAttempt Recovery /////////////////////////////////////////////////////
private void initMockDAGRecoveryDataForTaskAttempt() {
TaskStartedEvent t1StartedEvent = new TaskStartedEvent(t1v1Id, "vertex1", 0L, t1StartedTime);
TaskRecoveryData taskRecoveryData = new TaskRecoveryData(t1StartedEvent, null, null);
Map<TezTaskID, TaskRecoveryData> taskRecoveryDataMap = new HashMap<TezTaskID, TaskRecoveryData>();
taskRecoveryDataMap.put(t1v1Id, taskRecoveryData);
List<TezEvent> inputGeneratedTezEvents = new ArrayList<TezEvent>();
VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id, "vertex1", 0L, v1InitedTime, v1NumTask, "", null, inputGeneratedTezEvents, null);
Map<String, InputSpecUpdate> rootInputSpecs = new HashMap<String, InputSpecUpdate>();
VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id, 0L, v1NumTask, null, null, rootInputSpecs, true);
VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
VertexRecoveryData v1RecoveryData = new VertexRecoveryData(v1InitedEvent, v1ReconfigureDoneEvent, v1StartedEvent, null, taskRecoveryDataMap, false);
DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagId, dagInitedTime, "user", "dagName", null);
DAGStartedEvent dagStartedEvent = new DAGStartedEvent(dagId, dagStartedTime, "user", "dagName");
doReturn(v1RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
doReturn(dagInitedEvent).when(dagRecoveryData).getDAGInitializedEvent();
doReturn(dagStartedEvent).when(dagRecoveryData).getDAGStartedEvent();
}
Aggregations