Search in sources :

Example 36 with TezVertexID

use of org.apache.tez.dag.records.TezVertexID in project tez by apache.

the class TestSpeculation method testSingleTaskSpeculation.

@Test(timeout = 10000)
public void testSingleTaskSpeculation() throws Exception {
    // Map<Timeout conf value, expected number of tasks>
    Map<Long, Integer> confToExpected = new HashMap<Long, Integer>();
    // Really long time to speculate
    confToExpected.put(Long.MAX_VALUE >> 1, 1);
    confToExpected.put(100L, 2);
    // Don't speculate
    confToExpected.put(-1L, 1);
    for (Map.Entry<Long, Integer> entry : confToExpected.entrySet()) {
        defaultConf.setLong(TezConfiguration.TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT, entry.getKey());
        DAG dag = DAG.create("test");
        Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 1);
        dag.addVertex(vA);
        MockTezClient tezClient = createTezSession();
        DAGClient dagClient = tezClient.submitDAG(dag);
        DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
        TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0);
        // original attempt is killed and speculative one is successful
        TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
        TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1);
        Thread.sleep(200);
        // cause speculation trigger
        mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
        mockLauncher.startScheduling(true);
        dagClient.waitForCompletion();
        Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
        Task task = dagImpl.getTask(killedTaId.getTaskID());
        Assert.assertEquals(entry.getValue().intValue(), task.getAttempts().size());
        if (entry.getValue() > 1) {
            Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID());
            TaskAttempt killedAttempt = task.getAttempt(killedTaId);
            Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed as speculative attempt");
            Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION, killedAttempt.getTerminationCause());
        }
        tezClient.stop();
    }
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) Task(org.apache.tez.dag.app.dag.Task) HashMap(java.util.HashMap) DAG(org.apache.tez.dag.api.DAG) DAGImpl(org.apache.tez.dag.app.dag.impl.DAGImpl) DAGClient(org.apache.tez.dag.api.client.DAGClient) TaskAttempt(org.apache.tez.dag.app.dag.TaskAttempt) HashMap(java.util.HashMap) Map(java.util.Map) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 37 with TezVertexID

use of org.apache.tez.dag.records.TezVertexID in project tez by apache.

the class TestSpeculation method testBasicSpeculation.

public void testBasicSpeculation(boolean withProgress) throws Exception {
    DAG dag = DAG.create("test");
    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5);
    dag.addVertex(vA);
    MockTezClient tezClient = createTezSession();
    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
    TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0);
    // original attempt is killed and speculative one is successful
    TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
    TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1);
    mockLauncher.updateProgress(withProgress);
    // cause speculation trigger
    mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
    mockLauncher.startScheduling(true);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
    Task task = dagImpl.getTask(killedTaId.getTaskID());
    Assert.assertEquals(2, task.getAttempts().size());
    Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID());
    TaskAttempt killedAttempt = task.getAttempt(killedTaId);
    Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed as speculative attempt");
    Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION, killedAttempt.getTerminationCause());
    if (withProgress) {
        // without progress updates occasionally more than 1 task speculates
        Assert.assertEquals(1, task.getCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());
        Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());
        org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID());
        Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());
    }
    tezClient.stop();
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) Task(org.apache.tez.dag.app.dag.Task) DAGImpl(org.apache.tez.dag.app.dag.impl.DAGImpl) DAGClient(org.apache.tez.dag.api.client.DAGClient) DAG(org.apache.tez.dag.api.DAG) TaskAttempt(org.apache.tez.dag.app.dag.TaskAttempt) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 38 with TezVertexID

use of org.apache.tez.dag.records.TezVertexID in project tez by apache.

the class TestSpeculation method testBasicSpeculationNotUseful.

@Test(timeout = 10000)
public void testBasicSpeculationNotUseful() throws Exception {
    DAG dag = DAG.create("test");
    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5);
    dag.addVertex(vA);
    MockTezClient tezClient = createTezSession();
    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
    TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0);
    // original attempt is successful and speculative one is killed
    TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
    TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1);
    mockLauncher.setStatusUpdatesForTask(successTaId, 100);
    mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
    mockLauncher.startScheduling(true);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
    Task task = dagImpl.getTask(killedTaId.getTaskID());
    Assert.assertEquals(2, task.getAttempts().size());
    Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID());
    TaskAttempt killedAttempt = task.getAttempt(killedTaId);
    Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed speculative attempt as");
    Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_INEFFECTIVE_SPECULATION, killedAttempt.getTerminationCause());
    Assert.assertEquals(1, task.getCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());
    Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());
    org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID());
    Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());
    tezClient.stop();
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) Task(org.apache.tez.dag.app.dag.Task) DAGImpl(org.apache.tez.dag.app.dag.impl.DAGImpl) DAGClient(org.apache.tez.dag.api.client.DAGClient) DAG(org.apache.tez.dag.api.DAG) TaskAttempt(org.apache.tez.dag.app.dag.TaskAttempt) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 39 with TezVertexID

use of org.apache.tez.dag.records.TezVertexID in project tez by apache.

the class TestRecoveryParser method testRecoverableNonSummary1.

@Test(timeout = 5000)
public void testRecoverableNonSummary1() throws IOException {
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
    AppContext appContext = mock(AppContext.class);
    when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
    when(appContext.getClock()).thenReturn(new SystemClock());
    when(mockDAGImpl.getID()).thenReturn(dagID);
    when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
    when(appContext.getApplicationID()).thenReturn(appId);
    // MockRecoveryService will skip the non-summary event
    MockRecoveryService rService = new MockRecoveryService(appContext);
    Configuration conf = new Configuration();
    conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
    rService.init(conf);
    rService.start();
    DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
    // write a DAGSubmittedEvent first to initialize summaryStream
    rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
    // It should be fine to skip other events, just for testing.
    TezVertexID vertexId = TezVertexID.getInstance(dagID, 0);
    rService.handle(new DAGHistoryEvent(dagID, new VertexCommitStartedEvent(vertexId, 0L)));
    rService.handle(new DAGHistoryEvent(dagID, new VertexFinishedEvent(vertexId, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null)));
    rService.stop();
    DAGRecoveryData dagData = parser.parseRecoveryData();
    assertTrue(dagData.nonRecoverable);
    assertTrue(dagData.reason.contains("Vertex has been committed, but its full recovery events are not seen"));
}
Also used : Path(org.apache.hadoop.fs.Path) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Configuration(org.apache.hadoop.conf.Configuration) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) TezDAGID(org.apache.tez.dag.records.TezDAGID) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) DAGRecoveryData(org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData) TezVertexID(org.apache.tez.dag.records.TezVertexID) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent) VertexCommitStartedEvent(org.apache.tez.dag.history.events.VertexCommitStartedEvent)

Example 40 with TezVertexID

use of org.apache.tez.dag.records.TezVertexID in project tez by apache.

the class TestRecoveryParser method testRecoverableSummary_VertexFinishCommitting.

@Test(timeout = 5000)
public void testRecoverableSummary_VertexFinishCommitting() throws IOException {
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
    AppContext appContext = mock(AppContext.class);
    when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
    when(appContext.getClock()).thenReturn(new SystemClock());
    when(mockDAGImpl.getID()).thenReturn(dagID);
    when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
    when(appContext.getApplicationID()).thenReturn(appId);
    RecoveryService rService = new RecoveryService(appContext);
    Configuration conf = new Configuration();
    conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
    rService.init(conf);
    rService.start();
    DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
    // write a DAGSubmittedEvent first to initialize summaryStream
    rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
    // It should be fine to skip other events, just for testing.
    TezVertexID vertexId = TezVertexID.getInstance(dagID, 0);
    rService.handle(new DAGHistoryEvent(dagID, new VertexCommitStartedEvent(vertexId, 0L)));
    rService.handle(new DAGHistoryEvent(dagID, new VertexFinishedEvent(vertexId, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null)));
    rService.stop();
    DAGRecoveryData dagData = parser.parseRecoveryData();
    assertEquals(dagID, dagData.recoveredDagID);
    assertFalse(dagData.nonRecoverable);
}
Also used : Path(org.apache.hadoop.fs.Path) RecoveryService(org.apache.tez.dag.history.recovery.RecoveryService) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Configuration(org.apache.hadoop.conf.Configuration) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) TezDAGID(org.apache.tez.dag.records.TezDAGID) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) DAGRecoveryData(org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData) TezVertexID(org.apache.tez.dag.records.TezVertexID) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent) VertexCommitStartedEvent(org.apache.tez.dag.history.events.VertexCommitStartedEvent)

Aggregations

TezVertexID (org.apache.tez.dag.records.TezVertexID)117 Test (org.junit.Test)64 TezDAGID (org.apache.tez.dag.records.TezDAGID)61 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)54 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)46 Configuration (org.apache.hadoop.conf.Configuration)43 TezTaskID (org.apache.tez.dag.records.TezTaskID)43 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)40 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)31 Resource (org.apache.hadoop.yarn.api.records.Resource)29 Container (org.apache.hadoop.yarn.api.records.Container)28 ClusterInfo (org.apache.tez.dag.app.ClusterInfo)28 TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface)28 ContainerHeartbeatHandler (org.apache.tez.dag.app.ContainerHeartbeatHandler)27 AMContainerMap (org.apache.tez.dag.app.rm.container.AMContainerMap)27 ContainerContextMatcher (org.apache.tez.dag.app.rm.container.ContainerContextMatcher)27 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)24 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)24 SystemClock (org.apache.hadoop.yarn.util.SystemClock)22 Vertex (org.apache.tez.dag.app.dag.Vertex)22