Search in sources :

Example 11 with DefaultHadoopShim

use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.

the class TestLogicalIOProcessorRuntimeTask method testAutoStart.

/**
 * Runs two {@link LogicalIOProcessorRuntimeTask}s back to back (the second with
 * {@code TEZ_LOCAL_MODE} enabled) and checks the processor run count, the input/output
 * start counts, the progress notification flag and the vertex parallelism exposed through
 * the processor, input and output contexts.
 *
 * <p>Unexpected exceptions are allowed to propagate so that the test report carries the
 * original cause, and the shared executor is shut down on every exit path.
 *
 * @throws Exception if task construction, initialization, execution or cleanup fails
 */
@Test(timeout = 5000)
public void testAutoStart() throws Exception {
    TezDAGID dagId = createTezDagId();
    TezVertexID vertexId = createTezVertexId(dagId);
    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    Multimap<String, String> startedInputsMap = HashMultimap.create();
    TezUmbilical umbilical = mock(TezUmbilical.class);
    TezConfiguration tezConf = new TezConfiguration();
    tezConf.set(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ALLOCATOR_CLASS, ScalingAllocator.class.getName());
    TezTaskAttemptID taId1 = createTaskAttemptID(vertexId, 1);
    TaskSpec task1 = createTaskSpec(taId1, "dag1", "vertex1", 30);
    TezTaskAttemptID taId2 = createTaskAttemptID(vertexId, 2);
    TaskSpec task2 = createTaskSpec(taId2, "dag2", "vertex1", 10);
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(tezConf);
    // The outer try/finally guarantees the executor is released even when the first
    // phase (or a task constructor) fails.
    try {
        LogicalIOProcessorRuntimeTask lio1 = new LogicalIOProcessorRuntimeTask(task1, 0, tezConf, null, umbilical, serviceConsumerMetadata, new HashMap<String, String>(), startedInputsMap, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
        try {
            lio1.initialize();
            lio1.run();
            lio1.close();
            // Input should've been started, Output should not have been started
            assertEquals(1, TestProcessor.runCount);
            assertEquals(1, TestInput.startCount);
            assertEquals(0, TestOutput.startCount);
            // test that invocations of progress are counted correctly
            assertEquals(true, lio1.getAndClearProgressNotification());
            // cleared after getting
            assertEquals(false, lio1.getAndClearProgressNotification());
            assertEquals(30, TestInput.vertexParallelism);
            assertEquals(0, TestOutput.vertexParallelism);
            assertEquals(30, lio1.getProcessorContext().getVertexParallelism());
            assertEquals(30, lio1.getInputContexts().iterator().next().getVertexParallelism());
            assertEquals(30, lio1.getOutputContexts().iterator().next().getVertexParallelism());
        } finally {
            cleanupAndTest(lio1);
        }
        // local mode
        tezConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
        LogicalIOProcessorRuntimeTask lio2 = new LogicalIOProcessorRuntimeTask(task2, 0, tezConf, null, umbilical, serviceConsumerMetadata, new HashMap<String, String>(), startedInputsMap, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
        try {
            lio2.initialize();
            lio2.run();
            lio2.close();
            // Input should not have been started again, Output should not have been started
            assertEquals(2, TestProcessor.runCount);
            assertEquals(1, TestInput.startCount);
            assertEquals(0, TestOutput.startCount);
            assertEquals(30, TestInput.vertexParallelism);
            assertEquals(0, TestOutput.vertexParallelism);
            // Check if parallelism is available in processor/ i/p / o/p contexts
            assertEquals(10, lio2.getProcessorContext().getVertexParallelism());
            assertEquals(10, lio2.getInputContexts().iterator().next().getVertexParallelism());
            assertEquals(10, lio2.getOutputContexts().iterator().next().getVertexParallelism());
        } finally {
            cleanupAndTest(lio2);
        }
    } finally {
        sharedExecutor.shutdownNow();
    }
}
Also used : HashMap(java.util.HashMap) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) ScalingAllocator(org.apache.tez.runtime.common.resources.ScalingAllocator) ByteBuffer(java.nio.ByteBuffer) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) TezDAGID(org.apache.tez.dag.records.TezDAGID) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) TezUmbilical(org.apache.tez.runtime.api.impl.TezUmbilical) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 12 with DefaultHadoopShim

use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.

the class TestRecoveryParser method testRecoverableSummary_VertexGroupFinishCommitting.

/**
 * Writes a DAG-submitted event, a vertex group commit started/finished pair and the
 * finished events of both vertices through a {@link RecoveryService}, then parses the
 * recovery data and expects the DAG to be reported with the same id and not flagged as
 * non-recoverable.
 *
 * @throws IOException declared by the test signature; propagated from the calls made here
 */
@Test(timeout = 5000)
public void testRecoverableSummary_VertexGroupFinishCommitting() throws IOException {
    final ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    final TezDAGID submittedDagId = TezDAGID.getInstance(applicationId, 1);

    // Mocked application context backing the recovery service.
    final AppContext context = mock(AppContext.class);
    when(context.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
    when(context.getClock()).thenReturn(new SystemClock());
    when(mockDAGImpl.getID()).thenReturn(submittedDagId);
    when(context.getHadoopShim()).thenReturn(new DefaultHadoopShim());
    when(context.getApplicationID()).thenReturn(applicationId);

    final RecoveryService recoveryService = new RecoveryService(context);
    final Configuration serviceConf = new Configuration();
    serviceConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
    recoveryService.init(serviceConf);
    recoveryService.start();

    // write a DAGSubmittedEvent first to initialize summaryStream
    final DAGPlan plan = TestDAGImpl.createTestDAGPlan();
    final ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId, 1);
    final DAGSubmittedEvent submitted =
        new DAGSubmittedEvent(submittedDagId, 1L, plan, attemptId, null, "user", new Configuration(), null, null);
    recoveryService.handle(new DAGHistoryEvent(submittedDagId, submitted));

    // It should be fine to skip other events, just for testing.
    final TezVertexID firstVertex = TezVertexID.getInstance(submittedDagId, 0);
    final TezVertexID secondVertex = TezVertexID.getInstance(submittedDagId, 1);

    final VertexGroupCommitStartedEvent commitStarted =
        new VertexGroupCommitStartedEvent(submittedDagId, "group_1", Lists.newArrayList(firstVertex, secondVertex), 0L);
    recoveryService.handle(new DAGHistoryEvent(submittedDagId, commitStarted));

    final VertexGroupCommitFinishedEvent commitFinished =
        new VertexGroupCommitFinishedEvent(submittedDagId, "group_1", Lists.newArrayList(firstVertex, secondVertex), 0L);
    recoveryService.handle(new DAGHistoryEvent(submittedDagId, commitFinished));

    // also write VertexFinishedEvent, otherwise it is still non-recoverable
    // when checking with non-summary event
    final VertexFinishedEvent firstFinished =
        new VertexFinishedEvent(firstVertex, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null);
    recoveryService.handle(new DAGHistoryEvent(submittedDagId, firstFinished));

    final VertexFinishedEvent secondFinished =
        new VertexFinishedEvent(secondVertex, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null);
    recoveryService.handle(new DAGHistoryEvent(submittedDagId, secondFinished));

    recoveryService.stop();

    final DAGRecoveryData recovered = parser.parseRecoveryData();
    assertEquals(submittedDagId, recovered.recoveredDagID);
    assertFalse(recovered.nonRecoverable);
}
Also used : Path(org.apache.hadoop.fs.Path) RecoveryService(org.apache.tez.dag.history.recovery.RecoveryService) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Configuration(org.apache.hadoop.conf.Configuration) VertexGroupCommitStartedEvent(org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) VertexGroupCommitFinishedEvent(org.apache.tez.dag.history.events.VertexGroupCommitFinishedEvent) TezDAGID(org.apache.tez.dag.records.TezDAGID) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) DAGRecoveryData(org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData) TezVertexID(org.apache.tez.dag.records.TezVertexID) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent)

Example 13 with DefaultHadoopShim

use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.

the class TestRootInputInitializerManager method testCorrectUgiUsage.

/**
 * Runs a single {@code InputInitializerForUgiTest} through a
 * {@link RootInputInitializerManager} built with a remote user named "fakeuser", waits
 * for the initializer, and expects both UGIs it recorded (constructor and initialize) to
 * equal that DAG user.
 *
 * @throws TezException declared by the test signature; propagated from the calls made here
 * @throws InterruptedException declared by the test signature; propagated from the calls made here
 */
@Test(timeout = 5000)
public void testCorrectUgiUsage() throws TezException, InterruptedException {
    final UserGroupInformation expectedUgi = UserGroupInformation.createRemoteUser("fakeuser");

    // Collaborators handed to the manager under test.
    final TezVertexID mockedVertexId = mock(TezVertexID.class);
    final Vertex mockedVertex = mock(Vertex.class);
    doReturn(mockedVertexId).when(mockedVertex).getVertexId();

    final AppContext context = mock(AppContext.class);
    doReturn(new DefaultHadoopShim()).when(context).getHadoopShim();
    doReturn(mock(EventHandler.class)).when(context).getEventHandler();

    final StateChangeNotifier notifier = mock(StateChangeNotifier.class);
    final RootInputInitializerManager manager =
        new RootInputInitializerManager(mockedVertex, context, expectedUgi, notifier);

    // A single root input whose initializer records the UGIs it observes.
    final InputDescriptor inputDescriptor = mock(InputDescriptor.class);
    final InputInitializerDescriptor initializerDescriptor =
        InputInitializerDescriptor.create(InputInitializerForUgiTest.class.getName());
    final RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> input =
        new RootInputLeafOutput<>("InputName", inputDescriptor, initializerDescriptor);

    manager.runInputInitializers(Collections.singletonList(input));
    InputInitializerForUgiTest.awaitInitialize();

    assertEquals(expectedUgi, InputInitializerForUgiTest.ctorUgi);
    assertEquals(expectedUgi, InputInitializerForUgiTest.initializeUgi);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) RootInputLeafOutput(org.apache.tez.dag.api.RootInputLeafOutput) AppContext(org.apache.tez.dag.app.AppContext) EventHandler(org.apache.hadoop.yarn.event.EventHandler) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) TezVertexID(org.apache.tez.dag.records.TezVertexID) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 14 with DefaultHadoopShim

use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.

the class TestCommit method setupDAG.

/**
 * Prepares the fixture for the given plan: disables AM container reuse, creates the ids,
 * a mocked {@link AppContext} with its stubs, a daemon-thread executor, the
 * {@link DAGImpl} under test, and a started {@link DrainDispatcher} with all event
 * dispatchers registered.
 *
 * @param dagPlan the plan from which the DAG under test is built
 */
public void setupDAG(DAGPlan dagPlan) {
    conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false);

    // Identifiers.
    ApplicationId applicationId = ApplicationId.newInstance(100, 1);
    appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
    dagId = TezDAGID.getInstance(appAttemptId.getApplicationId(), 1);
    Assert.assertNotNull(dagId);

    dispatcher = new DrainDispatcher();
    fsTokens = new Credentials();

    // Application context mock and the shared executor it hands out.
    appContext = mock(AppContext.class);
    doReturn(new DefaultHadoopShim()).when(appContext).getHadoopShim();
    ThreadFactoryBuilder sharedPoolThreads =
        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("App Shared Pool - " + "#%d");
    rawExecutor = Executors.newCachedThreadPool(sharedPoolThreads.build());
    execService = MoreExecutors.listeningDecorator(rawExecutor);
    doReturn(execService).when(appContext).getExecService();

    historyEventHandler = new MockHistoryEventHandler(appContext);
    aclManager = new ACLManager("amUser");

    // Stubs installed before the DAG is constructed.
    doReturn(conf).when(appContext).getAMConf();
    doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
    doReturn(appAttemptId.getApplicationId()).when(appContext).getApplicationID();
    doReturn(dagId).when(appContext).getCurrentDAGID();
    doReturn(historyEventHandler).when(appContext).getHistoryHandler();
    doReturn(aclManager).when(appContext).getAMACLManager();

    dag = new DAGImpl(dagId, conf, dagPlan, dispatcher.getEventHandler(), taskCommunicatorManagerInterface, fsTokens, clock, "user", thh, appContext);

    // Stubs that are installed once the DAG exists.
    doReturn(dag).when(appContext).getCurrentDAG();
    doReturn(dispatcher.getEventHandler()).when(appContext).getEventHandler();
    doReturn(new ClusterInfo(Resource.newInstance(8192, 10))).when(appContext).getClusterInfo();

    // Create the per-event-type dispatchers, then register them in the original order.
    taskEventDispatcher = new TaskEventDispatcher();
    taskAttemptEventDispatcher = new TaskAttemptEventDispatcher();
    vertexEventDispatcher = new VertexEventDispatcher();
    dagEventDispatcher = new DagEventDispatcher();
    dagFinishEventHandler = new DAGFinishEventHandler();

    dispatcher.register(CallableEventType.class, new CallableEventDispatcher());
    dispatcher.register(TaskEventType.class, taskEventDispatcher);
    dispatcher.register(TaskAttemptEventType.class, taskAttemptEventDispatcher);
    dispatcher.register(VertexEventType.class, vertexEventDispatcher);
    dispatcher.register(DAGEventType.class, dagEventDispatcher);
    dispatcher.register(DAGAppMasterEventType.class, dagFinishEventHandler);

    dispatcher.init(conf);
    dispatcher.start();
}
Also used : DrainDispatcher(org.apache.tez.common.DrainDispatcher) AppContext(org.apache.tez.dag.app.AppContext) ACLManager(org.apache.tez.common.security.ACLManager) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) ClusterInfo(org.apache.tez.dag.app.ClusterInfo) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) Credentials(org.apache.hadoop.security.Credentials)

Example 15 with DefaultHadoopShim

use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.

the class TestDAGRecovery method setup.

/**
 * Builds the fixture used by each test: a fresh configuration with AM container reuse
 * disabled, a mocked {@link AppContext}, an executor mock that forwards every submitted
 * {@link CallableEvent} to the dispatcher, the {@link DAGImpl} under test, the
 * vertex/task/attempt ids, and a started {@link DrainDispatcher} with all event
 * dispatchers registered. Finally raises the root logger level to DEBUG.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Before
public void setup() {
    conf = new Configuration();
    conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false);
    appAttemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(100, 1), 1);
    dagId = TezDAGID.getInstance(appAttemptId.getApplicationId(), 1);
    Assert.assertNotNull(dagId);
    dagPlan = createDAGPlan();
    dispatcher = new DrainDispatcher();
    fsTokens = new Credentials();
    appContext = mock(AppContext.class);
    execService = mock(ListeningExecutorService.class);
    thh = mock(TaskHeartbeatHandler.class);
    final ListenableFuture<Void> mockFuture = mock(ListenableFuture.class);
    when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
    when(appContext.getApplicationID()).thenReturn(appAttemptId.getApplicationId());
    when(appContext.getClock()).thenReturn(new SystemClock());
    // Instead of running submitted work, hand the CallableEvent to the dispatcher's
    // event handler and return a mocked future.
    Mockito.doAnswer(new Answer() {

        @Override
        public ListenableFuture<Void> answer(InvocationOnMock invocation) {
            Object[] args = invocation.getArguments();
            CallableEvent e = (CallableEvent) args[0];
            dispatcher.getEventHandler().handle(e);
            return mockFuture;
        }
    }).when(execService).submit((Callable<Void>) any());
    doReturn(execService).when(appContext).getExecService();
    historyEventHandler = new MockHistoryEventHandler(appContext);
    aclManager = new ACLManager("amUser");
    doReturn(conf).when(appContext).getAMConf();
    doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
    doReturn(appAttemptId.getApplicationId()).when(appContext).getApplicationID();
    doReturn(dagId).when(appContext).getCurrentDAGID();
    doReturn(historyEventHandler).when(appContext).getHistoryHandler();
    doReturn(aclManager).when(appContext).getAMACLManager();
    doReturn(dagRecoveryData).when(appContext).getDAGRecoveryData();
    dag = new DAGImpl(dagId, conf, dagPlan, dispatcher.getEventHandler(), taskCommunicatorManagerInterface, fsTokens, clock, "user", thh, appContext);
    dag.entityUpdateTracker = new StateChangeNotifierForTest(dag);
    doReturn(dag).when(appContext).getCurrentDAG();
    // The ugi field holds a mock. A local variable of the same name that shadowed this
    // field and was never read has been removed.
    ugi = mock(UserGroupInformation.class);
    doReturn(clusterInfo).when(appContext).getClusterInfo();
    TaskSchedulerManager mockTaskScheduler = mock(TaskSchedulerManager.class);
    doReturn(mockTaskScheduler).when(appContext).getTaskScheduler();
    // Ids for the first task attempt of the first task in each of two vertices.
    v1Id = TezVertexID.getInstance(dagId, 0);
    t1v1Id = TezTaskID.getInstance(v1Id, 0);
    ta1t1v1Id = TezTaskAttemptID.getInstance(t1v1Id, 0);
    v2Id = TezVertexID.getInstance(dagId, 1);
    t1v2Id = TezTaskID.getInstance(v2Id, 0);
    ta1t1v2Id = TezTaskAttemptID.getInstance(t1v2Id, 0);
    dispatcher.register(CallableEventType.class, new CallableEventDispatcher());
    taskEventDispatcher = new TaskEventDispatcher();
    dispatcher.register(TaskEventType.class, taskEventDispatcher);
    taskAttemptEventDispatcher = new TaskAttemptEventDispatcher();
    dispatcher.register(TaskAttemptEventType.class, taskAttemptEventDispatcher);
    vertexEventDispatcher = new VertexEventDispatcher();
    dispatcher.register(VertexEventType.class, vertexEventDispatcher);
    dagEventDispatcher = new DagEventDispatcher();
    dispatcher.register(DAGEventType.class, dagEventDispatcher);
    dagFinishEventHandler = new DAGFinishEventHandler();
    dispatcher.register(DAGAppMasterEventType.class, dagFinishEventHandler);
    dispatcher.register(AMSchedulerEventType.class, new AMSchedulerEventDispatcher());
    dispatcher.init(conf);
    dispatcher.start();
    doReturn(dispatcher.getEventHandler()).when(appContext).getEventHandler();
    LogManager.getRootLogger().setLevel(Level.DEBUG);
}
Also used : DrainDispatcher(org.apache.tez.common.DrainDispatcher) PlanTaskConfiguration(org.apache.tez.dag.api.records.DAGProtos.PlanTaskConfiguration) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) ACLManager(org.apache.tez.common.security.ACLManager) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) CallableEvent(org.apache.tez.dag.app.dag.event.CallableEvent) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) TaskHeartbeatHandler(org.apache.tez.dag.app.TaskHeartbeatHandler) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) SystemClock(org.apache.hadoop.yarn.util.SystemClock) AppContext(org.apache.tez.dag.app.AppContext) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) TaskSchedulerManager(org.apache.tez.dag.app.rm.TaskSchedulerManager) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Credentials(org.apache.hadoop.security.Credentials) Before(org.junit.Before)

Aggregations

DefaultHadoopShim (org.apache.tez.hadoop.shim.DefaultHadoopShim)19 Configuration (org.apache.hadoop.conf.Configuration)11 Path (org.apache.hadoop.fs.Path)10 TezDAGID (org.apache.tez.dag.records.TezDAGID)10 TezVertexID (org.apache.tez.dag.records.TezVertexID)10 SystemClock (org.apache.hadoop.yarn.util.SystemClock)8 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)7 TaskSpec (org.apache.tez.runtime.api.impl.TaskSpec)7 HashMap (java.util.HashMap)6 TezSharedExecutor (org.apache.tez.common.TezSharedExecutor)6 ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor)6 DAGPlan (org.apache.tez.dag.api.records.DAGProtos.DAGPlan)6 DAGRecoveryData (org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData)6 ByteBuffer (java.nio.ByteBuffer)5 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)5 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)5 DAGHistoryEvent (org.apache.tez.dag.history.DAGHistoryEvent)5 DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent)5 LogicalIOProcessorRuntimeTask (org.apache.tez.runtime.LogicalIOProcessorRuntimeTask)5 ExecutionContextImpl (org.apache.tez.runtime.api.impl.ExecutionContextImpl)5