Search in sources :

Example 6 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestFetcher method testSetupLocalDiskFetchEmptyPartitions.

/**
 * Drives {@code setupLocalDiskFetch} for five source attempts whose stubbed index records carry
 * zeros in their second and third constructor arguments (presumably marking the partitions as
 * empty, per the test name). Expects that the scheduler never sees {@code copySucceeded} and that
 * the remaining map outputs are put back for the host.
 */
@Test(timeout = 5000)
public void testSetupLocalDiskFetchEmptyPartitions() throws Exception {
    Configuration conf = new TezConfiguration();
    ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
    MergeManager merger = mock(MergeManager.class);
    Shuffle shuffle = mock(Shuffle.class);

    InputContext inputContext = mock(InputContext.class);
    when(inputContext.getCounters()).thenReturn(new TezCounters());
    when(inputContext.getSourceVertexName()).thenReturn("");

    MapHost host = new MapHost(HOST, PORT, 1, 1);
    // Spy on a real fetcher so individual disk-facing methods can be stubbed below.
    FetcherOrderedGrouped spyFetcher = spy(
        new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, true,
            HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter,
            wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID,
            false, false, true, false));

    final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(
        new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1),
        new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1),
        new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1),
        new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1),
        new CompositeInputAttemptIdentifier(4, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1));
    doReturn(srcAttempts).when(scheduler).getMapsForHost(host);

    // Lookup table from (path component, partition) to the expanded attempt identifier.
    final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> identifiersByPath =
        new ConcurrentHashMap<>();
    for (CompositeInputAttemptIdentifier attempt : srcAttempts) {
        for (int offset = 0; offset < attempt.getInputIdentifierCount(); offset++) {
            identifiersByPath.put(
                new ShuffleScheduler.PathPartition(attempt.getPathComponent(), host.getPartitionId() + offset),
                attempt.expand(offset));
        }
    }

    // The mocked scheduler resolves fetched outputs through the lookup table above.
    Answer<InputAttemptIdentifier> resolveIdentifier = new Answer<InputAttemptIdentifier>() {

        @Override
        public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
            Object[] callArgs = invocation.getArguments();
            String pathComponent = (String) callArgs[0];
            int partition = (int) callArgs[1];
            return identifiersByPath.get(new ShuffleScheduler.PathPartition(pathComponent, partition));
        }
    };
    doAnswer(resolveIdentifier).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));

    // Input file names are derived from the first argument only.
    Answer<Path> inputFileName = new Answer<Path>() {

        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            return new Path(SHUFFLE_INPUT_FILE_PREFIX + invocation.getArguments()[0]);
        }
    };
    doAnswer(inputFileName).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());

    // The index record is keyed off the trailing digit of the path component; the answer is
    // stateless, so one instance serves every partition.
    Answer<TezIndexRecord> emptyIndexRecord = new Answer<TezIndexRecord>() {

        @Override
        public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
            String pathComponent = (String) invocation.getArguments()[0];
            long trailingDigit = Long.parseLong(pathComponent.substring(pathComponent.length() - 1));
            // match with params for copySucceeded below.
            return new TezIndexRecord(trailingDigit * 10, 0, 0);
        }
    };
    for (int offset = 0; offset < host.getPartitionCount(); offset++) {
        doAnswer(emptyIndexRecord).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + offset));
    }

    doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class),
        anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());

    spyFetcher.setupLocalDiskFetch(host);

    // No attempt may be reported as copied, yet the remaining outputs must still be handed back.
    verify(scheduler, times(0)).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class),
        anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
    verify(spyFetcher).putBackRemainingMapOutputs(host);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Matchers.anyString(org.mockito.Matchers.anyString) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Path(org.apache.hadoop.fs.Path) InputContext(org.apache.tez.runtime.api.InputContext) TezCounters(org.apache.tez.common.counters.TezCounters) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 7 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestFetcher method testAsyncWithException.

/**
 * Verifies the failure path of {@code setupConnection}: with short shuffle read/connect
 * timeouts configured, the call must return {@code false} and increase the IO error counter.
 *
 * <p>NOTE(review): the connection presumably fails because nothing is listening on
 * {@code HOST:PORT} - confirm against the test class setup.
 *
 * @throws Exception if fetcher setup or {@code setupConnection} throws unexpectedly; the
 *     exception is allowed to propagate so the test report keeps its stack trace
 */
@Test
@SuppressWarnings("unchecked")
public void testAsyncWithException() throws Exception {
    Configuration conf = new TezConfiguration();
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 3000);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 3000);
    ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
    MergeManager merger = mock(MergeManager.class);
    Shuffle shuffle = mock(Shuffle.class);
    JobTokenSecretManager jobMgr = mock(JobTokenSecretManager.class);
    doReturn(new byte[10]).when(jobMgr).computeHash(any(byte[].class));
    HttpConnectionParams httpConnectionParams = ShuffleUtils.getHttpConnectionParams(conf);
    final MapHost host = new MapHost(HOST, PORT, 1, 1);
    FetcherOrderedGrouped mockFetcher = new FetcherOrderedGrouped(httpConnectionParams, scheduler, merger, shuffle, jobMgr, false, 0, null, conf, false, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, true, false, true, false);
    final FetcherOrderedGrouped fetcher = spy(mockFetcher);
    fetcher.remaining = new LinkedHashMap<String, InputAttemptIdentifier>();
    final List<InputAttemptIdentifier> srcAttempts = Arrays.asList(new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0"), new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1"), new InputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3"));
    doReturn(srcAttempts).when(scheduler).getMapsForHost(host);

    // Snapshot the counter first so the assertion below measures only this call.
    long currentIOErrors = ioErrsCounter.getValue();
    // An unexpected IOException is not caught here: the method declares "throws Exception",
    // so the failure surfaces with its original message and stack trace.
    boolean connected = fetcher.setupConnection(host, srcAttempts);
    Assert.assertFalse(connected);
    // Ensure that counters are incremented (i.e it followed the exception codepath)
    Assert.assertTrue(ioErrsCounter.getValue() > currentIOErrors);
}
Also used : HttpConnectionParams(org.apache.tez.http.HttpConnectionParams) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Matchers.anyString(org.mockito.Matchers.anyString) IOException(java.io.IOException) TezCounters(org.apache.tez.common.counters.TezCounters) JobTokenSecretManager(org.apache.tez.common.security.JobTokenSecretManager) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 8 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestFetcher method testSetupLocalDiskFetch.

/**
 * Drives {@code setupLocalDiskFetch} for five source attempts where reading the index record
 * fails for attempts 2 and 4. Expects three successful copies (attempts 0, 1 and 3), a
 * {@code copyFailed} report plus a {@code putBackKnownMapOutput} call for each of the two failed
 * attempts, and a final {@code putBackRemainingMapOutputs} for the host.
 */
@Test(timeout = 5000)
public void testSetupLocalDiskFetch() throws Exception {
    Configuration conf = new TezConfiguration();
    ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
    MergeManager merger = mock(MergeManager.class);
    Shuffle shuffle = mock(Shuffle.class);
    MapHost host = new MapHost(HOST, PORT, 1, 1);
    FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
    // Spy on the real fetcher so the disk-facing methods can be stubbed individually.
    FetcherOrderedGrouped spyFetcher = spy(fetcher);
    final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(
        new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1),
        new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1),
        new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1),
        new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1),
        new CompositeInputAttemptIdentifier(4, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1));
    // Indexes into srcAttempts; they double as the trailing digit of each path component.
    final int FIRST_FAILED_ATTEMPT_IDX = 2;
    final int SECOND_FAILED_ATTEMPT_IDX = 4;
    final int[] successfulAttemptIndexes = { 0, 1, 3 };
    doReturn(srcAttempts).when(scheduler).getMapsForHost(host);

    // Lookup table from (path component, partition) to the expanded attempt identifier.
    final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> pathToIdentifierMap = new ConcurrentHashMap<>();
    for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) {
        for (int i = 0; i < srcAttempt.getInputIdentifierCount(); i++) {
            ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(srcAttempt.getPathComponent(), host.getPartitionId() + i);
            pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i));
        }
    }
    doAnswer(new Answer<InputAttemptIdentifier>() {

        @Override
        public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            String path = (String) args[0];
            int reduceId = (int) args[1];
            return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId));
        }
    }).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));

    // Every direct-disk fetch yields a mocked DISK_DIRECT output tagged with the requested attempt.
    doAnswer(new Answer<MapOutput>() {

        @Override
        public MapOutput answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            MapOutput mapOutput = mock(MapOutput.class);
            doReturn(MapOutput.Type.DISK_DIRECT).when(mapOutput).getType();
            doReturn(args[0]).when(mapOutput).getAttemptIdentifier();
            return mapOutput;
        }
    }).when(spyFetcher).getMapOutputForDirectDiskFetch(any(InputAttemptIdentifier.class), any(Path.class), any(TezIndexRecord.class));
    doAnswer(new Answer<Path>() {

        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
        }
    }).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());
    for (int i = 0; i < host.getPartitionCount(); i++) {
        doAnswer(new Answer<TezIndexRecord>() {

            @Override
            public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
                Object[] args = invocation.getArguments();
                String pathComponent = (String) args[0];
                // The trailing digit of the path component identifies the source attempt.
                long p = Long.parseLong(pathComponent.substring(pathComponent.length() - 1));
                if (p == FIRST_FAILED_ATTEMPT_IDX || p == SECOND_FAILED_ATTEMPT_IDX) {
                    throw new IOException("failing to simulate failure case");
                }
                // match with params for copySucceeded below.
                return new TezIndexRecord(p * 10, (p + 1) * 1000, (p + 2) * 100);
            }
        }).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i));
    }
    doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
    doNothing().when(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX));
    doNothing().when(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX));

    spyFetcher.setupLocalDiskFetch(host);

    // should have exactly 3 successes and 2 failures.
    for (int i : successfulAttemptIndexes) {
        for (int j = 0; j < host.getPartitionCount(); j++) {
            verifyCopySucceeded(scheduler, host, srcAttempts, i, j);
        }
    }
    verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true);
    verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true);
    verify(spyFetcher).putBackRemainingMapOutputs(host);
    verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX));
    verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Matchers.anyString(org.mockito.Matchers.anyString) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Path(org.apache.hadoop.fs.Path) InputContext(org.apache.tez.runtime.api.InputContext) IOException(java.io.IOException) TezCounters(org.apache.tez.common.counters.TezCounters) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 9 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestShuffle method testSchedulerTerminatesOnException.

/**
 * Starts a {@code Shuffle}, reports an {@code IOException} to it, and waits until both its
 * scheduler and its merge manager report shutdown. Afterwards checks that exactly one
 * {@code NON_FATAL} failure reached the input context and that the cause of the reported
 * throwable carries the simulated message. The JUnit timeout bounds the polling loops.
 */
@Test(timeout = 10000)
public void testSchedulerTerminatesOnException() throws IOException, InterruptedException {
    InputContext inputContext = createTezInputContext();
    TezConfiguration conf = new TezConfiguration();
    conf.setLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 300000L);
    Shuffle shuffle = new Shuffle(inputContext, conf, 1, 3000000L);
    try {
        shuffle.run();
        ShuffleScheduler shuffleScheduler = shuffle.scheduler;
        MergeManager merger = shuffle.merger;

        // Both components must be alive before the failure is injected.
        assertFalse(shuffleScheduler.isShutdown());
        assertFalse(merger.isShutdown());

        String simulatedFailure = "Simulating fetch failure";
        shuffle.reportException(new IOException(simulatedFailure));

        // Poll until the scheduler has stopped.
        while (!shuffleScheduler.isShutdown()) {
            Thread.sleep(100L);
        }
        assertTrue(shuffleScheduler.isShutdown());

        // Poll until the merge manager has stopped.
        while (!merger.isShutdown()) {
            Thread.sleep(100L);
        }
        assertTrue(merger.isShutdown());

        ArgumentCaptor<Throwable> reportedThrowable = ArgumentCaptor.forClass(Throwable.class);
        ArgumentCaptor<String> reportedMessage = ArgumentCaptor.forClass(String.class);
        verify(inputContext, times(1)).reportFailure(
            eq(TaskFailureType.NON_FATAL), reportedThrowable.capture(), reportedMessage.capture());

        // The reported throwable wraps the injected exception as its cause.
        Throwable failure = reportedThrowable.getValue();
        assertTrue(failure.getCause().getMessage().contains(simulatedFailure));
    } finally {
        shuffle.shutdown();
    }
}
Also used : InputContext(org.apache.tez.runtime.api.InputContext) Matchers.anyString(org.mockito.Matchers.anyString) IOException(java.io.IOException) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 10 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestShuffle method createTezInputContext.

/**
 * Builds a Mockito-backed {@code InputContext} with the stubs set up below: application id,
 * source vertex name, counters, execution context, service provider/consumer metadata, and a
 * framework executor factory that delegates to {@code sharedExecutor}.
 *
 * @return the stubbed input context mock
 * @throws IOException declared for the session-token serialization call
 */
private InputContext createTezInputContext() throws IOException {
    InputContext context = mock(InputContext.class);

    doReturn(ApplicationId.newInstance(1, 1)).when(context).getApplicationId();
    doReturn("sourceVertex").when(context).getSourceVertexName();
    doReturn(new TezCounters()).when(context).getCounters();

    ExecutionContext execContext = new ExecutionContextImpl("localhost");
    doReturn(execContext).when(context).getExecutionContext();

    // Four-byte buffer holding the int value 4, returned for any service provider name.
    ByteBuffer providerMetaData = ByteBuffer.allocate(4);
    providerMetaData.putInt(0, 4);
    doReturn(providerMetaData).when(context).getServiceProviderMetaData(anyString());

    // Serialized session token, returned for any service consumer name.
    JobTokenIdentifier tokenId = new JobTokenIdentifier(new Text("text"));
    Token<JobTokenIdentifier> sessionToken = new Token<JobTokenIdentifier>(tokenId, new JobTokenSecretManager());
    doReturn(TezCommonUtils.serializeServiceData(sessionToken)).when(context).getServiceConsumerMetaData(anyString());

    // Executor requests are forwarded, with their two arguments, to the shared executor.
    Answer<ExecutorService> delegateToSharedExecutor = new Answer<ExecutorService>() {

        @Override
        public ExecutorService answer(InvocationOnMock invocation) throws Throwable {
            Integer firstArg = invocation.getArgumentAt(0, Integer.class);
            String secondArg = invocation.getArgumentAt(1, String.class);
            return sharedExecutor.createExecutorService(firstArg, secondArg);
        }
    };
    when(context.createTezFrameworkExecutorService(anyInt(), anyString())).thenAnswer(delegateToSharedExecutor);

    return context;
}
Also used : ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) InputContext(org.apache.tez.runtime.api.InputContext) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) Token(org.apache.hadoop.security.token.Token) Text(org.apache.hadoop.io.Text) ByteBuffer(java.nio.ByteBuffer) TezCounters(org.apache.tez.common.counters.TezCounters) ExecutionContext(org.apache.tez.runtime.api.ExecutionContext) JobTokenSecretManager(org.apache.tez.common.security.JobTokenSecretManager) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ExecutorService(java.util.concurrent.ExecutorService) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Aggregations

InputContext (org.apache.tez.runtime.api.InputContext): 65; Test (org.junit.Test): 47; Configuration (org.apache.hadoop.conf.Configuration): 30; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 28; TezCounters (org.apache.tez.common.counters.TezCounters): 19; TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration): 18; CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier): 17; IOException (java.io.IOException): 16; InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier): 16; Event (org.apache.tez.runtime.api.Event): 14; LinkedList (java.util.LinkedList): 12; Path (org.apache.hadoop.fs.Path): 12; InputDescriptor (org.apache.tez.dag.api.InputDescriptor): 10; InvocationOnMock (org.mockito.invocation.InvocationOnMock): 10; ExecutorService (java.util.concurrent.ExecutorService): 9; OutputContext (org.apache.tez.runtime.api.OutputContext): 9; OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor): 8; DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent): 8; FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator): 8; Text (org.apache.hadoop.io.Text): 7