Search in sources :

Example 1 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestFetcher, method testSetupLocalDiskFetchEmptyPartitions:

/**
 * Verifies that {@code setupLocalDiskFetch} reports no successful copies when every stubbed
 * index record carries zeros in its second and third constructor arguments (presumably the
 * raw and compressed partition lengths, i.e. an empty partition — confirm against
 * {@code TezIndexRecord}).
 *
 * <p>The scheduler mock hands out five source attempts for one host. After the fetch,
 * {@code copySucceeded} must never have been invoked and the fetcher must have called
 * {@code putBackRemainingMapOutputs} for the host.
 */
@Test(timeout = 5000)
public void testSetupLocalDiskFetchEmptyPartitions() throws Exception {
    Configuration conf = new TezConfiguration();
    ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
    MergeManager merger = mock(MergeManager.class);
    Shuffle shuffle = mock(Shuffle.class);
    MapHost host = new MapHost(HOST, PORT, 1, 1);
    FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
    FetcherOrderedGrouped spyFetcher = spy(fetcher);

    // Five source attempts; the trailing digit of each path component equals its list index.
    final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(
        new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1),
        new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1),
        new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1),
        new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1),
        new CompositeInputAttemptIdentifier(4, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1));
    doReturn(srcAttempts).when(scheduler).getMapsForHost(host);

    // Lookup table backing the stubbed getIdentifierForFetchedOutput(path, reduceId).
    final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> pathToIdentifierMap = new ConcurrentHashMap<>();
    for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) {
        for (int i = 0; i < srcAttempt.getInputIdentifierCount(); i++) {
            ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(srcAttempt.getPathComponent(), host.getPartitionId() + i);
            pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i));
        }
    }
    doAnswer(new Answer<InputAttemptIdentifier>() {

        @Override
        public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            String path = (String) args[0];
            int reduceId = (int) args[1];
            return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId));
        }
    }).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));

    // Avoid touching the real file system: derive the input file path from the first argument.
    doAnswer(new Answer<Path>() {

        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
        }
    }).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());

    // The answer is stateless, so a single instance is shared by all partition stubs.
    final Answer<TezIndexRecord> zeroLengthIndexRecord = new Answer<TezIndexRecord>() {

        @Override
        public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
            String pathComponent = (String) invocation.getArguments()[0];
            // Last character of the path component is the attempt's numeric suffix.
            long p = Long.parseLong(pathComponent.substring(pathComponent.length() - 1));
            // Second and third arguments are zero; no copySucceeded call is expected below.
            return new TezIndexRecord(p * 10, 0, 0);
        }
    };
    for (int i = 0; i < host.getPartitionCount(); i++) {
        doAnswer(zeroLengthIndexRecord).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i));
    }
    doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());

    spyFetcher.setupLocalDiskFetch(host);

    verify(scheduler, times(0)).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
    verify(spyFetcher).putBackRemainingMapOutputs(host);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Matchers.anyString(org.mockito.Matchers.anyString) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Path(org.apache.hadoop.fs.Path) InputContext(org.apache.tez.runtime.api.InputContext) TezCounters(org.apache.tez.common.counters.TezCounters) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 2 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestFetcher, method testSetupLocalDiskFetch:

/**
 * Exercises {@code setupLocalDiskFetch} with five source attempts on a single host, two of
 * which fail while their index record is being read.
 *
 * <p>Reading the index record for attempts 2 and 4 throws an {@link IOException}; attempts
 * 0, 1 and 3 return a record whose values are derived from the attempt's numeric suffix.
 * The test then verifies that the scheduler saw one {@code copySucceeded} per successful
 * attempt/partition, one {@code copyFailed} and one {@code putBackKnownMapOutput} per failed
 * attempt, and that the fetcher called {@code putBackRemainingMapOutputs}.
 */
@Test(timeout = 5000)
public void testSetupLocalDiskFetch() throws Exception {
    Configuration conf = new TezConfiguration();
    ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
    MergeManager merger = mock(MergeManager.class);
    Shuffle shuffle = mock(Shuffle.class);
    MapHost host = new MapHost(HOST, PORT, 1, 1);
    FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
    FetcherOrderedGrouped spyFetcher = spy(fetcher);

    // Five source attempts; the trailing digit of each path component equals its list index.
    final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(
        new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1),
        new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1),
        new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1),
        new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1),
        new CompositeInputAttemptIdentifier(4, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1));
    final int firstFailedAttemptIdx = 2;
    final int secondFailedAttemptIdx = 4;
    final int[] successfulAttemptIndexes = { 0, 1, 3 };
    doReturn(srcAttempts).when(scheduler).getMapsForHost(host);

    // Lookup table backing the stubbed getIdentifierForFetchedOutput(path, reduceId).
    final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> pathToIdentifierMap = new ConcurrentHashMap<>();
    for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) {
        for (int i = 0; i < srcAttempt.getInputIdentifierCount(); i++) {
            ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(srcAttempt.getPathComponent(), host.getPartitionId() + i);
            pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i));
        }
    }
    doAnswer(new Answer<InputAttemptIdentifier>() {

        @Override
        public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            String path = (String) args[0];
            int reduceId = (int) args[1];
            return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId));
        }
    }).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));

    // Hand back a mocked DISK_DIRECT MapOutput that remembers which attempt it was built for.
    doAnswer(new Answer<MapOutput>() {

        @Override
        public MapOutput answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            MapOutput mapOutput = mock(MapOutput.class);
            doReturn(MapOutput.Type.DISK_DIRECT).when(mapOutput).getType();
            doReturn(args[0]).when(mapOutput).getAttemptIdentifier();
            return mapOutput;
        }
    }).when(spyFetcher).getMapOutputForDirectDiskFetch(any(InputAttemptIdentifier.class), any(Path.class), any(TezIndexRecord.class));

    // Avoid touching the real file system: derive the input file path from the first argument.
    doAnswer(new Answer<Path>() {

        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
        }
    }).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());

    // The answer only reads final locals, so a single instance is shared by all partition stubs.
    final Answer<TezIndexRecord> indexRecordOrFailure = new Answer<TezIndexRecord>() {

        @Override
        public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
            String pathComponent = (String) invocation.getArguments()[0];
            // Last character of the path component is the attempt's numeric suffix.
            long p = Long.parseLong(pathComponent.substring(pathComponent.length() - 1));
            if (p == firstFailedAttemptIdx || p == secondFailedAttemptIdx) {
                throw new IOException("failing to simulate failure case");
            }
            // These values must stay in sync with the matchers in verifyCopySucceeded.
            return new TezIndexRecord(p * 10, (p + 1) * 1000, (p + 2) * 100);
        }
    };
    for (int i = 0; i < host.getPartitionCount(); i++) {
        doAnswer(indexRecordOrFailure).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i));
    }
    doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
    doNothing().when(scheduler).putBackKnownMapOutput(host, srcAttempts.get(firstFailedAttemptIdx));
    doNothing().when(scheduler).putBackKnownMapOutput(host, srcAttempts.get(secondFailedAttemptIdx));

    spyFetcher.setupLocalDiskFetch(host);

    // Expect exactly 3 successful attempts and 2 failed ones.
    for (int i : successfulAttemptIndexes) {
        for (int j = 0; j < host.getPartitionCount(); j++) {
            verifyCopySucceeded(scheduler, host, srcAttempts, i, j);
        }
    }
    verify(scheduler).copyFailed(srcAttempts.get(firstFailedAttemptIdx).expand(0), host, true, false, true);
    verify(scheduler).copyFailed(srcAttempts.get(secondFailedAttemptIdx).expand(0), host, true, false, true);
    verify(spyFetcher).putBackRemainingMapOutputs(host);
    verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(firstFailedAttemptIdx));
    verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(secondFailedAttemptIdx));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Matchers.anyString(org.mockito.Matchers.anyString) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Path(org.apache.hadoop.fs.Path) InputContext(org.apache.tez.runtime.api.InputContext) IOException(java.io.IOException) TezCounters(org.apache.tez.common.counters.TezCounters) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 3 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestFetcher, method verifyCopySucceeded:

/**
 * Verifies that the scheduler mock received exactly one {@code copySucceeded} call for the
 * expanded source attempt {@code srcAttempts.get(p).expand(j)} on {@code host}, with size
 * arguments {@code (p + 2) * 100} and {@code (p + 1) * 1000}, and that the map output passed
 * along is a {@code DISK_DIRECT} output for that same attempt.
 *
 * <p>Only the attempt, host, sizes and map output are checked; the shuffle input file name
 * is not verified here.
 *
 * @param scheduler   mocked scheduler whose interactions are verified
 * @param host        host the copy is expected to be attributed to
 * @param srcAttempts source attempts handed to the fetcher
 * @param p           index into {@code srcAttempts}; also drives the expected sizes
 * @param j           index passed to {@code expand} to pick the input within the attempt
 * @throws IOException declared by the mocked {@code copySucceeded} signature
 */
private void verifyCopySucceeded(ShuffleScheduler scheduler, MapHost host, List<CompositeInputAttemptIdentifier> srcAttempts, long p, int j) throws IOException {
    InputAttemptIdentifier srcAttemptToMatch = srcAttempts.get((int) p).expand(j);
    ArgumentCaptor<MapOutput> captureMapOutput = ArgumentCaptor.forClass(MapOutput.class);
    // Expected sizes mirror the TezIndexRecord stubbed in testSetupLocalDiskFetch.
    verify(scheduler).copySucceeded(eq(srcAttemptToMatch), eq(host), eq((p + 2) * 100), eq((p + 1) * 1000), anyLong(), captureMapOutput.capture(), anyBoolean());
    // MapOutput.equals compares a private id, so the relevant fields are compared one by one.
    // Separate assertions report which field mismatched, with expected and actual values.
    MapOutput m = captureMapOutput.getAllValues().get(0);
    Assert.assertEquals(MapOutput.Type.DISK_DIRECT, m.getType());
    Assert.assertEquals(srcAttemptToMatch, m.getAttemptIdentifier());
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Matchers.anyString(org.mockito.Matchers.anyString)

Example 4 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestShuffleScheduler, method testPenalty:

/**
 * Registers a single map output on {@code host0}, fails its copy, and then asks the
 * scheduler for a host both immediately and after {@code INITIAL_PENALTY + 1000} ms.
 *
 * <p>NOTE(review): both {@code assertFalse} checks compare a three-part
 * {@code host:port:partitionId} string against the two-part literal {@code "host0:10000"},
 * so the comparison is never true and the assertions cannot fail. They are kept unchanged
 * because the intended expectation is not visible here — confirm and tighten.
 */
@Test(timeout = 60000)
public void testPenalty() throws IOException, InterruptedException {
    long startTime = System.currentTimeMillis();
    Shuffle shuffle = mock(Shuffle.class);
    final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 1, shuffle);
    CompositeInputAttemptIdentifier inputAttemptIdentifier = new CompositeInputAttemptIdentifier(0, 0, "attempt_", 1);
    scheduler.addKnownMapOutput("host0", 10000, 0, inputAttemptIdentifier);

    // assertEquals reports the actual size/state on failure, unlike assertTrue(a == b).
    assertEquals(1, scheduler.pendingHosts.size());
    MapHost mapHost = scheduler.pendingHosts.iterator().next();
    assertEquals(MapHost.State.PENDING, mapHost.getState());

    // Fails to pull from host0. host0 should be added to penalties
    scheduler.copyFailed(inputAttemptIdentifier, mapHost, false, true, false);

    // Should not get host, as it is added to penalty loop
    MapHost host = scheduler.getHost();
    String hostIdentifier = host.getHost() + ":" + host.getPort() + ":" + host.getPartitionId();
    assertFalse("Host identifier mismatch", hostIdentifier.equalsIgnoreCase("host0:10000"));

    // Referee thread would release it after INITIAL_PENALTY timeout
    Thread.sleep(ShuffleScheduler.INITIAL_PENALTY + 1000);
    host = scheduler.getHost();
    hostIdentifier = host.getHost() + ":" + host.getPort() + ":" + host.getPartitionId();
    assertFalse("Host identifier mismatch", hostIdentifier.equalsIgnoreCase("host0:10000"));
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) Test(org.junit.Test)

Example 5 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestShuffleScheduler, method testReducerHealth_4:

/**
 * Scenario
 *    - reducer has progressed enough
 *    - failures have happened randomly in nodes, but tasks are completed
 *    - failures start happening after that in last fetch
 *    - no of attempts failing does not exceed maxFailedUniqueFetches (5)
 *    - Stalled
 * Expected result
 *    - reducer is stalled. But since errors are not seen across multiple
 *    nodes, it is left to the AM to restart producer. Do not kill consumer.
 */
@Test(timeout = 60000)
public void testReducerHealth_4() throws IOException {
    long startTime = System.currentTimeMillis() - 500000;
    Shuffle shuffle = mock(Shuffle.class);
    final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);
    int totalProducerNodes = 20;

    // Generate 320 events
    for (int i = 0; i < 320; i++) {
        CompositeInputAttemptIdentifier inputAttemptIdentifier = new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
        scheduler.addKnownMapOutput("host" + (i % totalProducerNodes), 10000, i, inputAttemptIdentifier);
    }

    // The first 64 attempts (20% of 320) each fail 3 times, but then succeed.
    for (int i = 0; i < 64; i++) {
        InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
        String hostName = "host" + (i % totalProducerNodes);
        // A fresh MapHost is passed on every call, exactly as a new fetch would.
        for (int failure = 0; failure < 3; failure++) {
            scheduler.copyFailed(inputAttemptIdentifier, new MapHost(hostName, 10000, i, 1), false, true, false);
        }
        MapOutput mapOutput = MapOutput.createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
        scheduler.copySucceeded(inputAttemptIdentifier, new MapHost(hostName, 10000, i, 1), 100, 200, startTime + (i * 100), mapOutput, false);
    }

    // Attempts 64..318 succeed directly, so 319 of the 320 attempts have succeeded.
    for (int i = 64; i < 319; i++) {
        InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
        MapOutput mapOutput = MapOutput.createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
        scheduler.copySucceeded(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), 100, 200, startTime + (i * 100), mapOutput, false);
    }

    // 1 fails (last fetch)
    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(319, 0, "attempt_");
    String lastHostName = "host" + (319 % totalProducerNodes);
    scheduler.copyFailed(inputAttemptIdentifier, new MapHost(lastHostName, 10000, 319, 1), false, true, false);

    // stall the shuffle (but within limits)
    scheduler.lastProgressTime = System.currentTimeMillis() - 100000;
    // Expected value goes first so a failure message reads correctly.
    assertEquals(1, scheduler.remainingMaps.get());

    // Retry for 3 more times
    for (int retry = 0; retry < 3; retry++) {
        scheduler.copyFailed(inputAttemptIdentifier, new MapHost(lastHostName, 10000, 319, 1), false, true, false);
    }

    // failedShufflesSinceLastCompletion has crossed the limits. 20% of other nodes had failures as
    // well. However, it has failed only in one host. So this should proceed
    // until AM decides to restart the producer.
    verify(shuffle, times(0)).reportException(any(Throwable.class));

    // Push the last progress time further back; the next failure is expected to be reported.
    scheduler.lastProgressTime = System.currentTimeMillis() - 300000;
    scheduler.copyFailed(inputAttemptIdentifier, new MapHost(lastHostName, 10000, 319, 1), false, true, false);
    verify(shuffle, times(1)).reportException(any(Throwable.class));
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Test(org.junit.Test)

Aggregations

CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)36 InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier)24 Test (org.junit.Test)24 InputContext (org.apache.tez.runtime.api.InputContext)12 IOException (java.io.IOException)10 Event (org.apache.tez.runtime.api.Event)10 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)10 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)8 Configuration (org.apache.hadoop.conf.Configuration)7 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)7 Matchers.anyString (org.mockito.Matchers.anyString)7 LinkedList (java.util.LinkedList)6 Path (org.apache.hadoop.fs.Path)5 InputFailedEvent (org.apache.tez.runtime.api.events.InputFailedEvent)5 FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator)5 ByteString (com.google.protobuf.ByteString)4 ExecutorService (java.util.concurrent.ExecutorService)4 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)4 InvocationOnMock (org.mockito.invocation.InvocationOnMock)4 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3