Search in sources :

Example 31 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestShuffleScheduler, method testReducerHealth_3:

/**
 * Scenario
 *    - reducer has progressed enough
 *    - failures start happening after that in last fetch
 *    - no of attempts failing does not exceed maxFailedUniqueFetches (5)
 *    - Stalled
 * Expected result
 *    - Since reducer is stalled and if failures haven't happened across nodes,
 *    it should be fine to proceed. AM would restart source task eventually.
 *
 * @throws IOException propagated from the scheduler calls made by the test
 */
@Test(timeout = 60000)
public void testReducerHealth_3() throws IOException {
    // Pretend the shuffle started 500 seconds ago.
    long startTime = System.currentTimeMillis() - 500000;
    Shuffle shuffle = mock(Shuffle.class);
    final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);
    int totalProducerNodes = 20;
    // Generate 320 events, spread round-robin over the producer hosts
    for (int i = 0; i < 320; i++) {
        CompositeInputAttemptIdentifier inputAttemptIdentifier = new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
        scheduler.addKnownMapOutput("host" + (i % totalProducerNodes), 10000, i, inputAttemptIdentifier);
    }
    // 319 succeeds
    for (int i = 0; i < 319; i++) {
        InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
        MapOutput mapOutput = MapOutput.createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
        scheduler.copySucceeded(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), 100, 200, startTime + (i * 100), mapOutput, false);
    }
    // 1 fails (last fetch)
    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(319, 0, "attempt_");
    String failingHost = "host" + (319 % totalProducerNodes);
    scheduler.copyFailed(inputAttemptIdentifier, new MapHost(failingHost, 10000, 319, 1), false, true, false);
    // stall the shuffle
    scheduler.lastProgressTime = System.currentTimeMillis() - 1000000;
    // JUnit's contract is assertEquals(expected, actual); only input 319 is still outstanding.
    assertEquals(1, scheduler.remainingMaps.get());
    // Retry for 3 more times
    scheduler.copyFailed(inputAttemptIdentifier, new MapHost(failingHost, 10000, 319, 1), false, true, false);
    // NOTE(review): the next two calls pass 310 (not 319) as the third MapHost argument.
    // This looks like a typo, but it is preserved here to keep the test's behavior - confirm.
    scheduler.copyFailed(inputAttemptIdentifier, new MapHost(failingHost, 10000, 310, 1), false, true, false);
    scheduler.copyFailed(inputAttemptIdentifier, new MapHost(failingHost, 10000, 310, 1), false, true, false);
    // Even after the repeated failures of the one remaining fetch,
    // no exception must have been reported to the Shuffle.
    verify(shuffle, times(0)).reportException(any(Throwable.class));
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Test(org.junit.Test)

Example 32 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestShuffleScheduler, method testShutdownWithInterrupt:

/**
 * Runs the scheduler on a background executor, marks all but one input as
 * copied, then closes the scheduler from another thread and asserts that the
 * scheduler reports its fetcher executor as stopped.
 */
@Test(timeout = 30000)
public void testShutdownWithInterrupt() throws Exception {
    InputContext inputContext = createTezInputContext();
    Configuration conf = new TezConfiguration();
    int numInputs = 10;
    Shuffle shuffle = mock(Shuffle.class);
    MergeManager mergeManager = mock(MergeManager.class);
    final ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(inputContext, conf, numInputs, shuffle, mergeManager, mergeManager, System.currentTimeMillis(), null, false, 0, "srcName");

    // Drive scheduler.start() on its own single-threaded executor.
    Callable<Void> startScheduler = new Callable<Void>() {

        @Override
        public Void call() throws Exception {
            scheduler.start();
            return null;
        }
    };
    ExecutorService executor = Executors.newFixedThreadPool(1);
    Future<Void> executorFuture = executor.submit(startScheduler);

    // Register one known map output per input, each on its own host name.
    InputAttemptIdentifier[] identifiers = new InputAttemptIdentifier[numInputs];
    for (int inputIdx = 0; inputIdx < numInputs; inputIdx++) {
        CompositeInputAttemptIdentifier attemptId = new CompositeInputAttemptIdentifier(inputIdx, 0, "attempt_", 1);
        scheduler.addKnownMapOutput("host" + inputIdx, 10000, 1, attemptId);
        identifiers[inputIdx] = attemptId;
    }

    // Snapshot the hosts the scheduler currently knows about.
    MapHost[] mapHosts = new MapHost[numInputs];
    int nextSlot = 0;
    for (MapHost knownHost : scheduler.mapLocations.values()) {
        mapHosts[nextSlot] = knownHost;
        nextSlot++;
    }

    // Copy succeeded for 1 less host
    final int completedInputs = numInputs - 1;
    for (int inputIdx = 0; inputIdx < completedInputs; inputIdx++) {
        InputAttemptIdentifier attemptId = identifiers[inputIdx];
        MapHost host = mapHosts[inputIdx];
        MapOutput mapOutput = MapOutput.createMemoryMapOutput(attemptId, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
        scheduler.copySucceeded(attemptId, host, 20, 25, 100, mapOutput, false);
        scheduler.freeHost(host);
    }

    try {
        // Close the scheduler on different thread to trigger interrupt
        Runnable closeScheduler = new Runnable() {

            @Override
            public void run() {
                scheduler.close();
            }
        };
        Thread closer = new Thread(closeScheduler);
        closer.start();
        closer.join();
    } finally {
        assertTrue("Fetcher executor should be shutdown, but still running", scheduler.hasFetcherExecutorStopped());
        executor.shutdownNow();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) IOException(java.io.IOException) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) ExecutorService(java.util.concurrent.ExecutorService) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 33 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestShuffleScheduler, method _testReducerHealth_6:

/**
 * Parameterized body of a reducer-health scenario: 10 of 320 fetches succeed,
 * then 5 other fetches fail three times each. Whether an exception is expected
 * to be reported to the Shuffle depends on the
 * {@code TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION} setting in {@code conf}.
 * Not annotated with {@code @Test}; presumably invoked by test methods that supply the
 * configuration - confirm against the rest of the class.
 *
 * @param conf configuration used to create the scheduler and to pick the expected outcome
 * @throws IOException propagated from the scheduler calls made by the test
 */
public void _testReducerHealth_6(Configuration conf) throws IOException {
    // Pretend the shuffle started 500 seconds ago.
    long startTime = System.currentTimeMillis() - 500000;
    Shuffle shuffle = mock(Shuffle.class);
    final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle, conf);
    int totalProducerNodes = 20;
    // Generate 320 events, spread round-robin over the producer hosts.
    // NOTE(review): the original comment said "last event has not arrived",
    // but the loop registers all 320 - confirm which is intended.
    for (int i = 0; i < 320; i++) {
        CompositeInputAttemptIdentifier inputAttemptIdentifier = new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
        scheduler.addKnownMapOutput("host" + (i % totalProducerNodes), 10000, i, inputAttemptIdentifier);
    }
    // 10 succeeds
    for (int i = 0; i < 10; i++) {
        InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
        MapOutput mapOutput = MapOutput.createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
        scheduler.copySucceeded(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), 100, 200, startTime + (i * 100), mapOutput, false);
    }
    // 5 fetches fail once
    for (int i = 10; i < 15; i++) {
        InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
        scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true, false);
    }
    assertTrue(scheduler.failureCounts.size() >= 5);
    // JUnit's contract is assertEquals(expected, actual): 320 inputs minus 10 successes.
    assertEquals(310, scheduler.remainingMaps.get());
    // Do not bail out (number of failures is just 5)
    verify(scheduler.reporter, times(0)).reportException(any(Throwable.class));
    // 5 fetches fail repeatedly
    for (int i = 10; i < 15; i++) {
        InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
        scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true, false);
        scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true, false);
    }
    boolean checkFailedFetchSinceLastCompletion = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION_DEFAULT);
    if (checkFailedFetchSinceLastCompletion) {
        // Now bail out, as Shuffle has crossed the
        // failedShufflesSinceLastCompletion limits.
        verify(shuffle, atLeast(1)).reportException(any(Throwable.class));
    } else {
        // Do not bail out yet.
        // NOTE(review): atLeast(0) can never fail, so this branch asserts nothing.
        // times(0) may have been intended - confirm against the scheduler's behavior.
        verify(shuffle, atLeast(0)).reportException(any(Throwable.class));
    }
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier)

Example 34 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestFetcher, method testSetupLocalDiskFetch:

/**
 * Exercises {@code Fetcher.call()} with local-fetch optimization enabled on a spied
 * fetcher whose index-record lookup is stubbed to fail for two of five source attempts.
 * Checks that {@code setupLocalDiskFetch()} is invoked, that the three good attempts are
 * reported as succeeded, that the two bad ones are reported as failed, and that exactly
 * those two come back as pending inputs in the {@code FetchResult}.
 */
@Test(timeout = 3000)
public void testSetupLocalDiskFetch() throws Exception {
    // Five source attempts; the last character of each path component is its index (0..4),
    // which the stubbed getTezIndexRecord below relies on.
    CompositeInputAttemptIdentifier[] srcAttempts = {
        new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1),
        new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1),
        new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1),
        new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1),
        new CompositeInputAttemptIdentifier(4, 5, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1)
    };
    final int FIRST_FAILED_ATTEMPT_IDX = 2;
    final int SECOND_FAILED_ATTEMPT_IDX = 4;
    final int[] successfulAttempts = { 0, 1, 3 };
    TezConfiguration conf = new TezConfiguration();
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "true");
    int partition = 42;
    FetcherCallback callback = mock(FetcherCallback.class);
    Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null, ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, true, HOST, PORT, false, true, true);
    ArrayList<InputAttemptIdentifier> list = new ArrayList<InputAttemptIdentifier>(Arrays.asList(srcAttempts));
    builder.assignWork(HOST, PORT, partition, 1, list);
    Fetcher fetcher = spy(builder.build());
    // Seed the fetcher's path -> attempt map with every expanded attempt.
    for (CompositeInputAttemptIdentifier compositeInputAttemptIdentifier : srcAttempts) {
        for (int i = 0; i < compositeInputAttemptIdentifier.getInputIdentifierCount(); i++) {
            Fetcher.PathPartition pathPartition = new Fetcher.PathPartition(compositeInputAttemptIdentifier.getPathComponent(), partition + i);
            fetcher.getPathToAttemptMap().put(pathPartition, compositeInputAttemptIdentifier.expand(i));
        }
    }
    doAnswer(new Answer<Path>() {

        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
        }
    }).when(fetcher).getShuffleInputFileName(anyString(), anyString());
    doAnswer(new Answer<TezIndexRecord>() {

        @Override
        public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            InputAttemptIdentifier srcAttemptId = (InputAttemptIdentifier) args[0];
            String pathComponent = srcAttemptId.getPathComponent();
            int len = pathComponent.length();
            // The trailing digit of the path component identifies the attempt index.
            long p = Long.parseLong(pathComponent.substring(len - 1, len));
            // Fail the 3rd one and 5th one.
            if (p == FIRST_FAILED_ATTEMPT_IDX || p == SECOND_FAILED_ATTEMPT_IDX) {
                throw new IOException("failing on 3/5th input to simulate failure case");
            }
            // match with params for copySucceeded below.
            return new TezIndexRecord(p * 10, p * 1000, p * 100);
        }
    }).when(fetcher).getTezIndexRecord(any(InputAttemptIdentifier.class), anyInt());
    doNothing().when(fetcher).shutdown();
    doNothing().when(callback).fetchSucceeded(anyString(), any(InputAttemptIdentifier.class), any(FetchedInput.class), anyLong(), anyLong(), anyLong());
    doNothing().when(callback).fetchFailed(anyString(), any(InputAttemptIdentifier.class), eq(false));
    FetchResult fetchResult = fetcher.call();
    verify(fetcher).setupLocalDiskFetch();
    // expect 3 successes and 2 failures
    for (int i : successfulAttempts) {
        verifyFetchSucceeded(callback, srcAttempts[i], conf);
    }
    verify(callback).fetchFailed(eq(HOST), eq(srcAttempts[FIRST_FAILED_ATTEMPT_IDX]), eq(false));
    verify(callback).fetchFailed(eq(HOST), eq(srcAttempts[SECOND_FAILED_ATTEMPT_IDX]), eq(false));
    // JUnit's contract is assertEquals(message, expected, actual).
    Assert.assertEquals("fetchResult host", HOST, fetchResult.getHost());
    Assert.assertEquals("fetchResult partition", partition, fetchResult.getPartition());
    Assert.assertEquals("fetchResult port", PORT, fetchResult.getPort());
    // 3rd and 5th attempt failed
    List<InputAttemptIdentifier> pendingInputs = Lists.newArrayList(fetchResult.getPendingInputs());
    Assert.assertEquals("fetchResult pendingInput size", 2, pendingInputs.size());
    // Argument order deliberately left as in the original for these two: assertEquals calls
    // equals() on its first value argument, and whether equality between the pending input and
    // the composite identifier is symmetric is not visible from this test.
    Assert.assertEquals("fetchResult failed attempt", pendingInputs.get(0), srcAttempts[FIRST_FAILED_ATTEMPT_IDX]);
    Assert.assertEquals("fetchResult failed attempt", pendingInputs.get(1), srcAttempts[SECOND_FAILED_ATTEMPT_IDX]);
}
Also used : ArrayList(java.util.ArrayList) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Matchers.anyString(org.mockito.Matchers.anyString) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 35 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

In class TestShuffleInputEventHandlerOrderedGrouped, method testPiplinedShuffleEvents_WithOutofOrderAttempts:

/**
 * Delivers the event for attempt #1 of an input before the event for attempt #0
 * of the same input, and checks that handling the late attempt #0 makes the
 * scheduler issue exactly one kill request.
 */
@Test(timeout = 5000)
public void testPiplinedShuffleEvents_WithOutofOrderAttempts() throws IOException, InterruptedException {
    final int inputIndex = 1;
    final int laterAttempt = 1;
    final int earlierAttempt = 0;

    // Process attempt #1 first
    Event laterAttemptEvent = createDataMovementEvent(laterAttempt, inputIndex, null, false, true, true, 0, laterAttempt);
    handler.handleEvents(Collections.singletonList(laterAttemptEvent));
    CompositeInputAttemptIdentifier expectedId = new CompositeInputAttemptIdentifier(inputIndex, laterAttempt, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0, 1);
    verify(scheduler, times(1)).addKnownMapOutput(eq(HOST), eq(PORT), eq(1), eq(expectedId));

    boolean shuffleInfoRecorded = !scheduler.pipelinedShuffleInfoEventsMap.isEmpty();
    assertTrue("Shuffle info events should not be empty for pipelined shuffle", shuffleInfoRecorded);
    int valuesInMapLocations = scheduler.mapLocations.values().size();
    assertTrue("Maplocations should have values. current size: " + valuesInMapLocations, valuesInMapLocations > 0);

    // start scheduling for download
    scheduler.getMapsForHost(scheduler.mapLocations.values().iterator().next());

    // Attempt #0 comes up. When processing this, it should report exception
    Event earlierAttemptEvent = createDataMovementEvent(earlierAttempt, inputIndex, null, false, true, true, 0, earlierAttempt);
    handler.handleEvents(Collections.singletonList(earlierAttemptEvent));

    // task should issue kill request
    verify(scheduler, times(1)).killSelf(any(IOException.class), any(String.class));
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) Event(org.apache.tez.runtime.api.Event) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) IOException(java.io.IOException) Matchers.anyString(org.mockito.Matchers.anyString) ByteString(com.google.protobuf.ByteString) Test(org.junit.Test)

Aggregations

CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)36 InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier)24 Test (org.junit.Test)24 InputContext (org.apache.tez.runtime.api.InputContext)12 IOException (java.io.IOException)10 Event (org.apache.tez.runtime.api.Event)10 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)10 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)8 Configuration (org.apache.hadoop.conf.Configuration)7 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)7 Matchers.anyString (org.mockito.Matchers.anyString)7 LinkedList (java.util.LinkedList)6 Path (org.apache.hadoop.fs.Path)5 InputFailedEvent (org.apache.tez.runtime.api.events.InputFailedEvent)5 FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator)5 ByteString (com.google.protobuf.ByteString)4 ExecutorService (java.util.concurrent.ExecutorService)4 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)4 InvocationOnMock (org.mockito.invocation.InvocationOnMock)4 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3