Search in sources :

Example 6 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class TestShuffleScheduler method testShutdown.

/**
 * Starts the scheduler on a background executor, registers {@code numInputs} known map
 * outputs, reports a successful copy for all but one of them, then closes the scheduler and
 * checks that the background task finishes without throwing.
 */
@Test(timeout = 5000)
public void testShutdown() throws Exception {
    final int numInputs = 10;
    InputContext inputContext = createTezInputContext();
    Configuration conf = new TezConfiguration();
    Shuffle shuffle = mock(Shuffle.class);
    MergeManager mergeManager = mock(MergeManager.class);
    final ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(inputContext, conf, numInputs, shuffle, mergeManager, mergeManager, System.currentTimeMillis(), null, false, 0, "srcName");
    ExecutorService executor = Executors.newFixedThreadPool(1);
    try {
        // Run the scheduler on the single executor thread.
        Callable<Void> schedulerTask = new Callable<Void>() {

            @Override
            public Void call() throws Exception {
                scheduler.start();
                return null;
            }
        };
        Future<Void> schedulerFuture = executor.submit(schedulerTask);

        // Register one known map output per input, each on its own host name.
        InputAttemptIdentifier[] attempts = new InputAttemptIdentifier[numInputs];
        for (int idx = 0; idx < numInputs; idx++) {
            CompositeInputAttemptIdentifier attempt = new CompositeInputAttemptIdentifier(idx, 0, "attempt_", 1);
            scheduler.addKnownMapOutput("host" + idx, 10000, 1, attempt);
            attempts[idx] = attempt;
        }

        // Snapshot the hosts the scheduler currently tracks, in the map's iteration order.
        MapHost[] hosts = new MapHost[numInputs];
        int hostCount = 0;
        for (MapHost host : scheduler.mapLocations.values()) {
            hosts[hostCount] = host;
            hostCount++;
        }

        // Copy succeeded for 1 less host
        final int succeededCopies = numInputs - 1;
        for (int idx = 0; idx < succeededCopies; idx++) {
            MapOutput output = MapOutput.createMemoryMapOutput(attempts[idx], mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
            scheduler.copySucceeded(attempts[idx], hosts[idx], 20, 25, 100, output, false);
            scheduler.freeHost(hosts[idx]);
        }

        scheduler.close();
        // get() rethrows any failure from the scheduler task, so returning normally means
        // the executor task exited without an error.
        schedulerFuture.get();
    } finally {
        // Close again here so the scheduler is also closed when the try block fails early.
        scheduler.close();
        executor.shutdownNow();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) IOException(java.io.IOException) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) ExecutorService(java.util.concurrent.ExecutorService) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 7 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class TestShuffleScheduler method testReducerHealth_2.

/**
 * Scenario
 *    - reducer has progressed enough
 *    - failures start happening after that
 *    - no of attempts failing exceeds maxFailedUniqueFetches (5)
 *    - Has not stalled
 * Expected result
 *    - Since reducer is not stalled, it should continue without error
 *
 * When reducer stalls, wait until enough retries are done and throw exception
 */
@Test(timeout = 60000)
public void testReducerHealth_2() throws IOException, InterruptedException {
    final int totalProducerNodes = 20;
    final long startTime = System.currentTimeMillis() - 500000;
    Shuffle shuffle = mock(Shuffle.class);
    final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);

    // Events 0-199: known map outputs spread across the producer nodes.
    for (int id = 0; id < 200; id++) {
        CompositeInputAttemptIdentifier known = new CompositeInputAttemptIdentifier(id, 0, "attempt_", 1);
        scheduler.addKnownMapOutput("host" + (id % totalProducerNodes), 10000, id, known);
    }
    assertEquals(320, scheduler.remainingMaps.get());

    // Events 200-319: reported as succeeded with empty partitions.
    for (int id = 200; id < 320; id++) {
        scheduler.copySucceeded(new InputAttemptIdentifier(id, 0, "attempt_"), null, 0, 0, 0, null, true);
    }
    // 120 are successful. so remaining is 200
    assertEquals(200, scheduler.remainingMaps.get());

    // 200 pending to be downloaded. Download 190.
    for (int id = 0; id < 190; id++) {
        InputAttemptIdentifier fetched = new InputAttemptIdentifier(id, 0, "attempt_");
        MapOutput output = MapOutput.createMemoryMapOutput(fetched, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
        MapHost host = new MapHost("host" + (id % totalProducerNodes), 10000, id, 1);
        scheduler.copySucceeded(fetched, host, 100, 200, startTime + (id * 100), output, false);
    }
    assertEquals(10, scheduler.remainingMaps.get());

    // 10 fails
    for (int id = 190; id < 200; id++) {
        InputAttemptIdentifier failing = new InputAttemptIdentifier(id, 0, "attempt_");
        scheduler.copyFailed(failing, new MapHost("host" + (id % totalProducerNodes), 10000, id, 1), false, true, false);
    }
    // Shuffle has not stalled. so no issues.
    verify(scheduler.reporter, times(0)).reportException(any(Throwable.class));

    // stall shuffle
    scheduler.lastProgressTime = System.currentTimeMillis() - 250000;
    InputAttemptIdentifier stalledAttempt = new InputAttemptIdentifier(190, 0, "attempt_");
    scheduler.copyFailed(stalledAttempt, new MapHost("host" + (190 % totalProducerNodes), 10000, 190, 1), false, true, false);
    // Even when it is stalled, need (320 - 300 = 20) * 3 = 60 failures
    verify(scheduler.reporter, times(0)).reportException(any(Throwable.class));
    assertEquals(11, scheduler.failedShufflesSinceLastCompletion);

    // fail to download 50 more times across attempts (10 inputs x 5 failures each)
    for (int id = 190; id < 200; id++) {
        InputAttemptIdentifier failing = new InputAttemptIdentifier(id, 0, "attempt_");
        for (int failure = 0; failure < 5; failure++) {
            scheduler.copyFailed(failing, new MapHost("host" + (id % totalProducerNodes), 10000, id, 1), false, true, false);
        }
    }
    assertEquals(61, scheduler.failedShufflesSinceLastCompletion);
    assertEquals(10, scheduler.remainingMaps.get());
    // NOTE(review): atLeast(0) is satisfied by any number of invocations, so this
    // verification cannot fail — confirm whether a stricter bound was intended.
    verify(shuffle, atLeast(0)).reportException(any(Throwable.class));

    // fail another 30 (10 inputs x 3 failures each)
    for (int id = 110; id < 120; id++) {
        InputAttemptIdentifier failing = new InputAttemptIdentifier(id, 0, "attempt_");
        for (int failure = 0; failure < 3; failure++) {
            scheduler.copyFailed(failing, new MapHost("host" + (id % totalProducerNodes), 10000, id, 1), false, true, false);
        }
    }
    // Should fail now due to fetcherHealthy. (stall has already happened and
    // these are the only pending tasks)
    verify(shuffle, atLeast(1)).reportException(any(Throwable.class));
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Test(org.junit.Test)

Example 8 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class TestShuffleScheduler method testReducerHealth_7.

/**
 * Scenario
 *    - reducer has not progressed enough
 *    - fetch fails >
 *    TEZ_RUNTIME_SHUFFLE_ACCEPTABLE_HOST_FETCH_FAILURE_FRACTION
 * Expected result
 *    - fail the reducer
 */
@Test(timeout = 60000)
public void testReducerHealth_7() throws IOException {
    final int totalProducerNodes = 20;
    final long startTime = System.currentTimeMillis() - 500000;
    Shuffle shuffle = mock(Shuffle.class);
    final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);

    // Generate 320 events
    for (int id = 0; id < 320; id++) {
        CompositeInputAttemptIdentifier known = new CompositeInputAttemptIdentifier(id, 0, "attempt_", 1);
        scheduler.addKnownMapOutput("host" + (id % totalProducerNodes), 10000, id, known);
    }

    // 100 succeeds
    for (int id = 0; id < 100; id++) {
        InputAttemptIdentifier fetched = new InputAttemptIdentifier(id, 0, "attempt_");
        MapOutput output = MapOutput.createMemoryMapOutput(fetched, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
        MapHost host = new MapHost("host" + (id % totalProducerNodes), 10000, id, 1);
        scheduler.copySucceeded(fetched, host, 100, 200, startTime + (id * 100), output, false);
    }

    // 99 fails, each reported four times against a freshly built MapHost
    for (int id = 100; id < 199; id++) {
        InputAttemptIdentifier failing = new InputAttemptIdentifier(id, 0, "attempt_");
        for (int failure = 0; failure < 4; failure++) {
            scheduler.copyFailed(failing, new MapHost("host" + (id % totalProducerNodes), 10000, id, 1), false, true, false);
        }
    }

    // The shuffle mock must have been told about at least one exception.
    verify(shuffle, atLeast(1)).reportException(any(Throwable.class));
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Test(org.junit.Test)

Example 9 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class TestShuffleScheduler method testNumParallelScheduledFetchers.

/**
 * Configures TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES to 10, registers 50 known map outputs on
 * distinct host names while the scheduler runs on a background executor, waits two seconds,
 * and asserts that the scheduler's {@code numFetchersCreated} counter equals 10.
 *
 * @throws IOException declared by the test signature; any occurrence fails the test
 * @throws InterruptedException if the wait for scheduling is interrupted
 */
@Test(timeout = 10000)
public void testNumParallelScheduledFetchers() throws IOException, InterruptedException {
    InputContext inputContext = createTezInputContext();
    Configuration conf = new TezConfiguration();
    // Allow 10 parallel copies at once.
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 10);
    int numInputs = 50;
    Shuffle shuffle = mock(Shuffle.class);
    MergeManager mergeManager = mock(MergeManager.class);
    final ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(inputContext, conf, numInputs, shuffle, mergeManager, mergeManager, System.currentTimeMillis(), null, false, 0, "srcName", true);
    Future<Void> executorFuture = null;
    ExecutorService executor = Executors.newFixedThreadPool(1);
    try {
        executorFuture = executor.submit(new Callable<Void>() {

            @Override
            public Void call() throws Exception {
                scheduler.start();
                return null;
            }
        });
        // Schedule all copies. The identifiers are not needed afterwards, so they are
        // handed straight to the scheduler instead of being collected in an array.
        for (int i = 0; i < numInputs; i++) {
            CompositeInputAttemptIdentifier inputAttemptIdentifier = new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
            scheduler.addKnownMapOutput("host" + i, 10000, 1, inputAttemptIdentifier);
        }
        // Sleep for a bit to allow the copies to be scheduled.
        Thread.sleep(2000L);
        assertEquals(10, scheduler.numFetchersCreated.get());
    } finally {
        scheduler.close();
        // The future stays null if submit() itself threw.
        if (executorFuture != null) {
            executorFuture.cancel(true);
        }
        executor.shutdownNow();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Callable(java.util.concurrent.Callable) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) ExecutorService(java.util.concurrent.ExecutorService) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 10 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class TestShuffleInputEventHandlerImpl method testPipelinedShuffleEvents_WithOutOfOrderAttempts.

/**
 * In pipelined shuffle, check if processing & exceptions are done correctly when attempts are
 * received in out of order fashion (e.g attemptNum 1 arrives before attemptNum 0).
 *
 * The test delivers an event for attemptNum 1 first, marks that input as scheduled for
 * download, then delivers attemptNum 0 and expects the input context to be asked to kill
 * itself.
 *
 * @throws IOException declared by the test signature; any occurrence fails the test
 */
@Test(timeout = 5000)
public void testPipelinedShuffleEvents_WithOutOfOrderAttempts() throws IOException {
    InputContext inputContext = createInputContext();
    ShuffleManager shuffleManager = createShuffleManager(inputContext);
    FetchedInputAllocator inputAllocator = mock(FetchedInputAllocator.class);
    ShuffleInputEventHandlerImpl handler = new ShuffleInputEventHandlerImpl(inputContext, shuffleManager, inputAllocator, null, false, 0, false);

    // 0--> 1 with spill id 0 (attemptNum 1).  attemptNum 0 is not sent.
    Event laterAttemptEvent = createDataMovementEvent(true, 0, 1, 0, false, new BitSet(), 4, 1);
    handler.handleEvents(Collections.singletonList(laterAttemptEvent));
    CompositeInputAttemptIdentifier expected = new CompositeInputAttemptIdentifier(1, 1, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1, 1);
    verify(shuffleManager, times(1)).addKnownInput(eq(HOST), eq(PORT), eq(expected), eq(0));

    // Let attemptNum 1 be scheduled.
    shuffleManager.shuffleInfoEventsMap.get(expected.getInputIdentifier()).scheduledForDownload = true;

    // Now send attemptNum 0.  This should throw exception, because attempt #1 is already added
    Event earlierAttemptEvent = createDataMovementEvent(true, 0, 1, 0, false, new BitSet(), 4, 0);
    handler.handleEvents(Collections.singletonList(earlierAttemptEvent));
    verify(inputContext).killSelf(any(Throwable.class), anyString());
}
Also used : FetchedInputAllocator(org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputContext(org.apache.tez.runtime.api.InputContext) BitSet(java.util.BitSet) Event(org.apache.tez.runtime.api.Event) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) Test(org.junit.Test)

Aggregations

CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)36 InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier)24 Test (org.junit.Test)24 InputContext (org.apache.tez.runtime.api.InputContext)12 IOException (java.io.IOException)10 Event (org.apache.tez.runtime.api.Event)10 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)10 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)8 Configuration (org.apache.hadoop.conf.Configuration)7 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)7 Matchers.anyString (org.mockito.Matchers.anyString)7 LinkedList (java.util.LinkedList)6 Path (org.apache.hadoop.fs.Path)5 InputFailedEvent (org.apache.tez.runtime.api.events.InputFailedEvent)5 FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator)5 ByteString (com.google.protobuf.ByteString)4 ExecutorService (java.util.concurrent.ExecutorService)4 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)4 InvocationOnMock (org.mockito.invocation.InvocationOnMock)4 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3