use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testReducerHealth_5.
/**
 * Scenario
 *    - Shuffle has progressed enough
 *    - Last event is yet to arrive
 *    - Failures start happening after Shuffle has progressed enough
 *    - no of attempts failing does not exceed maxFailedUniqueFetches (5)
 *    - Stalled
 * Expected result
 *    - Do not throw errors, as Shuffle is yet to receive inputs
 */
@Test(timeout = 60000)
public void testReducerHealth_5() throws IOException {
  long startTime = System.currentTimeMillis() - 500000;
  Shuffle shuffle = mock(Shuffle.class);
  final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);
  int totalProducerNodes = 20;

  // Generate 319 events (last event has not arrived)
  for (int i = 0; i < 319; i++) {
    CompositeInputAttemptIdentifier inputAttemptIdentifier =
        new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
    scheduler.addKnownMapOutput("host" + (i % totalProducerNodes), 10000, i,
        inputAttemptIdentifier);
  }

  // All 319 known outputs (indices 0..318) are reported as copied successfully
  for (int i = 0; i < 319; i++) {
    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
    MapOutput mapOutput = MapOutput.createMemoryMapOutput(inputAttemptIdentifier,
        mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
    scheduler.copySucceeded(inputAttemptIdentifier,
        new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), 100, 200,
        startTime + (i * 100), mapOutput, false);
  }

  // A failure is then reported for the last known output (index 318)
  InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(318, 0, "attempt_");
  scheduler.copyFailed(inputAttemptIdentifier,
      new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), false, true, false);

  // stall the shuffle by back-dating the last progress time
  scheduler.lastProgressTime = System.currentTimeMillis() - 1000000;

  // Exactly one input is still outstanding (expected value first, per JUnit convention)
  assertEquals(1, scheduler.remainingMaps.get());

  // Retry for 3 more times; a fresh MapHost is built for every attempt, as before
  for (int retry = 0; retry < 3; retry++) {
    scheduler.copyFailed(inputAttemptIdentifier,
        new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), false, true, false);
  }

  // Shuffle has not received the events completely. So do not bail out yet.
  verify(shuffle, times(0)).reportException(any(Throwable.class));
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testSimpleFlow.
/**
 * Happy-path run: ten inputs are registered with the scheduler, each one is then
 * reported as copied successfully, and the background call to
 * {@code scheduler.start()} is expected to complete without throwing.
 */
@Test(timeout = 5000)
public void testSimpleFlow() throws Exception {
  InputContext inputContext = createTezInputContext();
  Configuration conf = new TezConfiguration();
  final int inputCount = 10;
  Shuffle shuffle = mock(Shuffle.class);
  MergeManager mergeManager = mock(MergeManager.class);
  final ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(inputContext, conf,
      inputCount, shuffle, mergeManager, mergeManager, System.currentTimeMillis(), null, false,
      0, "srcName");

  ExecutorService executor = Executors.newFixedThreadPool(1);
  try {
    // Drive the scheduler on a background thread so the test thread can feed it events.
    Callable<Void> startScheduler = new Callable<Void>() {
      @Override
      public Void call() throws Exception {
        scheduler.start();
        return null;
      }
    };
    Future<Void> startResult = executor.submit(startScheduler);

    // Register one known output per input, each on its own host.
    InputAttemptIdentifier[] attemptIds = new InputAttemptIdentifier[inputCount];
    for (int idx = 0; idx < inputCount; idx++) {
      CompositeInputAttemptIdentifier attemptId =
          new CompositeInputAttemptIdentifier(idx, 0, "attempt_", 1);
      scheduler.addKnownMapOutput("host" + idx, 10000, 1, attemptId);
      attemptIds[idx] = attemptId;
    }

    // Snapshot the hosts the scheduler now tracks, in the map's iteration order.
    MapHost[] hosts = new MapHost[inputCount];
    int nextSlot = 0;
    for (MapHost host : scheduler.mapLocations.values()) {
      hosts[nextSlot] = host;
      nextSlot++;
    }

    // Report a successful copy for every input and release the host afterwards.
    for (int idx = 0; idx < inputCount; idx++) {
      InputAttemptIdentifier attemptId = attemptIds[idx];
      MapHost host = hosts[idx];
      MapOutput fetched = MapOutput.createMemoryMapOutput(attemptId,
          mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
      scheduler.copySucceeded(attemptId, host, 20, 25, 100, fetched, false);
      scheduler.freeHost(host);
    }

    verify(inputContext, atLeast(inputCount)).notifyProgress();

    // Ensure the executor exits, and without an error.
    startResult.get();
  } finally {
    scheduler.close();
    executor.shutdownNow();
  }
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testReducerHealth_3.
/**
 * Scenario
 *    - reducer has progressed enough
 *    - failures start happening after that in last fetch
 *    - no of attempts failing does not exceed maxFailedUniqueFetches (5)
 *    - Stalled
 * Expected result
 *    - Since reducer is stalled and if failures haven't happened across nodes,
 *      it should be fine to proceed. AM would restart source task eventually.
 */
@Test(timeout = 60000)
public void testReducerHealth_3() throws IOException {
  long startTime = System.currentTimeMillis() - 500000;
  Shuffle shuffle = mock(Shuffle.class);
  final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);
  int totalProducerNodes = 20;

  // Generate 320 events
  for (int i = 0; i < 320; i++) {
    CompositeInputAttemptIdentifier inputAttemptIdentifier =
        new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
    scheduler.addKnownMapOutput("host" + (i % totalProducerNodes), 10000, i,
        inputAttemptIdentifier);
  }

  // 319 succeeds
  for (int i = 0; i < 319; i++) {
    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
    MapOutput mapOutput = MapOutput.createMemoryMapOutput(inputAttemptIdentifier,
        mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
    scheduler.copySucceeded(inputAttemptIdentifier,
        new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), 100, 200,
        startTime + (i * 100), mapOutput, false);
  }

  // 1 fails (last fetch)
  InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(319, 0, "attempt_");
  scheduler.copyFailed(inputAttemptIdentifier,
      new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), false, true, false);

  // stall the shuffle by back-dating the last progress time
  scheduler.lastProgressTime = System.currentTimeMillis() - 1000000;

  // Only the failing input is still outstanding (expected value first, per JUnit convention)
  assertEquals(1, scheduler.remainingMaps.get());

  // Retry for 3 more times
  scheduler.copyFailed(inputAttemptIdentifier,
      new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), false, true, false);
  // NOTE(review): the next two MapHosts use partition id 310 instead of 319 as above --
  // possibly a typo; left unchanged because its effect on copyFailed is not visible here.
  scheduler.copyFailed(inputAttemptIdentifier,
      new MapHost("host" + (319 % totalProducerNodes), 10000, 310, 1), false, true, false);
  scheduler.copyFailed(inputAttemptIdentifier,
      new MapHost("host" + (319 % totalProducerNodes), 10000, 310, 1), false, true, false);

  // The repeated failures all concern a single input, so per the scenario above
  // the shuffle must carry on: no exception may have been reported.
  verify(shuffle, times(0)).reportException(any(Throwable.class));
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testShutdownWithInterrupt.
/**
 * Closes the scheduler from a separate thread while one input is still
 * outstanding, then checks that the scheduler reports its fetcher executor as
 * stopped.
 */
@Test(timeout = 30000)
public void testShutdownWithInterrupt() throws Exception {
  InputContext inputContext = createTezInputContext();
  Configuration conf = new TezConfiguration();
  int numInputs = 10;
  Shuffle shuffle = mock(Shuffle.class);
  MergeManager mergeManager = mock(MergeManager.class);
  final ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(inputContext, conf,
      numInputs, shuffle, mergeManager, mergeManager, System.currentTimeMillis(), null, false, 0,
      "srcName");

  ExecutorService executor = Executors.newFixedThreadPool(1);
  // Everything after executor creation runs under try/finally so the worker thread is
  // always torn down, including when setup or the final assertion fails.
  try {
    // Run the scheduler on the background thread; its result is not inspected by this test.
    executor.submit(new Callable<Void>() {
      @Override
      public Void call() throws Exception {
        scheduler.start();
        return null;
      }
    });

    InputAttemptIdentifier[] identifiers = new InputAttemptIdentifier[numInputs];
    for (int i = 0; i < numInputs; i++) {
      CompositeInputAttemptIdentifier inputAttemptIdentifier =
          new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
      scheduler.addKnownMapOutput("host" + i, 10000, 1, inputAttemptIdentifier);
      identifiers[i] = inputAttemptIdentifier;
    }

    MapHost[] mapHosts = new MapHost[numInputs];
    int count = 0;
    for (MapHost mh : scheduler.mapLocations.values()) {
      mapHosts[count++] = mh;
    }

    // Copy succeeded for 1 less host
    for (int i = 0; i < numInputs - 1; i++) {
      MapOutput mapOutput = MapOutput.createMemoryMapOutput(identifiers[i],
          mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
      scheduler.copySucceeded(identifiers[i], mapHosts[i], 20, 25, 100, mapOutput, false);
      scheduler.freeHost(mapHosts[i]);
    }

    // Close the scheduler on different thread to trigger interrupt
    Thread thread = new Thread(new Runnable() {
      @Override
      public void run() {
        scheduler.close();
      }
    });
    thread.start();
    thread.join();

    // Asserted outside of any finally block so it cannot mask an earlier exception
    // and cannot prevent the executor shutdown below.
    assertTrue("Fetcher executor should be shutdown, but still running",
        scheduler.hasFetcherExecutorStopped());
  } finally {
    executor.shutdownNow();
  }
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method _testReducerHealth_6.
/**
 * Shared body of the "reducer health 6" scenario: only 10 of 320 inputs are copied,
 * then five inputs fail repeatedly. The expected outcome depends on
 * {@code TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION} in {@code conf}.
 *
 * @param conf configuration used to build the scheduler and to pick the expected outcome
 * @throws IOException declared for the scheduler calls made below
 */
public void _testReducerHealth_6(Configuration conf) throws IOException {
  long startTime = System.currentTimeMillis() - 500000;
  Shuffle shuffle = mock(Shuffle.class);
  final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle, conf);
  int totalProducerNodes = 20;

  // Generate all 320 events
  for (int i = 0; i < 320; i++) {
    CompositeInputAttemptIdentifier inputAttemptIdentifier =
        new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
    scheduler.addKnownMapOutput("host" + (i % totalProducerNodes), 10000, i,
        inputAttemptIdentifier);
  }

  // 10 succeeds
  for (int i = 0; i < 10; i++) {
    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
    MapOutput mapOutput = MapOutput.createMemoryMapOutput(inputAttemptIdentifier,
        mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
    scheduler.copySucceeded(inputAttemptIdentifier,
        new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), 100, 200,
        startTime + (i * 100), mapOutput, false);
  }

  // 5 fetches fail once
  for (int i = 10; i < 15; i++) {
    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
    scheduler.copyFailed(inputAttemptIdentifier,
        new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true, false);
  }

  assertTrue(scheduler.failureCounts.size() >= 5);
  // 320 inputs minus the 10 successful copies (expected value first, per JUnit convention)
  assertEquals(310, scheduler.remainingMaps.get());

  // Do not bail out (number of failures is just 5)
  // NOTE(review): this check goes through scheduler.reporter while the later ones use the
  // shuffle mock directly -- presumably the same object; confirm against createScheduler.
  verify(scheduler.reporter, times(0)).reportException(any(Throwable.class));

  // 5 fetches fail repeatedly
  for (int i = 10; i < 15; i++) {
    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
    scheduler.copyFailed(inputAttemptIdentifier,
        new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true, false);
    scheduler.copyFailed(inputAttemptIdentifier,
        new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true, false);
  }

  boolean checkFailedFetchSinceLastCompletion = conf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION,
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION_DEFAULT);
  if (checkFailedFetchSinceLastCompletion) {
    // Now bail out, as Shuffle has crossed the failedShufflesSinceLastCompletion limits
    // (presumably irrespective of the reducer-health check -- TODO confirm).
    verify(shuffle, atLeast(1)).reportException(any(Throwable.class));
  } else {
    // Do not bail out yet.
    // NOTE(review): atLeast(0) is satisfied by any number of invocations, so this branch
    // enforces nothing; tighten to never() if "no exception reported" is the real contract.
    verify(shuffle, atLeast(0)).reportException(any(Throwable.class));
  }
}
Aggregations