use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testShutdown.
/**
 * Checks that closing the scheduler while one input is still outstanding lets the
 * background {@code scheduler.start()} call return, and that it returns without an error.
 */
@Test(timeout = 5000)
public void testShutdown() throws Exception {
  final int numInputs = 10;
  final InputContext inputContext = createTezInputContext();
  final Configuration conf = new TezConfiguration();
  final Shuffle shuffle = mock(Shuffle.class);
  final MergeManager mergeManager = mock(MergeManager.class);
  final ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(
      inputContext, conf, numInputs, shuffle, mergeManager, mergeManager,
      System.currentTimeMillis(), null, false, 0, "srcName");

  final ExecutorService executor = Executors.newFixedThreadPool(1);
  try {
    // Drive the scheduler from a separate thread so this thread can feed it inputs.
    final Callable<Void> runScheduler = new Callable<Void>() {
      @Override
      public Void call() throws Exception {
        scheduler.start();
        return null;
      }
    };
    final Future<Void> schedulerRun = executor.submit(runScheduler);

    // Register one known map output per host, remembering each identifier.
    final InputAttemptIdentifier[] identifiers = new InputAttemptIdentifier[numInputs];
    for (int idx = 0; idx < numInputs; idx++) {
      CompositeInputAttemptIdentifier attempt =
          new CompositeInputAttemptIdentifier(idx, 0, "attempt_", 1);
      scheduler.addKnownMapOutput("host" + idx, 10000, 1, attempt);
      identifiers[idx] = attempt;
    }

    // Snapshot the hosts the scheduler currently knows about.
    final MapHost[] mapHosts = new MapHost[numInputs];
    int next = 0;
    for (MapHost host : scheduler.mapLocations.values()) {
      mapHosts[next] = host;
      next++;
    }

    // Copy succeeded for 1 less host
    final int completed = numInputs - 1;
    for (int idx = 0; idx < completed; idx++) {
      MapOutput output = MapOutput.createMemoryMapOutput(
          identifiers[idx], mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
      scheduler.copySucceeded(identifiers[idx], mapHosts[idx], 20, 25, 100, output, false);
      scheduler.freeHost(mapHosts[idx]);
    }

    scheduler.close();

    // Ensure the executor exits, and without an error.
    schedulerRun.get();
  } finally {
    scheduler.close();
    executor.shutdownNow();
  }
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testReducerHealth_2.
/**
 * Scenario
 *    - reducer has progressed enough
 *    - failures start happening after that
 *    - no of attempts failing exceeds maxFailedUniqueFetches (5)
 *    - Has not stalled
 * Expected result
 *    - Since reducer is not stalled, it should continue without error
 *
 * When reducer stalls, wait until enough retries are done and throw exception
 */
@Test(timeout = 60000)
public void testReducerHealth_2() throws IOException, InterruptedException {
  final int totalProducerNodes = 20;
  final long startTime = System.currentTimeMillis() - 500000;
  final Shuffle shuffle = mock(Shuffle.class);
  final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);

  // Generate 0-200 events
  for (int src = 0; src < 200; src++) {
    CompositeInputAttemptIdentifier known =
        new CompositeInputAttemptIdentifier(src, 0, "attempt_", 1);
    scheduler.addKnownMapOutput("host" + (src % totalProducerNodes), 10000, src, known);
  }
  assertEquals(320, scheduler.remainingMaps.get());

  // Generate 200-320 events with empty partitions
  for (int src = 200; src < 320; src++) {
    InputAttemptIdentifier emptyAttempt = new InputAttemptIdentifier(src, 0, "attempt_");
    scheduler.copySucceeded(emptyAttempt, null, 0, 0, 0, null, true);
  }
  // 120 are successful. so remaining is 200
  assertEquals(200, scheduler.remainingMaps.get());

  // 200 pending to be downloaded. Download 190.
  for (int src = 0; src < 190; src++) {
    InputAttemptIdentifier fetched = new InputAttemptIdentifier(src, 0, "attempt_");
    MapOutput output = MapOutput.createMemoryMapOutput(
        fetched, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
    MapHost host = new MapHost("host" + (src % totalProducerNodes), 10000, src, 1);
    scheduler.copySucceeded(fetched, host, 100, 200, startTime + (src * 100), output, false);
  }
  assertEquals(10, scheduler.remainingMaps.get());

  // 10 fails
  for (int src = 190; src < 200; src++) {
    InputAttemptIdentifier failing = new InputAttemptIdentifier(src, 0, "attempt_");
    MapHost host = new MapHost("host" + (src % totalProducerNodes), 10000, src, 1);
    scheduler.copyFailed(failing, host, false, true, false);
  }
  // Shuffle has not stalled. so no issues.
  verify(scheduler.reporter, times(0)).reportException(any(Throwable.class));

  // stall shuffle
  scheduler.lastProgressTime = System.currentTimeMillis() - 250000;
  InputAttemptIdentifier stalledAttempt = new InputAttemptIdentifier(190, 0, "attempt_");
  MapHost stalledHost = new MapHost("host" + (190 % totalProducerNodes), 10000, 190, 1);
  scheduler.copyFailed(stalledAttempt, stalledHost, false, true, false);

  // Even when it is stalled, need (320 - 300 = 20) * 3 = 60 failures
  verify(scheduler.reporter, times(0)).reportException(any(Throwable.class));
  assertEquals(11, scheduler.failedShufflesSinceLastCompletion);

  // fail to download 50 more times across attempts (10 attempts x 5 failures each)
  final int failuresPerPendingAttempt = 5;
  for (int src = 190; src < 200; src++) {
    InputAttemptIdentifier failing = new InputAttemptIdentifier(src, 0, "attempt_");
    for (int round = 0; round < failuresPerPendingAttempt; round++) {
      MapHost host = new MapHost("host" + (src % totalProducerNodes), 10000, src, 1);
      scheduler.copyFailed(failing, host, false, true, false);
    }
  }
  assertEquals(61, scheduler.failedShufflesSinceLastCompletion);
  assertEquals(10, scheduler.remainingMaps.get());
  verify(shuffle, atLeast(0)).reportException(any(Throwable.class));

  // fail another 30 (10 attempts x 3 failures each)
  final int extraFailuresPerAttempt = 3;
  for (int src = 110; src < 120; src++) {
    InputAttemptIdentifier failing = new InputAttemptIdentifier(src, 0, "attempt_");
    for (int round = 0; round < extraFailuresPerAttempt; round++) {
      MapHost host = new MapHost("host" + (src % totalProducerNodes), 10000, src, 1);
      scheduler.copyFailed(failing, host, false, true, false);
    }
  }

  // Should fail now due to fetcherHealthy. (stall has already happened and
  // these are the only pending tasks)
  verify(shuffle, atLeast(1)).reportException(any(Throwable.class));
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testReducerHealth_7.
/**
 * Scenario
 *    - reducer has not progressed enough
 *    - fetch fails >
 *      TEZ_RUNTIME_SHUFFLE_ACCEPTABLE_HOST_FETCH_FAILURE_FRACTION
 * Expected result
 *    - fail the reducer
 */
@Test(timeout = 60000)
public void testReducerHealth_7() throws IOException {
  final int totalProducerNodes = 20;
  final long startTime = System.currentTimeMillis() - 500000;
  final Shuffle shuffle = mock(Shuffle.class);
  final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);

  // Generate 320 events
  for (int src = 0; src < 320; src++) {
    CompositeInputAttemptIdentifier known =
        new CompositeInputAttemptIdentifier(src, 0, "attempt_", 1);
    scheduler.addKnownMapOutput("host" + (src % totalProducerNodes), 10000, src, known);
  }

  // 100 succeeds
  for (int src = 0; src < 100; src++) {
    InputAttemptIdentifier fetched = new InputAttemptIdentifier(src, 0, "attempt_");
    MapOutput output = MapOutput.createMemoryMapOutput(
        fetched, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
    MapHost host = new MapHost("host" + (src % totalProducerNodes), 10000, src, 1);
    scheduler.copySucceeded(fetched, host, 100, 200, startTime + (src * 100), output, false);
  }

  // 99 fails (each of the 99 attempts is reported as failed 4 times)
  final int failuresPerAttempt = 4;
  for (int src = 100; src < 199; src++) {
    InputAttemptIdentifier failing = new InputAttemptIdentifier(src, 0, "attempt_");
    for (int round = 0; round < failuresPerAttempt; round++) {
      MapHost host = new MapHost("host" + (src % totalProducerNodes), 10000, src, 1);
      scheduler.copyFailed(failing, host, false, true, false);
    }
  }

  // The failures must have been surfaced to the shuffle at least once.
  verify(shuffle, atLeast(1)).reportException(any(Throwable.class));
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testNumParallelScheduledFetchers.
/**
 * Configures 10 parallel copies, registers 50 known map outputs (one per host), and
 * expects exactly 10 fetchers to have been created after a short wait.
 *
 * @throws IOException if thrown while setting up or driving the scheduler
 * @throws InterruptedException if the wait for scheduling is interrupted
 */
@Test(timeout = 10000)
public void testNumParallelScheduledFetchers() throws IOException, InterruptedException {
  InputContext inputContext = createTezInputContext();
  Configuration conf = new TezConfiguration();
  // Allow 10 parallel copies at once.
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 10);
  int numInputs = 50;
  Shuffle shuffle = mock(Shuffle.class);
  MergeManager mergeManager = mock(MergeManager.class);
  final ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(
      inputContext, conf, numInputs, shuffle, mergeManager, mergeManager,
      System.currentTimeMillis(), null, false, 0, "srcName", true);

  Future<Void> executorFuture = null;
  ExecutorService executor = Executors.newFixedThreadPool(1);
  try {
    // Run the scheduler loop on a background thread.
    executorFuture = executor.submit(new Callable<Void>() {
      @Override
      public Void call() throws Exception {
        scheduler.start();
        return null;
      }
    });

    // Schedule all copies.
    for (int i = 0; i < numInputs; i++) {
      CompositeInputAttemptIdentifier inputAttemptIdentifier =
          new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
      scheduler.addKnownMapOutput("host" + i, 10000, 1, inputAttemptIdentifier);
    }

    // Sleep for a bit to allow the copies to be scheduled.
    Thread.sleep(2000L);
    assertEquals(10, scheduler.numFetchersCreated.get());
  } finally {
    scheduler.close();
    // The scheduler thread may still be running; cancel it before tearing down the pool.
    if (executorFuture != null) {
      executorFuture.cancel(true);
    }
    executor.shutdownNow();
  }
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleInputEventHandlerImpl method testPipelinedShuffleEvents_WithOutOfOrderAttempts.
/**
 * Pipelined shuffle: checks that processing and error handling are correct when attempts
 * arrive out of order (e.g. attemptNum 1 arrives before attemptNum 0).
 *
 * @throws IOException if thrown by the event handler under test
 */
@Test(timeout = 5000)
public void testPipelinedShuffleEvents_WithOutOfOrderAttempts() throws IOException {
  final InputContext inputContext = createInputContext();
  final ShuffleManager shuffleManager = createShuffleManager(inputContext);
  final FetchedInputAllocator inputAllocator = mock(FetchedInputAllocator.class);
  final ShuffleInputEventHandlerImpl handler = new ShuffleInputEventHandlerImpl(
      inputContext, shuffleManager, inputAllocator, null, false, 0, false);

  // 0--> 1 with spill id 0 (attemptNum 1). attemptNum 0 is not sent.
  final Event laterAttemptEvent =
      createDataMovementEvent(true, 0, 1, 0, false, new BitSet(), 4, 1);
  handler.handleEvents(Collections.singletonList(laterAttemptEvent));

  final CompositeInputAttemptIdentifier expected = new CompositeInputAttemptIdentifier(
      1, 1, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1, 1);
  verify(shuffleManager, times(1)).addKnownInput(eq(HOST), eq(PORT), eq(expected), eq(0));

  // Let attemptNum 1 be scheduled.
  shuffleManager.shuffleInfoEventsMap.get(expected.getInputIdentifier()).scheduledForDownload = true;

  // Now send attemptNum 0. Attempt #1 is already added, so the input is expected to
  // kill itself.
  final Event earlierAttemptEvent =
      createDataMovementEvent(true, 0, 1, 0, false, new BitSet(), 4, 0);
  handler.handleEvents(Collections.singletonList(earlierAttemptEvent));
  verify(inputContext).killSelf(any(Throwable.class), anyString());
}
Aggregations