use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class Fetcher method callInternal.
/**
 * Fetches all source attempts assigned to this fetcher and returns the outcome.
 *
 * <p>Returns immediately with a {@link FetchResult} built from the current fields when there are
 * no source attempts. Otherwise every remaining attempt is registered in
 * {@code pathToAttemptMap}, one of the local-disk, shared or HTTP fetch paths is taken, failed
 * inputs are reported to {@code fetcherCallback} (unless the fetcher has already been shut
 * down), and {@code shutdown()} is invoked before returning.
 *
 * @return the {@code fetchResult} carried by the host-level result of the chosen fetch path
 * @throws IOException if no failed inputs were reported, attempts are still remaining and the
 *     fetch was not a shared (multiplexed) one
 * @throws Exception whatever the underlying fetch path propagates
 */
@Override
public FetchResult callInternal() throws Exception {
  boolean multiplex = (this.sharedFetchEnabled && this.localDiskFetchEnabled);
  if (srcAttempts.size() == 0) {
    return new FetchResult(host, port, partition, partitionCount, srcAttempts);
  }
  populateRemainingMap(srcAttempts);
  for (InputAttemptIdentifier in : srcAttemptsRemaining.values()) {
    if (in instanceof CompositeInputAttemptIdentifier) {
      // A composite identifier stands for several consecutive partitions; register each
      // expanded identifier under its own (path, partition) key.
      CompositeInputAttemptIdentifier cin = (CompositeInputAttemptIdentifier) in;
      for (int i = 0; i < cin.getInputIdentifierCount(); i++) {
        pathToAttemptMap.put(new PathPartition(cin.getPathComponent(), partition + i), cin.expand(i));
      }
    } else {
      pathToAttemptMap.put(new PathPartition(in.getPathComponent(), 0), in);
    }
    // do only if all of them are shared fetches
    multiplex &= in.isShared();
  }
  if (multiplex) {
    // "%s" rather than "%d": Guava-style Preconditions templates only substitute "%s", and "%s"
    // is equally valid for String.format-based implementations.
    Preconditions.checkArgument(partition == 0, "Shared fetches cannot be done for partitioned input" + "- partition is non-zero (%s)", partition);
  }
  HostFetchResult hostFetchResult;
  if (localDiskFetchEnabled && host.equals(localHostname) && port == shufflePort) {
    hostFetchResult = setupLocalDiskFetch();
  } else if (multiplex) {
    hostFetchResult = doSharedFetch();
  } else {
    hostFetchResult = doHttpFetch();
  }
  if (hostFetchResult.failedInputs != null && hostFetchResult.failedInputs.length > 0) {
    if (!isShutDown.get()) {
      LOG.warn("copyInputs failed for tasks " + Arrays.toString(hostFetchResult.failedInputs));
      for (InputAttemptIdentifier left : hostFetchResult.failedInputs) {
        fetcherCallback.fetchFailed(host, left, hostFetchResult.connectFailed);
      }
    } else {
      if (isDebugEnabled) {
        LOG.debug("Ignoring failed fetch reports for " + hostFetchResult.failedInputs.length + " inputs since the fetcher has already been stopped");
      }
    }
  }
  shutdown();
  // Sanity check: with no failures reported, nothing may be left over unless the fetch was
  // multiplexed.
  if (hostFetchResult.failedInputs == null && !srcAttemptsRemaining.isEmpty() && !multiplex) {
    throw new IOException("server didn't return all expected map outputs: " + srcAttemptsRemaining.size() + " left.");
  }
  return hostFetchResult.fetchResult;
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestFetcher method testSetupLocalDiskFetchAutoReduce.
/**
 * Exercises {@code setupLocalDiskFetch} with composite source attempts that each expand to
 * {@code host.getPartitionCount()} identifiers.
 *
 * <p>Index-record lookups for the attempts at {@code FIRST_FAILED_ATTEMPT_IDX} and
 * {@code SECOND_FAILED_ATTEMPT_IDX} are made to throw; the test then checks that the other
 * attempts are reported as succeeded for every partition, that both expansions of each failing
 * attempt are reported via {@code copyFailed}, and that the failing composite attempts are put
 * back on the scheduler.
 */
@Test(timeout = 5000)
public void testSetupLocalDiskFetchAutoReduce() throws Exception {
  Configuration conf = new TezConfiguration();
  ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
  MergeManager merger = mock(MergeManager.class);
  Shuffle shuffle = mock(Shuffle.class);
  // presumably (host, port, partitionId = 1, partitionCount = 2) - confirm against MapHost
  MapHost host = new MapHost(HOST, PORT, 1, 2);
  FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
  FetcherOrderedGrouped spyFetcher = spy(fetcher);
  final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(
      new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(4, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", host.getPartitionCount()));
  final int FIRST_FAILED_ATTEMPT_IDX = 2;
  final int SECOND_FAILED_ATTEMPT_IDX = 4;
  final int[] successfulAttemptIndexes = { 0, 1, 3 };
  doReturn(srcAttempts).when(scheduler).getMapsForHost(host);

  // Lookup table from (path component, partition) to the expanded identifier, used to answer
  // getIdentifierForFetchedOutput on the mocked scheduler.
  final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> pathToIdentifierMap = new ConcurrentHashMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier>();
  for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) {
    for (int i = 0; i < srcAttempt.getInputIdentifierCount(); i++) {
      ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(srcAttempt.getPathComponent(), host.getPartitionId() + i);
      pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i));
    }
  }
  doAnswer(new Answer<InputAttemptIdentifier>() {
    @Override
    public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      String path = (String) args[0];
      int reduceId = (int) args[1];
      return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId));
    }
  }).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));

  // Hand back a mocked DISK_DIRECT output that remembers the identifier it was requested for.
  doAnswer(new Answer<MapOutput>() {
    @Override
    public MapOutput answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      MapOutput mapOutput = mock(MapOutput.class);
      doReturn(MapOutput.Type.DISK_DIRECT).when(mapOutput).getType();
      doReturn(args[0]).when(mapOutput).getAttemptIdentifier();
      return mapOutput;
    }
  }).when(spyFetcher).getMapOutputForDirectDiskFetch(any(InputAttemptIdentifier.class), any(Path.class), any(TezIndexRecord.class));
  doAnswer(new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
    }
  }).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());

  // For every partition served by the host: throw for the two failing attempts, otherwise
  // return an index record derived from the trailing digit of the path component.
  for (int i = 0; i < host.getPartitionCount(); i++) {
    doAnswer(new Answer<TezIndexRecord>() {
      @Override
      public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
        Object[] args = invocation.getArguments();
        String pathComponent = (String) args[0];
        int len = pathComponent.length();
        long p = Long.parseLong(pathComponent.substring(len - 1, len));
        if (pathComponent.equals(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).getPathComponent()) || pathComponent.equals(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).getPathComponent())) {
          throw new IOException("Thowing exception to simulate failure case");
        }
        // match with params for copySucceeded below.
        return new TezIndexRecord(p * 10, (p + 1) * 1000, (p + 2) * 100);
      }
    }).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i));
  }
  doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
  doNothing().when(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0));
  doNothing().when(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(1));

  spyFetcher.setupLocalDiskFetch(host);

  // Three attempts succeed for every partition; the two failing attempts fail for both of
  // their expanded identifiers.
  for (int i : successfulAttemptIndexes) {
    for (int j = 0; j < host.getPartitionCount(); j++) {
      verifyCopySucceeded(scheduler, host, srcAttempts, i, j);
    }
  }
  verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true);
  verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(1), host, true, false, true);
  verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true);
  verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(1), host, true, false, true);
  verify(spyFetcher).putBackRemainingMapOutputs(host);
  verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX));
  verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX));
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method _testReducerHealth_1.
/**
 * Shared body for the reducer-health scenario: 320 outputs are announced across 20 host names,
 * the first 100 are reported as succeeded, ids 100..198 and then id 200 are reported as failed,
 * and finally the interaction with {@code shuffle.reportException} is checked according to the
 * configured minimum failures per host.
 *
 * @param conf configuration handed to {@code createScheduler} and consulted for
 *     {@code TEZ_RUNTIME_SHUFFLE_MIN_FAILURES_PER_HOST}
 * @throws IOException propagated from the scheduler calls
 */
public void _testReducerHealth_1(Configuration conf) throws IOException {
  final int totalProducerNodes = 20;
  final long startTime = System.currentTimeMillis() - 500000;
  Shuffle shuffle = mock(Shuffle.class);
  final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle, conf);

  // Announce all 320 outputs.
  for (int id = 0; id < 320; id++) {
    String hostName = "host" + (id % totalProducerNodes);
    CompositeInputAttemptIdentifier known = new CompositeInputAttemptIdentifier(id, 0, "attempt_", 1);
    scheduler.addKnownMapOutput(hostName, 10000, id, known);
  }

  // The first 100 fetches succeed.
  for (int id = 0; id < 100; id++) {
    InputAttemptIdentifier fetched = new InputAttemptIdentifier(id, 0, "attempt_");
    MapOutput output = MapOutput.createMemoryMapOutput(fetched, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
    MapHost source = new MapHost("host" + (id % totalProducerNodes), 10000, id, 1);
    scheduler.copySucceeded(fetched, source, 100, 200, startTime + (id * 100), output, false);
  }

  // The next 99 fetches (ids 100..198) fail.
  for (int id = 100; id < 199; id++) {
    InputAttemptIdentifier failed = new InputAttemptIdentifier(id, 0, "attempt_");
    MapHost source = new MapHost("host" + (id % totalProducerNodes), 10000, id, 1);
    scheduler.copyFailed(failed, source, false, true, false);
  }

  // One more failure, for id 200: expected to report an exception as the reducer is unhealthy.
  InputAttemptIdentifier lastFailed = new InputAttemptIdentifier(200, 0, "attempt_");
  MapHost lastSource = new MapHost("host" + (200 % totalProducerNodes), 10000, 200, 1);
  scheduler.copyFailed(lastFailed, lastSource, false, true, false);

  int minFailurePerHost = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MIN_FAILURES_PER_HOST, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MIN_FAILURES_PER_HOST_DEFAULT);
  // NOTE(review): atLeast(0) can never fail, and the second branch's comment ("would not
  // retrigger") reads at odds with atLeast(1) - confirm the intended verification modes.
  if (minFailurePerHost <= 4) {
    // Within the test threshold: should fail and retrigger shuffle.
    verify(shuffle, atLeast(0)).reportException(any(Throwable.class));
  } else if (minFailurePerHost > 100) {
    // Host failure threshold is so high that this would not retrigger shuffle re-execution.
    verify(shuffle, atLeast(1)).reportException(any(Throwable.class));
  }
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testReducerHealth_5.
/**
 * Scenario
 * - Shuffle has progressed enough
 * - Last event is yet to arrive
 * - Failures start happening after Shuffle has progressed enough
 * - no of attempts failing does not exceed maxFailedUniqueFetches (5)
 * - Stalled
 * Expected result
 * - Do not throw errors, as Shuffle is yet to receive inputs
 */
@Test(timeout = 60000)
public void testReducerHealth_5() throws IOException {
  long startTime = System.currentTimeMillis() - 500000;
  Shuffle shuffle = mock(Shuffle.class);
  final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 320, shuffle);
  int totalProducerNodes = 20;
  // Generate 319 events (last event has not arrived)
  for (int i = 0; i < 319; i++) {
    CompositeInputAttemptIdentifier inputAttemptIdentifier = new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1);
    scheduler.addKnownMapOutput("host" + (i % totalProducerNodes), 10000, i, inputAttemptIdentifier);
  }
  // 319 succeed (ids 0..318)
  for (int i = 0; i < 319; i++) {
    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_");
    MapOutput mapOutput = MapOutput.createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
    scheduler.copySucceeded(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), 100, 200, startTime + (i * 100), mapOutput, false);
  }
  // 1 fails (last fetch)
  // NOTE(review): id 318 was already reported as succeeded by the loop above - confirm that
  // re-reporting it as failed is the intended way to model the failing fetch.
  InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(318, 0, "attempt_");
  scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), false, true, false);
  // stall the shuffle
  scheduler.lastProgressTime = System.currentTimeMillis() - 1000000;
  // Expected value goes first so that a failure message reads correctly.
  assertEquals(1, scheduler.remainingMaps.get());
  // Retry for 3 more times
  scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), false, true, false);
  scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), false, true, false);
  scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), false, true, false);
  // Shuffle has not received the events completely. So do not bail out yet.
  verify(shuffle, times(0)).reportException(any(Throwable.class));
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleScheduler method testSimpleFlow.
/**
 * Happy-path run of the scheduler: it is started on a single-thread executor, {@code numInputs}
 * outputs are announced (one per host name), each is reported as succeeded and its host freed,
 * and the test then checks that progress was notified at least {@code numInputs} times and that
 * {@code start()} returned without an error.
 */
@Test(timeout = 5000)
public void testSimpleFlow() throws Exception {
  final int numInputs = 10;
  InputContext inputContext = createTezInputContext();
  Configuration conf = new TezConfiguration();
  Shuffle shuffle = mock(Shuffle.class);
  MergeManager mergeManager = mock(MergeManager.class);
  final ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(inputContext, conf, numInputs, shuffle, mergeManager, mergeManager, System.currentTimeMillis(), null, false, 0, "srcName");
  ExecutorService executor = Executors.newFixedThreadPool(1);
  try {
    // Run the scheduler in the background; the resulting future is awaited at the end.
    Callable<Void> startScheduler = new Callable<Void>() {
      @Override
      public Void call() throws Exception {
        scheduler.start();
        return null;
      }
    };
    Future<Void> schedulerRun = executor.submit(startScheduler);

    // Announce one output per host name and remember the identifiers.
    InputAttemptIdentifier[] identifiers = new InputAttemptIdentifier[numInputs];
    for (int input = 0; input < numInputs; input++) {
      CompositeInputAttemptIdentifier known = new CompositeInputAttemptIdentifier(input, 0, "attempt_", 1);
      scheduler.addKnownMapOutput("host" + input, 10000, 1, known);
      identifiers[input] = known;
    }

    // Snapshot the hosts the scheduler currently knows about, in iteration order.
    MapHost[] hosts = new MapHost[numInputs];
    int filled = 0;
    for (MapHost location : scheduler.mapLocations.values()) {
      hosts[filled] = location;
      filled++;
    }

    // Report every input as fetched and release its host.
    for (int input = 0; input < numInputs; input++) {
      InputAttemptIdentifier fetched = identifiers[input];
      MapHost source = hosts[input];
      MapOutput output = MapOutput.createMemoryMapOutput(fetched, mock(FetchedInputAllocatorOrderedGrouped.class), 100, false);
      scheduler.copySucceeded(fetched, source, 20, 25, 100, output, false);
      scheduler.freeHost(source);
    }

    verify(inputContext, atLeast(numInputs)).notifyProgress();
    // Ensure the executor exits, and without an error.
    schedulerRun.get();
  } finally {
    scheduler.close();
    executor.shutdownNow();
  }
}
Aggregations