use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestUnorderedKVReader method setupReader.
/**
 * Builds the {@code unorderedKVReader} fixture.
 *
 * <p>Configures {@code Text} as both key and value class, writes an IFile via
 * {@code createIFile}, wraps it in a spied {@link LocalDiskFetchedInput} whose
 * {@code free()} is a no-op, and hands that single input to the reader through a
 * mocked {@link ShuffleManager}. Once the input has been handed out, the mocked
 * manager returns {@code null} from {@code getNextInput()}.
 *
 * @throws IOException declared by this method; raised by the fixture helpers or
 *     constructors it calls
 * @throws InterruptedException declared by this method; raised by the calls it makes
 */
private void setupReader() throws IOException, InterruptedException {
  final String textClassName = Text.class.getName();
  defaultConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, textClassName);
  defaultConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, textClassName);

  // NOTE(review): compLen is presumably populated by createIFile - confirm against the helper.
  createIFile(outputPath, 1);

  // Callback that deliberately ignores every notification.
  FetchedInputCallback noOpCallback = new FetchedInputCallback() {
    @Override
    public void fetchComplete(FetchedInput fetchedInput) {
    }

    @Override
    public void fetchFailed(FetchedInput fetchedInput) {
    }

    @Override
    public void freeResources(FetchedInput fetchedInput) {
    }
  };

  LocalDiskFetchedInput diskInput = new LocalDiskFetchedInput(
      0, compLen, new InputAttemptIdentifier(0, 0), outputPath, defaultConf, noOpCallback);
  LocalDiskFetchedInput spiedInput = spy(diskInput);
  // Keep the real object otherwise intact, but make free() do nothing.
  doNothing().when(spiedInput).free();

  final LinkedList<LocalDiskFetchedInput> pendingInputs = new LinkedList<LocalDiskFetchedInput>();
  pendingInputs.add(spiedInput);

  // Hands out queued inputs one at a time, then null once the queue is drained.
  Answer<Object> nextInputAnswer = new Answer<Object>() {
    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
      if (pendingInputs.isEmpty()) {
        return null;
      }
      return pendingInputs.remove();
    }
  };
  ShuffleManager shuffleManager = mock(ShuffleManager.class);
  doAnswer(nextInputAnswer).when(shuffleManager).getNextInput();

  TezCounters tezCounters = new TezCounters();
  TezCounter recordsProcessed = tezCounters.findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);

  unorderedKVReader = new UnorderedKVReader<Text, Text>(
      shuffleManager, defaultConf, null, false, -1, -1, recordsProcessed, mock(InputContext.class));
}
use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestFetcher method testWithRetry.
/**
 * Drives {@code copyFromHost} on a spied {@link FetcherOrderedGrouped} whose
 * {@code copyMapOutput} sleeps and then throws a {@link FetcherReadTimeoutException}.
 *
 * <p>The test then checks that the connection was set up twice, that exactly one
 * {@code copyFailed} was reported, and that all three source attempts were put back.
 * A second {@code copyFromHost} call is made afterwards to check that
 * {@code putBackRemainingMapOutputs} has been invoked twice in total.
 */
@Test(timeout = 5000)
@SuppressWarnings("unchecked")
public void testWithRetry() throws Exception {
  Configuration configuration = new TezConfiguration();
  configuration.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 3000);
  configuration.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 3000);

  ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
  MergeManager merger = mock(MergeManager.class);
  Shuffle shuffle = mock(Shuffle.class);

  InputContext inputContext = mock(InputContext.class);
  doReturn(new TezCounters()).when(inputContext).getCounters();
  doReturn("").when(inputContext).getSourceVertexName();
  doReturn(ApplicationId.newInstance(0, 1)).when(inputContext).getApplicationId();

  HttpConnectionParams httpConnectionParams = ShuffleUtils.getHttpConnectionParams(configuration);

  final MapHost targetHost = new MapHost(HOST, PORT, 1, 1);
  FetcherOrderedGrouped realFetcher = new FetcherOrderedGrouped(
      null, scheduler, merger, shuffle, null, false, 0, null, configuration, false,
      HOST, PORT, "src vertex", targetHost,
      ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter,
      wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter,
      APP_ID, DAG_ID, false, false, true, false);
  final FetcherOrderedGrouped spiedFetcher = spy(realFetcher);

  InputAttemptIdentifier firstAttempt =
      new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0");
  InputAttemptIdentifier secondAttempt =
      new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1");
  InputAttemptIdentifier thirdAttempt =
      new InputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3");
  final List<InputAttemptIdentifier> sourceAttempts =
      Arrays.asList(firstAttempt, secondAttempt, thirdAttempt);

  doReturn(sourceAttempts).when(scheduler).getMapsForHost(targetHost);
  // The initial connection attempt is reported as successful.
  doReturn(true).when(spiedFetcher).setupConnection(any(MapHost.class), any(Collection.class));

  String baseUrl = "http://" + HOST + ":" + PORT + "/mapOutput?job=job_123&&reduce=1&map=";
  URL fetchUrl = ShuffleUtils.constructInputURL(baseUrl, sourceAttempts, false);
  spiedFetcher.httpConnection = new FakeHttpConnection(fetchUrl, null, "", null);

  // Every reservation yields a mocked in-memory MapOutput tied to the requested attempt.
  Answer<MapOutput> reserveAnswer = new Answer<MapOutput>() {
    @Override
    public MapOutput answer(InvocationOnMock invocation) throws Throwable {
      Object requestedAttempt = invocation.getArguments()[0];
      MapOutput reserved = mock(MapOutput.class);
      doReturn(MapOutput.Type.MEMORY).when(reserved).getType();
      doReturn(requestedAttempt).when(reserved).getAttemptIdentifier();
      return reserved;
    }
  };
  doAnswer(reserveAnswer).when(merger)
      .reserve(any(InputAttemptIdentifier.class), anyInt(), anyInt(), anyInt());

  // Simulate a read timeout while the data is being copied.
  Answer<Void> readTimeoutAnswer = new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      // Pretend the host is unreachable for 4 seconds.
      Thread.sleep(4000);
      // From now on, reconnecting to the same node is reported as failed.
      doReturn(false).when(spiedFetcher)
          .setupConnection(any(MapHost.class), any(Collection.class));
      throw new FetcherReadTimeoutException("creating fetcher socket read timeout exception");
    }
  };
  doAnswer(readTimeoutAnswer).when(spiedFetcher).copyMapOutput(
      any(MapHost.class), any(DataInputStream.class), any(InputAttemptIdentifier.class));

  try {
    spiedFetcher.copyFromHost(targetHost);
  } catch (IOException ignored) {
    // An IOException from the fetch is tolerated here; the verifications below decide the outcome.
  }

  // One call for the initial connect, one for the retry.
  verify(spiedFetcher, times(2)).setupConnection(any(MapHost.class), any(Collection.class));
  // copyMapOutput keeps failing, so exactly one copyFailed is expected.
  verify(scheduler, times(1)).copyFailed(
      any(InputAttemptIdentifier.class), any(MapHost.class),
      anyBoolean(), anyBoolean(), anyBoolean());
  verify(spiedFetcher, times(1)).putBackRemainingMapOutputs(any(MapHost.class));
  verify(scheduler, times(3))
      .putBackKnownMapOutput(any(MapHost.class), any(InputAttemptIdentifier.class));

  // Second pass; described in the original test as "stopping the fetcher abruptly".
  try {
    // NOTE(review): the original comment calls this the "fetcher stopped" flag, yet it is
    // assigned false - confirm the intended value.
    spiedFetcher.stopped = false;
    spiedFetcher.copyFromHost(targetHost);
    verify(spiedFetcher, times(2)).putBackRemainingMapOutputs(any(MapHost.class));
  } catch (IOException ignored) {
    // Tolerated, as above.
  }
}
use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestFetcher method testLocalFetchModeSetting1.
/**
 * Checks which path {@code fetchNext()} takes for four host/flag combinations.
 *
 * <ul>
 *   <li>local fetch enabled, host and port match: {@code setupLocalDiskFetch} is used;</li>
 *   <li>local fetch enabled, host name differs: {@code copyFromHost} is used;</li>
 *   <li>local fetch enabled, port differs: {@code copyFromHost} is used;</li>
 *   <li>local fetch disabled: {@code copyFromHost} is used.</li>
 * </ul>
 */
@Test(timeout = 5000)
public void testLocalFetchModeSetting1() throws Exception {
  Configuration configuration = new TezConfiguration();
  ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
  MergeManager merger = mock(MergeManager.class);
  Shuffle shuffle = mock(Shuffle.class);
  InputContext inputContext = mock(InputContext.class);
  when(inputContext.getCounters()).thenReturn(new TezCounters());
  when(inputContext.getSourceVertexName()).thenReturn("src vertex");

  final boolean localFetchOn = true;
  final boolean localFetchOff = false;

  // Scenario 1: local fetch enabled, host and port both match -> local disk fetch.
  MapHost targetHost = new MapHost(HOST, PORT, 0, 1);
  FetcherOrderedGrouped spied = spy(new FetcherOrderedGrouped(
      null, scheduler, merger, shuffle, null, false, 0, null, configuration, localFetchOn,
      HOST, PORT, "src vertex", targetHost,
      ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter,
      wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter,
      APP_ID, DAG_ID, false, false, true, false));
  doNothing().when(spied).setupLocalDiskFetch(targetHost);
  spied.fetchNext();
  verify(spied, times(1)).setupLocalDiskFetch(targetHost);
  verify(spied, never()).copyFromHost(any(MapHost.class));

  // Scenario 2: host name differs -> HTTP path.
  targetHost = new MapHost(HOST + "_OTHER", PORT, 0, 1);
  spied = spy(new FetcherOrderedGrouped(
      null, scheduler, merger, shuffle, null, false, 0, null, configuration, localFetchOn,
      HOST, PORT, "src vertex", targetHost,
      ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter,
      wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter,
      APP_ID, DAG_ID, false, false, true, false));
  doNothing().when(spied).setupLocalDiskFetch(targetHost);
  spied.fetchNext();
  verify(spied, never()).setupLocalDiskFetch(any(MapHost.class));
  verify(spied, times(1)).copyFromHost(targetHost);

  // Scenario 3: port differs -> HTTP path.
  targetHost = new MapHost(HOST, PORT + 1, 0, 1);
  spied = spy(new FetcherOrderedGrouped(
      null, scheduler, merger, shuffle, null, false, 0, null, configuration, localFetchOn,
      HOST, PORT, "src vertex", targetHost,
      ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter,
      wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter,
      APP_ID, DAG_ID, false, false, true, false));
  doNothing().when(spied).setupLocalDiskFetch(targetHost);
  spied.fetchNext();
  verify(spied, never()).setupLocalDiskFetch(any(MapHost.class));
  verify(spied, times(1)).copyFromHost(targetHost);

  // Scenario 4: local fetch disabled, even though host and port match -> HTTP path.
  targetHost = new MapHost(HOST, PORT, 0, 1);
  spied = spy(new FetcherOrderedGrouped(
      null, scheduler, merger, shuffle, null, false, 0, null, configuration, localFetchOff,
      HOST, PORT, "src vertex", targetHost,
      ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter,
      wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter,
      APP_ID, DAG_ID, false, false, true, false));
  doNothing().when(spied).setupLocalDiskFetch(targetHost);
  spied.fetchNext();
  verify(spied, never()).setupLocalDiskFetch(any(MapHost.class));
  verify(spied, times(1)).copyFromHost(targetHost);
}
use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestFetcher method testSetupLocalDiskFetchAutoReduce.
/**
 * Exercises {@code setupLocalDiskFetch} for a host that covers two partitions.
 *
 * <p>Five composite source attempts are offered. Reading the index record is made to
 * throw an {@link IOException} for the attempts at indexes 2 and 4, so the test expects:
 * <ul>
 *   <li>{@code copySucceeded} (checked through {@code verifyCopySucceeded}) for every
 *       partition of attempts 0, 1 and 3;</li>
 *   <li>{@code copyFailed} for both expanded partitions of each failing attempt;</li>
 *   <li>{@code putBackRemainingMapOutputs} for the host and
 *       {@code putBackKnownMapOutput} for each failing attempt.</li>
 * </ul>
 */
@Test(timeout = 5000)
public void testSetupLocalDiskFetchAutoReduce() throws Exception {
  Configuration conf = new TezConfiguration();
  ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
  MergeManager merger = mock(MergeManager.class);
  Shuffle shuffle = mock(Shuffle.class);
  InputContext inputContext = mock(InputContext.class);
  when(inputContext.getCounters()).thenReturn(new TezCounters());
  when(inputContext.getSourceVertexName()).thenReturn("");
  MapHost host = new MapHost(HOST, PORT, 1, 2);
  FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(
      null, scheduler, merger, shuffle, null, false, 0, null, conf, true,
      HOST, PORT, "src vertex", host,
      ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter,
      wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter,
      APP_ID, DAG_ID, false, false, true, false);
  FetcherOrderedGrouped spyFetcher = spy(fetcher);

  final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(
      new CompositeInputAttemptIdentifier(0, 1,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(1, 2,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(2, 3,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(3, 4,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(4, 4,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", host.getPartitionCount()));
  final int FIRST_FAILED_ATTEMPT_IDX = 2;
  final int SECOND_FAILED_ATTEMPT_IDX = 4;
  final int[] successfulAttemptIndexes = { 0, 1, 3 };

  doReturn(srcAttempts).when(scheduler).getMapsForHost(host);

  // Lookup table from (path component, partition) to the expanded attempt identifier;
  // it backs the getIdentifierForFetchedOutput stub below.
  final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> pathToIdentifierMap =
      new ConcurrentHashMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier>();
  for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) {
    for (int i = 0; i < srcAttempt.getInputIdentifierCount(); i++) {
      ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(
          srcAttempt.getPathComponent(), host.getPartitionId() + i);
      pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i));
    }
  }
  doAnswer(new Answer<InputAttemptIdentifier>() {
    @Override
    public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      String path = (String) args[0];
      int reduceId = (int) args[1];
      return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId));
    }
  }).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));

  // Direct-disk fetches return a mocked MapOutput bound to the requested attempt.
  doAnswer(new Answer<MapOutput>() {
    @Override
    public MapOutput answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      MapOutput mapOutput = mock(MapOutput.class);
      doReturn(MapOutput.Type.DISK_DIRECT).when(mapOutput).getType();
      doReturn(args[0]).when(mapOutput).getAttemptIdentifier();
      return mapOutput;
    }
  }).when(spyFetcher).getMapOutputForDirectDiskFetch(
      any(InputAttemptIdentifier.class), any(Path.class), any(TezIndexRecord.class));

  doAnswer(new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
    }
  }).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());

  // The same answer serves every partition, so it is built once and reused.
  final Answer<TezIndexRecord> indexRecordAnswer = new Answer<TezIndexRecord>() {
    @Override
    public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      String pathComponent = (String) args[0];
      int len = pathComponent.length();
      // The trailing digit of the path component seeds the record values.
      long p = Long.parseLong(pathComponent.substring(len - 1, len));
      if (pathComponent.equals(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).getPathComponent())
          || pathComponent.equals(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).getPathComponent())) {
        throw new IOException("Thowing exception to simulate failure case");
      }
      // match with params for copySucceeded below.
      return new TezIndexRecord(p * 10, (p + 1) * 1000, (p + 2) * 100);
    }
  };
  for (int i = 0; i < host.getPartitionCount(); i++) {
    doAnswer(indexRecordAnswer).when(spyFetcher)
        .getIndexRecord(anyString(), eq(host.getPartitionId() + i));
  }

  doNothing().when(scheduler).copySucceeded(
      any(InputAttemptIdentifier.class), any(MapHost.class),
      anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
  doNothing().when(scheduler).putBackKnownMapOutput(
      host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0));
  doNothing().when(scheduler).putBackKnownMapOutput(
      host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(1));

  spyFetcher.setupLocalDiskFetch(host);

  // Expect 3 successful attempts (every partition of each) and 2 failed attempts.
  for (int i : successfulAttemptIndexes) {
    for (int j = 0; j < host.getPartitionCount(); j++) {
      verifyCopySucceeded(scheduler, host, srcAttempts, i, j);
    }
  }
  verify(scheduler).copyFailed(
      srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true);
  verify(scheduler).copyFailed(
      srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(1), host, true, false, true);
  verify(scheduler).copyFailed(
      srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true);
  verify(scheduler).copyFailed(
      srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(1), host, true, false, true);

  verify(spyFetcher).putBackRemainingMapOutputs(host);
  verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX));
  verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX));
}
use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestFetcher method testInputsReturnedOnConnectionException.
/**
 * Runs {@code call()} on a {@link FetcherOrderedGrouped} for a host with one known map
 * and checks what the scheduler is told afterwards.
 *
 * <p>The test verifies that the scheduler was asked for the host's maps, that the host
 * was freed, and that the single known attempt was put back.
 * NOTE(review): the connection failure implied by the test name presumably comes from
 * nothing listening on HOST:PORT - confirm against the test class setup.
 */
@Test(timeout = 5000)
public void testInputsReturnedOnConnectionException() throws Exception {
  Configuration configuration = new TezConfiguration();
  ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
  MergeManager merger = mock(MergeManager.class);
  Shuffle shuffle = mock(Shuffle.class);

  InputContext inputContext = mock(InputContext.class);
  when(inputContext.getCounters()).thenReturn(new TezCounters());
  when(inputContext.getSourceVertexName()).thenReturn("src vertex");

  // A host that knows about exactly one map attempt.
  MapHost targetHost = new MapHost(HOST, PORT, 0, 1);
  InputAttemptIdentifier knownAttempt = new InputAttemptIdentifier(0, 0, "attempt");
  targetHost.addKnownMap(knownAttempt);

  List<InputAttemptIdentifier> attemptsForHost = Lists.newArrayList(knownAttempt);
  doReturn(attemptsForHost).when(scheduler).getMapsForHost(targetHost);

  FetcherOrderedGrouped fetcherUnderTest = new FetcherOrderedGrouped(
      null, scheduler, merger, shuffle, null, false, 0, null, configuration, false,
      HOST, PORT, "src vertex", targetHost,
      ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter,
      wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter,
      APP_ID, DAG_ID, false, false, true, false);
  fetcherUnderTest.call();

  verify(scheduler).getMapsForHost(targetHost);
  verify(scheduler).freeHost(targetHost);
  verify(scheduler).putBackKnownMapOutput(targetHost, knownAttempt);
}
Aggregations