use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestFetcher method testSetupLocalDiskFetchEmptyPartitions.
@Test(timeout = 5000)
public void testSetupLocalDiskFetchEmptyPartitions() throws Exception {
Configuration conf = new TezConfiguration();
ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
MergeManager merger = mock(MergeManager.class);
Shuffle shuffle = mock(Shuffle.class);
InputContext inputContext = mock(InputContext.class);
when(inputContext.getCounters()).thenReturn(new TezCounters());
when(inputContext.getSourceVertexName()).thenReturn("");
MapHost host = new MapHost(HOST, PORT, 1, 1);
FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
FetcherOrderedGrouped spyFetcher = spy(fetcher);
final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1), new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1), new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1), new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1), new CompositeInputAttemptIdentifier(4, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1));
doReturn(srcAttempts).when(scheduler).getMapsForHost(host);
final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> pathToIdentifierMap = new ConcurrentHashMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier>();
for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) {
for (int i = 0; i < srcAttempt.getInputIdentifierCount(); i++) {
ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(srcAttempt.getPathComponent(), host.getPartitionId() + i);
pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i));
}
}
doAnswer(new Answer<InputAttemptIdentifier>() {
@Override
public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
String path = (String) args[0];
int reduceId = (int) args[1];
return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId));
}
}).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));
doAnswer(new Answer<Path>() {
@Override
public Path answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
}
}).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());
for (int i = 0; i < host.getPartitionCount(); i++) {
doAnswer(new Answer<TezIndexRecord>() {
@Override
public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
String pathComponent = (String) args[0];
int len = pathComponent.length();
long p = Long.valueOf(pathComponent.substring(len - 1, len));
// match with params for copySucceeded below.
return new TezIndexRecord(p * 10, 0, 0);
}
}).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i));
}
doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
spyFetcher.setupLocalDiskFetch(host);
verify(scheduler, times(0)).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
verify(spyFetcher).putBackRemainingMapOutputs(host);
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestFetcher method testAsyncWithException.
@Test
@SuppressWarnings("unchecked")
public void testAsyncWithException() throws Exception {
Configuration conf = new TezConfiguration();
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 3000);
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 3000);
ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
MergeManager merger = mock(MergeManager.class);
Shuffle shuffle = mock(Shuffle.class);
TezCounters counters = new TezCounters();
InputContext inputContext = mock(InputContext.class);
when(inputContext.getCounters()).thenReturn(counters);
when(inputContext.getSourceVertexName()).thenReturn("");
JobTokenSecretManager jobMgr = mock(JobTokenSecretManager.class);
doReturn(new byte[10]).when(jobMgr).computeHash(any(byte[].class));
HttpConnectionParams httpConnectionParams = ShuffleUtils.getHttpConnectionParams(conf);
final MapHost host = new MapHost(HOST, PORT, 1, 1);
FetcherOrderedGrouped mockFetcher = new FetcherOrderedGrouped(httpConnectionParams, scheduler, merger, shuffle, jobMgr, false, 0, null, conf, false, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, true, false, true, false);
final FetcherOrderedGrouped fetcher = spy(mockFetcher);
fetcher.remaining = new LinkedHashMap<String, InputAttemptIdentifier>();
final List<InputAttemptIdentifier> srcAttempts = Arrays.asList(new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0"), new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1"), new InputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3"));
doReturn(srcAttempts).when(scheduler).getMapsForHost(host);
try {
long currentIOErrors = ioErrsCounter.getValue();
boolean connected = fetcher.setupConnection(host, srcAttempts);
Assert.assertTrue(connected == false);
// Ensure that counters are incremented (i.e it followed the exception codepath)
Assert.assertTrue(ioErrsCounter.getValue() > currentIOErrors);
} catch (IOException e) {
fail();
}
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestFetcher method testSetupLocalDiskFetch.
@Test(timeout = 5000)
public void testSetupLocalDiskFetch() throws Exception {
Configuration conf = new TezConfiguration();
ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
MergeManager merger = mock(MergeManager.class);
Shuffle shuffle = mock(Shuffle.class);
InputContext inputContext = mock(InputContext.class);
when(inputContext.getCounters()).thenReturn(new TezCounters());
when(inputContext.getSourceVertexName()).thenReturn("");
MapHost host = new MapHost(HOST, PORT, 1, 1);
FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
FetcherOrderedGrouped spyFetcher = spy(fetcher);
final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1), new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1), new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1), new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1), new CompositeInputAttemptIdentifier(4, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1));
final int FIRST_FAILED_ATTEMPT_IDX = 2;
final int SECOND_FAILED_ATTEMPT_IDX = 4;
final int[] sucessfulAttemptsIndexes = { 0, 1, 3 };
doReturn(srcAttempts).when(scheduler).getMapsForHost(host);
final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> pathToIdentifierMap = new ConcurrentHashMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier>();
for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) {
for (int i = 0; i < srcAttempt.getInputIdentifierCount(); i++) {
ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(srcAttempt.getPathComponent(), host.getPartitionId() + i);
pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i));
}
}
doAnswer(new Answer<InputAttemptIdentifier>() {
@Override
public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
String path = (String) args[0];
int reduceId = (int) args[1];
return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId));
}
}).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));
doAnswer(new Answer<MapOutput>() {
@Override
public MapOutput answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
MapOutput mapOutput = mock(MapOutput.class);
doReturn(MapOutput.Type.DISK_DIRECT).when(mapOutput).getType();
doReturn(args[0]).when(mapOutput).getAttemptIdentifier();
return mapOutput;
}
}).when(spyFetcher).getMapOutputForDirectDiskFetch(any(InputAttemptIdentifier.class), any(Path.class), any(TezIndexRecord.class));
doAnswer(new Answer<Path>() {
@Override
public Path answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
}
}).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());
for (int i = 0; i < host.getPartitionCount(); i++) {
doAnswer(new Answer<TezIndexRecord>() {
@Override
public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
String pathComponent = (String) args[0];
int len = pathComponent.length();
long p = Long.valueOf(pathComponent.substring(len - 1, len));
if (p == FIRST_FAILED_ATTEMPT_IDX || p == SECOND_FAILED_ATTEMPT_IDX) {
throw new IOException("failing to simulate failure case");
}
// match with params for copySucceeded below.
return new TezIndexRecord(p * 10, (p + 1) * 1000, (p + 2) * 100);
}
}).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i));
}
doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
doNothing().when(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX));
doNothing().when(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX));
spyFetcher.setupLocalDiskFetch(host);
// should have exactly 3 success and 1 failure.
for (int i : sucessfulAttemptsIndexes) {
for (int j = 0; j < host.getPartitionCount(); j++) {
verifyCopySucceeded(scheduler, host, srcAttempts, i, j);
}
}
verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true);
verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true);
verify(spyFetcher).putBackRemainingMapOutputs(host);
verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX));
verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX));
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestFetcher method verifyCopySucceeded.
private void verifyCopySucceeded(ShuffleScheduler scheduler, MapHost host, List<CompositeInputAttemptIdentifier> srcAttempts, long p, int j) throws IOException {
// need to verify filename, offsets, sizes wherever they are used.
InputAttemptIdentifier srcAttemptToMatch = srcAttempts.get((int) p).expand(j);
String filenameToMatch = SHUFFLE_INPUT_FILE_PREFIX + srcAttemptToMatch.getPathComponent();
ArgumentCaptor<MapOutput> captureMapOutput = ArgumentCaptor.forClass(MapOutput.class);
verify(scheduler).copySucceeded(eq(srcAttemptToMatch), eq(host), eq((p + 2) * 100), eq((p + 1) * 1000), anyLong(), captureMapOutput.capture(), anyBoolean());
// cannot use the equals of MapOutput as it compares id which is private. so doing it manually
MapOutput m = captureMapOutput.getAllValues().get(0);
Assert.assertTrue(m.getType().equals(MapOutput.Type.DISK_DIRECT) && m.getAttemptIdentifier().equals(srcAttemptToMatch));
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestFetcher method testInputAttemptIdentifierMap.
@Test(timeout = 5000)
public void testInputAttemptIdentifierMap() {
InputAttemptIdentifier[] srcAttempts = { new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0), // duplicate entry
new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0), // pipeline shuffle based identifiers, with multiple attempts
new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0), new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0), new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1), new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 2), new InputAttemptIdentifier(2, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", false, InputAttemptIdentifier.SPILL_INFO.FINAL_MERGE_ENABLED, 0) };
InputAttemptIdentifier[] expectedSrcAttempts = { new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0), // pipeline shuffle based identifiers
new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0), new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0), new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1), new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 2), new InputAttemptIdentifier(2, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", false, InputAttemptIdentifier.SPILL_INFO.FINAL_MERGE_ENABLED, 0) };
Configuration conf = new TezConfiguration();
ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
MergeManager merger = mock(MergeManager.class);
Shuffle shuffle = mock(Shuffle.class);
MapHost mapHost = new MapHost(HOST, PORT, 0, 1);
FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, false, HOST, PORT, "src vertex", mapHost, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
fetcher.populateRemainingMap(new LinkedList<InputAttemptIdentifier>(Arrays.asList(srcAttempts)));
Assert.assertEquals(expectedSrcAttempts.length, fetcher.remaining.size());
Iterator<Entry<String, InputAttemptIdentifier>> iterator = fetcher.remaining.entrySet().iterator();
int count = 0;
while (iterator.hasNext()) {
String key = iterator.next().getKey();
Assert.assertTrue(expectedSrcAttempts[count++].toString().compareTo(key) == 0);
}
}
Aggregations