use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class TestShuffleUtils method createIndexFile.
/**
 * Builds a spill record with {@code numPartitions} index entries and writes it to
 * "file.index.out" under {@code workingDir}.
 *
 * <p>Every entry uses the same compressed partition length (200) and consecutive start
 * offsets. Even-numbered partitions, or all partitions when {@code allEmptyPartitions} is
 * set, get a raw length of 0, which marks the partition as empty (see TEZ-3605); the
 * others get a random raw length in [100, 200).
 *
 * @param numPartitions number of index entries to write
 * @param allEmptyPartitions when true, every partition is recorded with raw length 0
 * @return the path of the index file that was written
 * @throws IOException declared for the spill-record write
 */
private Path createIndexFile(int numPartitions, boolean allEmptyPartitions) throws IOException {
  // compressed length, identical for every partition
  final long compressedLen = 200;
  TezSpillRecord record = new TezSpillRecord(numPartitions);
  long offset = 0;
  int partition = 0;
  while (partition < numPartitions) {
    long randomRawLen = ThreadLocalRandom.current().nextLong(100, 200);
    // a raw length of 0 indicates an empty partition, see TEZ-3605
    boolean empty = allEmptyPartitions || partition % 2 == 0;
    long rawLen = empty ? 0 : randomRawLen;
    record.putIndex(new TezIndexRecord(offset, rawLen, compressedLen), partition);
    offset += compressedLen;
    partition++;
  }
  Path indexPath = new Path(workingDir, "file.index.out");
  record.writeToFile(indexPath, conf);
  return indexPath;
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class TestFetcher method testSetupLocalDiskFetchAutoReduce.
/**
 * Exercises {@code setupLocalDiskFetch(host)} on a spied {@link FetcherOrderedGrouped}
 * when each source attempt is a composite identifier spanning
 * {@code host.getPartitionCount()} partitions.
 *
 * <p>Index-record lookups for the attempts at indexes 2 and 4 are stubbed to throw an
 * {@link IOException}; the attempts at indexes 0, 1 and 3 return a synthetic index record.
 * The test then verifies that the scheduler saw a successful copy for every partition of
 * each good attempt, a failed copy for partitions 0 and 1 of each bad attempt, and that the
 * bad attempts were handed back via {@code putBackKnownMapOutput}.
 */
@Test(timeout = 5000)
public void testSetupLocalDiskFetchAutoReduce() throws Exception {
  Configuration conf = new TezConfiguration();
  ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
  MergeManager merger = mock(MergeManager.class);
  Shuffle shuffle = mock(Shuffle.class);
  MapHost host = new MapHost(HOST, PORT, 1, 2);
  FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle,
      null, false, 0, null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter,
      wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter,
      wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
  FetcherOrderedGrouped spyFetcher = spy(fetcher);

  final List<CompositeInputAttemptIdentifier> srcAttempts = Arrays.asList(
      new CompositeInputAttemptIdentifier(0, 1,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(1, 2,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(2, 3,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(3, 4,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", host.getPartitionCount()),
      new CompositeInputAttemptIdentifier(4, 4,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", host.getPartitionCount()));
  final int FIRST_FAILED_ATTEMPT_IDX = 2;
  final int SECOND_FAILED_ATTEMPT_IDX = 4;
  final int[] successfulAttemptsIndexes = { 0, 1, 3 };
  doReturn(srcAttempts).when(scheduler).getMapsForHost(host);

  // Lookup table from (path component, partition) to the expanded identifier; it backs the
  // scheduler's getIdentifierForFetchedOutput stub below.
  final ConcurrentMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier> pathToIdentifierMap =
      new ConcurrentHashMap<ShuffleScheduler.PathPartition, InputAttemptIdentifier>();
  for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) {
    for (int i = 0; i < srcAttempt.getInputIdentifierCount(); i++) {
      ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(
          srcAttempt.getPathComponent(), host.getPartitionId() + i);
      pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i));
    }
  }
  doAnswer(new Answer<InputAttemptIdentifier>() {
    @Override
    public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      String path = (String) args[0];
      int reduceId = (int) args[1];
      return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId));
    }
  }).when(scheduler).getIdentifierForFetchedOutput(any(String.class), any(int.class));

  // Hand back a mocked DISK_DIRECT output that remembers which attempt it was created for.
  doAnswer(new Answer<MapOutput>() {
    @Override
    public MapOutput answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      MapOutput mapOutput = mock(MapOutput.class);
      doReturn(MapOutput.Type.DISK_DIRECT).when(mapOutput).getType();
      doReturn(args[0]).when(mapOutput).getAttemptIdentifier();
      return mapOutput;
    }
  }).when(spyFetcher).getMapOutputForDirectDiskFetch(any(InputAttemptIdentifier.class),
      any(Path.class), any(TezIndexRecord.class));

  doAnswer(new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
    }
  }).when(spyFetcher).getShuffleInputFileName(anyString(), anyString());

  for (int i = 0; i < host.getPartitionCount(); i++) {
    doAnswer(new Answer<TezIndexRecord>() {
      @Override
      public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
        Object[] args = invocation.getArguments();
        String pathComponent = (String) args[0];
        int len = pathComponent.length();
        // The trailing digit of the path component doubles as the attempt index.
        long p = Long.parseLong(pathComponent.substring(len - 1, len));
        if (pathComponent.equals(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).getPathComponent())
            || pathComponent.equals(
                srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).getPathComponent())) {
          throw new IOException("Thowing exception to simulate failure case");
        }
        // match with params for copySucceeded below.
        return new TezIndexRecord(p * 10, (p + 1) * 1000, (p + 2) * 100);
      }
    }).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i));
  }

  doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class),
      any(MapHost.class), anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean());
  // Each stub only needs to be registered once.
  doNothing().when(scheduler).putBackKnownMapOutput(host,
      srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0));
  doNothing().when(scheduler).putBackKnownMapOutput(host,
      srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(1));

  spyFetcher.setupLocalDiskFetch(host);

  // The 3 good attempts must succeed for every partition; the 2 bad attempts must fail.
  for (int i : successfulAttemptsIndexes) {
    for (int j = 0; j < host.getPartitionCount(); j++) {
      verifyCopySucceeded(scheduler, host, srcAttempts, i, j);
    }
  }
  verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0), host,
      true, false, true);
  verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(1), host,
      true, false, true);
  verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0), host,
      true, false, true);
  verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(1), host,
      true, false, true);

  verify(spyFetcher).putBackRemainingMapOutputs(host);
  // A single verify per failed attempt is sufficient; repeating it checks nothing new.
  verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX));
  verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX));
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class TestDefaultSorter method testEmptyPartitionsHelper.
/**
 * Runs a {@link DefaultSorter} with 50 partitions over {@code numKeys} keys and checks the
 * raw length recorded for every partition that has no data, both in the cached per-spill
 * index records and in the final index file.
 *
 * <p>For a partition without data the expected raw length is 0 when
 * {@code sendEmptyPartitionDetails} is true and 6 otherwise.
 *
 * @param numKeys number of keys written to the sorter; also the expected spill count
 *     (1 spill is expected when this is 0)
 * @param sendEmptyPartitionDetails value set for
 *     {@code TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED}
 * @throws IOException declared for sorter and index-file I/O
 */
public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDetails) throws IOException {
  OutputContext context = createTezOutputContext();
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED,
      sendEmptyPartitionDetails);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
  conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
  MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
  context.requestInitialMemory(
      ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()),
      handler);
  int partitions = 50;
  DefaultSorter sorter = new DefaultSorter(context, conf, partitions, handler.getMemoryAssigned());
  writeData(sorter, numKeys, 1000000);
  if (numKeys == 0) {
    assertTrue(sorter.getNumSpills() == 1);
  } else {
    assertTrue(sorter.getNumSpills() == numKeys);
  }
  verifyCounters(sorter, context);
  verifyOutputPermissions(context.getUniqueIdentifier());

  // Raw length expected for a partition without data.
  // NOTE(review): 6 is presumably the size of an empty partition's framing - confirm.
  final long expectedEmptyRawLength = sendEmptyPartitionDetails ? 0 : 6;

  // Per-spill index records kept in memory.
  if (sorter.indexCacheList.size() != 0) {
    for (int i = 0; i < sorter.getNumSpills(); i++) {
      TezSpillRecord record = sorter.indexCacheList.get(i);
      for (int j = 0; j < partitions; j++) {
        TezIndexRecord tezIndexRecord = record.getIndex(j);
        if (tezIndexRecord.hasData()) {
          continue;
        }
        // Report the partition index (j), not the spill index (i), as the partition.
        Assert.assertEquals("Unexpected raw length for " + j + "th partition of spill " + i,
            expectedEmptyRawLength, tezIndexRecord.getRawLength());
      }
    }
  }

  // Final merged index file.
  Path indexFile = sorter.getFinalIndexFile();
  TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
  for (int i = 0; i < partitions; i++) {
    TezIndexRecord tezIndexRecord = spillRecord.getIndex(i);
    if (tezIndexRecord.hasData()) {
      continue;
    }
    Assert.assertEquals("Unexpected raw length for " + i + "th partition",
        expectedEmptyRawLength, tezIndexRecord.getRawLength());
  }
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class TestFetcher method testSetupLocalDiskFetch.
/**
 * Exercises the local-disk fetch path of a spied {@link Fetcher} built with
 * {@code TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH} set to "true".
 *
 * <p>Five single-partition source attempts are assigned to the fetcher. The index-record
 * lookup is stubbed to throw an {@link IOException} for the attempts whose path component
 * ends in 2 or 4 and to return a synthetic record for the rest. The test verifies that the
 * callback saw three successes and two failures, and that the returned {@link FetchResult}
 * carries the host, port and partition plus exactly the two failed attempts as pending
 * inputs.
 */
@Test(timeout = 3000)
public void testSetupLocalDiskFetch() throws Exception {
  CompositeInputAttemptIdentifier[] srcAttempts = {
      new CompositeInputAttemptIdentifier(0, 1,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1),
      new CompositeInputAttemptIdentifier(1, 2,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1),
      new CompositeInputAttemptIdentifier(2, 3,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1),
      new CompositeInputAttemptIdentifier(3, 4,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1),
      new CompositeInputAttemptIdentifier(4, 5,
          InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1) };
  final int FIRST_FAILED_ATTEMPT_IDX = 2;
  final int SECOND_FAILED_ATTEMPT_IDX = 4;
  final int[] successfulAttempts = { 0, 1, 3 };

  TezConfiguration conf = new TezConfiguration();
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "true");
  int partition = 42;
  FetcherCallback callback = mock(FetcherCallback.class);
  Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null,
      ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, true, HOST, PORT,
      false, true, true);

  ArrayList<InputAttemptIdentifier> list = new ArrayList<InputAttemptIdentifier>();
  list.addAll(Arrays.asList(srcAttempts));
  builder.assignWork(HOST, PORT, partition, 1, list);
  Fetcher fetcher = spy(builder.build());

  // Register every expanded identifier under its (path component, partition) key.
  for (CompositeInputAttemptIdentifier compositeInputAttemptIdentifier : srcAttempts) {
    for (int i = 0; i < compositeInputAttemptIdentifier.getInputIdentifierCount(); i++) {
      Fetcher.PathPartition pathPartition = new Fetcher.PathPartition(
          compositeInputAttemptIdentifier.getPathComponent(), partition + i);
      fetcher.getPathToAttemptMap().put(pathPartition,
          compositeInputAttemptIdentifier.expand(i));
    }
  }

  doAnswer(new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
    }
  }).when(fetcher).getShuffleInputFileName(anyString(), anyString());

  doAnswer(new Answer<TezIndexRecord>() {
    @Override
    public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      InputAttemptIdentifier srcAttemptId = (InputAttemptIdentifier) args[0];
      String pathComponent = srcAttemptId.getPathComponent();
      int len = pathComponent.length();
      // The trailing digit of the path component doubles as the attempt index.
      long p = Long.parseLong(pathComponent.substring(len - 1, len));
      // Fail the 3rd one and 5th one.
      if (p == FIRST_FAILED_ATTEMPT_IDX || p == SECOND_FAILED_ATTEMPT_IDX) {
        throw new IOException("failing on 3/5th input to simulate failure case");
      }
      // match with params for copySucceeded below.
      return new TezIndexRecord(p * 10, p * 1000, p * 100);
    }
  }).when(fetcher).getTezIndexRecord(any(InputAttemptIdentifier.class), anyInt());

  doNothing().when(fetcher).shutdown();
  doNothing().when(callback).fetchSucceeded(anyString(), any(InputAttemptIdentifier.class),
      any(FetchedInput.class), anyLong(), anyLong(), anyLong());
  doNothing().when(callback).fetchFailed(anyString(), any(InputAttemptIdentifier.class),
      eq(false));

  FetchResult fetchResult = fetcher.call();

  verify(fetcher).setupLocalDiskFetch();
  // expect 3 successes and 2 failures
  for (int i : successfulAttempts) {
    verifyFetchSucceeded(callback, srcAttempts[i], conf);
  }
  verify(callback).fetchFailed(eq(HOST), eq(srcAttempts[FIRST_FAILED_ATTEMPT_IDX]), eq(false));
  verify(callback).fetchFailed(eq(HOST), eq(srcAttempts[SECOND_FAILED_ATTEMPT_IDX]), eq(false));

  // JUnit's assertEquals takes (message, expected, actual).
  Assert.assertEquals("fetchResult host", HOST, fetchResult.getHost());
  Assert.assertEquals("fetchResult partition", partition, fetchResult.getPartition());
  Assert.assertEquals("fetchResult port", PORT, fetchResult.getPort());

  // 3rd and 5th attempt failed
  List<InputAttemptIdentifier> pendingInputs = Lists.newArrayList(fetchResult.getPendingInputs());
  Assert.assertEquals("fetchResult pendingInput size", 2, pendingInputs.size());
  Assert.assertEquals("fetchResult failed attempt", srcAttempts[FIRST_FAILED_ATTEMPT_IDX],
      pendingInputs.get(0));
  Assert.assertEquals("fetchResult failed attempt", srcAttempts[SECOND_FAILED_ATTEMPT_IDX],
      pendingInputs.get(1));
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project hive by apache.
the class TestIndexCache method testLRCPolicy.
/**
 * Checks which entries an {@link IndexCache} configured with {@code INDEX_CACHE_MB = 1}
 * keeps and which it drops.
 *
 * <p>Steps:
 * <ol>
 *   <li>Index files are written and loaded until their combined size reaches 1 MiB.</li>
 *   <li>All files are deleted and every entry is read again, so it must be served from the
 *       cache.</li>
 *   <li>One more file is loaded, which is expected to push the oldest entry out.</li>
 *   <li>Reading the oldest entry must now fail with an {@link IOException} caused by a
 *       {@link FileNotFoundException}, while all other entries remain readable.</li>
 * </ol>
 *
 * <p>Entries are keyed by their cumulative size rendered in radix 36; the same string names
 * the backing file under {@code p}.
 */
@Test
public void testLRCPolicy() throws Exception {
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  // Printed so that a failing run can be reproduced with the same seed.
  System.out.println("seed: " + seed);
  fs.delete(p, true);
  conf.setInt(INDEX_CACHE_MB, 1);
  final int partsPerMap = 1000;
  final int bytesPerFile = partsPerMap * 24;
  final int cacheBytes = 1024 * 1024;
  final String user = UserGroupInformation.getCurrentUser().getShortUserName();
  IndexCache cache = new IndexCache(conf);

  // fill cache
  int totalsize = bytesPerFile;
  for (; totalsize < cacheBytes; totalsize += bytesPerFile) {
    String mapId = Integer.toString(totalsize, 36);
    Path f = new Path(p, mapId);
    writeFile(fs, f, totalsize, partsPerMap);
    TezIndexRecord rec = cache.getIndexInformation(mapId, r.nextInt(partsPerMap), f, user);
    checkRecord(rec, totalsize);
  }

  // delete files, ensure cache retains all elem
  for (FileStatus stat : fs.listStatus(p)) {
    fs.delete(stat.getPath(), true);
  }
  for (int i = bytesPerFile; i < cacheBytes; i += bytesPerFile) {
    String mapId = Integer.toString(i, 36);
    TezIndexRecord rec =
        cache.getIndexInformation(mapId, r.nextInt(partsPerMap), new Path(p, mapId), user);
    checkRecord(rec, i);
  }

  // push oldest (bytesPerFile) out of cache
  String newestId = Integer.toString(totalsize, 36);
  Path newestFile = new Path(p, newestId);
  writeFile(fs, newestFile, totalsize, partsPerMap);
  cache.getIndexInformation(newestId, r.nextInt(partsPerMap), newestFile, user);
  fs.delete(newestFile, false);

  // oldest fails to read, or error
  boolean fnf = false;
  String oldestId = Integer.toString(bytesPerFile, 36);
  try {
    // Use the same radix-36 file name as every other lookup of this entry.
    cache.getIndexInformation(oldestId, r.nextInt(partsPerMap), new Path(p, oldestId), user);
  } catch (IOException e) {
    // instanceof is false for a null cause, so no separate null check is needed.
    if (!(e.getCause() instanceof FileNotFoundException)) {
      throw e;
    }
    fnf = true;
  }
  if (!fnf) {
    fail("Failed to push out last entry");
  }

  // should find all the other entries
  for (int i = bytesPerFile << 1; i < cacheBytes; i += bytesPerFile) {
    String mapId = Integer.toString(i, 36);
    TezIndexRecord rec =
        cache.getIndexInformation(mapId, r.nextInt(partsPerMap), new Path(p, mapId), user);
    checkRecord(rec, i);
  }
  TezIndexRecord rec =
      cache.getIndexInformation(newestId, r.nextInt(partsPerMap), newestFile, user);
  checkRecord(rec, totalsize);
}
Aggregations