use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestDefaultSorter method createTezOutputContext.
/**
 * Builds a Mockito mock of {@link OutputContext} for the sorter tests.
 *
 * <p>The mock is stubbed to return: a mocked statistics reporter, an execution
 * context for "localhost", fresh counters, the test working directory, a user
 * payload created from {@code conf}, 5 MB of total task memory, the test's
 * unique identifier, destination vertex "v1", and shuffle service metadata
 * containing {@code PORT}. Every {@code requestInitialMemory} call is granted in
 * full by immediately invoking the callback with the requested size.
 *
 * @return the stubbed mock context
 * @throws IOException if one of the setup calls (payload creation, metadata write) fails
 */
private OutputContext createTezOutputContext() throws IOException {
  String[] workingDirs = { workingDir.toString() };
  UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
  DataOutputBuffer serviceProviderMetaData = new DataOutputBuffer();
  serviceProviderMetaData.writeInt(PORT);
  TezCounters counters = new TezCounters();
  OutputContext context = mock(OutputContext.class);
  ExecutionContext execContext = new ExecutionContextImpl("localhost");
  doReturn(mock(OutputStatisticsReporter.class)).when(context).getStatisticsReporter();
  doReturn(execContext).when(context).getExecutionContext();
  doReturn(counters).when(context).getCounters();
  doReturn(workingDirs).when(context).getWorkDirs();
  doReturn(payLoad).when(context).getUserPayload();
  doReturn(5 * 1024 * 1024L).when(context).getTotalMemoryAvailableToTask();
  doReturn(UniqueID).when(context).getUniqueIdentifier();
  doReturn("v1").when(context).getDestinationVertexName();

  String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
      TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
  // getData() exposes the whole backing array; only the first getLength() bytes
  // were actually written, so limit the wrapped buffer to that range.
  ByteBuffer shuffleMetaData = ByteBuffer.wrap(serviceProviderMetaData.getData(), 0,
      serviceProviderMetaData.getLength());
  doReturn(shuffleMetaData).when(context).getServiceProviderMetaData(auxiliaryService);

  doAnswer(new Answer<Object>() {
    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
      long requestedSize = (Long) invocation.getArguments()[0];
      // Cast to the type the stub matches on, so any MemoryUpdateCallback works.
      MemoryUpdateCallback callback = (MemoryUpdateCallback) invocation.getArguments()[1];
      callback.memoryAssigned(requestedSize);
      return null;
    }
  }).when(context).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class));
  return context;
}
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestDefaultSorter method testPartitionStats.
/**
 * Runs a single-partition {@link DefaultSorter} with final merge disabled and a
 * 4 MB sort buffer, then checks that exactly one spill happened and that
 * partition statistics are present only when reporting is enabled.
 *
 * @param withStats value to set for
 *     {@code TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS}; also
 *     decides whether {@code getPartitionStats()} is expected to be non-null
 * @throws IOException if creating the context, sorter, or writing data fails
 */
void testPartitionStats(boolean withStats) throws IOException {
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS, withStats);
  OutputContext context = createTezOutputContext();
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 4);
  MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
  context.requestInitialMemory(
      ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()),
      handler);
  DefaultSorter sorter = new DefaultSorter(context, conf, 1, handler.getMemoryAssigned());
  // NOTE(review): arguments are presumably (sorter, number of keys, key/value size) - confirm against writeData.
  writeData(sorter, 1000, 10);
  int numSpills = sorter.getNumSpills();
  assertTrue("Expected exactly 1 spill but got " + numSpills, numSpills == 1);
  verifyCounters(sorter, context);
  if (withStats) {
    assertTrue("Partition stats should be available when reporting is enabled",
        sorter.getPartitionStats() != null);
  } else {
    assertTrue("Partition stats should be null when reporting is disabled",
        sorter.getPartitionStats() == null);
  }
}
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestDefaultSorter method testSortLimitsWithLargeRecords.
/**
 * Stress test for the sort buffer limit with very large records.
 *
 * <p>Kept disabled because it needs an io.sort.mb of 2047 MB. To run it, give
 * the JVM more than 2 GB (otherwise string generation runs out of memory) and
 * set DefaultSorter.MAX_IO_SORT_MB = 2047.
 */
@Test
@Ignore
public void testSortLimitsWithLargeRecords() throws IOException {
  OutputContext outputContext = createTezOutputContext();
  final long totalTaskMemory = 2800 * 1024 * 1024L;
  doReturn(totalTaskMemory).when(outputContext).getTotalMemoryAvailableToTask();

  // Setting IO_SORT_MB to 2047 MB
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 2047);
  outputContext.requestInitialMemory(
      ExternalSorter.getInitialMemoryRequirement(conf, outputContext.getTotalMemoryAvailableToTask()),
      new MemoryUpdateCallbackHandler());
  DefaultSorter sorter = new DefaultSorter(outputContext, conf, 2, 2047 << 20);

  // The loop is intentionally infinite: if io.sort.mb is not capped to 1800,
  // this ends up throwing java.lang.ArrayIndexOutOfBoundsException after many
  // spills. Keys cycle through 0..9.
  for (int keyId = 0; ; keyId = (keyId + 1) % 10) {
    Text key = new Text(keyId + "");
    // Random value size between 1 MB and 100 MB.
    int valueSize = ThreadLocalRandom.current().nextInt(1 * 1024 * 1024, 100 * 1024 * 1024);
    String value = StringInterner.weakIntern(StringUtils.repeat("v", valueSize));
    sorter.write(key, new Text(value));
  }
}
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestDefaultSorter method testEmptyPartitionsHelper.
/**
 * Sorts {@code numKeys} records into 50 partitions with final merge enabled and
 * a 1 MB sort buffer, then verifies the spill count, counters, output
 * permissions, and the raw length recorded for every partition without data,
 * both in the cached per-spill index records and in the final index file.
 *
 * @param numKeys number of keys passed to {@code writeData}; the test expects one
 *     spill when this is 0 and {@code numKeys} spills otherwise
 * @param sendEmptyPartitionDetails value to set for
 *     {@code TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED}; when true,
 *     data-less partitions are expected to have raw length 0, otherwise 6
 * @throws IOException if creating the context, sorter, or reading the index fails
 */
public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDetails) throws IOException {
  OutputContext context = createTezOutputContext();
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED,
      sendEmptyPartitionDetails);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
  conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
  MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
  context.requestInitialMemory(
      ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()),
      handler);
  int partitions = 50;
  DefaultSorter sorter = new DefaultSorter(context, conf, partitions, handler.getMemoryAssigned());
  writeData(sorter, numKeys, 1000000);

  // Writing no keys still yields a single spill; otherwise one spill per key is expected.
  int expectedSpills = (numKeys == 0) ? 1 : numKeys;
  Assert.assertEquals("Unexpected number of spills", expectedSpills, sorter.getNumSpills());
  verifyCounters(sorter, context);
  verifyOutputPermissions(context.getUniqueIdentifier());

  // Per-spill index records are only checked when the sorter kept them in its cache.
  if (!sorter.indexCacheList.isEmpty()) {
    for (int spill = 0; spill < sorter.getNumSpills(); spill++) {
      TezSpillRecord record = sorter.indexCacheList.get(spill);
      assertEmptyPartitionRawLengths(record, partitions, sendEmptyPartitionDetails,
          " in spill " + spill);
    }
  }

  Path indexFile = sorter.getFinalIndexFile();
  TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
  assertEmptyPartitionRawLengths(spillRecord, partitions, sendEmptyPartitionDetails, "");
}

/**
 * Asserts the raw length of every partition in {@code record} that has no data.
 *
 * @param record index record whose partitions are inspected
 * @param partitions number of partitions to check, starting at index 0
 * @param sendEmptyPartitionDetails when true the expected raw length is 0, otherwise 6
 * @param messageSuffix text appended to the failure message to identify the record
 */
private static void assertEmptyPartitionRawLengths(TezSpillRecord record, int partitions,
    boolean sendEmptyPartitionDetails, String messageSuffix) {
  // NOTE(review): 6 is presumably the raw size of an empty segment written for a
  // data-less partition - confirm against the writer implementation.
  long expectedRawLength = sendEmptyPartitionDetails ? 0 : 6;
  for (int partition = 0; partition < partitions; partition++) {
    TezIndexRecord indexRecord = record.getIndex(partition);
    if (indexRecord.hasData()) {
      continue;
    }
    Assert.assertEquals("Unexpected raw length for " + partition + "th partition" + messageSuffix,
        expectedRawLength, indexRecord.getRawLength());
  }
}
use of org.apache.tez.runtime.api.OutputContext in project tez by apache.
the class TestUnorderedPartitionedKVWriter method testBufferSizing.
/**
 * Checks how UnorderedPartitionedKVWriter splits its available memory into
 * buffers (count, per-buffer size, last buffer size) and which spill limit it
 * derives, across several available-memory settings.
 */
@Test(timeout = 10000)
public void testBufferSizing() throws IOException {
  final int maxSingleBufferSizeBytes = 2047;
  final long sizePerBuffer = maxSingleBufferSizeBytes - 64 - maxSingleBufferSizeBytes % 4;
  final int numOutputs = 10;

  ApplicationId appId = ApplicationId.newInstance(10000000, 1);
  TezCounters counters = new TezCounters();
  String uniqueId = UUID.randomUUID().toString();
  String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
      TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
  OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
  Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
      false, maxSingleBufferSizeBytes);

  // Not enough memory so divide into 2 buffers.
  UnorderedPartitionedKVWriter writer =
      new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, 2048);
  assertBufferLayout(writer, 2, 1024, 1024, 1);

  // allocate exact
  writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs,
      maxSingleBufferSizeBytes * 3);
  assertBufferLayout(writer, 3, sizePerBuffer, sizePerBuffer, 1);

  // under allocate
  writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs,
      maxSingleBufferSizeBytes * 2 + maxSingleBufferSizeBytes / 2);
  assertBufferLayout(writer, 2, sizePerBuffer, sizePerBuffer, 1);

  // over allocate
  writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs,
      maxSingleBufferSizeBytes * 2 + maxSingleBufferSizeBytes / 2 + 1);
  assertBufferLayout(writer, 3, sizePerBuffer, maxSingleBufferSizeBytes / 2 + 1, 1);

  // spill limit 1.
  writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs,
      4 * maxSingleBufferSizeBytes + 1);
  assertBufferLayout(writer, 4, sizePerBuffer, sizePerBuffer, 1);

  // spill limit 2.
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT, 50);
  writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs,
      4 * maxSingleBufferSizeBytes + 1);
  assertBufferLayout(writer, 4, sizePerBuffer, sizePerBuffer, 2);

  // Available memory is less than buffer size.
  conf.unset(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES);
  writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, 2048);
  assertBufferLayout(writer, 2, 1024, 1024, 1);
}

/**
 * Asserts the buffer layout of a freshly constructed writer; in every scenario
 * exactly one buffer is expected to be initialized.
 */
private static void assertBufferLayout(UnorderedPartitionedKVWriter writer, long expectedNumBuffers,
    long expectedSizePerBuffer, long expectedLastBufferSize, long expectedSpillLimit) {
  assertEquals(expectedNumBuffers, writer.numBuffers);
  assertEquals(expectedSizePerBuffer, writer.sizePerBuffer);
  assertEquals(expectedLastBufferSize, writer.lastBufferSize);
  assertEquals(1, writer.numInitializedBuffers);
  assertEquals(expectedSpillLimit, writer.spillLimit);
}
Aggregations