Search in sources :

Example 36 with OutputContext

Use of org.apache.tez.runtime.api.OutputContext in the Apache Tez project.

From class TestDefaultSorter, method createTezOutputContext.

/**
 * Builds a Mockito mock of {@link OutputContext} wired with the stubs the sorter tests read.
 *
 * <p>The mock reports a single work directory ({@code workingDir}), a user payload created
 * from {@code conf}, fresh {@link TezCounters}, 5 MB of total task memory, and service
 * provider metadata containing {@code PORT} for the configured shuffle auxiliary service.
 * Calls to {@code requestInitialMemory} immediately hand the full requested size to the
 * supplied callback.
 *
 * @return the stubbed mock context
 * @throws IOException if the payload cannot be created or the port cannot be serialized
 */
private OutputContext createTezOutputContext() throws IOException {
    String[] workingDirs = { workingDir.toString() };
    UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
    DataOutputBuffer serviceProviderMetaData = new DataOutputBuffer();
    serviceProviderMetaData.writeInt(PORT);
    TezCounters counters = new TezCounters();
    OutputContext context = mock(OutputContext.class);
    ExecutionContext execContext = new ExecutionContextImpl("localhost");
    doReturn(mock(OutputStatisticsReporter.class)).when(context).getStatisticsReporter();
    doReturn(execContext).when(context).getExecutionContext();
    doReturn(counters).when(context).getCounters();
    doReturn(workingDirs).when(context).getWorkDirs();
    doReturn(payLoad).when(context).getUserPayload();
    doReturn(5 * 1024 * 1024L).when(context).getTotalMemoryAvailableToTask();
    doReturn(UniqueID).when(context).getUniqueIdentifier();
    doReturn("v1").when(context).getDestinationVertexName();
    String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
        TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    // getData() exposes the whole backing array; only the first getLength() bytes were
    // actually written, so restrict the buffer to that region.
    ByteBuffer shuffleMetaData = ByteBuffer.wrap(
        serviceProviderMetaData.getData(), 0, serviceProviderMetaData.getLength());
    doReturn(shuffleMetaData).when(context).getServiceProviderMetaData(auxiliaryService);
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            long requestedSize = (Long) invocation.getArguments()[0];
            // Cast to the type the stub matches on, so any MemoryUpdateCallback works,
            // not only MemoryUpdateCallbackHandler.
            MemoryUpdateCallback callback = (MemoryUpdateCallback) invocation.getArguments()[1];
            // Grant exactly what was asked for.
            callback.memoryAssigned(requestedSize);
            return null;
        }
    }).when(context).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class));
    return context;
}
Also used : UserPayload(org.apache.tez.dag.api.UserPayload) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) ByteString(com.google.protobuf.ByteString) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) ExecutionContext(org.apache.tez.runtime.api.ExecutionContext) OutputStatisticsReporter(org.apache.tez.runtime.api.OutputStatisticsReporter) InvocationOnMock(org.mockito.invocation.InvocationOnMock) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) MemoryUpdateCallback(org.apache.tez.runtime.api.MemoryUpdateCallback)

Example 37 with OutputContext

Use of org.apache.tez.runtime.api.OutputContext in the Apache Tez project.

From class TestDefaultSorter, method testPartitionStats.

/**
 * Sorts a small data set with final merge disabled and checks that partition statistics
 * are present exactly when {@code withStats} is set.
 *
 * @param withStats value written to TEZ_RUNTIME_REPORT_PARTITION_STATS before the run
 * @throws IOException propagated from context creation or the sorter
 */
void testPartitionStats(boolean withStats) throws IOException {
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS, withStats);
    OutputContext outputContext = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 4);

    MemoryUpdateCallbackHandler memoryHandler = new MemoryUpdateCallbackHandler();
    long totalTaskMemory = outputContext.getTotalMemoryAvailableToTask();
    outputContext.requestInitialMemory(
        ExternalSorter.getInitialMemoryRequirement(conf, totalTaskMemory), memoryHandler);

    DefaultSorter sorter =
        new DefaultSorter(outputContext, conf, 1, memoryHandler.getMemoryAssigned());
    writeData(sorter, 1000, 10);
    assertTrue(sorter.getNumSpills() == 1);
    verifyCounters(sorter, outputContext);

    // Stats must be non-null iff reporting was switched on.
    boolean statsPresent = sorter.getPartitionStats() != null;
    assertTrue(statsPresent == withStats);
}
Also used : MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) OutputContext(org.apache.tez.runtime.api.OutputContext)

Example 38 with OutputContext

Use of org.apache.tez.runtime.api.OutputContext in the Apache Tez project.

From class TestDefaultSorter, method testSortLimitsWithLargeRecords.

/**
 * Disabled by default, as this would need 2047 MB io.sort.mb for testing.
 * Provide &gt; 2GB to the JVM when running this test to avoid OOM in string generation.
 *
 * Set DefaultSorter.MAX_IO_SORT_MB = 2047 for running this.
 */
@Test
@Ignore
public void testSortLimitsWithLargeRecords() throws IOException {
    OutputContext outputContext = createTezOutputContext();
    doReturn(2800 * 1024 * 1024L).when(outputContext).getTotalMemoryAvailableToTask();
    // Setting IO_SORT_MB to 2047 MB
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 2047);
    long totalTaskMemory = outputContext.getTotalMemoryAvailableToTask();
    outputContext.requestInitialMemory(
        ExternalSorter.getInitialMemoryRequirement(conf, totalTaskMemory),
        new MemoryUpdateCallbackHandler());
    DefaultSorter sorter = new DefaultSorter(outputContext, conf, 2, 2047 << 20);
    /*
     * If io.sort.mb is not capped to 1800, this would end up throwing
     * "java.lang.ArrayIndexOutOfBoundsException" after many spills.
     * The loop is intentionally infinite; keys cycle through 0..9.
     */
    for (int keyIndex = 0; ; keyIndex = (keyIndex + 1) % 10) {
        Text key = new Text(String.valueOf(keyIndex));
        // Generate random size between 1 MB to 100 MB.
        int valueLength = ThreadLocalRandom.current().nextInt(1024 * 1024, 100 * 1024 * 1024);
        String value = StringInterner.weakIntern(StringUtils.repeat("v", valueLength));
        sorter.write(key, new Text(value));
    }
}
Also used : MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) Text(org.apache.hadoop.io.Text) ByteString(com.google.protobuf.ByteString) OutputContext(org.apache.tez.runtime.api.OutputContext) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 39 with OutputContext

Use of org.apache.tez.runtime.api.OutputContext in the Apache Tez project.

From class TestDefaultSorter, method testEmptyPartitionsHelper.

/**
 * Writes {@code numKeys} records across 50 partitions with final merge enabled, then checks
 * the raw length recorded for every partition that has no data, both in the cached
 * per-spill index records and in the final index file.
 *
 * <p>A data-less partition is expected to report raw length 0 when
 * {@code sendEmptyPartitionDetails} is true and 6 otherwise.
 * NOTE(review): 6 is presumably the on-disk size of an empty segment - confirm.
 *
 * @param numKeys number of keys to write; 0 is expected to still produce one spill,
 *                any other value is expected to produce one spill per key
 * @param sendEmptyPartitionDetails value written to
 *                TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED
 * @throws IOException propagated from context creation, the sorter, or index file reading
 */
public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDetails) throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED, sendEmptyPartitionDetails);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
    conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    int partitions = 50;
    DefaultSorter sorter = new DefaultSorter(context, conf, partitions, handler.getMemoryAssigned());
    writeData(sorter, numKeys, 1000000);

    // assertEquals (rather than assertTrue on ==) reports both values on failure.
    int expectedSpills = (numKeys == 0) ? 1 : numKeys;
    Assert.assertEquals("Unexpected number of spills", expectedSpills, sorter.getNumSpills());
    verifyCounters(sorter, context);
    verifyOutputPermissions(context.getUniqueIdentifier());

    // Single source of truth for what a data-less partition should report.
    final long expectedEmptyRawLength = sendEmptyPartitionDetails ? 0 : 6;

    // Per-spill index records, when the sorter kept them cached.
    if (!sorter.indexCacheList.isEmpty()) {
        for (int i = 0; i < sorter.getNumSpills(); i++) {
            TezSpillRecord record = sorter.indexCacheList.get(i);
            for (int j = 0; j < partitions; j++) {
                TezIndexRecord tezIndexRecord = record.getIndex(j);
                if (tezIndexRecord.hasData()) {
                    continue;
                }
                // i is the spill, j is the partition; name both in the message.
                Assert.assertEquals("Unexpected raw length for " + j + "th partition in spill " + i,
                    expectedEmptyRawLength, tezIndexRecord.getRawLength());
            }
        }
    }

    // Final merged index file.
    Path indexFile = sorter.getFinalIndexFile();
    TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
    for (int i = 0; i < partitions; i++) {
        TezIndexRecord tezIndexRecord = spillRecord.getIndex(i);
        if (tezIndexRecord.hasData()) {
            continue;
        }
        Assert.assertEquals("Unexpected raw length for " + i + "th partition",
            expectedEmptyRawLength, tezIndexRecord.getRawLength());
    }
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) Path(org.apache.hadoop.fs.Path) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) OutputContext(org.apache.tez.runtime.api.OutputContext)

Example 40 with OutputContext

Use of org.apache.tez.runtime.api.OutputContext in the Apache Tez project.

From class TestUnorderedPartitionedKVWriter, method testBufferSizing.

/**
 * Constructs UnorderedPartitionedKVWriterForTest with a series of available-memory sizes
 * and checks the resulting buffer count, per-buffer size, last-buffer size, number of
 * initialized buffers and spill limit for each.
 */
@Test(timeout = 10000)
public void testBufferSizing() throws IOException {
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    String auxiliaryService = defaultConf.get(
        TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
        TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);

    final int maxSingleBufferSizeBytes = 2047;
    final long expectedSizePerBuffer = maxSingleBufferSizeBytes - 64 - maxSingleBufferSizeBytes % 4;
    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, false, maxSingleBufferSizeBytes);
    int numOutputs = 10;

    // Available-memory sizes fed to the writer in the scenarios below.
    final int exactMemory = maxSingleBufferSizeBytes * 3;
    final int underMemory = maxSingleBufferSizeBytes * 2 + maxSingleBufferSizeBytes / 2;
    final int overMemory = maxSingleBufferSizeBytes * 2 + maxSingleBufferSizeBytes / 2 + 1;
    final int fourBuffersMemory = 4 * maxSingleBufferSizeBytes + 1;

    // Not enough memory so divide into 2 buffers.
    UnorderedPartitionedKVWriter writer =
        new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, 2048);
    assertEquals(2, writer.numBuffers);
    assertEquals(1024, writer.sizePerBuffer);
    assertEquals(1024, writer.lastBufferSize);
    assertEquals(1, writer.numInitializedBuffers);
    assertEquals(1, writer.spillLimit);

    // allocate exact
    writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, exactMemory);
    assertEquals(3, writer.numBuffers);
    assertEquals(expectedSizePerBuffer, writer.sizePerBuffer);
    assertEquals(expectedSizePerBuffer, writer.lastBufferSize);
    assertEquals(1, writer.numInitializedBuffers);
    assertEquals(1, writer.spillLimit);

    // under allocate
    writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, underMemory);
    assertEquals(2, writer.numBuffers);
    assertEquals(expectedSizePerBuffer, writer.sizePerBuffer);
    assertEquals(expectedSizePerBuffer, writer.lastBufferSize);
    assertEquals(1, writer.numInitializedBuffers);
    assertEquals(1, writer.spillLimit);

    // over allocate
    writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, overMemory);
    assertEquals(3, writer.numBuffers);
    assertEquals(expectedSizePerBuffer, writer.sizePerBuffer);
    assertEquals(maxSingleBufferSizeBytes / 2 + 1, writer.lastBufferSize);
    assertEquals(1, writer.numInitializedBuffers);
    assertEquals(1, writer.spillLimit);

    // spill limit 1.
    writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, fourBuffersMemory);
    assertEquals(4, writer.numBuffers);
    assertEquals(expectedSizePerBuffer, writer.sizePerBuffer);
    assertEquals(expectedSizePerBuffer, writer.lastBufferSize);
    assertEquals(1, writer.numInitializedBuffers);
    assertEquals(1, writer.spillLimit);

    // spill limit 2: same memory, but with the buffer merge percent set to 50.
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT, 50);
    writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, fourBuffersMemory);
    assertEquals(4, writer.numBuffers);
    assertEquals(expectedSizePerBuffer, writer.sizePerBuffer);
    assertEquals(expectedSizePerBuffer, writer.lastBufferSize);
    assertEquals(1, writer.numInitializedBuffers);
    assertEquals(2, writer.spillLimit);

    // Available memory is less than buffer size (per-buffer maximum no longer configured).
    conf.unset(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES);
    writer = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, 2048);
    assertEquals(2, writer.numBuffers);
    assertEquals(1024, writer.sizePerBuffer);
    assertEquals(1024, writer.lastBufferSize);
    assertEquals(1, writer.numInitializedBuffers);
    assertEquals(1, writer.spillLimit);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) ByteString(com.google.protobuf.ByteString) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Aggregations

OutputContext (org.apache.tez.runtime.api.OutputContext)61 Test (org.junit.Test)38 Configuration (org.apache.hadoop.conf.Configuration)19 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)15 MemoryUpdateCallbackHandler (org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler)14 TezCounters (org.apache.tez.common.counters.TezCounters)13 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)13 UserPayload (org.apache.tez.dag.api.UserPayload)13 Path (org.apache.hadoop.fs.Path)12 Event (org.apache.tez.runtime.api.Event)12 ByteString (com.google.protobuf.ByteString)11 DataSinkDescriptor (org.apache.tez.dag.api.DataSinkDescriptor)11 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)11 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)9 InputContext (org.apache.tez.runtime.api.InputContext)9 BitSet (java.util.BitSet)8 InputDescriptor (org.apache.tez.dag.api.InputDescriptor)8 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)8 ByteBuffer (java.nio.ByteBuffer)6 Text (org.apache.hadoop.io.Text)6