Search in sources :

Example 16 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class TestDefaultSorter method testSortLimitsWithLargeRecords.

/**
 * Stress test that feeds a {@code DefaultSorter} configured with a 2047 MB sort buffer
 * an endless stream of 1 MB to 100 MB values.
 *
 * <p>Disabled, as this would need 2047 MB io.sort.mb for testing.
 * Provide more than 2 GB to the JVM when running this test to avoid OOM in string generation.
 *
 * <p>Set DefaultSorter.MAX_IO_SORT_MB = 2047 for running this.
 *
 * <p>The write loop has no exit condition, so the method only terminates when
 * {@code sorter.write} (or value generation) throws.
 *
 * @throws IOException propagated from context creation or from the sorter
 */
@Test
@Ignore
public void testSortLimitsWithLargeRecords() throws IOException {
    OutputContext context = createTezOutputContext();
    // 2800 MB does not fit in an int, so the product is computed in long arithmetic.
    doReturn(2800L * 1024 * 1024).when(context).getTotalMemoryAvailableToTask();
    // Setting IO_SORT_MB to 2047 MB
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 2047);
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), new MemoryUpdateCallbackHandler());
    // 2047 << 20 is 2047 MB expressed in bytes; it still fits in an int.
    DefaultSorter sorter = new DefaultSorter(context, conf, 2, 2047 << 20);
    int i = 0;
    /*
     * If io.sort.mb is not capped to 1800, this would end up throwing
     * "java.lang.ArrayIndexOutOfBoundsException" after many spills.
     * Intentionally made it as infinite loop.
     */
    while (true) {
        // Keys cycle through "0".."9".
        Text key = new Text(Integer.toString(i));
        // Generate random size between 1 MB (inclusive) and 100 MB (exclusive).
        int valSize = ThreadLocalRandom.current().nextInt(1 * 1024 * 1024, 100 * 1024 * 1024);
        String val = StringInterner.weakIntern(StringUtils.repeat("v", valSize));
        sorter.write(key, new Text(val));
        i = (i + 1) % 10;
    }
}
Also used : MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) Text(org.apache.hadoop.io.Text) ByteString(com.google.protobuf.ByteString) OutputContext(org.apache.tez.runtime.api.OutputContext) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 17 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class TestDefaultSorter method testEmptyPartitionsHelper.

/**
 * Writes {@code numKeys} keys through a {@code DefaultSorter} with 50 partitions and a
 * 1 MB sort buffer, then checks the spill count, the counters, the output permissions
 * and the raw length recorded for every partition that holds no data.
 *
 * <p>For a partition without data the expected raw length is 0 when empty-partition
 * details are sent via events, and 6 otherwise.
 * NOTE(review): the 6 presumably corresponds to fixed per-partition file overhead — confirm.
 *
 * @param numKeys number of keys handed to {@code writeData}
 * @param sendEmptyPartitionDetails value stored under
 *        {@code TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED}
 * @throws IOException propagated from the sorter or from reading the final index file
 */
public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDetails) throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED, sendEmptyPartitionDetails);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    int partitions = 50;
    DefaultSorter sorter = new DefaultSorter(context, conf, partitions, handler.getMemoryAssigned());
    writeData(sorter, numKeys, 1000000);

    // With no keys a single spill is still expected; otherwise one spill per key.
    int expectedSpills = (numKeys == 0) ? 1 : numKeys;
    Assert.assertEquals("Unexpected number of spills", expectedSpills, sorter.getNumSpills());

    verifyCounters(sorter, context);
    verifyOutputPermissions(context.getUniqueIdentifier());

    // Raw length a partition without data is expected to report.
    long expectedEmptyRawLength = sendEmptyPartitionDetails ? 0 : 6;

    // Per-spill index records, when the sorter kept them in its index cache.
    if (sorter.indexCacheList.size() != 0) {
        for (int i = 0; i < sorter.getNumSpills(); i++) {
            TezSpillRecord record = sorter.indexCacheList.get(i);
            for (int j = 0; j < partitions; j++) {
                TezIndexRecord tezIndexRecord = record.getIndex(j);
                if (tezIndexRecord.hasData()) {
                    continue;
                }
                Assert.assertEquals("Unexpected raw length for " + j + "th partition in spill " + i, expectedEmptyRawLength, tezIndexRecord.getRawLength());
            }
        }
    }

    // Final merged index file.
    Path indexFile = sorter.getFinalIndexFile();
    TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
    for (int i = 0; i < partitions; i++) {
        TezIndexRecord tezIndexRecord = spillRecord.getIndex(i);
        if (tezIndexRecord.hasData()) {
            continue;
        }
        Assert.assertEquals("Unexpected raw length for " + i + "th partition", expectedEmptyRawLength, tezIndexRecord.getRawLength());
    }
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) Path(org.apache.hadoop.fs.Path) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) OutputContext(org.apache.tez.runtime.api.OutputContext)

Example 18 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class TestOnFileSortedOutput method createTezOutputContext.

/**
 * Builds a Mockito mock of {@link OutputContext} for the tests in this class.
 *
 * <p>The mock is stubbed so that:
 * <ul>
 *   <li>the statistics reporter stores the reported data size in {@code outputSize} and
 *       the reported item count in {@code numRecords};</li>
 *   <li>{@code requestInitialMemory} immediately hands the full requested size to the
 *       supplied callback;</li>
 *   <li>5 MB is reported as the total memory available to the task;</li>
 *   <li>the shuffle service metadata is a buffer into which {@code PORT} was written.</li>
 * </ul>
 *
 * @return the stubbed output context
 * @throws IOException if the user payload cannot be created from {@code conf}
 */
private OutputContext createTezOutputContext() throws IOException {
    String[] workingDirs = { workingDir.toString() };
    UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
    DataOutputBuffer serviceProviderMetaData = new DataOutputBuffer();
    serviceProviderMetaData.writeInt(PORT);
    TezCounters counters = new TezCounters();

    // Reporter that records whatever the output reports, so tests can inspect it.
    OutputStatisticsReporter reporter = mock(OutputStatisticsReporter.class);
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            outputSize.set((Long) invocation.getArguments()[0]);
            return null;
        }
    }).when(reporter).reportDataSize(anyLong());
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            numRecords.set((Long) invocation.getArguments()[0]);
            return null;
        }
    }).when(reporter).reportItemsProcessed(anyLong());

    OutputContext context = mock(OutputContext.class);
    doReturn(counters).when(context).getCounters();
    doReturn(workingDirs).when(context).getWorkDirs();
    doReturn(payLoad).when(context).getUserPayload();
    doReturn(5L * 1024 * 1024).when(context).getTotalMemoryAvailableToTask();
    doReturn(UniqueID).when(context).getUniqueIdentifier();
    doReturn("v1").when(context).getDestinationVertexName();
    doReturn(ByteBuffer.wrap(serviceProviderMetaData.getData())).when(context).getServiceProviderMetaData(conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT));

    // Grant every memory request in full by calling straight back into the handler.
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            long requestedSize = (Long) invocation.getArguments()[0];
            MemoryUpdateCallbackHandler callback = (MemoryUpdateCallbackHandler) invocation.getArguments()[1];
            callback.memoryAssigned(requestedSize);
            return null;
        }
    }).when(context).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class));

    ExecutionContext executionContext = mock(ExecutionContext.class);
    doReturn(HOST).when(executionContext).getHostName();
    doReturn(reporter).when(context).getStatisticsReporter();
    doReturn(executionContext).when(context).getExecutionContext();
    return context;
}
Also used : UserPayload(org.apache.tez.dag.api.UserPayload) ByteString(com.google.protobuf.ByteString) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) OutputStatisticsReporter(org.apache.tez.runtime.api.OutputStatisticsReporter) ExecutionContext(org.apache.tez.runtime.api.ExecutionContext) InvocationOnMock(org.mockito.invocation.InvocationOnMock) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Matchers.anyLong(org.mockito.Matchers.anyLong) AtomicLong(java.util.concurrent.atomic.AtomicLong) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) MemoryUpdateCallback(org.apache.tez.runtime.api.MemoryUpdateCallback)

Example 19 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class OutputTestHelpers method createOutputContext.

/**
 * Creates a Mockito mock of {@link OutputContext} with the stubbing the output tests need.
 *
 * <p>Memory requests are granted in full: the stub for {@code requestInitialMemory}
 * passes the requested size straight to the supplied callback. The mock reports 200 MB
 * of total task memory, a fresh {@link TezCounters}, a mocked statistics reporter and an
 * execution context for "localhost".
 *
 * @param conf configuration serialized into the user payload
 * @param workingDir the single work directory reported by the context
 * @return the stubbed output context
 * @throws IOException if the user payload cannot be created from {@code conf}
 */
static OutputContext createOutputContext(Configuration conf, Path workingDir) throws IOException {
    final OutputContext mockContext = mock(OutputContext.class);

    // Answer that assigns exactly the amount of memory that was requested.
    final Answer<Void> grantRequestedMemory = new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            final Object[] args = invocation.getArguments();
            final long granted = (Long) args[0];
            final MemoryUpdateCallbackHandler memoryHandler = (MemoryUpdateCallbackHandler) args[1];
            memoryHandler.memoryAssigned(granted);
            return null;
        }
    };
    doAnswer(grantRequestedMemory).when(mockContext).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class));

    // Identity and payload.
    doReturn(TezUtils.createUserPayloadFromConf(conf)).when(mockContext).getUserPayload();
    doReturn("destinationVertex").when(mockContext).getDestinationVertexName();
    doReturn("UUID").when(mockContext).getUniqueIdentifier();

    // Resources available to the task.
    final String[] workDirs = new String[] { workingDir.toString() };
    doReturn(workDirs).when(mockContext).getWorkDirs();
    doReturn(200L * 1024 * 1024).when(mockContext).getTotalMemoryAvailableToTask();

    // Reporting and execution environment.
    doReturn(new TezCounters()).when(mockContext).getCounters();
    doReturn(mock(OutputStatisticsReporter.class)).when(mockContext).getStatisticsReporter();
    doReturn(new ExecutionContextImpl("localhost")).when(mockContext).getExecutionContext();

    return mockContext;
}
Also used : OutputStatisticsReporter(org.apache.tez.runtime.api.OutputStatisticsReporter) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) InvocationOnMock(org.mockito.invocation.InvocationOnMock) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) MemoryUpdateCallback(org.apache.tez.runtime.api.MemoryUpdateCallback) OutputContext(org.apache.tez.runtime.api.OutputContext) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 20 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class OrderedPartitionedKVOutput method initialize.

/**
 * Prepares this output: records the start time, rebuilds the configuration from the
 * user payload, registers the initial memory request for the sorter and caches the
 * empty-partition reporting flag.
 *
 * @return an empty list; no events are produced by this method
 * @throws IOException if the configuration cannot be created from the user payload
 */
@Override
public synchronized List<Event> initialize() throws IOException {
    this.startTime = System.nanoTime();
    this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());

    // The local dirs parameter is set on this conf because it is read in several
    // places (wherever LocalDirAllocator is used) - TezTaskOutputFiles,
    // TezMerger, etc.
    final String[] workDirs = getContext().getWorkDirs();
    this.conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workDirs);

    // Ask the context for the sorter's initial memory; the handler receives the grant.
    this.memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
    final long availableToTask = getContext().getTotalMemoryAvailableToTask();
    final long initialRequirement = ExternalSorter.getInitialMemoryRequirement(this.conf, availableToTask);
    getContext().requestInitialMemory(initialRequirement, this.memoryUpdateCallbackHandler);

    this.sendEmptyPartitionDetails = this.conf.getBoolean(
        TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED,
        TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED_DEFAULT);

    return Collections.emptyList();
}
Also used : MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler)

Aggregations

MemoryUpdateCallbackHandler (org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler)20 OutputContext (org.apache.tez.runtime.api.OutputContext)14 Test (org.junit.Test)7 ByteString (com.google.protobuf.ByteString)5 InvocationOnMock (org.mockito.invocation.InvocationOnMock)5 IOException (java.io.IOException)4 TezCounters (org.apache.tez.common.counters.TezCounters)4 UserPayload (org.apache.tez.dag.api.UserPayload)4 MemoryUpdateCallback (org.apache.tez.runtime.api.MemoryUpdateCallback)4 Mockito.doAnswer (org.mockito.Mockito.doAnswer)4 Answer (org.mockito.stubbing.Answer)4 ArrayList (java.util.ArrayList)3 Event (org.apache.tez.runtime.api.Event)3 OutputStatisticsReporter (org.apache.tez.runtime.api.OutputStatisticsReporter)3 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)3 ExecutionContextImpl (org.apache.tez.runtime.api.impl.ExecutionContextImpl)3 ShuffleUserPayloads (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads)3 ByteBuffer (java.nio.ByteBuffer)2 List (java.util.List)2 Configuration (org.apache.hadoop.conf.Configuration)2