Search in sources :

Example 1 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class TestDefaultSorter method testEmptyCaseFileLengthsHelper.

/**
 * Runs a {@link DefaultSorter} with a 1 MB sort buffer over {@code numPartitions} partitions,
 * feeds it through {@code writeData(sorter, numKeys, keyLen)}, and then checks the spill event
 * payload, the final output file length, the spill count and the counters.
 *
 * @param numPartitions number of partitions the sorter is created with
 * @param numKeys number of keys passed to {@code writeData}
 * @param keyLen key length passed to {@code writeData}
 * @param expectedEmptyPartitions number of partitions expected to be flagged empty in the
 *        event payload; must be 0 when the payload carries no empty-partition bitmap
 * @throws IOException if the sorter, event generation or file-system lookup fails; it is
 *         propagated so the test report keeps the full stack trace
 */
public void testEmptyCaseFileLengthsHelper(int numPartitions, int numKeys, int keyLen, int expectedEmptyPartitions) throws IOException {
    OutputContext context = createTezOutputContext();
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    String auxService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    DefaultSorter sorter = new DefaultSorter(context, conf, numPartitions, handler.getMemoryAssigned());

    writeData(sorter, numKeys, keyLen);

    // Build the spill events for spill 0 directly so that the payload can be inspected.
    List<Event> events = new ArrayList<Event>();
    String pathComponent = (context.getUniqueIdentifier() + "_" + 0);
    ShuffleUtils.generateEventOnSpill(events, true, true, context, 0, sorter.indexCacheList.get(0), 0, true, pathComponent, sorter.getPartitionStats(), sorter.reportDetailedPartitionStats(), auxService, TezCommonUtils.newBestCompressionDeflater());

    // The second generated event is expected to be the composite data movement event.
    CompositeDataMovementEvent compositeDataMovementEvent = (CompositeDataMovementEvent) events.get(1);
    ByteBuffer bb = compositeDataMovementEvent.getUserPayload();
    ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(bb));
    if (shufflePayload.hasEmptyPartitions()) {
        byte[] emptyPartitionsBytesString = TezCommonUtils.decompressByteStringToByteArray(shufflePayload.getEmptyPartitions());
        BitSet emptyPartitionBitSet = TezUtilsInternal.fromByteArray(emptyPartitionsBytesString);
        Assert.assertEquals("Number of empty partitions did not match!", expectedEmptyPartitions, emptyPartitionBitSet.cardinality());
    } else {
        // No bitmap in the payload means no partition was reported empty.
        Assert.assertEquals(0, expectedEmptyPartitions);
    }

    // 4 bytes of header + numKeys* 2 *(keydata.length + keyLength.length) + 2 * 1 byte of EOF_MARKER + 4 bytes of checksum
    // NOTE(review): the expression below also scales the header, EOF and checksum terms by
    // numKeys, which differs from the description above - confirm which one is intended.
    assertEquals("Unexpected Output File Size!", numKeys * (4 + (2 * (2 + keyLen)) + 2 + 4), localFs.getFileStatus(sorter.getFinalOutputFile()).getLen());
    assertEquals(1, sorter.getNumSpills());
    verifyCounters(sorter, context);
}
Also used : ArrayList(java.util.ArrayList) BitSet(java.util.BitSet) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) OutputContext(org.apache.tez.runtime.api.OutputContext) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads)

Example 2 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class TestDefaultSorter method testSortLimitsWithSmallRecord.

/**
 * Stress test that writes tiny records into a {@link DefaultSorter} forever.
 *
 * Disabling this, as this would need 2047 MB sort mb for testing.
 * Set DefaultSorter.MAX_IO_SORT_MB = 20467 for running this.
 * NOTE(review): 20467 looks like a typo for 2047 (the value configured below) - confirm.
 *
 * @throws IOException if the sorter fails while writing
 */
@Test
@Ignore
public void testSortLimitsWithSmallRecord() throws IOException {
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, NullWritable.class.getName());
    OutputContext context = createTezOutputContext();
    // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
    doReturn(2800 * 1024 * 1024L).when(context).getTotalMemoryAvailableToTask();
    // Setting IO_SORT_MB to 2047 MB
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 2047);
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), new MemoryUpdateCallbackHandler());
    DefaultSorter sorter = new DefaultSorter(context, conf, 2, 2047 << 20);
    // Reset key/value in conf back to Text for other test cases
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, Text.class.getName());
    int i = 0;
    /*
     * If io.sort.mb is not capped to 1800, this would end up throwing
     * "java.lang.ArrayIndexOutOfBoundsException" after many spills.
     * Intentionally made it as infinite loop.
     */
    while (true) {
        // test for the avg record size 2 (in lower spectrum): keys cycle through "0".."9"
        Text key = new Text(i + "");
        sorter.write(key, NullWritable.get());
        i = (i + 1) % 10;
    }
}
Also used : Text(org.apache.hadoop.io.Text) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) NullWritable(org.apache.hadoop.io.NullWritable) OutputContext(org.apache.tez.runtime.api.OutputContext) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class TestDefaultSorter method testWithEmptyDataWithFinalMergeDisabled.

/**
 * Flushes and closes a five-partition {@link DefaultSorter} that received no records, with
 * final merge disabled, and checks that the sorter reports itself closed and that the final
 * output file's parent directory is named {@code UniqueID + "_0"} (case-insensitive).
 *
 * @throws IOException if setting up the sorter fails
 */
@Test(timeout = 30000)
public void testWithEmptyDataWithFinalMergeDisabled() throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    DefaultSorter sorter = new DefaultSorter(context, conf, 5, handler.getMemoryAssigned());
    // no data written. Empty
    try {
        sorter.flush();
        sorter.close();
        assertTrue(sorter.isClosed());
        assertTrue(sorter.getFinalOutputFile().getParent().getName().equalsIgnoreCase(UniqueID + "_0"));
        verifyCounters(sorter, context);
    } catch (Exception e) {
        // Still a test failure, but keep the original exception as the cause so the
        // report shows what actually went wrong instead of a bare fail().
        throw new AssertionError("Unexpected exception while flushing/closing an empty sorter: " + e, e);
    }
}
Also used : MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) OutputContext(org.apache.tez.runtime.api.OutputContext) IOException(java.io.IOException) Test(org.junit.Test)

Example 4 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class TestDefaultSorter method testWithSingleSpillWithFinalMergeDisabled.

/**
 * Writes data through a single-partition {@link DefaultSorter} with final merge disabled and
 * checks that exactly one spill happened, that {@code sendEvents} was invoked once on the
 * context, and that every {@link CompositeDataMovementEvent} sent carries the path component
 * {@code UniqueID + "_0"} (case-insensitive) with the expected output permissions.
 *
 * @throws IOException if the sorter or payload parsing fails
 */
@Test(timeout = 60000)
@SuppressWarnings("unchecked")
public void testWithSingleSpillWithFinalMergeDisabled() throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 4);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    DefaultSorter sorter = new DefaultSorter(context, conf, 1, handler.getMemoryAssigned());
    writeData(sorter, 1000, 10);
    assertEquals(1, sorter.getNumSpills());

    // Capture the single batch of events handed to the context.
    ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
    verify(context, times(1)).sendEvents(eventCaptor.capture());
    List<Event> events = eventCaptor.getValue();
    for (Event event : events) {
        // Only composite data movement events carry a shuffle payload to inspect.
        if (event instanceof CompositeDataMovementEvent) {
            CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) event;
            ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
            assertTrue(shufflePayload.getPathComponent().equalsIgnoreCase(UniqueID + "_0"));
            verifyOutputPermissions(shufflePayload.getPathComponent());
        }
    }
    verifyCounters(sorter, context);
}
Also used : CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) List(java.util.List) ArrayList(java.util.ArrayList) OutputContext(org.apache.tez.runtime.api.OutputContext) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads) Test(org.junit.Test)

Example 5 with MemoryUpdateCallbackHandler

use of org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler in project tez by apache.

the class TestDefaultSorter method testWithEmptyData.

/**
 * Flushes and closes a single-partition {@link DefaultSorter} that received no records and
 * checks that the sorter reports itself closed and that the final output file's parent
 * directory is named {@code UniqueID} (case-insensitive).
 *
 * @throws IOException if setting up the sorter fails
 */
@Test
public void testWithEmptyData() throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    DefaultSorter sorter = new DefaultSorter(context, conf, 1, handler.getMemoryAssigned());
    // no data written. Empty
    try {
        sorter.flush();
        sorter.close();
        assertTrue(sorter.isClosed());
        assertTrue(sorter.getFinalOutputFile().getParent().getName().equalsIgnoreCase(UniqueID));
        verifyCounters(sorter, context);
    } catch (Exception e) {
        // Still a test failure, but keep the original exception as the cause so the
        // report shows what actually went wrong instead of a bare fail().
        throw new AssertionError("Unexpected exception while flushing/closing an empty sorter: " + e, e);
    }
}
Also used : MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) OutputContext(org.apache.tez.runtime.api.OutputContext) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

MemoryUpdateCallbackHandler (org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler): 20; OutputContext (org.apache.tez.runtime.api.OutputContext): 14; Test (org.junit.Test): 7; ByteString (com.google.protobuf.ByteString): 5; InvocationOnMock (org.mockito.invocation.InvocationOnMock): 5; IOException (java.io.IOException): 4; TezCounters (org.apache.tez.common.counters.TezCounters): 4; UserPayload (org.apache.tez.dag.api.UserPayload): 4; MemoryUpdateCallback (org.apache.tez.runtime.api.MemoryUpdateCallback): 4; Mockito.doAnswer (org.mockito.Mockito.doAnswer): 4; Answer (org.mockito.stubbing.Answer): 4; ArrayList (java.util.ArrayList): 3; Event (org.apache.tez.runtime.api.Event): 3; OutputStatisticsReporter (org.apache.tez.runtime.api.OutputStatisticsReporter): 3; CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent): 3; ExecutionContextImpl (org.apache.tez.runtime.api.impl.ExecutionContextImpl): 3; ShuffleUserPayloads (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads): 3; ByteBuffer (java.nio.ByteBuffer): 2; List (java.util.List): 2; Configuration (org.apache.hadoop.conf.Configuration): 2