Search in sources :

Example 1 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestShuffleUtils method createTezOutputContext.

/**
 * Builds a Mockito mock of {@link OutputContext} stubbed with the values the tests rely on.
 *
 * <p>The mock reports {@code "localhost"} as the execution host, task-vertex index 1,
 * output index 1, DAG attempt number 0, destination vertex {@code "destVertex"}, a fresh
 * {@link TezCounters} instance, and a serialized int (80) as the service-provider metadata
 * for the auxiliary service id read from {@code conf}.
 *
 * @return the stubbed mock context
 * @throws IOException if writing the metadata int into the buffer fails
 */
private OutputContext createTezOutputContext() throws IOException {
    OutputContext outputContext = mock(OutputContext.class);
    ExecutionContextImpl executionContext = mock(ExecutionContextImpl.class);
    doReturn("localhost").when(executionContext).getHostName();
    doReturn(executionContext).when(outputContext).getExecutionContext();
    // Serialized metadata for the auxiliary service; 80 is presumably the shuffle port — confirm.
    DataOutputBuffer serviceProviderMetaData = new DataOutputBuffer();
    serviceProviderMetaData.writeInt(80);
    // getData() exposes the whole backing array; only the first getLength() bytes are valid,
    // so limit the wrapped buffer to what was actually written.
    ByteBuffer metaData = ByteBuffer.wrap(serviceProviderMetaData.getData(), 0, serviceProviderMetaData.getLength());
    doReturn(metaData).when(outputContext).getServiceProviderMetaData(conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT));
    doReturn(1).when(outputContext).getTaskVertexIndex();
    doReturn(1).when(outputContext).getOutputIndex();
    doReturn(0).when(outputContext).getDAGAttemptNumber();
    doReturn("destVertex").when(outputContext).getDestinationVertexName();
    when(outputContext.getCounters()).thenReturn(new TezCounters());
    return outputContext;
}
Also used : ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) OutputContext(org.apache.tez.runtime.api.OutputContext) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 2 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestDefaultSorter method testEmptyCaseFileLengthsHelper.

/**
 * Writes data through a {@link DefaultSorter}, generates the spill events, and checks the
 * empty-partition information, the final output file length, the spill count and the counters.
 *
 * @param numPartitions number of partitions the sorter is created with
 * @param numKeys number of keys handed to {@code writeData}
 * @param keyLen key length handed to {@code writeData}
 * @param expectedEmptyPartitions expected cardinality of the empty-partition bit set carried
 *        in the event payload (0 when the payload carries no such bit set)
 * @throws IOException if sorting, event generation or file-status lookup fails; the exception
 *         is propagated so the test report keeps the full stack trace
 */
public void testEmptyCaseFileLengthsHelper(int numPartitions, int numKeys, int keyLen, int expectedEmptyPartitions) throws IOException {
    OutputContext context = createTezOutputContext();
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    String auxService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    DefaultSorter sorter = new DefaultSorter(context, conf, numPartitions, handler.getMemoryAssigned());
    writeData(sorter, numKeys, keyLen);
    List<Event> events = new ArrayList<Event>();
    String pathComponent = (context.getUniqueIdentifier() + "_" + 0);
    ShuffleUtils.generateEventOnSpill(events, true, true, context, 0, sorter.indexCacheList.get(0), 0, true, pathComponent, sorter.getPartitionStats(), sorter.reportDetailedPartitionStats(), auxService, TezCommonUtils.newBestCompressionDeflater());
    // The composite data-movement event is expected at index 1 of the generated events.
    CompositeDataMovementEvent compositeDataMovementEvent = (CompositeDataMovementEvent) events.get(1);
    ByteBuffer bb = compositeDataMovementEvent.getUserPayload();
    ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(bb));
    if (shufflePayload.hasEmptyPartitions()) {
        byte[] emptyPartitionsBytesString = TezCommonUtils.decompressByteStringToByteArray(shufflePayload.getEmptyPartitions());
        BitSet emptyPartitionBitSet = TezUtilsInternal.fromByteArray(emptyPartitionsBytesString);
        assertEquals("Number of empty partitions did not match!", expectedEmptyPartitions, emptyPartitionBitSet.cardinality());
    } else {
        // No bit set in the payload means no partition may be expected to be empty.
        assertEquals("Payload has no empty-partition info, so none may be expected", 0, expectedEmptyPartitions);
    }
    // 4 bytes of header + numKeys* 2 *(keydata.length + keyLength.length) + 2 * 1 byte of EOF_MARKER + 4 bytes of checksum
    // NOTE(review): the expression multiplies the whole per-file overhead by numKeys, which
    // differs from the wording above — confirm which one is intended.
    long expectedFileLength = (long) numKeys * (4 + (2 * (2 + keyLen)) + 2 + 4);
    long actualFileLength = localFs.getFileStatus(sorter.getFinalOutputFile()).getLen();
    // JUnit order is (message, expected, actual); keeps the failure message the right way round.
    assertEquals("Unexpected Output File Size!", expectedFileLength, actualFileLength);
    assertTrue("Expected exactly one spill", sorter.getNumSpills() == 1);
    verifyCounters(sorter, context);
}
Also used : ArrayList(java.util.ArrayList) BitSet(java.util.BitSet) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) OutputContext(org.apache.tez.runtime.api.OutputContext) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads)

Example 3 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestDefaultSorter method testSortLimitsWithSmallRecord.

/**
 * Feeds an endless stream of tiny records into a {@link DefaultSorter} configured with a
 * 2047 MB sort buffer.
 *
 * <p>Disabled because it needs 2047 MB of sort memory. The original note says to set
 * DefaultSorter.MAX_IO_SORT_MB = 20467 before running it (possibly 2047 was meant — confirm).
 *
 * @throws IOException if the sorter fails while being set up or written to
 */
@Test
@Ignore
public void testSortLimitsWithSmallRecord() throws IOException {
    final String textClassName = Text.class.getName();
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, textClassName);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, NullWritable.class.getName());
    OutputContext context = createTezOutputContext();
    doReturn(2800 * 1024 * 1024L).when(context).getTotalMemoryAvailableToTask();
    // Ask for a 2047 MB sort buffer.
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 2047);
    long initialMemory = ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask());
    context.requestInitialMemory(initialMemory, new MemoryUpdateCallbackHandler());
    DefaultSorter sorter = new DefaultSorter(context, conf, 2, 2047 << 20);
    // Put key/value classes back to Text so other test cases see the usual configuration.
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, textClassName);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, textClassName);
    /*
     * If io.sort.mb is not capped to 1800, this would end up throwing
     * "java.lang.ArrayIndexOutOfBoundsException" after many spills.
     * The loop is intentionally infinite.
     */
    for (int digit = 0; ; digit = (digit + 1) % 10) {
        // Single-digit keys keep the average record size at the low end (about 2).
        sorter.write(new Text(Integer.toString(digit)), NullWritable.get());
    }
}
Also used : Text(org.apache.hadoop.io.Text) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) NullWritable(org.apache.hadoop.io.NullWritable) OutputContext(org.apache.tez.runtime.api.OutputContext) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 4 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestDefaultSorter method testWithEmptyDataWithFinalMergeDisabled.

/**
 * Flushes and closes a {@link DefaultSorter} that received no data, with final merge
 * disabled, then checks that the sorter is closed, that the parent directory of the final
 * output file is named {@code UniqueID + "_0"} (case-insensitive), and that the counters verify.
 *
 * @throws IOException if setting up the context or the sorter fails
 */
@Test(timeout = 30000)
public void testWithEmptyDataWithFinalMergeDisabled() throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    DefaultSorter sorter = new DefaultSorter(context, conf, 5, handler.getMemoryAssigned());
    // no data written. Empty
    try {
        sorter.flush();
        sorter.close();
        assertTrue("Sorter should report closed after close()", sorter.isClosed());
        assertTrue("Final output should live under the <UniqueID>_0 directory", sorter.getFinalOutputFile().getParent().getName().equalsIgnoreCase(UniqueID + "_0"));
        verifyCounters(sorter, context);
    } catch (Exception e) {
        // Still fail the test, but keep the cause so the report shows what actually went wrong.
        throw new AssertionError("Unexpected exception while flushing/closing an empty sorter", e);
    }
}
Also used : MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) OutputContext(org.apache.tez.runtime.api.OutputContext) IOException(java.io.IOException) Test(org.junit.Test)

Example 5 with OutputContext

use of org.apache.tez.runtime.api.OutputContext in project tez by apache.

the class TestDefaultSorter method testWithSingleSpillWithFinalMergeDisabled.

/**
 * Writes data through a single-partition {@link DefaultSorter} with final merge disabled
 * and checks that exactly one spill happened, that events were sent exactly once, and that
 * every composite data-movement event carries the path component {@code UniqueID + "_0"}
 * (case-insensitive) with verified output permissions.
 *
 * @throws IOException if sorting or payload parsing fails
 */
@Test(timeout = 60000)
@SuppressWarnings("unchecked")
public void testWithSingleSpillWithFinalMergeDisabled() throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 4);
    MemoryUpdateCallbackHandler memoryHandler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), memoryHandler);
    DefaultSorter sorter = new DefaultSorter(context, conf, 1, memoryHandler.getMemoryAssigned());
    writeData(sorter, 1000, 10);
    assertTrue(sorter.getNumSpills() == 1);

    // Capture the single batch of events handed to the context.
    ArgumentCaptor<List> sentEvents = ArgumentCaptor.forClass(List.class);
    verify(context, times(1)).sendEvents(sentEvents.capture());
    List<Event> captured = sentEvents.getValue();
    for (Event candidate : captured) {
        if (!(candidate instanceof CompositeDataMovementEvent)) {
            // Only composite data-movement events carry the shuffle payload checked below.
            continue;
        }
        ByteBuffer rawPayload = ((CompositeDataMovementEvent) candidate).getUserPayload();
        ShuffleUserPayloads.DataMovementEventPayloadProto payload = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(rawPayload));
        assertTrue(payload.getPathComponent().equalsIgnoreCase(UniqueID + "_0"));
        verifyOutputPermissions(payload.getPathComponent());
    }
    verifyCounters(sorter, context);
}
Also used : CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) List(java.util.List) ArrayList(java.util.ArrayList) OutputContext(org.apache.tez.runtime.api.OutputContext) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads) Test(org.junit.Test)

Aggregations

OutputContext (org.apache.tez.runtime.api.OutputContext)61 Test (org.junit.Test)38 Configuration (org.apache.hadoop.conf.Configuration)19 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)15 MemoryUpdateCallbackHandler (org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler)14 TezCounters (org.apache.tez.common.counters.TezCounters)13 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)13 UserPayload (org.apache.tez.dag.api.UserPayload)13 Path (org.apache.hadoop.fs.Path)12 Event (org.apache.tez.runtime.api.Event)12 ByteString (com.google.protobuf.ByteString)11 DataSinkDescriptor (org.apache.tez.dag.api.DataSinkDescriptor)11 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)11 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)9 InputContext (org.apache.tez.runtime.api.InputContext)9 BitSet (java.util.BitSet)8 InputDescriptor (org.apache.tez.dag.api.InputDescriptor)8 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)8 ByteBuffer (java.nio.ByteBuffer)6 Text (org.apache.hadoop.io.Text)6