Search in sources :

Example 21 with CompositeDataMovementEvent

use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.

the class TestShuffleUtils method testGenerateOnSpillEvent_With_FinalMerge.

@Test
public void testGenerateOnSpillEvent_With_FinalMerge() throws Exception {
    List<Event> events = Lists.newLinkedList();
    Path indexFile = createIndexFile(10, false);
    boolean finalMergeEnabled = true;
    boolean isLastEvent = true;
    int spillId = 0;
    int physicalOutputs = 10;
    String pathComponent = "/attempt_x_y_0/file.out";
    String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    // normal code path where we do final merge all the time
    ShuffleUtils.generateEventOnSpill(events, finalMergeEnabled, isLastEvent, outputContext, spillId, new TezSpillRecord(indexFile, conf), physicalOutputs, true, pathComponent, null, false, auxiliaryService, TezCommonUtils.newBestCompressionDeflater());
    // one for VM
    Assert.assertTrue(events.size() == 2);
    Assert.assertTrue(events.get(0) instanceof VertexManagerEvent);
    Assert.assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
    Assert.assertTrue(cdme.getCount() == physicalOutputs);
    Assert.assertTrue(cdme.getSourceIndexStart() == 0);
    ShuffleUserPayloads.DataMovementEventPayloadProto dmeProto = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    // With final merge, spill details should not be present
    Assert.assertFalse(dmeProto.hasSpillId());
    Assert.assertFalse(dmeProto.hasLastEvent() || dmeProto.getLastEvent());
    byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(dmeProto.getEmptyPartitions());
    BitSet emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
    Assert.assertTrue("emptyPartitionBitSet cardinality (expecting 5) = " + emptyPartitionsBitSet.cardinality(), emptyPartitionsBitSet.cardinality() == 5);
}
Also used : Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) ByteString(com.google.protobuf.ByteString) Mockito.anyString(org.mockito.Mockito.anyString) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads) Test(org.junit.Test)

Example 22 with CompositeDataMovementEvent

use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.

the class TestDefaultSorter method testWithMultipleSpillsWithFinalMergeDisabled.

@Test(timeout = 60000)
@SuppressWarnings("unchecked")
public void testWithMultipleSpillsWithFinalMergeDisabled() throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 4);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES, 1);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    DefaultSorter sorter = new DefaultSorter(context, conf, 1, handler.getMemoryAssigned());
    writeData(sorter, 10000, 1000);
    int spillCount = sorter.getNumSpills();
    ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
    verify(context, times(1)).sendEvents(eventCaptor.capture());
    List<Event> events = eventCaptor.getValue();
    int spillIndex = 0;
    for (Event event : events) {
        if (event instanceof CompositeDataMovementEvent) {
            CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) event;
            ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
            assertTrue(shufflePayload.getPathComponent().equalsIgnoreCase(UniqueID + "_" + spillIndex));
            verifyOutputPermissions(shufflePayload.getPathComponent());
            spillIndex++;
        }
    }
    assertTrue(spillIndex == spillCount);
    verifyCounters(sorter, context);
}
Also used : CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) List(java.util.List) ArrayList(java.util.ArrayList) OutputContext(org.apache.tez.runtime.api.OutputContext) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads) Test(org.junit.Test)

Example 23 with CompositeDataMovementEvent

use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.

the class TestUnorderedPartitionedKVWriter method baseTestWithPipelinedTransfer.

@SuppressWarnings("unchecked")
private void baseTestWithPipelinedTransfer(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    int dagId = 1;
    String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, -1);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }
    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;
    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
    int sizePerBuffer = kvWriter.sizePerBuffer;
    // IntW + LongW
    int sizePerRecord = 4 + 8;
    // Record + META_OVERHEAD
    int sizePerRecordWithOverhead = sizePerRecord + 12;
    BitSet partitionsWithData = new BitSet(numPartitions);
    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        partitionsWithData.set(partition);
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;
    ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
    List<Event> lastEvents = kvWriter.close();
    if (numPartitions == 1) {
        assertEquals(false, kvWriter.skipBuffers);
    }
    // no events are sent to kvWriter upon close with pipelining
    assertTrue(lastEvents.size() == 0);
    verify(outputContext, atLeast(numExpectedSpills)).sendEvents(eventCaptor.capture());
    int numOfCapturedEvents = eventCaptor.getAllValues().size();
    lastEvents = eventCaptor.getAllValues().get(numOfCapturedEvents - 1);
    VertexManagerEvent VMEvent = (VertexManagerEvent) lastEvents.get(0);
    for (int i = 0; i < numOfCapturedEvents; i++) {
        List<Event> events = eventCaptor.getAllValues().get(i);
        if (i < numOfCapturedEvents - 1) {
            assertTrue(events.size() == 1);
            assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
        } else {
            assertTrue(events.size() == 2);
            assertTrue(events.get(0) instanceof VertexManagerEvent);
            assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
        }
    }
    verifyPartitionStats(VMEvent, partitionsWithData);
    verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());
    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
    assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    // No additional spill bytes written when final merge is disabled.
    assertEquals(additionalSpillBytesWritten, 0);
    // No additional spills when final merge is disabled.
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    // No additional spills when final merge is disabled.
    assertEquals(numAdditionalSpillsCounter.getValue(), 0);
    assertTrue(lastEvents.size() > 0);
    // Get the last event
    int index = lastEvents.size() - 1;
    assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    // Ensure that this is the last event
    assertTrue(eventProto.getLastEvent());
    verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
    verify(outputContext, atLeast(1)).notifyProgress();
    // Verify if all spill files are available.
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
    if (numRecordsWritten > 0) {
        int numSpills = kvWriter.numSpills.get();
        for (int i = 0; i < numSpills; i++) {
            Path outputFile = taskOutput.getSpillFileForWrite(i, 10);
            Path indexFile = taskOutput.getSpillIndexFileForWrite(i, 10);
            assertTrue(localFs.exists(outputFile));
            assertTrue(localFs.exists(indexFile));
            assertEquals("Incorrect output permissions", (short) 0640, localFs.getFileStatus(outputFile).getPermission().toShort());
            assertEquals("Incorrect index permissions", (short) 0640, localFs.getFileStatus(indexFile).getPermission().toShort());
        }
    } else {
        return;
    }
}
Also used : TezTaskOutputFiles(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) ByteString(com.google.protobuf.ByteString) Configurable(org.apache.hadoop.conf.Configurable) TezCounter(org.apache.tez.common.counters.TezCounter) List(java.util.List) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TaskFailureType(org.apache.tez.runtime.api.TaskFailureType) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskOutput(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput)

Example 24 with CompositeDataMovementEvent

use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.

the class TestOnFileSortedOutput method testWithSomeEmptyPartition.

@Test(timeout = 5000)
public void testWithSomeEmptyPartition() throws Exception {
    // ensure atleast 2 partitions are available
    partitions = Math.max(2, partitions);
    startSortedOutput(partitions);
    // write random data
    for (int i = 0; i < 2 * partitions; i++) {
        Text key = new Text(new BigInteger(256, rnd).toString());
        Text value = new Text(new BigInteger(256, rnd).toString());
        // skip writing to certain partitions
        if (i % partitions != emptyPartitionIdx) {
            writer.write(key, value);
        }
    }
    List<Event> eventList = sortedOutput.close();
    assertTrue(eventList != null && eventList.size() == 2);
    ShuffleUserPayloads.DataMovementEventPayloadProto payload = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(((CompositeDataMovementEvent) eventList.get(1)).getUserPayload()));
    assertEquals(HOST, payload.getHost());
    assertEquals(PORT, payload.getPort());
    assertEquals(UniqueID, payload.getPathComponent());
}
Also used : CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) BigInteger(java.math.BigInteger) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Text(org.apache.hadoop.io.Text) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads) Test(org.junit.Test)

Example 25 with CompositeDataMovementEvent

use of org.apache.tez.runtime.api.events.CompositeDataMovementEvent in project tez by apache.

the class TestOnFileUnorderedKVOutput method testGeneratedDataMovementEvent.

@Test(timeout = 5000)
public void testGeneratedDataMovementEvent() throws Exception {
    Configuration conf = new Configuration();
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
    OutputContext outputContext = createOutputContext(conf, sharedExecutor);
    UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);
    List<Event> events = null;
    events = kvOutput.initialize();
    kvOutput.start();
    assertTrue(events != null && events.size() == 0);
    KeyValueWriter kvWriter = kvOutput.getWriter();
    List<KVPair> data = KVDataGen.generateTestData(true, 0);
    for (KVPair kvp : data) {
        kvWriter.write(kvp.getKey(), kvp.getvalue());
    }
    events = kvOutput.close();
    assertEquals(45, task.getTaskStatistics().getIOStatistics().values().iterator().next().getDataSize());
    assertEquals(5, task.getTaskStatistics().getIOStatistics().values().iterator().next().getItemsProcessed());
    assertTrue(events != null && events.size() == 2);
    CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
    assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
    DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
    assertFalse(shufflePayload.hasEmptyPartitions());
    assertEquals(outputContext.getUniqueIdentifier(), shufflePayload.getPathComponent());
    assertEquals(shufflePort, shufflePayload.getPort());
    assertEquals("localhost", shufflePayload.getHost());
    sharedExecutor.shutdownNow();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) Text(org.apache.hadoop.io.Text) OutputContext(org.apache.tez.runtime.api.OutputContext) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Event(org.apache.tez.runtime.api.Event) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Aggregations

CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)25 Event (org.apache.tez.runtime.api.Event)17 Test (org.junit.Test)15 VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent)13 BitSet (java.util.BitSet)11 OutputContext (org.apache.tez.runtime.api.OutputContext)11 ShuffleUserPayloads (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads)10 ByteString (com.google.protobuf.ByteString)9 Path (org.apache.hadoop.fs.Path)8 ByteBuffer (java.nio.ByteBuffer)7 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)7 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)7 DataMovementEventPayloadProto (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto)7 Configuration (org.apache.hadoop.conf.Configuration)6 Text (org.apache.hadoop.io.Text)6 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)6 List (java.util.List)5 IntWritable (org.apache.hadoop.io.IntWritable)5 Configurable (org.apache.hadoop.conf.Configurable)4 LongWritable (org.apache.hadoop.io.LongWritable)4