Example 81 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From the class TestShuffleScheduler, the method createTezInputContext:

private InputContext createTezInputContext() throws IOException {
    ApplicationId applicationId = ApplicationId.newInstance(1, 1);
    InputContext inputContext = mock(InputContext.class);
    doReturn(applicationId).when(inputContext).getApplicationId();
    doReturn("sourceVertex").when(inputContext).getSourceVertexName();
    when(inputContext.getCounters()).thenReturn(new TezCounters());
    ExecutionContext executionContext = new ExecutionContextImpl("localhost");
    doReturn(executionContext).when(inputContext).getExecutionContext();
    ByteBuffer shuffleBuffer = ByteBuffer.allocate(4).putInt(0, 4);
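    // Assumption: this 4-byte buffer stands in for the shuffle auxiliary service
    // metadata, which the consumer reads back as the shuffle port number.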
    doReturn(shuffleBuffer).when(inputContext).getServiceProviderMetaData(anyString());
    Token<JobTokenIdentifier> sessionToken = new Token<JobTokenIdentifier>(new JobTokenIdentifier(new Text("text")), new JobTokenSecretManager());
    ByteBuffer tokenBuffer = TezCommonUtils.serializeServiceData(sessionToken);
    doReturn(tokenBuffer).when(inputContext).getServiceConsumerMetaData(anyString());
    when(inputContext.createTezFrameworkExecutorService(anyInt(), anyString())).thenAnswer(new Answer<ExecutorService>() {

        @Override
        public ExecutorService answer(InvocationOnMock invocation) throws Throwable {
            return sharedExecutor.createExecutorService(invocation.getArgumentAt(0, Integer.class), invocation.getArgumentAt(1, String.class));
        }
    });
    return inputContext;
}
Also used : ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) InputContext(org.apache.tez.runtime.api.InputContext) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) Token(org.apache.hadoop.security.token.Token) Text(org.apache.hadoop.io.Text) ByteBuffer(java.nio.ByteBuffer) TezCounters(org.apache.tez.common.counters.TezCounters) ExecutionContext(org.apache.tez.runtime.api.ExecutionContext) JobTokenSecretManager(org.apache.tez.common.security.JobTokenSecretManager) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ExecutorService(java.util.concurrent.ExecutorService) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)
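Returning a real TezCounters instance from the mocked InputContext lets the scheduler under test update counters that the test can inspect afterwards. A minimal, hedged sketch of that pattern, assuming org.apache.tez.common.counters.TaskCounter is imported (the counter chosen here is for illustration only, not necessarily what TestShuffleScheduler asserts):

// Sketch only: reading a counter back from the mocked context after the code under test runs.
InputContext inputContext = createTezInputContext();
TezCounter shuffledInputs = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
shuffledInputs.increment(1);
assertEquals(1, shuffledInputs.getValue());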

Example 82 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From the class TestPipelinedSorter, the method setup:

@Before
public void setup() throws IOException {
    conf = getConf();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    this.outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
}
Also used : ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezCounters(org.apache.tez.common.counters.TezCounters) Before(org.junit.Before)
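The helper createMockOutputContext is not shown in this listing. Based on the explicit stubs in Examples 81 and 83, a hedged sketch of what such a helper might look like follows; the method names come from the OutputContext/TaskContext API, but the actual helper in these test classes may stub more methods or different values:

// Hypothetical sketch of the unshown helper; not the actual Tez test code.
private OutputContext createMockOutputContext(TezCounters counters, ApplicationId appId,
        String uniqueId, String auxiliaryService) throws IOException {
    OutputContext context = mock(OutputContext.class);
    doReturn(counters).when(context).getCounters();
    doReturn(appId).when(context).getApplicationId();
    doReturn(uniqueId).when(context).getUniqueIdentifier();
    doReturn(new ExecutionContextImpl("localhost")).when(context).getExecutionContext();
    // Publish a fake shuffle port through the auxiliary service metadata (arbitrary value).
    DataOutputBuffer portData = new DataOutputBuffer();
    portData.writeInt(8080);
    doReturn(ByteBuffer.wrap(portData.getData())).when(context)
            .getServiceProviderMetaData(auxiliaryService);
    return context;
}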

Example 83 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From the class TestDefaultSorter, the method createTezOutputContext:

private OutputContext createTezOutputContext() throws IOException {
    String[] workingDirs = { workingDir.toString() };
    UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
    DataOutputBuffer serviceProviderMetaData = new DataOutputBuffer();
    serviceProviderMetaData.writeInt(PORT);
    TezCounters counters = new TezCounters();
    OutputContext context = mock(OutputContext.class);
    ExecutionContext execContext = new ExecutionContextImpl("localhost");
    doReturn(mock(OutputStatisticsReporter.class)).when(context).getStatisticsReporter();
    doReturn(execContext).when(context).getExecutionContext();
    doReturn(counters).when(context).getCounters();
    doReturn(workingDirs).when(context).getWorkDirs();
    doReturn(payLoad).when(context).getUserPayload();
    doReturn(5 * 1024 * 1024L).when(context).getTotalMemoryAvailableToTask();
    doReturn(UniqueID).when(context).getUniqueIdentifier();
    doReturn("v1").when(context).getDestinationVertexName();
    doReturn(ByteBuffer.wrap(serviceProviderMetaData.getData())).when(context).getServiceProviderMetaData(conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT));
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            long requestedSize = (Long) invocation.getArguments()[0];
            MemoryUpdateCallbackHandler callback = (MemoryUpdateCallbackHandler) invocation.getArguments()[1];
            callback.memoryAssigned(requestedSize);
            return null;
        }
    }).when(context).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class));
    return context;
}
Also used : UserPayload(org.apache.tez.dag.api.UserPayload) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) ByteString(com.google.protobuf.ByteString) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) ExecutionContext(org.apache.tez.runtime.api.ExecutionContext) OutputStatisticsReporter(org.apache.tez.runtime.api.OutputStatisticsReporter) InvocationOnMock(org.mockito.invocation.InvocationOnMock) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) MemoryUpdateCallback(org.apache.tez.runtime.api.MemoryUpdateCallback)
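The doAnswer stub above simply grants whatever memory is requested. A hedged sketch of how code under test typically drives that interaction, assuming MemoryUpdateCallbackHandler (imported above) exposes validateUpdateReceived() and getMemoryAssigned() accessors:

// Sketch only: driving the memory-request stub defined above.
OutputContext context = createTezOutputContext();
MemoryUpdateCallbackHandler memoryHandler = new MemoryUpdateCallbackHandler();
context.requestInitialMemory(10 * 1024 * 1024L, memoryHandler);
memoryHandler.validateUpdateReceived();
long assignedBytes = memoryHandler.getMemoryAssigned();  // equals the requested size with this stub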

Example 84 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From the class TestUnorderedPartitionedKVWriter, the method testBufferSizing:

@Test(timeout = 10000)
public void testBufferSizing() throws IOException {
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
    final int maxSingleBufferSizeBytes = 2047;
    final long sizePerBuffer = maxSingleBufferSizeBytes - 64 - maxSingleBufferSizeBytes % 4;
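    // For maxSingleBufferSizeBytes = 2047: 2047 - 64 - (2047 % 4) = 2047 - 64 - 3 = 1980,
    // which is the sizePerBuffer asserted in the exact/under/over allocation cases below.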
    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, false, maxSingleBufferSizeBytes);
    int numOutputs = 10;
    UnorderedPartitionedKVWriter kvWriter = null;
    // Not enough memory so divide into 2 buffers.
    kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, 2048);
    assertEquals(2, kvWriter.numBuffers);
    assertEquals(1024, kvWriter.sizePerBuffer);
    assertEquals(1024, kvWriter.lastBufferSize);
    assertEquals(1, kvWriter.numInitializedBuffers);
    assertEquals(1, kvWriter.spillLimit);
    // allocate exact
    kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, maxSingleBufferSizeBytes * 3);
    assertEquals(3, kvWriter.numBuffers);
    assertEquals(sizePerBuffer, kvWriter.sizePerBuffer);
    assertEquals(sizePerBuffer, kvWriter.lastBufferSize);
    assertEquals(1, kvWriter.numInitializedBuffers);
    assertEquals(1, kvWriter.spillLimit);
    // under allocate
    kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, maxSingleBufferSizeBytes * 2 + maxSingleBufferSizeBytes / 2);
    assertEquals(2, kvWriter.numBuffers);
    assertEquals(sizePerBuffer, kvWriter.sizePerBuffer);
    assertEquals(sizePerBuffer, kvWriter.lastBufferSize);
    assertEquals(1, kvWriter.numInitializedBuffers);
    assertEquals(1, kvWriter.spillLimit);
    // over allocate
    kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, maxSingleBufferSizeBytes * 2 + maxSingleBufferSizeBytes / 2 + 1);
    assertEquals(3, kvWriter.numBuffers);
    assertEquals(sizePerBuffer, kvWriter.sizePerBuffer);
    assertEquals(maxSingleBufferSizeBytes / 2 + 1, kvWriter.lastBufferSize);
    assertEquals(1, kvWriter.numInitializedBuffers);
    assertEquals(1, kvWriter.spillLimit);
    // spill limit 1.
    kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, 4 * maxSingleBufferSizeBytes + 1);
    assertEquals(4, kvWriter.numBuffers);
    assertEquals(sizePerBuffer, kvWriter.sizePerBuffer);
    assertEquals(sizePerBuffer, kvWriter.lastBufferSize);
    assertEquals(1, kvWriter.numInitializedBuffers);
    assertEquals(1, kvWriter.spillLimit);
    // spill limit 2.
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT, 50);
    kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, 4 * maxSingleBufferSizeBytes + 1);
    assertEquals(4, kvWriter.numBuffers);
    assertEquals(sizePerBuffer, kvWriter.sizePerBuffer);
    assertEquals(sizePerBuffer, kvWriter.lastBufferSize);
    assertEquals(1, kvWriter.numInitializedBuffers);
    assertEquals(2, kvWriter.spillLimit);
    // Available memory is less than buffer size.
    conf.unset(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES);
    kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, 2048);
    assertEquals(2, kvWriter.numBuffers);
    assertEquals(1024, kvWriter.sizePerBuffer);
    assertEquals(1024, kvWriter.lastBufferSize);
    assertEquals(1, kvWriter.numInitializedBuffers);
    assertEquals(1, kvWriter.spillLimit);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) ByteString(com.google.protobuf.ByteString) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 85 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From the class TestUnorderedPartitionedKVWriter, the method baseTestWithPipelinedTransfer:

@SuppressWarnings("unchecked")
private void baseTestWithPipelinedTransfer(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    int dagId = 1;
    String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, -1);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }
    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;
    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
    int sizePerBuffer = kvWriter.sizePerBuffer;
    // IntW + LongW
    int sizePerRecord = 4 + 8;
    // Record + META_OVERHEAD
    int sizePerRecordWithOverhead = sizePerRecord + 12;
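    // 4 bytes (IntWritable) + 8 bytes (LongWritable) = 12 bytes of data,
    // plus 12 bytes of metadata overhead, i.e. 24 bytes per buffered record.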
    BitSet partitionsWithData = new BitSet(numPartitions);
    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        partitionsWithData.set(partition);
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;
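    // Rough expectation only: if availableMemory = 2048 is split into 1024-byte buffers
    // (as in testBufferSizing above), recordsPerBuffer is 1024 / 24 = 42, so roughly one
    // spill is expected for every 42 records written.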
    ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
    List<Event> lastEvents = kvWriter.close();
    if (numPartitions == 1) {
        assertEquals(false, kvWriter.skipBuffers);
    }
    // no events are sent to kvWriter upon close with pipelining
    assertTrue(lastEvents.size() == 0);
    verify(outputContext, atLeast(numExpectedSpills)).sendEvents(eventCaptor.capture());
    int numOfCapturedEvents = eventCaptor.getAllValues().size();
    lastEvents = eventCaptor.getAllValues().get(numOfCapturedEvents - 1);
    VertexManagerEvent VMEvent = (VertexManagerEvent) lastEvents.get(0);
    for (int i = 0; i < numOfCapturedEvents; i++) {
        List<Event> events = eventCaptor.getAllValues().get(i);
        if (i < numOfCapturedEvents - 1) {
            assertTrue(events.size() == 1);
            assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
        } else {
            assertTrue(events.size() == 2);
            assertTrue(events.get(0) instanceof VertexManagerEvent);
            assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
        }
    }
    verifyPartitionStats(VMEvent, partitionsWithData);
    verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());
    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
    assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    // No additional spill bytes written when final merge is disabled.
    assertEquals(additionalSpillBytesWritten, 0);
    // No additional spills when final merge is disabled.
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    // No additional spills when final merge is disabled.
    assertEquals(numAdditionalSpillsCounter.getValue(), 0);
    assertTrue(lastEvents.size() > 0);
    // Get the last event
    int index = lastEvents.size() - 1;
    assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    // Ensure that this is the last event
    assertTrue(eventProto.getLastEvent());
    verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
    verify(outputContext, atLeast(1)).notifyProgress();
    // Verify if all spill files are available.
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
    if (numRecordsWritten > 0) {
        int numSpills = kvWriter.numSpills.get();
        for (int i = 0; i < numSpills; i++) {
            Path outputFile = taskOutput.getSpillFileForWrite(i, 10);
            Path indexFile = taskOutput.getSpillIndexFileForWrite(i, 10);
            assertTrue(localFs.exists(outputFile));
            assertTrue(localFs.exists(indexFile));
            assertEquals("Incorrect output permissions", (short) 0640, localFs.getFileStatus(outputFile).getPermission().toShort());
            assertEquals("Incorrect index permissions", (short) 0640, localFs.getFileStatus(indexFile).getPermission().toShort());
        }
    } else {
        return;
    }
}
Also used : TezTaskOutputFiles(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) ByteString(com.google.protobuf.ByteString) Configurable(org.apache.hadoop.conf.Configurable) TezCounter(org.apache.tez.common.counters.TezCounter) List(java.util.List) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TaskFailureType(org.apache.tez.runtime.api.TaskFailureType) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskOutput(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput)

Aggregations

TezCounters (org.apache.tez.common.counters.TezCounters): 100
Test (org.junit.Test): 33
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 22
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 20
InputContext (org.apache.tez.runtime.api.InputContext): 20
TezCounter (org.apache.tez.common.counters.TezCounter): 18
Configuration (org.apache.hadoop.conf.Configuration): 17
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 14
OutputContext (org.apache.tez.runtime.api.OutputContext): 13
TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration): 12
IOException (java.io.IOException): 10
Path (org.apache.hadoop.fs.Path): 10
DAG (org.apache.tez.dag.api.DAG): 10
HashMap (java.util.HashMap): 9
CounterGroup (org.apache.tez.common.counters.CounterGroup): 9
StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts): 8
ByteString (com.google.protobuf.ByteString): 7
Map (java.util.Map): 7
Set (java.util.Set): 7
LimitExceededException (org.apache.tez.common.counters.LimitExceededException): 7