Example 16 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From class Utils, method parseTezCountersFromJSON.

/**
 * Parses Tez counters from a JSON object.
 *
 * @param jsonObject the JSON representation of the counters; may be null
 * @return the parsed TezCounters (empty if jsonObject is null)
 * @throws JSONException if a required field is missing or malformed
 */
public static TezCounters parseTezCountersFromJSON(JSONObject jsonObject) throws JSONException {
    TezCounters counters = new TezCounters();
    if (jsonObject == null) {
        // empty counters.
        return counters;
    }
    final JSONArray counterGroupNodes = jsonObject.optJSONArray(Constants.COUNTER_GROUPS);
    if (counterGroupNodes != null) {
        for (int i = 0; i < counterGroupNodes.length(); i++) {
            JSONObject counterGroupNode = counterGroupNodes.optJSONObject(i);
            final String groupName = counterGroupNode.optString(Constants.COUNTER_GROUP_NAME);
            final String groupDisplayName = counterGroupNode.optString(Constants.COUNTER_GROUP_DISPLAY_NAME, groupName);
            CounterGroup group = counters.addGroup(groupName, groupDisplayName);
            final JSONArray counterNodes = counterGroupNode.optJSONArray(Constants.COUNTERS);
            if (counterNodes == null) {
                // A group may have no counters; skip it rather than NPE below.
                continue;
            }
            // Parse the individual counter nodes
            for (int j = 0; j < counterNodes.length(); j++) {
                JSONObject counterNode = counterNodes.optJSONObject(j);
                final String counterName = counterNode.getString(Constants.COUNTER_NAME);
                final String counterDisplayName = counterNode.optString(Constants.COUNTER_DISPLAY_NAME, counterName);
                final long counterValue = counterNode.getLong(Constants.COUNTER_VALUE);
                addCounter(group, counterName, counterDisplayName, counterValue);
            }
        }
    }
    return counters;
}
Also used: JSONObject(org.codehaus.jettison.json.JSONObject) CounterGroup(org.apache.tez.common.counters.CounterGroup) JSONArray(org.codehaus.jettison.json.JSONArray) TezCounters(org.apache.tez.common.counters.TezCounters)
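
As a quick illustration of the parser above, here is a minimal sketch that builds a matching JSON payload with the same Constants keys and parses it back. The group name, counter name, and value are made-up illustration data, not taken from the original source.

// Sketch: round trip through parseTezCountersFromJSON (illustrative names and values).
JSONObject counterNode = new JSONObject();
counterNode.put(Constants.COUNTER_NAME, "FILE_BYTES_READ");
counterNode.put(Constants.COUNTER_VALUE, 1024L);
JSONObject groupNode = new JSONObject();
groupNode.put(Constants.COUNTER_GROUP_NAME, "example.FileSystemGroup");
groupNode.put(Constants.COUNTERS, new JSONArray().put(counterNode));
JSONObject root = new JSONObject();
root.put(Constants.COUNTER_GROUPS, new JSONArray().put(groupNode));
// put(...) and the parser both throw JSONException, so callers declare or handle it.
TezCounters parsed = Utils.parseTezCountersFromJSON(root);
// The parsed structure can then be read back through the counters API:
long value = parsed.getGroup("example.FileSystemGroup").findCounter("FILE_BYTES_READ").getValue(); // 1024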

Example 17 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From class TestHistoryEventProtoConverter, method testConvertTaskFinishedEvent.

@Test(timeout = 5000)
public void testConvertTaskFinishedEvent() {
    String vertexName = "testVertexName";
    long startTime = random.nextLong();
    long finishTime = random.nextLong();
    TaskState state = TaskState.values()[random.nextInt(TaskState.values().length)];
    String diagnostics = "diagnostics message";
    TezCounters counters = new TezCounters();
    TaskFinishedEvent event = new TaskFinishedEvent(tezTaskID, vertexName, startTime, finishTime, tezTaskAttemptID, state, diagnostics, counters, 3);
    HistoryEventProto proto = converter.convert(event);
    assertCommon(proto, HistoryEventType.TASK_FINISHED, finishTime, EntityTypes.TEZ_TASK_ID, null, null, 6);
    assertEventData(proto, ATSConstants.STATUS, state.name());
    assertEventData(proto, ATSConstants.TIME_TAKEN, String.valueOf(finishTime - startTime));
    assertEventData(proto, ATSConstants.SUCCESSFUL_ATTEMPT_ID, tezTaskAttemptID.toString());
    assertEventData(proto, ATSConstants.NUM_FAILED_TASKS_ATTEMPTS, "3");
    assertEventData(proto, ATSConstants.DIAGNOSTICS, diagnostics);
    assertEventData(proto, ATSConstants.COUNTERS, null);
}
Also used: TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) HistoryEventProto(org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto) TaskState(org.apache.tez.dag.api.oldrecords.TaskState) TezCounters(org.apache.tez.common.counters.TezCounters) Test(org.junit.Test)
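
The event above carries an empty TezCounters, which is why the test asserts that ATSConstants.COUNTERS is absent. For contrast, here is a small sketch of how counters are typically populated and read back; the custom group and counter names are hypothetical, but findCounter, increment, and setValue are the real API (see counters.findCounter(TaskCounter...) in the later examples).

TezCounters counters = new TezCounters();
// findCounter creates the group and counter lazily on first access.
counters.findCounter(TaskCounter.OUTPUT_RECORDS).increment(42);
counters.findCounter("MyGroup", "MY_COUNTER").setValue(7); // hypothetical group/counter names
// Both groups and counters are iterable, which is how converters flatten them.
for (CounterGroup group : counters) {
    for (TezCounter counter : group) {
        System.out.println(group.getName() + "." + counter.getName() + "=" + counter.getValue());
    }
}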

Example 18 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From class TestUnorderedPartitionedKVWriter, method textTest.

public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys, int numLargevalues, int numLargeKvPairs, boolean pipeliningEnabled, boolean isFinalMergeEnabled) throws IOException, InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    int dagId = 1;
    String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
    Random random = new Random();
    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1, HashPartitioner.class);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, pipeliningEnabled);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, isFinalMergeEnabled);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }
    int numRecordsWritten = 0;
    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
        expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }
    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numPartitions, availableMemory);
    int sizePerBuffer = kvWriter.sizePerBuffer;
    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }
    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
        String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }
    if (pipeliningEnabled) {
        verify(outputContext, times(numLargeKeys)).sendEvents(anyListOf(Event.class));
    }
    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }
    if (pipeliningEnabled) {
        verify(outputContext, times(numLargevalues + numLargeKeys)).sendEvents(anyListOf(Event.class));
    }
    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
        String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }
    if (pipeliningEnabled) {
        verify(outputContext, times(numLargevalues + numLargeKeys + numLargeKvPairs)).sendEvents(anyListOf(Event.class));
    }
    List<Event> events = kvWriter.close();
    verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
    if (!pipeliningEnabled) {
        VertexManagerEvent vmEvent = null;
        for (Event event : events) {
            if (event instanceof VertexManagerEvent) {
                assertNull(vmEvent);
                vmEvent = (VertexManagerEvent) event;
            }
        }
        assertNotNull("expected a VertexManagerEvent in the returned events", vmEvent);
        VertexManagerEventPayloadProto vmEventPayload = VertexManagerEventPayloadProto.parseFrom(ByteString.copyFrom(vmEvent.getUserPayload().asReadOnlyBuffer()));
        assertEquals(numRecordsWritten, vmEventPayload.getNumRecord());
    }
    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());
    if (pipeliningEnabled || !isFinalMergeEnabled) {
        // verify spill data files and index file exist
        for (int i = 0; i < kvWriter.numSpills.get(); i++) {
            assertTrue(localFs.exists(kvWriter.outputFileHandler.getSpillFileForWrite(i, 0)));
            assertTrue(localFs.exists(kvWriter.outputFileHandler.getSpillIndexFileForWrite(i, 0)));
        }
        return;
    }
    // Validate the events
    assertEquals(2, events.size());
    assertTrue(events.get(0) instanceof VertexManagerEvent);
    VertexManagerEvent vme = (VertexManagerEvent) events.get(0);
    verifyPartitionStats(vme, partitionsWithData);
    assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());
    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
        checkPermissions(outputFilePath, spillFilePath);
    } else {
        // Special case for 0 records: nothing further to validate.
        return;
    }
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
        if (emptyPartitionBits.get(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false, 0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
            assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
        }
        inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}
Also used: TezTaskOutputFiles(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) HashMap(java.util.HashMap) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) ByteString(com.google.protobuf.ByteString) Configurable(org.apache.hadoop.conf.Configurable) TezCounter(org.apache.tez.common.counters.TezCounter) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) Random(java.util.Random) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Partitioner(org.apache.tez.runtime.library.api.Partitioner) HashPartitioner(org.apache.tez.runtime.library.partitioner.HashPartitioner) Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) Text(org.apache.hadoop.io.Text) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) LinkedListMultimap(com.google.common.collect.LinkedListMultimap) Multimap(com.google.common.collect.Multimap) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) TaskFailureType(org.apache.tez.runtime.api.TaskFailureType) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) Event(org.apache.tez.runtime.api.Event) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) VertexManagerEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto) TezTaskOutput(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput)
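
The empty-partition handling in the test above decodes a compressed BitSet from the event payload and compares cardinalities. The underlying bookkeeping is plain java.util.BitSet arithmetic; here is a self-contained sketch with four partitions, two of them empty:

BitSet partitionsWithData = new BitSet(4);
partitionsWithData.set(1);
partitionsWithData.set(3);
// Non-empty partition count is the cardinality; the rest are empty.
int numEmpty = 4 - partitionsWithData.cardinality(); // == 2
BitSet emptyPartitions = new BitSet(4);
emptyPartitions.set(0, 4);                  // mark all partitions ...
emptyPartitions.andNot(partitionsWithData); // ... then clear those with data
// This mirrors the test's assertion: numPartitions - partitionsWithData.cardinality()
// equals emptyPartitionBits.cardinality().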

Example 19 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From class TestUnorderedPartitionedKVWriter, method baseTest.

private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress, int maxSingleBufferSizeBytes, int bufferMergePercent, int availableMemory, boolean dataViaEventEnabled) throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    int dagId = 1;
    String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, maxSingleBufferSizeBytes);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT, bufferMergePercent);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED, dataViaEventEnabled);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }
    int numOutputs = numPartitions;
    int numRecordsWritten = 0;
    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }
    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
    int sizePerBuffer = kvWriter.sizePerBuffer;
    // IntW + LongW
    int sizePerRecord = 4 + 8;
    // Record + META_OVERHEAD
    int sizePerRecordWithOverhead = sizePerRecord + 12;
    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    BitSet partitionsWithData = new BitSet(numPartitions);
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();
    if (numPartitions == 1) {
        assertEquals(true, kvWriter.skipBuffers);
        // VM & DME events
        assertEquals(2, events.size());
        Event event1 = events.get(1);
        assertTrue(event1 instanceof CompositeDataMovementEvent);
        CompositeDataMovementEvent dme = (CompositeDataMovementEvent) event1;
        ByteBuffer bb = dme.getUserPayload();
        ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(bb));
        assertEquals(kvWriter.outputRecordsCounter.getValue(), shufflePayload.getNumRecord());
    }
    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer / kvWriter.spillLimit;
    verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());
    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    if (numPartitions > 1) {
        assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    }
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if ((!shouldCompress) && (!dataViaEventEnabled)) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    if (!dataViaEventEnabled) {
        assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    }
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        if (!dataViaEventEnabled) {
            assertTrue(additionalSpillBytesRead > 0);
            if (!shouldCompress) {
                assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
                assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            }
        } else {
            if (kvWriter.writer.getCompressedLength() > TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE_DEFAULT) {
                assertTrue(additionalSpillBytesWritten > 0);
            }
        }
    }
    if (!dataViaEventEnabled) {
        assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead);
    }
    // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
    assertTrue(numExpectedSpills >= numAdditionalSpillsCounter.getValue());
    BitSet emptyPartitionBits = null;
    // Verify the events returned
    assertEquals(2, events.size());
    assertTrue(events.get(0) instanceof VertexManagerEvent);
    VertexManagerEvent vme = (VertexManagerEvent) events.get(0);
    verifyPartitionStats(vme, partitionsWithData);
    assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }
    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten <= 0) {
        // Special case for 0 records: nothing further to validate.
        return;
    }
    boolean isInMem = eventProto.getData().hasData();
    assertTrue(localFs.exists(outputFilePath));
    assertEquals("Incorrect output permissions (user)", FsAction.READ_WRITE, localFs.getFileStatus(outputFilePath).getPermission().getUserAction());
    assertEquals("Incorrect output permissions (group)", FsAction.READ, localFs.getFileStatus(outputFilePath).getPermission().getGroupAction());
    if (!isInMem) {
        assertTrue(localFs.exists(spillFilePath));
        assertEquals("Incorrect index permissions (user)", FsAction.READ_WRITE, localFs.getFileStatus(spillFilePath).getPermission().getUserAction());
        assertEquals("Incorrect index permissions (group)", FsAction.READ, localFs.getFileStatus(spillFilePath).getPermission().getGroupAction());
        // verify no intermediate spill files have been left around
        synchronized (kvWriter.spillInfoList) {
            for (SpillInfo spill : kvWriter.spillInfoList) {
                assertFalse("lingering intermediate spill file " + spill.outPath, localFs.exists(spill.outPath));
            }
        }
    }
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        IFile.Reader reader = null;
        InputStream inStream;
        if (isInMem) {
            // Read from in memory payload
            int dataLoadSize = eventProto.getData().getData().size();
            inStream = new ByteArrayInputStream(eventProto.getData().getData().toByteArray());
            reader = new IFile.Reader(inStream, dataLoadSize, codec, null, null, false, 0, -1);
        } else {
            TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
            TezIndexRecord indexRecord = spillRecord.getIndex(i);
            if (skippedPartitions != null && skippedPartitions.contains(i)) {
                assertFalse("The Index Record for partition " + i + " should not have any data", indexRecord.hasData());
                continue;
            }
            FSDataInputStream tmpStream = FileSystem.getLocal(conf).open(outputFilePath);
            tmpStream.seek(indexRecord.getStartOffset());
            inStream = tmpStream;
            reader = new IFile.Reader(tmpStream, indexRecord.getPartLength(), codec, null, null, false, 0, -1);
        }
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
    verify(outputContext, atLeast(1)).notifyProgress();
}
Also used: TezTaskOutputFiles(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) HashMap(java.util.HashMap) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) ByteString(com.google.protobuf.ByteString) Configurable(org.apache.hadoop.conf.Configurable) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) TezCounter(org.apache.tez.common.counters.TezCounter) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) SpillInfo(org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.SpillInfo) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads) Path(org.apache.hadoop.fs.Path) ByteArrayInputStream(java.io.ByteArrayInputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) BitSet(java.util.BitSet) ByteBuffer(java.nio.ByteBuffer) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) LinkedListMultimap(com.google.common.collect.LinkedListMultimap) Multimap(com.google.common.collect.Multimap) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TaskFailureType(org.apache.tez.runtime.api.TaskFailureType) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) Event(org.apache.tez.runtime.api.Event) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskOutput(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput)
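
The counter assertions in baseTest follow directly from the record layout set up at the top of the method: a 4-byte IntWritable key, an 8-byte LongWritable value, and 12 bytes of per-record writer metadata. A worked example of the arithmetic, assuming 100 records written:

int numRecordsWritten = 100;
int sizePerRecord = 4 + 8;                          // IntWritable + LongWritable
int sizePerRecordWithOverhead = sizePerRecord + 12; // record + META_OVERHEAD
long expectedOutputBytes = numRecordsWritten * (long) sizePerRecord;                         // 1200
long expectedOutputBytesWithOverhead = numRecordsWritten * (long) sizePerRecordWithOverhead; // 2400
// OUTPUT_BYTES counts raw key/value bytes; OUTPUT_BYTES_WITH_OVERHEAD adds the
// writer's per-record bookkeeping, which is what the two assertEquals calls verify.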

Example 20 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in project tez by apache.

From class TestUnorderedPartitionedKVWriter, method baseTestWithFinalMergeDisabled.

@SuppressWarnings("unchecked")
private void baseTestWithFinalMergeDisabled(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    int dagId = 1;
    String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, -1);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, false);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }
    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;
    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
    int sizePerBuffer = kvWriter.sizePerBuffer;
    // IntW + LongW
    int sizePerRecord = 4 + 8;
    // Record + META_OVERHEAD
    int sizePerRecordWithOverhead = sizePerRecord + 12;
    BitSet partitionsWithData = new BitSet(numPartitions);
    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        partitionsWithData.set(partition);
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;
    ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
    List<Event> lastEvents = kvWriter.close();
    if (numPartitions == 1) {
        assertEquals(true, kvWriter.skipBuffers);
    }
    // Max events sent are spills + one VM event. If there are no spills, at least the
    // empty partitions would be sent out at the end.
    int spills = Math.max(1, kvWriter.numSpills.get());
    // spills + VMEvent
    assertEquals((spills + 1), lastEvents.size());
    verify(outputContext, atMost(0)).sendEvents(eventCaptor.capture());
    for (int i = 0; i < lastEvents.size(); i++) {
        Event event = lastEvents.get(i);
        if (event instanceof VertexManagerEvent) {
            // Partition stats are only verified when records were actually written.
            if (numRecordsWritten > 0) {
                verifyPartitionStats(((VertexManagerEvent) event), partitionsWithData);
            }
        }
    }
    verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());
    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    if (outputRecordsCounter.getValue() > 0) {
        assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    } else {
        assertEquals(0, outputBytesWithOverheadCounter.getValue());
    }
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue("fileOutputBytes=" + fileOutputBytes + ", outputRecordBytes=" + outputRecordBytesCounter.getValue(), fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
    assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    // No additional spill bytes are written when final merge is disabled.
    assertEquals(0, additionalSpillBytesWritten);
    // Nothing written means nothing is read back either.
    assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead);
    // No additional spills when final merge is disabled.
    assertEquals(0, numAdditionalSpillsCounter.getValue());
    assertTrue(lastEvents.size() > 0);
    // Get the last event
    int index = lastEvents.size() - 1;
    assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
    if (outputRecordsCounter.getValue() > 0) {
        // Ensure that this is the last event
        assertTrue(eventProto.getLastEvent());
    }
    // Verify that all path components carry spill ids when final merge is disabled
    Pattern mergePathComponentPattern = Pattern.compile("(.*)(_\\d+)");
    for (Event event : lastEvents) {
        if (!(event instanceof CompositeDataMovementEvent)) {
            continue;
        }
        cdme = (CompositeDataMovementEvent) event;
        eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
        assertEquals(false, eventProto.getPipelined());
        if (eventProto.hasPathComponent()) {
            // When final merge is disabled, the path component should end in _spillId
            Matcher matcher = mergePathComponentPattern.matcher(eventProto.getPathComponent());
            assertTrue("spill id should be present in path component " + eventProto.getPathComponent(), matcher.matches());
            assertEquals(2, matcher.groupCount());
            assertEquals(uniqueId, matcher.group(1));
            assertTrue("spill id should be present in path component", matcher.group(2) != null);
            Path outputPath = new Path(outputContext.getWorkDirs()[0], "output/" + eventProto.getPathComponent() + "/" + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING);
            Path indexPath = outputPath.suffix(Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
            checkPermissions(outputPath, indexPath);
        } else {
            assertEquals(0, eventProto.getSpillId());
            if (outputRecordsCounter.getValue() > 0) {
                assertEquals(true, eventProto.getLastEvent());
            } else {
                byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
                BitSet emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
                assertEquals(numPartitions, emptyPartitionBits.cardinality());
            }
        }
    }
    verify(outputContext, atLeast(1)).notifyProgress();
    // Verify that all spill files are available.
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
    if (numRecordsWritten > 0) {
        int numSpills = kvWriter.numSpills.get();
        for (int i = 0; i < numSpills; i++) {
            assertTrue(localFs.exists(taskOutput.getSpillFileForWrite(i, 10)));
            assertTrue(localFs.exists(taskOutput.getSpillIndexFileForWrite(i, 10)));
        }
    } else {
        return;
    }
}
Also used: TezTaskOutputFiles(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) Matcher(java.util.regex.Matcher) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) ByteString(com.google.protobuf.ByteString) Configurable(org.apache.hadoop.conf.Configurable) TezCounter(org.apache.tez.common.counters.TezCounter) List(java.util.List) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) Path(org.apache.hadoop.fs.Path) Pattern(java.util.regex.Pattern) BitSet(java.util.BitSet) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) TaskFailureType(org.apache.tez.runtime.api.TaskFailureType) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskOutput(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput)
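
The path-component validation near the end of the method hinges on the regex "(.*)(_\\d+)". A standalone sketch of the same matching; the uniqueId here is an example UUID standing in for the UUID.randomUUID().toString() the test generates:

Pattern mergePathComponentPattern = Pattern.compile("(.*)(_\\d+)");
String uniqueId = "550e8400-e29b-41d4-a716-446655440000"; // example UUID
String pathComponent = uniqueId + "_2";                   // spill id 2 appended by the writer
Matcher matcher = mergePathComponentPattern.matcher(pathComponent);
if (matcher.matches()) {
    String base = matcher.group(1);        // "550e8400-e29b-41d4-a716-446655440000"
    String spillSuffix = matcher.group(2); // "_2"
}
// Since a UUID contains no '_', group(1) always recovers the original uniqueId.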

Aggregations

TezCounters (org.apache.tez.common.counters.TezCounters): 102
Test (org.junit.Test): 34
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 24
TezCounter (org.apache.tez.common.counters.TezCounter): 18
InputContext (org.apache.tez.runtime.api.InputContext): 18
Configuration (org.apache.hadoop.conf.Configuration): 17
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 17
OutputContext (org.apache.tez.runtime.api.OutputContext): 15
DAG (org.apache.tez.dag.api.DAG): 11
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 10
HashMap (java.util.HashMap): 9
CounterGroup (org.apache.tez.common.counters.CounterGroup): 9
StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts): 8
ByteString (com.google.protobuf.ByteString): 7
IOException (java.io.IOException): 7
Map (java.util.Map): 7
Set (java.util.Set): 7
Path (org.apache.hadoop.fs.Path): 7
AggregateTezCounters (org.apache.tez.common.counters.AggregateTezCounters): 7
LimitExceededException (org.apache.tez.common.counters.LimitExceededException): 7