Search in sources :

Example 1 with VertexManagerEventPayloadProto

use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto in project tez by apache.

In class TestUnorderedPartitionedKVWriter, method textTest:

/**
 * End-to-end check of {@code UnorderedPartitionedKVWriter} using {@code Text} keys and values.
 *
 * <p>The test writes four groups of records (regular, large-key, large-value, and large
 * key+value pairs), closes the writer, and then validates the emitted events, the
 * {@code OUTPUT_LARGE_RECORDS} counter, the files on the local file system, and (when a final
 * merged output is produced) the actual key/value data read back from disk.
 *
 * <p>Relies on members declared elsewhere in the test class: {@code defaultConf},
 * {@code shouldCompress}, {@code localFs}, {@code HOST_STRING}, {@code SHUFFLE_PORT},
 * {@code createMockOutputContext}, {@code createConfiguration}, {@code createRandomString}
 * and {@code verifyPartitionStats}.
 *
 * @param numRegularRecords number of small records to write
 * @param numPartitions number of output partitions handed to the writer and the partitioner
 * @param availableMemory memory budget passed to the writer under test
 * @param numLargeKeys number of records whose key is sized at {@code sizePerBuffer} or more
 * @param numLargevalues number of records whose value is sized at {@code sizePerBuffer} or more
 * @param numLargeKvPairs number of records whose key and value are each sized at
 *        {@code sizePerBuffer / 2} or more
 * @param pipeliningEnabled value set for {@code TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED}
 * @param isFinalMergeEnabled value set for {@code TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT}
 * @throws IOException propagated from the writer, protobuf parsing, or file-system access
 * @throws InterruptedException propagated from the writer
 */
public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys, int numLargevalues, int numLargeKvPairs, boolean pipeliningEnabled, boolean isFinalMergeEnabled) throws IOException, InterruptedException {
    // ---- Set-up: mock context, configuration, optional codec ----
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    int dagId = 1;
    String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
    // Unseeded: the generated record contents differ from run to run.
    Random random = new Random();
    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1, HashPartitioner.class);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, pipeliningEnabled);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, isFinalMergeEnabled);
    // The codec is only needed later, to read the final output back when compression is on.
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }
    int numRecordsWritten = 0;
    // Expected (key, value) pairs per partition; entries are removed as they are read back.
    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
        expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }
    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numPartitions, availableMemory);
    int sizePerBuffer = kvWriter.sizePerBuffer;
    // Tracks which partitions received at least one record.
    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    // ---- Write regular (small) records ----
    for (int i = 0; i < numRegularRecords; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }
    // ---- Write records whose key alone is sized at sizePerBuffer or more ----
    for (int i = 0; i < numLargeKeys; i++) {
        String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }
    // With pipelining, sendEvents must have been invoked exactly once per large record so far.
    if (pipeliningEnabled) {
        verify(outputContext, times(numLargeKeys)).sendEvents(anyListOf(Event.class));
    }
    // ---- Write records whose value alone is sized at sizePerBuffer or more ----
    for (int i = 0; i < numLargevalues; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }
    // The expected sendEvents count is cumulative across the large-record phases.
    if (pipeliningEnabled) {
        verify(outputContext, times(numLargevalues + numLargeKeys)).sendEvents(anyListOf(Event.class));
    }
    // ---- Write records where key + val are large (but both can fit in the buffer individually) ----
    for (int i = 0; i < numLargeKvPairs; i++) {
        String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }
    if (pipeliningEnabled) {
        verify(outputContext, times(numLargevalues + numLargeKeys + numLargeKvPairs)).sendEvents(anyListOf(Event.class));
    }
    // ---- Close the writer and validate what it reported ----
    List<Event> events = kvWriter.close();
    verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
    if (!pipeliningEnabled) {
        // At most one VertexManagerEvent may be returned by close(); its payload must report
        // the total number of records written.
        VertexManagerEvent vmEvent = null;
        for (Event event : events) {
            if (event instanceof VertexManagerEvent) {
                assertNull(vmEvent);
                vmEvent = (VertexManagerEvent) event;
            }
        }
        // NOTE(review): if close() returned no VertexManagerEvent, vmEvent is still null here and
        // the next line fails with a NullPointerException rather than an assertion failure.
        VertexManagerEventPayloadProto vmEventPayload = VertexManagerEventPayloadProto.parseFrom(ByteString.copyFrom(vmEvent.getUserPayload().asReadOnlyBuffer()));
        assertEquals(numRecordsWritten, vmEventPayload.getNumRecord());
    }
    // Every record from the three "large" phases must have been counted as a large record.
    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());
    if (pipeliningEnabled || !isFinalMergeEnabled) {
        // No single merged output is validated in this mode: only verify that each spill's
        // data file and index file exist, then stop.
        for (int i = 0; i < kvWriter.numSpills.get(); i++) {
            assertTrue(localFs.exists(kvWriter.outputFileHandler.getSpillFileForWrite(i, 0)));
            assertTrue(localFs.exists(kvWriter.outputFileHandler.getSpillIndexFileForWrite(i, 0)));
        }
        return;
    }
    // ---- Final-merge path: validate the events ----
    // Exactly two events are expected: a VertexManagerEvent followed by a
    // CompositeDataMovementEvent covering all partitions.
    assertEquals(2, events.size());
    assertTrue(events.get(0) instanceof VertexManagerEvent);
    VertexManagerEvent vme = (VertexManagerEvent) events.get(0);
    verifyPartitionStats(vme, partitionsWithData);
    assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    // The payload carries an empty-partitions bitmap only when some partition got no data.
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());
    // ---- Final-merge path: verify the actual data on disk ----
    // NOTE(review): taskOutput is not referenced again in this method — confirm whether it
    // (and dagId) is still needed.
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
    Path outputFilePath = kvWriter.finalOutPath;
    // Despite its name, spillFilePath holds the writer's final *index* path.
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
        // Both files are expected to carry octal 0640 permissions.
        assertEquals("Incorrect output permissions", (short) 0640, localFs.getFileStatus(outputFilePath).getPermission().toShort());
        assertEquals("Incorrect index permissions", (short) 0640, localFs.getFileStatus(spillFilePath).getPermission().toShort());
    } else {
        // Special case for 0 records: there is nothing to read back.
        return;
    }
    // Read every non-empty partition back through its index record and tick off each
    // (key, value) pair against the expected multimap.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
        if (emptyPartitionBits.get(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false, 0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            // The partition is recomputed from the deserialized record rather than taken from i.
            int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
            assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
        }
        // NOTE(review): the stream is not closed if an assertion above fails mid-read.
        inStream.close();
    }
    // Every expected pair must have been consumed, leaving nothing unaccounted for.
    for (int i = 0; i < numPartitions; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}
Also used : TezTaskOutputFiles(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) HashMap(java.util.HashMap) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) ByteString(com.google.protobuf.ByteString) Configurable(org.apache.hadoop.conf.Configurable) TezCounter(org.apache.tez.common.counters.TezCounter) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) Random(java.util.Random) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Partitioner(org.apache.tez.runtime.library.api.Partitioner) HashPartitioner(org.apache.tez.runtime.library.partitioner.HashPartitioner) Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) Text(org.apache.hadoop.io.Text) TezCounters(org.apache.tez.common.counters.TezCounters) OutputContext(org.apache.tez.runtime.api.OutputContext) LinkedListMultimap(com.google.common.collect.LinkedListMultimap) Multimap(com.google.common.collect.Multimap) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) TaskFailureType(org.apache.tez.runtime.api.TaskFailureType) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) HashPartitioner(org.apache.tez.runtime.library.partitioner.HashPartitioner) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) 
FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) VertexManagerEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto) TezTaskOutput(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput)

Example 2 with VertexManagerEventPayloadProto

use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto in project tez by apache.

In class FairCartesianProductVertexManager, method onVertexManagerEventReceived:

/**
 * Accumulates the record count reported by a source task and then re-evaluates scheduling.
 *
 * <p>Events are ignored entirely once the vertex has been reconfigured, and also when they
 * carry a payload but originate from a vertex that is not present in
 * {@code srcVerticesByName}. An event without a payload only triggers
 * {@code tryScheduleTasks()}.
 *
 * @param vmEvent the vertex manager event to process
 * @throws IOException if the payload cannot be parsed, or propagated from scheduling
 */
@Override
public synchronized void onVertexManagerEventReceived(VertexManagerEvent vmEvent) throws IOException {
    // Nothing to do once reconfiguration has happened.
    if (vertexReconfigured) {
        return;
    }
    // No payload: there are no statistics to record, only scheduling to re-check.
    if (vmEvent.getUserPayload() == null) {
        tryScheduleTasks();
        return;
    }
    String producerVertexName = vmEvent.getProducerAttemptIdentifier().getTaskIdentifier().getVertexIdentifier().getName();
    SrcVertex source = srcVerticesByName.get(producerVertexName);
    if (source == null) {
        // The producer is not one of the tracked source vertices; skip without scheduling.
        return;
    }
    VertexManagerEventPayloadProto payload = VertexManagerEventPayloadProto.parseFrom(ByteString.copyFrom(vmEvent.getUserPayload()));
    source.numRecord += payload.getNumRecord();
    source.taskWithVMEvent.add(vmEvent.getProducerAttemptIdentifier().getTaskIdentifier().getIdentifier());
    tryScheduleTasks();
}
Also used : ByteString(com.google.protobuf.ByteString) VertexManagerEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto)

Example 3 with VertexManagerEventPayloadProto

use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto in project tez by apache.

In class ShuffleVertexManagerBase, method handleVertexManagerEvent:

/**
 * Records the output statistics carried by a vertex manager event.
 *
 * <p>Only the first event seen for a given producer task is processed; later events for the
 * same task are logged and dropped. When the event has a payload, the reported output size
 * and partition statistics (either the compressed bitmap form or the detailed per-partition
 * list) are folded into the per-source-vertex and overall totals.
 *
 * @param vmEvent the event to process
 * @throws IllegalStateException (via {@code Preconditions.checkState}) if the producer's
 *         vertex is not present in {@code srcVertexInfo}
 * @throws TezUncheckedException if the payload or its partition statistics cannot be decoded
 */
private void handleVertexManagerEvent(VertexManagerEvent vmEvent) {
    TaskIdentifier sourceTask = vmEvent.getProducerAttemptIdentifier().getTaskIdentifier();
    // Events from multiple attempts of the same task can currently be ignored because
    // their output will be the same.
    boolean firstEventForTask = taskWithVmEvents.add(sourceTask);
    if (!firstEventForTask) {
        LOG.info("Ignoring vertex manager event from: {}", sourceTask);
        return;
    }
    SourceVertexInfo vertexInfo = srcVertexInfo.get(sourceTask.getVertexIdentifier().getName());
    Preconditions.checkState(vertexInfo != null, "Unknown vmEvent from " + sourceTask);
    numVertexManagerEventsReceived++;
    long reportedOutputSize = 0;
    if (vmEvent.getUserPayload() != null) {
        VertexManagerEventPayloadProto payload;
        try {
            payload = VertexManagerEventPayloadProto.parseFrom(ByteString.copyFrom(vmEvent.getUserPayload()));
        } catch (InvalidProtocolBufferException e) {
            throw new TezUncheckedException(e);
        }
        // Remember the output size for the totals updated below.
        reportedOutputSize = payload.getOutputSize();
        if (payload.hasPartitionStats()) {
            // Bitmap form: decompress, deserialize, then hand over for parsing.
            try {
                byte[] inflated = TezCommonUtils.decompressByteStringToByteArray(payload.getPartitionStats(), inflater);
                RoaringBitmap bitmap = new RoaringBitmap();
                bitmap.deserialize(new DataInputStream(new NonSyncByteArrayInputStream(inflated)));
                parsePartitionStats(vertexInfo, bitmap);
            } catch (IOException e) {
                throw new TezUncheckedException(e);
            }
        } else if (payload.hasDetailedPartitionStats()) {
            // Detailed form: a list of per-partition sizes.
            parseDetailedPartitionStats(vertexInfo, payload.getDetailedPartitionStats().getSizeInMbList());
        }
        vertexInfo.numVMEventsReceived++;
        vertexInfo.outputSize += reportedOutputSize;
        completedSourceTasksOutputSize += reportedOutputSize;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("For attempt: {} received info of output size: {}" + " vertex numEventsReceived: {} vertex output size: {}" + " total numEventsReceived: {} total output size: {}",
            vmEvent.getProducerAttemptIdentifier(),
            reportedOutputSize,
            vertexInfo.numVMEventsReceived,
            vertexInfo.outputSize,
            numVertexManagerEventsReceived,
            completedSourceTasksOutputSize);
    }
}
Also used : TaskIdentifier(org.apache.tez.runtime.api.TaskIdentifier) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) ByteString(com.google.protobuf.ByteString) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) NonSyncByteArrayInputStream(org.apache.tez.common.io.NonSyncByteArrayInputStream) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) RoaringBitmap(org.roaringbitmap.RoaringBitmap) List(java.util.List) VertexManagerEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto)

Example 4 with VertexManagerEventPayloadProto

use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto in project tez by apache.

In class TestUnorderedPartitionedKVWriter, method getPartitionStats:

/**
 * Extracts per-partition sizes from the payload of a vertex manager event, asserting that
 * the payload shape matches the {@code reportPartitionStats} mode of the test.
 *
 * @param vme event whose user payload is parsed as a {@code VertexManagerEventPayloadProto}
 * @return {@code null} when partition-stats reporting is disabled; otherwise an array of
 *         sizes taken either directly from the detailed stats list (precise mode) or
 *         derived from the set bits of the compressed bitmap (non-precise mode)
 * @throws IOException if the payload cannot be parsed, decompressed or deserialized
 */
private int[] getPartitionStats(VertexManagerEvent vme) throws IOException {
    VertexManagerEventPayloadProto proto = VertexManagerEventPayloadProto.parseFrom(ByteString.copyFrom(vme.getUserPayload()));

    // Reporting disabled: the payload must carry neither form of stats.
    if (!reportPartitionStats.isEnabled()) {
        assertFalse(proto.hasPartitionStats());
        assertFalse(proto.hasDetailedPartitionStats());
        return null;
    }

    // Precise mode: copy the detailed size list verbatim.
    if (reportPartitionStats.isPrecise()) {
        assertTrue(proto.hasDetailedPartitionStats());
        List<Integer> detailed = proto.getDetailedPartitionStats().getSizeInMbList();
        int[] sizes = new int[detailed.size()];
        int slot = 0;
        for (int sizeInMb : detailed) {
            sizes[slot++] = sizeInMb;
        }
        return sizes;
    }

    // Non-precise mode: decode the compressed bitmap.
    assertTrue(proto.hasPartitionStats());
    byte[] inflated = TezCommonUtils.decompressByteStringToByteArray(proto.getPartitionStats());
    RoaringBitmap bitmap = new RoaringBitmap();
    bitmap.deserialize(new DataInputStream(new ByteArrayInputStream(inflated)));

    int[] sizes = new int[bitmap.getCardinality()];
    final DATA_RANGE_IN_MB[] ranges = DATA_RANGE_IN_MB.values();
    final int rangeCount = ranges.length;
    // Each set bit position encodes (index * rangeCount + range ordinal position).
    for (Integer bit : bitmap) {
        int sizeInMb = ranges[bit % rangeCount].getSizeInMB();
        if (sizeInMb > 0) {
            sizes[bit / rangeCount] += sizeInMb;
        }
    }
    return sizes;
}
Also used : ByteString(com.google.protobuf.ByteString) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) DataInputStream(java.io.DataInputStream) DATA_RANGE_IN_MB(org.apache.tez.runtime.library.utils.DATA_RANGE_IN_MB) RoaringBitmap(org.roaringbitmap.RoaringBitmap) ByteArrayInputStream(java.io.ByteArrayInputStream) VertexManagerEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto)

Aggregations

ByteString (com.google.protobuf.ByteString)4 VertexManagerEventPayloadProto (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto)4 DataInputStream (java.io.DataInputStream)2 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)2 RoaringBitmap (org.roaringbitmap.RoaringBitmap)2 LinkedListMultimap (com.google.common.collect.LinkedListMultimap)1 Multimap (com.google.common.collect.Multimap)1 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 IOException (java.io.IOException)1 BitSet (java.util.BitSet)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Random (java.util.Random)1 Configurable (org.apache.hadoop.conf.Configurable)1 Configuration (org.apache.hadoop.conf.Configuration)1 Path (org.apache.hadoop.fs.Path)1 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)1 Text (org.apache.hadoop.io.Text)1 CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)1