Search in sources :

Example 16 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class TestShuffleUtils method testGenerateOnSpillEvent_With_FinalMerge.

/**
 * Checks the events produced by {@code ShuffleUtils.generateEventOnSpill} when final merge is
 * enabled: a {@code VertexManagerEvent} followed by a single {@code CompositeDataMovementEvent}
 * covering all physical outputs, whose payload carries no spill details and flags the empty
 * partitions of the index file.
 */
@Test
public void testGenerateOnSpillEvent_With_FinalMerge() throws Exception {
    List<Event> events = Lists.newLinkedList();
    // Index file with 10 partitions; createIndexFile marks every even-numbered one as empty.
    Path indexFile = createIndexFile(10, false);
    boolean finalMergeEnabled = true;
    boolean isLastEvent = true;
    int spillId = 0;
    int physicalOutputs = 10;
    String pathComponent = "/attempt_x_y_0/file.out";
    String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    // normal code path where we do final merge all the time
    ShuffleUtils.generateEventOnSpill(events, finalMergeEnabled, isLastEvent, outputContext, spillId, new TezSpillRecord(indexFile, conf), physicalOutputs, true, pathComponent, null, false, auxiliaryService, TezCommonUtils.newBestCompressionDeflater());
    // Two events are expected: one for the vertex manager, one composite data movement event.
    Assert.assertEquals(2, events.size());
    Assert.assertTrue(events.get(0) instanceof VertexManagerEvent);
    Assert.assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
    Assert.assertEquals(physicalOutputs, cdme.getCount());
    Assert.assertEquals(0, cdme.getSourceIndexStart());
    ShuffleUserPayloads.DataMovementEventPayloadProto dmeProto = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    // With final merge, spill details should not be present
    Assert.assertFalse(dmeProto.hasSpillId());
    Assert.assertFalse(dmeProto.hasLastEvent() || dmeProto.getLastEvent());
    byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(dmeProto.getEmptyPartitions());
    BitSet emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
    // Partitions 0, 2, 4, 6 and 8 of the index file are empty, hence 5 bits set.
    Assert.assertEquals("Unexpected emptyPartitionsBitSet cardinality", 5, emptyPartitionsBitSet.cardinality());
}
Also used : Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) ByteString(com.google.protobuf.ByteString) Mockito.anyString(org.mockito.Mockito.anyString) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads) Test(org.junit.Test)

Example 17 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class TestShuffleUtils method createIndexFile.

/**
 * Writes a spill index file named {@code file.index.out} under {@code workingDir} and returns
 * its path. Every partition gets a fixed compressed length of 200 and consecutive start offsets.
 * Even-numbered partitions (or all partitions when {@code allEmptyPartitions} is set) are
 * written with a raw length of 0; the rest get a random raw length in [100, 200).
 *
 * @param numPartitions number of index records to write
 * @param allEmptyPartitions when true, every partition is written as empty
 * @return path of the index file that was written
 * @throws IOException if writing the spill record fails
 */
private Path createIndexFile(int numPartitions, boolean allEmptyPartitions) throws IOException {
    Path indexPath = new Path(workingDir, "file.index.out");
    TezSpillRecord record = new TezSpillRecord(numPartitions);
    // compressed length, identical for every partition
    final long compressedLength = 200;
    long offset = 0;
    for (int partition = 0; partition < numPartitions; partition++) {
        long randomRawLength = ThreadLocalRandom.current().nextLong(100, 200);
        boolean emptyPartition = allEmptyPartitions || partition % 2 == 0;
        // a raw length of 0 indicates an empty partition, see TEZ-3605
        long rawLength = emptyPartition ? 0 : randomRawLength;
        record.putIndex(new TezIndexRecord(offset, rawLength, compressedLength), partition);
        offset += compressedLength;
    }
    record.writeToFile(indexPath, conf);
    return indexPath;
}
Also used : Path(org.apache.hadoop.fs.Path) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)

Example 18 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class TestDefaultSorter method testEmptyPartitionsHelper.

/**
 * Runs a {@code DefaultSorter} over {@code numKeys} keys spread across 50 partitions with final
 * merge enabled, then checks the raw length recorded for every partition without data, both in
 * the cached per-spill index records and in the final index file.
 *
 * @param numKeys number of keys to write; the test expects one spill per key (one spill when 0)
 * @param sendEmptyPartitionDetails value for
 *        {@code TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED}; when true, data-less
 *        partitions must have raw length 0, otherwise raw length 6
 * @throws IOException if the sorter or the index file access fails
 */
public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDetails) throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED, sendEmptyPartitionDetails);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
    conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    int partitions = 50;
    DefaultSorter sorter = new DefaultSorter(context, conf, partitions, handler.getMemoryAssigned());
    writeData(sorter, numKeys, 1000000);
    // Even with no keys a single spill is expected; otherwise one spill per key.
    int expectedSpills = (numKeys == 0) ? 1 : numKeys;
    Assert.assertEquals(expectedSpills, sorter.getNumSpills());
    verifyCounters(sorter, context);
    verifyOutputPermissions(context.getUniqueIdentifier());
    // Per-spill index records are only checked when the sorter kept them in its cache.
    if (sorter.indexCacheList.size() != 0) {
        for (int i = 0; i < sorter.getNumSpills(); i++) {
            assertEmptyPartitionRawLengths(sorter.indexCacheList.get(i), partitions, sendEmptyPartitionDetails);
        }
    }
    Path indexFile = sorter.getFinalIndexFile();
    assertEmptyPartitionRawLengths(new TezSpillRecord(indexFile, conf), partitions, sendEmptyPartitionDetails);
}

/**
 * Asserts the raw length of every partition in {@code spillRecord} that has no data: 0 when
 * empty-partition details are sent, 6 otherwise.
 */
private static void assertEmptyPartitionRawLengths(TezSpillRecord spillRecord, int partitions, boolean sendEmptyPartitionDetails) throws IOException {
    // NOTE(review): 6 is presumably the size of an empty serialized segment - confirm.
    long expectedRawLength = sendEmptyPartitionDetails ? 0 : 6;
    for (int partition = 0; partition < partitions; partition++) {
        TezIndexRecord tezIndexRecord = spillRecord.getIndex(partition);
        if (tezIndexRecord.hasData()) {
            continue;
        }
        Assert.assertEquals("Unexpected raw length for " + partition + "th partition", expectedRawLength, tezIndexRecord.getRawLength());
    }
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) Path(org.apache.hadoop.fs.Path) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler) OutputContext(org.apache.tez.runtime.api.OutputContext)

Example 19 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class OrderedPartitionedKVOutput method generateEvents.

/**
 * Builds the events to send for this output. Events are generated from the sorter's final
 * index file only when final merge is enabled and pipelined shuffle is not in use; otherwise
 * the returned list is empty.
 *
 * @return the generated events, possibly empty
 * @throws IOException if the final index file cannot be read or event generation fails
 */
private List<Event> generateEvents() throws IOException {
    List<Event> generated = Lists.newLinkedList();
    if (!finalMergeEnabled || pipelinedShuffle) {
        // Nothing to report from here in this mode.
        return generated;
    }
    final boolean isLastEvent = true;
    final int spillId = 0;
    String auxiliaryService = conf.get(
        TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
        TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    ShuffleUtils.generateEventOnSpill(
        generated,
        finalMergeEnabled,
        isLastEvent,
        getContext(),
        spillId,
        new TezSpillRecord(sorter.getFinalIndexFile(), conf),
        getNumPhysicalOutputs(),
        sendEmptyPartitionDetails,
        getContext().getUniqueIdentifier(),
        sorter.getPartitionStats(),
        sorter.reportDetailedPartitionStats(),
        auxiliaryService,
        deflater);
    return generated;
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) Event(org.apache.tez.runtime.api.Event)

Example 20 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class IndexCache method readIndexFileToCache.

/**
 * Returns the cached index information for {@code mapId}, reading the index file into the cache
 * if no entry exists yet.
 *
 * <p>A fresh placeholder entry is registered with {@code putIfAbsent}. If another entry was
 * already present, this call waits on it until it is no longer under construction and returns
 * it. Otherwise this call reads the spill record itself, publishes it on the placeholder and
 * wakes any threads waiting on it.
 *
 * @param indexFileName path of the index file to read on a cache miss
 * @param mapId cache key
 * @param expectedIndexOwner passed through to the {@code TezSpillRecord} constructor
 * @return the cached or newly loaded index information
 * @throws IOException if the index file cannot be read, or if the thread is interrupted while
 *         waiting for another thread to finish loading the entry (the interrupt status is
 *         restored before the exception is thrown)
 */
private IndexInformation readIndexFileToCache(Path indexFileName, String mapId, String expectedIndexOwner) throws IOException {
    IndexInformation info;
    IndexInformation newInd = new IndexInformation();
    if ((info = cache.putIfAbsent(mapId, newInd)) != null) {
        // Another entry is already registered for this mapId; wait until it is fully built.
        synchronized (info) {
            while (isUnderConstruction(info)) {
                try {
                    info.wait();
                } catch (InterruptedException e) {
                    // Restore the interrupt status so callers further up can still observe it.
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted waiting for construction", e);
                }
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("IndexCache HIT: MapId " + mapId + " found");
        }
        return info;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("IndexCache MISS: MapId " + mapId + " not found");
    }
    TezSpillRecord tmp = null;
    try {
        tmp = new TezSpillRecord(indexFileName, conf, expectedIndexOwner);
    } catch (Throwable e) {
        // On failure publish an empty record and drop the placeholder from the cache, so that
        // waiters are released by the finally block below instead of blocking forever.
        tmp = new TezSpillRecord(0);
        cache.remove(mapId);
        throw new IOException("Error Reading IndexFile", e);
    } finally {
        // Runs on both success and failure: publish the record and wake all waiting threads.
        synchronized (newInd) {
            newInd.mapSpillRecord = tmp;
            newInd.notifyAll();
        }
    }
    queue.add(mapId);
    // Account for the new entry and evict older entries once the memory budget is exceeded.
    if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
        freeIndexInformation();
    }
    return newInd;
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) IOException(java.io.IOException)

Aggregations

TezSpillRecord (org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord)20 Path (org.apache.hadoop.fs.Path)14 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)11 Event (org.apache.tez.runtime.api.Event)9 BitSet (java.util.BitSet)7 ByteString (com.google.protobuf.ByteString)6 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)6 IFile (org.apache.tez.runtime.library.common.sort.impl.IFile)6 IOException (java.io.IOException)5 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)5 VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent)5 Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)5 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)4 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)3 OutputContext (org.apache.tez.runtime.api.OutputContext)3 ShuffleUserPayloads (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads)3 Test (org.junit.Test)3 Mockito.anyString (org.mockito.Mockito.anyString)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 LinkedListMultimap (com.google.common.collect.LinkedListMultimap)2