Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in the Apache Tez project.
From the class TestShuffleUtils, method testGenerateOnSpillEvent_With_FinalMerge.
/**
 * Checks the events produced by {@code ShuffleUtils.generateEventOnSpill} when final merge is
 * enabled and the call is flagged as the last event: a {@code VertexManagerEvent} followed by a
 * single {@code CompositeDataMovementEvent} spanning all physical outputs, whose payload carries
 * no spill details and marks half of the partitions as empty.
 *
 * @throws Exception if the index file cannot be created/read or the payload cannot be parsed
 */
@Test
public void testGenerateOnSpillEvent_With_FinalMerge() throws Exception {
    List<Event> events = Lists.newLinkedList();
    // 10 partitions, not all forced empty (createIndexFile still empties every even partition).
    Path indexFile = createIndexFile(10, false);
    boolean finalMergeEnabled = true;
    boolean isLastEvent = true;
    int spillId = 0;
    int physicalOutputs = 10;
    String pathComponent = "/attempt_x_y_0/file.out";
    String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);

    // normal code path where we do final merge all the time
    ShuffleUtils.generateEventOnSpill(events, finalMergeEnabled, isLastEvent, outputContext, spillId, new TezSpillRecord(indexFile, conf), physicalOutputs, true, pathComponent, null, false, auxiliaryService, TezCommonUtils.newBestCompressionDeflater());

    // Two events expected: one VertexManagerEvent and one CompositeDataMovementEvent.
    // assertEquals (rather than assertTrue(a == b)) reports the actual value on failure.
    Assert.assertEquals(2, events.size());
    Assert.assertTrue(events.get(0) instanceof VertexManagerEvent);
    Assert.assertTrue(events.get(1) instanceof CompositeDataMovementEvent);

    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
    Assert.assertEquals(physicalOutputs, cdme.getCount());
    Assert.assertEquals(0, cdme.getSourceIndexStart());

    ShuffleUserPayloads.DataMovementEventPayloadProto dmeProto = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    // With final merge, spill details should not be present
    Assert.assertFalse(dmeProto.hasSpillId());
    Assert.assertFalse(dmeProto.hasLastEvent() || dmeProto.getLastEvent());

    // The payload's empty-partition bitmap is compressed; decode it and count the set bits.
    byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(dmeProto.getEmptyPartitions());
    BitSet emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
    // Partitions 0, 2, 4, 6 and 8 of the 10 written by createIndexFile have a raw length of 0.
    Assert.assertEquals("emptyPartitionBitSet cardinality (expecting 5) = " + emptyPartitionsBitSet.cardinality(), 5, emptyPartitionsBitSet.cardinality());
}
Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in the Apache Tez project.
From the class TestShuffleUtils, method createIndexFile.
/**
 * Writes a spill index file named {@code file.index.out} under {@code workingDir} and returns
 * its path.
 *
 * <p>Every partition is given the same compressed length (200) and is placed directly after the
 * previous one. Even-numbered partitions, or all partitions when {@code allEmptyPartitions} is
 * set, get a raw length of 0; the rest get a random raw length in {@code [100, 200)}.
 *
 * @param numPartitions number of index records to write
 * @param allEmptyPartitions when {@code true}, every partition is written with raw length 0
 * @return path of the index file that was written
 * @throws IOException declared for failures while writing the index file
 */
private Path createIndexFile(int numPartitions, boolean allEmptyPartitions) throws IOException {
    final long compressedLength = 200;
    Path indexPath = new Path(workingDir, "file.index.out");
    TezSpillRecord record = new TezSpillRecord(numPartitions);

    for (int partition = 0; partition < numPartitions; partition++) {
        long randomRawLength = ThreadLocalRandom.current().nextLong(100, 200);
        // A raw length of 0 indicates an empty partition, see TEZ-3605.
        boolean emptyPartition = allEmptyPartitions || partition % 2 == 0;
        long rawLength = emptyPartition ? 0 : randomRawLength;
        // Partitions are laid out back to back, each compressedLength bytes long.
        long offset = partition * compressedLength;
        record.putIndex(new TezIndexRecord(offset, rawLength, compressedLength), partition);
    }

    record.writeToFile(indexPath, conf);
    return indexPath;
}
Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in the Apache Tez project.
From the class TestDefaultSorter, method testEmptyPartitionsHelper.
/**
 * Runs a {@code DefaultSorter} with final merge enabled and a 1 MB sort buffer over
 * {@code numKeys} keys, then checks the raw length recorded for every partition that reports no
 * data, both in the sorter's cached per-spill index records and in the final index file.
 *
 * @param numKeys number of keys passed to {@code writeData}; the sorter is expected to report
 *     one spill per key, or exactly one spill when this is 0
 * @param sendEmptyPartitionDetails value applied to
 *     {@code TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED}; when {@code true} partitions
 *     without data must have raw length 0, otherwise raw length 6
 * @throws IOException declared for failures while sorting or reading the index file
 */
public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDetails) throws IOException {
    OutputContext context = createTezOutputContext();
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED, sendEmptyPartitionDetails);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
    conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
    MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
    context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler);
    int partitions = 50;
    DefaultSorter sorter = new DefaultSorter(context, conf, partitions, handler.getMemoryAssigned());
    writeData(sorter, numKeys, 1000000);

    // assertEquals reports the actual spill count on failure, unlike assertTrue(a == b).
    int expectedSpills = (numKeys == 0) ? 1 : numKeys;
    Assert.assertEquals(expectedSpills, sorter.getNumSpills());

    verifyCounters(sorter, context);
    verifyOutputPermissions(context.getUniqueIdentifier());

    // Per-spill index records are only checked when the sorter has cached them.
    if (sorter.indexCacheList.size() != 0) {
        for (int i = 0; i < sorter.getNumSpills(); i++) {
            TezSpillRecord record = sorter.indexCacheList.get(i);
            assertRawLengthOfPartitionsWithoutData(record, partitions, sendEmptyPartitionDetails, " (spill " + i + ")");
        }
    }

    // The merged, final index file must follow the same rule.
    Path indexFile = sorter.getFinalIndexFile();
    TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
    assertRawLengthOfPartitionsWithoutData(spillRecord, partitions, sendEmptyPartitionDetails, "");
}

/**
 * Asserts the raw length of every index record in {@code record} whose {@code hasData()} is
 * false: 0 when empty-partition details are sent, 6 otherwise.
 *
 * @param record spill record whose index entries are inspected
 * @param partitions number of partitions to inspect, starting at index 0
 * @param sendEmptyPartitionDetails selects the expected raw length (0 if {@code true}, else 6)
 * @param messageSuffix text appended to the failure message to identify the record's origin
 */
private void assertRawLengthOfPartitionsWithoutData(TezSpillRecord record, int partitions, boolean sendEmptyPartitionDetails, String messageSuffix) {
    // NOTE(review): 6 is presumably the size of an empty partition's on-disk framing — confirm.
    long expectedRawLength = sendEmptyPartitionDetails ? 0 : 6;
    for (int partition = 0; partition < partitions; partition++) {
        TezIndexRecord indexRecord = record.getIndex(partition);
        if (indexRecord.hasData()) {
            continue;
        }
        Assert.assertEquals("Unexpected raw length for " + partition + "th partition" + messageSuffix, expectedRawLength, indexRecord.getRawLength());
    }
}
Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in the Apache Tez project.
From the class OrderedPartitionedKVOutput, method generateEvents.
/**
 * Builds the list of events describing this output's final index file.
 *
 * <p>Events are generated only when final merge is enabled and pipelined shuffle is off; in
 * every other case the returned list is empty.
 *
 * @return a new list holding the generated events, possibly empty
 * @throws IOException declared for failures while reading the final index file or building
 *     the events
 */
private List<Event> generateEvents() throws IOException {
    List<Event> generated = Lists.newLinkedList();
    if (!finalMergeEnabled || pipelinedShuffle) {
        // Nothing to report from here for this configuration.
        return generated;
    }

    String shuffleServiceId = conf.get(
        TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID,
        TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    boolean lastEvent = true;
    ShuffleUtils.generateEventOnSpill(
        generated,
        finalMergeEnabled,
        lastEvent,
        getContext(),
        0, // spill id
        new TezSpillRecord(sorter.getFinalIndexFile(), conf),
        getNumPhysicalOutputs(),
        sendEmptyPartitionDetails,
        getContext().getUniqueIdentifier(),
        sorter.getPartitionStats(),
        sorter.reportDetailedPartitionStats(),
        shuffleServiceId,
        deflater);
    return generated;
}
Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in the Apache Tez project.
From the class IndexCache, method readIndexFileToCache.
/**
 * Returns the cached index information for {@code mapId}, reading the index file into the cache
 * if no entry exists yet.
 *
 * <p>A fresh placeholder is registered with {@code cache.putIfAbsent}. If another entry was
 * already present, this call waits on that entry until {@code isUnderConstruction} is false and
 * returns it. Otherwise this call reads the index file, publishes the result on the placeholder
 * and wakes all waiters.
 *
 * <p>If reading fails, the placeholder is populated with an empty {@code TezSpillRecord(0)} (so
 * that waiters are released), the entry is removed from the cache, and an {@code IOException} is
 * thrown to this caller.
 *
 * @param indexFileName path of the index file to read
 * @param mapId cache key identifying the map output
 * @param expectedIndexOwner passed through to the {@code TezSpillRecord} constructor
 * @return the index information for {@code mapId}
 * @throws IOException if the index file cannot be read, or if the thread is interrupted while
 *     waiting for another thread to finish loading the entry (the interrupt status is restored
 *     before throwing)
 */
private IndexInformation readIndexFileToCache(Path indexFileName, String mapId, String expectedIndexOwner) throws IOException {
    IndexInformation info;
    IndexInformation newInd = new IndexInformation();
    if ((info = cache.putIfAbsent(mapId, newInd)) != null) {
        // An entry already exists; wait until its loader has published the spill record.
        synchronized (info) {
            while (isUnderConstruction(info)) {
                try {
                    info.wait();
                } catch (InterruptedException e) {
                    // Restore the interrupt status so callers further up can still observe it;
                    // wrapping the exception alone would silently clear the flag.
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted waiting for construction", e);
                }
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("IndexCache HIT: MapId " + mapId + " found");
        }
        return info;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("IndexCache MISS: MapId " + mapId + " not found");
    }
    TezSpillRecord tmp = null;
    try {
        tmp = new TezSpillRecord(indexFileName, conf, expectedIndexOwner);
    } catch (Throwable e) {
        // Publish an empty record (via the finally block) so waiters do not block forever,
        // and drop the entry so a later call can retry the read.
        tmp = new TezSpillRecord(0);
        cache.remove(mapId);
        throw new IOException("Error Reading IndexFile", e);
    } finally {
        // Runs on success and on failure: hand the record to the placeholder and wake waiters.
        synchronized (newInd) {
            newInd.mapSpillRecord = tmp;
            newInd.notifyAll();
        }
    }
    // Only reached on a successful read: track the entry and account for its size.
    queue.add(mapId);
    if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
        freeIndexInformation();
    }
    return newInd;
}
Aggregations