Example use of org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.SpillInfo in the Apache Tez project.
Taken from the class TestUnorderedPartitionedKVWriter, method baseTest.
/**
 * Writes generated (IntWritable, LongWritable) records through an
 * {@link UnorderedPartitionedKVWriter} and then verifies the task counters, the events returned
 * by {@code close()}, and the contents of the final output and index files.
 *
 * @param numRecords number of records generated; record {@code i} uses {@code i} as key and value
 * @param numPartitions number of output partitions handed to the writer
 * @param skippedPartitions partitions whose records are generated but not written; may be
 *        {@code null} to write every record
 * @param shouldCompress forwarded to {@code createConfiguration}; when {@code true} a
 *        {@code DefaultCodec} is also used to read the output back
 * @param maxSingleBufferSizeBytes forwarded to {@code createConfiguration}
 * @param bufferMergePercent value stored under
 *        {@code TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT}
 * @param availableMemory forwarded to the writer under test
 * @throws IOException if writing, closing or reading back the output fails
 * @throws InterruptedException declared for the writer calls — presumably thrown by
 *         {@code close()}; confirm against the writer
 */
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress, int maxSingleBufferSizeBytes, int bufferMergePercent, int availableMemory) throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    int dagId = 1;
    String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, maxSingleBufferSizeBytes);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT, bufferMergePercent);

    // The codec is only needed to read compressed output back during verification.
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    int numRecordsWritten = 0;

    // One multimap of expected key -> value pairs per partition; entries are removed as they are read back.
    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
    int sizePerBuffer = kvWriter.sizePerBuffer;
    // IntW + LongW
    int sizePerRecord = 4 + 8;
    // Record + META_OVERHEAD
    int sizePerRecordWithOverhead = sizePerRecord + 12;

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    BitSet partitionsWithData = new BitSet(numPartitions);
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        // Records that map to a skipped partition are dropped before reaching the writer.
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    if (numPartitions == 1) {
        assertTrue(kvWriter.skipBuffers);
    }

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer / kvWriter.spillLimit;

    verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));

    // After close() the writer should hold no buffers.
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWrittenCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);

    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    if (numPartitions > 1) {
        assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    }
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());

    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        // Only the uncompressed case has a lower bound tied to the raw record bytes.
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }

    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWrittenCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead);
    // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
    assertTrue(numExpectedSpills >= numAdditionalSpillsCounter.getValue());

    // Verify the events returned: a VertexManagerEvent followed by a CompositeDataMovementEvent.
    BitSet emptyPartitionBits = null;
    assertEquals(2, events.size());

    assertTrue(events.get(0) instanceof VertexManagerEvent);
    VertexManagerEvent vme = (VertexManagerEvent) events.get(0);
    verifyPartitionStats(vme, partitionsWithData);

    assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));

    if (skippedPartitions == null && numRecordsWritten > 0) {
        // Nothing was skipped and data was written, so no empty-partition bitmap is expected.
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            // Reaching here with records written implies skippedPartitions is non-null.
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }

    // Host, port and path component are expected only when at least one partition has data.
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    // NOTE(review): taskOutput is never read below; kept in case the constructor has side
    // effects — confirm and remove.
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path indexFilePath = kvWriter.finalIndexPath;

    // Special case for 0 records: the output files are not checked.
    if (numRecordsWritten <= 0) {
        return;
    }

    assertTrue(localFs.exists(outputFilePath));
    assertTrue(localFs.exists(indexFilePath));

    // verify no intermediate spill files have been left around
    synchronized (kvWriter.spillInfoList) {
        for (SpillInfo spill : kvWriter.spillInfoList) {
            assertFalse("lingering intermediate spill file " + spill.outPath, localFs.exists(spill.outPath));
        }
    }

    TezSpillRecord spillRecord = new TezSpillRecord(indexFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            assertFalse("The Index Record for partition " + i + " should not have any data", indexRecord.hasData());
            continue;
        }
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        try {
            inStream.seek(indexRecord.getStartOffset());
            IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false, 0, -1);
            while (reader.nextRawKey(keyBuffer)) {
                reader.nextRawValue(valBuffer);
                keyDeser.readFields(keyBuffer);
                valDeser.readFields(valBuffer);
                int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
                // Each record read must match (and consume) one expected entry.
                assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
            }
        } finally {
            // Close the stream even when an assertion or read fails, so the file handle is not leaked.
            inStream.close();
        }
    }

    // Every expected record must have been consumed by the read-back above.
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
    verify(outputContext, atLeast(1)).notifyProgress();
}
Aggregations