use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class Utils method parseTezCountersFromJSON.
/**
* Parse tez counters from json
*
* @param jsonObject
* @return TezCounters
* @throws JSONException
*/
public static TezCounters parseTezCountersFromJSON(JSONObject jsonObject) throws JSONException {
TezCounters counters = new TezCounters();
if (jsonObject == null) {
// empty counters.
return counters;
}
final JSONArray counterGroupNodes = jsonObject.optJSONArray(Constants.COUNTER_GROUPS);
if (counterGroupNodes != null) {
for (int i = 0; i < counterGroupNodes.length(); i++) {
JSONObject counterGroupNode = counterGroupNodes.optJSONObject(i);
final String groupName = counterGroupNode.optString(Constants.COUNTER_GROUP_NAME);
final String groupDisplayName = counterGroupNode.optString(Constants.COUNTER_GROUP_DISPLAY_NAME, groupName);
CounterGroup group = counters.addGroup(groupName, groupDisplayName);
final JSONArray counterNodes = counterGroupNode.optJSONArray(Constants.COUNTERS);
// Parse counter nodes
for (int j = 0; j < counterNodes.length(); j++) {
JSONObject counterNode = counterNodes.optJSONObject(j);
final String counterName = counterNode.getString(Constants.COUNTER_NAME);
final String counterDisplayName = counterNode.optString(Constants.COUNTER_DISPLAY_NAME, counterName);
final long counterValue = counterNode.getLong(Constants.COUNTER_VALUE);
addCounter(group, counterName, counterDisplayName, counterValue);
}
}
}
return counters;
}
use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestHistoryEventProtoConverter method testConvertTaskFinishedEvent.
@Test(timeout = 5000)
public void testConvertTaskFinishedEvent() {
String vertexName = "testVertexName";
long startTime = random.nextLong();
long finishTime = random.nextLong();
TaskState state = TaskState.values()[random.nextInt(TaskState.values().length)];
String diagnostics = "diagnostics message";
TezCounters counters = new TezCounters();
TaskFinishedEvent event = new TaskFinishedEvent(tezTaskID, vertexName, startTime, finishTime, tezTaskAttemptID, state, diagnostics, counters, 3);
HistoryEventProto proto = converter.convert(event);
assertCommon(proto, HistoryEventType.TASK_FINISHED, finishTime, EntityTypes.TEZ_TASK_ID, null, null, 6);
assertEventData(proto, ATSConstants.STATUS, state.name());
assertEventData(proto, ATSConstants.TIME_TAKEN, String.valueOf(finishTime - startTime));
assertEventData(proto, ATSConstants.SUCCESSFUL_ATTEMPT_ID, tezTaskAttemptID.toString());
assertEventData(proto, ATSConstants.NUM_FAILED_TASKS_ATTEMPTS, "3");
assertEventData(proto, ATSConstants.DIAGNOSTICS, diagnostics);
assertEventData(proto, ATSConstants.COUNTERS, null);
}
use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestUnorderedPartitionedKVWriter method textTest.
public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys, int numLargevalues, int numLargeKvPairs, boolean pipeliningEnabled, boolean isFinalMergeEnabled) throws IOException, InterruptedException {
Partitioner partitioner = new HashPartitioner();
ApplicationId appId = ApplicationId.newInstance(10000000, 1);
TezCounters counters = new TezCounters();
String uniqueId = UUID.randomUUID().toString();
int dagId = 1;
String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
Random random = new Random();
Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1, HashPartitioner.class);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, pipeliningEnabled);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, isFinalMergeEnabled);
CompressionCodec codec = null;
if (shouldCompress) {
codec = new DefaultCodec();
((Configurable) codec).setConf(conf);
}
int numRecordsWritten = 0;
Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
for (int i = 0; i < numPartitions; i++) {
expectedValues.put(i, LinkedListMultimap.<String, String>create());
}
UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numPartitions, availableMemory);
int sizePerBuffer = kvWriter.sizePerBuffer;
BitSet partitionsWithData = new BitSet(numPartitions);
Text keyText = new Text();
Text valText = new Text();
for (int i = 0; i < numRegularRecords; i++) {
String key = createRandomString(Math.abs(random.nextInt(10)));
String val = createRandomString(Math.abs(random.nextInt(20)));
keyText.set(key);
valText.set(val);
int partition = partitioner.getPartition(keyText, valText, numPartitions);
partitionsWithData.set(partition);
expectedValues.get(partition).put(key, val);
kvWriter.write(keyText, valText);
numRecordsWritten++;
}
// Write Large key records
for (int i = 0; i < numLargeKeys; i++) {
String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
String val = createRandomString(Math.abs(random.nextInt(20)));
keyText.set(key);
valText.set(val);
int partition = partitioner.getPartition(keyText, valText, numPartitions);
partitionsWithData.set(partition);
expectedValues.get(partition).put(key, val);
kvWriter.write(keyText, valText);
numRecordsWritten++;
}
if (pipeliningEnabled) {
verify(outputContext, times(numLargeKeys)).sendEvents(anyListOf(Event.class));
}
// Write Large val records
for (int i = 0; i < numLargevalues; i++) {
String key = createRandomString(Math.abs(random.nextInt(10)));
String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
keyText.set(key);
valText.set(val);
int partition = partitioner.getPartition(keyText, valText, numPartitions);
partitionsWithData.set(partition);
expectedValues.get(partition).put(key, val);
kvWriter.write(keyText, valText);
numRecordsWritten++;
}
if (pipeliningEnabled) {
verify(outputContext, times(numLargevalues + numLargeKeys)).sendEvents(anyListOf(Event.class));
}
// Write records where key + val are large (but both can fit in the buffer individually)
for (int i = 0; i < numLargeKvPairs; i++) {
String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
keyText.set(key);
valText.set(val);
int partition = partitioner.getPartition(keyText, valText, numPartitions);
partitionsWithData.set(partition);
expectedValues.get(partition).put(key, val);
kvWriter.write(keyText, valText);
numRecordsWritten++;
}
if (pipeliningEnabled) {
verify(outputContext, times(numLargevalues + numLargeKeys + numLargeKvPairs)).sendEvents(anyListOf(Event.class));
}
List<Event> events = kvWriter.close();
verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
if (!pipeliningEnabled) {
VertexManagerEvent vmEvent = null;
for (Event event : events) {
if (event instanceof VertexManagerEvent) {
assertNull(vmEvent);
vmEvent = (VertexManagerEvent) event;
}
}
VertexManagerEventPayloadProto vmEventPayload = VertexManagerEventPayloadProto.parseFrom(ByteString.copyFrom(vmEvent.getUserPayload().asReadOnlyBuffer()));
assertEquals(numRecordsWritten, vmEventPayload.getNumRecord());
}
TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());
if (pipeliningEnabled || !isFinalMergeEnabled) {
// verify spill data files and index file exist
for (int i = 0; i < kvWriter.numSpills.get(); i++) {
assertTrue(localFs.exists(kvWriter.outputFileHandler.getSpillFileForWrite(i, 0)));
assertTrue(localFs.exists(kvWriter.outputFileHandler.getSpillIndexFileForWrite(i, 0)));
}
return;
}
// Validate the events
assertEquals(2, events.size());
assertTrue(events.get(0) instanceof VertexManagerEvent);
VertexManagerEvent vme = (VertexManagerEvent) events.get(0);
verifyPartitionStats(vme, partitionsWithData);
assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
assertEquals(0, cdme.getSourceIndexStart());
assertEquals(numPartitions, cdme.getCount());
DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
assertFalse(eventProto.hasData());
BitSet emptyPartitionBits = null;
if (partitionsWithData.cardinality() != numPartitions) {
assertTrue(eventProto.hasEmptyPartitions());
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
} else {
assertFalse(eventProto.hasEmptyPartitions());
emptyPartitionBits = new BitSet(numPartitions);
}
assertEquals(HOST_STRING, eventProto.getHost());
assertEquals(SHUFFLE_PORT, eventProto.getPort());
assertEquals(uniqueId, eventProto.getPathComponent());
// Verify the data
// Verify the actual data
TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
Path outputFilePath = kvWriter.finalOutPath;
Path spillFilePath = kvWriter.finalIndexPath;
if (numRecordsWritten > 0) {
assertTrue(localFs.exists(outputFilePath));
assertTrue(localFs.exists(spillFilePath));
checkPermissions(outputFilePath, spillFilePath);
} else {
return;
}
// Special case for 0 records.
TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
DataInputBuffer keyBuffer = new DataInputBuffer();
DataInputBuffer valBuffer = new DataInputBuffer();
Text keyDeser = new Text();
Text valDeser = new Text();
for (int i = 0; i < numPartitions; i++) {
if (emptyPartitionBits.get(i)) {
continue;
}
TezIndexRecord indexRecord = spillRecord.getIndex(i);
FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
inStream.seek(indexRecord.getStartOffset());
IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false, 0, -1);
while (reader.nextRawKey(keyBuffer)) {
reader.nextRawValue(valBuffer);
keyDeser.readFields(keyBuffer);
valDeser.readFields(valBuffer);
int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
}
inStream.close();
}
for (int i = 0; i < numPartitions; i++) {
assertEquals(0, expectedValues.get(i).size());
expectedValues.remove(i);
}
assertEquals(0, expectedValues.size());
}
use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestUnorderedPartitionedKVWriter method baseTest.
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress, int maxSingleBufferSizeBytes, int bufferMergePercent, int availableMemory, boolean dataViaEventEnabled) throws IOException, InterruptedException {
PartitionerForTest partitioner = new PartitionerForTest();
ApplicationId appId = ApplicationId.newInstance(10000000, 1);
TezCounters counters = new TezCounters();
String uniqueId = UUID.randomUUID().toString();
int dagId = 1;
String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, maxSingleBufferSizeBytes);
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT, bufferMergePercent);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED, dataViaEventEnabled);
CompressionCodec codec = null;
if (shouldCompress) {
codec = new DefaultCodec();
((Configurable) codec).setConf(conf);
}
int numOutputs = numPartitions;
int numRecordsWritten = 0;
Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
for (int i = 0; i < numOutputs; i++) {
expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
}
UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
int sizePerBuffer = kvWriter.sizePerBuffer;
// IntW + LongW
int sizePerRecord = 4 + 8;
// Record + META_OVERHEAD
int sizePerRecordWithOverhead = sizePerRecord + 12;
IntWritable intWritable = new IntWritable();
LongWritable longWritable = new LongWritable();
BitSet partitionsWithData = new BitSet(numPartitions);
for (int i = 0; i < numRecords; i++) {
intWritable.set(i);
longWritable.set(i);
int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
if (skippedPartitions != null && skippedPartitions.contains(partition)) {
continue;
}
partitionsWithData.set(partition);
expectedValues.get(partition).put(intWritable.get(), longWritable.get());
kvWriter.write(intWritable, longWritable);
numRecordsWritten++;
}
List<Event> events = kvWriter.close();
if (numPartitions == 1) {
assertEquals(true, kvWriter.skipBuffers);
// VM & DME events
assertEquals(2, events.size());
Event event1 = events.get(1);
assertTrue(event1 instanceof CompositeDataMovementEvent);
CompositeDataMovementEvent dme = (CompositeDataMovementEvent) event1;
ByteBuffer bb = dme.getUserPayload();
ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(bb));
assertEquals(kvWriter.outputRecordsCounter.getValue(), shufflePayload.getNumRecord());
}
int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
int numExpectedSpills = numRecordsWritten / recordsPerBuffer / kvWriter.spillLimit;
verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
assertNull(kvWriter.currentBuffer);
assertEquals(0, kvWriter.availableBuffers.size());
// Verify the counters
TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
if (numPartitions > 1) {
assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
}
assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
long fileOutputBytes = fileOutputBytesCounter.getValue();
if (numRecordsWritten > 0) {
assertTrue(fileOutputBytes > 0);
if ((!shouldCompress) && (!dataViaEventEnabled)) {
assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
}
} else {
assertEquals(0, fileOutputBytes);
}
if (!dataViaEventEnabled) {
assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
}
long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
if (numExpectedSpills == 0) {
assertEquals(0, additionalSpillBytesWritten);
assertEquals(0, additionalSpillBytesRead);
} else {
assertTrue(additionalSpillBytesWritten > 0);
if (!dataViaEventEnabled) {
assertTrue(additionalSpillBytesRead > 0);
if (!shouldCompress) {
assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
}
} else {
if (kvWriter.writer.getCompressedLength() > TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE_DEFAULT) {
assertTrue(additionalSpillBytesWritten > 0);
}
}
}
if (!dataViaEventEnabled) {
assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead);
}
// due to multiple threads, buffers could be merged in chunks in scheduleSpill.
assertTrue(numExpectedSpills >= numAdditionalSpillsCounter.getValue());
BitSet emptyPartitionBits = null;
// Verify the events returned
assertEquals(2, events.size());
assertTrue(events.get(0) instanceof VertexManagerEvent);
VertexManagerEvent vme = (VertexManagerEvent) events.get(0);
verifyPartitionStats(vme, partitionsWithData);
assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
assertEquals(0, cdme.getSourceIndexStart());
assertEquals(numOutputs, cdme.getCount());
DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
if (skippedPartitions == null && numRecordsWritten > 0) {
assertFalse(eventProto.hasEmptyPartitions());
emptyPartitionBits = new BitSet(numPartitions);
} else {
assertTrue(eventProto.hasEmptyPartitions());
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
if (numRecordsWritten == 0) {
assertEquals(numPartitions, emptyPartitionBits.cardinality());
} else {
for (Integer e : skippedPartitions) {
assertTrue(emptyPartitionBits.get(e));
}
assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
}
}
if (emptyPartitionBits.cardinality() != numPartitions) {
assertEquals(HOST_STRING, eventProto.getHost());
assertEquals(SHUFFLE_PORT, eventProto.getPort());
assertEquals(uniqueId, eventProto.getPathComponent());
} else {
assertFalse(eventProto.hasHost());
assertFalse(eventProto.hasPort());
assertFalse(eventProto.hasPathComponent());
}
// Verify the actual data
TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
Path outputFilePath = kvWriter.finalOutPath;
Path spillFilePath = kvWriter.finalIndexPath;
if (numRecordsWritten <= 0) {
return;
}
boolean isInMem = eventProto.getData().hasData();
assertTrue(localFs.exists(outputFilePath));
assertEquals("Incorrect output permissions (user)", FsAction.READ_WRITE, localFs.getFileStatus(outputFilePath).getPermission().getUserAction());
assertEquals("Incorrect output permissions (group)", FsAction.READ, localFs.getFileStatus(outputFilePath).getPermission().getGroupAction());
if (!isInMem) {
assertTrue(localFs.exists(spillFilePath));
assertEquals("Incorrect index permissions (user)", FsAction.READ_WRITE, localFs.getFileStatus(spillFilePath).getPermission().getUserAction());
assertEquals("Incorrect index permissions (group)", FsAction.READ, localFs.getFileStatus(spillFilePath).getPermission().getGroupAction());
// verify no intermediate spill files have been left around
synchronized (kvWriter.spillInfoList) {
for (SpillInfo spill : kvWriter.spillInfoList) {
assertFalse("lingering intermediate spill file " + spill.outPath, localFs.exists(spill.outPath));
}
}
}
// Special case for 0 records.
DataInputBuffer keyBuffer = new DataInputBuffer();
DataInputBuffer valBuffer = new DataInputBuffer();
IntWritable keyDeser = new IntWritable();
LongWritable valDeser = new LongWritable();
for (int i = 0; i < numOutputs; i++) {
IFile.Reader reader = null;
InputStream inStream;
if (isInMem) {
// Read from in memory payload
int dataLoadSize = eventProto.getData().getData().size();
inStream = new ByteArrayInputStream(eventProto.getData().getData().toByteArray());
reader = new IFile.Reader(inStream, dataLoadSize, codec, null, null, false, 0, -1);
} else {
TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
TezIndexRecord indexRecord = spillRecord.getIndex(i);
if (skippedPartitions != null && skippedPartitions.contains(i)) {
assertFalse("The Index Record for partition " + i + " should not have any data", indexRecord.hasData());
continue;
}
FSDataInputStream tmpStream = FileSystem.getLocal(conf).open(outputFilePath);
tmpStream.seek(indexRecord.getStartOffset());
inStream = tmpStream;
reader = new IFile.Reader(tmpStream, indexRecord.getPartLength(), codec, null, null, false, 0, -1);
}
while (reader.nextRawKey(keyBuffer)) {
reader.nextRawValue(valBuffer);
keyDeser.readFields(keyBuffer);
valDeser.readFields(valBuffer);
int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
}
inStream.close();
}
for (int i = 0; i < numOutputs; i++) {
assertEquals(0, expectedValues.get(i).size());
expectedValues.remove(i);
}
assertEquals(0, expectedValues.size());
verify(outputContext, atLeast(1)).notifyProgress();
}
use of org.apache.tez.common.counters.TezCounters in project tez by apache.
the class TestUnorderedPartitionedKVWriter method baseTestWithFinalMergeDisabled.
@SuppressWarnings("unchecked")
private void baseTestWithFinalMergeDisabled(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
PartitionerForTest partitioner = new PartitionerForTest();
ApplicationId appId = ApplicationId.newInstance(10000000, 1);
TezCounters counters = new TezCounters();
String uniqueId = UUID.randomUUID().toString();
int dagId = 1;
String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, -1);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, false);
CompressionCodec codec = null;
if (shouldCompress) {
codec = new DefaultCodec();
((Configurable) codec).setConf(conf);
}
int numOutputs = numPartitions;
long availableMemory = 2048;
int numRecordsWritten = 0;
UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
int sizePerBuffer = kvWriter.sizePerBuffer;
// IntW + LongW
int sizePerRecord = 4 + 8;
// Record + META_OVERHEAD
int sizePerRecordWithOverhead = sizePerRecord + 12;
BitSet partitionsWithData = new BitSet(numPartitions);
IntWritable intWritable = new IntWritable();
LongWritable longWritable = new LongWritable();
for (int i = 0; i < numRecords; i++) {
intWritable.set(i);
longWritable.set(i);
int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
if (skippedPartitions != null && skippedPartitions.contains(partition)) {
continue;
}
partitionsWithData.set(partition);
kvWriter.write(intWritable, longWritable);
numRecordsWritten++;
}
int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
int numExpectedSpills = numRecordsWritten / recordsPerBuffer;
ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
List<Event> lastEvents = kvWriter.close();
if (numPartitions == 1) {
assertEquals(true, kvWriter.skipBuffers);
}
// max events sent are spills + one VM event. If there are no spills, atleast empty
// partitions would be sent out finally.
int spills = Math.max(1, kvWriter.numSpills.get());
// spills + VMEvent
assertEquals((spills + 1), lastEvents.size());
verify(outputContext, atMost(0)).sendEvents(eventCaptor.capture());
for (int i = 0; i < lastEvents.size(); i++) {
Event event = lastEvents.get(i);
if (event instanceof VertexManagerEvent) {
// to stats.
if (numRecordsWritten > 0) {
verifyPartitionStats(((VertexManagerEvent) event), partitionsWithData);
}
}
}
verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
assertNull(kvWriter.currentBuffer);
assertEquals(0, kvWriter.availableBuffers.size());
// Verify the counters
TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
if (outputRecordsCounter.getValue() > 0) {
assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
} else {
assertEquals(0, outputBytesWithOverheadCounter.getValue());
}
long fileOutputBytes = fileOutputBytesCounter.getValue();
if (numRecordsWritten > 0) {
assertTrue(fileOutputBytes > 0);
if (!shouldCompress) {
assertTrue("fileOutputBytes=" + fileOutputBytes + ", outputRecordBytes=" + outputRecordBytesCounter.getValue(), fileOutputBytes > outputRecordBytesCounter.getValue());
}
} else {
assertEquals(0, fileOutputBytes);
}
// due to multiple threads, buffers could be merged in chunks in scheduleSpill.
assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
// No additional spill bytes written when final merge is disabled.
assertEquals(additionalSpillBytesWritten, 0);
// No additional spills when final merge is disabled.
assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
// No additional spills when final merge is disabled.
assertEquals(numAdditionalSpillsCounter.getValue(), 0);
assertTrue(lastEvents.size() > 0);
// Get the last event
int index = lastEvents.size() - 1;
assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
assertEquals(0, cdme.getSourceIndexStart());
assertEquals(numOutputs, cdme.getCount());
DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
if (outputRecordsCounter.getValue() > 0) {
// Ensure that this is the last event
assertTrue(eventProto.getLastEvent());
}
// Verify if all path components have spillIds when final merge is disabled
Pattern mergePathComponentPattern = Pattern.compile("(.*)(_\\d+)");
for (Event event : lastEvents) {
if (!(event instanceof CompositeDataMovementEvent)) {
continue;
}
cdme = (CompositeDataMovementEvent) event;
eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
assertEquals(false, eventProto.getPipelined());
if (eventProto.hasPathComponent()) {
// for final merge disabled cases, it should have _spillId
Matcher matcher = mergePathComponentPattern.matcher(eventProto.getPathComponent());
assertTrue("spill id should be present in path component " + eventProto.getPathComponent(), matcher.matches());
assertEquals(2, matcher.groupCount());
assertEquals(uniqueId, matcher.group(1));
assertTrue("spill id should be present in path component", matcher.group(2) != null);
Path outputPath = new Path(outputContext.getWorkDirs()[0], "output/" + eventProto.getPathComponent() + "/" + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING);
Path indexPath = outputPath.suffix(Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
checkPermissions(outputPath, indexPath);
} else {
assertEquals(0, eventProto.getSpillId());
if (outputRecordsCounter.getValue() > 0) {
assertEquals(true, eventProto.getLastEvent());
} else {
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
BitSet emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
assertEquals(numPartitions, emptyPartitionBits.cardinality());
}
}
}
verify(outputContext, atLeast(1)).notifyProgress();
// Verify if all spill files are available.
TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
if (numRecordsWritten > 0) {
int numSpills = kvWriter.numSpills.get();
for (int i = 0; i < numSpills; i++) {
assertTrue(localFs.exists(taskOutput.getSpillFileForWrite(i, 10)));
assertTrue(localFs.exists(taskOutput.getSpillIndexFileForWrite(i, 10)));
}
} else {
return;
}
}
Aggregations