use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto in project tez by apache.
the class ShuffleInputEventHandlerOrderedGrouped method handleEvent.
private void handleEvent(Event event) throws IOException {
if (event instanceof DataMovementEvent) {
numDmeEvents.incrementAndGet();
DataMovementEvent dmEvent = (DataMovementEvent) event;
DataMovementEventPayloadProto shufflePayload;
try {
shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
} catch (InvalidProtocolBufferException e) {
throw new TezUncheckedException("Unable to parse DataMovementEvent payload", e);
}
BitSet emptyPartitionsBitSet = null;
if (shufflePayload.hasEmptyPartitions()) {
try {
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(shufflePayload.getEmptyPartitions(), inflater);
emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
} catch (IOException e) {
throw new TezUncheckedException("Unable to set the empty partition to succeeded", e);
}
}
processDataMovementEvent(dmEvent, shufflePayload, emptyPartitionsBitSet);
scheduler.updateEventReceivedTime();
} else if (event instanceof CompositeRoutedDataMovementEvent) {
CompositeRoutedDataMovementEvent crdme = (CompositeRoutedDataMovementEvent) event;
DataMovementEventPayloadProto shufflePayload;
try {
shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(crdme.getUserPayload()));
} catch (InvalidProtocolBufferException e) {
throw new TezUncheckedException("Unable to parse DataMovementEvent payload", e);
}
BitSet emptyPartitionsBitSet = null;
if (shufflePayload.hasEmptyPartitions()) {
try {
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(shufflePayload.getEmptyPartitions(), inflater);
emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
} catch (IOException e) {
throw new TezUncheckedException("Unable to set the empty partition to succeeded", e);
}
}
if (compositeFetch) {
numDmeEvents.addAndGet(crdme.getCount());
processCompositeRoutedDataMovementEvent(crdme, shufflePayload, emptyPartitionsBitSet);
} else {
for (int offset = 0; offset < crdme.getCount(); offset++) {
numDmeEvents.incrementAndGet();
processDataMovementEvent(crdme.expand(offset), shufflePayload, emptyPartitionsBitSet);
}
}
scheduler.updateEventReceivedTime();
} else if (event instanceof InputFailedEvent) {
numObsoletionEvents.incrementAndGet();
processTaskFailedEvent((InputFailedEvent) event);
}
if (numDmeEvents.get() + numObsoletionEvents.get() > nextToLogEventCount.get()) {
logProgress(false);
// Log every 50 events seen.
nextToLogEventCount.addAndGet(50);
}
}
use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto in project tez by apache.
the class ShuffleUtils method generateEventsForNonStartedOutput.
/**
* Generate events for outputs which have not been started.
* @param eventList
* @param numPhysicalOutputs
* @param context
* @param generateVmEvent whether to generate a vm event or not
* @param isCompositeEvent whether to generate a CompositeDataMovementEvent or a DataMovementEvent
* @param deflater
* @throws IOException
*/
public static void generateEventsForNonStartedOutput(List<Event> eventList, int numPhysicalOutputs, OutputContext context, boolean generateVmEvent, boolean isCompositeEvent, Deflater deflater) throws IOException {
DataMovementEventPayloadProto.Builder payloadBuilder = DataMovementEventPayloadProto.newBuilder();
// Construct the VertexManager event if required.
if (generateVmEvent) {
ShuffleUserPayloads.VertexManagerEventPayloadProto.Builder vmBuilder = ShuffleUserPayloads.VertexManagerEventPayloadProto.newBuilder();
vmBuilder.setOutputSize(0);
VertexManagerEvent vmEvent = VertexManagerEvent.create(context.getDestinationVertexName(), vmBuilder.build().toByteString().asReadOnlyByteBuffer());
eventList.add(vmEvent);
}
// Construct the DataMovementEvent
// Always set empty partition information since no files were generated.
LOG.info("Setting all {} partitions as empty for non-started output", numPhysicalOutputs);
BitSet emptyPartitionDetails = new BitSet(numPhysicalOutputs);
emptyPartitionDetails.set(0, numPhysicalOutputs, true);
ByteString emptyPartitionsBytesString = TezCommonUtils.compressByteArrayToByteString(TezUtilsInternal.toByteArray(emptyPartitionDetails), deflater);
payloadBuilder.setEmptyPartitions(emptyPartitionsBytesString);
payloadBuilder.setRunDuration(0);
DataMovementEventPayloadProto payloadProto = payloadBuilder.build();
ByteBuffer dmePayload = payloadProto.toByteString().asReadOnlyByteBuffer();
if (isCompositeEvent) {
CompositeDataMovementEvent cdme = CompositeDataMovementEvent.create(0, numPhysicalOutputs, dmePayload);
eventList.add(cdme);
} else {
DataMovementEvent dme = DataMovementEvent.create(0, dmePayload);
eventList.add(dme);
}
}
use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto in project tez by apache.
the class ShuffleInputEventHandlerImpl method handleEvent.
private void handleEvent(Event event) throws IOException {
if (event instanceof DataMovementEvent) {
numDmeEvents.incrementAndGet();
DataMovementEvent dmEvent = (DataMovementEvent) event;
DataMovementEventPayloadProto shufflePayload;
try {
shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
} catch (InvalidProtocolBufferException e) {
throw new TezUncheckedException("Unable to parse DataMovementEvent payload", e);
}
BitSet emptyPartitionsBitSet = null;
if (shufflePayload.hasEmptyPartitions()) {
try {
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(shufflePayload.getEmptyPartitions(), inflater);
emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
} catch (IOException e) {
throw new TezUncheckedException("Unable to set the empty partition to succeeded", e);
}
}
processDataMovementEvent(dmEvent, shufflePayload, emptyPartitionsBitSet);
shuffleManager.updateEventReceivedTime();
} else if (event instanceof CompositeRoutedDataMovementEvent) {
CompositeRoutedDataMovementEvent crdme = (CompositeRoutedDataMovementEvent) event;
DataMovementEventPayloadProto shufflePayload;
try {
shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(crdme.getUserPayload()));
} catch (InvalidProtocolBufferException e) {
throw new TezUncheckedException("Unable to parse DataMovementEvent payload", e);
}
BitSet emptyPartitionsBitSet = null;
if (shufflePayload.hasEmptyPartitions()) {
try {
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(shufflePayload.getEmptyPartitions(), inflater);
emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
} catch (IOException e) {
throw new TezUncheckedException("Unable to set the empty partition to succeeded", e);
}
}
if (compositeFetch) {
numDmeEvents.addAndGet(crdme.getCount());
processCompositeRoutedDataMovementEvent(crdme, shufflePayload, emptyPartitionsBitSet);
} else {
for (int offset = 0; offset < crdme.getCount(); offset++) {
numDmeEvents.incrementAndGet();
processDataMovementEvent(crdme.expand(offset), shufflePayload, emptyPartitionsBitSet);
}
}
shuffleManager.updateEventReceivedTime();
} else if (event instanceof InputFailedEvent) {
numObsoletionEvents.incrementAndGet();
processInputFailedEvent((InputFailedEvent) event);
} else {
throw new TezUncheckedException("Unexpected event type: " + event.getClass().getName());
}
if (numDmeEvents.get() + numObsoletionEvents.get() > nextToLogEventCount.get()) {
logProgress(false);
// Log every 50 events seen.
nextToLogEventCount.addAndGet(50);
}
}
use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto in project tez by apache.
the class TestUnorderedPartitionedKVWriter method baseTestWithPipelinedTransfer.
@SuppressWarnings("unchecked")
private void baseTestWithPipelinedTransfer(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress) throws IOException, InterruptedException {
PartitionerForTest partitioner = new PartitionerForTest();
ApplicationId appId = ApplicationId.newInstance(10000000, 1);
TezCounters counters = new TezCounters();
String uniqueId = UUID.randomUUID().toString();
int dagId = 1;
String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, -1);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);
CompressionCodec codec = null;
if (shouldCompress) {
codec = new DefaultCodec();
((Configurable) codec).setConf(conf);
}
int numOutputs = numPartitions;
long availableMemory = 2048;
int numRecordsWritten = 0;
UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
int sizePerBuffer = kvWriter.sizePerBuffer;
// IntW + LongW
int sizePerRecord = 4 + 8;
// Record + META_OVERHEAD
int sizePerRecordWithOverhead = sizePerRecord + 12;
BitSet partitionsWithData = new BitSet(numPartitions);
IntWritable intWritable = new IntWritable();
LongWritable longWritable = new LongWritable();
for (int i = 0; i < numRecords; i++) {
intWritable.set(i);
longWritable.set(i);
int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
if (skippedPartitions != null && skippedPartitions.contains(partition)) {
continue;
}
partitionsWithData.set(partition);
kvWriter.write(intWritable, longWritable);
numRecordsWritten++;
}
int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
int numExpectedSpills = numRecordsWritten / recordsPerBuffer;
ArgumentCaptor<List> eventCaptor = ArgumentCaptor.forClass(List.class);
List<Event> lastEvents = kvWriter.close();
if (numPartitions == 1) {
assertEquals(false, kvWriter.skipBuffers);
}
// no events are sent to kvWriter upon close with pipelining
assertTrue(lastEvents.size() == 0);
verify(outputContext, atLeast(numExpectedSpills)).sendEvents(eventCaptor.capture());
int numOfCapturedEvents = eventCaptor.getAllValues().size();
lastEvents = eventCaptor.getAllValues().get(numOfCapturedEvents - 1);
VertexManagerEvent VMEvent = (VertexManagerEvent) lastEvents.get(0);
for (int i = 0; i < numOfCapturedEvents; i++) {
List<Event> events = eventCaptor.getAllValues().get(i);
if (i < numOfCapturedEvents - 1) {
assertTrue(events.size() == 1);
assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
} else {
assertTrue(events.size() == 2);
assertTrue(events.get(0) instanceof VertexManagerEvent);
assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
}
}
verifyPartitionStats(VMEvent, partitionsWithData);
verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
assertNull(kvWriter.currentBuffer);
assertEquals(0, kvWriter.availableBuffers.size());
// Verify the counters
TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
long fileOutputBytes = fileOutputBytesCounter.getValue();
if (numRecordsWritten > 0) {
assertTrue(fileOutputBytes > 0);
if (!shouldCompress) {
assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
}
} else {
assertEquals(0, fileOutputBytes);
}
// due to multiple threads, buffers could be merged in chunks in scheduleSpill.
assertTrue(recordsPerBuffer * numExpectedSpills >= spilledRecordsCounter.getValue());
long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
// No additional spill bytes written when final merge is disabled.
assertEquals(additionalSpillBytesWritten, 0);
// No additional spills when final merge is disabled.
assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
// No additional spills when final merge is disabled.
assertEquals(numAdditionalSpillsCounter.getValue(), 0);
assertTrue(lastEvents.size() > 0);
// Get the last event
int index = lastEvents.size() - 1;
assertTrue(lastEvents.get(index) instanceof CompositeDataMovementEvent);
CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) lastEvents.get(index);
assertEquals(0, cdme.getSourceIndexStart());
assertEquals(numOutputs, cdme.getCount());
DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
// Ensure that this is the last event
assertTrue(eventProto.getLastEvent());
verifyEmptyPartitions(eventProto, numRecordsWritten, numPartitions, skippedPartitions);
verify(outputContext, atLeast(1)).notifyProgress();
// Verify if all spill files are available.
TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
if (numRecordsWritten > 0) {
int numSpills = kvWriter.numSpills.get();
for (int i = 0; i < numSpills; i++) {
Path outputFile = taskOutput.getSpillFileForWrite(i, 10);
Path indexFile = taskOutput.getSpillIndexFileForWrite(i, 10);
assertTrue(localFs.exists(outputFile));
assertTrue(localFs.exists(indexFile));
assertEquals("Incorrect output permissions", (short) 0640, localFs.getFileStatus(outputFile).getPermission().toShort());
assertEquals("Incorrect index permissions", (short) 0640, localFs.getFileStatus(indexFile).getPermission().toShort());
}
} else {
return;
}
}
use of org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto in project tez by apache.
the class TestOnFileUnorderedKVOutput method testGeneratedDataMovementEvent.
@Test(timeout = 5000)
public void testGeneratedDataMovementEvent() throws Exception {
Configuration conf = new Configuration();
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
OutputContext outputContext = createOutputContext(conf, sharedExecutor);
UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);
List<Event> events = null;
events = kvOutput.initialize();
kvOutput.start();
assertTrue(events != null && events.size() == 0);
KeyValueWriter kvWriter = kvOutput.getWriter();
List<KVPair> data = KVDataGen.generateTestData(true, 0);
for (KVPair kvp : data) {
kvWriter.write(kvp.getKey(), kvp.getvalue());
}
events = kvOutput.close();
assertEquals(45, task.getTaskStatistics().getIOStatistics().values().iterator().next().getDataSize());
assertEquals(5, task.getTaskStatistics().getIOStatistics().values().iterator().next().getItemsProcessed());
assertTrue(events != null && events.size() == 2);
CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
assertFalse(shufflePayload.hasEmptyPartitions());
assertEquals(outputContext.getUniqueIdentifier(), shufflePayload.getPathComponent());
assertEquals(shufflePort, shufflePayload.getPort());
assertEquals("localhost", shufflePayload.getHost());
sharedExecutor.shutdownNow();
}
Aggregations