use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleInputEventHandlerOrderedGrouped method testPiplinedShuffleEvents.
/**
 * Drives the event handler and the scheduler through a pipelined-shuffle scenario with two
 * inputs, each producing two spill events, and checks that the scheduler only reports
 * {@code isDone()} after copySucceeded has been invoked for every spill of both inputs.
 *
 * Input 0: an INCREMENTAL_UPDATE spill (id 0) followed by a FINAL_UPDATE spill (id 1), both
 * delivered through the handler. Input 1: an INCREMENTAL_UPDATE spill delivered through the
 * handler, plus a FINAL_UPDATE spill reported directly via copySucceeded with no output.
 */
@Test(timeout = 10000)
public void testPiplinedShuffleEvents() throws IOException, InterruptedException {
// test with 2 events per input (2 inputs)
int attemptNum = 0;
int inputIdx = 0;
// First spill of input 0; the expected identifier carries INCREMENTAL_UPDATE and spill id 0.
Event dme1 = createDataMovementEvent(attemptNum, inputIdx, null, false, true, true, 0);
CompositeInputAttemptIdentifier id1 = new CompositeInputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0, 1);
handler.handleEvents(Collections.singletonList(dme1));
int partitionId = attemptNum;
verify(scheduler).addKnownMapOutput(eq(HOST), eq(PORT), eq(partitionId), eq(id1));
// NOTE(review): verify(scheduler) is followed by a field access rather than a mock method call,
// and the containsKey result is discarded, so this line asserts nothing by itself. It looks
// like assertTrue(scheduler.pipelinedShuffleInfoEventsMap.containsKey(...)) was intended, as in
// the id2 checks below. Confirm before changing: the unfinished verify() may influence how
// Mockito treats the next call made on the scheduler.
verify(scheduler).pipelinedShuffleInfoEventsMap.containsKey(id1.getInputIdentifier());
// Send final_update event.
Event dme2 = createDataMovementEvent(attemptNum, inputIdx, null, false, true, false, 1);
CompositeInputAttemptIdentifier id2 = new CompositeInputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 1, 1);
handler.handleEvents(Collections.singletonList(dme2));
// Re-assigns the same value as before (attemptNum is still 0).
partitionId = attemptNum;
assertTrue(scheduler.pipelinedShuffleInfoEventsMap.containsKey(id2.getInputIdentifier()));
verify(scheduler).addKnownMapOutput(eq(HOST), eq(PORT), eq(partitionId), eq(id2));
// Same assertion as two lines above, repeated after the verify call.
assertTrue(scheduler.pipelinedShuffleInfoEventsMap.containsKey(id2.getInputIdentifier()));
// A host must be available and must have at least one input queued for it.
MapHost host = scheduler.getHost();
assertTrue(host != null);
List<InputAttemptIdentifier> list = scheduler.getMapsForHost(host);
assertTrue(!list.isEmpty());
// Let the final_update event pass
MapOutput output = MapOutput.createMemoryMapOutput(id2, mergeManager, 1000, true);
scheduler.copySucceeded(id2, host, 1000, 10000, 10000, output, false);
// we haven't downloaded id1 yet
assertTrue(!scheduler.isDone());
output = MapOutput.createMemoryMapOutput(id1, mergeManager, 1000, true);
scheduler.copySucceeded(id1, host, 1000, 10000, 10000, output, false);
// we haven't downloaded another source yet
assertTrue(!scheduler.isDone());
// Send events for source 2
attemptNum = 0;
inputIdx = 1;
Event dme3 = createDataMovementEvent(attemptNum, inputIdx, null, false, true, true, 1);
InputAttemptIdentifier id3 = new InputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0);
handler.handleEvents(Collections.singletonList(dme3));
// Send final_update event (empty partition directly invoking copySucceeded).
InputAttemptIdentifier id4 = new InputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 1);
assertTrue(!scheduler.isInputFinished(id4.getInputIdentifier()));
// No host, sizes or MapOutput are supplied for this spill.
scheduler.copySucceeded(id4, null, 0, 0, 0, null, false);
// we haven't downloaded another id yet
assertTrue(!scheduler.isDone());
// Let the incremental event pass
output = MapOutput.createMemoryMapOutput(id3, mergeManager, 1000, true);
scheduler.copySucceeded(id3, host, 1000, 10000, 10000, output, false);
// Every spill of both inputs has now been reported, so the scheduler must be done.
assertTrue(scheduler.isDone());
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class TestShuffleInputEventHandlerOrderedGrouped method testPipelinedShuffle_WithObsoleteEvents.
/**
 * Checks that an InputFailedEvent received after a pipelined-shuffle spill has already been
 * handed out for download makes the scheduler request a self-kill exactly once.
 */
@Test(timeout = 5000)
public void testPipelinedShuffle_WithObsoleteEvents() throws IOException, InterruptedException {
  // Attempt #1 of input 1 is processed first.
  int attemptNum = 1;
  int inputIdx = 1;
  CompositeInputAttemptIdentifier expectedId = new CompositeInputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0, 1);
  Event spillEvent = createDataMovementEvent(attemptNum, inputIdx, null, false, true, true, 0, attemptNum);
  handler.handleEvents(Collections.singletonList(spillEvent));

  // The spill must have been registered with the scheduler exactly once.
  verify(scheduler, times(1)).addKnownMapOutput(eq(HOST), eq(PORT), eq(1), eq(expectedId));
  assertTrue("Shuffle info events should not be empty for pipelined shuffle", !scheduler.pipelinedShuffleInfoEventsMap.isEmpty());
  int knownLocationCount = scheduler.mapLocations.values().size();
  assertTrue("Maplocations should have values. current size: " + knownLocationCount, knownLocationCount > 0);

  // Start scheduling for download; this sets scheduledForDownload in the event info.
  scheduler.getMapsForHost(scheduler.mapLocations.values().iterator().next());

  // Deliver an input-failed event for target index 1, version 0.
  List<Event> failureEvents = new LinkedList<Event>();
  failureEvents.add(InputFailedEvent.create(1, 0));
  handler.handleEvents(failureEvents);

  // Inputs were already scheduled for download, so the task should issue a kill request.
  verify(scheduler, times(1)).killSelf(any(IOException.class), any(String.class));
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class ShuffleScheduler method inputShouldBeConsumed.
/**
 * Tells whether the given input attempt should still be consumed.
 *
 * The attempt is consumable only when it is not present in {@code obsoleteInputs} and
 * {@code isInputFinished} does not report it as finished. For a
 * {@link CompositeInputAttemptIdentifier} the two-argument {@code isInputFinished} is queried
 * with the identifier and the identifier plus the identifier count; otherwise the
 * single-argument form is used.
 *
 * @param id the input attempt to check
 * @return {@code true} if the attempt is neither obsolete nor finished
 */
private synchronized boolean inputShouldBeConsumed(InputAttemptIdentifier id) {
  final boolean finished;
  if (id instanceof CompositeInputAttemptIdentifier) {
    CompositeInputAttemptIdentifier composite = (CompositeInputAttemptIdentifier) id;
    int firstInput = composite.getInputIdentifier();
    int endInput = firstInput + composite.getInputIdentifierCount();
    finished = isInputFinished(firstInput, endInput);
  } else {
    finished = isInputFinished(id.getInputIdentifier());
  }
  // The finished state is resolved before the obsolete lookup, as in the original ordering.
  if (obsoleteInputs.contains(id)) {
    return false;
  }
  return !finished;
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class ShuffleManager method constructFetcherForHost.
/**
 * Builds a {@link Fetcher} for one pending partition range of the given host.
 *
 * The inputs of that partition range are filtered before being assigned to the fetcher:
 * inputs that are already completed or are listed in {@code obsoletedInputs} are dropped, and
 * inputs beyond {@code maxTaskOutputAtOnce} are handed back to the host for a later fetcher.
 * If the host still has pending partitions afterwards it is re-added to {@code pendingHosts}.
 * Every input that remains is flagged as scheduled for download in its shuffle event info
 * (when such info exists).
 *
 * @param inputHost host whose next pending partition range should be fetched
 * @param conf configuration passed through to the fetcher builder
 * @return the fetcher with the selected inputs assigned as its work
 */
@VisibleForTesting
Fetcher constructFetcherForHost(InputHost inputHost, Configuration conf) {
  Path lockDisk = null;
  if (sharedFetchEnabled) {
    // pick a single lock disk from the edge name's hashcode + host hashcode
    final int h = Objects.hashCode(this.srcNameTrimmed, inputHost.getHost());
    // Reduce modulo the disk count BEFORE taking the absolute value. Math.abs(Integer.MIN_VALUE)
    // is still negative, so abs-then-modulo could produce a negative array index. For every
    // other hash value this selects the same disk as abs-then-modulo did.
    final int diskIndex = Math.abs(h % this.localDisks.length);
    lockDisk = new Path(this.localDisks[diskIndex], "locks");
  }
  FetcherBuilder fetcherBuilder = new FetcherBuilder(ShuffleManager.this, httpConnectionParams, inputManager, inputContext.getApplicationId(), inputContext.getDagIdentifier(), jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator, lockDisk, localDiskFetchEnabled, sharedFetchEnabled, localhostName, shufflePort, asyncHttp, verifyDiskChecksum, compositeFetch);
  if (codec != null) {
    fetcherBuilder.setCompressionParameters(codec);
  }
  fetcherBuilder.setIFileParams(ifileReadAhead, ifileReadAheadLength);

  // Remove completed and obsolete inputs from the list being given to the fetcher, and cap the
  // number of inputs handed to a single fetcher.
  PartitionToInputs pendingInputsOfOnePartitionRange = inputHost.clearAndGetOnePartitionRange();
  int includedMaps = 0;
  for (Iterator<InputAttemptIdentifier> inputIter = pendingInputsOfOnePartitionRange.getInputs().iterator(); inputIter.hasNext(); ) {
    InputAttemptIdentifier input = inputIter.next();
    // For pipelined shuffle.
    if (!validateInputAttemptForPipelinedShuffle(input)) {
      continue;
    }
    // Avoid adding attempts which have already completed.
    boolean alreadyCompleted;
    if (input instanceof CompositeInputAttemptIdentifier) {
      CompositeInputAttemptIdentifier compositeInput = (CompositeInputAttemptIdentifier) input;
      // A composite input covers the identifiers [first, first + count). It is complete when
      // the first clear bit at or after 'first' lies at or beyond the exclusive end of that
      // range; a strict '>' would additionally require the bit just past the range to be set.
      int firstInput = compositeInput.getInputIdentifier();
      int rangeEnd = firstInput + compositeInput.getInputIdentifierCount();
      alreadyCompleted = completedInputSet.nextClearBit(firstInput) >= rangeEnd;
    } else {
      alreadyCompleted = completedInputSet.get(input.getInputIdentifier());
    }
    // Avoid adding attempts which have already completed or have been marked as OBSOLETE
    if (alreadyCompleted || obsoletedInputs.contains(input)) {
      inputIter.remove();
      continue;
    }
    // Check if max threshold is met
    if (includedMaps >= maxTaskOutputAtOnce) {
      inputIter.remove();
      // Hand the input back to the host so that a later fetcher picks it up.
      inputHost.addKnownInput(pendingInputsOfOnePartitionRange.getPartition(), pendingInputsOfOnePartitionRange.getPartitionCount(), input);
    } else {
      includedMaps++;
    }
  }
  if (inputHost.getNumPendingPartitions() > 0) {
    // The host still has work left: add it back to the queue.
    pendingHosts.add(inputHost);
  }
  // Mark every input that stays with this fetcher as scheduled for download.
  for (InputAttemptIdentifier input : pendingInputsOfOnePartitionRange.getInputs()) {
    ShuffleEventInfo eventInfo = shuffleInfoEventsMap.get(input.getInputIdentifier());
    if (eventInfo != null) {
      eventInfo.scheduledForDownload = true;
    }
  }
  fetcherBuilder.assignWork(inputHost.getHost(), inputHost.getPort(), pendingInputsOfOnePartitionRange.getPartition(), pendingInputsOfOnePartitionRange.getPartitionCount(), pendingInputsOfOnePartitionRange.getInputs());
  if (LOG.isDebugEnabled()) {
    LOG.debug("Created Fetcher for host: " + inputHost.getHost() + ", info: " + inputHost.getAdditionalInfo() + ", with inputs: " + pendingInputsOfOnePartitionRange);
  }
  return fetcherBuilder.build();
}
use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.
the class ShuffleInputEventHandlerImpl method processCompositeRoutedDataMovementEvent.
/**
 * Processes one composite routed data-movement event.
 *
 * When the payload declares empty partitions, each source partition in
 * {@code [sourceIndex, sourceIndex + count)} that is flagged in {@code emptyPartitionsBitSet}
 * is reported to the shuffle manager as completed with no data. If every partition in that
 * range is flagged, nothing else is done. Otherwise the composite input is registered with
 * the shuffle manager as a known input on the payload's host and port.
 *
 * @param crdme the composite event supplying source index, target index, count and version
 * @param shufflePayload the decoded payload of the event
 * @param emptyPartitionsBitSet bits marking source partitions without data; only read when
 *        the payload reports that it has empty partitions
 * @throws IOException declared by the signature; presumably raised by the identifier
 *         construction or the shuffle manager calls (verify against those methods)
 */
private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovementEvent crdme, DataMovementEventPayloadProto shufflePayload, BitSet emptyPartitionsBitSet) throws IOException {
  final int partitionId = crdme.getSourceIndex();
  if (LOG.isDebugEnabled()) {
    LOG.debug("DME srcIdx: " + partitionId + ", targetIndex: " + crdme.getTargetIndex() + ", count:" + crdme.getCount() + ", attemptNum: " + crdme.getVersion() + ", payload: " + ShuffleUtils.stringify(shufflePayload));
  }

  if (shufflePayload.hasEmptyPartitions()) {
    CompositeInputAttemptIdentifier unsharedIdentifier = constructInputAttemptIdentifier(crdme.getTargetIndex(), crdme.getCount(), crdme.getVersion(), shufflePayload, false);
    boolean sawPartitionWithData = false;
    for (int offset = 0; offset < crdme.getCount(); offset++) {
      int srcPartitionId = partitionId + offset;
      if (!emptyPartitionsBitSet.get(srcPartitionId)) {
        // This partition produced data, so the composite input still has to be fetched.
        sawPartitionWithData = true;
        continue;
      }
      InputAttemptIdentifier emptyAttempt = unsharedIdentifier.expand(offset);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Source partition: " + srcPartitionId + " did not generate any data. SrcAttempt: [" + emptyAttempt + "]. Not fetching.");
      }
      numDmeEventsNoData.getAndIncrement();
      shuffleManager.addCompletedInputWithNoData(emptyAttempt);
    }
    if (!sawPartitionWithData) {
      // Every partition in the range was empty: there is nothing to register for fetching.
      return;
    }
  }

  // The identifier is flagged as shared only when shared inputs are enabled and the source index is 0.
  boolean shared = useSharedInputs && partitionId == 0;
  CompositeInputAttemptIdentifier srcAttemptIdentifier = constructInputAttemptIdentifier(crdme.getTargetIndex(), crdme.getCount(), crdme.getVersion(), shufflePayload, shared);
  shuffleManager.addKnownInput(shufflePayload.getHost(), shufflePayload.getPort(), srcAttemptIdentifier, partitionId);
}
Aggregations