Search in sources :

Example 16 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class TestShuffleInputEventHandlerOrderedGrouped method testPiplinedShuffleEvents.

/**
 * Feeds pipelined shuffle events for two inputs (two spills each) through the
 * handler and checks that the scheduler reports completion only after every
 * spill of both inputs has been passed to {@code copySucceeded}.
 */
@Test(timeout = 10000)
public void testPiplinedShuffleEvents() throws IOException, InterruptedException {
    // test with 2 events per input (2 inputs)
    int attemptNum = 0;
    int inputIdx = 0;
    Event dme1 = createDataMovementEvent(attemptNum, inputIdx, null, false, true, true, 0);
    CompositeInputAttemptIdentifier id1 = new CompositeInputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0, 1);
    handler.handleEvents(Collections.singletonList(dme1));
    int partitionId = attemptNum;
    verify(scheduler).addKnownMapOutput(eq(HOST), eq(PORT), eq(partitionId), eq(id1));
    // Assert on the map directly. Reading a field off the object returned by
    // verify(...) checks nothing and leaves a Mockito verification unfinished.
    assertTrue(scheduler.pipelinedShuffleInfoEventsMap.containsKey(id1.getInputIdentifier()));
    // Send final_update event.
    Event dme2 = createDataMovementEvent(attemptNum, inputIdx, null, false, true, false, 1);
    CompositeInputAttemptIdentifier id2 = new CompositeInputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 1, 1);
    handler.handleEvents(Collections.singletonList(dme2));
    verify(scheduler).addKnownMapOutput(eq(HOST), eq(PORT), eq(partitionId), eq(id2));
    assertTrue(scheduler.pipelinedShuffleInfoEventsMap.containsKey(id2.getInputIdentifier()));
    MapHost host = scheduler.getHost();
    assertTrue(host != null);
    List<InputAttemptIdentifier> list = scheduler.getMapsForHost(host);
    assertTrue(!list.isEmpty());
    // Let the final_update event pass
    MapOutput output = MapOutput.createMemoryMapOutput(id2, mergeManager, 1000, true);
    scheduler.copySucceeded(id2, host, 1000, 10000, 10000, output, false);
    // we haven't downloaded id1 yet
    assertTrue(!scheduler.isDone());
    output = MapOutput.createMemoryMapOutput(id1, mergeManager, 1000, true);
    scheduler.copySucceeded(id1, host, 1000, 10000, 10000, output, false);
    // we haven't downloaded another source yet
    assertTrue(!scheduler.isDone());
    // Send events for source 2
    attemptNum = 0;
    inputIdx = 1;
    Event dme3 = createDataMovementEvent(attemptNum, inputIdx, null, false, true, true, 1);
    InputAttemptIdentifier id3 = new InputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0);
    handler.handleEvents(Collections.singletonList(dme3));
    // Send final_update event (empty partition directly invoking copySucceeded).
    InputAttemptIdentifier id4 = new InputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 1);
    assertTrue(!scheduler.isInputFinished(id4.getInputIdentifier()));
    scheduler.copySucceeded(id4, null, 0, 0, 0, null, false);
    // we haven't downloaded another id yet
    assertTrue(!scheduler.isDone());
    // Let the incremental event pass
    output = MapOutput.createMemoryMapOutput(id3, mergeManager, 1000, true);
    scheduler.copySucceeded(id3, host, 1000, 10000, 10000, output, false);
    assertTrue(scheduler.isDone());
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) Event(org.apache.tez.runtime.api.Event) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Test(org.junit.Test)

Example 17 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class TestShuffleInputEventHandlerOrderedGrouped method testPipelinedShuffle_WithObsoleteEvents.

/**
 * Handles a pipelined data movement event, marks its input as scheduled for
 * download, then delivers an {@code InputFailedEvent} and expects the
 * scheduler to be asked to kill the task exactly once.
 */
@Test(timeout = 5000)
public void testPipelinedShuffle_WithObsoleteEvents() throws IOException, InterruptedException {
    // Attempt #1 is the first one the handler sees.
    final int attemptNum = 1;
    final int inputIdx = 1;
    Event firstAttemptEvent = createDataMovementEvent(attemptNum, inputIdx, null, false, true, true, 0, attemptNum);
    handler.handleEvents(Collections.singletonList(firstAttemptEvent));

    CompositeInputAttemptIdentifier expectedId = new CompositeInputAttemptIdentifier(inputIdx, attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0, 1);
    verify(scheduler, times(1)).addKnownMapOutput(eq(HOST), eq(PORT), eq(1), eq(expectedId));
    assertTrue("Shuffle info events should not be empty for pipelined shuffle", !scheduler.pipelinedShuffleInfoEventsMap.isEmpty());

    int knownLocationCount = scheduler.mapLocations.values().size();
    assertTrue("Maplocations should have values. current size: " + knownLocationCount, knownLocationCount > 0);

    // Requesting the maps of a host starts scheduling for download
    // (sets up scheduledForDownload in eventInfo).
    scheduler.getMapsForHost(scheduler.mapLocations.values().iterator().next());

    // Deliver an input-failed event for the same target index.
    final int targetIdx = 1;
    List<Event> failureEvents = new LinkedList<Event>();
    failureEvents.add(InputFailedEvent.create(targetIdx, 0));
    handler.handleEvents(failureEvents);

    // Inputs were already scheduled for download, so the task must request a kill.
    verify(scheduler, times(1)).killSelf(any(IOException.class), any(String.class));
}
Also used : InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) Event(org.apache.tez.runtime.api.Event) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) IOException(java.io.IOException) Matchers.anyString(org.mockito.Matchers.anyString) ByteString(com.google.protobuf.ByteString) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 18 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class ShuffleScheduler method inputShouldBeConsumed.

/**
 * Decides whether the given input attempt still needs to be consumed.
 *
 * @param id the input attempt to check; for a composite identifier the
 *           finished check is made over the index range it spans
 * @return {@code true} only when {@code id} is not in the obsolete set and its
 *         input (or input range) is not reported as finished
 */
private synchronized boolean inputShouldBeConsumed(InputAttemptIdentifier id) {
    final boolean finished;
    if (!(id instanceof CompositeInputAttemptIdentifier)) {
        finished = isInputFinished(id.getInputIdentifier());
    } else {
        CompositeInputAttemptIdentifier composite = (CompositeInputAttemptIdentifier) id;
        int rangeStart = composite.getInputIdentifier();
        int rangeEnd = rangeStart + composite.getInputIdentifierCount();
        finished = isInputFinished(rangeStart, rangeEnd);
    }
    return !(obsoleteInputs.contains(id) || finished);
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)

Example 19 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class ShuffleManager method constructFetcherForHost.

/**
 * Builds a {@link Fetcher} for one partition range of pending inputs on the
 * given host.
 *
 * <p>Inputs that fail pipelined-shuffle validation are skipped, inputs that are
 * already completed or obsolete are dropped, and inputs beyond
 * {@code maxTaskOutputAtOnce} are handed back to {@code inputHost}. If the host
 * still has pending partitions it is re-added to {@code pendingHosts}.
 *
 * @param inputHost the host whose pending inputs are to be fetched
 * @param conf      configuration passed through to the {@code FetcherBuilder}
 * @return the fetcher built for the selected inputs
 */
@VisibleForTesting
Fetcher constructFetcherForHost(InputHost inputHost, Configuration conf) {
    Path lockDisk = null;
    if (sharedFetchEnabled) {
        // pick a single lock disk from the edge name's hashcode + host hashcode
        final int hash = Objects.hashCode(this.srcNameTrimmed, inputHost.getHost());
        // Math.abs(Integer.MIN_VALUE) is still negative and would produce a
        // negative array index; map that single value to 0 and keep the
        // existing disk choice for every other hash.
        final int h = (hash == Integer.MIN_VALUE) ? 0 : Math.abs(hash);
        lockDisk = new Path(this.localDisks[h % this.localDisks.length], "locks");
    }
    FetcherBuilder fetcherBuilder = new FetcherBuilder(ShuffleManager.this, httpConnectionParams, inputManager, inputContext.getApplicationId(), inputContext.getDagIdentifier(), jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator, lockDisk, localDiskFetchEnabled, sharedFetchEnabled, localhostName, shufflePort, asyncHttp, verifyDiskChecksum, compositeFetch);
    if (codec != null) {
        fetcherBuilder.setCompressionParameters(codec);
    }
    fetcherBuilder.setIFileParams(ifileReadAhead, ifileReadAheadLength);
    // Remove obsolete inputs from the list being given to the fetcher. Also
    // remove from the obsolete list.
    PartitionToInputs pendingInputsOfOnePartitionRange = inputHost.clearAndGetOnePartitionRange();
    int includedMaps = 0;
    for (Iterator<InputAttemptIdentifier> inputIter = pendingInputsOfOnePartitionRange.getInputs().iterator(); inputIter.hasNext(); ) {
        InputAttemptIdentifier input = inputIter.next();
        // For pipelined shuffle.
        if (!validateInputAttemptForPipelinedShuffle(input)) {
            continue;
        }
        // Avoid adding attempts which have already completed.
        boolean alreadyCompleted;
        if (input instanceof CompositeInputAttemptIdentifier) {
            CompositeInputAttemptIdentifier compositeInput = (CompositeInputAttemptIdentifier) input;
            int nextClearBit = completedInputSet.nextClearBit(compositeInput.getInputIdentifier());
            int maxClearBit = compositeInput.getInputIdentifier() + compositeInput.getInputIdentifierCount();
            // NOTE(review): if the composite covers [id, id + count), then ">="
            // would be the exact "all completed" test; ">" additionally requires
            // the bit just past the range to be set. Left unchanged - confirm
            // the meaning of getInputIdentifierCount() before tightening.
            alreadyCompleted = nextClearBit > maxClearBit;
        } else {
            alreadyCompleted = completedInputSet.get(input.getInputIdentifier());
        }
        // Avoid adding attempts which have already completed or have been marked as OBSOLETE
        if (alreadyCompleted || obsoletedInputs.contains(input)) {
            inputIter.remove();
            continue;
        }
        // Check if max threshold is met
        if (includedMaps >= maxTaskOutputAtOnce) {
            inputIter.remove();
            // add to inputHost
            inputHost.addKnownInput(pendingInputsOfOnePartitionRange.getPartition(), pendingInputsOfOnePartitionRange.getPartitionCount(), input);
        } else {
            includedMaps++;
        }
    }
    if (inputHost.getNumPendingPartitions() > 0) {
        // add it to queue
        pendingHosts.add(inputHost);
    }
    // Flag every input handed to this fetcher as scheduled for download.
    for (InputAttemptIdentifier input : pendingInputsOfOnePartitionRange.getInputs()) {
        ShuffleEventInfo eventInfo = shuffleInfoEventsMap.get(input.getInputIdentifier());
        if (eventInfo != null) {
            eventInfo.scheduledForDownload = true;
        }
    }
    fetcherBuilder.assignWork(inputHost.getHost(), inputHost.getPort(), pendingInputsOfOnePartitionRange.getPartition(), pendingInputsOfOnePartitionRange.getPartitionCount(), pendingInputsOfOnePartitionRange.getInputs());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created Fetcher for host: " + inputHost.getHost() + ", info: " + inputHost.getAdditionalInfo() + ", with inputs: " + pendingInputsOfOnePartitionRange);
    }
    return fetcherBuilder.build();
}
Also used : Path(org.apache.hadoop.fs.Path) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) FetcherBuilder(org.apache.tez.runtime.library.common.shuffle.Fetcher.FetcherBuilder) PartitionToInputs(org.apache.tez.runtime.library.common.shuffle.InputHost.PartitionToInputs) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 20 with CompositeInputAttemptIdentifier

use of org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier in project tez by apache.

the class ShuffleInputEventHandlerImpl method processCompositeRoutedDataMovementEvent.

/**
 * Handles a composite routed data movement event, which covers
 * {@code crdme.getCount()} consecutive source partitions starting at
 * {@code crdme.getSourceIndex()}.
 *
 * <p>When the payload carries empty-partition information, each empty
 * partition in the range is reported to the shuffle manager as completed with
 * no data. If every partition in the range is empty, nothing is registered
 * for fetching; otherwise the composite input is added as a known input.
 *
 * @param crdme                 the composite event being processed
 * @param shufflePayload        decoded payload supplying host, port and the empty-partition flag
 * @param emptyPartitionsBitSet bits set for source partitions that produced no data
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovementEvent crdme, DataMovementEventPayloadProto shufflePayload, BitSet emptyPartitionsBitSet) throws IOException {
    final int partitionId = crdme.getSourceIndex();
    if (LOG.isDebugEnabled()) {
        LOG.debug("DME srcIdx: " + partitionId + ", targetIndex: " + crdme.getTargetIndex() + ", count:" + crdme.getCount() + ", attemptNum: " + crdme.getVersion() + ", payload: " + ShuffleUtils.stringify(shufflePayload));
    }

    if (shufflePayload.hasEmptyPartitions()) {
        CompositeInputAttemptIdentifier emptyCheckIdentifier = constructInputAttemptIdentifier(crdme.getTargetIndex(), crdme.getCount(), crdme.getVersion(), shufflePayload, false);
        boolean sawPartitionWithData = false;
        for (int offset = 0; offset < crdme.getCount(); offset++) {
            final int srcPartitionId = partitionId + offset;
            if (!emptyPartitionsBitSet.get(srcPartitionId)) {
                // This partition has data, so the range still has to be fetched.
                sawPartitionWithData = true;
                continue;
            }
            InputAttemptIdentifier emptyAttempt = emptyCheckIdentifier.expand(offset);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Source partition: " + srcPartitionId + " did not generate any data. SrcAttempt: [" + emptyAttempt + "]. Not fetching.");
            }
            numDmeEventsNoData.getAndIncrement();
            shuffleManager.addCompletedInputWithNoData(emptyAttempt);
        }
        if (!sawPartitionWithData) {
            // Every partition in the range was empty: nothing left to fetch.
            return;
        }
    }

    final boolean shared = useSharedInputs && partitionId == 0;
    CompositeInputAttemptIdentifier srcAttemptIdentifier = constructInputAttemptIdentifier(crdme.getTargetIndex(), crdme.getCount(), crdme.getVersion(), shufflePayload, shared);
    shuffleManager.addKnownInput(shufflePayload.getHost(), shufflePayload.getPort(), srcAttemptIdentifier, partitionId);
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier)

Aggregations

CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)36 InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier)24 Test (org.junit.Test)24 InputContext (org.apache.tez.runtime.api.InputContext)12 IOException (java.io.IOException)10 Event (org.apache.tez.runtime.api.Event)10 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)10 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)8 Configuration (org.apache.hadoop.conf.Configuration)7 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)7 Matchers.anyString (org.mockito.Matchers.anyString)7 LinkedList (java.util.LinkedList)6 Path (org.apache.hadoop.fs.Path)5 InputFailedEvent (org.apache.tez.runtime.api.events.InputFailedEvent)5 FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator)5 ByteString (com.google.protobuf.ByteString)4 ExecutorService (java.util.concurrent.ExecutorService)4 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)4 InvocationOnMock (org.mockito.invocation.InvocationOnMock)4 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3