Search in sources :

Example 26 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class FetcherOrderedGrouped method setupConnection.

/**
 * Opens and validates the shuffle HTTP connection to {@code host} for the given attempts.
 *
 * <p>On success the connection's input stream is stored in {@code input}. If an
 * {@link IOException} or {@link InterruptedException} is caught and the fetcher has not been
 * stopped, every attempt still present in {@code remaining} is reported to the scheduler as a
 * failed copy.
 *
 * @param host     the host to fetch from; its host name, port, partition id and partition count
 *                 are used to build the base URI
 * @param attempts the input attempts encoded into the fetch URL
 * @return {@code true} if the connection was established and validated; {@code false} if the
 *         fetcher was stopped or an exception was caught while connecting / validating
 * @throws IOException declared by the signature; IOExceptions raised inside the try body are
 *                     caught and handled here
 */
@VisibleForTesting
boolean setupConnection(MapHost host, Collection<InputAttemptIdentifier> attempts) throws IOException {
    boolean connectSucceeded = false;
    try {
        String shuffleBase = ShuffleUtils.constructBaseURIForShuffleHandler(host.getHost(), host.getPort(), host.getPartitionId(), host.getPartitionCount(), applicationId, dagId, sslShuffle).toString();
        URL fetchUrl = ShuffleUtils.constructInputURL(shuffleBase, attempts, httpConnectionParams.isKeepAlive());
        httpConnection = ShuffleUtils.getHttpConnection(asyncHttp, fetchUrl, httpConnectionParams, logIdentifier, jobTokenSecretManager);
        connectSucceeded = httpConnection.connect();
        if (!stopped) {
            // Still running: grab the stream and let the connection validate the reply.
            input = httpConnection.getInputStream();
            httpConnection.validate();
            return true;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Detected fetcher has been shutdown after connection establishment. Returning");
        }
        return false;
    } catch (IOException | InterruptedException ie) {
        if (ie instanceof InterruptedException) {
            // Restore the interrupt flag for code further up the stack.
            Thread.currentThread().interrupt();
        }
        if (stopped) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Not reporting fetch failure, since an Exception was caught after shutdown");
            }
            return false;
        }
        ioErrs.increment(1);
        final boolean connectFailed = !connectSucceeded;
        if (connectFailed) {
            LOG.warn("Failed to connect to " + host + " with " + remaining.size() + " inputs", ie);
            connectionErrs.increment(1);
        } else {
            LOG.warn("Failed to verify reply after connecting to " + host + " with " + remaining.size() + " inputs pending", ie);
        }
        // Reporting every pending attempt against this one host indirectly penalizes the host.
        for (InputAttemptIdentifier pending : remaining.values()) {
            // Temporary glitches still need handling; for now report a read error to the AM
            // so that its source failure heuristics are triggered.
            scheduler.copyFailed(pending, host, connectSucceeded, connectFailed, false);
        }
        return false;
    }
}
Also used : InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) IOException(java.io.IOException) URL(java.net.URL) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 27 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class ShuffleInputEventHandlerOrderedGrouped method processCompositeRoutedDataMovementEvent.

/**
 * Handles a composite routed data movement event covering {@code crdmEvent.getCount()}
 * consecutive source partitions starting at {@code crdmEvent.getSourceIndex()}.
 *
 * <p>When the payload reports empty partitions, each partition flagged in
 * {@code emptyPartitionsBitSet} is immediately reported to the scheduler as a successful,
 * zero-length copy. If every covered partition is empty, nothing is registered for fetching;
 * otherwise the composite identifier is registered as a known map output.
 *
 * @param crdmEvent             the event being processed
 * @param shufflePayload        decoded payload of the event (host, port, empty-partition flag)
 * @param emptyPartitionsBitSet bits set for partitions that produced no data; only consulted
 *                              when {@code shufflePayload.hasEmptyPartitions()} is true
 * @throws IOException declared by the signature
 */
private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovementEvent crdmEvent, DataMovementEventPayloadProto shufflePayload, BitSet emptyPartitionsBitSet) throws IOException {
    final int partitionId = crdmEvent.getSourceIndex();
    final CompositeInputAttemptIdentifier compositeId = constructInputAttemptIdentifier(crdmEvent.getTargetIndex(), crdmEvent.getCount(), crdmEvent.getVersion(), shufflePayload);
    if (LOG.isDebugEnabled()) {
        LOG.debug("DME srcIdx: " + partitionId + ", targetIdx: " + crdmEvent.getTargetIndex() + ", count:" + crdmEvent.getCount() + ", attemptNum: " + crdmEvent.getVersion() + ", payload: " + ShuffleUtils.stringify(shufflePayload));
    }
    if (shufflePayload.hasEmptyPartitions()) {
        boolean sawNonEmptyPartition = false;
        for (int offset = 0; offset < crdmEvent.getCount(); offset++) {
            final int srcPartitionId = partitionId + offset;
            if (!emptyPartitionsBitSet.get(srcPartitionId)) {
                // This partition has data, so the composite output must still be fetched.
                sawNonEmptyPartition = true;
                continue;
            }
            final InputAttemptIdentifier emptyAttempt = compositeId.expand(offset);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Source partition: " + srcPartitionId + " did not generate any data. SrcAttempt: [" + emptyAttempt + "]. Not fetching.");
            }
            numDmeEventsNoData.getAndIncrement();
            // Nothing to copy: mark the attempt as completed with zero bytes.
            scheduler.copySucceeded(emptyAttempt, null, 0, 0, 0, null, true);
        }
        if (!sawNonEmptyPartition) {
            return;
        }
    }
    scheduler.addKnownMapOutput(StringInterner.weakIntern(shufflePayload.getHost()), shufflePayload.getPort(), partitionId, compositeId);
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier)

Example 28 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class ShuffleInputEventHandlerOrderedGrouped method constructInputAttemptIdentifier.

/**
 * Builds the {@link CompositeInputAttemptIdentifier} described by a shuffle payload.
 *
 * <p>If the payload carries a spill id, the identifier is created with spill information:
 * {@code FINAL_UPDATE} when the payload's last-event flag is set, {@code INCREMENTAL_UPDATE}
 * otherwise. Without a spill id the shorter constructor is used.
 *
 * @param targetIndex      first constructor argument of the identifier
 * @param targetIndexCount last constructor argument of the identifier
 * @param version          second constructor argument of the identifier
 * @param shufflePayload   payload supplying the optional path component and spill details
 * @return the newly created identifier; its path component is {@code null} when the payload
 *         has none
 */
private CompositeInputAttemptIdentifier constructInputAttemptIdentifier(int targetIndex, int targetIndexCount, int version, DataMovementEventPayloadProto shufflePayload) {
    String internedPath = null;
    if (shufflePayload.hasPathComponent()) {
        internedPath = StringInterner.weakIntern(shufflePayload.getPathComponent());
    }
    final int spillId = shufflePayload.getSpillId();
    if (!shufflePayload.hasSpillId()) {
        return new CompositeInputAttemptIdentifier(targetIndex, version, internedPath, targetIndexCount);
    }
    final InputAttemptIdentifier.SPILL_INFO spillInfo;
    if (shufflePayload.getLastEvent()) {
        spillInfo = InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE;
    } else {
        spillInfo = InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE;
    }
    return new CompositeInputAttemptIdentifier(targetIndex, version, internedPath, false, spillInfo, spillId, targetIndexCount);
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) ByteString(com.google.protobuf.ByteString)

Example 29 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class Fetcher method fetchInputs.

/**
 * Reads one fetch response from {@code input} in two phases: first all shuffle headers are
 * read and checked, then the data for each non-empty map output is copied to an allocated
 * {@link FetchedInput} and reported through {@code fetcherCallback.fetchSucceeded}.
 *
 * @param input                  stream to read from; when {@code compositeFetch} is set the
 *                               partition count is read from it first
 * @param callback               optional caching callback; when non-null and the source attempt
 *                               is shared, the input is allocated on disk and offered to
 *                               {@code callback.cache} before success is reported
 * @param inputAttemptIdentifier the attempt being fetched; used in error messages and, on
 *                               success, its {@code toString()} is removed from
 *                               {@code srcAttemptsRemaining}
 * @return {@code null} on success or when the fetcher is already shut down; otherwise the
 *         attempts that should be treated as failed (a single attempt when it could be
 *         identified, all remaining attempts when it could not)
 * @throws FetcherReadTimeoutException when {@code shouldRetry} returns true for a caught
 *                                     {@link IOException} or {@link InternalError}
 */
private InputAttemptIdentifier[] fetchInputs(DataInputStream input, CachingCallBack callback, InputAttemptIdentifier inputAttemptIdentifier) throws FetcherReadTimeoutException {
    // These locals are declared outside the try so the catch block can see how far
    // processing got before the failure.
    FetchedInput fetchedInput = null;
    InputAttemptIdentifier srcAttemptId = null;
    long decompressedLength = 0;
    long compressedLength = 0;
    try {
        long startTime = System.currentTimeMillis();
        // A non-composite fetch always carries exactly one map output.
        int partitionCount = 1;
        if (this.compositeFetch) {
            // Multiple partitions are fetched
            partitionCount = WritableUtils.readVInt(input);
        }
        // Phase 1: read every header up front; only non-empty outputs are recorded here.
        ArrayList<MapOutputStat> mapOutputStats = new ArrayList<>(partitionCount);
        for (int mapOutputIndex = 0; mapOutputIndex < partitionCount; mapOutputIndex++) {
            MapOutputStat mapOutputStat = null;
            int responsePartition = -1;
            // Read the shuffle header
            String pathComponent = null;
            try {
                ShuffleHeader header = new ShuffleHeader();
                header.readFields(input);
                pathComponent = header.getMapId();
                if (!pathComponent.startsWith(InputAttemptIdentifier.PATH_PREFIX)) {
                    throw new IllegalArgumentException("Invalid map id: " + header.getMapId() + ", expected to start with " + InputAttemptIdentifier.PATH_PREFIX + ", partition: " + header.getPartition() + " while fetching " + inputAttemptIdentifier);
                }
                // Map the (path, partition) pair from the header back to the attempt that was requested.
                srcAttemptId = pathToAttemptMap.get(new PathPartition(pathComponent, header.getPartition()));
                if (srcAttemptId == null) {
                    throw new IllegalArgumentException("Source attempt not found for map id: " + header.getMapId() + ", partition: " + header.getPartition() + " while fetching " + inputAttemptIdentifier);
                }
                if (header.getCompressedLength() == 0) {
                    // Empty partitions are already accounted for
                    // (this skips both the stat recording and the sanity check below)
                    continue;
                }
                mapOutputStat = new MapOutputStat(srcAttemptId, header.getUncompressedLength(), header.getCompressedLength(), header.getPartition());
                mapOutputStats.add(mapOutputStat);
                responsePartition = header.getPartition();
            } catch (IllegalArgumentException e) {
                // badIdErrs.increment(1);
                if (!isShutDown.get()) {
                    LOG.warn("Invalid src id ", e);
                    // Don't know which one was bad, so consider all of them as bad
                    return srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.size()]);
                } else {
                    if (isDebugEnabled) {
                        LOG.debug("Already shutdown. Ignoring badId error with message: " + e.getMessage());
                    }
                    return null;
                }
            }
            // Do some basic sanity verification
            // (mapOutputStat is non-null here: the empty case continued and the error paths returned)
            if (!verifySanity(mapOutputStat.compressedLength, mapOutputStat.decompressedLength, responsePartition, mapOutputStat.srcAttemptId, pathComponent)) {
                if (!isShutDown.get()) {
                    srcAttemptId = mapOutputStat.srcAttemptId;
                    // NOTE(review): srcAttemptId was already null-checked when the header was read,
                    // so this fallback looks unreachable - confirm before relying on it.
                    if (srcAttemptId == null) {
                        LOG.warn("Was expecting " + getNextRemainingAttempt() + " but got null");
                        srcAttemptId = getNextRemainingAttempt();
                    }
                    assert (srcAttemptId != null);
                    return new InputAttemptIdentifier[] { srcAttemptId };
                } else {
                    if (isDebugEnabled) {
                        LOG.debug("Already shutdown. Ignoring verification failure.");
                    }
                    return null;
                }
            }
            if (isDebugEnabled) {
                LOG.debug("header: " + mapOutputStat.srcAttemptId + ", len: " + mapOutputStat.compressedLength + ", decomp len: " + mapOutputStat.decompressedLength);
            }
        }
        // Phase 2: copy the data for each recorded (non-empty) map output, in header order.
        for (MapOutputStat mapOutputStat : mapOutputStats) {
            // Get the location for the map output - either in-memory or on-disk
            srcAttemptId = mapOutputStat.srcAttemptId;
            decompressedLength = mapOutputStat.decompressedLength;
            compressedLength = mapOutputStat.compressedLength;
            // TODO TEZ-957. handle IOException here when Broadcast has better error checking
            // NOTE(review): fetchedInput is not reset per iteration, so if allocation fails on a
            // later iteration the catch block sees the previous iteration's input - confirm that
            // cleaning it up again is harmless.
            if (srcAttemptId.isShared() && callback != null) {
                // force disk if input is being shared
                fetchedInput = inputManager.allocateType(Type.DISK, decompressedLength, compressedLength, srcAttemptId);
            } else {
                fetchedInput = inputManager.allocate(decompressedLength, compressedLength, srcAttemptId);
            }
            // Go!
            if (isDebugEnabled) {
                LOG.debug("fetcher" + " about to shuffle output of srcAttempt " + fetchedInput.getInputAttemptIdentifier() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + fetchedInput.getType());
            }
            if (fetchedInput.getType() == Type.MEMORY) {
                // The long lengths are narrowed to int for the in-memory copy.
                // NOTE(review): presumably memory inputs are always small enough for this - confirm.
                ShuffleUtils.shuffleToMemory(((MemoryFetchedInput) fetchedInput).getBytes(), input, (int) decompressedLength, (int) compressedLength, codec, ifileReadAhead, ifileReadAheadLength, LOG, fetchedInput.getInputAttemptIdentifier());
            } else if (fetchedInput.getType() == Type.DISK) {
                ShuffleUtils.shuffleToDisk(((DiskFetchedInput) fetchedInput).getOutputStream(), (host + ":" + port), input, compressedLength, decompressedLength, LOG, fetchedInput.getInputAttemptIdentifier(), ifileReadAhead, ifileReadAheadLength, verifyDiskChecksum);
            } else {
                throw new TezUncheckedException("Bad fetchedInput type while fetching shuffle data " + fetchedInput);
            }
            // offer the fetched input for caching
            if (srcAttemptId.isShared() && callback != null) {
                // this has to be before the fetchSucceeded, because that goes across
                // threads into the reader thread and can potentially shutdown this thread
                // while it is still caching.
                callback.cache(host, srcAttemptId, fetchedInput, compressedLength, decompressedLength);
            }
            // Inform the shuffle scheduler
            long endTime = System.currentTimeMillis();
            // Reset retryStartTime as map task make progress if retried before.
            retryStartTime = 0;
            // The reported duration is measured from the start of this method call, not per output.
            fetcherCallback.fetchSucceeded(host, srcAttemptId, fetchedInput, compressedLength, decompressedLength, (endTime - startTime));
        // Note successful shuffle
        // metrics.successFetch();
        }
        // All outputs of this response were handled; the remaining-attempts map is keyed by
        // the identifier's toString() here.
        srcAttemptsRemaining.remove(inputAttemptIdentifier.toString());
    } catch (IOException | InternalError ioe) {
        // Order of checks: shutdown first (stay silent), then retry, then report a failure.
        if (isShutDown.get()) {
            cleanupFetchedInput(fetchedInput);
            if (isDebugEnabled) {
                LOG.debug("Already shutdown. Ignoring exception during fetch " + ioe.getClass().getName() + ", Message: " + ioe.getMessage());
            }
            return null;
        }
        if (shouldRetry(srcAttemptId, ioe)) {
            // release mem/file handles
            cleanupFetchedInput(fetchedInput);
            throw new FetcherReadTimeoutException(ioe);
        }
        // ioErrs.increment(1);
        // Either no header was matched to an attempt yet, or nothing had been allocated yet.
        if (srcAttemptId == null || fetchedInput == null) {
            // Note: the message has no separator between "header" and the attempt id; it is
            // runtime text and is left as is.
            LOG.info("fetcher" + " failed to read map header" + srcAttemptId + " decomp: " + decompressedLength + ", " + compressedLength, ioe);
            // Cleanup the fetchedInput before returning.
            cleanupFetchedInput(fetchedInput);
            if (srcAttemptId == null) {
                // The failing attempt is unknown, so every remaining attempt is returned.
                return srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.size()]);
            } else {
                return new InputAttemptIdentifier[] { srcAttemptId };
            }
        }
        LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host, ioe);
        // Cleanup the fetchedInput
        cleanupFetchedInput(fetchedInput);
        // metrics.failedFetch();
        return new InputAttemptIdentifier[] { srcAttemptId };
    }
    // Success: there are no failed attempts to report.
    return null;
}
Also used : TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) FetcherReadTimeoutException(org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException) ShuffleHeader(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader) ArrayList(java.util.ArrayList) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) IOException(java.io.IOException)

Example 30 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class ShuffleInputEventHandlerOrderedGrouped method processTaskFailedEvent.

/**
 * Tells the scheduler that the input identified by the failed event is obsolete.
 *
 * @param ifEvent the input-failed event; its target index and version identify the attempt
 */
private void processTaskFailedEvent(InputFailedEvent ifEvent) {
    final InputAttemptIdentifier obsoleteAttempt = new InputAttemptIdentifier(ifEvent.getTargetIndex(), ifEvent.getVersion());
    scheduler.obsoleteInput(obsoleteAttempt);
    if (!LOG.isDebugEnabled()) {
        return;
    }
    LOG.debug("Obsoleting output of src-task: " + obsoleteAttempt);
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier)

Aggregations

InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier): 55, CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier): 41, Test (org.junit.Test): 31, TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 17, Configuration (org.apache.hadoop.conf.Configuration): 16, InputContext (org.apache.tez.runtime.api.InputContext): 16, IOException (java.io.IOException): 15, TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration): 15, Path (org.apache.hadoop.fs.Path): 10, LinkedList (java.util.LinkedList): 8, Matchers.anyString (org.mockito.Matchers.anyString): 8, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 7, TezCounters (org.apache.tez.common.counters.TezCounters): 7, Event (org.apache.tez.runtime.api.Event): 7, DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent): 7, TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord): 6, InvocationOnMock (org.mockito.invocation.InvocationOnMock): 6, FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException): 5, URL (java.net.URL): 4, ArrayList (java.util.ArrayList): 4