Search in sources:

Example 1 with FetcherReadTimeoutException

Use of org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException in project tez by apache.

From class FetcherOrderedGrouped, the method copyMapOutput:

protected InputAttemptIdentifier[] copyMapOutput(MapHost host, DataInputStream input, InputAttemptIdentifier inputAttemptIdentifier) throws FetcherReadTimeoutException {
    MapOutput mapOutput = null;
    InputAttemptIdentifier srcAttemptId = null;
    long decompressedLength = 0;
    long compressedLength = 0;
    try {
        long startTime = System.currentTimeMillis();
        int partitionCount = 1;
        if (this.compositeFetch) {
            // Multiple partitions are fetched
            partitionCount = WritableUtils.readVInt(input);
        }
        ArrayList<MapOutputStat> mapOutputStats = new ArrayList<>(partitionCount);
        for (int mapOutputIndex = 0; mapOutputIndex < partitionCount; mapOutputIndex++) {
            MapOutputStat mapOutputStat = null;
            try {
                // Read the shuffle header
                ShuffleHeader header = new ShuffleHeader();
                // TODO Review: Multiple header reads in case of status WAIT ?
                header.readFields(input);
                if (!header.mapId.startsWith(InputAttemptIdentifier.PATH_PREFIX)) {
                    if (!stopped) {
                        badIdErrs.increment(1);
                        LOG.warn("Invalid map id: " + header.mapId + ", expected to start with " + InputAttemptIdentifier.PATH_PREFIX + ", partition: " + header.forReduce);
                        return new InputAttemptIdentifier[] { getNextRemainingAttempt() };
                    } else {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Already shutdown. Ignoring invalid map id error");
                        }
                        return EMPTY_ATTEMPT_ID_ARRAY;
                    }
                }
                if (header.getCompressedLength() == 0) {
                    // Empty partitions are already accounted for
                    continue;
                }
                mapOutputStat = new MapOutputStat(scheduler.getIdentifierForFetchedOutput(header.mapId, header.forReduce), header.uncompressedLength, header.compressedLength, header.forReduce);
                mapOutputStats.add(mapOutputStat);
            } catch (IllegalArgumentException e) {
                if (!stopped) {
                    badIdErrs.increment(1);
                    LOG.warn("Invalid map id ", e);
                    // Don't know which one was bad, so consider all of them as bad and don't
                    // read the remaining because we don't know where to start reading from. YARN-1773
                    return new InputAttemptIdentifier[] { getNextRemainingAttempt() };
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Already shutdown. Ignoring invalid map id error. Exception: " + e.getClass().getName() + ", Message: " + e.getMessage());
                    }
                    return EMPTY_ATTEMPT_ID_ARRAY;
                }
            }
            // Do some basic sanity verification
            if (!verifySanity(mapOutputStat.compressedLength, mapOutputStat.decompressedLength, mapOutputStat.forReduce, remaining, mapOutputStat.srcAttemptId)) {
                if (!stopped) {
                    srcAttemptId = mapOutputStat.srcAttemptId;
                    if (srcAttemptId == null) {
                        srcAttemptId = getNextRemainingAttempt();
                        LOG.warn("Was expecting " + srcAttemptId + " but got null");
                    }
                    assert (srcAttemptId != null);
                    return new InputAttemptIdentifier[] { srcAttemptId };
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Already stopped. Ignoring verification failure.");
                    }
                    return EMPTY_ATTEMPT_ID_ARRAY;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("header: " + mapOutputStat.srcAttemptId + ", len: " + mapOutputStat.compressedLength + ", decomp len: " + mapOutputStat.decompressedLength);
            }
        }
        for (MapOutputStat mapOutputStat : mapOutputStats) {
            // Get the location for the map output - either in-memory or on-disk
            srcAttemptId = mapOutputStat.srcAttemptId;
            decompressedLength = mapOutputStat.decompressedLength;
            compressedLength = mapOutputStat.compressedLength;
            try {
                mapOutput = allocator.reserve(srcAttemptId, decompressedLength, compressedLength, id);
            } catch (IOException e) {
                if (!stopped) {
                    // Kill the reduce attempt
                    ioErrs.increment(1);
                    scheduler.reportLocalError(e);
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Already stopped. Ignoring error from merger.reserve");
                    }
                }
                return EMPTY_ATTEMPT_ID_ARRAY;
            }
            // Check if we can shuffle *now* ...
            if (mapOutput.getType() == Type.WAIT) {
                LOG.info("fetcher#" + id + " - MergerManager returned Status.WAIT ...");
                // Not an error but wait to process data.
                return EMPTY_ATTEMPT_ID_ARRAY;
            }
            // Go!
            if (LOG.isDebugEnabled()) {
                LOG.debug("fetcher#" + id + " about to shuffle output of map " + mapOutput.getAttemptIdentifier() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput.getType());
            }
            if (mapOutput.getType() == Type.MEMORY) {
                ShuffleUtils.shuffleToMemory(mapOutput.getMemory(), input, (int) decompressedLength, (int) compressedLength, codec, ifileReadAhead, ifileReadAheadLength, LOG, mapOutput.getAttemptIdentifier());
            } else if (mapOutput.getType() == Type.DISK) {
                ShuffleUtils.shuffleToDisk(mapOutput.getDisk(), host.getHostIdentifier(), input, compressedLength, decompressedLength, LOG, mapOutput.getAttemptIdentifier(), ifileReadAhead, ifileReadAheadLength, verifyDiskChecksum);
            } else {
                throw new IOException("Unknown mapOutput type while fetching shuffle data:" + mapOutput.getType());
            }
            // Inform the shuffle scheduler
            long endTime = System.currentTimeMillis();
            // Reset retryStartTime since the map task made progress, in case it was retried before.
            retryStartTime = 0;
            scheduler.copySucceeded(srcAttemptId, host, compressedLength, decompressedLength, endTime - startTime, mapOutput, false);
        }
        remaining.remove(inputAttemptIdentifier.toString());
    } catch (IOException | InternalError ioe) {
        if (stopped) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Not reporting fetch failure for exception during data copy: [" + ioe.getClass().getName() + ", " + ioe.getMessage() + "]");
            }
            cleanupCurrentConnection(true);
            if (mapOutput != null) {
                // Release resources
                mapOutput.abort();
            }
            // Don't need to put back - since that's handled by the invoker
            return EMPTY_ATTEMPT_ID_ARRAY;
        }
        if (shouldRetry(host, ioe)) {
            // release mem/file handles
            if (mapOutput != null) {
                mapOutput.abort();
            }
            throw new FetcherReadTimeoutException(ioe);
        }
        ioErrs.increment(1);
        if (srcAttemptId == null || mapOutput == null) {
            LOG.info("fetcher#" + id + " failed to read map header" + srcAttemptId + " decomp: " + decompressedLength + ", " + compressedLength, ioe);
            if (srcAttemptId == null) {
                return remaining.values().toArray(new InputAttemptIdentifier[remaining.values().size()]);
            } else {
                return new InputAttemptIdentifier[] { srcAttemptId };
            }
        }
        LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host.getHostIdentifier(), ioe);
        // Inform the shuffle-scheduler
        mapOutput.abort();
        return new InputAttemptIdentifier[] { srcAttemptId };
    }
    return null;
}
Also used: FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException), ArrayList (java.util.ArrayList), InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier), IOException (java.io.IOException)
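
The conversion of an IOException into a FetcherReadTimeoutException in the catch block above is gated by shouldRetry (not shown on this page). A minimal, standalone sketch of one plausible retry-window policy follows; the class and field names are illustrative assumptions, not the actual Tez implementation:

import java.net.SocketTimeoutException;

// Sketch only: a read failure is escalated to a reconnect-and-retry (i.e. a
// FetcherReadTimeoutException in the caller) while the first failure for this
// host is still within the configured read-timeout budget.
class RetryWindow {
    private long retryStartTime = 0;      // 0 means "no failure observed yet"
    private final long readTimeoutMs;     // budget, e.g. the shuffle read timeout

    RetryWindow(long readTimeoutMs) {
        this.readTimeoutMs = readTimeoutMs;
    }

    /** True if the caller should reconnect and retry instead of reporting a failed copy. */
    boolean shouldRetry(Throwable ioe) {
        if (!(ioe instanceof SocketTimeoutException)) {
            return false;                 // only socket read timeouts qualify
        }
        long now = System.currentTimeMillis();
        if (retryStartTime == 0) {
            retryStartTime = now;         // first failure opens the retry window
        }
        return now - retryStartTime < readTimeoutMs;
    }

    void onCopySucceeded() {
        retryStartTime = 0;               // progress resets the window, as copyMapOutput does
    }
}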

Example 2 with FetcherReadTimeoutException

Use of org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException in project tez by apache.

From class Fetcher, the method fetchInputs:

private InputAttemptIdentifier[] fetchInputs(DataInputStream input, CachingCallBack callback, InputAttemptIdentifier inputAttemptIdentifier) throws FetcherReadTimeoutException {
    FetchedInput fetchedInput = null;
    InputAttemptIdentifier srcAttemptId = null;
    long decompressedLength = 0;
    long compressedLength = 0;
    try {
        long startTime = System.currentTimeMillis();
        int partitionCount = 1;
        if (this.compositeFetch) {
            // Multiple partitions are fetched
            partitionCount = WritableUtils.readVInt(input);
        }
        ArrayList<MapOutputStat> mapOutputStats = new ArrayList<>(partitionCount);
        for (int mapOutputIndex = 0; mapOutputIndex < partitionCount; mapOutputIndex++) {
            MapOutputStat mapOutputStat = null;
            int responsePartition = -1;
            // Read the shuffle header
            String pathComponent = null;
            try {
                ShuffleHeader header = new ShuffleHeader();
                header.readFields(input);
                pathComponent = header.getMapId();
                if (!pathComponent.startsWith(InputAttemptIdentifier.PATH_PREFIX)) {
                    throw new IllegalArgumentException("Invalid map id: " + header.getMapId() + ", expected to start with " + InputAttemptIdentifier.PATH_PREFIX + ", partition: " + header.getPartition() + " while fetching " + inputAttemptIdentifier);
                }
                srcAttemptId = pathToAttemptMap.get(new PathPartition(pathComponent, header.getPartition()));
                if (srcAttemptId == null) {
                    throw new IllegalArgumentException("Source attempt not found for map id: " + header.getMapId() + ", partition: " + header.getPartition() + " while fetching " + inputAttemptIdentifier);
                }
                if (header.getCompressedLength() == 0) {
                    // Empty partitions are already accounted for
                    continue;
                }
                mapOutputStat = new MapOutputStat(srcAttemptId, header.getUncompressedLength(), header.getCompressedLength(), header.getPartition());
                mapOutputStats.add(mapOutputStat);
                responsePartition = header.getPartition();
            } catch (IllegalArgumentException e) {
                // badIdErrs.increment(1);
                if (!isShutDown.get()) {
                    LOG.warn("Invalid src id ", e);
                    // Don't know which one was bad, so consider all of them as bad
                    return srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.size()]);
                } else {
                    if (isDebugEnabled) {
                        LOG.debug("Already shutdown. Ignoring badId error with message: " + e.getMessage());
                    }
                    return null;
                }
            }
            // Do some basic sanity verification
            if (!verifySanity(mapOutputStat.compressedLength, mapOutputStat.decompressedLength, responsePartition, mapOutputStat.srcAttemptId, pathComponent)) {
                if (!isShutDown.get()) {
                    srcAttemptId = mapOutputStat.srcAttemptId;
                    if (srcAttemptId == null) {
                        LOG.warn("Was expecting " + getNextRemainingAttempt() + " but got null");
                        srcAttemptId = getNextRemainingAttempt();
                    }
                    assert (srcAttemptId != null);
                    return new InputAttemptIdentifier[] { srcAttemptId };
                } else {
                    if (isDebugEnabled) {
                        LOG.debug("Already shutdown. Ignoring verification failure.");
                    }
                    return null;
                }
            }
            if (isDebugEnabled) {
                LOG.debug("header: " + mapOutputStat.srcAttemptId + ", len: " + mapOutputStat.compressedLength + ", decomp len: " + mapOutputStat.decompressedLength);
            }
        }
        for (MapOutputStat mapOutputStat : mapOutputStats) {
            // Get the location for the map output - either in-memory or on-disk
            srcAttemptId = mapOutputStat.srcAttemptId;
            decompressedLength = mapOutputStat.decompressedLength;
            compressedLength = mapOutputStat.compressedLength;
            // TODO TEZ-957. handle IOException here when Broadcast has better error checking
            if (srcAttemptId.isShared() && callback != null) {
                // force disk if input is being shared
                fetchedInput = inputManager.allocateType(Type.DISK, decompressedLength, compressedLength, srcAttemptId);
            } else {
                fetchedInput = inputManager.allocate(decompressedLength, compressedLength, srcAttemptId);
            }
            // Go!
            if (isDebugEnabled) {
                LOG.debug("fetcher" + " about to shuffle output of srcAttempt " + fetchedInput.getInputAttemptIdentifier() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + fetchedInput.getType());
            }
            if (fetchedInput.getType() == Type.MEMORY) {
                ShuffleUtils.shuffleToMemory(((MemoryFetchedInput) fetchedInput).getBytes(), input, (int) decompressedLength, (int) compressedLength, codec, ifileReadAhead, ifileReadAheadLength, LOG, fetchedInput.getInputAttemptIdentifier());
            } else if (fetchedInput.getType() == Type.DISK) {
                ShuffleUtils.shuffleToDisk(((DiskFetchedInput) fetchedInput).getOutputStream(), (host + ":" + port), input, compressedLength, decompressedLength, LOG, fetchedInput.getInputAttemptIdentifier(), ifileReadAhead, ifileReadAheadLength, verifyDiskChecksum);
            } else {
                throw new TezUncheckedException("Bad fetchedInput type while fetching shuffle data " + fetchedInput);
            }
            // offer the fetched input for caching
            if (srcAttemptId.isShared() && callback != null) {
                // this has to be before the fetchSucceeded, because that goes across
                // threads into the reader thread and can potentially shutdown this thread
                // while it is still caching.
                callback.cache(host, srcAttemptId, fetchedInput, compressedLength, decompressedLength);
            }
            // Inform the shuffle scheduler
            long endTime = System.currentTimeMillis();
            // Reset retryStartTime since the map task made progress, in case it was retried before.
            retryStartTime = 0;
            fetcherCallback.fetchSucceeded(host, srcAttemptId, fetchedInput, compressedLength, decompressedLength, (endTime - startTime));
        // Note successful shuffle
        // metrics.successFetch();
        }
        srcAttemptsRemaining.remove(inputAttemptIdentifier.toString());
    } catch (IOException | InternalError ioe) {
        if (isShutDown.get()) {
            cleanupFetchedInput(fetchedInput);
            if (isDebugEnabled) {
                LOG.debug("Already shutdown. Ignoring exception during fetch " + ioe.getClass().getName() + ", Message: " + ioe.getMessage());
            }
            return null;
        }
        if (shouldRetry(srcAttemptId, ioe)) {
            // release mem/file handles
            cleanupFetchedInput(fetchedInput);
            throw new FetcherReadTimeoutException(ioe);
        }
        // ioErrs.increment(1);
        if (srcAttemptId == null || fetchedInput == null) {
            LOG.info("fetcher" + " failed to read map header" + srcAttemptId + " decomp: " + decompressedLength + ", " + compressedLength, ioe);
            // Cleanup the fetchedInput before returning.
            cleanupFetchedInput(fetchedInput);
            if (srcAttemptId == null) {
                return srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.size()]);
            } else {
                return new InputAttemptIdentifier[] { srcAttemptId };
            }
        }
        LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host, ioe);
        // Cleanup the fetchedInput
        cleanupFetchedInput(fetchedInput);
        // metrics.failedFetch();
        return new InputAttemptIdentifier[] { srcAttemptId };
    }
    return null;
}
Also used: TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException), FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException), ShuffleHeader (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader), ArrayList (java.util.ArrayList), CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier), InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier), IOException (java.io.IOException)
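
As the two loops in fetchInputs imply, a composite fetch response carries a vint partition count, then one ShuffleHeader per partition, with the payloads following afterwards in the same order. A small sketch of walking just the header block, under that assumption (error handling omitted; this is not a Tez utility):

import java.io.DataInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.WritableUtils;
import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader;

// Sketch: read the header block of a (possibly composite) shuffle response.
// When compositeFetch is off, the count defaults to 1, matching the code above.
final class ShuffleHeaderBlock {
    static List<ShuffleHeader> readHeaders(DataInputStream input, boolean compositeFetch)
            throws IOException {
        int partitionCount = compositeFetch ? WritableUtils.readVInt(input) : 1;
        List<ShuffleHeader> headers = new ArrayList<>(partitionCount);
        for (int i = 0; i < partitionCount; i++) {
            ShuffleHeader header = new ShuffleHeader();
            header.readFields(input);     // mapId, compressed/uncompressed lengths, partition
            headers.add(header);          // the payloads are consumed in a later pass
        }
        return headers;
    }

    private ShuffleHeaderBlock() { }
}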

Example 3 with FetcherReadTimeoutException

Use of org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException in project tez by apache.

From class FetcherOrderedGrouped, the method copyFromHost:

/**
 * The crux of the matter...
 *
 * @param host {@link MapHost} from which we need to
 *              shuffle available map-outputs.
 */
@VisibleForTesting
protected void copyFromHost(MapHost host) throws IOException {
    // reset retryStartTime for a new host
    retryStartTime = 0;
    // Get completed maps on 'host'
    List<InputAttemptIdentifier> srcAttempts = scheduler.getMapsForHost(host);
    // Sanity check to catch hosts with only 'OBSOLETE' maps, especially at the tail of large jobs
    if (srcAttempts.size() == 0) {
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Fetcher " + id + " going to fetch from " + host + " for: " + srcAttempts + ", partition range: " + minPartition + "-" + maxPartition);
    }
    populateRemainingMap(srcAttempts);
    // Construct the url and connect
    try {
        if (!setupConnection(host, remaining.values())) {
            if (stopped) {
                cleanupCurrentConnection(true);
            }
            // Maps will be added back in the finally block in case of failure.
            return;
        }
        // Loop through available map-outputs and fetch them
        // On any error, failedTasks is not null and we exit
        // after putting back the remaining maps to the
        // yet_to_be_fetched list and marking the failed tasks.
        InputAttemptIdentifier[] failedTasks = null;
        while (!remaining.isEmpty() && failedTasks == null) {
            InputAttemptIdentifier inputAttemptIdentifier = remaining.entrySet().iterator().next().getValue();
            // Fail immediately because we don't know how much of the stream to skip, so we cannot
            // move on to the remaining outputs. YARN-1773. Will get to them in the next retry.
            try {
                failedTasks = copyMapOutput(host, input, inputAttemptIdentifier);
            } catch (FetcherReadTimeoutException e) {
                // Setup connection again if disconnected
                cleanupCurrentConnection(true);
                if (stopped) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Not re-establishing connection since Fetcher has been stopped");
                    }
                    return;
                }
                // Connect with retry
                if (!setupConnection(host, remaining.values())) {
                    if (stopped) {
                        cleanupCurrentConnection(true);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Not reporting connection re-establishment failure since fetcher is stopped");
                        }
                        return;
                    }
                    failedTasks = new InputAttemptIdentifier[] { getNextRemainingAttempt() };
                    break;
                }
            }
        }
        if (failedTasks != null && failedTasks.length > 0) {
            if (stopped) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Ignoring copyMapOutput failures for tasks: " + Arrays.toString(failedTasks) + " since Fetcher has been stopped");
                }
            } else {
                LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks));
                for (InputAttemptIdentifier left : failedTasks) {
                    scheduler.copyFailed(left, host, true, false, false);
                }
            }
        }
        cleanupCurrentConnection(false);
        // Sanity check
        if (failedTasks == null && !remaining.isEmpty()) {
            throw new IOException("server didn't return all expected map outputs: " + remaining.size() + " left.");
        }
    } finally {
        putBackRemainingMapOutputs(host);
    }
}
Also used: FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException), InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier), IOException (java.io.IOException), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
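
Stripped of the shuffle-specific bookkeeping, the control flow of copyFromHost reduces to a reconnect-and-resume loop. A minimal sketch of that pattern, with hypothetical Connection and Work callbacks standing in for setupConnection and copyMapOutput (these are not Tez types):

import java.io.IOException;
import java.util.Queue;

import org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException;

// Sketch of the reconnect-and-resume pattern used by copyFromHost above.
final class ResumeLoop {
    interface Connection { void reset() throws IOException; }
    interface Work { void copyNext(Connection c) throws IOException, FetcherReadTimeoutException; }

    static void drain(Connection conn, Work work, Queue<String> remaining,
                      int maxReconnects) throws IOException {
        int reconnects = 0;
        while (!remaining.isEmpty()) {
            try {
                work.copyNext(conn);      // one map output per iteration
                remaining.poll();         // progress: drop the finished attempt
            } catch (FetcherReadTimeoutException e) {
                if (++reconnects > maxReconnects) {
                    throw new IOException("giving up after " + reconnects + " reconnects", e);
                }
                conn.reset();             // drop the half-read stream, reconnect,
                                          // and retry the same attempt
            }
        }
    }

    private ResumeLoop() { }
}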

Example 4 with FetcherReadTimeoutException

Use of org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException in project tez by apache.

From class Fetcher, the method doHttpFetch:

@VisibleForTesting
protected HostFetchResult doHttpFetch(CachingCallBack callback) {
    HostFetchResult connectionsWithRetryResult = setupConnection(srcAttemptsRemaining.values());
    if (connectionsWithRetryResult != null) {
        return connectionsWithRetryResult;
    }
    // Handle any shutdown which may have been invoked.
    if (isShutDown.get()) {
        // shutdown would have no effect if in the process of establishing the connection.
        shutdownInternal();
        if (isDebugEnabled) {
            LOG.debug("Detected fetcher has been shutdown after opening stream. Returning");
        }
        return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), null, false);
    }
    // After this point, closing the stream and connection should cause a SocketException,
    // which will be ignored since shutdown has been invoked.
    // Loop through available map-outputs and fetch them
    // On any error, failedTasks is not null and we exit
    // after putting back the remaining maps to the
    // yet_to_be_fetched list and marking the failed tasks.
    InputAttemptIdentifier[] failedInputs = null;
    while (!srcAttemptsRemaining.isEmpty() && failedInputs == null) {
        InputAttemptIdentifier inputAttemptIdentifier = srcAttemptsRemaining.entrySet().iterator().next().getValue();
        if (isShutDown.get()) {
            shutdownInternal(true);
            if (isDebugEnabled) {
                LOG.debug("Fetcher already shutdown. Aborting queued fetches for " + srcAttemptsRemaining.size() + " inputs");
            }
            return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), null, false);
        }
        try {
            failedInputs = fetchInputs(input, callback, inputAttemptIdentifier);
        } catch (FetcherReadTimeoutException e) {
            // clean up connection
            shutdownInternal(true);
            if (isShutDown.get()) {
                if (isDebugEnabled) {
                    LOG.debug("Fetcher already shutdown. Aborting reconnection and queued fetches for " + srcAttemptsRemaining.size() + " inputs");
                }
                return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), null, false);
            }
            // Connect again.
            connectionsWithRetryResult = setupConnection(srcAttemptsRemaining.values());
            if (connectionsWithRetryResult != null) {
                break;
            }
        }
    }
    if (isShutDown.get() && failedInputs != null && failedInputs.length > 0) {
        if (isDebugEnabled) {
            LOG.debug("Fetcher already shutdown. Not reporting fetch failures for: " + failedInputs.length + " failed inputs");
        }
        failedInputs = null;
    }
    return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), failedInputs, false);
}
Also used: FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException), CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier), InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
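
The examples construct FetcherReadTimeoutException both from a cause (Examples 1 and 2) and from a plain message (the test in Example 5). A minimal stand-in with just those two constructors would look roughly like the sketch below; extending IOException is an assumption made here for illustration, not something these snippets establish:

import java.io.IOException;

// Hypothetical stand-in for the real
// org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException, reduced to the
// two constructors exercised on this page. The IOException superclass is an assumption.
class ReadTimeoutExceptionSketch extends IOException {
    ReadTimeoutExceptionSketch(String message) {
        super(message);
    }

    ReadTimeoutExceptionSketch(Throwable cause) {
        super(cause);
    }
}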

Example 5 with FetcherReadTimeoutException

Use of org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException in project tez by apache.

From class TestFetcher, the method testWithRetry:

@Test(timeout = 5000)
@SuppressWarnings("unchecked")
public void testWithRetry() throws Exception {
    Configuration conf = new TezConfiguration();
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 3000);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 3000);
    ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
    MergeManager merger = mock(MergeManager.class);
    Shuffle shuffle = mock(Shuffle.class);
    InputContext inputContext = mock(InputContext.class);
    when(inputContext.getCounters()).thenReturn(new TezCounters());
    when(inputContext.getSourceVertexName()).thenReturn("");
    when(inputContext.getApplicationId()).thenReturn(ApplicationId.newInstance(0, 1));
    HttpConnectionParams httpConnectionParams = ShuffleUtils.getHttpConnectionParams(conf);
    final MapHost host = new MapHost(HOST, PORT, 1, 1);
    FetcherOrderedGrouped mockFetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, false, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, false, false, true, false);
    final FetcherOrderedGrouped fetcher = spy(mockFetcher);
    final List<InputAttemptIdentifier> srcAttempts = Arrays.asList(new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0"), new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1"), new InputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3"));
    doReturn(srcAttempts).when(scheduler).getMapsForHost(host);
    doReturn(true).when(fetcher).setupConnection(any(MapHost.class), any(Collection.class));
    URL url = ShuffleUtils.constructInputURL("http://" + HOST + ":" + PORT + "/mapOutput?job=job_123&&reduce=1&map=", srcAttempts, false);
    fetcher.httpConnection = new FakeHttpConnection(url, null, "", null);
    doAnswer(new Answer<MapOutput>() {

        @Override
        public MapOutput answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            MapOutput mapOutput = mock(MapOutput.class);
            doReturn(MapOutput.Type.MEMORY).when(mapOutput).getType();
            doReturn(args[0]).when(mapOutput).getAttemptIdentifier();
            return mapOutput;
        }
    }).when(merger).reserve(any(InputAttemptIdentifier.class), anyInt(), anyInt(), anyInt());
    // Simulate a read timeout while reading data
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            // Emulate host down for 4 seconds.
            Thread.sleep(4000);
            doReturn(false).when(fetcher).setupConnection(any(MapHost.class), any(Collection.class));
            // Throw a read-timeout exception so the fetcher tears down the connection and retries the same node
            throw new FetcherReadTimeoutException("creating fetcher socket read timeout exception");
        }
    }).when(fetcher).copyMapOutput(any(MapHost.class), any(DataInputStream.class), any(InputAttemptIdentifier.class));
    try {
        fetcher.copyFromHost(host);
    } catch (IOException e) {
    // ignore
    }
    // setupConnection should be called twice (once for the initial connect and once for the retry)
    verify(fetcher, times(2)).setupConnection(any(MapHost.class), any(Collection.class));
    // since copyMapOutput consistently fails, it should call copyFailed once
    verify(scheduler, times(1)).copyFailed(any(InputAttemptIdentifier.class), any(MapHost.class), anyBoolean(), anyBoolean(), anyBoolean());
    verify(fetcher, times(1)).putBackRemainingMapOutputs(any(MapHost.class));
    verify(scheduler, times(3)).putBackKnownMapOutput(any(MapHost.class), any(InputAttemptIdentifier.class));
    // Verify by stopping the fetcher abruptly
    try {
        // flag to indicate fetcher stopped
        fetcher.stopped = false;
        fetcher.copyFromHost(host);
        verify(fetcher, times(2)).putBackRemainingMapOutputs(any(MapHost.class));
    } catch (IOException e) {
    // ignore
    }
}
Also used: HttpConnectionParams (org.apache.tez.http.HttpConnectionParams), Configuration (org.apache.hadoop.conf.Configuration), TezConfiguration (org.apache.tez.dag.api.TezConfiguration), TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration), CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier), InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier), URL (java.net.URL), FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException), InputContext (org.apache.tez.runtime.api.InputContext), IOException (java.io.IOException), DataInputStream (java.io.DataInputStream), TezCounters (org.apache.tez.common.counters.TezCounters), InvocationOnMock (org.mockito.invocation.InvocationOnMock), Collection (java.util.Collection), Test (org.junit.Test)
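
The test tightens both shuffle timeouts through TezRuntimeConfiguration before building the fetcher. For reference, the same two knobs applied to a plain configuration object look like the sketch below; the 3000 ms values simply mirror the test and are not recommended production settings:

import org.apache.hadoop.conf.Configuration;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;

// Sketch: the shuffle connect/read timeout settings used by testWithRetry above,
// kept deliberately low so the read-timeout path is easy to trigger in a test.
final class ShuffleTimeoutConfig {
    static Configuration build() {
        Configuration conf = new TezConfiguration();
        conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 3000);
        conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 3000);
        return conf;
    }

    private ShuffleTimeoutConfig() { }
}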

Aggregations

InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier): 5 usages
FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException): 5 usages
IOException (java.io.IOException): 4 usages
CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier): 3 usages
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2 usages
ArrayList (java.util.ArrayList): 2 usages
DataInputStream (java.io.DataInputStream): 1 usage
URL (java.net.URL): 1 usage
Collection (java.util.Collection): 1 usage
Configuration (org.apache.hadoop.conf.Configuration): 1 usage
TezCounters (org.apache.tez.common.counters.TezCounters): 1 usage
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 1 usage
TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException): 1 usage
HttpConnectionParams (org.apache.tez.http.HttpConnectionParams): 1 usage
InputContext (org.apache.tez.runtime.api.InputContext): 1 usage
TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration): 1 usage
ShuffleHeader (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader): 1 usage
Test (org.junit.Test): 1 usage
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 1 usage