Search in sources :

Example 36 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class FetcherOrderedGrouped method copyFromHost.

/**
 * Shuffles the map outputs that the scheduler currently has queued for one host.
 *
 * <p>The attempts for {@code host} are loaded into {@code remaining}, a
 * connection is set up, and outputs are copied one at a time until nothing
 * remains or a copy reports failed tasks. Failed tasks are passed to
 * {@code scheduler.copyFailed} unless the fetcher has been stopped. However the
 * method exits, the {@code finally} block calls
 * {@code putBackRemainingMapOutputs(host)}.
 *
 * @param host {@link MapHost} from which we need to
 *             shuffle available map-outputs.
 * @throws IOException if the copy loop ends without any reported failure while
 *                     outputs are still outstanding; presumably also
 *                     propagated from the connection/copy helpers, which are
 *                     not visible here
 */
@VisibleForTesting
protected void copyFromHost(MapHost host) throws IOException {
    // Each host starts with a fresh retry window.
    retryStartTime = 0;
    // Completed maps the scheduler has assigned to this host.
    List<InputAttemptIdentifier> srcAttempts = scheduler.getMapsForHost(host);
    // Nothing queued for this host (per the original note, seen especially
    // at the tail of large jobs).
    if (srcAttempts.isEmpty()) {
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Fetcher " + id + " going to fetch from " + host + " for: " + srcAttempts + ", partition range: " + minPartition + "-" + maxPartition);
    }
    populateRemainingMap(srcAttempts);
    try {
        // Build the URL and connect.
        boolean connected = setupConnection(host, remaining.values());
        if (!connected) {
            if (stopped) {
                cleanupCurrentConnection(true);
            }
            // The finally block hands the maps back on this failure path.
            return;
        }
        // Copy the available map outputs one by one. The loop ends as soon as
        // a copy reports failed tasks; outputs still pending at that point are
        // put back for a later retry (the original note cites YARN-1773).
        InputAttemptIdentifier[] failedTasks = null;
        while (failedTasks == null && !remaining.isEmpty()) {
            InputAttemptIdentifier nextAttempt = remaining.values().iterator().next();
            try {
                failedTasks = copyMapOutput(host, input, nextAttempt);
            } catch (FetcherReadTimeoutException readTimeout) {
                // Drop the current connection before deciding whether to reconnect.
                cleanupCurrentConnection(true);
                if (stopped) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Not re-establishing connection since Fetcher has been stopped");
                    }
                    return;
                }
                // Connect with retry; on success simply go around the loop again.
                boolean reconnected = setupConnection(host, remaining.values());
                if (reconnected) {
                    continue;
                }
                if (stopped) {
                    cleanupCurrentConnection(true);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Not reporting connection re-establishment failure since fetcher is stopped");
                    }
                    return;
                }
                // Reconnect failed while still running: blame the next pending attempt.
                failedTasks = new InputAttemptIdentifier[] { getNextRemainingAttempt() };
                break;
            }
        }
        boolean hasFailedTasks = failedTasks != null && failedTasks.length > 0;
        if (hasFailedTasks) {
            if (!stopped) {
                LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks));
                for (InputAttemptIdentifier failedAttempt : failedTasks) {
                    scheduler.copyFailed(failedAttempt, host, true, false, false);
                }
            } else if (LOG.isDebugEnabled()) {
                LOG.debug("Ignoring copyMapOutput failures for tasks: " + Arrays.toString(failedTasks) + " since Fetcher has been stopped");
            }
        }
        cleanupCurrentConnection(false);
        // Sanity check: a clean run must have consumed every expected output.
        if (failedTasks == null && !remaining.isEmpty()) {
            throw new IOException("server didn't return all expected map outputs: " + remaining.size() + " left.");
        }
    } finally {
        putBackRemainingMapOutputs(host);
    }
}
Also used : FetcherReadTimeoutException(org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) IOException(java.io.IOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 37 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class Fetcher method callInternal.

/**
 * Runs one fetch for this fetcher's host and reports failed inputs to the callback.
 *
 * <p>Registers every remaining attempt in {@code pathToAttemptMap}, chooses
 * between local-disk, shared, and HTTP fetching, forwards any failed inputs to
 * {@code fetcherCallback.fetchFailed} (unless the fetcher is already shut
 * down), and then calls {@code shutdown()}.
 *
 * @return a {@code FetchResult} built from the untouched {@code srcAttempts}
 *         when there is nothing to fetch, otherwise the result of the fetch
 *         path that ran
 * @throws Exception an {@link IOException} when no failures were reported,
 *                   inputs are still pending, and the fetch was not a shared
 *                   (multiplexed) one; an {@link IllegalArgumentException}
 *                   when a shared fetch is attempted with a non-zero partition
 */
@Override
public FetchResult callInternal() throws Exception {
    // A shared fetch is only considered when both features are switched on;
    // the loop below additionally requires every input to be shared.
    boolean multiplex = (this.sharedFetchEnabled && this.localDiskFetchEnabled);
    if (srcAttempts.size() == 0) {
        return new FetchResult(host, port, partition, partitionCount, srcAttempts);
    }
    populateRemainingMap(srcAttempts);
    for (InputAttemptIdentifier in : srcAttemptsRemaining.values()) {
        if (in instanceof CompositeInputAttemptIdentifier) {
            // A composite attempt is registered once per contained input,
            // keyed by consecutive partitions starting at 'partition'.
            CompositeInputAttemptIdentifier cin = (CompositeInputAttemptIdentifier) in;
            for (int i = 0; i < cin.getInputIdentifierCount(); i++) {
                pathToAttemptMap.put(new PathPartition(cin.getPathComponent(), partition + i), cin.expand(i));
            }
        } else {
            pathToAttemptMap.put(new PathPartition(in.getPathComponent(), 0), in);
        }
        // do only if all of them are shared fetches
        multiplex &= in.isShared();
    }
    if (multiplex) {
        // Preconditions message templates (Guava) substitute only "%s"; a "%d"
        // would be printed literally with the value appended in brackets.
        Preconditions.checkArgument(partition == 0, "Shared fetches cannot be done for partitioned input" + "- partition is non-zero (%s)", partition);
    }
    HostFetchResult hostFetchResult;
    if (localDiskFetchEnabled && host.equals(localHostname) && port == shufflePort) {
        hostFetchResult = setupLocalDiskFetch();
    } else if (multiplex) {
        hostFetchResult = doSharedFetch();
    } else {
        hostFetchResult = doHttpFetch();
    }
    if (hostFetchResult.failedInputs != null && hostFetchResult.failedInputs.length > 0) {
        if (!isShutDown.get()) {
            LOG.warn("copyInputs failed for tasks " + Arrays.toString(hostFetchResult.failedInputs));
            for (InputAttemptIdentifier left : hostFetchResult.failedInputs) {
                fetcherCallback.fetchFailed(host, left, hostFetchResult.connectFailed);
            }
        } else {
            if (isDebugEnabled) {
                LOG.debug("Ignoring failed fetch reports for " + hostFetchResult.failedInputs.length + " inputs since the fetcher has already been stopped");
            }
        }
    }
    shutdown();
    // Sanity check: outside of shared fetches, a run without reported
    // failures must not leave any input pending.
    if (hostFetchResult.failedInputs == null && !srcAttemptsRemaining.isEmpty() && !multiplex) {
        throw new IOException("server didn't return all expected map outputs: " + srcAttemptsRemaining.size() + " left.");
    }
    return hostFetchResult.fetchResult;
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) IOException(java.io.IOException)

Example 38 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class Fetcher method doLocalDiskFetch.

/**
 * Fetches the remaining source attempts directly from local disk.
 *
 * <p>For each pending attempt, every partition in
 * {@code [partition, partition + partitionCount)} is looked up in
 * {@code pathToAttemptMap}, wrapped in a {@code LocalDiskFetchedInput}, and
 * reported through {@code fetcherCallback.fetchSucceeded}. An attempt is
 * removed from {@code srcAttemptsRemaining} only when none of its partitions
 * failed. Processing stops early once the fetcher is shut down.
 *
 * @param failMissing when {@code true}, fetch failures are logged at WARN and
 *                    the attempts still pending at the end are returned as
 *                    failed (unless shut down); when {@code false}, failures
 *                    are neither logged nor reported
 * @return a {@code HostFetchResult} carrying the still-pending attempts, the
 *         failed attempts (or {@code null}), and {@code false} as its third
 *         constructor argument
 */
@VisibleForTesting
private HostFetchResult doLocalDiskFetch(boolean failMissing) {
    Iterator<Entry<String, InputAttemptIdentifier>> pending = srcAttemptsRemaining.entrySet().iterator();
    while (pending.hasNext()) {
        if (isShutDown.get()) {
            if (isDebugEnabled) {
                LOG.debug("Already shutdown. Skipping fetch for " + srcAttemptsRemaining.size() + " inputs");
            }
            break;
        }
        boolean anyPartitionFailed = false;
        InputAttemptIdentifier attempt = pending.next().getValue();
        for (int offset = 0; offset < partitionCount; offset++) {
            int reduceId = partition + offset;
            // NOTE(review): the looked-up identifier replaces 'attempt' and is
            // dereferenced again on the next pass; a missing mapping (null)
            // would surface as a NullPointerException, which the catch below
            // does not handle - confirm every partition is always registered.
            attempt = pathToAttemptMap.get(new PathPartition(attempt.getPathComponent(), reduceId));
            long startedAt = System.currentTimeMillis();
            FetchedInput localInput = null;
            try {
                // for missing files, this will throw an exception
                TezIndexRecord indexRecord = getTezIndexRecord(attempt, reduceId);
                // The local-disk input gets a callback whose methods do nothing.
                FetchedInputCallback noOpCallback = new FetchedInputCallback() {

                    @Override
                    public void fetchComplete(FetchedInput fetchedInput) {
                    }

                    @Override
                    public void fetchFailed(FetchedInput fetchedInput) {
                    }

                    @Override
                    public void freeResources(FetchedInput fetchedInput) {
                    }
                };
                localInput = new LocalDiskFetchedInput(indexRecord.getStartOffset(), indexRecord.getPartLength(), attempt, getShuffleInputFileName(attempt.getPathComponent(), null), conf, noOpCallback);
                if (isDebugEnabled) {
                    LOG.debug("fetcher about to shuffle output of srcAttempt (direct disk)" + attempt + " decomp: " + indexRecord.getRawLength() + " len: " + indexRecord.getPartLength() + " to " + localInput.getType());
                }
                long finishedAt = System.currentTimeMillis();
                fetcherCallback.fetchSucceeded(host, attempt, localInput, indexRecord.getPartLength(), indexRecord.getRawLength(), (finishedAt - startedAt));
            } catch (IOException | InternalError e) {
                anyPartitionFailed = true;
                cleanupFetchedInput(localInput);
                if (isShutDown.get()) {
                    if (isDebugEnabled) {
                        LOG.debug("Already shutdown. Ignoring Local Fetch Failure for " + attempt + " from host " + host + " : " + e.getClass().getName() + ", message=" + e.getMessage());
                    }
                    // Leaves only the partition loop; the outer loop re-checks
                    // the shutdown flag on its next pass.
                    break;
                }
                if (failMissing) {
                    LOG.warn("Failed to shuffle output of " + attempt + " from " + host + "(local fetch)", e);
                }
            }
        }
        // Keep the attempt pending if any of its partitions failed.
        if (!anyPartitionFailed) {
            pending.remove();
        }
    }
    // When failMissing is false nothing needs to be done to requeue the
    // remaining entries, so failedFetches stays null.
    InputAttemptIdentifier[] failedFetches = null;
    if (failMissing && !srcAttemptsRemaining.isEmpty()) {
        if (!isShutDown.get()) {
            failedFetches = srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.values().size()]);
        } else if (isDebugEnabled) {
            LOG.debug("Already shutdown, not reporting fetch failures for: " + srcAttemptsRemaining.size() + " remaining inputs");
        }
    }
    FetchResult fetchResult = new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values());
    return new HostFetchResult(fetchResult, failedFetches, false);
}
Also used : CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) IOException(java.io.IOException) Entry(java.util.Map.Entry) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 39 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class Fetcher method doHttpFetch.

/**
 * Fetches the remaining source attempts over a connection to the host.
 *
 * <p>Sets up the connection, then calls {@code fetchInputs} for the next
 * pending attempt until nothing remains or failed inputs are reported. A read
 * timeout tears the connection down and reconnects. Whenever the fetcher is
 * found to be shut down, the method returns the still-pending attempts without
 * reporting failures.
 *
 * @param callback passed through unchanged to {@code fetchInputs}
 * @return the result produced by {@code setupConnection} when the initial
 *         connection attempt yields one; otherwise a {@code HostFetchResult}
 *         with the still-pending attempts, the failed inputs (or
 *         {@code null}), and {@code false} as its third constructor argument
 */
@VisibleForTesting
protected HostFetchResult doHttpFetch(CachingCallBack callback) {
    // A non-null result from setupConnection is returned to the caller as-is.
    HostFetchResult initialConnectResult = setupConnection(srcAttemptsRemaining.values());
    if (initialConnectResult != null) {
        return initialConnectResult;
    }
    // Handle any shutdown which may have been invoked.
    if (isShutDown.get()) {
        // shutdown would have no effect if in the process of establishing the connection.
        shutdownInternal();
        if (isDebugEnabled) {
            LOG.debug("Detected fetcher has been shutdown after opening stream. Returning");
        }
        FetchResult pendingOnly = new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values());
        return new HostFetchResult(pendingOnly, null, false);
    }
    // After this point, closing the stream and connection should cause a
    // SocketException, which will be ignored since shutdown has been invoked.
    // Fetch the available inputs one at a time; the loop ends as soon as
    // fetchInputs reports failed inputs.
    InputAttemptIdentifier[] failedInputs = null;
    while (failedInputs == null && !srcAttemptsRemaining.isEmpty()) {
        InputAttemptIdentifier nextAttempt = srcAttemptsRemaining.values().iterator().next();
        if (isShutDown.get()) {
            shutdownInternal(true);
            if (isDebugEnabled) {
                LOG.debug("Fetcher already shutdown. Aborting queued fetches for " + srcAttemptsRemaining.size() + " inputs");
            }
            FetchResult pendingOnly = new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values());
            return new HostFetchResult(pendingOnly, null, false);
        }
        try {
            failedInputs = fetchInputs(input, callback, nextAttempt);
        } catch (FetcherReadTimeoutException readTimeout) {
            // clean up connection
            shutdownInternal(true);
            if (isShutDown.get()) {
                if (isDebugEnabled) {
                    LOG.debug("Fetcher already shutdown. Aborting reconnection and queued fetches for " + srcAttemptsRemaining.size() + " inputs");
                }
                FetchResult pendingOnly = new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values());
                return new HostFetchResult(pendingOnly, null, false);
            }
            // Connect again.
            HostFetchResult reconnectResult = setupConnection(srcAttemptsRemaining.values());
            if (reconnectResult != null) {
                // NOTE(review): unlike the initial connect above, this result
                // is dropped and the method falls through with
                // failedInputs == null - confirm that is intended.
                break;
            }
        }
    }
    if (isShutDown.get()) {
        if (failedInputs != null && failedInputs.length > 0) {
            if (isDebugEnabled) {
                LOG.debug("Fetcher already shutdown. Not reporting fetch failures for: " + failedInputs.length + " failed inputs");
            }
            // Failures seen after shutdown are not reported.
            failedInputs = null;
        }
    }
    FetchResult fetchResult = new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values());
    return new HostFetchResult(fetchResult, failedInputs, false);
}
Also used : FetcherReadTimeoutException(org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 40 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class TestUnorderedKVReader method setupReader.

/**
 * Builds the {@code UnorderedKVReader} used by the tests.
 *
 * <p>Configures {@code Text} as key and value class, writes an IFile to
 * {@code outputPath}, and wires a mocked {@code ShuffleManager} whose
 * {@code getNextInput()} hands out a single spied local-disk input and then
 * {@code null}.
 *
 * @throws IOException declared by this helper; presumably raised while creating
 *                     the IFile or the reader
 * @throws InterruptedException declared by this helper; the source is not
 *                              visible here
 */
private void setupReader() throws IOException, InterruptedException {
    String textClassName = Text.class.getName();
    defaultConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, textClassName);
    defaultConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, textClassName);
    createIFile(outputPath, 1);

    // The fetched input needs a callback; none of its hooks do anything here.
    FetchedInputCallback noOpCallback = new FetchedInputCallback() {

        @Override
        public void fetchComplete(FetchedInput fetchedInput) {
        }

        @Override
        public void fetchFailed(FetchedInput fetchedInput) {
        }

        @Override
        public void freeResources(FetchedInput fetchedInput) {
        }
    };
    LocalDiskFetchedInput realFetchedInput = new LocalDiskFetchedInput(0, compLen, new InputAttemptIdentifier(0, 0), outputPath, defaultConf, noOpCallback);
    // Spy on the real input so that free() can be stubbed out.
    LocalDiskFetchedInput fetchedInput = spy(realFetchedInput);
    doNothing().when(fetchedInput).free();

    final LinkedList<LocalDiskFetchedInput> inputs = new LinkedList<>();
    inputs.add(fetchedInput);

    TezCounters counters = new TezCounters();
    TezCounter inputRecords = counters.findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);

    ShuffleManager manager = mock(ShuffleManager.class);
    doAnswer(new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
            // Hands out the queued inputs in order, then null once drained.
            return inputs.poll();
        }
    }).when(manager).getNextInput();

    unorderedKVReader = new UnorderedKVReader<Text, Text>(manager, defaultConf, null, false, -1, -1, inputRecords, mock(InputContext.class));
}
Also used : LocalDiskFetchedInput(org.apache.tez.runtime.library.common.shuffle.LocalDiskFetchedInput) FetchedInput(org.apache.tez.runtime.library.common.shuffle.FetchedInput) Text(org.apache.hadoop.io.Text) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) TezCounter(org.apache.tez.common.counters.TezCounter) LinkedList(java.util.LinkedList) TezCounters(org.apache.tez.common.counters.TezCounters) Answer(org.mockito.stubbing.Answer) Mockito.doAnswer(org.mockito.Mockito.doAnswer) FetchedInputCallback(org.apache.tez.runtime.library.common.shuffle.FetchedInputCallback) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ShuffleManager(org.apache.tez.runtime.library.common.shuffle.impl.ShuffleManager) LocalDiskFetchedInput(org.apache.tez.runtime.library.common.shuffle.LocalDiskFetchedInput)

Aggregations

InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier)55 CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)41 Test (org.junit.Test)31 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)17 Configuration (org.apache.hadoop.conf.Configuration)16 InputContext (org.apache.tez.runtime.api.InputContext)16 IOException (java.io.IOException)15 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)15 Path (org.apache.hadoop.fs.Path)10 LinkedList (java.util.LinkedList)8 Matchers.anyString (org.mockito.Matchers.anyString)8 VisibleForTesting (com.google.common.annotations.VisibleForTesting)7 TezCounters (org.apache.tez.common.counters.TezCounters)7 Event (org.apache.tez.runtime.api.Event)7 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)7 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)6 InvocationOnMock (org.mockito.invocation.InvocationOnMock)6 FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException)5 URL (java.net.URL)4 ArrayList (java.util.ArrayList)4