use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class FetcherOrderedGrouped method copyFromHost.
/**
 * Fetches the map outputs that the scheduler currently lists for one host.
 *
 * <p>The attempts are loaded into {@code remaining} and copied one at a time. The copy
 * loop ends at the first reported failure; failed attempts are handed to
 * {@code scheduler.copyFailed} unless the fetcher has been stopped. Once the attempts
 * have been loaded, {@code putBackRemainingMapOutputs(host)} always runs on exit.
 *
 * @param host {@link MapHost} from which we need to shuffle available map-outputs.
 * @throws IOException if the copy loop finishes without any reported failure while
 *     entries are still left in {@code remaining}
 */
@VisibleForTesting
protected void copyFromHost(MapHost host) throws IOException {
  // Every new host starts with a fresh retry clock.
  retryStartTime = 0;

  // Completed maps that the scheduler knows about on 'host'.
  List<InputAttemptIdentifier> hostAttempts = scheduler.getMapsForHost(host);
  // Nothing to fetch from this host; seen especially at the tail of large jobs.
  if (hostAttempts.isEmpty()) {
    return;
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Fetcher " + id + " going to fetch from " + host + " for: " + hostAttempts + ", partition range: " + minPartition + "-" + maxPartition);
  }
  populateRemainingMap(hostAttempts);

  try {
    // Build the URL and open the connection.
    boolean connected = setupConnection(host, remaining.values());
    if (!connected) {
      if (stopped) {
        cleanupCurrentConnection(true);
      }
      // The finally block puts the maps back in case of failure.
      return;
    }

    // Walk the available map outputs. As soon as a copy reports failed tasks the loop
    // stops; the remaining maps are put back in the finally block and the failed
    // tasks are reported below.
    InputAttemptIdentifier[] failedTasks = null;
    while (failedTasks == null && !remaining.isEmpty()) {
      InputAttemptIdentifier nextAttempt = remaining.values().iterator().next();
      // We stop at the first failure rather than moving on to the remaining outputs
      // (YARN-1773); those are picked up in the next retry.
      try {
        failedTasks = copyMapOutput(host, input, nextAttempt);
      } catch (FetcherReadTimeoutException readTimeout) {
        // The read timed out: drop the current connection before deciding what to do.
        cleanupCurrentConnection(true);
        if (stopped) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Not re-establishing connection since Fetcher has been stopped");
          }
          return;
        }
        // Reconnect (with retry); on success simply carry on with the loop.
        if (setupConnection(host, remaining.values())) {
          continue;
        }
        if (stopped) {
          cleanupCurrentConnection(true);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Not reporting connection re-establishment failure since fetcher is stopped");
          }
          return;
        }
        // Reconnect failed: blame the next remaining attempt, which also ends the loop.
        failedTasks = new InputAttemptIdentifier[] { getNextRemainingAttempt() };
      }
    }

    if (failedTasks != null && failedTasks.length > 0) {
      if (!stopped) {
        LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks));
        for (InputAttemptIdentifier failedAttempt : failedTasks) {
          scheduler.copyFailed(failedAttempt, host, true, false, false);
        }
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Ignoring copyMapOutput failures for tasks: " + Arrays.toString(failedTasks) + " since Fetcher has been stopped");
      }
    }

    cleanupCurrentConnection(false);

    // Sanity check: without a reported failure, nothing should be left over.
    if (failedTasks == null && !remaining.isEmpty()) {
      throw new IOException("server didn't return all expected map outputs: " + remaining.size() + " left.");
    }
  } finally {
    putBackRemainingMapOutputs(host);
  }
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class Fetcher method callInternal.
/**
 * Runs one fetch for the configured host and source attempts.
 *
 * <p>Registers every remaining attempt in {@code pathToAttemptMap}, picks a fetch
 * strategy (local disk, shared fetch, or HTTP), reports failed inputs through
 * {@code fetcherCallback.fetchFailed} unless the fetcher is already shut down, and then
 * calls {@code shutdown()}.
 *
 * @return the {@code FetchResult} of the chosen strategy, or a result built directly from
 *     {@code srcAttempts} when there is nothing to fetch
 * @throws IllegalArgumentException if a shared (multiplexed) fetch is selected while
 *     {@code partition} is non-zero
 * @throws IOException if no failures were reported, inputs are still remaining and the
 *     fetch was not a shared fetch
 * @throws Exception declared by the overridden method
 */
@Override
public FetchResult callInternal() throws Exception {
  boolean multiplex = (this.sharedFetchEnabled && this.localDiskFetchEnabled);
  if (srcAttempts.size() == 0) {
    return new FetchResult(host, port, partition, partitionCount, srcAttempts);
  }
  populateRemainingMap(srcAttempts);
  for (InputAttemptIdentifier in : srcAttemptsRemaining.values()) {
    if (in instanceof CompositeInputAttemptIdentifier) {
      // A composite attempt covers several consecutive partitions; register each one.
      CompositeInputAttemptIdentifier cin = (CompositeInputAttemptIdentifier) in;
      for (int i = 0; i < cin.getInputIdentifierCount(); i++) {
        pathToAttemptMap.put(new PathPartition(cin.getPathComponent(), partition + i), cin.expand(i));
      }
    } else {
      // NOTE(review): non-composite attempts are keyed with partition 0, while
      // doLocalDiskFetch looks entries up with (partition + offset) - confirm this is
      // intended when partition != 0.
      pathToAttemptMap.put(new PathPartition(in.getPathComponent(), 0), in);
    }
    // do only if all of them are shared fetches
    multiplex &= in.isShared();
  }
  if (multiplex) {
    // The placeholder must be %s: Guava-style Preconditions templates only expand %s, so
    // a %d would be left verbatim and the value appended in brackets.
    // NOTE(review): assumes Preconditions is Guava's or a compatible implementation; %s
    // is also valid for String.format-based implementations.
    Preconditions.checkArgument(partition == 0, "Shared fetches cannot be done for partitioned input" + "- partition is non-zero (%s)", partition);
  }
  HostFetchResult hostFetchResult;
  if (localDiskFetchEnabled && host.equals(localHostname) && port == shufflePort) {
    hostFetchResult = setupLocalDiskFetch();
  } else if (multiplex) {
    hostFetchResult = doSharedFetch();
  } else {
    hostFetchResult = doHttpFetch();
  }
  if (hostFetchResult.failedInputs != null && hostFetchResult.failedInputs.length > 0) {
    if (!isShutDown.get()) {
      LOG.warn("copyInputs failed for tasks " + Arrays.toString(hostFetchResult.failedInputs));
      for (InputAttemptIdentifier left : hostFetchResult.failedInputs) {
        fetcherCallback.fetchFailed(host, left, hostFetchResult.connectFailed);
      }
    } else {
      if (isDebugEnabled) {
        LOG.debug("Ignoring failed fetch reports for " + hostFetchResult.failedInputs.length + " inputs since the fetcher has already been stopped");
      }
    }
  }
  shutdown();
  // Sanity check: a non-shared fetch that reported no failures must have consumed every
  // remaining input.
  if (hostFetchResult.failedInputs == null && !srcAttemptsRemaining.isEmpty() && !multiplex) {
    throw new IOException("server didn't return all expected map outputs: " + srcAttemptsRemaining.size() + " left.");
  }
  return hostFetchResult.fetchResult;
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class Fetcher method doLocalDiskFetch.
/**
 * Serves the remaining source attempts from local disk instead of over the network.
 *
 * <p>For each entry of {@code srcAttemptsRemaining}, every partition in
 * {@code [partition, partition + partitionCount)} is resolved through
 * {@code pathToAttemptMap}, wrapped in a {@code LocalDiskFetchedInput} and reported via
 * {@code fetcherCallback.fetchSucceeded}. An entry is removed from
 * {@code srcAttemptsRemaining} only when none of its partitions failed.
 *
 * @param failMissing when {@code true}, failures are logged at WARN level and, unless the
 *     fetcher is shut down, the attempts still remaining are returned as failed inputs
 * @return a {@code HostFetchResult} carrying the attempts still remaining and the failed
 *     inputs ({@code null} when none are reported), with the connect-failed flag unset
 */
@VisibleForTesting
private HostFetchResult doLocalDiskFetch(boolean failMissing) {
  Iterator<Entry<String, InputAttemptIdentifier>> pending = srcAttemptsRemaining.entrySet().iterator();
  while (pending.hasNext()) {
    if (isShutDown.get()) {
      if (isDebugEnabled) {
        LOG.debug("Already shutdown. Skipping fetch for " + srcAttemptsRemaining.size() + " inputs");
      }
      break;
    }

    boolean anyPartitionFailed = false;
    InputAttemptIdentifier attempt = pending.next().getValue();
    for (int offset = 0; offset < partitionCount; offset++) {
      int reduceId = offset + partition;
      // NOTE(review): if no mapping exists this yields null, which is then used below and
      // on the next iteration - presumably callInternal always registers the key first.
      attempt = pathToAttemptMap.get(new PathPartition(attempt.getPathComponent(), reduceId));
      long startTime = System.currentTimeMillis();
      FetchedInput fetchedInput = null;
      try {
        // For missing files, this lookup throws an exception.
        TezIndexRecord idxRecord = getTezIndexRecord(attempt, reduceId);
        fetchedInput = new LocalDiskFetchedInput(idxRecord.getStartOffset(), idxRecord.getPartLength(), attempt, getShuffleInputFileName(attempt.getPathComponent(), null), conf, new FetchedInputCallback() {
          @Override
          public void fetchComplete(FetchedInput fetchedInput) {
          }

          @Override
          public void fetchFailed(FetchedInput fetchedInput) {
          }

          @Override
          public void freeResources(FetchedInput fetchedInput) {
          }
        });
        if (isDebugEnabled) {
          LOG.debug("fetcher about to shuffle output of srcAttempt (direct disk)" + attempt + " decomp: " + idxRecord.getRawLength() + " len: " + idxRecord.getPartLength() + " to " + fetchedInput.getType());
        }
        long elapsedMillis = System.currentTimeMillis() - startTime;
        fetcherCallback.fetchSucceeded(host, attempt, fetchedInput, idxRecord.getPartLength(), idxRecord.getRawLength(), elapsedMillis);
      } catch (IOException | InternalError fetchError) {
        anyPartitionFailed = true;
        cleanupFetchedInput(fetchedInput);
        if (isShutDown.get()) {
          if (isDebugEnabled) {
            LOG.debug("Already shutdown. Ignoring Local Fetch Failure for " + attempt + " from host " + host + " : " + fetchError.getClass().getName() + ", message=" + fetchError.getMessage());
          }
          // Leaves only the partition loop; the outer loop sees the shutdown flag next.
          break;
        }
        if (failMissing) {
          LOG.warn("Failed to shuffle output of " + attempt + " from " + host + "(local fetch)", fetchError);
        }
      }
    }

    // Only fully fetched attempts are taken off the remaining list.
    if (!anyPartitionFailed) {
      pending.remove();
    }
  }

  InputAttemptIdentifier[] failedFetches = null;
  if (failMissing && !srcAttemptsRemaining.isEmpty()) {
    if (!isShutDown.get()) {
      failedFetches = srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.values().size()]);
    } else if (isDebugEnabled) {
      LOG.debug("Already shutdown, not reporting fetch failures for: " + srcAttemptsRemaining.size() + " remaining inputs");
    }
  }
  // In every other case nothing needs to be done to requeue remaining entries.

  FetchResult fetchResult = new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values());
  return new HostFetchResult(fetchResult, failedFetches, false);
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class Fetcher method doHttpFetch.
/**
 * Fetches the remaining source attempts over HTTP.
 *
 * <p>A connection is set up first; a non-null result from {@code setupConnection} is
 * returned to the caller as-is. The remaining attempts are then fetched one at a time
 * until none are left or {@code fetchInputs} reports failed inputs. On a
 * {@code FetcherReadTimeoutException} the connection is torn down and re-established.
 *
 * <p>If that reconnect yields a non-null result, it is returned directly, exactly as for
 * the initial connect. Previously the result was assigned and then discarded by a
 * {@code break}, so the method returned a result with {@code null} failed inputs even
 * though inputs were still remaining.
 *
 * @param callback passed through to {@code fetchInputs}
 * @return the non-null result of {@code setupConnection} if there was one; otherwise a
 *     {@code HostFetchResult} built from the attempts still remaining and the failed
 *     inputs, which are {@code null} when none were reported or the fetcher is shut down
 */
@VisibleForTesting
protected HostFetchResult doHttpFetch(CachingCallBack callback) {
  HostFetchResult connectionsWithRetryResult = setupConnection(srcAttemptsRemaining.values());
  if (connectionsWithRetryResult != null) {
    return connectionsWithRetryResult;
  }
  // Handle any shutdown which may have been invoked.
  if (isShutDown.get()) {
    // shutdown would have no effect if in the process of establishing the connection.
    shutdownInternal();
    if (isDebugEnabled) {
      LOG.debug("Detected fetcher has been shutdown after opening stream. Returning");
    }
    return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), null, false);
  }
  // After this point, closing the stream and connection, should cause a
  // SocketException,
  // which will be ignored since shutdown has been invoked.
  // Loop through available map-outputs and fetch them.
  // On any error, failedInputs is not null and we exit; the inputs still
  // remaining and the failed inputs are handed back in the result.
  InputAttemptIdentifier[] failedInputs = null;
  while (!srcAttemptsRemaining.isEmpty() && failedInputs == null) {
    InputAttemptIdentifier inputAttemptIdentifier = srcAttemptsRemaining.entrySet().iterator().next().getValue();
    if (isShutDown.get()) {
      shutdownInternal(true);
      if (isDebugEnabled) {
        LOG.debug("Fetcher already shutdown. Aborting queued fetches for " + srcAttemptsRemaining.size() + " inputs");
      }
      return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), null, false);
    }
    try {
      failedInputs = fetchInputs(input, callback, inputAttemptIdentifier);
    } catch (FetcherReadTimeoutException e) {
      // clean up connection
      shutdownInternal(true);
      if (isShutDown.get()) {
        if (isDebugEnabled) {
          LOG.debug("Fetcher already shutdown. Aborting reconnection and queued fetches for " + srcAttemptsRemaining.size() + " inputs");
        }
        return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), null, false);
      }
      // Connect again.
      connectionsWithRetryResult = setupConnection(srcAttemptsRemaining.values());
      if (connectionsWithRetryResult != null) {
        // Hand the reconnect outcome to the caller instead of dropping it.
        return connectionsWithRetryResult;
      }
    }
  }
  if (isShutDown.get() && failedInputs != null && failedInputs.length > 0) {
    if (isDebugEnabled) {
      LOG.debug("Fetcher already shutdown. Not reporting fetch failures for: " + failedInputs.length + " failed inputs");
    }
    failedInputs = null;
  }
  return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), failedInputs, false);
}
use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.
the class TestUnorderedKVReader method setupReader.
/**
 * Builds the {@code unorderedKVReader} used by the tests.
 *
 * <p>Configures {@code Text} as key and value class, creates the IFile at
 * {@code outputPath}, and wires a mocked {@code ShuffleManager} whose
 * {@code getNextInput()} hands out a single spied {@code LocalDiskFetchedInput} and
 * afterwards {@code null}.
 *
 * @throws IOException declared for the file and reader setup calls
 * @throws InterruptedException declared for the reader setup calls
 */
private void setupReader() throws IOException, InterruptedException {
  String textClassName = Text.class.getName();
  defaultConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, textClassName);
  defaultConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, textClassName);

  // NOTE(review): compLen is read right after this call; presumably createIFile sets
  // it, so keep this ordering.
  createIFile(outputPath, 1);

  // The fetched input needs a callback, but the test does not care about its events.
  FetchedInputCallback noOpCallback = new FetchedInputCallback() {
    @Override
    public void fetchComplete(FetchedInput fetchedInput) {
    }

    @Override
    public void fetchFailed(FetchedInput fetchedInput) {
    }

    @Override
    public void freeResources(FetchedInput fetchedInput) {
    }
  };
  LocalDiskFetchedInput onDiskInput = spy(new LocalDiskFetchedInput(0, compLen, new InputAttemptIdentifier(0, 0), outputPath, defaultConf, noOpCallback));
  // free() is stubbed out on the spy so that calling it does nothing.
  doNothing().when(onDiskInput).free();

  final LinkedList<LocalDiskFetchedInput> inputs = new LinkedList<>();
  inputs.add(onDiskInput);

  // The mocked manager returns the queued inputs in order, then null once drained.
  ShuffleManager manager = mock(ShuffleManager.class);
  doAnswer(new Answer() {
    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
      return inputs.poll();
    }
  }).when(manager).getNextInput();

  TezCounter inputRecords = new TezCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
  unorderedKVReader = new UnorderedKVReader<Text, Text>(manager, defaultConf, null, false, -1, -1, inputRecords, mock(InputContext.class));
}
Aggregations