Use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in the Apache Tez project.
Class FetcherOrderedGrouped, method setupConnection.
/**
 * Builds the shuffle URL for {@code host} and {@code attempts}, opens the HTTP connection,
 * obtains its input stream and validates the reply.
 *
 * <p>On failure (unless the fetcher has been stopped) the error counters are bumped and every
 * entry of {@code remaining} is reported to the scheduler as a failed copy against
 * {@code host}.
 *
 * @param host the host whose shuffle handler is contacted
 * @param attempts the input attempts encoded into the request URL
 * @return {@code true} when the connection was established, the stream obtained and the reply
 *         validated; {@code false} when the fetcher was stopped or the attempt failed
 * @throws IOException declared in the signature; exceptions raised inside the try block are
 *         caught and handled here
 */
@VisibleForTesting
boolean setupConnection(MapHost host, Collection<InputAttemptIdentifier> attempts) throws IOException {
  boolean connected = false;
  try {
    String shuffleBase = ShuffleUtils.constructBaseURIForShuffleHandler(host.getHost(), host.getPort(), host.getPartitionId(), host.getPartitionCount(), applicationId, dagId, sslShuffle).toString();
    URL fetchUrl = ShuffleUtils.constructInputURL(shuffleBase, attempts, httpConnectionParams.isKeepAlive());
    httpConnection = ShuffleUtils.getHttpConnection(asyncHttp, fetchUrl, httpConnectionParams, logIdentifier, jobTokenSecretManager);
    connected = httpConnection.connect();
    if (!stopped) {
      // Normal path: grab the stream and let the connection verify the reply.
      input = httpConnection.getInputStream();
      httpConnection.validate();
      return true;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Detected fetcher has been shutdown after connection establishment. Returning");
    }
    return false;
  } catch (IOException | InterruptedException failure) {
    if (failure instanceof InterruptedException) {
      // Restore the interrupt flag so code further up the stack can observe it.
      Thread.currentThread().interrupt();
    }
    if (stopped) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Not reporting fetch failure, since an Exception was caught after shutdown");
      }
      return false;
    }
    ioErrs.increment(1);
    if (connected) {
      LOG.warn("Failed to verify reply after connecting to " + host + " with " + remaining.size() + " inputs pending", failure);
    } else {
      LOG.warn("Failed to connect to " + host + " with " + remaining.size() + " inputs", failure);
      connectionErrs.increment(1);
    }
    // Reporting every pending input against the same host indirectly penalizes that host.
    // The report goes out even for what may be a temporary glitch, so that the AM's
    // source-failure heuristics can kick in.
    for (InputAttemptIdentifier pending : remaining.values()) {
      scheduler.copyFailed(pending, host, connected, !connected, false);
    }
    return false;
  }
}
Use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in the Apache Tez project.
Class ShuffleInputEventHandlerOrderedGrouped, method processCompositeRoutedDataMovementEvent.
/**
 * Handles a composite routed data-movement event covering {@code crdmEvent.getCount()}
 * consecutive source partitions starting at {@code crdmEvent.getSourceIndex()}.
 *
 * <p>When the payload carries empty-partition information, each partition flagged in
 * {@code emptyPartitionsBitSet} is reported to the scheduler as an already-succeeded copy.
 * If every covered partition is flagged, nothing is registered for fetching. Otherwise the
 * composite identifier is registered with the scheduler as a known map output.
 *
 * @param crdmEvent the composite event being processed
 * @param shufflePayload the decoded payload of the event
 * @param emptyPartitionsBitSet bits marking partitions without data; only consulted when
 *        {@code shufflePayload.hasEmptyPartitions()} is true
 * @throws IOException declared in the signature for the scheduler calls made here
 */
private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovementEvent crdmEvent, DataMovementEventPayloadProto shufflePayload, BitSet emptyPartitionsBitSet) throws IOException {
  final int firstPartition = crdmEvent.getSourceIndex();
  final CompositeInputAttemptIdentifier compositeId = constructInputAttemptIdentifier(crdmEvent.getTargetIndex(), crdmEvent.getCount(), crdmEvent.getVersion(), shufflePayload);
  if (LOG.isDebugEnabled()) {
    LOG.debug("DME srcIdx: " + firstPartition + ", targetIdx: " + crdmEvent.getTargetIndex() + ", count:" + crdmEvent.getCount() + ", attemptNum: " + crdmEvent.getVersion() + ", payload: " + ShuffleUtils.stringify(shufflePayload));
  }
  if (shufflePayload.hasEmptyPartitions()) {
    boolean sawData = false;
    for (int offset = 0; offset < crdmEvent.getCount(); offset++) {
      final int srcPartition = firstPartition + offset;
      if (!emptyPartitionsBitSet.get(srcPartition)) {
        // At least one partition has data, so the composite must still be registered.
        sawData = true;
        continue;
      }
      final InputAttemptIdentifier emptyAttempt = compositeId.expand(offset);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Source partition: " + srcPartition + " did not generate any data. SrcAttempt: [" + emptyAttempt + "]. Not fetching.");
      }
      numDmeEventsNoData.getAndIncrement();
      scheduler.copySucceeded(emptyAttempt, null, 0, 0, 0, null, true);
    }
    if (!sawData) {
      return;
    }
  }
  scheduler.addKnownMapOutput(StringInterner.weakIntern(shufflePayload.getHost()), shufflePayload.getPort(), firstPartition, compositeId);
}
Use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in the Apache Tez project.
Class ShuffleInputEventHandlerOrderedGrouped, method constructInputAttemptIdentifier.
/**
 * Builds the {@link CompositeInputAttemptIdentifier} for an event payload.
 *
 * <p>If the payload has a spill id, the identifier is created with that id and a spill state
 * of {@code FINAL_UPDATE} when the payload's last-event flag is set, or
 * {@code INCREMENTAL_UPDATE} otherwise. Without a spill id the shorter constructor is used.
 *
 * @param targetIndex passed through as the identifier's first constructor argument
 * @param targetIndexCount passed through as the identifier's last constructor argument
 * @param version passed through as the identifier's second constructor argument
 * @param shufflePayload payload supplying the path component, spill id and last-event flag
 * @return CompositeInputAttemptIdentifier built from the arguments above
 */
private CompositeInputAttemptIdentifier constructInputAttemptIdentifier(int targetIndex, int targetIndexCount, int version, DataMovementEventPayloadProto shufflePayload) {
  // The path component is optional in the payload; it stays null when absent.
  String internedPath = null;
  if (shufflePayload.hasPathComponent()) {
    internedPath = StringInterner.weakIntern(shufflePayload.getPathComponent());
  }
  final int spillId = shufflePayload.getSpillId();
  if (!shufflePayload.hasSpillId()) {
    return new CompositeInputAttemptIdentifier(targetIndex, version, internedPath, targetIndexCount);
  }
  final InputAttemptIdentifier.SPILL_INFO spillState;
  if (shufflePayload.getLastEvent()) {
    spillState = InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE;
  } else {
    spillState = InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE;
  }
  return new CompositeInputAttemptIdentifier(targetIndex, version, internedPath, false, spillState, spillId, targetIndexCount);
}
Use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in the Apache Tez project.
Class Fetcher, method fetchInputs.
/**
 * Reads one shuffle response from {@code input} and hands every non-empty map output in it
 * to the input manager and the fetcher callback.
 *
 * <p>The method works in two passes. First it reads one {@code ShuffleHeader} per partition
 * (the partition count is read from the stream when {@code compositeFetch} is set, otherwise
 * it is 1), maps each header back to a source attempt through {@code pathToAttemptMap},
 * skips headers with a compressed length of 0 and sanity-checks the rest. Then, for each
 * recorded header, it allocates a {@code FetchedInput}, copies the payload into memory or
 * onto disk, optionally offers it to {@code callback}, and reports success through
 * {@code fetcherCallback}.
 *
 * @param input stream the headers and payloads are read from
 * @param callback cache callback, consulted only for attempts whose {@code isShared()} is
 *        true; may be {@code null}
 * @param inputAttemptIdentifier the attempt being fetched; used in error messages and
 *        removed from {@code srcAttemptsRemaining} (by its {@code toString()}) once all
 *        outputs were processed
 * @return {@code null} when everything was processed or when the fetcher is shut down;
 *         otherwise the attempt(s) associated with the failure: a single attempt when it
 *         could be identified, or all values of {@code srcAttemptsRemaining} when not
 * @throws FetcherReadTimeoutException when an {@code IOException} or {@code InternalError}
 *         occurs and {@code shouldRetry} returns true for it
 */
private InputAttemptIdentifier[] fetchInputs(DataInputStream input, CachingCallBack callback, InputAttemptIdentifier inputAttemptIdentifier) throws FetcherReadTimeoutException {
// These four locals are declared outside the try so the catch block below can see how far
// processing got when an exception was raised.
FetchedInput fetchedInput = null;
InputAttemptIdentifier srcAttemptId = null;
long decompressedLength = 0;
long compressedLength = 0;
try {
// Taken once: the elapsed time reported for every output below is measured from here.
long startTime = System.currentTimeMillis();
int partitionCount = 1;
if (this.compositeFetch) {
// Multiple partitions are fetched
partitionCount = WritableUtils.readVInt(input);
}
// Pass 1: read one header per partition and record the non-empty ones.
ArrayList<MapOutputStat> mapOutputStats = new ArrayList<>(partitionCount);
for (int mapOutputIndex = 0; mapOutputIndex < partitionCount; mapOutputIndex++) {
MapOutputStat mapOutputStat = null;
int responsePartition = -1;
// Read the shuffle header
String pathComponent = null;
try {
ShuffleHeader header = new ShuffleHeader();
header.readFields(input);
pathComponent = header.getMapId();
if (!pathComponent.startsWith(InputAttemptIdentifier.PATH_PREFIX)) {
throw new IllegalArgumentException("Invalid map id: " + header.getMapId() + ", expected to start with " + InputAttemptIdentifier.PATH_PREFIX + ", partition: " + header.getPartition() + " while fetching " + inputAttemptIdentifier);
}
// Map the (path, partition) pair from the header back to the attempt being fetched.
srcAttemptId = pathToAttemptMap.get(new PathPartition(pathComponent, header.getPartition()));
if (srcAttemptId == null) {
throw new IllegalArgumentException("Source attempt not found for map id: " + header.getMapId() + ", partition: " + header.getPartition() + " while fetching " + inputAttemptIdentifier);
}
if (header.getCompressedLength() == 0) {
// Empty partitions are already accounted for
continue;
}
mapOutputStat = new MapOutputStat(srcAttemptId, header.getUncompressedLength(), header.getCompressedLength(), header.getPartition());
mapOutputStats.add(mapOutputStat);
responsePartition = header.getPartition();
} catch (IllegalArgumentException e) {
// Catches the IllegalArgumentExceptions thrown explicitly above (and any raised by the
// calls inside this try).
// badIdErrs.increment(1);
if (!isShutDown.get()) {
LOG.warn("Invalid src id ", e);
// Don't know which one was bad, so consider all of them as bad
return srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.size()]);
} else {
if (isDebugEnabled) {
LOG.debug("Already shutdown. Ignoring badId error with message: " + e.getMessage());
}
return null;
}
}
// mapOutputStat is non-null here: every path out of the try above either assigned it,
// continued to the next header, or returned.
// Do some basic sanity verification
if (!verifySanity(mapOutputStat.compressedLength, mapOutputStat.decompressedLength, responsePartition, mapOutputStat.srcAttemptId, pathComponent)) {
if (!isShutDown.get()) {
srcAttemptId = mapOutputStat.srcAttemptId;
// NOTE(review): the attempt was null-checked before mapOutputStat was built, so this
// fallback looks purely defensive — confirm MapOutputStat stores the value unchanged.
if (srcAttemptId == null) {
LOG.warn("Was expecting " + getNextRemainingAttempt() + " but got null");
srcAttemptId = getNextRemainingAttempt();
}
assert (srcAttemptId != null);
return new InputAttemptIdentifier[] { srcAttemptId };
} else {
if (isDebugEnabled) {
LOG.debug("Already shutdown. Ignoring verification failure.");
}
return null;
}
}
if (isDebugEnabled) {
LOG.debug("header: " + mapOutputStat.srcAttemptId + ", len: " + mapOutputStat.compressedLength + ", decomp len: " + mapOutputStat.decompressedLength);
}
}
// Pass 2: fetch the payload for each recorded header, in the order the headers were read.
for (MapOutputStat mapOutputStat : mapOutputStats) {
// Get the location for the map output - either in-memory or on-disk
srcAttemptId = mapOutputStat.srcAttemptId;
decompressedLength = mapOutputStat.decompressedLength;
compressedLength = mapOutputStat.compressedLength;
// TODO TEZ-957. handle IOException here when Broadcast has better error checking
if (srcAttemptId.isShared() && callback != null) {
// force disk if input is being shared
fetchedInput = inputManager.allocateType(Type.DISK, decompressedLength, compressedLength, srcAttemptId);
} else {
fetchedInput = inputManager.allocate(decompressedLength, compressedLength, srcAttemptId);
}
// Go!
if (isDebugEnabled) {
LOG.debug("fetcher" + " about to shuffle output of srcAttempt " + fetchedInput.getInputAttemptIdentifier() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + fetchedInput.getType());
}
if (fetchedInput.getType() == Type.MEMORY) {
// Lengths are narrowed to int for the in-memory copy.
ShuffleUtils.shuffleToMemory(((MemoryFetchedInput) fetchedInput).getBytes(), input, (int) decompressedLength, (int) compressedLength, codec, ifileReadAhead, ifileReadAheadLength, LOG, fetchedInput.getInputAttemptIdentifier());
} else if (fetchedInput.getType() == Type.DISK) {
ShuffleUtils.shuffleToDisk(((DiskFetchedInput) fetchedInput).getOutputStream(), (host + ":" + port), input, compressedLength, decompressedLength, LOG, fetchedInput.getInputAttemptIdentifier(), ifileReadAhead, ifileReadAheadLength, verifyDiskChecksum);
} else {
throw new TezUncheckedException("Bad fetchedInput type while fetching shuffle data " + fetchedInput);
}
// offer the fetched input for caching
if (srcAttemptId.isShared() && callback != null) {
// this has to be before the fetchSucceeded, because that goes across
// threads into the reader thread and can potentially shutdown this thread
// while it is still caching.
callback.cache(host, srcAttemptId, fetchedInput, compressedLength, decompressedLength);
}
// Inform the shuffle scheduler
long endTime = System.currentTimeMillis();
// Reset retryStartTime as map task make progress if retried before.
retryStartTime = 0;
fetcherCallback.fetchSucceeded(host, srcAttemptId, fetchedInput, compressedLength, decompressedLength, (endTime - startTime));
// Note successful shuffle
// metrics.successFetch();
}
// Every recorded output was processed; drop this attempt from the remaining set, using
// its toString() as the key.
srcAttemptsRemaining.remove(inputAttemptIdentifier.toString());
} catch (IOException | InternalError ioe) {
// NOTE(review): fetchedInput is only reassigned when an allocation returns, so if an earlier
// output in this call already succeeded it may still reference that earlier input here —
// confirm cleanupFetchedInput is safe in that case.
if (isShutDown.get()) {
cleanupFetchedInput(fetchedInput);
if (isDebugEnabled) {
LOG.debug("Already shutdown. Ignoring exception during fetch " + ioe.getClass().getName() + ", Message: " + ioe.getMessage());
}
return null;
}
if (shouldRetry(srcAttemptId, ioe)) {
// release mem/file handles
cleanupFetchedInput(fetchedInput);
throw new FetcherReadTimeoutException(ioe);
}
// ioErrs.increment(1);
// A null srcAttemptId means no header was resolved to an attempt; a null fetchedInput means
// no allocation has returned so far in this call.
if (srcAttemptId == null || fetchedInput == null) {
// NOTE(review): the message below has no separator between "header" and the attempt id.
LOG.info("fetcher" + " failed to read map header" + srcAttemptId + " decomp: " + decompressedLength + ", " + compressedLength, ioe);
// Cleanup the fetchedInput before returning.
cleanupFetchedInput(fetchedInput);
if (srcAttemptId == null) {
// The failing attempt is unknown, so all remaining attempts are returned.
return srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.size()]);
} else {
return new InputAttemptIdentifier[] { srcAttemptId };
}
}
LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host, ioe);
// Cleanup the fetchedInput
cleanupFetchedInput(fetchedInput);
// metrics.failedFetch();
return new InputAttemptIdentifier[] { srcAttemptId };
}
// Reached only when the try block completed without returning.
return null;
}
Use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in the Apache Tez project.
Class ShuffleInputEventHandlerOrderedGrouped, method processTaskFailedEvent.
/**
 * Tells the scheduler that the input identified by the event's target index and version is
 * obsolete.
 *
 * @param ifEvent the input-failed event whose target index and version identify the input
 */
private void processTaskFailedEvent(InputFailedEvent ifEvent) {
  final InputAttemptIdentifier obsoleteAttempt =
      new InputAttemptIdentifier(ifEvent.getTargetIndex(), ifEvent.getVersion());
  scheduler.obsoleteInput(obsoleteAttempt);
  if (!LOG.isDebugEnabled()) {
    return;
  }
  LOG.debug("Obsoleting output of src-task: " + obsoleteAttempt);
}
Aggregations