use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class Fetcher method getTezIndexRecord.
/**
 * Returns the index record of one partition from a source attempt's shuffle index file.
 *
 * <p>The index file path is obtained from {@code getShuffleInputFileName} using the
 * attempt's path component and the task-output index suffix; a {@link TezSpillRecord}
 * is then built from that path and queried for {@code partition}.
 *
 * @param srcAttemptId the source attempt whose path component locates the index file
 * @param partition the partition whose index record is requested
 * @return the record the spill record holds for {@code partition}
 * @throws IOException propagated from the calls made here
 */
@VisibleForTesting
protected TezIndexRecord getTezIndexRecord(InputAttemptIdentifier srcAttemptId, int partition) throws IOException {
  final String pathComponent = srcAttemptId.getPathComponent();
  final Path indexPath = getShuffleInputFileName(pathComponent, Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
  return new TezSpillRecord(indexPath, conf).getIndex(partition);
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class FetcherOrderedGrouped method setupLocalDiskFetch.
/**
 * Fetches, directly from local disk, the outputs of the completed maps on {@code host}
 * for every partition in the inclusive range {@code minPartition..maxPartition}.
 *
 * <p>For each remaining source attempt and each partition, the index record is looked up;
 * partitions without data are skipped, the others are reported to the scheduler through
 * {@code copySucceeded}. An {@link IOException} or {@link InternalError} for a partition is
 * reported through {@code copyFailed} (unless the fetcher has been stopped, in which case
 * the method returns). A source attempt is removed from {@code remaining} only when none of
 * its partitions failed. {@code putBackRemainingMapOutputs(host)} always runs on exit.
 *
 * @param host the host whose map outputs are to be read from local disk
 * @throws InterruptedException if one of the calls made here is interrupted
 */
@VisibleForTesting
protected void setupLocalDiskFetch(MapHost host) throws InterruptedException {
  // Get completed maps on 'host'
  List<InputAttemptIdentifier> srcAttempts = scheduler.getMapsForHost(host);
  // Nothing to fetch for this host; this shows up especially at the tail of large jobs
  if (srcAttempts.isEmpty()) {
    return;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Fetcher " + id + " going to fetch (local disk) from " + host + " for: " + srcAttempts + ", partition range: " + minPartition + "-" + maxPartition);
  }
  // List of maps to be fetched yet
  populateRemainingMap(srcAttempts);
  try {
    final Iterator<InputAttemptIdentifier> iter = remaining.values().iterator();
    while (iter.hasNext()) {
      // Avoid fetching more if already stopped
      if (stopped) {
        return;
      }
      InputAttemptIdentifier srcAttemptId = iter.next();
      boolean hasFailures = false;
      // Fetch partition count number of map outputs (handles auto-reduce case)
      for (int curPartition = minPartition; curPartition <= maxPartition; curPartition++) {
        // Scoped to this partition: a failure here must never abort an output that an
        // earlier partition already handed over to the scheduler via copySucceeded.
        MapOutput mapOutput = null;
        try {
          long startTime = System.currentTimeMillis();
          // Partition id is the base partition id plus the relative offset
          int reduceId = host.getPartitionId() + curPartition - minPartition;
          srcAttemptId = scheduler.getIdentifierForFetchedOutput(srcAttemptId.getPathComponent(), reduceId);
          Path filename = getShuffleInputFileName(srcAttemptId.getPathComponent(), null);
          TezIndexRecord indexRecord = getIndexRecord(srcAttemptId.getPathComponent(), reduceId);
          if (!indexRecord.hasData()) {
            // Empty partition: nothing to hand to the scheduler
            continue;
          }
          mapOutput = getMapOutputForDirectDiskFetch(srcAttemptId, filename, indexRecord);
          long endTime = System.currentTimeMillis();
          scheduler.copySucceeded(srcAttemptId, host, indexRecord.getPartLength(), indexRecord.getRawLength(), (endTime - startTime), mapOutput, true);
        } catch (IOException | InternalError e) {
          // Only non-null when the failure happened after this partition's output was created
          if (mapOutput != null) {
            mapOutput.abort();
          }
          if (!stopped) {
            hasFailures = true;
            ioErrs.increment(1);
            scheduler.copyFailed(srcAttemptId, host, true, false, true);
            LOG.warn("Failed to read local disk output of " + srcAttemptId + " from " + host.getHostIdentifier(), e);
          } else {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Ignoring fetch error during local disk copy since fetcher has already been stopped");
            }
            return;
          }
        }
      }
      // Keep the attempt in 'remaining' if any of its partitions failed
      if (!hasFailures) {
        iter.remove();
      }
    }
  } finally {
    putBackRemainingMapOutputs(host);
  }
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class Fetcher method doLocalDiskFetch.
/**
 * Tries to satisfy every entry of {@code srcAttemptsRemaining} from local disk.
 *
 * <p>For each entry, {@code partitionCount} consecutive partitions starting at
 * {@code partition} are processed: the attempt is resolved through {@code pathToAttemptMap},
 * its index record is read, a {@code LocalDiskFetchedInput} with a no-op callback is built
 * and reported through {@code fetcherCallback.fetchSucceeded}. An {@link IOException} or
 * {@link InternalError} marks the entry as failed and cleans up the fetched input; the
 * remaining partitions of that entry are still attempted unless shutdown was requested.
 * Entries without any failure are removed from {@code srcAttemptsRemaining}.
 *
 * @param failMissing when true, failures are logged as warnings and the attempts still
 *        remaining at the end are returned as failed fetches (unless shut down)
 * @return a result wrapping the remaining attempts and, possibly, the failed fetches
 */
@VisibleForTesting
private HostFetchResult doLocalDiskFetch(boolean failMissing) {
  Iterator<Entry<String, InputAttemptIdentifier>> pending = srcAttemptsRemaining.entrySet().iterator();
  while (pending.hasNext()) {
    boolean anyFailed = false;
    if (isShutDown.get()) {
      if (isDebugEnabled) {
        LOG.debug("Already shutdown. Skipping fetch for " + srcAttemptsRemaining.size() + " inputs");
      }
      break;
    }
    InputAttemptIdentifier srcAttemptId = pending.next().getValue();
    for (int offset = 0; offset < partitionCount; offset++) {
      final int reduceId = offset + partition;
      final PathPartition lookupKey = new PathPartition(srcAttemptId.getPathComponent(), reduceId);
      srcAttemptId = pathToAttemptMap.get(lookupKey);
      final long fetchStart = System.currentTimeMillis();
      FetchedInput localInput = null;
      try {
        // a missing file surfaces here as an exception
        final TezIndexRecord indexEntry = getTezIndexRecord(srcAttemptId, reduceId);
        localInput = new LocalDiskFetchedInput(
            indexEntry.getStartOffset(),
            indexEntry.getPartLength(),
            srcAttemptId,
            getShuffleInputFileName(srcAttemptId.getPathComponent(), null),
            conf,
            new FetchedInputCallback() {
              @Override
              public void fetchComplete(FetchedInput input) {
              }

              @Override
              public void fetchFailed(FetchedInput input) {
              }

              @Override
              public void freeResources(FetchedInput input) {
              }
            });
        if (isDebugEnabled) {
          LOG.debug("fetcher" + " about to shuffle output of srcAttempt (direct disk)" + srcAttemptId + " decomp: " + indexEntry.getRawLength() + " len: " + indexEntry.getPartLength() + " to " + localInput.getType());
        }
        final long fetchEnd = System.currentTimeMillis();
        fetcherCallback.fetchSucceeded(host, srcAttemptId, localInput, indexEntry.getPartLength(), indexEntry.getRawLength(), (fetchEnd - fetchStart));
      } catch (IOException | InternalError e) {
        anyFailed = true;
        cleanupFetchedInput(localInput);
        if (isShutDown.get()) {
          if (isDebugEnabled) {
            LOG.debug("Already shutdown. Ignoring Local Fetch Failure for " + srcAttemptId + " from host " + host + " : " + e.getClass().getName() + ", message=" + e.getMessage());
          }
          // leaves the partition loop only; the outer loop re-checks the shutdown flag
          break;
        }
        if (failMissing) {
          LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host + "(local fetch)", e);
        }
      }
    }
    if (!anyFailed) {
      pending.remove();
    }
  }

  InputAttemptIdentifier[] failedFetches = null;
  // when failures are not reported, nothing needs to be done to requeue remaining entries
  if (failMissing && srcAttemptsRemaining.size() > 0) {
    if (!isShutDown.get()) {
      failedFetches = srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.values().size()]);
    } else if (isDebugEnabled) {
      LOG.debug("Already shutdown, not reporting fetch failures for: " + srcAttemptsRemaining.size() + " remaining inputs");
    }
  }

  final FetchResult remainingResult = new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values());
  return new HostFetchResult(remainingResult, failedFetches, false);
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class DefaultSorter method spillSingleRecord.
/**
 * Spills a single key/value pair straight from collect into its own spill file.
 *
 * <p>This is the degenerate case in which the serialized record does not fit into the
 * in-memory buffer. One index record is written per partition; only {@code partition}
 * receives the record. Consider this "losing".
 *
 * @param key the key of the record to spill
 * @param value the value of the record to spill
 * @param partition the partition the record belongs to
 * @throws IOException if creating or writing the spill file or its index fails
 */
private void spillSingleRecord(final Object key, final Object value, int partition) throws IOException {
  final long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // set up the spill file for this spill number
    final TezSpillRecord spillIndex = new TezSpillRecord(partitions);
    final Path spillPath = mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, spillPath);
    out = rfs.create(spillPath);
    final FsPermission maskedPerms = SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf));
    if (!SPILL_FILE_PERMS.equals(maskedPerms)) {
      rfs.setPermission(spillPath, SPILL_FILE_PERMS);
    }

    // the combiner is not run for a single record
    for (int partIdx = 0; partIdx < partitions; ++partIdx) {
      final boolean isTarget = (partIdx == partition);
      IFile.Writer writer = null;
      try {
        final long segmentStart = out.getPos();
        // a writer is opened for the target partition, and for every other partition
        // unless sendEmptyPartitionDetails is set
        if (!sendEmptyPartitionDetails || isTarget) {
          writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, false);
        }
        if (isTarget) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }

        long rawLength = 0;
        long partLength = 0;
        if (writer != null) {
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);

        // record offsets
        spillIndex.putIndex(new TezIndexRecord(segmentStart, rawLength, partLength), partIdx);
        writer = null;
      } catch (IOException e) {
        if (writer != null) {
          writer.close();
        }
        throw e;
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // index cache is full: write the spill index to its own file
      final Path indexPath = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillFileIndexPaths.put(numSpills, indexPath);
      spillIndex.writeToFile(indexPath, conf);
    } else {
      indexCacheList.add(spillIndex);
      totalIndexCacheMemory += spillIndex.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }

    ++numSpills;
    if (!isFinalMergeEnabled()) {
      numShuffleChunks.setValue(numSpills);
    } else if (numSpills > 1) {
      // Increment only when there is at least one previous spill
      numAdditionalSpills.increment(1);
    }
  } finally {
    if (out != null) {
      out.close();
    }
  }
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class UnorderedPartitionedKVWriter method close.
/**
 * Finishes writing: schedules a last spill, waits for pending spills to drain, and then
 * either rethrows a recorded spill failure or produces the final events.
 *
 * <p>Flow, as visible in this method:
 * <ul>
 * <li>Waits under {@code spillLock} until {@code pendingSpillCount} is zero or
 * {@code spillException} is set.</li>
 * <li>If {@code spillException} is set: cleans up, releases the current buffer and throws
 * it as an {@link IOException} (wrapping it when it is of another type).</li>
 * <li>Non-pipelined shuffle with {@code skipBuffers}: closes the writer, writes a
 * one-entry spill index to {@code finalIndexPath} and returns a VM event plus a DM event.</li>
 * <li>Non-pipelined shuffle otherwise: performs the final merge or final spill (see the
 * inline comment) and returns the generated events.</li>
 * <li>Pipelined shuffle: runs the final spill, possibly sends events for it, and returns
 * the {@code events} list.</li>
 * </ul>
 *
 * @return the events to send out; for the pipelined path this is the {@code events} list,
 *         to which this method itself adds nothing
 * @throws IOException if a spill failed or one of the write/merge calls fails
 * @throws InterruptedException presumably from {@code spillInProgress.await()} — confirm
 */
@Override
public List<Event> close() throws IOException, InterruptedException {
// In case there are buffers to be spilled, schedule spilling
scheduleSpill(true);
List<Event> eventList = Lists.newLinkedList();
isShutdown.set(true);
spillLock.lock();
try {
LOG.info(destNameTrimmed + ": " + "Waiting for all spills to complete : Pending : " + pendingSpillCount.get());
// Stop waiting as soon as a spill failure has been recorded
while (pendingSpillCount.get() != 0 && spillException == null) {
spillInProgress.await();
}
} finally {
spillLock.unlock();
}
if (spillException != null) {
LOG.error(destNameTrimmed + ": " + "Error during spill, throwing");
// Assuming close will be called on the same thread as the write
cleanup();
currentBuffer.cleanup();
currentBuffer = null;
// Surface the failure as an IOException, wrapping it only when necessary
if (spillException instanceof IOException) {
throw (IOException) spillException;
} else {
throw new IOException(spillException);
}
} else {
LOG.info(destNameTrimmed + ": " + "All spills complete");
// Assuming close will be called on the same thread as the write
cleanup();
// NOTE(review): nothing in this method adds to 'events'; it is only returned on the
// pipelined path below — presumably events there are sent by mayBeSendEventsForSpill; confirm.
List<Event> events = Lists.newLinkedList();
if (!pipelinedShuffle) {
if (skipBuffers) {
// Single-partition path: the writer's output is described by one index record at offset 0
writer.close();
long rawLen = writer.getRawLength();
long compLen = writer.getCompressedLength();
TezIndexRecord rec = new TezIndexRecord(0, rawLen, compLen);
TezSpillRecord sr = new TezSpillRecord(1);
sr.putIndex(rec, 0);
sr.writeToFile(finalIndexPath, conf);
BitSet emptyPartitions = new BitSet();
// Partition 0 is flagged as empty when no records were output
if (outputRecordsCounter.getValue() == 0) {
emptyPartitions.set(0);
}
if (reportPartitionStats()) {
if (outputRecordsCounter.getValue() > 0) {
sizePerPartition[0] = rawLen;
}
}
cleanupCurrentBuffer();
// Byte counters are only updated when at least one record was output
if (outputRecordsCounter.getValue() > 0) {
outputBytesWithOverheadCounter.increment(rawLen);
fileOutputBytesCounter.increment(compLen + indexFileSizeEstimate);
}
eventList.add(generateVMEvent());
eventList.add(generateDMEvent(false, -1, false, outputContext.getUniqueIdentifier(), emptyPartitions));
return eventList;
}
/*
1. Final merge enabled
- When lots of spills are there, mergeAll, generate events and return
- If there are no existing spills, check for final spill and generate events
2. Final merge disabled
- If finalSpill generated data, generate events and return
- If finalSpill did not generate data, it would automatically populate events
*/
if (isFinalMergeEnabled) {
if (numSpills.get() > 0) {
mergeAll();
} else {
finalSpill();
}
updateTezCountersAndNotify();
eventList.add(generateVMEvent());
eventList.add(generateDMEvent());
} else {
// if no data is generated, finalSpill would create VMEvent & add to finalEvents
SpillResult result = finalSpill();
if (result != null) {
updateTezCountersAndNotify();
// Generate vm event
finalEvents.add(generateVMEvent());
// compute empty partitions based on spill result and generate DME
// The final spill is the most recent one, hence index numSpills - 1
int spillNum = numSpills.get() - 1;
SpillCallback callback = new SpillCallback(spillNum);
callback.computePartitionStats(result);
BitSet emptyPartitions = getEmptyPartitions(callback.getRecordsPerPartition());
String pathComponent = generatePathComponent(outputContext.getUniqueIdentifier(), spillNum);
Event finalEvent = generateDMEvent(true, spillNum, true, pathComponent, emptyPartitions);
finalEvents.add(finalEvent);
}
// all events to be sent out are in finalEvents.
eventList.addAll(finalEvents);
}
cleanupCurrentBuffer();
return eventList;
}
// For pipelined case, send out an event in case finalspill generated a spill file.
if (finalSpill() != null) {
// VertexManagerEvent is only sent at the end and thus sizePerPartition is used
// for the sum of all spills.
mayBeSendEventsForSpill(currentBuffer.recordsPerPartition, sizePerPartition, numSpills.get() - 1, true);
}
updateTezCountersAndNotify();
cleanupCurrentBuffer();
return events;
}
}
Aggregations