Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class DefaultSorter, method spill.
protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCount) throws IOException, InterruptedException {
  // approximate the length of the output file to be the length of the
  // buffer + header lengths for the partitions
  final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart) + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, filename);
    out = rfs.create(filename);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(filename, SPILL_FILE_PERMS);
    }
    int spindex = mstart;
    final InMemValBytes value = createInMemValBytes();
    boolean rle = isRLENeeded(sameKeyCount, totalKeysCount);
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        if ((spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) || !sendEmptyPartitionDetails) {
          writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, rle);
        }
        if (combiner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
            if (LOG.isDebugEnabled()) {
              LOG.debug(outputContext.getDestinationVertexName() + ": " + "Running combine processor");
            }
            runCombineProcessor(kvIter, writer);
          }
        }
        long rawLength = 0;
        long partLength = 0;
        // close the writer
        if (writer != null) {
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);
        // record offsets
        final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
        spillRec.putIndex(rec, i);
        if (!isFinalMergeEnabled() && reportPartitionStats() && writer != null) {
          partitionStats[i] += partLength;
        }
        writer = null;
      } finally {
        if (null != writer) {
          writer.close();
        }
      }
    }
    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillFileIndexPaths.put(numSpills, indexFilename);
      spillRec.writeToFile(indexFilename, conf);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info(outputContext.getDestinationVertexName() + ": " + "Finished spill " + numSpills + " at " + filename.toString());
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      numShuffleChunks.setValue(numSpills);
    } else if (numSpills > 1) {
      // Increment only when there was at least one previous spill
      numAdditionalSpills.increment(1);
    }
  } finally {
    if (out != null) {
      out.close();
    }
  }
}
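DefaultSorter.spill, like every other method on this page, follows one IFile.Writer lifecycle: open a single FSDataOutputStream for the spill file, then for each partition note the stream position, write that partition's records through a Writer, close it, and capture the (offset, rawLength, partLength) triple in a TezSpillRecord. The sketch below condenses that pattern; it is not Tez source. PartitionSource and SpillSketch are hypothetical names, and the Writer constructor used is the seven-argument overload that also appears in UnorderedPartitionedKVWriter.mergeAll further down; the spill here is assumed uncompressed (null codec).

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.tez.runtime.library.common.sort.impl.IFile;
import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;

/** Hypothetical view of one partition's serialized records. */
interface PartitionSource {
  /** Returns the key/value pairs for the given partition, possibly empty. */
  Iterator<Map.Entry<DataInputBuffer, DataInputBuffer>> recordsFor(int partition);
}

final class SpillSketch {
  /** Writes one spill file plus its index file, partition by partition. */
  static void writeSpill(Configuration conf, FileSystem fs, Path spillFile, Path indexFile,
      int partitions, Class<?> keyClass, Class<?> valClass, PartitionSource source)
      throws IOException {
    TezSpillRecord spillRec = new TezSpillRecord(partitions);
    FSDataOutputStream out = fs.create(spillFile);
    try {
      for (int p = 0; p < partitions; ++p) {
        long segmentStart = out.getPos();      // offset of this partition's segment
        IFile.Writer writer = new IFile.Writer(conf, out, keyClass, valClass,
            null /* no codec */, null, null);
        try {
          Iterator<Map.Entry<DataInputBuffer, DataInputBuffer>> it = source.recordsFor(p);
          while (it.hasNext()) {
            Map.Entry<DataInputBuffer, DataInputBuffer> kv = it.next();
            writer.append(kv.getKey(), kv.getValue());
          }
        } finally {
          writer.close();                      // lengths are only valid after close()
        }
        spillRec.putIndex(new TezIndexRecord(segmentStart,
            writer.getRawLength(), writer.getCompressedLength()), p);
      }
    } finally {
      out.close();
    }
    // persist the per-partition offsets so the shuffle handler can serve byte ranges
    spillRec.writeToFile(indexFile, conf);
  }
}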
Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class PipelinedSorter, method spillSingleRecord.
// it is guaranteed that when spillSingleRecord is called, there are
// no merger spans queued in the executor.
private void spillSingleRecord(final Object key, final Object value, int partition) throws IOException {
  final TezSpillRecord spillRec = new TezSpillRecord(partitions);
  // getSpillFileForWrite with size -1, as the serialized size of the KV pair is still unknown
  final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, -1);
  Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
  spillFilePaths.put(numSpills, filename);
  FSDataOutputStream out = rfs.create(filename, true, 4096);
  if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
    rfs.setPermission(filename, SPILL_FILE_PERMS);
  }
  try {
    LOG.info(outputContext.getDestinationVertexName() + ": Spilling to " + filename.toString() + ", indexFilename=" + indexFilename);
    for (int i = 0; i < partitions; ++i) {
      if (isThreadInterrupted()) {
        return;
      }
      Writer writer = null;
      try {
        long segmentStart = out.getPos();
        if (!sendEmptyPartitionDetails || (i == partition)) {
          writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, false);
        }
        // we need not check for a combiner since it's a single record
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          mapOutputRecordCounter.increment(1);
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        long rawLength = 0;
        long partLength = 0;
        if (writer != null) {
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);
        // record offsets
        final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
        spillRec.putIndex(rec, i);
        writer = null;
      } finally {
        if (null != writer) {
          writer.close();
        }
      }
    }
    spillFileIndexPaths.put(numSpills, indexFilename);
    spillRec.writeToFile(indexFilename, conf);
    // TODO: honor cache limits
    indexCacheList.add(spillRec);
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      fileOutputByteCounter.increment(rfs.getFileStatus(filename).getLen());
      // No final merge. Set the number of files offered via shuffle-handler
      numShuffleChunks.setValue(numSpills);
    }
    if (pipelinedShuffle) {
      sendPipelinedShuffleEvents();
    }
  } finally {
    out.close();
  }
}
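The index file written above is what lets a consumer pull a single partition back out of the spill file: it records one (startOffset, rawLength, partLength) entry per partition, including zero-length entries for the partitions the writer skipped. The read side might look like the sketch below; this is an illustration, not Tez source. It assumes a TezSpillRecord constructor that reads an existing index file, and the IFile.Reader constructor mirrors the eight-argument form used in UnorderedPartitionedKVWriter.mergeAll further down.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.tez.runtime.library.common.sort.impl.IFile;
import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;

final class ReadOnePartition {
  /** Streams the records of a single partition out of a spill file. */
  static void dumpPartition(Configuration conf, FileSystem fs,
      Path spillFile, Path indexFile, int partition) throws IOException {
    TezSpillRecord spillRec = new TezSpillRecord(indexFile, conf);
    TezIndexRecord index = spillRec.getIndex(partition);
    if (index.getPartLength() == 0) {
      return; // empty partition, same check mergeAll() uses below
    }
    FSDataInputStream in = fs.open(spillFile);
    try {
      in.seek(index.getStartOffset());
      IFile.Reader reader = new IFile.Reader(in, index.getPartLength(),
          null /* codec */, null, null, false /* readAhead */, 0, 4096);
      DataInputBuffer key = new DataInputBuffer();
      DataInputBuffer val = new DataInputBuffer();
      while (reader.nextRawKey(key)) {
        reader.nextRawValue(val);
        // process key/val here
      }
      reader.close();
    } finally {
      in.close();
    }
  }
}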
Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class PipelinedSorter, method spill.
public boolean spill(boolean ignoreEmptySpills) throws IOException {
  FSDataOutputStream out = null;
  try {
    try {
      boolean ret = merger.ready();
      // if the merger is not ready (i.e. has no data) and ignoreEmptySpills is set,
      // then return directly without spilling
      if (!ret && ignoreEmptySpills) {
        return false;
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      LOG.info(outputContext.getDestinationVertexName() + ": Interrupted while waiting for mergers to complete");
      throw new IOInterruptedException(outputContext.getDestinationVertexName() + ": Interrupted while waiting for mergers to complete", e);
    }
    // create spill file
    final long size = capacity + (partitions * APPROX_HEADER_LENGTH);
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, filename);
    out = rfs.create(filename, true, 4096);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(filename, SPILL_FILE_PERMS);
    }
    LOG.info(outputContext.getDestinationVertexName() + ": Spilling to " + filename.toString());
    for (int i = 0; i < partitions; ++i) {
      if (isThreadInterrupted()) {
        return false;
      }
      outputContext.notifyProgress();
      TezRawKeyValueIterator kvIter = merger.filter(i);
      // write merged output to disk
      long segmentStart = out.getPos();
      Writer writer = null;
      boolean hasNext = kvIter.hasNext();
      if (hasNext || !sendEmptyPartitionDetails) {
        writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, merger.needsRLE());
      }
      if (combiner == null) {
        while (kvIter.next()) {
          writer.append(kvIter.getKey(), kvIter.getValue());
        }
      } else {
        if (hasNext) {
          runCombineProcessor(kvIter, writer);
        }
      }
      long rawLength = 0;
      long partLength = 0;
      // close
      if (writer != null) {
        writer.close();
        rawLength = writer.getRawLength();
        partLength = writer.getCompressedLength();
      }
      adjustSpillCounters(rawLength, partLength);
      // record offsets
      final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
      spillRec.putIndex(rec, i);
      if (!isFinalMergeEnabled() && reportPartitionStats()) {
        partitionStats[i] += partLength;
      }
    }
    Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
    spillFileIndexPaths.put(numSpills, indexFilename);
    spillRec.writeToFile(indexFilename, conf);
    // TODO: honor cache limits
    indexCacheList.add(spillRec);
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      fileOutputByteCounter.increment(rfs.getFileStatus(filename).getLen());
      // No final merge. Set the number of files offered via shuffle-handler
      numShuffleChunks.setValue(numSpills);
    }
    return true;
  } finally {
    if (out != null) {
      out.close();
    }
  }
}
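A detail shared by all three sorter paths above: getRawLength() is the uncompressed serialized size of a segment, while getCompressedLength() is the number of bytes that actually landed in the file, and both go into the TezIndexRecord because, broadly speaking, the raw length is used when sizing in-memory shuffle buffers while the part length delimits the on-disk byte range to serve. A small sketch contrasting the two, assuming raw-buffer appends (so no key/value classes are needed, which is why nulls are passed; an assumption mirroring how merge code typically drives IFile writers):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.tez.runtime.library.common.sort.impl.IFile;

final class SegmentLengths {
  /**
   * Writes the given records as one IFile segment and returns
   * {rawLength, compressedLength}. With codec == null the two values track
   * each other (modulo IFile framing); with a codec they diverge.
   */
  static long[] writeSegment(Configuration conf, FSDataOutputStream out,
      CompressionCodec codec, DataInputBuffer[] keys, DataInputBuffer[] vals)
      throws IOException {
    IFile.Writer writer =
        new IFile.Writer(conf, out, null, null, codec, null, null); // raw appends only
    try {
      for (int i = 0; i < keys.length; i++) {
        writer.append(keys[i], vals[i]);
      }
    } finally {
      writer.close(); // lengths are finalized by close()
    }
    return new long[] { writer.getRawLength(), writer.getCompressedLength() };
  }
}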
Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class MergeManager, method finalMerge.
private TezRawKeyValueIterator finalMerge(Configuration job, FileSystem fs, List<MapOutput> inMemoryMapOutputs, List<FileChunk> onDiskMapOutputs) throws IOException, InterruptedException {
  logFinalMergeStart(inMemoryMapOutputs, onDiskMapOutputs);
  StringBuilder finalMergeLog = new StringBuilder();
  inputContext.notifyProgress();
  // merge config params
  Class keyClass = (Class) ConfigUtils.getIntermediateInputKeyClass(job);
  Class valueClass = (Class) ConfigUtils.getIntermediateInputValueClass(job);
  final Path tmpDir = new Path(inputContext.getUniqueIdentifier());
  final RawComparator comparator = (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(job);
  // segments required to vacate memory
  List<Segment> memDiskSegments = new ArrayList<Segment>();
  long inMemToDiskBytes = 0;
  boolean mergePhaseFinished = false;
  if (inMemoryMapOutputs.size() > 0) {
    int srcTaskId = inMemoryMapOutputs.get(0).getAttemptIdentifier().getInputIdentifier();
    inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, memDiskSegments, this.postMergeMemLimit);
    final int numMemDiskSegments = memDiskSegments.size();
    if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) {
      // If we reach here, it implies that we have less than io.sort.factor
      // disk segments and this will be incremented by 1 (result of the
      // memory segments merge). Since this total would still be
      // <= io.sort.factor, we will not do any more intermediate merges,
      // the merge of all these disk segments would be directly fed to the
      // reduce method
      mergePhaseFinished = true;
      // must spill to disk, but can't retain in-mem for intermediate merge
      // Cannot use the spill id in the final merge as it would clobber other
      // files, hence using Integer.MAX_VALUE
      final Path outputPath = mapOutputFile.getInputFileForWrite(srcTaskId, Integer.MAX_VALUE, inMemToDiskBytes).suffix(Constants.MERGED_OUTPUT_PREFIX);
      final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass, memDiskSegments, numMemDiskSegments, tmpDir, comparator, progressable, spilledRecordsCounter, null, additionalBytesRead, null);
      final Writer writer = new Writer(job, fs, outputPath, keyClass, valueClass, codec, null, null);
      try {
        TezMerger.writeFile(rIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
      } catch (IOException e) {
        if (null != outputPath) {
          try {
            fs.delete(outputPath, true);
          } catch (IOException ie) {
            // NOTHING
          }
        }
        throw e;
      } finally {
        if (null != writer) {
          writer.close();
          additionalBytesWritten.increment(writer.getCompressedLength());
        }
      }
      final FileStatus fStatus = localFS.getFileStatus(outputPath);
      // add to list of final disk outputs.
      onDiskMapOutputs.add(new FileChunk(outputPath, 0, fStatus.getLen()));
      if (LOG.isInfoEnabled()) {
        finalMergeLog.append("MemMerged: " + numMemDiskSegments + ", " + inMemToDiskBytes);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Merged " + numMemDiskSegments + " segments, size=" + inMemToDiskBytes + " to " + outputPath);
        }
      }
      inMemToDiskBytes = 0;
      memDiskSegments.clear();
    } else if (inMemToDiskBytes != 0) {
      if (LOG.isInfoEnabled()) {
        finalMergeLog.append("DelayedMemMerge: " + numMemDiskSegments + ", " + inMemToDiskBytes);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Keeping " + numMemDiskSegments + " segments, " + inMemToDiskBytes + " bytes in memory for " + "intermediate, on-disk merge");
        }
      }
    }
  }
  // segments on disk
  List<Segment> diskSegments = new ArrayList<Segment>();
  long onDiskBytes = inMemToDiskBytes;
  FileChunk[] onDisk = onDiskMapOutputs.toArray(new FileChunk[onDiskMapOutputs.size()]);
  for (FileChunk fileChunk : onDisk) {
    final long fileLength = fileChunk.getLength();
    onDiskBytes += fileLength;
    if (LOG.isDebugEnabled()) {
      LOG.debug("Disk file=" + fileChunk.getPath() + ", len=" + fileLength + ", isLocal=" + fileChunk.isLocalFile());
    }
    final Path file = fileChunk.getPath();
    TezCounter counter = file.toString().endsWith(Constants.MERGED_OUTPUT_PREFIX) ? null : mergedMapOutputsCounter;
    final long fileOffset = fileChunk.getOffset();
    final boolean preserve = fileChunk.isLocalFile();
    diskSegments.add(new DiskSegment(fs, file, fileOffset, fileLength, codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize, preserve, counter));
  }
  if (LOG.isInfoEnabled()) {
    finalMergeLog.append(". DiskSeg: " + onDisk.length + ", " + onDiskBytes);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk");
    }
  }
  Collections.sort(diskSegments, new Comparator<Segment>() {
    public int compare(Segment o1, Segment o2) {
      if (o1.getLength() == o2.getLength()) {
        return 0;
      }
      return o1.getLength() < o2.getLength() ? -1 : 1;
    }
  });
  // build final list of segments from merged backed by disk + in-mem
  List<Segment> finalSegments = new ArrayList<Segment>();
  long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0);
  if (LOG.isInfoEnabled()) {
    finalMergeLog.append(". MemSeg: " + finalSegments.size() + ", " + inMemBytes);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Merging " + finalSegments.size() + " segments, " + inMemBytes + " bytes from memory into reduce");
    }
  }
  if (0 != onDiskBytes) {
    final int numInMemSegments = memDiskSegments.size();
    diskSegments.addAll(0, memDiskSegments);
    memDiskSegments.clear();
    TezRawKeyValueIterator diskMerge = TezMerger.merge(job, fs, keyClass, valueClass, codec, diskSegments, ioSortFactor, numInMemSegments, tmpDir, comparator, progressable, false, spilledRecordsCounter, null, additionalBytesRead, null);
    diskSegments.clear();
    if (0 == finalSegments.size()) {
      return diskMerge;
    }
    finalSegments.add(new Segment(new RawKVIteratorReader(diskMerge, onDiskBytes), null));
  }
  if (LOG.isInfoEnabled()) {
    LOG.info(finalMergeLog.toString());
  }
  // This is doing nothing but creating an iterator over the segments.
  return TezMerger.merge(job, fs, keyClass, valueClass, finalSegments, finalSegments.size(), tmpDir, comparator, progressable, spilledRecordsCounter, null, additionalBytesRead, null);
}
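The anonymous Comparator above sorts the disk segments by ascending length, so the merge factor is consumed by the smallest files first. On Java 8+ the same ordering can be written more compactly; a drop-in equivalent, assuming Segment.getLength() is the same accessor used in the comparator above:

import java.util.Comparator;
import java.util.List;

import org.apache.tez.runtime.library.common.sort.impl.TezMerger.Segment;

final class SegmentOrdering {
  /** Ascending by segment length; equal lengths compare as 0, matching the original. */
  static void sortBySize(List<Segment> diskSegments) {
    diskSegments.sort(Comparator.comparingLong(Segment::getLength));
  }
}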
Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class UnorderedPartitionedKVWriter, method mergeAll.
private void mergeAll() throws IOException {
  long expectedSize = spilledSize;
  if (currentBuffer.nextPosition != 0) {
    expectedSize += currentBuffer.nextPosition - (currentBuffer.numRecords * META_SIZE) - currentBuffer.skipSize + numPartitions * APPROX_HEADER_LENGTH;
    // Update final statistics.
    updateGlobalStats(currentBuffer);
  }
  SpillPathDetails spillPathDetails = getSpillPathDetails(true, expectedSize);
  finalIndexPath = spillPathDetails.indexFilePath;
  finalOutPath = spillPathDetails.outputFilePath;
  TezSpillRecord finalSpillRecord = new TezSpillRecord(numPartitions);
  DataInputBuffer keyBuffer = new DataInputBuffer();
  DataInputBuffer valBuffer = new DataInputBuffer();
  DataInputBuffer keyBufferIFile = new DataInputBuffer();
  DataInputBuffer valBufferIFile = new DataInputBuffer();
  FSDataOutputStream out = null;
  try {
    out = rfs.create(finalOutPath);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(finalOutPath, SPILL_FILE_PERMS);
    }
    Writer writer = null;
    for (int i = 0; i < numPartitions; i++) {
      long segmentStart = out.getPos();
      if (numRecordsPerPartition[i] == 0) {
        LOG.info(destNameTrimmed + ": " + "Skipping partition: " + i + " in final merge since it has no records");
        continue;
      }
      writer = new Writer(conf, out, keyClass, valClass, codec, null, null);
      try {
        if (currentBuffer.nextPosition != 0 && currentBuffer.partitionPositions[i] != WrappedBuffer.PARTITION_ABSENT_POSITION) {
          // Write current buffer.
          writePartition(currentBuffer.partitionPositions[i], currentBuffer, writer, keyBuffer, valBuffer);
        }
        synchronized (spillInfoList) {
          for (SpillInfo spillInfo : spillInfoList) {
            TezIndexRecord indexRecord = spillInfo.spillRecord.getIndex(i);
            if (indexRecord.getPartLength() == 0) {
              // Skip empty partitions within a spill
              continue;
            }
            FSDataInputStream in = rfs.open(spillInfo.outPath);
            in.seek(indexRecord.getStartOffset());
            IFile.Reader reader = new IFile.Reader(in, indexRecord.getPartLength(), codec, null, additionalSpillBytesReadCounter, ifileReadAhead, ifileReadAheadLength, ifileBufferSize);
            while (reader.nextRawKey(keyBufferIFile)) {
              // TODO Inefficient. If spills are not compressed, a direct copy should be possible
              // given the current IFile format. Also extremely inefficient for large records,
              // since the entire record will be read into memory.
              reader.nextRawValue(valBufferIFile);
              writer.append(keyBufferIFile, valBufferIFile);
            }
            reader.close();
          }
        }
        writer.close();
        fileOutputBytesCounter.increment(writer.getCompressedLength());
        TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
        writer = null;
        finalSpillRecord.putIndex(indexRecord, i);
        outputContext.notifyProgress();
      } finally {
        if (writer != null) {
          writer.close();
        }
      }
    }
  } finally {
    if (out != null) {
      out.close();
    }
    deleteIntermediateSpills();
  }
  finalSpillRecord.writeToFile(finalIndexPath, conf);
  fileOutputBytesCounter.increment(indexFileSizeEstimate);
  LOG.info(destNameTrimmed + ": " + "Finished final spill after merging : " + numSpills.get() + " spills");
}
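The expectedSize arithmetic at the top of mergeAll backs the record metadata out of the in-memory buffer: nextPosition counts every byte consumed (payload plus per-record metadata), so subtracting numRecords * META_SIZE and skipSize leaves the payload, and one APPROX_HEADER_LENGTH per partition covers IFile framing. A worked example with entirely made-up numbers, just to make the formula concrete:

public final class ExpectedSizeExample {
  public static void main(String[] args) {
    // Hypothetical numbers; only the formula matches mergeAll().
    long spilledSize = 10_000_000L;   // bytes already written by earlier spills
    int nextPosition = 1_048_576;     // bytes consumed in the current buffer
    int numRecords = 4_096;           // records held in the current buffer
    int metaSize = 16;                // assumed per-record metadata size
    int skipSize = 512;               // bytes wasted at a buffer wrap
    int numPartitions = 8;
    long approxHeaderLength = 150L;   // assumed per-partition IFile overhead

    long expectedSize = spilledSize
        + nextPosition - (numRecords * metaSize) - skipSize
        + numPartitions * approxHeaderLength;
    System.out.println(expectedSize); // 10983728
  }
}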