use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class ShuffleUtils method generateDMEPayload.
/**
 * Builds the serialized {@code DataMovementEventPayloadProto} describing one spill or final
 * output.
 *
 * <p>When {@code sendEmptyPartitionDetails} is set, every partition whose index record reports
 * no data is flagged in a bit set; if at least one partition is empty, the bit set is compressed
 * and attached to the payload. Host, port and path component are attached unless empty-partition
 * reporting is on and every partition turned out to be empty. Spill id and the last-event flag
 * are attached only when final merge is disabled.
 *
 * @param sendEmptyPartitionDetails whether to scan {@code spillRecord} for empty partitions
 * @param numPhysicalOutputs number of physical outputs; used only in the log message
 * @param spillRecord index records, one per partition
 * @param context output context supplying the host name and the shuffle provider metadata
 * @param spillId spill identifier, set on the payload when final merge is disabled
 * @param finalMergeEnabled whether final merge is enabled
 * @param isLastEvent last-event flag, set on the payload when final merge is disabled
 * @param pathComponent path component stored in the payload
 * @param auxiliaryService name used to look up the shuffle provider metadata
 * @param deflater deflater handed to the empty-partition compression helper
 * @return read-only buffer over the serialized payload
 * @throws IOException propagated from the helpers invoked while building the payload
 */
static ByteBuffer generateDMEPayload(boolean sendEmptyPartitionDetails, int numPhysicalOutputs, TezSpillRecord spillRecord, OutputContext context, int spillId, boolean finalMergeEnabled, boolean isLastEvent, String pathComponent, String auxiliaryService, Deflater deflater) throws IOException {
  final DataMovementEventPayloadProto.Builder builder = DataMovementEventPayloadProto.newBuilder();

  // Stays true unless empty-partition reporting discovers that every partition is empty.
  boolean hasOutput = true;

  if (sendEmptyPartitionDetails) {
    final BitSet emptyBits = new BitSet();
    final int partitionCount = spillRecord.size();
    for (int p = 0; p < partitionCount; p++) {
      if (!spillRecord.getIndex(p).hasData()) {
        emptyBits.set(p);
      }
    }

    final int emptyCount = emptyBits.cardinality();
    hasOutput = partitionCount != emptyCount;

    if (emptyCount > 0) {
      final ByteString compressedBits =
          TezCommonUtils.compressByteArrayToByteString(TezUtilsInternal.toByteArray(emptyBits), deflater);
      builder.setEmptyPartitions(compressedBits);
      LOG.info("EmptyPartition bitsetSize=" + emptyCount + ", numOutputs=" + numPhysicalOutputs
          + ", emptyPartitions=" + emptyCount + ", compressedSize=" + compressedBits.size());
    }
  }

  // hasOutput can only be false when sendEmptyPartitionDetails is true, so this single flag
  // covers "details not requested" as well as "details requested and some partition has data".
  if (hasOutput) {
    final String hostName = context.getExecutionContext().getHostName();
    final ByteBuffer providerMetadata = context.getServiceProviderMetaData(auxiliaryService);
    final int port = ShuffleUtils.deserializeShuffleProviderMetaData(providerMetadata);
    builder.setHost(hostName);
    builder.setPort(port);
    // Path component is always 0 indexed
    builder.setPathComponent(pathComponent);
  }

  if (!finalMergeEnabled) {
    builder.setSpillId(spillId);
    builder.setLastEvent(isLastEvent);
  }

  // TODO: who is dependent on this?
  builder.setRunDuration(0);

  return builder.build().toByteString().asReadOnlyByteBuffer();
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class DefaultSorter method mergeParts.
/**
 * Produces the final output of this sorter from the spills written so far.
 *
 * <ul>
 * <li>Exactly one spill: with final merge enabled the spill file (and its index) becomes the
 *     final output via a same-volume rename or an index write; with final merge disabled the
 *     cached index is written out and an event is sent for the spill.</li>
 * <li>Several spills with final merge disabled: events are added for the spills and no merge
 *     is performed.</li>
 * <li>No spills: a dummy output file and index are written.</li>
 * <li>Otherwise: all spills are merged partition by partition into a single output file, the
 *     index is written, and the spill files are deleted.</li>
 * </ul>
 *
 * <p>The merge branch closes the final output stream in a {@code finally} block, matching the
 * no-spill branch, so the stream is not leaked when merging or index writing fails.
 *
 * @throws IOException if a file system or merge operation fails
 * @throws InterruptedException propagated from the merge / combine helpers
 */
private void mergeParts() throws IOException, InterruptedException {
  // get the approximate size of the final output/index files
  long finalOutFileSize = 0;
  long finalIndexFileSize = 0;
  final Path[] filename = new Path[numSpills];
  final String taskIdentifier = outputContext.getUniqueIdentifier();
  for (int i = 0; i < numSpills; i++) {
    filename[i] = spillFilePaths.get(i);
    finalOutFileSize += rfs.getFileStatus(filename[i]).getLen();
  }
  if (numSpills == 1) {
    // the spill is the final output
    TezSpillRecord spillRecord = null;
    if (isFinalMergeEnabled()) {
      finalOutputFile = mapOutputFile.getOutputFileForWriteInVolume(filename[0]);
      finalIndexFile = mapOutputFile.getOutputIndexFileForWriteInVolume(filename[0]);
      sameVolRename(filename[0], finalOutputFile);
      if (indexCacheList.size() == 0) {
        // Index was paged out to disk: move it next to the final output and load it.
        sameVolRename(spillFileIndexPaths.get(0), finalIndexFile);
        spillRecord = new TezSpillRecord(finalIndexFile, conf);
      } else {
        spillRecord = indexCacheList.get(0);
        spillRecord.writeToFile(finalIndexFile, conf);
      }
    } else {
      List<Event> events = Lists.newLinkedList();
      // Since there is only one spill, spill record would be present in cache.
      spillRecord = indexCacheList.get(0);
      Path indexPath = mapOutputFile.getSpillIndexFileForWrite(numSpills - 1, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRecord.writeToFile(indexPath, conf);
      maybeSendEventForSpill(events, true, spillRecord, 0, true);
      fileOutputByteCounter.increment(rfs.getFileStatus(spillFilePaths.get(0)).getLen());
      // No need to populate finalIndexFile, finalOutputFile etc when finalMerge is disabled
    }
    if (spillRecord != null && reportPartitionStats()) {
      for (int i = 0; i < spillRecord.size(); i++) {
        partitionStats[i] += spillRecord.getIndex(i).getPartLength();
      }
    }
    numShuffleChunks.setValue(numSpills);
    return;
  }
  // read in paged indices
  for (int i = indexCacheList.size(); i < numSpills; ++i) {
    Path indexFileName = spillFileIndexPaths.get(i);
    indexCacheList.add(new TezSpillRecord(indexFileName, conf));
  }
  // Check if it is needed to do final merge. Or else, exit early.
  if (numSpills > 0 && !isFinalMergeEnabled()) {
    maybeAddEventsForSpills();
    // No need to do final merge.
    return;
  }
  // make correction in the length to include the sequence file header
  // lengths for each partition
  finalOutFileSize += partitions * APPROX_HEADER_LENGTH;
  finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;
  if (isFinalMergeEnabled()) {
    finalOutputFile = mapOutputFile.getOutputFileForWrite(finalOutFileSize);
    finalIndexFile = mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize);
  } else if (numSpills == 0) {
    // e.g attempt_1424502260528_0119_1_07_000058_0_10012_0/file.out when final merge is
    // disabled
    finalOutputFile = mapOutputFile.getSpillFileForWrite(numSpills, finalOutFileSize);
    finalIndexFile = mapOutputFile.getSpillIndexFileForWrite(numSpills, finalIndexFileSize);
  }
  // The output stream for the final single output file
  FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
  if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
    rfs.setPermission(finalOutputFile, SPILL_FILE_PERMS);
  }
  if (numSpills == 0) {
    // TODO Change event generation to say there is no data rather than generating a dummy file
    // create dummy files
    long rawLength = 0;
    long partLength = 0;
    TezSpillRecord sr = new TezSpillRecord(partitions);
    try {
      for (int i = 0; i < partitions; i++) {
        long segmentStart = finalOut.getPos();
        if (!sendEmptyPartitionDetails) {
          Writer writer = new Writer(conf, finalOut, keyClass, valClass, codec, null, null);
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
        // Covers the case of multiple spills.
        outputBytesWithOverheadCounter.increment(rawLength);
        sr.putIndex(rec, i);
      }
      sr.writeToFile(finalIndexFile, conf);
    } finally {
      finalOut.close();
    }
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      List<Event> events = Lists.newLinkedList();
      maybeSendEventForSpill(events, true, sr, 0, true);
      fileOutputByteCounter.increment(rfs.getFileStatus(finalOutputFile).getLen());
    }
    numShuffleChunks.setValue(numSpills);
    return;
  } else {
    try {
      final TezSpillRecord spillRec = new TezSpillRecord(partitions);
      for (int parts = 0; parts < partitions; parts++) {
        boolean shouldWrite = false;
        // create the segments to be merged
        List<Segment> segmentList = new ArrayList<Segment>(numSpills);
        for (int i = 0; i < numSpills; i++) {
          outputContext.notifyProgress();
          TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);
          if (indexRecord.hasData() || !sendEmptyPartitionDetails) {
            shouldWrite = true;
            DiskSegment s = new DiskSegment(rfs, filename[i], indexRecord.getStartOffset(), indexRecord.getPartLength(), codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize, true);
            segmentList.add(s);
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug(outputContext.getDestinationVertexName() + ": " + "TaskIdentifier=" + taskIdentifier + " Partition=" + parts + "Spill =" + i + "(" + indexRecord.getStartOffset() + "," + indexRecord.getRawLength() + ", " + indexRecord.getPartLength() + ")");
          }
        }
        int mergeFactor = this.conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        // merge
        TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs, keyClass, valClass, codec, segmentList, mergeFactor, new Path(taskIdentifier), (RawComparator) ConfigUtils.getIntermediateOutputKeyComparator(conf), progressable, sortSegments, true, null, spilledRecordsCounter, additionalSpillBytesRead,
            // Not using any Progress in TezMerger. Should just work.
            null);
        // write merged output to disk
        long segmentStart = finalOut.getPos();
        long rawLength = 0;
        long partLength = 0;
        if (shouldWrite) {
          Writer writer = new Writer(conf, finalOut, keyClass, valClass, codec, spilledRecordsCounter, null);
          if (combiner == null || numSpills < minSpillsForCombine) {
            TezMerger.writeFile(kvIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
          } else {
            runCombineProcessor(kvIter, writer);
          }
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        outputBytesWithOverheadCounter.increment(rawLength);
        // record offsets
        final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
        spillRec.putIndex(rec, parts);
        if (reportPartitionStats()) {
          partitionStats[parts] += partLength;
        }
      }
      // final merge has happened
      numShuffleChunks.setValue(1);
      spillRec.writeToFile(finalIndexFile, conf);
    } finally {
      // Always release the final output stream, including when the merge or the index write
      // throws; on success this runs at the same point the original close() did.
      finalOut.close();
    }
    // Reached only after a successful merge: the spill files are no longer needed.
    for (int i = 0; i < numSpills; i++) {
      rfs.delete(filename[i], true);
    }
  }
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class DefaultSorter method spill.
/**
 * Writes the metadata range {@code [mstart, mend)} to a new spill file, one segment per
 * partition, and records the per-partition offsets in a spill index.
 *
 * <p>The index is either kept in the in-memory cache or, once the cache memory limit has been
 * reached, written to its own index file.
 *
 * @param mstart first metadata index to spill
 * @param mend metadata index at which spilling stops (exclusive)
 * @param sameKeyCount passed to {@code isRLENeeded} together with {@code totalKeysCount}
 * @param totalKeysCount passed to {@code isRLENeeded} together with {@code sameKeyCount}
 * @throws IOException if writing the spill or its index fails
 * @throws InterruptedException propagated from the combine processor
 */
protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCount) throws IOException, InterruptedException {
  // approximate the length of the output file to be the length of the
  // buffer + header lengths for the partitions
  final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart) + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream spillOut = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, filename);
    spillOut = rfs.create(filename);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(filename, SPILL_FILE_PERMS);
    }

    // Cursor over the metadata; it only moves forward while walking the partitions in order.
    int cursor = mstart;
    final InMemValBytes value = createInMemValBytes();
    boolean rle = isRLENeeded(sameKeyCount, totalKeysCount);

    for (int part = 0; part < partitions; ++part) {
      IFile.Writer partWriter = null;
      try {
        final long segmentStart = spillOut.getPos();

        // A writer is opened when the partition has records, or unconditionally when empty
        // partition details are not being sent.
        final boolean partitionHasRecords = cursor < mend && kvmeta.get(offsetFor(cursor) + PARTITION) == part;
        if (partitionHasRecords || !sendEmptyPartitionDetails) {
          partWriter = new Writer(conf, spillOut, keyClass, valClass, codec, spilledRecordsCounter, null, rle);
        }

        if (combiner != null) {
          // Find the end of this partition's run, then hand the whole run to the combiner.
          final int runStart = cursor;
          while (cursor < mend && kvmeta.get(offsetFor(cursor) + PARTITION) == part) {
            ++cursor;
          }
          // The combiner is only run when the partition actually has records.
          if (runStart != cursor) {
            TezRawKeyValueIterator kvIter = new MRResultIterator(runStart, cursor);
            if (LOG.isDebugEnabled()) {
              LOG.debug(outputContext.getDestinationVertexName() + ": " + "Running combine processor");
            }
            runCombineProcessor(kvIter, partWriter);
          }
        } else {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          for (; cursor < mend && kvmeta.get(offsetFor(cursor) + PARTITION) == part; ++cursor) {
            final int kvoff = offsetFor(cursor);
            final int keystart = kvmeta.get(kvoff + KEYSTART);
            final int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            partWriter.append(key, value);
          }
        }

        // Lengths stay at zero when no writer was opened for this partition.
        long rawLength = 0;
        long partLength = 0;
        if (partWriter != null) {
          // close the writer
          partWriter.close();
          rawLength = partWriter.getRawLength();
          partLength = partWriter.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);

        // record offsets
        spillRec.putIndex(new TezIndexRecord(segmentStart, rawLength, partLength), part);
        if (!isFinalMergeEnabled() && reportPartitionStats() && partWriter != null) {
          partitionStats[part] += partLength;
        }

        // Mark as handled so the finally block does not close it a second time.
        partWriter = null;
      } finally {
        if (partWriter != null) {
          partWriter.close();
        }
      }
    }

    if (totalIndexCacheMemory < indexCacheMemoryLimit) {
      // Still room in the cache: keep the index in memory.
      indexCacheList.add(spillRec);
      totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    } else {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillFileIndexPaths.put(numSpills, indexFilename);
      spillRec.writeToFile(indexFilename, conf);
    }

    LOG.info(outputContext.getDestinationVertexName() + ": " + "Finished spill " + numSpills + " at " + filename.toString());
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      numShuffleChunks.setValue(numSpills);
    } else if (numSpills > 1) {
      // Increment only when there was atleast one previous spill
      numAdditionalSpills.increment(1);
    }
  } finally {
    if (spillOut != null) {
      spillOut.close();
    }
  }
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class UnorderedPartitionedKVWriter method mergeAll.
/**
 * Merges the current in-memory buffer and all previous spills into a single final output file
 * plus its index.
 *
 * <p>Partitions with no records are skipped and get no explicit index entry. For every other
 * partition, the matching part of the current buffer is written first, followed by the
 * records of that partition from each spill. Intermediate spills are deleted once the output
 * stream has been closed, whether or not the merge succeeded.
 *
 * <p>Each spill input is released in a {@code finally} block so that a failure while seeking,
 * reading or appending does not leak the open file handle.
 *
 * @throws IOException if reading a spill or writing the final output / index fails
 */
private void mergeAll() throws IOException {
  long expectedSize = spilledSize;
  if (currentBuffer.nextPosition != 0) {
    expectedSize += currentBuffer.nextPosition - (currentBuffer.numRecords * META_SIZE) - currentBuffer.skipSize + numPartitions * APPROX_HEADER_LENGTH;
    // Update final statistics.
    updateGlobalStats(currentBuffer);
  }
  SpillPathDetails spillPathDetails = getSpillPathDetails(true, expectedSize);
  finalIndexPath = spillPathDetails.indexFilePath;
  finalOutPath = spillPathDetails.outputFilePath;
  TezSpillRecord finalSpillRecord = new TezSpillRecord(numPartitions);
  DataInputBuffer keyBuffer = new DataInputBuffer();
  DataInputBuffer valBuffer = new DataInputBuffer();
  DataInputBuffer keyBufferIFile = new DataInputBuffer();
  DataInputBuffer valBufferIFile = new DataInputBuffer();
  FSDataOutputStream out = null;
  try {
    out = rfs.create(finalOutPath);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(finalOutPath, SPILL_FILE_PERMS);
    }
    Writer writer = null;
    for (int i = 0; i < numPartitions; i++) {
      long segmentStart = out.getPos();
      if (numRecordsPerPartition[i] == 0) {
        LOG.info(destNameTrimmed + ": " + "Skipping partition: " + i + " in final merge since it has no records");
        continue;
      }
      writer = new Writer(conf, out, keyClass, valClass, codec, null, null);
      try {
        if (currentBuffer.nextPosition != 0 && currentBuffer.partitionPositions[i] != WrappedBuffer.PARTITION_ABSENT_POSITION) {
          // Write current buffer.
          writePartition(currentBuffer.partitionPositions[i], currentBuffer, writer, keyBuffer, valBuffer);
        }
        synchronized (spillInfoList) {
          for (SpillInfo spillInfo : spillInfoList) {
            TezIndexRecord indexRecord = spillInfo.spillRecord.getIndex(i);
            if (indexRecord.getPartLength() == 0) {
              // Skip empty partitions within a spill
              continue;
            }
            FSDataInputStream in = rfs.open(spillInfo.outPath);
            IFile.Reader reader = null;
            try {
              in.seek(indexRecord.getStartOffset());
              reader = new IFile.Reader(in, indexRecord.getPartLength(), codec, null, additionalSpillBytesReadCounter, ifileReadAhead, ifileReadAheadLength, ifileBufferSize);
              while (reader.nextRawKey(keyBufferIFile)) {
                // TODO Inefficient. If spills are not compressed, a direct copy should be possible
                // given the current IFile format. Also extremely inefficient for large records,
                // since the entire record will be read into memory.
                reader.nextRawValue(valBufferIFile);
                writer.append(keyBufferIFile, valBufferIFile);
              }
            } finally {
              // Release the spill input on every path. Once the reader exists it is closed
              // exactly as before (presumably it owns the stream - confirm against
              // IFile.Reader.close()); if it was never constructed, close the raw stream.
              if (reader != null) {
                reader.close();
              } else {
                in.close();
              }
            }
          }
        }
        writer.close();
        fileOutputBytesCounter.increment(writer.getCompressedLength());
        TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
        // Mark as handled so the finally block does not close it a second time.
        writer = null;
        finalSpillRecord.putIndex(indexRecord, i);
        outputContext.notifyProgress();
      } finally {
        if (writer != null) {
          writer.close();
        }
      }
    }
  } finally {
    if (out != null) {
      out.close();
    }
    deleteIntermediateSpills();
  }
  finalSpillRecord.writeToFile(finalIndexPath, conf);
  fileOutputBytesCounter.increment(indexFileSizeEstimate);
  LOG.info(destNameTrimmed + ": " + "Finished final spill after merging : " + numSpills.get() + " spills");
}
use of org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord in project tez by apache.
the class UnorderedPartitionedKVWriter method writeLargeRecord.
/**
 * Writes a single key/value pair to its own spill file.
 *
 * <p>Only the segment for {@code partition} receives data. When pipelined shuffle is on or
 * final merge is off, every other partition is flagged in an empty-partition bit set that is
 * handed to the event-sending step.
 *
 * @param key key of the record
 * @param value value of the record
 * @param partition partition the record belongs to
 * @throws IOException if creating or writing the spill file fails
 */
private void writeLargeRecord(final Object key, final Object value, final int partition) throws IOException {
  numAdditionalSpillsCounter.increment(1);
  long estimatedSize = sizePerBuffer - (currentBuffer.numRecords * META_SIZE) - currentBuffer.skipSize + numPartitions * APPROX_HEADER_LENGTH;
  final SpillPathDetails paths = getSpillPathDetails(false, estimatedSize);
  final int spillNo = paths.spillIndex;

  FSDataOutputStream spillOut = null;
  long writtenBytes = 0;
  try {
    final TezSpillRecord spillRecord = new TezSpillRecord(numPartitions);
    final Path outPath = paths.outputFilePath;
    spillOut = rfs.create(outPath);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(outPath, SPILL_FILE_PERMS);
    }

    // The bit set is only tracked when it will be reported; otherwise it stays null.
    final BitSet emptyBits = (pipelinedShuffle || !isFinalMergeEnabled) ? new BitSet(numPartitions) : null;

    for (int p = 0; p < numPartitions; p++) {
      final long recordStart = spillOut.getPos();

      if (p != partition) {
        // Every partition other than the record's own is empty in this spill.
        if (emptyBits != null) {
          emptyBits.set(p);
        }
        continue;
      }

      spilledRecordsCounter.increment(1);
      Writer recordWriter = null;
      try {
        recordWriter = new IFile.Writer(conf, spillOut, keyClass, valClass, codec, null, null);
        recordWriter.append(key, value);
        outputLargeRecordsCounter.increment(1);
        numRecordsPerPartition[p]++;
        if (reportPartitionStats()) {
          sizePerPartition[p] += recordWriter.getRawLength();
        }
        recordWriter.close();

        final long compressedLength = recordWriter.getCompressedLength();
        synchronized (additionalSpillBytesWritternCounter) {
          additionalSpillBytesWritternCounter.increment(compressedLength);
        }
        spillRecord.putIndex(new TezIndexRecord(recordStart, recordWriter.getRawLength(), compressedLength), p);
        writtenBytes = compressedLength;

        // Mark as handled so the finally block does not close it a second time.
        recordWriter = null;
      } finally {
        if (recordWriter != null) {
          recordWriter.close();
        }
      }
    }

    handleSpillIndex(paths, spillRecord);
    mayBeSendEventsForSpill(emptyBits, sizePerPartition, spillNo, false);

    LOG.info(destNameTrimmed + ": " + "Finished writing large record of size " + writtenBytes + " to spill file " + spillNo);
    if (LOG.isDebugEnabled()) {
      LOG.debug(destNameTrimmed + ": " + "LargeRecord Spill=" + spillNo + ", indexPath=" + paths.indexFilePath + ", outputPath=" + paths.outputFilePath);
    }
  } finally {
    if (spillOut != null) {
      spillOut.close();
    }
  }
}
Aggregations