Search in sources :

Example 6 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class DefaultSorter method mergeParts.

/**
 * Produces the final output of this sorter from the spill files written so far.
 *
 * <p>Three situations are handled:
 * <ul>
 *   <li><b>Exactly one spill</b>: no merge is performed. With final merge enabled the spill file
 *       (and its index) are renamed/written to the final output locations; with final merge
 *       disabled the cached index is written out and an event is sent for the spill.</li>
 *   <li><b>No spills</b>: an output file and an index with one record per partition are
 *       written (empty segments are written unless {@code sendEmptyPartitionDetails} is set).</li>
 *   <li><b>Several spills</b>: when final merge is disabled, events are emitted for the existing
 *       spills and the method returns; otherwise the segments of every partition are merged
 *       (optionally through the combiner) into a single output file, the index is written and
 *       the spill files are deleted.</li>
 * </ul>
 *
 * @throws IOException if reading spills or writing the final output/index fails
 * @throws InterruptedException declared by the merge/combine helpers invoked from here
 */
private void mergeParts() throws IOException, InterruptedException {
    // get the approximate size of the final output/index files
    long finalOutFileSize = 0;
    long finalIndexFileSize = 0;
    final Path[] filename = new Path[numSpills];
    final String taskIdentifier = outputContext.getUniqueIdentifier();
    for (int i = 0; i < numSpills; i++) {
        filename[i] = spillFilePaths.get(i);
        finalOutFileSize += rfs.getFileStatus(filename[i]).getLen();
    }
    if (numSpills == 1) {
        // the spill is the final output
        TezSpillRecord spillRecord = null;
        if (isFinalMergeEnabled()) {
            finalOutputFile = mapOutputFile.getOutputFileForWriteInVolume(filename[0]);
            finalIndexFile = mapOutputFile.getOutputIndexFileForWriteInVolume(filename[0]);
            sameVolRename(filename[0], finalOutputFile);
            if (indexCacheList.size() == 0) {
                // The index was written to disk rather than cached: move it and read it back.
                sameVolRename(spillFileIndexPaths.get(0), finalIndexFile);
                spillRecord = new TezSpillRecord(finalIndexFile, conf);
            } else {
                // The index is still cached in memory: write it straight to its final location.
                spillRecord = indexCacheList.get(0);
                spillRecord.writeToFile(finalIndexFile, conf);
            }
        } else {
            List<Event> events = Lists.newLinkedList();
            // Since there is only one spill, spill record would be present in cache.
            spillRecord = indexCacheList.get(0);
            Path indexPath = mapOutputFile.getSpillIndexFileForWrite(numSpills - 1, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillRecord.writeToFile(indexPath, conf);
            maybeSendEventForSpill(events, true, spillRecord, 0, true);
            fileOutputByteCounter.increment(rfs.getFileStatus(spillFilePaths.get(0)).getLen());
            // No need to populate finalIndexFile, finalOutputFile etc when finalMerge is disabled
        }
        if (spillRecord != null && reportPartitionStats()) {
            for (int i = 0; i < spillRecord.size(); i++) {
                partitionStats[i] += spillRecord.getIndex(i).getPartLength();
            }
        }
        numShuffleChunks.setValue(numSpills);
        return;
    }
    // read in paged indices
    for (int i = indexCacheList.size(); i < numSpills; ++i) {
        Path indexFileName = spillFileIndexPaths.get(i);
        indexCacheList.add(new TezSpillRecord(indexFileName, conf));
    }
    // Check if it is needed to do final merge. Or else, exit early.
    if (numSpills > 0 && !isFinalMergeEnabled()) {
        maybeAddEventsForSpills();
        // No need to do final merge.
        return;
    }
    // make correction in the length to include the sequence file header
    // lengths for each partition
    finalOutFileSize += partitions * APPROX_HEADER_LENGTH;
    finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    if (isFinalMergeEnabled()) {
        finalOutputFile = mapOutputFile.getOutputFileForWrite(finalOutFileSize);
        finalIndexFile = mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize);
    } else if (numSpills == 0) {
        // e.g attempt_1424502260528_0119_1_07_000058_0_10012_0/file.out when final merge is
        // disabled
        finalOutputFile = mapOutputFile.getSpillFileForWrite(numSpills, finalOutFileSize);
        finalIndexFile = mapOutputFile.getSpillIndexFileForWrite(numSpills, finalIndexFileSize);
    }
    // The output stream for the final single output file
    FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
    // Only set permissions explicitly when the umask would alter the desired spill permissions.
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
        rfs.setPermission(finalOutputFile, SPILL_FILE_PERMS);
    }
    if (numSpills == 0) {
        // TODO Change event generation to say there is no data rather than generating a dummy file
        // create dummy files
        long rawLength = 0;
        long partLength = 0;
        TezSpillRecord sr = new TezSpillRecord(partitions);
        try {
            for (int i = 0; i < partitions; i++) {
                long segmentStart = finalOut.getPos();
                if (!sendEmptyPartitionDetails) {
                    // Write an empty segment so the partition still has an entry in the file.
                    Writer writer = new Writer(conf, finalOut, keyClass, valClass, codec, null, null);
                    writer.close();
                    rawLength = writer.getRawLength();
                    partLength = writer.getCompressedLength();
                }
                TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
                // Covers the case of multiple spills.
                outputBytesWithOverheadCounter.increment(rawLength);
                sr.putIndex(rec, i);
            }
            sr.writeToFile(finalIndexFile, conf);
        } finally {
            finalOut.close();
        }
        ++numSpills;
        if (!isFinalMergeEnabled()) {
            List<Event> events = Lists.newLinkedList();
            maybeSendEventForSpill(events, true, sr, 0, true);
            fileOutputByteCounter.increment(rfs.getFileStatus(finalOutputFile).getLen());
        }
        numShuffleChunks.setValue(numSpills);
        return;
    } else {
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        try {
            for (int parts = 0; parts < partitions; parts++) {
                boolean shouldWrite = false;
                // create the segments to be merged
                List<Segment> segmentList = new ArrayList<Segment>(numSpills);
                for (int i = 0; i < numSpills; i++) {
                    outputContext.notifyProgress();
                    TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);
                    if (indexRecord.hasData() || !sendEmptyPartitionDetails) {
                        shouldWrite = true;
                        DiskSegment s = new DiskSegment(rfs, filename[i], indexRecord.getStartOffset(), indexRecord.getPartLength(), codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize, true);
                        segmentList.add(s);
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(outputContext.getDestinationVertexName() + ": " + "TaskIdentifier=" + taskIdentifier + " Partition=" + parts + "Spill =" + i + "(" + indexRecord.getStartOffset() + "," + indexRecord.getRawLength() + ", " + indexRecord.getPartLength() + ")");
                    }
                }
                int mergeFactor = this.conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);
                // sort the segments only if there are intermediate merges
                boolean sortSegments = segmentList.size() > mergeFactor;
                // merge
                // Not using any Progress in TezMerger (last argument is null). Should just work.
                TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs, keyClass, valClass, codec, segmentList, mergeFactor, new Path(taskIdentifier), (RawComparator) ConfigUtils.getIntermediateOutputKeyComparator(conf), progressable, sortSegments, true, null, spilledRecordsCounter, additionalSpillBytesRead, null);
                // write merged output to disk
                long segmentStart = finalOut.getPos();
                long rawLength = 0;
                long partLength = 0;
                if (shouldWrite) {
                    Writer writer = new Writer(conf, finalOut, keyClass, valClass, codec, spilledRecordsCounter, null);
                    if (combiner == null || numSpills < minSpillsForCombine) {
                        TezMerger.writeFile(kvIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
                    } else {
                        runCombineProcessor(kvIter, writer);
                    }
                    writer.close();
                    rawLength = writer.getRawLength();
                    partLength = writer.getCompressedLength();
                }
                outputBytesWithOverheadCounter.increment(rawLength);
                // record offsets
                final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
                spillRec.putIndex(rec, parts);
                if (reportPartitionStats()) {
                    partitionStats[parts] += partLength;
                }
            }
            // final merge has happened
            numShuffleChunks.setValue(1);
            spillRec.writeToFile(finalIndexFile, conf);
        } finally {
            // Close the final output even when merging, combining or the index write fails,
            // so the stream is not leaked; on success the order (index write, then close,
            // then spill deletion) is the same as before.
            finalOut.close();
        }
        // Spills are only removed once the merged output and its index are safely written.
        for (int i = 0; i < numSpills; i++) {
            rfs.delete(filename[i], true);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DiskSegment(org.apache.tez.runtime.library.common.sort.impl.TezMerger.DiskSegment) ArrayList(java.util.ArrayList) DiskSegment(org.apache.tez.runtime.library.common.sort.impl.TezMerger.DiskSegment) Segment(org.apache.tez.runtime.library.common.sort.impl.TezMerger.Segment) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) Event(org.apache.tez.runtime.api.Event) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator)

Example 7 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class DefaultSorter method spill.

/**
 * Writes the metadata range {@code [mstart, mend)} to a new spill file, one segment per
 * partition, and records the per-partition offsets/lengths in a {@link TezSpillRecord}.
 *
 * <p>The spill record is either kept in {@code indexCacheList} or, once
 * {@code totalIndexCacheMemory} has reached {@code indexCacheMemoryLimit}, written to a
 * spill index file. {@code numSpills} is incremented after a successful spill.
 *
 * @param mstart first metadata index to spill (inclusive)
 * @param mend metadata index at which spilling stops (exclusive)
 * @param sameKeyCount passed to {@code isRLENeeded} to decide on run-length encoding
 * @param totalKeysCount passed to {@code isRLENeeded} to decide on run-length encoding
 * @throws IOException if creating or writing the spill/index file fails
 * @throws InterruptedException declared for the combiner invocation
 */
protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCount) throws IOException, InterruptedException {
    // approximate the length of the output file to be the length of the
    // buffer + header lengths for the partitions
    final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart) + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
        // create spill file
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
        spillFilePaths.put(numSpills, filename);
        out = rfs.create(filename);
        // Only set permissions explicitly when the umask would alter the desired spill permissions.
        if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
            rfs.setPermission(filename, SPILL_FILE_PERMS);
        }
        // spindex walks the metadata range; it only moves forward across partitions.
        int spindex = mstart;
        final InMemValBytes value = createInMemValBytes();
        boolean rle = isRLENeeded(sameKeyCount, totalKeysCount);
        for (int i = 0; i < partitions; ++i) {
            IFile.Writer writer = null;
            try {
                long segmentStart = out.getPos();
                // A writer is created when the next record belongs to partition i, or always
                // when empty-partition details are not being sent (so an empty segment is written).
                if (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i || !sendEmptyPartitionDetails) {
                    writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, rle);
                }
                if (combiner == null) {
                    // spill directly
                    DataInputBuffer key = new DataInputBuffer();
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        final int kvoff = offsetFor(spindex);
                        int keystart = kvmeta.get(kvoff + KEYSTART);
                        int valstart = kvmeta.get(kvoff + VALSTART);
                        // The key occupies [keystart, valstart) in kvbuffer.
                        key.reset(kvbuffer, keystart, valstart - keystart);
                        getVBytesForOffset(kvoff, value);
                        writer.append(key, value);
                        ++spindex;
                    }
                } else {
                    // Find the end of the run of records belonging to partition i.
                    int spstart = spindex;
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        ++spindex;
                    }
                    // NOTE(review): the original comment here is cut off in this listing; it
                    // presumably concerns skipping the combiner when a partition has fewer
                    // than some threshold of records - confirm against upstream.
                    if (spstart != spindex) {
                        TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(outputContext.getDestinationVertexName() + ": " + "Running combine processor");
                        }
                        runCombineProcessor(kvIter, writer);
                    }
                }
                long rawLength = 0;
                long partLength = 0;
                // close the writer
                if (writer != null) {
                    writer.close();
                    rawLength = writer.getRawLength();
                    partLength = writer.getCompressedLength();
                }
                adjustSpillCounters(rawLength, partLength);
                // record offsets
                final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
                spillRec.putIndex(rec, i);
                if (!isFinalMergeEnabled() && reportPartitionStats() && writer != null) {
                    partitionStats[i] += partLength;
                }
                // Mark the writer as handled so the finally block does not close it again.
                writer = null;
            } finally {
                if (null != writer)
                    writer.close();
            }
        }
        if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
            // create spill index file
            Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillFileIndexPaths.put(numSpills, indexFilename);
            spillRec.writeToFile(indexFilename, conf);
        } else {
            // Keep the index in memory and account for the space it takes.
            indexCacheList.add(spillRec);
            totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        LOG.info(outputContext.getDestinationVertexName() + ": " + "Finished spill " + numSpills + " at " + filename.toString());
        ++numSpills;
        if (!isFinalMergeEnabled()) {
            numShuffleChunks.setValue(numSpills);
        } else if (numSpills > 1) {
            // Increment only when there was atleast one previous spill
            numAdditionalSpills.increment(1);
        }
    } finally {
        if (out != null)
            out.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator)

Example 8 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class FetcherOrderedGrouped method getIndexRecord.

/**
 * Loads the spill index located via {@code pathComponent} and returns the entry for one partition.
 *
 * @param pathComponent path component handed to {@code getShuffleInputFileName} to locate the index file
 * @param partitionId index of the partition whose record is wanted
 * @return the index record stored for {@code partitionId}
 * @throws IOException if the index file cannot be read
 */
@VisibleForTesting
protected TezIndexRecord getIndexRecord(String pathComponent, int partitionId) throws IOException {
    // Resolve the index file that accompanies the shuffle input, then read just the requested entry.
    final Path indexPath = getShuffleInputFileName(pathComponent, Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
    return new TezSpillRecord(indexPath, conf).getIndex(partitionId);
}
Also used : Path(org.apache.hadoop.fs.Path) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 9 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class UnorderedPartitionedKVWriter method mergeAll.

/**
 * Merges the current in-memory buffer and every earlier spill into one final output file,
 * partition by partition, and writes the matching final index file.
 *
 * <p>Partitions with no records are skipped (no segment is written for them). Intermediate
 * spills are deleted once the output stream has been closed, whether or not the merge succeeded.
 *
 * @throws IOException if reading a spill or writing the final output/index fails
 */
private void mergeAll() throws IOException {
    long expectedSize = spilledSize;
    if (currentBuffer.nextPosition != 0) {
        expectedSize += currentBuffer.nextPosition - (currentBuffer.numRecords * META_SIZE) - currentBuffer.skipSize + numPartitions * APPROX_HEADER_LENGTH;
        // Update final statistics.
        updateGlobalStats(currentBuffer);
    }
    SpillPathDetails spillPathDetails = getSpillPathDetails(true, expectedSize);
    finalIndexPath = spillPathDetails.indexFilePath;
    finalOutPath = spillPathDetails.outputFilePath;
    TezSpillRecord finalSpillRecord = new TezSpillRecord(numPartitions);
    // Buffers are allocated once and reused across partitions/spills.
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    DataInputBuffer keyBufferIFile = new DataInputBuffer();
    DataInputBuffer valBufferIFile = new DataInputBuffer();
    FSDataOutputStream out = null;
    try {
        out = rfs.create(finalOutPath);
        // Only set permissions explicitly when the umask would alter the desired spill permissions.
        if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
            rfs.setPermission(finalOutPath, SPILL_FILE_PERMS);
        }
        Writer writer = null;
        for (int i = 0; i < numPartitions; i++) {
            long segmentStart = out.getPos();
            if (numRecordsPerPartition[i] == 0) {
                LOG.info(destNameTrimmed + ": " + "Skipping partition: " + i + " in final merge since it has no records");
                continue;
            }
            writer = new Writer(conf, out, keyClass, valClass, codec, null, null);
            try {
                if (currentBuffer.nextPosition != 0 && currentBuffer.partitionPositions[i] != WrappedBuffer.PARTITION_ABSENT_POSITION) {
                    // Write current buffer.
                    writePartition(currentBuffer.partitionPositions[i], currentBuffer, writer, keyBuffer, valBuffer);
                }
                synchronized (spillInfoList) {
                    for (SpillInfo spillInfo : spillInfoList) {
                        TezIndexRecord indexRecord = spillInfo.spillRecord.getIndex(i);
                        if (indexRecord.getPartLength() == 0) {
                            // Skip empty partitions within a spill
                            continue;
                        }
                        FSDataInputStream in = rfs.open(spillInfo.outPath);
                        IFile.Reader reader = null;
                        try {
                            in.seek(indexRecord.getStartOffset());
                            reader = new IFile.Reader(in, indexRecord.getPartLength(), codec, null, additionalSpillBytesReadCounter, ifileReadAhead, ifileReadAheadLength, ifileBufferSize);
                            while (reader.nextRawKey(keyBufferIFile)) {
                                // TODO Inefficient. If spills are not compressed, a direct copy should be possible
                                // given the current IFile format. Also extremely inefficient for large records,
                                // since the entire record will be read into memory.
                                reader.nextRawValue(valBufferIFile);
                                writer.append(keyBufferIFile, valBufferIFile);
                            }
                        } finally {
                            // Release the spill input even if seeking, constructing the reader or
                            // copying records fails. On success this is the same reader.close()
                            // call as before; the raw stream is closed directly only when no
                            // reader was ever built around it.
                            if (reader != null) {
                                reader.close();
                            } else {
                                in.close();
                            }
                        }
                    }
                }
                writer.close();
                fileOutputBytesCounter.increment(writer.getCompressedLength());
                TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
                // Mark the writer as handled so the finally block does not close it again.
                writer = null;
                finalSpillRecord.putIndex(indexRecord, i);
                outputContext.notifyProgress();
            } finally {
                if (writer != null) {
                    writer.close();
                }
            }
        }
    } finally {
        if (out != null) {
            out.close();
        }
        deleteIntermediateSpills();
    }
    finalSpillRecord.writeToFile(finalIndexPath, conf);
    fileOutputBytesCounter.increment(indexFileSizeEstimate);
    LOG.info(destNameTrimmed + ": " + "Finished final spill after merging : " + numSpills.get() + " spills");
}
Also used : IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)

Example 10 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class UnorderedPartitionedKVWriter method writeLargeRecord.

/**
 * Writes one key/value pair into a spill file of its own.
 *
 * <p>Only the segment for {@code partition} receives data; every other partition is noted in
 * the empty-partition bit set when that set is being tracked (pipelined shuffle, or final merge
 * disabled). Afterwards the spill index is handed to {@code handleSpillIndex} and
 * {@code mayBeSendEventsForSpill} is invoked for the new spill.
 *
 * @param key key of the record
 * @param value value of the record
 * @param partition partition the record belongs to
 * @throws IOException if the spill file cannot be created or written
 */
private void writeLargeRecord(final Object key, final Object value, final int partition) throws IOException {
    numAdditionalSpillsCounter.increment(1);
    final long estimatedSize = sizePerBuffer - (currentBuffer.numRecords * META_SIZE) - currentBuffer.skipSize + numPartitions * APPROX_HEADER_LENGTH;
    final SpillPathDetails pathDetails = getSpillPathDetails(false, estimatedSize);
    final int spillNumber = pathDetails.spillIndex;
    long bytesWritten = 0;
    FSDataOutputStream spillOut = null;
    try {
        final TezSpillRecord index = new TezSpillRecord(numPartitions);
        final Path spillFile = pathDetails.outputFilePath;
        spillOut = rfs.create(spillFile);
        // Permissions are set explicitly only when the umask would alter them.
        final boolean umaskAltersPerms = !SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)));
        if (umaskAltersPerms) {
            rfs.setPermission(spillFile, SPILL_FILE_PERMS);
        }
        // Empty partitions are tracked only for pipelined shuffle or when final merge is off.
        final BitSet emptyPartitions = (pipelinedShuffle || !isFinalMergeEnabled) ? new BitSet(numPartitions) : null;
        for (int p = 0; p < numPartitions; p++) {
            final long recordStart = spillOut.getPos();
            if (p != partition) {
                // Nothing is written for partitions other than the record's own.
                if (emptyPartitions != null) {
                    emptyPartitions.set(p);
                }
                continue;
            }
            spilledRecordsCounter.increment(1);
            Writer recordWriter = null;
            try {
                recordWriter = new IFile.Writer(conf, spillOut, keyClass, valClass, codec, null, null);
                recordWriter.append(key, value);
                outputLargeRecordsCounter.increment(1);
                numRecordsPerPartition[p]++;
                if (reportPartitionStats()) {
                    sizePerPartition[p] += recordWriter.getRawLength();
                }
                recordWriter.close();
                synchronized (additionalSpillBytesWritternCounter) {
                    additionalSpillBytesWritternCounter.increment(recordWriter.getCompressedLength());
                }
                index.putIndex(new TezIndexRecord(recordStart, recordWriter.getRawLength(), recordWriter.getCompressedLength()), p);
                bytesWritten = recordWriter.getCompressedLength();
                // Already closed above; clear the reference so the finally block skips it.
                recordWriter = null;
            } finally {
                if (recordWriter != null) {
                    recordWriter.close();
                }
            }
        }
        handleSpillIndex(pathDetails, index);
        mayBeSendEventsForSpill(emptyPartitions, sizePerPartition, spillNumber, false);
        LOG.info(destNameTrimmed + ": " + "Finished writing large record of size " + bytesWritten + " to spill file " + spillNumber);
        if (LOG.isDebugEnabled()) {
            LOG.debug(destNameTrimmed + ": " + "LargeRecord Spill=" + spillNumber + ", indexPath=" + pathDetails.indexFilePath + ", outputPath=" + pathDetails.outputFilePath);
        }
    } finally {
        if (spillOut != null) {
            spillOut.close();
        }
    }
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) Path(org.apache.hadoop.fs.Path) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) BitSet(java.util.BitSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)

Aggregations

TezSpillRecord (org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord)20 Path (org.apache.hadoop.fs.Path)14 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)11 Event (org.apache.tez.runtime.api.Event)9 BitSet (java.util.BitSet)7 ByteString (com.google.protobuf.ByteString)6 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)6 IFile (org.apache.tez.runtime.library.common.sort.impl.IFile)6 IOException (java.io.IOException)5 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)5 VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent)5 Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)5 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)4 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)3 OutputContext (org.apache.tez.runtime.api.OutputContext)3 ShuffleUserPayloads (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads)3 Test (org.junit.Test)3 Mockito.anyString (org.mockito.Mockito.anyString)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 LinkedListMultimap (com.google.common.collect.LinkedListMultimap)2