Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class DefaultSorter, method spill.
protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCount) throws IOException, InterruptedException {
  // approximate the length of the output file to be the length of the
  // buffer + header lengths for the partitions
  final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart) + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, filename);
    out = rfs.create(filename);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(filename, SPILL_FILE_PERMS);
    }
    int spindex = mstart;
    final InMemValBytes value = createInMemValBytes();
    boolean rle = isRLENeeded(sameKeyCount, totalKeysCount);
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        if ((spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) || !sendEmptyPartitionDetails) {
          writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, rle);
        }
        if (combiner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
            if (LOG.isDebugEnabled()) {
              LOG.debug(outputContext.getDestinationVertexName() + ": " + "Running combine processor");
            }
            runCombineProcessor(kvIter, writer);
          }
        }
        long rawLength = 0;
        long partLength = 0;
        // close the writer
        if (writer != null) {
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);
        // record offsets
        final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
        spillRec.putIndex(rec, i);
        if (!isFinalMergeEnabled() && reportPartitionStats() && writer != null) {
          partitionStats[i] += partLength;
        }
        writer = null;
      } finally {
        if (null != writer) {
          writer.close();
        }
      }
    }
    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillFileIndexPaths.put(numSpills, indexFilename);
      spillRec.writeToFile(indexFilename, conf);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info(outputContext.getDestinationVertexName() + ": " + "Finished spill " + numSpills + " at " + filename.toString());
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      numShuffleChunks.setValue(numSpills);
    } else if (numSpills > 1) {
      // Increment only when there was at least one previous spill
      numAdditionalSpills.increment(1);
    }
  } finally {
    if (out != null) {
      out.close();
    }
  }
}
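DefaultSorter.spill, like every other method on this page, follows one IFile.Writer lifecycle: open a single FSDataOutputStream for the spill file, then for each partition note the stream position, write that partition's records through a Writer, close it, and capture the (offset, rawLength, partLength) triple in a TezSpillRecord. The sketch below condenses that pattern; it is not Tez source. PartitionSource and SpillSketch are hypothetical names, and the Writer constructor used is the seven-argument overload that also appears in UnorderedPartitionedKVWriter.mergeAll further down; the spill here is assumed uncompressed (null codec).

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.tez.runtime.library.common.sort.impl.IFile;
import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;

/** Hypothetical view of one partition's serialized records. */
interface PartitionSource {
  /** Returns the key/value pairs for the given partition, possibly empty. */
  Iterator<Map.Entry<DataInputBuffer, DataInputBuffer>> recordsFor(int partition);
}

final class SpillSketch {
  /** Writes one spill file plus its index file, partition by partition. */
  static void writeSpill(Configuration conf, FileSystem fs, Path spillFile, Path indexFile,
      int partitions, Class<?> keyClass, Class<?> valClass, PartitionSource source)
      throws IOException {
    TezSpillRecord spillRec = new TezSpillRecord(partitions);
    FSDataOutputStream out = fs.create(spillFile);
    try {
      for (int p = 0; p < partitions; ++p) {
        long segmentStart = out.getPos();      // offset of this partition's segment
        IFile.Writer writer = new IFile.Writer(conf, out, keyClass, valClass,
            null /* no codec */, null, null);
        try {
          Iterator<Map.Entry<DataInputBuffer, DataInputBuffer>> it = source.recordsFor(p);
          while (it.hasNext()) {
            Map.Entry<DataInputBuffer, DataInputBuffer> kv = it.next();
            writer.append(kv.getKey(), kv.getValue());
          }
        } finally {
          writer.close();                      // lengths are only valid after close()
        }
        spillRec.putIndex(new TezIndexRecord(segmentStart,
            writer.getRawLength(), writer.getCompressedLength()), p);
      }
    } finally {
      out.close();
    }
    // persist the per-partition offsets so the shuffle handler can serve byte ranges
    spillRec.writeToFile(indexFile, conf);
  }
}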
Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class PipelinedSorter, method spillSingleRecord.
// it is guaranteed that when spillSingleRecord is called, there are
// no merger spans queued in the executor.
private void spillSingleRecord(final Object key, final Object value, int partition) throws IOException {
  final TezSpillRecord spillRec = new TezSpillRecord(partitions);
  // getSpillFileForWrite with size -1, as the serialized size of the KV pair is still unknown
  final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, -1);
  Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
  spillFilePaths.put(numSpills, filename);
  FSDataOutputStream out = rfs.create(filename, true, 4096);
  if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
    rfs.setPermission(filename, SPILL_FILE_PERMS);
  }
  try {
    LOG.info(outputContext.getDestinationVertexName() + ": Spilling to " + filename.toString() + ", indexFilename=" + indexFilename);
    for (int i = 0; i < partitions; ++i) {
      if (isThreadInterrupted()) {
        return;
      }
      Writer writer = null;
      try {
        long segmentStart = out.getPos();
        if (!sendEmptyPartitionDetails || (i == partition)) {
          writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, false);
        }
        // we need not check for a combiner since it's a single record
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          mapOutputRecordCounter.increment(1);
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        long rawLength = 0;
        long partLength = 0;
        if (writer != null) {
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);
        // record offsets
        final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
        spillRec.putIndex(rec, i);
        writer = null;
      } finally {
        if (null != writer) {
          writer.close();
        }
      }
    }
    spillFileIndexPaths.put(numSpills, indexFilename);
    spillRec.writeToFile(indexFilename, conf);
    // TODO: honor cache limits
    indexCacheList.add(spillRec);
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      fileOutputByteCounter.increment(rfs.getFileStatus(filename).getLen());
      // No final merge. Set the number of files offered via shuffle-handler
      numShuffleChunks.setValue(numSpills);
    }
    if (pipelinedShuffle) {
      sendPipelinedShuffleEvents();
    }
  } finally {
    out.close();
  }
}
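The index file written above is what lets a consumer pull a single partition back out of the spill file: it records one (startOffset, rawLength, partLength) entry per partition, including zero-length entries for the partitions the writer skipped. The read side might look like the sketch below; this is an illustration, not Tez source. It assumes a TezSpillRecord constructor that reads an existing index file, and the IFile.Reader constructor mirrors the eight-argument form used in UnorderedPartitionedKVWriter.mergeAll further down.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.tez.runtime.library.common.sort.impl.IFile;
import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;

final class ReadOnePartition {
  /** Streams the records of a single partition out of a spill file. */
  static void dumpPartition(Configuration conf, FileSystem fs,
      Path spillFile, Path indexFile, int partition) throws IOException {
    TezSpillRecord spillRec = new TezSpillRecord(indexFile, conf);
    TezIndexRecord index = spillRec.getIndex(partition);
    if (index.getPartLength() == 0) {
      return; // empty partition, same check mergeAll() uses below
    }
    FSDataInputStream in = fs.open(spillFile);
    try {
      in.seek(index.getStartOffset());
      IFile.Reader reader = new IFile.Reader(in, index.getPartLength(),
          null /* codec */, null, null, false /* readAhead */, 0, 4096);
      DataInputBuffer key = new DataInputBuffer();
      DataInputBuffer val = new DataInputBuffer();
      while (reader.nextRawKey(key)) {
        reader.nextRawValue(val);
        // process key/val here
      }
      reader.close();
    } finally {
      in.close();
    }
  }
}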
Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class PipelinedSorter, method spill.
public boolean spill(boolean ignoreEmptySpills) throws IOException {
  FSDataOutputStream out = null;
  try {
    try {
      boolean ret = merger.ready();
      // if the merger is not ready (i.e. has no data) and ignoreEmptySpills is set,
      // then return directly without spilling
      if (!ret && ignoreEmptySpills) {
        return false;
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      LOG.info(outputContext.getDestinationVertexName() + ": Interrupted while waiting for mergers to complete");
      throw new IOInterruptedException(outputContext.getDestinationVertexName() + ": Interrupted while waiting for mergers to complete", e);
    }
    // create spill file
    final long size = capacity + (partitions * APPROX_HEADER_LENGTH);
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, filename);
    out = rfs.create(filename, true, 4096);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(filename, SPILL_FILE_PERMS);
    }
    LOG.info(outputContext.getDestinationVertexName() + ": Spilling to " + filename.toString());
    for (int i = 0; i < partitions; ++i) {
      if (isThreadInterrupted()) {
        return false;
      }
      outputContext.notifyProgress();
      TezRawKeyValueIterator kvIter = merger.filter(i);
      // write merged output to disk
      long segmentStart = out.getPos();
      Writer writer = null;
      boolean hasNext = kvIter.hasNext();
      if (hasNext || !sendEmptyPartitionDetails) {
        writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, merger.needsRLE());
      }
      if (combiner == null) {
        while (kvIter.next()) {
          writer.append(kvIter.getKey(), kvIter.getValue());
        }
      } else {
        if (hasNext) {
          runCombineProcessor(kvIter, writer);
        }
      }
      long rawLength = 0;
      long partLength = 0;
      // close
      if (writer != null) {
        writer.close();
        rawLength = writer.getRawLength();
        partLength = writer.getCompressedLength();
      }
      adjustSpillCounters(rawLength, partLength);
      // record offsets
      final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
      spillRec.putIndex(rec, i);
      if (!isFinalMergeEnabled() && reportPartitionStats()) {
        partitionStats[i] += partLength;
      }
    }
    Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
    spillFileIndexPaths.put(numSpills, indexFilename);
    spillRec.writeToFile(indexFilename, conf);
    // TODO: honor cache limits
    indexCacheList.add(spillRec);
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      fileOutputByteCounter.increment(rfs.getFileStatus(filename).getLen());
      // No final merge. Set the number of files offered via shuffle-handler
      numShuffleChunks.setValue(numSpills);
    }
    return true;
  } finally {
    if (out != null) {
      out.close();
    }
  }
}
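A detail shared by all three sorter paths above: getRawLength() is the uncompressed serialized size of a segment, while getCompressedLength() is the number of bytes that actually landed in the file, and both go into the TezIndexRecord because, broadly speaking, the raw length is used when sizing in-memory shuffle buffers while the part length delimits the on-disk byte range to serve. A small sketch contrasting the two, assuming raw-buffer appends (so no key/value classes are needed, which is why nulls are passed; an assumption mirroring how merge code typically drives IFile writers):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.tez.runtime.library.common.sort.impl.IFile;

final class SegmentLengths {
  /**
   * Writes the given records as one IFile segment and returns
   * {rawLength, compressedLength}. With codec == null the two values track
   * each other (modulo IFile framing); with a codec they diverge.
   */
  static long[] writeSegment(Configuration conf, FSDataOutputStream out,
      CompressionCodec codec, DataInputBuffer[] keys, DataInputBuffer[] vals)
      throws IOException {
    IFile.Writer writer =
        new IFile.Writer(conf, out, null, null, codec, null, null); // raw appends only
    try {
      for (int i = 0; i < keys.length; i++) {
        writer.append(keys[i], vals[i]);
      }
    } finally {
      writer.close(); // lengths are finalized by close()
    }
    return new long[] { writer.getRawLength(), writer.getCompressedLength() };
  }
}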
Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class MergeManager, method finalMerge.
private TezRawKeyValueIterator finalMerge(Configuration job, FileSystem fs, List<MapOutput> inMemoryMapOutputs, List<FileChunk> onDiskMapOutputs) throws IOException, InterruptedException {
  logFinalMergeStart(inMemoryMapOutputs, onDiskMapOutputs);
  StringBuilder finalMergeLog = new StringBuilder();
  inputContext.notifyProgress();
  // merge config params
  Class keyClass = (Class) ConfigUtils.getIntermediateInputKeyClass(job);
  Class valueClass = (Class) ConfigUtils.getIntermediateInputValueClass(job);
  final Path tmpDir = new Path(inputContext.getUniqueIdentifier());
  final RawComparator comparator = (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(job);
  // segments required to vacate memory
  List<Segment> memDiskSegments = new ArrayList<Segment>();
  long inMemToDiskBytes = 0;
  boolean mergePhaseFinished = false;
  if (inMemoryMapOutputs.size() > 0) {
    int srcTaskId = inMemoryMapOutputs.get(0).getAttemptIdentifier().getInputIdentifier();
    inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, memDiskSegments, this.postMergeMemLimit);
    final int numMemDiskSegments = memDiskSegments.size();
    if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) {
      // If we reach here, it implies that we have less than io.sort.factor
      // disk segments and this will be incremented by 1 (result of the
      // memory segments merge). Since this total would still be
      // <= io.sort.factor, we will not do any more intermediate merges,
      // the merge of all these disk segments would be directly fed to the
      // reduce method
      mergePhaseFinished = true;
      // must spill to disk, but can't retain in-mem for intermediate merge
      // Cannot use the spill id in the final merge as it would clobber other
      // files, hence using Integer.MAX_VALUE
      final Path outputPath = mapOutputFile.getInputFileForWrite(srcTaskId, Integer.MAX_VALUE, inMemToDiskBytes).suffix(Constants.MERGED_OUTPUT_PREFIX);
      final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass, memDiskSegments, numMemDiskSegments, tmpDir, comparator, progressable, spilledRecordsCounter, null, additionalBytesRead, null);
      final Writer writer = new Writer(job, fs, outputPath, keyClass, valueClass, codec, null, null);
      try {
        TezMerger.writeFile(rIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
      } catch (IOException e) {
        if (null != outputPath) {
          try {
            fs.delete(outputPath, true);
          } catch (IOException ie) {
            // NOTHING
          }
        }
        throw e;
      } finally {
        if (null != writer) {
          writer.close();
          additionalBytesWritten.increment(writer.getCompressedLength());
        }
      }
      final FileStatus fStatus = localFS.getFileStatus(outputPath);
      // add to list of final disk outputs.
      onDiskMapOutputs.add(new FileChunk(outputPath, 0, fStatus.getLen()));
      if (LOG.isInfoEnabled()) {
        finalMergeLog.append("MemMerged: " + numMemDiskSegments + ", " + inMemToDiskBytes);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Merged " + numMemDiskSegments + " segments, size=" + inMemToDiskBytes + " to " + outputPath);
        }
      }
      inMemToDiskBytes = 0;
      memDiskSegments.clear();
    } else if (inMemToDiskBytes != 0) {
      if (LOG.isInfoEnabled()) {
        finalMergeLog.append("DelayedMemMerge: " + numMemDiskSegments + ", " + inMemToDiskBytes);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Keeping " + numMemDiskSegments + " segments, " + inMemToDiskBytes + " bytes in memory for " + "intermediate, on-disk merge");
        }
      }
    }
  }
  // segments on disk
  List<Segment> diskSegments = new ArrayList<Segment>();
  long onDiskBytes = inMemToDiskBytes;
  FileChunk[] onDisk = onDiskMapOutputs.toArray(new FileChunk[onDiskMapOutputs.size()]);
  for (FileChunk fileChunk : onDisk) {
    final long fileLength = fileChunk.getLength();
    onDiskBytes += fileLength;
    if (LOG.isDebugEnabled()) {
      LOG.debug("Disk file=" + fileChunk.getPath() + ", len=" + fileLength + ", isLocal=" + fileChunk.isLocalFile());
    }
    final Path file = fileChunk.getPath();
    TezCounter counter = file.toString().endsWith(Constants.MERGED_OUTPUT_PREFIX) ? null : mergedMapOutputsCounter;
    final long fileOffset = fileChunk.getOffset();
    final boolean preserve = fileChunk.isLocalFile();
    diskSegments.add(new DiskSegment(fs, file, fileOffset, fileLength, codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize, preserve, counter));
  }
  if (LOG.isInfoEnabled()) {
    finalMergeLog.append(". DiskSeg: " + onDisk.length + ", " + onDiskBytes);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Merging " + onDisk.length + " files, " + onDiskBytes + " bytes from disk");
    }
  }
  Collections.sort(diskSegments, new Comparator<Segment>() {
    public int compare(Segment o1, Segment o2) {
      if (o1.getLength() == o2.getLength()) {
        return 0;
      }
      return o1.getLength() < o2.getLength() ? -1 : 1;
    }
  });
  // build final list of segments from merged backed by disk + in-mem
  List<Segment> finalSegments = new ArrayList<Segment>();
  long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0);
  if (LOG.isInfoEnabled()) {
    finalMergeLog.append(". MemSeg: " + finalSegments.size() + ", " + inMemBytes);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Merging " + finalSegments.size() + " segments, " + inMemBytes + " bytes from memory into reduce");
    }
  }
  if (0 != onDiskBytes) {
    final int numInMemSegments = memDiskSegments.size();
    diskSegments.addAll(0, memDiskSegments);
    memDiskSegments.clear();
    TezRawKeyValueIterator diskMerge = TezMerger.merge(job, fs, keyClass, valueClass, codec, diskSegments, ioSortFactor, numInMemSegments, tmpDir, comparator, progressable, false, spilledRecordsCounter, null, additionalBytesRead, null);
    diskSegments.clear();
    if (0 == finalSegments.size()) {
      return diskMerge;
    }
    finalSegments.add(new Segment(new RawKVIteratorReader(diskMerge, onDiskBytes), null));
  }
  if (LOG.isInfoEnabled()) {
    LOG.info(finalMergeLog.toString());
  }
  // This is doing nothing but creating an iterator over the segments.
  return TezMerger.merge(job, fs, keyClass, valueClass, finalSegments, finalSegments.size(), tmpDir, comparator, progressable, spilledRecordsCounter, null, additionalBytesRead, null);
}
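The anonymous Comparator above sorts the disk segments by ascending length, so the merge factor is consumed by the smallest files first. On Java 8+ the same ordering can be written more compactly; a drop-in equivalent, assuming Segment.getLength() is the same accessor used in the comparator above:

import java.util.Comparator;
import java.util.List;

import org.apache.tez.runtime.library.common.sort.impl.TezMerger.Segment;

final class SegmentOrdering {
  /** Ascending by segment length; equal lengths compare as 0, matching the original. */
  static void sortBySize(List<Segment> diskSegments) {
    diskSegments.sort(Comparator.comparingLong(Segment::getLength));
  }
}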
Use of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer in project tez by apache.
The class UnorderedPartitionedKVWriter, method mergeAll.
private void mergeAll() throws IOException {
  long expectedSize = spilledSize;
  if (currentBuffer.nextPosition != 0) {
    expectedSize += currentBuffer.nextPosition - (currentBuffer.numRecords * META_SIZE) - currentBuffer.skipSize + numPartitions * APPROX_HEADER_LENGTH;
    // Update final statistics.
    updateGlobalStats(currentBuffer);
  }
  SpillPathDetails spillPathDetails = getSpillPathDetails(true, expectedSize);
  finalIndexPath = spillPathDetails.indexFilePath;
  finalOutPath = spillPathDetails.outputFilePath;
  TezSpillRecord finalSpillRecord = new TezSpillRecord(numPartitions);
  DataInputBuffer keyBuffer = new DataInputBuffer();
  DataInputBuffer valBuffer = new DataInputBuffer();
  DataInputBuffer keyBufferIFile = new DataInputBuffer();
  DataInputBuffer valBufferIFile = new DataInputBuffer();
  FSDataOutputStream out = null;
  try {
    out = rfs.create(finalOutPath);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(finalOutPath, SPILL_FILE_PERMS);
    }
    Writer writer = null;
    for (int i = 0; i < numPartitions; i++) {
      long segmentStart = out.getPos();
      if (numRecordsPerPartition[i] == 0) {
        LOG.info(destNameTrimmed + ": " + "Skipping partition: " + i + " in final merge since it has no records");
        continue;
      }
      writer = new Writer(conf, out, keyClass, valClass, codec, null, null);
      try {
        if (currentBuffer.nextPosition != 0 && currentBuffer.partitionPositions[i] != WrappedBuffer.PARTITION_ABSENT_POSITION) {
          // Write current buffer.
          writePartition(currentBuffer.partitionPositions[i], currentBuffer, writer, keyBuffer, valBuffer);
        }
        synchronized (spillInfoList) {
          for (SpillInfo spillInfo : spillInfoList) {
            TezIndexRecord indexRecord = spillInfo.spillRecord.getIndex(i);
            if (indexRecord.getPartLength() == 0) {
              // Skip empty partitions within a spill
              continue;
            }
            FSDataInputStream in = rfs.open(spillInfo.outPath);
            in.seek(indexRecord.getStartOffset());
            IFile.Reader reader = new IFile.Reader(in, indexRecord.getPartLength(), codec, null, additionalSpillBytesReadCounter, ifileReadAhead, ifileReadAheadLength, ifileBufferSize);
            while (reader.nextRawKey(keyBufferIFile)) {
              // TODO Inefficient. If spills are not compressed, a direct copy should be possible
              // given the current IFile format. Also extremely inefficient for large records,
              // since the entire record will be read into memory.
              reader.nextRawValue(valBufferIFile);
              writer.append(keyBufferIFile, valBufferIFile);
            }
            reader.close();
          }
        }
        writer.close();
        fileOutputBytesCounter.increment(writer.getCompressedLength());
        TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
        writer = null;
        finalSpillRecord.putIndex(indexRecord, i);
        outputContext.notifyProgress();
      } finally {
        if (writer != null) {
          writer.close();
        }
      }
    }
  } finally {
    if (out != null) {
      out.close();
    }
    deleteIntermediateSpills();
  }
  finalSpillRecord.writeToFile(finalIndexPath, conf);
  fileOutputBytesCounter.increment(indexFileSizeEstimate);
  LOG.info(destNameTrimmed + ": " + "Finished final spill after merging : " + numSpills.get() + " spills");
}
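The expectedSize arithmetic at the top of mergeAll backs the record metadata out of the in-memory buffer: nextPosition counts every byte consumed (payload plus per-record metadata), so subtracting numRecords * META_SIZE and skipSize leaves the payload, and one APPROX_HEADER_LENGTH per partition covers IFile framing. A worked example with entirely made-up numbers, just to make the formula concrete:

public final class ExpectedSizeExample {
  public static void main(String[] args) {
    // Hypothetical numbers; only the formula matches mergeAll().
    long spilledSize = 10_000_000L;   // bytes already written by earlier spills
    int nextPosition = 1_048_576;     // bytes consumed in the current buffer
    int numRecords = 4_096;           // records held in the current buffer
    int metaSize = 16;                // assumed per-record metadata size
    int skipSize = 512;               // bytes wasted at a buffer wrap
    int numPartitions = 8;
    long approxHeaderLength = 150L;   // assumed per-partition IFile overhead

    long expectedSize = spilledSize
        + nextPosition - (numRecords * metaSize) - skipSize
        + numPartitions * approxHeaderLength;
    System.out.println(expectedSize); // 10983728
  }
}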