Search in sources :

Example 11 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class Fetcher method getTezIndexRecord.

/**
 * Looks up the index record of one partition for the given source attempt.
 *
 * <p>The index file path is derived from the attempt's path component plus the
 * task-output index suffix, a {@link TezSpillRecord} is constructed from that path
 * and {@code conf}, and the entry for {@code partition} is returned.
 *
 * @param srcAttemptId source attempt whose path component locates the index file
 * @param partition partition whose index record is requested
 * @return the index record stored for {@code partition}
 * @throws IOException propagated from building the spill record (presumably when the
 *         index file cannot be read - confirm against TezSpillRecord)
 */
@VisibleForTesting
protected TezIndexRecord getTezIndexRecord(InputAttemptIdentifier srcAttemptId, int partition) throws IOException {
    final String pathComponent = srcAttemptId.getPathComponent();
    final Path indexPath =
        getShuffleInputFileName(pathComponent, Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
    return new TezSpillRecord(indexPath, conf).getIndex(partition);
}
Also used : Path(org.apache.hadoop.fs.Path) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 12 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project hive by apache.

the class IndexCache method readIndexFileToCache.

/**
 * Returns the cached index information for {@code mapId}, loading it from
 * {@code indexFileName} when no entry is registered yet.
 *
 * <p>A fresh placeholder is registered with {@code putIfAbsent}. If another entry is
 * already present, this call waits on that entry until it is no longer under
 * construction and returns it. Otherwise this call builds the spill record, publishes
 * it on the placeholder, wakes any waiters, records the entry in the queue and frees
 * cached entries when the memory accounting exceeds the allowed total.
 *
 * @param indexFileName path of the index file to load
 * @param mapId cache key for the index information
 * @param expectedIndexOwner passed through to the {@link TezSpillRecord} constructor
 * @return the index information registered in the cache for {@code mapId}
 * @throws IOException if the index file cannot be loaded, or if the thread is
 *         interrupted while waiting for another caller to finish loading (the
 *         interrupt status is restored before throwing)
 */
private IndexInformation readIndexFileToCache(Path indexFileName, String mapId, String expectedIndexOwner) throws IOException {
    IndexInformation info;
    IndexInformation newInd = new IndexInformation();
    if ((info = cache.putIfAbsent(mapId, newInd)) != null) {
        // An entry was registered before ours; wait until it has been populated.
        synchronized (info) {
            while (isUnderConstruction(info)) {
                try {
                    info.wait();
                } catch (InterruptedException e) {
                    // Re-assert the interrupt so callers that only see the IOException
                    // can still observe that the thread was interrupted.
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted waiting for construction", e);
                }
            }
        }
        LOG.debug("IndexCache HIT: MapId " + mapId + " found");
        return info;
    }
    LOG.debug("IndexCache MISS: MapId " + mapId + " not found");
    TezSpillRecord tmp = null;
    try {
        tmp = new TezSpillRecord(indexFileName, fs, expectedIndexOwner);
    } catch (Throwable e) {
        // Publish a placeholder record built with 0 so waiters are released, and drop
        // the entry so that a later call attempts the load again.
        tmp = new TezSpillRecord(0);
        cache.remove(mapId);
        throw new IOException("Error Reading IndexFile", e);
    } finally {
        // Runs on success and failure: hand the record over and wake every waiter.
        synchronized (newInd) {
            newInd.mapSpillRecord = tmp;
            newInd.notifyAll();
        }
    }
    queue.add(mapId);
    if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
        freeIndexInformation();
    }
    return newInd;
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) IOException(java.io.IOException)

Example 13 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class IndexCache method readIndexFileToCache.

/**
 * Returns the cached index information for {@code mapId}, loading it from
 * {@code indexFileName} when no entry is registered yet.
 *
 * <p>A fresh placeholder is registered with {@code putIfAbsent}. If another entry is
 * already present, this call waits on that entry until it is no longer under
 * construction and returns it. Otherwise this call builds the spill record, publishes
 * it on the placeholder, wakes any waiters, records the entry in the queue and frees
 * cached entries when the memory accounting exceeds the allowed total.
 *
 * @param indexFileName path of the index file to load
 * @param mapId cache key for the index information
 * @param expectedIndexOwner passed through to the {@link TezSpillRecord} constructor
 * @return the index information registered in the cache for {@code mapId}
 * @throws IOException if the index file cannot be loaded, or if the thread is
 *         interrupted while waiting for another caller to finish loading (the
 *         interrupt status is restored before throwing)
 */
private IndexInformation readIndexFileToCache(Path indexFileName, String mapId, String expectedIndexOwner) throws IOException {
    IndexInformation info;
    IndexInformation newInd = new IndexInformation();
    if ((info = cache.putIfAbsent(mapId, newInd)) != null) {
        // An entry was registered before ours; wait until it has been populated.
        synchronized (info) {
            while (isUnderConstruction(info)) {
                try {
                    info.wait();
                } catch (InterruptedException e) {
                    // Re-assert the interrupt so callers that only see the IOException
                    // can still observe that the thread was interrupted.
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted waiting for construction", e);
                }
            }
        }
        LOG.debug("IndexCache HIT: MapId " + mapId + " found");
        return info;
    }
    LOG.debug("IndexCache MISS: MapId " + mapId + " not found");
    TezSpillRecord tmp = null;
    try {
        tmp = new TezSpillRecord(indexFileName, conf, expectedIndexOwner);
    } catch (Throwable e) {
        // Publish a placeholder record built with 0 so waiters are released, and drop
        // the entry so that a later call attempts the load again.
        tmp = new TezSpillRecord(0);
        cache.remove(mapId);
        throw new IOException("Error Reading IndexFile", e);
    } finally {
        // Runs on success and failure: hand the record over and wake every waiter.
        synchronized (newInd) {
            newInd.mapSpillRecord = tmp;
            newInd.notifyAll();
        }
    }
    queue.add(mapId);
    if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
        freeIndexInformation();
    }
    return newInd;
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) IOException(java.io.IOException)

Example 14 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class DefaultSorter method spillSingleRecord.

/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 *
 * <p>A new spill file is created and one index entry is recorded per partition. Only
 * the segment for {@code partition} receives the record; for the other partitions a
 * writer is opened only when {@code sendEmptyPartitionDetails} is false. The resulting
 * spill index is either written to an index file (when the in-memory index budget is
 * already used up) or kept in {@code indexCacheList}. Spill counters are updated last.
 *
 * @param key key of the record to spill
 * @param value value of the record to spill
 * @param partition partition the record belongs to
 * @throws IOException if creating or writing the spill file or its index fails
 */
private void spillSingleRecord(final Object key, final Object value, int partition) throws IOException {
    // Widen before the arithmetic so the size estimate cannot overflow int when the
    // buffer is large or there are many partitions.
    long size = (long) kvbuffer.length + (long) partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
        // create spill file
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
        spillFilePaths.put(numSpills, filename);
        out = rfs.create(filename);
        // Set the permissions explicitly only when applying the configured umask to
        // SPILL_FILE_PERMS would change them.
        if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
            rfs.setPermission(filename, SPILL_FILE_PERMS);
        }
        // we don't run the combiner for a single record
        for (int i = 0; i < partitions; ++i) {
            IFile.Writer writer = null;
            try {
                long segmentStart = out.getPos();
                // Create a new codec, don't care!
                if (!sendEmptyPartitionDetails || (i == partition)) {
                    writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, false);
                }
                if (i == partition) {
                    final long recordStart = out.getPos();
                    writer.append(key, value);
                    // Note that our map byte count will not be accurate with
                    // compression
                    mapOutputByteCounter.increment(out.getPos() - recordStart);
                }
                // Lengths stay 0 when no writer was opened for this partition.
                long rawLength = 0;
                long partLength = 0;
                if (writer != null) {
                    writer.close();
                    rawLength = writer.getRawLength();
                    partLength = writer.getCompressedLength();
                }
                adjustSpillCounters(rawLength, partLength);
                // record offsets
                TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
                spillRec.putIndex(rec, i);
                writer = null;
            } catch (IOException e) {
                if (null != writer) {
                    writer.close();
                }
                throw e;
            }
        }
        if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
            // create spill index file
            Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillFileIndexPaths.put(numSpills, indexFilename);
            spillRec.writeToFile(indexFilename, conf);
        } else {
            // Still within the in-memory index budget: keep the index cached.
            indexCacheList.add(spillRec);
            totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        ++numSpills;
        if (!isFinalMergeEnabled()) {
            numShuffleChunks.setValue(numSpills);
        } else if (numSpills > 1) {
            // Increment only when there is at least one previous spill
            numAdditionalSpills.increment(1);
        }
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) Path(org.apache.hadoop.fs.Path) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer) Writer(org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)

Example 15 with TezSpillRecord

use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.

the class UnorderedPartitionedKVWriter method close.

/**
 * Closes the writer: schedules any remaining spill, waits for pending spills to
 * finish, and produces the events describing the final output.
 *
 * <p>If a spill failed, resources are cleaned up and the failure is rethrown as an
 * {@link IOException}. Otherwise the behaviour depends on the mode:
 * <ul>
 *   <li>non-pipelined with {@code skipBuffers}: the single writer is closed, a
 *       one-entry index file is written, and a VM event plus a DM event are returned;</li>
 *   <li>non-pipelined with final merge enabled: existing spills are merged (or a final
 *       spill is done when there are none) and a VM event plus a DM event are returned;</li>
 *   <li>non-pipelined with final merge disabled: the final spill is performed and the
 *       contents of {@code finalEvents} are returned;</li>
 *   <li>pipelined: a final spill is performed and, if it produced a spill, events are
 *       handed to {@code mayBeSendEventsForSpill}.</li>
 * </ul>
 *
 * @return the events to send for this output
 * @throws IOException if a spill failed or writing the final output fails
 * @throws InterruptedException if interrupted while waiting for pending spills
 */
@Override
public List<Event> close() throws IOException, InterruptedException {
    // In case there are buffers to be spilled, schedule spilling
    scheduleSpill(true);
    List<Event> eventList = Lists.newLinkedList();
    isShutdown.set(true);
    spillLock.lock();
    try {
        LOG.info(destNameTrimmed + ": " + "Waiting for all spills to complete : Pending : " + pendingSpillCount.get());
        // Wait until no spill is pending, or stop early once a spill has failed.
        while (pendingSpillCount.get() != 0 && spillException == null) {
            spillInProgress.await();
        }
    } finally {
        spillLock.unlock();
    }
    if (spillException != null) {
        LOG.error(destNameTrimmed + ": " + "Error during spill, throwing");
        // Assuming close will be called on the same thread as the write
        cleanup();
        currentBuffer.cleanup();
        currentBuffer = null;
        // Rethrow as-is when already an IOException, otherwise wrap it.
        if (spillException instanceof IOException) {
            throw (IOException) spillException;
        } else {
            throw new IOException(spillException);
        }
    } else {
        LOG.info(destNameTrimmed + ": " + "All spills complete");
        // Assuming close will be called on the same thread as the write
        cleanup();
        // NOTE(review): nothing in this method adds to `events`, so the pipelined path
        // below returns an empty list; presumably its events are delivered through
        // mayBeSendEventsForSpill instead - confirm.
        List<Event> events = Lists.newLinkedList();
        if (!pipelinedShuffle) {
            if (skipBuffers) {
                // Data went straight through `writer`: record it as one index entry
                // at offset 0 and write the final index file.
                writer.close();
                long rawLen = writer.getRawLength();
                long compLen = writer.getCompressedLength();
                TezIndexRecord rec = new TezIndexRecord(0, rawLen, compLen);
                TezSpillRecord sr = new TezSpillRecord(1);
                sr.putIndex(rec, 0);
                sr.writeToFile(finalIndexPath, conf);
                // Mark partition 0 as empty when no records were output.
                BitSet emptyPartitions = new BitSet();
                if (outputRecordsCounter.getValue() == 0) {
                    emptyPartitions.set(0);
                }
                if (reportPartitionStats()) {
                    if (outputRecordsCounter.getValue() > 0) {
                        sizePerPartition[0] = rawLen;
                    }
                }
                cleanupCurrentBuffer();
                // Byte counters are only updated when at least one record was output.
                if (outputRecordsCounter.getValue() > 0) {
                    outputBytesWithOverheadCounter.increment(rawLen);
                    fileOutputBytesCounter.increment(compLen + indexFileSizeEstimate);
                }
                eventList.add(generateVMEvent());
                eventList.add(generateDMEvent(false, -1, false, outputContext.getUniqueIdentifier(), emptyPartitions));
                return eventList;
            }
            /*
          1. Final merge enabled
             - When lots of spills are there, mergeAll, generate events and return
             - If there are no existing spills, check for final spill and generate events
          2. Final merge disabled
             - If finalSpill generated data, generate events and return
             - If finalSpill did not generate data, it would automatically populate events
         */
            if (isFinalMergeEnabled) {
                if (numSpills.get() > 0) {
                    mergeAll();
                } else {
                    finalSpill();
                }
                updateTezCountersAndNotify();
                eventList.add(generateVMEvent());
                eventList.add(generateDMEvent());
            } else {
                // if no data is generated, finalSpill would create VMEvent & add to finalEvents
                SpillResult result = finalSpill();
                if (result != null) {
                    updateTezCountersAndNotify();
                    // Generate vm event
                    finalEvents.add(generateVMEvent());
                    // compute empty partitions based on spill result and generate DME
                    int spillNum = numSpills.get() - 1;
                    SpillCallback callback = new SpillCallback(spillNum);
                    callback.computePartitionStats(result);
                    BitSet emptyPartitions = getEmptyPartitions(callback.getRecordsPerPartition());
                    String pathComponent = generatePathComponent(outputContext.getUniqueIdentifier(), spillNum);
                    Event finalEvent = generateDMEvent(true, spillNum, true, pathComponent, emptyPartitions);
                    finalEvents.add(finalEvent);
                }
                // all events to be sent out are in finalEvents.
                eventList.addAll(finalEvents);
            }
            cleanupCurrentBuffer();
            return eventList;
        }
        // For pipelined case, send out an event in case finalspill generated a spill file.
        if (finalSpill() != null) {
            // VertexManagerEvent is only sent at the end and thus sizePerPartition is used
            // for the sum of all spills.
            mayBeSendEventsForSpill(currentBuffer.recordsPerPartition, sizePerPartition, numSpills.get() - 1, true);
        }
        updateTezCountersAndNotify();
        cleanupCurrentBuffer();
        return events;
    }
}
Also used : TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) BitSet(java.util.BitSet) Event(org.apache.tez.runtime.api.Event) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) IOException(java.io.IOException) ByteString(com.google.protobuf.ByteString)

Aggregations

TezSpillRecord (org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord)20 Path (org.apache.hadoop.fs.Path)14 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)11 Event (org.apache.tez.runtime.api.Event)9 BitSet (java.util.BitSet)7 ByteString (com.google.protobuf.ByteString)6 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)6 IFile (org.apache.tez.runtime.library.common.sort.impl.IFile)6 IOException (java.io.IOException)5 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)5 VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent)5 Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)5 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)4 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)3 OutputContext (org.apache.tez.runtime.api.OutputContext)3 ShuffleUserPayloads (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads)3 Test (org.junit.Test)3 Mockito.anyString (org.mockito.Mockito.anyString)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 LinkedListMultimap (com.google.common.collect.LinkedListMultimap)2