Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.
The class Fetcher, method getTezIndexRecord.
@VisibleForTesting
protected TezIndexRecord getTezIndexRecord(InputAttemptIdentifier srcAttemptId, int partition)
    throws IOException {
  TezIndexRecord idxRecord;
  Path indexFile = getShuffleInputFileName(srcAttemptId.getPathComponent(),
      Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING);
  TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
  idxRecord = spillRecord.getIndex(partition);
  return idxRecord;
}
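For context, a fetcher typically uses the returned index record to locate one partition's segment in the shuffle data file. A minimal sketch of that follow-up step, assuming the standard TezIndexRecord getters (getStartOffset, getRawLength, getPartLength); fetcher, srcAttemptId, and partition are placeholders for this illustration:

  // Hypothetical fragment: locate and size one partition's segment.
  TezIndexRecord rec = fetcher.getTezIndexRecord(srcAttemptId, partition);
  long offset = rec.getStartOffset();  // where the segment starts in the data file
  long rawLen = rec.getRawLength();    // uncompressed segment length
  long partLen = rec.getPartLength();  // on-disk (possibly compressed) length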
Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project hive by apache.
The class IndexCache, method readIndexFileToCache.
private IndexInformation readIndexFileToCache(Path indexFileName, String mapId,
    String expectedIndexOwner) throws IOException {
  IndexInformation info;
  IndexInformation newInd = new IndexInformation();
  if ((info = cache.putIfAbsent(mapId, newInd)) != null) {
    synchronized (info) {
      while (isUnderConstruction(info)) {
        try {
          info.wait();
        } catch (InterruptedException e) {
          throw new IOException("Interrupted waiting for construction", e);
        }
      }
    }
    LOG.debug("IndexCache HIT: MapId " + mapId + " found");
    return info;
  }
  LOG.debug("IndexCache MISS: MapId " + mapId + " not found");
  TezSpillRecord tmp = null;
  try {
    tmp = new TezSpillRecord(indexFileName, fs, expectedIndexOwner);
  } catch (Throwable e) {
    tmp = new TezSpillRecord(0);
    cache.remove(mapId);
    throw new IOException("Error Reading IndexFile", e);
  } finally {
    synchronized (newInd) {
      newInd.mapSpillRecord = tmp;
      newInd.notifyAll();
    }
  }
  queue.add(mapId);
  if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
    freeIndexInformation();
  }
  return newInd;
}
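The putIfAbsent plus wait/notify idiom above ensures that only the first caller for a given mapId reads the index file, while concurrent callers block until construction completes; the finally block publishes a result (possibly the empty TezSpillRecord(0) sentinel) and wakes waiters even on failure. A minimal self-contained sketch of the same pattern, with placeholder names:

  import java.io.IOException;
  import java.util.concurrent.ConcurrentHashMap;

  // Construct-once cache: the winner of putIfAbsent loads the value; concurrent
  // callers for the same key block on the entry until it is marked ready.
  // Entry stands in for IndexInformation in the excerpt above.
  class OnceCache<K, V> {
    interface Loader<A, B> {
      B load(A key) throws IOException;
    }

    static final class Entry<T> {
      T value;        // guarded by the entry's monitor
      boolean ready;  // set exactly once, under the monitor
    }

    private final ConcurrentHashMap<K, Entry<V>> cache = new ConcurrentHashMap<>();

    V get(K key, Loader<K, V> loader) throws IOException {
      Entry<V> fresh = new Entry<>();
      Entry<V> existing = cache.putIfAbsent(key, fresh);
      if (existing != null) {
        // Hit: another thread owns construction; wait until it finishes.
        synchronized (existing) {
          while (!existing.ready) {
            try {
              existing.wait();
            } catch (InterruptedException e) {
              throw new IOException("Interrupted waiting for construction", e);
            }
          }
          return existing.value;
        }
      }
      // Miss: this thread loads the value; waiters are woken either way.
      try {
        V loaded = loader.load(key);
        synchronized (fresh) {
          fresh.value = loaded;
        }
        return loaded;
      } catch (IOException e) {
        cache.remove(key);  // let a later caller retry the load
        throw e;
      } finally {
        synchronized (fresh) {
          fresh.ready = true;
          fresh.notifyAll();
        }
      }
    }
  }

Unlike this sketch, the real method also tracks memory usage (totalMemoryUsed against totalMemoryAllowed) and evicts old entries via freeIndexInformation().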
Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.
The class IndexCache, method readIndexFileToCache. This variant is nearly identical to the hive copy above; the only difference is the TezSpillRecord constructor overload it calls, passing a Configuration rather than a FileSystem.
private IndexInformation readIndexFileToCache(Path indexFileName, String mapId,
    String expectedIndexOwner) throws IOException {
  IndexInformation info;
  IndexInformation newInd = new IndexInformation();
  if ((info = cache.putIfAbsent(mapId, newInd)) != null) {
    synchronized (info) {
      while (isUnderConstruction(info)) {
        try {
          info.wait();
        } catch (InterruptedException e) {
          throw new IOException("Interrupted waiting for construction", e);
        }
      }
    }
    LOG.debug("IndexCache HIT: MapId " + mapId + " found");
    return info;
  }
  LOG.debug("IndexCache MISS: MapId " + mapId + " not found");
  TezSpillRecord tmp = null;
  try {
    tmp = new TezSpillRecord(indexFileName, conf, expectedIndexOwner);
  } catch (Throwable e) {
    tmp = new TezSpillRecord(0);
    cache.remove(mapId);
    throw new IOException("Error Reading IndexFile", e);
  } finally {
    synchronized (newInd) {
      newInd.mapSpillRecord = tmp;
      newInd.notifyAll();
    }
  }
  queue.add(mapId);
  if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
    freeIndexInformation();
  }
  return newInd;
}
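To make the index file life cycle concrete, the sketch below round-trips a spill record using only calls that appear in these excerpts (TezSpillRecord(int), putIndex, writeToFile, TezSpillRecord(Path, Configuration), getIndex). The class name, partition count, segment sizes, and path are placeholder assumptions:

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
  import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;

  // Hypothetical round trip: write a spill index, then read one partition back.
  class SpillRecordRoundTrip {
    static TezIndexRecord roundTrip(Configuration conf) throws IOException {
      int numPartitions = 4;                                // placeholder
      TezSpillRecord spillRec = new TezSpillRecord(numPartitions);
      long segmentStart = 0;
      for (int i = 0; i < numPartitions; i++) {
        long rawLength = 128;                               // placeholder segment sizes
        long partLength = 64;
        spillRec.putIndex(new TezIndexRecord(segmentStart, rawLength, partLength), i);
        segmentStart += partLength;                         // segments are laid out back to back
      }
      Path indexFile = new Path("/tmp/example.out.index");  // placeholder path
      spillRec.writeToFile(indexFile, conf);                // persist the index
      TezSpillRecord readBack = new TezSpillRecord(indexFile, conf);
      return readBack.getIndex(2);                          // index record for partition 2
    }
  }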
Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.
The class DefaultSorter, method spillSingleRecord.
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final Object key, final Object value, int partition)
    throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, filename);
    out = rfs.create(filename);
    if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
      rfs.setPermission(filename, SPILL_FILE_PERMS);
    }
    // we don't run the combiner for a single record
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        if (!sendEmptyPartitionDetails || (i == partition)) {
          writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter,
              null, false);
        }
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        long rawLength = 0;
        long partLength = 0;
        if (writer != null) {
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);
        // record offsets
        TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
        spillRec.putIndex(rec, i);
        writer = null;
      } catch (IOException e) {
        if (null != writer) {
          writer.close();
        }
        throw e;
      }
    }
    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(
          numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillFileIndexPaths.put(numSpills, indexFilename);
      spillRec.writeToFile(indexFilename, conf);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      numShuffleChunks.setValue(numSpills);
    } else if (numSpills > 1) {
      // Increment only when there is at least one previous spill
      numAdditionalSpills.increment(1);
    }
  } finally {
    if (out != null) {
      out.close();
    }
  }
}
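The tail of the method shows the index-cache trade-off: each cached TezSpillRecord costs spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH bytes of memory, and once totalIndexCacheMemory crosses indexCacheMemoryLimit, later spill indexes go to disk instead. A minimal sketch of that accounting decision; all names here are placeholders, and the 24-byte figure is an assumption based on the index record layout of three longs per partition (offset, raw length, part length):

  // Sketch of the cache-or-flush decision above (placeholder names and limits).
  class IndexCacheAccounting {
    static final int INDEX_RECORD_LENGTH = 3 * Long.BYTES;  // assumed: 24 bytes per partition

    long totalIndexCacheMemory = 0;
    final long indexCacheMemoryLimit = 1024 * 1024;         // assumed 1 MiB limit

    // Mirrors the original check: keep caching while usage is below the limit,
    // so the cache can overshoot by at most one spill record before flushing.
    boolean admit(int partitions) {
      if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
        return false;                                       // write the index file to disk instead
      }
      totalIndexCacheMemory += (long) partitions * INDEX_RECORD_LENGTH;
      return true;                                          // e.g. 10,000 partitions add 240,000 bytes
    }
  }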
Use of org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord in project tez by apache.
The class UnorderedPartitionedKVWriter, method close.
@Override
public List<Event> close() throws IOException, InterruptedException {
  // In case there are buffers to be spilled, schedule spilling
  scheduleSpill(true);
  List<Event> eventList = Lists.newLinkedList();
  isShutdown.set(true);
  spillLock.lock();
  try {
    LOG.info(destNameTrimmed + ": " + "Waiting for all spills to complete : Pending : "
        + pendingSpillCount.get());
    while (pendingSpillCount.get() != 0 && spillException == null) {
      spillInProgress.await();
    }
  } finally {
    spillLock.unlock();
  }
  if (spillException != null) {
    LOG.error(destNameTrimmed + ": " + "Error during spill, throwing");
    // Assuming close will be called on the same thread as the write
    cleanup();
    currentBuffer.cleanup();
    currentBuffer = null;
    if (spillException instanceof IOException) {
      throw (IOException) spillException;
    } else {
      throw new IOException(spillException);
    }
  } else {
    LOG.info(destNameTrimmed + ": " + "All spills complete");
    // Assuming close will be called on the same thread as the write
    cleanup();
    List<Event> events = Lists.newLinkedList();
    if (!pipelinedShuffle) {
      if (skipBuffers) {
        writer.close();
        long rawLen = writer.getRawLength();
        long compLen = writer.getCompressedLength();
        TezIndexRecord rec = new TezIndexRecord(0, rawLen, compLen);
        TezSpillRecord sr = new TezSpillRecord(1);
        sr.putIndex(rec, 0);
        sr.writeToFile(finalIndexPath, conf);
        BitSet emptyPartitions = new BitSet();
        if (outputRecordsCounter.getValue() == 0) {
          emptyPartitions.set(0);
        }
        if (reportPartitionStats()) {
          if (outputRecordsCounter.getValue() > 0) {
            sizePerPartition[0] = rawLen;
          }
        }
        cleanupCurrentBuffer();
        if (outputRecordsCounter.getValue() > 0) {
          outputBytesWithOverheadCounter.increment(rawLen);
          fileOutputBytesCounter.increment(compLen + indexFileSizeEstimate);
        }
        eventList.add(generateVMEvent());
        eventList.add(generateDMEvent(false, -1, false, outputContext.getUniqueIdentifier(),
            emptyPartitions));
        return eventList;
      }
      /*
       * 1. Final merge enabled
       *    - When lots of spills are there, mergeAll, generate events and return
       *    - If there are no existing spills, check for final spill and generate events
       * 2. Final merge disabled
       *    - If finalSpill generated data, generate events and return
       *    - If finalSpill did not generate data, it would automatically populate events
       */
      if (isFinalMergeEnabled) {
        if (numSpills.get() > 0) {
          mergeAll();
        } else {
          finalSpill();
        }
        updateTezCountersAndNotify();
        eventList.add(generateVMEvent());
        eventList.add(generateDMEvent());
      } else {
        // if no data is generated, finalSpill would create VMEvent & add to finalEvents
        SpillResult result = finalSpill();
        if (result != null) {
          updateTezCountersAndNotify();
          // Generate vm event
          finalEvents.add(generateVMEvent());
          // compute empty partitions based on spill result and generate DME
          int spillNum = numSpills.get() - 1;
          SpillCallback callback = new SpillCallback(spillNum);
          callback.computePartitionStats(result);
          BitSet emptyPartitions = getEmptyPartitions(callback.getRecordsPerPartition());
          String pathComponent = generatePathComponent(outputContext.getUniqueIdentifier(),
              spillNum);
          Event finalEvent = generateDMEvent(true, spillNum, true, pathComponent,
              emptyPartitions);
          finalEvents.add(finalEvent);
        }
        // all events to be sent out are in finalEvents.
        eventList.addAll(finalEvents);
      }
      cleanupCurrentBuffer();
      return eventList;
    }
    // For the pipelined case, send out an event in case finalSpill generated a spill file.
    if (finalSpill() != null) {
      // VertexManagerEvent is only sent at the end and thus sizePerPartition is used
      // for the sum of all spills.
      mayBeSendEventsForSpill(currentBuffer.recordsPerPartition, sizePerPartition,
          numSpills.get() - 1, true);
    }
    updateTezCountersAndNotify();
    cleanupCurrentBuffer();
    return events;  // pipelined case: events have already been sent out per-spill
  }
}
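The wait at the top of close() is a standard java.util.concurrent lock-and-condition loop. Below is a self-contained sketch of just that pattern; the names (SpillWaiter, pendingSpills, spillDone, onSpillFinished) are placeholders standing in for the writer's fields, not Tez API:

  import java.util.concurrent.atomic.AtomicInteger;
  import java.util.concurrent.locks.Condition;
  import java.util.concurrent.locks.ReentrantLock;

  // Sketch of the wait-for-spills idiom in close(): spill workers decrement the
  // pending count and signal; the closing thread loops on the condition until
  // the count reaches zero or a failure is recorded.
  class SpillWaiter {
    private final ReentrantLock spillLock = new ReentrantLock();
    private final Condition spillDone = spillLock.newCondition();
    private final AtomicInteger pendingSpills = new AtomicInteger();
    private volatile Throwable spillException;

    void onSpillFinished(Throwable failure) {  // called by each spill worker
      spillLock.lock();
      try {
        if (failure != null) {
          spillException = failure;
        }
        pendingSpills.decrementAndGet();
        spillDone.signalAll();                 // wake the closing thread
      } finally {
        spillLock.unlock();
      }
    }

    void awaitAllSpills() throws InterruptedException {
      spillLock.lock();
      try {
        while (pendingSpills.get() != 0 && spillException == null) {
          spillDone.await();                   // releases the lock while waiting
        }
      } finally {
        spillLock.unlock();
      }
    }
  }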