Use of org.apache.drill.exec.physical.impl.spill.RecordBatchSizer in project drill by apache.
Class ExternalSortBatch, method processBatch:
/**
 * Process the converted incoming batch by adding it to the in-memory store
 * of data, or spilling data to disk when necessary.
 */
@SuppressWarnings("resource")
private void processBatch() {
  if (incoming.getRecordCount() == 0) {
    return;
  }

  // Determine the actual size of the incoming batch before taking
  // ownership. This lets us decide whether we need to spill first,
  // so we avoid overflowing memory simply due to the ownership transfer.
  RecordBatchSizer sizer = analyzeIncomingBatch();
  if (isSpillNeeded(sizer.actualSize())) {
    spillFromMemory();
  }

  // Sanity check. We should now be below the buffer memory maximum.
  long startMem = allocator.getAllocatedMemory();
  if (startMem > bufferMemoryPool) {
    logger.error("ERROR: Failed to spill above buffer limit. Buffer pool = {}, memory = {}",
        bufferMemoryPool, startMem);
  }

  // Convert the incoming batch to the agreed-upon schema.
  // No converted batch means we got an empty input batch.
  // Converting the batch transfers memory ownership to our
  // allocator. This gives a roundabout way to learn the batch
  // size: check the before and after memory levels, then use
  // the difference as the batch size, in bytes.
  VectorContainer convertedBatch = convertBatch();
  if (convertedBatch == null) {
    return;
  }

  SelectionVector2 sv2;
  try {
    sv2 = makeSelectionVector();
  } catch (Exception e) {
    convertedBatch.clear();
    throw e;
  }

  // Compute the batch size, including the allocation of an sv2.
  long endMem = allocator.getAllocatedMemory();
  long batchSize = endMem - startMem;
  int count = sv2.getCount();
  inputRecordCount += count;
  inputBatchCount++;
  totalInputBytes += sizer.actualSize();

  if (minimumBufferSpace == 0) {
    minimumBufferSpace = endMem;
  } else {
    minimumBufferSpace = Math.min(minimumBufferSpace, endMem);
  }
  stats.setLongStat(Metric.MIN_BUFFER, minimumBufferSpace);

  // Update the size based on the actual record count, not
  // the effective count as given by the selection vector
  // (which may exclude some records due to filtering).
  updateMemoryEstimates(batchSize, sizer);

  // Sort the incoming batch using either the original selection vector,
  // or a new one created here.
  SingleBatchSorter sorter = opCodeGen.getSorter(convertedBatch);
  try {
    sorter.setup(context, sv2, convertedBatch);
    sorter.sort(sv2);
  } catch (SchemaChangeException e) {
    convertedBatch.clear();
    throw UserException.unsupportedError(e)
        .message("Unexpected schema change.")
        .build(logger);
  }

  RecordBatchData rbd = new RecordBatchData(convertedBatch, allocator);
  try {
    rbd.setSv2(sv2);
    bufferedBatches.add(new BatchGroup.InputBatch(rbd.getContainer(), rbd.getSv2(),
        oContext, sizer.netSize()));
    if (peakNumBatches < bufferedBatches.size()) {
      peakNumBatches = bufferedBatches.size();
      stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
    }
  } catch (Throwable t) {
    rbd.clear();
    throw t;
  }
}
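The body of isSpillNeeded() is not shown above. As a rough illustration of the pre-ownership spill check that the comments describe, a minimal sketch might compare the current allocation plus the measured size of the incoming batch against the buffer pool limit. This is a hypothetical reconstruction, not the actual Drill implementation; the field names (allocator, bufferMemoryPool) follow the code above:

// Hypothetical sketch, not the actual Drill implementation: decide whether
// buffered data must be spilled before taking ownership of the incoming
// batch, whose size (incomingSize) was measured by RecordBatchSizer.
private boolean isSpillNeeded(long incomingSize) {
  // Memory currently held by this operator's allocator.
  long currentMem = allocator.getAllocatedMemory();
  // Spill if accepting the incoming batch would push us past the
  // memory budget reserved for buffered, in-memory batches.
  return currentMem + incomingSize > bufferMemoryPool;
}

A real implementation may apply additional conditions, such as requiring a minimum number of buffered batches before a spill is worthwhile. The key point is the ordering: because RecordBatchSizer measures the incoming batch while it is still owned by the upstream allocator, the check can run before the ownership transfer would overflow this operator's budget.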
Use of org.apache.drill.exec.physical.impl.spill.RecordBatchSizer in project drill by apache.
Class ExternalSortBatch, method analyzeIncomingBatch:
/**
 * Scan the vectors in the incoming batch to determine the batch size and
 * whether any oversized columns exist. (Oversized columns cause memory
 * fragmentation.)
 *
 * @return an analysis of the incoming batch
 */
private RecordBatchSizer analyzeIncomingBatch() {
  RecordBatchSizer sizer = new RecordBatchSizer(incoming);
  sizer.applySv2();
  if (inputBatchCount == 0) {
    logger.debug("{}", sizer.toString());
  }
  return sizer;
}
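For context, RecordBatchSizer can be exercised the same way outside the sort. The sketch below assumes only the API seen in the two methods above (the constructor, applySv2(), actualSize(), and netSize()); the batch variable is hypothetical, and the characterization of the two size methods is inferred from how the sort uses them (actualSize() for spill decisions against allocated memory, netSize() recorded as the batch's data footprint):

// Minimal usage sketch of RecordBatchSizer, using only the API seen above.
// 'batch' is a hypothetical RecordBatch received from an upstream operator.
RecordBatchSizer sizer = new RecordBatchSizer(batch);

// Account for the selection vector's memory, as analyzeIncomingBatch() does.
sizer.applySv2();

// actualSize() is used above when checking against the memory budget;
// netSize() is what the sort records for each buffered batch group.
logger.debug("actual = {} bytes, net = {} bytes",
    sizer.actualSize(), sizer.netSize());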