Use of org.apache.drill.exec.physical.impl.sort.RecordBatchData in project drill by apache.
The class ExternalSortBatch, method processBatch:
/**
 * Process the converted incoming batch by adding it to the in-memory store
 * of data, or spilling data to disk when necessary.
 */
@SuppressWarnings("resource")
private void processBatch() {
  if (incoming.getRecordCount() == 0) {
    return;
  }

  // Determine the actual size of the incoming batch before taking
  // ownership. This lets us decide whether we need to spill first,
  // to avoid overflowing memory simply due to the ownership transfer.

  RecordBatchSizer sizer = analyzeIncomingBatch();
  if (isSpillNeeded(sizer.actualSize())) {
    spillFromMemory();
  }

  // Sanity check. We should now be below the buffer memory maximum.

  long startMem = allocator.getAllocatedMemory();
  if (startMem > bufferMemoryPool) {
    logger.error("ERROR: Failed to spill below the buffer limit. Buffer pool = {}, memory = {}",
        bufferMemoryPool, startMem);
  }

  // Convert the incoming batch to the agreed-upon schema.
  // No converted batch means we got an empty input batch.
  // Converting the batch transfers memory ownership to our
  // allocator. This gives a roundabout way to learn the batch
  // size: check the before and after memory levels, then use
  // the difference as the batch size, in bytes.

  VectorContainer convertedBatch = convertBatch();
  if (convertedBatch == null) {
    return;
  }

  SelectionVector2 sv2;
  try {
    sv2 = makeSelectionVector();
  } catch (Exception e) {
    convertedBatch.clear();
    throw e;
  }

  // Compute the batch size, including the allocation of the sv2.

  long endMem = allocator.getAllocatedMemory();
  long batchSize = endMem - startMem;
  int count = sv2.getCount();
  inputRecordCount += count;
  inputBatchCount++;
  totalInputBytes += sizer.actualSize();
  if (minimumBufferSpace == 0) {
    minimumBufferSpace = endMem;
  } else {
    minimumBufferSpace = Math.min(minimumBufferSpace, endMem);
  }
  stats.setLongStat(Metric.MIN_BUFFER, minimumBufferSpace);

  // Update the size estimates based on the actual record count, not
  // the effective count as given by the selection vector
  // (which may exclude some records due to filtering).

  updateMemoryEstimates(batchSize, sizer);

  // Sort the incoming batch using either the original selection vector,
  // or a new one created here.

  SingleBatchSorter sorter = opCodeGen.getSorter(convertedBatch);
  try {
    sorter.setup(context, sv2, convertedBatch);
  } catch (SchemaChangeException e) {
    convertedBatch.clear();
    throw UserException.unsupportedError(e)
        .message("Unexpected schema change.")
        .build(logger);
  }
  try {
    sorter.sort(sv2);
  } catch (SchemaChangeException e) {
    convertedBatch.clear();
    throw UserException.unsupportedError(e)
        .message("Unexpected schema change.")
        .build(logger);
  }

  RecordBatchData rbd = new RecordBatchData(convertedBatch, allocator);
  try {
    rbd.setSv2(sv2);
    bufferedBatches.add(new BatchGroup.InputBatch(rbd.getContainer(), rbd.getSv2(),
        oContext, sizer.netSize()));
    if (peakNumBatches < bufferedBatches.size()) {
      peakNumBatches = bufferedBatches.size();
      stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
    }
  } catch (Throwable t) {
    rbd.clear();
    throw t;
  }
}
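The last few lines above show the core RecordBatchData idiom: wrap the converted container to transfer vector ownership, attach the selection vector, and release everything on failure. A minimal sketch of that idiom in isolation, assuming the allocator, converted container, sv2, and buffered list are supplied by the operator (bufferSortedBatch is a hypothetical helper name):

private void bufferSortedBatch(VectorContainer converted, SelectionVector2 sv2,
                               BufferAllocator allocator,
                               List<RecordBatchData> buffered) {
  // Wrapping the container hands its vectors to our allocator.
  RecordBatchData rbd = new RecordBatchData(converted, allocator);
  try {
    rbd.setSv2(sv2);   // attach the sort order without copying any data
    buffered.add(rbd); // the buffered list now owns the wrapped batch
  } catch (Throwable t) {
    rbd.clear();       // on any failure, release the vectors we now own
    throw t;
  }
}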
Use of org.apache.drill.exec.physical.impl.sort.RecordBatchData in project drill by apache.
The class MergeSort, method merge:
/**
 * Merge the set of in-memory batches to produce a single logical output in the given
 * destination container, indexed by an SV4.
 *
 * @param batchGroups the complete set of in-memory batches
 * @param batch the record batch (operator) for the sort operator
 * @param destContainer the vector container for the sort operator
 * @return the sv4 for this operator
 */
public SelectionVector4 merge(LinkedList<BatchGroup.InputBatch> batchGroups,
                              VectorAccessible batch, VectorContainer destContainer) {

  // Add the buffered batches to a collection that MSorter can use.
  // The builder takes ownership of the batches and will release them if
  // an error occurs.

  builder = new SortRecordBatchBuilder(oAllocator);
  for (BatchGroup.InputBatch group : batchGroups) {
    RecordBatchData rbd = new RecordBatchData(group.getContainer(), oAllocator);
    rbd.setSv2(group.getSv2());
    builder.add(rbd);
  }
  batchGroups.clear();

  try {
    builder.build(context, destContainer);
    sv4 = builder.getSv4();
    mSorter = opCg.createNewMSorter(batch);
    mSorter.setup(context, oAllocator, sv4, destContainer, sv4.getCount());
  } catch (SchemaChangeException e) {
    throw UserException.unsupportedError(e)
        .message("Unexpected schema change - likely a code error.")
        .build(logger);
  }

  // For memory-leak testing, inject an exception after mSorter finishes setup.
  ExternalSortBatch.injector.injectUnchecked(context.getExecutionControls(),
      ExternalSortBatch.INTERRUPTION_AFTER_SETUP);

  mSorter.sort(destContainer);

  // The sort may have exited prematurely because shouldContinue() returned false.
  if (!context.shouldContinue()) {
    return null;
  }

  // For memory-leak testing, inject an exception after mSorter finishes sorting.
  ExternalSortBatch.injector.injectUnchecked(context.getExecutionControls(),
      ExternalSortBatch.INTERRUPTION_AFTER_SORT);

  sv4 = mSorter.getSV4();
  destContainer.buildSchema(SelectionVectorMode.FOUR_BYTE);
  return sv4;
}
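The returned SV4 indexes rows across the hyper-batch: by Drill convention each 4-byte entry packs the batch index in the upper 16 bits and the record index in the lower 16 bits. A minimal sketch of walking the merged result in sorted order (readRow is a hypothetical consumer):

for (int i = 0; i < sv4.getCount(); i++) {
  int packed = sv4.get(i);           // one packed 4-byte index per row
  int batchIndex = packed >>> 16;    // which batch in the hyper-container
  int recordIndex = packed & 0xFFFF; // which row within that batch
  readRow(batchIndex, recordIndex);
}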
Use of org.apache.drill.exec.physical.impl.sort.RecordBatchData in project drill by apache.
The class RecordIterator, method reset:
public void reset() {
  if (!enableMarkAndReset) {
    throw new UnsupportedOperationException("mark and reset are disabled for this RecordIterator");
  }
  if (markedOuterPosition >= 0) {
    // Move to the rbd that holds markedOuterPosition.
    final RecordBatchData rbdNew = batches.get(markedOuterPosition);
    final RecordBatchData rbdOld = batches.get(startBatchPosition);
    assert rbdOld != null;
    assert rbdNew != null;
    if (rbdNew != rbdOld) {
      container.transferOut(rbdOld.getContainer());
      container.transferIn(rbdNew.getContainer());
    }
    innerPosition = markedInnerPosition;
    outerPosition = markedOuterPosition;
    final Range<Long> markedBatchRange = batches.getEntry(outerPosition).getKey();
    startBatchPosition = markedBatchRange.lowerEndpoint();
    innerRecordCount = (int) (markedBatchRange.upperEndpoint() - startBatchPosition);
    markedInnerPosition = -1;
    markedOuterPosition = -1;
  }
}
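reset() restores only what a prior mark() recorded, so the caller pattern is mark, scan ahead, rewind. A minimal sketch, assuming an iterator constructed with mark-and-reset enabled; the scan-ahead in the middle is elided:

void probeAndRewind(RecordIterator iterator) {
  iterator.mark();   // snapshot the current outer and inner positions
  // ... read forward through the buffered batches, e.g. over a run of
  // equal join keys ...
  iterator.reset();  // rewind; reset() swaps the marked batch back into the container
}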
Use of org.apache.drill.exec.physical.impl.sort.RecordBatchData in project drill by apache.
The class NestedLoopJoinBatch, method addBatchToHyperContainer:
private void addBatchToHyperContainer(RecordBatch inputBatch) {
  final RecordBatchData batchCopy = new RecordBatchData(inputBatch, oContext.getAllocator());
  boolean success = false;
  try {
    rightCounts.addLast(inputBatch.getRecordCount());
    rightContainer.addBatch(batchCopy.getContainer());
    success = true;
  } finally {
    if (!success) {
      batchCopy.clear();
    }
  }
}
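The method uses a success flag rather than a catch block so that any failure, checked or unchecked, releases the copied vectors exactly once while still propagating the original exception. A generic rendering of the same idiom, as a sketch with AutoCloseable standing in for Drill's clear():

static <T extends AutoCloseable> void addOwned(java.util.List<T> registry, T owned)
    throws Exception {
  boolean success = false;
  try {
    registry.add(owned); // any step in here may throw
    success = true;      // reached only if every step succeeded
  } finally {
    if (!success) {
      owned.close();     // failure path: release ownership exactly once
    }
  }
}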
Use of org.apache.drill.exec.physical.impl.sort.RecordBatchData in project drill by apache.
The class TopNBatch, method innerNext:
@Override
public IterOutcome innerNext() {
  recordCount = 0;
  if (state == BatchState.DONE) {
    return IterOutcome.NONE;
  }
  if (schema != null) {
    if (getSelectionVector4().next()) {
      recordCount = sv4.getCount();
      return IterOutcome.OK;
    } else {
      recordCount = 0;
      return IterOutcome.NONE;
    }
  }
  try {
    outer:
    while (true) {
      Stopwatch watch = Stopwatch.createStarted();
      IterOutcome upstream;
      if (first) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
        first = false;
      } else {
        upstream = next(incoming);
      }
      if (upstream == IterOutcome.OK && schema == null) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
        container.clear();
      }
      logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
      switch (upstream) {
        case NONE:
          break outer;
        case NOT_YET:
          throw new UnsupportedOperationException();
        case OUT_OF_MEMORY:
        case STOP:
          return upstream;
        case OK_NEW_SCHEMA:
          // Only react when the schema truly changes; artificial schema
          // changes are ignored.
          if (!incoming.getSchema().equals(schema)) {
            if (schema != null) {
              if (!unionTypeEnabled) {
                throw new UnsupportedOperationException(
                    "Sort doesn't currently support sorts with changing schemas.");
              } else {
                this.schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
                purgeAndResetPriorityQueue();
                this.schemaChanged = true;
              }
            } else {
              this.schema = incoming.getSchema();
            }
          }
          // fall through
        case OK:
          if (incoming.getRecordCount() == 0) {
            for (VectorWrapper<?> w : incoming) {
              w.clear();
            }
            break;
          }
          countSincePurge += incoming.getRecordCount();
          batchCount++;
          RecordBatchData batch;
          if (schemaChanged) {
            batch = new RecordBatchData(
                SchemaUtil.coerceContainer(incoming, this.schema, oContext),
                oContext.getAllocator());
          } else {
            batch = new RecordBatchData(incoming, oContext.getAllocator());
          }
          boolean success = false;
          try {
            batch.canonicalize();
            if (priorityQueue == null) {
              assert !schemaChanged;
              priorityQueue = createNewPriorityQueue(context, config.getOrderings(),
                  new ExpandableHyperContainer(batch.getContainer()),
                  MAIN_MAPPING, LEFT_MAPPING, RIGHT_MAPPING);
            }
            priorityQueue.add(context, batch);
            if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
              purge();
              countSincePurge = 0;
              batchCount = 0;
            }
            success = true;
          } finally {
            if (!success) {
              batch.clear();
            }
          }
          break;
        default:
          throw new UnsupportedOperationException();
      }
    }

    if (schema == null || priorityQueue == null) {
      // The priority queue may still be null here if all incoming
      // batches were empty.
      state = BatchState.DONE;
      return IterOutcome.NONE;
    }
    priorityQueue.generate();
    this.sv4 = priorityQueue.getFinalSv4();
    container.clear();
    for (VectorWrapper<?> w : priorityQueue.getHyperBatch()) {
      container.add(w.getValueVectors());
    }
    container.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    recordCount = sv4.getCount();
    return IterOutcome.OK_NEW_SCHEMA;
  } catch (SchemaChangeException | ClassTransformationException | IOException ex) {
    kill(false);
    logger.error("Failure during query", ex);
    context.fail(ex);
    return IterOutcome.STOP;
  }
}
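The method is two-phase: the first call buffers and ranks all input, then returns OK_NEW_SCHEMA with an SV4 over the top rows; later calls (the schema != null branch at the top) just step the SV4 window until NONE. A hedged sketch of the downstream view, where topn is an assumed RecordBatch handle on this operator and consume() is a hypothetical downstream step:

IterOutcome outcome = topn.next();   // first call does all the buffering
while (outcome == IterOutcome.OK_NEW_SCHEMA || outcome == IterOutcome.OK) {
  int rows = topn.getRecordCount();  // rows addressed by the current SV4 window
  consume(topn, rows);
  outcome = topn.next();             // advance the window until NONE
}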