Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.
Class HashAggTemplate, method allocateOutgoing:
/**
 * Allocate space for the returned aggregate columns
 * (Note DRILL-5588: Maybe can eliminate this allocation (and copy))
 *
 * @param records number of records for which to allocate space
 */
private void allocateOutgoing(int records) {
  // Skip the keys and only allocate for outputting the workspace values
  // (keys will be output through splitAndTransfer)
  Iterator<VectorWrapper<?>> outgoingIter = outContainer.iterator();
  for (int i = 0; i < numGroupByOutFields; i++) {
    outgoingIter.next();
  }

  // Try to preempt an OOM by using the reserved memory
  useReservedOutgoingMemory();
  long allocatedBefore = allocator.getAllocatedMemory();

  while (outgoingIter.hasNext()) {
    ValueVector vv = outgoingIter.next().getValueVector();

    // Do not allocate complex vectors here, to avoid losing their content,
    // since their writers will still be used in the generated code
    TypeProtos.MajorType majorType = vv.getField().getType();
    if (!Types.isComplex(majorType) && !Types.isUnion(majorType) && !Types.isRepeated(majorType)) {
      AllocationHelper.allocatePrecomputedChildCount(vv, records, maxColumnWidth, 0);
    }
  }

  long memAdded = allocator.getAllocatedMemory() - allocatedBefore;
  if (memAdded > estOutgoingAllocSize) {
    logger.trace("Output values allocated {} but the estimate was only {}. Adjusting ...", memAdded, estOutgoingAllocSize);
    estOutgoingAllocSize = memAdded;
  }
  outContainer.setRecordCount(records);

  // Try to restore the reserve
  restoreReservedMemory();
}
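The allocate / measure / adjust pattern above is the interesting part: the operator dips into its reserved memory before allocating, measures how much the allocator actually grew, and raises its running estimate whenever the real cost exceeds it. Below is a minimal, self-contained sketch of that feedback loop; MemoryTracker and its fields are hypothetical stand-ins for Drill's BufferAllocator and the reserve bookkeeping, not Drill APIs.

// Sketch of the allocate / measure / adjust-estimate pattern (hypothetical types).
class MemoryTracker {
  private long allocated = 0;
  private long reservedBytes = 1 << 20;   // reserve held back to preempt an OOM
  private long estAllocSize = 64 * 1024;  // running estimate of a batch's cost

  long getAllocatedMemory() { return allocated; }

  // A real allocator could throw an out-of-memory error here
  void allocate(long bytes) { allocated += bytes; }

  void allocateBatch(long[] columnSizes) {
    long saved = reservedBytes;
    reservedBytes = 0;                    // make the reserve available for this allocation
    long before = getAllocatedMemory();
    for (long size : columnSizes) {
      allocate(size);
    }
    long added = getAllocatedMemory() - before;
    if (added > estAllocSize) {
      estAllocSize = added;               // the estimate was low; learn the actual cost
    }
    reservedBytes = saved;                // restore the reserve for the next batch
  }
}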
Class HashAggTemplate, method updateEstMaxBatchSize:
/**
 * Update the estimated max batch size to be used in the Hash Aggr Op,
 * using the record batch sizer to get the row width.
 *
 * @param incoming the incoming record batch
 */
private void updateEstMaxBatchSize(RecordBatch incoming) {
  // no handling of a schema (or varchar) change
  if (estMaxBatchSize > 0) {
    return;
  }

  // Use the sizer to get the input row width and the length of the longest varchar column
  RecordBatchSizer sizer = outgoing.getRecordBatchMemoryManager().getRecordBatchSizer();
  logger.trace("Incoming sizer: {}", sizer);

  // An empty batch only has the schema, so the actual varchar lengths are unknown;
  // else use the actual varchar lengths, each capped at 50 (to match the space allocation)
  long estInputRowWidth = sizer.rowCount() == 0 ? sizer.getStdRowWidth() : sizer.getNetRowWidthCap50();

  // Get the approximate max (varchar) column width for better memory allocation
  maxColumnWidth = Math.max(sizer.getMaxAvgColumnSize(), VARIABLE_MIN_WIDTH_VALUE_SIZE);
  maxColumnWidth = Math.min(maxColumnWidth, VARIABLE_MAX_WIDTH_VALUE_SIZE);

  //
  // Calculate the estimated max (internal) batch size (i.e. keys batch + values batch),
  // which is used to decide when to spill.
  // Also calculate the values batch size (used as a reserve to overcome an OOM).
  //
  Iterator<VectorWrapper<?>> outgoingIter = outContainer.iterator();
  int fieldId = 0;
  while (outgoingIter.hasNext()) {
    ValueVector vv = outgoingIter.next().getValueVector();
    MaterializedField mr = vv.getField();
    int fieldSize = vv instanceof VariableWidthVector ? maxColumnWidth : TypeHelper.getSize(mr.getType());
    estRowWidth += fieldSize;
    estOutputRowWidth += fieldSize;
    if (fieldId < numGroupByOutFields) {
      fieldId++;
    } else {
      estValuesRowWidth += fieldSize;
    }
  }

  // Multiply by the max number of rows in a batch to get the final estimated max size
  long estimatedMaxWidth = Math.max(estRowWidth, estInputRowWidth);
  estMaxBatchSize = estimatedMaxWidth * MAX_BATCH_ROW_COUNT;

  // The estimated batch size should not exceed the configured output batch size
  int configuredBatchSize = outgoing.getRecordBatchMemoryManager().getOutputBatchSize();
  estMaxBatchSize = Math.min(estMaxBatchSize, configuredBatchSize);

  // Work back the number of rows (may have been reduced from MAX_BATCH_ROW_COUNT)
  long rowsInBatch = estMaxBatchSize / estimatedMaxWidth;

  // (When there are no aggr functions, use '1', as later code relies on this size being non-zero)
  estValuesBatchSize = Math.max(estValuesRowWidth, 1) * rowsInBatch;
  estOutgoingAllocSize = estValuesBatchSize; // initially assume the same size

  logger.trace("{} phase. Estimated internal row width: {} Values row width: {} batch size: {} memory limit: {} max column width: {}",
      phase.getName(), estRowWidth, estValuesRowWidth, estMaxBatchSize, allocator.getLimit(), maxColumnWidth);

  if (estMaxBatchSize > allocator.getLimit()) {
    logger.warn("HashAggregate: Estimated max batch size {} is larger than the memory limit {}", estMaxBatchSize, allocator.getLimit());
  }
}
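To make the sizing arithmetic concrete, here is a small runnable walk-through with illustrative numbers; the row widths and the 16 MB configured limit are invented for the example, and the 64K row-count constant is an assumption standing in for MAX_BATCH_ROW_COUNT, not a value read from Drill.

// Worked example of the batch-size estimation math (all numbers illustrative).
public class BatchSizeEstimate {
  static final int MAX_BATCH_ROW_COUNT = 65536; // assumed value of the constant

  public static void main(String[] args) {
    long estRowWidth = 120;        // internal row width: keys + values (bytes)
    long estInputRowWidth = 80;    // measured by the sizer on the incoming batch
    long estValuesRowWidth = 40;   // values-only portion of the row
    int configuredBatchSize = 16 * 1024 * 1024; // configured output batch size

    long estimatedMaxWidth = Math.max(estRowWidth, estInputRowWidth);  // 120
    long estMaxBatchSize = estimatedMaxWidth * MAX_BATCH_ROW_COUNT;    // 7,864,320
    estMaxBatchSize = Math.min(estMaxBatchSize, configuredBatchSize);  // under the cap

    // Work back the number of rows (it may have shrunk below MAX_BATCH_ROW_COUNT)
    long rowsInBatch = estMaxBatchSize / estimatedMaxWidth;            // 65,536

    // Values batch size, kept non-zero even when there are no aggregate functions
    long estValuesBatchSize = Math.max(estValuesRowWidth, 1) * rowsInBatch; // 2,621,440

    System.out.printf("max batch: %d bytes, rows: %d, values batch: %d bytes%n",
        estMaxBatchSize, rowsInBatch, estValuesBatchSize);
  }
}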
Class HashPartition, method allocateNewVectorContainer:
/**
 * Allocate a new vector container for either the right or the left record batch,
 * and add an additional special vector for the hash values.
 * Note: this call may OOM !!
 *
 * @param rb - either the right or the left record batch
 * @return the new vector container
 */
private VectorContainer allocateNewVectorContainer(RecordBatch rb) {
  VectorContainer newVC = new VectorContainer();
  VectorContainer fromVC = rb.getContainer();
  Iterator<VectorWrapper<?>> vci = fromVC.iterator();
  boolean success = false;

  try {
    while (vci.hasNext()) {
      VectorWrapper<?> vw = vci.next();
      // If processing a spilled container, skip the last column (HV)
      if (cycleNum > 0 && !vci.hasNext()) {
        break;
      }
      ValueVector vv = vw.getValueVector();
      ValueVector newVV = TypeHelper.getNewVector(vv.getField(), allocator);
      newVC.add(newVV); // add first to allow dealloc in case of an OOM

      if (newVV instanceof FixedWidthVector) {
        ((FixedWidthVector) newVV).allocateNew(recordsPerBatch);
      } else if (newVV instanceof VariableWidthVector) {
        ((VariableWidthVector) newVV).allocateNew(maxColumnWidth * recordsPerBatch, recordsPerBatch);
      } else if (newVV instanceof ObjectVector) {
        ((ObjectVector) newVV).allocateNew(recordsPerBatch);
      } else {
        newVV.allocateNew();
      }
    }
    newVC.setRecordCount(0);
    success = true;
  } finally {
    if (!success) {
      newVC.clear(); // in case of an OOM
    }
  }
  return newVC;
}
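The register-first, clear-on-failure shape here is a general resource-safety idiom: each new vector is added to the container before its buffers are allocated, so if any allocateNew() call throws, the single clear() in the finally block releases everything allocated so far. A minimal sketch of the same idiom under hypothetical types; Resource and ResourceGroup are stand-ins for ValueVector and VectorContainer, not Drill classes.

import java.util.ArrayList;
import java.util.List;

// Hypothetical types illustrating the register-first, clear-on-failure idiom.
class Resource {
  void allocate() { /* may throw an out-of-memory error */ }
  void release() { }
}

class ResourceGroup {
  private final List<Resource> members = new ArrayList<>();

  void add(Resource r) { members.add(r); }

  void clear() {
    for (Resource r : members) {
      r.release();
    }
    members.clear();
  }

  static ResourceGroup allocateAll(int count) {
    ResourceGroup group = new ResourceGroup();
    boolean success = false;
    try {
      for (int i = 0; i < count; i++) {
        Resource r = new Resource();
        group.add(r);   // register before allocating, so a failure can be cleaned up
        r.allocate();   // if this throws, the finally block releases everything
      }
      success = true;
    } finally {
      if (!success) {
        group.clear();  // undo the partial allocation on any failure
      }
    }
    return group;
  }
}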