Search in sources :

Example 71 with VectorWrapper

use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.

From the class HashAggTemplate, the method allocateOutgoing:

/**
 * Allocates buffers for the value (workspace) vectors of the outgoing
 * container, sized for the given number of records.
 * (Note DRILL-5588: Maybe can eliminate this allocation (and copy))
 * @param records number of records the outgoing vectors must hold
 */
private void allocateOutgoing(int records) {
    // Keys are emitted via splitAndTransfer, so step past the group-by
    // columns and allocate only the workspace value vectors
    Iterator<VectorWrapper<?>> valueVectors = outContainer.iterator();
    for (int skipped = 0; skipped < numGroupByOutFields; skipped++) {
        valueVectors.next();
    }
    // Dip into the reserved memory first to preempt a possible OOM
    useReservedOutgoingMemory();
    long memoryBefore = allocator.getAllocatedMemory();
    while (valueVectors.hasNext()) {
        ValueVector vector = valueVectors.next().getValueVector();
        TypeProtos.MajorType type = vector.getField().getType();
        // Complex/union/repeated vectors must keep their content: their
        // writers are still referenced by generated code, so skip them
        boolean isSimple = !Types.isComplex(type) && !Types.isUnion(type) && !Types.isRepeated(type);
        if (isSimple) {
            AllocationHelper.allocatePrecomputedChildCount(vector, records, maxColumnWidth, 0);
        }
    }
    long memoryAdded = allocator.getAllocatedMemory() - memoryBefore;
    if (memoryAdded > estOutgoingAllocSize) {
        // Actual usage exceeded the estimate; grow the estimate for next time
        logger.trace("Output values allocated {} but the estimate was only {}. Adjusting ...", memoryAdded, estOutgoingAllocSize);
        estOutgoingAllocSize = memoryAdded;
    }
    outContainer.setRecordCount(records);
    // Return what was borrowed from the reserve (best effort)
    restoreReservedMemory();
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) TypeProtos(org.apache.drill.common.types.TypeProtos)

Example 72 with VectorWrapper

use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.

From the class HashAggTemplate, the method updateEstMaxBatchSize:

/**
 *  Update the estimated max batch size to be used in the Hash Aggr Op,
 *  using the record batch sizer to get the row width.
 *  Runs only once; subsequent calls return immediately (no handling of a
 *  schema or varchar change).
 * @param incoming the incoming record batch (sizing data actually comes from
 *                 the outgoing batch's memory manager)
 */
private void updateEstMaxBatchSize(RecordBatch incoming) {
    // Already estimated once; no handling of a schema (or varchar) change
    if (estMaxBatchSize > 0) {
        return;
    }
    // Use the sizer to get the input row width and the length of the longest varchar column
    RecordBatchSizer sizer = outgoing.getRecordBatchMemoryManager().getRecordBatchSizer();
    logger.trace("Incoming sizer: {}", sizer);
    // An empty batch only has the schema, can not tell actual length of varchars
    // else use the actual varchars length, each capped at 50 (to match the space allocation)
    long estInputRowWidth = sizer.rowCount() == 0 ? sizer.getStdRowWidth() : sizer.getNetRowWidthCap50();
    // Get approx max (varchar) column width to get better memory allocation,
    // clamped into [VARIABLE_MIN_WIDTH_VALUE_SIZE, VARIABLE_MAX_WIDTH_VALUE_SIZE]
    maxColumnWidth = Math.max(sizer.getMaxAvgColumnSize(), VARIABLE_MIN_WIDTH_VALUE_SIZE);
    maxColumnWidth = Math.min(maxColumnWidth, VARIABLE_MAX_WIDTH_VALUE_SIZE);
    //
    // Calculate the estimated max (internal) batch (i.e. Keys batch + Values batch) size
    // (which is used to decide when to spill)
    // Also calculate the values batch size (used as a reserve to overcome an OOM)
    //
    Iterator<VectorWrapper<?>> outgoingIter = outContainer.iterator();
    int fieldId = 0;
    while (outgoingIter.hasNext()) {
        ValueVector vv = outgoingIter.next().getValueVector();
        MaterializedField mr = vv.getField();
        // Variable-width columns are estimated at the capped max width;
        // fixed-width columns use their declared type size
        int fieldSize = vv instanceof VariableWidthVector ? maxColumnWidth : TypeHelper.getSize(mr.getType());
        estRowWidth += fieldSize;
        estOutputRowWidth += fieldSize;
        if (fieldId < numGroupByOutFields) {
            fieldId++;
        } else {
            // Past the group-by keys: this is an aggregate value column
            estValuesRowWidth += fieldSize;
        }
    }
    // multiply by the max number of rows in a batch to get the final estimated max size
    // (clamp the width to at least 1 to avoid a divide-by-zero below for a
    // degenerate zero-width schema -- same convention as estValuesRowWidth)
    long estimatedMaxWidth = Math.max(Math.max(estRowWidth, estInputRowWidth), 1L);
    estMaxBatchSize = estimatedMaxWidth * MAX_BATCH_ROW_COUNT;
    // estimated batch size should not exceed the configuration given size
    int configuredBatchSize = outgoing.getRecordBatchMemoryManager().getOutputBatchSize();
    estMaxBatchSize = Math.min(estMaxBatchSize, configuredBatchSize);
    // work back the number of rows (may have been reduced from MAX_BATCH_ROW_COUNT)
    long rowsInBatch = estMaxBatchSize / estimatedMaxWidth;
    // (When there are no aggr functions, use '1' as later code relies on this size being non-zero)
    estValuesBatchSize = Math.max(estValuesRowWidth, 1) * rowsInBatch;
    // initially assume same size
    estOutgoingAllocSize = estValuesBatchSize;
    logger.trace("{} phase. Estimated internal row width: {} Values row width: {} batch size: {}  memory limit: {}  max column width: {}", phase.getName(), estRowWidth, estValuesRowWidth, estMaxBatchSize, allocator.getLimit(), maxColumnWidth);
    if (estMaxBatchSize > allocator.getLimit()) {
        logger.warn("HashAggregate: Estimated max batch size {} is larger than the memory limit {}", estMaxBatchSize, allocator.getLimit());
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) RecordBatchSizer(org.apache.drill.exec.record.RecordBatchSizer) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) MaterializedField(org.apache.drill.exec.record.MaterializedField) VariableWidthVector(org.apache.drill.exec.vector.VariableWidthVector)

Example 73 with VectorWrapper

use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.

From the class HashPartition, the method allocateNewVectorContainer:

/**
 * Allocate a new vector container mirroring the schema of either the right
 * or the left record batch, with an additional special vector for the hash
 * values already present in spilled batches.
 * Note: this call may OOM !!
 * @param rb - either the right or the left record batch
 * @return the new vector container
 */
private VectorContainer allocateNewVectorContainer(RecordBatch rb) {
    VectorContainer result = new VectorContainer();
    Iterator<VectorWrapper<?>> sourceVectors = rb.getContainer().iterator();
    boolean allocated = false;
    try {
        while (sourceVectors.hasNext()) {
            VectorWrapper<?> wrapper = sourceVectors.next();
            // A spilled container carries a trailing hash-values (HV)
            // column; skip that last column when re-reading a spill
            if (cycleNum > 0 && !sourceVectors.hasNext()) {
                break;
            }
            ValueVector freshVector = TypeHelper.getNewVector(wrapper.getValueVector().getField(), allocator);
            // Register with the container before allocating so an OOM
            // mid-loop can still be cleaned up in the finally block
            result.add(freshVector);
            if (freshVector instanceof FixedWidthVector) {
                ((FixedWidthVector) freshVector).allocateNew(recordsPerBatch);
            } else if (freshVector instanceof VariableWidthVector) {
                ((VariableWidthVector) freshVector).allocateNew(maxColumnWidth * recordsPerBatch, recordsPerBatch);
            } else if (freshVector instanceof ObjectVector) {
                ((ObjectVector) freshVector).allocateNew(recordsPerBatch);
            } else {
                freshVector.allocateNew();
            }
        }
        result.setRecordCount(0);
        allocated = true;
    } finally {
        if (!allocated) {
            // release any partial allocations after an OOM
            result.clear();
        }
    }
    return result;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) FixedWidthVector(org.apache.drill.exec.vector.FixedWidthVector) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) VariableWidthVector(org.apache.drill.exec.vector.VariableWidthVector) VectorContainer(org.apache.drill.exec.record.VectorContainer) ObjectVector(org.apache.drill.exec.vector.ObjectVector)

Aggregations

VectorWrapper (org.apache.drill.exec.record.VectorWrapper)73 ValueVector (org.apache.drill.exec.vector.ValueVector)44 Test (org.junit.Test)39 RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader)35 QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch)34 DrillClient (org.apache.drill.exec.client.DrillClient)28 Drillbit (org.apache.drill.exec.server.Drillbit)28 RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet)28 SlowTest (org.apache.drill.categories.SlowTest)18 SchemaPath (org.apache.drill.common.expression.SchemaPath)11 ExecTest (org.apache.drill.exec.ExecTest)9 TypedFieldId (org.apache.drill.exec.record.TypedFieldId)9 VectorContainer (org.apache.drill.exec.record.VectorContainer)9 MaterializedField (org.apache.drill.exec.record.MaterializedField)7 IOException (java.io.IOException)6 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)6 Stopwatch (com.google.common.base.Stopwatch)5 OperatorTest (org.apache.drill.categories.OperatorTest)5 TypeProtos (org.apache.drill.common.types.TypeProtos)5 TransferPair (org.apache.drill.exec.record.TransferPair)5