
Example 51 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by axbaretto.

From the class TopNBatch, method innerNext.

@Override
public IterOutcome innerNext() {
    recordCount = 0;
    if (state == BatchState.DONE) {
        return IterOutcome.NONE;
    }
    if (schema != null) {
        if (getSelectionVector4().next()) {
            recordCount = sv4.getCount();
            return IterOutcome.OK;
        } else {
            recordCount = 0;
            return IterOutcome.NONE;
        }
    }
    try {
        outer: while (true) {
            Stopwatch watch = Stopwatch.createStarted();
            IterOutcome upstream;
            if (first) {
                upstream = IterOutcome.OK_NEW_SCHEMA;
                first = false;
            } else {
                upstream = next(incoming);
            }
            if (upstream == IterOutcome.OK && schema == null) {
                upstream = IterOutcome.OK_NEW_SCHEMA;
                container.clear();
            }
            logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
            switch(upstream) {
                case NONE:
                    break outer;
                case NOT_YET:
                    throw new UnsupportedOperationException();
                case OUT_OF_MEMORY:
                case STOP:
                    return upstream;
                case OK_NEW_SCHEMA:
                    // only change in the case that the schema truly changes.  Artificial schema changes are ignored.
                    if (!incoming.getSchema().equals(schema)) {
                        if (schema != null) {
                            if (!unionTypeEnabled) {
                                throw new UnsupportedOperationException("Sort doesn't currently support sorts with changing schemas.");
                            } else {
                                this.schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
                                purgeAndResetPriorityQueue();
                                this.schemaChanged = true;
                            }
                        } else {
                            this.schema = incoming.getSchema();
                        }
                    }
                // fall through.
                case OK:
                    if (incoming.getRecordCount() == 0) {
                        for (VectorWrapper<?> w : incoming) {
                            w.clear();
                        }
                        break;
                    }
                    countSincePurge += incoming.getRecordCount();
                    batchCount++;
                    RecordBatchData batch;
                    if (schemaChanged) {
                        batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
                    } else {
                        batch = new RecordBatchData(incoming, oContext.getAllocator());
                    }
                    boolean success = false;
                    try {
                        if (priorityQueue == null) {
                            assert !schemaChanged;
                            priorityQueue = createNewPriorityQueue(new ExpandableHyperContainer(batch.getContainer()), config.getLimit());
                        }
                        priorityQueue.add(batch);
                        if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
                            purge();
                            countSincePurge = 0;
                            batchCount = 0;
                        }
                        success = true;
                    } finally {
                        if (!success) {
                            batch.clear();
                        }
                    }
                    break;
                default:
                    throw new UnsupportedOperationException();
            }
        }
        if (schema == null || priorityQueue == null) {
            // the priority queue may still be null at this point if the incoming batches were all empty
            state = BatchState.DONE;
            return IterOutcome.NONE;
        }
        priorityQueue.generate();
        this.sv4 = priorityQueue.getFinalSv4();
        container.clear();
        for (VectorWrapper<?> w : priorityQueue.getHyperBatch()) {
            container.add(w.getValueVectors());
        }
        container.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
        recordCount = sv4.getCount();
        return IterOutcome.OK_NEW_SCHEMA;
    } catch (SchemaChangeException | ClassTransformationException | IOException ex) {
        kill(false);
        logger.error("Failure during query", ex);
        context.getExecutorState().fail(ex);
        return IterOutcome.STOP;
    }
}
Also used : ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) ClassTransformationException(org.apache.drill.exec.exception.ClassTransformationException) RecordBatchData(org.apache.drill.exec.physical.impl.sort.RecordBatchData) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Stopwatch(com.google.common.base.Stopwatch) IOException(java.io.IOException)
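
Distilled from this example, the two recurring VectorWrapper idioms are clearing the vectors of an empty incoming batch and rebuilding the output container from a hyper batch. The sketch below isolates just those two steps; it is a minimal illustration assuming the Drill exec module is on the classpath, and the class and method names (TopNVectorWrapperSketch, clearEmptyBatch, rebuildFromHyperBatch) are hypothetical, not part of Drill.

import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;

class TopNVectorWrapperSketch {

    // Release the buffers of an incoming batch that carried no rows.
    static void clearEmptyBatch(RecordBatch incoming) {
        for (VectorWrapper<?> w : incoming) {
            w.clear();
        }
    }

    // Copy each wrapper's stack of value vectors from a hyper batch into the
    // output container and mark the container as addressed by a four-byte
    // selection vector, mirroring the tail of innerNext() above.
    static void rebuildFromHyperBatch(Iterable<VectorWrapper<?>> hyperBatch, VectorContainer container) {
        container.clear();
        for (VectorWrapper<?> w : hyperBatch) {
            container.add(w.getValueVectors());
        }
        container.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    }
}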

Example 52 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by axbaretto.

From the class ProjectRecordBatch, method setupNewSchemaFromInput.

private void setupNewSchemaFromInput(RecordBatch incomingBatch) throws SchemaChangeException {
    if (allocationVectors != null) {
        for (final ValueVector v : allocationVectors) {
            v.clear();
        }
    }
    this.allocationVectors = Lists.newArrayList();
    if (complexWriters != null) {
        container.clear();
    } else {
        container.zeroVectors();
    }
    final List<NamedExpression> exprs = getExpressionList();
    final ErrorCollector collector = new ErrorCollectorImpl();
    final List<TransferPair> transfers = Lists.newArrayList();
    final ClassGenerator<Projector> cg = CodeGenerator.getRoot(Projector.TEMPLATE_DEFINITION, context.getOptions());
    cg.getCodeGenerator().plainJavaCapable(true);
    // Uncomment this line to debug the generated code.
    // cg.getCodeGenerator().saveCodeForDebugging(true);
    final IntHashSet transferFieldIds = new IntHashSet();
    final boolean isAnyWildcard = isAnyWildcard(exprs);
    final ClassifierResult result = new ClassifierResult();
    final boolean classify = isClassificationNeeded(exprs);
    for (NamedExpression namedExpression : exprs) {
        result.clear();
        if (classify && namedExpression.getExpr() instanceof SchemaPath) {
            classifyExpr(namedExpression, incomingBatch, result);
            if (result.isStar) {
                // The value indicates which wildcard we are processing now
                final Integer value = result.prefixMap.get(result.prefix);
                if (value != null && value == 1) {
                    int k = 0;
                    for (final VectorWrapper<?> wrapper : incomingBatch) {
                        final ValueVector vvIn = wrapper.getValueVector();
                        if (k > result.outputNames.size() - 1) {
                            assert false;
                        }
                        // get the renamed column names
                        final String name = result.outputNames.get(k++);
                        if (name.isEmpty()) {
                            continue;
                        }
                        if (isImplicitFileColumn(vvIn)) {
                            continue;
                        }
                        final FieldReference ref = new FieldReference(name);
                        final ValueVector vvOut = container.addOrGet(MaterializedField.create(ref.getAsNamePart().getName(), vvIn.getField().getType()), callBack);
                        final TransferPair tp = vvIn.makeTransferPair(vvOut);
                        transfers.add(tp);
                    }
                } else if (value != null && value > 1) {
                    // subsequent wildcards should do a copy of incoming valuevectors
                    int k = 0;
                    for (final VectorWrapper<?> wrapper : incomingBatch) {
                        final ValueVector vvIn = wrapper.getValueVector();
                        final SchemaPath originalPath = SchemaPath.getSimplePath(vvIn.getField().getName());
                        if (k > result.outputNames.size() - 1) {
                            assert false;
                        }
                        // get the renamed column names
                        final String name = result.outputNames.get(k++);
                        if (name.isEmpty()) {
                            continue;
                        }
                        if (isImplicitFileColumn(vvIn)) {
                            continue;
                        }
                        final LogicalExpression expr = ExpressionTreeMaterializer.materialize(originalPath, incomingBatch, collector, context.getFunctionRegistry());
                        if (collector.hasErrors()) {
                            throw new SchemaChangeException(String.format("Failure while trying to materialize incomingBatch schema.  Errors:\n %s.", collector.toErrorString()));
                        }
                        final MaterializedField outputField = MaterializedField.create(name, expr.getMajorType());
                        final ValueVector vv = container.addOrGet(outputField, callBack);
                        allocationVectors.add(vv);
                        final TypedFieldId fid = container.getValueVectorId(SchemaPath.getSimplePath(outputField.getName()));
                        final ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
                        final HoldingContainer hc = cg.addExpr(write, ClassGenerator.BlkCreateMode.TRUE_IF_BOUND);
                    }
                }
                continue;
            }
        } else {
            // For the columns which do not need to be classified,
            // it is still necessary to ensure the output column name is unique
            result.outputNames = Lists.newArrayList();
            final String outputName = getRef(namedExpression).getRootSegment().getPath();
            addToResultMaps(outputName, result, true);
        }
        String outputName = getRef(namedExpression).getRootSegment().getPath();
        if (result != null && result.outputNames != null && result.outputNames.size() > 0) {
            boolean isMatched = false;
            for (int j = 0; j < result.outputNames.size(); j++) {
                if (!result.outputNames.get(j).isEmpty()) {
                    outputName = result.outputNames.get(j);
                    isMatched = true;
                    break;
                }
            }
            if (!isMatched) {
                continue;
            }
        }
        final LogicalExpression expr = ExpressionTreeMaterializer.materialize(namedExpression.getExpr(), incomingBatch, collector, context.getFunctionRegistry(), true, unionTypeEnabled);
        final MaterializedField outputField = MaterializedField.create(outputName, expr.getMajorType());
        if (collector.hasErrors()) {
            throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema.  Errors:\n %s.", collector.toErrorString()));
        }
        // add value vector to transfer if direct reference and this is allowed, otherwise, add to evaluation stack.
        if (expr instanceof ValueVectorReadExpression && incomingBatch.getSchema().getSelectionVectorMode() == SelectionVectorMode.NONE && !((ValueVectorReadExpression) expr).hasReadPath() && !isAnyWildcard && !transferFieldIds.contains(((ValueVectorReadExpression) expr).getFieldId().getFieldIds()[0])) {
            final ValueVectorReadExpression vectorRead = (ValueVectorReadExpression) expr;
            final TypedFieldId id = vectorRead.getFieldId();
            final ValueVector vvIn = incomingBatch.getValueAccessorById(id.getIntermediateClass(), id.getFieldIds()).getValueVector();
            Preconditions.checkNotNull(incomingBatch);
            final FieldReference ref = getRef(namedExpression);
            final ValueVector vvOut = container.addOrGet(MaterializedField.create(ref.getLastSegment().getNameSegment().getPath(), vectorRead.getMajorType()), callBack);
            final TransferPair tp = vvIn.makeTransferPair(vvOut);
            transfers.add(tp);
            transferFieldIds.add(vectorRead.getFieldId().getFieldIds()[0]);
        } else if (expr instanceof DrillFuncHolderExpr && ((DrillFuncHolderExpr) expr).getHolder().isComplexWriterFuncHolder()) {
            // Lazy initialization of the list of complex writers, if not done yet.
            if (complexWriters == null) {
                complexWriters = Lists.newArrayList();
            } else {
                complexWriters.clear();
            }
            // The reference name will be passed to ComplexWriter, used as the name of the output vector from the writer.
            ((DrillFuncHolderExpr) expr).getFieldReference(namedExpression.getRef());
            cg.addExpr(expr, ClassGenerator.BlkCreateMode.TRUE_IF_BOUND);
            if (complexFieldReferencesList == null) {
                complexFieldReferencesList = Lists.newArrayList();
            }
            // save the field reference for later for getting schema when input is empty
            complexFieldReferencesList.add(namedExpression.getRef());
        } else {
            // need to do evaluation.
            final ValueVector vector = container.addOrGet(outputField, callBack);
            allocationVectors.add(vector);
            final TypedFieldId fid = container.getValueVectorId(SchemaPath.getSimplePath(outputField.getName()));
            final boolean useSetSafe = !(vector instanceof FixedWidthVector);
            final ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, useSetSafe);
            final HoldingContainer hc = cg.addExpr(write, ClassGenerator.BlkCreateMode.TRUE_IF_BOUND);
            // We cannot do multiple transfers from the same vector. However we still need to instantiate the output vector.
            if (expr instanceof ValueVectorReadExpression) {
                final ValueVectorReadExpression vectorRead = (ValueVectorReadExpression) expr;
                if (!vectorRead.hasReadPath()) {
                    final TypedFieldId id = vectorRead.getFieldId();
                    final ValueVector vvIn = incomingBatch.getValueAccessorById(id.getIntermediateClass(), id.getFieldIds()).getValueVector();
                    vvIn.makeTransferPair(vector);
                }
            }
            logger.debug("Added eval for project expression.");
        }
    }
    try {
        CodeGenerator<Projector> codeGen = cg.getCodeGenerator();
        codeGen.plainJavaCapable(true);
        // Uncomment this line to debug the generated code.
        // codeGen.saveCodeForDebugging(true);
        this.projector = context.getImplementationClass(codeGen);
        projector.setup(context, incomingBatch, this, transfers);
    } catch (ClassTransformationException | IOException e) {
        throw new SchemaChangeException("Failure while attempting to load generated class", e);
    }
}
Also used : TransferPair(org.apache.drill.exec.record.TransferPair) IntHashSet(com.carrotsearch.hppc.IntHashSet) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) DrillFuncHolderExpr(org.apache.drill.exec.expr.DrillFuncHolderExpr) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) HoldingContainer(org.apache.drill.exec.expr.ClassGenerator.HoldingContainer) SchemaPath(org.apache.drill.common.expression.SchemaPath) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) ValueVectorWriteExpression(org.apache.drill.exec.expr.ValueVectorWriteExpression) FieldReference(org.apache.drill.common.expression.FieldReference) ClassTransformationException(org.apache.drill.exec.exception.ClassTransformationException) FixedWidthVector(org.apache.drill.exec.vector.FixedWidthVector) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) MaterializedField(org.apache.drill.exec.record.MaterializedField) IOException(java.io.IOException) ValueVector(org.apache.drill.exec.vector.ValueVector) ValueVectorReadExpression(org.apache.drill.exec.expr.ValueVectorReadExpression) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) NamedExpression(org.apache.drill.common.logical.data.NamedExpression)
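
The wildcard branch above walks the incoming batch's VectorWrappers and wires a TransferPair from every input vector to a matching output vector. The sketch below keeps only that loop; it is a minimal illustration assuming the Drill exec module is on the classpath, it omits the schema-change callback, implicit-column filtering, and renaming done by the original, and the class and method names are hypothetical.

import java.util.List;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.ValueVector;

class WildcardTransferSketch {

    // For each incoming value vector, create (or reuse) an output vector of the
    // same name and type in the container and pair the two for a zero-copy transfer.
    static void addWildcardTransfers(RecordBatch incomingBatch,
                                     VectorContainer container,
                                     List<TransferPair> transfers) {
        for (VectorWrapper<?> wrapper : incomingBatch) {
            ValueVector vvIn = wrapper.getValueVector();
            MaterializedField outputField =
                MaterializedField.create(vvIn.getField().getName(), vvIn.getField().getType());
            ValueVector vvOut = container.addOrGet(outputField);
            transfers.add(vvIn.makeTransferPair(vvOut));
        }
    }
}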

Example 53 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by axbaretto.

From the class HashAggTemplate, method updateEstMaxBatchSize.

/**
 *  Update the estimated max batch size to be used in the Hash Aggr Op,
 *  using the record batch size to get the row width.
 * @param incoming
 */
private void updateEstMaxBatchSize(RecordBatch incoming) {
    // no handling of a schema (or varchar) change
    if (estMaxBatchSize > 0) {
        return;
    }
    // Use the sizer to get the input row width and the length of the longest varchar column
    RecordBatchSizer sizer = new RecordBatchSizer(incoming);
    logger.trace("Incoming sizer: {}", sizer);
    // An empty batch only has the schema and cannot tell the actual length of varchars;
    // otherwise use the actual varchar lengths, each capped at 50 (to match the space allocation)
    long estInputRowWidth = sizer.rowCount() == 0 ? sizer.stdRowWidth() : sizer.netRowWidthCap50();
    // Get approx max (varchar) column width to get better memory allocation
    maxColumnWidth = Math.max(sizer.maxAvgColumnSize(), VARIABLE_MIN_WIDTH_VALUE_SIZE);
    maxColumnWidth = Math.min(maxColumnWidth, VARIABLE_MAX_WIDTH_VALUE_SIZE);
    // 
    // Calculate the estimated max (internal) batch (i.e. Keys batch + Values batch) size
    // (which is used to decide when to spill)
    // Also calculate the values batch size (used as a reserve to overcome an OOM)
    // 
    Iterator<VectorWrapper<?>> outgoingIter = outContainer.iterator();
    int fieldId = 0;
    while (outgoingIter.hasNext()) {
        ValueVector vv = outgoingIter.next().getValueVector();
        MaterializedField mr = vv.getField();
        int fieldSize = vv instanceof VariableWidthVector ? maxColumnWidth : TypeHelper.getSize(mr.getType());
        estRowWidth += fieldSize;
        estOutputRowWidth += fieldSize;
        if (fieldId < numGroupByOutFields) {
            fieldId++;
        } else {
            estValuesRowWidth += fieldSize;
        }
    }
    // multiply by the max number of rows in a batch to get the final estimated max size
    estMaxBatchSize = Math.max(estRowWidth, estInputRowWidth) * MAX_BATCH_SIZE;
    // (When there are no aggr functions, use '1' as later code relies on this size being non-zero)
    estValuesBatchSize = Math.max(estValuesRowWidth, 1) * MAX_BATCH_SIZE;
    // initially assume same size
    estOutgoingAllocSize = estValuesBatchSize;
    logger.trace("{} phase. Estimated internal row width: {} Values row width: {} batch size: {}  memory limit: {}  max column width: {}", isTwoPhase ? (is2ndPhase ? "2nd" : "1st") : "Single", estRowWidth, estValuesRowWidth, estMaxBatchSize, allocator.getLimit(), maxColumnWidth);
    if (estMaxBatchSize > allocator.getLimit()) {
        logger.warn("HashAggregate: Estimated max batch size {} is larger than the memory limit {}", estMaxBatchSize, allocator.getLimit());
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) RecordBatchSizer(org.apache.drill.exec.record.RecordBatchSizer) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) MaterializedField(org.apache.drill.exec.record.MaterializedField) VariableWidthVector(org.apache.drill.exec.vector.VariableWidthVector)
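
The estimate above reduces to a single pass over the container's VectorWrappers: fixed-width vectors contribute the size of their type, variable-width vectors a capped column width. A minimal sketch of that pass is shown below; the class and method names are hypothetical and the TypeHelper import path is assumed.

import java.util.Iterator;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.VariableWidthVector;

class RowWidthEstimateSketch {

    // Sum a per-field size estimate over the container's vectors: fixed-width
    // fields add their type size, variable-width fields a capped width.
    static int estimateRowWidth(VectorContainer outContainer, int maxColumnWidth) {
        int estRowWidth = 0;
        Iterator<VectorWrapper<?>> it = outContainer.iterator();
        while (it.hasNext()) {
            ValueVector vv = it.next().getValueVector();
            estRowWidth += vv instanceof VariableWidthVector
                ? maxColumnWidth
                : TypeHelper.getSize(vv.getField().getType());
        }
        return estRowWidth;
    }
}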

Example 54 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by axbaretto.

From the class HashAggTemplate, method allocateOutgoing.

/**
 *   Allocate space for the returned aggregate columns
 *   (Note DRILL-5588: Maybe can eliminate this allocation (and copy))
 * @param records
 */
private void allocateOutgoing(int records) {
    // Skip the keys and only allocate for outputting the workspace values
    // (keys will be output through splitAndTransfer)
    Iterator<VectorWrapper<?>> outgoingIter = outContainer.iterator();
    for (int i = 0; i < numGroupByOutFields; i++) {
        outgoingIter.next();
    }
    // try to preempt an OOM by using the reserved memory
    useReservedOutgoingMemory();
    long allocatedBefore = allocator.getAllocatedMemory();
    while (outgoingIter.hasNext()) {
        @SuppressWarnings("resource") ValueVector vv = outgoingIter.next().getValueVector();
        AllocationHelper.allocatePrecomputedChildCount(vv, records, maxColumnWidth, 0);
    }
    long memAdded = allocator.getAllocatedMemory() - allocatedBefore;
    if (memAdded > estOutgoingAllocSize) {
        logger.trace("Output values allocated {} but the estimate was only {}. Adjusting ...", memAdded, estOutgoingAllocSize);
        estOutgoingAllocSize = memAdded;
    }
    // try to restore the reserve
    restoreReservedMemory();
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) VectorWrapper(org.apache.drill.exec.record.VectorWrapper)
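
Read in isolation, the loop above skips the leading key vectors and pre-allocates each remaining value vector for the expected record count. The sketch below shows just that, without the reserved-memory bookkeeping of the original; class and method names are hypothetical and the AllocationHelper import path is assumed.

import java.util.Iterator;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.AllocationHelper;
import org.apache.drill.exec.vector.ValueVector;

class OutgoingAllocationSketch {

    // Skip the leading group-by (key) vectors, then pre-allocate each value
    // vector for the given record count and maximum variable-width column size.
    static void allocateValues(VectorContainer outContainer, int numKeyFields,
                               int records, int maxColumnWidth) {
        Iterator<VectorWrapper<?>> it = outContainer.iterator();
        for (int i = 0; i < numKeyFields; i++) {
            it.next();
        }
        while (it.hasNext()) {
            ValueVector vv = it.next().getValueVector();
            AllocationHelper.allocatePrecomputedChildCount(vv, records, maxColumnWidth, 0);
        }
    }
}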

Example 55 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by axbaretto.

From the class MergeJoinBatch, method setRecordCountInContainer.

private void setRecordCountInContainer() {
    for (VectorWrapper<?> vw : container) {
        Preconditions.checkArgument(!vw.isHyper());
        vw.getValueVector().getMutator().setValueCount(getRecordCount());
    }
}
Also used : VectorWrapper(org.apache.drill.exec.record.VectorWrapper)
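
The same pattern in isolation: iterate the container's VectorWrappers, check that each holds a single (non-hyper) vector, and push the record count into its mutator. A minimal sketch with hypothetical names, assuming the fork's unshaded Guava Preconditions as used elsewhere on this page.

import com.google.common.base.Preconditions;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;

class SetValueCountSketch {

    // Propagate the batch's record count to every simple (non-hyper) vector in
    // the container so downstream readers see the correct value counts.
    static void setValueCount(VectorContainer container, int recordCount) {
        for (VectorWrapper<?> vw : container) {
            Preconditions.checkArgument(!vw.isHyper());
            vw.getValueVector().getMutator().setValueCount(recordCount);
        }
    }
}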

Aggregations

VectorWrapper (org.apache.drill.exec.record.VectorWrapper): 73
ValueVector (org.apache.drill.exec.vector.ValueVector): 44
Test (org.junit.Test): 39
RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader): 35
QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch): 34
DrillClient (org.apache.drill.exec.client.DrillClient): 28
Drillbit (org.apache.drill.exec.server.Drillbit): 28
RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet): 28
SlowTest (org.apache.drill.categories.SlowTest): 18
SchemaPath (org.apache.drill.common.expression.SchemaPath): 11
ExecTest (org.apache.drill.exec.ExecTest): 9
TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 9
VectorContainer (org.apache.drill.exec.record.VectorContainer): 9
MaterializedField (org.apache.drill.exec.record.MaterializedField): 7
IOException (java.io.IOException): 6
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 6
Stopwatch (com.google.common.base.Stopwatch): 5
OperatorTest (org.apache.drill.categories.OperatorTest): 5
TypeProtos (org.apache.drill.common.types.TypeProtos): 5
TransferPair (org.apache.drill.exec.record.TransferPair): 5