Search in sources :

Example 36 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class FlattenRecordBatch method getFlattenFieldTransferPair.

/**
   * The data layout is the same for the actual data within a repeated field, as it is in a scalar vector for
   * the same sql type. For example, a repeated int vector has a vector of offsets into a regular int vector to
   * represent the lists. As the data layout for the actual values in the same in the repeated vector as in the
   * scalar vector of the same type, we can avoid making individual copies for the column being flattened, and just
   * use vector copies between the inner vector of the repeated field to the resulting scalar vector from the flatten
   * operation. This is completed after we determine how many records will fit (as we will hit either a batch end, or
   * the end of one of the other vectors while we are copying the data of the other vectors alongside each new flattened
   * value coming out of the repeated field.)
   */
@SuppressWarnings("resource")
private TransferPair getFlattenFieldTransferPair(FieldReference reference) {
    final TypedFieldId fieldId = incoming.getValueVectorId(popConfig.getColumn());
    final Class<?> vectorClass = incoming.getSchema().getColumn(fieldId.getFieldIds()[0]).getValueClass();
    final ValueVector flattenField = incoming.getValueAccessorById(vectorClass, fieldId.getFieldIds()).getValueVector();
    TransferPair tp = null;
    if (flattenField instanceof RepeatedMapVector) {
        tp = ((RepeatedMapVector) flattenField).getTransferPairToSingleMap(reference.getAsNamePart().getName(), oContext.getAllocator());
    } else if (!(flattenField instanceof RepeatedValueVector)) {
        if (incoming.getRecordCount() != 0) {
            throw UserException.unsupportedError().message("Flatten does not support inputs of non-list values.").build(logger);
        }
        logger.error("Cannot cast {} to RepeatedValueVector", flattenField);
        //when incoming recordCount is 0, don't throw exception since the type being seen here is not solid
        final ValueVector vv = new RepeatedMapVector(flattenField.getField(), oContext.getAllocator(), null);
        tp = RepeatedValueVector.class.cast(vv).getTransferPair(reference.getAsNamePart().getName(), oContext.getAllocator());
    } else {
        final ValueVector vvIn = RepeatedValueVector.class.cast(flattenField).getDataVector();
        // vvIn may be null because of fast schema return for repeated list vectors
        if (vvIn != null) {
            tp = vvIn.getTransferPair(reference.getAsNamePart().getName(), oContext.getAllocator());
        }
    }
    return tp;
}
Also used : RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) TransferPair(org.apache.drill.exec.record.TransferPair) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) TypedFieldId(org.apache.drill.exec.record.TypedFieldId)

Example 37 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class FlattenRecordBatch method setupNewSchema.

@Override
protected boolean setupNewSchema() throws SchemaChangeException {
    this.allocationVectors = Lists.newArrayList();
    container.clear();
    final List<NamedExpression> exprs = getExpressionList();
    final ErrorCollector collector = new ErrorCollectorImpl();
    final List<TransferPair> transfers = Lists.newArrayList();
    final ClassGenerator<Flattener> cg = CodeGenerator.getRoot(Flattener.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    cg.getCodeGenerator().plainJavaCapable(true);
    // Uncomment out this line to debug the generated code.
    //    cg.getCodeGenerator().saveCodeForDebugging(true);
    final IntHashSet transferFieldIds = new IntHashSet();
    final NamedExpression flattenExpr = new NamedExpression(popConfig.getColumn(), new FieldReference(popConfig.getColumn()));
    final ValueVectorReadExpression vectorRead = (ValueVectorReadExpression) ExpressionTreeMaterializer.materialize(flattenExpr.getExpr(), incoming, collector, context.getFunctionRegistry(), true);
    final FieldReference fieldReference = flattenExpr.getRef();
    final TransferPair transferPair = getFlattenFieldTransferPair(fieldReference);
    if (transferPair != null) {
        final ValueVector flattenVector = transferPair.getTo();
        // checks that list has only default ValueVector and replaces resulting ValueVector to INT typed ValueVector
        if (exprs.size() == 0 && flattenVector.getField().getType().equals(Types.LATE_BIND_TYPE)) {
            final MaterializedField outputField = MaterializedField.create(fieldReference.getAsNamePart().getName(), Types.OPTIONAL_INT);
            final ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
            container.add(vector);
        } else {
            transfers.add(transferPair);
            container.add(flattenVector);
            transferFieldIds.add(vectorRead.getFieldId().getFieldIds()[0]);
        }
    }
    logger.debug("Added transfer for project expression.");
    ClassifierResult result = new ClassifierResult();
    for (int i = 0; i < exprs.size(); i++) {
        final NamedExpression namedExpression = exprs.get(i);
        result.clear();
        String outputName = getRef(namedExpression).getRootSegment().getPath();
        if (result != null && result.outputNames != null && result.outputNames.size() > 0) {
            for (int j = 0; j < result.outputNames.size(); j++) {
                if (!result.outputNames.get(j).equals(EMPTY_STRING)) {
                    outputName = result.outputNames.get(j);
                    break;
                }
            }
        }
        final LogicalExpression expr = ExpressionTreeMaterializer.materialize(namedExpression.getExpr(), incoming, collector, context.getFunctionRegistry(), true);
        if (collector.hasErrors()) {
            throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema.  Errors:\n %s.", collector.toErrorString()));
        }
        if (expr instanceof DrillFuncHolderExpr && ((DrillFuncHolderExpr) expr).getHolder().isComplexWriterFuncHolder()) {
            // Lazy initialization of the list of complex writers, if not done yet.
            if (complexWriters == null) {
                complexWriters = Lists.newArrayList();
            }
            // The reference name will be passed to ComplexWriter, used as the name of the output vector from the writer.
            ((DrillFuncHolderExpr) expr).getFieldReference(namedExpression.getRef());
            cg.addExpr(expr);
        } else {
            // need to do evaluation.
            final MaterializedField outputField;
            if (expr instanceof ValueVectorReadExpression) {
                final TypedFieldId id = ValueVectorReadExpression.class.cast(expr).getFieldId();
                @SuppressWarnings("resource") final ValueVector incomingVector = incoming.getValueAccessorById(id.getIntermediateClass(), id.getFieldIds()).getValueVector();
                // when the first batch will be empty.
                if (incomingVector != null) {
                    outputField = incomingVector.getField().clone();
                } else {
                    outputField = MaterializedField.create(outputName, expr.getMajorType());
                }
            } else {
                outputField = MaterializedField.create(outputName, expr.getMajorType());
            }
            @SuppressWarnings("resource") final ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
            allocationVectors.add(vector);
            TypedFieldId fid = container.add(vector);
            ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
            cg.addExpr(write);
            logger.debug("Added eval for project expression.");
        }
    }
    cg.rotateBlock();
    cg.getEvalBlock()._return(JExpr.TRUE);
    container.buildSchema(SelectionVectorMode.NONE);
    try {
        this.flattener = context.getImplementationClass(cg.getCodeGenerator());
        flattener.setup(context, incoming, this, transfers);
    } catch (ClassTransformationException | IOException e) {
        throw new SchemaChangeException("Failure while attempting to load generated class", e);
    }
    return true;
}
Also used : TransferPair(org.apache.drill.exec.record.TransferPair) IntHashSet(com.carrotsearch.hppc.IntHashSet) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) DrillFuncHolderExpr(org.apache.drill.exec.expr.DrillFuncHolderExpr) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) ValueVectorWriteExpression(org.apache.drill.exec.expr.ValueVectorWriteExpression) FieldReference(org.apache.drill.common.expression.FieldReference) ClassTransformationException(org.apache.drill.exec.exception.ClassTransformationException) MaterializedField(org.apache.drill.exec.record.MaterializedField) IOException(java.io.IOException) ValueVectorReadExpression(org.apache.drill.exec.expr.ValueVectorReadExpression) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) NamedExpression(org.apache.drill.common.logical.data.NamedExpression)

Example 38 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class HashJoinBatch method setupHashJoinProbe.

public HashJoinProbe setupHashJoinProbe() throws ClassTransformationException, IOException {
    final CodeGenerator<HashJoinProbe> cg = CodeGenerator.get(HashJoinProbe.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    cg.plainJavaCapable(true);
    // Uncomment out this line to debug the generated code.
    //    cg.saveCodeForDebugging(true);
    final ClassGenerator<HashJoinProbe> g = cg.getRoot();
    // Generate the code to project build side records
    g.setMappingSet(projectBuildMapping);
    int fieldId = 0;
    final JExpression buildIndex = JExpr.direct("buildIndex");
    final JExpression outIndex = JExpr.direct("outIndex");
    g.rotateBlock();
    if (rightSchema != null) {
        for (final MaterializedField field : rightSchema) {
            final MajorType inputType = field.getType();
            final MajorType outputType;
            // not nullable so we must exclude them from the check below (see DRILL-2197).
            if ((joinType == JoinRelType.LEFT || joinType == JoinRelType.FULL) && inputType.getMode() == DataMode.REQUIRED && inputType.getMinorType() != TypeProtos.MinorType.MAP) {
                outputType = Types.overrideMode(inputType, DataMode.OPTIONAL);
            } else {
                outputType = inputType;
            }
            // make sure to project field with children for children to show up in the schema
            final MaterializedField projected = field.withType(outputType);
            // Add the vector to our output container
            container.addOrGet(projected);
            final JVar inVV = g.declareVectorValueSetupAndMember("buildBatch", new TypedFieldId(field.getType(), true, fieldId));
            final JVar outVV = g.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(outputType, false, fieldId));
            g.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(buildIndex.band(JExpr.lit((int) Character.MAX_VALUE))).arg(outIndex).arg(inVV.component(buildIndex.shrz(JExpr.lit(16)))));
            g.rotateBlock();
            fieldId++;
        }
    }
    // Generate the code to project probe side records
    g.setMappingSet(projectProbeMapping);
    int outputFieldId = fieldId;
    fieldId = 0;
    final JExpression probeIndex = JExpr.direct("probeIndex");
    if (leftUpstream == IterOutcome.OK || leftUpstream == IterOutcome.OK_NEW_SCHEMA) {
        for (final VectorWrapper<?> vv : left) {
            final MajorType inputType = vv.getField().getType();
            final MajorType outputType;
            // not nullable so we must exclude them from the check below (see DRILL-2771, DRILL-2197).
            if ((joinType == JoinRelType.RIGHT || joinType == JoinRelType.FULL) && inputType.getMode() == DataMode.REQUIRED && inputType.getMinorType() != TypeProtos.MinorType.MAP) {
                outputType = Types.overrideMode(inputType, DataMode.OPTIONAL);
            } else {
                outputType = inputType;
            }
            final ValueVector v = container.addOrGet(MaterializedField.create(vv.getField().getPath(), outputType));
            if (v instanceof AbstractContainerVector) {
                vv.getValueVector().makeTransferPair(v);
                v.clear();
            }
            final JVar inVV = g.declareVectorValueSetupAndMember("probeBatch", new TypedFieldId(inputType, false, fieldId));
            final JVar outVV = g.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(outputType, false, outputFieldId));
            g.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(probeIndex).arg(outIndex).arg(inVV));
            g.rotateBlock();
            fieldId++;
            outputFieldId++;
        }
    }
    final HashJoinProbe hj = context.getImplementationClass(cg);
    return hj;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) AbstractContainerVector(org.apache.drill.exec.vector.complex.AbstractContainerVector) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) MaterializedField(org.apache.drill.exec.record.MaterializedField) JExpression(com.sun.codemodel.JExpression) JVar(com.sun.codemodel.JVar)

Example 39 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class MergeJoinBatch method allocateBatch.

private void allocateBatch(boolean newSchema) {
    boolean leftAllowed = status.getLeftStatus() != IterOutcome.NONE;
    boolean rightAllowed = status.getRightStatus() != IterOutcome.NONE;
    if (newSchema) {
        container.clear();
        // add fields from both batches
        if (leftAllowed) {
            for (VectorWrapper<?> w : leftIterator) {
                MajorType inputType = w.getField().getType();
                MajorType outputType;
                if (joinType == JoinRelType.RIGHT && inputType.getMode() == DataMode.REQUIRED) {
                    outputType = Types.overrideMode(inputType, DataMode.OPTIONAL);
                } else {
                    outputType = inputType;
                }
                MaterializedField newField = MaterializedField.create(w.getField().getPath(), outputType);
                ValueVector v = container.addOrGet(newField);
                if (v instanceof AbstractContainerVector) {
                    w.getValueVector().makeTransferPair(v);
                    v.clear();
                }
            }
        }
        if (rightAllowed) {
            for (VectorWrapper<?> w : rightIterator) {
                MajorType inputType = w.getField().getType();
                MajorType outputType;
                if (joinType == JoinRelType.LEFT && inputType.getMode() == DataMode.REQUIRED) {
                    outputType = Types.overrideMode(inputType, DataMode.OPTIONAL);
                } else {
                    outputType = inputType;
                }
                MaterializedField newField = MaterializedField.create(w.getField().getPath(), outputType);
                ValueVector v = container.addOrGet(newField);
                if (v instanceof AbstractContainerVector) {
                    w.getValueVector().makeTransferPair(v);
                    v.clear();
                }
            }
        }
    } else {
        container.zeroVectors();
    }
    for (VectorWrapper w : container) {
        AllocationHelper.allocateNew(w.getValueVector(), Character.MAX_VALUE);
    }
    container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    logger.debug("Built joined schema: {}", container.getSchema());
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) AbstractContainerVector(org.apache.drill.exec.vector.complex.AbstractContainerVector) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) MaterializedField(org.apache.drill.exec.record.MaterializedField)

Example 40 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class NestedLoopJoinBatch method buildSchema.

/**
   * Builds the output container's schema. Goes over the left and the right
   * batch and adds the corresponding vectors to the output container.
   * @throws SchemaChangeException if batch schema was changed during execution
   */
@Override
protected void buildSchema() throws SchemaChangeException {
    try {
        leftUpstream = next(LEFT_INPUT, left);
        rightUpstream = next(RIGHT_INPUT, right);
        if (leftUpstream == IterOutcome.STOP || rightUpstream == IterOutcome.STOP) {
            state = BatchState.STOP;
            return;
        }
        if (leftUpstream == IterOutcome.OUT_OF_MEMORY || rightUpstream == IterOutcome.OUT_OF_MEMORY) {
            state = BatchState.OUT_OF_MEMORY;
            return;
        }
        if (leftUpstream != IterOutcome.NONE) {
            leftSchema = left.getSchema();
            for (final VectorWrapper<?> vw : left) {
                container.addOrGet(vw.getField());
            }
        }
        if (rightUpstream != IterOutcome.NONE) {
            // make right input schema optional if we have LEFT join
            for (final VectorWrapper<?> vectorWrapper : right) {
                TypeProtos.MajorType inputType = vectorWrapper.getField().getType();
                TypeProtos.MajorType outputType;
                if (popConfig.getJoinType() == JoinRelType.LEFT && inputType.getMode() == TypeProtos.DataMode.REQUIRED) {
                    outputType = Types.overrideMode(inputType, TypeProtos.DataMode.OPTIONAL);
                } else {
                    outputType = inputType;
                }
                MaterializedField newField = MaterializedField.create(vectorWrapper.getField().getPath(), outputType);
                ValueVector valueVector = container.addOrGet(newField);
                if (valueVector instanceof AbstractContainerVector) {
                    vectorWrapper.getValueVector().makeTransferPair(valueVector);
                    valueVector.clear();
                }
            }
            rightSchema = right.getSchema();
            addBatchToHyperContainer(right);
        }
        allocateVectors();
        nljWorker = setupWorker();
        // if left batch is empty, fetch next
        if (leftUpstream != IterOutcome.NONE && left.getRecordCount() == 0) {
            leftUpstream = next(LEFT_INPUT, left);
        }
        container.setRecordCount(0);
        container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    } catch (ClassTransformationException | IOException e) {
        throw new SchemaChangeException(e);
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) AbstractContainerVector(org.apache.drill.exec.vector.complex.AbstractContainerVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) ClassTransformationException(org.apache.drill.exec.exception.ClassTransformationException) MaterializedField(org.apache.drill.exec.record.MaterializedField) IOException(java.io.IOException) TypeProtos(org.apache.drill.common.types.TypeProtos)

Aggregations

ValueVector (org.apache.drill.exec.vector.ValueVector)130 MaterializedField (org.apache.drill.exec.record.MaterializedField)29 Test (org.junit.Test)21 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)18 RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader)13 VectorWrapper (org.apache.drill.exec.record.VectorWrapper)13 QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch)12 ExecTest (org.apache.drill.exec.ExecTest)11 IOException (java.io.IOException)10 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)10 VectorContainer (org.apache.drill.exec.record.VectorContainer)10 Drillbit (org.apache.drill.exec.server.Drillbit)10 ErrorCollector (org.apache.drill.common.expression.ErrorCollector)9 ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl)9 RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet)9 SchemaPath (org.apache.drill.common.expression.SchemaPath)8 MajorType (org.apache.drill.common.types.TypeProtos.MajorType)8 FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry)8 FragmentContext (org.apache.drill.exec.ops.FragmentContext)8 PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan)8