Search in sources :

Example 96 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getVectorExpressionForUdf.

private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, Class<?> udfClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    int numChildren = (childExpr == null) ? 0 : childExpr.size();
    if (numChildren > 2 && mode == VectorExpressionDescriptor.Mode.FILTER && ((genericUdf instanceof GenericUDFOPOr) || (genericUdf instanceof GenericUDFOPAnd))) {
        for (int i = 0; i < numChildren; i++) {
            ExprNodeDesc child = childExpr.get(i);
            String childTypeString = child.getTypeString();
            if (childTypeString == null) {
                throw new HiveException("Null child type name string");
            }
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString);
            Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
            if (columnVectorType != ColumnVector.Type.LONG) {
                return null;
            }
            if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) {
                return null;
            }
        }
        Class<?> vclass;
        if (genericUdf instanceof GenericUDFOPOr) {
            vclass = FilterExprOrExpr.class;
        } else {
            vclass = FilterExprAndExpr.class;
        }
        VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
        return createVectorExpression(vclass, childExpr, childrenMode, returnType, DataTypePhysicalVariation.NONE);
    }
    if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) {
        return null;
    }
    // Intercept here for a possible Decimal64 vector expression class.
    VectorExpression result = getDecimal64VectorExpressionForUdf(genericUdf, udfClass, childExpr, numChildren, mode, returnType);
    if (result != null) {
        return result;
    }
    // Otherwise, fall through and proceed with non-Decimal64 vector expression classes...
    VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
    builder.setNumArguments(numChildren);
    builder.setMode(mode);
    for (int i = 0; i < numChildren; i++) {
        ExprNodeDesc child = childExpr.get(i);
        TypeInfo childTypeInfo = child.getTypeInfo();
        String childTypeString = childTypeInfo.toString();
        if (childTypeString == null) {
            throw new HiveException("Null child type name string");
        }
        String undecoratedTypeName = getUndecoratedName(childTypeString);
        if (undecoratedTypeName == null) {
            throw new HiveException("No match for type string " + childTypeString + " from undecorated type name method");
        }
        builder.setArgumentType(i, undecoratedTypeName);
        if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc) || (child instanceof ExprNodeFieldDesc)) {
            builder.setInputExpressionType(i, InputExpressionType.COLUMN);
        } else if (child instanceof ExprNodeConstantDesc) {
            if (isNullConst(child)) {
                builder.setInputExpressionType(i, InputExpressionType.NULLSCALAR);
            } else {
                builder.setInputExpressionType(i, InputExpressionType.SCALAR);
            }
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            builder.setInputExpressionType(i, InputExpressionType.DYNAMICVALUE);
        } else {
            throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
        }
    }
    VectorExpressionDescriptor.Descriptor descriptor = builder.build();
    Class<?> vclass = this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions);
    if (vclass == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("No vector udf found for " + udfClass.getSimpleName() + ", descriptor: " + descriptor);
        }
        return null;
    }
    VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
    return createVectorExpression(vclass, childExpr, childrenMode, returnType, DataTypePhysicalVariation.NONE);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ArgumentType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) InputExpressionType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 97 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getEltExpression.

private VectorExpression getEltExpression(List<ExprNodeDesc> childExpr, TypeInfo returnType) throws HiveException {
    int[] inputColumns = new int[childExpr.size()];
    VectorExpression[] vectorChildren = getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION);
    final int size = vectorChildren.length;
    TypeInfo[] inputTypeInfos = new TypeInfo[size];
    DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[size];
    int i = 0;
    for (VectorExpression ve : vectorChildren) {
        inputColumns[i] = ve.getOutputColumnNum();
        inputTypeInfos[i] = ve.getOutputTypeInfo();
        inputDataTypePhysicalVariations[i++] = ve.getOutputDataTypePhysicalVariation();
    }
    final int outputColumnNum = ocm.allocateOutputColumn(returnType);
    VectorElt vectorElt = new VectorElt(inputColumns, outputColumnNum);
    vectorElt.setChildExpressions(vectorChildren);
    vectorElt.setInputTypeInfos(inputTypeInfos);
    vectorElt.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
    vectorElt.setOutputTypeInfo(returnType);
    vectorElt.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
    freeNonColumns(vectorChildren);
    return vectorElt;
}
Also used : DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) VectorElt(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorElt) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 98 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getVectorExpressionsUpConvertDecimal64.

public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List<ExprNodeDesc> exprNodes) throws HiveException {
    VectorExpression[] vecExprs = getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION);
    final int size = vecExprs.length;
    for (int i = 0; i < size; i++) {
        VectorExpression vecExpr = vecExprs[i];
        if (vecExpr.getOutputColumnVectorType() == ColumnVector.Type.DECIMAL_64) {
            vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr);
        }
    }
    return vecExprs;
}
Also used : FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)

Example 99 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getGroupingExpression.

private VectorExpression getGroupingExpression(GenericUDFGrouping udf, List<ExprNodeDesc> childExprs, TypeInfo returnType) throws HiveException {
    ExprNodeDesc childExpr0 = childExprs.get(0);
    if (!(childExpr0 instanceof ExprNodeColumnDesc)) {
        return null;
    }
    ExprNodeColumnDesc groupingIdColDesc = (ExprNodeColumnDesc) childExpr0;
    int groupingIdColNum = getInputColumnIndex(groupingIdColDesc.getColumn());
    final int indexCount = childExprs.size() - 1;
    int[] indices = new int[indexCount];
    for (int i = 0; i < indexCount; i++) {
        ExprNodeDesc indexChildExpr = childExprs.get(i + 1);
        if (!(indexChildExpr instanceof ExprNodeConstantDesc)) {
            return null;
        }
        Object scalarObject = ((ExprNodeConstantDesc) indexChildExpr).getValue();
        final int index;
        if (scalarObject instanceof Integer) {
            index = (int) scalarObject;
        } else if (scalarObject instanceof Long) {
            index = (int) ((long) scalarObject);
        } else {
            return null;
        }
        indices[i] = index;
    }
    final int outputColumnNum = ocm.allocateOutputColumn(returnType);
    final VectorExpression ve;
    if (indices.length == 1) {
        ve = new GroupingColumn(groupingIdColNum, indices[0], outputColumnNum);
    } else {
        ve = new GroupingColumns(groupingIdColNum, indices, outputColumnNum);
    }
    ve.setInputTypeInfos(groupingIdColDesc.getTypeInfo());
    ve.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
    ve.setOutputTypeInfo(returnType);
    ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
    return ve;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupingColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.GroupingColumn) GroupingColumns(org.apache.hadoop.hive.ql.exec.vector.expressions.GroupingColumns)

Example 100 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class TestVectorFilterCompare method doVectorFilterCompareTest.

private void doVectorFilterCompareTest(TypeInfo typeInfo1, TypeInfo typeInfo2, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, ExprNodeGenericFuncDesc exprDesc, Comparison comparison, FilterCompareTestMode filterCompareTestMode, ColumnScalarMode columnScalarMode, VectorRandomBatchSource batchSource, ObjectInspector objectInspector, TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception {
    HiveConf hiveConf = new HiveConf();
    if (filterCompareTestMode == FilterCompareTestMode.ADAPTOR) {
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
        // Don't use DECIMAL_64 with the VectorUDFAdaptor.
        dataTypePhysicalVariations = null;
    }
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), dataTypePhysicalVariations == null ? null : Arrays.asList(dataTypePhysicalVariations), hiveConf);
    final VectorExpressionDescriptor.Mode mode;
    switch(filterCompareTestMode) {
        case ADAPTOR:
        case COMPARE_VECTOR_EXPRESSION:
            mode = VectorExpressionDescriptor.Mode.PROJECTION;
            break;
        case FILTER_VECTOR_EXPRESSION:
            mode = VectorExpressionDescriptor.Mode.FILTER;
            break;
        default:
            throw new RuntimeException("Unexpected filter compare mode " + filterCompareTestMode);
    }
    VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc, mode);
    vectorExpression.transientInit(hiveConf);
    if (filterCompareTestMode == FilterCompareTestMode.COMPARE_VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
        System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo1 " + typeInfo1.toString() + " typeInfo2 " + typeInfo2.toString() + " " + comparison + " " + " filterCompareTestMode " + filterCompareTestMode + " columnScalarMode " + columnScalarMode + " vectorExpression " + vectorExpression.toString());
    }
    String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
    DataTypePhysicalVariation[] outputDataTypePhysicalVariations = vectorizationContext.getScratchDataTypePhysicalVariations();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, typeInfos, dataTypePhysicalVariations, /* dataColumnNums */
    null, /* partitionColumnCount */
    0, /* virtualColumnCount */
    0, /* neededVirtualColumns */
    null, outputScratchTypeNames, outputDataTypePhysicalVariations);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
    final int outputColumnNum = vectorExpression.getOutputColumnNum();
    resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
    Object[] scrqtchRow = new Object[1];
    // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
    /*
    System.out.println(
        "*DEBUG* typeInfo1 " + typeInfo1.toString() +
        " typeInfo2 " + typeInfo2.toString() +
        " " + comparison + " " +
        " filterCompareTestMode " + filterCompareTestMode +
        " columnScalarMode " + columnScalarMode +
        " vectorExpression " + vectorExpression.toString());
    */
    final boolean isFilter = (mode == VectorExpressionDescriptor.Mode.FILTER);
    boolean copySelectedInUse = false;
    int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        final int originalBatchSize = batch.size;
        if (isFilter) {
            copySelectedInUse = batch.selectedInUse;
            if (batch.selectedInUse) {
                System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
            }
        }
        // In filter mode, the batch size can be made smaller.
        vectorExpression.evaluate(batch);
        if (!isFilter) {
            extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, objectInspector, resultObjects);
        } else {
            final int currentBatchSize = batch.size;
            if (copySelectedInUse && batch.selectedInUse) {
                int selectIndex = 0;
                for (int i = 0; i < originalBatchSize; i++) {
                    final int originalBatchIndex = copySelected[i];
                    final boolean booleanResult;
                    if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
                        booleanResult = true;
                        selectIndex++;
                    } else {
                        booleanResult = false;
                    }
                    resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
                }
            } else if (batch.selectedInUse) {
                int selectIndex = 0;
                for (int i = 0; i < originalBatchSize; i++) {
                    final boolean booleanResult;
                    if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
                        booleanResult = true;
                        selectIndex++;
                    } else {
                        booleanResult = false;
                    }
                    resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
                }
            } else if (currentBatchSize == 0) {
                // Whole batch got zapped.
                for (int i = 0; i < originalBatchSize; i++) {
                    resultObjects[rowIndex + i] = new BooleanWritable(false);
                }
            } else {
                // Every row kept.
                for (int i = 0; i < originalBatchSize; i++) {
                    resultObjects[rowIndex + i] = new BooleanWritable(true);
                }
            }
        }
        rowIndex += originalBatchSize;
    }
}
Also used : VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorExpressionDescriptor(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)140 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)57 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)44 ArrayList (java.util.ArrayList)43 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)38 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)32 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)30 Test (org.junit.Test)29 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)27 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)27 IOException (java.io.IOException)25 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)25 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)23 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)23 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)23 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)19 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)19 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)19 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)15