Search in sources:

Example 6 with DataTypePhysicalVariation

Use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in the Apache Hive project.

In class VectorizationContext, method getCastToDecimal:

/**
 * Picks the vector expression that casts the first child expression to decimal.
 *
 * A constant child is converted up front and returned as a constant vector
 * expression. Otherwise the cast class is chosen from the child's type string.
 * A decimal column whose physical variation is DECIMAL_64 gets the dedicated
 * Decimal64-to-decimal conversion instead of a decimal-to-decimal cast.
 *
 * @param childExpr  argument expressions; only the first element is inspected
 * @param returnType the decimal type being cast to
 * @return the cast expression, or {@code null} when the input type matches none
 *         of the handled families
 * @throws HiveException propagated from the helper methods that build the expression
 */
private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, TypeInfo returnType) throws HiveException {
    final ExprNodeDesc input = childExpr.get(0);
    final String inputTypeName = childExpr.get(0).getTypeString();
    final VectorExpressionDescriptor.Mode projection = VectorExpressionDescriptor.Mode.PROJECTION;

    // Constant input: convert the value once and emit a constant vector expression.
    if (input instanceof ExprNodeConstantDesc) {
        Object rawConstant = ((ExprNodeConstantDesc) input).getValue();
        HiveDecimal asDecimal = castConstantToDecimal(rawConstant, input.getTypeInfo());
        return getConstantVectorExpression(asDecimal, returnType, projection);
    }

    if (isIntFamily(inputTypeName)) {
        return createVectorExpression(CastLongToDecimal.class, childExpr, projection, returnType);
    }
    if (isFloatFamily(inputTypeName)) {
        return createVectorExpression(CastDoubleToDecimal.class, childExpr, projection, returnType);
    }
    if (decimalTypePattern.matcher(inputTypeName).matches()) {
        if (input instanceof ExprNodeColumnDesc) {
            final int columnIndex = getInputColumnIndex((ExprNodeColumnDesc) input);
            if (getDataTypePhysicalVariation(columnIndex) == DataTypePhysicalVariation.DECIMAL_64) {
                // Do Decimal64 conversion instead.
                return createDecimal64ToDecimalConversion(columnIndex, returnType);
            }
        }
        // Every other decimal input (non-column, or column that is not DECIMAL_64).
        return createVectorExpression(CastDecimalToDecimal.class, childExpr, projection, returnType);
    }
    if (isStringFamily(inputTypeName)) {
        return createVectorExpression(CastStringToDecimal.class, childExpr, projection, returnType);
    }
    if (inputTypeName.equals("timestamp")) {
        return createVectorExpression(CastTimestampToDecimal.class, childExpr, projection, returnType);
    }
    return null;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 7 with DataTypePhysicalVariation

Use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in the Apache Hive project.

In class VectorizationContext, method getCustomUDFExpression:

/**
 * Return vector expression for a custom (i.e. not built-in) UDF.
 *
 * The UDF is wrapped in a {@code VectorUDFAdaptor}. Each child is described by
 * a {@code VectorUDFArgDesc}: function and dynamic-value children are
 * vectorized in PROJECTION mode and referenced by their output column, column
 * children are referenced by their input column index, and constant children
 * are passed through as constants.
 *
 * @param expr the UDF expression to vectorize
 * @param mode PROJECTION or FILTER; in FILTER mode the adaptor is wrapped in a
 *             {@code SelectColumnIsTrue}
 * @return the vector expression, or {@code null} when FILTER mode is requested
 *         but the UDF result type is not a primitive BOOLEAN
 * @throws HiveException if a child is of an unsupported expression-descriptor
 *         kind, or if vectorizing a child fails
 */
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) throws HiveException {
    final TypeInfo resultTypeInfo = expr.getTypeInfo();

    final boolean isFilter = (mode == VectorExpressionDescriptor.Mode.FILTER);
    if (isFilter) {
        // A filter can only be built on top of a BOOLEAN result.
        final boolean isBooleanResult = resultTypeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) resultTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;
        if (!isBooleanResult) {
            return null;
        }
    }

    List<ExprNodeDesc> childExprList = expr.getChildren();
    final int childrenCount = childExprList.size();

    // argument descriptors
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childrenCount];
    for (int i = 0; i < argDescs.length; i++) {
        argDescs[i] = new VectorUDFArgDesc();
    }

    // Column numbers of batch corresponding to expression result arguments
    List<Integer> exprResultColumnNums = new ArrayList<Integer>();
    // Vectorized children; kept in step with exprResultColumnNums (one entry each).
    List<VectorExpression> vectorExprs = new ArrayList<VectorExpression>();

    TypeInfo[] inputTypeInfos = new TypeInfo[childrenCount];
    DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[childrenCount];
    for (int i = 0; i < childrenCount; i++) {
        ExprNodeDesc child = childExprList.get(i);
        inputTypeInfos[i] = child.getTypeInfo();
        inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
        if (child instanceof ExprNodeGenericFuncDesc) {
            argDescs[i].setVariable(vectorizeCustomUdfChild(child, vectorExprs, exprResultColumnNums));
        } else if (child instanceof ExprNodeColumnDesc) {
            argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
        } else if (child instanceof ExprNodeConstantDesc) {
            // this is a constant (or null)
            argDescs[i].setConstant((ExprNodeConstantDesc) child);
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            argDescs[i].setVariable(vectorizeCustomUdfChild(child, vectorExprs, exprResultColumnNums));
        } else {
            throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + child);
        }
    }

    // Allocate output column and get column number;
    String resultTypeName = resultTypeInfo.getTypeName();
    final int outputColumnNum = ocm.allocateOutputColumn(resultTypeInfo);

    // Make vectorized operator
    VectorExpression ve = new VectorUDFAdaptor(expr, outputColumnNum, resultTypeName, argDescs);

    // Set child expressions (null, not an empty array, when there are none).
    VectorExpression[] childVEs = vectorExprs.isEmpty() ? null : vectorExprs.toArray(new VectorExpression[0]);
    ve.setChildExpressions(childVEs);
    ve.setInputTypeInfos(inputTypeInfos);
    ve.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
    ve.setOutputTypeInfo(resultTypeInfo);
    ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);

    // Free output columns if inputs have non-leaf expression trees.
    for (Integer i : exprResultColumnNums) {
        ocm.freeOutputColumn(i);
    }

    if (!isFilter) {
        return ve;
    }
    SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputColumnNum);
    filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });
    filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
    filterVectorExpr.setInputDataTypePhysicalVariations(ve.getOutputDataTypePhysicalVariation());
    return filterVectorExpr;
}

/*
 * Vectorizes one non-leaf UDF argument in PROJECTION mode, records the child
 * expression and its output column in the given lists, and returns that
 * output column number.
 */
private int vectorizeCustomUdfChild(ExprNodeDesc child, List<VectorExpression> vectorExprs, List<Integer> exprResultColumnNums) throws HiveException {
    VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
    vectorExprs.add(e);
    final int childOutputColumnNum = e.getOutputColumnNum();
    exprResultColumnNums.add(childOutputColumnNum);
    return childOutputColumnNum;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorUDFArgDesc(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)

Example 8 with DataTypePhysicalVariation

Use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in the Apache Hive project.

In class VectorizationContext, method getDecimal64VectorExpressionForUdf:

/**
 * Tries to build a Decimal64 vector expression for a two-argument UDF.
 *
 * Both arguments are described as DECIMAL_64. Each must be a function, a
 * column, or a constant; function and column arguments are treated as COLUMN
 * inputs and constants as SCALAR inputs. The attempt is abandoned (returning
 * {@code null}) when:
 * <ul>
 *   <li>an argument is of any other expression kind,</li>
 *   <li>both arguments are constants,</li>
 *   <li>two column-like arguments have different scales,</li>
 *   <li>no vector expression class is registered for the descriptor,</li>
 *   <li>a constant is null or has a larger scale than the column scale, or</li>
 *   <li>the expression cannot be instantiated and the failure handler returns normally.</li>
 * </ul>
 *
 * @param genericUdf  the UDF instance (not consulted by this method)
 * @param udfClass    class used to look up the vector expression class
 * @param childExpr   argument expressions; elements 0 and 1 are read and their
 *                    type infos are cast to {@code DecimalTypeInfo}
 * @param numChildren number of arguments declared in the descriptor and processed
 * @param mode        PROJECTION or FILTER
 * @param returnType  result type of the expression
 * @return the Decimal64 vector expression, or {@code null} if none can be built
 * @throws HiveException propagated from the helper methods
 */
private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUdf, Class<?> udfClass, List<ExprNodeDesc> childExpr, int numChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    ExprNodeDesc child1 = childExpr.get(0);
    ExprNodeDesc child2 = childExpr.get(1);
    DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) child1.getTypeInfo();
    DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) child2.getTypeInfo();
    DataTypePhysicalVariation dataTypePhysicalVariation1 = DataTypePhysicalVariation.DECIMAL_64;
    DataTypePhysicalVariation dataTypePhysicalVariation2 = DataTypePhysicalVariation.DECIMAL_64;
    final int scale1 = decimalTypeInfo1.scale();
    final int scale2 = decimalTypeInfo2.scale();
    VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
    builder.setNumArguments(numChildren);
    builder.setMode(mode);
    // The scale that scalar arguments are serialized to; taken from the first column-like argument.
    boolean isColumnScaleEstablished = false;
    int columnScale = 0;
    builder.setArgumentType(0, ArgumentType.DECIMAL_64);
    if (child1 instanceof ExprNodeGenericFuncDesc || child1 instanceof ExprNodeColumnDesc) {
        builder.setInputExpressionType(0, InputExpressionType.COLUMN);
        isColumnScaleEstablished = true;
        columnScale = scale1;
    } else if (child1 instanceof ExprNodeConstantDesc) {
        builder.setInputExpressionType(0, InputExpressionType.SCALAR);
    } else {
        // Currently, only functions, columns, and scalars supported.
        return null;
    }
    builder.setArgumentType(1, ArgumentType.DECIMAL_64);
    if (child2 instanceof ExprNodeGenericFuncDesc || child2 instanceof ExprNodeColumnDesc) {
        builder.setInputExpressionType(1, InputExpressionType.COLUMN);
        if (!isColumnScaleEstablished) {
            isColumnScaleEstablished = true;
            columnScale = scale2;
        } else if (columnScale != scale2) {
            // We only support Decimal64 on 2 columns when they have the same scale.
            return null;
        }
    } else if (child2 instanceof ExprNodeConstantDesc) {
        // Cannot have SCALAR, SCALAR.
        if (!isColumnScaleEstablished) {
            return null;
        }
        builder.setInputExpressionType(1, InputExpressionType.SCALAR);
    } else {
        // Currently, only functions, columns, and scalars supported.
        return null;
    }
    VectorExpressionDescriptor.Descriptor descriptor = builder.build();
    Class<?> vectorClass = this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions);
    if (vectorClass == null) {
        return null;
    }
    VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
    /*
     * Custom build arguments.
     */
    List<VectorExpression> children = new ArrayList<VectorExpression>();
    Object[] arguments = new Object[numChildren];
    for (int i = 0; i < numChildren; i++) {
        ExprNodeDesc child = childExpr.get(i);
        if (child instanceof ExprNodeGenericFuncDesc) {
            VectorExpression vChild = getVectorExpression(child, childrenMode);
            children.add(vChild);
            arguments[i] = vChild.getOutputColumnNum();
        } else if (child instanceof ExprNodeColumnDesc) {
            int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
            if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
                // In filter mode, the column must be a boolean
                children.add(new SelectColumnIsTrue(colIndex));
            }
            arguments[i] = colIndex;
        } else {
            Preconditions.checkState(child instanceof ExprNodeConstantDesc);
            ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) child;
            HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue();
            if (hiveDecimal == null) {
                // A null constant cannot be serialized as a Decimal64 scalar; bail out
                // instead of failing with a NullPointerException below.
                return null;
            }
            if (hiveDecimal.scale() > columnScale) {
                // For now, bail out on decimal constants with larger scale than column scale.
                return null;
            }
            final long decimal64Scalar = new HiveDecimalWritable(hiveDecimal).serialize64(columnScale);
            arguments[i] = decimal64Scalar;
        }
    }
    /*
     * Instantiate Decimal64 vector expression.
     *
     * The instantiateExpression method sets the output column and type information.
     */
    VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments);
    if (vectorExpression == null) {
        handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments);
        // NOTE(review): the handler presumably throws; confirm. If it ever returns
        // normally, report "no Decimal64 expression" rather than dereferencing null.
        return null;
    }
    vectorExpression.setInputTypeInfos(decimalTypeInfo1, decimalTypeInfo2);
    vectorExpression.setInputDataTypePhysicalVariations(dataTypePhysicalVariation1, dataTypePhysicalVariation2);
    if (!children.isEmpty()) {
        vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
    }
    return vectorExpression;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 9 with DataTypePhysicalVariation

Use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in the Apache Hive project.

In class VectorizedRowBatchCtx, method createColumnVectorFromRowColumnTypeInfos:

/**
 * Creates the column vector for one row column from its recorded type info.
 *
 * The physical variation comes from {@code rowDataTypePhysicalVariations} when
 * that array is present, and is {@code DataTypePhysicalVariation.NONE} otherwise.
 *
 * @param columnNum index into the row column type arrays
 * @return the column vector created by {@code VectorizedBatchUtil.createColumnVector}
 */
private ColumnVector createColumnVectorFromRowColumnTypeInfos(int columnNum) {
    final TypeInfo columnType = rowColumnTypeInfos[columnNum];
    final DataTypePhysicalVariation variation = (rowDataTypePhysicalVariations == null)
            ? DataTypePhysicalVariation.NONE
            : rowDataTypePhysicalVariations[columnNum];
    return VectorizedBatchUtil.createColumnVector(columnType, variation);
}
Also used : DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)

Example 10 with DataTypePhysicalVariation

Use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in the Apache Hive project.

In class Vectorizer, method debugDisplayVertexInfo:

/**
 * Logs, at debug level, the contents of the vectorized row batch context
 * attached to the given work: row column names and type infos, row physical
 * variations, partition and virtual column counts, and scratch column type
 * names and physical variations. A missing physical-variation array is logged
 * as the text "NULL".
 *
 * @param work the work whose vectorized row batch context is dumped
 */
public void debugDisplayVertexInfo(BaseWork work) {
    final VectorizedRowBatchCtx batchCtx = work.getVectorizedRowBatchCtx();

    // Read everything first, then log.
    final String[] rowNames = batchCtx.getRowColumnNames();
    final TypeInfo[] rowTypes = batchCtx.getRowColumnTypeInfos();
    final DataTypePhysicalVariation[] rowVariations = batchCtx.getRowdataTypePhysicalVariations();
    final int partitionCount = batchCtx.getPartitionColumnCount();
    final int virtualCount = batchCtx.getVirtualColumnCount();
    final String[] scratchTypeNames = batchCtx.getScratchColumnTypeNames();
    final DataTypePhysicalVariation[] scratchVariations = batchCtx.getScratchDataTypePhysicalVariations();

    LOG.debug("debugDisplayVertexInfo rowColumnNames " + Arrays.toString(rowNames));
    LOG.debug("debugDisplayVertexInfo rowColumnTypeInfos " + Arrays.toString(rowTypes));

    final String rowVariationsText;
    if (rowVariations != null) {
        rowVariationsText = Arrays.toString(rowVariations);
    } else {
        rowVariationsText = "NULL";
    }
    LOG.debug("debugDisplayVertexInfo rowDataTypePhysicalVariations " + rowVariationsText);

    LOG.debug("debugDisplayVertexInfo partitionColumnCount " + partitionCount);
    LOG.debug("debugDisplayVertexInfo virtualColumnCount " + virtualCount);
    LOG.debug("debugDisplayVertexInfo scratchColumnTypeNames " + Arrays.toString(scratchTypeNames));

    final String scratchVariationsText;
    if (scratchVariations != null) {
        scratchVariationsText = Arrays.toString(scratchVariations);
    } else {
        scratchVariationsText = "NULL";
    }
    LOG.debug("debugDisplayVertexInfo scratchdataTypePhysicalVariations " + scratchVariationsText);
}
Also used : VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Aggregations

DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)10 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)7 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)7 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)7 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)7 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)6 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)6 ArrayList (java.util.ArrayList)5 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)5 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)5 BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)5 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)4 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)2 ExprNodeDynamicValueDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc)2 List (java.util.List)1 VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx)1 VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor)1 VectorUDFArgDesc (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc)1 ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc)1 UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString)1