Search in sources :

Example 1 with ILongInExpr

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.ILongInExpr in the Apache Hive project.

The method getInExpression of the class VectorizationContext.

/**
 * Builds the vectorized form of {@code column IN ( <list-of-constants> )}, either as a filter
 * or as a boolean-valued expression, depending on {@code mode}.
 *
 * @param childExpr  the IN operands: element 0 is the column expression, the remaining
 *                   elements are the IN-list entries
 * @param mode       {@code FILTER} selects the {@code Filter*ColumnInList} classes; any other
 *                   mode selects the corresponding {@code *ColumnInList} classes
 * @param returnType type handed through to the vector expression factory methods
 * @return for STRUCT columns, whatever {@code getStructInExpression} returns; otherwise the
 *         configured IN expression, or {@code null} when the column category is not PRIMITIVE
 *         or its type matches none of the families handled here
 * @throws HiveException if an IN-list entry is not a constant after cast evaluation
 */
private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    final ExprNodeDesc colExpr = childExpr.get(0);
    final List<ExprNodeDesc> inChildren = childExpr.subList(1, childExpr.size());
    final String colType = VectorizationContext.mapTypeNameSynonyms(colExpr.getTypeString());
    final TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType);
    final Category category = colTypeInfo.getCategory();

    // Struct columns are delegated; any other non-primitive category is not handled here.
    if (category == Category.STRUCT) {
        return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType);
    }
    if (category != Category.PRIMITIVE) {
        return null;
    }

    /*
     * From here on the IN list is assumed to contain no NULL entries; the Vectorizer class is
     * responsible for enforcing that, and behavior is undefined if a NULL slips through.
     * Should NULL entries ever be permitted, 3-valued logic has to be honored: for example
     * NOT (col IN (null)) is UNKNOWN, which a WHERE clause treats as FALSE, so the row must be
     * filtered out. See the discussion in Jira HIVE-5583.
     */
    final List<ExprNodeDesc> constants = evaluateCastOnConstants(inChildren);

    // Every IN item must be a constant once casts have been evaluated.
    for (ExprNodeDesc candidate : constants) {
        if (candidate instanceof ExprNodeConstantDesc) {
            continue;
        }
        throw new HiveException("Vectorizing IN expression only supported for constant values");
    }

    // The expression class is picked by mode, while the column child itself is always
    // created with Mode.PROJECTION in the factory calls below.
    final boolean filterMode = (mode == VectorExpressionDescriptor.Mode.FILTER);

    // NOTE: each branch converts every IN-list constant to the column's type family, i.e. the
    // arguments are assumed to share one type; per the original note, the non-vectorized path
    // validates that explicitly during UDF init.
    if (isIntFamily(colType)) {
        final Class<?> longClass = filterMode ? FilterLongColumnInList.class : LongColumnInList.class;
        final long[] longValues = new long[constants.size()];
        for (int idx = 0; idx < longValues.length; idx++) {
            longValues[idx] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) constants.get(idx));
        }
        final VectorExpression longIn = createVectorExpression(longClass, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((ILongInExpr) longIn).setInListValues(longValues);
        return longIn;
    }

    if (isTimestampFamily(colType)) {
        final Class<?> timestampClass = filterMode ? FilterTimestampColumnInList.class : TimestampColumnInList.class;
        final Timestamp[] timestampValues = new Timestamp[constants.size()];
        for (int idx = 0; idx < timestampValues.length; idx++) {
            timestampValues[idx] = getTimestampScalar(constants.get(idx));
        }
        final VectorExpression timestampIn = createVectorExpression(timestampClass, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((ITimestampInExpr) timestampIn).setInListValues(timestampValues);
        return timestampIn;
    }

    if (isStringFamily(colType)) {
        final Class<?> stringClass = filterMode ? FilterStringColumnInList.class : StringColumnInList.class;
        final byte[][] stringValues = new byte[constants.size()][];
        for (int idx = 0; idx < stringValues.length; idx++) {
            stringValues[idx] = getStringScalarAsByteArray((ExprNodeConstantDesc) constants.get(idx));
        }
        final VectorExpression stringIn = createVectorExpression(stringClass, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((IStringInExpr) stringIn).setInListValues(stringValues);
        return stringIn;
    }

    if (isFloatFamily(colType)) {
        final Class<?> doubleClass = filterMode ? FilterDoubleColumnInList.class : DoubleColumnInList.class;
        final double[] doubleValues = new double[constants.size()];
        for (int idx = 0; idx < doubleValues.length; idx++) {
            doubleValues[idx] = getNumericScalarAsDouble(constants.get(idx));
        }
        final VectorExpression doubleIn = createVectorExpression(doubleClass, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((IDoubleInExpr) doubleIn).setInListValues(doubleValues);
        return doubleIn;
    }

    if (isDecimalFamily(colType)) {
        // First attempt the decimal64 variant, whose IN-list values are longs serialized at
        // the column's scale.
        if (checkExprNodeDescForDecimal64(colExpr)) {
            final Class<?> decimal64Class = filterMode ? FilterDecimal64ColumnInList.class : Decimal64ColumnInList.class;
            final int scale = ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale();
            final VectorExpression decimal64In = createDecimal64VectorExpression(
                decimal64Class,
                childExpr.subList(0, 1),
                VectorExpressionDescriptor.Mode.PROJECTION,
                /* isDecimal64ScaleEstablished */ true,
                /* decimal64ColumnScale */ scale,
                returnType,
                DataTypePhysicalVariation.NONE,
                /* dontRescaleArguments */ false,
                new GenericUDFIn());
            if (decimal64In != null) {
                final long[] scaledValues = new long[constants.size()];
                for (int idx = 0; idx < scaledValues.length; idx++) {
                    final ExprNodeConstantDesc constant = (ExprNodeConstantDesc) constants.get(idx);
                    final HiveDecimal decimalValue = (HiveDecimal) constant.getValue();
                    scaledValues[idx] = new HiveDecimalWritable(decimalValue).serialize64(scale);
                }
                ((ILongInExpr) decimal64In).setInListValues(scaledValues);
                return decimal64In;
            }
        }

        // Decimal64 was not attempted or produced no expression: use the HiveDecimal variant.
        final Class<?> decimalClass = filterMode ? FilterDecimalColumnInList.class : DecimalColumnInList.class;
        final VectorExpression decimalIn = createVectorExpression(decimalClass, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        final HiveDecimal[] decimalValues = new HiveDecimal[constants.size()];
        for (int idx = 0; idx < decimalValues.length; idx++) {
            decimalValues[idx] = (HiveDecimal) getVectorTypeScalarValue((ExprNodeConstantDesc) constants.get(idx));
        }
        ((IDecimalInExpr) decimalIn).setInListValues(decimalValues);
        return decimalIn;
    }

    if (isDateFamily(colType)) {
        // Dates reuse the long IN-list classes; the scalar helper's result is cast to Long.
        final Class<?> dateClass = filterMode ? FilterLongColumnInList.class : LongColumnInList.class;
        final long[] dateValues = new long[constants.size()];
        for (int idx = 0; idx < dateValues.length; idx++) {
            dateValues[idx] = (Long) getVectorTypeScalarValue((ExprNodeConstantDesc) constants.get(idx));
        }
        final VectorExpression dateIn = createVectorExpression(dateClass, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((ILongInExpr) dateIn).setInListValues(dateValues);
        return dateIn;
    }

    // No supported type family matched. Returning null signals "no vectorized expression";
    // per the original comment this causes execution to fall back to row mode.
    return null;
}
Also used : FilterLongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FilterDoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) IStringInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TimestampColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.TimestampColumnInList) FilterTimestampColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterTimestampColumnInList) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) FilterTimestampColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterTimestampColumnInList) DoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList) FilterDoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList) FilterLongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList) 
LongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) IDecimalInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.IDecimalInExpr) ILongInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.ILongInExpr) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)

Aggregations

HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)1 CastBooleanToStringViaLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString)1 CastDateToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString)1 CastDecimalToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString)1 CastDoubleToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString)1 CastFloatToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString)1 CastLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString)1 CastTimestampToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString)1 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)1 DoubleColumnInList (org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList)1 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)1 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)1 FilterDoubleColumnInList (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList)1 FilterLongColumnInList (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList)1 FilterTimestampColumnInList (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterTimestampColumnInList)1 IDecimalInExpr (org.apache.hadoop.hive.ql.exec.vector.expressions.IDecimalInExpr)1 ILongInExpr (org.apache.hadoop.hive.ql.exec.vector.expressions.ILongInExpr)1 IStringInExpr (org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr)1 LongColumnInList (org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList)1 TimestampColumnInList (org.apache.hadoop.hive.ql.exec.vector.expressions.TimestampColumnInList)1