Search in sources:

Example 1 with SelectColumnIsTrue

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue in the Apache Hive project.

In class VectorizationContext, method getIfExpression:

/**
 * Builds the vector expression for an IF.
 *
 * <p>The IF itself is always obtained in PROJECTION form from {@code doGetIfExpression}. When
 * FILTER mode is requested, that projection is wrapped in a {@code SelectColumnIsTrue} reading the
 * projection's output column; this is only attempted when {@code returnType} is a primitive
 * BOOLEAN.
 *
 * @param genericUDFIf the IF UDF, passed through to {@code doGetIfExpression}
 * @param childExpr the child expressions of the IF
 * @param mode the requested mode (PROJECTION or FILTER)
 * @param returnType the result type of the IF
 * @return the projection (non-FILTER mode), the filter wrapper (FILTER mode), or {@code null} when
 *         FILTER mode is requested for a non-BOOLEAN result or {@code doGetIfExpression} returns
 *         {@code null}
 * @throws HiveException not thrown directly here; presumably raised by {@code doGetIfExpression}
 */
private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    final boolean wantFilter = (mode == VectorExpressionDescriptor.Mode.FILTER);
    if (wantFilter) {
        // A filter can only be layered over a BOOLEAN result.
        final boolean booleanResult = returnType.getCategory() == Category.PRIMITIVE
            && ((PrimitiveTypeInfo) returnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;
        if (!booleanResult) {
            return null;
        }
    }

    // The IF is always built as a PROJECTION first.
    final VectorExpression projection = doGetIfExpression(genericUDFIf, childExpr, returnType);
    if (projection == null || !wantFilter) {
        return projection;
    }

    // FILTER mode: select the rows where the projected BOOLEAN column is true.
    final SelectColumnIsTrue filter = new SelectColumnIsTrue(projection.getOutputColumnNum());
    filter.setChildExpressions(new VectorExpression[] { projection });
    filter.setInputTypeInfos(projection.getOutputTypeInfo());
    filter.setInputDataTypePhysicalVariations(projection.getOutputDataTypePhysicalVariation());
    return filter;
}
Also used : FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)

Example 2 with SelectColumnIsTrue

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue in the Apache Hive project.

In class VectorizationContext, method getFilterOnBooleanColumnExpression:

/**
 * Builds a filter expression over a single column.
 *
 * <p>A column whose type is primitive BOOLEAN is filtered directly with
 * {@code SelectColumnIsTrue}. Any other column is first converted via
 * {@code getCastToBooleanExpression} in FILTER mode.
 *
 * @param exprDesc the column descriptor
 * @param columnNum the column number handed to {@code SelectColumnIsTrue} for BOOLEAN columns
 * @return the filter expression; never {@code null}
 * @throws HiveException if the cast-to-boolean conversion yields no expression
 */
private VectorExpression getFilterOnBooleanColumnExpression(ExprNodeColumnDesc exprDesc, int columnNum) throws HiveException {
    final TypeInfo columnType = exprDesc.getTypeInfo();
    final boolean alreadyBoolean = columnType.getCategory() == Category.PRIMITIVE
        && ((PrimitiveTypeInfo) columnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;

    if (alreadyBoolean) {
        // No conversion required: filter straight on the column.
        final VectorExpression direct = new SelectColumnIsTrue(columnNum);
        direct.setInputTypeInfos(columnType);
        direct.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
        return direct;
    }

    // Non-BOOLEAN column: go through the cast-to-boolean path in FILTER mode.
    final List<ExprNodeDesc> singleChild = Collections.singletonList(exprDesc);
    final VectorExpression converted = getCastToBooleanExpression(singleChild, VectorExpressionDescriptor.Mode.FILTER);
    if (converted == null) {
        throw new HiveException("Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean");
    }
    return converted;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)

Example 3 with SelectColumnIsTrue

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue in the Apache Hive project.

In class VectorizationContext, method getCoalesceExpression:

/**
 * Builds the vector expression for COALESCE over {@code childExpr}.
 *
 * <p>All children are vectorized in PROJECTION mode and fed to a {@code VectorCoalesce}. The
 * output physical variation starts as DECIMAL_64 and is downgraded to NONE as soon as a child with
 * a NONE/null variation is seen that is not a decimal constant of precision &lt;= 18. If the output
 * stays DECIMAL_64 and such constants were seen, each {@code ConstantVectorExpression} child with a
 * NONE/null variation is rewritten in place to carry its value as a DECIMAL_64 long, and the
 * matching scratch-column variation is updated.
 *
 * <p>When FILTER mode is requested, the coalesce output is wrapped in a
 * {@code SelectColumnIsTrue}; this requires {@code returnType} to be a primitive BOOLEAN.
 *
 * @param childExpr the COALESCE arguments
 * @param mode the requested mode (PROJECTION or FILTER)
 * @param returnType the result type of the COALESCE
 * @return the coalesce expression, its filter wrapper, or {@code null} when FILTER mode is
 *         requested for a non-BOOLEAN result
 * @throws HiveException not thrown directly here; presumably raised by the helpers called
 */
private VectorExpression getCoalesceExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    int[] inputColumns = new int[childExpr.size()];
    VectorExpression[] vectorChildren = getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION);
    final int size = vectorChildren.length;
    TypeInfo[] inputTypeInfos = new TypeInfo[size];
    DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[size];
    // Assume DECIMAL_64 output until a child rules it out.
    DataTypePhysicalVariation outputDataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64;
    boolean fixConstants = false;
    for (int i = 0; i < size; ++i) {
        VectorExpression ve = vectorChildren[i];
        inputColumns[i] = ve.getOutputColumnNum();
        inputTypeInfos[i] = ve.getOutputTypeInfo();
        inputDataTypePhysicalVariations[i] = ve.getOutputDataTypePhysicalVariation();
        if (inputDataTypePhysicalVariations[i] == DataTypePhysicalVariation.NONE || inputDataTypePhysicalVariations[i] == null) {
            if (childExpr.get(i) instanceof ExprNodeConstantDesc && inputTypeInfos[i] instanceof DecimalTypeInfo && ((DecimalTypeInfo) inputTypeInfos[i]).precision() <= 18) {
                // A decimal constant with precision <= 18 can be rewritten as DECIMAL_64 below.
                fixConstants = true;
            } else {
                outputDataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
            }
        }
    }
    if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64 && fixConstants) {
        for (int i = 0; i < size; ++i) {
            if ((inputDataTypePhysicalVariations[i] == DataTypePhysicalVariation.NONE || inputDataTypePhysicalVariations[i] == null) && vectorChildren[i] instanceof ConstantVectorExpression) {
                ConstantVectorExpression cve = (ConstantVectorExpression) vectorChildren[i];
                HiveDecimal hd = cve.getDecimalValue();
                // Primitive long: avoids a needless box/unbox round trip through Long.
                long longValue = new HiveDecimalWritable(hd).serialize64(((DecimalTypeInfo) cve.getOutputTypeInfo()).getScale());
                cve.setLongValue(longValue);
                cve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.DECIMAL_64);
                // Keep the scratch column's recorded variation in step with the rewritten constant.
                int scratchColIndex = cve.getOutputColumnNum() - ocm.initialOutputCol;
                ocm.scratchDataTypePhysicalVariations[scratchColIndex] = DataTypePhysicalVariation.DECIMAL_64;
            }
        }
    }
    final int outputColumnNum = ocm.allocateOutputColumn(returnType, outputDataTypePhysicalVariation);
    VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outputColumnNum);
    vectorCoalesce.setChildExpressions(vectorChildren);
    vectorCoalesce.setInputTypeInfos(inputTypeInfos);
    vectorCoalesce.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
    vectorCoalesce.setOutputTypeInfo(returnType);
    vectorCoalesce.setOutputDataTypePhysicalVariation(outputDataTypePhysicalVariation);
    freeNonColumns(vectorChildren);
    if (mode != VectorExpressionDescriptor.Mode.FILTER) {
        return vectorCoalesce;
    }
    // FILTER mode is only possible over a BOOLEAN result.
    // NOTE(review): the output column was already allocated above when null is returned here;
    // whether it should be released first is not visible from this method -- confirm.
    if (!(returnType.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) returnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN)) {
        return null;
    }
    // Wrap the PROJECTION COALESCE expression output with a filter.
    SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(vectorCoalesce.getOutputColumnNum());
    filterVectorExpr.setChildExpressions(new VectorExpression[] { vectorCoalesce });
    filterVectorExpr.setInputTypeInfos(vectorCoalesce.getOutputTypeInfo());
    filterVectorExpr.setInputDataTypePhysicalVariations(vectorCoalesce.getOutputDataTypePhysicalVariation());
    return filterVectorExpr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) VectorCoalesce(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)

Example 4 with SelectColumnIsTrue

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue in the Apache Hive project.

In class VectorizationContext, method getCastToBooleanExpression:

/**
 * Builds the vector expression that casts the first child expression to BOOLEAN.
 *
 * <p>A NULL constant becomes a constant vector expression of boolean type in the requested mode;
 * any other constant is not handled and yields {@code null}. Otherwise the cast is built in
 * PROJECTION mode ({@code CastStringToBoolean} for the string family, the {@code UDFToBoolean}
 * path for everything else) and, when FILTER mode is requested, wrapped in a
 * {@code SelectColumnIsTrue}.
 *
 * @param childExpr the cast arguments; only element 0 is inspected here
 * @param mode the requested mode (PROJECTION or FILTER)
 * @return the cast expression, its filter wrapper, or {@code null} if no expression could be built
 * @throws HiveException not thrown directly here; presumably raised by the helpers called
 */
private VectorExpression getCastToBooleanExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode) throws HiveException {
    final ExprNodeDesc operand = childExpr.get(0);
    final TypeInfo operandTypeInfo = operand.getTypeInfo();
    final String operandTypeName = operandTypeInfo.toString();

    if (operand instanceof ExprNodeConstantDesc) {
        final Object constantValue = ((ExprNodeConstantDesc) operand).getValue();
        // Only the NULL constant is supported; other constants are left unvectorized.
        // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
        return (constantValue == null)
            ? getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, mode)
            : null;
    }

    // Long and double are handled using descriptors, string needs to be specially handled.
    final VectorExpression projection = isStringFamily(operandTypeName)
        ? createVectorExpression(CastStringToBoolean.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo, DataTypePhysicalVariation.NONE)
        : getVectorExpressionForUdf(null, UDFToBoolean.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);

    if (projection == null || mode == VectorExpressionDescriptor.Mode.PROJECTION) {
        return projection;
    }

    // Any other mode: filter on the boolean column produced by the cast.
    final SelectColumnIsTrue filter = new SelectColumnIsTrue(projection.getOutputColumnNum());
    filter.setChildExpressions(new VectorExpression[] { projection });
    filter.setInputTypeInfos(projection.getOutputTypeInfo());
    filter.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
    return filter;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) CastStringToBoolean(org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)

Example 5 with SelectColumnIsTrue

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue in the Apache Hive project.

In class VectorizationContext, method createVectorExpression:

/**
 * Instantiates {@code vectorClass} with one constructor argument per child expression.
 *
 * <p>Each child contributes an argument according to its kind:
 * <ul>
 *   <li>generic function / field: vectorized recursively; the argument is its output column;</li>
 *   <li>column: the argument is the input column index. A decimal column whose physical variation
 *       is DECIMAL_64 is first converted to a regular decimal and the conversion's output column
 *       is used instead. In FILTER mode a {@code SelectColumnIsTrue} child is also added for a
 *       non-converted column;</li>
 *   <li>constant: the scalar value, or a constant vector expression when the scalar is null;</li>
 *   <li>dynamic value: the dynamic value object.</li>
 * </ul>
 *
 * @param vectorClass the vector expression class to instantiate
 * @param childExpr the child expressions; may be {@code null}, treated as no children
 * @param childrenMode the mode used when vectorizing children
 * @param returnType the result type passed to the instantiation
 * @param returnDataTypePhysicalVariation the result physical variation passed to the instantiation
 * @return the instantiated expression with input types, input variations and children set
 * @throws HiveException if a child is of an unsupported descriptor type; other failures are
 *         presumably raised by the helpers called
 */
private VectorExpression createVectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType, DataTypePhysicalVariation returnDataTypePhysicalVariation) throws HiveException {
    final int childCount = (childExpr == null) ? 0 : childExpr.size();
    final Object[] ctorArgs = new Object[childCount];
    final TypeInfo[] argTypeInfos = new TypeInfo[childCount];
    final DataTypePhysicalVariation[] argVariations = new DataTypePhysicalVariation[childCount];
    final List<VectorExpression> childVectorExprs = new ArrayList<>();

    for (int idx = 0; idx < childCount; idx++) {
        final ExprNodeDesc node = childExpr.get(idx);
        final TypeInfo nodeType = node.getTypeInfo();
        argTypeInfos[idx] = nodeType;
        // Default; overwritten below when a child expression reports its own variation.
        argVariations[idx] = DataTypePhysicalVariation.NONE;

        if (node instanceof ExprNodeGenericFuncDesc || node instanceof ExprNodeFieldDesc) {
            final VectorExpression nested = getVectorExpression(node, childrenMode);
            childVectorExprs.add(nested);
            ctorArgs[idx] = nested.getOutputColumnNum();
            argVariations[idx] = nested.getOutputDataTypePhysicalVariation();
        } else if (node instanceof ExprNodeColumnDesc) {
            final int columnIndex = getInputColumnIndex((ExprNodeColumnDesc) node);
            // In this method, we must only process non-Decimal64 column vectors,
            // so Decimal64 columns are converted to regular decimal.
            final boolean isDecimal64Column = nodeType instanceof DecimalTypeInfo
                && getDataTypePhysicalVariation(columnIndex) == DataTypePhysicalVariation.DECIMAL_64;
            if (isDecimal64Column) {
                // FUTURE: Can we reuse this conversion?
                final VectorExpression converted = createDecimal64ToDecimalConversion(columnIndex, nodeType);
                childVectorExprs.add(converted);
                ctorArgs[idx] = converted.getOutputColumnNum();
                argVariations[idx] = converted.getOutputDataTypePhysicalVariation();
            } else {
                if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
                    // In filter mode, the column must be a boolean
                    final SelectColumnIsTrue columnFilter = new SelectColumnIsTrue(columnIndex);
                    columnFilter.setInputTypeInfos(nodeType);
                    columnFilter.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
                    childVectorExprs.add(columnFilter);
                }
                ctorArgs[idx] = columnIndex;
            }
        } else if (node instanceof ExprNodeConstantDesc) {
            final Object scalar = getVectorTypeScalarValue((ExprNodeConstantDesc) node);
            if (scalar == null) {
                // A null scalar is passed as a constant vector expression instead.
                ctorArgs[idx] = getConstantVectorExpression(null, node.getTypeInfo(), childrenMode);
            } else {
                ctorArgs[idx] = scalar;
            }
        } else if (node instanceof ExprNodeDynamicValueDesc) {
            ctorArgs[idx] = ((ExprNodeDynamicValueDesc) node).getDynamicValue();
        } else {
            throw new HiveException("Cannot handle expression type: " + node.getClass().getSimpleName());
        }
    }

    final VectorExpression result = instantiateExpression(vectorClass, returnType, returnDataTypePhysicalVariation, ctorArgs);
    if (result == null) {
        handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, ctorArgs);
    }
    Objects.requireNonNull(result).setInputTypeInfos(argTypeInfos);
    result.setInputDataTypePhysicalVariations(argVariations);
    if (!childVectorExprs.isEmpty()) {
        result.setChildExpressions(childVectorExprs.toArray(new VectorExpression[0]));
    }
    freeNonColumns(childVectorExprs.toArray(new VectorExpression[0]));
    return result;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)

Aggregations

ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)6 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)6 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)6 SelectColumnIsTrue (org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)6 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)6 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)6 BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)5 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)5 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)5 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)5 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)4 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)4 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)3 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 ArrayList (java.util.ArrayList)2 CastBooleanToStringViaLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString)2 CastDateToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString)2 CastDecimalToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString)2 CastDoubleToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString)2 CastFloatToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString)2