Search in sources :

Example 1 with DataTypePhysicalVariation

use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.

the class VectorizationContext method checkExprNodeDescForDecimal64.

/**
 * Decides whether an expression tree can be evaluated as Decimal64.
 *
 * <ul>
 *   <li>Column reference: true only when the physical variation recorded for its input
 *       column index is {@code DECIMAL_64}.</li>
 *   <li>Generic function: the return type must pass {@code checkTypeInfoForDecimal64}, the
 *       UDF class must carry the {@code VectorizedExpressionsSupportDecimal64} annotation,
 *       and every child must qualify. Constant children are resolved through
 *       {@code decimalTypeFromCastToDecimal} against the function's return decimal type;
 *       all other children are checked recursively.</li>
 *   <li>Bare constant: the result of {@code checkTypeInfoForDecimal64} on its type.</li>
 *   <li>Anything else: false.</li>
 * </ul>
 *
 * @param exprNodeDesc the expression descriptor to inspect
 * @return true when the expression (and, for functions, all of its children) qualifies
 * @throws HiveException propagated from the helper lookups and the recursive checks
 */
private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException {
    if (exprNodeDesc instanceof ExprNodeColumnDesc) {
        final int columnIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc);
        return getDataTypePhysicalVariation(columnIndex) == DataTypePhysicalVariation.DECIMAL_64;
    }

    if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
        // The function's own result has to be Decimal64-compatible before anything else matters.
        final TypeInfo resultTypeInfo = exprNodeDesc.getTypeInfo();
        if (!checkTypeInfoForDecimal64(resultTypeInfo)) {
            return false;
        }
        final DecimalTypeInfo resultDecimalTypeInfo = (DecimalTypeInfo) resultTypeInfo;

        // A class-level annotation marks UDFs whose vectorized expressions support Decimal64.
        final Class<?> udfClass = ((ExprNodeGenericFuncDesc) exprNodeDesc).getGenericUDF().getClass();
        if (AnnotationUtils.getAnnotation(udfClass, VectorizedExpressionsSupportDecimal64.class) == null) {
            return false;
        }

        // Every child has to qualify; the first one that does not ends the check.
        for (ExprNodeDesc child : exprNodeDesc.getChildren()) {
            final boolean childQualifies;
            if (child instanceof ExprNodeConstantDesc) {
                final DecimalTypeInfo constantDecimalTypeInfo = decimalTypeFromCastToDecimal(child, resultDecimalTypeInfo);
                childQualifies = constantDecimalTypeInfo != null && checkTypeInfoForDecimal64(constantDecimalTypeInfo);
            } else {
                childQualifies = checkExprNodeDescForDecimal64(child);
            }
            if (!childQualifies) {
                return false;
            }
        }
        return true;
    }

    if (exprNodeDesc instanceof ExprNodeConstantDesc) {
        return checkTypeInfoForDecimal64(exprNodeDesc.getTypeInfo());
    }

    // Any other descriptor kind is not considered Decimal64.
    return false;
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)

Example 2 with DataTypePhysicalVariation

use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.

the class VectorizationContext method getIdentityExpression.

/**
 * Fast path for operations that leave their input untouched, such as unary + or casting
 * boolean to long. IdentityExpression and its children are always projections.
 *
 * <p>Only the first entry of {@code childExprList} is consulted. A generic function child is
 * vectorized first and becomes the single child expression of the result; a column child is
 * referenced directly by its input column index. Input and output type information on the
 * returned expression are set to the same values.
 *
 * @param childExprList the operand list; element 0 is the expression passed through
 * @return an {@code IdentityExpression} over the operand's column
 * @throws HiveException if the operand is neither a generic function nor a column reference
 */
private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList) throws HiveException {
    final ExprNodeDesc operand = childExprList.get(0);
    final boolean operandIsFunction = operand instanceof ExprNodeGenericFuncDesc;
    if (!operandIsFunction && !(operand instanceof ExprNodeColumnDesc)) {
        throw new HiveException("Expression not supported: " + operand);
    }

    final VectorExpression operandExpression;
    final int identityCol;
    final TypeInfo identityTypeInfo;
    final DataTypePhysicalVariation identityDataTypePhysicalVariation;
    if (operandIsFunction) {
        // Take column, type and physical variation from the vectorized child's output.
        operandExpression = getVectorExpression(operand);
        identityCol = operandExpression.getOutputColumnNum();
        identityTypeInfo = operandExpression.getOutputTypeInfo();
        identityDataTypePhysicalVariation = operandExpression.getOutputDataTypePhysicalVariation();
    } else {
        final ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) operand;
        operandExpression = null;
        identityCol = getInputColumnIndex(columnDesc.getColumn());
        identityTypeInfo = columnDesc.getTypeInfo();
        // CONSIDER: Validation of type information
        identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol);
    }

    final VectorExpression identity = new IdentityExpression(identityCol);
    if (operandExpression != null) {
        identity.setChildExpressions(new VectorExpression[] { operandExpression });
    }
    // Identity: the output is described exactly like the input.
    identity.setInputTypeInfos(identityTypeInfo);
    identity.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation);
    identity.setOutputTypeInfo(identityTypeInfo);
    identity.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation);
    return identity;
}
Also used : DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)

Example 3 with DataTypePhysicalVariation

use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.

the class VectorizationContext method getColumnVectorExpression.

/**
 * Builds the vector expression for a plain column reference.
 *
 * <p>In {@code PROJECTION} mode the column is wrapped in an {@code IdentityExpression} whose
 * input and output type information both come from the column. In {@code FILTER} mode the
 * column is evaluated as a boolean: a boolean column is used directly by
 * {@code SelectColumnIsTrue}; any other type is first converted to boolean, and the filter
 * then reads the conversion's output column.
 *
 * @param exprDesc the column descriptor
 * @param mode whether the column is used as a filter or as a projection
 * @return the vector expression, or null when {@code mode} matches neither handled case
 * @throws HiveException if a non-boolean column cannot be converted to boolean in filter mode
 */
private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException {
    final int columnNum = getInputColumnIndex(exprDesc.getColumn());
    VectorExpression result = null;
    switch(mode) {
        case PROJECTION:
            {
                final TypeInfo columnTypeInfo = exprDesc.getTypeInfo();
                final DataTypePhysicalVariation columnPhysicalVariation = getDataTypePhysicalVariation(columnNum);
                result = new IdentityExpression(columnNum);
                result.setInputTypeInfos(columnTypeInfo);
                result.setInputDataTypePhysicalVariations(columnPhysicalVariation);
                result.setOutputTypeInfo(columnTypeInfo);
                result.setOutputDataTypePhysicalVariation(columnPhysicalVariation);
                break;
            }
        case FILTER:
            {
                // Evaluate the column as a boolean, converting if necessary.
                final TypeInfo columnTypeInfo = exprDesc.getTypeInfo();
                final boolean isBooleanColumn = columnTypeInfo.getCategory() == Category.PRIMITIVE
                    && ((PrimitiveTypeInfo) columnTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;
                if (isBooleanColumn) {
                    result = new SelectColumnIsTrue(columnNum);
                    break;
                }

                // Not a boolean column: a conversion has to run first.
                ArrayList<ExprNodeDesc> singleColumnList = new ArrayList<ExprNodeDesc>(1);
                singleColumnList.add(exprDesc);

                // The dedicated cast method handles a few special cases; the UDF is the fallback.
                VectorExpression toBoolean = getCastToBoolean(singleColumnList);
                if (toBoolean == null) {
                    toBoolean = getVectorExpressionForUdf(null, UDFToBoolean.class, singleColumnList, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);
                }
                if (toBoolean == null) {
                    throw new HiveException("Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean");
                }

                // Filter on the column the conversion writes to.
                result = new SelectColumnIsTrue(toBoolean.getOutputColumnNum());
                result.setChildExpressions(new VectorExpression[] { toBoolean });
                result.setInputTypeInfos(toBoolean.getOutputTypeInfo());
                result.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
                break;
            }
    }
    return result;
}
Also used : DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 4 with DataTypePhysicalVariation

use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.

the class VectorizedRowBatchCtx method createVectorizedRowBatch.

/**
 * Creates a vectorized row batch and populates its column vectors.
 *
 * <p>The batch is sized for all row columns plus all scratch columns. When
 * {@code dataColumnNums} is null every row column gets a vector. Otherwise only the listed
 * data columns, all partition columns and the needed virtual columns get one, and the
 * remaining slots are left unassigned. Scratch column vectors are always created, after the
 * row columns.
 *
 * @return the new batch, with partition info set and {@code reset()} already called
 */
public VectorizedRowBatch createVectorizedRowBatch() {
    final int nonScratchColumnCount = rowColumnTypeInfos.length;
    final int scratchColumnCount = scratchColumnTypeNames.length;
    final VectorizedRowBatch batch = new VectorizedRowBatch(nonScratchColumnCount + scratchColumnCount);

    if (dataColumnNums != null) {
        // Create only needed/included data columns.
        for (int columnNum : dataColumnNums) {
            Preconditions.checkState(columnNum < nonScratchColumnCount);
            batch.cols[columnNum] = createColumnVectorFromRowColumnTypeInfos(columnNum);
        }

        // Partition columns follow the data columns and are always created.
        final int partitionEndColumnNum = dataColumnCount + partitionColumnCount;
        for (int columnNum = dataColumnCount; columnNum < partitionEndColumnNum; columnNum++) {
            batch.cols[columnNum] = VectorizedBatchUtil.createColumnVector(rowColumnTypeInfos[columnNum]);
        }

        // Virtual columns follow the partition columns; only the needed ones get a vector.
        final int virtualEndColumnNum = partitionEndColumnNum + virtualColumnCount;
        for (int columnNum = partitionEndColumnNum; columnNum < virtualEndColumnNum; columnNum++) {
            if (isVirtualColumnNeeded(rowColumnNames[columnNum])) {
                batch.cols[columnNum] = VectorizedBatchUtil.createColumnVector(rowColumnTypeInfos[columnNum]);
            }
        }
    } else {
        // All data and partition columns.
        for (int columnNum = 0; columnNum < nonScratchColumnCount; columnNum++) {
            batch.cols[columnNum] = createColumnVectorFromRowColumnTypeInfos(columnNum);
        }
    }

    // Scratch columns are placed after the row columns.
    for (int scratchIndex = 0; scratchIndex < scratchColumnCount; scratchIndex++) {
        final String scratchTypeName = scratchColumnTypeNames[scratchIndex];
        final DataTypePhysicalVariation scratchPhysicalVariation = scratchDataTypePhysicalVariations[scratchIndex];
        batch.cols[nonScratchColumnCount + scratchIndex] = VectorizedBatchUtil.createColumnVector(scratchTypeName, scratchPhysicalVariation);
    }

    // UNDONE: Also remember virtualColumnCount...
    batch.setPartitionInfo(dataColumnCount, partitionColumnCount);
    batch.reset();
    return batch;
}
Also used : DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)

Example 5 with DataTypePhysicalVariation

use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.

the class VectorizationContext method createVectorExpression.

/**
 * Instantiates {@code vectorClass} with constructor arguments derived from the child
 * expression descriptors, then attaches the children and input type information.
 *
 * <p>Each child contributes one argument:
 * <ul>
 *   <li>generic function or field expression: vectorized recursively, passing its output
 *       column number and output physical variation;</li>
 *   <li>column: its input column index. A decimal column whose physical variation is
 *       {@code DECIMAL_64} is first converted to regular decimal and the conversion's output
 *       column is passed instead. In {@code FILTER} mode a {@code SelectColumnIsTrue} child
 *       is added for a column that is not converted;</li>
 *   <li>constant: its scalar value, or a constant vector expression when the scalar value
 *       is null;</li>
 *   <li>dynamic value: the dynamic value object.</li>
 * </ul>
 * The output columns of all child expressions are released through {@code ocm} before
 * returning.
 *
 * @param vectorClass the vector expression class to instantiate
 * @param childExpr the child expression descriptors; null is treated as no children
 * @param childrenMode the mode used when vectorizing the children
 * @param returnType the result type handed to the instantiation
 * @return the instantiated and configured vector expression
 * @throws HiveException if a child has an unsupported descriptor type, or propagated from
 *         the helper calls
 */
private VectorExpression createVectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType) throws HiveException {
    final int numChildren = (childExpr != null) ? childExpr.size() : 0;
    final TypeInfo[] inputTypeInfos = new TypeInfo[numChildren];
    final DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[numChildren];
    final List<VectorExpression> children = new ArrayList<VectorExpression>();
    final Object[] arguments = new Object[numChildren];

    for (int argIndex = 0; argIndex < numChildren; argIndex++) {
        final ExprNodeDesc child = childExpr.get(argIndex);
        final TypeInfo childTypeInfo = child.getTypeInfo();
        inputTypeInfos[argIndex] = childTypeInfo;
        // Default; overwritten below when a child expression reports its own variation.
        inputDataTypePhysicalVariations[argIndex] = DataTypePhysicalVariation.NONE;

        if (child instanceof ExprNodeGenericFuncDesc || child instanceof ExprNodeFieldDesc) {
            final VectorExpression nested = getVectorExpression(child, childrenMode);
            children.add(nested);
            arguments[argIndex] = nested.getOutputColumnNum();
            inputDataTypePhysicalVariations[argIndex] = nested.getOutputDataTypePhysicalVariation();
        } else if (child instanceof ExprNodeColumnDesc) {
            final int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
            // In this method, we must only process non-Decimal64 column vectors, so
            // Decimal64 columns are converted to regular decimal first.
            final boolean isDecimal64Column = (childTypeInfo instanceof DecimalTypeInfo)
                && getDataTypePhysicalVariation(colIndex) == DataTypePhysicalVariation.DECIMAL_64;
            if (isDecimal64Column) {
                // FUTURE: Can we reuse this conversion?
                final VectorExpression conversion = createDecimal64ToDecimalConversion(colIndex, childTypeInfo);
                children.add(conversion);
                arguments[argIndex] = conversion.getOutputColumnNum();
                inputDataTypePhysicalVariations[argIndex] = conversion.getOutputDataTypePhysicalVariation();
            } else {
                if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
                    // In filter mode, the column must be a boolean
                    final SelectColumnIsTrue columnFilter = new SelectColumnIsTrue(colIndex);
                    columnFilter.setInputTypeInfos(childTypeInfo);
                    columnFilter.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
                    children.add(columnFilter);
                }
                arguments[argIndex] = colIndex;
            }
        } else if (child instanceof ExprNodeConstantDesc) {
            final Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
            if (scalarValue == null) {
                // A null scalar is passed as a constant vector expression instead.
                arguments[argIndex] = getConstantVectorExpression(null, child.getTypeInfo(), childrenMode);
            } else {
                arguments[argIndex] = scalarValue;
            }
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            arguments[argIndex] = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
        } else {
            throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
        }
    }

    final VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments);
    if (vectorExpression == null) {
        // NOTE(review): presumably this helper always throws; if it ever returned, the next
        // statement would fail on the null reference. Confirm against its definition.
        handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments);
    }
    vectorExpression.setInputTypeInfos(inputTypeInfos);
    vectorExpression.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
    if (!children.isEmpty()) {
        vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
    }

    // Release the output columns of the child expressions.
    for (VectorExpression childExpression : children) {
        ocm.freeOutputColumn(childExpression.getOutputColumnNum());
    }
    return vectorExpression;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 10; ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 7; DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 7; PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 7; TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 7; ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 6; StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 6; ArrayList (java.util.ArrayList): 5; ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 5; ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 5; BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo): 5; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 4; HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 2; ExprNodeDynamicValueDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc): 2; List (java.util.List): 1; VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx): 1; VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor): 1; VectorUDFArgDesc (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc): 1; ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc): 1; UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString): 1