Search in sources :

Example 11 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getGenericUdfVectorExpression.

/**
 * Resolves the vectorized expression for a generic UDF invocation.
 *
 * <p>The children are first passed through {@code evaluateCastOnConstants}. The UDF is then matched,
 * in a fixed order, against a list of special-case UDF types; only the first matching handler is
 * consulted. If that handler produces a non-null expression it is returned, otherwise (or when no
 * special case matches) the method falls back to {@code getVectorExpressionForUdf}.
 *
 * @param udf the generic UDF being vectorized
 * @param childExpr the argument expressions of the UDF call
 * @param mode the vector expression mode forwarded to the handlers and the general lookup
 * @param returnType the type information forwarded as the result type
 * @return the result of the matching special-case handler when non-null, otherwise whatever the
 *         general lookup returns
 * @throws HiveException propagated from the helpers this method calls
 */
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    // Both the special-case handlers and the general lookup work on the cast-evaluated children.
    final List<ExprNodeDesc> children = evaluateCastOnConstants(childExpr);

    // Special cases come first. A handler that cannot deal with the expression returns null,
    // in which case we continue with the general lookup below.
    VectorExpression special = null;
    if (udf instanceof GenericUDFBetween) {
        special = getBetweenExpression(children, mode, returnType);
    } else if (udf instanceof GenericUDFIn) {
        special = getInExpression(children, mode, returnType);
    } else if (udf instanceof GenericUDFIf) {
        special = getIfExpression((GenericUDFIf) udf, children, mode, returnType);
    } else if (udf instanceof GenericUDFWhen) {
        special = getWhenExpression(children, mode, returnType);
    } else if (udf instanceof GenericUDFOPPositive) {
        special = getIdentityExpression(children);
    } else if (udf instanceof GenericUDFCoalesce) {
        special = getCoalesceExpression(children, mode, returnType);
    } else if (udf instanceof GenericUDFElt) {
        // Elt gets its own handler because it accepts a variable number of arguments.
        special = getEltExpression(children, returnType);
    } else if (udf instanceof GenericUDFGrouping) {
        special = getGroupingExpression((GenericUDFGrouping) udf, children, returnType);
    } else if (udf instanceof GenericUDFBridge) {
        special = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, children, mode, returnType);
    } else if (udf instanceof GenericUDFToString) {
        special = getCastToString(children, returnType);
    } else if (udf instanceof GenericUDFToDecimal) {
        special = getCastToDecimal(children, returnType);
    } else if (udf instanceof GenericUDFToChar) {
        special = getCastToChar(children, returnType);
    } else if (udf instanceof GenericUDFToVarchar) {
        special = getCastToVarChar(children, returnType);
    } else if (udf instanceof GenericUDFToBinary) {
        special = getCastToBinary(children, returnType);
    } else if (udf instanceof GenericUDFTimestamp) {
        special = getCastToTimestamp((GenericUDFTimestamp) udf, children, mode, returnType);
    } else if (udf instanceof GenericUDFDate || udf instanceof GenericUDFToDate) {
        special = getIdentityForDateToDate(children, returnType);
    } else if (udf instanceof GenericUDFBucketNumber) {
        // The bucket-number expression is built inline; returnType is used for both input and output.
        final int outputColumn = ocm.allocateOutputColumn(returnType);
        special = new BucketNumExpression(outputColumn);
        special.setInputTypeInfos(returnType);
        special.setOutputTypeInfo(returnType);
    } else if (udf instanceof GenericUDFCastFormat) {
        special = getCastWithFormat(udf, children, returnType);
    }
    if (special != null) {
        return special;
    }

    // General lookup. For a bridge UDF the wrapped UDF class is looked up and no UDF instance
    // is passed along; for everything else the UDF itself and its runtime class are used.
    final boolean isBridge = udf instanceof GenericUDFBridge;
    final GenericUDF lookupUdf = isBridge ? null : udf;
    final Class<?> lookupClass = isBridge ? ((GenericUDFBridge) udf).getUdfClass() : udf.getClass();
    return getVectorExpressionForUdf(lookupUdf, lookupClass, children, mode, returnType);
}
Also used : BucketNumExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)

Example 12 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getColumnVectorExpression.

/**
 * Builds the vector expression for a plain column reference.
 *
 * <p>In {@code FILTER} mode the work is delegated to {@code getFilterOnBooleanColumnExpression}.
 * In {@code PROJECTION} mode an {@link IdentityExpression} over the input column is created, with
 * the column's type info and data type physical variation used for both input and output.
 *
 * @param exprDesc the column expression descriptor
 * @param mode the vector expression mode
 * @return the expression built for the requested mode
 * @throws HiveException propagated from the helpers this method calls
 * @throws RuntimeException if {@code mode} is neither {@code FILTER} nor {@code PROJECTION}
 */
private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException {
    final int inputColumn = getInputColumnIndex(exprDesc.getColumn());
    switch (mode) {
        case FILTER:
            return getFilterOnBooleanColumnExpression(exprDesc, inputColumn);
        case PROJECTION: {
            final VectorExpression identity = new IdentityExpression(inputColumn);
            final TypeInfo columnTypeInfo = exprDesc.getTypeInfo();
            final DataTypePhysicalVariation columnVariation = getDataTypePhysicalVariation(inputColumn);
            // Input and output metadata are deliberately the same.
            identity.setInputTypeInfos(columnTypeInfo);
            identity.setInputDataTypePhysicalVariations(columnVariation);
            identity.setOutputTypeInfo(columnTypeInfo);
            identity.setOutputDataTypePhysicalVariation(columnVariation);
            return identity;
        }
        default:
            throw new RuntimeException("Unexpected mode " + mode);
    }
}
Also used : DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) IdentityExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 13 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getGenericUDFBridgeVectorExpression.

/**
 * Applies special handling for bridged UDFs whose expressions can't be vectorized by the regular
 * descriptor based lookup.
 *
 * <p>The wrapped UDF class is classified as a cast to the integer family, to boolean, or to the
 * float family, and the matching cast builder is invoked. At most one builder is tried.
 *
 * @param udf the bridge whose wrapped UDF class selects the handler
 * @param childExpr the argument expressions of the UDF call
 * @param mode the vector expression mode
 * @param returnType the type information forwarded to the float-family cast builder
 * @return the expression produced by the selected handler, or {@code null} when no handler
 *         applied or the handler itself returned {@code null}
 * @throws HiveException propagated from the helpers this method calls
 */
private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    final Class<? extends UDF> wrappedUdfClass = udf.getUdfClass();

    VectorExpression castExpr = null;
    if (isCastToIntFamily(wrappedUdfClass)) {
        castExpr = getCastToLongExpression(childExpr, getAnyIntegerPrimitiveCategoryFromUdfClass(wrappedUdfClass));
    } else if (isCastToBoolean(wrappedUdfClass)) {
        castExpr = getCastToBooleanExpression(childExpr, mode);
    } else if (isCastToFloatFamily(wrappedUdfClass)) {
        castExpr = getCastToDoubleExpression(wrappedUdfClass, childExpr, returnType);
    }
    if (castExpr != null) {
        return castExpr;
    }

    // NOTE(review): childExpr is declared as List<ExprNodeDesc>, so this instanceof test against
    // ExprNodeGenericFuncDesc looks like it can never succeed - confirm whether the custom UDF
    // fallback was meant to receive the enclosing function descriptor instead.
    if (childExpr instanceof ExprNodeGenericFuncDesc) {
        return getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode);
    }
    return null;
}
Also used : ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)

Example 14 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getCastToBooleanExpression.

/**
 * Builds the vector expression for a cast of the first child to boolean.
 *
 * <p>A constant child is handled up front: a null constant becomes a constant vector expression of
 * boolean type, any other constant yields {@code null}. For non-constant children the cast is
 * always built in {@code PROJECTION} mode; when the caller asked for a different mode the
 * projection is wrapped in a {@link SelectColumnIsTrue} over its output column.
 *
 * @param childExpr the argument expressions; only the first element is inspected here
 * @param mode the vector expression mode requested by the caller
 * @return the cast expression, its filter wrapper, or {@code null} when no expression was built
 * @throws HiveException propagated from the helpers this method calls
 */
private VectorExpression getCastToBooleanExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode) throws HiveException {
    final ExprNodeDesc operand = childExpr.get(0);
    final String operandTypeName = operand.getTypeInfo().toString();

    if (operand instanceof ExprNodeConstantDesc) {
        final boolean isNullConstant = ((ExprNodeConstantDesc) operand).getValue() == null;
        // Non-null constants are not handled here.
        // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
        return isNullConstant
            ? getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, mode)
            : null;
    }

    // Long and double go through the descriptor-based UDF lookup; the string family needs its
    // own cast class.
    final VectorExpression projection = isStringFamily(operandTypeName)
        ? createVectorExpression(CastStringToBoolean.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo, DataTypePhysicalVariation.NONE)
        : getVectorExpressionForUdf(null, UDFToBoolean.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);

    if (projection != null && mode != VectorExpressionDescriptor.Mode.PROJECTION) {
        // Non-projection mode: select on the boolean column the projection writes.
        final SelectColumnIsTrue selectTrue = new SelectColumnIsTrue(projection.getOutputColumnNum());
        selectTrue.setChildExpressions(new VectorExpression[] { projection });
        selectTrue.setInputTypeInfos(projection.getOutputTypeInfo());
        selectTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
        return selectTrue;
    }
    return projection;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) CastStringToBoolean(org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)

Example 15 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinOperator method process.

/**
 * Processes one vectorized row batch.
 *
 * <p>The big-table filter, big-table value and key expressions are evaluated against the batch,
 * the key wrappers are computed, and then the superclass {@code process} is invoked once for each
 * index from 0 up to the batch size, with {@code batchIndex} set to the current index.
 *
 * @param row the incoming batch; it is cast to {@link VectorizedRowBatch}
 * @param tag the tag, stored on this operator and forwarded to the superclass
 * @throws HiveException propagated from the superclass {@code process} and other calls made here
 */
@Override
public void process(Object row, int tag) throws HiveException {
    final VectorizedRowBatch batch = (VectorizedRowBatch) row;

    // Preparation for hybrid grace hash join
    this.tag = tag;
    if (scratchBatch == null) {
        scratchBatch = VectorizedBatchUtil.makeLike(batch);
    }

    if (bigTableFilterExpressions != null) {
        for (VectorExpression filterExpr : bigTableFilterExpressions) {
            filterExpr.evaluate(batch);
        }
    }
    if (bigTableValueExpressions != null) {
        for (VectorExpression valueExpr : bigTableValueExpressions) {
            valueExpr.evaluate(batch);
        }
    }
    for (VectorExpression keyExpr : keyExpressions) {
        keyExpr.evaluate(batch);
    }

    keyWrapperBatch.evaluateBatch(batch);
    keyValues = keyWrapperBatch.getVectorHashKeyWrappers();

    // Hand the batch to the superclass one index at a time; batchIndex is a field, so it is
    // visible outside this method while the loop runs.
    batchIndex = 0;
    while (batchIndex < batch.size) {
        super.process(row, tag);
        ++batchIndex;
    }

    // Set these two to invalid values so any attempt to use them
    // outside the inner loop results in NPE/OutOfBounds errors
    batchIndex = -1;
    keyValues = null;
}
Also used : VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)140 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)57 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)44 ArrayList (java.util.ArrayList)43 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)38 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)32 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)30 Test (org.junit.Test)29 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)27 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)27 IOException (java.io.IOException)25 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)25 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)23 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)23 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)23 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)19 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)19 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)19 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)15