Search in sources :

Example 1 with ExprNodeDynamicValueDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.

the class VectorizationContext method getBetweenExpression.

/**
 * Get a [NOT] BETWEEN filter or projection expression. This is treated as a special case
 * because the NOT is actually specified in the expression tree as the first argument,
 * and we don't want any runtime cost for that. So creating the VectorExpression
 * needs to be done differently than the standard way where all arguments are
 * passed to the VectorExpression constructor.
 *
 * Layout of {@code childExpr} as read by this method:
 *   index 0 - constant whose value is cast to Boolean; true means NOT BETWEEN
 *   index 1 - the expression being tested
 *   index 2 - first range bound
 *   index 3 - second range bound
 *
 * @param childExpr  the four BETWEEN children described above
 * @param mode       FILTER or PROJECTION; selects the Filter* or plain *ColumnBetween class
 * @param returnType type handed through to the vector expression factory
 * @return the vector expression, or null when this method declines to build one:
 *         both bounds are dynamic values but mode is PROJECTION; the bounds are neither both
 *         dynamic values nor both constants; no common comparison type exists; or a child's
 *         type category differs from the common type's category
 * @throws HiveException declared for the helper calls made here
 */
private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    boolean hasDynamicValues = false;
    // We don't currently support the BETWEEN ends being columns.  They must be scalars.
    // Accepted shapes: both bounds dynamic values (filter mode only), or both bounds constants.
    if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) && (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) {
        hasDynamicValues = true;
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            // Projection mode is not applicable.
            return null;
        }
    } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
        // Mixed dynamic/constant bounds and any non-constant bound end up here.
        return null;
    }
    // The first child carries the NOT flag as a constant Boolean.
    boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
    ExprNodeDesc colExpr = childExpr.get(1);
    // The children after not, might need a cast. Get common types for the two comparisons.
    // Casting for 'between' is handled here as a special case, because the first child is for NOT and doesn't need
    // cast
    TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(childExpr.get(1).getTypeInfo(), childExpr.get(2).getTypeInfo());
    if (commonType == null) {
        // Can't vectorize
        return null;
    }
    // Fold the second bound's type into the common type found for value and first bound.
    commonType = FunctionRegistry.getCommonClassForComparison(commonType, childExpr.get(3).getTypeInfo());
    if (commonType == null) {
        // Can't vectorize
        return null;
    }
    // Children 1..3, each either kept as-is or wrapped in a cast to commonType.
    List<ExprNodeDesc> castChildren = new ArrayList<>();
    // Remembers whether any cast was inserted; used below to rule out the Decimal64 path.
    boolean wereCastUdfs = false;
    Category commonTypeCategory = commonType.getCategory();
    for (ExprNodeDesc desc : childExpr.subList(1, 4)) {
        TypeInfo childTypeInfo = desc.getTypeInfo();
        Category childCategory = childTypeInfo.getCategory();
        if (childCategory != commonTypeCategory) {
            // A child in a different type category than the common type is not handled here.
            return null;
        }
        final boolean isNeedsCast;
        if (commonTypeCategory == Category.PRIMITIVE) {
            // Do not do strict TypeInfo comparisons for DECIMAL -- just compare the category.
            // Otherwise, we generate unnecessary casts.
            isNeedsCast = ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() != ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory();
        } else {
            isNeedsCast = !commonType.equals(desc.getTypeInfo());
        }
        if (!isNeedsCast) {
            castChildren.add(desc);
        } else {
            // Wrap the child in a cast function whose result type is commonType.
            GenericUDF castUdf = getGenericUDFForCast(commonType);
            ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf, Arrays.asList(new ExprNodeDesc[] { desc }));
            castChildren.add(engfd);
            wereCastUdfs = true;
        }
    }
    // The class selection below is keyed on the common type's name.
    String colType = commonType.getTypeName();
    // prepare arguments for createVectorExpression
    List<ExprNodeDesc> childrenAfterNot = evaluateCastOnConstants(castChildren);
    // determine class
    // Each type family has a BETWEEN and a NOT BETWEEN branch. Inside a branch, PROJECTION mode
    // picks the plain column class and any other mode picks the Filter* class.
    // NOTE(review): only the non-NOT branches consult hasDynamicValues; the NOT branches always
    // pick the regular Filter*NotBetween class even when the bounds are dynamic values -- confirm
    // that this combination is either unreachable or handled downstream.
    // NOTE(review): if colType matches none of the families below, cl stays null and is passed
    // on to createVectorExpression -- confirm how that overload treats a null class.
    Class<?> cl = null;
    if (isIntFamily(colType) && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = LongColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterLongColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
        }
    } else if (isIntFamily(colType) && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = LongColumnNotBetween.class;
        } else {
            cl = FilterLongColumnNotBetween.class;
        }
    } else if (isFloatFamily(colType) && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = DoubleColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterDoubleColumnBetweenDynamicValue.class : FilterDoubleColumnBetween.class);
        }
    } else if (isFloatFamily(colType) && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = DoubleColumnNotBetween.class;
        } else {
            cl = FilterDoubleColumnNotBetween.class;
        }
    } else if (colType.equals("string") && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = StringColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterStringColumnBetweenDynamicValue.class : FilterStringColumnBetween.class);
        }
    } else if (colType.equals("string") && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = StringColumnNotBetween.class;
        } else {
            cl = FilterStringColumnNotBetween.class;
        }
    } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = VarCharColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterVarCharColumnBetweenDynamicValue.class : FilterVarCharColumnBetween.class);
        }
    } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = VarCharColumnNotBetween.class;
        } else {
            cl = FilterVarCharColumnNotBetween.class;
        }
    } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = CharColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterCharColumnBetweenDynamicValue.class : FilterCharColumnBetween.class);
        }
    } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = CharColumnNotBetween.class;
        } else {
            cl = FilterCharColumnNotBetween.class;
        }
    } else if (colType.equals("timestamp") && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = TimestampColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterTimestampColumnBetweenDynamicValue.class : FilterTimestampColumnBetween.class);
        }
    } else if (colType.equals("timestamp") && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = TimestampColumnNotBetween.class;
        } else {
            cl = FilterTimestampColumnNotBetween.class;
        }
    } else if (isDecimalFamily(colType) && !notKeywordPresent) {
        // The Decimal64 path is attempted only when the value expression passes the Decimal64
        // check, no cast was inserted above, and the bounds are not dynamic values.
        final boolean tryDecimal64 = checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
        if (tryDecimal64) {
            VectorExpression decimal64VecExpr = tryDecimal64Between(mode, /* isNot */
            false, colExpr, childrenAfterNot, returnType);
            if (decimal64VecExpr != null) {
                return decimal64VecExpr;
            }
        }
        // Otherwise (or when the Decimal64 attempt yields null) use the regular decimal classes.
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = DecimalColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterDecimalColumnBetweenDynamicValue.class : FilterDecimalColumnBetween.class);
        }
    } else if (isDecimalFamily(colType) && notKeywordPresent) {
        // Same Decimal64 attempt as above, with isNot = true.
        final boolean tryDecimal64 = checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
        if (tryDecimal64) {
            VectorExpression decimal64VecExpr = tryDecimal64Between(mode, /* isNot */
            true, colExpr, childrenAfterNot, returnType);
            if (decimal64VecExpr != null) {
                return decimal64VecExpr;
            }
        }
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = DecimalColumnNotBetween.class;
        } else {
            cl = FilterDecimalColumnNotBetween.class;
        }
    } else if (isDateFamily(colType) && !notKeywordPresent) {
        // Dates use the Long column classes, except for the dynamic-value filter which has
        // its own Date class.
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = LongColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterDateColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
        }
    } else if (isDateFamily(colType) && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = LongColumnNotBetween.class;
        } else {
            cl = FilterLongColumnNotBetween.class;
        }
    }
    // Only the three children after NOT are passed on. The mode argument here is always
    // PROJECTION, independent of this method's own mode (presumably it governs how the children
    // are evaluated -- confirm against the five-argument overload).
    return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
}
Also used : FilterDoubleColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) FilterVarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween) VarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharColumnNotBetween) StringColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColumnNotBetween) FilterStringColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) FilterVarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween) VarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharColumnNotBetween) CharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CharColumnNotBetween) FilterCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnNotBetween) 
PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) FilterCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnNotBetween) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) FilterDoubleColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween) DoubleColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColumnNotBetween) CastStringToBoolean(org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) LongColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColumnNotBetween) FilterLongColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween) FilterDecimalColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColumnNotBetween) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) FilterVarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween) TimestampColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.TimestampColumnNotBetween) FilterTimestampColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween) FilterTimestampColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween) FilterStringColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween) 
FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) DecimalColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColumnNotBetween) FilterDecimalColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColumnNotBetween) FilterLongColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween)

Example 2 with ExprNodeDynamicValueDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.

the class SharedWorkOptimizer method extractConjsIgnoringDPPPreds.

/**
 * Splits {@code predicate} via {@code ExprNodeDescUtils.split} and returns the string form of
 * every resulting expression except the ones recognised as dynamic-pruning predicates.
 *
 * The following expressions are left out of the result:
 *   - a generic function whose UDF class is exactly GenericUDFInBloomFilter;
 *   - a generic function whose UDF class is exactly GenericUDFBetween and whose child at
 *     index 2 or index 3 is an ExprNodeDynamicValueDesc;
 *   - an ExprNodeDynamicListDesc.
 *
 * @param predicate the predicate to decompose
 * @return a multiset (created with {@code TreeMultiset.create()}) of the retained expressions'
 *         {@code toString()} values
 */
private static Multiset<String> extractConjsIgnoringDPPPreds(ExprNodeDesc predicate) {
    Multiset<String> retained = TreeMultiset.create();
    for (ExprNodeDesc conjunct : ExprNodeDescUtils.split(predicate)) {
        final boolean skip;
        if (conjunct instanceof ExprNodeGenericFuncDesc) {
            ExprNodeGenericFuncDesc call = (ExprNodeGenericFuncDesc) conjunct;
            Class<?> udfClass = call.getGenericUDF().getClass();
            if (GenericUDFInBloomFilter.class == udfClass) {
                skip = true;
            } else {
                // The children are only inspected once the function is known to be a BETWEEN.
                skip = GenericUDFBetween.class == udfClass
                    && (call.getChildren().get(2) instanceof ExprNodeDynamicValueDesc
                        || call.getChildren().get(3) instanceof ExprNodeDynamicValueDesc);
            }
        } else {
            skip = conjunct instanceof ExprNodeDynamicListDesc;
        }
        if (!skip) {
            retained.add(conjunct.toString());
        }
    }
    return retained;
}
Also used : ExprNodeDynamicListDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 3 with ExprNodeDynamicValueDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.

the class SemiJoinReductionMerge method createSemiJoinPredicate.

/**
 * Builds the multi-column semi-join predicate that is applied on the target relation.
 *
 * If the semi-join target columns are fname, lname, and age, the generated predicate is:
 * <pre>
 *   fname BETWEEN ?min_fname AND ?max_fname and
 *   lname BETWEEN ?min_lname AND ?max_lname and
 *   age   BETWEEN ?min_age   AND ?max_age and
 *   IN_BLOOM_FILTER(HASH(fname,lname,age),?bloom_filter)
 * </pre>
 * where a question mark (?) denotes a dynamic value bound at runtime.
 *
 * The dynamic value ids of {@code sjValueInfo} are consumed in order: two per branch (min, then
 * max) followed by a single id for the bloom filter.
 *
 * @param sjBranches  the semi-join branches; each must have exactly one target column
 * @param sjValueInfo supplies the dynamic value ids consumed by the predicate
 * @param context     used to look up the runtime values info of each branch
 * @return the conjunction of all BETWEEN predicates followed by the IN_BLOOM_FILTER predicate
 */
private static ExprNodeGenericFuncDesc createSemiJoinPredicate(List<ReduceSinkOperator> sjBranches, RuntimeValuesInfo sjValueInfo, ParseContext context) {
    // Performance note: the 'BETWEEN' predicates are placed ahead of 'IN_BLOOM_FILTER' to speed up evaluation.
    final Deque<String> pendingIds = new ArrayDeque<>(sjValueInfo.getDynamicValueIDs());
    final List<ExprNodeDesc> conjuncts = new ArrayList<>();
    final List<ExprNodeDesc> hashedColumns = new ArrayList<>();
    for (ReduceSinkOperator branch : sjBranches) {
        final RuntimeValuesInfo branchInfo = context.getRsToRuntimeValuesInfoMap().get(branch);
        checkState(branchInfo.getTargetColumns().size() == 1, "Cannot handle multi-column semijoin branches.");
        final ExprNodeDesc column = branchInfo.getTargetColumns().get(0);
        final TypeInfo columnType = column.getTypeInfo();
        // Ids are taken in pairs: first the lower bound, then the upper bound.
        final DynamicValue lowerBound = new DynamicValue(pendingIds.poll(), columnType);
        final DynamicValue upperBound = new DynamicValue(pendingIds.poll(), columnType);
        // A leading FALSE constant means the BETWEEN result is not inverted.
        final List<ExprNodeDesc> rangeArgs = Arrays.asList(
            new ExprNodeConstantDesc(Boolean.FALSE),
            column,
            new ExprNodeDynamicValueDesc(lowerBound),
            new ExprNodeDynamicValueDesc(upperBound));
        conjuncts.add(new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFBetween(), "between", rangeArgs));
        hashedColumns.add(column);
    }
    final ExprNodeDesc hashOfColumns = ExprNodeDescUtils.murmurHash(hashedColumns);
    assert pendingIds.size() == 1 : "There should be one column left untreated the one with the bloom filter";
    final DynamicValue bloomFilterValue = new DynamicValue(pendingIds.poll(), TypeInfoFactory.binaryTypeInfo);
    final List<ExprNodeDesc> bloomArgs = Arrays.asList(hashOfColumns, new ExprNodeDynamicValueDesc(bloomFilterValue));
    conjuncts.add(new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFInBloomFilter(), "in_bloom_filter", bloomArgs));
    return and(conjuncts);
}
Also used : GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ArrayDeque(java.util.ArrayDeque) RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GenericUDFInBloomFilter(org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DynamicValue(org.apache.hadoop.hive.ql.plan.DynamicValue)

Example 4 with ExprNodeDynamicValueDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.

the class TezCompiler method sortSemijoinFilters.

/**
 * Rearranges the conjuncts of every AND filter predicate in the given map into three blocks:
 * first the predicates not identified as semijoin filters (in their existing order), then the
 * BETWEEN clauses on min/max dynamic values, and last the IN_BLOOM_FILTER predicates. The
 * intuition is that a BETWEEN clause is cheaper to evaluate than a bloom filter probe.
 *
 * Within the BETWEEN and IN_BLOOM_FILTER blocks, predicates appear in the order in which they
 * are matched while walking the semijoin infos supplied for the filter (presumably already
 * ordered by benefit by the caller -- confirm). Filters whose predicate is not an AND are
 * left untouched.
 *
 * @param procCtx                  gives access to the reduce-sink runtime values info map
 * @param globalReductionFactorMap filter operators mapped to their semijoin infos
 * @throws SemanticException declared for the function registry calls made here
 */
private static void sortSemijoinFilters(OptimizeTezProcContext procCtx, ListMultimap<FilterOperator, SemijoinOperatorInfo> globalReductionFactorMap) throws SemanticException {
    for (Entry<FilterOperator, Collection<SemijoinOperatorInfo>> entry : globalReductionFactorMap.asMap().entrySet()) {
        final FilterOperator filter = entry.getKey();
        final Collection<SemijoinOperatorInfo> infos = entry.getValue();
        final ExprNodeDesc pred = filter.getConf().getPredicate();
        if (!FunctionRegistry.isOpAnd(pred)) {
            continue;
        }
        // Starts as every conjunct; matched semijoin predicates are removed as they are found.
        final LinkedHashSet<ExprNodeDesc> remaining = new LinkedHashSet<>(pred.getChildren());
        final List<ExprNodeDesc> betweens = new ArrayList<>();
        final List<ExprNodeDesc> bloomFilters = new ArrayList<>();
        for (SemijoinOperatorInfo info : infos) {
            for (ExprNodeDesc conjunct : pred.getChildren()) {
                // Decide which bucket the conjunct could belong to and where its dynamic value sits.
                final List<ExprNodeDesc> bucket;
                final ExprNodeDesc dynamicChild;
                if (FunctionRegistry.isOpBetween(conjunct) && conjunct.getChildren().get(2) instanceof ExprNodeDynamicValueDesc) {
                    // BETWEEN in SJ
                    bucket = betweens;
                    dynamicChild = conjunct.getChildren().get(2);
                } else if (FunctionRegistry.isOpInBloomFilter(conjunct) && conjunct.getChildren().get(1) instanceof ExprNodeDynamicValueDesc) {
                    // IN_BLOOM_FILTER in SJ
                    bucket = bloomFilters;
                    dynamicChild = conjunct.getChildren().get(1);
                } else {
                    continue;
                }
                final String wantedId = ((ExprNodeDynamicValueDesc) dynamicChild).getDynamicValue().getId();
                final List<String> candidateIds = procCtx.parseContext.getRsToRuntimeValuesInfoMap().get(info.rsOperator).getDynamicValueIDs();
                // The conjunct belongs to this semijoin when one of its ids matches.
                for (String candidateId : candidateIds) {
                    if (wantedId.equals(candidateId)) {
                        bucket.add(conjunct);
                        remaining.remove(conjunct);
                        break;
                    }
                }
            }
        }
        // Ordinary predicates, then BETWEEN predicates, then IN_BLOOM_FILTER predicates.
        final List<ExprNodeDesc> reordered = new ArrayList<>(remaining);
        reordered.addAll(betweens);
        reordered.addAll(bloomFilters);
        final ExprNodeDesc rebuilt = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and").getGenericUDF(), reordered);
        filter.getConf().setPredicate(rebuilt);
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 5 with ExprNodeDynamicValueDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.

the class VectorizationContext method createVectorExpression.

/**
 * Instantiates {@code vectorClass} with one constructor argument per entry of {@code childExpr}.
 *
 * Each child is turned into an argument according to its kind:
 *   - generic function or field expression: vectorized recursively; the argument is the
 *     child's output column number;
 *   - column: the argument is the input column index, except that a DECIMAL_64 decimal column
 *     is first converted to a regular decimal and the conversion's output column is used;
 *     in FILTER children mode a SelectColumnIsTrue child is added for the column;
 *   - constant: the scalar value, or a constant vector expression when the scalar is null;
 *   - dynamic value: the DynamicValue object itself.
 *
 * @param vectorClass  the vector expression class to instantiate
 * @param childExpr    argument expressions; null is treated as no children
 * @param childrenMode mode used when vectorizing child expressions
 * @param returnType   return type passed to the instantiation helper
 * @return the instantiated expression with its input type information and children set
 * @throws HiveException if a child is of an unsupported expression kind, or from the helpers
 */
private VectorExpression createVectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType) throws HiveException {
    final int childCount = (childExpr == null) ? 0 : childExpr.size();
    final TypeInfo[] argTypes = new TypeInfo[childCount];
    final DataTypePhysicalVariation[] argVariations = new DataTypePhysicalVariation[childCount];
    final Object[] ctorArgs = new Object[childCount];
    final List<VectorExpression> childVectors = new ArrayList<>();
    for (int idx = 0; idx < childCount; idx++) {
        final ExprNodeDesc child = childExpr.get(idx);
        final TypeInfo childTypeInfo = child.getTypeInfo();
        argTypes[idx] = childTypeInfo;
        // Default assumption; overwritten below when a child expression reports otherwise.
        argVariations[idx] = DataTypePhysicalVariation.NONE;
        if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeFieldDesc)) {
            final VectorExpression nested = getVectorExpression(child, childrenMode);
            childVectors.add(nested);
            ctorArgs[idx] = nested.getOutputColumnNum();
            argVariations[idx] = nested.getOutputDataTypePhysicalVariation();
        } else if (child instanceof ExprNodeColumnDesc) {
            final int columnIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
            // In this method, we must only process non-Decimal64 column vectors, so a Decimal64
            // column is converted to regular decimal first.
            final boolean isDecimal64Column = (childTypeInfo instanceof DecimalTypeInfo)
                && getDataTypePhysicalVariation(columnIndex) == DataTypePhysicalVariation.DECIMAL_64;
            if (isDecimal64Column) {
                // FUTURE: Can we reuse this conversion?
                final VectorExpression conversion = createDecimal64ToDecimalConversion(columnIndex, childTypeInfo);
                childVectors.add(conversion);
                ctorArgs[idx] = conversion.getOutputColumnNum();
                argVariations[idx] = conversion.getOutputDataTypePhysicalVariation();
            } else {
                if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
                    // In filter mode, the column must be a boolean
                    final SelectColumnIsTrue isTrue = new SelectColumnIsTrue(columnIndex);
                    isTrue.setInputTypeInfos(childTypeInfo);
                    isTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
                    childVectors.add(isTrue);
                }
                ctorArgs[idx] = columnIndex;
            }
        } else if (child instanceof ExprNodeConstantDesc) {
            final Object scalar = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
            if (scalar == null) {
                // A null scalar is represented by a constant vector expression instead.
                ctorArgs[idx] = getConstantVectorExpression(null, child.getTypeInfo(), childrenMode);
            } else {
                ctorArgs[idx] = scalar;
            }
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            ctorArgs[idx] = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
        } else {
            throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
        }
    }
    final VectorExpression result = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, ctorArgs);
    if (result == null) {
        handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, ctorArgs);
    }
    result.setInputTypeInfos(argTypes);
    result.setInputDataTypePhysicalVariations(argVariations);
    // result has already been dereferenced above, so only the emptiness check is needed here.
    if (!childVectors.isEmpty()) {
        result.setChildExpressions(childVectors.toArray(new VectorExpression[0]));
    }
    // Release the output columns of all child expressions.
    for (VectorExpression childVector : childVectors) {
        ocm.freeOutputColumn(childVector.getOutputColumnNum());
    }
    return result;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)11 ExprNodeDynamicValueDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc)11 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)10 ArrayList (java.util.ArrayList)9 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)8 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)7 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)6 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)5 BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)5 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)5 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)5 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)5 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)4 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)4 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)4 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)4 ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc)4 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)3 CastBooleanToStringViaLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString)3 CastDateToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString)3