
Example 71 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class SemiJoinReductionMerge method createSemiJoinPredicate.

/**
 * Creates the multi-column semi-join predicate that is applied on the target relation.
 *
 * Assuming that the target columns of the semi-join are fname, lname, and age, the generated predicate is:
 * <pre>
 *   fname BETWEEN ?min_fname AND ?max_fname and
 *   lname BETWEEN ?min_lname AND ?max_lname and
 *   age   BETWEEN ?min_age   AND ?max_age and
 *   IN_BLOOM_FILTER(HASH(fname,lname,age),?bloom_filter)
 * </pre>
 * where the question mark (?) indicates dynamic values bound at runtime.
 */
private static ExprNodeGenericFuncDesc createSemiJoinPredicate(List<ReduceSinkOperator> sjBranches, RuntimeValuesInfo sjValueInfo, ParseContext context) {
    // Performance note: to speed up evaluation, the 'BETWEEN' predicates should come before the 'IN_BLOOM_FILTER' one
    Deque<String> dynamicIds = new ArrayDeque<>(sjValueInfo.getDynamicValueIDs());
    List<ExprNodeDesc> sjPredicates = new ArrayList<>();
    List<ExprNodeDesc> hashArgs = new ArrayList<>();
    for (ReduceSinkOperator rs : sjBranches) {
        RuntimeValuesInfo info = context.getRsToRuntimeValuesInfoMap().get(rs);
        checkState(info.getTargetColumns().size() == 1, "Cannot handle multi-column semijoin branches.");
        final ExprNodeDesc targetColumn = info.getTargetColumns().get(0);
        TypeInfo typeInfo = targetColumn.getTypeInfo();
        DynamicValue minDynamic = new DynamicValue(dynamicIds.poll(), typeInfo);
        DynamicValue maxDynamic = new DynamicValue(dynamicIds.poll(), typeInfo);
        List<ExprNodeDesc> betweenArgs = Arrays.asList(
                // Use false so the BETWEEN result is not inverted
                new ExprNodeConstantDesc(Boolean.FALSE), targetColumn,
                new ExprNodeDynamicValueDesc(minDynamic), new ExprNodeDynamicValueDesc(maxDynamic));
        ExprNodeDesc betweenExp = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFBetween(), "between", betweenArgs);
        sjPredicates.add(betweenExp);
        hashArgs.add(targetColumn);
    }
    ExprNodeDesc hashExp = ExprNodeDescUtils.murmurHash(hashArgs);
    assert dynamicIds.size() == 1 : "There should be one value left untreated: the one for the bloom filter";
    DynamicValue bloomDynamic = new DynamicValue(dynamicIds.poll(), TypeInfoFactory.binaryTypeInfo);
    sjPredicates.add(new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFInBloomFilter(), "in_bloom_filter", Arrays.asList(hashExp, new ExprNodeDynamicValueDesc(bloomDynamic))));
    return and(sjPredicates);
}
Also used : GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ArrayDeque(java.util.ArrayDeque) RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GenericUDFInBloomFilter(org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DynamicValue(org.apache.hadoop.hive.ql.plan.DynamicValue)
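
For orientation, below is a minimal self-contained sketch of the same BETWEEN construction, with constant bounds in place of the runtime DynamicValue operands. The class name, column name, table alias, and bounds are illustrative, not taken from the example above.

import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class BetweenPredicateSketch {
    public static void main(String[] args) {
        // Column reference: an int column "age" on table alias "t" (illustrative names).
        ExprNodeDesc ageCol = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "age", "t", false);
        // BETWEEN takes four children; a leading TRUE constant would turn it into NOT BETWEEN.
        ExprNodeDesc between = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
                new GenericUDFBetween(), "between",
                Arrays.asList(new ExprNodeConstantDesc(Boolean.FALSE), ageCol,
                        new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 18),
                        new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 65)));
        // Prints something like: age BETWEEN 18 AND 65
        System.out.println(between.getExprString());
    }
}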

Example 72 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class SemiJoinReductionMerge method transform.

public ParseContext transform(ParseContext parseContext) throws SemanticException {
    Map<ReduceSinkOperator, SemiJoinBranchInfo> allSemijoins = parseContext.getRsToSemiJoinBranchInfo();
    if (allSemijoins.isEmpty()) {
        return parseContext;
    }
    HiveConf hiveConf = parseContext.getConf();
    for (Entry<SJSourceTarget, List<ReduceSinkOperator>> sjMergeCandidate : createMergeCandidates(allSemijoins)) {
        final List<ReduceSinkOperator> sjBranches = sjMergeCandidate.getValue();
        if (sjBranches.size() < 2) {
            continue;
        }
        List<SelectOperator> selOps = new ArrayList<>(sjBranches.size());
        for (ReduceSinkOperator rs : sjBranches) {
            selOps.add(OperatorUtils.ancestor(rs, SelectOperator.class, 0, 0, 0, 0));
        }
        long sjEntriesHint = extractBloomEntriesHint(sjBranches);
        SelectOperator selectOp = mergeSelectOps(sjMergeCandidate.getKey().source, selOps);
        GroupByOperator gbPartialOp = createGroupBy(selectOp, selectOp, GroupByDesc.Mode.HASH, sjEntriesHint, hiveConf);
        ReduceSinkOperator rsPartialOp = createReduceSink(gbPartialOp, NullOrdering.defaultNullOrder(hiveConf));
        rsPartialOp.getConf().setReducerTraits(EnumSet.of(ReduceSinkDesc.ReducerTraits.QUICKSTART));
        GroupByOperator gbCompleteOp = createGroupBy(selectOp, rsPartialOp, GroupByDesc.Mode.FINAL, sjEntriesHint, hiveConf);
        ReduceSinkOperator rsCompleteOp = createReduceSink(gbCompleteOp, NullOrdering.defaultNullOrder(hiveConf));
        final TableScanOperator sjTargetTable = sjMergeCandidate.getKey().target;
        SemiJoinBranchInfo sjInfo = new SemiJoinBranchInfo(sjTargetTable, false);
        parseContext.getRsToSemiJoinBranchInfo().put(rsCompleteOp, sjInfo);
        // Save the info that is required at query time to resolve dynamic/runtime values.
        RuntimeValuesInfo valuesInfo = createRuntimeValuesInfo(rsCompleteOp, sjBranches, parseContext);
        parseContext.getRsToRuntimeValuesInfoMap().put(rsCompleteOp, valuesInfo);
        ExprNodeGenericFuncDesc sjPredicate = createSemiJoinPredicate(sjBranches, valuesInfo, parseContext);
        // Update filter operators with the new semi-join predicate
        for (Operator<?> op : sjTargetTable.getChildOperators()) {
            if (op instanceof FilterOperator) {
                FilterDesc filter = ((FilterOperator) op).getConf();
                filter.setPredicate(and(filter.getPredicate(), sjPredicate));
            }
        }
        // Update tableScan with the new semi-join predicate
        sjTargetTable.getConf().setFilterExpr(and(sjTargetTable.getConf().getFilterExpr(), sjPredicate));
        for (ReduceSinkOperator rs : sjBranches) {
            GenTezUtils.removeSemiJoinOperator(parseContext, rs, sjTargetTable);
            GenTezUtils.removeBranch(rs);
        }
    }
    return parseContext;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) SemiJoinBranchInfo(org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) HiveConf(org.apache.hadoop.hive.conf.HiveConf) List(java.util.List)
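
The and(...) calls above, like the one that closes Example 71, conjoin predicates into a single boolean expression. The helper itself is not part of this listing; a plausible minimal reconstruction, assuming GenericUDFOPAnd and a null-tolerant left operand, is:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Hypothetical reconstruction; the real helper in SemiJoinReductionMerge may differ.
static ExprNodeDesc and(ExprNodeDesc left, ExprNodeDesc right) {
    if (left == null) {
        // e.g. the TableScan or Filter had no pre-existing predicate
        return right;
    }
    return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
            new GenericUDFOPAnd(), "and", Arrays.asList(left, right));
}

The list-valued form used in Example 71 (and(sjPredicates)) would reduce its list pairwise in the same way.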

Example 73 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class HiveRexExecutorImpl method reduce.

@Override
public void reduce(RexBuilder rexBuilder, List<RexNode> constExps, List<RexNode> reducedValues) {
    RexNodeConverter rexNodeConverter = new RexNodeConverter(rexBuilder, rexBuilder.getTypeFactory());
    for (RexNode rexNode : constExps) {
        // initialize the converter
        ExprNodeConverter converter = new ExprNodeConverter("", null, null, null, new HashSet<>(), rexBuilder);
        // convert RexNode to ExprNodeGenericFuncDesc
        ExprNodeDesc expr = rexNode.accept(converter);
        if (expr instanceof ExprNodeGenericFuncDesc) {
            // folding the constant
            ExprNodeDesc constant = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc) expr);
            if (constant != null) {
                addExpressionToList(constant, rexNode, rexNodeConverter, reducedValues);
            } else {
                reducedValues.add(rexNode);
            }
        } else if (expr instanceof ExprNodeConstantDesc) {
            addExpressionToList(expr, rexNode, rexNodeConverter, reducedValues);
        } else {
            reducedValues.add(rexNode);
        }
    }
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RexNodeConverter(org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RexNode(org.apache.calcite.rex.RexNode) ExprNodeConverter(org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter)
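
To see the folding step in isolation, the sketch below builds the constant expression 1 + 2 and hands it to foldExpr. GenericUDFOPPlus and the literal operands are illustrative choices, not taken from the example above.

import java.util.Arrays;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FoldExprSketch {
    public static void main(String[] args) {
        // 1 + 2 as a generic-function expression over two int constants.
        ExprNodeGenericFuncDesc onePlusTwo = new ExprNodeGenericFuncDesc(
                TypeInfoFactory.intTypeInfo, new GenericUDFOPPlus(), "+",
                Arrays.asList(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 1),
                        new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 2)));
        // Expected to come back as an ExprNodeConstantDesc holding 3, or null if folding fails.
        ExprNodeDesc folded = ConstantPropagateProcFactory.foldExpr(onePlusTwo);
        System.out.println(folded);
    }
}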

Example 74 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class SemanticAnalyzer method genSamplePredicate.

/**
 * Generates the sampling predicate from the TABLESAMPLE clause information.
 * When useBucketCols is set to true, this function uses the bucket column
 * list as the expression inputs to the predicate hash function; otherwise the
 * expression list stored in the TableSample is used. The bucket columns of
 * the table are used when no expressions are provided in the TABLESAMPLE
 * clause and the table has clustering columns defined in its metadata. The
 * predicate created has the following structure:
 *
 * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator
 *
 * @param ts
 *          TABLESAMPLE clause information
 * @param bucketCols
 *          The clustering columns of the table
 * @param useBucketCols
 *          Flag to indicate whether the bucketCols should be used as input to
 *          the hash function
 * @param alias
 *          The alias used for the table in the row resolver
 * @param rwsch
 *          The row resolver used to resolve column references
 * @param planExpr
 *          The plan tree for the expression. If the user specified this, the
 *          parse expressions are not used
 * @return exprNodeDesc
 * @exception SemanticException
 */
private ExprNodeDesc genSamplePredicate(TableSample ts, List<String> bucketCols, boolean useBucketCols, String alias, RowResolver rwsch, ExprNodeDesc planExpr, int bucketingVersion) throws SemanticException {
    ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getNumerator() - 1));
    ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getDenominator()));
    ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(Integer.MAX_VALUE));
    List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
    if (planExpr != null) {
        args.add(planExpr);
    } else if (useBucketCols) {
        for (String col : bucketCols) {
            ColumnInfo ci = rwsch.get(alias, col);
            // TODO: change type to the one in the table schema
            args.add(new ExprNodeColumnDesc(ci));
        }
    } else {
        for (ASTNode expr : ts.getExprs()) {
            args.add(genExprNodeDesc(expr, rwsch));
        }
    }
    ExprNodeDesc equalsExpr = null;
    {
        ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, bucketingVersion == 2 ? new GenericUDFMurmurHash() : new GenericUDFHash(), args);
        LOG.info("hashfnExpr = " + hashfnExpr);
        ExprNodeDesc andExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr);
        LOG.info("andExpr = " + andExpr);
        ExprNodeDesc modExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("%", andExpr, denominatorExpr);
        LOG.info("modExpr = " + modExpr);
        LOG.info("numeratorExpr = " + numeratorExpr);
        equalsExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("==", modExpr, numeratorExpr);
        LOG.info("equalsExpr = " + equalsExpr);
    }
    return equalsExpr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFMurmurHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
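
Concrete numbers make the documented structure easy to check. For a hypothetical BUCKET 3 OUT OF 32 clause, the predicate built above performs, per row, arithmetic equivalent to this plain-Java sketch:

// Hypothetical BUCKET 3 OUT OF 32 sample; 'hash' stands in for hash(expressions).
int hash = -1693;                  // any int, possibly negative
int numerator = 3 - 1;             // BUCKET 3 maps to zero-based bucket index 2
int denominator = 32;              // OUT OF 32
// Masking with Integer.MAX_VALUE clears the sign bit, so the modulo is never negative.
boolean keepRow = ((hash & Integer.MAX_VALUE) % denominator) == numerator;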

Example 75 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class TestHiveMetaStoreClientApiArgumentsChecker method testGetPartitionsByExpr.

@Test
public void testGetPartitionsByExpr() throws HiveException, TException {
    List<Partition> partitions = new ArrayList<>();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    hive.getPartitionsByExpr(t, node, hive.getConf(), partitions);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) Test(org.junit.Test)
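
Outside a test, such a predicate is typically shipped to the metastore in serialized form. Assuming the Kryo helpers in org.apache.hadoop.hive.ql.exec.SerializationUtilities, the round trip looks roughly like this ('node' is the predicate built in the test above):

import org.apache.hadoop.hive.ql.exec.SerializationUtilities;

// Serialize the pruning expression for transport, then restore it.
byte[] payload = SerializationUtilities.serializeExpressionToKryo(node);
ExprNodeGenericFuncDesc restored = SerializationUtilities.deserializeExpressionFromKryo(payload);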

Aggregations

ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 228 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 165 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 134 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 123 usages
ArrayList (java.util.ArrayList): 106 usages
Test (org.junit.Test): 92 usages
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 49 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 44 usages
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 38 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 37 usages
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 30 usages
List (java.util.List): 29 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 28 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 26 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 24 usages
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 23 usages
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan): 22 usages
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 22 usages
HashMap (java.util.HashMap): 21 usages
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 21 usages