
Example 6 with FilterDesc

Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

From the class SyntheticJoinPredicate, the method createFilter:

// insert a filter operator between target (child) and parent
private static Operator<FilterDesc> createFilter(Operator<?> target, Operator<?> parent, RowSchema parentRS, ExprNodeDesc filterExpr) {
    FilterDesc filterDesc = new FilterDesc(filterExpr, false);
    filterDesc.setSyntheticJoinPredicate(true);
    Operator<FilterDesc> filter = OperatorFactory.get(parent.getCompilationOpContext(), filterDesc, new RowSchema(parentRS.getSignature()));
    filter.getParentOperators().add(parent);
    filter.getChildOperators().add(target);
    parent.replaceChild(target, filter);
    target.replaceParent(parent, filter);
    return filter;
}
Also used: FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema)
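
A note on the wiring: createFilter splices the new operator into the DAG by pointing the filter at both ends first, then swapping the child edge on the parent and the parent edge on the child. A minimal self-contained sketch of that splice pattern (plain Java with a hypothetical Node class, not Hive's Operator API):

import java.util.ArrayList;
import java.util.List;

// Hypothetical minimal DAG node standing in for Hive's Operator.
class Node {
    final String name;
    final List<Node> parents = new ArrayList<>();
    final List<Node> children = new ArrayList<>();

    Node(String name) { this.name = name; }

    void replaceChild(Node oldChild, Node newChild) {
        children.set(children.indexOf(oldChild), newChild);
    }

    void replaceParent(Node oldParent, Node newParent) {
        parents.set(parents.indexOf(oldParent), newParent);
    }

    // Splice 'middle' between this node and 'child', mirroring createFilter's four steps.
    void splice(Node child, Node middle) {
        middle.parents.add(this);          // filter.getParentOperators().add(parent)
        middle.children.add(child);        // filter.getChildOperators().add(target)
        this.replaceChild(child, middle);  // parent.replaceChild(target, filter)
        child.replaceParent(this, middle); // target.replaceParent(parent, filter)
    }
}

public class SpliceDemo {
    public static void main(String[] args) {
        Node parent = new Node("TS"); // e.g. a table scan
        Node target = new Node("RS"); // e.g. a reduce sink
        parent.children.add(target);
        target.parents.add(parent);

        parent.splice(target, new Node("FIL")); // insert a filter in between
        System.out.println(parent.children.get(0).name);                 // FIL
        System.out.println(parent.children.get(0).children.get(0).name); // RS
    }
}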

Example 7 with FilterDesc

Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

From the class SemiJoinReductionMerge, the method transform:

public ParseContext transform(ParseContext parseContext) throws SemanticException {
    Map<ReduceSinkOperator, SemiJoinBranchInfo> allSemijoins = parseContext.getRsToSemiJoinBranchInfo();
    if (allSemijoins.isEmpty()) {
        return parseContext;
    }
    HiveConf hiveConf = parseContext.getConf();
    for (Entry<SJSourceTarget, List<ReduceSinkOperator>> sjMergeCandidate : createMergeCandidates(allSemijoins)) {
        final List<ReduceSinkOperator> sjBranches = sjMergeCandidate.getValue();
        if (sjBranches.size() < 2) {
            continue;
        }
        List<SelectOperator> selOps = new ArrayList<>(sjBranches.size());
        for (ReduceSinkOperator rs : sjBranches) {
            selOps.add(OperatorUtils.ancestor(rs, SelectOperator.class, 0, 0, 0, 0));
        }
        long sjEntriesHint = extractBloomEntriesHint(sjBranches);
        SelectOperator selectOp = mergeSelectOps(sjMergeCandidate.getKey().source, selOps);
        // two-phase aggregation: partial (HASH) results are merged by the FINAL group-by after the shuffle
        GroupByOperator gbPartialOp = createGroupBy(selectOp, selectOp, GroupByDesc.Mode.HASH, sjEntriesHint, hiveConf);
        ReduceSinkOperator rsPartialOp = createReduceSink(gbPartialOp, NullOrdering.defaultNullOrder(hiveConf));
        rsPartialOp.getConf().setReducerTraits(EnumSet.of(ReduceSinkDesc.ReducerTraits.QUICKSTART));
        GroupByOperator gbCompleteOp = createGroupBy(selectOp, rsPartialOp, GroupByDesc.Mode.FINAL, sjEntriesHint, hiveConf);
        ReduceSinkOperator rsCompleteOp = createReduceSink(gbCompleteOp, NullOrdering.defaultNullOrder(hiveConf));
        final TableScanOperator sjTargetTable = sjMergeCandidate.getKey().target;
        SemiJoinBranchInfo sjInfo = new SemiJoinBranchInfo(sjTargetTable, false);
        parseContext.getRsToSemiJoinBranchInfo().put(rsCompleteOp, sjInfo);
        // Save the info that is required at query time to resolve dynamic/runtime values.
        RuntimeValuesInfo valuesInfo = createRuntimeValuesInfo(rsCompleteOp, sjBranches, parseContext);
        parseContext.getRsToRuntimeValuesInfoMap().put(rsCompleteOp, valuesInfo);
        ExprNodeGenericFuncDesc sjPredicate = createSemiJoinPredicate(sjBranches, valuesInfo, parseContext);
        // Update filter operators with the new semi-join predicate
        for (Operator<?> op : sjTargetTable.getChildOperators()) {
            if (op instanceof FilterOperator) {
                FilterDesc filter = ((FilterOperator) op).getConf();
                filter.setPredicate(and(filter.getPredicate(), sjPredicate));
            }
        }
        // Update tableScan with the new semi-join predicate
        sjTargetTable.getConf().setFilterExpr(and(sjTargetTable.getConf().getFilterExpr(), sjPredicate));
        for (ReduceSinkOperator rs : sjBranches) {
            GenTezUtils.removeSemiJoinOperator(parseContext, rs, sjTargetTable);
            GenTezUtils.removeBranch(rs);
        }
    }
    return parseContext;
}
Also used: TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator), SemiJoinBranchInfo(org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo), ArrayList(java.util.ArrayList), ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo), FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator), FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), HiveConf(org.apache.hadoop.hive.conf.HiveConf), List(java.util.List)
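
The gbPartialOp/gbCompleteOp pair above is the standard two-phase aggregation shape: a map-side group-by in HASH mode emits partial results, a reduce sink shuffles them, and a group-by in FINAL mode merges the partials. A rough stand-alone sketch of that shape, assuming a plain count aggregation in place of Hive's semijoin bloom-filter aggregations:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class TwoPhaseAggSketch {
    // Phase 1 (analogous to GroupByDesc.Mode.HASH): aggregate locally per task.
    static Map<String, Long> partial(List<String> rows) {
        Map<String, Long> acc = new HashMap<>();
        for (String key : rows) acc.merge(key, 1L, Long::sum);
        return acc;
    }

    // Phase 2 (analogous to GroupByDesc.Mode.FINAL): merge the shuffled partials.
    static Map<String, Long> merge(List<Map<String, Long>> partials) {
        Map<String, Long> out = new HashMap<>();
        for (Map<String, Long> p : partials) p.forEach((k, v) -> out.merge(k, v, Long::sum));
        return out;
    }

    public static void main(String[] args) {
        Map<String, Long> task1 = partial(List.of("a", "b", "a"));
        Map<String, Long> task2 = partial(List.of("b", "b"));
        System.out.println(merge(List.of(task1, task2))); // {a=2, b=3}
    }
}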

Example 8 with FilterDesc

Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

From the class SemanticAnalyzer, the method genNotNullFilterForJoinSourcePlan:

/*
   * For inner joins, push an 'is not null' predicate to the join sources for
   * every non-null-safe predicate.
   */
private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException {
    if (qb == null || joinTree == null) {
        return input;
    }
    if (!joinTree.getNoOuterJoin()) {
        return input;
    }
    if (joinKeys == null || joinKeys.length == 0) {
        return input;
    }
    Multimap<Integer, ExprNodeColumnDesc> hashes = ArrayListMultimap.create();
    if (input instanceof FilterOperator) {
        ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc) input.getConf()).getPredicate()), hashes);
    }
    ExprNodeDesc filterPred = null;
    List<Boolean> nullSafes = joinTree.getNullSafes();
    for (int i = 0; i < joinKeys.length; i++) {
        if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc && ((ExprNodeColumnDesc) joinKeys[i]).getIsPartitionColOrVirtualCol())) {
            // no need to generate an 'is not null' predicate for a partitioning or
            // virtual column, since those columns can never be null.
            continue;
        }
        boolean skip = false;
        for (ExprNodeColumnDesc node : hashes.get(joinKeys[i].hashCode())) {
            if (node.isSame(joinKeys[i])) {
                skip = true;
                break;
            }
        }
        if (skip) {
            // there is already a predicate on this src.
            continue;
        }
        List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
        args.add(joinKeys[i]);
        ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args);
        filterPred = filterPred == null ? nextExpr : ExprNodeDescUtils.mergePredicates(filterPred, nextExpr);
    }
    if (filterPred == null) {
        return input;
    }
    OpParseContext inputCtx = opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    if (input instanceof FilterOperator) {
        FilterOperator f = (FilterOperator) input;
        List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
        preds.add(f.getConf().getPredicate());
        preds.add(filterPred);
        f.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(preds));
        return input;
    }
    FilterDesc filterDesc = new FilterDesc(filterPred, false);
    filterDesc.setGenerated(true);
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR);
    LOG.debug("Created Filter Plan for {} row schema: {}", qb.getId(), inputRR);
    return output;
}
Also used: AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator), FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator), FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator), LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator), SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), ArrayList(java.util.ArrayList), SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint), SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc), ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
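
The Multimap lookup in this method is a hash-bucket prefilter: the columns already covered by the input filter's predicate are grouped by hashCode, and only the expressions in the matching bucket are compared with the exact isSame check. A small sketch of the same dedup idea, with a hypothetical Col record standing in for ExprNodeColumnDesc:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PredicateDedupSketch {
    // Hypothetical stand-in for ExprNodeColumnDesc; record equality plays the role of isSame.
    record Col(String name) {}

    public static void main(String[] args) {
        // Index the columns already covered by a predicate, keyed by hash code
        // (the role of the ArrayListMultimap above).
        Map<Integer, List<Col>> hashes = new HashMap<>();
        for (Col existing : List.of(new Col("k1"), new Col("k2"))) {
            hashes.computeIfAbsent(existing.hashCode(), h -> new ArrayList<>()).add(existing);
        }

        // For each join key, check the matching bucket and confirm with real equality
        // before deciding to skip the IS NOT NULL predicate.
        for (Col joinKey : List.of(new Col("k1"), new Col("k3"))) {
            boolean skip = false;
            for (Col node : hashes.getOrDefault(joinKey.hashCode(), List.of())) {
                if (node.equals(joinKey)) { skip = true; break; }
            }
            System.out.println(joinKey.name() + (skip ? ": already covered, skip" : ": add IS NOT NULL"));
        }
    }
}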

Example 9 with FilterDesc

Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

From the class SemanticAnalyzer, the method genConstraintsPlan:

private Operator genConstraintsPlan(String dest, QB qb, Operator input) throws SemanticException {
    if (deleting(dest)) {
        // for DELETE statements the NOT NULL constraint need not be checked
        return input;
    }
    if (updating(dest) && isCBOExecuted() && this.ctx.getOperation() != Context.Operation.MERGE) {
        // for UPDATE statements CBO already added and pushed down the constraints
        return input;
    }
    // MERGE statements could have inserted a cardinality-violation branch; we need to avoid it
    if (mergeCardinalityViolationBranch(input)) {
        return input;
    }
    // if this is an INSERT INTO statement we might need to add a constraint check
    assert (input.getParentOperators().size() == 1);
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    Table targetTable = getTargetTable(qb, dest);
    ExprNodeDesc combinedConstraintExpr = ExprNodeTypeCheck.genConstraintsExpr(conf, targetTable, updating(dest), inputRR);
    if (combinedConstraintExpr != null) {
        return putOpInsertMap(OperatorFactory.getAndMakeChild(new FilterDesc(combinedConstraintExpr, false), new RowSchema(inputRR.getColumnInfos()), input), inputRR);
    }
    return input;
}
Also used: FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable), Table(org.apache.hadoop.hive.ql.metadata.Table), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
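
Conceptually, the FilterDesc built here evaluates one combined constraint expression against every incoming row, and the expression is assembled so that a violating row surfaces as an error rather than being silently dropped. A toy sketch of that per-row check, assuming a hypothetical Row record and a NOT NULL style predicate (not Hive's actual constraint UDF):

import java.util.List;
import java.util.function.Predicate;

public class ConstraintCheckSketch {
    // Hypothetical row type; id and name both carry a NOT NULL constraint.
    record Row(Integer id, String name) {}

    // Stand-in for the single combined constraint expression.
    static final Predicate<Row> COMBINED_CONSTRAINT =
            r -> r.id() != null && r.name() != null;

    static void enforce(Iterable<Row> rows) {
        for (Row r : rows) {
            if (!COMBINED_CONSTRAINT.test(r)) {
                // A violating row becomes an error, not a silently dropped row.
                throw new IllegalStateException("NOT NULL constraint violated: " + r);
            }
        }
    }

    public static void main(String[] args) {
        enforce(List.of(new Row(1, "ok"))); // passes
        try {
            enforce(List.of(new Row(2, null)));
        } catch (IllegalStateException e) {
            System.out.println(e.getMessage()); // NOT NULL constraint violated: Row[id=2, name=null]
        }
    }
}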

Example 10 with FilterDesc

Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

From the class SemanticAnalyzer, the method genFilterPlan:

/**
 * Create a filter plan. The condition and the inputs are specified.
 *
 * @param qb
 *          current query block
 * @param condn
 *          The condition to be resolved
 * @param input
 *          the input operator
 */
@SuppressWarnings("nls")
private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, boolean useCaching) throws SemanticException {
    OpParseContext inputCtx = opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    ExprNodeDesc filterCond = genExprNodeDesc(condn, inputRR, useCaching, isCBOExecuted());
    if (filterCond instanceof ExprNodeConstantDesc) {
        ExprNodeConstantDesc c = (ExprNodeConstantDesc) filterCond;
        if (Boolean.TRUE.equals(c.getValue())) {
            // If filter condition is TRUE, we ignore it
            return input;
        }
        if (ExprNodeDescUtils.isNullConstant(c)) {
            // If filter condition is NULL, transform to FALSE
            filterCond = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false);
        }
    }
    if (!filterCond.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo)) {
        // convert the result of the condition to a boolean value.
        if (filterCond.getTypeInfo().getCategory() == ObjectInspector.Category.PRIMITIVE) {
            // For primitive types like string/double/timestamp, try to cast the result of
            // the child expression to a boolean.
            filterCond = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(filterCond, TypeInfoFactory.booleanTypeInfo);
        } else {
            // For complex types like map/list/struct, create a isnotnull function on the child expression.
            filterCond = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("isnotnull", filterCond);
        }
    }
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new FilterDesc(filterCond, false), new RowSchema(inputRR.getColumnInfos()), input), inputRR);
    ctx.getPlanMapper().link(condn, output);
    LOG.debug("Created Filter Plan for {} row schema: {}", qb.getId(), inputRR.toString());
    return output;
}
Also used: AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator), FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator), FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator), LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator), SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc), ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
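
The coercion branch above has two cases: a primitive non-boolean condition is cast to boolean, while a complex-typed condition (map/list/struct) is replaced by an isnotnull check. A compact sketch of that decision, assuming a hypothetical Expr record in place of ExprNodeDesc:

public class FilterCoercionSketch {
    enum Category { PRIMITIVE, COMPLEX }

    // Hypothetical expression stand-in carrying just a type category and its text.
    record Expr(String text, Category category, boolean isBoolean) {}

    // Mirrors the branch above: booleans pass through, primitives are cast,
    // complex types are wrapped in isnotnull.
    static Expr toBooleanCondition(Expr cond) {
        if (cond.isBoolean()) return cond;
        if (cond.category() == Category.PRIMITIVE) {
            return new Expr("CAST(" + cond.text() + " AS BOOLEAN)", Category.PRIMITIVE, true);
        }
        return new Expr("isnotnull(" + cond.text() + ")", Category.PRIMITIVE, true);
    }

    public static void main(String[] args) {
        System.out.println(toBooleanCondition(new Expr("col_str", Category.PRIMITIVE, false)).text());
        // CAST(col_str AS BOOLEAN)
        System.out.println(toBooleanCondition(new Expr("col_map", Category.COMPLEX, false)).text());
        // isnotnull(col_map)
    }
}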

Aggregations

FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc): 33 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 23 usages
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 16 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 14 usages
ArrayList (java.util.ArrayList): 13 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 13 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 11 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 9 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 8 usages
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 8 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 7 usages
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 7 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 7 usages
List (java.util.List): 6 usages
Path (org.apache.hadoop.fs.Path): 5 usages
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 5 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 5 usages
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 4 usages
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 4 usages