Search in sources :

Example 26 with FilterDesc

use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

the class TestExecDriver method populateMapRedPlan6.

@SuppressWarnings("unchecked")
private void populateMapRedPlan6(Table src) throws Exception {
    // map-side work
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("tkey")), Utilities.makeList(getStringColumn("tkey"), getStringColumn("tvalue")), outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
    Operator<ScriptDesc> op0 = OperatorFactory.get(new ScriptDesc("\'cat\'", PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"), TextRecordWriter.class, PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"), TextRecordReader.class, TextRecordReader.class, PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")), op1);
    Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0);
    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    mr.setReduceWork(rWork);
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(TMPDIR + File.separator + "mapredplan6.out"), Utilities.defaultTd, false));
    Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
    Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op2);
    rWork.setReducer(op5);
}
Also used : ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) Path(org.apache.hadoop.fs.Path) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Example 27 with FilterDesc

use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

the class TestExecDriver method getTestFilterDesc.

private FilterDesc getTestFilterDesc(String column) throws Exception {
    ArrayList<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
    children1.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, column, "", false));
    ExprNodeDesc lhs = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, FunctionRegistry.getFunctionInfo(serdeConstants.DOUBLE_TYPE_NAME).getGenericUDF(), children1);
    ArrayList<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>();
    children2.add(new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(100)));
    ExprNodeDesc rhs = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, FunctionRegistry.getFunctionInfo(serdeConstants.DOUBLE_TYPE_NAME).getGenericUDF(), children2);
    ArrayList<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>();
    children3.add(lhs);
    children3.add(rhs);
    ExprNodeDesc desc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getFunctionInfo("<").getGenericUDF(), children3);
    return new FilterDesc(desc, false);
}
Also used : FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 28 with FilterDesc

use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

the class TestExecDriver method populateMapPlan1.

@SuppressWarnings("unchecked")
private void populateMapPlan1(Table src) throws Exception {
    Operator<FileSinkDesc> op2 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(TMPDIR + File.separator + "mapplan1.out"), Utilities.defaultTd, true));
    Operator<FilterDesc> op1 = OperatorFactory.get(getTestFilterDesc("key"), op2);
    addMapWork(mr, src, "a", op1);
}
Also used : Path(org.apache.hadoop.fs.Path) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)

Example 29 with FilterDesc

use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

the class SharedWorkOptimizer method extractSharedOptimizationInfoForRoot.

private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pctx, SharedWorkOptimizerCache optimizerCache, TableScanOperator retainableTsOp, TableScanOperator discardableTsOp, boolean mayRemoveDownStreamOperators, boolean mayRemoveInputOps) throws SemanticException {
    LinkedHashSet<Operator<?>> retainableOps = new LinkedHashSet<>();
    LinkedHashSet<Operator<?>> discardableOps = new LinkedHashSet<>();
    Set<Operator<?>> discardableInputOps = new HashSet<>();
    long dataSize = 0L;
    long maxDataSize = 0L;
    retainableOps.add(retainableTsOp);
    discardableOps.add(discardableTsOp);
    Operator<?> equalOp1 = retainableTsOp;
    Operator<?> equalOp2 = discardableTsOp;
    if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
        // TODO: Support checking multiple child operators to merge further.
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
        return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
    }
    if (retainableTsOp.getChildOperators().size() == 0 || discardableTsOp.getChildOperators().size() == 0) {
        return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
    }
    Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
    Operator<?> currentOp2 = discardableTsOp.getChildOperators().get(0);
    // Special treatment for Filter operator that ignores the DPP predicates
    if (mayRemoveDownStreamOperators && currentOp1 instanceof FilterOperator && currentOp2 instanceof FilterOperator) {
        boolean equalFilters = false;
        FilterDesc op1Conf = ((FilterOperator) currentOp1).getConf();
        FilterDesc op2Conf = ((FilterOperator) currentOp2).getConf();
        if (op1Conf.getIsSamplingPred() == op2Conf.getIsSamplingPred() && StringUtils.equals(op1Conf.getSampleDescExpr(), op2Conf.getSampleDescExpr())) {
            Multiset<String> conjsOp1String = extractConjsIgnoringDPPPreds(op1Conf.getPredicate());
            Multiset<String> conjsOp2String = extractConjsIgnoringDPPPreds(op2Conf.getPredicate());
            if (conjsOp1String.equals(conjsOp2String)) {
                equalFilters = true;
            }
        }
        if (equalFilters) {
            equalOp1 = currentOp1;
            equalOp2 = currentOp2;
            retainableOps.add(equalOp1);
            discardableOps.add(equalOp2);
            if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) {
                // TODO: Support checking multiple child operators to merge further.
                discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
                discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
                return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
            }
            currentOp1 = currentOp1.getChildOperators().get(0);
            currentOp2 = currentOp2.getChildOperators().get(0);
        } else {
            // Bail out
            discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
            discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
            return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
        }
    }
    return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2, currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, mayRemoveDownStreamOperators, mayRemoveInputOps);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) LinkedHashSet(java.util.LinkedHashSet) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 30 with FilterDesc

use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache.

the class SharedWorkOptimizer method pushFilterToTopOfTableScan.

private static void pushFilterToTopOfTableScan(SharedWorkOptimizerCache optimizerCache, DecomposedTs tsModel) throws UDFArgumentException {
    TableScanOperator tsOp = tsModel.ts;
    ExprNodeGenericFuncDesc tableScanExprNode = (ExprNodeGenericFuncDesc) tsModel.getFullFilterExpr();
    if (tableScanExprNode == null) {
        return;
    }
    List<Operator<? extends OperatorDesc>> allChildren = Lists.newArrayList(tsOp.getChildOperators());
    childOperators: for (Operator<? extends OperatorDesc> op : allChildren) {
        if (optimizerCache.isKnownFilteringOperator(op)) {
            continue;
        }
        if (op instanceof FilterOperator) {
            FilterOperator filterOp = (FilterOperator) op;
            ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate();
            if (tableScanExprNode.isSame(filterExprNode)) {
                // We do not need to do anything
                optimizerCache.setKnownFilteringOperator(filterOp);
                continue;
            }
            if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) {
                for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) {
                    if (childExprNode.isSame(filterExprNode)) {
                        // We do not need to do anything, it is in the OR expression
                        // so probably we pushed previously
                        optimizerCache.setKnownFilteringOperator(filterOp);
                        continue childOperators;
                    }
                }
            }
            ExprNodeDesc newFilterExpr = conjunction(filterExprNode, tableScanExprNode);
            if (!isSame(filterOp.getConf().getPredicate(), newFilterExpr)) {
                filterOp.getConf().setPredicate(newFilterExpr);
            }
            optimizerCache.setKnownFilteringOperator(filterOp);
        } else {
            Operator<FilterDesc> newOp = OperatorFactory.get(tsOp.getCompilationOpContext(), new FilterDesc(tableScanExprNode.clone(), false), new RowSchema(tsOp.getSchema().getSignature()));
            tsOp.replaceChild(op, newOp);
            newOp.getParentOperators().add(tsOp);
            op.replaceParent(tsOp, newOp);
            newOp.getChildOperators().add(op);
            // Add to cache (same group as tsOp)
            optimizerCache.putIfWorkExists(newOp, tsOp);
            optimizerCache.setKnownFilteringOperator(newOp);
        }
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr)

Aggregations

FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc)33 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)23 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)16 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)14 ArrayList (java.util.ArrayList)13 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)13 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 Operator (org.apache.hadoop.hive.ql.exec.Operator)11 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)9 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)8 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)8 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)7 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)7 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)7 List (java.util.List)6 Path (org.apache.hadoop.fs.Path)5 LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator)5 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)5 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)4 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)4