Search in sources :

Example 1 with GenericUDFOPOr

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr in project hive by apache.

the class SharedWorkOptimizer method pushFilterToTopOfTableScan.

/**
 * Makes sure the filter expression stored on a table scan is also enforced by the
 * operators directly below it.
 *
 * <p>For each direct child of {@code tsOp}:
 * <ul>
 *   <li>a child that is not a {@link FilterOperator} gets a new filter operator, built from a
 *       clone of the scan's filter expression, spliced in between the scan and that child;</li>
 *   <li>a {@link FilterOperator} child has its predicate replaced by
 *       {@code scanFilter AND existingPredicate}.</li>
 * </ul>
 * The whole method returns as soon as it meets a filter child whose predicate is the same as
 * the scan's filter expression, or is the same as one of the operands of the scan's top-level
 * OR expression; remaining children are then left untouched.
 *
 * @param optimizerCache cache in which a newly created filter operator is registered alongside
 *                       {@code tsOp}
 * @param tsOp table scan whose filter expression is pushed; its filter expression is
 *             dereferenced without a null check
 * @throws UDFArgumentException if the combined AND predicate cannot be instantiated
 */
private static void pushFilterToTopOfTableScan(SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp) throws UDFArgumentException {
    final ExprNodeGenericFuncDesc scanFilter = tsOp.getConf().getFilterExpr();
    // Work on a copy of the child list: replaceChild() below is invoked on tsOp while looping.
    final List<Operator<? extends OperatorDesc>> childSnapshot = Lists.newArrayList(tsOp.getChildOperators());
    for (Operator<? extends OperatorDesc> child : childSnapshot) {
        if (!(child instanceof FilterOperator)) {
            // No filter below the scan on this branch: insert one carrying the scan's expression.
            Operator<FilterDesc> insertedFilter = OperatorFactory.get(
                tsOp.getCompilationOpContext(),
                new FilterDesc(scanFilter.clone(), false),
                new RowSchema(tsOp.getSchema().getSignature()));
            tsOp.replaceChild(child, insertedFilter);
            insertedFilter.getParentOperators().add(tsOp);
            child.replaceParent(tsOp, insertedFilter);
            insertedFilter.getChildOperators().add(child);
            // Register the new operator in the cache together with tsOp.
            optimizerCache.putIfWorkExists(insertedFilter, tsOp);
            continue;
        }

        final FilterOperator existingFilter = (FilterOperator) child;
        final ExprNodeDesc existingPredicate = existingFilter.getConf().getPredicate();
        if (scanFilter.isSame(existingPredicate)) {
            // The child already applies exactly the scan's filter.
            return;
        }
        if (scanFilter.getGenericUDF() instanceof GenericUDFOPOr) {
            for (ExprNodeDesc disjunct : scanFilter.getChildren()) {
                if (disjunct.isSame(existingPredicate)) {
                    // The predicate is one operand of the scan's OR expression,
                    // so it was probably pushed previously.
                    return;
                }
            }
        }
        // Strengthen the existing filter: scanFilter AND existingPredicate.
        ExprNodeGenericFuncDesc combined = ExprNodeGenericFuncDesc.newInstance(
            new GenericUDFOPAnd(),
            Arrays.<ExprNodeDesc>asList(scanFilter.clone(), existingPredicate));
        existingFilter.getConf().setPredicate(combined);
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)

Example 2 with GenericUDFOPOr

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr in project hive by apache.

the class TestAccumuloRangeGenerator method testRangeConjunctionWithDisjunction.

/**
 * Walks the expression {@code rid >= 'h' AND (rid <= 'd' OR rid >= 'q')} with an
 * {@link AccumuloRangeGenerator} and expects the single range {@code ['q', +inf)}.
 */
@Test
public void testRangeConjunctionWithDisjunction() throws Exception {
    // rowId >= 'h'
    ExprNodeDesc ridForH = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc literalH = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "h");
    List<ExprNodeDesc> geHOperands = Lists.<ExprNodeDesc>newArrayList(ridForH, literalH);
    ExprNodeDesc geH = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), geHOperands);
    assertNotNull(geH);

    // rowId <= 'd'
    ExprNodeDesc ridForD = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc literalD = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "d");
    List<ExprNodeDesc> leDOperands = Lists.<ExprNodeDesc>newArrayList(ridForD, literalD);
    ExprNodeDesc leD = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), leDOperands);
    assertNotNull(leD);

    // rowId >= 'q'
    ExprNodeDesc ridForQ = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc literalQ = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "q");
    List<ExprNodeDesc> geQOperands = Lists.<ExprNodeDesc>newArrayList(ridForQ, literalQ);
    ExprNodeDesc geQ = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), geQOperands);
    assertNotNull(geQ);

    // Or UDF, (rowId <= 'd' or rowId >= 'q')
    List<ExprNodeDesc> disjuncts = Lists.<ExprNodeDesc>newArrayList(leD, geQ);
    ExprNodeGenericFuncDesc disjunction = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPOr(), disjuncts);

    // And UDF, (rowId >= 'h' and (rowId <= 'd' or rowId >= 'q'))
    List<ExprNodeDesc> conjuncts = Lists.<ExprNodeDesc>newArrayList(geH, disjunction);
    ExprNodeGenericFuncDesc root = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), conjuncts);

    // Should generate ['q', +inf)
    List<Range> expected = Arrays.asList(new Range(new Key("q"), true, null, false));

    // The range generator is the default processor; no rule-specific processors are registered.
    AccumuloRangeGenerator generator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid");
    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(generator, Collections.<SemanticRule, SemanticNodeProcessor>emptyMap(), null);
    SemanticGraphWalker walker = new DefaultGraphWalker(dispatcher);

    ArrayList<Node> startNodes = new ArrayList<>();
    startNodes.add(root);
    HashMap<Node, Object> outputs = new HashMap<>();
    try {
        walker.startWalking(startNodes, outputs);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }

    // The walker records the processor's output for each node; check the root's.
    Object walked = outputs.get(root);
    Assert.assertNotNull(walked);
    Assert.assertTrue("Result from graph walk was not a List", walked instanceof List);
    @SuppressWarnings("unchecked") List<Range> actual = (List<Range>) walked;
    Assert.assertEquals(expected, actual);
}
Also used : GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) Range(org.apache.accumulo.core.data.Range) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) Key(org.apache.accumulo.core.data.Key) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)

Example 3 with GenericUDFOPOr

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr in project hive by apache.

the class ProjectionPusher method pushProjectionsAndFilters.

/**
 * Pushes the column projections and the combined filter expression of every table scan that
 * reads {@code splitPath} into {@code jobConf}.
 *
 * <p>The needed column ids and nested column paths of all matching table scans are unioned.
 * If any table scan reports no needed-column list, all columns are read. The table-level
 * filter is the OR of the filter expressions of all matching table scans; if any of them has
 * no filter expression, no filter is pushed at all, because that scan needs every row.
 *
 * <p>Does nothing when {@code mapWork} or its path-to-aliases mapping is {@code null}.
 *
 * @param jobConf job configuration that receives the projection and filter settings
 * @param splitPath path of the split, used to look up the aliases reading it
 * @param splitPathWithNoSchema not used by this method
 * @throws RuntimeException wrapping the {@link IllegalArgumentException} or
 *         {@link IOException} raised while resolving the aliases for {@code splitPath}
 */
private void pushProjectionsAndFilters(final JobConf jobConf, final String splitPath, final String splitPathWithNoSchema) {
    if (mapWork == null) {
        return;
    } else if (mapWork.getPathToAliases() == null) {
        return;
    }
    final Set<String> aliases = new HashSet<String>();
    try {
        List<String> a = HiveFileFormatUtils.getFromPathRecursively(mapWork.getPathToAliases(), new Path(splitPath), null, false, true);
        if (a != null) {
            aliases.addAll(a);
        }
        if (a == null || a.isEmpty()) {
            // TODO: not having aliases for path usually means some bug. Should it give up?
            LOG.warn("Couldn't find aliases for " + splitPath);
        }
    } catch (IllegalArgumentException | IOException e) {
        throw new RuntimeException(e);
    }
    // Collect the needed columns from all the aliases and create ORed filter
    // expression for the table.
    boolean allColumnsNeeded = false;
    boolean noFilters = false;
    Set<Integer> neededColumnIDs = new HashSet<Integer>();
    // To support nested column pruning, we need to track the path from the top to the nested
    // fields
    Set<String> neededNestedColumnPaths = new HashSet<String>();
    List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
    RowSchema rowSchema = null;
    for (String alias : aliases) {
        final Operator<? extends Serializable> op = mapWork.getAliasToWork().get(alias);
        // instanceof is already false for null, so no separate null check is needed.
        if (op instanceof TableScanOperator) {
            final TableScanOperator ts = (TableScanOperator) op;
            if (ts.getNeededColumnIDs() == null) {
                allColumnsNeeded = true;
            } else {
                neededColumnIDs.addAll(ts.getNeededColumnIDs());
                if (ts.getNeededNestedColumnPaths() != null) {
                    neededNestedColumnPaths.addAll(ts.getNeededNestedColumnPaths());
                }
            }
            rowSchema = ts.getSchema();
            ExprNodeGenericFuncDesc filterExpr = ts.getConf() == null ? null : ts.getConf().getFilterExpr();
            if (filterExpr == null) {
                // No filter if ANY table scan has no filter expression. The flag must be sticky:
                // letting a later scan reset it would push that scan's filter for the whole
                // table and drop rows the unfiltered scan still needs.
                noFilters = true;
            } else {
                filterExprs.add(filterExpr);
            }
        }
    }
    ExprNodeGenericFuncDesc tableFilterExpr = null;
    if (!noFilters) {
        try {
            // Fold the per-scan filters into a single left-deep OR expression.
            for (ExprNodeGenericFuncDesc filterExpr : filterExprs) {
                if (tableFilterExpr == null) {
                    tableFilterExpr = filterExpr;
                } else {
                    tableFilterExpr = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), Arrays.<ExprNodeDesc>asList(tableFilterExpr, filterExpr));
                }
            }
        } catch (UDFArgumentException ex) {
            // Best effort: if the OR cannot be built, fall back to pushing no filter.
            LOG.debug("Turn off filtering due to " + ex);
            tableFilterExpr = null;
        }
    }
    // push down projections
    if (!allColumnsNeeded) {
        if (!neededColumnIDs.isEmpty()) {
            ColumnProjectionUtils.appendReadColumns(jobConf, new ArrayList<Integer>(neededColumnIDs));
            ColumnProjectionUtils.appendNestedColumnPaths(jobConf, new ArrayList<String>(neededNestedColumnPaths));
        }
    } else {
        ColumnProjectionUtils.setReadAllColumns(jobConf);
    }
    pushFilters(jobConf, rowSchema, tableFilterExpr);
}
Also used : Path(org.apache.hadoop.fs.Path) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) IOException(java.io.IOException) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) HashSet(java.util.HashSet)

Example 4 with GenericUDFOPOr

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr in project hive by apache.

the class ConstantPropagateProcFactory method foldNegative.

/**
 * Pushes a logical not() into its operand when the operand is a function call that can
 * absorb it.
 *
 * <ul>
 *   <li>not(comparison / is-null / is-not-null) becomes the operand's {@code negative()} UDF
 *       applied to the folded arguments;</li>
 *   <li>not(and/or) becomes the operand's {@code negative()} UDF applied to each argument
 *       wrapped in not() and folded in turn;</li>
 *   <li>not(not(x)) becomes the folded {@code x};</li>
 *   <li>for any other function operand the not() node is kept, but the operand's arguments are
 *       folded and written back onto the operand in place.</li>
 * </ul>
 * Any expression that is not a not() over a function call is returned unchanged.
 *
 * @param desc the expression to be evaluated
 * @return the expression that should replace {@code desc}; may be {@code desc} itself
 * @throws UDFArgumentException if a rewritten function expression cannot be instantiated
 */
private static ExprNodeDesc foldNegative(ExprNodeDesc desc) throws UDFArgumentException {
    if (!(desc instanceof ExprNodeGenericFuncDesc)) {
        return desc;
    }
    final ExprNodeGenericFuncDesc notExpr = (ExprNodeGenericFuncDesc) desc;
    if (!(notExpr.getGenericUDF() instanceof GenericUDFOPNot)) {
        return desc;
    }
    final ExprNodeDesc operand = notExpr.getChildren().get(0);
    if (!(operand instanceof ExprNodeGenericFuncDesc)) {
        return desc;
    }

    final ExprNodeGenericFuncDesc operandFunc = (ExprNodeGenericFuncDesc) operand;
    final GenericUDF operandUdf = operandFunc.getGenericUDF();
    final List<ExprNodeDesc> operandArgs = operand.getChildren();

    // Operands whose negation is simply their negative() UDF over the same arguments.
    final boolean directlyNegatable = operandUdf instanceof GenericUDFBaseCompare
        || operandUdf instanceof GenericUDFOPNull
        || operandUdf instanceof GenericUDFOPNotNull;
    // and/or: the negation additionally has to be pushed onto every argument.
    final boolean negateArguments = !directlyNegatable
        && (operandUdf instanceof GenericUDFOPAnd || operandUdf instanceof GenericUDFOPOr);

    if (directlyNegatable || negateArguments) {
        List<ExprNodeDesc> foldedArgs = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDesc arg : operandArgs) {
            ExprNodeDesc toFold = arg;
            if (negateArguments) {
                toFold = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), Arrays.asList(arg));
            }
            foldedArgs.add(foldNegative(toFold));
        }
        return ExprNodeGenericFuncDesc.newInstance(operandUdf.negative(), foldedArgs);
    }

    if (operandUdf instanceof GenericUDFOPNot) {
        // Double negation: drop both not() nodes and keep folding below them.
        return foldNegative(operand.getChildren().get(0));
    }

    // For operator like if() that cannot be handled, leave not() as it
    // is and continue processing the children
    List<ExprNodeDesc> foldedArgs = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc arg : operandArgs) {
        foldedArgs.add(foldNegative(arg));
    }
    operandFunc.setChildren(foldedArgs);
    return notExpr;
}
Also used : GenericUDFOPNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFOPNotNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) List(java.util.List) ArrayList(java.util.ArrayList) GenericUDFOPNot(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)

Example 5 with GenericUDFOPOr

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr in project hive by apache.

the class PartitionPruner method compactExpr.

/**
 * Taking a partition pruning expression, remove the null operands and non-partition columns.
 * The reason why there are null operands is ExprProcFactory classes, for example
 * PPRColumnExprProcessor.
 *
 * <p>A {@code null} result stands for an operand that has been removed. The rules applied,
 * recursively, are:
 * <ul>
 *   <li>{@code null} input, or a constant whose value is {@code null}: returns {@code null};</li>
 *   <li>other constants and column references: returned as-is;</li>
 *   <li>AND: {@code null} and true operands are dropped; a false operand makes the whole
 *       expression a FALSE constant; if every operand was true the result is a TRUE constant;
 *       if nothing remains the result is {@code null}; a single remaining operand replaces
 *       the AND;</li>
 *   <li>OR: a true operand makes the whole expression a TRUE constant (even if a
 *       {@code null} operand was seen earlier); otherwise any {@code null} operand makes the
 *       whole OR {@code null}; false operands are dropped; if every operand was false the
 *       result is a FALSE constant; a single remaining operand replaces the OR;</li>
 *   <li>any other function expression: returned unchanged, its children are not visited.</li>
 * </ul>
 * When an AND/OR keeps two or more operands, {@code expr} itself is modified in place (its
 * children are replaced by the compacted ones) and returned.
 *
 * @param expr original partition pruning expression.
 * @return partition pruning expression that only contains partition columns.
 * @throws IllegalStateException if {@code expr} is a non-null constant that
 *         {@code isBooleanExpr} rejects, or is of a node type other than constant, column or
 *         generic function
 */
@VisibleForTesting
static ExprNodeDesc compactExpr(ExprNodeDesc expr) {
    // If this is a constant boolean expression, return the value.
    if (expr == null) {
        return null;
    }
    if (expr instanceof ExprNodeConstantDesc) {
        // A constant holding null is treated like a removed operand.
        if (((ExprNodeConstantDesc) expr).getValue() == null) {
            return null;
        }
        if (!isBooleanExpr(expr)) {
            throw new IllegalStateException("Unexpected non-boolean ExprNodeConstantDesc: " + expr.getExprString(true));
        }
        return expr;
    } else if (expr instanceof ExprNodeColumnDesc) {
        return expr;
    } else if (expr instanceof ExprNodeGenericFuncDesc) {
        GenericUDF udf = ((ExprNodeGenericFuncDesc) expr).getGenericUDF();
        boolean isAnd = udf instanceof GenericUDFOPAnd;
        boolean isOr = udf instanceof GenericUDFOPOr;
        List<ExprNodeDesc> children = expr.getChildren();
        if (isAnd) {
            // Non-partition expressions are converted to nulls.
            List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
            // Stays true only while every operand seen so far compacted to a true expression.
            boolean allTrue = true;
            for (ExprNodeDesc child : children) {
                ExprNodeDesc compactChild = compactExpr(child);
                if (compactChild != null) {
                    // True operands do not constrain an AND, so they are not kept.
                    if (!isTrueExpr(compactChild)) {
                        newChildren.add(compactChild);
                        allTrue = false;
                    }
                    // A false operand decides the whole AND; the partially built list is discarded.
                    if (isFalseExpr(compactChild)) {
                        return new ExprNodeConstantDesc(Boolean.FALSE);
                    }
                } else {
                    // A removed (null) operand is dropped, but the AND can no longer be "all true".
                    allTrue = false;
                }
            }
            if (allTrue) {
                return new ExprNodeConstantDesc(Boolean.TRUE);
            }
            // Every operand was either removed or true, with at least one removed.
            if (newChildren.size() == 0) {
                return null;
            }
            // A one-operand AND is just that operand.
            if (newChildren.size() == 1) {
                return newChildren.get(0);
            }
            // Nothing to compact, update expr with compacted children.
            ((ExprNodeGenericFuncDesc) expr).setChildren(newChildren);
        } else if (isOr) {
            // Non-partition expressions are converted to nulls.
            List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
            // Stays true only while no non-false operand has been kept.
            boolean allFalse = true;
            // Set once any operand compacts to null.
            boolean isNull = false;
            for (ExprNodeDesc child : children) {
                ExprNodeDesc compactChild = compactExpr(child);
                if (compactChild != null) {
                    // A true operand decides the whole OR, regardless of earlier null operands.
                    if (isTrueExpr(compactChild)) {
                        return new ExprNodeConstantDesc(Boolean.TRUE);
                    }
                    // Once a null operand was seen the OR can only end up TRUE or null,
                    // so further operands are no longer collected.
                    if (!isNull && !isFalseExpr(compactChild)) {
                        newChildren.add(compactChild);
                        allFalse = false;
                    }
                } else {
                    // Keep looping: a later true operand still turns the result into TRUE.
                    isNull = true;
                }
            }
            // Unlike AND, a removed operand removes the whole OR.
            if (isNull) {
                return null;
            }
            if (allFalse) {
                return new ExprNodeConstantDesc(Boolean.FALSE);
            }
            // A one-operand OR is just that operand.
            if (newChildren.size() == 1) {
                return newChildren.get(0);
            }
            // Nothing to compact, update expr with compacted children.
            ((ExprNodeGenericFuncDesc) expr).setChildren(newChildren);
        }
        // Reached for a compacted AND/OR with 2+ operands, and for any other function
        // (which is returned untouched).
        return expr;
    } else {
        throw new IllegalStateException("Unexpected type of ExprNodeDesc: " + expr.getExprString(true));
    }
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) AbstractSequentialList(java.util.AbstractSequentialList) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)13 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)13 GenericUDFOPOr (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr)13 ArrayList (java.util.ArrayList)10 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)8 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)8 GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)8 List (java.util.List)6 Test (org.junit.Test)6 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)4 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)4 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)4 Operator (org.apache.hadoop.hive.ql.exec.Operator)3 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)3 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)3 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)3 FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc)3 HashMap (java.util.HashMap)2 LinkedList (java.util.LinkedList)2 Range (org.apache.accumulo.core.data.Range)2