Search in sources :

Example 41 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project phoenix by apache.

the class IndexPredicateAnalyzer method analyzeExpr.

private ExprNodeDesc analyzeExpr(ExprNodeGenericFuncDesc expr, List<IndexSearchCondition> searchConditions, Object... nodeOutputs) throws SemanticException {
    if (FunctionRegistry.isOpAnd(expr)) {
        assert (nodeOutputs.length == 2);
        ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
        ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1];
        if (residual1 == null) {
            return residual2;
        }
        if (residual2 == null) {
            return residual1;
        }
        List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
        residuals.add(residual1);
        residuals.add(residual2);
        return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getGenericUDFForAnd(), residuals);
    }
    GenericUDF genericUDF = expr.getGenericUDF();
    if (!(genericUDF instanceof GenericUDFBaseCompare)) {
        // 2015-10-22 Added by JeongMin Ju : Processing Between/In Operator
        if (genericUDF instanceof GenericUDFBetween) {
            // In case of not between, The value of first element of nodeOutputs is true.
            // otherwise false.
            processingBetweenOperator(expr, searchConditions, nodeOutputs);
            return expr;
        } else if (genericUDF instanceof GenericUDFIn) {
            // In case of not in operator, in operator exist as child of not operator.
            processingInOperator(expr, searchConditions, false, nodeOutputs);
            return expr;
        } else if (genericUDF instanceof GenericUDFOPNot && ((ExprNodeGenericFuncDesc) expr.getChildren().get(0)).getGenericUDF() instanceof GenericUDFIn) {
            // In case of not in operator, in operator exist as child of not operator.
            processingInOperator((ExprNodeGenericFuncDesc) expr.getChildren().get(0), searchConditions, true, ((ExprNodeGenericFuncDesc) nodeOutputs[0]).getChildren().toArray());
            return expr;
        } else if (genericUDF instanceof GenericUDFOPNull) {
            processingNullOperator(expr, searchConditions, nodeOutputs);
            return expr;
        } else if (genericUDF instanceof GenericUDFOPNotNull) {
            processingNotNullOperator(expr, searchConditions, nodeOutputs);
            return expr;
        } else {
            return expr;
        }
    }
    ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
    ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
    // user
    if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) {
        expr1 = getColumnExpr(expr1);
        expr2 = getColumnExpr(expr2);
    }
    ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
    if (extracted == null || (extracted.length > 2 && !acceptsFields)) {
        return expr;
    }
    ExprNodeColumnDesc columnDesc;
    ExprNodeConstantDesc constantDesc;
    if (extracted[0] instanceof ExprNodeConstantDesc) {
        genericUDF = genericUDF.flip();
        columnDesc = (ExprNodeColumnDesc) extracted[1];
        constantDesc = (ExprNodeConstantDesc) extracted[0];
    } else {
        columnDesc = (ExprNodeColumnDesc) extracted[0];
        constantDesc = (ExprNodeConstantDesc) extracted[1];
    }
    Set<String> allowed = columnToUDFs.get(columnDesc.getColumn());
    if (allowed == null) {
        return expr;
    }
    String udfName = genericUDF.getUdfName();
    if (!allowed.contains(genericUDF.getUdfName())) {
        return expr;
    }
    String[] fields = null;
    if (extracted.length > 2) {
        ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) extracted[2];
        if (!isValidField(fieldDesc)) {
            return expr;
        }
        fields = ExprNodeDescUtils.extractFields(fieldDesc);
    }
    // We also need to update the expr so that the index query can be
    // generated.
    // Note that, hive does not support UDFToDouble etc in the query text.
    List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>();
    list.add(expr1);
    list.add(expr2);
    expr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list);
    searchConditions.add(new IndexSearchCondition(columnDesc, udfName, constantDesc, expr, fields));
    // remove it from the residual predicate
    return fields == null ? null : expr;
}
Also used : GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFOPNotNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFIn(org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPNot(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot)

Example 42 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project phoenix by apache.

the class IndexPredicateAnalyzer method processingBetweenOperator.

private void processingBetweenOperator(ExprNodeGenericFuncDesc expr, List<IndexSearchCondition> searchConditions, Object... nodeOutputs) {
    ExprNodeColumnDesc columnDesc = null;
    String[] fields = null;
    if (nodeOutputs[1] instanceof ExprNodeFieldDesc) {
        // rowKey field
        ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) nodeOutputs[1];
        fields = ExprNodeDescUtils.extractFields(fieldDesc);
        ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair((ExprNodeDesc) nodeOutputs[1], (ExprNodeDesc) nodeOutputs[2]);
        columnDesc = (ExprNodeColumnDesc) extracted[0];
    } else if (nodeOutputs[0] instanceof ExprNodeGenericFuncDesc) {
        columnDesc = (ExprNodeColumnDesc) ((ExprNodeGenericFuncDesc) nodeOutputs[1]).getChildren().get(0);
    } else {
        columnDesc = (ExprNodeColumnDesc) nodeOutputs[1];
    }
    String udfName = expr.getGenericUDF().getUdfName();
    ExprNodeConstantDesc[] betweenConstants = new ExprNodeConstantDesc[] { (ExprNodeConstantDesc) nodeOutputs[2], (ExprNodeConstantDesc) nodeOutputs[3] };
    boolean isNot = (Boolean) ((ExprNodeConstantDesc) nodeOutputs[0]).getValue();
    searchConditions.add(new IndexSearchCondition(columnDesc, udfName, betweenConstants, expr, fields, isNot));
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 43 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project SQLWindowing by hbutani.

the class WindowingTypeCheckProcFactory method processGByExpr.

/**
 * Function to do groupby subexpression elimination. This is called by all
 * the processors initially. As an example, consider the query select a+b,
 * count(1) from T group by a+b; Then a+b is already precomputed in the
 * group by operators key, so we substitute a+b in the select list with the
 * internal column name of the a+b expression that appears in the in input
 * row resolver.
 *
 * @param nd
 *            The node that is being inspected.
 * @param procCtx
 *            The processor context.
 *
 * @return exprNodeColumnDesc.
 */
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
    // We recursively create the exprNodeDesc. Base cases: when we encounter
    // a column ref, we convert that into an exprNodeColumnDesc; when we
    // encounter
    // a constant, we convert that into an exprNodeConstantDesc. For others
    // we
    // just
    // build the exprNodeFuncDesc with recursively built children.
    ASTNode expr = (ASTNode) nd;
    TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
    RowResolver input = ctx.getInputRR();
    ExprNodeDesc desc = null;
    // If the current subExpression is pre-calculated, as in Group-By etc.
    ColumnInfo colInfo = input.getExpression(expr);
    if (colInfo != null) {
        desc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
        // }
        return desc;
    }
    return desc;
}
Also used : TypeCheckCtx(org.apache.hadoop.hive.ql.parse.TypeCheckCtx) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 44 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TestAccumuloRangeGenerator method testRangeOverNonRowIdField.

@Test
public void testRangeOverNonRowIdField() throws Exception {
    // foo >= 'f'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "foo", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // foo <= 'm'
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "foo", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);
    assertNotNull(node2);
    // And UDF
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    // Filters are not over the rowid, therefore scan everything
    Object result = nodeOutput.get(both);
    Assert.assertNull(result);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Test(org.junit.Test)

Example 45 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TestAccumuloRangeGenerator method testRangeOverIntegerIndexedField.

@Test
public void testRangeOverIntegerIndexedField() throws Exception {
    // cars >= 2
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "cars", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 2);
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // cars <= 9
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "cars", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 9);
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);
    assertNotNull(node2);
    // And UDF
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid");
    rangeGenerator.setIndexScanner(TestAccumuloDefaultIndexScanner.buildMockHandler(10));
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    // Filters are using an index which should match 3 rows
    Object result = nodeOutput.get(both);
    if (result instanceof List) {
        List results = (List) result;
        Assert.assertEquals(3, results.size());
        Assert.assertTrue("does not contain row1", results.contains(new Range("row1")));
        Assert.assertTrue("does not contain row2", results.contains(new Range("row2")));
        Assert.assertTrue("does not contain row3", results.contains(new Range("row3")));
    } else {
        Assert.fail("Results not a list");
    }
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Range(org.apache.accumulo.core.data.Range) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Test(org.junit.Test)

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)186 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)168 ArrayList (java.util.ArrayList)110 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)98 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)89 Test (org.junit.Test)68 HashMap (java.util.HashMap)53 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)49 LinkedHashMap (java.util.LinkedHashMap)35 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)34 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)30 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)28 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)26 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)24 Operator (org.apache.hadoop.hive.ql.exec.Operator)24 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)24 List (java.util.List)23 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)22 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)22 NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint)22