Search in sources :

Example 1 with GenericUDFBetween

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween in project hive by apache.

the class TestVectorizationContext method testBetweenFilters.

@Test
public void testBetweenFilters() throws HiveException {
    // string tests
    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha");
    ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc("Bravo");
    // string BETWEEN
    GenericUDFBetween udf = new GenericUDFBetween();
    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
    // no NOT keyword
    children1.add(new ExprNodeConstantDesc(new Boolean(false)));
    children1.add(col1Expr);
    children1.add(constDesc);
    children1.add(constDesc2);
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1);
    List<String> columns = new ArrayList<String>();
    columns.add("col0");
    columns.add("col1");
    columns.add("col2");
    VectorizationContext vc = new VectorizationContext("name", columns);
    VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringColumnBetween);
    // string NOT BETWEEN
    // has NOT keyword
    children1.set(0, new ExprNodeConstantDesc(new Boolean(true)));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringColumnNotBetween);
    // CHAR tests
    CharTypeInfo charTypeInfo = new CharTypeInfo(10);
    col1Expr = new ExprNodeColumnDesc(charTypeInfo, "col1", "table", false);
    constDesc = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Alpha", 10));
    constDesc2 = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Bravo", 10));
    // CHAR BETWEEN
    udf = new GenericUDFBetween();
    children1 = new ArrayList<ExprNodeDesc>();
    // no NOT keyword
    children1.add(new ExprNodeConstantDesc(new Boolean(false)));
    children1.add(col1Expr);
    children1.add(constDesc);
    children1.add(constDesc2);
    exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1);
    vc = new VectorizationContext("name", columns);
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterCharColumnBetween);
    // CHAR NOT BETWEEN
    // has NOT keyword
    children1.set(0, new ExprNodeConstantDesc(new Boolean(true)));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterCharColumnNotBetween);
    // VARCHAR tests
    VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10);
    col1Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false);
    constDesc = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Alpha", 10));
    constDesc2 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Bravo", 10));
    // VARCHAR BETWEEN
    udf = new GenericUDFBetween();
    children1 = new ArrayList<ExprNodeDesc>();
    // no NOT keyword
    children1.add(new ExprNodeConstantDesc(new Boolean(false)));
    children1.add(col1Expr);
    children1.add(constDesc);
    children1.add(constDesc2);
    exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1);
    vc = new VectorizationContext("name", columns);
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterVarCharColumnBetween);
    // VARCHAR NOT BETWEEN
    // has NOT keyword
    children1.set(0, new ExprNodeConstantDesc(new Boolean(true)));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterVarCharColumnNotBetween);
    // long BETWEEN
    children1.set(0, new ExprNodeConstantDesc(new Boolean(false)));
    children1.set(1, new ExprNodeColumnDesc(Long.class, "col1", "table", false));
    children1.set(2, new ExprNodeConstantDesc(10));
    children1.set(3, new ExprNodeConstantDesc(20));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterLongColumnBetween);
    // long NOT BETWEEN
    children1.set(0, new ExprNodeConstantDesc(new Boolean(true)));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterLongColumnNotBetween);
    // double BETWEEN
    children1.set(0, new ExprNodeConstantDesc(new Boolean(false)));
    children1.set(1, new ExprNodeColumnDesc(Double.class, "col1", "table", false));
    children1.set(2, new ExprNodeConstantDesc(10.0d));
    children1.set(3, new ExprNodeConstantDesc(20.0d));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterDoubleColumnBetween);
    // double NOT BETWEEN
    children1.set(0, new ExprNodeConstantDesc(new Boolean(true)));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterDoubleColumnNotBetween);
    // timestamp BETWEEN
    children1.set(0, new ExprNodeConstantDesc(new Boolean(false)));
    children1.set(1, new ExprNodeColumnDesc(Timestamp.class, "col1", "table", false));
    children1.set(2, new ExprNodeConstantDesc("2013-11-05 00:00:00.000"));
    children1.set(3, new ExprNodeConstantDesc("2013-11-06 00:00:00.000"));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertEquals(FilterTimestampColumnBetween.class, ve.getClass());
    // timestamp NOT BETWEEN
    children1.set(0, new ExprNodeConstantDesc(new Boolean(true)));
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertEquals(FilterTimestampColumnNotBetween.class, ve.getClass());
}
Also used : FilterCharColumnBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnBetween) FilterDoubleColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ArrayList(java.util.ArrayList) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) FilterLongColumnBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween) VectorUDFUnixTimeStampTimestamp(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampTimestamp) VectorUDFYearTimestamp(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearTimestamp) GenericUDFTimestamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp) Timestamp(java.sql.Timestamp) FilterCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnNotBetween) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) FilterStringColumnBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnBetween) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) BRoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble) FuncRoundDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncRoundDoubleToDouble) FuncBRoundDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncBRoundDoubleToDouble) FuncLogWithBaseDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble) FuncLogWithBaseLongToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble) FuncPowerDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble) FuncLnDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncLnDoubleToDouble) FuncSinDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncSinDoubleToDouble) RoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.RoundWithNumDigitsDoubleToDouble) FilterDoubleColumnBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween) FilterVarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween) FilterStringColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) FilterVarCharColumnBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnBetween) FilterLongColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween) Test(org.junit.Test)

Example 2 with GenericUDFBetween

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween in project mongo-hadoop by mongodb.

the class MongoStorageHandlerTest method testDecomposePredicateUnrecognizedOps.

@Test
public void testDecomposePredicateUnrecognizedOps() throws SerDeException {
    BSONSerDe serde = new BSONSerDe();
    Configuration conf = new Configuration();
    Properties tableProperties = new Properties();
    // Set table columns.
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, "id,i,j");
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,int,int");
    serde.initialize(conf, tableProperties);
    // Build a query.
    // WHERE i > 20 AND j IS BETWEEN 1 AND 10
    GenericUDFOPGreaterThan gt = new GenericUDFOPGreaterThan();
    ExprNodeDesc[] children = { new ExprNodeColumnDesc(new SimpleMockColumnInfo("i")), new ExprNodeConstantDesc(20) };
    ExprNodeGenericFuncDesc iGt20 = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, gt, Arrays.asList(children));
    GenericUDFBetween between = new GenericUDFBetween();
    ExprNodeDesc[] betweenChildren = { new ExprNodeConstantDesc(false), new ExprNodeColumnDesc(new SimpleMockColumnInfo("j")), new ExprNodeConstantDesc(1), new ExprNodeConstantDesc(10) };
    ExprNodeGenericFuncDesc jBetween1And10 = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, between, Arrays.asList(betweenChildren));
    ExprNodeDesc[] exprChildren = { iGt20, jBetween1And10 };
    ExprNodeGenericFuncDesc expr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), Arrays.asList(exprChildren));
    HiveStoragePredicateHandler.DecomposedPredicate decomposed = msh.decomposePredicate(null, serde, expr);
    assertEquals(jBetween1And10.getExprString(), decomposed.residualPredicate.getExprString());
    assertEquals(iGt20.getExprString(), decomposed.pushedPredicate.getExprString());
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) HiveStoragePredicateHandler(org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) Configuration(org.apache.hadoop.conf.Configuration) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Properties(java.util.Properties) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)

Example 3 with GenericUDFBetween

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween in project phoenix by apache.

the class IndexPredicateAnalyzer method analyzeExpr.

private ExprNodeDesc analyzeExpr(ExprNodeGenericFuncDesc expr, List<IndexSearchCondition> searchConditions, Object... nodeOutputs) throws SemanticException {
    if (FunctionRegistry.isOpAnd(expr)) {
        assert (nodeOutputs.length == 2);
        ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
        ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1];
        if (residual1 == null) {
            return residual2;
        }
        if (residual2 == null) {
            return residual1;
        }
        List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
        residuals.add(residual1);
        residuals.add(residual2);
        return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getGenericUDFForAnd(), residuals);
    }
    GenericUDF genericUDF = expr.getGenericUDF();
    if (!(genericUDF instanceof GenericUDFBaseCompare)) {
        // 2015-10-22 Added by JeongMin Ju : Processing Between/In Operator
        if (genericUDF instanceof GenericUDFBetween) {
            // In case of not between, The value of first element of nodeOutputs is true.
            // otherwise false.
            processingBetweenOperator(expr, searchConditions, nodeOutputs);
            return expr;
        } else if (genericUDF instanceof GenericUDFIn) {
            // In case of not in operator, in operator exist as child of not operator.
            processingInOperator(expr, searchConditions, false, nodeOutputs);
            return expr;
        } else if (genericUDF instanceof GenericUDFOPNot && ((ExprNodeGenericFuncDesc) expr.getChildren().get(0)).getGenericUDF() instanceof GenericUDFIn) {
            // In case of not in operator, in operator exist as child of not operator.
            processingInOperator((ExprNodeGenericFuncDesc) expr.getChildren().get(0), searchConditions, true, ((ExprNodeGenericFuncDesc) nodeOutputs[0]).getChildren().toArray());
            return expr;
        } else if (genericUDF instanceof GenericUDFOPNull) {
            processingNullOperator(expr, searchConditions, nodeOutputs);
            return expr;
        } else if (genericUDF instanceof GenericUDFOPNotNull) {
            processingNotNullOperator(expr, searchConditions, nodeOutputs);
            return expr;
        } else {
            return expr;
        }
    }
    ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
    ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
    // user
    if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) {
        expr1 = getColumnExpr(expr1);
        expr2 = getColumnExpr(expr2);
    }
    ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
    if (extracted == null || (extracted.length > 2 && !acceptsFields)) {
        return expr;
    }
    ExprNodeColumnDesc columnDesc;
    ExprNodeConstantDesc constantDesc;
    if (extracted[0] instanceof ExprNodeConstantDesc) {
        genericUDF = genericUDF.flip();
        columnDesc = (ExprNodeColumnDesc) extracted[1];
        constantDesc = (ExprNodeConstantDesc) extracted[0];
    } else {
        columnDesc = (ExprNodeColumnDesc) extracted[0];
        constantDesc = (ExprNodeConstantDesc) extracted[1];
    }
    Set<String> allowed = columnToUDFs.get(columnDesc.getColumn());
    if (allowed == null) {
        return expr;
    }
    String udfName = genericUDF.getUdfName();
    if (!allowed.contains(genericUDF.getUdfName())) {
        return expr;
    }
    String[] fields = null;
    if (extracted.length > 2) {
        ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) extracted[2];
        if (!isValidField(fieldDesc)) {
            return expr;
        }
        fields = ExprNodeDescUtils.extractFields(fieldDesc);
    }
    // We also need to update the expr so that the index query can be
    // generated.
    // Note that, hive does not support UDFToDouble etc in the query text.
    List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>();
    list.add(expr1);
    list.add(expr2);
    expr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list);
    searchConditions.add(new IndexSearchCondition(columnDesc, udfName, constantDesc, expr, fields));
    // remove it from the residual predicate
    return fields == null ? null : expr;
}
Also used : GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFOPNotNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFIn(org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPNot(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot)

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)3 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)3 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)3 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)3 GenericUDFBetween (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween)3 ArrayList (java.util.ArrayList)2 Test (org.junit.Test)2 Timestamp (java.sql.Timestamp)1 Properties (java.util.Properties)1 Configuration (org.apache.hadoop.conf.Configuration)1 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)1 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)1 BRoundWithNumDigitsDoubleToDouble (org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble)1 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)1 FuncLogWithBaseDoubleToDouble (org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble)1 FuncLogWithBaseLongToDouble (org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble)1 FuncPowerDoubleToDouble (org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble)1 RoundWithNumDigitsDoubleToDouble (org.apache.hadoop.hive.ql.exec.vector.expressions.RoundWithNumDigitsDoubleToDouble)1 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)1 VectorUDFUnixTimeStampTimestamp (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampTimestamp)1