
Example 21 with GenericUDFOPEqualOrLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in project hive by apache.

The class TestOrcSplitElimination, method testSplitEliminationComplexExpr.
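The test below relies on two fixture helpers, createIO() and writeData(writer), that are not part of this snippet. A minimal sketch of plausible implementations, assuming a simplified row class that carries only the long userid and double subtype columns implied by the predicates (the real test row likely has more fields; the row count and values are illustrative, and the sketch assumes the test class's imports plus java.io.IOException and org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory):

// Simplified row type; only userid and subtype are implied by the predicates below.
static class TestRow {
    Long userid;
    Double subtype;

    TestRow(Long userid, Double subtype) {
        this.userid = userid;
        this.subtype = subtype;
    }
}

// Reflection-based inspector over the row class, which is what the ORC writer expects.
private ObjectInspector createIO() {
    return ObjectInspectorFactory.getReflectionObjectInspector(
        TestRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}

// Write enough rows that the 100000 byte stripe size yields several stripes;
// the concrete values here are illustrative, not the test's real data.
private void writeData(Writer writer) throws IOException {
    for (int i = 0; i < 25000; i++) {
        writer.addRow(new TestRow((long) (i % 100), (double) (i % 10)));
    }
}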

@Test
public void testSplitEliminationComplexExpr() throws Exception {
    ObjectInspector inspector = createIO();
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000, CompressionKind.NONE, 10000, 10000);
    writeData(writer);
    writer.close();
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 150000);
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    // predicate expression: userid <= 100 and subtype <= 1000.0
    GenericUDF udf = new GenericUDFOPEqualOrLessThan();
    List<ExprNodeDesc> childExpr = Lists.newArrayList();
    ExprNodeColumnDesc col = new ExprNodeColumnDesc(Long.class, "userid", "T", false);
    ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
    childExpr.add(col);
    childExpr.add(con);
    ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    GenericUDF udf1 = new GenericUDFOPEqualOrLessThan();
    List<ExprNodeDesc> childExpr1 = Lists.newArrayList();
    ExprNodeColumnDesc col1 = new ExprNodeColumnDesc(Double.class, "subtype", "T", false);
    ExprNodeConstantDesc con1 = new ExprNodeConstantDesc(1000.0);
    childExpr1.add(col1);
    childExpr1.add(con1);
    ExprNodeGenericFuncDesc en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
    GenericUDF udf2 = new GenericUDFOPAnd();
    List<ExprNodeDesc> childExpr2 = Lists.newArrayList();
    childExpr2.add(en);
    childExpr2.add(en1);
    ExprNodeGenericFuncDesc en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
    String sargStr = SerializationUtilities.serializeExpression(en2);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    InputSplit[] splits = in.getSplits(conf, 1);
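    // nothing is eliminated by this predicate; the 150000 byte max split size groups the stripes into two splits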
    assertEquals(2, splits.length);
    con = new ExprNodeConstantDesc(2);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    con1 = new ExprNodeConstantDesc(0.0);
    childExpr1.set(1, con1);
    en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
    childExpr2.set(0, en);
    childExpr2.set(1, en1);
    en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
    sargStr = SerializationUtilities.serializeExpression(en2);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // no stripe will satisfy the predicate
    assertEquals(0, splits.length);
    con = new ExprNodeConstantDesc(2);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    con1 = new ExprNodeConstantDesc(1.0);
    childExpr1.set(1, con1);
    en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
    childExpr2.set(0, en);
    childExpr2.set(1, en1);
    en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
    sargStr = SerializationUtilities.serializeExpression(en2);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // only the first stripe will satisfy the condition and hence a single split
    assertEquals(1, splits.length);
    udf = new GenericUDFOPEqual();
    con = new ExprNodeConstantDesc(13);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    con1 = new ExprNodeConstantDesc(80.0);
    childExpr1.set(1, con1);
    en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
    childExpr2.set(0, en);
    childExpr2.set(1, en1);
    en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
    sargStr = SerializationUtilities.serializeExpression(en2);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // the stripes that can satisfy the condition form two ranges and hence two splits
    assertEquals(2, splits.length);
    udf = new GenericUDFOPEqual();
    con = new ExprNodeConstantDesc(13);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    udf1 = new GenericUDFOPEqual();
    con1 = new ExprNodeConstantDesc(80.0);
    childExpr1.set(1, con1);
    en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
    childExpr2.set(0, en);
    childExpr2.set(1, en1);
    en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
    sargStr = SerializationUtilities.serializeExpression(en2);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // only the second stripe will satisfy the condition and hence a single split
    assertEquals(1, splits.length);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) InputSplit(org.apache.hadoop.mapred.InputSplit) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)
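Each predicate variation above repeats the same tail end: serialize the rebuilt descriptor, publish it under hive.io.filter.expr.serialized, and recompute the splits. A small helper capturing that pattern might look like this (a sketch only; the method name is made up, and it assumes the test class's imports plus org.apache.hadoop.mapred.JobConf and java.io.IOException):

// Illustrative helper, not part of the original test: pushes an already-built
// predicate into the conf and returns how many splits survive ORC split elimination.
private int splitCountFor(JobConf conf, InputFormat<?, ?> in, ExprNodeGenericFuncDesc predicate)
        throws IOException {
    conf.set("hive.io.filter.expr.serialized",
        SerializationUtilities.serializeExpression(predicate));
    return in.getSplits(conf, 1).length;
}

With such a helper, each block above reduces to swapping the constant child, rebuilding en2, and asserting on splitCountFor(conf, in, en2).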

Example 22 with GenericUDFOPEqualOrLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in project hive by apache.

The class TestOrcSplitElimination, method testSplitEliminationSmallMaxSplit.

@Test
public void testSplitEliminationSmallMaxSplit() throws Exception {
    ObjectInspector inspector = createIO();
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000, CompressionKind.NONE, 10000, 10000);
    writeData(writer);
    writer.close();
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 5000);
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    GenericUDF udf = new GenericUDFOPEqualOrLessThan();
    List<ExprNodeDesc> childExpr = Lists.newArrayList();
    ExprNodeConstantDesc con;
    ExprNodeGenericFuncDesc en;
    String sargStr;
    createTestSarg(inspector, udf, childExpr);
    InputSplit[] splits = in.getSplits(conf, 1);
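    // the initial predicate eliminates nothing; with the 5000 byte max split size each stripe becomes its own split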
    assertEquals(5, splits.length);
    con = new ExprNodeConstantDesc(1);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
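    // no stripe can satisfy the predicate, so every stripe is eliminated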
    assertEquals(0, splits.length);
    con = new ExprNodeConstantDesc(2);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
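    // a single stripe survives elimination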
    assertEquals(1, splits.length);
    con = new ExprNodeConstantDesc(5);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
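    // two stripes survive elimination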
    assertEquals(2, splits.length);
    con = new ExprNodeConstantDesc(13);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
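    // three stripes survive elimination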
    assertEquals(3, splits.length);
    con = new ExprNodeConstantDesc(29);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
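    // four stripes survive elimination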
    assertEquals(4, splits.length);
    con = new ExprNodeConstantDesc(70);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
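    // all five stripes survive, matching the initial split count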
    assertEquals(5, splits.length);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) InputSplit(org.apache.hadoop.mapred.InputSplit) Test(org.junit.Test)
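Example 22 calls createTestSarg(inspector, udf, childExpr), which is not shown in this snippet. Judging from the way the test later replaces element 1 of childExpr with new userid constants, it presumably builds an initial "userid <= constant" predicate over the same column descriptor used in Example 21 and stores it in the conf. A hedged reconstruction (the initial constant of 100 and the reference to the test's conf field are assumptions):

// Hedged reconstruction, not the actual helper: builds "userid <= 100", serializes it,
// and publishes it so the first getSplits() call already sees a predicate.
private void createTestSarg(ObjectInspector inspector, GenericUDF udf,
        List<ExprNodeDesc> childExpr) {
    childExpr.add(new ExprNodeColumnDesc(Long.class, "userid", "T", false));
    childExpr.add(new ExprNodeConstantDesc(100));
    ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    conf.set("hive.io.filter.expr.serialized",
        SerializationUtilities.serializeExpression(en));
}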

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 22
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 22
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 22
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 21
Test (org.junit.Test): 19
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 18
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 14
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan): 13
ArrayList (java.util.ArrayList): 9
Range (org.apache.accumulo.core.data.Range): 9
HashMap (java.util.HashMap): 8
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 8
DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 7
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 7
Node (org.apache.hadoop.hive.ql.lib.Node): 7
SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher): 7
SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker): 7
List (java.util.List): 6
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 6
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 6