Example usage of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project: class TestConvertAstToSearchArg, method testExpression10.
@Test
public void testExpression10() throws Exception {
/* id >= 10 and not (10 > id) */
// Serialized (java.beans.XMLDecoder) form of the Hive expression tree for
// "id >= 10 AND NOT (10 > id)": an AND whose children are a
// GenericUDFOPEqualOrGreaterThan node and a NOT wrapping a
// GenericUDFOPLessThan node, both comparing column "id" against the int 10.
String exprStr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?> \n" + "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>id</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + " <void property=\"typeName\"> \n" + " <string>int</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>10</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object id=\"PrimitiveTypeInfo1\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + " <void property=\"typeName\"> \n" + " <string>boolean</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>id</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>10</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + "</java>";
// Parse the expression into a SearchArgument; both conjuncts normalize to
// the same single leaf (NOT (id < 10)), hence only one leaf is expected.
SearchArgumentImpl sarg = (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr));
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
// Convert the sarg into a Parquet FilterPredicate against a schema that
// actually contains the "id" column.
MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 id;" + " required binary first_name; }");
FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
assertEquals(expected, p.toString());
// The single shared leaf: id < 10 with the int literal widened to LONG.
assertEquals(PredicateLeaf.Type.LONG, leaves.get(0).getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaves.get(0).getOperator());
assertEquals("id", leaves.get(0).getColumnName());
assertEquals(10L, leaves.get(0).getLiteral());
// The expression references leaf-0 twice but must not share tree nodes.
assertEquals("(and (not leaf-0) (not leaf-0))", sarg.getExpression().toString());
assertNoSharedNodes(sarg.getExpression(), Sets.<ExpressionTree>newIdentityHashSet());
// Truth-table check: (not x) and (not x) == not x under three-valued logic.
assertEquals(TruthValue.NO, sarg.evaluate(values(TruthValue.YES)));
assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.NO)));
assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NULL)));
assertEquals(TruthValue.NO_NULL, sarg.evaluate(values(TruthValue.YES_NULL)));
assertEquals(TruthValue.YES_NULL, sarg.evaluate(values(TruthValue.NO_NULL)));
assertEquals(TruthValue.YES_NO, sarg.evaluate(values(TruthValue.YES_NO)));
assertEquals(TruthValue.YES_NO_NULL, sarg.evaluate(values(TruthValue.YES_NO_NULL)));
}
Example usage of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project: class TestParquetFilterPredicate, method testFilterColumnsThatDoNoExistOnSchema.
@Test
public void testFilterColumnsThatDoNoExistOnSchema() {
  // The schema declares only "a" and "stinger"; the argument below also
  // references "y" and "z", which the converter must drop.
  final MessageType schema =
      MessageTypeParser.parseMessageType(
          "message test { required int32 a; required binary stinger; }");
  // not( isNull(a) or between(y, 10, 20) or in(z, 1, 2, 3) or a <=> "stinger" )
  final SearchArgument sarg =
      SearchArgumentFactory.newBuilder()
          .startNot()
            .startOr()
              .isNull("a", PredicateLeaf.Type.LONG)
              // Column will be removed from filter
              .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
              // Column will be removed from filter
              .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
              .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
            .end()
          .end()
          .build();
  final FilterPredicate converted =
      ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
  // Only the predicates on column "a" survive the conversion.
  assertEquals("and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))",
      converted.toString());
}
Example usage of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project: class TestParquetFilterPredicate, method testFilterFloatColumns.
@Test
public void testFilterFloatColumns() {
  // Schema with one float column and one int column.
  final MessageType schema =
      MessageTypeParser.parseMessageType(
          "message test { required float a; required int32 b; }");
  // not( isNull(a) or between(a, 10.2, 20.3) or in(b, 1, 2, 3) )
  final SearchArgument sarg =
      SearchArgumentFactory.newBuilder()
          .startNot()
            .startOr()
              .isNull("a", PredicateLeaf.Type.FLOAT)
              .between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3)
              .in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
            .end()
          .end()
          .build();
  final FilterPredicate converted =
      ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
  // FLOAT literals stay doubles; LONG literals map to int32 column "b".
  final String expected =
      "and(and(not(eq(a, null)), not(and(lteq(a, 20.3), not(lt(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
  assertEquals(expected, converted.toString());
}
Example usage of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project: class TestParquetFilterPredicate, method testFilterBetween.
@Test
public void testFilterBetween() {
  final MessageType schema =
      MessageTypeParser.parseMessageType("message test { required int32 bCol; }");
  // Normal range: 1 <= bCol <= 5.
  checkBetween(schema, 1L, 5L, "and(lteq(bCol, 5), not(lt(bCol, 1)))");
  // Reversed bounds are passed through unswapped, producing an
  // unsatisfiable predicate (bCol <= 1 AND bCol >= 5).
  checkBetween(schema, 5L, 1L, "and(lteq(bCol, 1), not(lt(bCol, 5)))");
  // Degenerate single-point range: bCol == 1.
  checkBetween(schema, 1L, 1L, "and(lteq(bCol, 1), not(lt(bCol, 1)))");
}

/**
 * Builds a BETWEEN search argument on bCol with the given bounds, converts
 * it against the schema, and asserts the resulting predicate's string form.
 */
private static void checkBetween(MessageType schema, long low, long high, String expected) {
  SearchArgument sarg =
      SearchArgumentFactory.newBuilder()
          .between("bCol", PredicateLeaf.Type.LONG, low, high)
          .build();
  FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
  assertEquals(expected, predicate.toString());
}
Example usage of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project: class ParquetRecordReaderBase, method setFilter.
/**
 * Builds a Parquet filter from the SearchArgument stored in the job
 * configuration, registers it on the configuration, and returns it wrapped
 * for the Parquet reader. Returns {@code null} when no search argument is
 * present or no predicate could be generated for this schema.
 */
public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
  final SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
  if (sarg == null) {
    // Nothing was pushed down into the configuration.
    return null;
  }
  // Create the Parquet FilterPredicate without including columns that do not exist
  // on the schema (such as partition columns).
  final FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
  if (predicate == null) {
    // Filter may have sensitive information. Do not send to debug.
    LOG.debug("No PARQUET predicate push down is generated.");
    return null;
  }
  // Filter may have sensitive information. Do not send to debug.
  LOG.debug("PARQUET predicate push down generated.");
  ParquetInputFormat.setFilterPredicate(conf, predicate);
  return FilterCompat.get(predicate);
}
Aggregations