Search in sources:

Example 66 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterFloatColumn.

/**
 * Converts an AND search argument that mixes LONG, STRING and FLOAT leaves
 * into a Parquet filter predicate and checks the predicate's string form.
 *
 * <p>The schema declares {@code x}/{@code x1} as int32, {@code y} as binary and
 * {@code z}/{@code z1} as float; the Hive column types are int, char(10) and float.
 */
@Test
public void testFilterFloatColumn() throws Exception {
    SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
            .startAnd()
            .lessThan("x", PredicateLeaf.Type.LONG, 22L)
            .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
            .lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString())
            .equals("z", PredicateLeaf.Type.FLOAT, Double.valueOf(0.22))
            .equals("z1", PredicateLeaf.Type.FLOAT, Double.valueOf(0.22))
            .end()
            .build();

    MessageType fileSchema = MessageTypeParser.parseMessageType(
            "message test {"
                    + " required int32 x; required int32 x1;"
                    + " required binary y; required float z; required float z1;}");

    // Hive-side type of every column referenced by the search argument.
    Map<String, TypeInfo> hiveTypes = new HashMap<>();
    hiveTypes.put("x", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    hiveTypes.put("x1", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    hiveTypes.put("y", TypeInfoFactory.getCharTypeInfo(10));
    hiveTypes.put("z", TypeInfoFactory.getPrimitiveTypeInfo("float"));
    hiveTypes.put("z1", TypeInfoFactory.getPrimitiveTypeInfo("float"));

    FilterPredicate converted =
            ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, hiveTypes);
    String actual = converted.toString();

    // The five leaves are expected to be folded into a left-nested chain of binary and(...) nodes.
    String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)),"
            + " lteq(y, Binary{\"hi\"})), eq(z, "
            + "0.22)), eq(z1, 0.22))";
    assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 67 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterVarCharColumn.

/**
 * Exercises every leaf operator used here (lessThan, lessThanEquals, equals,
 * nullSafeEquals, in, between, isNull) against varchar(10) columns stored as
 * Parquet binary, and checks the string form of the converted predicate.
 */
@Test
public void testFilterVarCharColumn() throws Exception {
    SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
            .startAnd()
            .lessThan("a", PredicateLeaf.Type.STRING, new HiveVarchar("apple", 10).toString())
            .lessThanEquals("b", PredicateLeaf.Type.STRING, new HiveVarchar("pear", 10).toString())
            .equals("c", PredicateLeaf.Type.STRING, new HiveVarchar("orange", 10).toString())
            .nullSafeEquals("d", PredicateLeaf.Type.STRING, new HiveVarchar("pineapple", 9).toString())
            .in("e", PredicateLeaf.Type.STRING,
                    new HiveVarchar("cherry", 10).toString(),
                    new HiveVarchar("orange", 10).toString())
            .between("f", PredicateLeaf.Type.STRING,
                    new HiveVarchar("apple", 10).toString(),
                    new HiveVarchar("pear", 10).toString())
            .isNull("g", PredicateLeaf.Type.STRING)
            .end()
            .build();

    MessageType fileSchema = MessageTypeParser.parseMessageType(
            "message test {"
                    + " required binary a; required binary b;"
                    + " required binary c; required binary d;"
                    + " required binary e; required binary f;"
                    + " required binary g;}");

    // All seven columns share the same Hive type: varchar(10).
    Map<String, TypeInfo> hiveTypes = new HashMap<>();
    hiveTypes.put("a", TypeInfoFactory.getVarcharTypeInfo(10));
    hiveTypes.put("b", TypeInfoFactory.getVarcharTypeInfo(10));
    hiveTypes.put("c", TypeInfoFactory.getVarcharTypeInfo(10));
    hiveTypes.put("d", TypeInfoFactory.getVarcharTypeInfo(10));
    hiveTypes.put("e", TypeInfoFactory.getVarcharTypeInfo(10));
    hiveTypes.put("f", TypeInfoFactory.getVarcharTypeInfo(10));
    hiveTypes.put("g", TypeInfoFactory.getVarcharTypeInfo(10));

    FilterPredicate converted =
            ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, hiveTypes);
    String actual = converted.toString();

    // Expected rendering: "in" becomes an or(...) of eq leaves, "between" becomes
    // and(lteq(upper), not(lt(lower))), and "isNull" becomes eq(column, null).
    String expected = "and(and(and(and(and(and("
            + "lt(a, Binary{\"apple\"}), "
            + "lteq(b, Binary{\"pear\"})), "
            + "eq(c, Binary{\"orange\"})), "
            + "eq(d, Binary{\"pineapple\"})), "
            + "or(eq(e, Binary{\"cherry\"}), eq(e, Binary{\"orange\"}))), "
            + "and(lteq(f, Binary{\"pear\"}), not(lt(f, Binary{\"apple\"})))), "
            + "eq(g, null))";
    assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 68 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterFloatColumns.

/**
 * Converts {@code NOT (a IS NULL OR a BETWEEN 10.2 AND 20.3 OR b IN (1, 2, 3))}
 * for a float column {@code a} and an int32 column {@code b}, and checks the
 * string form of the resulting Parquet predicate.
 */
@Test
public void testFilterFloatColumns() {
    MessageType fileSchema =
            MessageTypeParser.parseMessageType("message test {  required float a; required int32 b; }");

    SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
            .startNot()
            .startOr()
            .isNull("a", PredicateLeaf.Type.FLOAT)
            .between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3)
            .in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
            .end()
            .end()
            .build();

    Map<String, TypeInfo> hiveTypes = new HashMap<>();
    hiveTypes.put("a", TypeInfoFactory.getPrimitiveTypeInfo("float"));
    hiveTypes.put("b", TypeInfoFactory.getPrimitiveTypeInfo("int"));

    FilterPredicate converted =
            ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, hiveTypes);
    String actual = converted.toString();

    // The expected form is a conjunction of negated leaves rather than a single not(or(...)).
    String expected = "and(and(not(eq(a, null)), not(and(lteq(a, 20.3), not(lt(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
    assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 69 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterColumnsThatDoNoExistOnSchema.

/**
 * Checks that leaves referring to columns missing from the Parquet schema
 * ({@code y} and {@code z}) do not appear in the converted predicate, while
 * leaves on existing columns ({@code a} and {@code stinger}) are kept.
 */
@Test
public void testFilterColumnsThatDoNoExistOnSchema() {
    MessageType fileSchema =
            MessageTypeParser.parseMessageType("message test { required int32 a; required binary stinger; }");

    SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
            .startNot()
            .startOr()
            .isNull("a", PredicateLeaf.Type.LONG)
            // Column will be removed from filter
            .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
            // Column will be removed from filter
            .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
            .nullSafeEquals("stinger", PredicateLeaf.Type.STRING, "stinger")
            .end()
            .end()
            .build();

    // Hive types are supplied for all four columns, including the two absent from the schema.
    Map<String, TypeInfo> hiveTypes = new HashMap<>();
    hiveTypes.put("a", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    hiveTypes.put("y", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    hiveTypes.put("z", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    hiveTypes.put("stinger", TypeInfoFactory.getPrimitiveTypeInfo("string"));

    FilterPredicate converted =
            ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, hiveTypes);
    String actual = converted.toString();

    String expected = "and(not(eq(a, null)), not(eq(stinger, Binary{\"stinger\"})))";
    assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 70 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterCharColumnGreaterThan.

/**
 * Expresses "greater than" on a char(10) column as {@code NOT (a <= "apple")}
 * and checks the string form of the converted Parquet predicate.
 */
@Test
public void testFilterCharColumnGreaterThan() throws Exception {
    SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
            .startNot()
            .lessThanEquals("a", PredicateLeaf.Type.STRING, new HiveChar("apple", 10).toString())
            .end()
            .build();

    MessageType fileSchema = MessageTypeParser.parseMessageType("message test {required binary a;}");

    Map<String, TypeInfo> hiveTypes = new HashMap<>();
    hiveTypes.put("a", TypeInfoFactory.getCharTypeInfo(10));

    FilterPredicate converted =
            ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, hiveTypes);
    String actual = converted.toString();

    String expected = "not(lteq(a, Binary{\"apple\"}))";
    assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)76 Test (org.junit.Test)50 HashMap (java.util.HashMap)33 MessageType (org.apache.parquet.schema.MessageType)33 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)32 SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)25 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)12 BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn)8 ArrayList (java.util.ArrayList)5 List (java.util.List)5 Group (org.apache.parquet.example.data.Group)5 Configuration (org.apache.hadoop.conf.Configuration)4 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)4 User (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User)4 Predicate (java.util.function.Predicate)3 Path (org.apache.hadoop.fs.Path)3 Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair)3 TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate)3 HashSet (java.util.HashSet)2 GenericRecord (org.apache.avro.generic.GenericRecord)2