Search in sources :

Example 61 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class ParquetRecordReaderBase method setFilter.

/**
 * Builds a Parquet row filter from the search argument stored in the job configuration.
 *
 * <p>The column names ({@code IOConstants.COLUMNS}) and column types
 * ({@code IOConstants.COLUMNS_TYPES}) are read from {@code conf} and paired up by position
 * before being handed to {@link ParquetFilterPredicateConverter}. When a predicate is
 * produced it is also registered on {@code conf} through
 * {@code ParquetInputFormat.setFilterPredicate}.
 *
 * @param conf   job configuration holding the search argument and the column metadata
 * @param schema Parquet schema passed to the predicate converter
 * @return the filter wrapping the generated predicate, or {@code null} when the
 *         configuration yields no search argument or no predicate could be generated
 */
public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
    final SearchArgument searchArgument = ConvertAstToSearchArg.createFromConf(conf);
    if (searchArgument == null) {
        return null;
    }

    final String typeString = conf.get(IOConstants.COLUMNS_TYPES);
    final String nameString = conf.get(IOConstants.COLUMNS);
    final List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(typeString);

    // Pair the i-th column name with the i-th parsed type.
    final Map<String, TypeInfo> typeByColumn = new HashMap<>();
    int position = 0;
    for (String columnName : nameString.split(",")) {
        typeByColumn.put(columnName, typeInfos.get(position));
        position++;
    }

    // Create the Parquet FilterPredicate without including columns that do not exist
    // on the schema (such as partition columns).
    final FilterPredicate predicate =
        ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, schema, typeByColumn);

    // The filter may carry sensitive information, so the debug messages below
    // deliberately never include the predicate itself.
    if (predicate == null) {
        LOG.debug("No PARQUET predicate push down is generated.");
        return null;
    }
    LOG.debug("PARQUET predicate push down generated.");
    ParquetInputFormat.setFilterPredicate(conf, predicate);
    return FilterCompat.get(predicate);
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 62 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterCharColumnLessThan.

/**
 * Checks that a less-than search argument on column {@code a}, declared as {@code char(10)},
 * is converted into the predicate rendered as {@code lt(a, Binary{"apple"})}.
 */
@Test
public void testFilterCharColumnLessThan() throws Exception {
    final String literal = new HiveChar("apple", 10).toString();
    final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
        .lessThan("a", PredicateLeaf.Type.STRING, literal)
        .build();

    final MessageType fileSchema =
        MessageTypeParser.parseMessageType("message test {required binary a;}");

    final Map<String, TypeInfo> typeByColumn = new HashMap<>();
    typeByColumn.put("a", TypeInfoFactory.getCharTypeInfo(10));

    final FilterPredicate predicate =
        ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, typeByColumn);

    assertEquals("lt(a, Binary{\"apple\"})", predicate.toString());
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 63 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterVarCharColumnWithWhiteSpaces.

/**
 * Checks that varchar literals containing leading and/or trailing spaces appear with their
 * whitespace intact in the rendered predicate for an AND of three leaves
 * (less-than on {@code a}, less-than-or-equals on {@code b}, equals on {@code c}).
 */
@Test
public void testFilterVarCharColumnWithWhiteSpaces() throws Exception {
    final String literalA = new HiveVarchar(" apple  ", 10).toString();
    final String literalB = new HiveVarchar(" pear", 10).toString();
    final String literalC = new HiveVarchar("orange ", 10).toString();

    final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("a", PredicateLeaf.Type.STRING, literalA)
        .lessThanEquals("b", PredicateLeaf.Type.STRING, literalB)
        .equals("c", PredicateLeaf.Type.STRING, literalC)
        .end()
        .build();

    final MessageType fileSchema = MessageTypeParser.parseMessageType(
        "message test {" + " required binary a; required binary b;" + " required binary c;}");

    // All three columns share the same declared type.
    final Map<String, TypeInfo> typeByColumn = new HashMap<>();
    for (String columnName : new String[] {"a", "b", "c"}) {
        typeByColumn.put(columnName, TypeInfoFactory.getVarcharTypeInfo(10));
    }

    final FilterPredicate predicate =
        ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, typeByColumn);

    final String expected = "and(and(" + "lt(a, Binary{\" apple  \"}), " + "lteq(b, Binary{\" pear\"})), " + "eq(c, Binary{\"orange \"}))";
    assertEquals(expected, predicate.toString());
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 64 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterBetween.

/**
 * Checks how a BETWEEN search argument on the int32 column {@code bCol} is rendered as a
 * predicate for three bound combinations: lower below upper (1, 5), lower above upper (5, 1),
 * and equal bounds (1, 1).
 */
@Test
public void testFilterBetween() {
    final MessageType fileSchema =
        MessageTypeParser.parseMessageType("message test {  required int32 bCol; }");

    // Bounds given in ascending order.
    final SearchArgument ascendingBounds = SearchArgumentFactory.newBuilder()
        .between("bCol", PredicateLeaf.Type.LONG, 1L, 5L)
        .build();

    final Map<String, TypeInfo> typeByColumn = new HashMap<>();
    typeByColumn.put("bCol", TypeInfoFactory.getPrimitiveTypeInfo("int"));

    final FilterPredicate ascendingPredicate =
        ParquetFilterPredicateConverter.toFilterPredicate(ascendingBounds, fileSchema, typeByColumn);
    assertEquals("and(lteq(bCol, 5), not(lt(bCol, 1)))", ascendingPredicate.toString());

    // Bounds given in descending order.
    final SearchArgument descendingBounds = SearchArgumentFactory.newBuilder()
        .between("bCol", PredicateLeaf.Type.LONG, 5L, 1L)
        .build();
    final FilterPredicate descendingPredicate =
        ParquetFilterPredicateConverter.toFilterPredicate(descendingBounds, fileSchema, typeByColumn);
    assertEquals("and(lteq(bCol, 1), not(lt(bCol, 5)))", descendingPredicate.toString());

    // Both bounds identical.
    final SearchArgument equalBounds = SearchArgumentFactory.newBuilder()
        .between("bCol", PredicateLeaf.Type.LONG, 1L, 1L)
        .build();
    final FilterPredicate equalPredicate =
        ParquetFilterPredicateConverter.toFilterPredicate(equalBounds, fileSchema, typeByColumn);
    assertEquals("and(lteq(bCol, 1), not(lt(bCol, 1)))", equalPredicate.toString());
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 65 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterCharColumnNullSafeEquals.

/**
 * Checks that a null-safe-equals search argument on column {@code a}, declared as
 * {@code char(10)}, is converted into the predicate rendered as {@code eq(a, Binary{"apple"})}.
 */
@Test
public void testFilterCharColumnNullSafeEquals() throws Exception {
    final String literal = new HiveChar("apple", 10).toString();
    final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
        .nullSafeEquals("a", PredicateLeaf.Type.STRING, literal)
        .build();

    final MessageType fileSchema =
        MessageTypeParser.parseMessageType("message test {required binary a;}");

    final Map<String, TypeInfo> typeByColumn = new HashMap<>();
    typeByColumn.put("a", TypeInfoFactory.getCharTypeInfo(10));

    final FilterPredicate predicate =
        ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, typeByColumn);

    assertEquals("eq(a, Binary{\"apple\"})", predicate.toString());
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)76 Test (org.junit.Test)50 HashMap (java.util.HashMap)33 MessageType (org.apache.parquet.schema.MessageType)33 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)32 SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)25 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)12 BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn)8 ArrayList (java.util.ArrayList)5 List (java.util.List)5 Group (org.apache.parquet.example.data.Group)5 Configuration (org.apache.hadoop.conf.Configuration)4 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)4 User (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User)4 Predicate (java.util.function.Predicate)3 Path (org.apache.hadoop.fs.Path)3 Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair)3 TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate)3 HashSet (java.util.HashSet)2 GenericRecord (org.apache.avro.generic.GenericRecord)2