Use of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project.
Class ParquetRecordReaderBase, method setFilter.
/**
 * Builds the Parquet row filter for the search argument stored in the job configuration.
 *
 * <p>When a predicate is produced it is also registered on {@code conf} through
 * {@code ParquetInputFormat.setFilterPredicate}.
 *
 * @param conf   job configuration holding the search argument and the column names/types
 * @param schema Parquet schema handed to the predicate converter
 * @return the filter wrapping the generated predicate, or {@code null} when the configuration
 *         has no search argument or the converter yields no predicate
 */
public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
  final SearchArgument searchArg = ConvertAstToSearchArg.createFromConf(conf);
  if (searchArg == null) {
    return null;
  }

  // Pair every configured column name with the type found at the same position.
  final String typeString = conf.get(IOConstants.COLUMNS_TYPES);
  final String nameString = conf.get(IOConstants.COLUMNS);
  final List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(typeString);
  final Map<String, TypeInfo> typeByColumn = new HashMap<>();
  int position = 0;
  for (String columnName : nameString.split(",")) {
    typeByColumn.put(columnName, typeInfos.get(position++));
  }

  // The converter leaves out columns that are not part of the schema (such as partition
  // columns) when it builds the Parquet FilterPredicate.
  final FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArg, schema, typeByColumn);

  // The predicate may carry sensitive values, so the log messages below only state whether
  // one was generated and never print the predicate itself.
  if (predicate == null) {
    LOG.debug("No PARQUET predicate push down is generated.");
    return null;
  }

  LOG.debug("PARQUET predicate push down generated.");
  ParquetInputFormat.setFilterPredicate(conf, predicate);
  return FilterCompat.get(predicate);
}
Use of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project.
Class TestParquetFilterPredicate, method testFilterCharColumnLessThan.
/**
 * Checks that a less-than search argument on a char(10) column is converted into a Parquet
 * {@code lt} predicate on the binary column {@code a}.
 */
@Test
public void testFilterCharColumnLessThan() throws Exception {
  // The literal is passed to the builder as the string form of a HiveChar.
  String literal = new HiveChar("apple", 10).toString();
  SearchArgument searchArg = SearchArgumentFactory.newBuilder()
      .lessThan("a", PredicateLeaf.Type.STRING, literal)
      .build();

  MessageType fileSchema =
      MessageTypeParser.parseMessageType("message test {required binary a;}");

  Map<String, TypeInfo> typeByColumn = new HashMap<>();
  typeByColumn.put("a", TypeInfoFactory.getCharTypeInfo(10));

  FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArg, fileSchema, typeByColumn);

  assertEquals("lt(a, Binary{\"apple\"})", predicate.toString());
}
Use of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project.
Class TestParquetFilterPredicate, method testFilterVarCharColumnWithWhiteSpaces.
/**
 * Checks that varchar literals with leading and/or trailing spaces show up unchanged in the
 * generated Parquet predicate for an AND of three comparisons.
 */
@Test
public void testFilterVarCharColumnWithWhiteSpaces() throws Exception {
  // One literal per column: spaces on both sides, leading only, trailing only.
  String paddedBothSides = new HiveVarchar(" apple ", 10).toString();
  String paddedLeft = new HiveVarchar(" pear", 10).toString();
  String paddedRight = new HiveVarchar("orange ", 10).toString();

  SearchArgument searchArg = SearchArgumentFactory.newBuilder()
      .startAnd()
      .lessThan("a", PredicateLeaf.Type.STRING, paddedBothSides)
      .lessThanEquals("b", PredicateLeaf.Type.STRING, paddedLeft)
      .equals("c", PredicateLeaf.Type.STRING, paddedRight)
      .end()
      .build();

  MessageType fileSchema = MessageTypeParser.parseMessageType(
      "message test {"
          + " required binary a; required binary b;"
          + " required binary c;}");

  // All three columns are declared as varchar(10).
  Map<String, TypeInfo> typeByColumn = new HashMap<>();
  for (String columnName : new String[] {"a", "b", "c"}) {
    typeByColumn.put(columnName, TypeInfoFactory.getVarcharTypeInfo(10));
  }

  FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArg, fileSchema, typeByColumn);

  String expectedPredicate = "and(and("
      + "lt(a, Binary{\" apple \"}), "
      + "lteq(b, Binary{\" pear\"})), "
      + "eq(c, Binary{\"orange \"}))";
  assertEquals(expectedPredicate, predicate.toString());
}
Use of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project.
Class TestParquetFilterPredicate, method testFilterBetween.
/**
 * Checks the Parquet predicate generated for BETWEEN on an int32 column, using three bound
 * pairs: ascending, descending, and equal bounds.
 */
@Test
public void testFilterBetween() {
  MessageType fileSchema =
      MessageTypeParser.parseMessageType("message test { required int32 bCol; }");

  Map<String, TypeInfo> typeByColumn = new HashMap<>();
  typeByColumn.put("bCol", TypeInfoFactory.getPrimitiveTypeInfo("int"));

  // Each row of bounds is {lower, upper}; expectedPredicates[i] belongs to bounds[i].
  long[][] bounds = {
      {1L, 5L},
      {5L, 1L},
      {1L, 1L},
  };
  String[] expectedPredicates = {
      "and(lteq(bCol, 5), not(lt(bCol, 1)))",
      "and(lteq(bCol, 1), not(lt(bCol, 5)))",
      "and(lteq(bCol, 1), not(lt(bCol, 1)))",
  };

  for (int i = 0; i < bounds.length; i++) {
    SearchArgument searchArg = SearchArgumentFactory.newBuilder()
        .between("bCol", PredicateLeaf.Type.LONG, bounds[i][0], bounds[i][1])
        .build();
    FilterPredicate predicate =
        ParquetFilterPredicateConverter.toFilterPredicate(searchArg, fileSchema, typeByColumn);
    assertEquals(expectedPredicates[i], predicate.toString());
  }
}
Use of org.apache.parquet.filter2.predicate.FilterPredicate in the Apache Hive project.
Class TestParquetFilterPredicate, method testFilterCharColumnNullSafeEquals.
/**
 * Checks that a null-safe-equals search argument on a char(10) column is converted into a
 * Parquet {@code eq} predicate on the binary column {@code a}.
 */
@Test
public void testFilterCharColumnNullSafeEquals() throws Exception {
  // The literal is passed to the builder as the string form of a HiveChar.
  String literal = new HiveChar("apple", 10).toString();
  SearchArgument searchArg = SearchArgumentFactory.newBuilder()
      .nullSafeEquals("a", PredicateLeaf.Type.STRING, literal)
      .build();

  MessageType fileSchema =
      MessageTypeParser.parseMessageType("message test {required binary a;}");

  Map<String, TypeInfo> typeByColumn = new HashMap<>();
  typeByColumn.put("a", TypeInfoFactory.getCharTypeInfo(10));

  FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArg, fileSchema, typeByColumn);

  assertEquals("eq(a, Binary{\"apple\"})", predicate.toString());
}
Aggregations