use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
the class TestStatisticsFilter method testClearExceptionForNots.
/**
 * Checks that {@code canDrop} refuses a predicate that still contains a {@code not}
 * node: it must throw an {@link IllegalArgumentException} whose message quotes the
 * offending sub-predicate.
 */
@Test
public void testClearExceptionForNots() {
  List<ColumnChunkMetaData> emptyStatsMetas = Arrays.asList(
      getDoubleColumnMeta(new DoubleStatistics(), 0L),
      getIntColumnMeta(new IntStatistics(), 0L));

  // The not(...) is deliberately left in place (no LogicalInverseRewriter pass).
  FilterPredicate unrewritten = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));

  IllegalArgumentException thrown = null;
  try {
    canDrop(unrewritten, emptyStatsMetas);
  } catch (IllegalArgumentException e) {
    thrown = e;
  }

  // Reaching this point without a captured exception means canDrop returned normally.
  if (thrown == null) {
    fail("This should throw");
  }

  String expectedMessage =
      "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?"
          + " not(eq(double.column, 12.0))";
  assertEquals(expectedMessage, thrown.getMessage());
}
use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
the class TestStatisticsFilter method testUdp.
/**
 * Exercises {@code canDrop} with user-defined predicates and their logical inverses,
 * both on a column that has statistics ({@code intColumn}) and on a column that is
 * absent from the supplied metadata ({@code missingColumn2}).
 */
@Test
public void testUdp() {
  // Predicates on the int column, plain and inverted.
  FilterPredicate sevensAndEights = userDefined(intColumn, SevensAndEightsUdp.class);
  FilterPredicate notSevensAndEights =
      LogicalInverseRewriter.rewrite(not(userDefined(intColumn, SevensAndEightsUdp.class)));

  // Predicates on the missing column, plain and inverted, for both UDP flavours.
  FilterPredicate dropNullOnMissing = userDefined(missingColumn2, DropNullUdp.class);
  FilterPredicate notDropNullOnMissing =
      LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, DropNullUdp.class)));
  FilterPredicate keepNullOnMissing = userDefined(missingColumn2, SevensAndEightsUdp.class);
  FilterPredicate notKeepNullOnMissing =
      LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, SevensAndEightsUdp.class)));

  FilterPredicate allPositive = userDefined(doubleColumn, AllPositiveUdp.class);

  IntStatistics seven = new IntStatistics();
  seven.setMinMax(7, 7);
  IntStatistics eight = new IntStatistics();
  eight.setMinMax(8, 8);
  IntStatistics neither = new IntStatistics();
  neither.setMinMax(1, 2);

  // One metadata list per int-statistics variant, built after min/max are set.
  List<ColumnChunkMetaData> sevenMetas =
      Arrays.asList(getIntColumnMeta(seven, 177L), getDoubleColumnMeta(doubleStats, 177L));
  List<ColumnChunkMetaData> eightMetas =
      Arrays.asList(getIntColumnMeta(eight, 177L), getDoubleColumnMeta(doubleStats, 177L));
  List<ColumnChunkMetaData> neitherMetas =
      Arrays.asList(getIntColumnMeta(neither, 177L), getDoubleColumnMeta(doubleStats, 177L));

  assertTrue(canDrop(sevensAndEights, sevenMetas));
  assertFalse(canDrop(sevensAndEights, eightMetas));
  assertFalse(canDrop(sevensAndEights, neitherMetas));

  assertFalse(canDrop(notSevensAndEights, sevenMetas));
  assertTrue(canDrop(notSevensAndEights, eightMetas));
  assertFalse(canDrop(notSevensAndEights, neitherMetas));

  // udpDropMissingColumn drops null column.
  assertTrue(canDrop(dropNullOnMissing, sevenMetas));
  assertTrue(canDrop(dropNullOnMissing, eightMetas));
  assertTrue(canDrop(dropNullOnMissing, neitherMetas));

  // invUdpDropMissingColumn (i.e., not(udpDropMissingColumn)) keeps null column.
  assertFalse(canDrop(notDropNullOnMissing, sevenMetas));
  assertFalse(canDrop(notDropNullOnMissing, eightMetas));
  assertFalse(canDrop(notDropNullOnMissing, neitherMetas));

  // udpKeepMissingColumn keeps null column.
  assertFalse(canDrop(keepNullOnMissing, sevenMetas));
  assertFalse(canDrop(keepNullOnMissing, eightMetas));
  assertFalse(canDrop(keepNullOnMissing, neitherMetas));

  // invUdpKeepMissingColumn (i.e., not(udpKeepMissingColumn)) drops null column.
  assertTrue(canDrop(notKeepNullOnMissing, sevenMetas));
  assertTrue(canDrop(notKeepNullOnMissing, eightMetas));
  assertTrue(canDrop(notKeepNullOnMissing, neitherMetas));

  assertFalse(canDrop(allPositive, missingMinMaxColumnMetas));
}
use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
the class TestRecordLevelFilters method testNameNotStartWithP.
/**
 * Reads the phone book through {@code not(userDefined(name, StartWithP))} and checks
 * the result against an equivalent in-memory filter: a user is expected when the name
 * is null or does not start with "p".
 */
@Test
public void testNameNotStartWithP() throws Exception {
  BinaryColumn nameColumn = binaryColumn("name");
  FilterPredicate notStartsWithP = not(userDefined(nameColumn, StartWithP.class));

  List<Group> matched = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(notStartsWithP));

  UserFilter expected = new UserFilter() {
    @Override
    public boolean keep(User u) {
      // A null name is kept by the expected filter.
      if (u.getName() == null) {
        return true;
      }
      return !u.getName().startsWith("p");
    }
  };
  assertFilter(matched, expected);
}
use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
the class TestRecordLevelFilters method testUserDefinedByInstance.
/**
 * Filters the phone book with a user-defined predicate supplied as an instance
 * ({@code SetInFilter}) over the "id" column, and checks that exactly the users whose
 * id is in the set {20, 27, 28} are expected.
 */
@Test
public void testUserDefinedByInstance() throws Exception {
  final HashSet<Long> wantedIds = new HashSet<Long>();
  wantedIds.add(20L);
  wantedIds.add(27L);
  wantedIds.add(28L);

  LongColumn idColumn = longColumn("id");
  FilterPredicate idInSet = userDefined(idColumn, new SetInFilter(wantedIds));

  List<Group> matched = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(idInSet));

  UserFilter expected = new UserFilter() {
    @Override
    public boolean keep(User u) {
      if (u == null) {
        return false;
      }
      return wantedIds.contains(u.getId());
    }
  };
  assertFilter(matched, expected);
}
use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
the class TestRecordLevelFilters method testNameNotNull.
/**
 * Reads the phone book through {@code notEq(name, null)} and checks the result
 * against an in-memory filter that expects every user whose name is non-null.
 */
@Test
public void testNameNotNull() throws Exception {
  BinaryColumn nameColumn = binaryColumn("name");
  FilterPredicate nameIsPresent = notEq(nameColumn, null);

  List<Group> matched = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(nameIsPresent));

  UserFilter expected = new UserFilter() {
    @Override
    public boolean keep(User u) {
      boolean hasName = u.getName() != null;
      return hasName;
    }
  };
  assertFilter(matched, expected);
}
Aggregations