Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.
Class TestInputFormat, method testGetFilter.
/**
 * Checks what {@code ParquetInputFormat.getFilter} returns for three configurations:
 * one holding a plain predicate, one holding a negated predicate, and one with no
 * predicate set at all.
 */
@Test
public void testGetFilter() throws IOException {
  IntColumn foo = intColumn("foo");
  FilterPredicate sevenOrTwelve = or(eq(foo, 7), eq(foo, 12));

  // Case 1: the predicate is expected to be read back equal to what was stored.
  Configuration plainConf = new Configuration();
  ParquetInputFormat.setFilterPredicate(plainConf, sevenOrTwelve);
  Filter plainFilter = ParquetInputFormat.getFilter(plainConf);
  assertTrue(plainFilter instanceof FilterPredicateCompat);
  FilterPredicate plainPredicate = ((FilterPredicateCompat) plainFilter).getFilterPredicate();
  assertEquals(sevenOrTwelve, plainPredicate);

  // Case 2: not(a or b) is expected to be read back as (a != 7) and (b != 12).
  Configuration negatedConf = new Configuration();
  ParquetInputFormat.setFilterPredicate(negatedConf, not(sevenOrTwelve));
  Filter negatedFilter = ParquetInputFormat.getFilter(negatedConf);
  assertTrue(negatedFilter instanceof FilterPredicateCompat);
  FilterPredicate expectedNegation = and(notEq(foo, 7), notEq(foo, 12));
  FilterPredicate negatedPredicate = ((FilterPredicateCompat) negatedFilter).getFilterPredicate();
  assertEquals(expectedNegation, negatedPredicate);

  // Case 3: a fresh configuration is expected to yield the NOOP filter.
  Configuration emptyConf = new Configuration();
  assertEquals(FilterCompat.NOOP, ParquetInputFormat.getFilter(emptyConf));
}
Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.
Class TestInputFormat, method testOnlyOneKindOfFilterSupported.
/**
 * Verifies that a FilterPredicate and an UnboundRecordFilter cannot both be configured
 * on the same job, whichever one is set first, and that the rejection message names
 * the offending order.
 */
@Test
public void testOnlyOneKindOfFilterSupported() throws Exception {
  IntColumn fooColumn = intColumn("foo");
  FilterPredicate tenOrEleven = or(eq(fooColumn, 10), eq(fooColumn, 11));

  // Order 1: UnboundRecordFilter first, then the FilterPredicate must be rejected.
  Job unboundFirstJob = new Job();
  Configuration unboundFirstConf = unboundFirstJob.getConfiguration();
  ParquetInputFormat.setUnboundRecordFilter(unboundFirstJob, DummyUnboundRecordFilter.class);
  String predicateAfterUnbound =
      "You cannot provide a FilterPredicate after providing an UnboundRecordFilter";
  try {
    ParquetInputFormat.setFilterPredicate(unboundFirstConf, tenOrEleven);
    fail("this should throw");
  } catch (IllegalArgumentException rejected) {
    assertEquals(predicateAfterUnbound, rejected.getMessage());
  }

  // Order 2: FilterPredicate first, then the UnboundRecordFilter must be rejected.
  Job predicateFirstJob = new Job();
  Configuration predicateFirstConf = predicateFirstJob.getConfiguration();
  ParquetInputFormat.setFilterPredicate(predicateFirstConf, tenOrEleven);
  String unboundAfterPredicate =
      "You cannot provide an UnboundRecordFilter after providing a FilterPredicate";
  try {
    ParquetInputFormat.setUnboundRecordFilter(predicateFirstJob, DummyUnboundRecordFilter.class);
    fail("this should throw");
  } catch (IllegalArgumentException rejected) {
    assertEquals(unboundAfterPredicate, rejected.getMessage());
  }
}
Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.
Class TestStatisticsFilter, method testAnd.
/**
 * Exercises {@code canDrop} on every pairing of two predicates joined with {@code and}.
 * The assertions expect the conjunction to be droppable whenever at least one operand
 * is the first predicate, and not droppable when both operands are the second one.
 */
@Test
public void testAnd() {
  // NOTE(review): columnMetas, intColumn and doubleColumn are fixtures defined outside
  // this method; presumably the first predicate is droppable against them and the
  // second is not -- confirm against the fixture setup.
  FilterPredicate droppable = eq(intColumn, 9);
  FilterPredicate notDroppable = eq(doubleColumn, 50D);

  FilterPredicate bothDroppable = and(droppable, droppable);
  assertTrue(canDrop(bothDroppable, columnMetas));

  FilterPredicate leftDroppable = and(droppable, notDroppable);
  assertTrue(canDrop(leftDroppable, columnMetas));

  FilterPredicate rightDroppable = and(notDroppable, droppable);
  assertTrue(canDrop(rightDroppable, columnMetas));

  FilterPredicate neitherDroppable = and(notDroppable, notDroppable);
  assertFalse(canDrop(neitherDroppable, columnMetas));
}
Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.
Class TestStatisticsFilter, method testClearExceptionForNots.
/**
 * Verifies that calling {@code canDrop} with a predicate that still contains a
 * {@code not} fails with an IllegalArgumentException whose message points at
 * LogicalInverseRewriter and includes the offending sub-predicate.
 */
@Test
public void testClearExceptionForNots() {
  // Column metadata built from freshly constructed (unpopulated) statistics objects.
  List<ColumnChunkMetaData> freshStatsMetas = Arrays.asList(
      getDoubleColumnMeta(new DoubleStatistics(), 0L),
      getIntColumnMeta(new IntStatistics(), 0L));

  FilterPredicate negatedDouble = not(eq(doubleColumn, 12.0));
  FilterPredicate predicateWithNot = and(negatedDouble, eq(intColumn, 17));

  String expectedMessage =
      "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?" + " not(eq(double.column, 12.0))";

  try {
    canDrop(predicateWithNot, freshStatsMetas);
    fail("This should throw");
  } catch (IllegalArgumentException rejected) {
    assertEquals(expectedMessage, rejected.getMessage());
  }
}
Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.
Class TestStatisticsFilter, method testUdp.
/**
 * Exercises {@code canDrop} with user-defined predicates, both directly and after
 * rewriting their negation through LogicalInverseRewriter, against int statistics
 * of [7,7], [8,8] and [1,2], and against a column referenced as missingColumn2.
 */
@Test
public void testUdp() {
  FilterPredicate sevensAndEights = userDefined(intColumn, SevensAndEightsUdp.class);
  FilterPredicate notSevensAndEights =
      LogicalInverseRewriter.rewrite(not(userDefined(intColumn, SevensAndEightsUdp.class)));

  FilterPredicate dropNullOnMissing = userDefined(missingColumn2, DropNullUdp.class);
  FilterPredicate notDropNullOnMissing =
      LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, DropNullUdp.class)));

  FilterPredicate keepNullOnMissing = userDefined(missingColumn2, SevensAndEightsUdp.class);
  FilterPredicate notKeepNullOnMissing =
      LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, SevensAndEightsUdp.class)));

  FilterPredicate allPositive = userDefined(doubleColumn, AllPositiveUdp.class);

  IntStatistics onlySevens = new IntStatistics();
  onlySevens.setMinMax(7, 7);
  IntStatistics onlyEights = new IntStatistics();
  onlyEights.setMinMax(8, 8);
  IntStatistics oneToTwo = new IntStatistics();
  oneToTwo.setMinMax(1, 2);

  // Direct predicate: only the [7,7] chunk is expected to be droppable.
  assertTrue(canDrop(sevensAndEights, intAndDoubleMetas(onlySevens)));
  assertFalse(canDrop(sevensAndEights, intAndDoubleMetas(onlyEights)));
  assertFalse(canDrop(sevensAndEights, intAndDoubleMetas(oneToTwo)));

  // Inverted predicate: only the [8,8] chunk is expected to be droppable.
  assertFalse(canDrop(notSevensAndEights, intAndDoubleMetas(onlySevens)));
  assertTrue(canDrop(notSevensAndEights, intAndDoubleMetas(onlyEights)));
  assertFalse(canDrop(notSevensAndEights, intAndDoubleMetas(oneToTwo)));

  // Predicates on the missing column: the outcome is expected to be the same for
  // every int statistics variant, so each expectation is checked across all three.
  List<IntStatistics> everyIntStats = Arrays.asList(onlySevens, onlyEights, oneToTwo);

  // The drop-null predicate drops the null column.
  for (IntStatistics stats : everyIntStats) {
    assertTrue(canDrop(dropNullOnMissing, intAndDoubleMetas(stats)));
  }
  // Its inverse, i.e. not(drop-null), keeps the null column.
  for (IntStatistics stats : everyIntStats) {
    assertFalse(canDrop(notDropNullOnMissing, intAndDoubleMetas(stats)));
  }
  // The keep-null predicate keeps the null column.
  for (IntStatistics stats : everyIntStats) {
    assertFalse(canDrop(keepNullOnMissing, intAndDoubleMetas(stats)));
  }
  // Its inverse, i.e. not(keep-null), drops the null column.
  for (IntStatistics stats : everyIntStats) {
    assertTrue(canDrop(notKeepNullOnMissing, intAndDoubleMetas(stats)));
  }

  assertFalse(canDrop(allPositive, missingMinMaxColumnMetas));
}

/**
 * Builds a new two-element metadata list on every call: int column metadata for the
 * given statistics followed by double column metadata for the doubleStats fixture
 * (defined outside this method), both created with the value 177L.
 */
private List<ColumnChunkMetaData> intAndDoubleMetas(IntStatistics intStats) {
  return Arrays.asList(getIntColumnMeta(intStats, 177L), getDoubleColumnMeta(doubleStats, 177L));
}
Aggregations