Search in sources :

Example 6 with IntColumn

Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.

From the class TestInputFormat, method testGetFilter.

/**
 * Stores filter predicates in a Configuration through ParquetInputFormat and checks
 * what ParquetInputFormat.getFilter hands back for a plain predicate, a negated
 * predicate, and a configuration with no predicate at all.
 */
@Test
public void testGetFilter() throws IOException {
    IntColumn foo = intColumn("foo");
    FilterPredicate sevenOrTwelve = or(eq(foo, 7), eq(foo, 12));

    // Plain predicate: the filter read back must wrap a predicate equal to the one stored.
    Configuration plainConf = new Configuration();
    ParquetInputFormat.setFilterPredicate(plainConf, sevenOrTwelve);
    Filter plainFilter = ParquetInputFormat.getFilter(plainConf);
    assertTrue(plainFilter instanceof FilterPredicateCompat);
    FilterPredicateCompat plainCompat = (FilterPredicateCompat) plainFilter;
    assertEquals(sevenOrTwelve, plainCompat.getFilterPredicate());

    // Negated predicate: the filter read back must wrap and(notEq, notEq) rather than not(or(...)).
    Configuration negatedConf = new Configuration();
    ParquetInputFormat.setFilterPredicate(negatedConf, not(sevenOrTwelve));
    Filter negatedFilter = ParquetInputFormat.getFilter(negatedConf);
    assertTrue(negatedFilter instanceof FilterPredicateCompat);
    FilterPredicateCompat negatedCompat = (FilterPredicateCompat) negatedFilter;
    assertEquals(and(notEq(foo, 7), notEq(foo, 12)), negatedCompat.getFilterPredicate());

    // Fresh configuration with nothing set: expect the NOOP filter.
    Configuration emptyConf = new Configuration();
    assertEquals(FilterCompat.NOOP, ParquetInputFormat.getFilter(emptyConf));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Filter(org.apache.parquet.filter2.compat.FilterCompat.Filter) RecordFilter(org.apache.parquet.filter.RecordFilter) UnboundRecordFilter(org.apache.parquet.filter.UnboundRecordFilter) FilterPredicateCompat(org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)

Example 7 with IntColumn

Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.

From the class TestInputFormat, method testOnlyOneKindOfFilterSupported.

/**
 * Checks that setting a FilterPredicate and an UnboundRecordFilter on the same job
 * configuration is rejected with an IllegalArgumentException, in either order, and
 * that the exception message names the order in which the two were supplied.
 */
@Test
public void testOnlyOneKindOfFilterSupported() throws Exception {
    IntColumn fooColumn = intColumn("foo");
    FilterPredicate tenOrEleven = or(eq(fooColumn, 10), eq(fooColumn, 11));

    // Scenario 1: record filter first, then the predicate.
    Job recordFilterFirst = new Job();
    Configuration recordFilterFirstConf = recordFilterFirst.getConfiguration();
    ParquetInputFormat.setUnboundRecordFilter(recordFilterFirst, DummyUnboundRecordFilter.class);
    try {
        ParquetInputFormat.setFilterPredicate(recordFilterFirstConf, tenOrEleven);
        fail("this should throw");
    } catch (IllegalArgumentException expected) {
        assertEquals("You cannot provide a FilterPredicate after providing an UnboundRecordFilter", expected.getMessage());
    }

    // Scenario 2: predicate first, then the record filter.
    Job predicateFirst = new Job();
    Configuration predicateFirstConf = predicateFirst.getConfiguration();
    ParquetInputFormat.setFilterPredicate(predicateFirstConf, tenOrEleven);
    try {
        ParquetInputFormat.setUnboundRecordFilter(predicateFirst, DummyUnboundRecordFilter.class);
        fail("this should throw");
    } catch (IllegalArgumentException expected) {
        assertEquals("You cannot provide an UnboundRecordFilter after providing a FilterPredicate", expected.getMessage());
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Job(org.apache.hadoop.mapreduce.Job) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)

Example 8 with IntColumn

Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.

From the class TestStatisticsFilter, method testAnd.

/**
 * Exercises canDrop on conjunctions of two equality predicates against columnMetas.
 * Per the assertions below, only the conjunction of the double predicate with itself
 * is expected not to be droppable.
 */
@Test
public void testAnd() {
    FilterPredicate intEqNine = eq(intColumn, 9);
    FilterPredicate doubleEqFifty = eq(doubleColumn, 50D);

    // Any conjunction involving the int predicate is expected to be droppable.
    assertTrue(canDrop(and(intEqNine, intEqNine), columnMetas));
    assertTrue(canDrop(and(intEqNine, doubleEqFifty), columnMetas));
    assertTrue(canDrop(and(doubleEqFifty, intEqNine), columnMetas));

    // The double predicate on both sides is expected to be kept.
    assertFalse(canDrop(and(doubleEqFifty, doubleEqFifty), columnMetas));
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 9 with IntColumn

Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.

From the class TestStatisticsFilter, method testClearExceptionForNots.

/**
 * Checks that canDrop rejects a predicate still containing a not(...) node with an
 * IllegalArgumentException whose message points at LogicalInverseRewriter and
 * includes the offending sub-predicate.
 */
@Test
public void testClearExceptionForNots() {
    // Column metadata built from freshly constructed (unpopulated) statistics objects.
    List<ColumnChunkMetaData> metas = Arrays.asList(
        getDoubleColumnMeta(new DoubleStatistics(), 0L),
        getIntColumnMeta(new IntStatistics(), 0L));

    // The left operand deliberately keeps its not(...) wrapper.
    FilterPredicate containsNot = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));

    try {
        canDrop(containsNot, metas);
        fail("This should throw");
    } catch (IllegalArgumentException expected) {
        String wantedMessage = "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?" + " not(eq(double.column, 12.0))";
        assertEquals(wantedMessage, expected.getMessage());
    }
}
Also used : IntStatistics(org.apache.parquet.column.statistics.IntStatistics) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 10 with IntColumn

Use of org.apache.parquet.filter2.predicate.Operators.IntColumn in the parquet-mr project by Apache.

From the class TestStatisticsFilter, method testUdp.

/**
 * Exercises canDrop with user-defined predicates, both directly and as not(...)
 * rewritten by LogicalInverseRewriter, against int statistics with min/max of
 * (7, 7), (8, 8) and (1, 2), plus predicates on missingColumn2 and a final check
 * against missingMinMaxColumnMetas.
 */
@Test
public void testUdp() {
    // Predicates under test; each "inverse" is not(...) passed through LogicalInverseRewriter.
    FilterPredicate sevensAndEights = userDefined(intColumn, SevensAndEightsUdp.class);
    FilterPredicate inverseSevensAndEights = LogicalInverseRewriter.rewrite(
        not(userDefined(intColumn, SevensAndEightsUdp.class)));
    FilterPredicate dropNullOnMissing = userDefined(missingColumn2, DropNullUdp.class);
    FilterPredicate inverseDropNullOnMissing = LogicalInverseRewriter.rewrite(
        not(userDefined(missingColumn2, DropNullUdp.class)));
    FilterPredicate sevensAndEightsOnMissing = userDefined(missingColumn2, SevensAndEightsUdp.class);
    FilterPredicate inverseSevensAndEightsOnMissing = LogicalInverseRewriter.rewrite(
        not(userDefined(missingColumn2, SevensAndEightsUdp.class)));
    FilterPredicate allPositive = userDefined(doubleColumn, AllPositiveUdp.class);

    // Int statistics fixtures: min/max of (7, 7), (8, 8) and (1, 2).
    IntStatistics allSevens = new IntStatistics();
    allSevens.setMinMax(7, 7);
    IntStatistics allEights = new IntStatistics();
    allEights.setMinMax(8, 8);
    IntStatistics oneToTwo = new IntStatistics();
    oneToTwo.setMinMax(1, 2);

    // SevensAndEightsUdp on intColumn: only the (7, 7) statistics are expected to be droppable.
    assertTrue(canDrop(sevensAndEights, Arrays.asList(
        getIntColumnMeta(allSevens, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(sevensAndEights, Arrays.asList(
        getIntColumnMeta(allEights, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(sevensAndEights, Arrays.asList(
        getIntColumnMeta(oneToTwo, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));

    // Inverse of SevensAndEightsUdp on intColumn: only the (8, 8) statistics are expected to be droppable.
    assertFalse(canDrop(inverseSevensAndEights, Arrays.asList(
        getIntColumnMeta(allSevens, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(inverseSevensAndEights, Arrays.asList(
        getIntColumnMeta(allEights, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(inverseSevensAndEights, Arrays.asList(
        getIntColumnMeta(oneToTwo, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));

    // DropNullUdp on the missing column: expected to be droppable in every case.
    assertTrue(canDrop(dropNullOnMissing, Arrays.asList(
        getIntColumnMeta(allSevens, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(dropNullOnMissing, Arrays.asList(
        getIntColumnMeta(allEights, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(dropNullOnMissing, Arrays.asList(
        getIntColumnMeta(oneToTwo, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));

    // Inverse of DropNullUdp on the missing column: expected to be kept in every case.
    assertFalse(canDrop(inverseDropNullOnMissing, Arrays.asList(
        getIntColumnMeta(allSevens, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(inverseDropNullOnMissing, Arrays.asList(
        getIntColumnMeta(allEights, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(inverseDropNullOnMissing, Arrays.asList(
        getIntColumnMeta(oneToTwo, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));

    // SevensAndEightsUdp on the missing column: expected to be kept in every case.
    assertFalse(canDrop(sevensAndEightsOnMissing, Arrays.asList(
        getIntColumnMeta(allSevens, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(sevensAndEightsOnMissing, Arrays.asList(
        getIntColumnMeta(allEights, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(sevensAndEightsOnMissing, Arrays.asList(
        getIntColumnMeta(oneToTwo, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));

    // Inverse of SevensAndEightsUdp on the missing column: expected to be droppable in every case.
    assertTrue(canDrop(inverseSevensAndEightsOnMissing, Arrays.asList(
        getIntColumnMeta(allSevens, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(inverseSevensAndEightsOnMissing, Arrays.asList(
        getIntColumnMeta(allEights, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(inverseSevensAndEightsOnMissing, Arrays.asList(
        getIntColumnMeta(oneToTwo, 177L),
        getDoubleColumnMeta(doubleStats, 177L))));

    // AllPositiveUdp against missingMinMaxColumnMetas: expected to be kept.
    assertFalse(canDrop(allPositive, missingMinMaxColumnMetas));
}
Also used : IntStatistics(org.apache.parquet.column.statistics.IntStatistics) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Aggregations

Test (org.junit.Test): 14, IntColumn (org.apache.parquet.filter2.predicate.Operators.IntColumn): 8, FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 7, IntStatistics (org.apache.parquet.column.statistics.IntStatistics): 3, Configuration (org.apache.hadoop.conf.Configuration): 2, DictionaryPageReadStore (org.apache.parquet.column.page.DictionaryPageReadStore): 2, ByteArrayInputStream (java.io.ByteArrayInputStream): 1, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1, ObjectInputStream (java.io.ObjectInputStream): 1, ObjectOutputStream (java.io.ObjectOutputStream): 1, ArrayList (java.util.ArrayList): 1, Job (org.apache.hadoop.mapreduce.Job): 1, DoubleStatistics (org.apache.parquet.column.statistics.DoubleStatistics): 1, RecordFilter (org.apache.parquet.filter.RecordFilter): 1, UnboundRecordFilter (org.apache.parquet.filter.UnboundRecordFilter): 1, Filter (org.apache.parquet.filter2.compat.FilterCompat.Filter): 1, FilterPredicateCompat (org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat): 1, BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn): 1, BlockMetaData (org.apache.parquet.hadoop.metadata.BlockMetaData): 1, ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData): 1