Search in sources :

Example 21 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestStatisticsFilter method testClearExceptionForNots.

/**
 * Checks that calling {@code canDrop} with a predicate that still contains a
 * {@code not(...)} node fails with an {@link IllegalArgumentException} whose
 * message names the offending sub-predicate.
 */
@Test
public void testClearExceptionForNots() {
    ColumnChunkMetaData emptyDoubleMeta = getDoubleColumnMeta(new DoubleStatistics(), 0L);
    ColumnChunkMetaData emptyIntMeta = getIntColumnMeta(new IntStatistics(), 0L);
    List<ColumnChunkMetaData> metas = Arrays.asList(emptyDoubleMeta, emptyIntMeta);

    // The predicate is deliberately NOT passed through LogicalInverseRewriter.
    FilterPredicate unrewritten = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));
    String expectedMessage = "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?" + " not(eq(double.column, 12.0))";

    try {
        canDrop(unrewritten, metas);
    } catch (IllegalArgumentException expected) {
        assertEquals(expectedMessage, expected.getMessage());
        return;
    }
    // Only reached when canDrop returned normally.
    fail("This should throw");
}
Also used : IntStatistics(org.apache.parquet.column.statistics.IntStatistics) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 22 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestStatisticsFilter method testUdp.

/**
 * Exercises {@code canDrop} with user-defined predicates and with their negations
 * after rewriting through {@link LogicalInverseRewriter}.
 *
 * <p>Three int statistics are used: min/max of (7, 7), (8, 8) and (1, 2). The same
 * predicates are also applied to {@code missingColumn2}, and one predicate on the
 * double column is checked against {@code missingMinMaxColumnMetas}.
 */
@Test
public void testUdp() {
    final long valueCount = 177L;

    FilterPredicate pred = userDefined(intColumn, SevensAndEightsUdp.class);
    FilterPredicate invPred = LogicalInverseRewriter.rewrite(
        not(userDefined(intColumn, SevensAndEightsUdp.class)));

    FilterPredicate udpDropMissingColumn = userDefined(missingColumn2, DropNullUdp.class);
    FilterPredicate invUdpDropMissingColumn = LogicalInverseRewriter.rewrite(
        not(userDefined(missingColumn2, DropNullUdp.class)));

    FilterPredicate udpKeepMissingColumn = userDefined(missingColumn2, SevensAndEightsUdp.class);
    FilterPredicate invUdpKeepMissingColumn = LogicalInverseRewriter.rewrite(
        not(userDefined(missingColumn2, SevensAndEightsUdp.class)));

    FilterPredicate allPositivePred = userDefined(doubleColumn, AllPositiveUdp.class);

    IntStatistics seven = new IntStatistics();
    seven.setMinMax(7, 7);
    IntStatistics eight = new IntStatistics();
    eight.setMinMax(8, 8);
    IntStatistics neither = new IntStatistics();
    neither.setMinMax(1, 2);

    // Order matters: the loops below visit the statistics in this sequence.
    IntStatistics[] allStats = { seven, eight, neither };

    // Predicate on the int column: droppable only for the (7, 7) statistics.
    assertTrue(canDrop(pred, Arrays.asList(
        getIntColumnMeta(seven, valueCount),
        getDoubleColumnMeta(doubleStats, valueCount))));
    assertFalse(canDrop(pred, Arrays.asList(
        getIntColumnMeta(eight, valueCount),
        getDoubleColumnMeta(doubleStats, valueCount))));
    assertFalse(canDrop(pred, Arrays.asList(
        getIntColumnMeta(neither, valueCount),
        getDoubleColumnMeta(doubleStats, valueCount))));

    // Rewritten negation on the int column: droppable only for the (8, 8) statistics.
    assertFalse(canDrop(invPred, Arrays.asList(
        getIntColumnMeta(seven, valueCount),
        getDoubleColumnMeta(doubleStats, valueCount))));
    assertTrue(canDrop(invPred, Arrays.asList(
        getIntColumnMeta(eight, valueCount),
        getDoubleColumnMeta(doubleStats, valueCount))));
    assertFalse(canDrop(invPred, Arrays.asList(
        getIntColumnMeta(neither, valueCount),
        getDoubleColumnMeta(doubleStats, valueCount))));

    // udpDropMissingColumn drops null column.
    for (IntStatistics stats : allStats) {
        assertTrue(canDrop(udpDropMissingColumn, Arrays.asList(
            getIntColumnMeta(stats, valueCount),
            getDoubleColumnMeta(doubleStats, valueCount))));
    }

    // invUdpDropMissingColumn (i.e., not(udpDropMissingColumn)) keeps null column.
    for (IntStatistics stats : allStats) {
        assertFalse(canDrop(invUdpDropMissingColumn, Arrays.asList(
            getIntColumnMeta(stats, valueCount),
            getDoubleColumnMeta(doubleStats, valueCount))));
    }

    // udpKeepMissingColumn keeps null column.
    for (IntStatistics stats : allStats) {
        assertFalse(canDrop(udpKeepMissingColumn, Arrays.asList(
            getIntColumnMeta(stats, valueCount),
            getDoubleColumnMeta(doubleStats, valueCount))));
    }

    // invUdpKeepMissingColumn (i.e., not(udpKeepMissingColumn)) drops null column.
    for (IntStatistics stats : allStats) {
        assertTrue(canDrop(invUdpKeepMissingColumn, Arrays.asList(
            getIntColumnMeta(stats, valueCount),
            getDoubleColumnMeta(doubleStats, valueCount))));
    }

    // Predicate on the double column against the missing-min/max metadata: must not drop.
    assertFalse(canDrop(allPositivePred, missingMinMaxColumnMetas));
}
Also used : IntStatistics(org.apache.parquet.column.statistics.IntStatistics) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 23 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestRecordLevelFilters method testNameNotStartWithP.

/**
 * Reads the phone book file through the negation of the {@code StartWithP}
 * user-defined predicate on the "name" column and checks the result against an
 * equivalent in-memory filter: users with no name, or a name not starting with "p".
 */
@Test
public void testNameNotStartWithP() throws Exception {
    BinaryColumn nameColumn = binaryColumn("name");
    FilterPredicate notStartingWithP = not(userDefined(nameColumn, StartWithP.class));

    List<Group> matched = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(notStartingWithP));

    UserFilter expectedUsers = new UserFilter() {

        @Override
        public boolean keep(User u) {
            // A user without a name is kept.
            if (u.getName() == null) {
                return true;
            }
            return !u.getName().startsWith("p");
        }
    };
    assertFilter(matched, expectedUsers);
}
Also used : Group(org.apache.parquet.example.data.Group) User(org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User) BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 24 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestRecordLevelFilters method testUserDefinedByInstance.

/**
 * Reads the phone book file through a user-defined predicate supplied as an
 * instance ({@code SetInFilter}) on the "id" column, and checks the result against
 * an in-memory filter that keeps users whose id is in the same set.
 */
@Test
public void testUserDefinedByInstance() throws Exception {
    LongColumn idColumn = longColumn("id");

    // Must be final: it is captured by the anonymous UserFilter below.
    final HashSet<Long> wantedIds = new HashSet<Long>();
    wantedIds.add(20L);
    wantedIds.add(27L);
    wantedIds.add(28L);

    FilterPredicate idInSet = userDefined(idColumn, new SetInFilter(wantedIds));
    List<Group> matched = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(idInSet));

    UserFilter expectedUsers = new UserFilter() {

        @Override
        public boolean keep(User u) {
            if (u == null) {
                return false;
            }
            return wantedIds.contains(u.getId());
        }
    };
    assertFilter(matched, expectedUsers);
}
Also used : Group(org.apache.parquet.example.data.Group) LongColumn(org.apache.parquet.filter2.predicate.Operators.LongColumn) User(org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 25 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestRecordLevelFilters method testNameNotNull.

/**
 * Reads the phone book file with a {@code notEq(name, null)} predicate and checks
 * the result against an in-memory filter that keeps users with a non-null name.
 */
@Test
public void testNameNotNull() throws Exception {
    BinaryColumn nameColumn = binaryColumn("name");
    FilterPredicate nameIsPresent = notEq(nameColumn, null);

    List<Group> matched = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(nameIsPresent));

    UserFilter expectedUsers = new UserFilter() {

        @Override
        public boolean keep(User u) {
            return !(u.getName() == null);
        }
    };
    assertFilter(matched, expectedUsers);
}
Also used : Group(org.apache.parquet.example.data.Group) User(org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User) BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 57, Test (org.junit.Test): 33, MessageType (org.apache.parquet.schema.MessageType): 15, SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument): 8, BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn): 8, ArrayList (java.util.ArrayList): 5, List (java.util.List): 5, Group (org.apache.parquet.example.data.Group): 5, Configuration (org.apache.hadoop.conf.Configuration): 4, User (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User): 4, TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate): 4, Predicate (java.util.function.Predicate): 3, Path (org.apache.hadoop.fs.Path): 3, Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair): 3, HashMap (java.util.HashMap): 2, HashSet (java.util.HashSet): 2, GenericRecord (org.apache.avro.generic.GenericRecord): 2, IntStatistics (org.apache.parquet.column.statistics.IntStatistics): 2, IntColumn (org.apache.parquet.filter2.predicate.Operators.IntColumn): 2, Test (org.junit.jupiter.api.Test): 2