Search in sources :

Example 16 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class DictionaryFilterTest method testOr.

/**
 * Exercises {@code canDrop} on {@code or} combinations of equality predicates over
 * {@code binary_field}: dropping is expected only when neither operand can match.
 * The expectations rely on the {@code ccmd} / {@code dictionaries} fixtures defined
 * outside this method.
 */
@Test
public void testOr() throws Exception {
    final String keepMessage = "Should not drop when one predicate could be true";
    BinaryColumn binaryField = binaryColumn("binary_field");

    // upper-case values: both evaluate to false (no upper-case letters are in the dictionary)
    FilterPredicate missingB = eq(binaryField, Binary.fromString("B"));
    FilterPredicate missingC = eq(binaryField, Binary.fromString("C"));

    // lower-case values: both evaluate to true (all lower-case letters are in the dictionary)
    FilterPredicate presentX = eq(binaryField, Binary.fromString("x"));
    FilterPredicate presentY = eq(binaryField, Binary.fromString("y"));

    // one side may match -> the row group has to be kept
    boolean dropMissingOrPresent = canDrop(or(missingB, presentY), ccmd, dictionaries);
    assertFalse(keepMessage, dropMissingOrPresent);

    boolean dropPresentOrMissing = canDrop(or(presentX, missingC), ccmd, dictionaries);
    assertFalse(keepMessage, dropPresentOrMissing);

    // neither side can match -> the row group can be dropped
    boolean dropBothMissing = canDrop(or(missingB, missingC), ccmd, dictionaries);
    assertTrue("Should drop when both predicates must be false", dropBothMissing);

    // both sides may match -> the row group has to be kept
    boolean dropBothPresent = canDrop(or(presentX, presentY), ccmd, dictionaries);
    assertFalse(keepMessage, dropBothPresent);
}
Also used : BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 17 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestInputFormat method testGetFilter.

/**
 * Round-trips a {@link FilterPredicate} through a {@link Configuration} using
 * {@code ParquetInputFormat.setFilterPredicate} / {@code getFilter} and checks three cases:
 * a plain predicate comes back equal, a negated predicate comes back as the
 * equivalent {@code and} of {@code notEq} predicates, and an untouched configuration
 * yields {@code FilterCompat.NOOP}.
 */
@Test
public void testGetFilter() throws IOException {
    IntColumn fooColumn = intColumn("foo");
    FilterPredicate sevenOrTwelve = or(eq(fooColumn, 7), eq(fooColumn, 12));

    // Case 1: the predicate is stored and read back unchanged.
    Configuration plainConf = new Configuration();
    ParquetInputFormat.setFilterPredicate(plainConf, sevenOrTwelve);
    Filter plainFilter = ParquetInputFormat.getFilter(plainConf);
    assertTrue(plainFilter instanceof FilterPredicateCompat);
    FilterPredicate plainResult = ((FilterPredicateCompat) plainFilter).getFilterPredicate();
    assertEquals(sevenOrTwelve, plainResult);

    // Case 2: a negated predicate is read back with the negation pushed into the leaves.
    Configuration negatedConf = new Configuration();
    ParquetInputFormat.setFilterPredicate(negatedConf, not(sevenOrTwelve));
    Filter negatedFilter = ParquetInputFormat.getFilter(negatedConf);
    assertTrue(negatedFilter instanceof FilterPredicateCompat);
    FilterPredicate neitherSevenNorTwelve = and(notEq(fooColumn, 7), notEq(fooColumn, 12));
    FilterPredicate negatedResult = ((FilterPredicateCompat) negatedFilter).getFilterPredicate();
    assertEquals(neitherSevenNorTwelve, negatedResult);

    // Case 3: no filter configured at all.
    Configuration emptyConf = new Configuration();
    assertEquals(FilterCompat.NOOP, ParquetInputFormat.getFilter(emptyConf));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Filter(org.apache.parquet.filter2.compat.FilterCompat.Filter) RecordFilter(org.apache.parquet.filter.RecordFilter) UnboundRecordFilter(org.apache.parquet.filter.UnboundRecordFilter) FilterPredicateCompat(org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)

Example 18 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestInputFormat method testOnlyOneKindOfFilterSupported.

/**
 * Checks that an {@code UnboundRecordFilter} and a {@link FilterPredicate} cannot both be
 * configured on the same job: whichever is set second must be rejected with an
 * {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void testOnlyOneKindOfFilterSupported() throws Exception {
    IntColumn fooColumn = intColumn("foo");
    FilterPredicate tenOrEleven = or(eq(fooColumn, 10), eq(fooColumn, 11));

    // Scenario 1: unbound record filter first, then the predicate is refused.
    Job recordFilterFirstJob = new Job();
    Configuration recordFilterFirstConf = recordFilterFirstJob.getConfiguration();
    ParquetInputFormat.setUnboundRecordFilter(recordFilterFirstJob, DummyUnboundRecordFilter.class);
    try {
        ParquetInputFormat.setFilterPredicate(recordFilterFirstConf, tenOrEleven);
        fail("this should throw");
    } catch (IllegalArgumentException rejected) {
        String actualMessage = rejected.getMessage();
        assertEquals("You cannot provide a FilterPredicate after providing an UnboundRecordFilter", actualMessage);
    }

    // Scenario 2: predicate first, then the unbound record filter is refused.
    Job predicateFirstJob = new Job();
    Configuration predicateFirstConf = predicateFirstJob.getConfiguration();
    ParquetInputFormat.setFilterPredicate(predicateFirstConf, tenOrEleven);
    try {
        ParquetInputFormat.setUnboundRecordFilter(predicateFirstJob, DummyUnboundRecordFilter.class);
        fail("this should throw");
    } catch (IllegalArgumentException rejected) {
        String actualMessage = rejected.getMessage();
        assertEquals("You cannot provide an UnboundRecordFilter after providing a FilterPredicate", actualMessage);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Job(org.apache.hadoop.mapreduce.Job) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)

Example 19 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestRecordLevelFilters method testAllFilter.

/**
 * Reads {@code phonebookFile} through a filter on {@code name} equal to
 * {@code "no matches"} and expects the resulting list of groups to be empty.
 */
@Test
public void testAllFilter() throws Exception {
    BinaryColumn nameColumn = binaryColumn("name");
    FilterPredicate matchesNothing = eq(nameColumn, Binary.fromString("no matches"));

    List<Group> results = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(matchesNothing));

    List<Group> expectedEmpty = new ArrayList<Group>();
    assertEquals(expectedEmpty, results);
}
Also used : Group(org.apache.parquet.example.data.Group) BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 20 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class TestStatisticsFilter method testAnd.

/**
 * Exercises {@code canDrop} on {@code and} combinations of two equality predicates
 * evaluated against the {@code columnMetas} fixture: the conjunction is expected to be
 * droppable as soon as at least one operand is the droppable predicate.
 */
@Test
public void testAnd() {
    // named after the outcomes asserted below, not after anything visible in the fixture
    FilterPredicate droppable = eq(intColumn, 9);
    FilterPredicate notDroppable = eq(doubleColumn, 50D);

    FilterPredicate bothDroppable = and(droppable, droppable);
    assertTrue(canDrop(bothDroppable, columnMetas));

    FilterPredicate droppableFirst = and(droppable, notDroppable);
    assertTrue(canDrop(droppableFirst, columnMetas));

    FilterPredicate droppableSecond = and(notDroppable, droppable);
    assertTrue(canDrop(droppableSecond, columnMetas));

    FilterPredicate neitherDroppable = and(notDroppable, notDroppable);
    assertFalse(canDrop(neitherDroppable, columnMetas));
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)57 Test (org.junit.Test)33 MessageType (org.apache.parquet.schema.MessageType)15 SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)8 BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn)8 ArrayList (java.util.ArrayList)5 List (java.util.List)5 Group (org.apache.parquet.example.data.Group)5 Configuration (org.apache.hadoop.conf.Configuration)4 User (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User)4 TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate)4 Predicate (java.util.function.Predicate)3 Path (org.apache.hadoop.fs.Path)3 Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair)3 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 GenericRecord (org.apache.avro.generic.GenericRecord)2 IntStatistics (org.apache.parquet.column.statistics.IntStatistics)2 IntColumn (org.apache.parquet.filter2.predicate.Operators.IntColumn)2 Test (org.junit.jupiter.api.Test)2