
Example 11 with Or

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in the apache/parquet-mr project.

From the class TestIncrementallyUpdatedFilterPredicateResetter, method testReset:

@Test
public void testReset() {
    // Leaf inspectors (test helpers) that record a value and remember whether it is known.
    ValueInspector intIsNull = intIsNull();
    ValueInspector intIsEven = intIsEven();
    ValueInspector doubleMoreThan10 = doubleMoreThan10();
    IncrementallyUpdatedFilterPredicate pred = new Or(intIsNull, new And(intIsEven, doubleMoreThan10));
    // Feed a value to every leaf so all of them become "known".
    intIsNull.updateNull();
    intIsEven.update(11);
    doubleMoreThan10.update(20.0D);
    assertTrue(intIsNull.isKnown());
    assertTrue(intIsEven.isKnown());
    assertTrue(doubleMoreThan10.isKnown());
    // Resetting the predicate tree clears the state of every leaf it contains.
    IncrementallyUpdatedFilterPredicateResetter.reset(pred);
    assertFalse(intIsNull.isKnown());
    assertFalse(intIsEven.isKnown());
    assertFalse(doubleMoreThan10.isKnown());
    // Updating one leaf affects only that leaf; the others stay unknown.
    intIsNull.updateNull();
    assertTrue(intIsNull.isKnown());
    assertFalse(intIsEven.isKnown());
    assertFalse(doubleMoreThan10.isKnown());
    // A second reset clears the partially updated tree as well.
    IncrementallyUpdatedFilterPredicateResetter.reset(pred);
    assertFalse(intIsNull.isKnown());
    assertFalse(intIsEven.isKnown());
    assertFalse(doubleMoreThan10.isKnown());
}
Also used : ValueInspector(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector) Or(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or) And(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And) Test(org.junit.Test)
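
The leaf inspectors above come from the test's own helper methods. As a rough illustration of what such a leaf looks like, the sketch below is an assumption based on the ValueInspector calls used in this test (update, updateNull, setResult), not code taken from parquet-mr:

// Hypothetical stand-in for the test helper intIsEven(): a leaf that marks
// itself known and records whether the int it observed was even.
ValueInspector intIsEven = new ValueInspector() {
    @Override
    public void update(int value) {
        setResult(value % 2 == 0);
    }

    @Override
    public void updateNull() {
        setResult(false);
    }
};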

Example 12 with Or

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in the apache/parquet-mr project.

From the class TestFilterApiMethods, method testSerializable:

@Test
public void testSerializable() throws Exception {
    BinaryColumn binary = binaryColumn("foo");
    // intColumn, longColumn, and predicate are fields of the test class; DummyUdp and
    // IsMultipleOf are user-defined predicates defined alongside it.
    FilterPredicate p = and(
        or(and(userDefined(intColumn, DummyUdp.class), predicate), eq(binary, Binary.fromString("hi"))),
        userDefined(longColumn, new IsMultipleOf(7)));
    // Round-trip the predicate through Java serialization and check equality.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ObjectOutputStream oos = new ObjectOutputStream(baos);
    oos.writeObject(p);
    oos.close();
    ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()));
    FilterPredicate read = (FilterPredicate) is.readObject();
    assertEquals(p, read);
}
Also used : BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) ObjectInputStream(java.io.ObjectInputStream) Test(org.junit.Test)
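
Serialization matters here because the whole predicate tree, including user-defined predicates passed by instance such as IsMultipleOf, has to survive being stored in a job configuration. A minimal sketch of what such a predicate might look like (an assumed shape reconstructed for illustration, not the actual IsMultipleOf from TestFilterApiMethods):

// Sketch of a serializable user-defined predicate on a long column.
public static class IsMultipleOf extends UserDefinedPredicate<Long> implements Serializable {
    private final long multiple;

    public IsMultipleOf(long multiple) {
        this.multiple = multiple;
    }

    @Override
    public boolean keep(Long value) {
        // Record-level check applied to each value.
        return value != null && value % multiple == 0;
    }

    @Override
    public boolean canDrop(Statistics<Long> statistics) {
        return false; // min/max alone cannot rule out a multiple
    }

    @Override
    public boolean inverseCanDrop(Statistics<Long> statistics) {
        return false;
    }
}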

Example 13 with Or

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in the apache/parquet-mr project.

From the class DictionaryFilterTest, method testOr:

@Test
public void testOr() throws Exception {
    BinaryColumn col = binaryColumn("binary_field");
    // both evaluate to false (no upper-case letters are in the dictionary)
    FilterPredicate B = eq(col, Binary.fromString("B"));
    FilterPredicate C = eq(col, Binary.fromString("C"));
    // both evaluate to true (all lower-case letters are in the dictionary)
    FilterPredicate x = eq(col, Binary.fromString("x"));
    FilterPredicate y = eq(col, Binary.fromString("y"));
    assertFalse("Should not drop when one predicate could be true", canDrop(or(B, y), ccmd, dictionaries));
    assertFalse("Should not drop when one predicate could be true", canDrop(or(x, C), ccmd, dictionaries));
    assertTrue("Should drop when both predicates must be false", canDrop(or(B, C), ccmd, dictionaries));
    assertFalse("Should not drop when one predicate could be true", canDrop(or(x, y), ccmd, dictionaries));
}
Also used : BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)
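
canDrop answers whether a whole row group can be skipped using only its dictionary pages, and an Or can be dropped only when both branches can. In practice the same predicate would be attached to a reader; a usage sketch under assumed inputs (the file path and GroupReadSupport are illustrative, not from the test):

// Read a Parquet file (hypothetical path) with the Or predicate attached; when dictionary
// filtering is enabled, row groups whose dictionaries prove both branches false can be
// skipped, and the remaining records are filtered row by row.
FilterPredicate pred = or(
    eq(binaryColumn("binary_field"), Binary.fromString("B")),
    eq(binaryColumn("binary_field"), Binary.fromString("C")));
try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), new Path("/tmp/data.parquet"))
        .withFilter(FilterCompat.get(pred))
        .build()) {
    for (Group g = reader.read(); g != null; g = reader.read()) {
        System.out.println(g);
    }
}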

Example 14 with Or

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in the apache/parquet-mr project.

From the class TestInputFormat, method testGetFilter:

@Test
public void testGetFilter() throws IOException {
    IntColumn intColumn = intColumn("foo");
    FilterPredicate p = or(eq(intColumn, 7), eq(intColumn, 12));
    Configuration conf = new Configuration();
    ParquetInputFormat.setFilterPredicate(conf, p);
    // The predicate stored in the Configuration comes back wrapped in a FilterPredicateCompat.
    Filter read = ParquetInputFormat.getFilter(conf);
    assertTrue(read instanceof FilterPredicateCompat);
    assertEquals(p, ((FilterPredicateCompat) read).getFilterPredicate());
    conf = new Configuration();
    // not(a or b) is rewritten to (not a) and (not b) before it is stored.
    ParquetInputFormat.setFilterPredicate(conf, not(p));
    read = ParquetInputFormat.getFilter(conf);
    assertTrue(read instanceof FilterPredicateCompat);
    assertEquals(and(notEq(intColumn, 7), notEq(intColumn, 12)), ((FilterPredicateCompat) read).getFilterPredicate());
    // With no predicate configured, getFilter returns the NOOP filter.
    assertEquals(FilterCompat.NOOP, ParquetInputFormat.getFilter(new Configuration()));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Filter(org.apache.parquet.filter2.compat.FilterCompat.Filter) RecordFilter(org.apache.parquet.filter.RecordFilter) UnboundRecordFilter(org.apache.parquet.filter.UnboundRecordFilter) FilterPredicateCompat(org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)
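
The setFilterPredicate/getFilter pair above is the same path a real MapReduce job goes through. A minimal job-setup sketch, where the job name, input path, and the choice of GroupReadSupport are placeholders rather than anything from the test:

// Store the Or predicate in the job configuration; ParquetInputFormat.getFilter()
// rebuilds it on the read side, as the test above verifies.
Job job = Job.getInstance(new Configuration(), "filtered-read");
ParquetInputFormat.setFilterPredicate(job.getConfiguration(),
    or(eq(intColumn("foo"), 7), eq(intColumn("foo"), 12)));
job.setInputFormatClass(ParquetInputFormat.class);
ParquetInputFormat.setReadSupportClass(job, GroupReadSupport.class);
FileInputFormat.addInputPath(job, new Path("/tmp/parquet-input"));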

Example 15 with Or

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in the apache/parquet-mr project.

From the class TestInputFormat, method testOnlyOneKindOfFilterSupported:

@Test
public void testOnlyOneKindOfFilterSupported() throws Exception {
    IntColumn foo = intColumn("foo");
    FilterPredicate p = or(eq(foo, 10), eq(foo, 11));
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    // Register the record-level UnboundRecordFilter first...
    ParquetInputFormat.setUnboundRecordFilter(job, DummyUnboundRecordFilter.class);
    try {
        // ...then setting a FilterPredicate on the same configuration must fail.
        ParquetInputFormat.setFilterPredicate(conf, p);
        fail("this should throw");
    } catch (IllegalArgumentException e) {
        assertEquals("You cannot provide a FilterPredicate after providing an UnboundRecordFilter", e.getMessage());
    }
    // And the other way around: a FilterPredicate first, then an UnboundRecordFilter.
    job = new Job();
    conf = job.getConfiguration();
    ParquetInputFormat.setFilterPredicate(conf, p);
    try {
        ParquetInputFormat.setUnboundRecordFilter(job, DummyUnboundRecordFilter.class);
        fail("this should throw");
    } catch (IllegalArgumentException e) {
        assertEquals("You cannot provide an UnboundRecordFilter after providing a FilterPredicate", e.getMessage());
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Job(org.apache.hadoop.mapreduce.Job) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)
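
For contrast, the older record-level API that conflicts with FilterPredicate here is registered by class rather than by instance. A rough sketch of such a filter, where the column name and predicate are illustrative and not taken from the test's DummyUnboundRecordFilter:

// An UnboundRecordFilter is bound to the file's column readers at read time;
// this sketch keeps records whose "foo" column equals 10.
public class FooEqualsTenFilter implements UnboundRecordFilter {
    @Override
    public RecordFilter bind(Iterable<ColumnReader> readers) {
        return ColumnRecordFilter.column("foo", ColumnPredicates.equalTo(10)).bind(readers);
    }
}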

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 17
Test (org.junit.Test): 17
MessageType (org.apache.parquet.schema.MessageType): 9
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument): 4
BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn): 3
Configuration (org.apache.hadoop.conf.Configuration): 2
IntColumn (org.apache.parquet.filter2.predicate.Operators.IntColumn): 2
And (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And): 2
Or (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or): 2
ValueInspector (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector): 2
ByteArrayInputStream (java.io.ByteArrayInputStream): 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
ObjectInputStream (java.io.ObjectInputStream): 1
ObjectOutputStream (java.io.ObjectOutputStream): 1
ArrayList (java.util.ArrayList): 1
Path (org.apache.hadoop.fs.Path): 1
DataWritableReadSupport (org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport): 1
FileSplit (org.apache.hadoop.mapred.FileSplit): 1
Job (org.apache.hadoop.mapreduce.Job): 1
Group (org.apache.parquet.example.data.Group): 1