Search in sources :

Example 1 with IntColumn

use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.

the class TestFilterApiMethods method testSerializable.

/**
 * Checks that a compound {@link FilterPredicate} survives a Java serialization round trip.
 *
 * <p>The predicate under test nests {@code and}/{@code or} nodes around an equality check on a
 * binary column and two user-defined predicates, one supplied as a class
 * ({@code DummyUdp.class}) and one supplied as an instance ({@code new IsMultipleOf(7)}).
 * {@code intColumn}, {@code longColumn} and {@code predicate} are declared outside this method.
 *
 * @throws Exception if writing or reading the serialized form fails
 */
@Test
public void testSerializable() throws Exception {
    BinaryColumn binary = binaryColumn("foo");
    FilterPredicate p = and(or(and(userDefined(intColumn, DummyUdp.class), predicate), eq(binary, Binary.fromString("hi"))), userDefined(longColumn, new IsMultipleOf(7)));

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // try-with-resources closes (and therefore flushes) the stream even when writeObject throws,
    // so baos holds the complete serialized form before it is read back below.
    try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
        oos.writeObject(p);
    }

    FilterPredicate read;
    // The input stream was previously never closed; scope it to the single read.
    try (ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
        read = (FilterPredicate) is.readObject();
    }

    // The deserialized predicate must be equal to the one that was written.
    assertEquals(p, read);
}
Also used : BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) ObjectInputStream(java.io.ObjectInputStream) Test(org.junit.Test)

Example 2 with IntColumn

use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.

the class DictionaryFilterTest method testLtInt.

/**
 * Exercises {@code canDrop} with {@code lt} predicates on the {@code int32_field} column.
 *
 * <p>The smallest entry of {@code intValues} is used as the boundary: the test expects a drop
 * for "less than the smallest value" and no drop for "less than smallest + 1" or
 * "less than {@code Integer.MAX_VALUE}".
 *
 * @throws Exception propagated from {@code canDrop}
 */
@Test
public void testLtInt() throws Exception {
    IntColumn column = intColumn("int32_field");

    // Find the minimum of intValues; remains Integer.MAX_VALUE when there is nothing to scan.
    int smallest = Integer.MAX_VALUE;
    for (int candidate : intValues) {
        if (candidate < smallest) {
            smallest = candidate;
        }
    }

    boolean dropsBelowSmallest = canDrop(lt(column, smallest), ccmd, dictionaries);
    assertTrue("Should drop: < lowest value", dropsBelowSmallest);

    boolean dropsJustAboveSmallest = canDrop(lt(column, smallest + 1), ccmd, dictionaries);
    assertFalse("Should not drop: < (lowest value + 1)", dropsJustAboveSmallest);

    boolean dropsBelowMax = canDrop(lt(column, Integer.MAX_VALUE), ccmd, dictionaries);
    assertFalse("Should not drop: contains matching values", dropsBelowMax);
}
Also used : IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)

Example 3 with IntColumn

use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.

the class DictionaryFilterTest method testInverseUdp.

/**
 * Exercises {@code canDrop} with negated user-defined predicates on {@code int32_field} after
 * they have been passed through {@code LogicalInverseRewriter.rewrite}.
 *
 * <p>Three {@code InInt32UDP} instances are built: one over {@code {42}}, one over
 * {@code {205}}, and one over every entry of {@code intValues}. Per the assertion messages,
 * only the inverse of the UDP covering all values is expected to allow dropping the block.
 *
 * @throws Exception propagated from {@code canDrop}
 */
@Test
public void testInverseUdp() throws Exception {
    InInt32UDP udpOf42 = new InInt32UDP(ImmutableSet.of(42));
    InInt32UDP udpOf205 = new InInt32UDP(ImmutableSet.of(205));

    // Box intValues so that every one of them can be placed in a single set.
    Integer[] boxedValues = ArrayUtils.toObject(intValues);
    Set<Integer> everyValue = ImmutableSet.copyOf(Arrays.asList(boxedValues));
    InInt32UDP udpOfEverything = new InInt32UDP(everyValue);

    // Negate each UDP, then let the rewriter push the negation into the predicate.
    FilterPredicate notUdpOf42 = not(userDefined(intColumn("int32_field"), udpOf42));
    FilterPredicate rewritten42 = LogicalInverseRewriter.rewrite(notUdpOf42);

    FilterPredicate notUdpOf205 = not(userDefined(intColumn("int32_field"), udpOf205));
    FilterPredicate rewritten205 = LogicalInverseRewriter.rewrite(notUdpOf205);

    FilterPredicate notUdpOfEverything = not(userDefined(intColumn("int32_field"), udpOfEverything));
    FilterPredicate rewrittenEverything = LogicalInverseRewriter.rewrite(notUdpOfEverything);

    boolean drops42 = canDrop(rewritten42, ccmd, dictionaries);
    assertFalse("Should not drop block for inverse of non-matching UDP", drops42);

    boolean drops205 = canDrop(rewritten205, ccmd, dictionaries);
    assertFalse("Should not drop block for inverse of UDP with some matches", drops205);

    boolean dropsEverything = canDrop(rewrittenEverything, ccmd, dictionaries);
    assertTrue("Should drop block for inverse of UDP with all matches", dropsEverything);
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 4 with IntColumn

use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.

the class DictionaryFilterTest method testUdpMissingColumn.

/**
 * Exercises {@code canDrop} with user-defined predicates on a column named
 * {@code missing_column}.
 *
 * <p>One UDP is built over {@code {42}} and one over a set containing only {@code null}. Per
 * the assertion messages, the first is expected to allow dropping the block and the second is
 * not.
 *
 * @throws Exception propagated from {@code canDrop}
 */
@Test
public void testUdpMissingColumn() throws Exception {
    IntColumn missing = intColumn("missing_column");

    InInt32UDP rejectsNull = new InInt32UDP(ImmutableSet.of(42));
    // The cast selects a single null Integer element rather than a null argument array.
    InInt32UDP acceptsNull = new InInt32UDP(Sets.newHashSet((Integer) null));

    boolean dropsForRejecting = canDrop(userDefined(missing, rejectsNull), ccmd, dictionaries);
    assertTrue("Should drop block for null rejecting udp", dropsForRejecting);

    boolean dropsForAccepting = canDrop(userDefined(missing, acceptsNull), ccmd, dictionaries);
    assertFalse("Should not drop block for null accepting udp", dropsForAccepting);
}
Also used : IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)

Example 5 with IntColumn

use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.

the class DictionaryFilterTest method testColumnWithoutDictionary.

/**
 * Exercises {@code canDrop} on {@code plain_int32_field} using a mocked
 * {@code DictionaryPageReadStore}.
 *
 * <p>Every comparison operator is tried with a bound of {@code -10} or {@code nElements + 10};
 * none may report the block as droppable, and the mocked store must not have been touched by
 * the time the test finishes.
 *
 * @throws Exception propagated from {@code canDrop}
 */
@Test
public void testColumnWithoutDictionary() throws Exception {
    IntColumn plain = intColumn("plain_int32_field");
    DictionaryPageReadStore dictionaryStore = mock(DictionaryPageReadStore.class);

    final String message = "Should never drop block using plain encoding";

    // One predicate per comparison operator, in the order they are checked.
    FilterPredicate[] filters = {
        eq(plain, -10),
        lt(plain, -10),
        ltEq(plain, -10),
        gt(plain, nElements + 10),
        gtEq(plain, nElements + 10),
        notEq(plain, nElements + 10)
    };

    for (FilterPredicate filter : filters) {
        assertFalse(message, canDrop(filter, ccmd, dictionaryStore));
    }

    // None of the canDrop calls above may have interacted with the mocked store.
    verifyZeroInteractions(dictionaryStore);
}
Also used : DictionaryPageReadStore(org.apache.parquet.column.page.DictionaryPageReadStore) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)

Aggregations

Test (org.junit.Test): 14, FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 13, IntColumn (org.apache.parquet.filter2.predicate.Operators.IntColumn): 8, IntStatistics (org.apache.parquet.column.statistics.IntStatistics): 3, Configuration (org.apache.hadoop.conf.Configuration): 2, DictionaryPageReadStore (org.apache.parquet.column.page.DictionaryPageReadStore): 2, Operators (org.apache.parquet.filter2.predicate.Operators): 2, ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex): 2, ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder): 2, PrimitiveType (org.apache.parquet.schema.PrimitiveType): 2, Test (org.testng.annotations.Test): 2, ByteArrayInputStream (java.io.ByteArrayInputStream): 1, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1, ObjectInputStream (java.io.ObjectInputStream): 1, ObjectOutputStream (java.io.ObjectOutputStream): 1, ArrayList (java.util.ArrayList): 1, Job (org.apache.hadoop.mapreduce.Job): 1, DoubleStatistics (org.apache.parquet.column.statistics.DoubleStatistics): 1, RecordFilter (org.apache.parquet.filter.RecordFilter): 1, UnboundRecordFilter (org.apache.parquet.filter.UnboundRecordFilter): 1