use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.
the class TestRowGroupFilter method testApplyRowGroupFilters.
/**
 * Builds six row groups with hand-written int statistics for column {@code foo}
 * and checks which of them {@link RowGroupFilter#filterRowGroups} keeps for a
 * handful of equality / inequality predicates, including comparisons with null.
 */
@Test
public void testApplyRowGroupFilters() {
  // Second argument of makeBlockFromStats: presumably the row group's value
  // count -- TODO confirm against the helper, which is defined elsewhere.

  // Group 1: values in [10, 100], 4 nulls.
  IntStatistics firstStats = new IntStatistics();
  firstStats.setMinMax(10, 100);
  firstStats.setNumNulls(4);
  BlockMetaData group1 = makeBlockFromStats(firstStats, 301);

  // Group 2: values in [8, 102], no nulls at all.
  IntStatistics secondStats = new IntStatistics();
  secondStats.setMinMax(8, 102);
  secondStats.setNumNulls(0);
  BlockMetaData group2 = makeBlockFromStats(secondStats, 302);

  // Group 3: values in [100, 102], 12 nulls.
  IntStatistics thirdStats = new IntStatistics();
  thirdStats.setMinMax(100, 102);
  thirdStats.setNumNulls(12);
  BlockMetaData group3 = makeBlockFromStats(thirdStats, 303);

  // Group 4: min = max = 0, and the null count (304) equals the number handed
  // to makeBlockFromStats.
  IntStatistics fourthStats = new IntStatistics();
  fourthStats.setMinMax(0, 0);
  fourthStats.setNumNulls(304);
  BlockMetaData group4 = makeBlockFromStats(fourthStats, 304);

  // Group 5: min = max = 50, 7 nulls.
  IntStatistics fifthStats = new IntStatistics();
  fifthStats.setMinMax(50, 50);
  fifthStats.setNumNulls(7);
  BlockMetaData group5 = makeBlockFromStats(fifthStats, 305);

  // Group 6: min = max = 0 like group 4, but only 12 nulls.
  IntStatistics sixthStats = new IntStatistics();
  sixthStats.setMinMax(0, 0);
  sixthStats.setNumNulls(12);
  BlockMetaData group6 = makeBlockFromStats(sixthStats, 306);

  List<BlockMetaData> rowGroups = new ArrayList<BlockMetaData>(
      Arrays.asList(group1, group2, group3, group4, group5, group6));

  MessageType docSchema =
      MessageTypeParser.parseMessageType("message Document { optional int32 foo; }");
  IntColumn fooColumn = intColumn("foo");

  // foo == 50: only the groups whose [min, max] range contains 50 are expected.
  List<BlockMetaData> keptForEq50 =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(fooColumn, 50)), rowGroups, docSchema);
  assertEquals(Arrays.asList(group1, group2, group5), keptForEq50);

  // foo != 50: every group is expected to survive.
  List<BlockMetaData> keptForNotEq50 =
      RowGroupFilter.filterRowGroups(FilterCompat.get(notEq(fooColumn, 50)), rowGroups, docSchema);
  assertEquals(Arrays.asList(group1, group2, group3, group4, group5, group6), keptForNotEq50);

  // foo == null: group 2 (zero nulls) is the only one expected to be dropped.
  List<BlockMetaData> keptForEqNull =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(fooColumn, null)), rowGroups, docSchema);
  assertEquals(Arrays.asList(group1, group3, group4, group5, group6), keptForEqNull);

  // foo != null: group 4 is the only one expected to be dropped.
  List<BlockMetaData> keptForNotEqNull =
      RowGroupFilter.filterRowGroups(FilterCompat.get(notEq(fooColumn, null)), rowGroups, docSchema);
  assertEquals(Arrays.asList(group1, group2, group3, group5, group6), keptForNotEqNull);

  // foo == 0: groups 4 and 6 both have min = max = 0, yet only group 6 is expected.
  List<BlockMetaData> keptForEq0 =
      RowGroupFilter.filterRowGroups(FilterCompat.get(eq(fooColumn, 0)), rowGroups, docSchema);
  assertEquals(Arrays.asList(group6), keptForEq0);
}
use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.
the class DictionaryFilterTest method testInverseUdpMissingColumn.
/**
 * Checks the drop decision for negated user-defined predicates, after they
 * have been passed through {@link LogicalInverseRewriter}, on a column named
 * "missing_column".
 */
@Test
public void testInverseUdpMissingColumn() throws Exception {
  // Presumably this column is absent from the test file's schema, as its name
  // suggests -- verify against the fixture set up elsewhere in this class.
  IntColumn absentColumn = intColumn("missing_column");

  InInt32UDP rejectsNull = new InInt32UDP(ImmutableSet.of(42));
  InInt32UDP acceptsNull = new InInt32UDP(Sets.newHashSet((Integer) null));

  // Negating the null-accepting predicate: the block is expected to be droppable.
  boolean dropsForInvertedAccepting = canDrop(
      LogicalInverseRewriter.rewrite(not(userDefined(absentColumn, acceptsNull))),
      ccmd,
      dictionaries);
  assertTrue("Should drop block for null accepting udp", dropsForInvertedAccepting);

  // Negating the null-rejecting predicate: the block is expected to be kept.
  boolean dropsForInvertedRejecting = canDrop(
      LogicalInverseRewriter.rewrite(not(userDefined(absentColumn, rejectsNull))),
      ccmd,
      dictionaries);
  assertFalse("Should not drop block for null rejecting udp", dropsForInvertedRejecting);
}
use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.
the class DictionaryFilterTest method testColumnWithDictionaryAndPlainEncodings.
/**
 * Runs six comparison predicates against the "fallback_binary_field" column
 * and expects that none of them lets the block be dropped, and that the
 * mocked dictionary store is never touched while deciding.
 */
@Test
public void testColumnWithDictionaryAndPlainEncodings() throws Exception {
  final String neverDrop = "Should never drop block using plain encoding";

  // A mock lets the test prove afterwards that no dictionary was consulted.
  DictionaryPageReadStore untouchedStore = mock(DictionaryPageReadStore.class);

  // NOTE(review): the column is addressed as an int column although its name
  // says "binary" -- this mirrors the original test; confirm it is intended.
  IntColumn plainColumn = intColumn("fallback_binary_field");

  boolean dropsOnEq = canDrop(eq(plainColumn, -10), ccmd, untouchedStore);
  assertFalse(neverDrop, dropsOnEq);

  boolean dropsOnLt = canDrop(lt(plainColumn, -10), ccmd, untouchedStore);
  assertFalse(neverDrop, dropsOnLt);

  boolean dropsOnLtEq = canDrop(ltEq(plainColumn, -10), ccmd, untouchedStore);
  assertFalse(neverDrop, dropsOnLtEq);

  // The remaining predicates compare against nElements + 10.
  int upperProbe = nElements + 10;

  boolean dropsOnGt = canDrop(gt(plainColumn, upperProbe), ccmd, untouchedStore);
  assertFalse(neverDrop, dropsOnGt);

  boolean dropsOnGtEq = canDrop(gtEq(plainColumn, upperProbe), ccmd, untouchedStore);
  assertFalse(neverDrop, dropsOnGtEq);

  boolean dropsOnNotEq = canDrop(notEq(plainColumn, upperProbe), ccmd, untouchedStore);
  assertFalse(neverDrop, dropsOnNotEq);

  verifyZeroInteractions(untouchedStore);
}
use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project parquet-mr by apache.
the class TestStatisticsFilter method testOr.
/**
 * Checks how {@code or} combines two predicates for the drop decision: the
 * test expects a drop only when both operands are the predicate that allows
 * one, in all four operand combinations.
 */
@Test
public void testOr() {
  // Per the expectations below, the first predicate alone permits a drop and
  // the second one does not.
  FilterPredicate droppable = eq(intColumn, 9);
  FilterPredicate notDroppable = eq(doubleColumn, 50.0d);

  boolean bothDroppable = canDrop(or(droppable, droppable), columnMetas);
  assertTrue(bothDroppable);

  boolean onlyLeftDroppable = canDrop(or(droppable, notDroppable), columnMetas);
  assertFalse(onlyLeftDroppable);

  boolean onlyRightDroppable = canDrop(or(notDroppable, droppable), columnMetas);
  assertFalse(onlyRightDroppable);

  boolean neitherDroppable = canDrop(or(notDroppable, notDroppable), columnMetas);
  assertFalse(neitherDroppable);
}
Aggregations