Use of org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat in project parquet-mr by Apache.
From the class RowGroupFilter, method visit.
/**
 * Returns the blocks (row groups) that cannot be ruled out by the wrapped
 * filter predicate.
 *
 * <p>The predicate is first validated against the file schema. Each block is
 * then tested with the statistics filter and, if it survives, with the
 * dictionary filter; each check only runs when its {@link FilterLevel} is
 * present in {@code levels}.
 *
 * @param filterPredicateCompat wrapper holding the predicate to evaluate
 * @return the blocks that were not dropped, in their original order
 */
@Override
public List<BlockMetaData> visit(FilterCompat.FilterPredicateCompat filterPredicateCompat) {
  final FilterPredicate predicate = filterPredicateCompat.getFilterPredicate();

  // the filter's schema has to match the schema of the file
  SchemaCompatibilityValidator.validate(predicate, schema);

  final List<BlockMetaData> survivors = new ArrayList<BlockMetaData>();
  for (BlockMetaData candidate : blocks) {
    // cheap check first: column statistics
    if (levels.contains(FilterLevel.STATISTICS)
        && StatisticsFilter.canDrop(predicate, candidate.getColumns())) {
      continue;
    }
    // dictionaries are only consulted when statistics did not drop the block
    if (levels.contains(FilterLevel.DICTIONARY)
        && DictionaryFilter.canDrop(predicate, candidate.getColumns(), reader.getDictionaryReader(candidate))) {
      continue;
    }
    survivors.add(candidate);
  }
  return survivors;
}
Use of org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat in project parquet-mr by Apache.
From the class TestInputFormat, method testGetFilter.
/**
 * Checks that a predicate stored in a Configuration comes back from
 * ParquetInputFormat.getFilter as a FilterPredicateCompat, that a negated
 * predicate comes back with the not() pushed down into its operands, and
 * that an untouched Configuration yields FilterCompat.NOOP.
 */
@Test
public void testGetFilter() throws IOException {
  final IntColumn foo = intColumn("foo");
  final FilterPredicate predicate = or(eq(foo, 7), eq(foo, 12));

  // a plain predicate is returned as-is
  final Configuration plainConf = new Configuration();
  ParquetInputFormat.setFilterPredicate(plainConf, predicate);
  final Filter plainFilter = ParquetInputFormat.getFilter(plainConf);
  assertTrue(plainFilter instanceof FilterPredicateCompat);
  assertEquals(predicate, ((FilterPredicateCompat) plainFilter).getFilterPredicate());

  // not(a or b) is expected back as (not a) and (not b)
  final Configuration negatedConf = new Configuration();
  ParquetInputFormat.setFilterPredicate(negatedConf, not(predicate));
  final Filter negatedFilter = ParquetInputFormat.getFilter(negatedConf);
  assertTrue(negatedFilter instanceof FilterPredicateCompat);
  final FilterPredicate expectedNegation = and(notEq(foo, 7), notEq(foo, 12));
  assertEquals(expectedNegation, ((FilterPredicateCompat) negatedFilter).getFilterPredicate());

  // no predicate configured at all
  assertEquals(FilterCompat.NOOP, ParquetInputFormat.getFilter(new Configuration()));
}
Use of org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat in project parquet-mr by Apache.
From the class MessageColumnIO, method getRecordReader.
/**
 * Builds a record reader over the given pages, choosing the implementation
 * according to the kind of filter supplied.
 *
 * <p>All three arguments are checked with {@code checkNotNull}. When this
 * column IO has no leaves an {@link EmptyRecordReader} is returned without
 * consulting the filter.
 *
 * @param columns the page store to read from
 * @param recordMaterializer materializer that assembles the records
 * @param filter the filter to apply while reading
 * @param <T> the type of the materialized records
 * @return a record reader matching the filter kind
 */
public <T> RecordReader<T> getRecordReader(final PageReadStore columns, final RecordMaterializer<T> recordMaterializer, final Filter filter) {
  checkNotNull(columns, "columns");
  checkNotNull(recordMaterializer, "recordMaterializer");
  checkNotNull(filter, "filter");

  if (leaves.isEmpty()) {
    return new EmptyRecordReader<T>(recordMaterializer);
  }

  final Visitor<RecordReader<T>> readerFactory = new Visitor<RecordReader<T>>() {
    @Override
    public RecordReader<T> visit(FilterPredicateCompat filterPredicateCompat) {
      final FilterPredicate userPredicate = filterPredicateCompat.getFilterPredicate();
      final IncrementallyUpdatedFilterPredicateBuilder predicateBuilder =
          new IncrementallyUpdatedFilterPredicateBuilder(leaves);
      // build() runs before the value inspectors are fetched from the builder
      final IncrementallyUpdatedFilterPredicate incrementalPredicate = predicateBuilder.build(userPredicate);
      final RecordMaterializer<T> filtering = new FilteringRecordMaterializer<T>(
          recordMaterializer, leaves, predicateBuilder.getValueInspectorsByColumn(), incrementalPredicate);
      final ColumnReadStoreImpl store =
          new ColumnReadStoreImpl(columns, filtering.getRootConverter(), getType(), createdBy);
      return new RecordReaderImplementation<T>(MessageColumnIO.this, filtering, validating, store);
    }

    @Override
    public RecordReader<T> visit(UnboundRecordFilterCompat unboundRecordFilterCompat) {
      final ColumnReadStoreImpl store =
          new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy);
      return new FilteredRecordReader<T>(
          MessageColumnIO.this,
          recordMaterializer,
          validating,
          store,
          unboundRecordFilterCompat.getUnboundRecordFilter(),
          columns.getRowCount());
    }

    @Override
    public RecordReader<T> visit(NoOpFilter noOpFilter) {
      // no filtering: plain reader over the unwrapped materializer
      final ColumnReadStoreImpl store =
          new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy);
      return new RecordReaderImplementation<T>(MessageColumnIO.this, recordMaterializer, validating, store);
    }
  };
  return filter.accept(readerFactory);
}
Use of org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat in project parquet-mr by Apache.
From the class FilterCompat, method get.
/**
 * Wraps a FilterPredicate in a Filter.
 *
 * <p>The incoming predicate is logged, then rewritten so that it no longer
 * contains the not() operator. If the rewrite produced a predicate that is
 * not equal to the input, the rewritten form is logged as well.
 *
 * @param filterPredicate the predicate to wrap; checked with {@code checkNotNull}
 * @return a FilterPredicateCompat holding the rewritten predicate
 */
public static Filter get(FilterPredicate filterPredicate) {
  checkNotNull(filterPredicate, "filterPredicate");
  LOG.info("Filtering using predicate: {}", filterPredicate);

  // eliminate not() by pushing the negation into the operands
  final FilterPredicate withoutNot = LogicalInverseRewriter.rewrite(filterPredicate);
  final boolean unchanged = filterPredicate.equals(withoutNot);
  if (!unchanged) {
    LOG.info("Predicate has been collapsed to: {}", withoutNot);
  }

  return new FilterPredicateCompat(withoutNot);
}
Aggregations