Search in sources:

Example 1 with FilterPredicateCompat

Use of org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat in the Apache parquet-mr project.

From the class RowGroupFilter, method visit.

/**
 * Filters the file's row groups against the given predicate.
 *
 * <p>Validates the predicate against the file schema, then drops any block
 * that statistics- or dictionary-based pruning proves cannot match. Blocks
 * that survive both checks are returned for reading.
 */
@Override
public List<BlockMetaData> visit(FilterCompat.FilterPredicateCompat filterPredicateCompat) {
    FilterPredicate predicate = filterPredicateCompat.getFilterPredicate();
    // Ensure the predicate's column types agree with the file schema before pruning.
    SchemaCompatibilityValidator.validate(predicate, schema);

    List<BlockMetaData> kept = new ArrayList<BlockMetaData>();
    for (BlockMetaData candidate : blocks) {
        // First try the cheap statistics check (only if that level is enabled).
        boolean droppable = levels.contains(FilterLevel.STATISTICS)
                && StatisticsFilter.canDrop(predicate, candidate.getColumns());
        // Consult dictionary pages only when statistics could not rule the block out.
        if (!droppable && levels.contains(FilterLevel.DICTIONARY)) {
            droppable = DictionaryFilter.canDrop(predicate, candidate.getColumns(), reader.getDictionaryReader(candidate));
        }
        if (!droppable) {
            kept.add(candidate);
        }
    }
    return kept;
}
Also used : BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ArrayList(java.util.ArrayList) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 2 with FilterPredicateCompat

Use of org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat in the Apache parquet-mr project.

From the class TestInputFormat, method testGetFilter.

/**
 * Verifies that a filter predicate round-trips through the Hadoop
 * Configuration, that not() is rewritten away on the way out, and that an
 * unconfigured Configuration yields the no-op filter.
 */
@Test
public void testGetFilter() throws IOException {
    IntColumn foo = intColumn("foo");
    FilterPredicate predicate = or(eq(foo, 7), eq(foo, 12));

    // A plain predicate comes back wrapped, but otherwise unchanged.
    Configuration conf = new Configuration();
    ParquetInputFormat.setFilterPredicate(conf, predicate);
    Filter roundTripped = ParquetInputFormat.getFilter(conf);
    assertTrue(roundTripped instanceof FilterPredicateCompat);
    assertEquals(predicate, ((FilterPredicateCompat) roundTripped).getFilterPredicate());

    // not(a or b) is collapsed to (not a) and (not b) before use.
    conf = new Configuration();
    ParquetInputFormat.setFilterPredicate(conf, not(predicate));
    roundTripped = ParquetInputFormat.getFilter(conf);
    assertTrue(roundTripped instanceof FilterPredicateCompat);
    assertEquals(and(notEq(foo, 7), notEq(foo, 12)), ((FilterPredicateCompat) roundTripped).getFilterPredicate());

    // No predicate configured at all -> the shared NOOP filter.
    assertEquals(FilterCompat.NOOP, ParquetInputFormat.getFilter(new Configuration()));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Filter(org.apache.parquet.filter2.compat.FilterCompat.Filter) RecordFilter(org.apache.parquet.filter.RecordFilter) UnboundRecordFilter(org.apache.parquet.filter.UnboundRecordFilter) FilterPredicateCompat(org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) Test(org.junit.Test)

Example 3 with FilterPredicateCompat

Use of org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat in the Apache parquet-mr project.

From the class MessageColumnIO, method getRecordReader.

/**
 * Builds a {@link RecordReader} for the given pages, materializer, and filter.
 *
 * <p>Dispatches on the concrete filter kind: a predicate filter gets a
 * record-level streaming evaluator, an unbound record filter gets the legacy
 * filtered reader, and the no-op filter gets a plain reader. An empty schema
 * (no leaf columns) yields an empty reader regardless of the filter.
 */
public <T> RecordReader<T> getRecordReader(final PageReadStore columns, final RecordMaterializer<T> recordMaterializer, final Filter filter) {
    checkNotNull(columns, "columns");
    checkNotNull(recordMaterializer, "recordMaterializer");
    checkNotNull(filter, "filter");

    // With no leaf columns there is nothing to read.
    if (leaves.isEmpty()) {
        return new EmptyRecordReader<T>(recordMaterializer);
    }

    return filter.accept(new Visitor<RecordReader<T>>() {

        @Override
        public RecordReader<T> visit(FilterPredicateCompat filterPredicateCompat) {
            // Compile the predicate into an incrementally-updated form that is
            // evaluated as column values stream in, then wrap the materializer
            // so non-matching records are skipped.
            FilterPredicate pred = filterPredicateCompat.getFilterPredicate();
            IncrementallyUpdatedFilterPredicateBuilder builder = new IncrementallyUpdatedFilterPredicateBuilder(leaves);
            IncrementallyUpdatedFilterPredicate streaming = builder.build(pred);
            RecordMaterializer<T> filtering =
                new FilteringRecordMaterializer<T>(recordMaterializer, leaves, builder.getValueInspectorsByColumn(), streaming);
            ColumnReadStoreImpl store =
                new ColumnReadStoreImpl(columns, filtering.getRootConverter(), getType(), createdBy);
            return new RecordReaderImplementation<T>(MessageColumnIO.this, filtering, validating, store);
        }

        @Override
        public RecordReader<T> visit(UnboundRecordFilterCompat unboundRecordFilterCompat) {
            // Legacy record-filter path: filtering happens in FilteredRecordReader.
            ColumnReadStoreImpl store =
                new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy);
            return new FilteredRecordReader<T>(MessageColumnIO.this, recordMaterializer, validating, store,
                unboundRecordFilterCompat.getUnboundRecordFilter(), columns.getRowCount());
        }

        @Override
        public RecordReader<T> visit(NoOpFilter noOpFilter) {
            // No filtering: read every record.
            ColumnReadStoreImpl store =
                new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy);
            return new RecordReaderImplementation<T>(MessageColumnIO.this, recordMaterializer, validating, store);
        }
    });
}
Also used : ColumnReadStoreImpl(org.apache.parquet.column.impl.ColumnReadStoreImpl) NoOpFilter(org.apache.parquet.filter2.compat.FilterCompat.NoOpFilter) FilteringRecordMaterializer(org.apache.parquet.filter2.recordlevel.FilteringRecordMaterializer) RecordMaterializer(org.apache.parquet.io.api.RecordMaterializer) FilterPredicateCompat(org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat) IncrementallyUpdatedFilterPredicateBuilder(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicateBuilder) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) IncrementallyUpdatedFilterPredicate(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate) UnboundRecordFilterCompat(org.apache.parquet.filter2.compat.FilterCompat.UnboundRecordFilterCompat) IncrementallyUpdatedFilterPredicate(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate)

Example 4 with FilterPredicateCompat

Use of org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat in the Apache parquet-mr project.

From the class FilterCompat, method get.

/**
 * Wraps the given {@link FilterPredicate} in a {@link Filter}.
 *
 * <p>Logs the predicate being used, and rewrites it so that the not()
 * operator no longer appears (inversions are pushed down to the leaves).
 *
 * @param filterPredicate the predicate to wrap; must not be null
 * @return a FilterPredicateCompat wrapping the rewritten predicate
 */
public static Filter get(FilterPredicate filterPredicate) {
    checkNotNull(filterPredicate, "filterPredicate");
    LOG.info("Filtering using predicate: {}", filterPredicate);

    // Push every not() down onto the leaf comparisons so downstream
    // evaluators never have to handle it.
    FilterPredicate rewritten = LogicalInverseRewriter.rewrite(filterPredicate);
    boolean changed = !filterPredicate.equals(rewritten);
    if (changed) {
        LOG.info("Predicate has been collapsed to: {}", rewritten);
    }
    return new FilterPredicateCompat(rewritten);
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)4 FilterPredicateCompat (org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat)2 ArrayList (java.util.ArrayList)1 Configuration (org.apache.hadoop.conf.Configuration)1 ColumnReadStoreImpl (org.apache.parquet.column.impl.ColumnReadStoreImpl)1 RecordFilter (org.apache.parquet.filter.RecordFilter)1 UnboundRecordFilter (org.apache.parquet.filter.UnboundRecordFilter)1 Filter (org.apache.parquet.filter2.compat.FilterCompat.Filter)1 NoOpFilter (org.apache.parquet.filter2.compat.FilterCompat.NoOpFilter)1 UnboundRecordFilterCompat (org.apache.parquet.filter2.compat.FilterCompat.UnboundRecordFilterCompat)1 IntColumn (org.apache.parquet.filter2.predicate.Operators.IntColumn)1 FilteringRecordMaterializer (org.apache.parquet.filter2.recordlevel.FilteringRecordMaterializer)1 IncrementallyUpdatedFilterPredicate (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate)1 IncrementallyUpdatedFilterPredicateBuilder (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicateBuilder)1 BlockMetaData (org.apache.parquet.hadoop.metadata.BlockMetaData)1 RecordMaterializer (org.apache.parquet.io.api.RecordMaterializer)1 Test (org.junit.Test)1