Search in sources :

Example 46 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterCharColumnLessThanEquals.

/**
 * Checks that a {@code lessThanEquals} search argument on column {@code a}, declared as
 * CHAR(10), converts to a Parquet predicate whose string form is
 * {@code lteq(a, Binary{"apple"})}.
 */
@Test
public void testFilterCharColumnLessThanEquals() throws Exception {
    // The literal goes through HiveChar with length 10 before being handed to the builder.
    final String appleLiteral = new HiveChar("apple", 10).toString();
    final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
        .lessThanEquals("a", PredicateLeaf.Type.STRING, appleLiteral)
        .build();

    final MessageType fileSchema = MessageTypeParser.parseMessageType("message test {required binary a;}");

    // Column "a" is registered as CHAR(10) for the converter.
    final Map<String, TypeInfo> typesByColumn = new HashMap<>();
    typesByColumn.put("a", TypeInfoFactory.getCharTypeInfo(10));

    final FilterPredicate converted =
        ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, typesByColumn);

    assertEquals("lteq(a, Binary{\"apple\"})", converted.toString());
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 47 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.

the class TestParquetFilterPredicate method testFilterMoreComplexCharColumn.

/**
 * Checks conversion of a nested AND/OR search argument over four CHAR(10) columns
 * ({@code a}..{@code d}) into the equivalent nested Parquet predicate, compared by its
 * string form.
 */
@Test
public void testFilterMoreComplexCharColumn() throws Exception {
    final PredicateLeaf.Type stringType = PredicateLeaf.Type.STRING;
    final String pear = new HiveChar("pear", 10).toString();
    final String cherry = new HiveChar("cherry", 10).toString();
    final String orange = new HiveChar("orange", 10).toString();
    final String banana = new HiveChar("banana", 10).toString();

    // ((a=pear or a<=cherry) and (b=orange)) and (c=banana or d<cherry)
    final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
        .startAnd()
        .startAnd()
        .startOr()
        .equals("a", stringType, pear)
        .lessThanEquals("a", stringType, cherry)
        .end()
        .equals("b", stringType, orange)
        .end()
        .startOr()
        .equals("c", stringType, banana)
        .lessThan("d", stringType, cherry)
        .end()
        .end()
        .build();

    final MessageType fileSchema = MessageTypeParser.parseMessageType(
        "message test {"
            + " required binary a; required binary b;"
            + " required binary c; required binary d;}");

    // Every column in the schema is registered as CHAR(10).
    final Map<String, TypeInfo> typesByColumn = new HashMap<>();
    for (String columnName : new String[] {"a", "b", "c", "d"}) {
        typesByColumn.put(columnName, TypeInfoFactory.getCharTypeInfo(10));
    }

    final FilterPredicate converted =
        ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, typesByColumn);

    final String expected = "and(and(or("
        + "eq(a, Binary{\"pear\"}), "
        + "lteq(a, Binary{\"cherry\"})), "
        + "eq(b, Binary{\"orange\"})), "
        + "or(eq(c, Binary{\"banana\"}), lt(d, Binary{\"cherry\"})))";
    assertEquals(expected, converted.toString());
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 48 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class ParquetLoader method buildFilter.

/**
 * Recursively translates an expression tree into a {@link FilterPredicate}.
 *
 * <ul>
 *   <li>{@code OP_AND} / {@code OP_OR}: both operands are translated and combined.</li>
 *   <li>{@code OP_BETWEEN}: becomes {@code and(column >= lower, column <= upper)}.</li>
 *   <li>{@code OP_IN}: becomes an {@code or} chain of equality predicates, one per value.
 *       With no values the result is {@code null}.</li>
 *   <li>Any other binary operator: delegated to the column/constant overload when one side is
 *       a {@code Column} and the other a {@code Const}. The operator is passed through
 *       unchanged even when the constant is on the left.</li>
 *   <li>{@code OP_NOT} on a unary expression: the operand is translated, negated and passed
 *       through {@link LogicalInverseRewriter}.</li>
 * </ul>
 *
 * @param e the expression to translate
 * @return the translated predicate, or {@code null} for an {@code OP_IN} with no values
 * @throws RuntimeException if the expression shape is not one of the cases above
 */
private FilterPredicate buildFilter(Expression e) {
    final OpType op = e.getOpType();

    if (!(e instanceof BinaryExpression)) {
        if (e instanceof UnaryExpression && op == OpType.OP_NOT) {
            FilterPredicate operand = buildFilter(((UnaryExpression) e).getExpression());
            return LogicalInverseRewriter.rewrite(not(operand));
        }
        throw new RuntimeException("Could not build filter for expression: " + e);
    }

    final BinaryExpression binary = (BinaryExpression) e;
    final Expression left = binary.getLhs();
    final Expression right = binary.getRhs();

    switch (op) {
        case OP_AND: {
            FilterPredicate leftFilter = buildFilter(left);
            FilterPredicate rightFilter = buildFilter(right);
            return and(leftFilter, rightFilter);
        }
        case OP_OR: {
            FilterPredicate leftFilter = buildFilter(left);
            FilterPredicate rightFilter = buildFilter(right);
            return or(leftFilter, rightFilter);
        }
        case OP_BETWEEN: {
            BetweenExpression range = (BetweenExpression) right;
            FilterPredicate atLeastLower = buildFilter(OpType.OP_GE, (Column) left, (Const) range.getLower());
            FilterPredicate atMostUpper = buildFilter(OpType.OP_LE, (Column) left, (Const) range.getUpper());
            return and(atLeastLower, atMostUpper);
        }
        case OP_IN: {
            // Fold the candidate values into a left-nested OR of equality checks.
            FilterPredicate anyMatch = null;
            for (Object candidate : ((InExpression) right).getValues()) {
                FilterPredicate equalsCandidate = buildFilter(OpType.OP_EQ, (Column) left, (Const) candidate);
                anyMatch = (anyMatch == null) ? equalsCandidate : or(anyMatch, equalsCandidate);
            }
            return anyMatch;
        }
        default:
            // Remaining operators are handled by the column/constant check below.
            break;
    }

    if (left instanceof Column && right instanceof Const) {
        return buildFilter(op, (Column) left, (Const) right);
    }
    if (left instanceof Const && right instanceof Column) {
        return buildFilter(op, (Column) right, (Const) left);
    }
    throw new RuntimeException("Could not build filter for expression: " + e);
}
Also used : BinaryExpression(org.apache.pig.Expression.BinaryExpression) InExpression(org.apache.pig.Expression.InExpression) UnaryExpression(org.apache.pig.Expression.UnaryExpression) BinaryExpression(org.apache.pig.Expression.BinaryExpression) Expression(org.apache.pig.Expression) BetweenExpression(org.apache.pig.Expression.BetweenExpression) Column(org.apache.pig.Expression.Column) BetweenExpression(org.apache.pig.Expression.BetweenExpression) Const(org.apache.pig.Expression.Const) InExpression(org.apache.pig.Expression.InExpression) OpType(org.apache.pig.Expression.OpType) UnaryExpression(org.apache.pig.Expression.UnaryExpression) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 49 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class ParquetLoader method setPushdownPredicate.

/**
 * Translates the pushed-down expression with {@code buildFilter} and passes the resulting
 * predicate to {@code storeInUDFContext} under {@code ParquetInputFormat.FILTER_PREDICATE}.
 * Both the incoming expression and the resulting predicate are logged at INFO level.
 *
 * @param e the expression offered for pushdown
 * @throws IOException as declared by the overridden method
 */
@Override
public void setPushdownPredicate(Expression e) throws IOException {
    LOG.info("Pig pushdown expression: {}", e);

    final FilterPredicate parquetPredicate = buildFilter(e);
    LOG.info("Parquet filter predicate expression: {}", parquetPredicate);

    storeInUDFContext(ParquetInputFormat.FILTER_PREDICATE, parquetPredicate);
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 50 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

the class MessageColumnIO method getRecordReader.

/**
 * Creates a record reader over the given page store, choosing the implementation by the kind
 * of filter supplied.
 *
 * <p>If this column IO has no leaves, an {@code EmptyRecordReader} is returned without
 * consulting the filter. Otherwise the filter is visited:
 * <ul>
 *   <li>a {@code FilterPredicateCompat} wraps the materializer in a
 *       {@code FilteringRecordMaterializer} driven by an incrementally updated predicate;</li>
 *   <li>an {@code UnboundRecordFilterCompat} yields a {@code FilteredRecordReader};</li>
 *   <li>a {@code NoOpFilter} yields a plain {@code RecordReaderImplementation}.</li>
 * </ul>
 *
 * @param columns the page store to read from; checked with {@code checkNotNull}
 * @param recordMaterializer materializer for the records; checked with {@code checkNotNull}
 * @param filter the filter to apply; checked with {@code checkNotNull}
 * @param <T> the materialized record type
 * @return a record reader matching the filter kind
 */
public <T> RecordReader<T> getRecordReader(final PageReadStore columns, final RecordMaterializer<T> recordMaterializer, final Filter filter) {
    checkNotNull(columns, "columns");
    checkNotNull(recordMaterializer, "recordMaterializer");
    checkNotNull(filter, "filter");

    if (leaves.isEmpty()) {
        return new EmptyRecordReader<T>(recordMaterializer);
    }

    final Visitor<RecordReader<T>> readerFactory = new Visitor<RecordReader<T>>() {

        @Override
        public RecordReader<T> visit(FilterPredicateCompat filterPredicateCompat) {
            FilterPredicate userPredicate = filterPredicateCompat.getFilterPredicate();
            IncrementallyUpdatedFilterPredicateBuilder predicateBuilder =
                new IncrementallyUpdatedFilterPredicateBuilder(leaves);
            IncrementallyUpdatedFilterPredicate incrementalPredicate = predicateBuilder.build(userPredicate);

            // Wrap the caller's materializer so records are filtered using the incremental predicate.
            RecordMaterializer<T> filteringMaterializer = new FilteringRecordMaterializer<T>(
                recordMaterializer, leaves, predicateBuilder.getValueInspectorsByColumn(), incrementalPredicate);

            ColumnReadStoreImpl readStore = new ColumnReadStoreImpl(
                columns, filteringMaterializer.getRootConverter(), getType(), createdBy);
            return new RecordReaderImplementation<T>(
                MessageColumnIO.this, filteringMaterializer, validating, readStore);
        }

        @Override
        public RecordReader<T> visit(UnboundRecordFilterCompat unboundRecordFilterCompat) {
            ColumnReadStoreImpl readStore = new ColumnReadStoreImpl(
                columns, recordMaterializer.getRootConverter(), getType(), createdBy);
            return new FilteredRecordReader<T>(
                MessageColumnIO.this,
                recordMaterializer,
                validating,
                readStore,
                unboundRecordFilterCompat.getUnboundRecordFilter(),
                columns.getRowCount());
        }

        @Override
        public RecordReader<T> visit(NoOpFilter noOpFilter) {
            ColumnReadStoreImpl readStore = new ColumnReadStoreImpl(
                columns, recordMaterializer.getRootConverter(), getType(), createdBy);
            return new RecordReaderImplementation<T>(
                MessageColumnIO.this, recordMaterializer, validating, readStore);
        }
    };

    return filter.accept(readerFactory);
}
Also used : ColumnReadStoreImpl(org.apache.parquet.column.impl.ColumnReadStoreImpl) NoOpFilter(org.apache.parquet.filter2.compat.FilterCompat.NoOpFilter) FilteringRecordMaterializer(org.apache.parquet.filter2.recordlevel.FilteringRecordMaterializer) RecordMaterializer(org.apache.parquet.io.api.RecordMaterializer) FilterPredicateCompat(org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat) IncrementallyUpdatedFilterPredicateBuilder(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicateBuilder) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) IncrementallyUpdatedFilterPredicate(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate) UnboundRecordFilterCompat(org.apache.parquet.filter2.compat.FilterCompat.UnboundRecordFilterCompat) IncrementallyUpdatedFilterPredicate(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 76; Test (org.junit.Test): 50; HashMap (java.util.HashMap): 33; MessageType (org.apache.parquet.schema.MessageType): 33; TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 32; SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument): 25; HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 12; BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn): 8; ArrayList (java.util.ArrayList): 5; List (java.util.List): 5; Group (org.apache.parquet.example.data.Group): 5; Configuration (org.apache.hadoop.conf.Configuration): 4; HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 4; User (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User): 4; Predicate (java.util.function.Predicate): 3; Path (org.apache.hadoop.fs.Path): 3; Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair): 3; TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate): 3; HashSet (java.util.HashSet): 2; GenericRecord (org.apache.avro.generic.GenericRecord): 2