Use of org.apache.parquet.filter2.predicate.FilterPredicate in the project hive by apache.
From the class TestParquetFilterPredicate, method testFilterCharColumnLessThanEquals.
/**
 * Checks the conversion of a single {@code <=} search-argument leaf on column {@code a},
 * registered as CHAR(10), into a Parquet filter predicate. The predicate's string form
 * must equal {@code lteq(a, Binary{"apple"})}.
 */
@Test
public void testFilterCharColumnLessThanEquals() throws Exception {
  // Schema and Hive column types for the single binary column under test.
  MessageType fileSchema = MessageTypeParser.parseMessageType("message test {required binary a;}");
  Map<String, TypeInfo> hiveColumnTypes = new HashMap<>();
  hiveColumnTypes.put("a", TypeInfoFactory.getCharTypeInfo(10));

  // The comparison literal is the string form of a HiveChar declared with length 10.
  String appleLiteral = new HiveChar("apple", 10).toString();
  SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
      .lessThanEquals("a", PredicateLeaf.Type.STRING, appleLiteral)
      .build();

  FilterPredicate converted =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, hiveColumnTypes);

  assertEquals("lteq(a, Binary{\"apple\"})", converted.toString());
}
Use of org.apache.parquet.filter2.predicate.FilterPredicate in the project hive by apache.
From the class TestParquetFilterPredicate, method testFilterMoreComplexCharColumn.
/**
 * Checks the conversion of a nested search argument over four CHAR(10) columns:
 * {@code ((a=pear or a<=cherry) and (b=orange)) and (c=banana or d<cherry)}.
 * The resulting Parquet predicate's string form must mirror that nesting.
 */
@Test
public void testFilterMoreComplexCharColumn() throws Exception {
  // Four binary columns, each registered with Hive as CHAR(10).
  MessageType fileSchema = MessageTypeParser.parseMessageType(
      "message test { required binary a; required binary b; required binary c; required binary d;}");
  Map<String, TypeInfo> hiveColumnTypes = new HashMap<>();
  for (String columnName : new String[] {"a", "b", "c", "d"}) {
    hiveColumnTypes.put(columnName, TypeInfoFactory.getCharTypeInfo(10));
  }

  // Comparison literals, each the string form of a HiveChar declared with length 10.
  String pear = new HiveChar("pear", 10).toString();
  String cherry = new HiveChar("cherry", 10).toString();
  String orange = new HiveChar("orange", 10).toString();
  String banana = new HiveChar("banana", 10).toString();

  // Builder calls are indented to show the and/or nesting of the expression.
  SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
      .startAnd()
          .startAnd()
              .startOr()
                  .equals("a", PredicateLeaf.Type.STRING, pear)
                  .lessThanEquals("a", PredicateLeaf.Type.STRING, cherry)
              .end()
              .equals("b", PredicateLeaf.Type.STRING, orange)
          .end()
          .startOr()
              .equals("c", PredicateLeaf.Type.STRING, banana)
              .lessThan("d", PredicateLeaf.Type.STRING, cherry)
          .end()
      .end()
      .build();

  FilterPredicate converted =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema, hiveColumnTypes);

  String expectedText = "and("
      + "and("
      + "or(eq(a, Binary{\"pear\"}), lteq(a, Binary{\"cherry\"})), "
      + "eq(b, Binary{\"orange\"})), "
      + "or(eq(c, Binary{\"banana\"}), lt(d, Binary{\"cherry\"})))";
  assertEquals(expectedText, converted.toString());
}
Use of org.apache.parquet.filter2.predicate.FilterPredicate in the project parquet-mr by apache.
From the class ParquetLoader, method buildFilter.
/**
 * Recursively converts a pushdown {@link Expression} into a {@link FilterPredicate}.
 *
 * <p>Handled shapes:
 * <ul>
 *   <li>binary AND / OR: both operands are converted recursively and combined;</li>
 *   <li>binary BETWEEN: rewritten as {@code lhs >= lower AND lhs <= upper};</li>
 *   <li>binary IN: rewritten as an OR-chain of equality tests, one per listed value;</li>
 *   <li>any other binary operator with a column on one side and a constant on the other:
 *       delegated to the (operator, column, constant) overload, with the column always
 *       passed first;</li>
 *   <li>unary NOT: the operand is converted, negated, and passed through
 *       {@link LogicalInverseRewriter#rewrite}.</li>
 * </ul>
 *
 * @param e the expression to convert
 * @return the equivalent predicate; {@code null} when an IN expression lists no values
 * @throws RuntimeException if the expression matches none of the shapes above
 */
private FilterPredicate buildFilter(Expression e) {
  final OpType op = e.getOpType();

  // Everything that is not a binary expression: only NOT is supported.
  if (!(e instanceof BinaryExpression)) {
    if (e instanceof UnaryExpression && op == OpType.OP_NOT) {
      FilterPredicate negated = not(buildFilter(((UnaryExpression) e).getExpression()));
      return LogicalInverseRewriter.rewrite(negated);
    }
    throw new RuntimeException("Could not build filter for expression: " + e);
  }

  final BinaryExpression binary = (BinaryExpression) e;
  final Expression lhs = binary.getLhs();
  final Expression rhs = binary.getRhs();

  switch (op) {
    case OP_AND:
      return and(buildFilter(lhs), buildFilter(rhs));
    case OP_OR:
      return or(buildFilter(lhs), buildFilter(rhs));
    case OP_BETWEEN: {
      BetweenExpression range = (BetweenExpression) rhs;
      FilterPredicate lowerBound = buildFilter(OpType.OP_GE, (Column) lhs, (Const) range.getLower());
      FilterPredicate upperBound = buildFilter(OpType.OP_LE, (Column) lhs, (Const) range.getUpper());
      return and(lowerBound, upperBound);
    }
    case OP_IN: {
      // Fold the listed values into eq(v1) OR eq(v2) OR ...; stays null if there are none.
      FilterPredicate disjunction = null;
      for (Object value : ((InExpression) rhs).getValues()) {
        FilterPredicate equality = buildFilter(OpType.OP_EQ, (Column) lhs, (Const) value);
        disjunction = (disjunction == null) ? equality : or(disjunction, equality);
      }
      return disjunction;
    }
    default:
      // Remaining operators are handled by the column/constant checks below.
      break;
  }

  if (lhs instanceof Column && rhs instanceof Const) {
    return buildFilter(op, (Column) lhs, (Const) rhs);
  }
  if (lhs instanceof Const && rhs instanceof Column) {
    // Constant on the left: swap the operands so the column is passed first.
    return buildFilter(op, (Column) rhs, (Const) lhs);
  }

  throw new RuntimeException("Could not build filter for expression: " + e);
}
Use of org.apache.parquet.filter2.predicate.FilterPredicate in the project parquet-mr by apache.
From the class ParquetLoader, method setPushdownPredicate.
/**
 * Accepts a pushdown expression, converts it to a Parquet {@link FilterPredicate} via
 * {@code buildFilter}, and stores the result in the UDF context under
 * {@link ParquetInputFormat#FILTER_PREDICATE}. Both the incoming expression and the
 * converted predicate are logged at INFO level.
 *
 * @param e the pushdown expression to convert
 * @throws IOException declared by the overridden method's signature
 */
@Override
public void setPushdownPredicate(Expression e) throws IOException {
  LOG.info("Pig pushdown expression: {}", e);

  final FilterPredicate parquetPredicate = buildFilter(e);
  LOG.info("Parquet filter predicate expression: {}", parquetPredicate);

  storeInUDFContext(ParquetInputFormat.FILTER_PREDICATE, parquetPredicate);
}
Use of org.apache.parquet.filter2.predicate.FilterPredicate in the project parquet-mr by apache.
From the class MessageColumnIO, method getRecordReader.
/**
 * Creates a record reader over the given pages, choosing the implementation from the
 * kind of filter supplied.
 *
 * <p>When this column IO has no leaves, an {@link EmptyRecordReader} is returned and the
 * filter is not consulted. Otherwise the filter is visited:
 * <ul>
 *   <li>a filter-predicate filter wraps the materializer in a
 *       {@link FilteringRecordMaterializer} driven by an incrementally updated predicate;</li>
 *   <li>an unbound-record filter yields a {@link FilteredRecordReader};</li>
 *   <li>a no-op filter yields a plain {@link RecordReaderImplementation}.</li>
 * </ul>
 *
 * @param columns the page store to read from; must not be null
 * @param recordMaterializer assembles records from the column values; must not be null
 * @param filter the filter to apply; must not be null
 * @return a record reader matching the filter kind
 */
public <T> RecordReader<T> getRecordReader(final PageReadStore columns, final RecordMaterializer<T> recordMaterializer, final Filter filter) {
  checkNotNull(columns, "columns");
  checkNotNull(recordMaterializer, "recordMaterializer");
  checkNotNull(filter, "filter");

  if (leaves.isEmpty()) {
    return new EmptyRecordReader<T>(recordMaterializer);
  }

  return filter.accept(new Visitor<RecordReader<T>>() {
    @Override
    public RecordReader<T> visit(FilterPredicateCompat predicateFilter) {
      FilterPredicate userPredicate = predicateFilter.getFilterPredicate();
      IncrementallyUpdatedFilterPredicateBuilder predicateBuilder =
          new IncrementallyUpdatedFilterPredicateBuilder(leaves);
      IncrementallyUpdatedFilterPredicate incrementalPredicate = predicateBuilder.build(userPredicate);

      // Wrap the caller's materializer with one that applies the incremental predicate.
      RecordMaterializer<T> filtering = new FilteringRecordMaterializer<T>(
          recordMaterializer,
          leaves,
          predicateBuilder.getValueInspectorsByColumn(),
          incrementalPredicate);

      ColumnReadStoreImpl readStore =
          new ColumnReadStoreImpl(columns, filtering.getRootConverter(), getType(), createdBy);
      return new RecordReaderImplementation<T>(MessageColumnIO.this, filtering, validating, readStore);
    }

    @Override
    public RecordReader<T> visit(UnboundRecordFilterCompat recordFilter) {
      ColumnReadStoreImpl readStore =
          new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy);
      return new FilteredRecordReader<T>(
          MessageColumnIO.this,
          recordMaterializer,
          validating,
          readStore,
          recordFilter.getUnboundRecordFilter(),
          columns.getRowCount());
    }

    @Override
    public RecordReader<T> visit(NoOpFilter noFilter) {
      ColumnReadStoreImpl readStore =
          new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy);
      return new RecordReaderImplementation<T>(MessageColumnIO.this, recordMaterializer, validating, readStore);
    }
  });
}
Aggregations