use of org.apache.parquet.filter2.compat.FilterCompat.Filter in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getIsMoreThanOrEqualToFilter.
/**
 * Builds a Parquet "greater than or equal to" predicate for the given column.
 * <p>
 * The column is resolved to one or more Parquet paths via
 * {@code schemaUtils.getPaths(group, colName)}; when that returns {@code null}
 * the column name itself is used as the single path. The i-th path is compared
 * against the i-th element of {@code parquetObjects}, and the per-path
 * predicates are combined with a logical AND.
 * <p>
 * If an element is {@code null} or of a type that has no branch below, the
 * {@code fullyApplied} flag is cleared, a warning is logged and {@code null}
 * is returned.
 *
 * @param colName        the name of the column to filter on
 * @param parquetObjects the values to compare against, one per resolved path
 *                       (assumed to contain at least as many elements as there
 *                       are paths - TODO confirm against callers)
 * @param group          the group passed to {@code schemaUtils.getPaths}
 * @param schemaUtils    used to look up the Parquet paths of the column
 * @return the combined predicate, or {@code null} if a value type is not supported
 */
private FilterPredicate getIsMoreThanOrEqualToFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[1];
        paths[0] = colName;
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = gtEq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Double) {
            tempFilter = gtEq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = gtEq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = gtEq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = gtEq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so this single branch handles both;
            // a separate java.sql.Date check placed after it could never be reached.
            tempFilter = gtEq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            tempFilter = gtEq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = gtEq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            fullyApplied = false;
            // A null element also lands here; avoid dereferencing it just to build the log message.
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsMoreThanOrEqualTo filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        if (null == filter) {
            filter = tempFilter;
        } else {
            filter = and(filter, tempFilter);
        }
    }
    return filter;
}
use of org.apache.parquet.filter2.compat.FilterCompat.Filter in project parquet-mr by apache.
the class ParquetLoader method buildFilter.
/**
 * Recursively converts an {@code Expression} tree into a {@code FilterPredicate}.
 * <p>
 * Binary expressions are handled per operator: AND/OR recurse into both sides,
 * BETWEEN becomes {@code (col >= lower) AND (col <= upper)}, and IN becomes an
 * OR-chain of equality predicates (or {@code null} when the value list is empty).
 * Any other binary operator is delegated to the column/constant overload,
 * accepting the column on either side. A unary NOT is built and then passed
 * through {@code LogicalInverseRewriter.rewrite}.
 *
 * @param e the expression to convert
 * @return the equivalent predicate
 * @throws RuntimeException if the expression shape is not one of the cases above
 */
private FilterPredicate buildFilter(Expression e) {
    final OpType opType = e.getOpType();

    if (!(e instanceof BinaryExpression)) {
        if (e instanceof UnaryExpression && opType == OpType.OP_NOT) {
            FilterPredicate negated = not(buildFilter(((UnaryExpression) e).getExpression()));
            return LogicalInverseRewriter.rewrite(negated);
        }
        throw new RuntimeException("Could not build filter for expression: " + e);
    }

    final BinaryExpression binary = (BinaryExpression) e;
    final Expression left = binary.getLhs();
    final Expression right = binary.getRhs();

    switch (opType) {
        case OP_AND:
            return and(buildFilter(left), buildFilter(right));
        case OP_OR:
            return or(buildFilter(left), buildFilter(right));
        case OP_BETWEEN: {
            BetweenExpression range = (BetweenExpression) right;
            FilterPredicate atLeastLower = buildFilter(OpType.OP_GE, (Column) left, (Const) range.getLower());
            FilterPredicate atMostUpper = buildFilter(OpType.OP_LE, (Column) left, (Const) range.getUpper());
            return and(atLeastLower, atMostUpper);
        }
        case OP_IN: {
            FilterPredicate anyMatch = null;
            for (Object candidate : ((InExpression) right).getValues()) {
                FilterPredicate equalsCandidate = buildFilter(OpType.OP_EQ, (Column) left, (Const) candidate);
                anyMatch = (anyMatch == null) ? equalsCandidate : or(anyMatch, equalsCandidate);
            }
            return anyMatch;
        }
        default:
            // Remaining operators are simple column/constant comparisons handled below.
            break;
    }

    if (left instanceof Column && right instanceof Const) {
        return buildFilter(opType, (Column) left, (Const) right);
    }
    if (left instanceof Const && right instanceof Column) {
        // Constant on the left: pass the operands in column-first order.
        return buildFilter(opType, (Column) right, (Const) left);
    }
    throw new RuntimeException("Could not build filter for expression: " + e);
}
use of org.apache.parquet.filter2.compat.FilterCompat.Filter in project parquet-mr by apache.
the class ParquetLoader method setPushdownPredicate.
/**
 * Converts the pushed-down expression into a Parquet filter predicate via
 * {@code buildFilter} and stores it in the UDF context under
 * {@code ParquetInputFormat.FILTER_PREDICATE}. Both the incoming expression
 * and the resulting predicate are logged at info level.
 *
 * @param e the pushdown expression to convert
 * @throws IOException declared by the overridden method
 */
@Override
public void setPushdownPredicate(Expression e) throws IOException {
    LOG.info("Pig pushdown expression: {}", e);

    final FilterPredicate parquetPredicate = buildFilter(e);
    LOG.info("Parquet filter predicate expression: {}", parquetPredicate);

    storeInUDFContext(ParquetInputFormat.FILTER_PREDICATE, parquetPredicate);
}
use of org.apache.parquet.filter2.compat.FilterCompat.Filter in project parquet-mr by apache.
the class MessageColumnIO method getRecordReader.
/**
 * Creates a {@code RecordReader} over the given pages, choosing the reader
 * implementation according to the kind of {@code filter} supplied.
 * <p>
 * When this column IO has no leaves an {@code EmptyRecordReader} is returned
 * without consulting the filter. Otherwise the filter is visited:
 * a {@code FilterPredicateCompat} wraps the materializer in a
 * {@code FilteringRecordMaterializer}, an {@code UnboundRecordFilterCompat}
 * yields a {@code FilteredRecordReader}, and a {@code NoOpFilter} yields a
 * plain {@code RecordReaderImplementation}.
 *
 * @param columns            the page store to read from; must not be null
 * @param recordMaterializer materializes records of type {@code T}; must not be null
 * @param filter             the filter to apply; must not be null
 * @param <T>                the materialized record type
 * @return a record reader for the given pages and filter
 */
public <T> RecordReader<T> getRecordReader(final PageReadStore columns, final RecordMaterializer<T> recordMaterializer, final Filter filter) {
    checkNotNull(columns, "columns");
    checkNotNull(recordMaterializer, "recordMaterializer");
    checkNotNull(filter, "filter");

    if (leaves.isEmpty()) {
        return new EmptyRecordReader<T>(recordMaterializer);
    }

    final Visitor<RecordReader<T>> readerFactory = new Visitor<RecordReader<T>>() {
        @Override
        public RecordReader<T> visit(FilterPredicateCompat filterPredicateCompat) {
            FilterPredicate userPredicate = filterPredicateCompat.getFilterPredicate();
            IncrementallyUpdatedFilterPredicateBuilder predicateBuilder = new IncrementallyUpdatedFilterPredicateBuilder(leaves);
            IncrementallyUpdatedFilterPredicate incrementalPredicate = predicateBuilder.build(userPredicate);
            RecordMaterializer<T> filtering = new FilteringRecordMaterializer<T>(recordMaterializer, leaves, predicateBuilder.getValueInspectorsByColumn(), incrementalPredicate);
            // The read store is driven by the filtering materializer's root converter, not the caller's.
            ColumnReadStoreImpl store = new ColumnReadStoreImpl(columns, filtering.getRootConverter(), getType(), createdBy);
            return new RecordReaderImplementation<T>(MessageColumnIO.this, filtering, validating, store);
        }

        @Override
        public RecordReader<T> visit(UnboundRecordFilterCompat unboundRecordFilterCompat) {
            ColumnReadStoreImpl store = new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy);
            return new FilteredRecordReader<T>(MessageColumnIO.this, recordMaterializer, validating, store, unboundRecordFilterCompat.getUnboundRecordFilter(), columns.getRowCount());
        }

        @Override
        public RecordReader<T> visit(NoOpFilter noOpFilter) {
            ColumnReadStoreImpl store = new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy);
            return new RecordReaderImplementation<T>(MessageColumnIO.this, recordMaterializer, validating, store);
        }
    };

    return filter.accept(readerFactory);
}
use of org.apache.parquet.filter2.compat.FilterCompat.Filter in project parquet-mr by apache.
the class FilterCompat method get.
/**
 * Wraps a {@code FilterPredicate} in a {@code Filter}.
 * <p>
 * The predicate is logged, then passed through
 * {@code LogicalInverseRewriter.rewrite} so that the wrapped predicate no
 * longer contains the not() operator. If the rewrite produced a predicate
 * that is not equal to the input, the rewritten form is logged as well.
 *
 * @param filterPredicate the predicate to wrap; must not be null
 * @return a {@code Filter} holding the rewritten predicate
 */
public static Filter get(FilterPredicate filterPredicate) {
    checkNotNull(filterPredicate, "filterPredicate");
    LOG.info("Filtering using predicate: {}", filterPredicate);

    // Eliminate not() from the predicate before wrapping it.
    final FilterPredicate withoutNot = LogicalInverseRewriter.rewrite(filterPredicate);

    final boolean unchanged = filterPredicate.equals(withoutNot);
    if (!unchanged) {
        LOG.info("Predicate has been collapsed to: {}", withoutNot);
    }

    return new FilterPredicateCompat(withoutNot);
}
Aggregations