use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getIsEqualFilter.
public FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
String[] paths = schemaUtils.getPaths(group, colName);
if (null == paths) {
paths = new String[1];
paths[0] = colName;
}
FilterPredicate filter = null;
for (int i = 0; i < paths.length; i++) {
final String path = paths[i];
FilterPredicate tempFilter;
if (parquetObjects[i] instanceof String) {
tempFilter = eq(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
} else if (parquetObjects[i] instanceof Boolean) {
tempFilter = eq(booleanColumn(path), (Boolean) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Double) {
tempFilter = eq(doubleColumn(path), (Double) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Float) {
tempFilter = eq(floatColumn(path), (Float) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Integer) {
tempFilter = eq(intColumn(path), (Integer) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Long) {
tempFilter = eq(longColumn(path), (Long) parquetObjects[i]);
} else if (parquetObjects[i] instanceof java.util.Date) {
tempFilter = eq(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof java.sql.Date) {
tempFilter = eq(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof Short) {
tempFilter = eq(intColumn(path), ((Short) parquetObjects[i]).intValue());
} else if (parquetObjects[i] instanceof byte[]) {
tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
} else {
fullyApplied = false;
LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
return null;
}
if (null == filter) {
filter = tempFilter;
} else {
filter = and(filter, tempFilter);
}
}
return filter;
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getIsLessThanOrEqualToFilter.
private FilterPredicate getIsLessThanOrEqualToFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
String[] paths = schemaUtils.getPaths(group, colName);
if (null == paths) {
paths = new String[1];
paths[0] = colName;
}
FilterPredicate filter = null;
for (int i = 0; i < paths.length; i++) {
final String path = paths[i];
FilterPredicate tempFilter;
if (parquetObjects[i] instanceof String) {
tempFilter = ltEq(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
} else if (parquetObjects[i] instanceof Double) {
tempFilter = ltEq(doubleColumn(path), (Double) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Float) {
tempFilter = ltEq(floatColumn(path), (Float) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Integer) {
tempFilter = ltEq(intColumn(path), (Integer) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Long) {
tempFilter = ltEq(longColumn(path), (Long) parquetObjects[i]);
} else if (parquetObjects[i] instanceof java.util.Date) {
tempFilter = ltEq(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof java.sql.Date) {
tempFilter = ltEq(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof Short) {
tempFilter = ltEq(intColumn(path), ((Short) parquetObjects[i]).intValue());
} else if (parquetObjects[i] instanceof byte[]) {
tempFilter = ltEq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
} else {
fullyApplied = false;
LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsLessThanOrEqualTo filter, therefore execution will take longer to perform this filter.");
return null;
}
if (null == filter) {
filter = tempFilter;
} else {
filter = and(filter, tempFilter);
}
}
return filter;
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getIsMoreThanFilter.
private FilterPredicate getIsMoreThanFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
String[] paths = schemaUtils.getPaths(group, colName);
if (null == paths) {
paths = new String[1];
paths[0] = colName;
}
FilterPredicate filter = null;
for (int i = 0; i < paths.length; i++) {
final String path = paths[i];
FilterPredicate tempFilter;
if (parquetObjects[i] instanceof String) {
tempFilter = gt(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
} else if (parquetObjects[i] instanceof Double) {
tempFilter = gt(doubleColumn(path), (Double) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Float) {
tempFilter = gt(floatColumn(path), (Float) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Integer) {
tempFilter = gt(intColumn(path), (Integer) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Long) {
tempFilter = gt(longColumn(path), (Long) parquetObjects[i]);
} else if (parquetObjects[i] instanceof java.util.Date) {
tempFilter = gt(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof java.sql.Date) {
tempFilter = gt(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof Short) {
tempFilter = gt(intColumn(path), ((Short) parquetObjects[i]).intValue());
} else if (parquetObjects[i] instanceof byte[]) {
tempFilter = gt(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
} else {
fullyApplied = false;
LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsMoreThan filter, therefore execution will take longer to perform this filter.");
return null;
}
if (null == filter) {
filter = tempFilter;
} else {
filter = and(filter, tempFilter);
}
}
return filter;
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getIsMoreThanOrEqualToFilter.
private FilterPredicate getIsMoreThanOrEqualToFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
String[] paths = schemaUtils.getPaths(group, colName);
if (null == paths) {
paths = new String[1];
paths[0] = colName;
}
FilterPredicate filter = null;
for (int i = 0; i < paths.length; i++) {
final String path = paths[i];
FilterPredicate tempFilter;
if (parquetObjects[i] instanceof String) {
tempFilter = gtEq(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
} else if (parquetObjects[i] instanceof Double) {
tempFilter = gtEq(doubleColumn(path), (Double) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Float) {
tempFilter = gtEq(floatColumn(path), (Float) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Integer) {
tempFilter = gtEq(intColumn(path), (Integer) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Long) {
tempFilter = gtEq(longColumn(path), (Long) parquetObjects[i]);
} else if (parquetObjects[i] instanceof java.util.Date) {
tempFilter = gtEq(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof java.sql.Date) {
tempFilter = gtEq(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof Short) {
tempFilter = gtEq(intColumn(path), ((Short) parquetObjects[i]).intValue());
} else if (parquetObjects[i] instanceof byte[]) {
tempFilter = gtEq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
} else {
fullyApplied = false;
LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsMoreThanOrEqualTo filter, therefore execution will take longer to perform this filter.");
return null;
}
if (null == filter) {
filter = tempFilter;
} else {
filter = and(filter, tempFilter);
}
}
return filter;
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.
the class ParquetLoader method buildFilter.
private FilterPredicate buildFilter(Expression e) {
OpType op = e.getOpType();
if (e instanceof BinaryExpression) {
Expression lhs = ((BinaryExpression) e).getLhs();
Expression rhs = ((BinaryExpression) e).getRhs();
switch(op) {
case OP_AND:
return and(buildFilter(lhs), buildFilter(rhs));
case OP_OR:
return or(buildFilter(lhs), buildFilter(rhs));
case OP_BETWEEN:
BetweenExpression between = (BetweenExpression) rhs;
return and(buildFilter(OpType.OP_GE, (Column) lhs, (Const) between.getLower()), buildFilter(OpType.OP_LE, (Column) lhs, (Const) between.getUpper()));
case OP_IN:
FilterPredicate current = null;
for (Object value : ((InExpression) rhs).getValues()) {
FilterPredicate next = buildFilter(OpType.OP_EQ, (Column) lhs, (Const) value);
if (current != null) {
current = or(current, next);
} else {
current = next;
}
}
return current;
}
if (lhs instanceof Column && rhs instanceof Const) {
return buildFilter(op, (Column) lhs, (Const) rhs);
} else if (lhs instanceof Const && rhs instanceof Column) {
return buildFilter(op, (Column) rhs, (Const) lhs);
}
} else if (e instanceof UnaryExpression && op == OpType.OP_NOT) {
return LogicalInverseRewriter.rewrite(not(buildFilter(((UnaryExpression) e).getExpression())));
}
throw new RuntimeException("Could not build filter for expression: " + e);
}
Aggregations