Search in sources :

Example 26 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsEqualFilter.

/**
 * Builds a Parquet "equal to" predicate for a column.
 *
 * <p>The column's paths are looked up with {@code schemaUtils.getPaths(group, colName)}; when
 * that lookup returns {@code null} the column name itself is used as the single path. One
 * equality comparison is created per path (path {@code i} is compared with
 * {@code parquetObjects[i]}) and the comparisons are combined with a logical AND.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the values to compare against, indexed in step with the column's paths
 *                       (this method does not check that there are enough values for every path)
 * @param group          the group passed to the path lookup
 * @param schemaUtils    used to resolve the paths of {@code colName}
 * @return the combined predicate; {@code null} when a value has no natively supported type
 *         (including a {@code null} value), in which case {@code fullyApplied} is set to
 *         {@code false}; also {@code null} when there are no paths at all
 */
public FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        // No paths returned for this column: fall back to the column name as the only path.
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = eq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Boolean) {
            tempFilter = eq(booleanColumn(path), (Boolean) value);
        } else if (value instanceof Double) {
            tempFilter = eq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = eq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = eq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = eq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so this branch handles both; a separate
            // java.sql.Date branch after this one could never be reached.
            tempFilter = eq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            // Shorts are compared through an int column.
            tempFilter = eq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            fullyApplied = false;
            // A null value also lands here; guard it so the warning is logged instead of an NPE.
            final String typeName = (null == value) ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        filter = (null == filter) ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 27 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsLessThanOrEqualToFilter.

/**
 * Builds a Parquet "less than or equal to" predicate for a column.
 *
 * <p>The column's paths are looked up with {@code schemaUtils.getPaths(group, colName)}; when
 * that lookup returns {@code null} the column name itself is used as the single path. One
 * comparison is created per path (path {@code i} is compared with {@code parquetObjects[i]})
 * and the comparisons are combined with a logical AND.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the upper-bound values, indexed in step with the column's paths
 *                       (this method does not check that there are enough values for every path)
 * @param group          the group passed to the path lookup
 * @param schemaUtils    used to resolve the paths of {@code colName}
 * @return the combined predicate; {@code null} when a value has no natively supported type
 *         (including a {@code null} value), in which case {@code fullyApplied} is set to
 *         {@code false}; also {@code null} when there are no paths at all
 */
private FilterPredicate getIsLessThanOrEqualToFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        // No paths returned for this column: fall back to the column name as the only path.
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = ltEq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Double) {
            tempFilter = ltEq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = ltEq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = ltEq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = ltEq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so this branch handles both; a separate
            // java.sql.Date branch after this one could never be reached.
            tempFilter = ltEq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            // Shorts are compared through an int column.
            tempFilter = ltEq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = ltEq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            fullyApplied = false;
            // A null value also lands here; guard it so the warning is logged instead of an NPE.
            final String typeName = (null == value) ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsLessThanOrEqualTo filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        filter = (null == filter) ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 28 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsMoreThanFilter.

/**
 * Builds a Parquet "greater than" predicate for a column.
 *
 * <p>The column's paths are looked up with {@code schemaUtils.getPaths(group, colName)}; when
 * that lookup returns {@code null} the column name itself is used as the single path. One
 * comparison is created per path (path {@code i} is compared with {@code parquetObjects[i]})
 * and the comparisons are combined with a logical AND.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the exclusive lower-bound values, indexed in step with the column's paths
 *                       (this method does not check that there are enough values for every path)
 * @param group          the group passed to the path lookup
 * @param schemaUtils    used to resolve the paths of {@code colName}
 * @return the combined predicate; {@code null} when a value has no natively supported type
 *         (including a {@code null} value), in which case {@code fullyApplied} is set to
 *         {@code false}; also {@code null} when there are no paths at all
 */
private FilterPredicate getIsMoreThanFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        // No paths returned for this column: fall back to the column name as the only path.
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = gt(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Double) {
            tempFilter = gt(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = gt(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = gt(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = gt(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so this branch handles both; a separate
            // java.sql.Date branch after this one could never be reached.
            tempFilter = gt(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            // Shorts are compared through an int column.
            tempFilter = gt(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = gt(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            fullyApplied = false;
            // A null value also lands here; guard it so the warning is logged instead of an NPE.
            final String typeName = (null == value) ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsMoreThan filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        filter = (null == filter) ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 29 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsMoreThanOrEqualToFilter.

/**
 * Builds a Parquet "greater than or equal to" predicate for a column.
 *
 * <p>The column's paths are looked up with {@code schemaUtils.getPaths(group, colName)}; when
 * that lookup returns {@code null} the column name itself is used as the single path. One
 * comparison is created per path (path {@code i} is compared with {@code parquetObjects[i]})
 * and the comparisons are combined with a logical AND.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the inclusive lower-bound values, indexed in step with the column's paths
 *                       (this method does not check that there are enough values for every path)
 * @param group          the group passed to the path lookup
 * @param schemaUtils    used to resolve the paths of {@code colName}
 * @return the combined predicate; {@code null} when a value has no natively supported type
 *         (including a {@code null} value), in which case {@code fullyApplied} is set to
 *         {@code false}; also {@code null} when there are no paths at all
 */
private FilterPredicate getIsMoreThanOrEqualToFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        // No paths returned for this column: fall back to the column name as the only path.
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = gtEq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Double) {
            tempFilter = gtEq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = gtEq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = gtEq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = gtEq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so this branch handles both; a separate
            // java.sql.Date branch after this one could never be reached.
            tempFilter = gtEq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            // Shorts are compared through an int column.
            tempFilter = gtEq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = gtEq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            fullyApplied = false;
            // A null value also lands here; guard it so the warning is logged instead of an NPE.
            final String typeName = (null == value) ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsMoreThanOrEqualTo filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        filter = (null == filter) ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 30 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.

the class ParquetLoader method buildFilter.

/**
 * Recursively converts an {@code Expression} into a {@code FilterPredicate}.
 *
 * <ul>
 *   <li>A {@code BinaryExpression} with {@code OP_AND} / {@code OP_OR} combines the predicates
 *       built from its two operands.</li>
 *   <li>{@code OP_BETWEEN} becomes {@code (column >= lower) AND (column <= upper)}.</li>
 *   <li>{@code OP_IN} becomes an OR of equality predicates, one per listed value; with no
 *       values the result is {@code null}.</li>
 *   <li>Any other binary operator is delegated to the three-argument overload when one
 *       operand is a {@code Column} and the other a {@code Const}, in either order.</li>
 *   <li>A {@code UnaryExpression} with {@code OP_NOT} negates the predicate built from its
 *       operand and passes the result through {@code LogicalInverseRewriter.rewrite}.</li>
 * </ul>
 *
 * @param e the expression to convert
 * @return the equivalent filter predicate
 * @throws RuntimeException if the expression matches none of the shapes above
 */
private FilterPredicate buildFilter(Expression e) {
    final OpType opType = e.getOpType();

    if (!(e instanceof BinaryExpression)) {
        if (e instanceof UnaryExpression && opType == OpType.OP_NOT) {
            final FilterPredicate negated = not(buildFilter(((UnaryExpression) e).getExpression()));
            return LogicalInverseRewriter.rewrite(negated);
        }
        throw new RuntimeException("Could not build filter for expression: " + e);
    }

    final BinaryExpression binary = (BinaryExpression) e;
    final Expression left = binary.getLhs();
    final Expression right = binary.getRhs();

    switch (opType) {
        case OP_AND: {
            final FilterPredicate leftFilter = buildFilter(left);
            final FilterPredicate rightFilter = buildFilter(right);
            return and(leftFilter, rightFilter);
        }
        case OP_OR: {
            final FilterPredicate leftFilter = buildFilter(left);
            final FilterPredicate rightFilter = buildFilter(right);
            return or(leftFilter, rightFilter);
        }
        case OP_BETWEEN: {
            final BetweenExpression range = (BetweenExpression) right;
            final FilterPredicate lowerBound = buildFilter(OpType.OP_GE, (Column) left, (Const) range.getLower());
            final FilterPredicate upperBound = buildFilter(OpType.OP_LE, (Column) left, (Const) range.getUpper());
            return and(lowerBound, upperBound);
        }
        case OP_IN: {
            // Fold the candidate values into a chain of ORed equality checks.
            FilterPredicate anyOf = null;
            for (Object candidate : ((InExpression) right).getValues()) {
                final FilterPredicate matches = buildFilter(OpType.OP_EQ, (Column) left, (Const) candidate);
                anyOf = (anyOf == null) ? matches : or(anyOf, matches);
            }
            return anyOf;
        }
        default:
            // Remaining operators are plain column/constant comparisons, handled below.
            break;
    }

    if (left instanceof Column && right instanceof Const) {
        return buildFilter(opType, (Column) left, (Const) right);
    }
    if (left instanceof Const && right instanceof Column) {
        // Constant on the left: pass the column first, keeping the operator unchanged.
        return buildFilter(opType, (Column) right, (Const) left);
    }
    throw new RuntimeException("Could not build filter for expression: " + e);
}
Also used : BinaryExpression(org.apache.pig.Expression.BinaryExpression) InExpression(org.apache.pig.Expression.InExpression) UnaryExpression(org.apache.pig.Expression.UnaryExpression) BinaryExpression(org.apache.pig.Expression.BinaryExpression) Expression(org.apache.pig.Expression) BetweenExpression(org.apache.pig.Expression.BetweenExpression) Column(org.apache.pig.Expression.Column) BetweenExpression(org.apache.pig.Expression.BetweenExpression) Const(org.apache.pig.Expression.Const) InExpression(org.apache.pig.Expression.InExpression) OpType(org.apache.pig.Expression.OpType) UnaryExpression(org.apache.pig.Expression.UnaryExpression) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 45, Test (org.junit.Test): 31, HashMap (java.util.HashMap): 22, MessageType (org.apache.parquet.schema.MessageType): 22, TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 20, SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument): 15, List (java.util.List): 6, HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 6, ArrayList (java.util.ArrayList): 5, HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 4, ValueInspector (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector): 4, Path (org.apache.hadoop.fs.Path): 3, BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn): 2, And (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And): 2, Or (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or): 2, Test (org.junit.jupiter.api.Test): 2, Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair): 2, ViewElementDefinition (uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition): 2, GetElements (uk.gov.gchq.gaffer.operation.impl.get.GetElements): 2, ParquetStore (uk.gov.gchq.gaffer.parquetstore.ParquetStore): 2