Search in sources :

Example 21 with IntColumn

use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project Gaffer by gchq.

From the class QueryGenerator, method getIsEqualFilter:

/**
 * Builds a Parquet equality predicate for a column.
 *
 * <p>The column is resolved to one or more Parquet paths via
 * {@code schemaUtils.getPaths(group, colName)}; when that lookup yields
 * {@code null} the column name itself is used as the single path. One
 * {@code eq} predicate is created per path, using the element of
 * {@code parquetObjects} at the same index, and the per-path predicates are
 * combined with {@code and}.
 *
 * <p>NOTE(review): {@code parquetObjects} is indexed by path position, so it
 * is assumed to hold at least as many elements as there are paths - confirm
 * against callers.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the values to compare against, one per resolved path
 * @param group          the group passed to the path lookup
 * @return the combined predicate, or {@code null} if a value is {@code null}
 *         or of a type with no native Parquet column mapping here (a warning
 *         is logged), or if no paths were resolved
 */
private FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = eq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Boolean) {
            tempFilter = eq(booleanColumn(path), (Boolean) value);
        } else if (value instanceof Double) {
            tempFilter = eq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = eq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = eq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = eq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // Also covers java.sql.Date, which is a subclass of java.util.Date.
            tempFilter = eq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            // Shorts are compared through an int column.
            tempFilter = eq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            // A null element has no class to report; describe it as "null" instead of throwing.
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        filter = null == filter ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 22 with IntColumn

use of org.apache.parquet.filter2.predicate.Operators.IntColumn in project Gaffer by gchq.

From the class JavaPredicateToParquetPredicate, method getIsLessThanFilter:

/**
 * Builds a Parquet "less than" predicate for a column.
 *
 * <p>The column is resolved to one or more Parquet paths via
 * {@code schemaUtils.getPaths(group, colName)}; when that lookup yields
 * {@code null} the column name itself is used as the single path. One
 * {@code lt} predicate is created per path, using the element of
 * {@code parquetObjects} at the same index, and the per-path predicates are
 * combined with {@code and}.
 *
 * <p>NOTE(review): {@code parquetObjects} is indexed by path position, so it
 * is assumed to hold at least as many elements as there are paths - confirm
 * against callers.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the values to compare against, one per resolved path
 * @param group          the group passed to the path lookup
 * @param schemaUtils    the helper used to resolve the column's paths
 * @return the combined predicate, or {@code null} if a value is {@code null}
 *         or of a type with no native Parquet column mapping here (in which
 *         case {@code fullyApplied} is set to {@code false} and a warning is
 *         logged), or if no paths were resolved
 */
private FilterPredicate getIsLessThanFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = lt(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Double) {
            tempFilter = lt(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = lt(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = lt(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = lt(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // Also covers java.sql.Date, which is a subclass of java.util.Date.
            tempFilter = lt(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            // Shorts are compared through an int column.
            tempFilter = lt(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = lt(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            // Record that this predicate could not be expressed as a native Parquet filter.
            fullyApplied = false;
            // A null element has no class to report; describe it as "null" instead of throwing.
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsLessThan filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        filter = null == filter ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Aggregations

Test (org.junit.Test)14 FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)13 IntColumn (org.apache.parquet.filter2.predicate.Operators.IntColumn)8 IntStatistics (org.apache.parquet.column.statistics.IntStatistics)3 Configuration (org.apache.hadoop.conf.Configuration)2 DictionaryPageReadStore (org.apache.parquet.column.page.DictionaryPageReadStore)2 Operators (org.apache.parquet.filter2.predicate.Operators)2 ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex)2 ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder)2 PrimitiveType (org.apache.parquet.schema.PrimitiveType)2 Test (org.testng.annotations.Test)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 ObjectInputStream (java.io.ObjectInputStream)1 ObjectOutputStream (java.io.ObjectOutputStream)1 ArrayList (java.util.ArrayList)1 Job (org.apache.hadoop.mapreduce.Job)1 DoubleStatistics (org.apache.parquet.column.statistics.DoubleStatistics)1 RecordFilter (org.apache.parquet.filter.RecordFilter)1 UnboundRecordFilter (org.apache.parquet.filter.UnboundRecordFilter)1