Search in sources:

Example 11 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project Gaffer by gchq.

the class QueryGenerator method getIsEqualFilter.

/**
 * Builds a Parquet "is equal" predicate for the given column.
 *
 * <p>The column name is expanded to its Parquet paths via {@code schemaUtils.getPaths}; when no
 * paths are returned the column name itself is used as the single path. One equality predicate
 * is created per path, comparing it against the value at the same index of
 * {@code parquetObjects}, and the per-path predicates are combined with a logical AND.
 *
 * <p>NOTE(review): this assumes {@code parquetObjects} holds at least one entry per path; a
 * shorter array still raises {@link ArrayIndexOutOfBoundsException} as before - confirm against
 * callers.
 *
 * @param colName        the name of the column to filter on
 * @param parquetObjects the values to compare against, indexed in step with the column's paths
 * @param group          the group the column belongs to, used to look up the paths
 * @return the combined predicate, or {@code null} when there are no paths or when a value is
 *         {@code null} or of a type that has no native Parquet equality filter here
 */
private FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        // getPaths returned nothing for this column: fall back to the column name as the only path.
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = eq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Boolean) {
            tempFilter = eq(booleanColumn(path), (Boolean) value);
        } else if (value instanceof Double) {
            tempFilter = eq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = eq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = eq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = eq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so this single branch handles both types;
            // a separate java.sql.Date check placed after this one could never be reached.
            tempFilter = eq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            // Short values are compared against an int column.
            tempFilter = eq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            // A null value used to trigger a NullPointerException on getClass(); report it as an
            // unsupported value instead so the caller gets the same null result as for any other
            // unsupported type.
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        if (null == filter) {
            filter = tempFilter;
        } else {
            filter = and(filter, tempFilter);
        }
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 12 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsLessThanFilter.

/**
 * Builds a Parquet "is less than" predicate for the given column.
 *
 * <p>The column name is expanded to its Parquet paths via {@code schemaUtils.getPaths}; when no
 * paths are returned the column name itself is used as the single path. One less-than predicate
 * is created per path, comparing it against the value at the same index of
 * {@code parquetObjects}, and the per-path predicates are combined with a logical AND.
 *
 * <p>When a value cannot be turned into a native predicate, {@code fullyApplied} is set to
 * {@code false} and {@code null} is returned.
 *
 * <p>NOTE(review): this assumes {@code parquetObjects} holds at least one entry per path; a
 * shorter array still raises {@link ArrayIndexOutOfBoundsException} as before - confirm against
 * callers.
 *
 * @param colName        the name of the column to filter on
 * @param parquetObjects the values to compare against, indexed in step with the column's paths
 * @param group          the group the column belongs to, used to look up the paths
 * @param schemaUtils    used to resolve the Parquet paths of the column
 * @return the combined predicate, or {@code null} when there are no paths or when a value is
 *         {@code null} or of a type that has no native Parquet less-than filter here
 */
private FilterPredicate getIsLessThanFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        // getPaths returned nothing for this column: fall back to the column name as the only path.
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = lt(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Double) {
            tempFilter = lt(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = lt(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = lt(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = lt(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so this single branch handles both types;
            // a separate java.sql.Date check placed after this one could never be reached.
            tempFilter = lt(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            // Short values are compared against an int column.
            tempFilter = lt(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = lt(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            // Record that the predicate could not be pushed down in full.
            fullyApplied = false;
            // A null value used to trigger a NullPointerException on getClass(); report it as an
            // unsupported value instead so the caller gets the same null result as for any other
            // unsupported type.
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsLessThan filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        if (null == filter) {
            filter = tempFilter;
        } else {
            filter = and(filter, tempFilter);
        }
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 13 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.

the class TestStatisticsFilter method testOr.

@Test
public void testOr() {
    // Two leaf predicates: the first is expected to be droppable against columnMetas,
    // the second is expected not to be.
    final FilterPredicate droppable = eq(intColumn, 9);
    final FilterPredicate notDroppable = eq(doubleColumn, 50D);

    // An OR may only be dropped when both of its operands can be dropped.
    assertTrue(canDrop(or(droppable, droppable), columnMetas));

    // As soon as either operand cannot be dropped, neither can the OR.
    assertFalse(canDrop(or(droppable, notDroppable), columnMetas));
    assertFalse(canDrop(or(notDroppable, droppable), columnMetas));
    assertFalse(canDrop(or(notDroppable, notDroppable), columnMetas));
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 14 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.

the class TestColumnIndexFiltering method testComplexFiltering.

@Test
public void testComplexFiltering() throws IOException {
    // Location inside the box lat in [37, 70], lon in [-21, 35] (all bounds inclusive).
    assertCorrectFiltering(
        candidate -> {
            Location location = candidate.getLocation();
            if (location == null) {
                return false;
            }
            Double latitude = location.getLat();
            Double longitude = location.getLon();
            if (latitude == null || longitude == null) {
                return false;
            }
            return 37 <= latitude && latitude <= 70 && -21 <= longitude && longitude <= 35;
        },
        and(
            and(
                gtEq(doubleColumn("location.lat"), 37.0),
                ltEq(doubleColumn("location.lat"), 70.0)),
            and(
                gtEq(doubleColumn("location.lon"), -21.0),
                ltEq(doubleColumn("location.lon"), 35.0))));

    // Both coordinates missing, either because there is no location or because both are null.
    assertCorrectFiltering(
        candidate -> {
            Location location = candidate.getLocation();
            if (location == null) {
                return true;
            }
            return location.getLat() == null && location.getLon() == null;
        },
        and(
            eq(doubleColumn("location.lat"), null),
            eq(doubleColumn("location.lon"), null)));

    // Name sorts before "thomas" and the id is at most three quarters of the data size.
    assertCorrectFiltering(
        candidate -> {
            String name = candidate.getName();
            if (name == null) {
                return false;
            }
            return name.compareTo("thomas") < 0 && candidate.getId() <= 3 * DATA.size() / 4;
        },
        and(
            lt(binaryColumn("name"), Binary.fromString("thomas")),
            ltEq(longColumn("id"), 3L * DATA.size() / 4)));
}
Also used : Location(org.apache.parquet.filter2.recordlevel.PhoneBookWriter.Location) Test(org.junit.Test)

Example 15 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.

the class TestRecordLevelFilters method testComplex.

@Test
public void testComplex() throws Exception {
    final BinaryColumn nameColumn = binaryColumn("name");
    final DoubleColumn lonColumn = doubleColumn("location.lon");
    final DoubleColumn latColumn = doubleColumn("location.lat");

    // (lon > 150 AND lat is not null) OR name == "alice"
    final FilterPredicate lonAbove150WithLat = and(gt(lonColumn, 150.0), notEq(latColumn, null));
    final FilterPredicate namedAlice = eq(nameColumn, Binary.fromString("alice"));
    final FilterPredicate pred = or(lonAbove150WithLat, namedAlice);

    final List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));

    // The in-memory filter below states the same condition as the Parquet predicate above.
    assertFilter(found, new UserFilter() {

        @Override
        public boolean keep(User u) {
            final boolean isAlice = "alice".equals(u.getName());
            Double latitude = null;
            Double longitude = null;
            if (u.getLocation() != null) {
                latitude = u.getLocation().getLat();
                longitude = u.getLocation().getLon();
            }
            final boolean lonAbove150AndLatPresent = longitude != null && longitude > 150.0 && latitude != null;
            return lonAbove150AndLatPresent || isAlice;
        }
    });
}
Also used : Group(org.apache.parquet.example.data.Group) DoubleColumn(org.apache.parquet.filter2.predicate.Operators.DoubleColumn) User(org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User) BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 11, Test (org.junit.Test): 8, DoubleColumn (org.apache.parquet.filter2.predicate.Operators.DoubleColumn): 3, IntStatistics (org.apache.parquet.column.statistics.IntStatistics): 2, PrimitiveType (org.apache.parquet.schema.PrimitiveType): 2, HashSet (java.util.HashSet): 1, DoubleStatistics (org.apache.parquet.column.statistics.DoubleStatistics): 1, Group (org.apache.parquet.example.data.Group): 1, Operators (org.apache.parquet.filter2.predicate.Operators): 1, BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn): 1, Location (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.Location): 1, User (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User): 1, ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData): 1, ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex): 1, ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder): 1, Test (org.testng.annotations.Test): 1