Search in sources :

Example 1 with FloatColumn

use of org.apache.parquet.filter2.predicate.Operators.FloatColumn in project parquet-mr by apache.

the class DictionaryFilterTest method testGtFloat.

@Test
public void testGtFloat() throws Exception {
    FloatColumn f = floatColumn("float_field");
    float highest = Float.MIN_VALUE;
    for (int value : intValues) {
        highest = Math.max(highest, toFloat(value));
    }
    assertTrue("Should drop: > highest value", canDrop(gt(f, highest), ccmd, dictionaries));
    assertFalse("Should not drop: > (highest value - 1.0)", canDrop(gt(f, highest - 1.0f), ccmd, dictionaries));
    assertFalse("Should not drop: contains matching values", canDrop(gt(f, Float.MIN_VALUE), ccmd, dictionaries));
}
Also used : FloatColumn(org.apache.parquet.filter2.predicate.Operators.FloatColumn) Test(org.junit.Test)

Example 2 with FloatColumn

use of org.apache.parquet.filter2.predicate.Operators.FloatColumn in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildFloat.

@Test
public void testBuildFloat() {
    PrimitiveType type = Types.required(FLOAT).named("test_float");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    assertThat(builder, instanceOf(FloatColumnIndexBuilder.class));
    assertNull(builder.build());
    FloatColumn col = floatColumn("test_col");
    StatsBuilder sb = new StatsBuilder();
    builder.add(sb.stats(type, -4.2f, -4.1f));
    builder.add(sb.stats(type, -11.7f, 7.0f, null));
    builder.add(sb.stats(type, 2.2f, 2.2f, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 1.9f, 2.32f));
    builder.add(sb.stats(type, -21.0f, 8.1f));
    assertEquals(6, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0, 0);
    assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
    assertCorrectValues(columnIndex.getMaxValues(), -4.1f, 7.0f, 2.2f, null, 2.32f, 8.1f);
    assertCorrectValues(columnIndex.getMinValues(), -4.2f, -11.7f, 2.2f, null, 1.9f, -21.0f);
    assertCorrectFiltering(columnIndex, eq(col, 0.0f), 1, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
    Set<Float> set1 = new HashSet<>();
    set1.add(0.0f);
    assertCorrectFiltering(columnIndex, in(col, set1), 1, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 4);
    set1.add(null);
    assertCorrectFiltering(columnIndex, in(col, set1), 1, 2, 3, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 4);
    assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5);
    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, gt(col, 2.2f), 1, 4, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2.2f), 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, lt(col, 0.0f), 0, 1, 5);
    assertCorrectFiltering(columnIndex, ltEq(col, 1.9f), 0, 1, 4, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, FloatIsInteger.class), 1, 4, 5);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5);
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -532.3f, -345.2f, null, null));
    builder.add(sb.stats(type, -300.6f, -234.7f, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, -234.6f, 2.99999f));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 3.0f, 42.83f));
    builder.add(sb.stats(type, null, null));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
    assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, -345.2f, -234.7f, null, null, 2.99999f, null, 42.83f, null);
    assertCorrectValues(columnIndex.getMinValues(), null, -532.3f, -300.6f, null, null, -234.6f, null, 3.0f, null);
    assertCorrectFiltering(columnIndex, eq(col, 0.0f), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
    Set<Float> set2 = new HashSet<>();
    set2.add(0.0f);
    assertCorrectFiltering(columnIndex, in(col, set2), 5);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 6, 7, 8);
    set2.add(null);
    assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
    assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, gt(col, 2.2f), 5, 7);
    assertCorrectFiltering(columnIndex, gtEq(col, -234.7f), 2, 5, 7);
    assertCorrectFiltering(columnIndex, lt(col, -234.6f), 1, 2);
    assertCorrectFiltering(columnIndex, ltEq(col, -234.6f), 1, 2, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, FloatIsInteger.class), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, 532.3f, 345.2f));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 234.7f, 234.6f, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 234.6f, -2.99999f));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -3.0f, -42.83f));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
    assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, 532.3f, null, 234.7f, null, 234.6f, null, null, -3.0f);
    assertCorrectValues(columnIndex.getMinValues(), null, 345.2f, null, 234.6f, null, -2.99999f, null, null, -42.83f);
    assertCorrectFiltering(columnIndex, eq(col, 234.65f), 3);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
    Set<Float> set3 = new HashSet<>();
    set3.add(234.65f);
    assertCorrectFiltering(columnIndex, in(col, set3), 3);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 5, 6, 7, 8);
    set3.add(null);
    assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 6, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 5, 8);
    assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, gt(col, 2.2f), 1, 3, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2.2f), 1, 3, 5);
    assertCorrectFiltering(columnIndex, lt(col, 0.0f), 5, 8);
    assertCorrectFiltering(columnIndex, ltEq(col, 0.0f), 5, 8);
    assertCorrectFiltering(columnIndex, userDefined(col, FloatIsInteger.class), 1, 5, 8);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) FloatColumn(org.apache.parquet.filter2.predicate.Operators.FloatColumn) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 3 with FloatColumn

use of org.apache.parquet.filter2.predicate.Operators.FloatColumn in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsEqualFilter.

public FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[1];
        paths[0] = colName;
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        FilterPredicate tempFilter;
        if (parquetObjects[i] instanceof String) {
            tempFilter = eq(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
        } else if (parquetObjects[i] instanceof Boolean) {
            tempFilter = eq(booleanColumn(path), (Boolean) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Double) {
            tempFilter = eq(doubleColumn(path), (Double) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Float) {
            tempFilter = eq(floatColumn(path), (Float) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Integer) {
            tempFilter = eq(intColumn(path), (Integer) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Long) {
            tempFilter = eq(longColumn(path), (Long) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof java.util.Date) {
            tempFilter = eq(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
        } else if (parquetObjects[i] instanceof java.sql.Date) {
            tempFilter = eq(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
        } else if (parquetObjects[i] instanceof Short) {
            tempFilter = eq(intColumn(path), ((Short) parquetObjects[i]).intValue());
        } else if (parquetObjects[i] instanceof byte[]) {
            tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
        } else {
            fullyApplied = false;
            LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        if (null == filter) {
            filter = tempFilter;
        } else {
            filter = and(filter, tempFilter);
        }
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 4 with FloatColumn

use of org.apache.parquet.filter2.predicate.Operators.FloatColumn in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsLessThanOrEqualToFilter.

private FilterPredicate getIsLessThanOrEqualToFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[1];
        paths[0] = colName;
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        FilterPredicate tempFilter;
        if (parquetObjects[i] instanceof String) {
            tempFilter = ltEq(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
        } else if (parquetObjects[i] instanceof Double) {
            tempFilter = ltEq(doubleColumn(path), (Double) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Float) {
            tempFilter = ltEq(floatColumn(path), (Float) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Integer) {
            tempFilter = ltEq(intColumn(path), (Integer) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Long) {
            tempFilter = ltEq(longColumn(path), (Long) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof java.util.Date) {
            tempFilter = ltEq(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
        } else if (parquetObjects[i] instanceof java.sql.Date) {
            tempFilter = ltEq(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
        } else if (parquetObjects[i] instanceof Short) {
            tempFilter = ltEq(intColumn(path), ((Short) parquetObjects[i]).intValue());
        } else if (parquetObjects[i] instanceof byte[]) {
            tempFilter = ltEq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
        } else {
            fullyApplied = false;
            LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsLessThanOrEqualTo filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        if (null == filter) {
            filter = tempFilter;
        } else {
            filter = and(filter, tempFilter);
        }
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 5 with FloatColumn

use of org.apache.parquet.filter2.predicate.Operators.FloatColumn in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsMoreThanFilter.

private FilterPredicate getIsMoreThanFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[1];
        paths[0] = colName;
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        FilterPredicate tempFilter;
        if (parquetObjects[i] instanceof String) {
            tempFilter = gt(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
        } else if (parquetObjects[i] instanceof Double) {
            tempFilter = gt(doubleColumn(path), (Double) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Float) {
            tempFilter = gt(floatColumn(path), (Float) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Integer) {
            tempFilter = gt(intColumn(path), (Integer) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof Long) {
            tempFilter = gt(longColumn(path), (Long) parquetObjects[i]);
        } else if (parquetObjects[i] instanceof java.util.Date) {
            tempFilter = gt(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
        } else if (parquetObjects[i] instanceof java.sql.Date) {
            tempFilter = gt(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
        } else if (parquetObjects[i] instanceof Short) {
            tempFilter = gt(intColumn(path), ((Short) parquetObjects[i]).intValue());
        } else if (parquetObjects[i] instanceof byte[]) {
            tempFilter = gt(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
        } else {
            fullyApplied = false;
            LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsMoreThan filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        if (null == filter) {
            filter = tempFilter;
        } else {
            filter = and(filter, tempFilter);
        }
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)6 FloatColumn (org.apache.parquet.filter2.predicate.Operators.FloatColumn)2 PrimitiveType (org.apache.parquet.schema.PrimitiveType)2 Test (org.junit.Test)2 HashSet (java.util.HashSet)1 Operators (org.apache.parquet.filter2.predicate.Operators)1 ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex)1 ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder)1 Test (org.testng.annotations.Test)1