Search in sources:

Example 1 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.

the class TestStatisticsFilter method testAnd.

/**
 * Checks how {@code canDrop} treats two equality predicates joined by {@code and},
 * evaluated against the shared {@code columnMetas} fixture.
 */
@Test
public void testAnd() {
    final FilterPredicate intEqualsNine = eq(intColumn, 9);
    final FilterPredicate doubleEqualsFifty = eq(doubleColumn, 50D);

    // Every conjunction that involves the int predicate is expected to be droppable,
    // regardless of which side it appears on.
    assertTrue(canDrop(and(intEqualsNine, intEqualsNine), columnMetas));
    assertTrue(canDrop(and(intEqualsNine, doubleEqualsFifty), columnMetas));
    assertTrue(canDrop(and(doubleEqualsFifty, intEqualsNine), columnMetas));

    // The double predicate paired with itself is expected NOT to be droppable.
    assertFalse(canDrop(and(doubleEqualsFifty, doubleEqualsFifty), columnMetas));
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 2 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.

the class TestStatisticsFilter method testClearExceptionForNots.

/**
 * Verifies that {@code canDrop} throws an {@link IllegalArgumentException} with a
 * descriptive message when the predicate it is given still contains a {@code not}.
 */
@Test
public void testClearExceptionForNots() {
    final List<ColumnChunkMetaData> freshStatsMetas = Arrays.asList(
            getDoubleColumnMeta(new DoubleStatistics(), 0L),
            getIntColumnMeta(new IntStatistics(), 0L));
    final FilterPredicate predicateWithNot = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));
    final String expectedMessage = "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?" + " not(eq(double.column, 12.0))";

    try {
        canDrop(predicateWithNot, freshStatsMetas);
        // Reaching this line means no exception was raised, which is a failure.
        fail("This should throw");
    } catch (IllegalArgumentException expected) {
        assertEquals(expectedMessage, expected.getMessage());
    }
}
Also used : IntStatistics(org.apache.parquet.column.statistics.IntStatistics) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 3 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.

the class TestStatisticsFilter method testUdp.

/**
 * Exercises {@code canDrop} with user-defined predicates and their logical inverses,
 * both on the int column and on a column that is absent from the supplied metadata.
 */
@Test
public void testUdp() {
    // Int statistics whose min/max are pinned to 7..7, 8..8 and 1..2 respectively.
    final IntStatistics onlySevens = new IntStatistics();
    onlySevens.setMinMax(7, 7);
    final IntStatistics onlyEights = new IntStatistics();
    onlyEights.setMinMax(8, 8);
    final IntStatistics oneToTwo = new IntStatistics();
    oneToTwo.setMinMax(1, 2);

    // Predicates on the int column.
    final FilterPredicate sevensAndEights = userDefined(intColumn, SevensAndEightsUdp.class);
    final FilterPredicate notSevensAndEights =
            LogicalInverseRewriter.rewrite(not(userDefined(intColumn, SevensAndEightsUdp.class)));

    // Predicates on the missing column.
    final FilterPredicate dropNullOnMissing = userDefined(missingColumn2, DropNullUdp.class);
    final FilterPredicate notDropNullOnMissing =
            LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, DropNullUdp.class)));
    final FilterPredicate sevensAndEightsOnMissing = userDefined(missingColumn2, SevensAndEightsUdp.class);
    final FilterPredicate notSevensAndEightsOnMissing =
            LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, SevensAndEightsUdp.class)));

    // Predicate on the double column.
    final FilterPredicate allPositive = userDefined(doubleColumn, AllPositiveUdp.class);

    // Plain predicate on the int column.
    assertTrue(canDrop(sevensAndEights, Arrays.asList(getIntColumnMeta(onlySevens, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(sevensAndEights, Arrays.asList(getIntColumnMeta(onlyEights, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(sevensAndEights, Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

    // Inverted predicate on the int column.
    assertFalse(canDrop(notSevensAndEights, Arrays.asList(getIntColumnMeta(onlySevens, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(notSevensAndEights, Arrays.asList(getIntColumnMeta(onlyEights, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(notSevensAndEights, Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

    // The drop-null UDP drops a null (missing) column.
    assertTrue(canDrop(dropNullOnMissing, Arrays.asList(getIntColumnMeta(onlySevens, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(dropNullOnMissing, Arrays.asList(getIntColumnMeta(onlyEights, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(dropNullOnMissing, Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

    // Its inverse, not(drop-null UDP), keeps a null (missing) column.
    assertFalse(canDrop(notDropNullOnMissing, Arrays.asList(getIntColumnMeta(onlySevens, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(notDropNullOnMissing, Arrays.asList(getIntColumnMeta(onlyEights, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(notDropNullOnMissing, Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

    // The sevens-and-eights UDP keeps a null (missing) column.
    assertFalse(canDrop(sevensAndEightsOnMissing, Arrays.asList(getIntColumnMeta(onlySevens, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(sevensAndEightsOnMissing, Arrays.asList(getIntColumnMeta(onlyEights, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertFalse(canDrop(sevensAndEightsOnMissing, Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

    // Its inverse, not(sevens-and-eights UDP), drops a null (missing) column.
    assertTrue(canDrop(notSevensAndEightsOnMissing, Arrays.asList(getIntColumnMeta(onlySevens, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(notSevensAndEightsOnMissing, Arrays.asList(getIntColumnMeta(onlyEights, 177L), getDoubleColumnMeta(doubleStats, 177L))));
    assertTrue(canDrop(notSevensAndEightsOnMissing, Arrays.asList(getIntColumnMeta(oneToTwo, 177L), getDoubleColumnMeta(doubleStats, 177L))));

    // The all-positive UDP must not drop when evaluated against missingMinMaxColumnMetas.
    assertFalse(canDrop(allPositive, missingMinMaxColumnMetas));
}
Also used : IntStatistics(org.apache.parquet.column.statistics.IntStatistics) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 4 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.

the class DictionaryFilterTest method testGtEqDouble.

/**
 * Checks {@code canDrop} for {@code >=} predicates on the {@code double_field} column:
 * a bound just above the largest value must drop, while a bound equal to the largest
 * value, or one that some values satisfy, must not.
 *
 * @throws Exception if any call made by the test throws
 */
@Test
public void testGtEqDouble() throws Exception {
    DoubleColumn d = doubleColumn("double_field");
    // Seed the running maximum with negative infinity. Double.MIN_VALUE is the smallest
    // POSITIVE double, so seeding with it would report a wrong maximum whenever every
    // converted value is negative.
    double highest = Double.NEGATIVE_INFINITY;
    for (int value : intValues) {
        highest = Math.max(highest, toDouble(value));
    }
    assertTrue("Should drop: >= highest + 0.00000001", canDrop(gtEq(d, highest + 0.00000001), ccmd, dictionaries));
    assertFalse("Should not drop: >= highest", canDrop(gtEq(d, highest), ccmd, dictionaries));
    // NOTE(review): Double.MIN_VALUE here is a tiny positive bound, so this only matches
    // strictly positive values — confirm that is the intended "contains matching values" case.
    assertFalse("Should not drop: contains matching values", canDrop(gtEq(d, Double.MIN_VALUE), ccmd, dictionaries));
}
Also used : DoubleColumn(org.apache.parquet.filter2.predicate.Operators.DoubleColumn) Test(org.junit.Test)

Example 5 with DoubleColumn

use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getIsEqualFilter.

/**
 * Builds a Parquet equality filter requiring every column path of {@code colName} to equal
 * the entry of {@code parquetObjects} at the same index. The per-path filters are combined
 * with {@code and}.
 *
 * <p>When {@code schemaUtils.getPaths} returns {@code null}, {@code colName} itself is used as
 * the only path. When a value is {@code null} or of a type not handled here, a warning is
 * logged, {@code fullyApplied} is set to {@code false} and {@code null} is returned.
 *
 * @param colName        the column name whose paths are looked up
 * @param parquetObjects the values to compare against, read at the same index as each path
 * @param group          the group passed to {@code schemaUtils.getPaths}
 * @param schemaUtils    used to resolve the paths for {@code group} and {@code colName}
 * @return the combined equality filter, or {@code null} if there are no paths or a value
 *         cannot be turned into a native filter
 */
public FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = eq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Boolean) {
            tempFilter = eq(booleanColumn(path), (Boolean) value);
        } else if (value instanceof Double) {
            tempFilter = eq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = eq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = eq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = eq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // Also handles java.sql.Date: it extends java.util.Date, so a separate
            // branch placed after this one could never be reached.
            tempFilter = eq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            tempFilter = eq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            fullyApplied = false;
            // A null value fails every instanceof check above and lands here; guard it so
            // building the warning does not throw a NullPointerException.
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        // The first path seeds the filter; each later path is and-ed onto it.
        filter = null == filter ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 11, Test (org.junit.Test): 6, IntStatistics (org.apache.parquet.column.statistics.IntStatistics): 2, DoubleColumn (org.apache.parquet.filter2.predicate.Operators.DoubleColumn): 2, DoubleStatistics (org.apache.parquet.column.statistics.DoubleStatistics): 1, Group (org.apache.parquet.example.data.Group): 1, Operators (org.apache.parquet.filter2.predicate.Operators): 1, BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn): 1, User (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User): 1, ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData): 1, ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex): 1, ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder): 1, PrimitiveType (org.apache.parquet.schema.PrimitiveType): 1, Test (org.testng.annotations.Test): 1