use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.
the class TestStatisticsFilter method testAnd.
@Test
public void testAnd() {
FilterPredicate yes = eq(intColumn, 9);
FilterPredicate no = eq(doubleColumn, 50D);
assertTrue(canDrop(and(yes, yes), columnMetas));
assertTrue(canDrop(and(yes, no), columnMetas));
assertTrue(canDrop(and(no, yes), columnMetas));
assertFalse(canDrop(and(no, no), columnMetas));
}
use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.
the class TestStatisticsFilter method testClearExceptionForNots.
@Test
public void testClearExceptionForNots() {
List<ColumnChunkMetaData> columnMetas = Arrays.asList(getDoubleColumnMeta(new DoubleStatistics(), 0L), getIntColumnMeta(new IntStatistics(), 0L));
FilterPredicate pred = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));
try {
canDrop(pred, columnMetas);
fail("This should throw");
} catch (IllegalArgumentException e) {
assertEquals("This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?" + " not(eq(double.column, 12.0))", e.getMessage());
}
}
use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.
the class TestStatisticsFilter method testUdp.
@Test
public void testUdp() {
FilterPredicate pred = userDefined(intColumn, SevensAndEightsUdp.class);
FilterPredicate invPred = LogicalInverseRewriter.rewrite(not(userDefined(intColumn, SevensAndEightsUdp.class)));
FilterPredicate udpDropMissingColumn = userDefined(missingColumn2, DropNullUdp.class);
FilterPredicate invUdpDropMissingColumn = LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, DropNullUdp.class)));
FilterPredicate udpKeepMissingColumn = userDefined(missingColumn2, SevensAndEightsUdp.class);
FilterPredicate invUdpKeepMissingColumn = LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, SevensAndEightsUdp.class)));
FilterPredicate allPositivePred = userDefined(doubleColumn, AllPositiveUdp.class);
IntStatistics seven = new IntStatistics();
seven.setMinMax(7, 7);
IntStatistics eight = new IntStatistics();
eight.setMinMax(8, 8);
IntStatistics neither = new IntStatistics();
neither.setMinMax(1, 2);
assertTrue(canDrop(pred, Arrays.asList(getIntColumnMeta(seven, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(pred, Arrays.asList(getIntColumnMeta(eight, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(pred, Arrays.asList(getIntColumnMeta(neither, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(invPred, Arrays.asList(getIntColumnMeta(seven, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertTrue(canDrop(invPred, Arrays.asList(getIntColumnMeta(eight, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(invPred, Arrays.asList(getIntColumnMeta(neither, 177L), getDoubleColumnMeta(doubleStats, 177L))));
// udpDropMissingColumn drops null column.
assertTrue(canDrop(udpDropMissingColumn, Arrays.asList(getIntColumnMeta(seven, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertTrue(canDrop(udpDropMissingColumn, Arrays.asList(getIntColumnMeta(eight, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertTrue(canDrop(udpDropMissingColumn, Arrays.asList(getIntColumnMeta(neither, 177L), getDoubleColumnMeta(doubleStats, 177L))));
// invUdpDropMissingColumn (i.e., not(udpDropMissingColumn)) keeps null column.
assertFalse(canDrop(invUdpDropMissingColumn, Arrays.asList(getIntColumnMeta(seven, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(invUdpDropMissingColumn, Arrays.asList(getIntColumnMeta(eight, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(invUdpDropMissingColumn, Arrays.asList(getIntColumnMeta(neither, 177L), getDoubleColumnMeta(doubleStats, 177L))));
// udpKeepMissingColumn keeps null column.
assertFalse(canDrop(udpKeepMissingColumn, Arrays.asList(getIntColumnMeta(seven, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(udpKeepMissingColumn, Arrays.asList(getIntColumnMeta(eight, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(udpKeepMissingColumn, Arrays.asList(getIntColumnMeta(neither, 177L), getDoubleColumnMeta(doubleStats, 177L))));
// invUdpKeepMissingColumn (i.e., not(udpKeepMissingColumn)) drops null column.
assertTrue(canDrop(invUdpKeepMissingColumn, Arrays.asList(getIntColumnMeta(seven, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertTrue(canDrop(invUdpKeepMissingColumn, Arrays.asList(getIntColumnMeta(eight, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertTrue(canDrop(invUdpKeepMissingColumn, Arrays.asList(getIntColumnMeta(neither, 177L), getDoubleColumnMeta(doubleStats, 177L))));
assertFalse(canDrop(allPositivePred, missingMinMaxColumnMetas));
}
use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project parquet-mr by apache.
the class DictionaryFilterTest method testGtEqDouble.
@Test
public void testGtEqDouble() throws Exception {
DoubleColumn d = doubleColumn("double_field");
double highest = Double.MIN_VALUE;
for (int value : intValues) {
highest = Math.max(highest, toDouble(value));
}
assertTrue("Should drop: >= highest + 0.00000001", canDrop(gtEq(d, highest + 0.00000001), ccmd, dictionaries));
assertFalse("Should not drop: >= highest", canDrop(gtEq(d, highest), ccmd, dictionaries));
assertFalse("Should not drop: contains matching values", canDrop(gtEq(d, Double.MIN_VALUE), ccmd, dictionaries));
}
use of org.apache.parquet.filter2.predicate.Operators.DoubleColumn in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getIsEqualFilter.
public FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
String[] paths = schemaUtils.getPaths(group, colName);
if (null == paths) {
paths = new String[1];
paths[0] = colName;
}
FilterPredicate filter = null;
for (int i = 0; i < paths.length; i++) {
final String path = paths[i];
FilterPredicate tempFilter;
if (parquetObjects[i] instanceof String) {
tempFilter = eq(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
} else if (parquetObjects[i] instanceof Boolean) {
tempFilter = eq(booleanColumn(path), (Boolean) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Double) {
tempFilter = eq(doubleColumn(path), (Double) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Float) {
tempFilter = eq(floatColumn(path), (Float) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Integer) {
tempFilter = eq(intColumn(path), (Integer) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Long) {
tempFilter = eq(longColumn(path), (Long) parquetObjects[i]);
} else if (parquetObjects[i] instanceof java.util.Date) {
tempFilter = eq(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof java.sql.Date) {
tempFilter = eq(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof Short) {
tempFilter = eq(intColumn(path), ((Short) parquetObjects[i]).intValue());
} else if (parquetObjects[i] instanceof byte[]) {
tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
} else {
fullyApplied = false;
LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
return null;
}
if (null == filter) {
filter = tempFilter;
} else {
filter = and(filter, tempFilter);
}
}
return filter;
}
Aggregations