Use of org.apache.parquet.filter2.predicate.Operators.LongColumn in the parquet-mr project by apache.
From the class TestFilterApiMethods, method testSerializable.
/**
 * Checks that a composite filter predicate survives a Java serialization round trip.
 *
 * <p>The predicate combines {@code and}, {@code or}, {@code eq} and two user-defined
 * predicates (one given by class, one given by instance). It is written to an in-memory
 * byte buffer, read back, and compared to the original with {@code assertEquals}.
 *
 * @throws Exception if serialization or deserialization fails
 */
@Test
public void testSerializable() throws Exception {
  BinaryColumn binary = binaryColumn("foo");
  FilterPredicate p = and(
      or(
          and(userDefined(intColumn, DummyUdp.class), predicate),
          eq(binary, Binary.fromString("hi"))),
      userDefined(longColumn, new IsMultipleOf(7)));

  // The output stream is closed (and therefore flushed) before the buffer is read,
  // even when writeObject throws.
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
    oos.writeObject(p);
  }

  // The input stream is now closed as well; the original left it open.
  FilterPredicate read;
  try (ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
    read = (FilterPredicate) is.readObject();
  }

  assertEquals(p, read);
}
Use of org.apache.parquet.filter2.predicate.Operators.LongColumn in the parquet-mr project by apache.
From the class TestRecordLevelFilters, method testUserDefinedByInstance.
/**
 * Reads the phone book file through a user-defined predicate supplied as an instance
 * ({@code SetInFilter} over a fixed set of ids on the "id" column) and checks the
 * result with {@code assertFilter} against an in-memory filter that keeps the
 * non-null users whose id is in the same set.
 *
 * @throws Exception if reading the file fails
 */
@Test
public void testUserDefinedByInstance() throws Exception {
  final HashSet<Long> wantedIds = new HashSet<Long>();
  wantedIds.add(20L);
  wantedIds.add(27L);
  wantedIds.add(28L);

  LongColumn idColumn = longColumn("id");
  FilterPredicate idInSet = userDefined(idColumn, new SetInFilter(wantedIds));
  List<Group> matches = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(idInSet));

  // Reference filter: same membership test, evaluated on the User objects directly.
  UserFilter expected = new UserFilter() {
    @Override
    public boolean keep(User u) {
      if (u == null) {
        return false;
      }
      return wantedIds.contains(u.getId());
    }
  };
  assertFilter(matches, expected);
}
Use of org.apache.parquet.filter2.predicate.Operators.LongColumn in the presto project by prestodb.
From the class TestColumnIndexBuilder, method testBuildInt64.
/*
 * Exercises the ColumnIndexBuilder obtained for a required INT64 column in three
 * scenarios whose resulting boundary order is asserted to be UNORDERED, ASCENDING
 * and DESCENDING respectively. Each scenario checks the page count, the min/max
 * size, the boundary order, the per-page null counts and null-page flags, the
 * per-page min and max values, and the outcome of a fixed set of filter predicates
 * (eq, notEq, gt, gtEq, lt, ltEq, a user-defined predicate and its inversion).
 *
 * NOTE(review): each sb.stats(type, ...) call appears to describe one page by listing
 * its values, with null arguments counted as nulls; the asserted null counts and
 * min/max values are consistent with that reading. The trailing int arguments of
 * assertCorrectFiltering are presumably the indexes of the pages expected to match.
 * Confirm both against StatsBuilder and assertCorrectFiltering.
 */
@Test
public void testBuildInt64() {
PrimitiveType type = Types.required(INT64).named("test_int64");
ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
// Type check on the concrete builder class is disabled in this copy of the test.
// assertThat(builder, instanceOf(LongColumnIndexBuilder.class));
// A builder that has received no pages is expected to build null.
assertNull(builder.build());
Operators.LongColumn col = longColumn("test_col");

// ---- Scenario 1: six pages, expected boundary order UNORDERED ----
StatsBuilder sb = new StatsBuilder();
builder.add(sb.stats(type, -4L, 10L));
builder.add(sb.stats(type, -11L, 7L, null));
builder.add(sb.stats(type, 2L, 2L, null, null));
builder.add(sb.stats(type, null, null, null));
builder.add(sb.stats(type, 1L, 2L));
builder.add(sb.stats(type, -21L, 8L));
assertEquals(6, builder.getPageCount());
assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
ColumnIndex columnIndex = builder.build();
assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
assertCorrectNullCounts(columnIndex, 0L, 1L, 2L, 3L, 0L, 0L);
assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
assertCorrectValues(columnIndex.getMaxValues(), 10L, 7L, 2L, null, 2L, 8L);
assertCorrectValues(columnIndex.getMinValues(), -4L, -11L, 2L, null, 1L, -21L);
assertCorrectFiltering(columnIndex, eq(col, 0L), 0, 1, 5);
assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
assertCorrectFiltering(columnIndex, notEq(col, 0L), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
assertCorrectFiltering(columnIndex, gt(col, 2L), 0, 1, 5);
assertCorrectFiltering(columnIndex, gtEq(col, 2L), 0, 1, 2, 4, 5);
assertCorrectFiltering(columnIndex, lt(col, -21L));
assertCorrectFiltering(columnIndex, ltEq(col, -21L), 5);
assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 0, 1, 5);
assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);

// ---- Scenario 2: nine pages (five of them all-null), expected boundary order ASCENDING ----
// A fresh builder and StatsBuilder are used so nothing carries over from scenario 1.
builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
sb = new StatsBuilder();
builder.add(sb.stats(type, null, null));
builder.add(sb.stats(type, -532L, -345L, null, null));
builder.add(sb.stats(type, -234L, -42L, null));
builder.add(sb.stats(type, null, null));
builder.add(sb.stats(type, null, null, null));
builder.add(sb.stats(type, -42L, 2L));
builder.add(sb.stats(type, null, null));
builder.add(sb.stats(type, -3L, 42L));
builder.add(sb.stats(type, null, null));
assertEquals(9, builder.getPageCount());
assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
columnIndex = builder.build();
assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
assertCorrectValues(columnIndex.getMaxValues(), null, -345L, -42L, null, null, 2L, null, 42L, null);
assertCorrectValues(columnIndex.getMinValues(), null, -532L, -234L, null, null, -42L, null, -3L, null);
assertCorrectFiltering(columnIndex, eq(col, -42L), 2, 5);
assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
assertCorrectFiltering(columnIndex, notEq(col, -42L), 0, 1, 2, 3, 4, 5, 6, 7, 8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
assertCorrectFiltering(columnIndex, gt(col, 2L), 7);
assertCorrectFiltering(columnIndex, gtEq(col, 2L), 5, 7);
assertCorrectFiltering(columnIndex, lt(col, -42L), 1, 2);
assertCorrectFiltering(columnIndex, ltEq(col, -42L), 1, 2, 5);
assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 1, 2, 5, 7);
assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);

// ---- Scenario 3: nine pages (five of them all-null), expected boundary order DESCENDING ----
builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
sb = new StatsBuilder();
builder.add(sb.stats(type, null, null, null, null, null));
builder.add(sb.stats(type, 532L, 345L));
builder.add(sb.stats(type, null, null, null));
builder.add(sb.stats(type, 234L, 42L, null));
builder.add(sb.stats(type, null, null));
builder.add(sb.stats(type, 42L, -2L));
builder.add(sb.stats(type, null, null));
builder.add(sb.stats(type, null, null));
builder.add(sb.stats(type, -3L, -42L));
assertEquals(9, builder.getPageCount());
assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
columnIndex = builder.build();
assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
assertCorrectValues(columnIndex.getMaxValues(), null, 532L, null, 234L, null, 42L, null, null, -3L);
assertCorrectValues(columnIndex.getMinValues(), null, 345L, null, 42L, null, -2L, null, null, -42L);
assertCorrectFiltering(columnIndex, eq(col, 0L), 5);
assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
assertCorrectFiltering(columnIndex, notEq(col, 0L), 0, 1, 2, 3, 4, 5, 6, 7, 8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
assertCorrectFiltering(columnIndex, gt(col, 2L), 1, 3, 5);
assertCorrectFiltering(columnIndex, gtEq(col, 2L), 1, 3, 5);
assertCorrectFiltering(columnIndex, lt(col, -42L));
assertCorrectFiltering(columnIndex, ltEq(col, -42L), 8);
assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 1, 3, 5, 8);
assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
}
Use of org.apache.parquet.filter2.predicate.Operators.LongColumn in the Gaffer project by gchq.
From the class JavaPredicateToParquetPredicate, method getIsEqualFilter.
/**
 * Builds a Parquet filter predicate that tests a column for equality with the given values.
 *
 * <p>The column name is expanded into Parquet paths via
 * {@code schemaUtils.getPaths(group, colName)}; when that returns {@code null} the
 * column name itself is used as the only path. One {@code eq} predicate is built per
 * path from the value at the same index of {@code parquetObjects}, and the per-path
 * predicates are combined with {@code and}.
 *
 * <p>If a value is {@code null} or of a type with no branch below, {@code fullyApplied}
 * is set to {@code false}, a warning is logged and {@code null} is returned.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the values to compare against, read index-for-index with the
 *                       paths (assumed to hold at least one entry per path)
 * @param group          the group passed to {@code schemaUtils.getPaths}
 * @param schemaUtils    used to resolve the column name to its paths
 * @return the combined predicate, or {@code null} if there are no paths or a value
 *         cannot be translated
 */
public FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
  String[] paths = schemaUtils.getPaths(group, colName);
  if (null == paths) {
    paths = new String[] {colName};
  }
  FilterPredicate filter = null;
  for (int i = 0; i < paths.length; i++) {
    final String path = paths[i];
    final Object value = parquetObjects[i];
    final FilterPredicate tempFilter;
    if (value instanceof String) {
      tempFilter = eq(binaryColumn(path), Binary.fromString((String) value));
    } else if (value instanceof Boolean) {
      tempFilter = eq(booleanColumn(path), (Boolean) value);
    } else if (value instanceof Double) {
      tempFilter = eq(doubleColumn(path), (Double) value);
    } else if (value instanceof Float) {
      tempFilter = eq(floatColumn(path), (Float) value);
    } else if (value instanceof Integer) {
      tempFilter = eq(intColumn(path), (Integer) value);
    } else if (value instanceof Long) {
      tempFilter = eq(longColumn(path), (Long) value);
    } else if (value instanceof java.util.Date) {
      // Also handles java.sql.Date, which extends java.util.Date; the separate
      // java.sql.Date branch that used to follow could never be reached.
      tempFilter = eq(longColumn(path), ((java.util.Date) value).getTime());
    } else if (value instanceof Short) {
      // Shorts are compared through an int column.
      tempFilter = eq(intColumn(path), ((Short) value).intValue());
    } else if (value instanceof byte[]) {
      tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
    } else {
      fullyApplied = false;
      // A null value matches no instanceof test; report it as unsupported rather
      // than failing with a NullPointerException on getClass().
      final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
      LOGGER.warn(typeName + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
      return null;
    }
    filter = null == filter ? tempFilter : and(filter, tempFilter);
  }
  return filter;
}
Use of org.apache.parquet.filter2.predicate.Operators.LongColumn in the Gaffer project by gchq.
From the class JavaPredicateToParquetPredicate, method getIsLessThanOrEqualToFilter.
/**
 * Builds a Parquet filter predicate that tests a column for being less than or equal
 * to the given values.
 *
 * <p>The column name is expanded into Parquet paths via
 * {@code schemaUtils.getPaths(group, colName)}; when that returns {@code null} the
 * column name itself is used as the only path. One {@code ltEq} predicate is built per
 * path from the value at the same index of {@code parquetObjects}, and the per-path
 * predicates are combined with {@code and}.
 *
 * <p>If a value is {@code null} or of a type with no branch below, {@code fullyApplied}
 * is set to {@code false}, a warning is logged and {@code null} is returned.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the upper bounds, read index-for-index with the paths
 *                       (assumed to hold at least one entry per path)
 * @param group          the group passed to {@code schemaUtils.getPaths}
 * @param schemaUtils    used to resolve the column name to its paths
 * @return the combined predicate, or {@code null} if there are no paths or a value
 *         cannot be translated
 */
private FilterPredicate getIsLessThanOrEqualToFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
  String[] paths = schemaUtils.getPaths(group, colName);
  if (null == paths) {
    paths = new String[] {colName};
  }
  FilterPredicate filter = null;
  for (int i = 0; i < paths.length; i++) {
    final String path = paths[i];
    final Object value = parquetObjects[i];
    final FilterPredicate tempFilter;
    if (value instanceof String) {
      tempFilter = ltEq(binaryColumn(path), Binary.fromString((String) value));
    } else if (value instanceof Double) {
      tempFilter = ltEq(doubleColumn(path), (Double) value);
    } else if (value instanceof Float) {
      tempFilter = ltEq(floatColumn(path), (Float) value);
    } else if (value instanceof Integer) {
      tempFilter = ltEq(intColumn(path), (Integer) value);
    } else if (value instanceof Long) {
      tempFilter = ltEq(longColumn(path), (Long) value);
    } else if (value instanceof java.util.Date) {
      // Also handles java.sql.Date, which extends java.util.Date; the separate
      // java.sql.Date branch that used to follow could never be reached.
      tempFilter = ltEq(longColumn(path), ((java.util.Date) value).getTime());
    } else if (value instanceof Short) {
      // Shorts are compared through an int column.
      tempFilter = ltEq(intColumn(path), ((Short) value).intValue());
    } else if (value instanceof byte[]) {
      tempFilter = ltEq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
    } else {
      fullyApplied = false;
      // A null value matches no instanceof test; report it as unsupported rather
      // than failing with a NullPointerException on getClass().
      final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
      LOGGER.warn(typeName + " is not a natively supported type for the IsLessThanOrEqualTo filter, therefore execution will take longer to perform this filter.");
      return null;
    }
    filter = null == filter ? tempFilter : and(filter, tempFilter);
  }
  return filter;
}
Aggregations