Use of com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL in project presto by prestodb.
Class TestSelectiveOrcReader, method testMaps.
/**
 * Round-trips map(INTEGER, INTEGER) columns through the tester, alone and next to an INTEGER
 * column, with IS_NULL / IS_NOT_NULL filters on the map column and range filters on the
 * INTEGER column.
 */
@Test
public void testMaps() throws Exception {
    // A single seeded generator feeds every data set below, so the statements are kept in
    // this order to draw the same sequence of values on every run.
    Random random = new Random(0);
    // Range filter applied to the INTEGER column in several of the cases below.
    BigintRange nonNegative = BigintRange.of(0, Integer.MAX_VALUE, false);

    // single map column, no filters
    tester.testRoundTrip(
            mapType(INTEGER, INTEGER),
            createList(NUM_ROWS, row -> createMap(row)));

    // map column with no nulls
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, mapType(INTEGER, INTEGER)),
            ImmutableList.of(
                    createList(NUM_ROWS, row -> random.nextInt()),
                    createList(NUM_ROWS, row -> createMap(row))),
            toSubfieldFilters(
                    ImmutableMap.of(0, nonNegative),
                    ImmutableMap.of(1, IS_NOT_NULL),
                    ImmutableMap.of(1, IS_NULL)));

    // map column with nulls (every fifth row is null)
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, mapType(INTEGER, INTEGER)),
            ImmutableList.of(
                    createList(NUM_ROWS, row -> random.nextInt()),
                    createList(NUM_ROWS, row -> row % 5 == 0 ? null : createMap(row))),
            toSubfieldFilters(
                    ImmutableMap.of(0, nonNegative),
                    ImmutableMap.of(1, IS_NOT_NULL),
                    ImmutableMap.of(1, IS_NULL),
                    ImmutableMap.of(0, nonNegative, 1, IS_NULL),
                    ImmutableMap.of(0, nonNegative, 1, IS_NOT_NULL)));

    // map column with filter, followed by another column with filter
    tester.testRoundTripTypes(
            ImmutableList.of(mapType(INTEGER, INTEGER), INTEGER),
            ImmutableList.of(
                    createList(NUM_ROWS, row -> row % 5 == 0 ? null : createMap(row)),
                    createList(NUM_ROWS, row -> random.nextInt())),
            toSubfieldFilters(
                    ImmutableMap.of(0, IS_NULL, 1, nonNegative),
                    ImmutableMap.of(0, IS_NOT_NULL, 1, nonNegative)));

    // empty maps
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, mapType(INTEGER, INTEGER)),
            ImmutableList.of(
                    createList(NUM_ROWS, row -> random.nextInt()),
                    Collections.nCopies(NUM_ROWS, ImmutableMap.of())),
            ImmutableList.of());

    // read selected positions from all nulls map column
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, mapType(INTEGER, INTEGER)),
            ImmutableList.of(
                    createList(NUM_ROWS, row -> random.nextInt(10)),
                    createList(NUM_ROWS, row -> null)),
            toSubfieldFilters(ImmutableMap.of(0, BigintRange.of(0, 5, false))));
}
Use of com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL in project presto by prestodb.
Class TestMapFlatSelectiveStreamReader, method runTest.
/**
 * Checks the map column stored in the given DWRF test resource against the values produced by
 * {@code expectedValuesBuilder}, under several read configurations: unfiltered, IS_NULL,
 * IS_NOT_NULL, restricted to a subset of map keys (skipped for VARBINARY keys), restricted to a
 * subset of rows via a filter on a synthetic INTEGER column, and with a filter function.
 *
 * @param testOrcFileName resource name of the file to read
 * @param keyType type of the map keys
 * @param valueType type of the map values
 * @param expectedValuesBuilder builds the rows the file is expected to contain
 */
private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, ExpectedValuesBuilder<K, V> expectedValuesBuilder) throws Exception {
    List<Map<K, V>> allRows = expectedValuesBuilder.build();
    Type mapColumnType = mapType(keyType, valueType);
    OrcPredicate predicate = createOrcPredicate(0, mapColumnType, allRows, OrcTester.Format.DWRF, true);

    // whole column, no filter
    runTest(testOrcFileName, mapColumnType, allRows, predicate, Optional.empty(), ImmutableList.of());

    // only the null rows
    List<Map<K, V>> nullRows = allRows.stream()
            .filter(Objects::isNull)
            .collect(toList());
    runTest(testOrcFileName, mapColumnType, nullRows, predicate, Optional.of(IS_NULL), ImmutableList.of());

    // only the non-null rows
    List<Map<K, V>> nonNullRows = allRows.stream()
            .filter(Objects::nonNull)
            .collect(toList());
    runTest(testOrcFileName, mapColumnType, nonNullRows, predicate, Optional.of(IS_NOT_NULL), ImmutableList.of());

    if (keyType != VARBINARY) {
        // read only some keys
        List<K> distinctKeys = allRows.stream()
                .filter(Objects::nonNull)
                .flatMap(row -> row.keySet().stream())
                .distinct()
                .collect(toImmutableList());
        if (!distinctKeys.isEmpty()) {
            // just the first distinct key
            List<K> firstKey = ImmutableList.of(distinctKeys.get(0));
            runTest(testOrcFileName, mapColumnType, pruneMaps(allRows, firstKey), predicate, Optional.empty(), toSubfields(keyType, firstKey));

            // a scattering of keys; positions beyond the number of distinct keys are dropped
            List<Integer> keyPositions = ImmutableList.of(1, 3, 7, 11);
            List<K> scatteredKeys = keyPositions.stream()
                    .filter(position -> position < distinctKeys.size())
                    .map(distinctKeys::get)
                    .collect(toList());
            runTest(testOrcFileName, mapColumnType, pruneMaps(allRows, scatteredKeys), predicate, Optional.empty(), toSubfields(keyType, scatteredKeys));
        }
    }

    // read only some rows: pair the map column with an INTEGER column holding (row index % 10)
    // and filter on that second column
    List<Integer> ids = IntStream.range(0, allRows.size())
            .mapToObj(position -> position % 10)
            .collect(toImmutableList());
    ImmutableList<Type> types = ImmutableList.of(mapColumnType, INTEGER);
    Map<Subfield, TupleDomainFilter> idFilter = ImmutableMap.of(new Subfield("c"), toBigintValues(new long[] { 1, 5, 6 }, true));
    Map<Integer, Map<Subfield, TupleDomainFilter>> filters = ImmutableMap.of(1, idFilter);
    File orcFile = new File(getResource(testOrcFileName).getFile());
    assertFileContentsPresto(
            types,
            orcFile,
            filterRows(types, ImmutableList.of(allRows, ids), filters),
            OrcEncoding.DWRF,
            OrcPredicate.TRUE,
            Optional.of(filters),
            ImmutableList.of(),
            ImmutableMap.of(),
            ImmutableMap.of());

    // same two columns, this time narrowed by a filter function instead of subfield filters
    TestingFilterFunction filterFunction = new TestingFilterFunction(mapColumnType);
    assertFileContentsPresto(
            types,
            orcFile,
            filterFunction.filterRows(ImmutableList.of(allRows, ids)),
            OrcEncoding.DWRF,
            OrcPredicate.TRUE,
            Optional.empty(),
            ImmutableList.of(filterFunction),
            ImmutableMap.of(0, 0),
            ImmutableMap.of());
}
Use of com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL in project presto by prestodb.
Class TupleDomainFilterUtils, method toFilter.
/**
 * Converts a {@link Domain} into a {@link TupleDomainFilter}.
 *
 * <p>Selection order:
 * <ul>
 * <li>all values: {@code IS_NOT_NULL} (nulls must not be allowed);</li>
 * <li>no values: {@code IS_NULL} (nulls must be allowed);</li>
 * <li>exactly one range: the filter built by {@code createRangeFilter};</li>
 * <li>BOOLEAN with several ranges: the filter built by {@code createBooleanFilter};</li>
 * <li>several ranges: a type-specific multi-value / multi-range filter, falling back to
 * {@code MultiRange}.</li>
 * </ul>
 *
 * <p>{@code checkArgument} rejects an always-true domain (all values, nulls allowed), an
 * always-false domain (no values, nulls not allowed), and any value set that is not a
 * {@code SortedRangeSet}.
 *
 * @param domain the domain to convert
 * @return a filter corresponding to {@code domain}
 */
public static TupleDomainFilter toFilter(Domain domain) {
    ValueSet values = domain.getValues();
    boolean nullAllowed = domain.isNullAllowed();
    if (values.isAll()) {
        checkArgument(!nullAllowed, "Unexpected always-true filter");
        return IS_NOT_NULL;
    }
    if (values.isNone()) {
        checkArgument(nullAllowed, "Unexpected always-false filter");
        return IS_NULL;
    }
    checkArgument(values instanceof SortedRangeSet, "Unexpected domain type: " + values.getClass().getSimpleName());
    List<Range> ranges = ((SortedRangeSet) values).getOrderedRanges();
    if (ranges.isEmpty() && nullAllowed) {
        return IS_NULL;
    }
    Type type = domain.getType();
    if (ranges.size() == 1) {
        return createRangeFilter(type, ranges.get(0), nullAllowed);
    }
    if (type == BOOLEAN) {
        return createBooleanFilter(ranges, nullAllowed);
    }
    // Build one filter per range with nulls disallowed; null handling is applied once on the
    // combined filter below. Ranges that reduce to ALWAYS_FALSE are dropped.
    List<TupleDomainFilter> rangeFilters = ranges.stream().map(range -> createRangeFilter(type, range, false)).filter(rangeFilter -> !rangeFilter.equals(ALWAYS_FALSE)).collect(toList());
    if (rangeFilters.isEmpty()) {
        return nullAllowed ? IS_NULL : ALWAYS_FALSE;
    }
    // The class of the first range filter decides which combined filter is built; the casts
    // below rely on every remaining range filter being of that same class.
    TupleDomainFilter firstRangeFilter = rangeFilters.get(0);
    if (firstRangeFilter instanceof BigintRange) {
        List<BigintRange> bigintRanges = rangeFilters.stream().map(BigintRange.class::cast).collect(toList());
        if (bigintRanges.stream().allMatch(BigintRange::isSingleValue)) {
            // Every range is a single value: use a value-list filter.
            return toBigintValues(bigintRanges.stream().mapToLong(BigintRange::getLower).toArray(), nullAllowed);
        }
        return BigintMultiRange.of(bigintRanges, nullAllowed);
    }
    if (firstRangeFilter instanceof BytesRange) {
        List<BytesRange> bytesRanges = rangeFilters.stream().map(BytesRange.class::cast).collect(toList());
        if (bytesRanges.stream().allMatch(BytesRange::isSingleValue)) {
            return BytesValues.of(bytesRanges.stream().map(BytesRange::getLower).toArray(byte[][]::new), nullAllowed);
        }
        if (isNotIn(ranges)) {
            // The non-null lower bounds of the ranges are passed as the values to exclude.
            return BytesValuesExclusive.of(bytesRanges.stream().map(BytesRange::getLower).filter(Objects::nonNull).toArray(byte[][]::new), nullAllowed);
        }
        // Otherwise fall through to the generic MultiRange at the end of the method.
    }
    if (firstRangeFilter instanceof DoubleRange || firstRangeFilter instanceof FloatRange) {
        // != and NOT IN filters should return true when applied to NaN
        // E.g. NaN != 1.0 as well as NaN NOT IN (1.0, 2.5, 3.6) should return true; otherwise false.
        boolean nanAllowed = isNotIn(ranges);
        return MultiRange.of(rangeFilters, nullAllowed, nanAllowed);
    }
    return MultiRange.of(rangeFilters, nullAllowed, false);
}
Use of com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL in project presto by prestodb.
Class TestSelectiveOrcReader, method testArrayIndexOutOfBounds.
/**
 * Verifies that an IS_NULL filter on an array subscript ("c[2]" or "c[2][3]") makes the round
 * trip fail with an InvalidFunctionArgumentException whose message contains
 * "Array subscript out of bounds", for short, nested and empty arrays.
 */
@Test
public void testArrayIndexOutOfBounds() throws Exception {
    // Seeded generator shared by the cases below; their order is kept so each case draws the
    // same values on every run.
    Random random = new Random(0);
    String expectedMessage = "Array subscript out of bounds";
    Subfield flatSubscript = new Subfield("c[2]");
    Subfield nestedSubscript = new Subfield("c[2][3]");

    // non-null arrays of varying sizes
    try {
        tester.testRoundTrip(
                arrayType(INTEGER),
                createList(NUM_ROWS, row -> randomIntegers(random.nextInt(10), random)),
                ImmutableList.of(ImmutableMap.of(flatSubscript, IS_NULL)));
        fail("Expected 'Array subscript out of bounds' exception");
    }
    catch (InvalidFunctionArgumentException expected) {
        assertTrue(expected.getMessage().contains(expectedMessage));
    }

    // non-null nested arrays of varying sizes
    try {
        tester.testRoundTrip(
                arrayType(arrayType(INTEGER)),
                createList(NUM_ROWS, row -> ImmutableList.of(randomIntegers(random.nextInt(5), random), randomIntegers(random.nextInt(5), random))),
                ImmutableList.of(ImmutableMap.of(nestedSubscript, IS_NULL)));
        fail("Expected 'Array subscript out of bounds' exception");
    }
    catch (InvalidFunctionArgumentException expected) {
        assertTrue(expected.getMessage().contains(expectedMessage));
    }

    // empty arrays
    try {
        tester.testRoundTrip(
                arrayType(INTEGER),
                nCopies(NUM_ROWS, ImmutableList.of()),
                ImmutableList.of(ImmutableMap.of(flatSubscript, IS_NULL)));
        fail("Expected 'Array subscript out of bounds' exception");
    }
    catch (InvalidFunctionArgumentException expected) {
        assertTrue(expected.getMessage().contains(expectedMessage));
    }

    // empty nested arrays
    try {
        tester.testRoundTrip(
                arrayType(arrayType(INTEGER)),
                nCopies(NUM_ROWS, ImmutableList.of()),
                ImmutableList.of(ImmutableMap.of(nestedSubscript, IS_NULL)));
        fail("Expected 'Array subscript out of bounds' exception");
    }
    catch (InvalidFunctionArgumentException expected) {
        assertTrue(expected.getMessage().contains(expectedMessage));
    }
}
Use of com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL in project presto by prestodb.
Class TestSelectiveOrcReader, method testArrays.
/**
 * Round-trips array columns (integers, strings, nested arrays), alone and next to an INTEGER
 * column, with IS_NULL / IS_NOT_NULL filters on the whole column and range or value filters on
 * individual subscripts.
 */
@Test
public void testArrays() throws Exception {
    // Seeded generator shared by all cases; statement order is kept so the generated data is
    // the same on every run.
    Random random = new Random(0);
    BigintRange negative = BigintRange.of(Integer.MIN_VALUE, 0, false);
    BigintRange nonNegative = BigintRange.of(0, Integer.MAX_VALUE, false);

    // non-null arrays of varying sizes; some arrays may be empty
    tester.testRoundTrip(
            arrayType(INTEGER),
            createList(NUM_ROWS, row -> randomIntegers(random.nextInt(10), random)),
            IS_NULL,
            IS_NOT_NULL);

    // arrays of strings
    tester.testRoundTrip(
            arrayType(VARCHAR),
            createList(1000, row -> randomStrings(5 + random.nextInt(5), random)),
            ImmutableList.of(
                    toSubfieldFilter("c[1]", IS_NULL),
                    toSubfieldFilter("c[1]", stringIn(true, "a", "b", "c", "d"))));
    tester.testRoundTrip(
            arrayType(VARCHAR),
            createList(10, row -> randomStringsWithNulls(5 + random.nextInt(5), random)),
            ImmutableList.of(
                    toSubfieldFilter("c[1]", IS_NULL),
                    toSubfieldFilter("c[1]", stringIn(true, "a", "b", "c", "d"))));

    // non-empty non-null arrays of varying sizes
    tester.testRoundTrip(
            arrayType(INTEGER),
            createList(NUM_ROWS, row -> randomIntegers(5 + random.nextInt(5), random)),
            ImmutableList.of(
                    toSubfieldFilter(IS_NULL),
                    toSubfieldFilter(IS_NOT_NULL),
                    // c[1] >= 0
                    toSubfieldFilter("c[1]", nonNegative),
                    // c[2] >= 0 AND c[4] >= 0
                    ImmutableMap.of(new Subfield("c[2]"), nonNegative, new Subfield("c[4]"), nonNegative)));

    // non-null arrays of varying sizes; some arrays may be empty
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, arrayType(INTEGER)),
            ImmutableList.of(
                    randomIntegers(NUM_ROWS, random),
                    createList(NUM_ROWS, row -> randomIntegers(random.nextInt(10), random))),
            toSubfieldFilters(
                    ImmutableMap.of(0, nonNegative),
                    ImmutableMap.of(0, nonNegative, 1, IS_NULL),
                    ImmutableMap.of(0, nonNegative, 1, IS_NOT_NULL)));

    // non-empty non-null arrays of varying sizes
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, arrayType(INTEGER)),
            ImmutableList.of(
                    randomIntegers(NUM_ROWS, random),
                    createList(NUM_ROWS, row -> randomIntegers(5 + random.nextInt(5), random))),
            ImmutableList.of(
                    // c[1] >= 0
                    ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, toSubfieldFilter("c[1]", nonNegative)),
                    // c[3] >= 0
                    ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, toSubfieldFilter("c[3]", nonNegative)),
                    // c[2] >= 0 AND c[4] <= 0
                    ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, ImmutableMap.of(new Subfield("c[2]"), nonNegative, new Subfield("c[4]"), negative))));

    // nested arrays
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, arrayType(arrayType(INTEGER))),
            ImmutableList.of(
                    randomIntegers(NUM_ROWS, random),
                    createList(NUM_ROWS, row -> createList(random.nextInt(10), inner -> randomIntegers(random.nextInt(5), random)))),
            toSubfieldFilters(
                    ImmutableMap.of(0, nonNegative),
                    ImmutableMap.of(1, IS_NULL),
                    ImmutableMap.of(1, IS_NOT_NULL),
                    ImmutableMap.of(0, nonNegative, 1, IS_NULL)));

    // nested arrays with at least three outer and three inner elements, filtered on subscripts
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, arrayType(arrayType(INTEGER))),
            ImmutableList.of(
                    randomIntegers(NUM_ROWS, random),
                    createList(NUM_ROWS, row -> createList(3 + random.nextInt(10), inner -> randomIntegers(3 + random.nextInt(5), random)))),
            ImmutableList.of(
                    // c[1] IS NULL
                    ImmutableMap.of(1, ImmutableMap.of(new Subfield("c[1]"), IS_NULL)),
                    // c[2] IS NOT NULL AND c[2][3] >= 0
                    ImmutableMap.of(1, ImmutableMap.of(new Subfield("c[2]"), IS_NOT_NULL, new Subfield("c[2][3]"), nonNegative)),
                    // range filter on column 0 combined with c[1] IS NULL on column 1
                    ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, ImmutableMap.of(new Subfield("c[1]"), IS_NULL))));
}
Aggregations