use of com.facebook.presto.common.predicate.TupleDomainFilter in project presto by prestodb.
the class TestSelectiveOrcReader method testHiddenConstantColumns.
@Test
public void testHiddenConstantColumns() throws Exception {
Type type = BIGINT;
List<Type> types = ImmutableList.of(type);
List<List<?>> values = ImmutableList.of(ImmutableList.of(1L, 2L));
TempFile tempFile = new TempFile();
writeOrcColumnsPresto(tempFile.getFile(), DWRF, ZSTD, Optional.empty(), types, values, new OrcWriterStats());
// Hidden columns like partition columns use negative indices (-13).
int hiddenColumnIndex = -13;
Map<Integer, Type> includedColumns = ImmutableMap.of(hiddenColumnIndex, VARCHAR, 0, BIGINT);
List<Integer> outputColumns = ImmutableList.of(hiddenColumnIndex, 0);
Slice constantSlice = Slices.utf8Slice("partition_value");
Map<Integer, Object> constantValues = ImmutableMap.of(hiddenColumnIndex, constantSlice);
OrcAggregatedMemoryContext systemMemoryUsage = new TestingHiveOrcAggregatedMemoryContext();
TupleDomainFilter filter = BigintRange.of(1, 1, false);
Map<Subfield, TupleDomainFilter> subFieldFilter = toSubfieldFilter(filter);
OrcReaderSettings readerSettings = OrcTester.OrcReaderSettings.builder().setColumnFilters(ImmutableMap.of(0, subFieldFilter)).build();
try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile.getFile(), DWRF.getOrcEncoding(), OrcPredicate.TRUE, types, 1, readerSettings.getColumnFilters(), readerSettings.getFilterFunctions(), readerSettings.getFilterFunctionInputMapping(), readerSettings.getRequiredSubfields(), constantValues, ImmutableMap.of(), includedColumns, outputColumns, false, systemMemoryUsage, false)) {
Page page = recordReader.getNextPage();
assertEquals(page.getPositionCount(), 1);
Block partitionValueBlock = page.getBlock(0);
int length = partitionValueBlock.getSliceLength(0);
Slice varcharSlice = partitionValueBlock.getSlice(0, 0, length);
assertEquals(varcharSlice, constantSlice);
Block bigintBlock = page.getBlock(1);
assertEquals(bigintBlock.getLong(0), 1);
assertNull(recordReader.getNextPage());
}
}
use of com.facebook.presto.common.predicate.TupleDomainFilter in project presto by prestodb.
the class TestMapFlatSelectiveStreamReader method runTest.
private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, ExpectedValuesBuilder<K, V> expectedValuesBuilder) throws Exception {
List<Map<K, V>> expectedValues = expectedValuesBuilder.build();
Type mapType = mapType(keyType, valueType);
OrcPredicate orcPredicate = createOrcPredicate(0, mapType, expectedValues, OrcTester.Format.DWRF, true);
runTest(testOrcFileName, mapType, expectedValues, orcPredicate, Optional.empty(), ImmutableList.of());
runTest(testOrcFileName, mapType, expectedValues.stream().filter(Objects::isNull).collect(toList()), orcPredicate, Optional.of(IS_NULL), ImmutableList.of());
runTest(testOrcFileName, mapType, expectedValues.stream().filter(Objects::nonNull).collect(toList()), orcPredicate, Optional.of(IS_NOT_NULL), ImmutableList.of());
if (keyType != VARBINARY) {
// read only some keys
List<K> keys = expectedValues.stream().filter(Objects::nonNull).flatMap(v -> v.keySet().stream()).distinct().collect(toImmutableList());
if (!keys.isEmpty()) {
List<K> requiredKeys = ImmutableList.of(keys.get(0));
runTest(testOrcFileName, mapType, pruneMaps(expectedValues, requiredKeys), orcPredicate, Optional.empty(), toSubfields(keyType, requiredKeys));
List<Integer> keyIndices = ImmutableList.of(1, 3, 7, 11);
requiredKeys = keyIndices.stream().filter(k -> k < keys.size()).map(keys::get).collect(toList());
runTest(testOrcFileName, mapType, pruneMaps(expectedValues, requiredKeys), orcPredicate, Optional.empty(), toSubfields(keyType, requiredKeys));
}
}
// read only some rows
List<Integer> ids = IntStream.range(0, expectedValues.size()).map(i -> i % 10).boxed().collect(toImmutableList());
ImmutableList<Type> types = ImmutableList.of(mapType, INTEGER);
Map<Integer, Map<Subfield, TupleDomainFilter>> filters = ImmutableMap.of(1, ImmutableMap.of(new Subfield("c"), toBigintValues(new long[] { 1, 5, 6 }, true)));
assertFileContentsPresto(types, new File(getResource(testOrcFileName).getFile()), filterRows(types, ImmutableList.of(expectedValues, ids), filters), OrcEncoding.DWRF, OrcPredicate.TRUE, Optional.of(filters), ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of());
TestingFilterFunction filterFunction = new TestingFilterFunction(mapType);
assertFileContentsPresto(types, new File(getResource(testOrcFileName).getFile()), filterFunction.filterRows(ImmutableList.of(expectedValues, ids)), OrcEncoding.DWRF, OrcPredicate.TRUE, Optional.empty(), ImmutableList.of(filterFunction), ImmutableMap.of(0, 0), ImmutableMap.of());
}
use of com.facebook.presto.common.predicate.TupleDomainFilter in project presto by prestodb.
the class TestMapFlatSelectiveStreamReader method runTest.
private <K, V> void runTest(String testOrcFileName, Type mapType, List<Map<K, V>> expectedValues, OrcPredicate orcPredicate, Optional<TupleDomainFilter> filter, List<Subfield> subfields) throws Exception {
List<Type> types = ImmutableList.of(mapType);
Optional<Map<Integer, Map<Subfield, TupleDomainFilter>>> filters = filter.map(f -> ImmutableMap.of(new Subfield("c"), f)).map(f -> ImmutableMap.of(0, f));
assertFileContentsPresto(types, new File(getResource(testOrcFileName).getFile()), filters.map(f -> filterRows(types, ImmutableList.of(expectedValues), f)).orElse(ImmutableList.of(expectedValues)), OrcEncoding.DWRF, orcPredicate, filters, ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of(0, subfields));
}
use of com.facebook.presto.common.predicate.TupleDomainFilter in project presto by prestodb.
the class OrcSelectiveRecordReader method createStreamReaders.
private static SelectiveStreamReader[] createStreamReaders(OrcDataSource orcDataSource, List<OrcType> types, DateTimeZone hiveStorageTimeZone, OrcRecordReaderOptions options, boolean legacyMapSubscript, Map<Integer, Type> includedColumns, List<Integer> outputColumns, Map<Integer, Map<Subfield, TupleDomainFilter>> filters, List<FilterFunction> filterFunctions, Map<Integer, Integer> filterFunctionInputMapping, Map<Integer, List<Subfield>> requiredSubfields, OrcAggregatedMemoryContext systemMemoryContext) {
List<StreamDescriptor> streamDescriptors = createStreamDescriptor("", "", 0, types, orcDataSource).getNestedStreams();
requireNonNull(filterFunctions, "filterFunctions is null");
requireNonNull(filterFunctionInputMapping, "filterFunctionInputMapping is null");
Set<Integer> filterFunctionInputColumns = filterFunctions.stream().flatMapToInt(function -> Arrays.stream(function.getInputChannels())).boxed().map(filterFunctionInputMapping::get).collect(toImmutableSet());
OrcType rowType = types.get(0);
SelectiveStreamReader[] streamReaders = new SelectiveStreamReader[rowType.getFieldCount()];
for (int columnId = 0; columnId < rowType.getFieldCount(); columnId++) {
if (includedColumns.containsKey(columnId)) {
StreamDescriptor streamDescriptor = streamDescriptors.get(columnId);
boolean outputRequired = outputColumns.contains(columnId) || filterFunctionInputColumns.contains(columnId);
streamReaders[columnId] = createStreamReader(streamDescriptor, Optional.ofNullable(filters.get(columnId)).orElse(ImmutableMap.of()), outputRequired ? Optional.of(includedColumns.get(columnId)) : Optional.empty(), Optional.ofNullable(requiredSubfields.get(columnId)).orElse(ImmutableList.of()), hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
}
}
return streamReaders;
}
use of com.facebook.presto.common.predicate.TupleDomainFilter in project presto by prestodb.
the class OrcSelectiveRecordReader method scoreFilter.
private static int scoreFilter(Map<Subfield, TupleDomainFilter> filters) {
checkArgument(!filters.isEmpty());
if (filters.size() > 1) {
// Complex type column. Complex types are expensive!
return 1000;
}
Map.Entry<Subfield, TupleDomainFilter> filterEntry = Iterables.getOnlyElement(filters.entrySet());
if (!filterEntry.getKey().getPath().isEmpty()) {
// Complex type column. Complex types are expensive!
return 1000;
}
TupleDomainFilter filter = filterEntry.getValue();
if (filter instanceof BigintRange) {
if (((BigintRange) filter).isSingleValue()) {
// Integer equality. Generally cheap.
return 10;
}
return 50;
}
if (filter instanceof BigintValuesUsingHashTable || filter instanceof BigintValuesUsingBitmask || filter instanceof BigintMultiRange) {
return 50;
}
return 100;
}
Aggregations