use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.
the class TestMapFlatSelectiveStreamReader method runTest.
/**
 * Reads the given test ORC file as a single map column in several configurations and
 * checks each read against the values produced by {@code expectedValuesBuilder}.
 *
 * The configurations exercised, in order, are: an unfiltered read, an IS_NULL read,
 * an IS_NOT_NULL read, reads restricted to a subset of map keys (skipped when the key
 * type is VARBINARY), a read with a tuple-domain filter on a second INTEGER column, and
 * a read with a {@code TestingFilterFunction}.
 *
 * @param testOrcFileName resource name of the ORC file to read
 * @param keyType type of the map keys
 * @param valueType type of the map values
 * @param expectedValuesBuilder supplies the expected map for every row
 */
private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, ExpectedValuesBuilder<K, V> expectedValuesBuilder) throws Exception {
    List<Map<K, V>> expectedValues = expectedValuesBuilder.build();
    Type mapType = mapType(keyType, valueType);
    OrcPredicate orcPredicate = createOrcPredicate(0, mapType, expectedValues, OrcTester.Format.DWRF, true);

    // Whole column, then only the null maps, then only the non-null maps.
    runTest(testOrcFileName, mapType, expectedValues, orcPredicate, Optional.empty(), ImmutableList.of());

    List<Map<K, V>> nullMaps = expectedValues.stream().filter(Objects::isNull).collect(toList());
    runTest(testOrcFileName, mapType, nullMaps, orcPredicate, Optional.of(IS_NULL), ImmutableList.of());

    List<Map<K, V>> nonNullMaps = expectedValues.stream().filter(Objects::nonNull).collect(toList());
    runTest(testOrcFileName, mapType, nonNullMaps, orcPredicate, Optional.of(IS_NOT_NULL), ImmutableList.of());

    if (keyType != VARBINARY) {
        // read only some keys
        List<K> keys = expectedValues.stream()
                .filter(Objects::nonNull)
                .flatMap(map -> map.keySet().stream())
                .distinct()
                .collect(toImmutableList());
        if (!keys.isEmpty()) {
            // A single key: the first distinct key encountered.
            List<K> firstKeyOnly = ImmutableList.of(keys.get(0));
            runTest(testOrcFileName, mapType, pruneMaps(expectedValues, firstKeyOnly), orcPredicate, Optional.empty(), toSubfields(keyType, firstKeyOnly));

            // A scattered selection of keys; indices beyond the number of distinct keys are dropped.
            List<K> scatteredKeys = ImmutableList.of(1, 3, 7, 11).stream()
                    .filter(index -> index < keys.size())
                    .map(keys::get)
                    .collect(toList());
            runTest(testOrcFileName, mapType, pruneMaps(expectedValues, scatteredKeys), orcPredicate, Optional.empty(), toSubfields(keyType, scatteredKeys));
        }
    }

    // read only some rows
    List<Integer> ids = IntStream.range(0, expectedValues.size())
            .mapToObj(row -> row % 10)
            .collect(toImmutableList());
    ImmutableList<Type> types = ImmutableList.of(mapType, INTEGER);
    Map<Integer, Map<Subfield, TupleDomainFilter>> filters = ImmutableMap.of(
            1, ImmutableMap.of(new Subfield("c"), toBigintValues(new long[] { 1, 5, 6 }, true)));

    File orcFile = new File(getResource(testOrcFileName).getFile());
    assertFileContentsPresto(types, orcFile, filterRows(types, ImmutableList.of(expectedValues, ids), filters), OrcEncoding.DWRF, OrcPredicate.TRUE, Optional.of(filters), ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of());

    // Same two-column read, this time filtered by a filter function instead of a tuple-domain filter.
    TestingFilterFunction filterFunction = new TestingFilterFunction(mapType);
    assertFileContentsPresto(types, orcFile, filterFunction.filterRows(ImmutableList.of(expectedValues, ids)), OrcEncoding.DWRF, OrcPredicate.TRUE, Optional.empty(), ImmutableList.of(filterFunction), ImmutableMap.of(0, 0), ImmutableMap.of());
}
use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.
the class OrcSelectiveRecordReader method createStreamReaders.
/**
 * Creates one selective stream reader per included top-level column.
 *
 * The returned array has one slot per field of the root type ({@code types.get(0)});
 * slots for columns that are not keys of {@code includedColumns} are left {@code null}.
 * A reader is given an output type only when its column is listed in {@code outputColumns}
 * or is an input of one of the filter functions; otherwise it receives {@code Optional.empty()}.
 *
 * @param filters per-column subfield filters; columns without an entry get an empty map
 * @param filterFunctions filter functions whose input channels determine extra required columns
 * @param filterFunctionInputMapping maps a filter-function input channel to a column id
 * @param requiredSubfields per-column required subfields; columns without an entry get an empty list
 * @return the readers, indexed by column id
 * @throws NullPointerException if {@code filterFunctions} or {@code filterFunctionInputMapping} is null
 */
private static SelectiveStreamReader[] createStreamReaders(OrcDataSource orcDataSource, List<OrcType> types, DateTimeZone hiveStorageTimeZone, OrcRecordReaderOptions options, boolean legacyMapSubscript, Map<Integer, Type> includedColumns, List<Integer> outputColumns, Map<Integer, Map<Subfield, TupleDomainFilter>> filters, List<FilterFunction> filterFunctions, Map<Integer, Integer> filterFunctionInputMapping, Map<Integer, List<Subfield>> requiredSubfields, OrcAggregatedMemoryContext systemMemoryContext) {
    List<StreamDescriptor> streamDescriptors = createStreamDescriptor("", "", 0, types, orcDataSource).getNestedStreams();

    requireNonNull(filterFunctions, "filterFunctions is null");
    requireNonNull(filterFunctionInputMapping, "filterFunctionInputMapping is null");

    // Columns that feed any filter function, translated from input channels to column ids.
    Set<Integer> filterFunctionInputColumns = filterFunctions.stream()
            .flatMapToInt(filterFunction -> Arrays.stream(filterFunction.getInputChannels()))
            .boxed()
            .map(filterFunctionInputMapping::get)
            .collect(toImmutableSet());

    OrcType rowType = types.get(0);
    int fieldCount = rowType.getFieldCount();
    SelectiveStreamReader[] streamReaders = new SelectiveStreamReader[fieldCount];

    for (int columnId = 0; columnId < fieldCount; columnId++) {
        if (!includedColumns.containsKey(columnId)) {
            continue;
        }

        StreamDescriptor streamDescriptor = streamDescriptors.get(columnId);
        Map<Subfield, TupleDomainFilter> columnFilters = Optional.ofNullable(filters.get(columnId)).orElse(ImmutableMap.of());

        // The column's values must be materialized if it is projected or consumed by a filter function.
        boolean outputRequired = outputColumns.contains(columnId) || filterFunctionInputColumns.contains(columnId);
        Optional<Type> outputType;
        if (outputRequired) {
            outputType = Optional.of(includedColumns.get(columnId));
        }
        else {
            outputType = Optional.empty();
        }

        List<Subfield> columnSubfields = Optional.ofNullable(requiredSubfields.get(columnId)).orElse(ImmutableList.of());

        streamReaders[columnId] = createStreamReader(streamDescriptor, columnFilters, outputType, columnSubfields, hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
    }
    return streamReaders;
}
use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.
the class OrcSelectiveRecordReader method applyFilterFunctions.
/**
 * Applies the given filter functions, in list order, to the rows selected by {@code positions}
 * and returns how many rows remain.
 *
 * For every column in {@code filterFunctionInputs} an input block is prepared: a run-length
 * encoded block when the column has a constant value, otherwise a leased block view from the
 * column's stream reader (passed through the column's coercer when one is set). Each function
 * then filters a page assembled from its own input channels. On return the first
 * {@code positionCount} entries of {@code outputPositions} hold the surviving entries of
 * {@code positions}, and {@code errors} holds, at those positions, the corresponding
 * {@code tmpErrors} entries; all other {@code errors} slots are {@code null}.
 *
 * All block leases are acquired inside the {@code try} so that every lease obtained so far is
 * released even when a later lease acquisition, coercion or filter invocation throws.
 *
 * @param filterFunctions functions to apply; evaluation stops early once no positions remain
 * @param filterFunctionInputs indices of the columns that the functions read
 * @param positions row positions to evaluate; only the first {@code positionCount} are used
 * @param positionCount number of valid entries in {@code positions}
 * @return the number of positions that passed every applied function
 */
private int applyFilterFunctions(List<FilterFunctionWithStats> filterFunctions, Set<Integer> filterFunctionInputs, int[] positions, int positionCount) {
    BlockLease[] blockLeases = new BlockLease[hiveColumnIndices.length];
    Block[] blocks = new Block[hiveColumnIndices.length];
    try {
        for (int columnIndex : filterFunctionInputs) {
            if (constantValues[columnIndex] != null) {
                // NULL_MARKER stands in for a constant null value.
                blocks[columnIndex] = RunLengthEncodedBlock.create(columnTypes.get(columnIndex), constantValues[columnIndex] == NULL_MARKER ? null : constantValues[columnIndex], positionCount);
            } else {
                // Record the lease before using it so the finally block can release it on any failure below.
                blockLeases[columnIndex] = getStreamReader(columnIndex).getBlockView(positions, positionCount);
                Block block = blockLeases[columnIndex].get();
                if (coercers[columnIndex] != null) {
                    block = coercers[columnIndex].apply(block);
                }
                blocks[columnIndex] = block;
            }
        }

        // Compact the errors of the selected positions into tmpErrors, then reset errors;
        // the surviving entries are written back after filtering.
        initializeTmpErrors(positionCount);
        for (int i = 0; i < positionCount; i++) {
            tmpErrors[i] = errors[positions[i]];
        }
        Arrays.fill(errors, null);

        initializeOutputPositions(positionCount);
        for (int i = 0; i < filterFunctions.size(); i++) {
            FilterFunctionWithStats functionWithStats = filterFunctions.get(i);
            FilterFunction function = functionWithStats.getFunction();
            int[] inputs = function.getInputChannels();
            Block[] inputBlocks = new Block[inputs.length];
            for (int j = 0; j < inputs.length; j++) {
                inputBlocks[j] = blocks[filterFunctionInputMapping.get(inputs[j])];
            }
            Page page = new Page(positionCount, inputBlocks);
            long startTime = System.nanoTime();
            int inputPositionCount = positionCount;
            positionCount = function.filter(page, outputPositions, positionCount, tmpErrors);
            functionWithStats.getStats().update(inputPositionCount, positionCount, System.nanoTime() - startTime);
            if (positionCount == 0) {
                break;
            }
        }

        // outputPositions currently indexes into positions; translate it so that it becomes
        // a subset of the positions array, and restore the matching errors.
        for (int i = 0; i < positionCount; i++) {
            outputPositions[i] = positions[outputPositions[i]];
            errors[outputPositions[i]] = tmpErrors[i];
        }
        return positionCount;
    } finally {
        for (BlockLease blockLease : blockLeases) {
            if (blockLease != null) {
                blockLease.close();
            }
        }
    }
}
use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.
the class OrcSelectivePageSourceFactory method toFilterFunctions.
/**
 * Converts a filter expression into the list of {@link FilterFunction}s to evaluate.
 *
 * A function wrapping the bucket adapter, when present, always comes first. Dynamic filters
 * are stripped from the expression (they are added through subfield pushdown), and the
 * remaining static conjuncts are compiled either as one function or, when adaptive filter
 * reordering is enabled and there is more than one conjunct, as one function per group of
 * conjuncts that depend on the same set of inputs.
 */
private static List<FilterFunction> toFilterFunctions(RowExpression filter, Optional<BucketAdapter> bucketAdapter, ConnectorSession session, DeterminismEvaluator determinismEvaluator, PredicateCompiler predicateCompiler) {
    ImmutableList.Builder<FilterFunction> result = ImmutableList.builder();
    if (bucketAdapter.isPresent()) {
        result.add(new FilterFunction(session.getSqlFunctionProperties(), true, bucketAdapter.get()));
    }

    if (TRUE_CONSTANT.equals(filter)) {
        return result.build();
    }

    // dynamic filter will be added through subfield pushdown
    RowExpression staticFilter = and(extractDynamicFilters(filter).getStaticConjuncts());

    if (!isAdaptiveFilterReorderingEnabled(session)) {
        result.add(newCompiledFilterFunction(staticFilter, session, determinismEvaluator, predicateCompiler));
        return result.build();
    }

    List<RowExpression> conjuncts = extractConjuncts(staticFilter);
    if (conjuncts.size() == 1) {
        result.add(newCompiledFilterFunction(staticFilter, session, determinismEvaluator, predicateCompiler));
        return result.build();
    }

    // Use LinkedHashMap to preserve user-specified order of conjuncts. This will be the initial order in which filters are applied.
    Map<Set<Integer>, List<RowExpression>> conjunctsByInputs = new LinkedHashMap<>();
    for (RowExpression conjunct : conjuncts) {
        conjunctsByInputs.computeIfAbsent(extractInputs(conjunct), inputs -> new ArrayList<>()).add(conjunct);
    }

    for (List<RowExpression> group : conjunctsByInputs.values()) {
        result.add(newCompiledFilterFunction(binaryExpression(AND, group), session, determinismEvaluator, predicateCompiler));
    }
    return result.build();
}

/**
 * Compiles {@code predicate} and wraps it in a {@link FilterFunction}, flagged with the
 * determinism reported by {@code determinismEvaluator}.
 */
private static FilterFunction newCompiledFilterFunction(RowExpression predicate, ConnectorSession session, DeterminismEvaluator determinismEvaluator, PredicateCompiler predicateCompiler) {
    return new FilterFunction(
            session.getSqlFunctionProperties(),
            determinismEvaluator.isDeterministic(predicate),
            predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), predicate).get());
}
use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.
the class TestOrcReaderPositions method testFilterFunctionWithAppendRowNumber.
/**
 * Verifies a selective ORC read that combines a filter function with row-number appending.
 *
 * A sequential file of {@code rowCount} rows is written, a reader is created with a single
 * filter function built from {@code IsOddPredicate} over column 0, and the result is checked
 * with {@code verifyAppendNumber} against the odd values below {@code rowCount}.
 */
@Test
public void testFilterFunctionWithAppendRowNumber() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        int rowCount = 100;
        createSequentialFile(tempFile.getFile(), rowCount);

        // Derive the expectation from rowCount so it cannot drift from the size of the file written above.
        // NOTE(review): assumes createSequentialFile writes the values 0..rowCount-1 — confirm against the helper.
        List<Long> expectedValues = LongStream.range(0, rowCount).boxed().filter(input -> input % 2 != 0).collect(ArrayList::new, List::add, List::addAll);

        ConnectorSession session = new TestingConnectorSession(ImmutableList.of());
        FilterFunction filter = new FilterFunction(session.getSqlFunctionProperties(), true, new IsOddPredicate());
        OrcSelectiveRecordReader reader = createCustomOrcSelectiveRecordReader(tempFile.getFile(), ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), MAX_BATCH_SIZE, ImmutableMap.of(), ImmutableList.of(filter), ImmutableMap.of(0, 0), ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of(0, BIGINT), ImmutableList.of(0), false, new TestingHiveOrcAggregatedMemoryContext(), true);
        verifyAppendNumber(expectedValues, reader);
    }
}
Aggregations