Search in sources :

Example 1 with FilterFunction

use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.

the class TestMapFlatSelectiveStreamReader method runTest.

private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, ExpectedValuesBuilder<K, V> expectedValuesBuilder) throws Exception {
    List<Map<K, V>> expectedValues = expectedValuesBuilder.build();
    Type mapType = mapType(keyType, valueType);
    OrcPredicate orcPredicate = createOrcPredicate(0, mapType, expectedValues, OrcTester.Format.DWRF, true);
    runTest(testOrcFileName, mapType, expectedValues, orcPredicate, Optional.empty(), ImmutableList.of());
    runTest(testOrcFileName, mapType, expectedValues.stream().filter(Objects::isNull).collect(toList()), orcPredicate, Optional.of(IS_NULL), ImmutableList.of());
    runTest(testOrcFileName, mapType, expectedValues.stream().filter(Objects::nonNull).collect(toList()), orcPredicate, Optional.of(IS_NOT_NULL), ImmutableList.of());
    if (keyType != VARBINARY) {
        // read only some keys
        List<K> keys = expectedValues.stream().filter(Objects::nonNull).flatMap(v -> v.keySet().stream()).distinct().collect(toImmutableList());
        if (!keys.isEmpty()) {
            List<K> requiredKeys = ImmutableList.of(keys.get(0));
            runTest(testOrcFileName, mapType, pruneMaps(expectedValues, requiredKeys), orcPredicate, Optional.empty(), toSubfields(keyType, requiredKeys));
            List<Integer> keyIndices = ImmutableList.of(1, 3, 7, 11);
            requiredKeys = keyIndices.stream().filter(k -> k < keys.size()).map(keys::get).collect(toList());
            runTest(testOrcFileName, mapType, pruneMaps(expectedValues, requiredKeys), orcPredicate, Optional.empty(), toSubfields(keyType, requiredKeys));
        }
    }
    // read only some rows
    List<Integer> ids = IntStream.range(0, expectedValues.size()).map(i -> i % 10).boxed().collect(toImmutableList());
    ImmutableList<Type> types = ImmutableList.of(mapType, INTEGER);
    Map<Integer, Map<Subfield, TupleDomainFilter>> filters = ImmutableMap.of(1, ImmutableMap.of(new Subfield("c"), toBigintValues(new long[] { 1, 5, 6 }, true)));
    assertFileContentsPresto(types, new File(getResource(testOrcFileName).getFile()), filterRows(types, ImmutableList.of(expectedValues, ids), filters), OrcEncoding.DWRF, OrcPredicate.TRUE, Optional.of(filters), ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of());
    TestingFilterFunction filterFunction = new TestingFilterFunction(mapType);
    assertFileContentsPresto(types, new File(getResource(testOrcFileName).getFile()), filterFunction.filterRows(ImmutableList.of(expectedValues, ids)), OrcEncoding.DWRF, OrcPredicate.TRUE, Optional.empty(), ImmutableList.of(filterFunction), ImmutableMap.of(0, 0), ImmutableMap.of());
}
Also used : ALL(com.facebook.presto.orc.TestMapFlatSelectiveStreamReader.ExpectedValuesBuilder.Frequency.ALL) Page(com.facebook.presto.common.Page) NONE(com.facebook.presto.orc.TestMapFlatSelectiveStreamReader.ExpectedValuesBuilder.Frequency.NONE) Test(org.testng.annotations.Test) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.testing.TestingEnvironment.FUNCTION_AND_TYPE_MANAGER) ALL_EXCEPT_FIRST(com.facebook.presto.orc.TestMapFlatSelectiveStreamReader.ExpectedValuesBuilder.Frequency.ALL_EXCEPT_FIRST) Map(java.util.Map) Predicate(com.facebook.presto.common.relation.Predicate) ImmutableMap(com.google.common.collect.ImmutableMap) OrcTester.mapType(com.facebook.presto.orc.OrcTester.mapType) SqlFunctionProperties(com.facebook.presto.common.function.SqlFunctionProperties) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) OrcTester.assertFileContentsPresto(com.facebook.presto.orc.OrcTester.assertFileContentsPresto) StandardCharsets(java.nio.charset.StandardCharsets) Objects(java.util.Objects) Resources.getResource(com.google.common.io.Resources.getResource) List(java.util.List) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) IS_NOT_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NOT_NULL) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) Optional(java.util.Optional) IS_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL) IntStream(java.util.stream.IntStream) StandardTypes(com.facebook.presto.common.type.StandardTypes) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) BooleanType(com.facebook.presto.common.type.BooleanType) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) HashMap(java.util.HashMap) Function(java.util.function.Function) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) IntegerType(com.facebook.presto.common.type.IntegerType) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) SOME(com.facebook.presto.orc.TestMapFlatSelectiveStreamReader.ExpectedValuesBuilder.Frequency.SOME) Type(com.facebook.presto.common.type.Type) NamedTypeSignature(com.facebook.presto.common.type.NamedTypeSignature) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) OrcTester.arrayType(com.facebook.presto.orc.OrcTester.arrayType) OrcTester.filterRows(com.facebook.presto.orc.OrcTester.filterRows) Session(com.facebook.presto.Session) TupleDomainFilterUtils.toBigintValues(com.facebook.presto.common.predicate.TupleDomainFilterUtils.toBigintValues) TestingSession.testSessionBuilder(com.facebook.presto.testing.TestingSession.testSessionBuilder) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) Maps(com.google.common.collect.Maps) File(java.io.File) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) TimeZoneKey(com.facebook.presto.common.type.TimeZoneKey) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) TestingOrcPredicate.createOrcPredicate(com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate) RowFieldName(com.facebook.presto.common.type.RowFieldName) Block(com.facebook.presto.common.block.Block) Collections(java.util.Collections) DoubleType(com.facebook.presto.common.type.DoubleType) OrcTester.mapType(com.facebook.presto.orc.OrcTester.mapType) BooleanType(com.facebook.presto.common.type.BooleanType) IntegerType(com.facebook.presto.common.type.IntegerType) Type(com.facebook.presto.common.type.Type) OrcTester.arrayType(com.facebook.presto.orc.OrcTester.arrayType) DoubleType(com.facebook.presto.common.type.DoubleType) Objects(java.util.Objects) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) File(java.io.File) TestingOrcPredicate.createOrcPredicate(com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate) Subfield(com.facebook.presto.common.Subfield)

Example 2 with FilterFunction

use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.

the class OrcSelectiveRecordReader method createStreamReaders.

private static SelectiveStreamReader[] createStreamReaders(OrcDataSource orcDataSource, List<OrcType> types, DateTimeZone hiveStorageTimeZone, OrcRecordReaderOptions options, boolean legacyMapSubscript, Map<Integer, Type> includedColumns, List<Integer> outputColumns, Map<Integer, Map<Subfield, TupleDomainFilter>> filters, List<FilterFunction> filterFunctions, Map<Integer, Integer> filterFunctionInputMapping, Map<Integer, List<Subfield>> requiredSubfields, OrcAggregatedMemoryContext systemMemoryContext) {
    List<StreamDescriptor> streamDescriptors = createStreamDescriptor("", "", 0, types, orcDataSource).getNestedStreams();
    requireNonNull(filterFunctions, "filterFunctions is null");
    requireNonNull(filterFunctionInputMapping, "filterFunctionInputMapping is null");
    Set<Integer> filterFunctionInputColumns = filterFunctions.stream().flatMapToInt(function -> Arrays.stream(function.getInputChannels())).boxed().map(filterFunctionInputMapping::get).collect(toImmutableSet());
    OrcType rowType = types.get(0);
    SelectiveStreamReader[] streamReaders = new SelectiveStreamReader[rowType.getFieldCount()];
    for (int columnId = 0; columnId < rowType.getFieldCount(); columnId++) {
        if (includedColumns.containsKey(columnId)) {
            StreamDescriptor streamDescriptor = streamDescriptors.get(columnId);
            boolean outputRequired = outputColumns.contains(columnId) || filterFunctionInputColumns.contains(columnId);
            streamReaders[columnId] = createStreamReader(streamDescriptor, Optional.ofNullable(filters.get(columnId)).orElse(ImmutableMap.of()), outputRequired ? Optional.of(includedColumns.get(columnId)) : Optional.empty(), Optional.ofNullable(requiredSubfields.get(columnId)).orElse(ImmutableList.of()), hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
        }
    }
    return streamReaders;
}
Also used : Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) BigintRange(com.facebook.presto.common.predicate.TupleDomainFilter.BigintRange) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Predicates.not(com.google.common.base.Predicates.not) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) RuntimeStats(com.facebook.presto.common.RuntimeStats) BlockLease(com.facebook.presto.common.block.BlockLease) BigintValuesUsingHashTable(com.facebook.presto.common.predicate.TupleDomainFilter.BigintValuesUsingHashTable) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Math.min(java.lang.Math.min) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) UncheckedIOException(java.io.UncheckedIOException) Objects(java.util.Objects) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLayout(org.openjdk.jol.info.ClassLayout) SizeOf.sizeOf(io.airlift.slice.SizeOf.sizeOf) LazyBlockLoader(com.facebook.presto.common.block.LazyBlockLoader) SelectiveStreamReader(com.facebook.presto.orc.reader.SelectiveStreamReader) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) SelectiveStreamReaders.createStreamReader(com.facebook.presto.orc.reader.SelectiveStreamReaders.createStreamReader) Optional(java.util.Optional) Math.max(java.lang.Math.max) LazyBlock(com.facebook.presto.common.block.LazyBlock) IntStream(java.util.stream.IntStream) Iterables(com.google.common.collect.Iterables) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) HashMap(java.util.HashMap) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) Function(java.util.function.Function) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) Objects.requireNonNull(java.util.Objects.requireNonNull) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) CharType(com.facebook.presto.common.type.CharType) OrcType(com.facebook.presto.orc.metadata.OrcType) Type(com.facebook.presto.common.type.Type) Nullable(javax.annotation.Nullable) MAX_BATCH_SIZE(com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) BigintMultiRange(com.facebook.presto.common.predicate.TupleDomainFilter.BigintMultiRange) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) PostScript(com.facebook.presto.orc.metadata.PostScript) LongArrayBlock(com.facebook.presto.common.block.LongArrayBlock) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) Ints(com.google.common.primitives.Ints) BigintValuesUsingBitmask(com.facebook.presto.common.predicate.TupleDomainFilter.BigintValuesUsingBitmask) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) Block(com.facebook.presto.common.block.Block) Comparator(java.util.Comparator) MetadataReader(com.facebook.presto.orc.metadata.MetadataReader) OrcType(com.facebook.presto.orc.metadata.OrcType) SelectiveStreamReader(com.facebook.presto.orc.reader.SelectiveStreamReader)

Example 3 with FilterFunction

use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.

the class OrcSelectiveRecordReader method applyFilterFunctions.

private int applyFilterFunctions(List<FilterFunctionWithStats> filterFunctions, Set<Integer> filterFunctionInputs, int[] positions, int positionCount) {
    BlockLease[] blockLeases = new BlockLease[hiveColumnIndices.length];
    Block[] blocks = new Block[hiveColumnIndices.length];
    for (int columnIndex : filterFunctionInputs) {
        if (constantValues[columnIndex] != null) {
            blocks[columnIndex] = RunLengthEncodedBlock.create(columnTypes.get(columnIndex), constantValues[columnIndex] == NULL_MARKER ? null : constantValues[columnIndex], positionCount);
        } else {
            blockLeases[columnIndex] = getStreamReader(columnIndex).getBlockView(positions, positionCount);
            Block block = blockLeases[columnIndex].get();
            if (coercers[columnIndex] != null) {
                block = coercers[columnIndex].apply(block);
            }
            blocks[columnIndex] = block;
        }
    }
    initializeTmpErrors(positionCount);
    for (int i = 0; i < positionCount; i++) {
        tmpErrors[i] = errors[positions[i]];
    }
    Arrays.fill(errors, null);
    try {
        initializeOutputPositions(positionCount);
        for (int i = 0; i < filterFunctions.size(); i++) {
            FilterFunctionWithStats functionWithStats = filterFunctions.get(i);
            FilterFunction function = functionWithStats.getFunction();
            int[] inputs = function.getInputChannels();
            Block[] inputBlocks = new Block[inputs.length];
            for (int j = 0; j < inputs.length; j++) {
                inputBlocks[j] = blocks[filterFunctionInputMapping.get(inputs[j])];
            }
            Page page = new Page(positionCount, inputBlocks);
            long startTime = System.nanoTime();
            int inputPositionCount = positionCount;
            positionCount = function.filter(page, outputPositions, positionCount, tmpErrors);
            functionWithStats.getStats().update(inputPositionCount, positionCount, System.nanoTime() - startTime);
            if (positionCount == 0) {
                break;
            }
        }
        // e.g. make outputPositions a subset of positions array
        for (int i = 0; i < positionCount; i++) {
            outputPositions[i] = positions[outputPositions[i]];
            errors[outputPositions[i]] = tmpErrors[i];
        }
        return positionCount;
    } finally {
        for (BlockLease blockLease : blockLeases) {
            if (blockLease != null) {
                blockLease.close();
            }
        }
    }
}
Also used : FilterFunction(com.facebook.presto.common.predicate.FilterFunction) BlockLease(com.facebook.presto.common.block.BlockLease) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) LazyBlock(com.facebook.presto.common.block.LazyBlock) LongArrayBlock(com.facebook.presto.common.block.LongArrayBlock) Block(com.facebook.presto.common.block.Block) Page(com.facebook.presto.common.Page)

Example 4 with FilterFunction

use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.

the class OrcSelectivePageSourceFactory method toFilterFunctions.

/**
 * Split filter expression into groups of conjuncts that depend on the same set of inputs,
 * then compile each group into FilterFunction.
 */
private static List<FilterFunction> toFilterFunctions(RowExpression filter, Optional<BucketAdapter> bucketAdapter, ConnectorSession session, DeterminismEvaluator determinismEvaluator, PredicateCompiler predicateCompiler) {
    ImmutableList.Builder<FilterFunction> filterFunctions = ImmutableList.builder();
    bucketAdapter.map(predicate -> new FilterFunction(session.getSqlFunctionProperties(), true, predicate)).ifPresent(filterFunctions::add);
    if (TRUE_CONSTANT.equals(filter)) {
        return filterFunctions.build();
    }
    DynamicFilterExtractResult extractDynamicFilterResult = extractDynamicFilters(filter);
    // dynamic filter will be added through subfield pushdown
    filter = and(extractDynamicFilterResult.getStaticConjuncts());
    if (!isAdaptiveFilterReorderingEnabled(session)) {
        filterFunctions.add(new FilterFunction(session.getSqlFunctionProperties(), determinismEvaluator.isDeterministic(filter), predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), filter).get()));
        return filterFunctions.build();
    }
    List<RowExpression> conjuncts = extractConjuncts(filter);
    if (conjuncts.size() == 1) {
        filterFunctions.add(new FilterFunction(session.getSqlFunctionProperties(), determinismEvaluator.isDeterministic(filter), predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), filter).get()));
        return filterFunctions.build();
    }
    // Use LinkedHashMap to preserve user-specified order of conjuncts. This will be the initial order in which filters are applied.
    Map<Set<Integer>, List<RowExpression>> inputsToConjuncts = new LinkedHashMap<>();
    for (RowExpression conjunct : conjuncts) {
        inputsToConjuncts.computeIfAbsent(extractInputs(conjunct), k -> new ArrayList<>()).add(conjunct);
    }
    inputsToConjuncts.values().stream().map(expressions -> binaryExpression(AND, expressions)).map(predicate -> new FilterFunction(session.getSqlFunctionProperties(), determinismEvaluator.isDeterministic(predicate), predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), predicate).get())).forEach(filterFunctions::add);
    return filterFunctions.build();
}
Also used : LogicalRowExpressions.extractConjuncts(com.facebook.presto.expressions.LogicalRowExpressions.extractConjuncts) Page(com.facebook.presto.common.Page) HiveCoercer(com.facebook.presto.hive.HiveCoercer) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FixedPageSource(com.facebook.presto.spi.FixedPageSource) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Predicate(com.facebook.presto.common.relation.Predicate) HiveSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) SqlFunctionProperties(com.facebook.presto.common.function.SqlFunctionProperties) HiveSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcZstdJniDecompressionEnabled) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) DefaultRowExpressionTraversalVisitor(com.facebook.presto.expressions.DefaultRowExpressionTraversalVisitor) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) BucketAdaptation(com.facebook.presto.hive.BucketAdaptation) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) HIVE_INVALID_BUCKET_FILES(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES) OrcSelectiveRecordReader(com.facebook.presto.orc.OrcSelectiveRecordReader) SubfieldExtractor(com.facebook.presto.hive.SubfieldExtractor) LogicalRowExpressions.and(com.facebook.presto.expressions.LogicalRowExpressions.and) IOException(java.io.IOException) HiveBucketing.getHiveBucket(com.facebook.presto.hive.HiveBucketing.getHiveBucket) OrcReader(com.facebook.presto.orc.OrcReader) DynamicFilters.extractDynamicFilters(com.facebook.presto.expressions.DynamicFilters.extractDynamicFilters) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) HiveSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.hive.HiveSessionProperties.getOrcTinyStripeThreshold) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HiveSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) AND(com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) CallExpression(com.facebook.presto.spi.relation.CallExpression) SpecialFormExpression(com.facebook.presto.spi.relation.SpecialFormExpression) BiMap(com.google.common.collect.BiMap) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) FileNotFoundException(java.io.FileNotFoundException) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) HiveSessionProperties.isAdaptiveFilterReorderingEnabled(com.facebook.presto.hive.HiveSessionProperties.isAdaptiveFilterReorderingEnabled) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) List(java.util.List) RowExpressionService(com.facebook.presto.spi.relation.RowExpressionService) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) HiveSelectivePageSourceFactory(com.facebook.presto.hive.HiveSelectivePageSourceFactory) Optional(java.util.Optional) LogicalRowExpressions.binaryExpression(com.facebook.presto.expressions.LogicalRowExpressions.binaryExpression) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) PredicateCompiler(com.facebook.presto.spi.relation.PredicateCompiler) InputReferenceExpression(com.facebook.presto.spi.relation.InputReferenceExpression) IntStream(java.util.stream.IntStream) DynamicFilterExtractResult(com.facebook.presto.expressions.DynamicFilters.DynamicFilterExtractResult) DeterminismEvaluator(com.facebook.presto.spi.relation.DeterminismEvaluator) HiveSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxMergeDistance) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) HiveType(com.facebook.presto.hive.HiveType) HashMap(java.util.HashMap) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HiveSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxReadBlockSize) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) Inject(javax.inject.Inject) HashSet(java.util.HashSet) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getOrcMaxBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxBufferSize) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) Type(com.facebook.presto.common.type.Type) RowExpression(com.facebook.presto.spi.relation.RowExpression) Storage(com.facebook.presto.hive.metastore.Storage) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) HiveSessionProperties.getOrcStreamBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcStreamBufferSize) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) Maps(com.google.common.collect.Maps) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Consumer(java.util.function.Consumer) HiveUtil.typedPartitionKey(com.facebook.presto.hive.HiveUtil.typedPartitionKey) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HiveUtil.getPhysicalHiveColumnHandles(com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles) ImmutableBiMap.toImmutableBiMap(com.google.common.collect.ImmutableBiMap.toImmutableBiMap) RowExpressionNodeInliner.replaceExpression(com.facebook.presto.expressions.RowExpressionNodeInliner.replaceExpression) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) Set(java.util.Set) ImmutableSet(com.google.common.collect.ImmutableSet) HashSet(java.util.HashSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) RowExpression(com.facebook.presto.spi.relation.RowExpression) LinkedHashMap(java.util.LinkedHashMap) DynamicFilterExtractResult(com.facebook.presto.expressions.DynamicFilters.DynamicFilterExtractResult) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList)

Example 5 with FilterFunction

use of com.facebook.presto.common.predicate.FilterFunction in project presto by prestodb.

the class TestOrcReaderPositions method testFilterFunctionWithAppendRowNumber.

@Test
public void testFilterFunctionWithAppendRowNumber() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        int rowCount = 100;
        createSequentialFile(tempFile.getFile(), rowCount);
        List<Long> expectedValues = LongStream.range(0, 100).boxed().filter(input -> input % 2 != 0).collect(ArrayList::new, List::add, List::addAll);
        ConnectorSession session = new TestingConnectorSession(ImmutableList.of());
        FilterFunction filter = new FilterFunction(session.getSqlFunctionProperties(), true, new IsOddPredicate());
        OrcSelectiveRecordReader reader = createCustomOrcSelectiveRecordReader(tempFile.getFile(), ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), MAX_BATCH_SIZE, ImmutableMap.of(), ImmutableList.of(filter), ImmutableMap.of(0, 0), ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of(0, BIGINT), ImmutableList.of(0), false, new TestingHiveOrcAggregatedMemoryContext(), true);
        verifyAppendNumber(expectedValues, reader);
    }
}
Also used : OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) Page(com.facebook.presto.common.Page) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) ByteBuffer(java.nio.ByteBuffer) Writer(org.apache.hadoop.hive.ql.io.orc.Writer) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) Path(org.apache.hadoop.fs.Path) Predicate(com.facebook.presto.common.relation.Predicate) RuntimeStats(com.facebook.presto.common.RuntimeStats) NullMemoryManager(org.apache.orc.NullMemoryManager) ImmutableMap(com.google.common.collect.ImmutableMap) Footer(com.facebook.presto.orc.metadata.Footer) SqlFunctionProperties(com.facebook.presto.common.function.SqlFunctionProperties) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) Math.min(java.lang.Math.min) Assert.assertNotNull(org.testng.Assert.assertNotNull) NOOP_ORC_AGGREGATED_MEMORY_CONTEXT(com.facebook.presto.orc.NoopOrcAggregatedMemoryContext.NOOP_ORC_AGGREGATED_MEMORY_CONTEXT) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MAX_BLOCK_SIZE(com.facebook.presto.orc.OrcTester.MAX_BLOCK_SIZE) Slice(io.airlift.slice.Slice) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Assert.assertEquals(org.testng.Assert.assertEquals) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) MAX_BATCH_SIZE(com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) LongStream(java.util.stream.LongStream) BATCH_SIZE_GROWTH_FACTOR(com.facebook.presto.orc.OrcReader.BATCH_SIZE_GROWTH_FACTOR) UTF_8(java.nio.charset.StandardCharsets.UTF_8) SNAPPY(org.apache.hadoop.hive.ql.io.orc.CompressionKind.SNAPPY) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) IOException(java.io.IOException) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Field(java.lang.reflect.Field) Maps(com.google.common.collect.Maps) File(java.io.File) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Serializer(org.apache.hadoop.hive.serde2.Serializer) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Assert.assertTrue(org.testng.Assert.assertTrue) Block(com.facebook.presto.common.block.Block) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) ArrayList(java.util.ArrayList) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Test(org.testng.annotations.Test)

Aggregations

FilterFunction (com.facebook.presto.common.predicate.FilterFunction)6 Page (com.facebook.presto.common.Page)5 Block (com.facebook.presto.common.block.Block)5 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 Maps (com.google.common.collect.Maps)4 Subfield (com.facebook.presto.common.Subfield)3 LongArrayBlock (com.facebook.presto.common.block.LongArrayBlock)3 SqlFunctionProperties (com.facebook.presto.common.function.SqlFunctionProperties)3 TupleDomainFilter (com.facebook.presto.common.predicate.TupleDomainFilter)3 Predicate (com.facebook.presto.common.relation.Predicate)3 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)3 Type (com.facebook.presto.common.type.Type)3 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 List (java.util.List)3 Map (java.util.Map)3 Optional (java.util.Optional)3