Search in sources :

Example 1 with Predicate

use of com.facebook.presto.common.relation.Predicate in project presto by prestodb.

the class RowExpressionPredicateCompiler method compilePredicateInternal.

/**
 * Compiles {@code predicate} into a generated {@link Predicate} class and returns a supplier
 * that creates a new instance of that class each time it is invoked.
 *
 * @param sqlFunctionProperties forwarded to {@code definePredicateClass}
 * @param sessionFunctions forwarded to {@code definePredicateClass}
 * @param predicate the expression to compile; must not be null
 * @return a supplier that instantiates the generated class through its no-argument constructor
 * @throws NullPointerException if {@code predicate} is null
 * @throws PrestoException with {@code COMPILER_ERROR} if the class cannot be defined; the
 *         returned supplier throws the same error code if instantiation fails
 */
private Supplier<Predicate> compilePredicateInternal(SqlFunctionProperties sqlFunctionProperties, Map<SqlFunctionId, SqlInvokedFunction> sessionFunctions, RowExpression predicate) {
    requireNonNull(predicate, "predicate is null");
    // Rewrite field references so the generated class reads only the channels it actually uses.
    PageFieldsToInputParametersRewriter.Result result = rewritePageFieldsToInputParameters(predicate);
    int[] inputChannels = result.getInputChannels().getInputChannels().stream().mapToInt(Integer::intValue).toArray();
    CallSiteBinder callSiteBinder = new CallSiteBinder();
    ClassDefinition classDefinition = definePredicateClass(sqlFunctionProperties, sessionFunctions, result.getRewrittenExpression(), inputChannels, callSiteBinder);
    Class<? extends Predicate> predicateClass;
    try {
        predicateClass = defineClass(classDefinition, Predicate.class, callSiteBinder.getBindings(), getClass().getClassLoader());
    } catch (Exception e) {
        // Keep reporting the underlying cause when there is one, but never drop the failure:
        // an exception without a cause would otherwise be replaced by a null cause.
        Throwable cause = (e.getCause() != null) ? e.getCause() : e;
        throw new PrestoException(COMPILER_ERROR, predicate.toString(), cause);
    }
    return () -> {
        try {
            return predicateClass.getConstructor().newInstance();
        } catch (ReflectiveOperationException e) {
            throw new PrestoException(COMPILER_ERROR, e);
        }
    };
}
Also used : PageFieldsToInputParametersRewriter(com.facebook.presto.operator.project.PageFieldsToInputParametersRewriter) CallSiteBinder(com.facebook.presto.bytecode.CallSiteBinder) PrestoException(com.facebook.presto.spi.PrestoException) ClassDefinition(com.facebook.presto.bytecode.ClassDefinition) Predicate(com.facebook.presto.common.relation.Predicate)

Example 2 with Predicate

use of com.facebook.presto.common.relation.Predicate in project presto by prestodb.

the class TestRowExpressionPredicateCompiler method test.

/**
 * Compiles two predicates over BIGINT inputs and checks both the input channels reported by
 * the compiled predicate and its per-position evaluation results.
 */
@Test
public void test() {
    InputReferenceExpression a = new InputReferenceExpression(Optional.empty(), 0, BIGINT);
    InputReferenceExpression b = new InputReferenceExpression(Optional.empty(), 1, BIGINT);
    Block aBlock = createLongBlock(5, 5, 5, 5, 5);
    Block bBlock = createLongBlock(1, 3, 5, 7, 0);
    // b - a >= 0 (the display name now matches the GREATER_THAN_OR_EQUAL function it labels)
    RowExpression difference = call(">=", functionResolution.comparisonFunction(GREATER_THAN_OR_EQUAL, BIGINT, BIGINT), BOOLEAN, call("b - a", functionResolution.arithmeticFunction(SUBTRACT, BIGINT, BIGINT), BIGINT, b, a), constant(0L, BIGINT));
    PredicateCompiler compiler = new RowExpressionPredicateCompiler(metadata, 10_000);
    Predicate compiledDifference = compiler.compilePredicate(SESSION.getSqlFunctionProperties(), SESSION.getSessionFunctions(), difference).get();
    // b is referenced before a in the expression, so channel 1 is expected first
    assertEquals(Arrays.asList(1, 0), Ints.asList(compiledDifference.getInputChannels()));
    // the page supplies the blocks in the same order as the reported input channels
    Page page = new Page(bBlock, aBlock);
    assertFalse(compiledDifference.evaluate(SESSION.getSqlFunctionProperties(), page, 0));
    assertFalse(compiledDifference.evaluate(SESSION.getSqlFunctionProperties(), page, 1));
    assertTrue(compiledDifference.evaluate(SESSION.getSqlFunctionProperties(), page, 2));
    assertTrue(compiledDifference.evaluate(SESSION.getSqlFunctionProperties(), page, 3));
    assertFalse(compiledDifference.evaluate(SESSION.getSqlFunctionProperties(), page, 4));
    // b * 2 < 10 (the display name now matches the LESS_THAN function it labels)
    RowExpression timesTwo = call("<", functionResolution.comparisonFunction(LESS_THAN, BIGINT, BIGINT), BOOLEAN, call("b * 2", functionResolution.arithmeticFunction(MULTIPLY, BIGINT, BIGINT), BIGINT, b, constant(2L, BIGINT)), constant(10L, BIGINT));
    Predicate compiledTimesTwo = compiler.compilePredicate(SESSION.getSqlFunctionProperties(), SESSION.getSessionFunctions(), timesTwo).get();
    assertEquals(Arrays.asList(1), Ints.asList(compiledTimesTwo.getInputChannels()));
    page = new Page(bBlock);
    assertTrue(compiledTimesTwo.evaluate(SESSION.getSqlFunctionProperties(), page, 0));
    assertTrue(compiledTimesTwo.evaluate(SESSION.getSqlFunctionProperties(), page, 1));
    assertFalse(compiledTimesTwo.evaluate(SESSION.getSqlFunctionProperties(), page, 2));
    assertFalse(compiledTimesTwo.evaluate(SESSION.getSqlFunctionProperties(), page, 3));
    assertTrue(compiledTimesTwo.evaluate(SESSION.getSqlFunctionProperties(), page, 4));
}
Also used : InputReferenceExpression(com.facebook.presto.spi.relation.InputReferenceExpression) Block(com.facebook.presto.common.block.Block) RowExpression(com.facebook.presto.spi.relation.RowExpression) Page(com.facebook.presto.common.Page) PredicateCompiler(com.facebook.presto.spi.relation.PredicateCompiler) Predicate(com.facebook.presto.common.relation.Predicate) Test(org.testng.annotations.Test)

Example 3 with Predicate

use of com.facebook.presto.common.relation.Predicate in project presto by prestodb.

the class OrcSelectivePageSourceFactory method toFilterFunctions.

/**
 * Turns a filter expression into a list of {@link FilterFunction}s.
 *
 * The optional bucket adapter, when present, always becomes the first filter function.
 * Dynamic filters are stripped from the expression; the remaining static conjuncts are either
 * compiled as a single function, or, when adaptive filter reordering is enabled and there is
 * more than one conjunct, grouped by the set of inputs they depend on and compiled per group.
 */
private static List<FilterFunction> toFilterFunctions(RowExpression filter, Optional<BucketAdapter> bucketAdapter, ConnectorSession session, DeterminismEvaluator determinismEvaluator, PredicateCompiler predicateCompiler) {
    ImmutableList.Builder<FilterFunction> result = ImmutableList.builder();
    if (bucketAdapter.isPresent()) {
        result.add(new FilterFunction(session.getSqlFunctionProperties(), true, bucketAdapter.get()));
    }
    if (TRUE_CONSTANT.equals(filter)) {
        return result.build();
    }

    // dynamic filters are added through subfield pushdown, so only the static conjuncts remain here
    DynamicFilterExtractResult dynamicFilterResult = extractDynamicFilters(filter);
    RowExpression staticFilter = and(dynamicFilterResult.getStaticConjuncts());

    if (!isAdaptiveFilterReorderingEnabled(session)) {
        result.add(compileToFilterFunction(staticFilter, session, determinismEvaluator, predicateCompiler));
        return result.build();
    }

    List<RowExpression> conjuncts = extractConjuncts(staticFilter);
    if (conjuncts.size() == 1) {
        result.add(compileToFilterFunction(staticFilter, session, determinismEvaluator, predicateCompiler));
        return result.build();
    }

    // A LinkedHashMap keeps the groups in the order the conjuncts were written,
    // which becomes the initial order in which the filters are applied.
    Map<Set<Integer>, List<RowExpression>> conjunctsByInputs = new LinkedHashMap<>();
    for (RowExpression conjunct : conjuncts) {
        List<RowExpression> group = conjunctsByInputs.computeIfAbsent(extractInputs(conjunct), inputs -> new ArrayList<>());
        group.add(conjunct);
    }
    for (List<RowExpression> group : conjunctsByInputs.values()) {
        result.add(compileToFilterFunction(binaryExpression(AND, group), session, determinismEvaluator, predicateCompiler));
    }
    return result.build();
}

/**
 * Compiles a single expression into a {@link FilterFunction}, recording whether the
 * determinism evaluator considers the expression deterministic.
 */
private static FilterFunction compileToFilterFunction(RowExpression expression, ConnectorSession session, DeterminismEvaluator determinismEvaluator, PredicateCompiler predicateCompiler) {
    return new FilterFunction(
            session.getSqlFunctionProperties(),
            determinismEvaluator.isDeterministic(expression),
            predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), expression).get());
}
Also used : LogicalRowExpressions.extractConjuncts(com.facebook.presto.expressions.LogicalRowExpressions.extractConjuncts) Page(com.facebook.presto.common.Page) HiveCoercer(com.facebook.presto.hive.HiveCoercer) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FixedPageSource(com.facebook.presto.spi.FixedPageSource) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Predicate(com.facebook.presto.common.relation.Predicate) HiveSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) SqlFunctionProperties(com.facebook.presto.common.function.SqlFunctionProperties) HiveSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcZstdJniDecompressionEnabled) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) DefaultRowExpressionTraversalVisitor(com.facebook.presto.expressions.DefaultRowExpressionTraversalVisitor) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) BucketAdaptation(com.facebook.presto.hive.BucketAdaptation) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) 
OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) HIVE_INVALID_BUCKET_FILES(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES) OrcSelectiveRecordReader(com.facebook.presto.orc.OrcSelectiveRecordReader) SubfieldExtractor(com.facebook.presto.hive.SubfieldExtractor) LogicalRowExpressions.and(com.facebook.presto.expressions.LogicalRowExpressions.and) IOException(java.io.IOException) HiveBucketing.getHiveBucket(com.facebook.presto.hive.HiveBucketing.getHiveBucket) OrcReader(com.facebook.presto.orc.OrcReader) DynamicFilters.extractDynamicFilters(com.facebook.presto.expressions.DynamicFilters.extractDynamicFilters) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) HiveSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.hive.HiveSessionProperties.getOrcTinyStripeThreshold) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HiveSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) AND(com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) CallExpression(com.facebook.presto.spi.relation.CallExpression) SpecialFormExpression(com.facebook.presto.spi.relation.SpecialFormExpression) BiMap(com.google.common.collect.BiMap) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) 
INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) FileNotFoundException(java.io.FileNotFoundException) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) HiveSessionProperties.isAdaptiveFilterReorderingEnabled(com.facebook.presto.hive.HiveSessionProperties.isAdaptiveFilterReorderingEnabled) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) List(java.util.List) RowExpressionService(com.facebook.presto.spi.relation.RowExpressionService) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) HiveSelectivePageSourceFactory(com.facebook.presto.hive.HiveSelectivePageSourceFactory) Optional(java.util.Optional) LogicalRowExpressions.binaryExpression(com.facebook.presto.expressions.LogicalRowExpressions.binaryExpression) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) PredicateCompiler(com.facebook.presto.spi.relation.PredicateCompiler) InputReferenceExpression(com.facebook.presto.spi.relation.InputReferenceExpression) IntStream(java.util.stream.IntStream) DynamicFilterExtractResult(com.facebook.presto.expressions.DynamicFilters.DynamicFilterExtractResult) DeterminismEvaluator(com.facebook.presto.spi.relation.DeterminismEvaluator) HiveSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxMergeDistance) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) HiveType(com.facebook.presto.hive.HiveType) HashMap(java.util.HashMap) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HiveSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxReadBlockSize) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) 
Inject(javax.inject.Inject) HashSet(java.util.HashSet) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getOrcMaxBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxBufferSize) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) Type(com.facebook.presto.common.type.Type) RowExpression(com.facebook.presto.spi.relation.RowExpression) Storage(com.facebook.presto.hive.metastore.Storage) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) HiveSessionProperties.getOrcStreamBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcStreamBufferSize) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) Maps(com.google.common.collect.Maps) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Consumer(java.util.function.Consumer) HiveUtil.typedPartitionKey(com.facebook.presto.hive.HiveUtil.typedPartitionKey) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HiveUtil.getPhysicalHiveColumnHandles(com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles) ImmutableBiMap.toImmutableBiMap(com.google.common.collect.ImmutableBiMap.toImmutableBiMap) RowExpressionNodeInliner.replaceExpression(com.facebook.presto.expressions.RowExpressionNodeInliner.replaceExpression) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) Set(java.util.Set) 
ImmutableSet(com.google.common.collect.ImmutableSet) HashSet(java.util.HashSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) RowExpression(com.facebook.presto.spi.relation.RowExpression) LinkedHashMap(java.util.LinkedHashMap) DynamicFilterExtractResult(com.facebook.presto.expressions.DynamicFilters.DynamicFilterExtractResult) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList)

Example 4 with Predicate

use of com.facebook.presto.common.relation.Predicate in project presto by prestodb.

the class TestOrcReaderPositions method testRowGroupSkipping.

/**
 * Reads a sequential file through a predicate that only accepts part of it and checks that
 * the values and positions reported by the reader cover exactly rows 50_000 up to 70_000.
 */
@Test
public void testRowGroupSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        // write one file of sequential values; the original notes it is a single stripe with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);
        // accept statistics that cover every row, otherwise only those whose minimum is 50_000 or 60_000
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            if (numberOfRows != rowCount) {
                IntegerStatistics integerStatistics = statisticsByColumnIndex.get(0).getIntegerStatistics();
                return (integerStatistics.getMin() == 50_000) || (integerStatistics.getMin() == 60_000);
            }
            return true;
        };
        try (OrcBatchRecordReader reader = createCustomOrcRecordReader(tempFile, ORC, predicate, BIGINT, MAX_BATCH_SIZE, false, false)) {
            // before any batch is read, the reader reports the full row count and position zero
            assertEquals(reader.getFileRowCount(), rowCount);
            assertEquals(reader.getReaderRowCount(), rowCount);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);
            long expectedPosition = 50_000;
            // nextBatch() returning -1 ends the loop
            for (int batchSize = reader.nextBatch(); batchSize != -1; batchSize = reader.nextBatch()) {
                Block block = reader.readBlock(0);
                for (int row = 0; row < batchSize; row++) {
                    assertEquals(BIGINT.getLong(block, row), expectedPosition + row);
                }
                assertEquals(reader.getFilePosition(), expectedPosition);
                assertEquals(reader.getReaderPosition(), expectedPosition);
                expectedPosition += batchSize;
            }
            // reading stopped after row 69_999, and the reader then reports end of file
            assertEquals(expectedPosition, 70_000);
            assertEquals(reader.getFilePosition(), rowCount);
            assertEquals(reader.getReaderPosition(), rowCount);
        }
    }
}
Also used : OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) Page(com.facebook.presto.common.Page) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) ByteBuffer(java.nio.ByteBuffer) Writer(org.apache.hadoop.hive.ql.io.orc.Writer) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) Path(org.apache.hadoop.fs.Path) Predicate(com.facebook.presto.common.relation.Predicate) RuntimeStats(com.facebook.presto.common.RuntimeStats) NullMemoryManager(org.apache.orc.NullMemoryManager) ImmutableMap(com.google.common.collect.ImmutableMap) Footer(com.facebook.presto.orc.metadata.Footer) SqlFunctionProperties(com.facebook.presto.common.function.SqlFunctionProperties) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) Math.min(java.lang.Math.min) Assert.assertNotNull(org.testng.Assert.assertNotNull) NOOP_ORC_AGGREGATED_MEMORY_CONTEXT(com.facebook.presto.orc.NoopOrcAggregatedMemoryContext.NOOP_ORC_AGGREGATED_MEMORY_CONTEXT) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MAX_BLOCK_SIZE(com.facebook.presto.orc.OrcTester.MAX_BLOCK_SIZE) Slice(io.airlift.slice.Slice) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) 
MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Assert.assertEquals(org.testng.Assert.assertEquals) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) MAX_BATCH_SIZE(com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) LongStream(java.util.stream.LongStream) BATCH_SIZE_GROWTH_FACTOR(com.facebook.presto.orc.OrcReader.BATCH_SIZE_GROWTH_FACTOR) UTF_8(java.nio.charset.StandardCharsets.UTF_8) SNAPPY(org.apache.hadoop.hive.ql.io.orc.CompressionKind.SNAPPY) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) IOException(java.io.IOException) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Field(java.lang.reflect.Field) Maps(com.google.common.collect.Maps) File(java.io.File) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Serializer(org.apache.hadoop.hive.serde2.Serializer) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Assert.assertTrue(org.testng.Assert.assertTrue) Block(com.facebook.presto.common.block.Block) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) Block(com.facebook.presto.common.block.Block) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Test(org.testng.annotations.Test)

Example 5 with Predicate

use of com.facebook.presto.common.relation.Predicate in project presto by prestodb.

the class TestOrcReaderPositions method testRowGroupSkippingWithAppendRowNumber.

/**
 * Reads a sequential file through a selective reader whose predicate only accepts part of
 * the file, and delegates to {@code verifyAppendNumber} with the values expected to survive
 * (50_000..59_999 and 70_000..79_999).
 */
@Test
public void testRowGroupSkippingWithAppendRowNumber() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        // create single stripe file with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);
        // accept statistics that cover every row, otherwise only those whose minimum is 50_000 or 70_000
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            if (numberOfRows == rowCount) {
                return true;
            }
            IntegerStatistics stats = statisticsByColumnIndex.get(0).getIntegerStatistics();
            return (stats.getMin() == 50_000) || (stats.getMin() == 70_000);
        };
        List<Long> expectedValues = new ArrayList<>();
        expectedValues.addAll(LongStream.range(50_000, 60_000).collect(ArrayList::new, List::add, List::addAll));
        expectedValues.addAll(LongStream.range(70_000, 80_000).collect(ArrayList::new, List::add, List::addAll));
        // Close the reader deterministically, matching how testRowGroupSkipping manages its reader.
        try (OrcSelectiveRecordReader reader = createCustomOrcSelectiveRecordReader(tempFile, ORC, predicate, BIGINT, MAX_BATCH_SIZE, false, true)) {
            verifyAppendNumber(expectedValues, reader);
        }
    }
}
Also used : OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) Page(com.facebook.presto.common.Page) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) ByteBuffer(java.nio.ByteBuffer) Writer(org.apache.hadoop.hive.ql.io.orc.Writer) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) Path(org.apache.hadoop.fs.Path) Predicate(com.facebook.presto.common.relation.Predicate) RuntimeStats(com.facebook.presto.common.RuntimeStats) NullMemoryManager(org.apache.orc.NullMemoryManager) ImmutableMap(com.google.common.collect.ImmutableMap) Footer(com.facebook.presto.orc.metadata.Footer) SqlFunctionProperties(com.facebook.presto.common.function.SqlFunctionProperties) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) Math.min(java.lang.Math.min) Assert.assertNotNull(org.testng.Assert.assertNotNull) NOOP_ORC_AGGREGATED_MEMORY_CONTEXT(com.facebook.presto.orc.NoopOrcAggregatedMemoryContext.NOOP_ORC_AGGREGATED_MEMORY_CONTEXT) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MAX_BLOCK_SIZE(com.facebook.presto.orc.OrcTester.MAX_BLOCK_SIZE) Slice(io.airlift.slice.Slice) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) 
MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Assert.assertEquals(org.testng.Assert.assertEquals) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) MAX_BATCH_SIZE(com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) LongStream(java.util.stream.LongStream) BATCH_SIZE_GROWTH_FACTOR(com.facebook.presto.orc.OrcReader.BATCH_SIZE_GROWTH_FACTOR) UTF_8(java.nio.charset.StandardCharsets.UTF_8) SNAPPY(org.apache.hadoop.hive.ql.io.orc.CompressionKind.SNAPPY) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) IOException(java.io.IOException) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Field(java.lang.reflect.Field) Maps(com.google.common.collect.Maps) File(java.io.File) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Serializer(org.apache.hadoop.hive.serde2.Serializer) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Assert.assertTrue(org.testng.Assert.assertTrue) Block(com.facebook.presto.common.block.Block) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) ArrayList(java.util.ArrayList) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Test(org.testng.annotations.Test)

Aggregations

Predicate (com.facebook.presto.common.relation.Predicate)7 Page (com.facebook.presto.common.Page)5 Block (com.facebook.presto.common.block.Block)4 SqlFunctionProperties (com.facebook.presto.common.function.SqlFunctionProperties)4 FilterFunction (com.facebook.presto.common.predicate.FilterFunction)4 RuntimeStats (com.facebook.presto.common.RuntimeStats)3 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)3 VARCHAR (com.facebook.presto.common.type.VarcharType.VARCHAR)3 NO_ENCRYPTION (com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION)3 NOOP_ORC_AGGREGATED_MEMORY_CONTEXT (com.facebook.presto.orc.NoopOrcAggregatedMemoryContext.NOOP_ORC_AGGREGATED_MEMORY_CONTEXT)3 ORC (com.facebook.presto.orc.OrcEncoding.ORC)3 BATCH_SIZE_GROWTH_FACTOR (com.facebook.presto.orc.OrcReader.BATCH_SIZE_GROWTH_FACTOR)3 INITIAL_BATCH_SIZE (com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE)3 MAX_BATCH_SIZE (com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE)3 ORC_12 (com.facebook.presto.orc.OrcTester.Format.ORC_12)3 MAX_BLOCK_SIZE (com.facebook.presto.orc.OrcTester.MAX_BLOCK_SIZE)3 OrcTester.createCustomOrcRecordReader (com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader)3 OrcTester.createCustomOrcSelectiveRecordReader (com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader)3 OrcTester.createOrcRecordWriter (com.facebook.presto.orc.OrcTester.createOrcRecordWriter)3 OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector)3