Search in sources :

Example 21 with DynamicFilter

use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

the class TestScanFilterAndProjectOperator method testPageYield.

/**
 * Drives a scan-filter-project operator over a single 1000-row page with 20 projections,
 * each backed by a function that forces a yield when invoked, and checks on which call
 * the projected page is finally produced.
 */
@Test
public void testPageYield() {
    int rowCount = 1000;
    // One projection per column; every projection function forces a yield when it runs.
    int columnCount = 20;
    Page sourcePage = SequencePageBuilder.createSequencePage(ImmutableList.of(BIGINT), rowCount, 1);
    DriverContext context = newDriverContext();

    // Register one yielding identity function per column.
    ImmutableList.Builder<SqlScalarFunction> yieldingFunctions = ImmutableList.builder();
    for (int column = 0; column < columnCount; column++) {
        yieldingFunctions.add(new GenericLongFunction("page_col" + column, value -> {
            context.getYieldSignal().forceYieldForTesting();
            return value;
        }));
    }
    functionAssertions.addFunctions(new InternalFunctionBundle(yieldingFunctions.build()));

    // Build one projection per registered function, each reading input channel 0.
    ExpressionCompiler expressionCompiler = new ExpressionCompiler(
            functionAssertions.getFunctionManager(),
            new PageFunctionCompiler(functionAssertions.getFunctionManager(), 0));
    ImmutableList.Builder<RowExpression> projectionBuilder = ImmutableList.builder();
    for (int column = 0; column < columnCount; column++) {
        projectionBuilder.add(call(
                functionAssertions.getMetadata().resolveFunction(session, QualifiedName.of("generic_long_page_col" + column), fromTypes(BIGINT)),
                field(0, BIGINT)));
    }
    List<RowExpression> projectionList = projectionBuilder.build();
    Supplier<CursorProcessor> cursorProcessor = expressionCompiler.compileCursorProcessor(Optional.empty(), projectionList, "key");
    Supplier<PageProcessor> pageProcessor = expressionCompiler.compilePageProcessor(Optional.empty(), projectionList, MAX_BATCH_SIZE);

    // The page source always serves the single prepared page, whatever split it is given.
    ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory operatorFactory = new ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory(
            0,
            new PlanNodeId("test"),
            new PlanNodeId("0"),
            (session, split, table, columns, dynamicFilter) -> new FixedPageSource(ImmutableList.of(sourcePage)),
            cursorProcessor,
            pageProcessor,
            TEST_TABLE_HANDLE,
            ImmutableList.of(),
            DynamicFilter.EMPTY,
            ImmutableList.of(BIGINT),
            DataSize.ofBytes(0),
            0);
    SourceOperator scanOperator = operatorFactory.createOperator(context);
    scanOperator.addSplit(new Split(new CatalogName("test"), TestingSplit.createLocalSplit(), Lifespan.taskWide()));
    scanOperator.noMoreSplits();

    // Only the call whose ordinal equals the column count is expected to return a page
    // (one block per column); every other call must return null.
    int totalCalls = rowCount * columnCount;
    for (int attempt = 1; attempt <= totalCalls; attempt++) {
        context.getYieldSignal().setWithDelay(SECONDS.toNanos(1000), context.getYieldExecutor());
        Page output = scanOperator.getOutput();
        if (attempt != columnCount) {
            assertNull(output);
        } else {
            assertNotNull(output);
            assertEquals(output.getPositionCount(), rowCount);
            assertEquals(output.getChannelCount(), columnCount);
            // Every projected channel must carry the same values as the single input channel.
            for (int channel = 0; channel < columnCount; channel++) {
                assertEquals(toValues(BIGINT, output.getBlock(channel)), toValues(BIGINT, sourcePage.getBlock(0)));
            }
        }
        context.getYieldSignal().reset();
    }
}
Also used : MaterializedResult(io.trino.testing.MaterializedResult) TypeSignatureProvider.fromTypes(io.trino.sql.analyzer.TypeSignatureProvider.fromTypes) BlockAssertions(io.trino.block.BlockAssertions) Test(org.testng.annotations.Test) Expressions.field(io.trino.sql.relational.Expressions.field) LazyPagePageProjection(io.trino.operator.project.TestPageProcessor.LazyPagePageProjection) SequencePageBuilder(io.trino.SequencePageBuilder) LazyBlock(io.trino.spi.block.LazyBlock) CatalogName(io.trino.connector.CatalogName) Block(io.trino.spi.block.Block) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) CursorProcessor(io.trino.operator.project.CursorProcessor) TEST_SESSION(io.trino.SessionTestUtils.TEST_SESSION) PageRecordSet(io.trino.operator.index.PageRecordSet) RowPagesBuilder.rowPagesBuilder(io.trino.RowPagesBuilder.rowPagesBuilder) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) EQUAL(io.trino.spi.function.OperatorType.EQUAL) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) Expressions.constant(io.trino.sql.relational.Expressions.constant) PageFunctionCompiler(io.trino.sql.gen.PageFunctionCompiler) SelectAllFilter(io.trino.operator.project.TestPageProcessor.SelectAllFilter) Assert.assertNotNull(org.testng.Assert.assertNotNull) AbstractTestFunctions(io.trino.operator.scalar.AbstractTestFunctions) DataSize(io.airlift.units.DataSize) List(java.util.List) FixedPageSource(io.trino.spi.connector.FixedPageSource) BIGINT(io.trino.spi.type.BigintType.BIGINT) PageAssertions.assertPageEquals(io.trino.operator.PageAssertions.assertPageEquals) Split(io.trino.metadata.Split) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) BlockAssertions.toValues(io.trino.block.BlockAssertions.toValues) Assert.assertNull(org.testng.Assert.assertNull) Page(io.trino.spi.Page) 
Supplier(java.util.function.Supplier) ExpressionCompiler(io.trino.sql.gen.ExpressionCompiler) MAX_BATCH_SIZE(io.trino.operator.project.PageProcessor.MAX_BATCH_SIZE) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) RecordPageSource(io.trino.spi.connector.RecordPageSource) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) PageProcessor(io.trino.operator.project.PageProcessor) TestingSplit(io.trino.testing.TestingSplit) Lifespan(io.trino.execution.Lifespan) ExecutorService(java.util.concurrent.ExecutorService) TestingTaskContext.createTaskContext(io.trino.testing.TestingTaskContext.createTaskContext) AfterClass(org.testng.annotations.AfterClass) SqlScalarFunction(io.trino.metadata.SqlScalarFunction) FunctionManager(io.trino.metadata.FunctionManager) QualifiedName(io.trino.sql.tree.QualifiedName) OperatorAssertion.toMaterializedResult(io.trino.operator.OperatorAssertion.toMaterializedResult) Expressions.call(io.trino.sql.relational.Expressions.call) RowExpression(io.trino.sql.relational.RowExpression) InternalFunctionBundle(io.trino.metadata.InternalFunctionBundle) TEST_TABLE_HANDLE(io.trino.testing.TestingHandles.TEST_TABLE_HANDLE) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) Assert.assertTrue(org.testng.Assert.assertTrue) SECONDS(java.util.concurrent.TimeUnit.SECONDS) PageFunctionCompiler(io.trino.sql.gen.PageFunctionCompiler) CursorProcessor(io.trino.operator.project.CursorProcessor) ImmutableList(com.google.common.collect.ImmutableList) Page(io.trino.spi.Page) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) PageProcessor(io.trino.operator.project.PageProcessor) InternalFunctionBundle(io.trino.metadata.InternalFunctionBundle) RowExpression(io.trino.sql.relational.RowExpression) FixedPageSource(io.trino.spi.connector.FixedPageSource) 
SqlScalarFunction(io.trino.metadata.SqlScalarFunction) ExpressionCompiler(io.trino.sql.gen.ExpressionCompiler) CatalogName(io.trino.connector.CatalogName) Split(io.trino.metadata.Split) TestingSplit(io.trino.testing.TestingSplit) Test(org.testng.annotations.Test)

Example 22 with DynamicFilter

use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

the class DeltaLakePageSourceProvider method createPageSource.

/**
 * Creates the page source for one Delta Lake split.
 *
 * <p>Returns an {@link EmptyPageSource} when the combined predicate is {@code none},
 * a {@code DeltaLakeUpdatablePageSource} when the table handle carries a write type,
 * and otherwise a {@code DeltaLakePageSource} wrapping a Parquet reader.
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
    DeltaLakeSplit deltaSplit = (DeltaLakeSplit) connectorSplit;
    DeltaLakeTableHandle deltaTable = (DeltaLakeTableHandle) connectorTable;

    // The split reaches this point because it could not be pruned earlier. The dynamic filter
    // may have become more selective since then, so intersect its current predicate with the
    // table's non-partition constraint and the split's statistics predicate; if nothing can
    // match, skip the split without opening the Parquet file.
    TupleDomain<DeltaLakeColumnHandle> effectivePredicate = TupleDomain.intersect(ImmutableList.of(
            deltaTable.getNonPartitionConstraint(),
            deltaSplit.getStatisticsPredicate(),
            dynamicFilter.getCurrentPredicate().transformKeys(DeltaLakeColumnHandle.class::cast)));
    if (effectivePredicate.isNone()) {
        return new EmptyPageSource();
    }

    List<DeltaLakeColumnHandle> requestedColumns = columns.stream()
            .map(DeltaLakeColumnHandle.class::cast)
            .collect(toImmutableList());
    Map<String, Optional<String>> partitionValues = deltaSplit.getPartitionKeys();
    // Only REGULAR columns are handed to the Parquet reader, converted to Hive column handles.
    List<HiveColumnHandle> parquetColumns = requestedColumns.stream()
            .filter(handle -> handle.getColumnType() == REGULAR)
            .map(DeltaLakeColumnHandle::toHiveColumnHandle)
            .collect(toImmutableList());
    Path filePath = new Path(deltaSplit.getPath());
    HdfsContext context = new HdfsContext(session);
    TupleDomain<HiveColumnHandle> parquetPredicate = getParquetTupleDomain(effectivePredicate.simplify(domainCompactionThreshold));

    // A write type on the table handle selects the updatable page source.
    if (deltaTable.getWriteType().isPresent()) {
        return new DeltaLakeUpdatablePageSource(
                deltaTable,
                requestedColumns,
                partitionValues,
                deltaSplit.getPath(),
                deltaSplit.getFileSize(),
                deltaSplit.getFileModifiedTime(),
                session,
                executorService,
                hdfsEnvironment,
                context,
                parquetDateTimeZone,
                parquetReaderOptions,
                parquetPredicate,
                typeManager,
                updateResultJsonCodec);
    }

    ReaderPageSource parquetSource = ParquetPageSourceFactory.createPageSource(
            filePath,
            deltaSplit.getStart(),
            deltaSplit.getLength(),
            deltaSplit.getFileSize(),
            parquetColumns,
            parquetPredicate,
            true,
            hdfsEnvironment,
            hdfsEnvironment.getConfiguration(context, filePath),
            session.getIdentity(),
            parquetDateTimeZone,
            fileFormatDataSourceStats,
            parquetReaderOptions.withMaxReadBlockSize(getParquetMaxReadBlockSize(session)).withUseColumnIndex(isParquetUseColumnIndex(session)));
    verify(parquetSource.getReaderColumns().isEmpty(), "All columns expected to be base columns");
    return new DeltaLakePageSource(
            requestedColumns,
            partitionValues,
            parquetSource.get(),
            deltaSplit.getPath(),
            deltaSplit.getFileSize(),
            deltaSplit.getFileModifiedTime());
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) HiveSessionProperties.isParquetUseColumnIndex(io.trino.plugin.hive.HiveSessionProperties.isParquetUseColumnIndex) Inject(javax.inject.Inject) ParquetPageSourceFactory(io.trino.plugin.hive.parquet.ParquetPageSourceFactory) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) Path(org.apache.hadoop.fs.Path) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ExecutorService(java.util.concurrent.ExecutorService) ParquetReaderOptions(io.trino.parquet.ParquetReaderOptions) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ImmutableMap(com.google.common.collect.ImmutableMap) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) StandardTypes(io.trino.spi.type.StandardTypes) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) DeltaLakeSessionProperties.getParquetMaxReadBlockSize(io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetMaxReadBlockSize) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) EmptyPageSource(io.trino.spi.connector.EmptyPageSource) TypeManager(io.trino.spi.type.TypeManager) 
HiveConfig(io.trino.plugin.hive.HiveConfig) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) JsonCodec(io.airlift.json.JsonCodec) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) Path(org.apache.hadoop.fs.Path) Optional(java.util.Optional) EmptyPageSource(io.trino.spi.connector.EmptyPageSource) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Example 23 with DynamicFilter

use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

the class TestLocalDynamicFiltersCollector method testDynamicFilterCancellation.

/**
 * Checks that attempting to cancel the future returned by {@code isBlocked()} leaves both the
 * future and the filter incomplete, and that the filter still completes once the collector
 * receives the domain for its filter id.
 */
@Test
public void testDynamicFilterCancellation() {
    DynamicFilterId id = new DynamicFilterId("filter");
    LocalDynamicFiltersCollector filtersCollector = new LocalDynamicFiltersCollector(TEST_SESSION);
    filtersCollector.register(ImmutableSet.of(id));

    SymbolAllocator allocator = new SymbolAllocator();
    Symbol probeSymbol = allocator.newSymbol("symbol", BIGINT);
    ColumnHandle probeColumn = new TestingColumnHandle("column");
    DynamicFilters.Descriptor descriptor = new DynamicFilters.Descriptor(id, probeSymbol.toSymbolReference());
    DynamicFilter dynamicFilter = createDynamicFilter(
            filtersCollector,
            ImmutableList.of(descriptor),
            ImmutableMap.of(probeSymbol, probeColumn),
            allocator.getTypes());

    // Nothing collected yet: the filter is blocked, incomplete, and unconstrained.
    CompletableFuture<?> blocked = dynamicFilter.isBlocked();
    assertFalse(dynamicFilter.isComplete());
    assertFalse(blocked.isDone());
    assertEquals(dynamicFilter.getCurrentPredicate(), TupleDomain.all());

    // The cancel attempt is expected to be rejected and to leave the collector-side state untouched.
    assertFalse(blocked.cancel(false));
    assertFalse(blocked.isDone());
    assertFalse(dynamicFilter.isComplete());

    Domain collectedDomain = Domain.singleValue(BIGINT, 7L);
    filtersCollector.collectDynamicFilterDomains(ImmutableMap.of(id, collectedDomain));

    // After collection the filter is complete, the future is done, and the column is constrained.
    assertTrue(dynamicFilter.isComplete());
    assertTrue(blocked.isDone());
    assertEquals(dynamicFilter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(probeColumn, collectedDomain)));
}
Also used : TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) DynamicFilter(io.trino.spi.connector.DynamicFilter) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) Test(org.testng.annotations.Test)

Example 24 with DynamicFilter

use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

the class TestLocalDynamicFiltersCollector method testMultipleProbeColumns.

/**
 * Checks a dynamic filter in which one filter id is described against two symbols mapped to
 * two columns: both columns are reported as covered, and both receive the collected domain.
 */
@Test
public void testMultipleProbeColumns() {
    DynamicFilterId id = new DynamicFilterId("filter");
    LocalDynamicFiltersCollector filtersCollector = new LocalDynamicFiltersCollector(TEST_SESSION);
    filtersCollector.register(ImmutableSet.of(id));

    // A single build-side filter id is matched against two probe-side symbols/columns.
    SymbolAllocator allocator = new SymbolAllocator();
    Symbol firstSymbol = allocator.newSymbol("symbol1", BIGINT);
    Symbol secondSymbol = allocator.newSymbol("symbol2", BIGINT);
    ColumnHandle firstColumn = new TestingColumnHandle("column1");
    ColumnHandle secondColumn = new TestingColumnHandle("column2");
    DynamicFilters.Descriptor firstDescriptor = new DynamicFilters.Descriptor(id, firstSymbol.toSymbolReference());
    DynamicFilters.Descriptor secondDescriptor = new DynamicFilters.Descriptor(id, secondSymbol.toSymbolReference());
    DynamicFilter dynamicFilter = createDynamicFilter(
            filtersCollector,
            ImmutableList.of(firstDescriptor, secondDescriptor),
            ImmutableMap.of(firstSymbol, firstColumn, secondSymbol, secondColumn),
            allocator.getTypes());
    assertEquals(dynamicFilter.getColumnsCovered(), Set.of(firstColumn, secondColumn), "columns covered");

    // Nothing collected yet: blocked, awaitable, incomplete, and unconstrained.
    CompletableFuture<?> blocked = dynamicFilter.isBlocked();
    assertFalse(dynamicFilter.isComplete());
    assertTrue(dynamicFilter.isAwaitable());
    assertFalse(blocked.isDone());
    assertEquals(dynamicFilter.getCurrentPredicate(), TupleDomain.all());

    Domain collectedDomain = Domain.singleValue(BIGINT, 7L);
    filtersCollector.collectDynamicFilterDomains(ImmutableMap.of(id, collectedDomain));

    // After collection: complete, no longer awaitable, and both columns carry the domain.
    assertTrue(dynamicFilter.isComplete());
    assertFalse(dynamicFilter.isAwaitable());
    assertTrue(blocked.isDone());
    assertEquals(dynamicFilter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(firstColumn, collectedDomain, secondColumn, collectedDomain)));
}
Also used : TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) DynamicFilter(io.trino.spi.connector.DynamicFilter) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) Test(org.testng.annotations.Test)

Example 25 with DynamicFilter

use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

the class TestLocalDynamicFiltersCollector method testUnusedDynamicFilter.

/**
 * Checks that a domain collected for a registered filter id which the created dynamic filter
 * does not reference leaves that filter blocked, while the domain for the referenced id
 * completes it.
 */
@Test
public void testUnusedDynamicFilter() {
    LocalDynamicFiltersCollector filtersCollector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId unusedId = new DynamicFilterId("unused");
    DynamicFilterId usedId = new DynamicFilterId("used");
    filtersCollector.register(ImmutableSet.of(unusedId));
    filtersCollector.register(ImmutableSet.of(usedId));

    // Only the "used" filter id is referenced by the dynamic filter built for the table scan.
    SymbolAllocator allocator = new SymbolAllocator();
    Symbol probeSymbol = allocator.newSymbol("used", BIGINT);
    ColumnHandle probeColumn = new TestingColumnHandle("used");
    DynamicFilters.Descriptor usedDescriptor = new DynamicFilters.Descriptor(usedId, probeSymbol.toSymbolReference());
    DynamicFilter dynamicFilter = createDynamicFilter(
            filtersCollector,
            ImmutableList.of(usedDescriptor),
            ImmutableMap.of(probeSymbol, probeColumn),
            allocator.getTypes());

    // Nothing collected yet: blocked, awaitable, incomplete, and unconstrained.
    CompletableFuture<?> blocked = dynamicFilter.isBlocked();
    assertFalse(dynamicFilter.isComplete());
    assertTrue(dynamicFilter.isAwaitable());
    assertFalse(blocked.isDone());
    assertEquals(dynamicFilter.getCurrentPredicate(), TupleDomain.all());

    // Collecting the unreferenced filter must not unblock or complete this dynamic filter.
    Domain unusedDomain = Domain.singleValue(BIGINT, 1L);
    filtersCollector.collectDynamicFilterDomains(ImmutableMap.of(unusedId, unusedDomain));
    assertFalse(dynamicFilter.isComplete());
    assertFalse(blocked.isDone());
    assertEquals(dynamicFilter.getCurrentPredicate(), TupleDomain.all());

    // Collecting the referenced filter unblocks and completes it, constraining the column.
    Domain usedDomain = Domain.singleValue(BIGINT, 2L);
    filtersCollector.collectDynamicFilterDomains(ImmutableMap.of(usedId, usedDomain));
    assertTrue(dynamicFilter.isComplete());
    assertFalse(dynamicFilter.isAwaitable());
    assertTrue(blocked.isDone());
    assertEquals(dynamicFilter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(probeColumn, usedDomain)));
}
Also used : TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) DynamicFilter(io.trino.spi.connector.DynamicFilter) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) Test(org.testng.annotations.Test)

Aggregations

DynamicFilter (io.trino.spi.connector.DynamicFilter)32 Test (org.testng.annotations.Test)23 DynamicFilterId (io.trino.sql.planner.plan.DynamicFilterId)20 TestingColumnHandle (io.trino.spi.connector.TestingColumnHandle)19 ColumnHandle (io.trino.spi.connector.ColumnHandle)18 TupleDomain (io.trino.spi.predicate.TupleDomain)13 Symbol (io.trino.sql.planner.Symbol)11 SymbolAllocator (io.trino.sql.planner.SymbolAllocator)11 QueryId (io.trino.spi.QueryId)10 List (java.util.List)10 ImmutableList (com.google.common.collect.ImmutableList)9 StageId (io.trino.execution.StageId)9 Domain (io.trino.spi.predicate.Domain)9 Objects.requireNonNull (java.util.Objects.requireNonNull)9 TaskId (io.trino.execution.TaskId)8 ConnectorSession (io.trino.spi.connector.ConnectorSession)8 ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle)8 ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle)8 Optional (java.util.Optional)8 Inject (javax.inject.Inject)8