use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
the class TestScanFilterAndProjectOperator method testPageYield.
@Test
public void testPageYield() {
int totalRows = 1000;
Page input = SequencePageBuilder.createSequencePage(ImmutableList.of(BIGINT), totalRows, 1);
DriverContext driverContext = newDriverContext();
// 20 columns; each column is associated with a function that will force yield per projection
int totalColumns = 20;
ImmutableList.Builder<SqlScalarFunction> functions = ImmutableList.builder();
for (int i = 0; i < totalColumns; i++) {
functions.add(new GenericLongFunction("page_col" + i, value -> {
driverContext.getYieldSignal().forceYieldForTesting();
return value;
}));
}
functionAssertions.addFunctions(new InternalFunctionBundle(functions.build()));
// match each column with a projection
ExpressionCompiler expressionCompiler = new ExpressionCompiler(functionAssertions.getFunctionManager(), new PageFunctionCompiler(functionAssertions.getFunctionManager(), 0));
ImmutableList.Builder<RowExpression> projections = ImmutableList.builder();
for (int i = 0; i < totalColumns; i++) {
projections.add(call(functionAssertions.getMetadata().resolveFunction(session, QualifiedName.of("generic_long_page_col" + i), fromTypes(BIGINT)), field(0, BIGINT)));
}
Supplier<CursorProcessor> cursorProcessor = expressionCompiler.compileCursorProcessor(Optional.empty(), projections.build(), "key");
Supplier<PageProcessor> pageProcessor = expressionCompiler.compilePageProcessor(Optional.empty(), projections.build(), MAX_BATCH_SIZE);
ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory factory = new ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory(0, new PlanNodeId("test"), new PlanNodeId("0"), (session, split, table, columns, dynamicFilter) -> new FixedPageSource(ImmutableList.of(input)), cursorProcessor, pageProcessor, TEST_TABLE_HANDLE, ImmutableList.of(), DynamicFilter.EMPTY, ImmutableList.of(BIGINT), DataSize.ofBytes(0), 0);
SourceOperator operator = factory.createOperator(driverContext);
operator.addSplit(new Split(new CatalogName("test"), TestingSplit.createLocalSplit(), Lifespan.taskWide()));
operator.noMoreSplits();
// exactly 20 blocks (one for each column) and the PageProcessor will be able to create a Page out of it.
for (int i = 1; i <= totalRows * totalColumns; i++) {
driverContext.getYieldSignal().setWithDelay(SECONDS.toNanos(1000), driverContext.getYieldExecutor());
Page page = operator.getOutput();
if (i == totalColumns) {
assertNotNull(page);
assertEquals(page.getPositionCount(), totalRows);
assertEquals(page.getChannelCount(), totalColumns);
for (int j = 0; j < totalColumns; j++) {
assertEquals(toValues(BIGINT, page.getBlock(j)), toValues(BIGINT, input.getBlock(0)));
}
} else {
assertNull(page);
}
driverContext.getYieldSignal().reset();
}
}
use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
the class DeltaLakePageSourceProvider method createPageSource.
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
DeltaLakeSplit split = (DeltaLakeSplit) connectorSplit;
DeltaLakeTableHandle table = (DeltaLakeTableHandle) connectorTable;
// We reach here when we could not prune the split using file level stats, table predicate
// and the dynamic filter in the coordinator during split generation. The file level stats
// in DeltaLakeSplit#filePredicate could help to prune this split when a more selective dynamic filter
// is available now, without having to access parquet file footer for row-group stats.
// We avoid sending DeltaLakeSplit#splitPredicate to workers by using table.getPredicate() here.
TupleDomain<DeltaLakeColumnHandle> filteredSplitPredicate = TupleDomain.intersect(ImmutableList.of(table.getNonPartitionConstraint(), split.getStatisticsPredicate(), dynamicFilter.getCurrentPredicate().transformKeys(DeltaLakeColumnHandle.class::cast)));
if (filteredSplitPredicate.isNone()) {
return new EmptyPageSource();
}
List<DeltaLakeColumnHandle> deltaLakeColumns = columns.stream().map(DeltaLakeColumnHandle.class::cast).collect(toImmutableList());
Map<String, Optional<String>> partitionKeys = split.getPartitionKeys();
List<DeltaLakeColumnHandle> regularColumns = deltaLakeColumns.stream().filter(column -> column.getColumnType() == REGULAR).collect(toImmutableList());
List<HiveColumnHandle> hiveColumnHandles = regularColumns.stream().map(DeltaLakeColumnHandle::toHiveColumnHandle).collect(toImmutableList());
Path path = new Path(split.getPath());
HdfsContext hdfsContext = new HdfsContext(session);
TupleDomain<HiveColumnHandle> parquetPredicate = getParquetTupleDomain(filteredSplitPredicate.simplify(domainCompactionThreshold));
if (table.getWriteType().isPresent()) {
return new DeltaLakeUpdatablePageSource(table, deltaLakeColumns, partitionKeys, split.getPath(), split.getFileSize(), split.getFileModifiedTime(), session, executorService, hdfsEnvironment, hdfsContext, parquetDateTimeZone, parquetReaderOptions, parquetPredicate, typeManager, updateResultJsonCodec);
}
ReaderPageSource pageSource = ParquetPageSourceFactory.createPageSource(path, split.getStart(), split.getLength(), split.getFileSize(), hiveColumnHandles, parquetPredicate, true, hdfsEnvironment, hdfsEnvironment.getConfiguration(hdfsContext, path), session.getIdentity(), parquetDateTimeZone, fileFormatDataSourceStats, parquetReaderOptions.withMaxReadBlockSize(getParquetMaxReadBlockSize(session)).withUseColumnIndex(isParquetUseColumnIndex(session)));
verify(pageSource.getReaderColumns().isEmpty(), "All columns expected to be base columns");
return new DeltaLakePageSource(deltaLakeColumns, partitionKeys, pageSource.get(), split.getPath(), split.getFileSize(), split.getFileModifiedTime());
}
use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
the class TestLocalDynamicFiltersCollector method testDynamicFilterCancellation.
@Test
public void testDynamicFilterCancellation() {
LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
DynamicFilterId filterId = new DynamicFilterId("filter");
collector.register(ImmutableSet.of(filterId));
SymbolAllocator symbolAllocator = new SymbolAllocator();
Symbol symbol = symbolAllocator.newSymbol("symbol", BIGINT);
ColumnHandle column = new TestingColumnHandle("column");
DynamicFilter filter = createDynamicFilter(collector, ImmutableList.of(new DynamicFilters.Descriptor(filterId, symbol.toSymbolReference())), ImmutableMap.of(symbol, column), symbolAllocator.getTypes());
// Filter is blocked and not completed.
CompletableFuture<?> isBlocked = filter.isBlocked();
assertFalse(filter.isComplete());
assertFalse(isBlocked.isDone());
assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
// DynamicFilter future cancellation should not affect LocalDynamicFiltersCollector
assertFalse(isBlocked.cancel(false));
assertFalse(isBlocked.isDone());
assertFalse(filter.isComplete());
Domain domain = Domain.singleValue(BIGINT, 7L);
collector.collectDynamicFilterDomains(ImmutableMap.of(filterId, domain));
// Unblocked and completed.
assertTrue(filter.isComplete());
assertTrue(isBlocked.isDone());
assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(column, domain)));
}
use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
the class TestLocalDynamicFiltersCollector method testMultipleProbeColumns.
@Test
public void testMultipleProbeColumns() {
LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
DynamicFilterId filterId = new DynamicFilterId("filter");
collector.register(ImmutableSet.of(filterId));
// Same build-side column being matched to multiple probe-side columns.
SymbolAllocator symbolAllocator = new SymbolAllocator();
Symbol symbol1 = symbolAllocator.newSymbol("symbol1", BIGINT);
Symbol symbol2 = symbolAllocator.newSymbol("symbol2", BIGINT);
ColumnHandle column1 = new TestingColumnHandle("column1");
ColumnHandle column2 = new TestingColumnHandle("column2");
DynamicFilter filter = createDynamicFilter(collector, ImmutableList.of(new DynamicFilters.Descriptor(filterId, symbol1.toSymbolReference()), new DynamicFilters.Descriptor(filterId, symbol2.toSymbolReference())), ImmutableMap.of(symbol1, column1, symbol2, column2), symbolAllocator.getTypes());
assertEquals(filter.getColumnsCovered(), Set.of(column1, column2), "columns covered");
// Filter is blocked and not completed.
CompletableFuture<?> isBlocked = filter.isBlocked();
assertFalse(filter.isComplete());
assertTrue(filter.isAwaitable());
assertFalse(isBlocked.isDone());
assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
Domain domain = Domain.singleValue(BIGINT, 7L);
collector.collectDynamicFilterDomains(ImmutableMap.of(filterId, domain));
// Unblocked and completed.
assertTrue(filter.isComplete());
assertFalse(filter.isAwaitable());
assertTrue(isBlocked.isDone());
assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(column1, domain, column2, domain)));
}
use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
the class TestLocalDynamicFiltersCollector method testUnusedDynamicFilter.
@Test
public void testUnusedDynamicFilter() {
LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
DynamicFilterId unusedFilterId = new DynamicFilterId("unused");
DynamicFilterId usedFilterId = new DynamicFilterId("used");
collector.register(ImmutableSet.of(unusedFilterId));
collector.register(ImmutableSet.of(usedFilterId));
// One of the dynamic filters is not used for the the table scan.
SymbolAllocator symbolAllocator = new SymbolAllocator();
Symbol usedSymbol = symbolAllocator.newSymbol("used", BIGINT);
ColumnHandle usedColumn = new TestingColumnHandle("used");
DynamicFilter filter = createDynamicFilter(collector, ImmutableList.of(new DynamicFilters.Descriptor(usedFilterId, usedSymbol.toSymbolReference())), ImmutableMap.of(usedSymbol, usedColumn), symbolAllocator.getTypes());
// Filter is blocking and not completed.
CompletableFuture<?> isBlocked = filter.isBlocked();
assertFalse(filter.isComplete());
assertTrue(filter.isAwaitable());
assertFalse(isBlocked.isDone());
assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
collector.collectDynamicFilterDomains(ImmutableMap.of(unusedFilterId, Domain.singleValue(BIGINT, 1L)));
// This dynamic filter is unused here - has no effect on blocking/completion of the above future.
assertFalse(filter.isComplete());
assertFalse(isBlocked.isDone());
assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
collector.collectDynamicFilterDomains(ImmutableMap.of(usedFilterId, Domain.singleValue(BIGINT, 2L)));
// Unblocked and completed.
assertTrue(filter.isComplete());
assertFalse(filter.isAwaitable());
assertTrue(isBlocked.isDone());
assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(usedColumn, Domain.singleValue(BIGINT, 2L))));
}
Aggregations