Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From class TestLocalDynamicFiltersCollector, method testIsNotDistinctFrom.
@Test
public void testIsNotDistinctFrom() {
    LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId filterId1 = new DynamicFilterId("filter1");
    DynamicFilterId filterId2 = new DynamicFilterId("filter2");
    collector.register(ImmutableSet.of(filterId1, filterId2));
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol1 = symbolAllocator.newSymbol("symbol1", BIGINT);
    Symbol symbol2 = symbolAllocator.newSymbol("symbol2", BIGINT);
    ColumnHandle column1 = new TestingColumnHandle("column1");
    ColumnHandle column2 = new TestingColumnHandle("column2");
    DynamicFilter filter = createDynamicFilter(
            collector,
            ImmutableList.of(
                    new DynamicFilters.Descriptor(filterId1, symbol1.toSymbolReference(), EQUAL, true),
                    new DynamicFilters.Descriptor(filterId2, symbol2.toSymbolReference(), EQUAL, true)),
            ImmutableMap.of(symbol1, column1, symbol2, column2),
            symbolAllocator.getTypes());
    assertEquals(filter.getColumnsCovered(), Set.of(column1, column2), "columns covered");
    // Filter is blocked and not completed.
    CompletableFuture<?> isBlocked = filter.isBlocked();
    assertFalse(filter.isComplete());
    assertFalse(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
    collector.collectDynamicFilterDomains(ImmutableMap.of(
            filterId1, Domain.multipleValues(BIGINT, ImmutableList.of(4L, 5L, 6L)),
            filterId2, Domain.none(BIGINT)));
    // Unblocked and completed.
    assertTrue(filter.isComplete());
    assertTrue(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(
            column1, Domain.create(ValueSet.of(BIGINT, 4L, 5L, 6L), true),
            column2, Domain.onlyNull(BIGINT))));
}
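The trailing boolean in DynamicFilters.Descriptor marks the comparison as null-aware (IS NOT DISTINCT FROM), which is why the expected domains above keep nulls. A minimal sketch of that distinction, assuming only the trino-spi predicate types already used in the test (the class name NullAwareDomainSketch is made up for illustration):

import com.google.common.collect.ImmutableList;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.ValueSet;
import static io.trino.spi.type.BigintType.BIGINT;

public class NullAwareDomainSketch {
    public static void main(String[] args) {
        // Plain equality never matches NULL, so a collected domain excludes nulls.
        Domain equality = Domain.multipleValues(BIGINT, ImmutableList.of(4L, 5L, 6L));
        // IS NOT DISTINCT FROM treats NULL as equal to NULL, so the same values
        // are collected with nullAllowed = true, as asserted for column1 above.
        Domain nullAware = Domain.create(ValueSet.of(BIGINT, 4L, 5L, 6L), true);
        System.out.println(equality.isNullAllowed()); // false
        System.out.println(nullAware.isNullAllowed()); // true
        // A build side that produced no non-null values collapses to "only null",
        // matching the column2 expectation (Domain.none would mean "no rows at all").
        System.out.println(Domain.onlyNull(BIGINT).isOnlyNull()); // true
    }
}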
Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From class TestLocalDynamicFiltersCollector, method testUnregisteredDynamicFilter.
@Test
public void testUnregisteredDynamicFilter() {
    // One dynamic filter is not collected locally (e.g. due to a distributed join)
    LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId registeredFilterId = new DynamicFilterId("registered");
    DynamicFilterId unregisteredFilterId = new DynamicFilterId("unregistered");
    collector.register(ImmutableSet.of(registeredFilterId));
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol registeredSymbol = symbolAllocator.newSymbol("registered", BIGINT);
    Symbol unregisteredSymbol = symbolAllocator.newSymbol("unregistered", BIGINT);
    ColumnHandle registeredColumn = new TestingColumnHandle("registered");
    ColumnHandle unregisteredColumn = new TestingColumnHandle("unregistered");
    DynamicFilter filter = createDynamicFilter(
            collector,
            ImmutableList.of(
                    new DynamicFilters.Descriptor(registeredFilterId, registeredSymbol.toSymbolReference()),
                    new DynamicFilters.Descriptor(unregisteredFilterId, unregisteredSymbol.toSymbolReference())),
            ImmutableMap.of(registeredSymbol, registeredColumn, unregisteredSymbol, unregisteredColumn),
            symbolAllocator.getTypes());
    // Filter is blocked and not completed.
    CompletableFuture<?> isBlocked = filter.isBlocked();
    assertFalse(filter.isComplete());
    assertTrue(filter.isAwaitable());
    assertFalse(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
    collector.collectDynamicFilterDomains(ImmutableMap.of(registeredFilterId, Domain.singleValue(BIGINT, 2L)));
    // Unblocked and completed (don't wait for the unregistered filter).
    assertTrue(filter.isComplete());
    assertFalse(filter.isAwaitable());
    assertTrue(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(registeredColumn, Domain.singleValue(BIGINT, 2L))));
}
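The isAwaitable() assertions are the key point here: a filter ID that was never registered with the local collector can never narrow the predicate further, so once the registered filter arrives the DynamicFilter stops being awaitable. A sketch of how a consumer might rely on that, using only the DynamicFilter SPI methods exercised above (the class and helper names are hypothetical):

import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.DynamicFilter;
import io.trino.spi.predicate.TupleDomain;

public final class DynamicFilterWait {
    static TupleDomain<ColumnHandle> awaitNarrowedPredicate(DynamicFilter dynamicFilter) {
        // Loop only while further narrowing is still possible; domains that will
        // never arrive locally (like the unregistered filter above) do not keep
        // isAwaitable() true, so this cannot block forever waiting on them.
        while (!dynamicFilter.isComplete() && dynamicFilter.isAwaitable()) {
            dynamicFilter.isBlocked().join(); // real code would compose the future asynchronously
        }
        return dynamicFilter.getCurrentPredicate();
    }
}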
Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From class TestLocalDynamicFiltersCollector, method testSingleEquality.
@Test
public void testSingleEquality() {
    LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId filterId = new DynamicFilterId("filter");
    collector.register(ImmutableSet.of(filterId));
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol = symbolAllocator.newSymbol("symbol", BIGINT);
    ColumnHandle column = new TestingColumnHandle("column");
    DynamicFilter filter = createDynamicFilter(
            collector,
            ImmutableList.of(new DynamicFilters.Descriptor(filterId, symbol.toSymbolReference())),
            ImmutableMap.of(symbol, column),
            symbolAllocator.getTypes());
    assertEquals(filter.getColumnsCovered(), Set.of(column), "columns covered");
    // Filter is blocked and not completed.
    CompletableFuture<?> isBlocked = filter.isBlocked();
    assertFalse(filter.isComplete());
    assertTrue(filter.isAwaitable());
    assertFalse(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
    Domain domain = Domain.singleValue(BIGINT, 7L);
    collector.collectDynamicFilterDomains(ImmutableMap.of(filterId, domain));
    // Unblocked and completed.
    assertTrue(filter.isComplete());
    assertFalse(filter.isAwaitable());
    assertTrue(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(column, domain)));
}
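This is the simplest lifecycle: one registered filter, one collected domain, and getCurrentPredicate() narrows from TupleDomain.all() to a single-column bound. Once collected, the domain can be applied directly for pruning; a short sketch using only Domain methods from trino-spi (the class name is made up for illustration):

import io.trino.spi.predicate.Domain;
import static io.trino.spi.type.BigintType.BIGINT;

public class SingleValuePruningSketch {
    public static void main(String[] args) {
        Domain collected = Domain.singleValue(BIGINT, 7L);
        // A reader can drop any row, file, or partition whose value is excluded.
        System.out.println(collected.includesNullableValue(7L)); // true
        System.out.println(collected.includesNullableValue(8L)); // false
        System.out.println(collected.includesNullableValue(null)); // false: equality never matches NULL
    }
}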
Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From class TestLocalDynamicFiltersCollector, method testComparison.
@Test
public void testComparison() {
    LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId filterId1 = new DynamicFilterId("filter1");
    DynamicFilterId filterId2 = new DynamicFilterId("filter2");
    collector.register(ImmutableSet.of(filterId1, filterId2));
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol = symbolAllocator.newSymbol("symbol", BIGINT);
    ColumnHandle column = new TestingColumnHandle("column");
    DynamicFilter filter = createDynamicFilter(
            collector,
            ImmutableList.of(
                    new DynamicFilters.Descriptor(filterId1, symbol.toSymbolReference(), GREATER_THAN),
                    new DynamicFilters.Descriptor(filterId2, symbol.toSymbolReference(), LESS_THAN)),
            ImmutableMap.of(symbol, column),
            symbolAllocator.getTypes());
    assertEquals(filter.getColumnsCovered(), Set.of(column), "columns covered");
    // Filter is blocked and not completed.
    CompletableFuture<?> isBlocked = filter.isBlocked();
    assertFalse(filter.isComplete());
    assertFalse(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
    collector.collectDynamicFilterDomains(ImmutableMap.of(
            filterId1, Domain.multipleValues(BIGINT, ImmutableList.of(1L, 2L, 3L)),
            filterId2, Domain.multipleValues(BIGINT, ImmutableList.of(4L, 5L, 6L))));
    // Unblocked and completed.
    assertTrue(filter.isComplete());
    assertTrue(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(
            column, Domain.create(ValueSet.ofRanges(Range.range(BIGINT, 1L, false, 6L, false)), false))));
}
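The expected open range (1, 6) follows from how comparison filters are summarized: for probe > build only min(build) = 1 matters, for probe < build only max(build) = 6 matters, and because both descriptors target the same column the two domains intersect. A sketch of that reduction with the same trino-spi predicate types (the class name is made up for illustration):

import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.ValueSet;
import static io.trino.spi.type.BigintType.BIGINT;

public class ComparisonReductionSketch {
    public static void main(String[] args) {
        // GREATER_THAN over build values {1, 2, 3} reduces to "> 1".
        Domain greater = Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 1L)), false);
        // LESS_THAN over build values {4, 5, 6} reduces to "< 6".
        Domain less = Domain.create(ValueSet.ofRanges(Range.lessThan(BIGINT, 6L)), false);
        // Intersecting both yields the open range (1, 6) asserted in the test.
        Domain combined = greater.intersect(less);
        System.out.println(combined.equals(
                Domain.create(ValueSet.ofRanges(Range.range(BIGINT, 1L, false, 6L, false)), false))); // true
    }
}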
Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From class HiveSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter) {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();
    // get table metadata
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    // verify the table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }
    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(metastore, hiveTable);
    // short-circuit if we don't have any partitions
    if (partitions.isEmpty()) {
        if (hiveTable.isRecordScannedFiles()) {
            return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
        }
        return new FixedSplitSource(ImmutableList.of());
    }
    // get buckets from first partition (arbitrary)
    Optional<HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && bucketHandle.isEmpty()) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(
            session,
            metastore,
            table,
            tableName,
            partitions,
            bucketHandle.map(HiveBucketHandle::toTableBucketProperty));
    // Only one thread per partition is usable when a table is not transactional
    int concurrency = isTransactionalTable(table.getParameters()) ? splitLoaderConcurrency : min(splitLoaderConcurrency, partitions.size());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveTable.getTransaction(),
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            concurrency,
            recursiveDfsWalkerEnabled,
            !hiveTable.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveTable)
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveTable.getMaxScannedFileSize());
    HiveSplitSource splitSource;
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(), maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, highMemorySplitSourceCounter, hiveTable.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(), maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, highMemorySplitSourceCounter, hiveTable.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
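Note how the DynamicFilter flows into BackgroundHiveSplitLoader together with getDynamicFilteringWaitTimeout(session): split generation can wait a bounded time for dynamic filters to arrive before listing files, then prune with whatever predicate is available. A simplified sketch of such bounded waiting, not the actual BackgroundHiveSplitLoader logic (the class name, helper name, and Duration parameter are assumptions for illustration):

import io.trino.spi.connector.DynamicFilter;
import java.time.Duration;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class BoundedDynamicFilterWait {
    static void await(DynamicFilter dynamicFilter, Duration waitTimeout) throws Exception {
        long deadline = System.nanoTime() + waitTimeout.toNanos();
        while (dynamicFilter.isAwaitable()) {
            long remaining = deadline - System.nanoTime();
            if (remaining <= 0) {
                break; // timeout reached: proceed with the predicate collected so far
            }
            try {
                dynamicFilter.isBlocked().get(remaining, TimeUnit.NANOSECONDS);
            }
            catch (TimeoutException e) {
                break;
            }
        }
        // callers then prune partitions/files using dynamicFilter.getCurrentPredicate()
    }
}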