
Example 26 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

In class TestLocalDynamicFiltersCollector, method testIsNotDistinctFrom.

@Test
public void testIsNotDistinctFrom() {
    LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId filterId1 = new DynamicFilterId("filter1");
    DynamicFilterId filterId2 = new DynamicFilterId("filter2");
    collector.register(ImmutableSet.of(filterId1, filterId2));
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol1 = symbolAllocator.newSymbol("symbol1", BIGINT);
    Symbol symbol2 = symbolAllocator.newSymbol("symbol2", BIGINT);
    ColumnHandle column1 = new TestingColumnHandle("column1");
    ColumnHandle column2 = new TestingColumnHandle("column2");
    DynamicFilter filter = createDynamicFilter(
            collector,
            ImmutableList.of(
                    new DynamicFilters.Descriptor(filterId1, symbol1.toSymbolReference(), EQUAL, true),
                    new DynamicFilters.Descriptor(filterId2, symbol2.toSymbolReference(), EQUAL, true)),
            ImmutableMap.of(symbol1, column1, symbol2, column2),
            symbolAllocator.getTypes());
    assertEquals(filter.getColumnsCovered(), Set.of(column1, column2), "columns covered");
    // Filter is blocked and not completed.
    CompletableFuture<?> isBlocked = filter.isBlocked();
    assertFalse(filter.isComplete());
    assertFalse(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
    collector.collectDynamicFilterDomains(ImmutableMap.of(filterId1, Domain.multipleValues(BIGINT, ImmutableList.of(4L, 5L, 6L)), filterId2, Domain.none(BIGINT)));
    // Unblocked and completed.
    assertTrue(filter.isComplete());
    assertTrue(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(column1, Domain.create(ValueSet.of(BIGINT, 4L, 5L, 6L), true), column2, Domain.onlyNull(BIGINT))));
}
Also used : TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) DynamicFilter(io.trino.spi.connector.DynamicFilter) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) Test(org.testng.annotations.Test)
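
A minimal standalone sketch of why the IS NOT DISTINCT FROM descriptors above yield a null-allowing domain in the expected predicate. It assumes only the trino-spi artifact on the classpath; the class name NullAllowedDomainSketch is illustrative, not part of Trino.

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.ValueSet;
import java.util.List;

public class NullAllowedDomainSketch {
    public static void main(String[] args) {
        // Plain equality produces a domain that excludes NULL ...
        Domain equality = Domain.multipleValues(BIGINT, List.of(4L, 5L, 6L));
        // ... while IS NOT DISTINCT FROM keeps NULL (nullAllowed = true), which is
        // what the assertion on filter.getCurrentPredicate() expects for column1.
        Domain notDistinctFrom = Domain.create(ValueSet.of(BIGINT, 4L, 5L, 6L), true);
        System.out.println(equality.isNullAllowed());        // false
        System.out.println(notDistinctFrom.isNullAllowed()); // true
    }
}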

Example 27 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

In class TestLocalDynamicFiltersCollector, method testUnregisteredDynamicFilter.

@Test
public void testUnregisteredDynamicFilter() {
    // One dynamic filter is not collected locally (e.g. due to a distributed join)
    LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId registeredFilterId = new DynamicFilterId("registered");
    DynamicFilterId unregisteredFilterId = new DynamicFilterId("unregistered");
    collector.register(ImmutableSet.of(registeredFilterId));
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol registeredSymbol = symbolAllocator.newSymbol("registered", BIGINT);
    Symbol unregisteredSymbol = symbolAllocator.newSymbol("unregistered", BIGINT);
    ColumnHandle registeredColumn = new TestingColumnHandle("registered");
    ColumnHandle unregisteredColumn = new TestingColumnHandle("unregistered");
    DynamicFilter filter = createDynamicFilter(
            collector,
            ImmutableList.of(
                    new DynamicFilters.Descriptor(registeredFilterId, registeredSymbol.toSymbolReference()),
                    new DynamicFilters.Descriptor(unregisteredFilterId, unregisteredSymbol.toSymbolReference())),
            ImmutableMap.of(registeredSymbol, registeredColumn, unregisteredSymbol, unregisteredColumn),
            symbolAllocator.getTypes());
    // Filter is blocked and not completed.
    CompletableFuture<?> isBlocked = filter.isBlocked();
    assertFalse(filter.isComplete());
    assertTrue(filter.isAwaitable());
    assertFalse(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
    collector.collectDynamicFilterDomains(ImmutableMap.of(registeredFilterId, Domain.singleValue(BIGINT, 2L)));
    // Unblocked and completed (don't wait for the unregistered filter)
    assertTrue(filter.isComplete());
    assertFalse(filter.isAwaitable());
    assertTrue(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(registeredColumn, Domain.singleValue(BIGINT, 2L))));
}
Also used : TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) DynamicFilter(io.trino.spi.connector.DynamicFilter) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) Test(org.testng.annotations.Test)
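
As a hedged sketch of how a caller might consume such a filter, the helper below polls the same SPI methods the test exercises (isComplete, isAwaitable, isBlocked) and falls back to the partial predicate once the wait budget is spent. The class name DynamicFilterWait and the timeout handling are assumptions for illustration, not Trino code.

import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.DynamicFilter;
import io.trino.spi.predicate.TupleDomain;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

final class DynamicFilterWait {
    private DynamicFilterWait() {}

    // Wait (bounded) until the filter is complete or no longer awaitable,
    // then return whatever predicate has been collected so far.
    static TupleDomain<ColumnHandle> awaitPredicate(DynamicFilter filter, long timeoutMillis)
            throws InterruptedException {
        long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMillis);
        while (!filter.isComplete() && filter.isAwaitable()) {
            long remainingNanos = deadline - System.nanoTime();
            if (remainingNanos <= 0) {
                break; // out of budget, use the partial predicate
            }
            try {
                filter.isBlocked().get(remainingNanos, TimeUnit.NANOSECONDS);
            }
            catch (TimeoutException e) {
                break;
            }
            catch (ExecutionException e) {
                throw new RuntimeException(e.getCause());
            }
        }
        return filter.getCurrentPredicate();
    }
}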

Example 28 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

In class TestLocalDynamicFiltersCollector, method testSingleEquality.

@Test
public void testSingleEquality() {
    LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId filterId = new DynamicFilterId("filter");
    collector.register(ImmutableSet.of(filterId));
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol = symbolAllocator.newSymbol("symbol", BIGINT);
    ColumnHandle column = new TestingColumnHandle("column");
    DynamicFilter filter = createDynamicFilter(
            collector,
            ImmutableList.of(new DynamicFilters.Descriptor(filterId, symbol.toSymbolReference())),
            ImmutableMap.of(symbol, column),
            symbolAllocator.getTypes());
    assertEquals(filter.getColumnsCovered(), Set.of(column), "columns covered");
    // Filter is blocked and not completed.
    CompletableFuture<?> isBlocked = filter.isBlocked();
    assertFalse(filter.isComplete());
    assertTrue(filter.isAwaitable());
    assertFalse(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
    Domain domain = Domain.singleValue(BIGINT, 7L);
    collector.collectDynamicFilterDomains(ImmutableMap.of(filterId, domain));
    // Unblocked and completed.
    assertTrue(filter.isComplete());
    assertFalse(filter.isAwaitable());
    assertTrue(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(column, domain)));
}
Also used : TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) DynamicFilter(io.trino.spi.connector.DynamicFilter) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) Test(org.testng.annotations.Test)
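
For reference, a self-contained sketch of the two predicate states asserted above: TupleDomain.all() before any domain is collected, and a single-column domain afterwards. It assumes trino-spi plus its test artifact (for TestingColumnHandle); the class name SingleEqualitySketch is illustrative.

import static io.trino.spi.type.BigintType.BIGINT;

import com.google.common.collect.ImmutableMap;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.TestingColumnHandle;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;

public class SingleEqualitySketch {
    public static void main(String[] args) {
        // Before collection: no constraint at all.
        TupleDomain<ColumnHandle> before = TupleDomain.all();
        // After collectDynamicFilterDomains: the collected domain keyed by its column handle.
        ColumnHandle column = new TestingColumnHandle("column");
        TupleDomain<ColumnHandle> after =
                TupleDomain.withColumnDomains(ImmutableMap.of(column, Domain.singleValue(BIGINT, 7L)));
        System.out.println(before.isAll());                  // true
        System.out.println(after.getDomains().get().size()); // 1
    }
}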

Example 29 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

In class TestLocalDynamicFiltersCollector, method testComparison.

@Test
public void testComparison() {
    LocalDynamicFiltersCollector collector = new LocalDynamicFiltersCollector(TEST_SESSION);
    DynamicFilterId filterId1 = new DynamicFilterId("filter1");
    DynamicFilterId filterId2 = new DynamicFilterId("filter2");
    collector.register(ImmutableSet.of(filterId1, filterId2));
    SymbolAllocator symbolAllocator = new SymbolAllocator();
    Symbol symbol = symbolAllocator.newSymbol("symbol", BIGINT);
    ColumnHandle column = new TestingColumnHandle("column");
    DynamicFilter filter = createDynamicFilter(
            collector,
            ImmutableList.of(
                    new DynamicFilters.Descriptor(filterId1, symbol.toSymbolReference(), GREATER_THAN),
                    new DynamicFilters.Descriptor(filterId2, symbol.toSymbolReference(), LESS_THAN)),
            ImmutableMap.of(symbol, column),
            symbolAllocator.getTypes());
    assertEquals(filter.getColumnsCovered(), Set.of(column), "columns covered");
    // Filter is blocked and not completed.
    CompletableFuture<?> isBlocked = filter.isBlocked();
    assertFalse(filter.isComplete());
    assertFalse(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.all());
    collector.collectDynamicFilterDomains(ImmutableMap.of(filterId1, Domain.multipleValues(BIGINT, ImmutableList.of(1L, 2L, 3L)), filterId2, Domain.multipleValues(BIGINT, ImmutableList.of(4L, 5L, 6L))));
    // Unblocked and completed.
    assertTrue(filter.isComplete());
    assertTrue(isBlocked.isDone());
    assertEquals(filter.getCurrentPredicate(), TupleDomain.withColumnDomains(ImmutableMap.of(column, Domain.create(ValueSet.ofRanges(Range.range(BIGINT, 1L, false, 6L, false)), false))));
}
Also used : TestingColumnHandle(io.trino.spi.connector.TestingColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) DynamicFilter(io.trino.spi.connector.DynamicFilter) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) Test(org.testng.annotations.Test)
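
A hedged sketch (trino-spi only; the class name ComparisonIntersectionSketch is illustrative) of how two comparison descriptors over the same column combine into the open range (1, 6) that the assertion above expects:

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.ValueSet;

public class ComparisonIntersectionSketch {
    public static void main(String[] args) {
        // GREATER_THAN over the collected values {1, 2, 3} relaxes to x > 1 ...
        Domain greaterThan = Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 1L)), false);
        // ... and LESS_THAN over {4, 5, 6} relaxes to x < 6.
        Domain lessThan = Domain.create(ValueSet.ofRanges(Range.lessThan(BIGINT, 6L)), false);
        // Intersecting both yields the open range (1, 6), matching the expected TupleDomain.
        Domain intersection = greaterThan.intersect(lessThan);
        System.out.println(intersection);
    }
}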

Example 30 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

In class HiveSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();
    // get table metadata
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    // verify table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }
    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(metastore, hiveTable);
    // short circuit if we don't have any partitions
    if (partitions.isEmpty()) {
        if (hiveTable.isRecordScannedFiles()) {
            return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
        }
        return new FixedSplitSource(ImmutableList.of());
    }
    // get buckets from first partition (arbitrary)
    Optional<HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && bucketHandle.isEmpty()) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(session, metastore, table, tableName, partitions, bucketHandle.map(HiveBucketHandle::toTableBucketProperty));
    // Only one thread per partition is usable when a table is not transactional
    int concurrency = isTransactionalTable(table.getParameters()) ? splitLoaderConcurrency : min(splitLoaderConcurrency, partitions.size());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveTable.getTransaction(),
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            concurrency,
            recursiveDfsWalkerEnabled,
            !hiveTable.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveTable)
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveTable.getMaxScannedFileSize());
    HiveSplitSource splitSource;
    switch(splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(), maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, highMemorySplitSourceCounter, hiveTable.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(), maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, highMemorySplitSourceCounter, hiveTable.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) Iterables.transform(com.google.common.collect.Iterables.transform) MetastoreUtil.makePartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName) HiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter) HiveSessionProperties.isOptimizeSymlinkListing(io.trino.plugin.hive.HiveSessionProperties.isOptimizeSymlinkListing) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) HiveSessionProperties.isIgnoreAbsentPartitions(io.trino.plugin.hive.HiveSessionProperties.isIgnoreAbsentPartitions) HiveSessionProperties.isUseParquetColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseParquetColumnNames) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Column(io.trino.plugin.hive.metastore.Column) BoundedExecutor(io.airlift.concurrent.BoundedExecutor) HIVE_PARTITION_SCHEMA_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) Iterables.concat(com.google.common.collect.Iterables.concat) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) ENGLISH(java.util.Locale.ENGLISH) Table(io.trino.plugin.hive.metastore.Table) ImmutableMap(com.google.common.collect.ImmutableMap) TableToPartitionMapping.mapColumnsByIndex(io.trino.plugin.hive.TableToPartitionMapping.mapColumnsByIndex) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) Math.min(java.lang.Math.min) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) List(java.util.List) MetastoreUtil.verifyOnline(io.trino.plugin.hive.metastore.MetastoreUtil.verifyOnline) VersionEmbedder(io.trino.spi.VersionEmbedder) DynamicFilter(io.trino.spi.connector.DynamicFilter) MetastoreUtil.getProtectMode(io.trino.plugin.hive.metastore.MetastoreUtil.getProtectMode) HiveStorageFormat.getHiveStorageFormat(io.trino.plugin.hive.HiveStorageFormat.getHiveStorageFormat) SERVER_SHUTTING_DOWN(io.trino.spi.StandardErrorCode.SERVER_SHUTTING_DOWN) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) HiveCoercionPolicy.canCoerce(io.trino.plugin.hive.util.HiveCoercionPolicy.canCoerce) Partition(io.trino.plugin.hive.metastore.Partition) HiveUtil(io.trino.plugin.hive.util.HiveUtil) Nested(org.weakref.jmx.Nested) BucketSplitInfo.createBucketSplitInfo(io.trino.plugin.hive.BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) CounterStat(io.airlift.stats.CounterStat) UNPARTITIONED_ID(io.trino.plugin.hive.HivePartition.UNPARTITIONED_ID) HiveSessionProperties.isUseOrcColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseOrcColumnNames) HIVE_PARTITION_DROPPED_DURING_QUERY(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY) Inject(javax.inject.Inject) GROUPED_SCHEDULING(io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) 
Managed(org.weakref.jmx.Managed) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) Objects.requireNonNull(java.util.Objects.requireNonNull) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) Iterator(java.util.Iterator) Executor(java.util.concurrent.Executor) HiveSessionProperties.isPropagateTableScanSortingProperties(io.trino.plugin.hive.HiveSessionProperties.isPropagateTableScanSortingProperties) AbstractIterator(com.google.common.collect.AbstractIterator) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ConnectorSession(io.trino.spi.connector.ConnectorSession) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) Ordering(com.google.common.collect.Ordering) HiveSessionProperties.getDynamicFilteringWaitTimeout(io.trino.plugin.hive.HiveSessionProperties.getDynamicFilteringWaitTimeout) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) TypeManager(io.trino.spi.type.TypeManager) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
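
For contrast with the Hive implementation above, here is a minimal sketch of a ConnectorSplitManager against the same SPI signature; NoopSplitManager is a hypothetical name, and the sketch assumes the same Trino SPI version as HiveSplitManager. It simply ignores the dynamic filter, whereas HiveSplitManager threads it through to BackgroundHiveSplitLoader so splits can be pruned as the filter narrows.

import com.google.common.collect.ImmutableList;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplitManager;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.connector.DynamicFilter;
import io.trino.spi.connector.FixedSplitSource;

public class NoopSplitManager implements ConnectorSplitManager {
    @Override
    public ConnectorSplitSource getSplits(
            ConnectorTransactionHandle transaction,
            ConnectorSession session,
            ConnectorTableHandle tableHandle,
            SplitSchedulingStrategy splitSchedulingStrategy,
            DynamicFilter dynamicFilter) {
        // A real connector could inspect dynamicFilter.getCurrentPredicate()
        // (or wait on isBlocked()) here to prune splits before returning them.
        return new FixedSplitSource(ImmutableList.of());
    }
}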

Aggregations

DynamicFilter (io.trino.spi.connector.DynamicFilter): 32
Test (org.testng.annotations.Test): 23
DynamicFilterId (io.trino.sql.planner.plan.DynamicFilterId): 20
TestingColumnHandle (io.trino.spi.connector.TestingColumnHandle): 19
ColumnHandle (io.trino.spi.connector.ColumnHandle): 18
TupleDomain (io.trino.spi.predicate.TupleDomain): 13
Symbol (io.trino.sql.planner.Symbol): 11
SymbolAllocator (io.trino.sql.planner.SymbolAllocator): 11
QueryId (io.trino.spi.QueryId): 10
List (java.util.List): 10
ImmutableList (com.google.common.collect.ImmutableList): 9
StageId (io.trino.execution.StageId): 9
Domain (io.trino.spi.predicate.Domain): 9
Objects.requireNonNull (java.util.Objects.requireNonNull): 9
TaskId (io.trino.execution.TaskId): 8
ConnectorSession (io.trino.spi.connector.ConnectorSession): 8
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 8
ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle): 8
Optional (java.util.Optional): 8
Inject (javax.inject.Inject): 8