
Example 41 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class TestDefaultJdbcMetadata method testMultiGroupKeyPredicatePushdown.

@Test
public void testMultiGroupKeyPredicatePushdown() {
    ConnectorSession session = TestingConnectorSession.builder()
            .setPropertyMetadata(new JdbcMetadataSessionProperties(
                    new JdbcMetadataConfig().setAggregationPushdownEnabled(true), Optional.empty())
                    .getSessionProperties())
            .build();
    Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
    ColumnHandle textColumn = columnHandles.get("text");
    ColumnHandle valueColumn = columnHandles.get("value");
    ConnectorTableHandle baseTableHandle = metadata.getTableHandle(session, new SchemaTableName("example", "numbers"));
    ConnectorTableHandle aggregatedTable = applyCountAggregation(
            session,
            baseTableHandle,
            ImmutableList.of(ImmutableList.of(textColumn, valueColumn), ImmutableList.of(textColumn)));
    Domain domain = Domain.singleValue(BIGINT, 123L);
    JdbcTableHandle tableHandleWithFilter = applyFilter(
            session,
            aggregatedTable,
            new Constraint(TupleDomain.withColumnDomains(ImmutableMap.of(valueColumn, domain))));
    assertEquals(tableHandleWithFilter.getConstraint().getDomains(), Optional.of(ImmutableMap.of(valueColumn, domain)));
    assertEquals(
            ((JdbcQueryRelationHandle) tableHandleWithFilter.getRelationHandle()).getPreparedQuery().getQuery(),
            "SELECT \"TEXT\", \"VALUE\", count(*) AS \"_pfgnrtd_0\" " +
                    "FROM \"" + database.getDatabaseName() + "\".\"EXAMPLE\".\"NUMBERS\" " +
                    "GROUP BY GROUPING SETS ((\"TEXT\", \"VALUE\"), (\"TEXT\"))");
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) Constraint(io.trino.spi.connector.Constraint) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)
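
The applyFilter helper invoked above is defined elsewhere in the test class. A minimal sketch of what it plausibly does, assuming the standard ConnectorMetadata.applyFilter SPI (the error message and unwrapping shown here are illustrative, not the test's actual code):

// Hypothetical helper: push the constraint into the connector and unwrap the narrowed handle
private JdbcTableHandle applyFilter(ConnectorSession session, ConnectorTableHandle table, Constraint constraint) {
    return (JdbcTableHandle) metadata.applyFilter(session, table, constraint)
            .orElseThrow(() -> new IllegalStateException("applyFilter returned no result"))
            .getHandle();
}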

Example 42 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class TestPruneTableScanColumns method mockApplyProjection.

private Optional<ProjectionApplicationResult<ConnectorTableHandle>> mockApplyProjection(ConnectorSession session, ConnectorTableHandle tableHandle, List<ConnectorExpression> projections, Map<String, ColumnHandle> assignments) {
    MockConnectorTableHandle handle = (MockConnectorTableHandle) tableHandle;
    List<Variable> variables = projections.stream().map(Variable.class::cast).collect(toImmutableList());
    List<ColumnHandle> newColumns = variables.stream().map(variable -> assignments.get(variable.getName())).collect(toImmutableList());
    if (handle.getColumns().isPresent() && newColumns.equals(handle.getColumns().get())) {
        return Optional.empty();
    }
    return Optional.of(new ProjectionApplicationResult<>(
            new MockConnectorTableHandle(handle.getTableName(), handle.getConstraint(), Optional.of(newColumns)),
            projections,
            variables.stream()
                    .map(variable -> new Assignment(
                            variable.getName(),
                            assignments.get(variable.getName()),
                            ((MockConnectorColumnHandle) assignments.get(variable.getName())).getType()))
                    .collect(toImmutableList()),
            false));
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) PlanMatchPattern(io.trino.sql.planner.assertions.PlanMatchPattern) Test(org.testng.annotations.Test) Variable(io.trino.spi.expression.Variable) CatalogName(io.trino.connector.CatalogName) ImmutableList(com.google.common.collect.ImmutableList) MockConnectorFactory(io.trino.connector.MockConnectorFactory) TpchTableHandle(io.trino.plugin.tpch.TpchTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) ColumnHandle(io.trino.spi.connector.ColumnHandle) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) Symbol(io.trino.sql.planner.Symbol) TINY_SCHEMA_NAME(io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME) TpchColumnHandle(io.trino.plugin.tpch.TpchColumnHandle) RuleTester.defaultRuleTester(io.trino.sql.planner.iterative.rule.test.RuleTester.defaultRuleTester) ImmutableMap(com.google.common.collect.ImmutableMap) BaseRuleTest(io.trino.sql.planner.iterative.rule.test.BaseRuleTest) TINY_SCALE_FACTOR(io.trino.plugin.tpch.TpchMetadata.TINY_SCALE_FACTOR) MockConnectorColumnHandle(io.trino.connector.MockConnectorColumnHandle) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Assignments(io.trino.sql.planner.plan.Assignments) ConnectorSession(io.trino.spi.connector.ConnectorSession) RuleTester(io.trino.sql.planner.iterative.rule.test.RuleTester) TupleDomain(io.trino.spi.predicate.TupleDomain) SchemaTableName(io.trino.spi.connector.SchemaTableName) TpchTransactionHandle(io.trino.plugin.tpch.TpchTransactionHandle) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) MockConnectorTableHandle(io.trino.connector.MockConnectorTableHandle) MockConnectorTransactionHandle(io.trino.connector.MockConnectorTransactionHandle) List(java.util.List) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) TableHandle(io.trino.metadata.TableHandle) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) PlanMatchPattern.strictProject(io.trino.sql.planner.assertions.PlanMatchPattern.strictProject) Assignment(io.trino.spi.connector.Assignment) PlanMatchPattern.strictConstrainedTableScan(io.trino.sql.planner.assertions.PlanMatchPattern.strictConstrainedTableScan) Optional(java.util.Optional) TestingColumnHandle(io.trino.testing.TestingMetadata.TestingColumnHandle) PlanBuilder.expression(io.trino.sql.planner.iterative.rule.test.PlanBuilder.expression) DATE(io.trino.spi.type.DateType.DATE) PlanMatchPattern.tableScan(io.trino.sql.planner.assertions.PlanMatchPattern.tableScan) PlanMatchPattern.strictTableScan(io.trino.sql.planner.assertions.PlanMatchPattern.strictTableScan)
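
A callback with this shape is typically registered on a mock connector when the rule test is set up, so the optimizer rule under test can exercise projection pushdown. A brief sketch, assuming MockConnectorFactory's builder exposes a withApplyProjection hook (as Trino's planner tests commonly use):

// Wire the projection-pushdown callback into a mock connector
MockConnectorFactory factory = MockConnectorFactory.builder()
        .withApplyProjection(this::mockApplyProjection)
        .build();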

Example 43 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class DeltaLakeMetadata method finishStatisticsCollection.

@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle table, Collection<ComputedStatistics> computedStatistics) {
    DeltaLakeTableHandle tableHandle = (DeltaLakeTableHandle) table;
    AnalyzeHandle analyzeHandle = tableHandle.getAnalyzeHandle().orElseThrow(() -> new IllegalArgumentException("analyzeHandle not set"));
    String location = metastore.getTableLocation(tableHandle.getSchemaTableName(), session);
    Optional<DeltaLakeStatistics> oldStatistics = statisticsAccess.readDeltaLakeStatistics(session, location);
    // more elaborate logic for handling statistics model evolution may need to be introduced in the future;
    // for now, a simple check rejects incompatible updates
    oldStatistics.ifPresent(statistics -> checkArgument(
            statistics.getModelVersion() == DeltaLakeStatistics.CURRENT_MODEL_VERSION,
            "Existing table statistics are incompatible, run the drop statistics procedure on this table before re-analyzing"));
    Map<String, DeltaLakeColumnStatistics> oldColumnStatistics = oldStatistics.map(DeltaLakeStatistics::getColumnStatistics).orElseGet(ImmutableMap::of);
    Map<String, DeltaLakeColumnStatistics> newColumnStatistics = toDeltaLakeColumnStatistics(computedStatistics);
    Map<String, DeltaLakeColumnStatistics> mergedColumnStatistics = new HashMap<>();
    // only keep stats for existing columns
    Set<String> newColumns = newColumnStatistics.keySet();
    oldColumnStatistics.entrySet().stream().filter(entry -> newColumns.contains(entry.getKey())).forEach(entry -> mergedColumnStatistics.put(entry.getKey(), entry.getValue()));
    newColumnStatistics.forEach((columnName, columnStatistics) -> {
        mergedColumnStatistics.merge(columnName, columnStatistics, DeltaLakeColumnStatistics::update);
    });
    Optional<Instant> maxFileModificationTime = getMaxFileModificationTime(computedStatistics);
    // We do not want to hinder future calls to ANALYZE if one of the analyzed files has a modification time far in the future.
    // Therefore we cap the value stored in extended_stats.json at the current time as observed on the Trino coordinator.
    Instant finalAlreadyAnalyzedModifiedTimeMax = Instant.now();
    if (maxFileModificationTime.isPresent()) {
        finalAlreadyAnalyzedModifiedTimeMax = Comparators.min(maxFileModificationTime.get(), finalAlreadyAnalyzedModifiedTimeMax);
    }
    // also ensure that we are not traveling back in time
    if (oldStatistics.isPresent()) {
        finalAlreadyAnalyzedModifiedTimeMax = Comparators.max(oldStatistics.get().getAlreadyAnalyzedModifiedTimeMax(), finalAlreadyAnalyzedModifiedTimeMax);
    }
    if (analyzeHandle.getColumns().isPresent() && !mergedColumnStatistics.keySet().equals(analyzeHandle.getColumns().get())) {
        // sanity validation
        throw new IllegalStateException(format("Unexpected columns in in mergedColumnStatistics %s; expected %s", mergedColumnStatistics.keySet(), analyzeHandle.getColumns().get()));
    }
    DeltaLakeStatistics mergedDeltaLakeStatistics = new DeltaLakeStatistics(finalAlreadyAnalyzedModifiedTimeMax, mergedColumnStatistics, analyzeHandle.getColumns());
    statisticsAccess.updateDeltaLakeStatistics(session, location, mergedDeltaLakeStatistics);
}
Also used : TransactionLogUtil.getTransactionLogDir(io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir) FileSystem(org.apache.hadoop.fs.FileSystem) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) FileStatus(org.apache.hadoop.fs.FileStatus) DeltaLakeSchemaSupport.validateType(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.validateType) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) Collections.singletonList(java.util.Collections.singletonList) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TransactionLogWriterFactory(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriterFactory) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) ValueSet.ofRanges(io.trino.spi.predicate.ValueSet.ofRanges) Column(io.trino.plugin.hive.metastore.Column) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) PARTITIONED_BY_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.PARTITIONED_BY_PROPERTY) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ENGLISH(java.util.Locale.ENGLISH) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TABLE_PROVIDER_PROPERTY(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_PROPERTY) HiveWriteUtils.pathExists(io.trino.plugin.hive.util.HiveWriteUtils.pathExists) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) HyperLogLog(io.airlift.stats.cardinality.HyperLogLog) DateTimeEncoding.unpackMillisUtc(io.trino.spi.type.DateTimeEncoding.unpackMillisUtc) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) Predicate.not(java.util.function.Predicate.not) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.ANALYZE_COLUMNS_PROPERTY) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) TransactionLogParser.getMandatoryCurrentVersion(io.trino.plugin.deltalake.transactionlog.TransactionLogParser.getMandatoryCurrentVersion) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) 
DeltaLakeColumnHandle.fileSizeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileSizeColumnHandle) Slice(io.airlift.slice.Slice) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) DeltaLakeTableProcedureId(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId) INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Variable(io.trino.spi.expression.Variable) DeltaLakeTableProperties.getLocation(io.trino.plugin.deltalake.DeltaLakeTableProperties.getLocation) Range.greaterThanOrEqual(io.trino.spi.predicate.Range.greaterThanOrEqual) TransactionConflictException(io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) HiveType(io.trino.plugin.hive.HiveType) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) DeltaLakeStatisticsAccess(io.trino.plugin.deltalake.statistics.DeltaLakeStatisticsAccess) DeltaLakeSchemaSupport.extractPartitionColumns(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractPartitionColumns) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) DeltaLakeSchemaSupport.serializeStatsAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson) Nullable(javax.annotation.Nullable) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) MapType(io.trino.spi.type.MapType) PARTITION_KEY(io.trino.plugin.deltalake.DeltaLakeColumnType.PARTITION_KEY) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) DELTA_LAKE_INVALID_SCHEMA(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA) CheckpointWriterManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointWriterManager) ROW_ID_COLUMN_TYPE(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_TYPE) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) DeltaLakeSessionProperties.isTableStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isTableStatisticsEnabled) LOCATION_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.LOCATION_PROPERTY) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) TINYINT(io.trino.spi.type.TinyintType.TINYINT) NotADeltaLakeTableException(io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) DeltaLakeStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeStatistics) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) NodeManager(io.trino.spi.NodeManager) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) Database(io.trino.plugin.hive.metastore.Database) DeltaLakeSchemaSupport.extractSchema(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema) SYNTHESIZED(io.trino.plugin.deltalake.DeltaLakeColumnType.SYNTHESIZED) TABLE_PROVIDER_VALUE(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_VALUE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) 
AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) DeltaLakeMetastore(io.trino.plugin.deltalake.metastore.DeltaLakeMetastore) Format(io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Locale(java.util.Locale) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) HyperLogLogType(io.trino.spi.type.HyperLogLogType) INTEGER(io.trino.spi.type.IntegerType.INTEGER) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) RowType(io.trino.spi.type.RowType) Range.range(io.trino.spi.predicate.Range.range) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) TransactionLogWriter(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) Collection(java.util.Collection) DeltaLakeTableExecuteHandle(io.trino.plugin.deltalake.procedure.DeltaLakeTableExecuteHandle) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Instant(java.time.Instant) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ROW_ID_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_NAME) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) Assignment(io.trino.spi.connector.Assignment) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) DecimalType(io.trino.spi.type.DecimalType) OPTIMIZE(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId.OPTIMIZE) JsonCodec(io.airlift.json.JsonCodec) Comparators(com.google.common.collect.Comparators) Constraint(io.trino.spi.connector.Constraint) Range.lessThanOrEqual(io.trino.spi.predicate.Range.lessThanOrEqual) DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) Logger(io.airlift.log.Logger) DeltaLakeSchemaSupport.serializeSchemaAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeSchemaAsJson) DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) DeltaLakeColumnHandle.pathColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.pathColumnHandle) DeltaLakeColumnHandle.fileModifiedTimeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileModifiedTimeColumnHandle) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType(io.trino.spi.type.VarcharType) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) 
TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeSessionProperties.isExtendedStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) CHECKPOINT_INTERVAL_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.CHECKPOINT_INTERVAL_PROPERTY) StorageFormat.create(io.trino.plugin.hive.metastore.StorageFormat.create) MetadataEntry.buildDeltaMetadataConfiguration(io.trino.plugin.deltalake.transactionlog.MetadataEntry.buildDeltaMetadataConfiguration) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) DELTA_LAKE_BAD_WRITE(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TupleDomain(io.trino.spi.predicate.TupleDomain) DeltaLakeTableProperties.getPartitionedBy(io.trino.plugin.deltalake.DeltaLakeTableProperties.getPartitionedBy) HiveWriteUtils.createDirectory(io.trino.plugin.hive.util.HiveWriteUtils.createDirectory) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) SchemaTableName.schemaTableName(io.trino.spi.connector.SchemaTableName.schemaTableName) UUID.randomUUID(java.util.UUID.randomUUID) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) TypeManager(io.trino.spi.type.TypeManager) Collections(java.util.Collections) NUMBER_OF_DISTINCT_VALUES_SUMMARY(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES_SUMMARY)
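
The per-column merge above relies on java.util.Map#merge: when a column already has statistics, the remapping function (here DeltaLakeColumnStatistics::update) combines the old and new values; when it does not, the new statistics are inserted as-is. A self-contained illustration of the same pattern, with Integer::sum standing in for the update function:

// Illustration only: Integer::sum plays the role of DeltaLakeColumnStatistics::update
Map<String, Integer> merged = new HashMap<>();
merged.put("rowCount", 10);
merged.merge("rowCount", 5, Integer::sum);   // key present: values are combined -> 15
merged.merge("nullCount", 3, Integer::sum);  // key absent: value is inserted -> 3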

Example 44 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class KafkaSplitManager method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
        List<PartitionInfo> partitionInfos = kafkaConsumer.partitionsFor(kafkaTableHandle.getTopicName());
        List<TopicPartition> topicPartitions = partitionInfos.stream().map(KafkaSplitManager::toTopicPartition).collect(toImmutableList());
        Map<TopicPartition, Long> partitionBeginOffsets = kafkaConsumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> partitionEndOffsets = kafkaConsumer.endOffsets(topicPartitions);
        KafkaFilteringResult kafkaFilteringResult = kafkaFilterManager.getKafkaFilterResult(session, kafkaTableHandle, partitionInfos, partitionBeginOffsets, partitionEndOffsets);
        partitionInfos = kafkaFilteringResult.getPartitionInfos();
        partitionBeginOffsets = kafkaFilteringResult.getPartitionBeginOffsets();
        partitionEndOffsets = kafkaFilteringResult.getPartitionEndOffsets();
        ImmutableList.Builder<KafkaSplit> splits = ImmutableList.builder();
        Optional<String> keyDataSchemaContents = contentSchemaReader.readKeyContentSchema(kafkaTableHandle);
        Optional<String> messageDataSchemaContents = contentSchemaReader.readValueContentSchema(kafkaTableHandle);
        for (PartitionInfo partitionInfo : partitionInfos) {
            TopicPartition topicPartition = toTopicPartition(partitionInfo);
            HostAddress leader = HostAddress.fromParts(partitionInfo.leader().host(), partitionInfo.leader().port());
            new Range(partitionBeginOffsets.get(topicPartition), partitionEndOffsets.get(topicPartition))
                    .partition(messagesPerSplit).stream()
                    .map(range -> new KafkaSplit(
                            kafkaTableHandle.getTopicName(),
                            kafkaTableHandle.getKeyDataFormat(),
                            kafkaTableHandle.getMessageDataFormat(),
                            keyDataSchemaContents,
                            messageDataSchemaContents,
                            partitionInfo.partition(),
                            range,
                            leader))
                    .forEach(splits::add);
        }
        return new FixedSplitSource(splits.build());
    } catch (Exception e) {
        // Catch all exceptions because the Kafka library is written in Scala and checked exceptions are not declared in method signatures.
        if (e instanceof TrinoException) {
            throw e;
        }
        throw new TrinoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) String.format(java.lang.String.format) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) Inject(javax.inject.Inject) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) DynamicFilter(io.trino.spi.connector.DynamicFilter) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ContentSchemaReader(io.trino.plugin.kafka.schema.ContentSchemaReader) Optional(java.util.Optional) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) HostAddress(io.trino.spi.HostAddress) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)
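
The Range#partition call above is internal to the Kafka plugin: it slices a partition's [beginOffset, endOffset) span into consecutive chunks of at most messagesPerSplit offsets, and each chunk becomes one KafkaSplit. A hypothetical standalone equivalent of that slicing (illustrative only, not the plugin's actual class):

// Hypothetical helper: split [begin, end) into consecutive chunks of at most chunkSize offsets
static List<long[]> partitionOffsets(long begin, long end, long chunkSize) {
    List<long[]> chunks = new ArrayList<>();
    for (long start = begin; start < end; start += chunkSize) {
        chunks.add(new long[] {start, Math.min(start + chunkSize, end)});
    }
    return chunks;
}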

Example 45 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class TestPrometheusRecordSetProvider method testGetRecordSet.

@Test
public void testGetRecordSet() {
    ConnectorTableHandle tableHandle = new PrometheusTableHandle("schema", "table");
    PrometheusRecordSetProvider recordSetProvider = new PrometheusRecordSetProvider(client);
    RecordSet recordSet = recordSetProvider.getRecordSet(
            PrometheusTransactionHandle.INSTANCE,
            SESSION,
            new PrometheusSplit(dataUri),
            tableHandle,
            ImmutableList.of(
                    new PrometheusColumnHandle("labels", varcharMapType, 0),
                    new PrometheusColumnHandle("timestamp", TIMESTAMP_COLUMN_TYPE, 1),
                    new PrometheusColumnHandle("value", DoubleType.DOUBLE, 2)));
    assertNotNull(recordSet, "recordSet is null");
    RecordCursor cursor = recordSet.cursor();
    assertNotNull(cursor, "cursor is null");
    Map<Instant, Map<?, ?>> actual = new LinkedHashMap<>();
    while (cursor.advanceNextPosition()) {
        actual.put((Instant) cursor.getObject(1), getMapFromBlock(varcharMapType, (Block) cursor.getObject(0)));
    }
    Map<Instant, Map<String, String>> expected = ImmutableMap.<Instant, Map<String, String>>builder()
            .put(ofEpochMilli(1565962969044L), ImmutableMap.of("instance", "localhost:9090", "__name__", "up", "job", "prometheus"))
            .put(ofEpochMilli(1565962984045L), ImmutableMap.of("instance", "localhost:9090", "__name__", "up", "job", "prometheus"))
            .put(ofEpochMilli(1565962999044L), ImmutableMap.of("instance", "localhost:9090", "__name__", "up", "job", "prometheus"))
            .put(ofEpochMilli(1565963014044L), ImmutableMap.of("instance", "localhost:9090", "__name__", "up", "job", "prometheus"))
            .buildOrThrow();
    assertEquals(actual, expected);
}
Also used : RecordCursor(io.trino.spi.connector.RecordCursor) Instant(java.time.Instant) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) LinkedHashMap(java.util.LinkedHashMap) PrometheusRecordCursor.getMapFromBlock(io.trino.plugin.prometheus.PrometheusRecordCursor.getMapFromBlock) Block(io.trino.spi.block.Block) RecordSet(io.trino.spi.connector.RecordSet) ImmutableMap(com.google.common.collect.ImmutableMap) Map(java.util.Map) Test(org.testng.annotations.Test)
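
The drain loop above is the general RecordCursor access pattern: advanceNextPosition() returns false once the data is exhausted, and the typed getters read columns of the current row. A minimal sketch of draining and closing a cursor, assuming a record set whose column 0 is VARCHAR and column 1 is BIGINT:

// Drain a cursor row by row; RecordCursor is Closeable, so try-with-resources cleans up
try (RecordCursor cursor = recordSet.cursor()) {
    while (cursor.advanceNextPosition()) {
        String label = cursor.getSlice(0).toStringUtf8(); // VARCHAR column
        long value = cursor.getLong(1);                   // BIGINT column
        System.out.printf("%s -> %d%n", label, value);
    }
}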

Aggregations

ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 133
ConnectorSession (io.trino.spi.connector.ConnectorSession): 82
ColumnHandle (io.trino.spi.connector.ColumnHandle): 77
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 67
Test (org.testng.annotations.Test): 65
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata): 52
SchemaTableName (io.trino.spi.connector.SchemaTableName): 51
Constraint (io.trino.spi.connector.Constraint): 45
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 44
List (java.util.List): 40
Optional (java.util.Optional): 40
ConnectorInsertTableHandle (io.trino.spi.connector.ConnectorInsertTableHandle): 39
TupleDomain (io.trino.spi.predicate.TupleDomain): 39
ImmutableList (com.google.common.collect.ImmutableList): 38
ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 37
ImmutableMap (com.google.common.collect.ImmutableMap): 36
TrinoException (io.trino.spi.TrinoException): 36
ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle): 36
Map (java.util.Map): 36
Objects.requireNonNull (java.util.Objects.requireNonNull): 36