Search in sources :

Example 51 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class IcebergMetadata method applyProjection.

@Override
public Optional<ProjectionApplicationResult<ConnectorTableHandle>> applyProjection(ConnectorSession session, ConnectorTableHandle handle, List<ConnectorExpression> projections, Map<String, ColumnHandle> assignments) {
    if (!isProjectionPushdownEnabled(session)) {
        return Optional.empty();
    }
    // Create projected column representations for supported sub expressions. Simple column references and chain of
    // dereferences on a variable are supported right now.
    Set<ConnectorExpression> projectedExpressions = projections.stream().flatMap(expression -> extractSupportedProjectedColumns(expression).stream()).collect(toImmutableSet());
    Map<ConnectorExpression, ProjectedColumnRepresentation> columnProjections = projectedExpressions.stream().collect(toImmutableMap(Function.identity(), HiveApplyProjectionUtil::createProjectedColumnRepresentation));
    IcebergTableHandle icebergTableHandle = (IcebergTableHandle) handle;
    // all references are simple variables
    if (columnProjections.values().stream().allMatch(ProjectedColumnRepresentation::isVariable)) {
        Set<IcebergColumnHandle> projectedColumns = assignments.values().stream().map(IcebergColumnHandle.class::cast).collect(toImmutableSet());
        if (icebergTableHandle.getProjectedColumns().equals(projectedColumns)) {
            return Optional.empty();
        }
        List<Assignment> assignmentsList = assignments.entrySet().stream().map(assignment -> new Assignment(assignment.getKey(), assignment.getValue(), ((IcebergColumnHandle) assignment.getValue()).getType())).collect(toImmutableList());
        return Optional.of(new ProjectionApplicationResult<>(icebergTableHandle.withProjectedColumns(projectedColumns), projections, assignmentsList, false));
    }
    Map<String, Assignment> newAssignments = new HashMap<>();
    ImmutableMap.Builder<ConnectorExpression, Variable> newVariablesBuilder = ImmutableMap.builder();
    ImmutableSet.Builder<IcebergColumnHandle> projectedColumnsBuilder = ImmutableSet.builder();
    for (Map.Entry<ConnectorExpression, ProjectedColumnRepresentation> entry : columnProjections.entrySet()) {
        ConnectorExpression expression = entry.getKey();
        ProjectedColumnRepresentation projectedColumn = entry.getValue();
        IcebergColumnHandle baseColumnHandle = (IcebergColumnHandle) assignments.get(projectedColumn.getVariable().getName());
        IcebergColumnHandle projectedColumnHandle = createProjectedColumnHandle(baseColumnHandle, projectedColumn.getDereferenceIndices(), expression.getType());
        String projectedColumnName = projectedColumnHandle.getQualifiedName();
        Variable projectedColumnVariable = new Variable(projectedColumnName, expression.getType());
        Assignment newAssignment = new Assignment(projectedColumnName, projectedColumnHandle, expression.getType());
        newAssignments.putIfAbsent(projectedColumnName, newAssignment);
        newVariablesBuilder.put(expression, projectedColumnVariable);
        projectedColumnsBuilder.add(projectedColumnHandle);
    }
    // Modify projections to refer to new variables
    Map<ConnectorExpression, Variable> newVariables = newVariablesBuilder.buildOrThrow();
    List<ConnectorExpression> newProjections = projections.stream().map(expression -> replaceWithNewVariables(expression, newVariables)).collect(toImmutableList());
    List<Assignment> outputAssignments = newAssignments.values().stream().collect(toImmutableList());
    return Optional.of(new ProjectionApplicationResult<>(icebergTableHandle.withProjectedColumns(projectedColumnsBuilder.build()), newProjections, outputAssignments, false));
}
Also used : IcebergUtil.getPartitionKeys(io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys) TrinoCatalog(io.trino.plugin.iceberg.catalog.TrinoCatalog) FileSystem(org.apache.hadoop.fs.FileSystem) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) HiveApplyProjectionUtil.replaceWithNewVariables(io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables) Collections.singletonList(java.util.Collections.singletonList) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Matcher(java.util.regex.Matcher) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) RewriteFiles(org.apache.iceberg.RewriteFiles) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) CloseableIterable(org.apache.iceberg.io.CloseableIterable) IcebergUtil.newCreateTableTransaction(io.trino.plugin.iceberg.IcebergUtil.newCreateTableTransaction) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) IcebergTableExecuteHandle(io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle) Set(java.util.Set) Schema(org.apache.iceberg.Schema) ColumnIdentity.primitiveColumnIdentity(io.trino.plugin.iceberg.ColumnIdentity.primitiveColumnIdentity) SchemaTableName(io.trino.spi.connector.SchemaTableName) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) Collectors.joining(java.util.stream.Collectors.joining) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) IcebergUtil.deserializePartitionValue(io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Variable(io.trino.spi.expression.Variable) Supplier(java.util.function.Supplier) OptionalLong(java.util.OptionalLong) MaterializedViewFreshness(io.trino.spi.connector.MaterializedViewFreshness) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) FILE_FORMAT_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY) OPTIMIZE(io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.OPTIMIZE) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) DEPENDS_ON_TABLES(io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog.DEPENDS_ON_TABLES) Table(org.apache.iceberg.Table) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) ICEBERG_INVALID_METADATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) PartitionFields.parsePartitionFields(io.trino.plugin.iceberg.PartitionFields.parsePartitionFields) ArrayDeque(java.util.ArrayDeque) MaterializedViewNotFoundException(io.trino.spi.connector.MaterializedViewNotFoundException) TypeConverter.toTrinoType(io.trino.plugin.iceberg.TypeConverter.toTrinoType) IcebergUtil.getColumns(io.trino.plugin.iceberg.IcebergUtil.getColumns) IcebergSessionProperties.isProjectionPushdownEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isProjectionPushdownEnabled) IcebergTableProcedureId(io.trino.plugin.iceberg.procedure.IcebergTableProcedureId) IcebergSessionProperties.isStatisticsEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isStatisticsEnabled) AppendFiles(org.apache.iceberg.AppendFiles) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) TypeConverter.toIcebergType(io.trino.plugin.iceberg.TypeConverter.toIcebergType) PartitionField(org.apache.iceberg.PartitionField) DataFiles(org.apache.iceberg.DataFiles) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) LOCATION_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.LOCATION_PROPERTY) Path(org.apache.hadoop.fs.Path) DATA(io.trino.plugin.iceberg.TableType.DATA) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) Splitter(com.google.common.base.Splitter) IcebergTableProperties.getPartitioning(io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning) IcebergUtil.getFileFormat(io.trino.plugin.iceberg.IcebergUtil.getFileFormat) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) HiveWrittenPartitions(io.trino.plugin.hive.HiveWrittenPartitions) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) TableScan(org.apache.iceberg.TableScan) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) String.format(java.lang.String.format) SchemaParser(org.apache.iceberg.SchemaParser) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) ClassLoaderSafeSystemTable(io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable) IcebergUtil.getTableComment(io.trino.plugin.iceberg.IcebergUtil.getTableComment) Assignment(io.trino.spi.connector.Assignment) HiveApplyProjectionUtil(io.trino.plugin.hive.HiveApplyProjectionUtil) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) PartitionSpec(org.apache.iceberg.PartitionSpec) Function.identity(java.util.function.Function.identity) TableProperties(org.apache.iceberg.TableProperties) Optional(java.util.Optional) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) Pattern(java.util.regex.Pattern) SystemTable(io.trino.spi.connector.SystemTable) JsonCodec(io.airlift.json.JsonCodec) Constraint(io.trino.spi.connector.Constraint) IcebergOptimizeHandle(io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle) Logger(io.airlift.log.Logger) PartitionFields.toPartitionFields(io.trino.plugin.iceberg.PartitionFields.toPartitionFields) HashMap(java.util.HashMap) Deque(java.util.Deque) Function(java.util.function.Function) ExpressionConverter.toIcebergExpression(io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression) PARTITIONING_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveApplyProjectionUtil.extractSupportedProjectedColumns(io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns) VerifyException(com.google.common.base.VerifyException) RetryMode(io.trino.spi.connector.RetryMode) Iterator(java.util.Iterator) TupleDomain(io.trino.spi.predicate.TupleDomain) IcebergUtil.toIcebergSchema(io.trino.plugin.iceberg.IcebergUtil.toIcebergSchema) Transaction(org.apache.iceberg.Transaction) Comparator(java.util.Comparator) TypeManager(io.trino.spi.type.TypeManager) HiveUtil.isStructuralType(io.trino.plugin.hive.util.HiveUtil.isStructuralType) Snapshot(org.apache.iceberg.Snapshot) Variable(io.trino.spi.expression.Variable) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) Assignment(io.trino.spi.connector.Assignment) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 52 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class IcebergPageSourceProvider method createPageSource.

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
    IcebergSplit split = (IcebergSplit) connectorSplit;
    IcebergTableHandle table = (IcebergTableHandle) connectorTable;
    List<IcebergColumnHandle> icebergColumns = columns.stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList());
    Map<Integer, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<IcebergColumnHandle> regularColumns = columns.stream().map(IcebergColumnHandle.class::cast).filter(column -> !partitionKeys.containsKey(column.getId())).collect(toImmutableList());
    TupleDomain<IcebergColumnHandle> effectivePredicate = table.getUnenforcedPredicate().intersect(dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast)).simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
    HdfsContext hdfsContext = new HdfsContext(session);
    ReaderPageSource dataPageSource = createDataPageSource(session, hdfsContext, new Path(split.getPath()), split.getStart(), split.getLength(), split.getFileSize(), split.getFileFormat(), regularColumns, effectivePredicate, table.getNameMappingJson().map(NameMappingParser::fromJson));
    Optional<ReaderProjectionsAdapter> projectionsAdapter = dataPageSource.getReaderColumns().map(readerColumns -> new ReaderProjectionsAdapter(regularColumns, readerColumns, column -> ((IcebergColumnHandle) column).getType(), IcebergPageSourceProvider::applyProjection));
    return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource.get(), projectionsAdapter);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) ORC_ICEBERG_ID_KEY(io.trino.plugin.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) ColumnAdaptation(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ICEBERG_CANNOT_OPEN_SPLIT(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) UUID(io.trino.spi.type.UuidType.UUID) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) ICEBERG_FILESYSTEM_ERROR(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ReaderColumns(io.trino.plugin.hive.ReaderColumns) Set(java.util.Set) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ColumnIO(org.apache.parquet.io.ColumnIO) IcebergSessionProperties.getOrcTinyStripeThreshold(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) MappedField(org.apache.iceberg.mapping.MappedField) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) IcebergSessionProperties.isOrcNestedLazy(io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy) IcebergSessionProperties.getOrcMaxBufferSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) HdfsParquetDataSource(io.trino.plugin.hive.parquet.HdfsParquetDataSource) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) TupleDomainOrcPredicateBuilder(io.trino.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) OrcPageSource(io.trino.plugin.hive.orc.OrcPageSource) IcebergSessionProperties.getOrcMaxMergeDistance(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) ICEBERG_MISSING_DATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) MappedFields(org.apache.iceberg.mapping.MappedFields) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) OrcType(io.trino.orc.metadata.OrcType) Predicate(io.trino.parquet.predicate.Predicate) IcebergSessionProperties.isUseFileSizeFromMetadata(io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata) MapType(io.trino.spi.type.MapType) PredicateUtils.predicateMatches(io.trino.parquet.predicate.PredicateUtils.predicateMatches) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) StandardTypes(io.trino.spi.type.StandardTypes) NameMappingParser(org.apache.iceberg.mapping.NameMappingParser) IcebergSessionProperties.getOrcLazyReadSmallRanges(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) IcebergSessionProperties.getParquetMaxReadBlockSize(io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockSize) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) UTC(org.joda.time.DateTimeZone.UTC) Field(io.trino.parquet.Field) Traverser(com.google.common.graph.Traverser) ParquetPageSource(io.trino.plugin.hive.parquet.ParquetPageSource) ProjectedLayout(io.trino.orc.OrcReader.ProjectedLayout) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) IcebergSessionProperties.getOrcStreamBufferSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcStreamBufferSize) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ParquetSchemaUtil(org.apache.iceberg.parquet.ParquetSchemaUtil) OrcColumn(io.trino.orc.OrcColumn) PredicateUtils.buildPredicate(io.trino.parquet.predicate.PredicateUtils.buildPredicate) MetadataReader(io.trino.parquet.reader.MetadataReader) ICEBERG_DOMAIN_COMPACTION_THRESHOLD(io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD) OrcRecordReader(io.trino.orc.OrcRecordReader) NameMapping(org.apache.iceberg.mapping.NameMapping) Path(org.apache.hadoop.fs.Path) OrcDataSource(io.trino.orc.OrcDataSource) ReaderProjectionsAdapter(io.trino.plugin.hive.ReaderProjectionsAdapter) RowType(io.trino.spi.type.RowType) ICEBERG_BAD_DATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(io.trino.orc.OrcReader.INITIAL_BATCH_SIZE) ParquetReader(io.trino.parquet.reader.ParquetReader) FieldContext(io.trino.plugin.iceberg.IcebergParquetColumnIOConverter.FieldContext) ICEBERG_CURSOR_ERROR(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CURSOR_ERROR) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) ParquetTypeUtils.getColumnIO(io.trino.parquet.ParquetTypeUtils.getColumnIO) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) OrcDataSourceId(io.trino.orc.OrcDataSourceId) MessageType(org.apache.parquet.schema.MessageType) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) IcebergSessionProperties.getOrcMaxReadBlockSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) TupleDomainOrcPredicate(io.trino.orc.TupleDomainOrcPredicate) Function(java.util.function.Function) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Objects.requireNonNull(java.util.Objects.requireNonNull) Collectors.mapping(java.util.stream.Collectors.mapping) IcebergSessionProperties.isOrcBloomFiltersEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) HdfsOrcDataSource(io.trino.plugin.hive.orc.HdfsOrcDataSource) ParquetReaderOptions(io.trino.parquet.ParquetReaderOptions) OrcReader(io.trino.orc.OrcReader) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) ICEBERG_BINARY_TYPE(io.trino.plugin.iceberg.TypeConverter.ICEBERG_BINARY_TYPE) TupleDomain(io.trino.spi.predicate.TupleDomain) OrcReader.fullyProjectedLayout(io.trino.orc.OrcReader.fullyProjectedLayout) OrcCorruptionException(io.trino.orc.OrcCorruptionException) Collectors.toList(java.util.stream.Collectors.toList) ParquetTypeUtils.getDescriptors(io.trino.parquet.ParquetTypeUtils.getDescriptors) ParquetDataSource(io.trino.parquet.ParquetDataSource) TypeManager(io.trino.spi.type.TypeManager) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) Path(org.apache.hadoop.fs.Path) ReaderProjectionsAdapter(io.trino.plugin.hive.ReaderProjectionsAdapter) Optional(java.util.Optional) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext)

Example 53 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class TestMemoryMetadata method testRenameTable.

@Test
public void testRenameTable() {
    SchemaTableName tableName = new SchemaTableName("test_schema", "test_table_to_be_renamed");
    metadata.createSchema(SESSION, "test_schema", ImmutableMap.of(), new TrinoPrincipal(USER, SESSION.getUser()));
    ConnectorOutputTableHandle table = metadata.beginCreateTable(SESSION, new ConnectorTableMetadata(tableName, ImmutableList.of(), ImmutableMap.of()), Optional.empty(), NO_RETRIES);
    metadata.finishCreateTable(SESSION, table, ImmutableList.of(), ImmutableList.of());
    // rename table to schema which does not exist
    SchemaTableName invalidSchemaTableName = new SchemaTableName("test_schema_not_exist", "test_table_renamed");
    ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, tableName);
    Throwable throwable = expectThrows(SchemaNotFoundException.class, () -> metadata.renameTable(SESSION, tableHandle, invalidSchemaTableName));
    assertEquals(throwable.getMessage(), "Schema test_schema_not_exist not found");
    // rename table to same schema
    SchemaTableName sameSchemaTableName = new SchemaTableName("test_schema", "test_renamed");
    metadata.renameTable(SESSION, metadata.getTableHandle(SESSION, tableName), sameSchemaTableName);
    assertEquals(metadata.listTables(SESSION, Optional.of("test_schema")), ImmutableList.of(sameSchemaTableName));
    // rename table to different schema
    metadata.createSchema(SESSION, "test_different_schema", ImmutableMap.of(), new TrinoPrincipal(USER, SESSION.getUser()));
    SchemaTableName differentSchemaTableName = new SchemaTableName("test_different_schema", "test_renamed");
    metadata.renameTable(SESSION, metadata.getTableHandle(SESSION, sameSchemaTableName), differentSchemaTableName);
    assertEquals(metadata.listTables(SESSION, Optional.of("test_schema")), ImmutableList.of());
    assertEquals(metadata.listTables(SESSION, Optional.of("test_different_schema")), ImmutableList.of(differentSchemaTableName));
}
Also used : ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 54 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class TestMemoryMetadata method tableAlreadyExists.

@Test
public void tableAlreadyExists() {
    assertNoTables();
    SchemaTableName test1Table = new SchemaTableName("default", "test1");
    SchemaTableName test2Table = new SchemaTableName("default", "test2");
    metadata.createTable(SESSION, new ConnectorTableMetadata(test1Table, ImmutableList.of()), false);
    assertTrinoExceptionThrownBy(() -> metadata.createTable(SESSION, new ConnectorTableMetadata(test1Table, ImmutableList.of()), false)).hasErrorCode(ALREADY_EXISTS).hasMessage("Table [default.test1] already exists");
    ConnectorTableHandle test1TableHandle = metadata.getTableHandle(SESSION, test1Table);
    metadata.createTable(SESSION, new ConnectorTableMetadata(test2Table, ImmutableList.of()), false);
    assertTrinoExceptionThrownBy(() -> metadata.renameTable(SESSION, test1TableHandle, test2Table)).hasErrorCode(ALREADY_EXISTS).hasMessage("Table [default.test2] already exists");
}
Also used : SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 55 with ConnectorTableHandle

use of io.trino.spi.connector.ConnectorTableHandle in project trino by trinodb.

the class PhoenixSplitManager method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    JdbcTableHandle tableHandle = (JdbcTableHandle) table;
    try (Connection connection = phoenixClient.getConnection(session)) {
        List<JdbcColumnHandle> columns = tableHandle.getColumns().map(columnSet -> columnSet.stream().map(JdbcColumnHandle.class::cast).collect(toList())).orElseGet(() -> phoenixClient.getColumns(session, tableHandle));
        PhoenixPreparedStatement inputQuery = (PhoenixPreparedStatement) phoenixClient.prepareStatement(session, connection, tableHandle, columns, Optional.empty());
        int maxScansPerSplit = session.getProperty(PhoenixSessionProperties.MAX_SCANS_PER_SPLIT, Integer.class);
        List<ConnectorSplit> splits = getSplits(inputQuery, maxScansPerSplit).stream().map(PhoenixInputSplit.class::cast).map(split -> new PhoenixSplit(getSplitAddresses(split), SerializedPhoenixInputSplit.serialize(split))).collect(toImmutableList());
        return new FixedSplitSource(splits);
    } catch (IOException | SQLException e) {
        throw new TrinoException(PHOENIX_SPLIT_ERROR, "Couldn't get Phoenix splits", e);
    }
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) KeyRange(org.apache.phoenix.query.KeyRange) PHOENIX_INTERNAL_ERROR(io.trino.plugin.phoenix.PhoenixErrorCode.PHOENIX_INTERNAL_ERROR) Connection(java.sql.Connection) Logger(io.airlift.log.Logger) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) PhoenixInputSplit(org.apache.phoenix.mapreduce.PhoenixInputSplit) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) QueryPlan(org.apache.phoenix.compile.QueryPlan) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Objects.requireNonNull(java.util.Objects.requireNonNull) Bytes(org.apache.hadoop.hbase.util.Bytes) TableName(org.apache.hadoop.hbase.TableName) InputSplit(org.apache.hadoop.mapreduce.InputSplit) JdbcColumnHandle(io.trino.plugin.jdbc.JdbcColumnHandle) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) Scan(org.apache.hadoop.hbase.client.Scan) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) PhoenixPreparedStatement(org.apache.phoenix.jdbc.PhoenixPreparedStatement) JdbcTableHandle(io.trino.plugin.jdbc.JdbcTableHandle) HostAddress(io.trino.spi.HostAddress) PHOENIX_SPLIT_ERROR(io.trino.plugin.phoenix.PhoenixErrorCode.PHOENIX_SPLIT_ERROR) EXPECTED_UPPER_REGION_KEY(org.apache.phoenix.coprocessor.BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) SQLException(java.sql.SQLException) Connection(java.sql.Connection) JdbcColumnHandle(io.trino.plugin.jdbc.JdbcColumnHandle) IOException(java.io.IOException) JdbcTableHandle(io.trino.plugin.jdbc.JdbcTableHandle) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) PhoenixInputSplit(org.apache.phoenix.mapreduce.PhoenixInputSplit) TrinoException(io.trino.spi.TrinoException) PhoenixPreparedStatement(org.apache.phoenix.jdbc.PhoenixPreparedStatement) ConnectorSplit(io.trino.spi.connector.ConnectorSplit)

Aggregations

ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle)133 ConnectorSession (io.trino.spi.connector.ConnectorSession)82 ColumnHandle (io.trino.spi.connector.ColumnHandle)77 ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata)67 Test (org.testng.annotations.Test)65 ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata)52 SchemaTableName (io.trino.spi.connector.SchemaTableName)51 Constraint (io.trino.spi.connector.Constraint)45 TestingConnectorSession (io.trino.testing.TestingConnectorSession)44 List (java.util.List)40 Optional (java.util.Optional)40 ConnectorInsertTableHandle (io.trino.spi.connector.ConnectorInsertTableHandle)39 TupleDomain (io.trino.spi.predicate.TupleDomain)39 ImmutableList (com.google.common.collect.ImmutableList)38 ColumnMetadata (io.trino.spi.connector.ColumnMetadata)37 ImmutableMap (com.google.common.collect.ImmutableMap)36 TrinoException (io.trino.spi.TrinoException)36 ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle)36 Map (java.util.Map)36 Objects.requireNonNull (java.util.Objects.requireNonNull)36