
Example 11 with NOT_SUPPORTED

Use of io.trino.spi.StandardErrorCode.NOT_SUPPORTED in project trino by trinodb.

The class BigQuerySplitManager, method createEmptyProjection:

private List<BigQuerySplit> createEmptyProjection(ConnectorSession session, TableId remoteTableId, int actualParallelism, Optional<String> filter) {
    BigQueryClient client = bigQueryClientFactory.create(session);
    log.debug("createEmptyProjection(tableId=%s, actualParallelism=%s, filter=[%s])", remoteTableId, actualParallelism, filter);
    try {
        long numberOfRows;
        if (filter.isPresent()) {
            // count the rows based on the filter
            String sql = client.selectSql(remoteTableId, "COUNT(*)");
            TableResult result = client.query(sql);
            numberOfRows = result.iterateAll().iterator().next().get(0).getLongValue();
        } else {
            // no filters, so we can take the value from the table info when the object is TABLE
            TableInfo tableInfo = client.getTable(remoteTableId).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(remoteTableId.getDataset(), remoteTableId.getTable())));
            if (tableInfo.getDefinition().getType() == TABLE) {
                numberOfRows = tableInfo.getNumRows().longValue();
            } else if (tableInfo.getDefinition().getType() == VIEW) {
                String sql = client.selectSql(remoteTableId, "COUNT(*)");
                TableResult result = client.query(sql);
                numberOfRows = result.iterateAll().iterator().next().get(0).getLongValue();
            } else {
                throw new TrinoException(NOT_SUPPORTED, "Unsupported table type: " + tableInfo.getDefinition().getType());
            }
        }
        long rowsPerSplit = numberOfRows / actualParallelism;
        // the remainder must be added to one of the splits due to integer division
        long remainingRows = numberOfRows - (rowsPerSplit * actualParallelism);
        List<BigQuerySplit> splits = range(0, actualParallelism).mapToObj(ignored -> BigQuerySplit.emptyProjection(rowsPerSplit)).collect(toList());
        splits.set(0, BigQuerySplit.emptyProjection(rowsPerSplit + remainingRows));
        return splits;
    } catch (BigQueryException e) {
        throw new TrinoException(BIGQUERY_FAILED_TO_EXECUTE_QUERY, "Failed to compute empty projection", e);
    }
}
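Below is a minimal, standalone sketch of the sizing arithmetic used above; the class and method names (SplitSizing, sizeSplits) are hypothetical and not part of Trino. It shows why the remainder of the integer division is folded into the first split: the per-split counts must sum back to the original row count.

import java.util.ArrayList;
import java.util.List;

public class SplitSizing
{
    public static List<Long> sizeSplits(long numberOfRows, int actualParallelism)
    {
        long rowsPerSplit = numberOfRows / actualParallelism;
        // integer division loses up to (actualParallelism - 1) rows
        long remainingRows = numberOfRows - (rowsPerSplit * actualParallelism);
        List<Long> sizes = new ArrayList<>();
        for (int i = 0; i < actualParallelism; i++) {
            sizes.add(rowsPerSplit);
        }
        // fold the remainder into the first split so the sizes sum to numberOfRows
        sizes.set(0, rowsPerSplit + remainingRows);
        return sizes;
    }

    public static void main(String[] args)
    {
        // 10 rows over 3 splits -> [4, 3, 3]
        System.out.println(sizeSplits(10, 3));
    }
}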

Example 12 with NOT_SUPPORTED

Use of io.trino.spi.StandardErrorCode.NOT_SUPPORTED in project trino by trinodb.

The class CassandraSession, method getTable:

public CassandraTable getTable(SchemaTableName schemaTableName) throws TableNotFoundException {
    KeyspaceMetadata keyspace = getKeyspaceByCaseInsensitiveName(schemaTableName.getSchemaName());
    AbstractTableMetadata tableMeta = getTableMetadata(keyspace, schemaTableName.getTableName());
    List<String> columnNames = new ArrayList<>();
    List<ColumnMetadata> columns = tableMeta.getColumns();
    checkColumnNames(columns);
    for (ColumnMetadata columnMetadata : columns) {
        columnNames.add(columnMetadata.getName());
    }
    // check if there is a comment to establish column ordering
    String comment = tableMeta.getOptions().getComment();
    Set<String> hiddenColumns = ImmutableSet.of();
    if (comment != null && comment.startsWith(PRESTO_COMMENT_METADATA)) {
        String columnOrderingString = comment.substring(PRESTO_COMMENT_METADATA.length());
        // column ordering
        List<ExtraColumnMetadata> extras = extraColumnMetadataCodec.fromJson(columnOrderingString);
        List<String> explicitColumnOrder = new ArrayList<>(ImmutableList.copyOf(transform(extras, ExtraColumnMetadata::getName)));
        hiddenColumns = extras.stream().filter(ExtraColumnMetadata::isHidden).map(ExtraColumnMetadata::getName).collect(toImmutableSet());
        // add columns not in the comment to the ordering
        List<String> remaining = columnNames.stream().filter(name -> !explicitColumnOrder.contains(name)).collect(toList());
        explicitColumnOrder.addAll(remaining);
        // sort the actual columns names using the explicit column order (this allows for missing columns)
        columnNames = Ordering.explicit(explicitColumnOrder).sortedCopy(columnNames);
    }
    ImmutableList.Builder<CassandraColumnHandle> columnHandles = ImmutableList.builder();
    // add primary keys first
    Set<String> primaryKeySet = new HashSet<>();
    for (ColumnMetadata columnMeta : tableMeta.getPartitionKey()) {
        primaryKeySet.add(columnMeta.getName());
        boolean hidden = hiddenColumns.contains(columnMeta.getName());
        CassandraColumnHandle columnHandle = buildColumnHandle(tableMeta, columnMeta, true, false, columnNames.indexOf(columnMeta.getName()), hidden).orElseThrow(() -> new TrinoException(NOT_SUPPORTED, "Unsupported partition key type: " + columnMeta.getType().getName()));
        columnHandles.add(columnHandle);
    }
    // add clustering columns
    for (ColumnMetadata columnMeta : tableMeta.getClusteringColumns()) {
        primaryKeySet.add(columnMeta.getName());
        boolean hidden = hiddenColumns.contains(columnMeta.getName());
        Optional<CassandraColumnHandle> columnHandle = buildColumnHandle(tableMeta, columnMeta, false, true, columnNames.indexOf(columnMeta.getName()), hidden);
        columnHandle.ifPresent(columnHandles::add);
    }
    // add other columns
    for (ColumnMetadata columnMeta : columns) {
        if (!primaryKeySet.contains(columnMeta.getName())) {
            boolean hidden = hiddenColumns.contains(columnMeta.getName());
            Optional<CassandraColumnHandle> columnHandle = buildColumnHandle(tableMeta, columnMeta, false, false, columnNames.indexOf(columnMeta.getName()), hidden);
            columnHandle.ifPresent(columnHandles::add);
        }
    }
    List<CassandraColumnHandle> sortedColumnHandles = columnHandles.build().stream().sorted(comparing(CassandraColumnHandle::getOrdinalPosition)).collect(toList());
    CassandraTableHandle tableHandle = new CassandraTableHandle(tableMeta.getKeyspace().getName(), tableMeta.getName());
    return new CassandraTable(tableHandle, sortedColumnHandles);
}
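The comment-driven ordering above relies on Guava's Ordering.explicit, which fails when asked to compare a value it does not know; appending the columns missing from the comment metadata first is what makes the sort safe. A minimal sketch of that step, under a hypothetical class name:

import com.google.common.collect.Ordering;

import java.util.ArrayList;
import java.util.List;

public class ColumnOrderingDemo
{
    public static void main(String[] args)
    {
        List<String> columnNames = List.of("c", "a", "b", "extra");
        List<String> explicitColumnOrder = new ArrayList<>(List.of("a", "b", "c"));
        // append columns not mentioned in the explicit order, as getTable does
        columnNames.stream()
                .filter(name -> !explicitColumnOrder.contains(name))
                .forEach(explicitColumnOrder::add);
        // prints [a, b, c, extra]
        System.out.println(Ordering.explicit(explicitColumnOrder).sortedCopy(columnNames));
    }
}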

Example 13 with NOT_SUPPORTED

Use of io.trino.spi.StandardErrorCode.NOT_SUPPORTED in project trino by trinodb.

The class DeltaLakeMetadata, method createTable:

@Override
public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    Database schema = metastore.getDatabase(schemaName).orElseThrow(() -> new SchemaNotFoundException(schemaName));
    boolean external = true;
    String location = getLocation(tableMetadata.getProperties());
    if (location == null) {
        Optional<String> schemaLocation = getSchemaLocation(schema);
        if (schemaLocation.isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "The 'location' property must be specified either for the table or the schema");
        }
        location = new Path(schemaLocation.get(), tableName).toString();
        checkPathContainsNoFiles(session, new Path(location));
        external = false;
    }
    Path targetPath = new Path(location);
    ensurePathExists(session, targetPath);
    Path deltaLogDirectory = getTransactionLogDir(targetPath);
    Optional<Long> checkpointInterval = DeltaLakeTableProperties.getCheckpointInterval(tableMetadata.getProperties());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), targetPath);
        if (!fileSystem.exists(deltaLogDirectory)) {
            validateTableColumns(tableMetadata);
            List<String> partitionColumns = getPartitionedBy(tableMetadata.getProperties());
            List<DeltaLakeColumnHandle> deltaLakeColumns = tableMetadata.getColumns().stream().map(column -> toColumnHandle(column, partitionColumns)).collect(toImmutableList());
            TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriterWithoutTransactionIsolation(session, targetPath.toString());
            appendInitialTableEntries(transactionLogWriter, deltaLakeColumns, partitionColumns, buildDeltaMetadataConfiguration(checkpointInterval), CREATE_TABLE_OPERATION, session, nodeVersion, nodeId);
            setRollback(() -> deleteRecursivelyIfExists(new HdfsContext(session), hdfsEnvironment, deltaLogDirectory));
            transactionLogWriter.flush();
        }
    } catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Unable to access file system for: " + location, e);
    }
    Table.Builder tableBuilder = Table.builder().setDatabaseName(schemaName).setTableName(tableName).setOwner(Optional.of(session.getUser())).setTableType(external ? EXTERNAL_TABLE.name() : MANAGED_TABLE.name()).setDataColumns(DUMMY_DATA_COLUMNS).setParameters(deltaTableProperties(session, location, external));
    setDeltaStorageFormat(tableBuilder, location, targetPath);
    Table table = tableBuilder.build();
    PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow());
    metastore.createTable(session, table, principalPrivileges);
}
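The location handling above also decides whether the table is registered as EXTERNAL_TABLE or MANAGED_TABLE. A minimal sketch of that fallback, with hypothetical names and plain string concatenation standing in for Hadoop's Path:

import java.util.Optional;

public class TableLocationDemo
{
    // returns the resolved table directory; an explicit table location means an external table
    static String resolveLocation(Optional<String> tableLocation, Optional<String> schemaLocation, String tableName)
    {
        if (tableLocation.isPresent()) {
            return tableLocation.get(); // external table
        }
        String schemaDir = schemaLocation.orElseThrow(() ->
                new IllegalArgumentException("The 'location' property must be specified either for the table or the schema"));
        return schemaDir + "/" + tableName; // managed table under the schema directory
    }

    public static void main(String[] args)
    {
        // prints s3://bucket/schema/orders
        System.out.println(resolveLocation(Optional.empty(), Optional.of("s3://bucket/schema"), "orders"));
    }
}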

Example 14 with NOT_SUPPORTED

Use of io.trino.spi.StandardErrorCode.NOT_SUPPORTED in project trino by trinodb.

The class DeltaLakeMetadata, method beginInsert:

@Override
public ConnectorInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> columns) {
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) tableHandle;
    if (!allowWrite(session, table)) {
        String fileSystem = new Path(table.getLocation()).toUri().getScheme();
        throw new TrinoException(NOT_SUPPORTED, format("Inserts are not supported on the %s filesystem", fileSystem));
    }
    checkSupportedWriterVersion(session, table.getSchemaTableName());
    List<DeltaLakeColumnHandle> inputColumns = columns.stream().map(handle -> (DeltaLakeColumnHandle) handle).collect(toImmutableList());
    ConnectorTableMetadata tableMetadata = getTableMetadata(session, table);
    // This check acts as a safeguard in cases where the input columns may differ from the table metadata case-sensitively
    checkAllColumnsPassedOnInsert(tableMetadata, inputColumns);
    String tableLocation = getLocation(tableMetadata.getProperties());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(tableLocation));
        return new DeltaLakeInsertTableHandle(table.getSchemaName(), table.getTableName(), tableLocation, table.getMetadataEntry(), inputColumns, getMandatoryCurrentVersion(fileSystem, new Path(tableLocation)));
    } catch (IOException e) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
    }
}
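The file system named in the NOT_SUPPORTED message is simply the URI scheme of the table location. A short sketch using java.net.URI in place of Hadoop's Path:

import java.net.URI;

public class SchemeDemo
{
    public static void main(String[] args)
    {
        // prints s3
        System.out.println(URI.create("s3://bucket/warehouse/table").getScheme());
    }
}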

Example 15 with NOT_SUPPORTED

Use of io.trino.spi.StandardErrorCode.NOT_SUPPORTED in project trino by trinodb.

The class IcebergSplitSource, method getNextBatch:

@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
    long timeLeft = dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS);
    if (dynamicFilter.isAwaitable() && timeLeft > 0) {
        return dynamicFilter.isBlocked().thenApply(ignored -> EMPTY_BATCH).completeOnTimeout(EMPTY_BATCH, timeLeft, MILLISECONDS);
    }
    if (combinedScanIterable == null) {
        // Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg API
        this.pushedDownDynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
        TupleDomain<IcebergColumnHandle> fullPredicate = tableHandle.getUnenforcedPredicate().intersect(pushedDownDynamicFilterPredicate);
        // TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing TupleDomain#simplify
        TupleDomain<IcebergColumnHandle> simplifiedPredicate = fullPredicate.simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
        if (!simplifiedPredicate.equals(fullPredicate)) {
            // Pushed down predicate was simplified, always evaluate it against individual splits
            this.pushedDownDynamicFilterPredicate = TupleDomain.all();
        }
        TupleDomain<IcebergColumnHandle> effectivePredicate = tableHandle.getEnforcedPredicate().intersect(simplifiedPredicate);
        if (effectivePredicate.isNone()) {
            finish();
            return completedFuture(NO_MORE_SPLITS_BATCH);
        }
        Expression filterExpression = toIcebergExpression(effectivePredicate);
        this.combinedScanIterable = tableScan.filter(filterExpression).includeColumnStats().planTasks();
        this.fileScanIterator = Streams.stream(combinedScanIterable).map(CombinedScanTask::files).flatMap(Collection::stream).iterator();
    }
    TupleDomain<IcebergColumnHandle> dynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
    if (dynamicFilterPredicate.isNone()) {
        finish();
        return completedFuture(NO_MORE_SPLITS_BATCH);
    }
    Iterator<FileScanTask> fileScanTasks = Iterators.limit(fileScanIterator, maxSize);
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (fileScanTasks.hasNext()) {
        FileScanTask scanTask = fileScanTasks.next();
        if (!scanTask.deletes().isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "Iceberg tables with delete files are not supported: " + tableHandle.getSchemaTableName());
        }
        if (maxScannedFileSizeInBytes.isPresent() && scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) {
            continue;
        }
        IcebergSplit icebergSplit = toIcebergSplit(scanTask);
        Schema fileSchema = scanTask.spec().schema();
        Set<IcebergColumnHandle> identityPartitionColumns = icebergSplit.getPartitionKeys().keySet().stream().map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager)).collect(toImmutableSet());
        Supplier<Map<ColumnHandle, NullableValue>> partitionValues = memoize(() -> {
            Map<ColumnHandle, NullableValue> bindings = new HashMap<>();
            for (IcebergColumnHandle partitionColumn : identityPartitionColumns) {
                Object partitionValue = deserializePartitionValue(partitionColumn.getType(), icebergSplit.getPartitionKeys().get(partitionColumn.getId()).orElse(null), partitionColumn.getName());
                NullableValue bindingValue = new NullableValue(partitionColumn.getType(), partitionValue);
                bindings.put(partitionColumn, bindingValue);
            }
            return bindings;
        });
        if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) {
            if (!partitionMatchesPredicate(identityPartitionColumns, partitionValues, dynamicFilterPredicate)) {
                continue;
            }
            if (!fileMatchesPredicate(fieldIdToType, dynamicFilterPredicate, scanTask.file().lowerBounds(), scanTask.file().upperBounds(), scanTask.file().nullValueCounts())) {
                continue;
            }
        }
        if (!partitionMatchesConstraint(identityPartitionColumns, partitionValues, constraint)) {
            continue;
        }
        if (recordScannedFiles) {
            scannedFiles.add(scanTask.file());
        }
        splits.add(icebergSplit);
    }
    return completedFuture(new ConnectorSplitBatch(splits.build(), isFinished()));
}
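The partition values above are wrapped in Suppliers.memoize so the bindings map is computed at most once per split, even though both the dynamic-filter check and the constraint check may consult it. A minimal sketch of that pattern, with hypothetical names:

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;

public class MemoizeDemo
{
    public static void main(String[] args)
    {
        Supplier<String> bindings = Suppliers.memoize(() -> {
            System.out.println("computing once");
            return "partition bindings";
        });
        bindings.get(); // prints "computing once"
        bindings.get(); // cached; the lambda does not run again
    }
}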

Aggregations

NOT_SUPPORTED (io.trino.spi.StandardErrorCode.NOT_SUPPORTED): 18 usages
List (java.util.List): 18 usages
TrinoException (io.trino.spi.TrinoException): 17 usages
ImmutableList (com.google.common.collect.ImmutableList): 16 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 16 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 15 usages
Optional (java.util.Optional): 15 usages
Map (java.util.Map): 14 usages
ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet): 13 usages
String.format (java.lang.String.format): 13 usages
HashMap (java.util.HashMap): 13 usages
Set (java.util.Set): 13 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 12 usages
ColumnHandle (io.trino.spi.connector.ColumnHandle): 12 usages
Preconditions.checkState (com.google.common.base.Preconditions.checkState): 11 usages
Logger (io.airlift.log.Logger): 11 usages
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 10 usages
Verify.verify (com.google.common.base.Verify.verify): 10 usages
Sets (com.google.common.collect.Sets): 10 usages
JsonCodec (io.airlift.json.JsonCodec): 10 usages