Search in sources:

Example 6 with ConnectorIdentity

Use of io.trino.spi.security.ConnectorIdentity in project trino by trinodb.

The class BaseJdbcClient, method createTable:

protected JdbcOutputTableHandle createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, String targetTableName) throws SQLException {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    ConnectorIdentity identity = session.getIdentity();
    if (!getSchemaNames(session).contains(schemaTableName.getSchemaName())) {
        throw new TrinoException(NOT_FOUND, "Schema not found: " + schemaTableName.getSchemaName());
    }
    try (Connection connection = connectionFactory.openConnection(session)) {
        String remoteSchema = identifierMapping.toRemoteSchemaName(identity, connection, schemaTableName.getSchemaName());
        String remoteTable = identifierMapping.toRemoteTableName(identity, connection, remoteSchema, schemaTableName.getTableName());
        String remoteTargetTableName = identifierMapping.toRemoteTableName(identity, connection, remoteSchema, targetTableName);
        String catalog = connection.getCatalog();
        ImmutableList.Builder<String> columnNames = ImmutableList.builder();
        ImmutableList.Builder<Type> columnTypes = ImmutableList.builder();
        ImmutableList.Builder<String> columnList = ImmutableList.builder();
        for (ColumnMetadata column : tableMetadata.getColumns()) {
            String columnName = identifierMapping.toRemoteColumnName(connection, column.getName());
            columnNames.add(columnName);
            columnTypes.add(column.getType());
            columnList.add(getColumnDefinitionSql(session, column, columnName));
        }
        RemoteTableName remoteTableName = new RemoteTableName(Optional.ofNullable(catalog), Optional.ofNullable(remoteSchema), remoteTargetTableName);
        String sql = createTableSql(remoteTableName, columnList.build(), tableMetadata);
        execute(connection, sql);
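        // The handle records both the logical table name (remoteTable) and the table
        // created here (remoteTargetTableName); for CTAS the target is typically a
        // temporary staging table that is renamed to the final name on commit.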
        return new JdbcOutputTableHandle(catalog, remoteSchema, remoteTable, columnNames.build(), columnTypes.build(), Optional.empty(), remoteTargetTableName);
    }
}
Also used: Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) VarcharType(io.trino.spi.type.VarcharType) JoinType(io.trino.spi.connector.JoinType) CharType(io.trino.spi.type.CharType) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Connection(java.sql.Connection) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) TrinoException(io.trino.spi.TrinoException) SchemaTableName(io.trino.spi.connector.SchemaTableName)
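The identity argument threaded through the mapping calls above is what allows remote names to be resolved per user instead of globally. As a hedged sketch (this is not Trino's actual IdentifierMapping interface, which also defines methods for the reverse, remote-to-Trino direction), a mapping for a database that stores unquoted identifiers in upper case might look like:

import io.trino.spi.security.ConnectorIdentity;

import java.sql.Connection;
import java.util.Locale;

// Simplified stand-in mirroring only the calls made in createTable above
interface SimpleIdentifierMapping
{
    String toRemoteSchemaName(ConnectorIdentity identity, Connection connection, String schemaName);

    String toRemoteTableName(ConnectorIdentity identity, Connection connection, String remoteSchema, String tableName);

    String toRemoteColumnName(Connection connection, String columnName);
}

class UpperCasingMapping
        implements SimpleIdentifierMapping
{
    // The identity is unused here, but an auth-aware mapping could consult
    // identity.getUser() to route users to per-user remote schemas
    @Override
    public String toRemoteSchemaName(ConnectorIdentity identity, Connection connection, String schemaName)
    {
        return schemaName.toUpperCase(Locale.ENGLISH);
    }

    @Override
    public String toRemoteTableName(ConnectorIdentity identity, Connection connection, String remoteSchema, String tableName)
    {
        return tableName.toUpperCase(Locale.ENGLISH);
    }

    @Override
    public String toRemoteColumnName(Connection connection, String columnName)
    {
        return columnName.toUpperCase(Locale.ENGLISH);
    }
}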

Example 7 with ConnectorIdentity

Use of io.trino.spi.security.ConnectorIdentity in project trino by trinodb.

The class BaseJdbcClient, method createSchema:

@Override
public void createSchema(ConnectorSession session, String schemaName) {
    ConnectorIdentity identity = session.getIdentity();
    try (Connection connection = connectionFactory.openConnection(session)) {
        schemaName = identifierMapping.toRemoteSchemaName(identity, connection, schemaName);
        execute(connection, createSchemaSql(schemaName));
    } catch (SQLException e) {
        throw new TrinoException(JDBC_ERROR, e);
    }
}
Also used: SQLException(java.sql.SQLException) Connection(java.sql.Connection) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) TrinoException(io.trino.spi.TrinoException)
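Note that createSchemaSql is a protected hook, so dialects with non-standard DDL can override just the statement text. A minimal sketch of such an override, assuming BaseJdbcClient's quoted(...) identifier-quoting helper:

@Override
protected String createSchemaSql(String remoteSchemaName)
{
    // Roughly what the base implementation emits; a dialect without
    // CREATE SCHEMA support could emit CREATE DATABASE here instead
    return "CREATE SCHEMA " + quoted(remoteSchemaName);
}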

Example 8 with ConnectorIdentity

Use of io.trino.spi.security.ConnectorIdentity in project trino by trinodb.

The class BaseJdbcClient, method dropSchema:

@Override
public void dropSchema(ConnectorSession session, String schemaName) {
    ConnectorIdentity identity = session.getIdentity();
    try (Connection connection = connectionFactory.openConnection(session)) {
        schemaName = identifierMapping.toRemoteSchemaName(identity, connection, schemaName);
        execute(connection, dropSchemaSql(schemaName));
    } catch (SQLException e) {
        throw new TrinoException(JDBC_ERROR, e);
    }
}
Also used: SQLException(java.sql.SQLException) Connection(java.sql.Connection) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) TrinoException(io.trino.spi.TrinoException)
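All three BaseJdbcClient methods above pull the ConnectorIdentity from the session. When driving such methods directly, for example from a connector test, an identity can be constructed with the SPI's static factories. A minimal sketch with a hypothetical user and group:

import io.trino.spi.security.ConnectorIdentity;

import java.util.Set;

class IdentityExample
{
    public static void main(String[] args)
    {
        // Simplest form: an identity that carries only a user name
        ConnectorIdentity simple = ConnectorIdentity.ofUser("alice");

        // Builder form: attach group membership, which group-aware access
        // control implementations can inspect
        ConnectorIdentity withGroups = ConnectorIdentity.forUser("alice")
                .withGroups(Set.of("analysts"))
                .build();

        System.out.println(simple.getUser());       // alice
        System.out.println(withGroups.getGroups()); // [analysts]
    }
}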

Example 9 with ConnectorIdentity

Use of io.trino.spi.security.ConnectorIdentity in project trino by trinodb.

The class PhoenixClient, method beginCreateTable:

@Override
public JdbcOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schema = schemaTableName.getSchemaName();
    String table = schemaTableName.getTableName();
    if (!getSchemaNames(session).contains(schema)) {
        throw new SchemaNotFoundException(schema);
    }
    try (Connection connection = connectionFactory.openConnection(session)) {
        ConnectorIdentity identity = session.getIdentity();
        schema = getIdentifierMapping().toRemoteSchemaName(identity, connection, schema);
        table = getIdentifierMapping().toRemoteTableName(identity, connection, schema, table);
        schema = toPhoenixSchemaName(schema);
        LinkedList<ColumnMetadata> tableColumns = new LinkedList<>(tableMetadata.getColumns());
        Map<String, Object> tableProperties = tableMetadata.getProperties();
        Optional<Boolean> immutableRows = PhoenixTableProperties.getImmutableRows(tableProperties);
        String immutable = immutableRows.isPresent() && immutableRows.get() ? "IMMUTABLE" : "";
        ImmutableList.Builder<String> columnNames = ImmutableList.builder();
        ImmutableList.Builder<Type> columnTypes = ImmutableList.builder();
        ImmutableList.Builder<String> columnList = ImmutableList.builder();
        Set<ColumnMetadata> rowkeyColumns = tableColumns.stream().filter(col -> isPrimaryKey(col, tableProperties)).collect(toSet());
        ImmutableList.Builder<String> pkNames = ImmutableList.builder();
        Optional<String> rowkeyColumn = Optional.empty();
        if (rowkeyColumns.isEmpty()) {
            // Add a rowkey when not specified in DDL
            columnList.add(ROWKEY + " bigint not null");
            pkNames.add(ROWKEY);
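            // The sequence created next ("<table>_sequence") supplies values for the
            // synthetic rowkey as rows are written.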
            execute(session, format("CREATE SEQUENCE %s", getEscapedTableName(schema, table + "_sequence")));
            rowkeyColumn = Optional.of(ROWKEY);
        }
        for (ColumnMetadata column : tableColumns) {
            String columnName = getIdentifierMapping().toRemoteColumnName(connection, column.getName());
            columnNames.add(columnName);
            columnTypes.add(column.getType());
            String typeStatement = toWriteMapping(session, column.getType()).getDataType();
            if (rowkeyColumns.contains(column)) {
                typeStatement += " not null";
                pkNames.add(columnName);
            }
            columnList.add(format("%s %s", getEscapedArgument(columnName), typeStatement));
        }
        ImmutableList.Builder<String> tableOptions = ImmutableList.builder();
        PhoenixTableProperties.getSaltBuckets(tableProperties).ifPresent(value -> tableOptions.add(TableProperty.SALT_BUCKETS + "=" + value));
        PhoenixTableProperties.getSplitOn(tableProperties).ifPresent(value -> tableOptions.add("SPLIT ON (" + value.replace('"', '\'') + ")"));
        PhoenixTableProperties.getDisableWal(tableProperties).ifPresent(value -> tableOptions.add(TableProperty.DISABLE_WAL + "=" + value));
        PhoenixTableProperties.getDefaultColumnFamily(tableProperties).ifPresent(value -> tableOptions.add(TableProperty.DEFAULT_COLUMN_FAMILY + "=" + value));
        PhoenixTableProperties.getBloomfilter(tableProperties).ifPresent(value -> tableOptions.add(HColumnDescriptor.BLOOMFILTER + "='" + value + "'"));
        PhoenixTableProperties.getVersions(tableProperties).ifPresent(value -> tableOptions.add(HConstants.VERSIONS + "=" + value));
        PhoenixTableProperties.getMinVersions(tableProperties).ifPresent(value -> tableOptions.add(HColumnDescriptor.MIN_VERSIONS + "=" + value));
        PhoenixTableProperties.getCompression(tableProperties).ifPresent(value -> tableOptions.add(HColumnDescriptor.COMPRESSION + "='" + value + "'"));
        PhoenixTableProperties.getTimeToLive(tableProperties).ifPresent(value -> tableOptions.add(HColumnDescriptor.TTL + "=" + value));
        PhoenixTableProperties.getDataBlockEncoding(tableProperties).ifPresent(value -> tableOptions.add(HColumnDescriptor.DATA_BLOCK_ENCODING + "='" + value + "'"));
        String sql = format("CREATE %s TABLE %s (%s , CONSTRAINT PK PRIMARY KEY (%s)) %s", immutable, getEscapedTableName(schema, table), join(", ", columnList.build()), join(", ", pkNames.build()), join(", ", tableOptions.build()));
        execute(session, sql);
        return new PhoenixOutputTableHandle(schema, table, columnNames.build(), columnTypes.build(), Optional.empty(), rowkeyColumn);
    } catch (SQLException e) {
        if (e.getErrorCode() == SQLExceptionCode.TABLE_ALREADY_EXIST.getErrorCode()) {
            throw new TrinoException(ALREADY_EXISTS, "Phoenix table already exists", e);
        }
        throw new TrinoException(PHOENIX_METADATA_ERROR, "Error creating Phoenix table", e);
    }
}
Also used: ColumnMetadata(io.trino.spi.connector.ColumnMetadata) SQLException(java.sql.SQLException) ImmutableList(com.google.common.collect.ImmutableList) PhoenixConnection(org.apache.phoenix.jdbc.PhoenixConnection) Connection(java.sql.Connection) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) SchemaTableName(io.trino.spi.connector.SchemaTableName) LinkedList(java.util.LinkedList) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) JDBCType(java.sql.JDBCType) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) VarbinaryType(io.trino.spi.type.VarbinaryType) CharType(io.trino.spi.type.CharType) ArrayType(io.trino.spi.type.ArrayType) PDataType(org.apache.phoenix.schema.types.PDataType) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) VarcharType(io.trino.spi.type.VarcharType) TrinoException(io.trino.spi.TrinoException) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException)
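For illustration, here is roughly the statement the format call in beginCreateTable assembles when the DDL declares no primary key, so the synthetic ROWKEY becomes the constraint. Schema, table, and column names are hypothetical; this is a sketch of the string shape, not output captured from Phoenix:

class PhoenixDdlShape
{
    public static void main(String[] args)
    {
        // Mirrors the CREATE %s TABLE ... format string above, with an empty
        // IMMUTABLE slot and no table options
        String sql = String.format(
                "CREATE %s TABLE %s (%s , CONSTRAINT PK PRIMARY KEY (%s)) %s",
                "",
                "\"web\".\"events\"",
                String.join(", ", "ROWKEY bigint not null", "\"ts\" timestamp", "\"name\" varchar"),
                "ROWKEY",
                "");
        // Prints (modulo trailing space):
        // CREATE  TABLE "web"."events" (ROWKEY bigint not null, "ts" timestamp, "name" varchar , CONSTRAINT PK PRIMARY KEY (ROWKEY))
        System.out.println(sql);
    }
}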

Example 10 with ConnectorIdentity

Use of io.trino.spi.security.ConnectorIdentity in project trino by trinodb.

The class IcebergPageSourceProvider, method createParquetPageSource:

private static ReaderPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, ConnectorIdentity identity, Configuration configuration, Path path, long start, long length, long fileSize, List<IcebergColumnHandle> regularColumns, ParquetReaderOptions options, TupleDomain<IcebergColumnHandle> effectivePredicate, FileFormatDataSourceStats fileFormatDataSourceStats, Optional<NameMapping> nameMapping) {
    AggregatedMemoryContext memoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
        dataSource = new HdfsParquetDataSource(new ParquetDataSourceId(path.toString()), fileSize, inputStream, fileFormatDataSourceStats, options);
        // extra variable required for lambda below
        ParquetDataSource theDataSource = dataSource;
        ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(identity, () -> MetadataReader.readFooter(theDataSource));
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        if (nameMapping.isPresent() && !ParquetSchemaUtil.hasIds(fileSchema)) {
            // NameMapping conversion is necessary because MetadataReader converts all column names to lowercase and NameMapping is case sensitive
            fileSchema = ParquetSchemaUtil.applyNameMapping(fileSchema, convertToLowercase(nameMapping.get()));
        }
        // Mapping from Iceberg field ID to Parquet fields.
        Map<Integer, org.apache.parquet.schema.Type> parquetIdToField = fileSchema.getFields().stream().filter(field -> field.getId() != null).collect(toImmutableMap(field -> field.getId().intValue(), Function.identity()));
        Optional<ReaderColumns> columnProjections = projectColumns(regularColumns);
        List<IcebergColumnHandle> readColumns = columnProjections.map(readerColumns -> (List<IcebergColumnHandle>) readerColumns.get().stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList())).orElse(regularColumns);
        List<org.apache.parquet.schema.Type> parquetFields = readColumns.stream().map(column -> parquetIdToField.get(column.getId())).collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), parquetFields.stream().filter(Objects::nonNull).collect(toImmutableList()));
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath, UTC);
        List<BlockMetaData> blocks = new ArrayList<>();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (start <= firstDataPage && firstDataPage < start + length && predicateMatches(parquetPredicate, block, dataSource, descriptorsByPath, parquetTupleDomain)) {
                blocks.add(block);
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(Optional.ofNullable(fileMetaData.getCreatedBy()), messageColumnIO, blocks, Optional.empty(), dataSource, UTC, memoryContext, options);
        ImmutableList.Builder<Type> trinoTypes = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> internalFields = ImmutableList.builder();
        for (int columnIndex = 0; columnIndex < readColumns.size(); columnIndex++) {
            IcebergColumnHandle column = readColumns.get(columnIndex);
            org.apache.parquet.schema.Type parquetField = parquetFields.get(columnIndex);
            Type trinoType = column.getBaseType();
            trinoTypes.add(trinoType);
            if (parquetField == null) {
                internalFields.add(Optional.empty());
            } else {
                // The top level columns are already mapped by name/id appropriately.
                ColumnIO columnIO = messageColumnIO.getChild(parquetField.getName());
                internalFields.add(IcebergParquetColumnIOConverter.constructField(new FieldContext(trinoType, column.getColumnIdentity()), columnIO));
            }
        }
        return new ReaderPageSource(new ParquetPageSource(parquetReader, trinoTypes.build(), internalFields.build()), columnProjections);
    } catch (IOException | RuntimeException e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof ParquetCorruptionException) {
            throw new TrinoException(ICEBERG_BAD_DATA, message, e);
        }
        if (e instanceof BlockMissingException) {
            throw new TrinoException(ICEBERG_MISSING_DATA, message, e);
        }
        throw new TrinoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used: ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) MessageType(org.apache.parquet.schema.MessageType) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ColumnIO(org.apache.parquet.io.ColumnIO) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) ParquetTypeUtils.getColumnIO(io.trino.parquet.ParquetTypeUtils.getColumnIO) Predicate(io.trino.parquet.predicate.Predicate) PredicateUtils.buildPredicate(io.trino.parquet.predicate.PredicateUtils.buildPredicate) ParquetReader(io.trino.parquet.reader.ParquetReader) ParquetDataSource(io.trino.parquet.ParquetDataSource) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) HdfsParquetDataSource(io.trino.plugin.hive.parquet.HdfsParquetDataSource) ParquetPageSource(io.trino.plugin.hive.parquet.ParquetPageSource) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) FieldContext(io.trino.plugin.iceberg.IcebergParquetColumnIOConverter.FieldContext) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) ReaderColumns(io.trino.plugin.hive.ReaderColumns) Type(io.trino.spi.type.Type) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Optional(java.util.Optional) Objects(java.util.Objects) IOException(java.io.IOException) TrinoException(io.trino.spi.TrinoException)
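A side note on the "extra variable required for lambda below" comment in the method: Java only lets a lambda capture local variables that are effectively final, and dataSource is reassigned inside the try block, so it cannot be captured directly. A self-contained sketch of the same workaround:

import java.util.function.Supplier;

class EffectivelyFinalDemo
{
    public static void main(String[] args)
    {
        String value = null;
        value = "reassigned";                   // value is no longer effectively final
        // Supplier<String> bad = () -> value; // would not compile
        String copy = value;                    // never reassigned, so effectively final
        Supplier<String> ok = () -> copy;       // compiles; same trick as theDataSource
        System.out.println(ok.get());
    }
}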

Aggregations

ConnectorIdentity (io.trino.spi.security.ConnectorIdentity): 21 usages
TrinoException (io.trino.spi.TrinoException): 16 usages
Type (io.trino.spi.type.Type): 12 usages
Map (java.util.Map): 11 usages
Optional (java.util.Optional): 11 usages
ImmutableList (com.google.common.collect.ImmutableList): 10 usages
Connection (java.sql.Connection): 10 usages
SchemaTableName (io.trino.spi.connector.SchemaTableName): 9 usages
List (java.util.List): 9 usages
Set (java.util.Set): 9 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 8 usages
Inject (javax.inject.Inject): 8 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 7 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 7 usages
ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet): 7 usages
NOT_SUPPORTED (io.trino.spi.StandardErrorCode.NOT_SUPPORTED): 7 usages
String.format (java.lang.String.format): 7 usages
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 6 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 6 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 6 usages