Search in sources:

Example 21 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From class TestRubixCaching, method getCachingFileSystem:

private FileSystem getCachingFileSystem(HdfsContext context, Path path) throws IOException {
    HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(config, ImmutableSet.of());
    HiveHdfsConfiguration configuration = new HiveHdfsConfiguration(configurationInitializer, ImmutableSet.of(rubixConfigInitializer, (dynamicConfig, ignoredContext, ignoredUri) -> {
        // Route local paths through the Rubix caching wrapper.
        dynamicConfig.set("fs.file.impl", CachingLocalFileSystem.class.getName());
        dynamicConfig.setBoolean("fs.gs.lazy.init.enable", true);
        // Dummy credentials so the Azure and ADL file systems can initialize
        // in tests without real accounts.
        dynamicConfig.set("fs.azure.account.key", "Zm9vCg==");
        dynamicConfig.set("fs.adl.oauth2.client.id", "test");
        dynamicConfig.set("fs.adl.oauth2.refresh.url", "http://localhost");
        dynamicConfig.set("fs.adl.oauth2.credential", "password");
    }));
    HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
    return environment.getFileSystem(context, path);
}
Also used: io.trino.plugin.hive.HdfsConfigurationInitializer, io.trino.plugin.hive.HiveHdfsConfiguration, io.trino.plugin.hive.authentication.NoHdfsAuthentication, io.trino.plugin.hive.HdfsEnvironment
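
A minimal usage sketch (the HdfsContext and URI are assumed for illustration, not the test's actual assertions): with the Rubix initializers installed, a path whose scheme Rubix wraps should resolve to a CachingFileSystem subclass.

// Hedged sketch; "context" and the gs:// URI are illustrative.
FileSystem fileSystem = getCachingFileSystem(context, new Path("gs://example-bucket/data"));
// Rubix wraps supported schemes in CachingFileSystem subclasses,
// e.g. CachingPrestoGoogleHadoopFileSystem for gs://.
assertInstanceOf(fileSystem, CachingFileSystem.class);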

Example 22 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From class DeltaLakeMetadata, method createTable:

@Override
public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    Database schema = metastore.getDatabase(schemaName).orElseThrow(() -> new SchemaNotFoundException(schemaName));
    // Assume an external table until a managed location is derived from the schema.
    boolean external = true;
    String location = getLocation(tableMetadata.getProperties());
    if (location == null) {
        Optional<String> schemaLocation = getSchemaLocation(schema);
        if (schemaLocation.isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "The 'location' property must be specified either for the table or the schema");
        }
        location = new Path(schemaLocation.get(), tableName).toString();
        checkPathContainsNoFiles(session, new Path(location));
        external = false;
    }
    Path targetPath = new Path(location);
    ensurePathExists(session, targetPath);
    Path deltaLogDirectory = getTransactionLogDir(targetPath);
    Optional<Long> checkpointInterval = DeltaLakeTableProperties.getCheckpointInterval(tableMetadata.getProperties());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), targetPath);
        // Write the initial transaction log entries only if no transaction log exists yet.
        if (!fileSystem.exists(deltaLogDirectory)) {
            validateTableColumns(tableMetadata);
            List<String> partitionColumns = getPartitionedBy(tableMetadata.getProperties());
            List<DeltaLakeColumnHandle> deltaLakeColumns = tableMetadata.getColumns().stream().map(column -> toColumnHandle(column, partitionColumns)).collect(toImmutableList());
            TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriterWithoutTransactionIsolation(session, targetPath.toString());
            appendInitialTableEntries(transactionLogWriter, deltaLakeColumns, partitionColumns, buildDeltaMetadataConfiguration(checkpointInterval), CREATE_TABLE_OPERATION, session, nodeVersion, nodeId);
            setRollback(() -> deleteRecursivelyIfExists(new HdfsContext(session), hdfsEnvironment, deltaLogDirectory));
            transactionLogWriter.flush();
        }
    } catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Unable to access file system for: " + location, e);
    }
    Table.Builder tableBuilder = Table.builder().setDatabaseName(schemaName).setTableName(tableName).setOwner(Optional.of(session.getUser())).setTableType(external ? EXTERNAL_TABLE.name() : MANAGED_TABLE.name()).setDataColumns(DUMMY_DATA_COLUMNS).setParameters(deltaTableProperties(session, location, external));
    setDeltaStorageFormat(tableBuilder, location, targetPath);
    Table table = tableBuilder.build();
    PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow());
    metastore.createTable(session, table, principalPrivileges);
}
Also used: org.apache.hadoop.fs.Path, io.trino.plugin.hive.metastore.Table, io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable, io.trino.plugin.hive.metastore.PrincipalPrivileges, java.io.IOException, io.trino.spi.connector.SchemaTableName, io.trino.spi.connector.CatalogSchemaTableName, org.apache.hadoop.fs.FileSystem, io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem, io.trino.plugin.hive.metastore.Database, io.trino.spi.TrinoException, io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter, io.trino.spi.connector.SchemaNotFoundException, io.trino.plugin.hive.HdfsEnvironment.HdfsContext
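
The location rule buried in createTable is worth pulling out. A hypothetical distillation (the helper name and signature are illustrative, not connector code):

// Hypothetical helper: an explicit 'location' property makes the table external;
// otherwise a managed location is derived under the schema location.
private static String resolveTableLocation(Optional<String> explicitLocation, Optional<String> schemaLocation, String tableName)
{
    return explicitLocation.orElseGet(() -> schemaLocation
            .map(schemaDir -> new Path(schemaDir, tableName).toString())
            .orElseThrow(() -> new TrinoException(NOT_SUPPORTED,
                    "The 'location' property must be specified either for the table or the schema")));
}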

Example 23 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From class DeltaLakePageSourceProvider, method createPageSource:

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
    DeltaLakeSplit split = (DeltaLakeSplit) connectorSplit;
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) connectorTable;
    // We reach here when the split could not be pruned during split generation using
    // file-level stats, the table predicate, and the coordinator's dynamic filter.
    // The file-level stats in DeltaLakeSplit#filePredicate may prune this split now
    // that a more selective dynamic filter is available, without reading the Parquet
    // file footer for row-group stats.
    // We avoid sending DeltaLakeSplit#splitPredicate to workers by using table.getPredicate() here.
    TupleDomain<DeltaLakeColumnHandle> filteredSplitPredicate = TupleDomain.intersect(ImmutableList.of(table.getNonPartitionConstraint(), split.getStatisticsPredicate(), dynamicFilter.getCurrentPredicate().transformKeys(DeltaLakeColumnHandle.class::cast)));
    if (filteredSplitPredicate.isNone()) {
        return new EmptyPageSource();
    }
    List<DeltaLakeColumnHandle> deltaLakeColumns = columns.stream().map(DeltaLakeColumnHandle.class::cast).collect(toImmutableList());
    Map<String, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<DeltaLakeColumnHandle> regularColumns = deltaLakeColumns.stream().filter(column -> column.getColumnType() == REGULAR).collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = regularColumns.stream().map(DeltaLakeColumnHandle::toHiveColumnHandle).collect(toImmutableList());
    Path path = new Path(split.getPath());
    HdfsContext hdfsContext = new HdfsContext(session);
    TupleDomain<HiveColumnHandle> parquetPredicate = getParquetTupleDomain(filteredSplitPredicate.simplify(domainCompactionThreshold));
    if (table.getWriteType().isPresent()) {
        return new DeltaLakeUpdatablePageSource(table, deltaLakeColumns, partitionKeys, split.getPath(), split.getFileSize(), split.getFileModifiedTime(), session, executorService, hdfsEnvironment, hdfsContext, parquetDateTimeZone, parquetReaderOptions, parquetPredicate, typeManager, updateResultJsonCodec);
    }
    ReaderPageSource pageSource = ParquetPageSourceFactory.createPageSource(path, split.getStart(), split.getLength(), split.getFileSize(), hiveColumnHandles, parquetPredicate, true, hdfsEnvironment, hdfsEnvironment.getConfiguration(hdfsContext, path), session.getIdentity(), parquetDateTimeZone, fileFormatDataSourceStats, parquetReaderOptions.withMaxReadBlockSize(getParquetMaxReadBlockSize(session)).withUseColumnIndex(isParquetUseColumnIndex(session)));
    verify(pageSource.getReaderColumns().isEmpty(), "All columns expected to be base columns");
    return new DeltaLakePageSource(deltaLakeColumns, partitionKeys, pageSource.get(), split.getPath(), split.getFileSize(), split.getFileModifiedTime());
}
Also used: org.apache.hadoop.fs.Path, java.util.Optional, io.trino.spi.connector.EmptyPageSource, io.trino.plugin.hive.ReaderPageSource, io.trino.plugin.hive.HdfsEnvironment.HdfsContext, io.trino.plugin.hive.HiveColumnHandle
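
The early return relies on TupleDomain intersection semantics. A small illustrative check (the "x" column key and BIGINT ranges are assumed for the example, not connector code):

// Illustrative only: intersecting per-column domains narrows them; disjoint ranges
// on the same column make the whole TupleDomain NONE, so the split can be skipped.
TupleDomain<String> fromTable = TupleDomain.withColumnDomains(ImmutableMap.of(
        "x", Domain.create(ValueSet.ofRanges(Range.lessThanOrEqual(BIGINT, 10L)), false)));
TupleDomain<String> fromFilter = TupleDomain.withColumnDomains(ImmutableMap.of(
        "x", Domain.create(ValueSet.ofRanges(Range.greaterThanOrEqual(BIGINT, 20L)), false)));
verify(TupleDomain.intersect(ImmutableList.of(fromTable, fromFilter)).isNone());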

Example 24 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From class TestIcebergTableWithExternalLocation, method createQueryRunner:

@Override
protected DistributedQueryRunner createQueryRunner() throws Exception {
    metastoreDir = Files.createTempDirectory("test_iceberg").toFile();
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileHiveMetastoreConfig config = new FileHiveMetastoreConfig().setCatalogDirectory(metastoreDir.toURI().toString()).setMetastoreUser("test");
    hdfsContext = new HdfsContext(ConnectorIdentity.ofUser(config.getMetastoreUser()));
    metastore = new FileHiveMetastore(new NodeVersion("testversion"), hdfsEnvironment, new MetastoreConfig(), config);
    return createIcebergQueryRunner(ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(), Optional.of(metastoreDir));
}
Also used: io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig, io.trino.plugin.hive.NodeVersion, io.trino.plugin.hive.HdfsConfigurationInitializer, io.trino.plugin.hive.metastore.file.FileHiveMetastore, io.trino.plugin.hive.HiveHdfsConfiguration, io.trino.plugin.hive.metastore.MetastoreConfig, io.trino.plugin.hive.HdfsConfig, io.trino.plugin.hive.HdfsEnvironment.HdfsContext, io.trino.plugin.hive.HdfsConfiguration, io.trino.plugin.hive.authentication.NoHdfsAuthentication, io.trino.plugin.hive.HdfsEnvironment
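
A hedged follow-on sketch (illustrative, not part of the test): the environment and identity-scoped context built above are what later resolve Hadoop file systems.

// Illustrative use of the objects built in createQueryRunner (may throw IOException).
FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, new Path(metastoreDir.toURI()));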

Example 25 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From class TestIcebergV2, method createQueryRunner:

@Override
protected QueryRunner createQueryRunner() throws Exception {
    HdfsConfig config = new HdfsConfig();
    HdfsConfiguration configuration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(config), ImmutableSet.of());
    hdfsEnvironment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
    tempDir = Files.createTempDirectory("test_iceberg_v2");
    metastoreDir = tempDir.resolve("iceberg_data").toFile();
    metastore = createTestingFileHiveMetastore(metastoreDir);
    return createIcebergQueryRunner(ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(NATION), Optional.of(metastoreDir));
}
Also used: io.trino.plugin.hive.HdfsConfigurationInitializer, io.trino.plugin.hive.HiveHdfsConfiguration, io.trino.plugin.hive.HdfsConfig, io.trino.plugin.hive.HdfsConfiguration, io.trino.plugin.hive.authentication.NoHdfsAuthentication, io.trino.plugin.hive.HdfsEnvironment

Aggregations

HdfsEnvironment (io.trino.plugin.hive.HdfsEnvironment): 35
HdfsConfigurationInitializer (io.trino.plugin.hive.HdfsConfigurationInitializer): 23
HiveHdfsConfiguration (io.trino.plugin.hive.HiveHdfsConfiguration): 23
NoHdfsAuthentication (io.trino.plugin.hive.authentication.NoHdfsAuthentication): 23
HdfsConfig (io.trino.plugin.hive.HdfsConfig): 22
HdfsConfiguration (io.trino.plugin.hive.HdfsConfiguration): 19
FileSystem (org.apache.hadoop.fs.FileSystem): 14
Path (org.apache.hadoop.fs.Path): 14
NodeVersion (io.trino.plugin.hive.NodeVersion): 12
IOException (java.io.IOException): 12
FileFormatDataSourceStats (io.trino.plugin.hive.FileFormatDataSourceStats): 11
MetastoreConfig (io.trino.plugin.hive.metastore.MetastoreConfig): 11
ImmutableList (com.google.common.collect.ImmutableList): 10
ImmutableMap (com.google.common.collect.ImmutableMap): 10
HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext): 10
FileHiveMetastore (io.trino.plugin.hive.metastore.file.FileHiveMetastore): 10
FileHiveMetastoreConfig (io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig): 10
List (java.util.List): 10
Optional (java.util.Optional): 10
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 9
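
The counts reflect a recurring bootstrap: most call sites build an HdfsEnvironment from an HdfsConfig, a HiveHdfsConfiguration, and NoHdfsAuthentication, exactly as Examples 24 and 25 do. Distilled into one sketch (test-style wiring; "session" is an assumed ConnectorSession):

// Test-style bootstrap distilled from Examples 24 and 25.
HdfsConfig config = new HdfsConfig();
HdfsConfiguration configuration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(config), ImmutableSet.of());
HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
// Resolve a Hadoop FileSystem for a session-scoped context (illustrative path).
FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path("/tmp/example"));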