Search in sources:

Example 1 with HdfsConfiguration

use of io.trino.plugin.hive.HdfsConfiguration in project trino by trinodb.

the class TestCheckpointEntryIterator method setUp.

/**
 * Prepares the fixtures shared by the tests in this class: an {@code HdfsEnvironment}
 * built from a default {@code HdfsConfig} with {@code NoHdfsAuthentication}, and a
 * {@code CheckpointSchemaManager} backed by {@code TESTING_TYPE_MANAGER}.
 */
@BeforeClass
public void setUp() {
    HdfsConfig config = new HdfsConfig();
    HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(config);
    // No extra dynamic configuration providers are registered (empty set).
    HdfsConfiguration configuration = new HiveHdfsConfiguration(initializer, ImmutableSet.of());
    hdfsEnvironment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());

    checkpointSchemaManager = new CheckpointSchemaManager(TESTING_TYPE_MANAGER);
}
Also used : HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfig(io.trino.plugin.hive.HdfsConfig) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) BeforeClass(org.testng.annotations.BeforeClass)

Example 2 with HdfsConfiguration

use of io.trino.plugin.hive.HdfsConfiguration in project trino by trinodb.

the class TestCheckpointWriter method setUp.

/**
 * Prepares the fixtures shared by the tests in this class: the checkpoint schema
 * manager, an {@code HdfsEnvironment} built from a default {@code HdfsConfig} with
 * {@code NoHdfsAuthentication}, and a connector session that carries the Hive
 * session properties derived from a default {@code HiveConfig}.
 */
@BeforeClass
public void setUp() {
    checkpointSchemaManager = new CheckpointSchemaManager(typeManager);

    HdfsConfig config = new HdfsConfig();
    HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(config);
    // No extra dynamic configuration providers are registered (empty set).
    HdfsConfiguration configuration = new HiveHdfsConfiguration(initializer, Set.of());
    hdfsEnvironment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());

    HiveConfig hiveConfig = new HiveConfig();
    HiveSessionProperties sessionProperties = getHiveSessionProperties(hiveConfig);
    session = TestingConnectorSession.builder()
            .setPropertyMetadata(sessionProperties.getSessionProperties())
            .build();
}
Also used : HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfig(io.trino.plugin.hive.HdfsConfig) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HiveSessionProperties(io.trino.plugin.hive.HiveSessionProperties) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) HiveConfig(io.trino.plugin.hive.HiveConfig) BeforeClass(org.testng.annotations.BeforeClass)

Example 3 with HdfsConfiguration

use of io.trino.plugin.hive.HdfsConfiguration in project trino by trinodb.

the class TestDeltaLakeFileStatistics method testParseParquetStatistics.

/**
 * Reads the bundled checkpoint Parquet file, locates the single {@code add} entry whose
 * path contains the expected data-file name, and checks that it carries statistics
 * which satisfy {@code testStatisticsValues}.
 *
 * @throws Exception if the checkpoint resource cannot be resolved or read
 */
@Test
public void testParseParquetStatistics() throws Exception {
    // Convert the resource URL through toURI() instead of getFile(): getFile() returns the
    // path still percent-encoded, so a checkout directory containing a space (or any other
    // escaped character) would yield a File that does not exist.
    File statsFile = new File(getClass().getResource("/databricks/pruning/parquet_struct_statistics/_delta_log/00000000000000000010.checkpoint.parquet").toURI());
    Path checkpointPath = new Path(statsFile.toURI());
    TypeManager typeManager = TESTING_TYPE_MANAGER;
    CheckpointSchemaManager checkpointSchemaManager = new CheckpointSchemaManager(typeManager);
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileSystem fs = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(SESSION), checkpointPath);
    // Both iterators read the same file, so look its length up once.
    long checkpointLength = fs.getFileStatus(checkpointPath).getLen();

    // First pass: pull out the single METADATA entry, which the second iterator takes as input.
    CheckpointEntryIterator metadataEntryIterator = new CheckpointEntryIterator(checkpointPath, SESSION, checkpointLength, checkpointSchemaManager, typeManager, ImmutableSet.of(METADATA), Optional.empty(), hdfsEnvironment, new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), true);
    MetadataEntry metadataEntry = getOnlyElement(metadataEntryIterator).getMetaData();

    // Second pass: iterate the ADD entries.
    CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator(checkpointPath, SESSION, checkpointLength, checkpointSchemaManager, typeManager, ImmutableSet.of(CheckpointEntryIterator.EntryType.ADD), Optional.of(metadataEntry), hdfsEnvironment, new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), true);
    DeltaLakeTransactionLogEntry matchingAddFileEntry = null;
    while (checkpointEntryIterator.hasNext()) {
        DeltaLakeTransactionLogEntry entry = checkpointEntryIterator.next();
        if (entry.getAdd() != null && entry.getAdd().getPath().contains("part-00000-17951bea-0d04-43c1-979c-ea1fac19b382-c000.snappy.parquet")) {
            // Fails if a second entry matches: exactly one match is expected.
            assertNull(matchingAddFileEntry);
            matchingAddFileEntry = entry;
        }
    }
    assertNotNull(matchingAddFileEntry);
    assertThat(matchingAddFileEntry.getAdd().getStats()).isPresent();
    testStatisticsValues(matchingAddFileEntry.getAdd().getStats().get());
}
Also used : Path(org.apache.hadoop.fs.Path) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) DeltaLakeTransactionLogEntry(io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry) HdfsConfig(io.trino.plugin.hive.HdfsConfig) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) CheckpointEntryIterator(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) CheckpointSchemaManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager) FileSystem(org.apache.hadoop.fs.FileSystem) TypeManager(io.trino.spi.type.TypeManager) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) File(java.io.File) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) Test(org.testng.annotations.Test)

Example 4 with HdfsConfiguration

use of io.trino.plugin.hive.HdfsConfiguration in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method setupMetastore.

/**
 * Builds the metastore fixtures used by this class: a {@code FileHiveMetastore} whose
 * catalog lives in a {@code metastore} directory under a fresh temp directory and which
 * contains the database {@code db_name}, plus a {@code HiveMetastoreBackedDeltaLakeMetastore}
 * layered on top of it.
 */
@BeforeClass
public void setupMetastore() {
    TypeManager typeManager = new TestingConnectorContext().getTypeManager();
    CheckpointSchemaManager schemaManager = new CheckpointSchemaManager(typeManager);

    HdfsConfig config = new HdfsConfig();
    HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(config);
    HdfsConfiguration configuration = new HiveHdfsConfiguration(initializer, ImmutableSet.of());
    HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());

    FileFormatDataSourceStats dataSourceStats = new FileFormatDataSourceStats();
    // Two separate DeltaLakeConfig instances are passed, as the constructor is invoked with two config arguments.
    TransactionLogAccess logAccess = new TransactionLogAccess(
            typeManager,
            schemaManager,
            new DeltaLakeConfig(),
            dataSourceStats,
            environment,
            new ParquetReaderConfig(),
            new DeltaLakeConfig());

    File metastoreDir = new File(Files.createTempDir(), "metastore");
    hiveMetastore = new FileHiveMetastore(
            new NodeVersion("test_version"),
            environment,
            new MetastoreConfig(),
            new FileHiveMetastoreConfig()
                    .setCatalogDirectory(metastoreDir.toURI().toString())
                    .setMetastoreUser("test"));

    Database database = new Database(
            "db_name",
            Optional.empty(),
            Optional.of("test"),
            Optional.of(PrincipalType.USER),
            Optional.empty(),
            ImmutableMap.of());
    hiveMetastore.createDatabase(database);

    MetaDirStatisticsAccess metaDirStatistics = new MetaDirStatisticsAccess(environment, new JsonCodecFactory().jsonCodec(DeltaLakeStatistics.class));
    CachingDeltaLakeStatisticsAccess cachingStatistics = new CachingDeltaLakeStatisticsAccess(metaDirStatistics);
    deltaLakeMetastore = new HiveMetastoreBackedDeltaLakeMetastore(hiveMetastore, logAccess, typeManager, cachingStatistics);
}
Also used : DeltaLakeConfig(io.trino.plugin.deltalake.DeltaLakeConfig) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) MetaDirStatisticsAccess(io.trino.plugin.deltalake.statistics.MetaDirStatisticsAccess) HdfsConfig(io.trino.plugin.hive.HdfsConfig) TransactionLogAccess(io.trino.plugin.deltalake.transactionlog.TransactionLogAccess) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) TestingConnectorContext(io.trino.testing.TestingConnectorContext) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) NodeVersion(io.trino.plugin.hive.NodeVersion) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) FileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore) CheckpointSchemaManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager) Database(io.trino.plugin.hive.metastore.Database) TypeManager(io.trino.spi.type.TypeManager) CachingDeltaLakeStatisticsAccess(io.trino.plugin.deltalake.statistics.CachingDeltaLakeStatisticsAccess) File(java.io.File) JsonCodecFactory(io.airlift.json.JsonCodecFactory) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) BeforeClass(org.testng.annotations.BeforeClass)

Example 5 with HdfsConfiguration

use of io.trino.plugin.hive.HdfsConfiguration in project trino by trinodb.

the class TestHivePlans method createLocalQueryRunner.

/**
 * Creates the query runner for these tests on top of a {@code FileHiveMetastore} rooted
 * at a fresh temp directory (kept in {@code baseDir}), after registering the
 * {@code SCHEMA_NAME} database owned by the {@code public} role.
 *
 * @return the runner produced by {@code createQueryRunner(HIVE_SESSION, metastore)}
 */
@Override
protected LocalQueryRunner createLocalQueryRunner() {
    baseDir = Files.createTempDir();

    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(hdfsConfig);
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(initializer, ImmutableSet.of());
    HdfsEnvironment fileSystemEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());

    HiveMetastore metastore = new FileHiveMetastore(
            new NodeVersion("test_version"),
            fileSystemEnvironment,
            new MetastoreConfig(),
            new FileHiveMetastoreConfig()
                    .setCatalogDirectory(baseDir.toURI().toString())
                    .setMetastoreUser("test"));

    metastore.createDatabase(Database.builder()
            .setDatabaseName(SCHEMA_NAME)
            .setOwnerName(Optional.of("public"))
            .setOwnerType(Optional.of(PrincipalType.ROLE))
            .build());

    return createQueryRunner(HIVE_SESSION, metastore);
}
Also used : NodeVersion(io.trino.plugin.hive.NodeVersion) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) FileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) FileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) HdfsConfig(io.trino.plugin.hive.HdfsConfig) Database(io.trino.plugin.hive.metastore.Database) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment)

Aggregations

HdfsConfig (io.trino.plugin.hive.HdfsConfig): 19, HdfsConfiguration (io.trino.plugin.hive.HdfsConfiguration): 19, HdfsConfigurationInitializer (io.trino.plugin.hive.HdfsConfigurationInitializer): 19, HdfsEnvironment (io.trino.plugin.hive.HdfsEnvironment): 19, HiveHdfsConfiguration (io.trino.plugin.hive.HiveHdfsConfiguration): 19, NoHdfsAuthentication (io.trino.plugin.hive.authentication.NoHdfsAuthentication): 19, NodeVersion (io.trino.plugin.hive.NodeVersion): 11, MetastoreConfig (io.trino.plugin.hive.metastore.MetastoreConfig): 11, FileHiveMetastore (io.trino.plugin.hive.metastore.file.FileHiveMetastore): 10, FileHiveMetastoreConfig (io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig): 10, File (java.io.File): 7, HiveMetastore (io.trino.plugin.hive.metastore.HiveMetastore): 6, CheckpointSchemaManager (io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager): 5, FileFormatDataSourceStats (io.trino.plugin.hive.FileFormatDataSourceStats): 4, Database (io.trino.plugin.hive.metastore.Database): 4, ParquetReaderConfig (io.trino.plugin.hive.parquet.ParquetReaderConfig): 4, DistributedQueryRunner (io.trino.testing.DistributedQueryRunner): 4, CatalogName (io.trino.plugin.base.CatalogName): 3, FileMetastoreTableOperationsProvider (io.trino.plugin.iceberg.catalog.file.FileMetastoreTableOperationsProvider): 3, TrinoHiveCatalog (io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog): 3