
Example 6 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From the class TestCheckpointWriter, method setUp:

@BeforeClass
public void setUp() {
    checkpointSchemaManager = new CheckpointSchemaManager(typeManager);
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), Set.of());
    hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    HiveSessionProperties hiveSessionProperties = getHiveSessionProperties(new HiveConfig());
    session = TestingConnectorSession.builder().setPropertyMetadata(hiveSessionProperties.getSessionProperties()).build();
}
Also used: HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfig(io.trino.plugin.hive.HdfsConfig) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HiveSessionProperties(io.trino.plugin.hive.HiveSessionProperties) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) HiveConfig(io.trino.plugin.hive.HiveConfig) BeforeClass(org.testng.annotations.BeforeClass)
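
A minimal usage sketch, not taken from the Trino sources: once setUp() has run, the hdfsEnvironment and session fields are typically used to resolve a Hadoop FileSystem for a given path, as the next example does. The helper name and the idea of wrapping the call in a method are assumptions for illustration.

private FileSystem openFileSystem(Path path) throws IOException {
    // HdfsContext carries the connector session identity; getFileSystem resolves the Hadoop FileSystem for the path
    return hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session), path);
}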

Example 7 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From the class TestDeltaLakeFileStatistics, method testParseParquetStatistics:

@Test
public void testParseParquetStatistics() throws Exception {
    File statsFile = new File(getClass().getResource("/databricks/pruning/parquet_struct_statistics/_delta_log/00000000000000000010.checkpoint.parquet").getFile());
    Path checkpointPath = new Path(statsFile.toURI());
    TypeManager typeManager = TESTING_TYPE_MANAGER;
    CheckpointSchemaManager checkpointSchemaManager = new CheckpointSchemaManager(typeManager);
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileSystem fs = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(SESSION), checkpointPath);
    CheckpointEntryIterator metadataEntryIterator = new CheckpointEntryIterator(
            checkpointPath,
            SESSION,
            fs.getFileStatus(checkpointPath).getLen(),
            checkpointSchemaManager,
            typeManager,
            ImmutableSet.of(METADATA),
            Optional.empty(),
            hdfsEnvironment,
            new FileFormatDataSourceStats(),
            new ParquetReaderConfig().toParquetReaderOptions(),
            true);
    MetadataEntry metadataEntry = getOnlyElement(metadataEntryIterator).getMetaData();
    CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator(
            checkpointPath,
            SESSION,
            fs.getFileStatus(checkpointPath).getLen(),
            checkpointSchemaManager,
            typeManager,
            ImmutableSet.of(CheckpointEntryIterator.EntryType.ADD),
            Optional.of(metadataEntry),
            hdfsEnvironment,
            new FileFormatDataSourceStats(),
            new ParquetReaderConfig().toParquetReaderOptions(),
            true);
    DeltaLakeTransactionLogEntry matchingAddFileEntry = null;
    while (checkpointEntryIterator.hasNext()) {
        DeltaLakeTransactionLogEntry entry = checkpointEntryIterator.next();
        if (entry.getAdd() != null && entry.getAdd().getPath().contains("part-00000-17951bea-0d04-43c1-979c-ea1fac19b382-c000.snappy.parquet")) {
            assertNull(matchingAddFileEntry);
            matchingAddFileEntry = entry;
        }
    }
    assertNotNull(matchingAddFileEntry);
    assertThat(matchingAddFileEntry.getAdd().getStats()).isPresent();
    testStatisticsValues(matchingAddFileEntry.getAdd().getStats().get());
}
Also used: Path(org.apache.hadoop.fs.Path) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) DeltaLakeTransactionLogEntry(io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry) HdfsConfig(io.trino.plugin.hive.HdfsConfig) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) CheckpointEntryIterator(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) CheckpointSchemaManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager) FileSystem(org.apache.hadoop.fs.FileSystem) TypeManager(io.trino.spi.type.TypeManager) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) File(java.io.File) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) Test(org.testng.annotations.Test)
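
A hedged refactoring sketch, not part of the original test: the metadata read performed above can be isolated into a helper with the same CheckpointEntryIterator arguments. The method name is hypothetical; it assumes the same static imports (SESSION, METADATA, getOnlyElement) as the test.

private static MetadataEntry readMetadataEntry(Path checkpointPath, FileSystem fs, HdfsEnvironment hdfsEnvironment, CheckpointSchemaManager checkpointSchemaManager, TypeManager typeManager) throws IOException {
    // Read only the METADATA entry type from the checkpoint file
    CheckpointEntryIterator iterator = new CheckpointEntryIterator(
            checkpointPath,
            SESSION,
            fs.getFileStatus(checkpointPath).getLen(),
            checkpointSchemaManager,
            typeManager,
            ImmutableSet.of(METADATA),
            Optional.empty(),
            hdfsEnvironment,
            new FileFormatDataSourceStats(),
            new ParquetReaderConfig().toParquetReaderOptions(),
            true);
    // A checkpoint holds exactly one metadata entry
    return getOnlyElement(iterator).getMetaData();
}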

Example 8 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From the class TestDeltaLakeMetastoreStatistics, method setupMetastore:

@BeforeClass
public void setupMetastore() {
    TestingConnectorContext context = new TestingConnectorContext();
    TypeManager typeManager = context.getTypeManager();
    CheckpointSchemaManager checkpointSchemaManager = new CheckpointSchemaManager(typeManager);
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileFormatDataSourceStats fileFormatDataSourceStats = new FileFormatDataSourceStats();
    TransactionLogAccess transactionLogAccess = new TransactionLogAccess(typeManager, checkpointSchemaManager, new DeltaLakeConfig(), fileFormatDataSourceStats, hdfsEnvironment, new ParquetReaderConfig(), new DeltaLakeConfig());
    File tmpDir = Files.createTempDir();
    File metastoreDir = new File(tmpDir, "metastore");
    hiveMetastore = new FileHiveMetastore(
            new NodeVersion("test_version"),
            hdfsEnvironment,
            new MetastoreConfig(),
            new FileHiveMetastoreConfig()
                    .setCatalogDirectory(metastoreDir.toURI().toString())
                    .setMetastoreUser("test"));
    hiveMetastore.createDatabase(new Database("db_name", Optional.empty(), Optional.of("test"), Optional.of(PrincipalType.USER), Optional.empty(), ImmutableMap.of()));
    CachingDeltaLakeStatisticsAccess statistics = new CachingDeltaLakeStatisticsAccess(new MetaDirStatisticsAccess(hdfsEnvironment, new JsonCodecFactory().jsonCodec(DeltaLakeStatistics.class)));
    deltaLakeMetastore = new HiveMetastoreBackedDeltaLakeMetastore(hiveMetastore, transactionLogAccess, typeManager, statistics);
}
Also used: DeltaLakeConfig(io.trino.plugin.deltalake.DeltaLakeConfig) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) MetaDirStatisticsAccess(io.trino.plugin.deltalake.statistics.MetaDirStatisticsAccess) HdfsConfig(io.trino.plugin.hive.HdfsConfig) TransactionLogAccess(io.trino.plugin.deltalake.transactionlog.TransactionLogAccess) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) TestingConnectorContext(io.trino.testing.TestingConnectorContext) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) NodeVersion(io.trino.plugin.hive.NodeVersion) FileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore) CheckpointSchemaManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager) Database(io.trino.plugin.hive.metastore.Database) TypeManager(io.trino.spi.type.TypeManager) CachingDeltaLakeStatisticsAccess(io.trino.plugin.deltalake.statistics.CachingDeltaLakeStatisticsAccess) File(java.io.File) JsonCodecFactory(io.airlift.json.JsonCodecFactory) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) BeforeClass(org.testng.annotations.BeforeClass)
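
A hedged cleanup sketch, not shown in the original example: tests that create a temporary metastore directory usually delete it once the class finishes, for example with Guava's MoreFiles.deleteRecursively and RecursiveDeleteOption.ALLOW_INSECURE (static imports assumed). It also assumes tmpDir is kept as a field rather than a local variable.

@AfterClass(alwaysRun = true)
public void tearDown() throws IOException {
    // Remove the temporary metastore directory created in setupMetastore()
    deleteRecursively(tmpDir.toPath(), ALLOW_INSECURE);
}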

Example 9 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From the class TestHivePlans, method createLocalQueryRunner:

@Override
protected LocalQueryRunner createLocalQueryRunner() {
    baseDir = Files.createTempDir();
    HdfsConfig config = new HdfsConfig();
    HdfsConfiguration configuration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(config), ImmutableSet.of());
    HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
    HiveMetastore metastore = new FileHiveMetastore(
            new NodeVersion("test_version"),
            environment,
            new MetastoreConfig(),
            new FileHiveMetastoreConfig()
                    .setCatalogDirectory(baseDir.toURI().toString())
                    .setMetastoreUser("test"));
    Database database = Database.builder().setDatabaseName(SCHEMA_NAME).setOwnerName(Optional.of("public")).setOwnerType(Optional.of(PrincipalType.ROLE)).build();
    metastore.createDatabase(database);
    return createQueryRunner(HIVE_SESSION, metastore);
}
Also used: NodeVersion(io.trino.plugin.hive.NodeVersion) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) FileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) HdfsConfig(io.trino.plugin.hive.HdfsConfig) Database(io.trino.plugin.hive.metastore.Database) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment)
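
The createQueryRunner helper called above is defined elsewhere in the class. A hedged sketch of what such a helper typically does, modeled directly on Example 10 below, might look like this; the catalog name "hive" is an assumption.

private static LocalQueryRunner createQueryRunner(Session session, HiveMetastore metastore) {
    LocalQueryRunner queryRunner = LocalQueryRunner.create(session);
    // Register a Hive connector backed by the file-based metastore created in createLocalQueryRunner()
    queryRunner.createCatalog("hive", new TestingHiveConnectorFactory(metastore), ImmutableMap.of());
    return queryRunner;
}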

Example 10 with HdfsEnvironment

Use of io.trino.plugin.hive.HdfsEnvironment in project trino by trinodb.

From the class TestHiveProjectionPushdownIntoTableScan, method createLocalQueryRunner:

@Override
protected LocalQueryRunner createLocalQueryRunner() {
    baseDir = Files.createTempDir();
    HdfsConfig config = new HdfsConfig();
    HdfsConfiguration configuration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(config), ImmutableSet.of());
    HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
    HiveMetastore metastore = new FileHiveMetastore(
            new NodeVersion("test_version"),
            environment,
            new MetastoreConfig(),
            new FileHiveMetastoreConfig()
                    .setCatalogDirectory(baseDir.toURI().toString())
                    .setMetastoreUser("test"));
    Database database = Database.builder().setDatabaseName(SCHEMA_NAME).setOwnerName(Optional.of("public")).setOwnerType(Optional.of(PrincipalType.ROLE)).build();
    metastore.createDatabase(database);
    LocalQueryRunner queryRunner = LocalQueryRunner.create(HIVE_SESSION);
    queryRunner.createCatalog(HIVE_CATALOG_NAME, new TestingHiveConnectorFactory(metastore), ImmutableMap.of());
    return queryRunner;
}
Also used: HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) FileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) HdfsConfig(io.trino.plugin.hive.HdfsConfig) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) LocalQueryRunner(io.trino.testing.LocalQueryRunner) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) NodeVersion(io.trino.plugin.hive.NodeVersion) TestingHiveConnectorFactory(io.trino.plugin.hive.TestingHiveConnectorFactory) Database(io.trino.plugin.hive.metastore.Database)
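
A hedged usage sketch, not part of the original method: the returned LocalQueryRunner can execute SQL directly against the catalog registered above. The table name "t" is hypothetical.

LocalQueryRunner queryRunner = createLocalQueryRunner();
// Creates a managed Hive table in the file-backed metastore set up above
queryRunner.execute("CREATE TABLE " + HIVE_CATALOG_NAME + "." + SCHEMA_NAME + ".t (a bigint)");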

Aggregations

HdfsEnvironment (io.trino.plugin.hive.HdfsEnvironment): 35 usages
HdfsConfigurationInitializer (io.trino.plugin.hive.HdfsConfigurationInitializer): 23 usages
HiveHdfsConfiguration (io.trino.plugin.hive.HiveHdfsConfiguration): 23 usages
NoHdfsAuthentication (io.trino.plugin.hive.authentication.NoHdfsAuthentication): 23 usages
HdfsConfig (io.trino.plugin.hive.HdfsConfig): 22 usages
HdfsConfiguration (io.trino.plugin.hive.HdfsConfiguration): 19 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 14 usages
Path (org.apache.hadoop.fs.Path): 14 usages
NodeVersion (io.trino.plugin.hive.NodeVersion): 12 usages
IOException (java.io.IOException): 12 usages
FileFormatDataSourceStats (io.trino.plugin.hive.FileFormatDataSourceStats): 11 usages
MetastoreConfig (io.trino.plugin.hive.metastore.MetastoreConfig): 11 usages
ImmutableList (com.google.common.collect.ImmutableList): 10 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 10 usages
HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext): 10 usages
FileHiveMetastore (io.trino.plugin.hive.metastore.file.FileHiveMetastore): 10 usages
FileHiveMetastoreConfig (io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig): 10 usages
List (java.util.List): 10 usages
Optional (java.util.Optional): 10 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 9 usages