Search in sources:

Example 1 with MetadataEntry

use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.

The class TestCheckpointEntryIterator, method testReadAllEntries.

/**
 * Iterates over the test checkpoint requesting every entry type (METADATA, PROTOCOL, TRANSACTION,
 * ADD, REMOVE, COMMIT) and checks the total number of entries plus the entries found at fixed positions.
 */
@Test
public void testReadAllEntries() throws Exception {
    URI checkpointUri = getResource(TEST_CHECKPOINT).toURI();
    MetadataEntry metadataEntry = readMetadataEntry(checkpointUri);
    // Reuse the metadata entry read above instead of reading it from the checkpoint a second time.
    CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(METADATA, PROTOCOL, TRANSACTION, ADD, REMOVE, COMMIT), Optional.of(metadataEntry));
    List<DeltaLakeTransactionLogEntry> entries = ImmutableList.copyOf(checkpointEntryIterator);
    assertThat(entries).hasSize(17);
    // MetadataEntry
    assertThat(entries).element(12).extracting(DeltaLakeTransactionLogEntry::getMetaData).isEqualTo(metadataEntry);
    // ProtocolEntry
    assertThat(entries).element(11).extracting(DeltaLakeTransactionLogEntry::getProtocol).isEqualTo(new ProtocolEntry(1, 2));
    // TransactionEntry
    // not found in the checkpoint, TODO add a test
    assertThat(entries).map(DeltaLakeTransactionLogEntry::getTxn).filteredOn(Objects::nonNull).isEmpty();
    // AddFileEntry
    assertThat(entries).element(8).extracting(DeltaLakeTransactionLogEntry::getAdd).isEqualTo(new AddFileEntry("age=42/part-00003-0f53cae3-3e34-4876-b651-e1db9584dbc3.c000.snappy.parquet", Map.of("age", "42"), 2634, 1579190165000L, false, Optional.of("{" + "\"numRecords\":1," + "\"minValues\":{\"name\":\"Alice\",\"address\":{\"street\":\"100 Main St\",\"city\":\"Anytown\",\"state\":\"NY\",\"zip\":\"12345\"},\"income\":111000.0}," + "\"maxValues\":{\"name\":\"Alice\",\"address\":{\"street\":\"100 Main St\",\"city\":\"Anytown\",\"state\":\"NY\",\"zip\":\"12345\"},\"income\":111000.0}," + "\"nullCount\":{\"name\":0,\"married\":0,\"phones\":0,\"address\":{\"street\":0,\"city\":0,\"state\":0,\"zip\":0},\"income\":0}" + "}"), Optional.empty(), null));
    // RemoveFileEntry
    assertThat(entries).element(3).extracting(DeltaLakeTransactionLogEntry::getRemove).isEqualTo(new RemoveFileEntry("age=42/part-00000-951068bd-bcf4-4094-bb94-536f3c41d31f.c000.snappy.parquet", 1579190155406L, false));
    // CommitInfoEntry
    // not found in the checkpoint, TODO add a test
    assertThat(entries).map(DeltaLakeTransactionLogEntry::getCommitInfo).filteredOn(Objects::nonNull).isEmpty();
}
Also used : ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) DeltaLakeTransactionLogEntry(io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) URI(java.net.URI) Test(org.testng.annotations.Test)

Example 2 with MetadataEntry

use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.

The class TestDeltaLakeFileStatistics, method testParseParquetStatistics.

/**
 * Reads the checkpoint of the {@code parquet_struct_statistics} test table, locates the single ADD
 * entry for a known data file, and hands its statistics to {@code testStatisticsValues}.
 */
@Test
public void testParseParquetStatistics() throws Exception {
    // Build the File from the resource URI: URL.getFile() returns the raw, percent-encoded path,
    // which does not resolve when the checkout directory contains spaces or other escaped characters.
    File statsFile = new File(getClass().getResource("/databricks/pruning/parquet_struct_statistics/_delta_log/00000000000000000010.checkpoint.parquet").toURI());
    Path checkpointPath = new Path(statsFile.toURI());
    TypeManager typeManager = TESTING_TYPE_MANAGER;
    CheckpointSchemaManager checkpointSchemaManager = new CheckpointSchemaManager(typeManager);
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileSystem fs = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(SESSION), checkpointPath);
    // Both iterators need the checkpoint length; look it up once.
    long checkpointLength = fs.getFileStatus(checkpointPath).getLen();
    // First pass: fetch the metadata entry, which the ADD-entry iterator below takes as input.
    CheckpointEntryIterator metadataEntryIterator = new CheckpointEntryIterator(checkpointPath, SESSION, checkpointLength, checkpointSchemaManager, typeManager, ImmutableSet.of(METADATA), Optional.empty(), hdfsEnvironment, new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), true);
    MetadataEntry metadataEntry = getOnlyElement(metadataEntryIterator).getMetaData();
    // Second pass: iterate over the ADD entries only.
    CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator(checkpointPath, SESSION, checkpointLength, checkpointSchemaManager, typeManager, ImmutableSet.of(CheckpointEntryIterator.EntryType.ADD), Optional.of(metadataEntry), hdfsEnvironment, new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), true);
    DeltaLakeTransactionLogEntry matchingAddFileEntry = null;
    while (checkpointEntryIterator.hasNext()) {
        DeltaLakeTransactionLogEntry entry = checkpointEntryIterator.next();
        if (entry.getAdd() != null && entry.getAdd().getPath().contains("part-00000-17951bea-0d04-43c1-979c-ea1fac19b382-c000.snappy.parquet")) {
            // The file must match at most one entry.
            assertNull(matchingAddFileEntry);
            matchingAddFileEntry = entry;
        }
    }
    assertNotNull(matchingAddFileEntry);
    assertThat(matchingAddFileEntry.getAdd().getStats()).isPresent();
    testStatisticsValues(matchingAddFileEntry.getAdd().getStats().get());
}
Also used : Path(org.apache.hadoop.fs.Path) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) DeltaLakeTransactionLogEntry(io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry) HdfsConfig(io.trino.plugin.hive.HdfsConfig) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) CheckpointEntryIterator(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) CheckpointSchemaManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager) FileSystem(org.apache.hadoop.fs.FileSystem) TypeManager(io.trino.spi.type.TypeManager) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) File(java.io.File) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) Test(org.testng.annotations.Test)

Example 3 with MetadataEntry

use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.

The class TestCheckpointBuilder, method testCheckpointBuilder.

/**
 * Feeds a sequence of metadata, protocol, transaction, add-file and remove-file log entries into a
 * {@code CheckpointBuilder} and checks that {@code build()} equals the expected {@code CheckpointEntries}.
 */
@Test
public void testCheckpointBuilder() {
    CheckpointBuilder builder = new CheckpointBuilder();
    // Two metadata entries; the expected checkpoint holds metadata2, the one added last.
    MetadataEntry metadata1 = new MetadataEntry("1", "", "", new MetadataEntry.Format("", Map.of()), "", List.of(), Map.of(), 1);
    MetadataEntry metadata2 = new MetadataEntry("2", "", "", new MetadataEntry.Format("", Map.of()), "", List.of(), Map.of(), 1);
    builder.addLogEntry(metadataEntry(metadata1));
    builder.addLogEntry(metadataEntry(metadata2));
    // Two protocol entries; the expected checkpoint holds protocol2, the one added last.
    ProtocolEntry protocol1 = new ProtocolEntry(1, 2);
    ProtocolEntry protocol2 = new ProtocolEntry(3, 4);
    builder.addLogEntry(protocolEntry(protocol1));
    builder.addLogEntry(protocolEntry(protocol2));
    TransactionEntry app1TransactionV1 = new TransactionEntry("app1", 1, 1);
    TransactionEntry app1TransactionV2 = new TransactionEntry("app1", 2, 2);
    TransactionEntry app1TransactionV3 = new TransactionEntry("app1", 3, 3);
    TransactionEntry app2TransactionV5 = new TransactionEntry("app2", 5, 5);
    // app1 entries are added out of order (V2, V3, V1); the expected checkpoint holds V3 although V1 is added last.
    builder.addLogEntry(transactionEntry(app1TransactionV2));
    builder.addLogEntry(transactionEntry(app1TransactionV3));
    builder.addLogEntry(transactionEntry(app1TransactionV1));
    builder.addLogEntry(transactionEntry(app2TransactionV5));
    AddFileEntry addA1 = new AddFileEntry("a", Map.of(), 1, 1, true, Optional.empty(), Optional.empty(), Map.of());
    RemoveFileEntry removeA1 = new RemoveFileEntry("a", 1, true);
    AddFileEntry addA2 = new AddFileEntry("a", Map.of(), 2, 1, true, Optional.empty(), Optional.empty(), Map.of());
    AddFileEntry addB = new AddFileEntry("b", Map.of(), 1, 1, true, Optional.empty(), Optional.empty(), Map.of());
    RemoveFileEntry removeB = new RemoveFileEntry("b", 1, true);
    RemoveFileEntry removeC = new RemoveFileEntry("c", 1, true);
    // File "a": add, remove, add again. File "b": add, then remove. File "c": remove only.
    builder.addLogEntry(addFileEntry(addA1));
    builder.addLogEntry(removeFileEntry(removeA1));
    builder.addLogEntry(addFileEntry(addA2));
    builder.addLogEntry(addFileEntry(addB));
    builder.addLogEntry(removeFileEntry(removeB));
    builder.addLogEntry(removeFileEntry(removeC));
    CheckpointEntries expectedCheckpoint = new CheckpointEntries(metadata2, protocol2, Set.of(app1TransactionV3, app2TransactionV5), Set.of(addA2), Set.of(removeB, removeC));
    // Actual value first, expected second — the (actual, expected) order of TestNG's assertEquals,
    // so a failure message labels the two values correctly.
    assertEquals(builder.build(), expectedCheckpoint);
}
Also used : ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) TransactionEntry(io.trino.plugin.deltalake.transactionlog.TransactionEntry) Test(org.testng.annotations.Test)

Example 4 with MetadataEntry

use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.

The class TestTransactionLogAccess, method testGetMetadataEntryUppercase.

/**
 * For the {@code uppercase_columns} table, checks that the original partition column list contains
 * only "ALA", the canonical list contains only "ala", and the table snapshot's cached metadata
 * equals the returned entry.
 */
@Test
public void testGetMetadataEntryUppercase() throws Exception {
    setupTransactionLogAccess("uppercase_columns");
    MetadataEntry entry = transactionLogAccess.getMetadataEntry(tableSnapshot, SESSION).get();

    // Partition column names in their original and canonical forms.
    assertThat(entry.getOriginalPartitionColumns()).containsOnly("ALA");
    assertThat(entry.getCanonicalPartitionColumns()).containsOnly("ala");

    // The snapshot's cached metadata must equal the entry that was just returned.
    assertEquals(tableSnapshot.getCachedMetadata(), Optional.of(entry));
}
Also used : MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) Test(org.testng.annotations.Test)

Example 5 with MetadataEntry

use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.

The class TestTransactionLogAccess, method testAllGetMetadataEntry.

/**
 * For each table supplied by the {@code tableNames} data provider, checks that the metadata entry
 * reports "age" as its only original partition column, has no format options, and uses the
 * "parquet" provider.
 *
 * @param tableName name of the table whose transaction log is set up for the test
 */
@Test(dataProvider = "tableNames")
public void testAllGetMetadataEntry(String tableName) throws Exception {
    setupTransactionLogAccess(tableName);

    // The first lookup's result is discarded on purpose or by accident.
    // NOTE(review): presumably this makes the second call run against already-loaded state — confirm.
    transactionLogAccess.getMetadataEntry(tableSnapshot, SESSION);
    MetadataEntry entry = transactionLogAccess.getMetadataEntry(tableSnapshot, SESSION).get();

    assertThat(entry.getOriginalPartitionColumns()).containsOnly("age");

    MetadataEntry.Format storageFormat = entry.getFormat();
    assertEquals(storageFormat.getOptions().keySet().size(), 0);
    assertEquals(storageFormat.getProvider(), "parquet");
}
Also used : MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) Test(org.testng.annotations.Test)

Aggregations

MetadataEntry (io.trino.plugin.deltalake.transactionlog.MetadataEntry)16 Test (org.testng.annotations.Test)9 ProtocolEntry (io.trino.plugin.deltalake.transactionlog.ProtocolEntry)6 AddFileEntry (io.trino.plugin.deltalake.transactionlog.AddFileEntry)5 Path (org.apache.hadoop.fs.Path)5 TableSnapshot (io.trino.plugin.deltalake.transactionlog.TableSnapshot)4 Table (io.trino.plugin.hive.metastore.Table)4 TrinoException (io.trino.spi.TrinoException)4 RemoveFileEntry (io.trino.plugin.deltalake.transactionlog.RemoveFileEntry)3 Block (io.trino.spi.block.Block)3 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)2 DeltaLakeTableHandle (io.trino.plugin.deltalake.DeltaLakeTableHandle)2 DeltaLakeStatistics (io.trino.plugin.deltalake.statistics.DeltaLakeStatistics)2 DeltaLakeTransactionLogEntry (io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry)2 TransactionEntry (io.trino.plugin.deltalake.transactionlog.TransactionEntry)2 DeltaLakeParquetFileStatistics (io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeParquetFileStatistics)2 RowBlock (io.trino.spi.block.RowBlock)2 Utils.nativeValueToBlock (io.trino.spi.predicate.Utils.nativeValueToBlock)2 TypeManager (io.trino.spi.type.TypeManager)2 File (java.io.File)2