use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.
the class TestCheckpointEntryIterator method testReadAllEntries.
/**
 * Reads every supported entry type from the test checkpoint and verifies the total number of
 * entries plus one representative entry of each kind at its known position.
 */
@Test
public void testReadAllEntries() throws Exception {
    URI checkpointUri = getResource(TEST_CHECKPOINT).toURI();
    // Read the metadata entry once: it is handed to the iterator and also serves as the
    // expected value for the metadata assertion below.
    MetadataEntry metadataEntry = readMetadataEntry(checkpointUri);
    CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(
            checkpointUri,
            ImmutableSet.of(METADATA, PROTOCOL, TRANSACTION, ADD, REMOVE, COMMIT),
            Optional.of(metadataEntry));
    List<DeltaLakeTransactionLogEntry> entries = ImmutableList.copyOf(checkpointEntryIterator);
    assertThat(entries).hasSize(17);

    // MetadataEntry
    assertThat(entries).element(12).extracting(DeltaLakeTransactionLogEntry::getMetaData).isEqualTo(metadataEntry);

    // ProtocolEntry
    assertThat(entries).element(11).extracting(DeltaLakeTransactionLogEntry::getProtocol).isEqualTo(new ProtocolEntry(1, 2));

    // TransactionEntry
    // not found in the checkpoint, TODO add a test
    assertThat(entries).map(DeltaLakeTransactionLogEntry::getTxn).filteredOn(Objects::nonNull).isEmpty();

    // AddFileEntry
    assertThat(entries).element(8).extracting(DeltaLakeTransactionLogEntry::getAdd).isEqualTo(
            new AddFileEntry(
                    "age=42/part-00003-0f53cae3-3e34-4876-b651-e1db9584dbc3.c000.snappy.parquet",
                    Map.of("age", "42"),
                    2634,
                    1579190165000L,
                    false,
                    Optional.of("{" +
                            "\"numRecords\":1," +
                            "\"minValues\":{\"name\":\"Alice\",\"address\":{\"street\":\"100 Main St\",\"city\":\"Anytown\",\"state\":\"NY\",\"zip\":\"12345\"},\"income\":111000.0}," +
                            "\"maxValues\":{\"name\":\"Alice\",\"address\":{\"street\":\"100 Main St\",\"city\":\"Anytown\",\"state\":\"NY\",\"zip\":\"12345\"},\"income\":111000.0}," +
                            "\"nullCount\":{\"name\":0,\"married\":0,\"phones\":0,\"address\":{\"street\":0,\"city\":0,\"state\":0,\"zip\":0},\"income\":0}" +
                            "}"),
                    Optional.empty(),
                    null));

    // RemoveFileEntry
    assertThat(entries).element(3).extracting(DeltaLakeTransactionLogEntry::getRemove).isEqualTo(
            new RemoveFileEntry(
                    "age=42/part-00000-951068bd-bcf4-4094-bb94-536f3c41d31f.c000.snappy.parquet",
                    1579190155406L,
                    false));

    // CommitInfoEntry
    // not found in the checkpoint, TODO add a test
    assertThat(entries).map(DeltaLakeTransactionLogEntry::getCommitInfo).filteredOn(Objects::nonNull).isEmpty();
}
use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.
the class TestDeltaLakeFileStatistics method testParseParquetStatistics.
/**
 * Reads the checkpoint of the {@code parquet_struct_statistics} table, locates the single add-file
 * entry for a known data file, and checks the statistics attached to that entry.
 */
@Test
public void testParseParquetStatistics() throws Exception {
    // Build the File from the resource URI rather than URL.getFile(): getFile() returns the
    // percent-encoded path, which does not exist on disk when the path contains e.g. spaces.
    File statsFile = new File(getClass().getResource("/databricks/pruning/parquet_struct_statistics/_delta_log/00000000000000000010.checkpoint.parquet").toURI());
    Path checkpointPath = new Path(statsFile.toURI());

    TypeManager typeManager = TESTING_TYPE_MANAGER;
    CheckpointSchemaManager checkpointSchemaManager = new CheckpointSchemaManager(typeManager);

    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileSystem fs = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(SESSION), checkpointPath);

    // Both iterators read the same file, so look its length up only once.
    long checkpointLength = fs.getFileStatus(checkpointPath).getLen();

    // First pass: fetch the metadata entry, which the second pass is given as an input.
    CheckpointEntryIterator metadataEntryIterator = new CheckpointEntryIterator(
            checkpointPath,
            SESSION,
            checkpointLength,
            checkpointSchemaManager,
            typeManager,
            ImmutableSet.of(METADATA),
            Optional.empty(),
            hdfsEnvironment,
            new FileFormatDataSourceStats(),
            new ParquetReaderConfig().toParquetReaderOptions(),
            true);
    MetadataEntry metadataEntry = getOnlyElement(metadataEntryIterator).getMetaData();

    // Second pass: iterate over the add-file entries only.
    CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator(
            checkpointPath,
            SESSION,
            checkpointLength,
            checkpointSchemaManager,
            typeManager,
            ImmutableSet.of(CheckpointEntryIterator.EntryType.ADD),
            Optional.of(metadataEntry),
            hdfsEnvironment,
            new FileFormatDataSourceStats(),
            new ParquetReaderConfig().toParquetReaderOptions(),
            true);

    DeltaLakeTransactionLogEntry matchingAddFileEntry = null;
    while (checkpointEntryIterator.hasNext()) {
        DeltaLakeTransactionLogEntry entry = checkpointEntryIterator.next();
        if (entry.getAdd() != null && entry.getAdd().getPath().contains("part-00000-17951bea-0d04-43c1-979c-ea1fac19b382-c000.snappy.parquet")) {
            // The data file must be matched by at most one entry.
            assertNull(matchingAddFileEntry);
            matchingAddFileEntry = entry;
        }
    }
    assertNotNull(matchingAddFileEntry);
    assertThat(matchingAddFileEntry.getAdd().getStats()).isPresent();
    testStatisticsValues(matchingAddFileEntry.getAdd().getStats().get());
}
use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.
the class TestCheckpointBuilder method testCheckpointBuilder.
/**
 * Feeds a sequence of log entries into a {@link CheckpointBuilder} and compares the built result
 * with the expected {@link CheckpointEntries}: the later metadata and protocol entries, the
 * highest-version transaction per application id, and the add/remove entries that remain after
 * later entries for the same path have been applied.
 */
@Test
public void testCheckpointBuilder() {
    CheckpointBuilder builder = new CheckpointBuilder();

    // Two metadata entries are added; the expectation below contains only the second.
    MetadataEntry metadata1 = new MetadataEntry("1", "", "", new MetadataEntry.Format("", Map.of()), "", List.of(), Map.of(), 1);
    MetadataEntry metadata2 = new MetadataEntry("2", "", "", new MetadataEntry.Format("", Map.of()), "", List.of(), Map.of(), 1);
    builder.addLogEntry(metadataEntry(metadata1));
    builder.addLogEntry(metadataEntry(metadata2));

    // Two protocol entries are added; the expectation below contains only the second.
    ProtocolEntry protocol1 = new ProtocolEntry(1, 2);
    ProtocolEntry protocol2 = new ProtocolEntry(3, 4);
    builder.addLogEntry(protocolEntry(protocol1));
    builder.addLogEntry(protocolEntry(protocol2));

    // Transactions for "app1" are deliberately added out of version order (2, 3, 1).
    TransactionEntry app1TransactionV1 = new TransactionEntry("app1", 1, 1);
    TransactionEntry app1TransactionV2 = new TransactionEntry("app1", 2, 2);
    TransactionEntry app1TransactionV3 = new TransactionEntry("app1", 3, 3);
    TransactionEntry app2TransactionV5 = new TransactionEntry("app2", 5, 5);
    builder.addLogEntry(transactionEntry(app1TransactionV2));
    builder.addLogEntry(transactionEntry(app1TransactionV3));
    builder.addLogEntry(transactionEntry(app1TransactionV1));
    builder.addLogEntry(transactionEntry(app2TransactionV5));

    // File "a": added, removed, added again. File "b": added, then removed. File "c": only removed.
    AddFileEntry addA1 = new AddFileEntry("a", Map.of(), 1, 1, true, Optional.empty(), Optional.empty(), Map.of());
    RemoveFileEntry removeA1 = new RemoveFileEntry("a", 1, true);
    AddFileEntry addA2 = new AddFileEntry("a", Map.of(), 2, 1, true, Optional.empty(), Optional.empty(), Map.of());
    AddFileEntry addB = new AddFileEntry("b", Map.of(), 1, 1, true, Optional.empty(), Optional.empty(), Map.of());
    RemoveFileEntry removeB = new RemoveFileEntry("b", 1, true);
    RemoveFileEntry removeC = new RemoveFileEntry("c", 1, true);
    builder.addLogEntry(addFileEntry(addA1));
    builder.addLogEntry(removeFileEntry(removeA1));
    builder.addLogEntry(addFileEntry(addA2));
    builder.addLogEntry(addFileEntry(addB));
    builder.addLogEntry(removeFileEntry(removeB));
    builder.addLogEntry(removeFileEntry(removeC));

    CheckpointEntries expectedCheckpoint = new CheckpointEntries(
            metadata2,
            protocol2,
            Set.of(app1TransactionV3, app2TransactionV5),
            Set.of(addA2),
            Set.of(removeB, removeC));
    // Actual value first, expected second, so a failure message labels the two values correctly.
    // NOTE(review): assumes assertEquals is TestNG's (actual, expected) variant — confirm the static import.
    assertEquals(builder.build(), expectedCheckpoint);
}
use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.
the class TestTransactionLogAccess method testGetMetadataEntryUppercase.
/**
 * Loads the {@code uppercase_columns} table and checks that the metadata entry reports the
 * partition column as {@code ALA} in its original form and {@code ala} in its canonical form,
 * and that the same entry is the snapshot's cached metadata.
 */
@Test
public void testGetMetadataEntryUppercase()
        throws Exception
{
    setupTransactionLogAccess("uppercase_columns");

    MetadataEntry loadedEntry = transactionLogAccess
            .getMetadataEntry(tableSnapshot, SESSION)
            .get();

    // Original spelling of the partition column.
    assertThat(loadedEntry.getOriginalPartitionColumns())
            .containsOnly("ALA");
    // Canonical spelling of the same column.
    assertThat(loadedEntry.getCanonicalPartitionColumns())
            .containsOnly("ala");

    // The snapshot's cached metadata must equal the entry that was just returned.
    assertEquals(tableSnapshot.getCachedMetadata(), Optional.of(loadedEntry));
}
use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in project trino by trinodb.
the class TestTransactionLogAccess method testAllGetMetadataEntry.
/**
 * For each table supplied by the {@code tableNames} data provider, checks that the metadata
 * entry is partitioned only by {@code age} and declares the {@code parquet} format provider
 * with no format options.
 *
 * @param tableName name of the test table to set up
 */
@Test(dataProvider = "tableNames")
public void testAllGetMetadataEntry(String tableName)
        throws Exception
{
    setupTransactionLogAccess(tableName);

    // The result of this first lookup is discarded; only the second lookup is inspected.
    // NOTE(review): presumably this exercises a repeated/cached lookup — confirm.
    transactionLogAccess.getMetadataEntry(tableSnapshot, SESSION);

    MetadataEntry loadedEntry = transactionLogAccess
            .getMetadataEntry(tableSnapshot, SESSION)
            .get();
    assertThat(loadedEntry.getOriginalPartitionColumns())
            .containsOnly("age");

    MetadataEntry.Format entryFormat = loadedEntry.getFormat();
    // No format options are expected.
    assertEquals(entryFormat.getOptions().keySet().size(), 0);
    assertEquals(entryFormat.getProvider(), "parquet");
}
Aggregations