Use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in the trino project by trinodb.
In class DeltaLakeMetadata, method getTableHandle.
/**
 * Builds the handle for {@code tableName}.
 *
 * <p>Returns {@code null} when the metastore has no table with that schema and name.
 * Otherwise the handle is assembled from the table location, the metadata entry read
 * from the current snapshot, unconstrained ({@code TupleDomain.all()}) predicates,
 * empty optional fields, and the snapshot version.
 *
 * @param session the session passed through to the metastore lookups
 * @param tableName schema-qualified table name; must not be null
 * @return the table handle, or {@code null} if the table is not present in the metastore
 */
@Override
public DeltaLakeTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) {
    requireNonNull(tableName, "tableName is null");

    Optional<Table> metastoreTable = metastore.getTable(tableName.getSchemaName(), tableName.getTableName());
    if (!metastoreTable.isPresent()) {
        return null;
    }

    // Snapshot first, then the metadata derived from it: same call order as before.
    TableSnapshot snapshot = metastore.getSnapshot(tableName, session);
    Optional<MetadataEntry> metadataEntry = metastore.getMetadata(snapshot, session);

    return new DeltaLakeTableHandle(
            tableName.getSchemaName(),
            tableName.getTableName(),
            metastore.getTableLocation(tableName, session),
            metadataEntry,
            TupleDomain.all(),
            TupleDomain.all(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            snapshot.getVersion());
}
Use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in the trino project by trinodb.
In class TestCheckpointEntryIterator, method testReadMetadataEntry.
/**
 * Reads the metadata entry from the test checkpoint resource and checks it against the
 * expected id, format, schema JSON, partition columns, configuration and creation time.
 */
@Test
public void testReadMetadataEntry() throws Exception {
    // Expected table schema, assembled from the same fragments in the same order.
    String expectedSchemaJson = String.join(
            "",
            "{\"type\":\"struct\",\"fields\":[",
            "{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"married\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"phones\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[",
            "{\"name\":\"number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"label\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},",
            "\"containsNull\":true},\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"address\",\"type\":{\"type\":\"struct\",\"fields\":[",
            "{\"name\":\"street\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"city\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"state\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"zip\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},",
            "{\"name\":\"income\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}");

    URI checkpointLocation = getResource(TEST_CHECKPOINT).toURI();

    assertThat(readMetadataEntry(checkpointLocation))
            .isEqualTo(new MetadataEntry(
                    "b6aeffad-da73-4dde-b68e-937e468b1fde",
                    "",
                    "",
                    new MetadataEntry.Format("parquet", Map.of()),
                    expectedSchemaJson,
                    List.of("age"),
                    Map.of(),
                    1579190100722L));
}
Use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in the trino project by trinodb.
In class TestCheckpointWriter, method testCheckpointWriteReadRoundtrip.
/**
 * Writes a checkpoint containing one metadata entry, one protocol entry, one transaction
 * entry, two add-file entries (one carrying JSON statistics, one carrying Parquet
 * statistics) and one remove-file entry, reads the file back, and asserts that each kind
 * of entry read equals what was written. Add-file entries are compared after being mapped
 * through {@code makeComparable}.
 */
@Test
public void testCheckpointWriteReadRoundtrip() throws IOException {
    // ---- metadata / protocol / transaction fixtures ----
    String schemaJson = "{\"type\":\"struct\",\"fields\":"
            + "[{\"name\":\"ts\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"str\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"dec_short\",\"type\":\"decimal(5,1)\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"dec_long\",\"type\":\"decimal(25,3)\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"l\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"in\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"sh\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"byt\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"fl\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"dou\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"bool\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"bin\",\"type\":\"binary\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"dat\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"arr\",\"type\":{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"m\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"row\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"s1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},"
            + "{\"name\":\"s2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}";
    MetadataEntry.Format format = new MetadataEntry.Format(
            "metadataFormatProvider",
            ImmutableMap.of("formatOptionX", "blah", "fomatOptionY", "plah"));
    MetadataEntry metadata = new MetadataEntry(
            "metadataId",
            "metadataName",
            "metadataDescription",
            format,
            schemaJson,
            ImmutableList.of("part_key"),
            ImmutableMap.of("configOption1", "blah", "configOption2", "plah"),
            1000);
    ProtocolEntry protocol = new ProtocolEntry(10, 20);
    TransactionEntry transaction = new TransactionEntry("appId", 1, 1001);

    // ---- add-file entry whose statistics are supplied as a JSON string ----
    String jsonStatistics = "{"
            + "\"numRecords\":20,"
            + "\"minValues\":{"
            + "\"ts\":\"2960-10-31T01:00:00.000Z\","
            + "\"str\":\"a\","
            + "\"dec_short\":10.1,"
            + "\"dec_long\":111111111111.123,"
            + "\"l\":1000000000,"
            + "\"in\":100000,"
            + "\"sh\":100,"
            + "\"byt\":10,"
            + "\"fl\":0.100,"
            + "\"dou\":0.101,"
            + "\"dat\":\"2000-01-01\","
            + "\"row\":{\"s1\":1,\"s2\":\"a\"}"
            + "},"
            + "\"maxValues\":{"
            + "\"ts\":\"2960-10-31T02:00:00.000Z\","
            + "\"str\":\"z\","
            + "\"dec_short\":20.1,"
            + "\"dec_long\":222222222222.123,"
            + "\"l\":2000000000,"
            + "\"in\":200000,"
            + "\"sh\":200,"
            + "\"byt\":20,"
            + "\"fl\":0.200,"
            + "\"dou\":0.202,"
            + "\"dat\":\"3000-01-01\","
            + "\"row\":{\"s1\":1,\"s2\":\"a\"}"
            + "},"
            + "\"nullCount\":{"
            + "\"ts\":1,"
            + "\"str\":2,"
            + "\"dec_short\":3,"
            + "\"dec_long\":4,"
            + "\"l\":5,"
            + "\"in\":6,"
            + "\"sh\":7,"
            + "\"byt\":8,"
            + "\"fl\":9,"
            + "\"dou\":10,"
            + "\"bool\":11,"
            + "\"bin\":12,"
            + "\"dat\":13,"
            + "\"arr\":0,\"m\":14,"
            + "\"row\":{\"s1\":0,\"s2\":15}}}";
    AddFileEntry addFileWithJsonStats = new AddFileEntry(
            "addFilePathJson",
            ImmutableMap.of("part_key", "7.0"),
            1000,
            1001,
            true,
            Optional.of(jsonStatistics),
            Optional.empty(),
            ImmutableMap.of("someTag", "someValue", "otherTag", "otherValue"));

    // ---- add-file entry whose statistics are supplied as Parquet statistics ----
    Block[] rowMinMaxFields = new Block[] {
            nativeValueToBlock(IntegerType.INTEGER, 1L),
            nativeValueToBlock(createUnboundedVarcharType(), utf8Slice("a"))
    };
    Block[] rowNullCountFields = new Block[] {
            nativeValueToBlock(BigintType.BIGINT, 0L),
            nativeValueToBlock(BigintType.BIGINT, 15L)
    };
    ImmutableMap<String, Object> parquetMinValues = ImmutableMap.<String, Object>builder()
            .put("ts", DateTimeUtils.convertToTimestampWithTimeZone(UTC_KEY, "2060-10-31 01:00:00"))
            .put("str", utf8Slice("a"))
            .put("dec_short", 101L)
            .put("dec_long", Int128.valueOf(111111111111123L))
            .put("l", 1000000000L)
            .put("in", 100000L)
            .put("sh", 100L)
            .put("byt", 10L)
            .put("fl", (long) Float.floatToIntBits(0.100f))
            .put("dou", 0.101d)
            .put("dat", (long) parseDate("2000-01-01"))
            .put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), rowMinMaxFields).getSingleValueBlock(0))
            .buildOrThrow();
    ImmutableMap<String, Object> parquetMaxValues = ImmutableMap.<String, Object>builder()
            .put("ts", DateTimeUtils.convertToTimestampWithTimeZone(UTC_KEY, "2060-10-31 02:00:00"))
            .put("str", utf8Slice("a"))
            .put("dec_short", 201L)
            .put("dec_long", Int128.valueOf(222222222222123L))
            .put("l", 2000000000L)
            .put("in", 200000L)
            .put("sh", 200L)
            .put("byt", 20L)
            .put("fl", (long) Float.floatToIntBits(0.200f))
            .put("dou", 0.202d)
            .put("dat", (long) parseDate("3000-01-01"))
            .put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), rowMinMaxFields).getSingleValueBlock(0))
            .buildOrThrow();
    ImmutableMap<String, Object> parquetNullCounts = ImmutableMap.<String, Object>builder()
            .put("ts", 1L)
            .put("str", 2L)
            .put("dec_short", 3L)
            .put("dec_long", 4L)
            .put("l", 5L)
            .put("in", 6L)
            .put("sh", 7L)
            .put("byt", 8L)
            .put("fl", 9L)
            .put("dou", 10L)
            .put("bool", 11L)
            .put("bin", 12L)
            .put("dat", 13L)
            .put("arr", 14L)
            .put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), rowNullCountFields).getSingleValueBlock(0))
            .buildOrThrow();
    AddFileEntry addFileWithParquetStats = new AddFileEntry(
            "addFilePathParquet",
            ImmutableMap.of("part_key", "7.0"),
            1000,
            1001,
            true,
            Optional.empty(),
            Optional.of(new DeltaLakeParquetFileStatistics(
                    Optional.of(5L),
                    Optional.of(parquetMinValues),
                    Optional.of(parquetMaxValues),
                    Optional.of(parquetNullCounts))),
            ImmutableMap.of("someTag", "someValue", "otherTag", "otherValue"));

    RemoveFileEntry removeFile = new RemoveFileEntry("removeFilePath", 1000, true);

    CheckpointEntries written = new CheckpointEntries(
            metadata,
            protocol,
            ImmutableSet.of(transaction),
            ImmutableSet.of(addFileWithJsonStats, addFileWithParquetStats),
            ImmutableSet.of(removeFile));

    // ---- write the checkpoint to a fresh temp location ----
    CheckpointWriter checkpointWriter = new CheckpointWriter(typeManager, checkpointSchemaManager, hdfsEnvironment);
    File checkpointFile = File.createTempFile("testCheckpointWriteReadRoundtrip-", ".checkpoint.parquet");
    checkpointFile.deleteOnExit();
    Path checkpointPath = new Path("file://" + checkpointFile.getAbsolutePath());
    // file must not exist when writer is called
    checkpointFile.delete();
    checkpointWriter.write(session, written, checkpointPath);

    // ---- read back and compare ----
    CheckpointEntries readBack = readCheckpoint(checkpointPath, metadata, true);
    assertEquals(readBack.getTransactionEntries(), written.getTransactionEntries());
    assertEquals(readBack.getRemoveFileEntries(), written.getRemoveFileEntries());
    assertEquals(readBack.getMetadataEntry(), written.getMetadataEntry());
    assertEquals(readBack.getProtocolEntry(), written.getProtocolEntry());
    assertEquals(
            readBack.getAddFileEntries().stream()
                    .map(this::makeComparable)
                    .collect(toImmutableSet()),
            written.getAddFileEntries().stream()
                    .map(this::makeComparable)
                    .collect(toImmutableSet()));
}
Use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in the trino project by trinodb.
In class TestCheckpointWriter, method testDisablingRowStatistics.
/**
 * Writes a checkpoint whose only column is a row (struct) type, with Parquet min/max/null-count
 * statistics recorded for that column, then reads it back passing {@code false} as the last
 * argument of {@code readCheckpoint} (presumably the row-statistics switch, per the test name;
 * confirm against that helper). The add-file entry read back must still carry statistics,
 * but its min-value, max-value and null-count maps must all be empty.
 */
@Test
public void testDisablingRowStatistics() throws IOException {
    MetadataEntry metadataEntry = new MetadataEntry(
            "metadataId",
            "metadataName",
            "metadataDescription",
            new MetadataEntry.Format("metadataFormatProvider", ImmutableMap.of("formatOptionX", "blah", "fomatOptionY", "plah")),
            "{\"type\":\"struct\",\"fields\":"
                    + "[{\"name\":\"row\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"s1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},"
                    + "{\"name\":\"s2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}",
            ImmutableList.of(),
            ImmutableMap.of(),
            1000);
    ProtocolEntry protocolEntry = new ProtocolEntry(10, 20);

    Block[] minMaxRowFieldBlocks = new Block[] {
            nativeValueToBlock(IntegerType.INTEGER, 1L),
            nativeValueToBlock(createUnboundedVarcharType(), utf8Slice("a"))
    };
    Block[] nullCountRowFieldBlocks = new Block[] {
            nativeValueToBlock(BigintType.BIGINT, 0L),
            nativeValueToBlock(BigintType.BIGINT, 15L)
    };
    AddFileEntry addFileEntryParquetStats = new AddFileEntry(
            "addFilePathParquet",
            ImmutableMap.of("part_key", "7.0"),
            1000,
            1001,
            true,
            Optional.empty(),
            Optional.of(new DeltaLakeParquetFileStatistics(
                    Optional.of(5L),
                    Optional.of(ImmutableMap.<String, Object>builder()
                            .put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), minMaxRowFieldBlocks).getSingleValueBlock(0))
                            .buildOrThrow()),
                    Optional.of(ImmutableMap.<String, Object>builder()
                            .put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), minMaxRowFieldBlocks).getSingleValueBlock(0))
                            .buildOrThrow()),
                    Optional.of(ImmutableMap.<String, Object>builder()
                            .put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), nullCountRowFieldBlocks).getSingleValueBlock(0))
                            .buildOrThrow()))),
            ImmutableMap.of());

    CheckpointEntries entries = new CheckpointEntries(
            metadataEntry,
            protocolEntry,
            ImmutableSet.of(),
            ImmutableSet.of(addFileEntryParquetStats),
            ImmutableSet.of());

    CheckpointWriter writer = new CheckpointWriter(typeManager, checkpointSchemaManager, hdfsEnvironment);
    // Prefix names this test, so a leftover temp file can be traced back to it
    // (it previously reused the prefix of testCheckpointWriteReadRoundtrip).
    File targetFile = File.createTempFile("testDisablingRowStatistics-", ".checkpoint.parquet");
    targetFile.deleteOnExit();
    Path targetPath = new Path("file://" + targetFile.getAbsolutePath());
    // file must not exist when writer is called
    targetFile.delete();
    writer.write(session, entries, targetPath);

    CheckpointEntries readEntries = readCheckpoint(targetPath, metadataEntry, false);
    AddFileEntry addFileEntry = getOnlyElement(readEntries.getAddFileEntries());
    assertThat(addFileEntry.getStats()).isPresent();

    DeltaLakeParquetFileStatistics fileStatistics = (DeltaLakeParquetFileStatistics) addFileEntry.getStats().get();
    assertThat(fileStatistics.getMinValues().get()).isEmpty();
    assertThat(fileStatistics.getMaxValues().get()).isEmpty();
    assertThat(fileStatistics.getNullCount().get()).isEmpty();
}
Use of io.trino.plugin.deltalake.transactionlog.MetadataEntry in the trino project by trinodb.
In class TestDeltaLakeMetastoreStatistics, method registerTable.
/**
 * Registers a table named {@code tableName} in schema {@code "db_name"} of the test Hive
 * metastore and returns a handle for it.
 *
 * <p>The table is created with type {@code "EXTERNAL_TABLE"}, a single {@code val} column of
 * Hive type double, and storage whose location and {@code PATH_PROPERTY} both point at the
 * classpath resource {@code statistics/<directoryName>}. The returned handle uses the literal
 * location string {@code "location"}, a fixed metadata entry with a {@code null} format and an
 * empty schema string, unconstrained predicates and version {@code 0}.
 *
 * @param tableName name of the table to create
 * @param directoryName directory under the {@code statistics/} resource root backing the table
 * @return a handle referring to the newly registered table
 */
private DeltaLakeTableHandle registerTable(String tableName, String directoryName) {
    String resourceLocation = Resources.getResource("statistics/" + directoryName).toExternalForm();

    Storage storage = new Storage(
            StorageFormat.create("serde", "input", "output"),
            Optional.of(resourceLocation),
            Optional.empty(),
            true,
            ImmutableMap.of(PATH_PROPERTY, resourceLocation));

    Table hiveTable = new Table(
            "db_name",
            tableName,
            Optional.of("test"),
            "EXTERNAL_TABLE",
            storage,
            ImmutableList.of(new Column("val", HiveType.HIVE_DOUBLE, Optional.empty())),
            ImmutableList.of(),
            ImmutableMap.of(TABLE_PROVIDER_PROPERTY, TABLE_PROVIDER_VALUE),
            Optional.empty(),
            Optional.empty(),
            OptionalLong.empty());
    hiveMetastore.createTable(hiveTable, PrincipalPrivileges.fromHivePrivilegeInfos(ImmutableSet.of()));

    MetadataEntry fixedMetadata = new MetadataEntry(
            "id",
            "test",
            "description",
            null,
            "",
            ImmutableList.of(),
            ImmutableMap.of(),
            0);
    return new DeltaLakeTableHandle(
            "db_name",
            tableName,
            "location",
            Optional.of(fixedMetadata),
            TupleDomain.all(),
            TupleDomain.all(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            0);
}
Aggregations