Search in sources:

Example 1 with ProtocolEntry

use of io.trino.plugin.deltalake.transactionlog.ProtocolEntry in project trino by trinodb.

the class TestCheckpointEntryIterator method testReadAllEntries.

/**
 * Iterates over the test checkpoint requesting every entry type and checks the
 * total number of entries plus one representative entry per type.
 *
 * <p>The assertions rely on fixed positions inside the checkpoint resource
 * (elements 12, 11, 8 and 3), so they are tied to the contents of
 * {@code TEST_CHECKPOINT}.
 *
 * @throws Exception if the checkpoint resource cannot be resolved or read
 */
@Test
public void testReadAllEntries() throws Exception {
    URI checkpointUri = getResource(TEST_CHECKPOINT).toURI();
    // Read the metadata once: the same value is handed to the iterator and used as the expected entry below.
    MetadataEntry metadataEntry = readMetadataEntry(checkpointUri);
    CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(
            checkpointUri,
            ImmutableSet.of(METADATA, PROTOCOL, TRANSACTION, ADD, REMOVE, COMMIT),
            Optional.of(metadataEntry));
    List<DeltaLakeTransactionLogEntry> entries = ImmutableList.copyOf(checkpointEntryIterator);
    assertThat(entries).hasSize(17);
    // MetadataEntry
    assertThat(entries).element(12).extracting(DeltaLakeTransactionLogEntry::getMetaData).isEqualTo(metadataEntry);
    // ProtocolEntry
    assertThat(entries).element(11).extracting(DeltaLakeTransactionLogEntry::getProtocol).isEqualTo(new ProtocolEntry(1, 2));
    // TransactionEntry
    // not found in the checkpoint, TODO add a test
    assertThat(entries).map(DeltaLakeTransactionLogEntry::getTxn).filteredOn(Objects::nonNull).isEmpty();
    // AddFileEntry
    // JSON statistics string expected on the add entry at position 8.
    String expectedAddStats = "{"
            + "\"numRecords\":1,"
            + "\"minValues\":{\"name\":\"Alice\",\"address\":{\"street\":\"100 Main St\",\"city\":\"Anytown\",\"state\":\"NY\",\"zip\":\"12345\"},\"income\":111000.0},"
            + "\"maxValues\":{\"name\":\"Alice\",\"address\":{\"street\":\"100 Main St\",\"city\":\"Anytown\",\"state\":\"NY\",\"zip\":\"12345\"},\"income\":111000.0},"
            + "\"nullCount\":{\"name\":0,\"married\":0,\"phones\":0,\"address\":{\"street\":0,\"city\":0,\"state\":0,\"zip\":0},\"income\":0}"
            + "}";
    assertThat(entries).element(8).extracting(DeltaLakeTransactionLogEntry::getAdd).isEqualTo(new AddFileEntry(
            "age=42/part-00003-0f53cae3-3e34-4876-b651-e1db9584dbc3.c000.snappy.parquet",
            Map.of("age", "42"),
            2634,
            1579190165000L,
            false,
            Optional.of(expectedAddStats),
            Optional.empty(),
            null));
    // RemoveFileEntry
    assertThat(entries).element(3).extracting(DeltaLakeTransactionLogEntry::getRemove).isEqualTo(new RemoveFileEntry("age=42/part-00000-951068bd-bcf4-4094-bb94-536f3c41d31f.c000.snappy.parquet", 1579190155406L, false));
    // CommitInfoEntry
    // not found in the checkpoint, TODO add a test
    assertThat(entries).map(DeltaLakeTransactionLogEntry::getCommitInfo).filteredOn(Objects::nonNull).isEmpty();
}
Also used : ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) DeltaLakeTransactionLogEntry(io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) URI(java.net.URI) Test(org.testng.annotations.Test)

Example 2 with ProtocolEntry

use of io.trino.plugin.deltalake.transactionlog.ProtocolEntry in project trino by trinodb.

the class TestCheckpointBuilder method testCheckpointBuilder.

/**
 * Feeds a {@code CheckpointBuilder} several log entries of each kind and checks
 * that the built checkpoint equals the expected {@code CheckpointEntries}.
 *
 * <p>The expected checkpoint holds {@code metadata2}, {@code protocol2}, the
 * transactions {@code app1TransactionV3} and {@code app2TransactionV5}, the add
 * entry {@code addA2}, and the remove entries {@code removeB} and {@code removeC}.
 */
@Test
public void testCheckpointBuilder() {
    CheckpointBuilder builder = new CheckpointBuilder();
    MetadataEntry metadata1 = new MetadataEntry("1", "", "", new MetadataEntry.Format("", Map.of()), "", List.of(), Map.of(), 1);
    MetadataEntry metadata2 = new MetadataEntry("2", "", "", new MetadataEntry.Format("", Map.of()), "", List.of(), Map.of(), 1);
    builder.addLogEntry(metadataEntry(metadata1));
    builder.addLogEntry(metadataEntry(metadata2));
    ProtocolEntry protocol1 = new ProtocolEntry(1, 2);
    ProtocolEntry protocol2 = new ProtocolEntry(3, 4);
    builder.addLogEntry(protocolEntry(protocol1));
    builder.addLogEntry(protocolEntry(protocol2));
    TransactionEntry app1TransactionV1 = new TransactionEntry("app1", 1, 1);
    TransactionEntry app1TransactionV2 = new TransactionEntry("app1", 2, 2);
    TransactionEntry app1TransactionV3 = new TransactionEntry("app1", 3, 3);
    TransactionEntry app2TransactionV5 = new TransactionEntry("app2", 5, 5);
    // The app1 transactions are added in the order V2, V3, V1; the expected checkpoint keeps V3.
    builder.addLogEntry(transactionEntry(app1TransactionV2));
    builder.addLogEntry(transactionEntry(app1TransactionV3));
    builder.addLogEntry(transactionEntry(app1TransactionV1));
    builder.addLogEntry(transactionEntry(app2TransactionV5));
    AddFileEntry addA1 = new AddFileEntry("a", Map.of(), 1, 1, true, Optional.empty(), Optional.empty(), Map.of());
    RemoveFileEntry removeA1 = new RemoveFileEntry("a", 1, true);
    AddFileEntry addA2 = new AddFileEntry("a", Map.of(), 2, 1, true, Optional.empty(), Optional.empty(), Map.of());
    AddFileEntry addB = new AddFileEntry("b", Map.of(), 1, 1, true, Optional.empty(), Optional.empty(), Map.of());
    RemoveFileEntry removeB = new RemoveFileEntry("b", 1, true);
    RemoveFileEntry removeC = new RemoveFileEntry("c", 1, true);
    // File "a" is added, removed, then added again; file "b" is added then removed; file "c" is only removed.
    builder.addLogEntry(addFileEntry(addA1));
    builder.addLogEntry(removeFileEntry(removeA1));
    builder.addLogEntry(addFileEntry(addA2));
    builder.addLogEntry(addFileEntry(addB));
    builder.addLogEntry(removeFileEntry(removeB));
    builder.addLogEntry(removeFileEntry(removeC));
    CheckpointEntries expectedCheckpoint = new CheckpointEntries(metadata2, protocol2, Set.of(app1TransactionV3, app2TransactionV5), Set.of(addA2), Set.of(removeB, removeC));
    // Actual value first, expected second, so a failure message labels the two sides correctly.
    assertEquals(builder.build(), expectedCheckpoint);
}
Also used : ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) TransactionEntry(io.trino.plugin.deltalake.transactionlog.TransactionEntry) Test(org.testng.annotations.Test)

Example 3 with ProtocolEntry

use of io.trino.plugin.deltalake.transactionlog.ProtocolEntry in project trino by trinodb.

the class CheckpointEntryIterator method buildProtocolEntry.

/**
 * Builds a protocol log entry from the nested block found at {@code pagePosition}.
 *
 * <p>The nested block must contain exactly two positions; they are read with
 * {@code getInt} at indexes 0 and 1 and passed, in that order, to the
 * {@code ProtocolEntry} constructor.
 *
 * @param session not used by this method
 * @param block block holding the protocol entry at {@code pagePosition}
 * @param pagePosition position inside {@code block} to read
 * @return the protocol entry wrapped as a {@code DeltaLakeTransactionLogEntry},
 *         or {@code null} when {@code block} is null at {@code pagePosition}
 * @throws TrinoException with {@code DELTA_LAKE_INVALID_SCHEMA} when the nested
 *         block does not have exactly two positions
 */
private DeltaLakeTransactionLogEntry buildProtocolEntry(ConnectorSession session, Block block, int pagePosition) {
    log.debug("Building protocol entry from %s pagePosition %d", block, pagePosition);
    if (block.isNull(pagePosition)) {
        return null;
    }

    final int expectedFieldCount = 2;
    Block entryFields = block.getObject(pagePosition, Block.class);
    int actualFieldCount = entryFields.getPositionCount();
    log.debug("Block %s has %s fields", block, actualFieldCount);

    // Reject any layout other than the two expected fields before reading them.
    if (actualFieldCount != expectedFieldCount) {
        String message = format("Expected block %s to have %d children, but found %s", block, expectedFieldCount, actualFieldCount);
        throw new TrinoException(DELTA_LAKE_INVALID_SCHEMA, message);
    }

    ProtocolEntry protocol = new ProtocolEntry(getInt(entryFields, 0), getInt(entryFields, 1));
    log.debug("Result: %s", protocol);
    return DeltaLakeTransactionLogEntry.protocolEntry(protocol);
}
Also used : ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) Block(io.trino.spi.block.Block) TrinoException(io.trino.spi.TrinoException)

Example 4 with ProtocolEntry

use of io.trino.plugin.deltalake.transactionlog.ProtocolEntry in project trino by trinodb.

the class DeltaLakeMetadata method appendInitialTableEntries.

/**
 * Appends the initial entries for a table to the given transaction log writer:
 * first a commit info entry, then a protocol entry, then a metadata entry.
 *
 * <p>The commit info and metadata entries share one timestamp taken from
 * {@code System.currentTimeMillis()} at the start of the method.
 *
 * @param transactionLogWriter writer that receives the three entries
 * @param columns columns serialized into the metadata entry's schema JSON
 * @param partitionColumnNames partition column names stored in the metadata entry
 * @param configuration configuration copied into the metadata entry
 * @param operation operation name recorded in the commit info entry
 * @param session supplies the user and query id recorded in the commit info entry
 * @param nodeVersion used to build the {@code "trino-<nodeVersion>-<nodeId>"} string in the commit info entry
 * @param nodeId used to build the {@code "trino-<nodeVersion>-<nodeId>"} string in the commit info entry
 */
private static void appendInitialTableEntries(TransactionLogWriter transactionLogWriter, List<DeltaLakeColumnHandle> columns, List<String> partitionColumnNames, Map<String, String> configuration, String operation, ConnectorSession session, String nodeVersion, String nodeId) {
    long createdTime = System.currentTimeMillis();
    String trinoNodeTag = "trino-" + nodeVersion + "-" + nodeId;

    CommitInfoEntry commitInfo = new CommitInfoEntry(
            0,
            createdTime,
            session.getUser(),
            session.getUser(),
            operation,
            ImmutableMap.of("queryId", session.getQueryId()),
            null,
            null,
            trinoNodeTag,
            0,
            ISOLATION_LEVEL,
            true);
    transactionLogWriter.appendCommitInfoEntry(commitInfo);

    ProtocolEntry protocol = new ProtocolEntry(READER_VERSION, WRITER_VERSION);
    transactionLogWriter.appendProtocolEntry(protocol);

    MetadataEntry metadata = new MetadataEntry(
            randomUUID().toString(),
            null,
            null,
            new Format("parquet", ImmutableMap.of()),
            serializeSchemaAsJson(columns),
            partitionColumnNames,
            ImmutableMap.copyOf(configuration),
            createdTime);
    transactionLogWriter.appendMetadataEntry(metadata);
}
Also used : Format(io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry)

Example 5 with ProtocolEntry

use of io.trino.plugin.deltalake.transactionlog.ProtocolEntry in project trino by trinodb.

the class TestCheckpointWriter method testCheckpointWriteReadRoundtrip.

/**
 * Writes a {@code CheckpointEntries} instance to a temporary checkpoint file with
 * {@code CheckpointWriter}, reads it back with {@code readCheckpoint}, and asserts
 * that each group of entries read equals the one written.
 *
 * <p>The written checkpoint holds one metadata entry, one protocol entry, one
 * transaction entry, one remove entry, and two add-file entries: one carrying
 * statistics as a JSON string and one carrying a {@code DeltaLakeParquetFileStatistics}.
 *
 * @throws IOException declared by the test; the body creates a temp file and writes to it
 */
@Test
public void testCheckpointWriteReadRoundtrip() throws IOException {
    // Metadata whose schema JSON declares the columns referenced by the statistics below.
    MetadataEntry metadataEntry = new MetadataEntry("metadataId", "metadataName", "metadataDescription", new MetadataEntry.Format("metadataFormatProvider", ImmutableMap.of("formatOptionX", "blah", "fomatOptionY", "plah")), "{\"type\":\"struct\",\"fields\":" + "[{\"name\":\"ts\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"str\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"dec_short\",\"type\":\"decimal(5,1)\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"dec_long\",\"type\":\"decimal(25,3)\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"l\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"in\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"sh\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"byt\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"fl\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"dou\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"bool\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"bin\",\"type\":\"binary\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"dat\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"arr\",\"type\":{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"m\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"row\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"s1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}," + "{\"name\":\"s2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}", ImmutableList.of("part_key"), ImmutableMap.of("configOption1", "blah", "configOption2", "plah"), 1000);
    ProtocolEntry protocolEntry = new ProtocolEntry(10, 20);
    TransactionEntry transactionEntry = new TransactionEntry("appId", 1, 1001);
    // Add-file entry whose statistics are supplied as a JSON string; the parsed-statistics argument is empty.
    AddFileEntry addFileEntryJsonStats = new AddFileEntry("addFilePathJson", ImmutableMap.of("part_key", "7.0"), 1000, 1001, true, Optional.of("{" + "\"numRecords\":20," + "\"minValues\":{" + "\"ts\":\"2960-10-31T01:00:00.000Z\"," + "\"str\":\"a\"," + "\"dec_short\":10.1," + "\"dec_long\":111111111111.123," + "\"l\":1000000000," + "\"in\":100000," + "\"sh\":100," + "\"byt\":10," + "\"fl\":0.100," + "\"dou\":0.101," + "\"dat\":\"2000-01-01\"," + "\"row\":{\"s1\":1,\"s2\":\"a\"}" + "}," + "\"maxValues\":{" + "\"ts\":\"2960-10-31T02:00:00.000Z\"," + "\"str\":\"z\"," + "\"dec_short\":20.1," + "\"dec_long\":222222222222.123," + "\"l\":2000000000," + "\"in\":200000," + "\"sh\":200," + "\"byt\":20," + "\"fl\":0.200," + "\"dou\":0.202," + "\"dat\":\"3000-01-01\"," + "\"row\":{\"s1\":1,\"s2\":\"a\"}" + "}," + "\"nullCount\":{" + "\"ts\":1," + "\"str\":2," + "\"dec_short\":3," + "\"dec_long\":4," + "\"l\":5," + "\"in\":6," + "\"sh\":7," + "\"byt\":8," + "\"fl\":9," + "\"dou\":10," + "\"bool\":11," + "\"bin\":12," + "\"dat\":13," + "\"arr\":0,\"m\":14," + "\"row\":{\"s1\":0,\"s2\":15}}}"), Optional.empty(), ImmutableMap.of("someTag", "someValue", "otherTag", "otherValue"));
    // Field blocks used to build the "row" values inside the parquet statistics maps below.
    Block[] minMaxRowFieldBlocks = new Block[] { nativeValueToBlock(IntegerType.INTEGER, 1L), nativeValueToBlock(createUnboundedVarcharType(), utf8Slice("a")) };
    Block[] nullCountRowFieldBlocks = new Block[] { nativeValueToBlock(BigintType.BIGINT, 0L), nativeValueToBlock(BigintType.BIGINT, 15L) };
    // Add-file entry whose statistics are supplied as a DeltaLakeParquetFileStatistics; the JSON-string argument is empty.
    AddFileEntry addFileEntryParquetStats = new AddFileEntry("addFilePathParquet", ImmutableMap.of("part_key", "7.0"), 1000, 1001, true, Optional.empty(), Optional.of(new DeltaLakeParquetFileStatistics(Optional.of(5L), Optional.of(ImmutableMap.<String, Object>builder().put("ts", DateTimeUtils.convertToTimestampWithTimeZone(UTC_KEY, "2060-10-31 01:00:00")).put("str", utf8Slice("a")).put("dec_short", 101L).put("dec_long", Int128.valueOf(111111111111123L)).put("l", 1000000000L).put("in", 100000L).put("sh", 100L).put("byt", 10L).put("fl", (long) Float.floatToIntBits(0.100f)).put("dou", 0.101d).put("dat", (long) parseDate("2000-01-01")).put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), minMaxRowFieldBlocks).getSingleValueBlock(0)).buildOrThrow()), Optional.of(ImmutableMap.<String, Object>builder().put("ts", DateTimeUtils.convertToTimestampWithTimeZone(UTC_KEY, "2060-10-31 02:00:00")).put("str", utf8Slice("a")).put("dec_short", 201L).put("dec_long", Int128.valueOf(222222222222123L)).put("l", 2000000000L).put("in", 200000L).put("sh", 200L).put("byt", 20L).put("fl", (long) Float.floatToIntBits(0.200f)).put("dou", 0.202d).put("dat", (long) parseDate("3000-01-01")).put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), minMaxRowFieldBlocks).getSingleValueBlock(0)).buildOrThrow()), Optional.of(ImmutableMap.<String, Object>builder().put("ts", 1L).put("str", 2L).put("dec_short", 3L).put("dec_long", 4L).put("l", 5L).put("in", 6L).put("sh", 7L).put("byt", 8L).put("fl", 9L).put("dou", 10L).put("bool", 11L).put("bin", 12L).put("dat", 13L).put("arr", 14L).put("row", RowBlock.fromFieldBlocks(1, Optional.empty(), nullCountRowFieldBlocks).getSingleValueBlock(0)).buildOrThrow()))), ImmutableMap.of("someTag", "someValue", "otherTag", "otherValue"));
    RemoveFileEntry removeFileEntry = new RemoveFileEntry("removeFilePath", 1000, true);
    CheckpointEntries entries = new CheckpointEntries(metadataEntry, protocolEntry, ImmutableSet.of(transactionEntry), ImmutableSet.of(addFileEntryJsonStats, addFileEntryParquetStats), ImmutableSet.of(removeFileEntry));
    CheckpointWriter writer = new CheckpointWriter(typeManager, checkpointSchemaManager, hdfsEnvironment);
    // Create a temp file to obtain a path, and register it for deletion when the JVM exits.
    File targetFile = File.createTempFile("testCheckpointWriteReadRoundtrip-", ".checkpoint.parquet");
    targetFile.deleteOnExit();
    Path targetPath = new Path("file://" + targetFile.getAbsolutePath());
    // file must not exist when writer is called
    targetFile.delete();
    writer.write(session, entries, targetPath);
    // NOTE(review): the meaning of the trailing boolean passed to readCheckpoint is not visible here; confirm against its definition.
    CheckpointEntries readEntries = readCheckpoint(targetPath, metadataEntry, true);
    assertEquals(readEntries.getTransactionEntries(), entries.getTransactionEntries());
    assertEquals(readEntries.getRemoveFileEntries(), entries.getRemoveFileEntries());
    assertEquals(readEntries.getMetadataEntry(), entries.getMetadataEntry());
    assertEquals(readEntries.getProtocolEntry(), entries.getProtocolEntry());
    // Add-file entries are compared only after both sides are mapped through makeComparable.
    assertEquals(readEntries.getAddFileEntries().stream().map(this::makeComparable).collect(toImmutableSet()), entries.getAddFileEntries().stream().map(this::makeComparable).collect(toImmutableSet()));
}
Also used : Path(org.apache.hadoop.fs.Path) DeltaLakeParquetFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeParquetFileStatistics) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) Utils.nativeValueToBlock(io.trino.spi.predicate.Utils.nativeValueToBlock) Block(io.trino.spi.block.Block) RowBlock(io.trino.spi.block.RowBlock) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) File(java.io.File) TransactionEntry(io.trino.plugin.deltalake.transactionlog.TransactionEntry) Test(org.testng.annotations.Test)

Aggregations

ProtocolEntry (io.trino.plugin.deltalake.transactionlog.ProtocolEntry): 6, MetadataEntry (io.trino.plugin.deltalake.transactionlog.MetadataEntry): 5, AddFileEntry (io.trino.plugin.deltalake.transactionlog.AddFileEntry): 4, Test (org.testng.annotations.Test): 4, RemoveFileEntry (io.trino.plugin.deltalake.transactionlog.RemoveFileEntry): 3, Block (io.trino.spi.block.Block): 3, TransactionEntry (io.trino.plugin.deltalake.transactionlog.TransactionEntry): 2, DeltaLakeParquetFileStatistics (io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeParquetFileStatistics): 2, RowBlock (io.trino.spi.block.RowBlock): 2, Utils.nativeValueToBlock (io.trino.spi.predicate.Utils.nativeValueToBlock): 2, File (java.io.File): 2, Path (org.apache.hadoop.fs.Path): 2, CommitInfoEntry (io.trino.plugin.deltalake.transactionlog.CommitInfoEntry): 1, DeltaLakeTransactionLogEntry (io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry): 1, Format (io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format): 1, StorageFormat (io.trino.plugin.hive.metastore.StorageFormat): 1, TrinoException (io.trino.spi.TrinoException): 1, URI (java.net.URI): 1