use of io.trino.plugin.deltalake.transactionlog.TableSnapshot in project trino by trinodb.
the class TestTransactionLogAccess method testUpdatingTailEntriesNoCheckpoint.
/**
 * Loads the active files of the "person" table from an initial set of transaction log
 * entries, then copies further entries into the log directory and checks that a newly
 * loaded snapshot reports the updated set of active data file paths.
 */
@Test
public void testUpdatingTailEntriesNoCheckpoint()
        throws Exception
{
    String tableName = "person";
    File tableDir = new File(Files.createTempDir(), tableName);
    File transactionLogDir = new File(tableDir, TRANSACTION_LOG_DIRECTORY);
    transactionLogDir.mkdirs();
    // Resolved only after the directories exist, matching the point at which the location is first needed.
    Path tableLocation = new Path(tableDir.toURI());
    File resourceDir = new File(getClass().getClassLoader().getResource("databricks/person/_delta_log").toURI());

    // Initial state of the log. NOTE(review): the end index of copyTransactionLogEntry looks exclusive — confirm against the helper.
    copyTransactionLogEntry(0, 7, resourceDir, transactionLogDir);
    setupTransactionLogAccess(tableName, tableLocation);

    Set<String> expectedInitialPaths = ImmutableSet.of(
            "age=42/part-00000-b82d8859-84a0-4f05-872c-206b07dd54f0.c000.snappy.parquet",
            "age=30/part-00000-72a56c23-01ba-483a-9062-dd0accc86599.c000.snappy.parquet",
            "age=25/part-00000-609e34b1-5466-4dbc-a780-2708166e7adb.c000.snappy.parquet",
            "age=30/part-00000-7e43a3c3-ea26-4ae7-8eac-8f60cbb4df03.c000.snappy.parquet",
            "age=21/part-00000-3d546786-bedc-407f-b9f7-e97aa12cce0f.c000.snappy.parquet",
            "age=21/part-00001-290f0f26-19cf-4772-821e-36d55d9b7872.c000.snappy.parquet");
    List<AddFileEntry> initialFiles = transactionLogAccess.getActiveFiles(tableSnapshot, SESSION);
    Set<String> initialPaths = initialFiles.stream()
            .map(AddFileEntry::getPath)
            .collect(Collectors.toSet());
    assertEqualsIgnoreOrder(initialPaths, expectedInitialPaths);

    // Append more entries to the tail of the log and reload the snapshot.
    copyTransactionLogEntry(7, 9, resourceDir, transactionLogDir);
    TableSnapshot updatedSnapshot = transactionLogAccess.loadSnapshot(new SchemaTableName("schema", tableName), tableLocation, SESSION);

    Set<String> expectedUpdatedPaths = ImmutableSet.of(
            "age=21/part-00000-3d546786-bedc-407f-b9f7-e97aa12cce0f.c000.snappy.parquet",
            "age=21/part-00001-290f0f26-19cf-4772-821e-36d55d9b7872.c000.snappy.parquet",
            "age=30/part-00000-63c2205d-84a3-4a66-bd7c-f69f5af55bbc.c000.snappy.parquet",
            "age=25/part-00001-aceaf062-1cd1-45cb-8f83-277ffebe995c.c000.snappy.parquet",
            "age=30/part-00002-5800be2e-2373-47d8-8b86-776a8ea9d69f.c000.snappy.parquet",
            "age=42/part-00003-0f53cae3-3e34-4876-b651-e1db9584dbc3.c000.snappy.parquet",
            "age=25/part-00000-b7fbbe31-c7f9-44ed-8757-5c47d10c3e81.c000.snappy.parquet");
    List<AddFileEntry> updatedFiles = transactionLogAccess.getActiveFiles(updatedSnapshot, SESSION);
    Set<String> updatedPaths = updatedFiles.stream()
            .map(AddFileEntry::getPath)
            .collect(Collectors.toSet());
    assertEqualsIgnoreOrder(updatedPaths, expectedUpdatedPaths);
}
use of io.trino.plugin.deltalake.transactionlog.TableSnapshot in project trino by trinodb.
the class TestTransactionLogAccess method testSnapshotsAreConsistent.
/**
 * Checks that a previously obtained snapshot keeps returning the same active files after
 * new transaction log entries are copied into the table's log directory, while a freshly
 * loaded snapshot does see the newly added files.
 */
@Test
public void testSnapshotsAreConsistent()
        throws Exception
{
    String tableName = "person";
    File tableDir = new File(Files.createTempDir(), tableName);
    File transactionLogDir = new File(tableDir, TRANSACTION_LOG_DIRECTORY);
    transactionLogDir.mkdirs();
    // Resolved only after the directories exist, matching the point at which the location is first needed.
    Path tableLocation = new Path(tableDir.toURI());
    File resourceDir = new File(getClass().getClassLoader().getResource("databricks/person/_delta_log").toURI());

    copyTransactionLogEntry(0, 12, resourceDir, transactionLogDir);
    Files.copy(new File(resourceDir, LAST_CHECKPOINT_FILENAME), new File(transactionLogDir, LAST_CHECKPOINT_FILENAME));
    setupTransactionLogAccess(tableName, tableLocation);

    // Baseline: active files as seen through the snapshot created during setup.
    List<AddFileEntry> expectedDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, SESSION);

    // Extend the log, then read through both a fresh snapshot and the original one.
    copyTransactionLogEntry(12, 14, resourceDir, transactionLogDir);
    Set<String> addedPaths = ImmutableSet.of(
            "age=28/part-00000-40dd1707-1d42-4328-a59a-21f5c945fe60.c000.snappy.parquet",
            "age=29/part-00000-3794c463-cb0c-4beb-8d07-7cc1e3b5920f.c000.snappy.parquet");
    TableSnapshot refreshedSnapshot = transactionLogAccess.loadSnapshot(new SchemaTableName("schema", tableName), tableLocation, SESSION);
    List<AddFileEntry> filesInRefreshedSnapshot = transactionLogAccess.getActiveFiles(refreshedSnapshot, SESSION);
    List<AddFileEntry> filesInOriginalSnapshot = transactionLogAccess.getActiveFiles(tableSnapshot, SESSION);

    for (String addedPath : addedPaths) {
        // The new file must be visible in the refreshed snapshot only.
        assertTrue(filesInRefreshedSnapshot.stream().anyMatch(file -> file.getPath().equals(addedPath)));
        assertTrue(filesInOriginalSnapshot.stream().noneMatch(file -> file.getPath().equals(addedPath)));
    }

    assertEquals(expectedDataFiles.size(), filesInOriginalSnapshot.size());
    List<ColumnMetadata> columns = extractSchema(transactionLogAccess.getMetadataEntry(tableSnapshot, SESSION).get(), TESTING_TYPE_MANAGER);

    // Entry-by-entry comparison (same position in both lists) of the baseline and the re-read files.
    for (int index = 0; index < expectedDataFiles.size(); index++) {
        AddFileEntry baseline = expectedDataFiles.get(index);
        AddFileEntry reread = filesInOriginalSnapshot.get(index);

        assertEquals(baseline.getPath(), reread.getPath());
        assertEquals(baseline.getPartitionValues(), reread.getPartitionValues());
        assertEquals(baseline.getSize(), reread.getSize());
        assertEquals(baseline.getModificationTime(), reread.getModificationTime());
        assertEquals(baseline.isDataChange(), reread.isDataChange());
        assertEquals(baseline.getTags(), reread.getTags());

        assertTrue(baseline.getStats().isPresent());
        assertTrue(reread.getStats().isPresent());

        for (ColumnMetadata column : columns) {
            DeltaLakeColumnHandle handle = new DeltaLakeColumnHandle(column.getName(), column.getType(), REGULAR);
            assertEquals(
                    baseline.getStats().get().getMinColumnValue(handle),
                    reread.getStats().get().getMinColumnValue(handle));
            assertEquals(
                    baseline.getStats().get().getMaxColumnValue(handle),
                    reread.getStats().get().getMaxColumnValue(handle));
            assertEquals(
                    baseline.getStats().get().getNullCount(handle.getName()),
                    reread.getStats().get().getNullCount(handle.getName()));
            assertEquals(
                    baseline.getStats().get().getNumRecords(),
                    reread.getStats().get().getNumRecords());
        }
    }
}
use of io.trino.plugin.deltalake.transactionlog.TableSnapshot in project trino by trinodb.
the class TestTransactionLogAccess method testMetadataCacheUpdates.
/**
 * Copies log files 0 through 11 (using the checkpoint file in place of the JSON entry for
 * index 10) plus the last-checkpoint file, asserts the set-up snapshot is at version 11,
 * then adds entry 12 and asserts that a newly loaded snapshot is at version 12.
 */
@Test
public void testMetadataCacheUpdates()
        throws Exception
{
    String tableName = "person";
    File tableDir = new File(Files.createTempDir(), tableName);
    File transactionLogDir = new File(tableDir, TRANSACTION_LOG_DIRECTORY);
    transactionLogDir.mkdirs();
    // Resolved only after the directories exist, matching the point at which the location is first needed.
    Path tableLocation = new Path(tableDir.toURI());
    java.nio.file.Path resourceDir = java.nio.file.Paths.get(getClass().getClassLoader().getResource("databricks/person/_delta_log").toURI());

    for (int version = 0; version < 12; version++) {
        String extension;
        if (version == 10) {
            // Index 10 is copied as a checkpoint file; its JSON entry is not copied.
            extension = ".checkpoint.parquet";
        }
        else {
            extension = ".json";
        }
        String logFileName = format("%020d%s", version, extension);
        File source = resourceDir.resolve(logFileName).toFile();
        Files.copy(source, new File(transactionLogDir, logFileName));
    }
    File lastCheckpointSource = resourceDir.resolve(LAST_CHECKPOINT_FILENAME).toFile();
    Files.copy(lastCheckpointSource, new File(transactionLogDir, LAST_CHECKPOINT_FILENAME));

    setupTransactionLogAccess(tableName, tableLocation);
    assertEquals(tableSnapshot.getVersion(), 11L);

    // Add one more transaction and reload.
    String nextEntryName = format("%020d.json", 12);
    File nextEntrySource = resourceDir.resolve(nextEntryName).toFile();
    Files.copy(nextEntrySource, new File(transactionLogDir, nextEntryName));
    TableSnapshot updatedSnapshot = transactionLogAccess.loadSnapshot(new SchemaTableName("schema", tableName), tableLocation, SESSION);
    assertEquals(updatedSnapshot.getVersion(), 12L);
}
use of io.trino.plugin.deltalake.transactionlog.TableSnapshot in project trino by trinodb.
the class TestTransactionLogAccess method testAddNewTransactionLogs.
/**
 * Adds transaction log entries to the table's log directory one step at a time and checks
 * after each step that a newly loaded snapshot reports the next version (0, then 1, then 2).
 */
@Test
public void testAddNewTransactionLogs()
        throws Exception
{
    String tableName = "person";
    File tableDir = new File(Files.createTempDir(), tableName);
    File transactionLogDir = new File(tableDir, TRANSACTION_LOG_DIRECTORY);
    transactionLogDir.mkdirs();

    SchemaTableName schemaTableName = new SchemaTableName("schema", tableName);
    Path tableLocation = new Path(tableDir.toURI());
    File resourceDir = new File(getClass().getClassLoader().getResource("databricks/person/_delta_log").toURI());

    // First step: the snapshot created during setup is at version 0.
    copyTransactionLogEntry(0, 1, resourceDir, transactionLogDir);
    setupTransactionLogAccess(tableName, tableLocation);
    assertEquals(tableSnapshot.getVersion(), 0L);

    // Second step: a reloaded snapshot is at version 1.
    copyTransactionLogEntry(1, 2, resourceDir, transactionLogDir);
    TableSnapshot afterFirstAddition = transactionLogAccess.loadSnapshot(schemaTableName, tableLocation, SESSION);
    assertEquals(afterFirstAddition.getVersion(), 1L);

    // Third step: a reloaded snapshot is at version 2.
    copyTransactionLogEntry(2, 3, resourceDir, transactionLogDir);
    TableSnapshot afterSecondAddition = transactionLogAccess.loadSnapshot(schemaTableName, tableLocation, SESSION);
    assertEquals(afterSecondAddition.getVersion(), 2L);
}
use of io.trino.plugin.deltalake.transactionlog.TableSnapshot in project trino by trinodb.
the class HiveMetastoreBackedDeltaLakeMetastore method createTable.
/**
 * Validates that the table's storage location holds a Delta Lake table with a metadata
 * entry, then hands table creation over to the delegate.
 *
 * <p>Cached statistics and transaction log state for the location are invalidated before
 * the snapshot is loaded.
 *
 * @throws TrinoException with {@code DELTA_LAKE_INVALID_TABLE} when loading the snapshot or
 *         reading its metadata fails with an {@link IOException}, or when no metadata entry
 *         is found at the location
 */
@Override
public void createTable(ConnectorSession session, Table table, PrincipalPrivileges principalPrivileges)
{
    String tableLocation = table.getStorage().getLocation();
    statisticsAccess.invalidateCache(tableLocation);
    transactionLogAccess.invalidateCaches(tableLocation);

    Optional<MetadataEntry> metadata;
    try {
        TableSnapshot snapshot = transactionLogAccess.loadSnapshot(table.getSchemaTableName(), new Path(tableLocation), session);
        metadata = transactionLogAccess.getMetadataEntry(snapshot, session);
    }
    catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_INVALID_TABLE, "Failed to access table location: " + tableLocation, e);
    }

    // Without a metadata entry the location is not treated as a valid Delta Lake table.
    if (!metadata.isPresent()) {
        throw new TrinoException(DELTA_LAKE_INVALID_TABLE, "Provided location did not contain a valid Delta Lake table: " + tableLocation);
    }

    delegate.createTable(table, principalPrivileges);
}
Aggregations