use of io.trino.spi.connector.ConnectorInsertTableHandle in project trino by trinodb.
the class AbstractTestHive method doInsertIntoNewPartition.
private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
// creating the table
doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
// insert the data
String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
Set<String> existingFiles;
try (Transaction transaction = newTransaction()) {
// verify partitions were created
Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toImmutableList()));
// verify the node versions in partitions
Map<String, Optional<Partition>> partitions = getMetastoreClient().getPartitionsByNames(table, partitionNames);
assertEquals(partitions.size(), partitionNames.size());
for (String partitionName : partitionNames) {
Partition partition = partitions.get(partitionName).get();
assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
}
// load the new table
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
// test rollback
existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
assertFalse(existingFiles.isEmpty());
// test statistics
for (String partitionName : partitionNames) {
HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName);
assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
}
}
Path stagingPathRoot;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
// "stage" insert data
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
stagingPathRoot = getStagingPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
// verify all temp files start with the unique prefix
HdfsContext context = new HdfsContext(session);
Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
assertTrue(!tempFiles.isEmpty());
for (String filePath : tempFiles) {
assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
}
// rollback insert
transaction.rollback();
}
// verify the data is unchanged
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
// verify temp directory is empty
HdfsContext context = new HdfsContext(session);
assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
}
}
use of io.trino.spi.connector.ConnectorInsertTableHandle in project trino by trinodb.
the class AbstractTestHive method doInsertOverwriteUnpartitioned.
private void doInsertOverwriteUnpartitioned(SchemaTableName tableName) throws Exception {
// create table with data
doCreateEmptyTable(tableName, ORC, CREATE_TABLE_COLUMNS);
insertData(tableName, CREATE_TABLE_DATA);
// overwrite table with new data
MaterializedResult.Builder overwriteDataBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes());
MaterializedResult overwriteData = null;
Map<String, Object> overwriteProperties = ImmutableMap.of("insert_existing_partitions_behavior", "OVERWRITE");
for (int i = 0; i < 3; i++) {
overwriteDataBuilder.rows(reverse(CREATE_TABLE_DATA.getMaterializedRows()));
overwriteData = overwriteDataBuilder.build();
insertData(tableName, overwriteData, overwriteProperties);
// verify overwrite
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
// load the new table
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the metadata
ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
// verify the data
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), overwriteData.getMaterializedRows());
// statistics
HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName);
assertEquals(tableStatistics.getRowCount().getAsLong(), overwriteData.getRowCount());
assertEquals(tableStatistics.getFileCount().getAsLong(), 1L);
assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
}
}
// test rollback
Set<String> existingFiles;
try (Transaction transaction = newTransaction()) {
existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
assertFalse(existingFiles.isEmpty());
}
Path stagingPathRoot;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession(overwriteProperties);
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
// "stage" insert data
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
for (int i = 0; i < 4; i++) {
sink.appendPage(overwriteData.toPage());
}
Collection<Slice> fragments = getFutureValue(sink.finish());
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
// statistics, visible from within transaction
HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName);
assertEquals(tableStatistics.getRowCount().getAsLong(), overwriteData.getRowCount() * 4L);
try (Transaction otherTransaction = newTransaction()) {
// statistics, not visible from outside transaction
HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(otherTransaction, tableName);
assertEquals(otherTableStatistics.getRowCount().getAsLong(), overwriteData.getRowCount());
}
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
// verify all temp files start with the unique prefix
stagingPathRoot = getStagingPathRoot(insertTableHandle);
HdfsContext context = new HdfsContext(session);
Set<String> tempFiles = listAllDataFiles(context, stagingPathRoot);
assertTrue(!tempFiles.isEmpty());
for (String filePath : tempFiles) {
assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
}
// rollback insert
transaction.rollback();
}
// verify temp directory is empty
HdfsContext context = new HdfsContext(newSession());
assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
// verify the data is unchanged
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), overwriteData.getMaterializedRows());
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
}
// verify statistics unchanged
try (Transaction transaction = newTransaction()) {
HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName);
assertEquals(statistics.getRowCount().getAsLong(), overwriteData.getRowCount());
assertEquals(statistics.getFileCount().getAsLong(), 1L);
}
}
use of io.trino.spi.connector.ConnectorInsertTableHandle in project trino by trinodb.
the class IcebergMetadata method finishRefreshMaterializedView.
@Override
public Optional<ConnectorOutputMetadata> finishRefreshMaterializedView(ConnectorSession session, ConnectorTableHandle tableHandle, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<ConnectorTableHandle> sourceTableHandles) {
// delete before insert .. simulating overwrite
executeDelete(session, tableHandle);
IcebergWritableTableHandle table = (IcebergWritableTableHandle) insertHandle;
Table icebergTable = transaction.table();
List<CommitTaskData> commitTasks = fragments.stream().map(slice -> commitTaskCodec.fromJson(slice.getBytes())).collect(toImmutableList());
Type[] partitionColumnTypes = icebergTable.spec().fields().stream().map(field -> field.transform().getResultType(icebergTable.schema().findType(field.sourceId()))).toArray(Type[]::new);
AppendFiles appendFiles = transaction.newFastAppend();
ImmutableSet.Builder<String> writtenFiles = ImmutableSet.builder();
for (CommitTaskData task : commitTasks) {
DataFiles.Builder builder = DataFiles.builder(icebergTable.spec()).withPath(task.getPath()).withFileSizeInBytes(task.getFileSizeInBytes()).withFormat(table.getFileFormat().toIceberg()).withMetrics(task.getMetrics().metrics());
if (!icebergTable.spec().fields().isEmpty()) {
String partitionDataJson = task.getPartitionDataJson().orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
}
appendFiles.appendFile(builder.build());
writtenFiles.add(task.getPath());
}
String dependencies = sourceTableHandles.stream().map(handle -> (IcebergTableHandle) handle).filter(handle -> handle.getSnapshotId().isPresent()).map(handle -> handle.getSchemaTableName() + "=" + handle.getSnapshotId().get()).distinct().collect(joining(","));
// try to leave as little garbage as possible behind
if (table.getRetryMode() != NO_RETRIES) {
cleanExtraOutputFiles(session, writtenFiles.build());
}
// Update the 'dependsOnTables' property that tracks tables on which the materialized view depends and the corresponding snapshot ids of the tables
appendFiles.set(DEPENDS_ON_TABLES, dependencies);
appendFiles.commit();
transaction.commitTransaction();
transaction = null;
return Optional.of(new HiveWrittenPartitions(commitTasks.stream().map(CommitTaskData::getPath).collect(toImmutableList())));
}
use of io.trino.spi.connector.ConnectorInsertTableHandle in project trino by trinodb.
the class DeltaLakeMetadata method beginInsert.
@Override
public ConnectorInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> columns) {
DeltaLakeTableHandle table = (DeltaLakeTableHandle) tableHandle;
if (!allowWrite(session, table)) {
String fileSystem = new Path(table.getLocation()).toUri().getScheme();
throw new TrinoException(NOT_SUPPORTED, format("Inserts are not supported on the %s filesystem", fileSystem));
}
checkSupportedWriterVersion(session, table.getSchemaTableName());
List<DeltaLakeColumnHandle> inputColumns = columns.stream().map(handle -> (DeltaLakeColumnHandle) handle).collect(toImmutableList());
ConnectorTableMetadata tableMetadata = getTableMetadata(session, table);
// This check acts as a safeguard in cases where the input columns may differ from the table metadata case-sensitively
checkAllColumnsPassedOnInsert(tableMetadata, inputColumns);
String tableLocation = getLocation(tableMetadata.getProperties());
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(tableLocation));
return new DeltaLakeInsertTableHandle(table.getSchemaName(), table.getTableName(), tableLocation, table.getMetadataEntry(), inputColumns, getMandatoryCurrentVersion(fileSystem, new Path(tableLocation)));
} catch (IOException e) {
throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
}
}
use of io.trino.spi.connector.ConnectorInsertTableHandle in project trino by trinodb.
the class AbstractTestHive method insertData.
private String insertData(SchemaTableName tableName, MaterializedResult data, Map<String, Object> sessionProperties) throws Exception {
Path writePath;
Path targetPath;
String queryId;
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorSession session = newSession(sessionProperties);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
queryId = session.getQueryId();
writePath = getStagingPathRoot(insertTableHandle);
targetPath = getTargetPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
// write data
sink.appendPage(data.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
// commit the insert
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
transaction.commit();
}
// check that temporary files are removed
if (!writePath.equals(targetPath)) {
HdfsContext context = new HdfsContext(newSession());
FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, writePath);
assertFalse(fileSystem.exists(writePath));
}
return queryId;
}
Aggregations