Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.
From class TestHiveGlueMetastore, method testUpdatePartitionedStatisticsOnCreate:
@Test
public void testUpdatePartitionedStatisticsOnCreate()
{
    SchemaTableName tableName = temporaryTable("update_partitioned_statistics_create");
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        List<ColumnMetadata> columns = ImmutableList.of(
                new ColumnMetadata("a_column", BigintType.BIGINT),
                new ColumnMetadata("part_column", BigintType.BIGINT));
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE, ImmutableList.of("part_column")));
        ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);

        // write data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
        MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT, BigintType.BIGINT)
                .row(1L, 1L).row(2L, 1L).row(3L, 1L).row(4L, 2L).row(5L, 2L)
                .build();
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());

        // prepare statistics
        ComputedStatistics statistics1 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(1)))
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(3))
                .build();
        ComputedStatistics statistics2 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(2)))
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(2))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(4))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(2))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(2))
                .build();

        // finish CTAS
        metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics1, statistics2));
        transaction.commit();
    }
    finally {
        dropTable(tableName);
    }
}
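This test (and the unpartitioned variant that follows) drives the ConnectorPageSink through the same write path: create a sink for the output handle returned by beginCreateTable, append pages, wait for finish(), then hand the resulting fragments and statistics to finishCreateTable. A minimal sketch of that pattern, pulled out as a hypothetical helper: the class name PageSinkWriteSketch and the method writeAndFinishCreateTable are illustrative only, while the SPI calls and the getFutureValue utility are the ones used in these tests, against the same SPI version.

import io.airlift.slice.Slice;
import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorMetadata;
import io.trino.spi.connector.ConnectorOutputTableHandle;
import io.trino.spi.connector.ConnectorPageSink;
import io.trino.spi.connector.ConnectorPageSinkProvider;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.statistics.ComputedStatistics;

import java.util.Collection;
import java.util.List;

import static io.airlift.concurrent.MoreFutures.getFutureValue;

// Hypothetical helper distilling the CTAS write path exercised by the tests in this section.
final class PageSinkWriteSketch
{
    private PageSinkWriteSketch() {}

    static void writeAndFinishCreateTable(
            ConnectorMetadata metadata,
            ConnectorPageSinkProvider pageSinkProvider,
            ConnectorTransactionHandle transactionHandle,
            ConnectorSession session,
            ConnectorOutputTableHandle createTableHandle,
            List<Page> pages,
            List<ComputedStatistics> statistics)
    {
        // the sink is bound to the output handle returned by ConnectorMetadata.beginCreateTable
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transactionHandle, session, createTableHandle);
        for (Page page : pages) {
            sink.appendPage(page);
        }
        // finish() returns a future of opaque fragments describing what was written
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // the fragments and any computed statistics go back to the metadata layer to commit the table
        metadata.finishCreateTable(session, createTableHandle, fragments, statistics);
    }
}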
Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.
From class TestHiveGlueMetastore, method testUpdateStatisticsOnCreate:
@Test
public void testUpdateStatisticsOnCreate()
{
    SchemaTableName tableName = temporaryTable("update_statistics_create");
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT));
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE));
        ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);

        // write data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
        MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT)
                .row(1L).row(2L).row(3L).row(4L).row(5L)
                .build();
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());

        // prepare statistics
        ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of())
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(5))
                .build();

        // finish CTAS
        metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics));
        transaction.commit();
    }
    finally {
        dropTable(tableName);
    }
}
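The only difference between the two Glue tests is how the statistics are grouped. The partitioned variant builds one ComputedStatistics per partition, passing the partition column name and a single-value block identifying the partition to ComputedStatistics.builder; the unpartitioned variant passes two empty lists, which yields table-level statistics. In both cases the row counts and min/max values simply mirror the rows that were written through the sink.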
Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.
From class TestRaptorConnector, method assertSplitShard:
private void assertSplitShard(Type temporalType, String min, String max, int expectedSplits)
        throws Exception
{
    ConnectorSession session = TestingConnectorSession.builder()
            .setPropertyMetadata(new RaptorSessionProperties(new StorageManagerConfig()).getSessionProperties())
            .build();

    ConnectorTransactionHandle transaction = beginTransaction();
    connector.getMetadata(SESSION, transaction).createTable(
            SESSION,
            new ConnectorTableMetadata(
                    new SchemaTableName("test", "test"),
                    ImmutableList.of(new ColumnMetadata("id", BIGINT), new ColumnMetadata("time", temporalType)),
                    ImmutableMap.of(TEMPORAL_COLUMN_PROPERTY, "time")),
            false);
    connector.commit(transaction);

    ConnectorTransactionHandle txn1 = beginTransaction();
    ConnectorTableHandle handle1 = getTableHandle(connector.getMetadata(SESSION, txn1), "test");
    ConnectorInsertTableHandle insertTableHandle = connector.getMetadata(SESSION, txn1).beginInsert(session, handle1);
    ConnectorPageSink raptorPageSink = connector.getPageSinkProvider().createPageSink(txn1, session, insertTableHandle);

    Object timestamp1 = null;
    Object timestamp2 = null;
    if (temporalType.equals(TIMESTAMP_MILLIS)) {
        timestamp1 = SqlTimestamp.newInstance(3, castToShortTimestamp(TIMESTAMP_MILLIS.getPrecision(), min), 0);
        timestamp2 = SqlTimestamp.newInstance(3, castToShortTimestamp(TIMESTAMP_MILLIS.getPrecision(), max), 0);
    }
    else if (temporalType.equals(DATE)) {
        timestamp1 = new SqlDate(parseDate(min));
        timestamp2 = new SqlDate(parseDate(max));
    }

    Page inputPage = MaterializedResult.resultBuilder(session, ImmutableList.of(BIGINT, temporalType))
            .row(1L, timestamp1).row(2L, timestamp2)
            .build()
            .toPage();
    raptorPageSink.appendPage(inputPage);
    Collection<Slice> shards = raptorPageSink.finish().get();
    assertEquals(shards.size(), expectedSplits);

    connector.getMetadata(session, txn1).dropTable(session, handle1);
    connector.commit(txn1);
}
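A hypothetical invocation (the argument values are illustrative, not taken from the test class) would be assertSplitShard(DATE, "2001-08-22", "2001-08-23", 2): because the table declares "time" as its temporal column via TEMPORAL_COLUMN_PROPERTY, Raptor is expected to place the two inserted rows, whose temporal values fall on different days, into separate shards, so finish() should return two shard fragments.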
Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.
From class TestMemoryPagesStore, method insertToTable:
private void insertToTable(long tableId, Page page, Long... activeTableIds)
{
    ConnectorPageSink pageSink = pageSinkProvider.createPageSink(MemoryTransactionHandle.INSTANCE, SESSION, createMemoryInsertTableHandle(tableId, activeTableIds));
    pageSink.appendPage(page);
    // finish() returns a future of fragments; this helper does not need them
    pageSink.finish();
}
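Note that this helper ignores the future returned by finish(), presumably because the memory connector's sink applies pages synchronously to the in-memory pages store. The Hive examples in this section instead wait for it with getFutureValue(sink.finish()) and pass the resulting fragments to finishCreateTable or finishInsert to commit the write.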
Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.
From class AbstractTestHive, method doInsertIntoExistingPartition:
private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, SchemaTableName tableName)
        throws Exception
{
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);

    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        // insert the data
        insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);

        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);

            // verify partitions were created
            List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
                    .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
            assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream()
                    .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1))
                    .collect(toImmutableList()));

            // load the new table
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

            // verify the data
            resultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());

            // test statistics
            for (String partitionName : partitionNames) {
                HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, partitionName);
                assertEquals(statistics.getRowCount().getAsLong(), i + 1L);
                assertEquals(statistics.getFileCount().getAsLong(), i + 1L);
                assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
                assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
            }
        }
    }

    // test rollback
    Set<String> existingFiles;
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);

        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());

        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session);
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertTrue(!tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }

        // verify statistics are visible from within the current transaction
        List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 5L);
        }

        // rollback insert
        transaction.rollback();
    }

    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

        // verify the data is unchanged
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());

        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);

        // verify temp directory is empty
        HdfsContext hdfsContext = new HdfsContext(session);
        assertTrue(listAllDataFiles(hdfsContext, stagingPathRoot).isEmpty());

        // verify statistics have been rolled back
        List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 3L);
        }
    }
}
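The rollback half of the test mirrors the commit path: the staged insert uses the same beginInsert, createPageSink, appendPage, finish and finishInsert sequence, and the two extra pages are visible inside the transaction as a partition row count of 5, but after transaction.rollback() the table directory still contains only the original files, the staging directory is empty, and the partition statistics are back to a row count of 3.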