Example 6 with ConnectorPageSink

Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.

From class TestHiveGlueMetastore, method testUpdatePartitionedStatisticsOnCreate.

@Test
public void testUpdatePartitionedStatisticsOnCreate() {
    SchemaTableName tableName = temporaryTable("update_partitioned_statistics_create");
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT), new ColumnMetadata("part_column", BigintType.BIGINT));
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE, ImmutableList.of("part_column")));
        ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
        MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT, BigintType.BIGINT)
                .row(1L, 1L)
                .row(2L, 1L)
                .row(3L, 1L)
                .row(4L, 2L)
                .row(5L, 2L)
                .build();
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // prepare statistics
        ComputedStatistics statistics1 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(1)))
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(3))
                .build();
        ComputedStatistics statistics2 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(2)))
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(2))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(4))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(2))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(2))
                .build();
        // finish CTAS
        metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics1, statistics2));
        transaction.commit();
    } finally {
        dropTable(tableName);
    }
}
Also used: ColumnStatisticMetadata (io.trino.spi.statistics.ColumnStatisticMetadata), ColumnMetadata (io.trino.spi.connector.ColumnMetadata), SchemaTableName (io.trino.spi.connector.SchemaTableName), ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle), Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice), Slice (io.airlift.slice.Slice), ComputedStatistics (io.trino.spi.statistics.ComputedStatistics), ConnectorSession (io.trino.spi.connector.ConnectorSession), ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata), ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink), MaterializedResult (io.trino.testing.MaterializedResult), ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata), Test (org.testng.annotations.Test)
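
Each ComputedStatistics group is keyed by the partition column: the builder's first argument lists the grouping column names and the second gives their values for that group, so statistics1 describes partition part_column=1 (three rows) and statistics2 describes part_column=2 (two rows). The singleValueBlock helper is defined elsewhere in the test class; a minimal sketch of what such a helper could look like, assuming it wraps a single long in a one-position BIGINT block:

import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.BigintType;

// Hypothetical reconstruction of the test's singleValueBlock helper: builds a
// one-position BIGINT block holding the given value.
private static Block singleValueBlock(long value)
{
    BlockBuilder blockBuilder = BigintType.BIGINT.createBlockBuilder(null, 1);
    BigintType.BIGINT.writeLong(blockBuilder, value);
    return blockBuilder.build();
}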

Example 7 with ConnectorPageSink

Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.

From class TestHiveGlueMetastore, method testUpdateStatisticsOnCreate.

@Test
public void testUpdateStatisticsOnCreate() {
    SchemaTableName tableName = temporaryTable("update_statistics_create");
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT));
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE));
        ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
        MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT)
                .row(1L)
                .row(2L)
                .row(3L)
                .row(4L)
                .row(5L)
                .build();
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // prepare statistics
        ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of())
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(5))
                .build();
        // finish CTAS
        metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics));
        transaction.commit();
    } finally {
        dropTable(tableName);
    }
}
Also used: ColumnStatisticMetadata (io.trino.spi.statistics.ColumnStatisticMetadata), ColumnMetadata (io.trino.spi.connector.ColumnMetadata), SchemaTableName (io.trino.spi.connector.SchemaTableName), ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle), Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice), Slice (io.airlift.slice.Slice), ComputedStatistics (io.trino.spi.statistics.ComputedStatistics), ConnectorSession (io.trino.spi.connector.ConnectorSession), ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata), ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink), MaterializedResult (io.trino.testing.MaterializedResult), ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata), Test (org.testng.annotations.Test)
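
The unpartitioned variant passes empty grouping columns and values to ComputedStatistics.builder, which marks the statistics as table-level rather than per-partition; the write path is otherwise identical to Example 6. That shared path, condensed into one place (stageData itself is a hypothetical helper, the other names come from the snippets above):

// Hypothetical helper condensing the page-sink lifecycle these tests repeat:
// create the sink inside the transaction, push a page, then collect the
// fragments that finishCreateTable or finishInsert expects.
private Collection<Slice> stageData(Transaction transaction, ConnectorSession session,
        ConnectorOutputTableHandle handle, MaterializedResult data)
{
    ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, handle);
    sink.appendPage(data.toPage());
    return getFutureValue(sink.finish());
}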

Example 8 with ConnectorPageSink

Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.

From class TestRaptorConnector, method assertSplitShard.

private void assertSplitShard(Type temporalType, String min, String max, int expectedSplits) throws Exception {
    ConnectorSession session = TestingConnectorSession.builder()
            .setPropertyMetadata(new RaptorSessionProperties(new StorageManagerConfig()).getSessionProperties())
            .build();
    ConnectorTransactionHandle transaction = beginTransaction();
    connector.getMetadata(SESSION, transaction).createTable(
            SESSION,
            new ConnectorTableMetadata(
                    new SchemaTableName("test", "test"),
                    ImmutableList.of(new ColumnMetadata("id", BIGINT), new ColumnMetadata("time", temporalType)),
                    ImmutableMap.of(TEMPORAL_COLUMN_PROPERTY, "time")),
            false);
    connector.commit(transaction);
    ConnectorTransactionHandle txn1 = beginTransaction();
    ConnectorTableHandle handle1 = getTableHandle(connector.getMetadata(SESSION, txn1), "test");
    ConnectorInsertTableHandle insertTableHandle = connector.getMetadata(SESSION, txn1).beginInsert(session, handle1);
    ConnectorPageSink raptorPageSink = connector.getPageSinkProvider().createPageSink(txn1, session, insertTableHandle);
    Object timestamp1 = null;
    Object timestamp2 = null;
    if (temporalType.equals(TIMESTAMP_MILLIS)) {
        timestamp1 = SqlTimestamp.newInstance(3, castToShortTimestamp(TIMESTAMP_MILLIS.getPrecision(), min), 0);
        timestamp2 = SqlTimestamp.newInstance(3, castToShortTimestamp(TIMESTAMP_MILLIS.getPrecision(), max), 0);
    } else if (temporalType.equals(DATE)) {
        timestamp1 = new SqlDate(parseDate(min));
        timestamp2 = new SqlDate(parseDate(max));
    }
    Page inputPage = MaterializedResult.resultBuilder(session, ImmutableList.of(BIGINT, temporalType))
            .row(1L, timestamp1)
            .row(2L, timestamp2)
            .build()
            .toPage();
    raptorPageSink.appendPage(inputPage);
    Collection<Slice> shards = raptorPageSink.finish().get();
    assertEquals(shards.size(), expectedSplits);
    connector.getMetadata(session, txn1).dropTable(session, handle1);
    connector.commit(txn1);
}
Also used: ColumnMetadata (io.trino.spi.connector.ColumnMetadata), ConnectorInsertTableHandle (io.trino.spi.connector.ConnectorInsertTableHandle), ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle), Page (io.trino.spi.Page), SchemaTableName (io.trino.spi.connector.SchemaTableName), StorageManagerConfig (io.trino.plugin.raptor.legacy.storage.StorageManagerConfig), ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle), Slice (io.airlift.slice.Slice), SqlDate (io.trino.spi.type.SqlDate), ConnectorSession (io.trino.spi.connector.ConnectorSession), TestingConnectorSession (io.trino.testing.TestingConnectorSession), ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink), ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata)
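
Raptor splits written data into shards along the table's temporal column, so writing two rows at min and max should yield expectedSplits shards when the values fall into different temporal ranges. The parseDate helper is defined elsewhere in the test; a plausible sketch, assuming it converts an ISO-8601 date string into the days-since-epoch count that SqlDate expects:

import java.time.LocalDate;
import static java.lang.Math.toIntExact;

// Hypothetical sketch of the parseDate helper: SqlDate is constructed from a
// day count relative to 1970-01-01.
private static int parseDate(String date)
{
    return toIntExact(LocalDate.parse(date).toEpochDay());
}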

Example 9 with ConnectorPageSink

Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.

From class TestMemoryPagesStore, method insertToTable.

private void insertToTable(long tableId, Page page, Long... activeTableIds) {
    ConnectorPageSink pageSink = pageSinkProvider.createPageSink(
            MemoryTransactionHandle.INSTANCE, SESSION, createMemoryInsertTableHandle(tableId, activeTableIds));
    pageSink.appendPage(page);
    pageSink.finish();
}
Also used: ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink)
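
Note that this snippet drops the CompletableFuture returned by finish(), which hides write completion; a stricter variant waits on it, mirroring the getFutureValue(sink.finish()) pattern the Hive examples use (getFutureValue is assumed to be imported from io.airlift.concurrent.MoreFutures, as in those examples):

// Same insert path, but waiting for the sink to complete; the returned
// fragments are connector-specific and simply discarded here.
private void insertToTable(long tableId, Page page, Long... activeTableIds)
{
    ConnectorPageSink pageSink = pageSinkProvider.createPageSink(
            MemoryTransactionHandle.INSTANCE, SESSION, createMemoryInsertTableHandle(tableId, activeTableIds));
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());
}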

Example 10 with ConnectorPageSink

Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.

From class AbstractTestHive, method doInsertIntoExistingPartition.

private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        // insert the data
        insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            // verify partitions were created
            List<String> partitionNames = transaction.getMetastore()
                    .getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
                    .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
            assertEqualsIgnoreOrder(
                    partitionNames,
                    CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream()
                            .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1))
                            .collect(toImmutableList()));
            // load the new table
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            // verify the data
            resultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
            // test statistics
            for (String partitionName : partitionNames) {
                HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, partitionName);
                assertEquals(statistics.getRowCount().getAsLong(), i + 1L);
                assertEquals(statistics.getFileCount().getAsLong(), i + 1L);
                assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
                assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
            }
        }
    }
    // test rollback
    Set<String> existingFiles;
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session);
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // verify statistics are visible from within of the current transaction
        List<String> partitionNames = transaction.getMetastore()
                .getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 5L);
        }
        // rollback insert
        transaction.rollback();
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the data is unchanged
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify temp directory is empty
        HdfsContext hdfsContext = new HdfsContext(session);
        assertTrue(listAllDataFiles(hdfsContext, stagingPathRoot).isEmpty());
        // verify statistics have been rolled back
        List<String> partitionNames = transaction.getMetastore()
                .getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 3L);
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveSessionProperties.getTemporaryStagingDirectoryPath (io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath), HiveColumnHandle.bucketColumnHandle (io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle), ColumnHandle (io.trino.spi.connector.ColumnHandle), ConnectorInsertTableHandle (io.trino.spi.connector.ConnectorInsertTableHandle), Constraint (io.trino.spi.connector.Constraint), ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle), Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice), Slice (io.airlift.slice.Slice), ConnectorSession (io.trino.spi.connector.ConnectorSession), TestingConnectorSession (io.trino.testing.TestingConnectorSession), ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata), HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext), MaterializedResult (io.trino.testing.MaterializedResult), ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink)
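
The row-count assertions follow directly from the write pattern: each committed loop iteration inserts one row per partition, so after three inserts a partition reports 3 rows; the staged insert appends CREATE_TABLE_PARTITIONED_DATA twice, so inside the open transaction the same partition briefly reports 3 + 2 = 5 rows. Rolling back discards the staged files (all prefixed with the query ID under the staging root) and reverts the partition statistics to 3 rows, which the final transaction verifies.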

Aggregations

ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink): 18
ConnectorSession (io.trino.spi.connector.ConnectorSession): 15
Slice (io.airlift.slice.Slice): 14
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 13
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 12
MaterializedResult (io.trino.testing.MaterializedResult): 12
Path (org.apache.hadoop.fs.Path): 12
Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice): 11
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 11
HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext): 10
HiveSessionProperties.getTemporaryStagingDirectoryPath (io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath): 10
ColumnHandle (io.trino.spi.connector.ColumnHandle): 9
HiveColumnHandle.bucketColumnHandle (io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle): 8
ConnectorInsertTableHandle (io.trino.spi.connector.ConnectorInsertTableHandle): 8
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata): 8
Test (org.testng.annotations.Test): 7
Page (io.trino.spi.Page): 6
ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle): 6
Constraint (io.trino.spi.connector.Constraint): 6
ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 5