Example 16 with ConnectorTableMetadata

Use of io.trino.spi.connector.ConnectorTableMetadata in project trino by trinodb.

From class AbstractTestHiveFileSystem, method createTable.

private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", BIGINT)).build();
    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT).row(1L).row(3L).row(2L).build();
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
        // Hack to work around the metastore not being configured for S3 or other file systems.
        // The metastore tries to validate the location when creating the table, which fails
        // without explicit file-system configuration. We work around that by using a dummy
        // location when creating the table and updating it here to the correct location.
        metastoreClient.updateTableLocation(database, tableName.getTableName(), locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false).getTargetPath().toString());
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
        // verify the data
        metadata.beginQuery(session);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
        metadata.cleanupQuery(session);
    }
}
Also used: io.airlift.slice.Slice, io.trino.plugin.hive.AbstractTestHive.HiveTransaction, io.trino.plugin.hive.AbstractTestHive.Transaction, io.trino.plugin.hive.AbstractTestHive.filterNonHiddenColumnMetadata, io.trino.spi.connector.ColumnHandle, io.trino.spi.connector.ColumnMetadata, io.trino.spi.connector.ConnectorMetadata, io.trino.spi.connector.ConnectorOutputTableHandle, io.trino.spi.connector.ConnectorPageSink, io.trino.spi.connector.ConnectorPageSource, io.trino.spi.connector.ConnectorSession, io.trino.spi.connector.ConnectorSplit, io.trino.spi.connector.ConnectorSplitSource, io.trino.spi.connector.ConnectorTableHandle, io.trino.spi.connector.ConnectorTableMetadata, io.trino.testing.MaterializedResult
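
The first transaction above is the generic connector write lifecycle: begin the table, push pages through a sink, collect the fragments the sink returns, and hand them back to the metadata. The helper below distills that flow into one method. It is a minimal sketch, not an official Trino utility: it assumes the same SPI revision as the test (beginCreateTable takes a RetryMode) and airlift's MoreFutures.getFutureValue for unwrapping the sink's future, and the caller still has to commit the enclosing transaction for the table to become visible.

import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.trino.spi.connector.RetryMode.NO_RETRIES;

import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorMetadata;
import io.trino.spi.connector.ConnectorOutputTableHandle;
import io.trino.spi.connector.ConnectorPageSink;
import io.trino.spi.connector.ConnectorPageSinkProvider;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorTableMetadata;
import io.trino.spi.connector.ConnectorTransactionHandle;
import java.util.Collection;
import java.util.Optional;

static void writeNewTable(
        ConnectorMetadata metadata,
        ConnectorPageSinkProvider pageSinkProvider,
        ConnectorTransactionHandle transactionHandle,
        ConnectorSession session,
        ConnectorTableMetadata tableMetadata,
        Page page)
{
    // stage the table: the connector allocates a write location and returns a handle to it
    ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
    // write pages through a sink; finish() completes with opaque fragments describing the written data
    ConnectorPageSink sink = pageSinkProvider.createPageSink(transactionHandle, session, outputHandle);
    sink.appendPage(page);
    Collection<Slice> fragments = getFutureValue(sink.finish());
    // return the fragments so the connector can register the data and create the table;
    // the empty list is the (absent) computed statistics
    metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
}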

Example 17 with ConnectorTableMetadata

Use of io.trino.spi.connector.ConnectorTableMetadata in project trino by trinodb.

From class AbstractTestHive, method doInsert.

private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS);
    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        insertData(tableName, CREATE_TABLE_DATA);
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            // load the new table
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            // verify the metadata
            ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
            assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
            // verify the data
            resultBuilder.rows(CREATE_TABLE_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
            // statistics
            HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName);
            assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * (i + 1));
            assertEquals(tableStatistics.getFileCount().getAsLong(), i + 1L);
            assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    // test rollback
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // statistics, visible from within transaction
        HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName);
        assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 5L);
        try (Transaction otherTransaction = newTransaction()) {
            // statistics, not visible from outside transaction
            HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(otherTransaction, tableName);
            assertEquals(otherTableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
        }
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify all temp files start with the unique prefix
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        HdfsContext context = new HdfsContext(session);
        Set<String> tempFiles = listAllDataFiles(context, stagingPathRoot);
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // rollback insert
        transaction.rollback();
    }
    // verify temp directory is empty
    HdfsContext context = new HdfsContext(newSession());
    assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
    }
    // verify statistics unchanged
    try (Transaction transaction = newTransaction()) {
        HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName);
        assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
        assertEquals(statistics.getFileCount().getAsLong(), 3L);
    }
}
Also used: io.airlift.slice.Slice, io.airlift.slice.Slices.utf8Slice, io.trino.plugin.hive.HdfsEnvironment.HdfsContext, io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle, io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath, io.trino.spi.connector.ColumnHandle, io.trino.spi.connector.ConnectorInsertTableHandle, io.trino.spi.connector.ConnectorMetadata, io.trino.spi.connector.ConnectorPageSink, io.trino.spi.connector.ConnectorSession, io.trino.spi.connector.ConnectorTableHandle, io.trino.spi.connector.ConnectorTableMetadata, io.trino.spi.connector.Constraint, io.trino.testing.MaterializedResult, io.trino.testing.TestingConnectorSession, org.apache.hadoop.fs.Path
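
The rollback half of this test is the part worth internalizing: even after finishInsert, the Hive connector has only queued metastore and file-system operations on the transaction. That is why the new rows are visible to statistics reads inside the transaction (5x the row count) but not to a second transaction (still 3x), and why rollback() can discard everything. Below is a minimal sketch of the stage-then-rollback shape, reusing the test's own helpers and fields (newTransaction, getTableHandle, pageSinkProvider); page stands in for whatever data is being written.

try (Transaction transaction = newTransaction()) {
    ConnectorSession session = newSession();
    ConnectorMetadata metadata = transaction.getMetadata();
    ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
    // stage the insert: files land in a query-scoped staging directory,
    // each name prefixed with session.getQueryId()
    ConnectorInsertTableHandle insertHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
    ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertHandle);
    sink.appendPage(page);
    Collection<Slice> fragments = getFutureValue(sink.finish());
    metadata.finishInsert(session, insertHandle, fragments, ImmutableList.of());
    // the table directory is still untouched: commit() would move the staged files in,
    // rollback() deletes them and leaves the table exactly as it was
    transaction.rollback();
}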

Example 18 with ConnectorTableMetadata

Use of io.trino.spi.connector.ConnectorTableMetadata in project trino by trinodb.

From class AbstractTestHive, method doCreateTable.

protected void doCreateTable(SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
    String queryId;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        queryId = session.getQueryId();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, CREATE_TABLE_COLUMNS, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // verify all new files start with the unique prefix
        HdfsContext context = new HdfsContext(session);
        for (String filePath : listAllDataFiles(context, getStagingPathRoot(outputHandle))) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_DATA.getMaterializedRows());
        // verify the node version and query ID in table
        Table table = getMetastoreClient().getTable(tableName.getSchemaName(), tableName.getTableName()).get();
        assertEquals(table.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
        assertEquals(table.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        // verify basic statistics
        HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName);
        assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount());
        assertEquals(statistics.getFileCount().getAsLong(), 1L);
        assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
        assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
    }
}
Also used: io.airlift.slice.Slice, io.airlift.slice.Slices.utf8Slice, io.trino.plugin.hive.HdfsEnvironment.HdfsContext, io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle, io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath, io.trino.plugin.hive.metastore.Table, io.trino.spi.connector.ColumnHandle, io.trino.spi.connector.ConnectorMetadata, io.trino.spi.connector.ConnectorOutputTableHandle, io.trino.spi.connector.ConnectorPageSink, io.trino.spi.connector.ConnectorSession, io.trino.spi.connector.ConnectorTableHandle, io.trino.spi.connector.ConnectorTableMetadata, io.trino.testing.MaterializedResult, io.trino.testing.TestingConnectorSession, org.apache.hadoop.fs.Path
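
Both create-table examples call createTableProperties(storageFormat), a test helper whose body is not shown on this page. Its output is the connector-specific property map that ConnectorTableMetadata carries alongside the schema/table name and columns. The reconstruction below is hypothetical: the property keys ("format", "partitioned_by", and so on) are an assumption based on the Hive connector's table properties, not something these examples confirm.

// Hypothetical sketch of what createTableProperties(ORC) might assemble.
// Assumes Guava's ImmutableMap/ImmutableList, io.trino.spi.type.BigintType.BIGINT,
// and Hive connector property keys; verify the keys against HiveTableProperties before relying on them.
ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
        new SchemaTableName("test_schema", "test_table"),
        ImmutableList.of(new ColumnMetadata("id", BIGINT)),
        ImmutableMap.of(
                "format", HiveStorageFormat.ORC,
                "partitioned_by", ImmutableList.of(),
                "bucketed_by", ImmutableList.of(),
                "bucket_count", 0,
                "sorted_by", ImmutableList.of()));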

Example 19 with ConnectorTableMetadata

Use of io.trino.spi.connector.ConnectorTableMetadata in project trino by trinodb.

From class AbstractTestHive, method testGetRecords.

@Test
public void testGetRecords() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertEquals(splits.size(), tablePartitionFormatPartitions.size());
        for (ConnectorSplit split : splits) {
            HiveSplit hiveSplit = (HiveSplit) split;
            List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
            String ds = partitionKeys.get(0).getValue();
            String fileFormat = partitionKeys.get(1).getValue();
            HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH));
            int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue());
            long rowNumber = 0;
            long completedBytes = 0;
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                assertPageSourceType(pageSource, fileType);
                for (MaterializedRow row : result) {
                    try {
                        assertValueTypes(row, tableMetadata.getColumns());
                    } catch (RuntimeException e) {
                        throw new RuntimeException("row " + rowNumber, e);
                    }
                    rowNumber++;
                    Object value = row.getField(columnIndex.get("t_string"));
                    if (rowNumber % 19 == 0) {
                        assertNull(value);
                    } else if (rowNumber % 19 == 1) {
                        assertEquals(value, "");
                    } else {
                        assertEquals(value, "test");
                    }
                    assertEquals(row.getField(columnIndex.get("t_tinyint")), (byte) (1 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_smallint")), (short) (2 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_int")), 3 + (int) rowNumber);
                    if (rowNumber % 13 == 0) {
                        assertNull(row.getField(columnIndex.get("t_bigint")));
                    } else {
                        assertEquals(row.getField(columnIndex.get("t_bigint")), 4 + rowNumber);
                    }
                    assertEquals((Float) row.getField(columnIndex.get("t_float")), 5.1f + rowNumber, 0.001);
                    assertEquals(row.getField(columnIndex.get("t_double")), 6.2 + rowNumber);
                    if (rowNumber % 3 == 2) {
                        assertNull(row.getField(columnIndex.get("t_boolean")));
                    } else {
                        assertEquals(row.getField(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
                    }
                    assertEquals(row.getField(columnIndex.get("ds")), ds);
                    assertEquals(row.getField(columnIndex.get("file_format")), fileFormat);
                    assertEquals(row.getField(columnIndex.get("dummy")), dummyPartition);
                    long newCompletedBytes = pageSource.getCompletedBytes();
                    assertTrue(newCompletedBytes >= completedBytes);
                    assertTrue(newCompletedBytes <= hiveSplit.getLength());
                    completedBytes = newCompletedBytes;
                }
                assertTrue(completedBytes <= hiveSplit.getLength());
                assertEquals(rowNumber, 100);
            }
        }
    }
}
Also used: io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle, io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat, io.trino.spi.connector.ColumnHandle, io.trino.spi.connector.ConnectorMetadata, io.trino.spi.connector.ConnectorPageSource, io.trino.spi.connector.ConnectorSession, io.trino.spi.connector.ConnectorSplit, io.trino.spi.connector.ConnectorTableHandle, io.trino.spi.connector.ConnectorTableMetadata, io.trino.spi.connector.Constraint, io.trino.testing.MaterializedResult, io.trino.testing.MaterializedRow, io.trino.testing.TestingConnectorSession, org.testng.annotations.Test
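
Stripped of the per-column assertions, the read path in this test is the standard connector sequence: begin the query, enumerate splits, open a page source per split, and materialize its pages into rows. A condensed sketch using only calls that already appear above:

metadata.beginQuery(session);
for (ConnectorSplit split : getAllSplits(tableHandle, transaction, session)) {
    try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(
            transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
        // materializeSourceDataStream drains the page source into comparable rows;
        // DynamicFilter.EMPTY means no runtime filter narrows the scan
        MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
        // assertions on result go here
    }
}
metadata.cleanupQuery(session);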

Example 20 with ConnectorTableMetadata

Use of io.trino.spi.connector.ConnectorTableMetadata in project trino by trinodb.

From class AbstractTestHive, method testGetTableSchemaOfflinePartition.

@Test
public void testGetTableSchemaOfflinePartition() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableOfflinePartition);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(newSession(), tableHandle);
        Map<String, ColumnMetadata> map = uniqueIndex(tableMetadata.getColumns(), ColumnMetadata::getName);
        assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false);
    }
}
Also used: io.trino.spi.connector.ColumnMetadata, io.trino.spi.connector.ConnectorMetadata, io.trino.spi.connector.ConnectorTableHandle, io.trino.spi.connector.ConnectorTableMetadata, org.testng.annotations.Test
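
Two notes on this short test. First, uniqueIndex is presumably the static import of Guava's Maps.uniqueIndex: it builds an ImmutableMap keyed by the extracted column name and throws IllegalArgumentException on a duplicate key, so it doubles as an implicit check that column names are unique. Second, the point of the test is that table metadata stays readable even when a partition is marked offline; only reads against the offline partition itself are expected to fail. The lookup in standalone form:

// Equivalent standalone form of the lookup above, assuming com.google.common.collect.Maps
Map<String, ColumnMetadata> byName = Maps.uniqueIndex(tableMetadata.getColumns(), ColumnMetadata::getName);
assertPrimitiveField(byName, "t_string", createUnboundedVarcharType(), false);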

Aggregations

ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata): 113
SchemaTableName (io.trino.spi.connector.SchemaTableName): 69
ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 62
Test (org.testng.annotations.Test): 48
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 39
ConnectorSession (io.trino.spi.connector.ConnectorSession): 35
ImmutableList (com.google.common.collect.ImmutableList): 34
List (java.util.List): 32
ImmutableMap (com.google.common.collect.ImmutableMap): 31
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 30
ColumnHandle (io.trino.spi.connector.ColumnHandle): 25
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 24
ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle): 20
Optional (java.util.Optional): 20
TrinoException (io.trino.spi.TrinoException): 19
Map (java.util.Map): 19
Type (io.trino.spi.type.Type): 18
CatalogSchemaTableName (io.trino.spi.connector.CatalogSchemaTableName): 17
Constraint (io.trino.spi.connector.Constraint): 17
ImmutableSet (com.google.common.collect.ImmutableSet): 16