Search in sources :

Example 26 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class AbstractTestHive method doInsert.

private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS);
    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        insertData(tableName, CREATE_TABLE_DATA);
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            // load the new table
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            // verify the metadata
            ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
            assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
            // verify the data
            resultBuilder.rows(CREATE_TABLE_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
            // statistics
            HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName);
            assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * (i + 1));
            assertEquals(tableStatistics.getFileCount().getAsLong(), i + 1L);
            assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    // test rollback
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // statistics, visible from within transaction
        HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName);
        assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 5L);
        try (Transaction otherTransaction = newTransaction()) {
            // statistics, not visible from outside transaction
            HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(otherTransaction, tableName);
            assertEquals(otherTableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
        }
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify all temp files start with the unique prefix
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        HdfsContext context = new HdfsContext(session);
        Set<String> tempFiles = listAllDataFiles(context, stagingPathRoot);
        assertTrue(!tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // rollback insert
        transaction.rollback();
    }
    // verify temp directory is empty
    HdfsContext context = new HdfsContext(newSession());
    assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
    }
    // verify statistics unchanged
    try (Transaction transaction = newTransaction()) {
        HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName);
        assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
        assertEquals(statistics.getFileCount().getAsLong(), 3L);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Constraint(io.trino.spi.connector.Constraint) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata)

Example 27 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class AbstractTestHive method doCreateTable.

protected void doCreateTable(SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
    String queryId;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        queryId = session.getQueryId();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, CREATE_TABLE_COLUMNS, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // verify all new files start with the unique prefix
        HdfsContext context = new HdfsContext(session);
        for (String filePath : listAllDataFiles(context, getStagingPathRoot(outputHandle))) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_DATA.getMaterializedRows());
        // verify the node version and query ID in table
        Table table = getMetastoreClient().getTable(tableName.getSchemaName(), tableName.getTableName()).get();
        assertEquals(table.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
        assertEquals(table.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        // verify basic statistics
        HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName);
        assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount());
        assertEquals(statistics.getFileCount().getAsLong(), 1L);
        assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
        assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) Table(io.trino.plugin.hive.metastore.Table) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata)

Example 28 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class AbstractTestHive method assertEmptyFile.

private void assertEmptyFile(HiveStorageFormat format) throws Exception {
    SchemaTableName tableName = temporaryTable("empty_file");
    try {
        List<Column> columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty()));
        createEmptyTable(tableName, format, columns, ImmutableList.of());
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            Table table = transaction.getMetastore().getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(AssertionError::new);
            // verify directory is empty
            HdfsContext context = new HdfsContext(session);
            Path location = new Path(table.getStorage().getLocation());
            assertTrue(listDirectory(context, location).isEmpty());
            // read table with empty directory
            readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.of(0), Optional.of(ORC));
            // create empty file
            FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, location);
            assertTrue(fileSystem.createNewFile(new Path(location, "empty-file")));
            assertEquals(listDirectory(context, location), ImmutableList.of("empty-file"));
            // read table with empty file
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.of(0), Optional.empty());
            assertEquals(result.getRowCount(), 0);
        }
    } finally {
        dropTable(tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) Table(io.trino.plugin.hive.metastore.Table) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Column(io.trino.plugin.hive.metastore.Column) ViewColumn(io.trino.spi.connector.ConnectorViewDefinition.ViewColumn) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) FileSystem(org.apache.hadoop.fs.FileSystem) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) MaterializedResult(io.trino.testing.MaterializedResult)

Example 29 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class AbstractTestHive method testGetRecords.

@Test
public void testGetRecords() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertEquals(splits.size(), tablePartitionFormatPartitions.size());
        for (ConnectorSplit split : splits) {
            HiveSplit hiveSplit = (HiveSplit) split;
            List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
            String ds = partitionKeys.get(0).getValue();
            String fileFormat = partitionKeys.get(1).getValue();
            HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH));
            int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue());
            long rowNumber = 0;
            long completedBytes = 0;
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                assertPageSourceType(pageSource, fileType);
                for (MaterializedRow row : result) {
                    try {
                        assertValueTypes(row, tableMetadata.getColumns());
                    } catch (RuntimeException e) {
                        throw new RuntimeException("row " + rowNumber, e);
                    }
                    rowNumber++;
                    Object value;
                    value = row.getField(columnIndex.get("t_string"));
                    if (rowNumber % 19 == 0) {
                        assertNull(value);
                    } else if (rowNumber % 19 == 1) {
                        assertEquals(value, "");
                    } else {
                        assertEquals(value, "test");
                    }
                    assertEquals(row.getField(columnIndex.get("t_tinyint")), (byte) (1 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_smallint")), (short) (2 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_int")), 3 + (int) rowNumber);
                    if (rowNumber % 13 == 0) {
                        assertNull(row.getField(columnIndex.get("t_bigint")));
                    } else {
                        assertEquals(row.getField(columnIndex.get("t_bigint")), 4 + rowNumber);
                    }
                    assertEquals((Float) row.getField(columnIndex.get("t_float")), 5.1f + rowNumber, 0.001);
                    assertEquals(row.getField(columnIndex.get("t_double")), 6.2 + rowNumber);
                    if (rowNumber % 3 == 2) {
                        assertNull(row.getField(columnIndex.get("t_boolean")));
                    } else {
                        assertEquals(row.getField(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
                    }
                    assertEquals(row.getField(columnIndex.get("ds")), ds);
                    assertEquals(row.getField(columnIndex.get("file_format")), fileFormat);
                    assertEquals(row.getField(columnIndex.get("dummy")), dummyPartition);
                    long newCompletedBytes = pageSource.getCompletedBytes();
                    assertTrue(newCompletedBytes >= completedBytes);
                    assertTrue(newCompletedBytes <= hiveSplit.getLength());
                    completedBytes = newCompletedBytes;
                }
                assertTrue(completedBytes <= hiveSplit.getLength());
                assertEquals(rowNumber, 100);
            }
        }
    }
}
Also used : HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) Constraint(io.trino.spi.connector.Constraint) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) MaterializedRow(io.trino.testing.MaterializedRow) Test(org.testng.annotations.Test)

Example 30 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class AbstractTestHive method doTestBucketedTableValidation.

private void doTestBucketedTableValidation(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    prepareInvalidBuckets(storageFormat, tableName);
    // read succeeds when validation is disabled
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(ImmutableMap.of("validate_bucketing", false));
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        // fewer rows due to deleted file
        assertEquals(result.getRowCount(), 87);
    }
    // read fails due to validation failure
    assertReadFailsWithMessageMatching(storageFormat, tableName, "Hive table is corrupt\\. File '.*/000002_0_.*' is for bucket 2, but contains a row for bucket 5.");
}
Also used : HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)

Aggregations

ColumnHandle (io.trino.spi.connector.ColumnHandle)268 Test (org.testng.annotations.Test)121 ImmutableList (com.google.common.collect.ImmutableList)106 TupleDomain (io.trino.spi.predicate.TupleDomain)104 ImmutableMap (com.google.common.collect.ImmutableMap)93 ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle)86 Domain (io.trino.spi.predicate.Domain)86 Map (java.util.Map)78 ConnectorSession (io.trino.spi.connector.ConnectorSession)76 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)71 Optional (java.util.Optional)70 List (java.util.List)69 SchemaTableName (io.trino.spi.connector.SchemaTableName)66 Constraint (io.trino.spi.connector.Constraint)61 Objects.requireNonNull (java.util.Objects.requireNonNull)53 ImmutableSet (com.google.common.collect.ImmutableSet)51 NullableValue (io.trino.spi.predicate.NullableValue)50 Type (io.trino.spi.type.Type)48 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)45 ColumnMetadata (io.trino.spi.connector.ColumnMetadata)45