Search in sources:

Example 16 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project boostkit-bigdata by kunpengcompute.

From class AbstractTestHive, method doInsertOverwriteUnpartitioned. The test creates an unpartitioned ORC table, overwrites it three times (once via the insert_existing_partitions_behavior session property, then twice via the INSERT OVERWRITE syntax), and verifies that a rolled-back staged insert leaves the data, files, and statistics untouched.

private void doInsertOverwriteUnpartitioned(SchemaTableName tableName) throws Exception {
    // create table with data
    doCreateEmptyTable(tableName, ORC, CREATE_TABLE_COLUMNS);
    insertData(tableName, CREATE_TABLE_DATA);
    // overwrite table with new data
    MaterializedResult.Builder overwriteDataBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes());
    MaterializedResult overwriteData = null;
    Map<String, Object> overwriteProperties = ImmutableMap.of("insert_existing_partitions_behavior", "OVERWRITE");
    for (int i = 0; i < 3; i++) {
        overwriteDataBuilder.rows(reverse(CREATE_TABLE_DATA.getMaterializedRows()));
        overwriteData = overwriteDataBuilder.build();
        if (i == 0) {
            // change the insert behavior to insert overwrite via session property
            insertData(tableName, overwriteData, overwriteProperties);
        } else {
            // do insert overwrite via "INSERT OVERWRITE" syntax
            insertOverwriteData(tableName, overwriteData, ImmutableMap.of());
        }
        // verify overwrite
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            // load the new table
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            // verify the metadata
            ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
            assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
            // verify the data
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
            assertEqualsIgnoreOrder(result.getMaterializedRows(), overwriteData.getMaterializedRows());
            // statistics
            HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(session, transaction, tableName);
            assertEquals(tableStatistics.getRowCount().getAsLong(), overwriteData.getRowCount());
            assertEquals(tableStatistics.getFileCount().getAsLong(), 1L);
            assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    // test rollback
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession(overwriteProperties);
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        for (int i = 0; i < 4; i++) {
            sink.appendPage(overwriteData.toPage());
        }
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // statistics, visible from within transaction
        HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(session, transaction, tableName);
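        // the overwrite-mode insert replaces the table contents, so the four staged pages alone determine the row count asserted below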
        assertEquals(tableStatistics.getRowCount().getAsLong(), overwriteData.getRowCount() * 4L);
        try (Transaction otherTransaction = newTransaction()) {
            // statistics, not visible from outside transaction
            HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(session, otherTransaction, tableName);
            assertEquals(otherTableStatistics.getRowCount().getAsLong(), overwriteData.getRowCount());
        }
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify all temp files start with the unique prefix
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, stagingPathRoot);
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // rollback insert
        transaction.rollback();
    }
    // verify temp directory is empty
    HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
    assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), overwriteData.getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
    }
    // verify statistics unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        HiveBasicStatistics statistics = getBasicStatisticsForTable(session, transaction, tableName);
        assertEquals(statistics.getRowCount().getAsLong(), overwriteData.getRowCount());
        assertEquals(statistics.getFileCount().getAsLong(), 1L);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) Constraint(io.prestosql.spi.connector.Constraint) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) MaterializedResult(io.prestosql.testing.MaterializedResult) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata)
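Every staged insert in these examples drives the same SPI sequence. A minimal sketch of that lifecycle, reusing names from the snippet above (data stands for a MaterializedResult such as CREATE_TABLE_DATA; this is a distillation of the code shown, not a standalone program):

ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
// append any number of pages; the sink writes them into staging files
sink.appendPage(data.toPage());
// finish() flushes the writers and returns opaque fragments describing the written files
Collection<Slice> fragments = getFutureValue(sink.finish());
// finishInsert() publishes the fragments to the table's metadata;
// calling transaction.rollback() instead discards the staged files
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());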

Example 17 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project boostkit-bigdata by kunpengcompute.

From class AbstractTestHive, method doInsert. The test appends the same data set three times, checking row count, file count, and data-size statistics after each insert, then stages two more pages through a ConnectorPageSink and rolls the transaction back, verifying that data, files, and statistics are unchanged.

private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS);
    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        insertData(tableName, CREATE_TABLE_DATA);
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            // load the new table
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            // verify the metadata
            ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
            assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
            // verify the data
            resultBuilder.rows(CREATE_TABLE_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
            // statistics
            HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(session, transaction, tableName);
            assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * (i + 1));
            assertEquals(tableStatistics.getFileCount().getAsLong(), i + 1L);
            assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    // test rollback
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // "stage" insert data
        metadata.beginQuery(session);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // statistics, visible from within transaction
        HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(session, transaction, tableName);
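        // three committed inserts plus the two pages staged above: 5x the base row count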
        assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 5L);
        try (Transaction otherTransaction = newTransaction()) {
            // statistics, not visible from outside transaction
            HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(session, otherTransaction, tableName);
            assertEquals(otherTableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
        }
        // verify all temp files start with the unique prefix
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, stagingPathRoot);
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // rollback insert
        transaction.rollback();
    }
    // verify temp directory is empty
    HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
    assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
    }
    // verify statistics unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        HiveBasicStatistics statistics = getBasicStatisticsForTable(session, transaction, tableName);
        assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
        assertEquals(statistics.getFileCount().getAsLong(), 3L);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) Constraint(io.prestosql.spi.connector.Constraint) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) MaterializedResult(io.prestosql.testing.MaterializedResult) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata)
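The insertData helper that these tests call repeatedly is not part of the snippet. Judging from the staged-insert blocks above, its two-argument form is roughly the following (a sketch reconstructed from the code shown here, not the verbatim helper):

private void insertData(SchemaTableName tableName, MaterializedResult data) throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // stage the rows through a page sink, then commit the transaction
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
}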

Example 18 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project boostkit-bigdata by kunpengcompute.

From class TestHivePageSink, method writeTestFile. The method generates TPC-H lineitem rows into a single Page, writes it through a ConnectorPageSink, reads the resulting file back through a ConnectorPageSource, asserts the round trip matches, and returns the output file's length.

private static long writeTestFile(HiveConfig config, HiveMetastore metastore, String outputPath) {
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    HiveWriterStats stats = new HiveWriterStats();
    ConnectorPageSink pageSink = createPageSink(transaction, config, metastore, new Path("file:///" + outputPath), stats);
    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream().map(LineItemColumn::getType).map(TestHivePageSink::getHiveType).map(hiveType -> hiveType.getType(HiveTestUtils.TYPE_MANAGER)).collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    int rows = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        rows++;
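        // note: the loop breaks before writing the NUM_ROWS-th row, so NUM_ROWS - 1 rows are buffered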
        if (rows >= NUM_ROWS) {
            break;
        }
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            LineItemColumn column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(blockBuilder, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }
    Page page = pageBuilder.build();
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());
    File outputDir = new File(outputPath);
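    // Hadoop's local ChecksumFileSystem writes a .crc sidecar next to each data file; filter those out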
    List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
    File outputFile = getOnlyElement(files);
    long length = outputFile.length();
    ConnectorPageSource pageSource = createPageSource(transaction, config, outputFile);
    List<Page> pages = new ArrayList<>();
    while (!pageSource.isFinished()) {
        Page nextPage = pageSource.getNextPage();
        if (nextPage != null) {
            pages.add(nextPage.getLoadedPage());
        }
    }
    MaterializedResult expectedResults = toMaterializedResult(getSession(config), columnTypes, ImmutableList.of(page));
    MaterializedResult results = toMaterializedResult(getSession(config), columnTypes, pages);
    assertEquals(results, expectedResults);
    assertEquals(round(stats.getInputPageSizeInBytes().getAllTime().getMax()), page.getRetainedSizeInBytes());
    return length;
}
Also used: Path(org.apache.hadoop.fs.Path) LineItemColumn(io.airlift.tpch.LineItemColumn) ArrayList(java.util.ArrayList) LineItem(io.airlift.tpch.LineItem) Page(io.prestosql.spi.Page) PageBuilder(io.prestosql.spi.PageBuilder) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Type(io.prestosql.spi.type.Type) TpchColumnType(io.airlift.tpch.TpchColumnType) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) MaterializedResult(io.prestosql.testing.MaterializedResult) File(java.io.File) LineItemGenerator(io.airlift.tpch.LineItemGenerator) BlockBuilder(io.prestosql.spi.block.BlockBuilder)
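In the surrounding test class, writeTestFile is driven once per storage format and compression codec to check that compression actually shrinks the output. A sketch of such a driver for a given HiveStorageFormat format, assuming the metastore and tempDir fields from the class setup and a hypothetical makeFileName helper that produces a unique path per configuration:

HiveConfig config = new HiveConfig()
        .setHiveStorageFormat(format)
        .setHiveCompressionCodec(NONE);
long uncompressedLength = writeTestFile(config, metastore, makeFileName(tempDir, config));
assertGreaterThan(uncompressedLength, 0L);
for (HiveCompressionCodec codec : HiveCompressionCodec.values()) {
    if (codec == NONE) {
        continue;
    }
    config.setHiveCompressionCodec(codec);
    // each codec should produce a strictly smaller file than the uncompressed baseline
    long compressedLength = writeTestFile(config, metastore, makeFileName(tempDir, config));
    assertTrue(compressedLength < uncompressedLength);
}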

Example 19 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project hetu-core by openlookeng.

From class AbstractTestHive, method doTestMismatchSchemaTable. The test inserts data, swaps the table's data columns through the metastore, verifies reads against both the old and the altered schema, and expects a subsequent insert with the mismatched schema to fail with HIVE_PARTITION_SCHEMA_MISMATCH.

protected void doTestMismatchSchemaTable(SchemaTableName schemaTableName, HiveStorageFormat storageFormat, List<ColumnMetadata> tableBefore, MaterializedResult dataBefore, List<ColumnMetadata> tableAfter, MaterializedResult dataAfter) throws Exception {
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    doCreateEmptyTable(schemaTableName, storageFormat, tableBefore);
    // insert the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(dataBefore.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // load the table and verify the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream().filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()).collect(toList());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), dataBefore.getMaterializedRows());
        transaction.commit();
    }
    // alter the table schema
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(session);
        Table oldTable = transaction.getMetastore(schemaName).getTable(new HiveIdentity(session), schemaName, tableName).get();
        HiveTypeTranslator hiveTypeTranslator = new HiveTypeTranslator();
        List<Column> dataColumns = tableAfter.stream().filter(columnMetadata -> !columnMetadata.getName().equals("ds")).map(columnMetadata -> new Column(columnMetadata.getName(), toHiveType(hiveTypeTranslator, columnMetadata.getType()), Optional.empty())).collect(toList());
        Table.Builder newTable = Table.builder(oldTable).setDataColumns(dataColumns);
        transaction.getMetastore(schemaName).replaceView(new HiveIdentity(session), schemaName, tableName, newTable.build(), principalPrivileges);
        transaction.commit();
    }
    // load the altered table and verify the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream().filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()).collect(toList());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), dataAfter.getMaterializedRows());
        transaction.commit();
    }
    // insertions to the partitions with type mismatches should fail
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(dataAfter.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
        fail("expected exception");
    } catch (PrestoException e) {
        // expected
        assertEquals(e.getErrorCode(), HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH.toErrorCode());
    }
}
Also used: HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Table(io.prestosql.plugin.hive.metastore.Table) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) PrestoException(io.prestosql.spi.PrestoException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) ViewColumn(io.prestosql.spi.connector.ConnectorViewDefinition.ViewColumn) Column(io.prestosql.plugin.hive.metastore.Column) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) MaterializedResult(io.prestosql.testing.MaterializedResult)
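The try/fail/catch at the end of the method can also be expressed with AssertJ's assertThatThrownBy (the class already uses AssertJ's assertThat). A sketch, where stageAndCommitInsert is a hypothetical helper wrapping the beginInsert/appendPage/finishInsert/commit sequence from the final block:

assertThatThrownBy(() -> stageAndCommitInsert(schemaTableName, dataAfter))
        .isInstanceOfSatisfying(PrestoException.class, e ->
                assertEquals(e.getErrorCode(), HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH.toErrorCode()));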

Example 20 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project hetu-core by openlookeng.

From class AbstractTestHive, method doInsertOverwriteUnpartitioned.

The method body and import list are identical, line for line, to those shown in Example 16 above, so they are not repeated here.

Aggregations

ConnectorPageSink (io.prestosql.spi.connector.ConnectorPageSink): 29 usages
ConnectorSession (io.prestosql.spi.connector.ConnectorSession): 26
TestingConnectorSession (io.prestosql.testing.TestingConnectorSession): 26
ConnectorMetadata (io.prestosql.spi.connector.ConnectorMetadata): 24
ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle): 24
Path (org.apache.hadoop.fs.Path): 24
Slice (io.airlift.slice.Slice): 22
HdfsContext (io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext): 22
Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice): 20
MaterializedResult (io.prestosql.testing.MaterializedResult): 20
ConnectorInsertTableHandle (io.prestosql.spi.connector.ConnectorInsertTableHandle): 19
ColumnHandle (io.prestosql.spi.connector.ColumnHandle): 18
HiveColumnHandle.bucketColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle): 16
ConnectorTableMetadata (io.prestosql.spi.connector.ConnectorTableMetadata): 16
HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity): 12
ConnectorOutputTableHandle (io.prestosql.spi.connector.ConnectorOutputTableHandle): 12
Constraint (io.prestosql.spi.connector.Constraint): 12
ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource): 10
Page (io.prestosql.spi.Page): 9
Test (org.testng.annotations.Test): 9