Search in sources:

Example 11 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project hetu-core by openlookeng.

From the class AbstractTestHive, method insertData:

private String insertData(SchemaTableName tableName, MaterializedResult data, Map<String, Object> sessionProperties) throws Exception {
    Path writePath;
    Path targetPath;
    String queryId;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(sessionProperties);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        queryId = session.getQueryId();
        writePath = getStagingPathRoot(insertTableHandle);
        targetPath = getTargetPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        // write data
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the insert
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // check that temporary files are removed
    if (!writePath.equals(targetPath)) {
        HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, writePath);
        assertFalse(fileSystem.exists(writePath));
    }
    return queryId;
}
Also used: Path(org.apache.hadoop.fs.Path), ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle), Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice), Slice(io.airlift.slice.Slice), FileSystem(org.apache.hadoop.fs.FileSystem), ConnectorSession(io.prestosql.spi.connector.ConnectorSession), TestingConnectorSession(io.prestosql.testing.TestingConnectorSession), ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata), HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext), ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink), ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle)
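
All of the insert tests on this page exercise the same page-sink write lifecycle: begin the insert, stream pages into the ConnectorPageSink, collect the fragments returned by finish(), and hand them to finishInsert() before committing. Below is a minimal sketch of that lifecycle, distilled from the test above; the helper name writeAndCommit and its parameter list are hypothetical, while the SPI calls are exactly the ones the test makes.

import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import io.prestosql.spi.Page;
import io.prestosql.spi.connector.ConnectorInsertTableHandle;
import io.prestosql.spi.connector.ConnectorMetadata;
import io.prestosql.spi.connector.ConnectorPageSink;
import io.prestosql.spi.connector.ConnectorSession;
import java.util.Collection;
import static io.airlift.concurrent.MoreFutures.getFutureValue;

private void writeAndCommit(ConnectorMetadata metadata, ConnectorSession session, ConnectorInsertTableHandle insertTableHandle, ConnectorPageSink sink, Page page) {
    // stage the rows through the page sink
    sink.appendPage(page);
    // finish() flushes the writers and yields opaque fragments describing the files written
    Collection<Slice> fragments = getFutureValue(sink.finish());
    // handing the fragments back to the metadata layer commits the insert
    metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
}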

Example 12 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project boostkit-bigdata by kunpengcompute.

From the class AbstractTestHive, method insertData:

private String insertData(SchemaTableName tableName, MaterializedResult data, Map<String, Object> sessionProperties) throws Exception {
    Path writePath;
    Path targetPath;
    String queryId;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(sessionProperties);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        queryId = session.getQueryId();
        writePath = getStagingPathRoot(insertTableHandle);
        targetPath = getTargetPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        // write data
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the insert
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // check that temporary files are removed
    if (!writePath.equals(targetPath)) {
        HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, writePath);
        assertFalse(fileSystem.exists(writePath));
    }
    return queryId;
}
Also used: Path(org.apache.hadoop.fs.Path), ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle), Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice), Slice(io.airlift.slice.Slice), FileSystem(org.apache.hadoop.fs.FileSystem), ConnectorSession(io.prestosql.spi.connector.ConnectorSession), TestingConnectorSession(io.prestosql.testing.TestingConnectorSession), ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata), HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext), ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink), ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle)
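
One detail both copies of insertData gloss over: appendPage returns a CompletableFuture<?> that the engine uses for backpressure. Discarding it is harmless in a test that appends a single page and immediately calls finish(), but a caller streaming many pages would normally wait on it. A hedged sketch, reusing the sink and data from the example above:

CompletableFuture<?> blocked = sink.appendPage(data.toPage());
// wait until the sink is ready for more data before appending the next page
getFutureValue(blocked);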

Example 13 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project boostkit-bigdata by kunpengcompute.

From the class AbstractTestHive, method doInsertIntoNewPartition:

private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
    // insert the data
    String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        // verify partitions were created
        HiveIdentity identity = new HiveIdentity(newSession());
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(identity, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toList()));
        // verify the node versions in partitions
        Map<String, Optional<Partition>> partitions = getMetastoreClient().getPartitionsByNames(identity, tableName.getSchemaName(), tableName.getTableName(), partitionNames);
        assertEquals(partitions.size(), partitionNames.size());
        for (String partitionName : partitionNames) {
            Partition partition = partitions.get(partitionName).get();
            assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
            assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        }
        // load the new table
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
        // test rollback
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
        // test statistics
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
            assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
            assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);
        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // rollback insert
        transaction.rollback();
    }
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify temp directory is empty
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    }
}
Also used: Path(org.apache.hadoop.fs.Path), Partition(io.prestosql.plugin.hive.metastore.Partition), HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle), ColumnHandle(io.prestosql.spi.connector.ColumnHandle), Optional(java.util.Optional), ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle), HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity), ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle), Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice), Slice(io.airlift.slice.Slice), ConnectorSession(io.prestosql.spi.connector.ConnectorSession), TestingConnectorSession(io.prestosql.testing.TestingConnectorSession), ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata), HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext), MaterializedResult(io.prestosql.testing.MaterializedResult), ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink)
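
The rollback above relies on the test's Transaction wrapper to discard the staged files. The page-sink SPI also has an abort() method for the failure path, so a write that throws can clean up the files it staged. A minimal sketch of that error handling, reusing the names from the staging block above (the try/catch shape is illustrative, not taken from the test):

try {
    sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
    Collection<Slice> fragments = getFutureValue(sink.finish());
    metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
}
catch (RuntimeException e) {
    // abort() releases the sink's resources and removes any files it staged
    sink.abort();
    throw e;
}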

Example 14 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project boostkit-bigdata by kunpengcompute.

From the class AbstractTestHiveFileSystem, method createTable:

private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", BIGINT)).build();
    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT).row(1L).row(3L).row(2L).build();
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty());
        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
        // Hack to work around the metastore not being configured for S3 or other file systems.
        // The metastore tries to validate the location when creating the
        // table, which fails without explicit configuration for the file system.
        // We work around that by using a dummy location when creating the
        // table and updating it here to the correct location.
        metastoreClient.updateTableLocation(database, tableName.getTableName(), locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false).getTargetPath().toString());
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
        // verify the data
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
    }
}
Also used: ColumnHandle(io.prestosql.spi.connector.ColumnHandle), AbstractTestHive.filterNonHiddenColumnMetadata(io.prestosql.plugin.hive.AbstractTestHive.filterNonHiddenColumnMetadata), ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata), ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource), ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource), ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle), ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle), HiveTransaction(io.prestosql.plugin.hive.AbstractTestHive.HiveTransaction), Transaction(io.prestosql.plugin.hive.AbstractTestHive.Transaction), Slice(io.airlift.slice.Slice), ConnectorSession(io.prestosql.spi.connector.ConnectorSession), TestingConnectorSession(io.prestosql.testing.TestingConnectorSession), ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata), MaterializedResult(io.prestosql.testing.MaterializedResult), ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink), ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit), ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata)
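
materializeSourceDataStream hides the page-reading loop. For reference, a minimal sketch of draining a ConnectorPageSource by hand, with pageSource as in the try-with-resources above; the process consumer is a hypothetical stand-in for the caller's row handling:

while (!pageSource.isFinished()) {
    Page page = pageSource.getNextPage();
    if (page == null) {
        // the source may momentarily have no data ready
        continue;
    }
    // each Page carries a batch of rows; getPositionCount() gives the row count
    process(page);
}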

Example 15 with ConnectorPageSink

Use of io.prestosql.spi.connector.ConnectorPageSink in project boostkit-bigdata by kunpengcompute.

From the class AbstractTestHive, method insertOverwriteData:

private String insertOverwriteData(SchemaTableName tableName, MaterializedResult data, Map<String, Object> sessionProperties) throws Exception {
    Path writePath;
    Path targetPath;
    String queryId;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(sessionProperties);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, true);
        queryId = session.getQueryId();
        writePath = getStagingPathRoot(insertTableHandle);
        targetPath = getTargetPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        // write data
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the insert
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // check that temporary files are removed
    if (!writePath.equals(targetPath)) {
        HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, writePath);
        assertFalse(fileSystem.exists(writePath));
    }
    return queryId;
}
Also used: Path(org.apache.hadoop.fs.Path), ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle), Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice), Slice(io.airlift.slice.Slice), FileSystem(org.apache.hadoop.fs.FileSystem), ConnectorSession(io.prestosql.spi.connector.ConnectorSession), TestingConnectorSession(io.prestosql.testing.TestingConnectorSession), ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata), HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext), ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink), ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle)
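
Compared with the insertData examples earlier on this page, the only functional difference is the overwrite flag on beginInsert: the two-argument overload appends, while the three-argument overload called here requests insert-overwrite semantics (the extra boolean appears to be an extension in this fork's ConnectorMetadata). Side by side:

ConnectorInsertTableHandle appendHandle = metadata.beginInsert(session, tableHandle);
ConnectorInsertTableHandle overwriteHandle = metadata.beginInsert(session, tableHandle, true);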

Aggregations

Each entry lists a type that co-occurs with ConnectorPageSink in the indexed examples, followed by its usage count:

ConnectorPageSink (io.prestosql.spi.connector.ConnectorPageSink): 29
ConnectorSession (io.prestosql.spi.connector.ConnectorSession): 26
TestingConnectorSession (io.prestosql.testing.TestingConnectorSession): 26
ConnectorMetadata (io.prestosql.spi.connector.ConnectorMetadata): 24
ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle): 24
Path (org.apache.hadoop.fs.Path): 24
Slice (io.airlift.slice.Slice): 22
HdfsContext (io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext): 22
Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice): 20
MaterializedResult (io.prestosql.testing.MaterializedResult): 20
ConnectorInsertTableHandle (io.prestosql.spi.connector.ConnectorInsertTableHandle): 19
ColumnHandle (io.prestosql.spi.connector.ColumnHandle): 18
HiveColumnHandle.bucketColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle): 16
ConnectorTableMetadata (io.prestosql.spi.connector.ConnectorTableMetadata): 16
HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity): 12
ConnectorOutputTableHandle (io.prestosql.spi.connector.ConnectorOutputTableHandle): 12
Constraint (io.prestosql.spi.connector.Constraint): 12
ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource): 10
Page (io.prestosql.spi.Page): 9
Test (org.testng.annotations.Test): 9