Example 16 with ConnectorPageSink

Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.

From the class TestDeltaLakePageSink, method testPageSinkStats:

@Test
public void testPageSinkStats() throws Exception {
    File tempDir = Files.createTempDir();
    try {
        DeltaLakeWriterStats stats = new DeltaLakeWriterStats();
        String tablePath = tempDir.getAbsolutePath() + "/test_table";
        ConnectorPageSink pageSink = createPageSink(new Path(tablePath), stats);
        // build a single page of NUM_ROWS TPC-H lineitem rows
        List<LineItemColumn> columns = ImmutableList.copyOf(LineItemColumn.values());
        List<Type> columnTypes = columns.stream().map(LineItemColumn::getType).map(TestDeltaLakePageSink::getTrinoType).collect(toList());
        PageBuilder pageBuilder = new PageBuilder(columnTypes);
        long rows = 0;
        for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
            if (rows >= NUM_ROWS) {
                break;
            }
            rows++;
            pageBuilder.declarePosition();
            for (int i = 0; i < columns.size(); i++) {
                LineItemColumn column = columns.get(i);
                BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
                writeToBlock(blockBuilder, column, lineItem);
            }
        }
        Page page = pageBuilder.build();
        pageSink.appendPage(page);
        // finish() returns JSON-serialized DataFileInfo fragments, one per written file
        JsonCodec<DataFileInfo> dataFileInfoCodec = new JsonCodecFactory().jsonCodec(DataFileInfo.class);
        Collection<Slice> fragments = getFutureValue(pageSink.finish());
        List<DataFileInfo> dataFileInfos = fragments.stream().map(Slice::getBytes).map(dataFileInfoCodec::fromJson).collect(toImmutableList());
        assertEquals(dataFileInfos.size(), 1);
        DataFileInfo dataFileInfo = dataFileInfos.get(0);
        // exactly one data file should exist on disk (ignoring Hadoop .crc checksum files)
        List<File> files = ImmutableList.copyOf(new File(tablePath).listFiles((dir, name) -> !name.endsWith(".crc")));
        assertEquals(files.size(), 1);
        File outputFile = files.get(0);
        // reported writer statistics and file metadata must match what was actually written
        assertEquals(round(stats.getInputPageSizeInBytes().getAllTime().getMax()), page.getRetainedSizeInBytes());
        assertEquals(dataFileInfo.getStatistics().getNumRecords(), Optional.of(rows));
        assertEquals(dataFileInfo.getPartitionValues(), ImmutableList.of());
        assertEquals(dataFileInfo.getSize(), outputFile.length());
        assertEquals(dataFileInfo.getPath(), outputFile.getName());
        Instant now = Instant.now();
        assertTrue(dataFileInfo.getCreationTime() < now.toEpochMilli());
        assertTrue(dataFileInfo.getCreationTime() > now.minus(1, MINUTES).toEpochMilli());
    } finally {
        deleteRecursively(tempDir.toPath(), ALLOW_INSECURE);
    }
}
Also used : Slice(io.airlift.slice.Slice) PageBuilder(io.trino.spi.PageBuilder) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) Test(org.testng.annotations.Test) TypeOperators(io.trino.spi.type.TypeOperators) JoinCompiler(io.trino.sql.gen.JoinCompiler) GroupByHashPageIndexerFactory(io.trino.operator.GroupByHashPageIndexerFactory) TpchColumnType(io.trino.tpch.TpchColumnType) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) NodeVersion(io.trino.plugin.hive.NodeVersion) MINUTES(java.time.temporal.ChronoUnit.MINUTES) Math.round(java.lang.Math.round) Slices(io.airlift.slice.Slices) Path(org.apache.hadoop.fs.Path) LineItemColumn(io.trino.tpch.LineItemColumn) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) BlockTypeOperators(io.trino.type.BlockTypeOperators) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TestingTypeManager(io.trino.spi.type.TestingTypeManager) Instant(java.time.Instant) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) File(java.io.File) LineItemGenerator(io.trino.tpch.LineItemGenerator) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) LineItem(io.trino.tpch.LineItem) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) BIGINT(io.trino.spi.type.BigintType.BIGINT) JsonCodecFactory(io.airlift.json.JsonCodecFactory) Optional(java.util.Optional) Assert.assertTrue(org.testng.Assert.assertTrue) BlockBuilder(io.trino.spi.block.BlockBuilder) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) DATE(io.trino.spi.type.DateType.DATE) JsonCodec(io.airlift.json.JsonCodec)
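
For context, the write lifecycle the test above drives is the generic one defined by the SPI. The sketch below is a hypothetical helper (PageSinkLifecycle and writePages are not Trino classes) that relies only on SPI calls appearing in these examples: appendPage for backpressure-aware writes, finish to obtain the commit fragments, and abort to clean up on failure.

import io.airlift.slice.Slice;
import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorPageSink;

import java.util.Collection;
import java.util.List;

import static io.airlift.concurrent.MoreFutures.getFutureValue;

final class PageSinkLifecycle {
    // hypothetical helper illustrating the ConnectorPageSink contract
    static Collection<Slice> writePages(ConnectorPageSink sink, List<Page> pages) {
        try {
            for (Page page : pages) {
                // appendPage may return a not-yet-complete future to signal backpressure
                getFutureValue(sink.appendPage(page));
            }
            // finish() flushes all writers and returns opaque fragments for the commit
            return getFutureValue(sink.finish());
        }
        catch (RuntimeException e) {
            // abort() releases resources and removes partially written data
            sink.abort();
            throw e;
        }
    }
}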

Example 17 with ConnectorPageSink

Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.

From the class AbstractTestHive, method insertData:

private String insertData(SchemaTableName tableName, MaterializedResult data, Map<String, Object> sessionProperties) throws Exception {
    Path writePath;
    Path targetPath;
    String queryId;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(sessionProperties);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
        queryId = session.getQueryId();
        // the staging path differs from the target path when writes go through a temporary directory
        writePath = getStagingPathRoot(insertTableHandle);
        targetPath = getTargetPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        // write data
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the insert
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // check that temporary files are removed
    if (!writePath.equals(targetPath)) {
        HdfsContext context = new HdfsContext(newSession());
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, writePath);
        assertFalse(fileSystem.exists(writePath));
    }
    return queryId;
}
Also used : Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) FileSystem(org.apache.hadoop.fs.FileSystem) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)
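
The fragments returned by ConnectorPageSink.finish() are opaque to the engine; the connector's metadata implementation decodes them in finishInsert. A common pattern, shown concretely in Example 16's DataFileInfo handling, is to round-trip a per-file result object through JSON. The sketch below is illustrative only: WriterResult and FragmentCodec are hypothetical names, not Trino classes.

import io.airlift.json.JsonCodec;
import io.airlift.json.JsonCodecFactory;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

import java.util.Collection;
import java.util.List;

import static com.google.common.collect.ImmutableList.toImmutableList;

final class FragmentCodec {
    // hypothetical per-file writer result; real connectors use classes like DataFileInfo
    public record WriterResult(String path, long rowCount) {}

    private static final JsonCodec<WriterResult> CODEC = new JsonCodecFactory().jsonCodec(WriterResult.class);

    // what a ConnectorPageSink.finish() implementation might return
    static List<Slice> toFragments(List<WriterResult> results) {
        return results.stream()
                .map(CODEC::toJsonBytes)
                .map(Slices::wrappedBuffer)
                .collect(toImmutableList());
    }

    // what ConnectorMetadata.finishInsert() might do with the received fragments
    static List<WriterResult> fromFragments(Collection<Slice> fragments) {
        return fragments.stream()
                .map(Slice::getBytes)
                .map(CODEC::fromJson)
                .collect(toImmutableList());
    }
}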

Example 18 with ConnectorPageSink

Use of io.trino.spi.connector.ConnectorPageSink in project trino by trinodb.

From the class TestMemoryPagesStore, method createTable:

private void createTable(long tableId, Long... activeTableIds) {
    ConnectorPageSink pageSink = pageSinkProvider.createPageSink(MemoryTransactionHandle.INSTANCE, SESSION, createMemoryOutputTableHandle(tableId, activeTableIds));
    // the fragments future returned by finish() is intentionally ignored in this test helper
    pageSink.finish();
}
Also used : ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink)
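
If the commit fragments mattered here, the future from finish() would be resolved explicitly, as the other examples do (a one-line sketch, assuming the same getFutureValue static import as Example 16):

Collection<Slice> fragments = getFutureValue(pageSink.finish());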

Aggregations

ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink): 18 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 15 usages
Slice (io.airlift.slice.Slice): 14 usages
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 13 usages
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 12 usages
MaterializedResult (io.trino.testing.MaterializedResult): 12 usages
Path (org.apache.hadoop.fs.Path): 12 usages
Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice): 11 usages
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 11 usages
HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext): 10 usages
HiveSessionProperties.getTemporaryStagingDirectoryPath (io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath): 10 usages
ColumnHandle (io.trino.spi.connector.ColumnHandle): 9 usages
HiveColumnHandle.bucketColumnHandle (io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle): 8 usages
ConnectorInsertTableHandle (io.trino.spi.connector.ConnectorInsertTableHandle): 8 usages
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata): 8 usages
Test (org.testng.annotations.Test): 7 usages
Page (io.trino.spi.Page): 6 usages
ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle): 6 usages
Constraint (io.trino.spi.connector.Constraint): 6 usages
ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 5 usages