Search in sources :

Example 1 with LineItemColumn

use of io.trino.tpch.LineItemColumn in project trino by trinodb.

the class TestHivePageSink method getColumnHandles.

/**
 * Builds one regular Hive base column handle for every column returned by
 * {@code getTestColumns()}, in list order.
 *
 * <p>Each handle is created from the column's name, its position in the list, the Hive type
 * derived from the TPC-H column type, and the type that Hive type resolves to through
 * {@code TESTING_TYPE_MANAGER}. No column comment is supplied.
 *
 * @return an immutable list of column handles, one per test column
 */
private static List<HiveColumnHandle> getColumnHandles() {
    List<LineItemColumn> testColumns = getTestColumns();
    ImmutableList.Builder<HiveColumnHandle> builder = ImmutableList.builder();
    int position = 0;
    for (LineItemColumn testColumn : testColumns) {
        HiveType columnHiveType = getHiveType(testColumn.getType());
        Type resolvedType = columnHiveType.getType(TESTING_TYPE_MANAGER);
        builder.add(createBaseColumn(testColumn.getColumnName(), position, columnHiveType, resolvedType, REGULAR, Optional.empty()));
        position++;
    }
    return builder.build();
}
Also used : ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) LineItemColumn(io.trino.tpch.LineItemColumn)

Example 2 with LineItemColumn

use of io.trino.tpch.LineItemColumn in project trino by trinodb.

the class TestDeltaLakePageSink method getColumnHandles.

/**
 * Builds a regular Delta Lake column handle for every {@link LineItemColumn} constant,
 * in declaration order.
 *
 * <p>Each handle carries the column's name and the type obtained by passing the TPC-H
 * column type to {@code getTrinoType}.
 *
 * @return an immutable list of column handles, one per line item column
 */
private static List<DeltaLakeColumnHandle> getColumnHandles() {
    return ImmutableList.copyOf(LineItemColumn.values()).stream()
            .map(lineItemColumn -> new DeltaLakeColumnHandle(
                    lineItemColumn.getColumnName(),
                    getTrinoType(lineItemColumn.getType()),
                    REGULAR))
            .collect(toImmutableList());
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) LineItemColumn(io.trino.tpch.LineItemColumn)

Example 3 with LineItemColumn

use of io.trino.tpch.LineItemColumn in project trino by trinodb.

the class TestHivePageSink method writeTestFile.

/**
 * Writes a single page of generated TPC-H line items through a Hive page sink into
 * {@code outputPath}, reads the one resulting data file back through a page source, and
 * asserts that the data read equals the data written.
 *
 * <p>Also asserts that the maximum input page size recorded in the writer stats equals the
 * retained size of the written page.
 *
 * @param config Hive configuration used to create the page sink, page source and sessions
 * @param metastore metastore passed to the page sink
 * @param outputPath local directory the page sink writes into; it is expected to contain
 *        exactly one file whose name does not end in {@code .crc} after the write
 * @return the length, as reported by {@link File#length()}, of the written data file
 * @throws java.io.UncheckedIOException if closing the page source fails
 */
private static long writeTestFile(HiveConfig config, HiveMetastore metastore, String outputPath) {
    HiveTransactionHandle transaction = new HiveTransactionHandle(false);
    HiveWriterStats stats = new HiveWriterStats();
    ConnectorPageSink pageSink = createPageSink(transaction, config, metastore, new Path("file:///" + outputPath), stats);
    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream().map(LineItemColumn::getType).map(TestHivePageSink::getHiveType).map(hiveType -> hiveType.getType(TESTING_TYPE_MANAGER)).collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    int rows = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        // Check the limit before counting so that up to NUM_ROWS rows are written;
        // incrementing first would stop one row short.
        if (rows >= NUM_ROWS) {
            break;
        }
        rows++;
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            LineItemColumn column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            // Write the value with the writer matching the column's TPC-H base type.
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(blockBuilder, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }
    Page page = pageBuilder.build();
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());
    File outputDir = new File(outputPath);
    // Files ending in ".crc" are skipped (presumably checksum side files) so that only the data file remains.
    List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
    File outputFile = getOnlyElement(files);
    long length = outputFile.length();
    List<Page> pages = new ArrayList<>();
    // Close the page source once all pages are read so it does not leak between test runs.
    try (ConnectorPageSource pageSource = createPageSource(transaction, config, outputFile)) {
        while (!pageSource.isFinished()) {
            Page nextPage = pageSource.getNextPage();
            if (nextPage != null) {
                pages.add(nextPage.getLoadedPage());
            }
        }
    } catch (java.io.IOException e) {
        // Keep the method signature free of checked exceptions while preserving the cause.
        throw new java.io.UncheckedIOException(e);
    }
    MaterializedResult expectedResults = toMaterializedResult(getHiveSession(config), columnTypes, ImmutableList.of(page));
    MaterializedResult results = toMaterializedResult(getHiveSession(config), columnTypes, pages);
    assertEquals(results, expectedResults);
    assertEquals(round(stats.getInputPageSizeInBytes().getAllTime().getMax()), page.getRetainedSizeInBytes());
    return length;
}
Also used : Path(org.apache.hadoop.fs.Path) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) TypeOperators(io.trino.spi.type.TypeOperators) SplitWeight(io.trino.spi.SplitWeight) HiveMetastoreFactory(io.trino.plugin.hive.metastore.HiveMetastoreFactory) TpchColumnType(io.trino.tpch.TpchColumnType) Math.round(java.lang.Math.round) Slices(io.airlift.slice.Slices) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Path(org.apache.hadoop.fs.Path) LineItemColumn(io.trino.tpch.LineItemColumn) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) HiveTestUtils.getDefaultHiveRecordCursorProviders(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProviders) TestingNodeManager(io.trino.testing.TestingNodeManager) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HIVE_DATE(io.trino.plugin.hive.HiveType.HIVE_DATE) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) LineItemGenerator(io.trino.tpch.LineItemGenerator) LineItem(io.trino.tpch.LineItem) List(java.util.List) Stream(java.util.stream.Stream) BIGINT(io.trino.spi.type.BigintType.BIGINT) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) HivePageSinkMetadata(io.trino.plugin.hive.metastore.HivePageSinkMetadata) DATE(io.trino.spi.type.DateType.DATE) Joiner(com.google.common.base.Joiner) 
JsonCodec(io.airlift.json.JsonCodec) DIRECT_TO_TARGET_NEW_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY) HiveTestUtils.getDefaultHivePageSourceFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories) HIVE_DOUBLE(io.trino.plugin.hive.HiveType.HIVE_DOUBLE) PageBuilder(io.trino.spi.PageBuilder) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) JoinCompiler(io.trino.sql.gen.JoinCompiler) OptionalInt(java.util.OptionalInt) GroupByHashPageIndexerFactory(io.trino.operator.GroupByHashPageIndexerFactory) ArrayList(java.util.ArrayList) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) NONE(io.trino.plugin.hive.HiveCompressionCodec.NONE) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) FileHiveMetastore.createTestingFileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore.createTestingFileHiveMetastore) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) BlockTypeOperators(io.trino.type.BlockTypeOperators) Properties(java.util.Properties) HIVE_LONG(io.trino.plugin.hive.HiveType.HIVE_LONG) HiveTestUtils.getDefaultHiveFileWriterFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) ConnectorSession(io.trino.spi.connector.ConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) PAGE_SORTER(io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER) File(java.io.File) 
HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) TpchColumnTypes(io.trino.tpch.TpchColumnTypes) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) HIVE_INT(io.trino.plugin.hive.HiveType.HIVE_INT) Collectors.toList(java.util.stream.Collectors.toList) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) BlockBuilder(io.trino.spi.block.BlockBuilder) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) LineItemColumn(io.trino.tpch.LineItemColumn) ArrayList(java.util.ArrayList) LineItem(io.trino.tpch.LineItem) Page(io.trino.spi.Page) PageBuilder(io.trino.spi.PageBuilder) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) TpchColumnType(io.trino.tpch.TpchColumnType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) MaterializedResult(io.trino.testing.MaterializedResult) File(java.io.File) LineItemGenerator(io.trino.tpch.LineItemGenerator) BlockBuilder(io.trino.spi.block.BlockBuilder)

Example 4 with LineItemColumn

use of io.trino.tpch.LineItemColumn in project trino by trinodb.

the class TestDeltaLakePageSink method testPageSinkStats.

/**
 * Writes one page of generated TPC-H line items through a Delta Lake page sink into a
 * temporary table directory and checks the reported data file info and writer stats.
 *
 * <p>Verified: exactly one data file info fragment and one non-{@code .crc} file are
 * produced; the maximum recorded input page size equals the page's retained size; the
 * reported record count, partition values, size and path match what was written; and the
 * reported creation time lies within the minute before the check.
 *
 * <p>The temporary directory is deleted recursively whether or not the assertions pass.
 */
@Test
public void testPageSinkStats() throws Exception {
    File tempDir = Files.createTempDir();
    try {
        String tablePath = tempDir.getAbsolutePath() + "/test_table";
        DeltaLakeWriterStats stats = new DeltaLakeWriterStats();
        ConnectorPageSink pageSink = createPageSink(new Path(tablePath), stats);

        List<LineItemColumn> columns = ImmutableList.copyOf(LineItemColumn.values());
        List<Type> columnTypes = columns.stream()
                .map(lineItemColumn -> getTrinoType(lineItemColumn.getType()))
                .collect(toList());

        // Fill a single page with up to NUM_ROWS generated line items.
        PageBuilder pageBuilder = new PageBuilder(columnTypes);
        long rows = 0;
        for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
            if (rows >= NUM_ROWS) {
                break;
            }
            rows++;
            pageBuilder.declarePosition();
            int channel = 0;
            for (LineItemColumn lineItemColumn : columns) {
                writeToBlock(pageBuilder.getBlockBuilder(channel), lineItemColumn, lineItem);
                channel++;
            }
        }
        Page page = pageBuilder.build();
        pageSink.appendPage(page);

        // Each fragment returned by finish() is decoded from JSON into a DataFileInfo.
        JsonCodec<DataFileInfo> dataFileInfoCodec = new JsonCodecFactory().jsonCodec(DataFileInfo.class);
        Collection<Slice> fragments = getFutureValue(pageSink.finish());
        List<DataFileInfo> dataFileInfos = fragments.stream()
                .map(fragment -> dataFileInfoCodec.fromJson(fragment.getBytes()))
                .collect(toImmutableList());
        assertEquals(dataFileInfos.size(), 1);
        DataFileInfo dataFileInfo = dataFileInfos.get(0);

        // Only files not ending in ".crc" count as output files.
        File tableDirectory = new File(tablePath);
        List<File> files = ImmutableList.copyOf(tableDirectory.listFiles((dir, name) -> !name.endsWith(".crc")));
        assertEquals(files.size(), 1);
        File outputFile = files.get(0);

        assertEquals(round(stats.getInputPageSizeInBytes().getAllTime().getMax()), page.getRetainedSizeInBytes());
        assertEquals(dataFileInfo.getStatistics().getNumRecords(), Optional.of(rows));
        assertEquals(dataFileInfo.getPartitionValues(), ImmutableList.of());
        assertEquals(dataFileInfo.getSize(), outputFile.length());
        assertEquals(dataFileInfo.getPath(), outputFile.getName());

        // The creation time must fall strictly between one minute ago and now.
        Instant now = Instant.now();
        long upperBoundMillis = now.toEpochMilli();
        long lowerBoundMillis = now.minus(1, MINUTES).toEpochMilli();
        assertTrue(dataFileInfo.getCreationTime() < upperBoundMillis);
        assertTrue(dataFileInfo.getCreationTime() > lowerBoundMillis);
    } finally {
        deleteRecursively(tempDir.toPath(), ALLOW_INSECURE);
    }
}
Also used : Slice(io.airlift.slice.Slice) PageBuilder(io.trino.spi.PageBuilder) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) Test(org.testng.annotations.Test) TypeOperators(io.trino.spi.type.TypeOperators) JoinCompiler(io.trino.sql.gen.JoinCompiler) GroupByHashPageIndexerFactory(io.trino.operator.GroupByHashPageIndexerFactory) TpchColumnType(io.trino.tpch.TpchColumnType) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) NodeVersion(io.trino.plugin.hive.NodeVersion) MINUTES(java.time.temporal.ChronoUnit.MINUTES) Math.round(java.lang.Math.round) Slices(io.airlift.slice.Slices) Path(org.apache.hadoop.fs.Path) LineItemColumn(io.trino.tpch.LineItemColumn) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) BlockTypeOperators(io.trino.type.BlockTypeOperators) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TestingTypeManager(io.trino.spi.type.TestingTypeManager) Instant(java.time.Instant) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) File(java.io.File) LineItemGenerator(io.trino.tpch.LineItemGenerator) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) LineItem(io.trino.tpch.LineItem) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) BIGINT(io.trino.spi.type.BigintType.BIGINT) JsonCodecFactory(io.airlift.json.JsonCodecFactory) Optional(java.util.Optional) Assert.assertTrue(org.testng.Assert.assertTrue) 
BlockBuilder(io.trino.spi.block.BlockBuilder) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) DATE(io.trino.spi.type.DateType.DATE) JsonCodec(io.airlift.json.JsonCodec) Page(io.trino.spi.Page) PageBuilder(io.trino.spi.PageBuilder) BlockBuilder(io.trino.spi.block.BlockBuilder) Path(org.apache.hadoop.fs.Path) LineItemColumn(io.trino.tpch.LineItemColumn) Instant(java.time.Instant) LineItem(io.trino.tpch.LineItem) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) TpchColumnType(io.trino.tpch.TpchColumnType) Slice(io.airlift.slice.Slice) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) File(java.io.File) JsonCodecFactory(io.airlift.json.JsonCodecFactory) LineItemGenerator(io.trino.tpch.LineItemGenerator) Test(org.testng.annotations.Test)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 LineItemColumn (io.trino.tpch.LineItemColumn)4 Files (com.google.common.io.Files)2 MoreFiles.deleteRecursively (com.google.common.io.MoreFiles.deleteRecursively)2 ALLOW_INSECURE (com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE)2 MoreFutures.getFutureValue (io.airlift.concurrent.MoreFutures.getFutureValue)2 JsonCodec (io.airlift.json.JsonCodec)2 Slices (io.airlift.slice.Slices)2 GroupByHashPageIndexerFactory (io.trino.operator.GroupByHashPageIndexerFactory)2 HDFS_ENVIRONMENT (io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT)2 Page (io.trino.spi.Page)2 PageBuilder (io.trino.spi.PageBuilder)2 BlockBuilder (io.trino.spi.block.BlockBuilder)2 ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink)2 BIGINT (io.trino.spi.type.BigintType.BIGINT)2 DATE (io.trino.spi.type.DateType.DATE)2 DOUBLE (io.trino.spi.type.DoubleType.DOUBLE)2 INTEGER (io.trino.spi.type.IntegerType.INTEGER)2 Type (io.trino.spi.type.Type)2