Search in sources :

Example 1 with NationColumn

Use of io.trino.tpch.NationColumn in the trino project by trinodb.

The class TestOrcPageSourceFactory, method readFile.

/**
 * Reads the file at {@code filePath} through {@code PAGE_SOURCE_FACTORY} and turns every
 * position of every returned page into a {@link Nation}.
 *
 * <p>Only the columns listed in {@code columns} are requested. Fields whose column was not
 * requested are filled with placeholders: {@code -42} for the two key fields and
 * {@code "<not read>"} for the two string fields. The first argument handed to the
 * {@code Nation} constructor is the position within the current page, so it restarts at 0
 * for each page.
 *
 * @param columns columns to read, each mapped to the index passed to {@code toHiveColumnHandle}
 * @param nationKeyPredicate when present, the scan is constrained to a single-value domain on
 *         {@code NATION_KEY}; otherwise {@code TupleDomain.all()} is used
 * @param acidInfo forwarded unchanged to the page source factory
 * @param filePath location of the file to read
 * @param fileSize forwarded twice to the factory after a start offset of 0
 *         (presumably split length and file size; confirm against the factory signature)
 * @return the rows in the order the page source produced them
 * @throws IllegalArgumentException if the factory returns no page source, or returns one with
 *         projected reader columns
 * @throws java.io.UncheckedIOException if closing the page source fails
 */
private static List<Nation> readFile(Map<NationColumn, Integer> columns, OptionalLong nationKeyPredicate, Optional<AcidInfo> acidInfo, String filePath, long fileSize) {
    TupleDomain<HiveColumnHandle> tupleDomain = TupleDomain.all();
    if (nationKeyPredicate.isPresent()) {
        tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(toHiveColumnHandle(NATION_KEY, 0), Domain.singleValue(INTEGER, nationKeyPredicate.getAsLong())));
    }
    List<HiveColumnHandle> columnHandles = columns.entrySet().stream().map(entry -> toHiveColumnHandle(entry.getKey(), entry.getValue())).collect(toImmutableList());
    List<String> columnNames = columnHandles.stream().map(HiveColumnHandle::getName).collect(toImmutableList());
    Optional<ReaderPageSource> pageSourceWithProjections = PAGE_SOURCE_FACTORY.createPageSource(new JobConf(new Configuration(false)), SESSION, new Path(filePath), 0, fileSize, fileSize, createSchema(), columnHandles, tupleDomain, acidInfo, OptionalInt.empty(), false, NO_ACID_TRANSACTION);
    checkArgument(pageSourceWithProjections.isPresent());

    // Channel index of each column inside the returned pages; -1 means the column was not requested.
    int nationKeyColumn = columnNames.indexOf("n_nationkey");
    int nameColumn = columnNames.indexOf("n_name");
    int regionKeyColumn = columnNames.indexOf("n_regionkey");
    int commentColumn = columnNames.indexOf("n_comment");

    ImmutableList.Builder<Nation> rows = ImmutableList.builder();
    // try-with-resources: the page source holds the open reader and must be released on
    // every exit path, including a failed projection check or an exception while decoding.
    try (ConnectorPageSource pageSource = pageSourceWithProjections.get().get()) {
        checkArgument(pageSourceWithProjections.get().getReaderColumns().isEmpty(), "projected columns not expected here");
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page == null) {
                // No page available on this call; poll again until the source reports finished.
                continue;
            }
            page = page.getLoadedPage();
            for (int position = 0; position < page.getPositionCount(); position++) {
                long nationKey = -42;
                if (nationKeyColumn >= 0) {
                    nationKey = BIGINT.getLong(page.getBlock(nationKeyColumn), position);
                }
                String name = "<not read>";
                if (nameColumn >= 0) {
                    name = VARCHAR.getSlice(page.getBlock(nameColumn), position).toStringUtf8();
                }
                long regionKey = -42;
                if (regionKeyColumn >= 0) {
                    regionKey = BIGINT.getLong(page.getBlock(regionKeyColumn), position);
                }
                String comment = "<not read>";
                if (commentColumn >= 0) {
                    comment = VARCHAR.getSlice(page.getBlock(commentColumn), position).toStringUtf8();
                }
                rows.add(new Nation(position, nationKey, name, regionKey, comment));
            }
        }
    } catch (java.io.IOException e) {
        // close() declares IOException; rethrow unchecked so the method signature stays unchanged.
        throw new java.io.UncheckedIOException(e);
    }
    return rows.build();
}
Also used : URISyntaxException(java.net.URISyntaxException) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Assertions(org.assertj.core.api.Assertions) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) LongPredicate(java.util.function.LongPredicate) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertFalse(org.testng.Assert.assertFalse) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.nCopies(java.util.Collections.nCopies) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) AcidUtils.deleteDeltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deleteDeltaSubdir) REGION_KEY(io.trino.tpch.NationColumn.REGION_KEY) Nation(io.trino.tpch.Nation) NationGenerator(io.trino.tpch.NationGenerator) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) Resources.getResource(com.google.common.io.Resources.getResource) NATION_KEY(io.trino.tpch.NationColumn.NATION_KEY) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) HivePageSourceFactory(io.trino.plugin.hive.HivePageSourceFactory) NAME(io.trino.tpch.NationColumn.NAME) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) 
ImmutableList(com.google.common.collect.ImmutableList) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) COMMENT(io.trino.tpch.NationColumn.COMMENT) NationColumn(io.trino.tpch.NationColumn) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) TABLE_IS_TRANSACTIONAL(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_TRANSACTIONAL) TupleDomain(io.trino.spi.predicate.TupleDomain) AcidInfo(io.trino.plugin.hive.AcidInfo) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) HiveConfig(io.trino.plugin.hive.HiveConfig) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) Path(org.apache.hadoop.fs.Path) Nation(io.trino.tpch.Nation) Configuration(org.apache.hadoop.conf.Configuration) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) JobConf(org.apache.hadoop.mapred.JobConf) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Aggregations

Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Resources.getResource (com.google.common.io.Resources.getResource)1 AcidInfo (io.trino.plugin.hive.AcidInfo)1 FileFormatDataSourceStats (io.trino.plugin.hive.FileFormatDataSourceStats)1 HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle)1 REGULAR (io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR)1 HiveColumnHandle.createBaseColumn (io.trino.plugin.hive.HiveColumnHandle.createBaseColumn)1 HiveConfig (io.trino.plugin.hive.HiveConfig)1 HivePageSourceFactory (io.trino.plugin.hive.HivePageSourceFactory)1 ORC (io.trino.plugin.hive.HiveStorageFormat.ORC)1 HDFS_ENVIRONMENT (io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT)1 SESSION (io.trino.plugin.hive.HiveTestUtils.SESSION)1 HiveType.toHiveType (io.trino.plugin.hive.HiveType.toHiveType)1 ReaderPageSource (io.trino.plugin.hive.ReaderPageSource)1 NO_ACID_TRANSACTION (io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION)1 Page (io.trino.spi.Page)1 ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource)1