Example 6 with HiveColumnHandle

use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.

the class HiveBucketing method getHiveBuckets.

private static Optional<Set<Integer>> getHiveBuckets(HiveBucketProperty hiveBucketProperty, List<Column> dataColumns, Map<ColumnHandle, List<NullableValue>> bindings) {
    if (bindings.isEmpty()) {
        return Optional.empty();
    }
    // Get bucket column names
    List<String> bucketColumns = hiveBucketProperty.getBucketedBy();
    // Verify the bucket column types are supported
    Map<String, HiveType> hiveTypes = new HashMap<>();
    for (Column column : dataColumns) {
        hiveTypes.put(column.getName(), column.getType());
    }
    for (String column : bucketColumns) {
        if (!SUPPORTED_TYPES_FOR_BUCKET_FILTER.contains(hiveTypes.get(column))) {
            return Optional.empty();
        }
    }
    // Get bindings for bucket columns
    Map<String, List<NullableValue>> bucketBindings = new HashMap<>();
    for (Entry<ColumnHandle, List<NullableValue>> entry : bindings.entrySet()) {
        HiveColumnHandle columnHandle = (HiveColumnHandle) entry.getKey();
        if (bucketColumns.contains(columnHandle.getName())) {
            bucketBindings.put(columnHandle.getName(), entry.getValue());
        }
    }
    // Check that we have bindings for all bucket columns
    if (bucketBindings.size() != bucketColumns.size()) {
        return Optional.empty();
    }
    // Order bucket column bindings according to the bucket column order
    List<List<NullableValue>> orderedBindings = bucketColumns.stream().map(bucketBindings::get).collect(toImmutableList());
    // Get TypeInfos for bucket columns
    List<TypeInfo> typeInfos = bucketColumns.stream().map(name -> hiveTypes.get(name).getTypeInfo()).collect(toImmutableList());
    return getHiveBuckets(hiveBucketProperty.getBucketingVersion(), hiveBucketProperty.getBucketCount(), typeInfos, orderedBindings);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BUCKETING_V2(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V2) Lists.cartesianProduct(com.google.common.collect.Lists.cartesianProduct) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Objects(java.util.Objects) List(java.util.List) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) StandardErrorCode(io.trino.spi.StandardErrorCode) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) HashMap(java.util.HashMap) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) HashSet(java.util.HashSet) HiveType(io.trino.plugin.hive.HiveType) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList(com.google.common.collect.ImmutableList) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ColumnHandle(io.trino.spi.connector.ColumnHandle) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TABLE_BUCKETING_VERSION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_BUCKETING_VERSION) HiveBucketHandle(io.trino.plugin.hive.HiveBucketHandle) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager)
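
In short: the method bails out with Optional.empty() unless every bucket column has a binding of a supported type, then reorders the per-column value lists to match the table's bucketed-by order before delegating to the bucket computation (which, judging by the Lists.cartesianProduct import, enumerates the value combinations). Below is a minimal, self-contained sketch of that reshaping step using plain JDK types; the column names and values are hypothetical, and plain Strings stand in for NullableValue.

import java.util.List;
import java.util.Map;

import static com.google.common.collect.Lists.cartesianProduct;

public class BucketBindingSketch {
    public static void main(String[] args) {
        // bucketed-by order, as declared on the table
        List<String> bucketColumns = List.of("ds", "user_id");
        // bindings arrive keyed by column, in no particular order
        Map<String, List<String>> bucketBindings = Map.of(
                "user_id", List.of("7", "42"),
                "ds", List.of("2020-10-01"));
        // reorder the per-column value lists to match the bucketed-by order
        List<List<String>> orderedBindings = bucketColumns.stream()
                .map(bucketBindings::get)
                .toList();
        // every combination is one candidate row to hash into a bucket
        System.out.println(cartesianProduct(orderedBindings));
        // prints [[2020-10-01, 7], [2020-10-01, 42]]
    }
}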

Example 7 with HiveColumnHandle

use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.

the class AbstractFileFormat method createPageSource.

static ConnectorPageSource createPageSource(HivePageSourceFactory pageSourceFactory, ConnectorSession session, File targetFile, List<String> columnNames, List<Type> columnTypes, HiveStorageFormat format) {
    checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes should have the same size");
    List<HiveColumnHandle> readColumns = getBaseColumns(columnNames, columnTypes);
    Properties schema = createSchema(format, columnNames, columnTypes);
    Optional<ReaderPageSource> readerPageSourceWithProjections = pageSourceFactory.createPageSource(
            conf, session, new Path(targetFile.getAbsolutePath()),
            0, targetFile.length(), targetFile.length(), schema, readColumns,
            TupleDomain.all(), Optional.empty(), OptionalInt.empty(), false, NO_ACID_TRANSACTION);
    checkState(readerPageSourceWithProjections.isPresent(), "readerPageSourceWithProjections is not present");
    checkState(readerPageSourceWithProjections.get().getReaderColumns().isEmpty(), "projection should not be required");
    return readerPageSourceWithProjections.get().get();
}
Also used : Path(org.apache.hadoop.fs.Path) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) Properties(java.util.Properties) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)
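
Note that conf, getBaseColumns, and createSchema are defined elsewhere in AbstractFileFormat; only the page-source wiring is shown above. A hedged call-site sketch, assuming a pageSourceFactory and session are in scope and the file path and columns are made up; the drain loop mirrors the readFile example further below:

ConnectorPageSource pageSource = createPageSource(
        pageSourceFactory, session, new File("/tmp/lineitem.orc"),
        ImmutableList.of("orderkey"), ImmutableList.of(BIGINT),
        HiveStorageFormat.ORC);
long totalRows = 0;
while (!pageSource.isFinished()) {
    Page page = pageSource.getNextPage();
    if (page != null) {
        totalRows += page.getPositionCount();
    }
}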

Example 8 with HiveColumnHandle

use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.

the class TestCachingHiveMetastore method testGetPartitionNamesByParts.

@Test
public void testGetPartitionNamesByParts() {
    ImmutableList<String> expectedPartitions = ImmutableList.of(TEST_PARTITION1, TEST_PARTITION2);
    assertEquals(mockClient.getAccessCount(), 0);
    assertEquals(metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, PARTITION_COLUMN_NAMES, TupleDomain.all()).get(), expectedPartitions);
    assertEquals(mockClient.getAccessCount(), 1);
    assertEquals(metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, PARTITION_COLUMN_NAMES, TupleDomain.all()).get(), expectedPartitions);
    assertEquals(mockClient.getAccessCount(), 1);
    assertEquals(metastore.getPartitionFilterStats().getRequestCount(), 2);
    assertEquals(metastore.getPartitionFilterStats().getHitRate(), 0.5);
    metastore.flushCache();
    assertEquals(metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, PARTITION_COLUMN_NAMES, TupleDomain.all()).get(), expectedPartitions);
    assertEquals(mockClient.getAccessCount(), 2);
    assertEquals(metastore.getPartitionFilterStats().getRequestCount(), 3);
    assertEquals(metastore.getPartitionFilterStats().getHitRate(), 1.0 / 3);
    List<String> partitionColumnNames = ImmutableList.of("date_key", "key");
    HiveColumnHandle dateKeyColumn = createBaseColumn(partitionColumnNames.get(0), 0, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty());
    HiveColumnHandle keyColumn = createBaseColumn(partitionColumnNames.get(1), 1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty());
    List<HiveColumnHandle> partitionColumns = ImmutableList.of(dateKeyColumn, keyColumn);
    TupleDomain<String> withNoFilter = computePartitionKeyFilter(partitionColumns, TupleDomain.all());
    TupleDomain<String> withSingleValueFilter = computePartitionKeyFilter(partitionColumns, withColumnDomains(ImmutableMap.<HiveColumnHandle, Domain>builder()
            .put(dateKeyColumn, Domain.create(ValueSet.ofRanges(Range.greaterThan(VARCHAR, utf8Slice("2020-10-01"))), false))
            .put(keyColumn, Domain.create(ValueSet.of(VARCHAR, utf8Slice("val")), false))
            .buildOrThrow()));
    TupleDomain<String> withNoSingleValueFilter = computePartitionKeyFilter(partitionColumns, withColumnDomains(ImmutableMap.<HiveColumnHandle, Domain>builder()
            .put(dateKeyColumn, Domain.create(ValueSet.ofRanges(Range.greaterThan(VARCHAR, utf8Slice("2020-10-01"))), false))
            .put(keyColumn, Domain.create(ValueSet.ofRanges(Range.range(VARCHAR, utf8Slice("val1"), true, utf8Slice("val2"), true)), false))
            .buildOrThrow()));
    assertEquals(stats.getGetPartitionNamesByParts().getTime().getAllTime().getCount(), 0.0);
    metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, partitionColumnNames, withNoFilter);
    assertEquals(stats.getGetPartitionNamesByParts().getTime().getAllTime().getCount(), 0.0);
    metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, partitionColumnNames, withSingleValueFilter);
    assertEquals(stats.getGetPartitionNamesByParts().getTime().getAllTime().getCount(), 1.0);
    metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, partitionColumnNames, withNoSingleValueFilter);
    assertEquals(stats.getGetPartitionNamesByParts().getTime().getAllTime().getCount(), 2.0);
}
Also used : HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) Test(org.testng.annotations.Test)
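
The hit-rate assertions follow directly from the access counts: the second identical lookup is served from cache (one hit in two requests), and flushCache() forces the third lookup back to the metastore (still one hit, now in three requests). Note also that the TupleDomain.all() filter never reaches the getPartitionNamesByParts call (its timer count stays at 0.0), while both non-trivial filters increment it. A quick sanity check of the arithmetic, with illustrative variable names:

double hitRateBeforeFlush = 1.0 / 2; // miss, then hit on the identical lookup
double hitRateAfterFlush = 1.0 / 3;  // after flushCache(), the third lookup misses again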

Example 9 with HiveColumnHandle

use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.

the class TestMetastoreUtil method testComputePartitionKeyFilter.

@Test
public void testComputePartitionKeyFilter() {
    HiveColumnHandle dsColumn = partitionColumn("ds");
    HiveColumnHandle typeColumn = partitionColumn("type");
    List<HiveColumnHandle> partitionKeys = ImmutableList.of(dsColumn, typeColumn);
    Domain dsDomain = Domain.create(ValueSet.ofRanges(Range.lessThan(VARCHAR, utf8Slice("2018-05-06"))), false);
    Domain typeDomain = Domain.create(ValueSet.of(VARCHAR, utf8Slice("fruit")), false);
    TupleDomain<HiveColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.<HiveColumnHandle, Domain>builder()
            .put(bucketColumnHandle(), Domain.create(ValueSet.of(INTEGER, 123L), false))
            .put(dsColumn, dsDomain)
            .put(typeColumn, typeDomain)
            .buildOrThrow());
    TupleDomain<String> filter = computePartitionKeyFilter(partitionKeys, tupleDomain);
    assertThat(filter.getDomains())
            .as("output contains only the partition keys")
            .contains(ImmutableMap.<String, Domain>builder()
                    .put("ds", dsDomain)
                    .put("type", typeDomain)
                    .buildOrThrow());
}
Also used : Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) Test(org.testng.annotations.Test)
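
The behavior pinned down here: computePartitionKeyFilter keeps only the domains whose column is one of the given partition keys, re-keyed by column name, and drops the synthetic bucket column. A self-contained sketch of that filtering step with plain JDK types (the column names mirror the test; the domain values are simplified stand-in strings):

import java.util.List;
import java.util.Map;

import static java.util.stream.Collectors.toMap;

public class PartitionKeyFilterSketch {
    public static void main(String[] args) {
        List<String> partitionKeys = List.of("ds", "type");
        Map<String, String> domains = Map.of(
                "$bucket", "123",    // synthetic bucket column, must be dropped
                "ds", "< 2018-05-06",
                "type", "fruit");
        // keep only domains keyed by a partition column
        Map<String, String> filter = domains.entrySet().stream()
                .filter(entry -> partitionKeys.contains(entry.getKey()))
                .collect(toMap(Map.Entry::getKey, Map.Entry::getValue));
        System.out.println(filter); // {ds=< 2018-05-06, type=fruit} (order may vary)
    }
}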

Example 10 with HiveColumnHandle

use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.

the class TestOrcPageSourceFactory method readFile.

private static List<Nation> readFile(Map<NationColumn, Integer> columns, OptionalLong nationKeyPredicate, Optional<AcidInfo> acidInfo, String filePath, long fileSize) {
    TupleDomain<HiveColumnHandle> tupleDomain = TupleDomain.all();
    if (nationKeyPredicate.isPresent()) {
        tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(toHiveColumnHandle(NATION_KEY, 0), Domain.singleValue(INTEGER, nationKeyPredicate.getAsLong())));
    }
    List<HiveColumnHandle> columnHandles = columns.entrySet().stream().map(entry -> toHiveColumnHandle(entry.getKey(), entry.getValue())).collect(toImmutableList());
    List<String> columnNames = columnHandles.stream().map(HiveColumnHandle::getName).collect(toImmutableList());
    Optional<ReaderPageSource> pageSourceWithProjections = PAGE_SOURCE_FACTORY.createPageSource(
            new JobConf(new Configuration(false)), SESSION, new Path(filePath),
            0, fileSize, fileSize, createSchema(), columnHandles, tupleDomain,
            acidInfo, OptionalInt.empty(), false, NO_ACID_TRANSACTION);
    checkArgument(pageSourceWithProjections.isPresent());
    checkArgument(pageSourceWithProjections.get().getReaderColumns().isEmpty(), "projected columns not expected here");
    ConnectorPageSource pageSource = pageSourceWithProjections.get().get();
    int nationKeyColumn = columnNames.indexOf("n_nationkey");
    int nameColumn = columnNames.indexOf("n_name");
    int regionKeyColumn = columnNames.indexOf("n_regionkey");
    int commentColumn = columnNames.indexOf("n_comment");
    ImmutableList.Builder<Nation> rows = ImmutableList.builder();
    while (!pageSource.isFinished()) {
        Page page = pageSource.getNextPage();
        if (page == null) {
            continue;
        }
        // force lazy blocks to load before reading positions
        page = page.getLoadedPage();
        for (int position = 0; position < page.getPositionCount(); position++) {
            long nationKey = -42;
            if (nationKeyColumn >= 0) {
                nationKey = BIGINT.getLong(page.getBlock(nationKeyColumn), position);
            }
            String name = "<not read>";
            if (nameColumn >= 0) {
                name = VARCHAR.getSlice(page.getBlock(nameColumn), position).toStringUtf8();
            }
            long regionKey = -42;
            if (regionKeyColumn >= 0) {
                regionKey = BIGINT.getLong(page.getBlock(regionKeyColumn), position);
            }
            String comment = "<not read>";
            if (commentColumn >= 0) {
                comment = VARCHAR.getSlice(page.getBlock(commentColumn), position).toStringUtf8();
            }
            rows.add(new Nation(position, nationKey, name, regionKey, comment));
        }
    }
    return rows.build();
}
Also used : URISyntaxException(java.net.URISyntaxException) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Assertions(org.assertj.core.api.Assertions) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) LongPredicate(java.util.function.LongPredicate) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertFalse(org.testng.Assert.assertFalse) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.nCopies(java.util.Collections.nCopies) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) AcidUtils.deleteDeltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deleteDeltaSubdir) REGION_KEY(io.trino.tpch.NationColumn.REGION_KEY) Nation(io.trino.tpch.Nation) NationGenerator(io.trino.tpch.NationGenerator) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) Resources.getResource(com.google.common.io.Resources.getResource) NATION_KEY(io.trino.tpch.NationColumn.NATION_KEY) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) HivePageSourceFactory(io.trino.plugin.hive.HivePageSourceFactory) NAME(io.trino.tpch.NationColumn.NAME) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) COMMENT(io.trino.tpch.NationColumn.COMMENT) NationColumn(io.trino.tpch.NationColumn) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) TABLE_IS_TRANSACTIONAL(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_TRANSACTIONAL) TupleDomain(io.trino.spi.predicate.TupleDomain) AcidInfo(io.trino.plugin.hive.AcidInfo) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) HiveConfig(io.trino.plugin.hive.HiveConfig) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR)
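
A hedged call-site sketch for the helper above; the path, size, and key predicate are hypothetical, while NATION_KEY, NAME, and readFile itself come from the surrounding test class:

List<Nation> rows = readFile(
        ImmutableMap.of(NATION_KEY, 0, NAME, 1), // read two columns at ordinals 0 and 1
        OptionalLong.of(24),                     // push down n_nationkey = 24
        Optional.empty(),                        // no ACID delete deltas
        "/tmp/nation.orc",
        new File("/tmp/nation.orc").length());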

Aggregations

HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle): 39
ImmutableList (com.google.common.collect.ImmutableList): 23
TupleDomain (io.trino.spi.predicate.TupleDomain): 19
Test (org.testng.annotations.Test): 18
Domain (io.trino.spi.predicate.Domain): 17
List (java.util.List): 16
ImmutableMap (com.google.common.collect.ImmutableMap): 15
Optional (java.util.Optional): 15
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 13
String.format (java.lang.String.format): 13
ImmutableSet (com.google.common.collect.ImmutableSet): 11
TrinoException (io.trino.spi.TrinoException): 11
BIGINT (io.trino.spi.type.BigintType.BIGINT): 11
Type (io.trino.spi.type.Type): 11
Map (java.util.Map): 11
HdfsEnvironment (io.trino.plugin.hive.HdfsEnvironment): 9
ConnectorSession (io.trino.spi.connector.ConnectorSession): 9
IOException (java.io.IOException): 9
REGULAR (io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR): 8
HiveColumnHandle.createBaseColumn (io.trino.plugin.hive.HiveColumnHandle.createBaseColumn): 8