Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class HiveBucketing, method getHiveBuckets:
private static Optional<Set<Integer>> getHiveBuckets(HiveBucketProperty hiveBucketProperty, List<Column> dataColumns, Map<ColumnHandle, List<NullableValue>> bindings)
{
    if (bindings.isEmpty()) {
        return Optional.empty();
    }
    // Get bucket column names
    List<String> bucketColumns = hiveBucketProperty.getBucketedBy();
    // Verify the bucket column types are supported
    Map<String, HiveType> hiveTypes = new HashMap<>();
    for (Column column : dataColumns) {
        hiveTypes.put(column.getName(), column.getType());
    }
    for (String column : bucketColumns) {
        if (!SUPPORTED_TYPES_FOR_BUCKET_FILTER.contains(hiveTypes.get(column))) {
            return Optional.empty();
        }
    }
    // Get bindings for bucket columns
    Map<String, List<NullableValue>> bucketBindings = new HashMap<>();
    for (Entry<ColumnHandle, List<NullableValue>> entry : bindings.entrySet()) {
        HiveColumnHandle columnHandle = (HiveColumnHandle) entry.getKey();
        if (bucketColumns.contains(columnHandle.getName())) {
            bucketBindings.put(columnHandle.getName(), entry.getValue());
        }
    }
    // Check that we have bindings for all bucket columns
    if (bucketBindings.size() != bucketColumns.size()) {
        return Optional.empty();
    }
    // Order the bucket column bindings according to the bucket columns order
    List<List<NullableValue>> orderedBindings = bucketColumns.stream()
            .map(bucketBindings::get)
            .collect(toImmutableList());
    // Get TypeInfos for bucket columns
    List<TypeInfo> typeInfos = bucketColumns.stream()
            .map(name -> hiveTypes.get(name).getTypeInfo())
            .collect(toImmutableList());
    return getHiveBuckets(hiveBucketProperty.getBucketingVersion(), hiveBucketProperty.getBucketCount(), typeInfos, orderedBindings);
}
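For intuition, here is a minimal, self-contained sketch of the pruning idea this method implements: hash each candidate value for a bucket column and keep only the buckets some value can land in. All names below are hypothetical, and the toy hashBucket stands in for Hive's real bucketing function (which dispatches on type and bucketing version).

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;

public class BucketPruningSketch
{
    // Hypothetical stand-in for Hive's bucket hash; not the real algorithm
    static int hashBucket(Object value, int bucketCount)
    {
        return Math.floorMod(value == null ? 0 : value.hashCode(), bucketCount);
    }

    // Given the candidate values bound to a single bucket column,
    // collect the set of buckets any of them can fall into
    static Optional<Set<Integer>> candidateBuckets(List<Object> values, int bucketCount)
    {
        Set<Integer> buckets = new LinkedHashSet<>();
        for (Object value : values) {
            buckets.add(hashBucket(value, bucketCount));
        }
        return Optional.of(buckets);
    }

    public static void main(String[] args)
    {
        // With 8 buckets, two candidate keys can match at most two bucket files
        System.out.println(candidateBuckets(List.of("2020-10-01", "2020-10-02"), 8));
    }
}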
Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class AbstractFileFormat, method createPageSource:
static ConnectorPageSource createPageSource(HivePageSourceFactory pageSourceFactory, ConnectorSession session, File targetFile, List<String> columnNames, List<Type> columnTypes, HiveStorageFormat format)
{
    checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes should have the same size");
    List<HiveColumnHandle> readColumns = getBaseColumns(columnNames, columnTypes);
    Properties schema = createSchema(format, columnNames, columnTypes);
    // 'conf' is a Hadoop Configuration defined elsewhere in this class
    Optional<ReaderPageSource> readerPageSourceWithProjections = pageSourceFactory.createPageSource(
            conf, session, new Path(targetFile.getAbsolutePath()), 0, targetFile.length(), targetFile.length(),
            schema, readColumns, TupleDomain.all(), Optional.empty(), OptionalInt.empty(), false, NO_ACID_TRANSACTION);
    checkState(readerPageSourceWithProjections.isPresent(), "readerPageSourceWithProjections is not present");
    checkState(readerPageSourceWithProjections.get().getReaderColumns().isEmpty(), "projection should not be required");
    return readerPageSourceWithProjections.get().get();
}
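A caller would typically drain the returned ConnectorPageSource page by page. A minimal sketch under that assumption, using only methods that appear elsewhere in these examples (isFinished, getNextPage, getPositionCount); the countRows helper is hypothetical, and close() can throw IOException:

// Hypothetical caller that drains the page source produced above
static long countRows(HivePageSourceFactory factory, ConnectorSession session, File file, List<String> names, List<Type> types, HiveStorageFormat format)
        throws IOException
{
    long rows = 0;
    try (ConnectorPageSource source = createPageSource(factory, session, file, names, types, format)) {
        while (!source.isFinished()) {
            Page page = source.getNextPage();
            if (page == null) {
                continue; // the source may legitimately return null before it is finished
            }
            rows += page.getPositionCount();
        }
    }
    return rows;
}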
Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class TestCachingHiveMetastore, method testGetPartitionNamesByParts:
@Test
public void testGetPartitionNamesByParts()
{
    ImmutableList<String> expectedPartitions = ImmutableList.of(TEST_PARTITION1, TEST_PARTITION2);

    assertEquals(mockClient.getAccessCount(), 0);
    assertEquals(metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, PARTITION_COLUMN_NAMES, TupleDomain.all()).get(), expectedPartitions);
    assertEquals(mockClient.getAccessCount(), 1);
    assertEquals(metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, PARTITION_COLUMN_NAMES, TupleDomain.all()).get(), expectedPartitions);
    assertEquals(mockClient.getAccessCount(), 1);
    assertEquals(metastore.getPartitionFilterStats().getRequestCount(), 2);
    assertEquals(metastore.getPartitionFilterStats().getHitRate(), 0.5);

    metastore.flushCache();

    assertEquals(metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, PARTITION_COLUMN_NAMES, TupleDomain.all()).get(), expectedPartitions);
    assertEquals(mockClient.getAccessCount(), 2);
    assertEquals(metastore.getPartitionFilterStats().getRequestCount(), 3);
    assertEquals(metastore.getPartitionFilterStats().getHitRate(), 1.0 / 3);

    List<String> partitionColumnNames = ImmutableList.of("date_key", "key");
    HiveColumnHandle dateKeyColumn = createBaseColumn(partitionColumnNames.get(0), 0, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty());
    HiveColumnHandle keyColumn = createBaseColumn(partitionColumnNames.get(1), 1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty());
    List<HiveColumnHandle> partitionColumns = ImmutableList.of(dateKeyColumn, keyColumn);
    TupleDomain<String> withNoFilter = computePartitionKeyFilter(partitionColumns, TupleDomain.all());
    TupleDomain<String> withSingleValueFilter = computePartitionKeyFilter(partitionColumns, withColumnDomains(ImmutableMap.<HiveColumnHandle, Domain>builder()
            .put(dateKeyColumn, Domain.create(ValueSet.ofRanges(Range.greaterThan(VARCHAR, utf8Slice("2020-10-01"))), false))
            .put(keyColumn, Domain.create(ValueSet.of(VARCHAR, utf8Slice("val")), false))
            .buildOrThrow()));
    TupleDomain<String> withNoSingleValueFilter = computePartitionKeyFilter(partitionColumns, withColumnDomains(ImmutableMap.<HiveColumnHandle, Domain>builder()
            .put(dateKeyColumn, Domain.create(ValueSet.ofRanges(Range.greaterThan(VARCHAR, utf8Slice("2020-10-01"))), false))
            .put(keyColumn, Domain.create(ValueSet.ofRanges(Range.range(VARCHAR, utf8Slice("val1"), true, utf8Slice("val2"), true)), false))
            .buildOrThrow()));

    assertEquals(stats.getGetPartitionNamesByParts().getTime().getAllTime().getCount(), 0.0);
    metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, partitionColumnNames, withNoFilter);
    assertEquals(stats.getGetPartitionNamesByParts().getTime().getAllTime().getCount(), 0.0);
    metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, partitionColumnNames, withSingleValueFilter);
    assertEquals(stats.getGetPartitionNamesByParts().getTime().getAllTime().getCount(), 1.0);
    metastore.getPartitionNamesByFilter(TEST_DATABASE, TEST_TABLE, partitionColumnNames, withNoSingleValueFilter);
    assertEquals(stats.getGetPartitionNamesByParts().getTime().getAllTime().getCount(), 2.0);
}
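The asserted hit rates are plain counter arithmetic: one hit out of two requests gives 0.5, and after flushCache() forces a third client round trip the rate drops to 1/3. A tiny illustrative sketch of that bookkeeping (a hypothetical class, not Trino's stats implementation):

// Hypothetical bookkeeping mirroring the asserted numbers above
public class HitRateSketch
{
    public static double hitRate(long hits, long requests)
    {
        return (double) hits / requests;
    }

    public static void main(String[] args)
    {
        System.out.println(hitRate(1, 2)); // 0.5   : one miss, then one cache hit
        System.out.println(hitRate(1, 3)); // 0.333 : flushCache() forces a third round trip
    }
}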
Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class TestMetastoreUtil, method testComputePartitionKeyFilter:
@Test
public void testComputePartitionKeyFilter()
{
    HiveColumnHandle dsColumn = partitionColumn("ds");
    HiveColumnHandle typeColumn = partitionColumn("type");
    List<HiveColumnHandle> partitionKeys = ImmutableList.of(dsColumn, typeColumn);
    Domain dsDomain = Domain.create(ValueSet.ofRanges(Range.lessThan(VARCHAR, utf8Slice("2018-05-06"))), false);
    Domain typeDomain = Domain.create(ValueSet.of(VARCHAR, utf8Slice("fruit")), false);
    TupleDomain<HiveColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.<HiveColumnHandle, Domain>builder()
            .put(bucketColumnHandle(), Domain.create(ValueSet.of(INTEGER, 123L), false))
            .put(dsColumn, dsDomain)
            .put(typeColumn, typeDomain)
            .buildOrThrow());
    TupleDomain<String> filter = computePartitionKeyFilter(partitionKeys, tupleDomain);
    assertThat(filter.getDomains()).as("output contains only the partition keys")
            .contains(ImmutableMap.<String, Domain>builder().put("ds", dsDomain).put("type", typeDomain).buildOrThrow());
}
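Conceptually, computePartitionKeyFilter keeps only the domains whose column is a partition key and rekeys them by column name, which is why the synthetic bucket column above is dropped. A simplified model over plain maps; the real method operates on TupleDomain<HiveColumnHandle>, and everything below is illustrative:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class PartitionKeyFilterSketch
{
    // Keep only the domains for the given partition key names, rekeyed by name
    static <D> Map<String, D> computePartitionKeyFilter(List<String> partitionKeyNames, Map<String, D> domainsByColumn)
    {
        Map<String, D> result = new LinkedHashMap<>();
        for (String name : partitionKeyNames) {
            D domain = domainsByColumn.get(name);
            if (domain != null) {
                result.put(name, domain);
            }
        }
        return result;
    }

    public static void main(String[] args)
    {
        Map<String, String> domains = Map.of("$bucket", "=123", "ds", "<2018-05-06", "type", "=fruit");
        // The synthetic bucket column is not a partition key, so it is filtered out
        System.out.println(computePartitionKeyFilter(List.of("ds", "type"), domains));
    }
}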
Use of io.trino.plugin.hive.HiveColumnHandle in project trino by trinodb.
The class TestOrcPageSourceFactory, method readFile:
private static List<Nation> readFile(Map<NationColumn, Integer> columns, OptionalLong nationKeyPredicate, Optional<AcidInfo> acidInfo, String filePath, long fileSize)
{
    TupleDomain<HiveColumnHandle> tupleDomain = TupleDomain.all();
    if (nationKeyPredicate.isPresent()) {
        tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(toHiveColumnHandle(NATION_KEY, 0), Domain.singleValue(INTEGER, nationKeyPredicate.getAsLong())));
    }
    List<HiveColumnHandle> columnHandles = columns.entrySet().stream()
            .map(entry -> toHiveColumnHandle(entry.getKey(), entry.getValue()))
            .collect(toImmutableList());
    List<String> columnNames = columnHandles.stream()
            .map(HiveColumnHandle::getName)
            .collect(toImmutableList());
    Optional<ReaderPageSource> pageSourceWithProjections = PAGE_SOURCE_FACTORY.createPageSource(
            new JobConf(new Configuration(false)), SESSION, new Path(filePath), 0, fileSize, fileSize,
            createSchema(), columnHandles, tupleDomain, acidInfo, OptionalInt.empty(), false, NO_ACID_TRANSACTION);
    checkArgument(pageSourceWithProjections.isPresent());
    checkArgument(pageSourceWithProjections.get().getReaderColumns().isEmpty(), "projected columns not expected here");
    ConnectorPageSource pageSource = pageSourceWithProjections.get().get();

    int nationKeyColumn = columnNames.indexOf("n_nationkey");
    int nameColumn = columnNames.indexOf("n_name");
    int regionKeyColumn = columnNames.indexOf("n_regionkey");
    int commentColumn = columnNames.indexOf("n_comment");

    ImmutableList.Builder<Nation> rows = ImmutableList.builder();
    while (!pageSource.isFinished()) {
        Page page = pageSource.getNextPage();
        if (page == null) {
            continue;
        }
        page = page.getLoadedPage();
        for (int position = 0; position < page.getPositionCount(); position++) {
            long nationKey = -42;
            if (nationKeyColumn >= 0) {
                nationKey = BIGINT.getLong(page.getBlock(nationKeyColumn), position);
            }
            String name = "<not read>";
            if (nameColumn >= 0) {
                name = VARCHAR.getSlice(page.getBlock(nameColumn), position).toStringUtf8();
            }
            long regionKey = -42;
            if (regionKeyColumn >= 0) {
                regionKey = BIGINT.getLong(page.getBlock(regionKeyColumn), position);
            }
            String comment = "<not read>";
            if (commentColumn >= 0) {
                comment = VARCHAR.getSlice(page.getBlock(commentColumn), position).toStringUtf8();
            }
            rows.add(new Nation(position, nationKey, name, regionKey, comment));
        }
    }
    return rows.build();
}
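A hypothetical invocation of readFile; the NationColumn constants, file path, and predicate value below are assumptions for illustration, not taken from this excerpt:

// Hypothetical: read only n_nationkey and n_name, keeping rows where n_nationkey = 17
File orcFile = new File("/tmp/nation.orc"); // illustrative path
List<Nation> rows = readFile(
        ImmutableMap.of(NATION_KEY, 0, NAME, 1), // assumed NationColumn constants, mapped to file positions
        OptionalLong.of(17),
        Optional.empty(),                        // no ACID info
        orcFile.getPath(),
        orcFile.length());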