Use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.
From class HiveBucketing, method getHiveBucketFilter.
public static Optional<HiveBucketFilter> getHiveBucketFilter(HiveTableHandle hiveTable, TupleDomain<ColumnHandle> effectivePredicate)
{
    if (hiveTable.getBucketHandle().isEmpty()) {
        return Optional.empty();
    }
    HiveBucketProperty hiveBucketProperty = hiveTable.getBucketHandle().get().toTableBucketProperty();
    List<Column> dataColumns = hiveTable.getDataColumns().stream()
            .map(HiveColumnHandle::toMetastoreColumn)
            .collect(toImmutableList());

    // First attempt: derive bucket ids from discrete values bound to the bucket columns
    Optional<Map<ColumnHandle, List<NullableValue>>> bindings = TupleDomain.extractDiscreteValues(effectivePredicate);
    if (bindings.isEmpty()) {
        return Optional.empty();
    }
    Optional<Set<Integer>> buckets = getHiveBuckets(hiveBucketProperty, dataColumns, bindings.get());
    if (buckets.isPresent()) {
        return Optional.of(new HiveBucketFilter(buckets.get()));
    }

    // Fallback: look for a predicate on the synthetic "$bucket" column itself
    Optional<Domain> domain = effectivePredicate.getDomains().flatMap(domains -> domains.entrySet().stream()
            .filter(entry -> ((HiveColumnHandle) entry.getKey()).getName().equals(BUCKET_COLUMN_NAME))
            .findFirst()
            .map(Entry::getValue));
    if (domain.isEmpty()) {
        return Optional.empty();
    }

    // Keep every bucket id in [0, bucketCount) that satisfies the "$bucket" domain
    ValueSet values = domain.get().getValues();
    ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
    int bucketCount = hiveBucketProperty.getBucketCount();
    for (int i = 0; i < bucketCount; i++) {
        if (values.containsValue((long) i)) {
            builder.add(i);
        }
    }
    return Optional.of(new HiveBucketFilter(builder.build()));
}
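The method tries two strategies: hashing discrete bucket-column values (delegated to getHiveBuckets below) and, failing that, reading a predicate on the synthetic $bucket column directly. The following is a minimal, self-contained sketch of that second fallback step, with a plain LongPredicate standing in for Trino's ValueSet; all names here are illustrative, not Trino API.

import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.LongPredicate;

final class BucketFilterSketch
{
    // Enumerate every bucket id in [0, bucketCount) accepted by the predicate,
    // mirroring the loop over values.containsValue((long) i) above.
    static Set<Integer> enumerateMatchingBuckets(int bucketCount, LongPredicate bucketDomain)
    {
        Set<Integer> buckets = new LinkedHashSet<>();
        for (int i = 0; i < bucketCount; i++) {
            if (bucketDomain.test(i)) {
                buckets.add(i);
            }
        }
        return buckets;
    }

    public static void main(String[] args)
    {
        // Hypothetical $bucket predicate: only buckets 2 and 5 survive
        System.out.println(enumerateMatchingBuckets(8, value -> value == 2 || value == 5)); // [2, 5]
    }
}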
Use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.
From class HiveBucketing, method getHiveBuckets.
private static Optional<Set<Integer>> getHiveBuckets(HiveBucketProperty hiveBucketProperty, List<Column> dataColumns, Map<ColumnHandle, List<NullableValue>> bindings)
{
    if (bindings.isEmpty()) {
        return Optional.empty();
    }

    // Get bucket column names
    List<String> bucketColumns = hiveBucketProperty.getBucketedBy();

    // Verify the bucket column types are supported
    Map<String, HiveType> hiveTypes = new HashMap<>();
    for (Column column : dataColumns) {
        hiveTypes.put(column.getName(), column.getType());
    }
    for (String column : bucketColumns) {
        if (!SUPPORTED_TYPES_FOR_BUCKET_FILTER.contains(hiveTypes.get(column))) {
            return Optional.empty();
        }
    }

    // Get bindings for bucket columns
    Map<String, List<NullableValue>> bucketBindings = new HashMap<>();
    for (Entry<ColumnHandle, List<NullableValue>> entry : bindings.entrySet()) {
        HiveColumnHandle columnHandle = (HiveColumnHandle) entry.getKey();
        if (bucketColumns.contains(columnHandle.getName())) {
            bucketBindings.put(columnHandle.getName(), entry.getValue());
        }
    }

    // Check that we have bindings for all bucket columns
    if (bucketBindings.size() != bucketColumns.size()) {
        return Optional.empty();
    }

    // Order bucket column bindings according to the bucket columns order
    List<List<NullableValue>> orderedBindings = bucketColumns.stream()
            .map(bucketBindings::get)
            .collect(toImmutableList());

    // Get TypeInfos for bucket columns
    List<TypeInfo> typeInfos = bucketColumns.stream()
            .map(name -> hiveTypes.get(name).getTypeInfo())
            .collect(toImmutableList());

    return getHiveBuckets(hiveBucketProperty.getBucketingVersion(), hiveBucketProperty.getBucketCount(), typeInfos, orderedBindings);
}
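Two of the guards above are easy to miss: every bucket column must have discrete bindings, and the bindings must be re-ordered to match the bucketed-by order before hashing. A minimal sketch of just those two steps, using plain JDK types in place of Trino's handles (all names illustrative):

import java.util.List;
import java.util.Map;
import java.util.Optional;
import static java.util.stream.Collectors.toList;

final class BucketBindingSketch
{
    // Returns the bindings re-ordered to the bucketed-by column order, or empty
    // when any bucket column is unbound (bucket ids would then be unpredictable).
    static Optional<List<List<String>>> orderBindings(List<String> bucketColumns, Map<String, List<String>> bucketBindings)
    {
        if (!bucketBindings.keySet().containsAll(bucketColumns)) {
            return Optional.empty();
        }
        return Optional.of(bucketColumns.stream().map(bucketBindings::get).collect(toList()));
    }

    public static void main(String[] args)
    {
        List<String> bucketedBy = List.of("customer_id", "region");
        Map<String, List<String>> bindings = Map.of("region", List.of("emea"), "customer_id", List.of("1", "2"));
        // Bindings come back in bucketed-by order regardless of map iteration order
        System.out.println(orderBindings(bucketedBy, bindings)); // Optional[[[1, 2], [emea]]]
    }
}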
Use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.
From class TestGlueInputConverter, method assertStorage.
private static void assertStorage(StorageDescriptor actual, Storage expected)
{
    assertEquals(actual.getLocation(), expected.getLocation());
    assertEquals(actual.getSerdeInfo().getSerializationLibrary(), expected.getStorageFormat().getSerde());
    assertEquals(actual.getInputFormat(), expected.getStorageFormat().getInputFormat());
    assertEquals(actual.getOutputFormat(), expected.getStorageFormat().getOutputFormat());
    if (expected.getBucketProperty().isPresent()) {
        HiveBucketProperty bucketProperty = expected.getBucketProperty().get();
        assertEquals(actual.getBucketColumns(), bucketProperty.getBucketedBy());
        assertEquals(actual.getNumberOfBuckets().intValue(), bucketProperty.getBucketCount());
    }
}
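The assertions encode how the converter is expected to lay Trino's flat StorageFormat out across Glue's nested StorageDescriptor: the serde name moves under serdeInfo, while the input and output formats map one-to-one. A self-contained sketch of that mapping, with plain records standing in for both models (the record types and toGlue are illustrative stand-ins, not the converter's API):

final class StorageMappingSketch
{
    record StorageFormat(String serde, String inputFormat, String outputFormat) {}
    record SerdeInfo(String serializationLibrary) {}
    record StorageDescriptor(SerdeInfo serdeInfo, String inputFormat, String outputFormat) {}

    // The conversion relocates the serde name into Glue's nested SerdeInfo,
    // while input/output formats carry over unchanged.
    static StorageDescriptor toGlue(StorageFormat format)
    {
        return new StorageDescriptor(new SerdeInfo(format.serde()), format.inputFormat(), format.outputFormat());
    }

    public static void main(String[] args)
    {
        StorageFormat orc = new StorageFormat(
                "org.apache.hadoop.hive.ql.io.orc.OrcSerde",
                "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
                "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat");
        System.out.println(toGlue(orc));
    }
}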
Use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.
From class TestGlueToTrinoConverter, method assertStorage.
private static void assertStorage(Storage actual, StorageDescriptor expected)
{
    assertEquals(actual.getLocation(), expected.getLocation());
    assertEquals(actual.getStorageFormat().getSerde(), expected.getSerdeInfo().getSerializationLibrary());
    assertEquals(actual.getStorageFormat().getInputFormat(), expected.getInputFormat());
    assertEquals(actual.getStorageFormat().getOutputFormat(), expected.getOutputFormat());
    if (!isNullOrEmpty(expected.getBucketColumns())) {
        HiveBucketProperty bucketProperty = actual.getBucketProperty().get();
        assertEquals(bucketProperty.getBucketedBy(), expected.getBucketColumns());
        assertEquals(bucketProperty.getBucketCount(), expected.getNumberOfBuckets().intValue());
    }
}
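This is the mirror image of the previous helper, validating the Glue-to-Trino direction, so its guard keys off Glue's raw bucket-column list rather than Trino's Optional. A small runnable stand-in for the statically imported isNullOrEmpty, assuming it has the usual null-or-empty semantics (the actual helper is not shown in the snippet):

import java.util.Collection;
import java.util.List;

final class GlueGuardSketch
{
    // Glue's StorageDescriptor models "not bucketed" as either a null or an empty
    // bucket-column list, whereas Trino's Storage models it as Optional.empty().
    static boolean isNullOrEmpty(Collection<?> collection)
    {
        return collection == null || collection.isEmpty();
    }

    public static void main(String[] args)
    {
        System.out.println(isNullOrEmpty(null)); // true: no bucketing metadata at all
        System.out.println(isNullOrEmpty(List.of())); // true: empty column list
        System.out.println(isNullOrEmpty(List.of("customer_id"))); // false: bucketed table
    }
}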
Use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.
From class HiveBucketing, method getHiveBucketHandle.
public static Optional<HiveBucketHandle> getHiveBucketHandle(ConnectorSession session, Table table, TypeManager typeManager)
{
    // Spark-written tables use a bucketing scheme incompatible with Hive's
    if (table.getParameters().containsKey(SPARK_TABLE_PROVIDER_KEY)) {
        return Optional.empty();
    }
    Optional<HiveBucketProperty> hiveBucketProperty = table.getStorage().getBucketProperty();
    if (hiveBucketProperty.isEmpty()) {
        return Optional.empty();
    }
    if (!isSupportedBucketing(table)) {
        return Optional.empty();
    }
    HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
    Map<String, HiveColumnHandle> map = getRegularColumnHandles(table, typeManager, timestampPrecision).stream()
            .collect(Collectors.toMap(HiveColumnHandle::getName, identity()));

    // Resolve each bucket column name to its column handle, failing on corrupt metadata
    ImmutableList.Builder<HiveColumnHandle> bucketColumns = ImmutableList.builder();
    for (String bucketColumnName : hiveBucketProperty.get().getBucketedBy()) {
        HiveColumnHandle bucketColumnHandle = map.get(bucketColumnName);
        if (bucketColumnHandle == null) {
            throw new TrinoException(HIVE_INVALID_METADATA, format("Table '%s.%s' is bucketed on non-existent column '%s'", table.getDatabaseName(), table.getTableName(), bucketColumnName));
        }
        bucketColumns.add(bucketColumnHandle);
    }
    BucketingVersion bucketingVersion = hiveBucketProperty.get().getBucketingVersion();
    int bucketCount = hiveBucketProperty.get().getBucketCount();
    List<SortingColumn> sortedBy = hiveBucketProperty.get().getSortedBy();
    // Table bucket count and read bucket count start out equal
    return Optional.of(new HiveBucketHandle(bucketColumns.build(), bucketingVersion, bucketCount, bucketCount, sortedBy));
}
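The heart of the method is resolving each bucketed-by name against the table's regular columns and treating a miss as corrupt metadata rather than silently ignoring it. A simplified, self-contained sketch of that resolution step, with a plain record standing in for HiveColumnHandle (all names illustrative):

import java.util.List;
import java.util.Map;
import java.util.function.Function;
import static java.util.stream.Collectors.toMap;

final class BucketHandleSketch
{
    record ColumnHandle(String name) {}

    // Look each bucket column name up among the table's regular columns;
    // a missing name means the table metadata is inconsistent, so fail loudly.
    static List<ColumnHandle> resolveBucketColumns(List<ColumnHandle> regularColumns, List<String> bucketedBy)
    {
        Map<String, ColumnHandle> byName = regularColumns.stream().collect(toMap(ColumnHandle::name, Function.identity()));
        return bucketedBy.stream().map(name -> {
            ColumnHandle handle = byName.get(name);
            if (handle == null) {
                throw new IllegalStateException("Table is bucketed on non-existent column '" + name + "'");
            }
            return handle;
        }).toList();
    }

    public static void main(String[] args)
    {
        List<ColumnHandle> columns = List.of(new ColumnHandle("id"), new ColumnHandle("name"));
        System.out.println(resolveBucketColumns(columns, List.of("id"))); // [ColumnHandle[name=id]]
    }
}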