Use of com.facebook.presto.common.Subfield in project presto by prestodb.
From the class DeltaPageSourceProvider, the method getParquetTupleDomain converts the effective predicate on Delta column handles into a Parquet tuple domain, resolving pushed-down subfields to their Parquet column descriptors:
public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<DeltaColumnHandle> effectivePredicate)
{
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }

    ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder();
    for (Map.Entry<DeltaColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        DeltaColumnHandle columnHandle = entry.getKey();

        RichColumnDescriptor descriptor;
        if (isPushedDownSubfield(columnHandle)) {
            // A pushed-down subfield is looked up by its full column path, not by the root name
            Subfield pushedDownSubfield = getPushedDownSubfield(columnHandle);
            List<String> subfieldPath = columnPathFromSubfield(pushedDownSubfield);
            descriptor = descriptorsByPath.get(subfieldPath);
        }
        else {
            descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
        }
        // Columns missing from the Parquet schema are simply dropped from the predicate
        if (descriptor != null) {
            predicate.put(descriptor, entry.getValue());
        }
    }
    return TupleDomain.withColumnDomains(predicate.build());
}
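For reference, a hedged sketch of the subfield-to-path step: descriptorsByPath is keyed by the column path, so a pushed-down subfield like orders.shipping.zip must flatten to ["orders", "shipping", "zip"]. The helper below is a hypothetical stand-in for columnPathFromSubfield, shown only to illustrate that shape; the Subfield accessors are the ones used elsewhere on this page.

import com.facebook.presto.common.Subfield;
import com.google.common.collect.ImmutableList;

import java.util.List;

public class SubfieldPathSketch
{
    // Hypothetical stand-in for columnPathFromSubfield: the root column name
    // followed by each nested field name, matching the descriptorsByPath keys.
    public static List<String> columnPathFromSubfield(Subfield subfield)
    {
        ImmutableList.Builder<String> columnPath = ImmutableList.builder();
        columnPath.add(subfield.getRootName());
        for (Subfield.PathElement element : subfield.getPath()) {
            columnPath.add(((Subfield.NestedField) element).getName());
        }
        return columnPath.build();
    }

    public static void main(String[] args)
    {
        // The single-string constructor parses a dotted path
        System.out.println(columnPathFromSubfield(new Subfield("orders.shipping.zip")));
        // expected: [orders, shipping, zip]
    }
}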
Use of com.facebook.presto.common.Subfield in project presto by prestodb.
From the class HiveSplitManager, the method getPartitionMetadata loads partition metadata in exponentially growing batches, passing the predicate columns and their subfield domains (Optional<Map<Subfield, Domain>>) along so that pruned partitions can be skipped:
private Iterable<HivePartitionMetadata> getPartitionMetadata(SemiTransactionalHiveMetastore metastore, Table table, SchemaTableName tableName, List<HivePartition> hivePartitions, Optional<HiveBucketHandle> hiveBucketHandle, ConnectorSession session, WarningCollector warningCollector, Optional<Set<HiveColumnHandle>> requestedColumns, Map<String, HiveColumnHandle> predicateColumns, Optional<Map<Subfield, Domain>> domains)
{
    if (hivePartitions.isEmpty()) {
        return ImmutableList.of();
    }

    Optional<Set<HiveColumnHandle>> allRequestedColumns = mergeRequestedAndPredicateColumns(requestedColumns, ImmutableSet.copyOf(predicateColumns.values()));

    if (hivePartitions.size() == 1) {
        HivePartition firstPartition = getOnlyElement(hivePartitions);
        if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
            return ImmutableList.of(new HivePartitionMetadata(
                    firstPartition,
                    Optional.empty(),
                    TableToPartitionMapping.empty(),
                    encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns),
                    ImmutableSet.of()));
        }
    }

    StorageFormat storageFormat = table.getStorage().getStorageFormat();
    Optional<HiveStorageFormat> hiveStorageFormat = getHiveStorageFormat(storageFormat);
    Optional<HiveStorageFormat> resolvedHiveStorageFormat;
    if (isUseParquetColumnNames(session)) {
        // Treat the table as Parquet if it has no recognized Hive storage format but is of HUDI format
        resolvedHiveStorageFormat = (!hiveStorageFormat.isPresent() && isHudiFormat(storageFormat)) ? Optional.of(PARQUET) : hiveStorageFormat;
    }
    else {
        resolvedHiveStorageFormat = hiveStorageFormat;
    }

    Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
    Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
        Map<String, PartitionSplitInfo> partitionSplitInfo = getPartitionSplitInfo(session, metastore, tableName, partitionBatch, predicateColumns, domains);
        if (partitionBatch.size() != partitionSplitInfo.size()) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitionSplitInfo.size()));
        }

        Map<String, Partition> partitions = partitionSplitInfo.entrySet().stream()
                .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getPartition()));
        Optional<Map<String, EncryptionInformation>> encryptionInformationForPartitions = encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns, partitions);

        ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
        Map<String, Set<String>> partitionsNotReadable = new HashMap<>();
        int unreadablePartitionsSkipped = 0;
        for (HivePartition hivePartition : partitionBatch) {
            Partition partition = partitions.get(hivePartition.getPartitionId());
            if (partitionSplitInfo.get(hivePartition.getPartitionId()).isPruned()) {
                continue;
            }
            if (partition == null) {
                throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
            }

            String partitionName = makePartName(table.getPartitionColumns(), partition.getValues());
            Optional<EncryptionInformation> encryptionInformation = encryptionInformationForPartitions.map(metadata -> metadata.get(hivePartition.getPartitionId()));

            if (!isOfflineDataDebugModeEnabled(session)) {
                // verify the partition is online
                verifyOnline(tableName, Optional.of(partitionName), getProtectMode(partition), partition.getParameters());

                // verify the partition is not marked as non-readable
                String reason = partition.getParameters().get(OBJECT_NOT_READABLE);
                if (!isNullOrEmpty(reason)) {
                    if (!shouldIgnoreUnreadablePartition(session) || !partition.isEligibleToIgnore()) {
                        throw new HiveNotReadableException(tableName, Optional.of(partitionName), reason);
                    }
                    unreadablePartitionsSkipped++;
                    // keep only a few example reasons and partitions for the warning message
                    if (partitionsNotReadable.size() <= 3) {
                        partitionsNotReadable.putIfAbsent(reason, new HashSet<>(ImmutableSet.of(partitionName)));
                        if (partitionsNotReadable.get(reason).size() <= 3) {
                            partitionsNotReadable.get(reason).add(partitionName);
                        }
                    }
                    continue;
                }
            }

            // Verify that the partition schema matches the table schema.
            // Either adding or dropping columns from the end of the table
            // without modifying existing partitions is allowed, but every
            // column that exists in both the table and partition must have
            // the same type.
            List<Column> tableColumns = table.getDataColumns();
            List<Column> partitionColumns = partition.getColumns();
            if ((tableColumns == null) || (partitionColumns == null)) {
                throw new PrestoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partitionName));
            }
            TableToPartitionMapping tableToPartitionMapping = getTableToPartitionMapping(session, resolvedHiveStorageFormat, tableName, partitionName, tableColumns, partitionColumns);

            if (hiveBucketHandle.isPresent() && !hiveBucketHandle.get().isVirtuallyBucketed()) {
                Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
                if (!partitionBucketProperty.isPresent()) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) is bucketed but partition (%s) is not bucketed", hivePartition.getTableName(), hivePartition.getPartitionId()));
                }
                int tableBucketCount = hiveBucketHandle.get().getTableBucketCount();
                int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
                List<String> tableBucketColumns = hiveBucketHandle.get().getColumns().stream()
                        .map(HiveColumnHandle::getName)
                        .collect(toImmutableList());
                List<String> partitionBucketColumns = partitionBucketProperty.get().getBucketedBy();
                if (!tableBucketColumns.equals(partitionBucketColumns) || !isBucketCountCompatible(tableBucketCount, partitionBucketCount)) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) bucketing (columns=%s, buckets=%s) is not compatible with partition (%s) bucketing (columns=%s, buckets=%s)", hivePartition.getTableName(), tableBucketColumns, tableBucketCount, hivePartition.getPartitionId(), partitionBucketColumns, partitionBucketCount));
                }
            }

            results.add(new HivePartitionMetadata(hivePartition, Optional.of(partition), tableToPartitionMapping, encryptionInformation, partitionSplitInfo.get(hivePartition.getPartitionId()).getRedundantColumnDomains()));
        }
        if (unreadablePartitionsSkipped > 0) {
            StringBuilder warningMessage = new StringBuilder(format("Table '%s' has %s out of %s partitions unreadable: ", tableName, unreadablePartitionsSkipped, partitionBatch.size()));
            for (Entry<String, Set<String>> entry : partitionsNotReadable.entrySet()) {
                warningMessage.append(String.join(", ", entry.getValue())).append("... are due to ").append(entry.getKey()).append(". ");
            }
            warningCollector.add(new PrestoWarning(PARTITION_NOT_READABLE, warningMessage.toString()));
        }
        return results.build();
    });
    return concat(partitionBatches);
}
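For orientation, a minimal sketch of what the domains argument might carry; the exact call-site wiring is elided, and the empty-path Subfield convention for top-level columns is taken from the setupHive snippet further down this page.

import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.predicate.Domain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType;
import static io.airlift.slice.Slices.utf8Slice;

public class DomainsArgumentSketch
{
    public static void main(String[] args)
    {
        // One Domain per Subfield; a top-level column "ds" is a Subfield with an empty path,
        // here restricted to a single partition value.
        Optional<Map<Subfield, Domain>> domains = Optional.of(ImmutableMap.of(
                new Subfield("ds", ImmutableList.of()),
                Domain.singleValue(createUnboundedVarcharType(), utf8Slice("2012-12-29"))));
        System.out.println(domains);
    }
}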
Use of com.facebook.presto.common.Subfield in project presto by prestodb.
From the class SubfieldExtractor, the method toSubfield walks a RowExpression inward from the outermost dereference or subscript to the root variable reference, accumulating Subfield path elements along the way:
private static Optional<Subfield> toSubfield(RowExpression expression, StandardFunctionResolution functionResolution, ExpressionOptimizer expressionOptimizer, ConnectorSession connectorSession)
{
    List<Subfield.PathElement> elements = new ArrayList<>();
    while (true) {
        if (expression instanceof VariableReferenceExpression) {
            // Reached the root column; elements were collected outermost-first, so reverse them
            Collections.reverse(elements);
            return Optional.of(new Subfield(((VariableReferenceExpression) expression).getName(), unmodifiableList(elements)));
        }

        if (expression instanceof SpecialFormExpression && ((SpecialFormExpression) expression).getForm() == DEREFERENCE) {
            // A row-type dereference with a constant field index becomes a NestedField
            SpecialFormExpression dereferenceExpression = (SpecialFormExpression) expression;
            RowExpression base = dereferenceExpression.getArguments().get(0);
            RowType baseType = (RowType) base.getType();

            RowExpression indexExpression = expressionOptimizer.optimize(dereferenceExpression.getArguments().get(1), ExpressionOptimizer.Level.OPTIMIZED, connectorSession);
            if (indexExpression instanceof ConstantExpression) {
                Object index = ((ConstantExpression) indexExpression).getValue();
                if (index instanceof Number) {
                    Optional<String> fieldName = baseType.getFields().get(((Number) index).intValue()).getName();
                    if (fieldName.isPresent()) {
                        elements.add(new Subfield.NestedField(fieldName.get()));
                        expression = base;
                        continue;
                    }
                }
            }
            return Optional.empty();
        }

        if (expression instanceof CallExpression && functionResolution.isSubscriptFunction(((CallExpression) expression).getFunctionHandle())) {
            // A subscript with a constant index becomes a LongSubscript (numeric) or StringSubscript (varchar)
            List<RowExpression> arguments = ((CallExpression) expression).getArguments();
            RowExpression indexExpression = expressionOptimizer.optimize(arguments.get(1), ExpressionOptimizer.Level.OPTIMIZED, connectorSession);
            if (indexExpression instanceof ConstantExpression) {
                Object index = ((ConstantExpression) indexExpression).getValue();
                if (index instanceof Number) {
                    elements.add(new Subfield.LongSubscript(((Number) index).longValue()));
                    expression = arguments.get(0);
                    continue;
                }
                if (isVarcharType(indexExpression.getType())) {
                    elements.add(new Subfield.StringSubscript(((Slice) index).toStringUtf8()));
                    expression = arguments.get(0);
                    continue;
                }
            }
            return Optional.empty();
        }

        // Any other expression shape is not a subfield access
        return Optional.empty();
    }
}
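To make the mapping concrete, here is a short sketch of the Subfield that toSubfield would produce for an expression shaped like orders.items[1]['color']; the path-element classes are the ones used above, and the printed form is an assumption.

import com.facebook.presto.common.Subfield;
import com.google.common.collect.ImmutableList;

public class ToSubfieldShapeSketch
{
    public static void main(String[] args)
    {
        // A DEREFERENCE on a row type contributes a NestedField, a numeric subscript a
        // LongSubscript, and a varchar subscript a StringSubscript; the loop collects
        // them outermost-first and reverses, so the path below reads root-to-leaf.
        Subfield subfield = new Subfield("orders", ImmutableList.of(
                new Subfield.NestedField("items"),
                new Subfield.LongSubscript(1),
                new Subfield.StringSubscript("color")));
        System.out.println(subfield); // expected to print something like orders.items[1]["color"]
    }
}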
Use of com.facebook.presto.common.Subfield in project presto by prestodb.
From the class AbstractTestHiveClient, the method setupHive initializes the shared test fixtures, including a TupleDomain<Subfield> domain predicate in which each top-level column becomes a Subfield with an empty path:
protected void setupHive(String connectorId, String databaseName, String timeZoneId)
{
    clientId = connectorId;
    database = databaseName;
    tablePartitionFormat = new SchemaTableName(database, "presto_test_partition_format");
    tableUnpartitioned = new SchemaTableName(database, "presto_test_unpartitioned");
    tableOffline = new SchemaTableName(database, "presto_test_offline");
    tableOfflinePartition = new SchemaTableName(database, "presto_test_offline_partition");
    tableNotReadable = new SchemaTableName(database, "presto_test_not_readable");
    view = new SchemaTableName(database, "presto_test_view");
    invalidTable = new SchemaTableName(database, INVALID_TABLE);
    tableBucketedStringInt = new SchemaTableName(database, "presto_test_bucketed_by_string_int");
    tableBucketedBigintBoolean = new SchemaTableName(database, "presto_test_bucketed_by_bigint_boolean");
    tableBucketedDoubleFloat = new SchemaTableName(database, "presto_test_bucketed_by_double_float");
    tablePartitionSchemaChange = new SchemaTableName(database, "presto_test_partition_schema_change");
    tablePartitionSchemaChangeNonCanonical = new SchemaTableName(database, "presto_test_partition_schema_change_non_canonical");
    tableBucketEvolution = new SchemaTableName(database, "presto_test_bucket_evolution");

    invalidClientId = "hive";
    invalidTableHandle = new HiveTableHandle(database, INVALID_TABLE);
    invalidTableLayoutHandle = new HiveTableLayoutHandle(
            invalidTable,
            "path",
            ImmutableList.of(),
            ImmutableList.of(),
            ImmutableMap.of(),
            ImmutableList.of(new HivePartition(invalidTable, "unknown", ImmutableMap.of())),
            TupleDomain.all(),
            TRUE_CONSTANT,
            ImmutableMap.of(),
            TupleDomain.all(),
            Optional.empty(),
            Optional.empty(),
            false,
            "layout",
            Optional.empty(),
            false);

    int partitionColumnIndex = MAX_PARTITION_KEY_COLUMN_INDEX;
    dsColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    fileFormatColumn = new HiveColumnHandle("file_format", HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    dummyColumn = new HiveColumnHandle("dummy", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    intColumn = new HiveColumnHandle("t_int", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    invalidColumnHandle = new HiveColumnHandle(INVALID_COLUMN, HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), 0, REGULAR, Optional.empty(), Optional.empty());

    List<HiveColumnHandle> partitionColumns = ImmutableList.of(dsColumn, fileFormatColumn, dummyColumn);
    List<HivePartition> partitions = ImmutableList.<HivePartition>builder()
            .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=textfile/dummy=1", ImmutableMap.<ColumnHandle, NullableValue>builder()
                    .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                    .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("textfile")))
                    .put(dummyColumn, NullableValue.of(INTEGER, 1L))
                    .build()))
            .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=sequencefile/dummy=2", ImmutableMap.<ColumnHandle, NullableValue>builder()
                    .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                    .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("sequencefile")))
                    .put(dummyColumn, NullableValue.of(INTEGER, 2L))
                    .build()))
            .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=rctext/dummy=3", ImmutableMap.<ColumnHandle, NullableValue>builder()
                    .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                    .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rctext")))
                    .put(dummyColumn, NullableValue.of(INTEGER, 3L))
                    .build()))
            .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=rcbinary/dummy=4", ImmutableMap.<ColumnHandle, NullableValue>builder()
                    .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                    .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rcbinary")))
                    .put(dummyColumn, NullableValue.of(INTEGER, 4L))
                    .build()))
            .build();
    partitionCount = partitions.size();
    tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.of(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))));
    TupleDomain<Subfield> domainPredicate = tupleDomain
            .transform(HiveColumnHandle.class::cast)
            .transform(column -> new Subfield(column.getName(), ImmutableList.of()));
    tableLayout = new ConnectorTableLayout(
            new HiveTableLayoutHandle(
                    tablePartitionFormat,
                    "path",
                    partitionColumns,
                    ImmutableList.of(
                            new Column("t_string", HIVE_STRING, Optional.empty(), Optional.empty()),
                            new Column("t_tinyint", HIVE_BYTE, Optional.empty(), Optional.empty()),
                            new Column("t_smallint", HIVE_SHORT, Optional.empty(), Optional.empty()),
                            new Column("t_int", HIVE_INT, Optional.empty(), Optional.empty()),
                            new Column("t_bigint", HIVE_LONG, Optional.empty(), Optional.empty()),
                            new Column("t_float", HIVE_FLOAT, Optional.empty(), Optional.empty()),
                            new Column("t_double", HIVE_DOUBLE, Optional.empty(), Optional.empty()),
                            new Column("t_boolean", HIVE_BOOLEAN, Optional.empty(), Optional.empty())),
                    ImmutableMap.of(),
                    partitions,
                    domainPredicate,
                    TRUE_CONSTANT,
                    ImmutableMap.of(dsColumn.getName(), dsColumn),
                    tupleDomain,
                    Optional.empty(),
                    Optional.empty(),
                    false,
                    "layout",
                    Optional.empty(),
                    false),
            Optional.empty(),
            withColumnDomains(ImmutableMap.of(
                    dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                    fileFormatColumn, Domain.create(ValueSet.ofRanges(
                            Range.equal(createUnboundedVarcharType(), utf8Slice("textfile")),
                            Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile")),
                            Range.equal(createUnboundedVarcharType(), utf8Slice("rctext")),
                            Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false),
                    dummyColumn, Domain.create(ValueSet.ofRanges(
                            Range.equal(INTEGER, 1L),
                            Range.equal(INTEGER, 2L),
                            Range.equal(INTEGER, 3L),
                            Range.equal(INTEGER, 4L)), false))),
            Optional.empty(),
            Optional.empty(),
            Optional.of(new DiscretePredicates(
                    ImmutableList.copyOf(partitionColumns),
                    ImmutableList.of(
                            withColumnDomains(ImmutableMap.of(
                                    dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                                    fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile"))), false),
                                    dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L)), false))),
                            withColumnDomains(ImmutableMap.of(
                                    dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                                    fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile"))), false),
                                    dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 2L)), false))),
                            withColumnDomains(ImmutableMap.of(
                                    dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                                    fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rctext"))), false),
                                    dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 3L)), false))),
                            withColumnDomains(ImmutableMap.of(
                                    dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                                    fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false),
                                    dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 4L)), false)))))),
            ImmutableList.of());
    List<HivePartition> unpartitionedPartitions = ImmutableList.of(new HivePartition(tableUnpartitioned));
    unpartitionedTableLayout = new ConnectorTableLayout(new HiveTableLayoutHandle(
            tableUnpartitioned,
            "path",
            ImmutableList.of(),
            ImmutableList.of(
                    new Column("t_string", HIVE_STRING, Optional.empty(), Optional.empty()),
                    new Column("t_tinyint", HIVE_BYTE, Optional.empty(), Optional.empty())),
            ImmutableMap.of(),
            unpartitionedPartitions,
            TupleDomain.all(),
            TRUE_CONSTANT,
            ImmutableMap.of(),
            TupleDomain.all(),
            Optional.empty(),
            Optional.empty(),
            false,
            "layout",
            Optional.empty(),
            false));
    timeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(ZoneId.of(timeZoneId)));
}
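A compact sketch of the domain-predicate transform above, runnable on its own; String keys stand in for HiveColumnHandle (an assumption for brevity), and each top-level column maps to a Subfield with no path.

import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.predicate.NullableValue;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType;
import static io.airlift.slice.Slices.utf8Slice;

public class TopLevelSubfieldPredicateSketch
{
    public static void main(String[] args)
    {
        // Fixed-value predicate on a "ds" partition column, then rekeyed to Subfield
        TupleDomain<String> tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.of(
                "ds", NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))));
        TupleDomain<Subfield> domainPredicate =
                tupleDomain.transform(name -> new Subfield(name, ImmutableList.of()));
        System.out.println(domainPredicate);
    }
}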
Use of com.facebook.presto.common.Subfield in project presto by prestodb.
From the class TestAbstractDwrfEncryptionInformationSource, the test testGetReadEncryptionInformationForPartitionedTableWithTableLevelEncryption verifies read-time encryption metadata for a partitioned table, requesting a struct column restricted to the subfields col_struct.a and col_struct.b.b2:
@Test
public void testGetReadEncryptionInformationForPartitionedTableWithTableLevelEncryption()
{
    Table table = createTable(DWRF, Optional.of(forTable("table_level_key", "algo", "provider")), true);
    Optional<Map<String, EncryptionInformation>> encryptionInformation = encryptionInformationSource.getReadEncryptionInformation(
            SESSION,
            table,
            Optional.of(ImmutableSet.of(
                    // hiveColumnIndex value does not matter in this test
                    new HiveColumnHandle("col_bigint", HIVE_LONG, HIVE_LONG.getTypeSignature(), 0, REGULAR, Optional.empty(), Optional.empty()),
                    new HiveColumnHandle("col_struct", STRUCT_TYPE, STRUCT_TYPE.getTypeSignature(), 0, REGULAR, Optional.empty(), ImmutableList.of(new Subfield("col_struct.a"), new Subfield("col_struct.b.b2")), Optional.empty()))),
            ImmutableMap.of(
                    "ds=2020-01-01", new Partition("dbName", "tableName", ImmutableList.of("2020-01-01"), table.getStorage(), table.getDataColumns(), ImmutableMap.of(), Optional.empty(), false, true, 0),
                    "ds=2020-01-02", new Partition("dbName", "tableName", ImmutableList.of("2020-01-02"), table.getStorage(), table.getDataColumns(), ImmutableMap.of(), Optional.empty(), false, true, 0)));
    assertTrue(encryptionInformation.isPresent());
    assertEquals(encryptionInformation.get(), ImmutableMap.of(
            "ds=2020-01-01", EncryptionInformation.fromEncryptionMetadata(DwrfEncryptionMetadata.forTable("table_level_key".getBytes(), ImmutableMap.of(TEST_EXTRA_METADATA, "ds=2020-01-01"), "algo", "provider")),
            "ds=2020-01-02", EncryptionInformation.fromEncryptionMetadata(DwrfEncryptionMetadata.forTable("table_level_key".getBytes(), ImmutableMap.of(TEST_EXTRA_METADATA, "ds=2020-01-02"), "algo", "provider"))));
}
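As a side note, the single-string Subfield constructor used in the test parses a dotted path; a minimal sketch of what it yields, with accessor names matching the other snippets on this page and the printed forms assumed:

import com.facebook.presto.common.Subfield;

public class DottedPathSketch
{
    public static void main(String[] args)
    {
        Subfield subfield = new Subfield("col_struct.b.b2");
        System.out.println(subfield.getRootName()); // col_struct
        System.out.println(subfield.getPath());     // two NestedField elements: b, b2
    }
}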