Use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.
Class HiveMetadata, method getPartitionsSystemTable.
/**
 * Builds the system table that lists the partitions of {@code sourceTableName}.
 *
 * <p>Each row of the resulting table holds one value per partition column, read from the
 * keys of the partitions returned by the partition manager for the pushed-down constraint.
 *
 * @param session current connector session
 * @param tableName name given to the system table's metadata
 * @param sourceTableName table whose partitions are exposed
 * @return the system table, or empty when no handle exists for the source table or the
 *         source table has no partition columns
 * @throws TableNotFoundException if the metastore does not return the source table
 */
private Optional<SystemTable> getPartitionsSystemTable(ConnectorSession session, SchemaTableName tableName, SchemaTableName sourceTableName) {
    HiveTableHandle sourceHandle = getTableHandle(session, sourceTableName);
    if (sourceHandle == null) {
        return Optional.empty();
    }

    // The metastore lookup happens before the partition-column check, so a missing table
    // is reported even when the handle carries no partition columns.
    SchemaTableName resolvedName = sourceHandle.getSchemaTableName();
    Table sourceTable = metastore
            .getTable(new HiveIdentity(session), resolvedName.getSchemaName(), resolvedName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(resolvedName));

    List<HiveColumnHandle> partitionKeys = sourceHandle.getPartitionColumns();
    if (partitionKeys.isEmpty()) {
        return Optional.empty();
    }

    List<Type> keyTypes = partitionKeys.stream()
            .map(HiveColumnHandle::getTypeSignature)
            .map(typeManager::getType)
            .collect(toImmutableList());

    List<ColumnMetadata> systemTableColumns = partitionKeys.stream()
            .map(key -> new ColumnMetadata(
                    key.getName(),
                    typeManager.getType(key.getTypeSignature()),
                    key.getComment().orElse(null),
                    key.isHidden()))
            .collect(toImmutableList());

    // Field ids used by the system-table constraint are positions in the partition column list.
    Map<Integer, HiveColumnHandle> handleByFieldId = IntStream.range(0, partitionKeys.size())
            .boxed()
            .collect(toImmutableMap(identity(), partitionKeys::get));

    ConnectorTableMetadata systemTableMetadata = new ConnectorTableMetadata(tableName, systemTableColumns);
    return Optional.of(createSystemTable(systemTableMetadata, constraint -> {
        // Re-key the constraint from field ids to the source table's column handles.
        TupleDomain<ColumnHandle> sourceDomain = constraint.transform(handleByFieldId::get);
        Predicate<Map<ColumnHandle, NullableValue>> sourcePredicate = convertToPredicate(sourceDomain);
        Constraint sourceConstraint = new Constraint(sourceDomain, sourcePredicate);

        // Partitions are fetched each time the iterable is traversed; toList() is used for
        // the row because a partition key value may be null.
        Iterable<List<Object>> rows = () -> stream(
                partitionManager.getPartitions(metastore, new HiveIdentity(session), sourceHandle, sourceConstraint, sourceTable).getPartitions())
                .map(partition -> IntStream.range(0, partitionKeys.size())
                        .mapToObj(handleByFieldId::get)
                        .map(handle -> partition.getKeys().get(handle).getValue())
                        .collect(toList()))
                .iterator();

        return new InMemoryRecordSet(keyTypes, rows).cursor();
    }));
}
Use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.
Class HivePartitionManager, method parsePartition.
/**
 * Creates a {@link HivePartition} from a partition name by pairing every partition column
 * with the value parsed for it.
 *
 * <p>The values extracted from {@code partitionName} are matched to {@code partitionColumns}
 * and {@code partitionColumnTypes} by position, so all three are expected to line up; a
 * shorter value or type list fails on the positional {@code get}.
 *
 * @param tableName table the partition belongs to
 * @param partitionName partition name the values are extracted from
 * @param partitionColumns partition columns, in partition-name order
 * @param partitionColumnTypes type of each partition column, same order as {@code partitionColumns}
 * @return partition carrying one {@link NullableValue} per partition column
 */
public static HivePartition parsePartition(SchemaTableName tableName, String partitionName, List<HiveColumnHandle> partitionColumns, List<Type> partitionColumnTypes) {
    List<String> rawValues = extractPartitionValues(partitionName);
    ImmutableMap.Builder<ColumnHandle, NullableValue> keys = ImmutableMap.builder();
    int position = 0;
    for (HiveColumnHandle partitionColumn : partitionColumns) {
        keys.put(
                partitionColumn,
                parsePartitionValue(partitionName, rawValues.get(position), partitionColumnTypes.get(position)));
        position++;
    }
    return new HivePartition(tableName, partitionName, keys.build());
}
Use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.
Class HivePartitionManager, method getPartitions.
/**
 * Resolves the partitions of a Hive table that satisfy a constraint.
 *
 * <p>The constraint summary is intersected with the handle's enforced constraint. If the
 * result is "none", an empty result is returned; if the table has no partition columns, a
 * single table-level partition is returned. Otherwise partitions come either from the list
 * already stored on the handle or from partition names obtained via
 * {@code getFilteredPartitionNames}, and are filtered with the effective predicate and the
 * constraint's predicate.
 *
 * @param metastore metastore used to list partition names
 * @param identity identity passed to the partition-name lookup
 * @param tableHandle table handle; must be a {@link HiveTableHandle}
 * @param constraint constraint supplying the summary domain and an optional predicate
 * @param table metastore table, used for the bucket filter and the partition-name lookup
 * @return matching partitions together with the compact, remaining and enforced domains
 */
public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, HiveIdentity identity, ConnectorTableHandle tableHandle, Constraint constraint, Table table) {
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectiveDomain = constraint.getSummary().intersect(handle.getEnforcedConstraint());

    SchemaTableName schemaTableName = handle.getSchemaTableName();
    Optional<HiveBucketHandle> bucketHandle = handle.getBucketHandle();
    List<HiveColumnHandle> partitionKeys = handle.getPartitionColumns();

    // A "none" domain cannot match anything: return no partitions and no bucket filter.
    if (effectiveDomain.isNone()) {
        return new HivePartitionResult(partitionKeys, ImmutableList.of(), none(), none(), none(), bucketHandle, Optional.empty());
    }

    Optional<HiveBucketing.HiveBucketFilter> bucketFilter = HiveBucketing.getHiveBucketFilter(table, effectiveDomain);
    TupleDomain<HiveColumnHandle> compactDomain = toCompactTupleDomain(effectiveDomain, domainCompactionThreshold);

    // Without partition columns the table is represented by a single table-level partition
    // and nothing is reported as enforced.
    if (partitionKeys.isEmpty()) {
        return new HivePartitionResult(partitionKeys, ImmutableList.of(new HivePartition(schemaTableName)), compactDomain, effectiveDomain, all(), bucketHandle, bucketFilter);
    }

    List<Type> partitionKeyTypes = partitionKeys.stream()
            .map(key -> typeManager.getType(key.getTypeSignature()))
            .collect(toList());

    // A constraint without a predicate accepts every partition.
    Predicate<Map<ColumnHandle, NullableValue>> partitionPredicate = constraint.predicate().orElse(value -> true);

    Iterable<HivePartition> matchingPartitions;
    if (!handle.getPartitions().isPresent()) {
        // Names are fetched once here; parsing and filtering run on each traversal of the iterable.
        List<String> candidateNames = getFilteredPartitionNames(metastore, identity, schemaTableName, partitionKeys, effectiveDomain, table);
        matchingPartitions = () -> candidateNames.stream()
                .map(candidateName -> parseValuesAndFilterPartition(schemaTableName, candidateName, partitionKeys, partitionKeyTypes, effectiveDomain, partitionPredicate))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .iterator();
    } else {
        // Partitions already stored on the handle are filtered eagerly.
        matchingPartitions = handle.getPartitions().get().stream()
                .filter(candidate -> partitionMatches(partitionKeys, effectiveDomain, partitionPredicate, candidate))
                .collect(toImmutableList());
    }

    // Domains on partition keys are fully evaluated above, so only the domains on other
    // columns remain; the partition-key domains are reported as enforced.
    TupleDomain<ColumnHandle> remainingDomain = TupleDomain.withColumnDomains(
            Maps.filterKeys(effectiveDomain.getDomains().get(), not(Predicates.in(partitionKeys))));
    TupleDomain<ColumnHandle> enforcedDomain = TupleDomain.withColumnDomains(
            Maps.filterKeys(effectiveDomain.getDomains().get(), Predicates.in(partitionKeys)));

    return new HivePartitionResult(partitionKeys, matchingPartitions, compactDomain, remainingDomain, enforcedDomain, bucketHandle, bucketFilter);
}
Use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.
Class HiveSplitSource, method matchesUserDefinedCachedPredicates.
/**
 * Checks the partition keys against the user defined cache predicates to decide
 * whether the split should be cached.
 *
 * <p>A predicate matches when it has domains, every column it names is one of the
 * partition keys, and each of its domains includes the corresponding partition value.
 * The partition string {@code \N} is treated as a null value. Any exception raised while
 * matching is logged and the keys are treated as not matching.
 *
 * @param partitionKeys partition keys of the split
 * @return true if the partition keys match at least one user defined cache predicate,
 *         false otherwise (including when there are no predicates or no partition keys)
 */
private boolean matchesUserDefinedCachedPredicates(List<HivePartitionKey> partitionKeys) {
    boolean noPredicates = userDefinedCachePredicates == null || userDefinedCachePredicates.isEmpty();
    if (noPredicates || partitionKeys == null || partitionKeys.isEmpty()) {
        return false;
    }

    try {
        Map<String, HivePartitionKey> keysByName = partitionKeys.stream()
                .collect(Collectors.toMap(HivePartitionKey::getName, Function.identity()));

        for (TupleDomain<ColumnMetadata> cachePredicate : userDefinedCachePredicates) {
            if (cachePredicate.getDomains().isPresent()) {
                Map<ColumnMetadata, Domain> domainsByColumn = cachePredicate.getDomains().get();

                // Only predicates whose columns are all partition keys can be evaluated.
                Collection<String> predicateColumnNames = domainsByColumn.keySet().stream()
                        .map(ColumnMetadata::getName)
                        .collect(Collectors.toList());

                if (keysByName.keySet().containsAll(predicateColumnNames)) {
                    boolean everyDomainMatches = true;
                    for (Map.Entry<ColumnMetadata, Domain> domainEntry : domainsByColumn.entrySet()) {
                        ColumnMetadata predicateColumn = domainEntry.getKey();
                        Domain allowedValues = domainEntry.getValue();
                        String rawValue = keysByName.get(predicateColumn.getName()).getValue();

                        NullableValue partitionValue = rawValue.equals("\\N")
                                ? NullableValue.asNull(predicateColumn.getType())
                                : HiveUtil.parsePartitionValue(predicateColumn.getName(), rawValue, predicateColumn.getType());

                        if (!allowedValues.includesNullableValue(partitionValue.getValue())) {
                            // Stop at the first domain that rejects the value.
                            everyDomainMatches = false;
                            break;
                        }
                    }
                    if (everyDomainMatches) {
                        return true;
                    }
                }
            }
        }
    } catch (Exception ex) {
        // Best effort: a failure while matching means the split is simply not cached.
        log.warn(ex, "Unable to match partition keys %s with cached predicates. Ignoring this partition key. Error = %s", partitionKeys, ex.getMessage());
    }
    return false;
}
Use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.
Class MetastoreHiveStatisticsProvider, method calculateRangeForPartitioningKey.
/**
 * Computes the minimum and maximum of a partitioning key across the given partitions.
 *
 * <p>Null key values are skipped; the remaining values are converted with
 * {@code convertPartitionValueToDouble} before being compared.
 *
 * @param column partitioning column whose values are inspected
 * @param type type of the column
 * @param partitions partitions supplying the key values
 * @return the range of the non-null values, or empty when {@code isRangeSupported} rejects
 *         the type or no partition has a non-null value for the column
 */
@VisibleForTesting
static Optional<DoubleRange> calculateRangeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions) {
    if (!isRangeSupported(type)) {
        return Optional.empty();
    }

    double[] keyValues = partitions.stream()
            .map(HivePartition::getKeys)
            .map(keys -> keys.get(column))
            .filter(key -> !key.isNull())
            .map(NullableValue::getValue)
            .mapToDouble(rawValue -> convertPartitionValueToDouble(type, rawValue))
            .toArray();

    if (keyValues.length == 0) {
        return Optional.empty();
    }

    // Strict comparisons (rather than Math.min/Math.max) are kept so that a NaN value
    // never replaces the running bounds.
    double lowest = keyValues[0];
    double highest = keyValues[0];
    for (double keyValue : keyValues) {
        if (keyValue > highest) {
            highest = keyValue;
        }
        if (keyValue < lowest) {
            lowest = keyValue;
        }
    }
    return Optional.of(new DoubleRange(lowest, highest));
}
Aggregations