Search in sources :

Example 11 with NullableValue

use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.

the class HiveMetadata method getPartitionsSystemTable.

private Optional<SystemTable> getPartitionsSystemTable(ConnectorSession session, SchemaTableName tableName, SchemaTableName sourceTableName) {
    HiveTableHandle sourceTableHandle = getTableHandle(session, sourceTableName);
    if (sourceTableHandle == null) {
        return Optional.empty();
    }
    SchemaTableName schemaTableName = sourceTableHandle.getSchemaTableName();
    Table table = metastore.getTable(new HiveIdentity(session), schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
    List<HiveColumnHandle> partitionColumns = sourceTableHandle.getPartitionColumns();
    if (partitionColumns.isEmpty()) {
        return Optional.empty();
    }
    List<Type> partitionColumnTypes = partitionColumns.stream().map(HiveColumnHandle::getTypeSignature).map(typeManager::getType).collect(toImmutableList());
    List<ColumnMetadata> partitionSystemTableColumns = partitionColumns.stream().map(column -> new ColumnMetadata(column.getName(), typeManager.getType(column.getTypeSignature()), column.getComment().orElse(null), column.isHidden())).collect(toImmutableList());
    Map<Integer, HiveColumnHandle> fieldIdToColumnHandle = IntStream.range(0, partitionColumns.size()).boxed().collect(toImmutableMap(identity(), partitionColumns::get));
    return Optional.of(createSystemTable(new ConnectorTableMetadata(tableName, partitionSystemTableColumns), constraint -> {
        TupleDomain<ColumnHandle> targetTupleDomain = constraint.transform(fieldIdToColumnHandle::get);
        Predicate<Map<ColumnHandle, NullableValue>> targetPredicate = convertToPredicate(targetTupleDomain);
        Constraint targetConstraint = new Constraint(targetTupleDomain, targetPredicate);
        Iterable<List<Object>> records = () -> stream(partitionManager.getPartitions(metastore, new HiveIdentity(session), sourceTableHandle, targetConstraint, table).getPartitions()).map(hivePartition -> IntStream.range(0, partitionColumns.size()).mapToObj(fieldIdToColumnHandle::get).map(columnHandle -> hivePartition.getKeys().get(columnHandle).getValue()).collect(toList())).iterator();
        return new InMemoryRecordSet(partitionColumnTypes, records).cursor();
    }));
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Constraint(io.prestosql.spi.connector.Constraint) NullableValue(io.prestosql.spi.predicate.NullableValue) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) Predicate(java.util.function.Predicate) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) TupleDomain(io.prestosql.spi.predicate.TupleDomain) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata)

Example 12 with NullableValue

use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.

the class HivePartitionManager method parsePartition.

public static HivePartition parsePartition(SchemaTableName tableName, String partitionName, List<HiveColumnHandle> partitionColumns, List<Type> partitionColumnTypes) {
    List<String> partitionValues = extractPartitionValues(partitionName);
    ImmutableMap.Builder<ColumnHandle, NullableValue> builder = ImmutableMap.builder();
    for (int i = 0; i < partitionColumns.size(); i++) {
        HiveColumnHandle column = partitionColumns.get(i);
        NullableValue parsedValue = parsePartitionValue(partitionName, partitionValues.get(i), partitionColumnTypes.get(i));
        builder.put(column, parsedValue);
    }
    Map<ColumnHandle, NullableValue> values = builder.build();
    return new HivePartition(tableName, partitionName, values);
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) NullableValue(io.prestosql.spi.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) Constraint(io.prestosql.spi.connector.Constraint)

Example 13 with NullableValue

use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.

the class HivePartitionManager method getPartitions.

public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, HiveIdentity identity, ConnectorTableHandle tableHandle, Constraint constraint, Table table) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicate = constraint.getSummary().intersect(hiveTableHandle.getEnforcedConstraint());
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Optional<HiveBucketHandle> hiveBucketHandle = hiveTableHandle.getBucketHandle();
    List<HiveColumnHandle> partitionColumns = hiveTableHandle.getPartitionColumns();
    if (effectivePredicate.isNone()) {
        return new HivePartitionResult(partitionColumns, ImmutableList.of(), none(), none(), none(), hiveBucketHandle, Optional.empty());
    }
    Optional<HiveBucketing.HiveBucketFilter> bucketFilter = HiveBucketing.getHiveBucketFilter(table, effectivePredicate);
    TupleDomain<HiveColumnHandle> compactEffectivePredicate = toCompactTupleDomain(effectivePredicate, domainCompactionThreshold);
    if (partitionColumns.isEmpty()) {
        return new HivePartitionResult(partitionColumns, ImmutableList.of(new HivePartition(tableName)), compactEffectivePredicate, effectivePredicate, all(), hiveBucketHandle, bucketFilter);
    }
    List<Type> partitionTypes = partitionColumns.stream().map(column -> typeManager.getType(column.getTypeSignature())).collect(toList());
    Iterable<HivePartition> partitionsIterable;
    Predicate<Map<ColumnHandle, NullableValue>> predicate = constraint.predicate().orElse(value -> true);
    if (hiveTableHandle.getPartitions().isPresent()) {
        partitionsIterable = hiveTableHandle.getPartitions().get().stream().filter(partition -> partitionMatches(partitionColumns, effectivePredicate, predicate, partition)).collect(toImmutableList());
    } else {
        List<String> partitionNames = getFilteredPartitionNames(metastore, identity, tableName, partitionColumns, effectivePredicate, table);
        partitionsIterable = () -> partitionNames.stream().map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, effectivePredicate, predicate)).filter(Optional::isPresent).map(Optional::get).iterator();
    }
    // All partition key domains will be fully evaluated, so we don't need to include those
    TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), not(Predicates.in(partitionColumns))));
    TupleDomain<ColumnHandle> enforcedTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), Predicates.in(partitionColumns)));
    return new HivePartitionResult(partitionColumns, partitionsIterable, compactEffectivePredicate, remainingTupleDomain, enforcedTupleDomain, hiveBucketHandle, bucketFilter);
}
Also used : ValueSet(io.prestosql.spi.predicate.ValueSet) DecimalType(io.prestosql.spi.type.DecimalType) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) NullableValue(io.prestosql.spi.predicate.NullableValue) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Predicates.not(com.google.common.base.Predicates.not) Map(java.util.Map) TupleDomain.all(io.prestosql.spi.predicate.TupleDomain.all) Type(io.prestosql.spi.type.Type) Constraint(io.prestosql.spi.connector.Constraint) ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap(com.google.common.collect.ImmutableMap) RealType(io.prestosql.spi.type.RealType) Predicate(java.util.function.Predicate) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Chars.padSpaces(io.prestosql.spi.type.Chars.padSpaces) String.format(java.lang.String.format) TimestampType(io.prestosql.spi.type.TimestampType) List(java.util.List) Table(io.prestosql.plugin.hive.metastore.Table) Domain(io.prestosql.spi.predicate.Domain) FileUtils(org.apache.hadoop.hive.common.FileUtils) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) HiveUtil.parsePartitionValue(io.prestosql.plugin.hive.HiveUtil.parsePartitionValue) BigintType(io.prestosql.spi.type.BigintType) Slice(io.airlift.slice.Slice) CharType(io.prestosql.spi.type.CharType) Decimals(io.prestosql.spi.type.Decimals) TupleDomain.none(io.prestosql.spi.predicate.TupleDomain.none) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) DoubleType(io.prestosql.spi.type.DoubleType) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicates(com.google.common.base.Predicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) SmallintType(io.prestosql.spi.type.SmallintType) VerifyException(com.google.common.base.VerifyException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Iterator(java.util.Iterator) IntegerType(io.prestosql.spi.type.IntegerType) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) TypeManager(io.prestosql.spi.type.TypeManager) TinyintType(io.prestosql.spi.type.TinyintType) Maps(com.google.common.collect.Maps) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DateType(io.prestosql.spi.type.DateType) BooleanType(io.prestosql.spi.type.BooleanType) VarcharType(io.prestosql.spi.type.VarcharType) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Optional(java.util.Optional) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) DecimalType(io.prestosql.spi.type.DecimalType) Type(io.prestosql.spi.type.Type) RealType(io.prestosql.spi.type.RealType) TimestampType(io.prestosql.spi.type.TimestampType) BigintType(io.prestosql.spi.type.BigintType) CharType(io.prestosql.spi.type.CharType) DoubleType(io.prestosql.spi.type.DoubleType) SmallintType(io.prestosql.spi.type.SmallintType) IntegerType(io.prestosql.spi.type.IntegerType) TinyintType(io.prestosql.spi.type.TinyintType) DateType(io.prestosql.spi.type.DateType) BooleanType(io.prestosql.spi.type.BooleanType) VarcharType(io.prestosql.spi.type.VarcharType) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 14 with NullableValue

use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.

the class HiveSplitSource method matchesUserDefinedCachedPredicates.

/**
 * Validate the partitions key against all the user defined predicates
 * to determine whether or not that split should be cached.
 *
 * @return true if partition key matches the user defined cache predicates
 * false otherwise
 */
private boolean matchesUserDefinedCachedPredicates(List<HivePartitionKey> partitionKeys) {
    if (userDefinedCachePredicates == null || userDefinedCachePredicates.isEmpty() || partitionKeys == null || partitionKeys.isEmpty()) {
        return false;
    }
    try {
        Map<String, HivePartitionKey> hivePartitionKeyMap = partitionKeys.stream().collect(Collectors.toMap(HivePartitionKey::getName, Function.identity()));
        for (TupleDomain<ColumnMetadata> tupleDomain : userDefinedCachePredicates) {
            if (!tupleDomain.getDomains().isPresent()) {
                continue;
            }
            Map<ColumnMetadata, Domain> domainMap = tupleDomain.getDomains().get();
            Collection<String> columnsDefinedInPredicate = domainMap.keySet().stream().map(ColumnMetadata::getName).collect(Collectors.toList());
            if (!hivePartitionKeyMap.keySet().containsAll(columnsDefinedInPredicate)) {
                continue;
            }
            boolean allMatches = domainMap.entrySet().stream().allMatch(entry -> {
                ColumnMetadata columnMetadata = entry.getKey();
                Domain domain = entry.getValue();
                String partitionStringValue = hivePartitionKeyMap.get(columnMetadata.getName()).getValue();
                NullableValue nullableValue;
                if (partitionStringValue.equals("\\N")) {
                    nullableValue = NullableValue.asNull(columnMetadata.getType());
                } else {
                    nullableValue = HiveUtil.parsePartitionValue(columnMetadata.getName(), partitionStringValue, columnMetadata.getType());
                }
                return domain.includesNullableValue(nullableValue.getValue());
            });
            if (allMatches) {
                return true;
            }
        }
    } catch (Exception ex) {
        log.warn(ex, "Unable to match partition keys %s with cached predicates. Ignoring this partition key. Error = %s", partitionKeys, ex.getMessage());
    }
    return false;
}
Also used : ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) NullableValue(io.prestosql.spi.predicate.NullableValue) Domain(io.prestosql.spi.predicate.Domain) TupleDomain(io.prestosql.spi.predicate.TupleDomain) PrestoException(io.prestosql.spi.PrestoException) FileNotFoundException(java.io.FileNotFoundException)

Example 15 with NullableValue

use of io.prestosql.spi.predicate.NullableValue in project boostkit-bigdata by kunpengcompute.

the class MetastoreHiveStatisticsProvider method calculateRangeForPartitioningKey.

@VisibleForTesting
static Optional<DoubleRange> calculateRangeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions) {
    if (!isRangeSupported(type)) {
        return Optional.empty();
    }
    List<Double> values = partitions.stream().map(HivePartition::getKeys).map(keys -> keys.get(column)).filter(value -> !value.isNull()).map(NullableValue::getValue).map(value -> convertPartitionValueToDouble(type, value)).collect(toImmutableList());
    if (values.isEmpty()) {
        return Optional.empty();
    }
    double min = values.get(0);
    double max = values.get(0);
    for (Double value : values) {
        if (value > max) {
            max = value;
        }
        if (value < min) {
            min = value;
        }
    }
    return Optional.of(new DoubleRange(min, max));
}
Also used : HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) TableStatistics(io.prestosql.spi.statistics.TableStatistics) Varchars.isVarcharType(io.prestosql.spi.type.Varchars.isVarcharType) Collections.unmodifiableList(java.util.Collections.unmodifiableList) DecimalType(io.prestosql.spi.type.DecimalType) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) NullableValue(io.prestosql.spi.predicate.NullableValue) BigDecimal(java.math.BigDecimal) HiveSessionProperties.isStatisticsEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isStatisticsEnabled) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) Map(java.util.Map) Type(io.prestosql.spi.type.Type) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Chars.isCharType(io.prestosql.spi.type.Chars.isCharType) Double.parseDouble(java.lang.Double.parseDouble) HiveErrorCode(io.prestosql.plugin.hive.HiveErrorCode) PrestoException(io.prestosql.spi.PrestoException) HiveSessionProperties.getPartitionStatisticsSampleSize(io.prestosql.plugin.hive.HiveSessionProperties.getPartitionStatisticsSampleSize) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Decimals.isLongDecimal(io.prestosql.spi.type.Decimals.isLongDecimal) TINYINT(io.prestosql.spi.type.TinyintType.TINYINT) DecimalStatistics(io.prestosql.plugin.hive.metastore.DecimalStatistics) String.format(java.lang.String.format) HivePartition(io.prestosql.plugin.hive.HivePartition) Objects(java.util.Objects) Decimals.isShortDecimal(io.prestosql.spi.type.Decimals.isShortDecimal) List(java.util.List) DoubleStatistics(io.prestosql.plugin.hive.metastore.DoubleStatistics) Table(io.prestosql.plugin.hive.metastore.Table) LocalDate(java.time.LocalDate) Optional(java.util.Optional) HashFunction(com.google.common.hash.HashFunction) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) OptionalDouble(java.util.OptionalDouble) Shorts(com.google.common.primitives.Shorts) Decimals(io.prestosql.spi.type.Decimals) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) DoubleRange(io.prestosql.spi.statistics.DoubleRange) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) UNPARTITIONED_ID(io.prestosql.plugin.hive.HivePartition.UNPARTITIONED_ID) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) Double.isFinite(java.lang.Double.isFinite) DateStatistics(io.prestosql.plugin.hive.metastore.DateStatistics) DATE(io.prestosql.spi.type.DateType.DATE) REAL(io.prestosql.spi.type.RealType.REAL) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) VerifyException(com.google.common.base.VerifyException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) SignedBytes(com.google.common.primitives.SignedBytes) Hashing.murmur3_128(com.google.common.hash.Hashing.murmur3_128) Ints(com.google.common.primitives.Ints) Estimate(io.prestosql.spi.statistics.Estimate) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) SMALLINT(io.prestosql.spi.type.SmallintType.SMALLINT) Double.isNaN(java.lang.Double.isNaN) IntegerStatistics(io.prestosql.plugin.hive.metastore.IntegerStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) HiveSessionProperties.isIgnoreCorruptedStatistics(io.prestosql.plugin.hive.HiveSessionProperties.isIgnoreCorruptedStatistics) DoubleRange(io.prestosql.spi.statistics.DoubleRange) NullableValue(io.prestosql.spi.predicate.NullableValue) Double.parseDouble(java.lang.Double.parseDouble) OptionalDouble(java.util.OptionalDouble) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

NullableValue (io.prestosql.spi.predicate.NullableValue)35 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)27 ConnectorSession (io.prestosql.spi.connector.ConnectorSession)18 TupleDomain (io.prestosql.spi.predicate.TupleDomain)16 Optional (java.util.Optional)15 ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle)14 ImmutableList (com.google.common.collect.ImmutableList)13 ImmutableMap (com.google.common.collect.ImmutableMap)13 Domain (io.prestosql.spi.predicate.Domain)13 Objects.requireNonNull (java.util.Objects.requireNonNull)13 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)12 Type (io.prestosql.spi.type.Type)12 List (java.util.List)12 Map (java.util.Map)12 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)11 ConnectorMetadata (io.prestosql.spi.connector.ConnectorMetadata)11 PrestoException (io.prestosql.spi.PrestoException)10 Constraint (io.prestosql.spi.connector.Constraint)10 ArrayList (java.util.ArrayList)10 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)9