Search in sources :

Example 1 with ComputedStatistics

use of io.prestosql.spi.statistics.ComputedStatistics in project hetu-core by openlookeng.

In the class HiveMetadata, method finishInsertInternal.

/**
 * Completes an insert by decoding the written fragments into partition updates and
 * registering each of them with the metastore.
 *
 * <p>The fragments are decoded with {@code partitionUpdateCodec}, sorted by partition name and
 * merged. When snapshots are enabled on the session, the snapshot files are updated and the
 * file names in the partition updates are rewritten. When the handle has a bucket property and
 * the session asks for empty bucket files, updates for the missing buckets are computed, merged
 * in, and the corresponding empty files are created.
 *
 * @param session the session the insert runs in
 * @param insertHandle the insert handle; must be a {@code HiveInsertTableHandle}
 * @param fragments serialized partition updates
 * @param computedStatistics statistics collected during the write, mapped to partitions here
 * @param partitions if non-null, receives every final partition update
 * @param hiveACIDWriteType write type forwarded to the metastore calls
 * @return the names of all final partition updates, wrapped in {@code HiveWrittenPartitions}
 * @throws TableNotFoundException if the metastore does not return the target table
 * @throws PrestoException if the table's input format differs from the one recorded in the
 *         handle and the session respects the table format
 * @throws IllegalArgumentException if a partition update carries an unsupported update mode
 */
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<PartitionUpdate> partitions, HiveACIDWriteType hiveACIDWriteType) {
    HiveInsertTableHandle hiveHandle = (HiveInsertTableHandle) insertHandle;

    // Sort the decoded updates by partition name so they are always processed in the same order.
    Comparator<PartitionUpdate> byPartitionName = Comparator.comparing(PartitionUpdate::getName);
    List<PartitionUpdate> updates = fragments.stream()
            .map(fragment -> partitionUpdateCodec.fromJson(fragment.getBytes()))
            .sorted(byPartitionName)
            .collect(toList());
    HiveStorageFormat expectedTableFormat = hiveHandle.getTableStorageFormat();
    updates = PartitionUpdate.mergePartitionUpdates(updates);

    if (session.isSnapshotEnabled()) {
        Set<String> mergedNames = collectMergedFileNames(updates);
        updateSnapshotFiles(session, hiveHandle, false, mergedNames, OptionalLong.empty());
        // Strip the snapshot suffix from the file names carried by the partition updates.
        updates = updateSnapshotFileNames(updates, session.getQueryId());
    }

    HiveIdentity identity = new HiveIdentity(session);
    Table targetTable = metastore.getTable(identity, hiveHandle.getSchemaName(), hiveHandle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(hiveHandle.getSchemaTableName()));

    boolean tableFormatMatches = targetTable.getStorage().getStorageFormat().getInputFormat().equals(expectedTableFormat.getInputFormat());
    if (!tableFormatMatches && HiveSessionProperties.isRespectTableFormat(session)) {
        throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }

    if (hiveHandle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> missingBucketUpdates = computePartitionUpdatesForMissingBuckets(session, hiveHandle, targetTable, updates);
        // Merge the missing-bucket updates in BEFORE creating the empty files, so that those
        // files are cleaned up as well if the insert ends up being rolled back.
        updates = PartitionUpdate.mergePartitionUpdates(concat(updates, missingBucketUpdates));
        for (PartitionUpdate missing : missingBucketUpdates) {
            Optional<Partition> partition;
            if (targetTable.getPartitionColumns().isEmpty()) {
                partition = Optional.empty();
            } else {
                partition = Optional.of(buildPartitionObject(session, targetTable, missing));
            }
            createEmptyFiles(session, missing.getWritePath(), targetTable, partition, missing.getFileNames());
        }
    }

    List<String> partitionColumnNames = targetTable.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    Map<String, Type> columnTypes = hiveHandle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> statisticsByPartition = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);

    for (PartitionUpdate update : updates) {
        if (update.getName().isEmpty()) {
            // Empty partition name: the insert targets an unpartitioned table.
            boolean partitionFormatMatches = targetTable.getStorage().getStorageFormat().getInputFormat().equals(hiveHandle.getPartitionStorageFormat().getInputFormat());
            if (!partitionFormatMatches && HiveSessionProperties.isRespectTableFormat(session)) {
                throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
            PartitionStatistics tableStatistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(statisticsByPartition, ImmutableList.of()));
            PartitionUpdate.UpdateMode tableMode = update.getUpdateMode();
            if (tableMode == PartitionUpdate.UpdateMode.OVERWRITE) {
                finishInsertOverwrite(session, hiveHandle, targetTable, update, tableStatistics);
            } else if (tableMode == PartitionUpdate.UpdateMode.NEW || tableMode == PartitionUpdate.UpdateMode.APPEND) {
                metastore.finishInsertIntoExistingTable(session, hiveHandle.getSchemaName(), hiveHandle.getTableName(), update.getWritePath(), update.getFileNames(), tableStatistics, hiveACIDWriteType);
            } else {
                throw new IllegalArgumentException("Unsupported update mode: " + tableMode);
            }
            continue;
        }

        PartitionUpdate.UpdateMode partitionMode = update.getUpdateMode();
        if (partitionMode == PartitionUpdate.UpdateMode.APPEND) {
            // The partition already exists: add the new files and statistics to it.
            List<String> partitionValues = toPartitionValues(update.getName());
            PartitionStatistics appendedStatistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(statisticsByPartition, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, hiveHandle.getSchemaName(), hiveHandle.getTableName(), partitionValues, update.getWritePath(), update.getFileNames(), appendedStatistics, hiveACIDWriteType);
            continue;
        }
        if (partitionMode == PartitionUpdate.UpdateMode.NEW || partitionMode == PartitionUpdate.UpdateMode.OVERWRITE) {
            finishInsertInNewPartition(session, hiveHandle, targetTable, columnTypes, update, statisticsByPartition, hiveACIDWriteType);
            continue;
        }
        throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionMode));
    }

    // Hand the final updates back to the caller when an output list was supplied.
    if (partitions != null) {
        partitions.addAll(updates);
    }

    List<String> writtenPartitionNames = updates.stream()
            .map(PartitionUpdate::getName)
            .collect(toList());
    return Optional.of(new HiveWrittenPartitions(writtenPartitionNames));
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) 
org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) 
HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) 
HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) 
HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) 
INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) Partition(io.prestosql.plugin.hive.metastore.Partition) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) PrestoException(io.prestosql.spi.PrestoException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) 
PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList)

Example 2 with ComputedStatistics

use of io.prestosql.spi.statistics.ComputedStatistics in project hetu-core by openlookeng.

In the class HiveMetadata, method finishStatisticsCollection.

/**
 * Stores the statistics gathered for a table in the metastore.
 *
 * <p>For a table without partition columns, the statistics computed for the empty partition key
 * are written as table statistics. Otherwise statistics are written per partition: the
 * partitions are those listed in the handle's analyze partition values when present, or else
 * every partition name known to the metastore. Partitions without computed statistics receive
 * empty partition statistics.
 *
 * @param session the session the statistics collection runs in
 * @param tableHandle the analyzed table; must be a {@code HiveTableHandle}
 * @param computedStatistics the collected statistics; for a partitioned table every entry must
 *        match one of the processed partitions, otherwise the final {@code verify} fails
 * @throws TableNotFoundException if the metastore does not return the table or its partition names
 */
@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics) {
    HiveIdentity identity = new HiveIdentity(session);
    HiveTableHandle hiveHandle = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveHandle.getSchemaTableName();
    Table targetTable = metastore.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(hiveHandle.getSchemaTableName()));

    List<Column> partitionColumns = targetTable.getPartitionColumns();
    List<String> partitionColumnNames = partitionColumns.stream()
            .map(Column::getName)
            .collect(toImmutableList());
    List<HiveColumnHandle> allColumns = hiveColumnHandles(targetTable);
    // Hidden columns are excluded from the statistics.
    Map<String, Type> columnTypes = allColumns.stream()
            .filter(column -> !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> statisticsByPartition = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);

    if (partitionColumns.isEmpty()) {
        // Unpartitioned table: the statistics are looked up under the empty partition key.
        ComputedStatistics tableLevelStatistics = statisticsByPartition.get(ImmutableList.<String>of());
        PartitionStatistics tableStatistics = createPartitionStatistics(session, columnTypes, tableLevelStatistics);
        metastore.setTableStatistics(identity, targetTable, tableStatistics);
        return;
    }

    // Partitioned table: pick the partitions to update.
    List<List<String>> partitionValuesList;
    if (!hiveHandle.getAnalyzePartitionValues().isPresent()) {
        partitionValuesList = metastore.getPartitionNames(identity, hiveHandle.getSchemaName(), hiveHandle.getTableName())
                .orElseThrow(() -> new TableNotFoundException(hiveHandle.getSchemaTableName()))
                .stream()
                .map(HiveUtil::toPartitionValues)
                .collect(toImmutableList());
    } else {
        partitionValuesList = hiveHandle.getAnalyzePartitionValues().get();
    }

    // Supported statistic types for every visible, non-partition column.
    Map<String, Set<ColumnStatisticType>> columnStatisticTypes = allColumns.stream()
            .filter(column -> !partitionColumnNames.contains(column.getName()) && !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(typeManager.getType(column.getTypeSignature())))));
    // Built at most once, and only if some partition has no computed statistics.
    Supplier<PartitionStatistics> emptyStatistics = Suppliers.memoize(() -> Statistics.createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));

    ImmutableMap.Builder<List<String>, PartitionStatistics> statisticsBuilder = ImmutableMap.builder();
    int matchedStatistics = 0;
    for (List<String> partitionValues : partitionValuesList) {
        ComputedStatistics collected = statisticsByPartition.get(partitionValues);
        PartitionStatistics resolved;
        if (collected != null) {
            matchedStatistics++;
            resolved = createPartitionStatistics(session, columnTypes, collected);
        } else {
            resolved = emptyStatistics.get();
        }
        statisticsBuilder.put(partitionValues, resolved);
    }
    // Every computed statistics entry has to correspond to one of the processed partitions.
    verify(matchedStatistics == computedStatistics.size(), "All computed statistics must be used");
    metastore.setPartitionStatistics(identity, targetTable, statisticsBuilder.build());
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) 
org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) 
HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) 
HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) 
HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) 
INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) Set(java.util.Set) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Collectors.toSet(java.util.stream.Collectors.toSet) ImmutableSet(com.google.common.collect.ImmutableSet) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) 
SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) Column(io.prestosql.plugin.hive.metastore.Column) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) Constraint(io.prestosql.spi.connector.Constraint) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics)

Example 3 with ComputedStatistics

use of io.prestosql.spi.statistics.ComputedStatistics in project boostkit-bigdata by kunpengcompute.

the class HiveMetadata method finishCreateTable.

/**
 * Completes a table-creating write: builds the table object, registers it with the metastore and,
 * for partitioned tables, registers every written partition.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>decode {@code fragments} into {@link PartitionUpdate}s and merge them;</li>
 *   <li>when snapshots are enabled on the session, update snapshot files and rewrite the file names
 *       recorded in the partition updates;</li>
 *   <li>when the table is bucketed and the session asks for empty bucket files, compute the updates
 *       for missing buckets and create the corresponding empty files;</li>
 *   <li>create the table, with statistics computed from the updates for an unpartitioned table or
 *       empty statistics for a partitioned one;</li>
 *   <li>for a partitioned table, submit one add-partition task per update to
 *       {@code hiveMetastoreClientService} and wait for all of them.</li>
 * </ol>
 *
 * @param session the session of the query that wrote the data
 * @param tableHandle the output handle; must be a {@link HiveOutputTableHandle}
 * @param fragments JSON-serialized partition updates
 * @param computedStatistics statistics collected while writing
 * @param serdeParameters serde parameters passed through to {@code buildTableObject}
 * @return the names of all written partitions wrapped in {@link HiveWrittenPartitions}
 * @throws PrestoException if waiting for a partition registration is interrupted, or a registration
 *         task fails with a checked exception (unchecked failures are rethrown unchanged)
 */
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, Map<String, String> serdeParameters) {
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toList());
    LocationService.WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    Table table = buildTableObject(session.getQueryId(), handle.getSchemaName(), handle.getTableName(), handle.getTableOwner(), handle.getInputColumns(), handle.getTableStorageFormat(), handle.getPartitionedBy(), handle.getBucketProperty(), handle.getAdditionalTableParameters(), writeInfo.getTargetPath(), externalTable, prestoVersion, serdeParameters);
    PrincipalPrivileges principalPrivileges = MetastoreUtil.buildInitialPrivilegeSet(handle.getTableOwner());
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // Remove suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }
    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);
    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        // Unpartitioned table: table-level statistics are the sum of the statistics of all updates.
        HiveBasicStatistics basicStatistics = partitionUpdates.stream().map(PartitionUpdate::getStatistics).reduce((first, second) -> Statistics.reduce(first, second, Statistics.ReduceOperator.ADD)).orElse(HiveBasicStatistics.createZeroStatistics());
        tableStatistics = createPartitionStatistics(session, basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    } else {
        // Partitioned table: statistics are attached to the individual partitions below.
        tableStatistics = new PartitionStatistics(HiveBasicStatistics.createEmptyStatistics(), ImmutableMap.of());
    }
    metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false, tableStatistics);
    if (!handle.getPartitionedBy().isEmpty()) {
        if (HiveSessionProperties.isRespectTableFormat(session)) {
            verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
        }
        List<? extends Future<?>> futures = partitionUpdates.stream().map(update -> hiveMetastoreClientService.submit(() -> {
            // Build the partition once and reuse it for both the statistics lookup and the registration.
            Partition partition = buildPartitionObject(session, table, update);
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, update.getWritePath(), partitionStatistics, HiveACIDWriteType.NONE);
        })).collect(toList());
        // Wait for every registration; a failure must not be hidden, otherwise the method would
        // report partitions as written that were never added to the metastore.
        for (Future<?> future : futures) {
            try {
                future.get();
            } catch (InterruptedException e) {
                // Keep the interrupt visible to callers further up the stack.
                Thread.currentThread().interrupt();
                throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Interrupted while adding partitions to table " + handle.getSchemaName() + "." + handle.getTableName(), e);
            } catch (ExecutionException e) {
                Throwable cause = e.getCause() == null ? e : e.getCause();
                if (cause instanceof RuntimeException) {
                    // Rethrow unchanged so an existing PrestoException keeps its error code.
                    throw (RuntimeException) cause;
                }
                throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Failed to add partition to table " + handle.getSchemaName() + "." + handle.getTableName(), cause);
            }
        }
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(toList())));
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) 
org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) 
HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) 
HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) 
HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) 
INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) ExecutionException(java.util.concurrent.ExecutionException) Partition(io.prestosql.plugin.hive.metastore.Partition) SystemTable(io.prestosql.spi.connector.SystemTable) 
Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) Slice(io.airlift.slice.Slice) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics)

Example 4 with ComputedStatistics

use of io.prestosql.spi.statistics.ComputedStatistics in project boostkit-bigdata by kunpengcompute.

the class HiveMetadata method finishInsertInternal.

/**
 * Registers the outcome of a finished insert with the metastore.
 *
 * <p>The serialized fragments are decoded into partition updates, sorted by name and merged. After
 * the optional snapshot and empty-bucket handling, each update is dispatched according to whether it
 * targets an unpartitioned table (empty partition name) or a partition, and according to its update
 * mode.
 *
 * @param session the session of the query that wrote the data
 * @param insertHandle the insert handle; must be a {@link HiveInsertTableHandle}
 * @param fragments JSON-serialized partition updates
 * @param computedStatistics statistics collected while writing
 * @param partitions optional output list; when non-null, all final partition updates are appended to it
 * @param hiveACIDWriteType write type forwarded to the metastore calls
 * @return the names of all written partitions wrapped in {@link HiveWrittenPartitions}
 * @throws TableNotFoundException if the target table cannot be found in the metastore
 * @throws PrestoException if the table's input format differs from the handle's while the session
 *         respects the table format
 * @throws IllegalArgumentException if a partition update carries an unsupported update mode
 */
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<PartitionUpdate> partitions, HiveACIDWriteType hiveACIDWriteType) {
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;

    // Sort by partition name so the updates are always processed in the same order.
    List<PartitionUpdate> updates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            .sorted(Comparator.comparing(PartitionUpdate::getName))
            .collect(toList());
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    updates = PartitionUpdate.mergePartitionUpdates(updates);

    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(updates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // Strip the snapshot suffix from the file names recorded in the updates.
        updates = updateSnapshotFileNames(updates, session.getQueryId());
    }

    Table table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat())) {
        if (HiveSessionProperties.isRespectTableFormat(session)) {
            throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
        }
    }

    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> missingBucketUpdates = computePartitionUpdatesForMissingBuckets(session, handle, table, updates);
        // Merge first, so the empty files created below are part of the updates and get cleaned up on rollback.
        updates = PartitionUpdate.mergePartitionUpdates(concat(updates, missingBucketUpdates));
        for (PartitionUpdate missing : missingBucketUpdates) {
            Optional<Partition> partition;
            if (table.getPartitionColumns().isEmpty()) {
                partition = Optional.empty();
            } else {
                partition = Optional.of(buildPartitionObject(session, table, missing));
            }
            createEmptyFiles(session, missing.getWritePath(), table, partition, missing.getFileNames());
        }
    }

    List<String> partitionedBy = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> statisticsByPartition = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);

    for (PartitionUpdate update : updates) {
        if (!update.getName().isEmpty()) {
            // The update targets a partition of a partitioned table.
            PartitionUpdate.UpdateMode mode = update.getUpdateMode();
            if (mode == PartitionUpdate.UpdateMode.APPEND) {
                // Insert into an existing partition.
                List<String> partitionValues = toPartitionValues(update.getName());
                PartitionStatistics statistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(statisticsByPartition, partitionValues));
                metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, update.getWritePath(), update.getFileNames(), statistics, hiveACIDWriteType);
                continue;
            }
            if (mode == PartitionUpdate.UpdateMode.NEW || mode == PartitionUpdate.UpdateMode.OVERWRITE) {
                finishInsertInNewPartition(session, handle, table, columnTypes, update, statisticsByPartition, hiveACIDWriteType);
                continue;
            }
            throw new IllegalArgumentException(format("Unsupported update mode: %s", mode));
        }

        // An empty partition name means an insert into an unpartitioned table.
        if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat())) {
            if (HiveSessionProperties.isRespectTableFormat(session)) {
                throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
        }
        PartitionStatistics statistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(statisticsByPartition, ImmutableList.of()));
        PartitionUpdate.UpdateMode mode = update.getUpdateMode();
        if (mode == PartitionUpdate.UpdateMode.OVERWRITE) {
            finishInsertOverwrite(session, handle, table, update, statistics);
            continue;
        }
        if (mode == PartitionUpdate.UpdateMode.NEW || mode == PartitionUpdate.UpdateMode.APPEND) {
            metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), update.getWritePath(), update.getFileNames(), statistics, hiveACIDWriteType);
            continue;
        }
        throw new IllegalArgumentException("Unsupported update mode: " + mode);
    }

    // Hand the final updates back to the caller when an output list was supplied.
    if (partitions != null) {
        partitions.addAll(updates);
    }
    List<String> writtenPartitionNames = updates.stream().map(PartitionUpdate::getName).collect(toList());
    return Optional.of(new HiveWrittenPartitions(writtenPartitionNames));
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) 
org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) 
HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) 
HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) 
HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) 
INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) Partition(io.prestosql.plugin.hive.metastore.Partition) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) PrestoException(io.prestosql.spi.PrestoException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) 
PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList)

Example 5 with ComputedStatistics

use of io.prestosql.spi.statistics.ComputedStatistics in project boostkit-bigdata by kunpengcompute.

the class HiveMetadata method finishStatisticsCollection.

/**
 * Writes the supplied computed statistics to the metastore for the given table.
 *
 * <p>For a table without partition columns the statistics keyed by the empty
 * partition-value list are stored as table statistics. Otherwise statistics are
 * stored per partition: the partitions to update are the handle's analyze
 * partition values when present, or else every partition name known to the
 * metastore. A partition with no computed statistics receives empty statistics.
 *
 * @param session the session used to build the {@link HiveIdentity} and the partition statistics
 * @param tableHandle handle of the target table; cast to {@link HiveTableHandle}
 * @param computedStatistics the statistics to store; each entry must correspond to
 *        one of the partitions being updated
 * @throws TableNotFoundException if the metastore returns no table, or no partition names, for the handle
 * @throws VerifyException if some computed statistics were not matched to any updated partition
 */
@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics) {
    HiveIdentity identity = new HiveIdentity(session);
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = handle.getSchemaTableName();
    Table table = metastore.getTable(identity, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));

    List<Column> partitionColumns = table.getPartitionColumns();
    List<String> partitionColumnNames = partitionColumns.stream()
            .map(Column::getName)
            .collect(toImmutableList());

    // Types of all non-hidden columns, keyed by column name.
    List<HiveColumnHandle> allColumns = hiveColumnHandles(table);
    Map<String, Type> columnTypes = allColumns.stream()
            .filter(column -> !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));

    Map<List<String>, ComputedStatistics> statisticsByPartition =
            Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);

    if (partitionColumns.isEmpty()) {
        // Unpartitioned table: the statistics are keyed by the empty partition-value list.
        ComputedStatistics tableLevelStatistics = statisticsByPartition.get(ImmutableList.<String>of());
        metastore.setTableStatistics(identity, table, createPartitionStatistics(session, columnTypes, tableLevelStatistics));
        return;
    }

    // Partitioned table: use the partitions named on the handle, or fall back to all partitions.
    List<List<String>> partitionsToUpdate;
    if (handle.getAnalyzePartitionValues().isPresent()) {
        partitionsToUpdate = handle.getAnalyzePartitionValues().get();
    } else {
        partitionsToUpdate = metastore.getPartitionNames(identity, handle.getSchemaName(), handle.getTableName())
                .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()))
                .stream()
                .map(HiveUtil::toPartitionValues)
                .collect(toImmutableList());
    }

    // Supported statistic types for each visible, non-partition column.
    Map<String, Set<ColumnStatisticType>> columnStatisticTypes = allColumns.stream()
            .filter(column -> !partitionColumnNames.contains(column.getName()) && !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(typeManager.getType(column.getTypeSignature())))));

    // Memoized so the empty statistics are built at most once, and only if some partition needs them.
    Supplier<PartitionStatistics> emptyStatistics = Suppliers.memoize(() -> Statistics.createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));

    ImmutableMap.Builder<List<String>, PartitionStatistics> statisticsUpdates = ImmutableMap.builder();
    int matchedStatistics = 0;
    for (List<String> values : partitionsToUpdate) {
        ComputedStatistics computed = statisticsByPartition.get(values);
        if (computed != null) {
            matchedStatistics++;
            statisticsUpdates.put(values, createPartitionStatistics(session, columnTypes, computed));
        } else {
            statisticsUpdates.put(values, emptyStatistics.get());
        }
    }

    // Every computed entry must have been matched to one of the updated partitions.
    verify(matchedStatistics == computedStatistics.size(), "All computed statistics must be used");
    metastore.setPartitionStatistics(identity, table, statisticsUpdates.build());
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) 
org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) 
HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) 
HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) 
HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) 
INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) Set(java.util.Set) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Collectors.toSet(java.util.stream.Collectors.toSet) ImmutableSet(com.google.common.collect.ImmutableSet) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) 
SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) Column(io.prestosql.plugin.hive.metastore.Column) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) Constraint(io.prestosql.spi.connector.Constraint) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)6 Joiner (com.google.common.base.Joiner)6 MoreObjects.firstNonNull (com.google.common.base.MoreObjects.firstNonNull)6 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)6 Splitter (com.google.common.base.Splitter)6 Suppliers (com.google.common.base.Suppliers)6 Verify.verify (com.google.common.base.Verify.verify)6 VerifyException (com.google.common.base.VerifyException)6 ImmutableList (com.google.common.collect.ImmutableList)6 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)6 ImmutableMap (com.google.common.collect.ImmutableMap)6 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)6 ImmutableSet (com.google.common.collect.ImmutableSet)6 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)6 ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap)6 Iterables (com.google.common.collect.Iterables)6 Iterables.concat (com.google.common.collect.Iterables.concat)6 Maps (com.google.common.collect.Maps)6 Sets (com.google.common.collect.Sets)6 Streams.stream (com.google.common.collect.Streams.stream)6