Search in sources :

Example 1 with ConnectorOutputMetadata

use of io.prestosql.spi.connector.ConnectorOutputMetadata in project hetu-core by openlookeng.

the class HiveMetadata method finishInsertInternal.

private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<PartitionUpdate> partitions, HiveACIDWriteType hiveACIDWriteType) {
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).sorted(// sort partition updates to ensure same sequence of rename in case of
    Comparator.comparing(PartitionUpdate::getName)).collect(toList());
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // Remove suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }
    Table table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName()).orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
        throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }
    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    List<String> partitionedBy = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            // insert into unpartitioned table
            if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
                throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
                finishInsertOverwrite(session, handle, table, partitionUpdate, partitionStatistics);
            } else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
                // insert into unpartitioned table
                metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
            } else {
                throw new IllegalArgumentException("Unsupported update mode: " + partitionUpdate.getUpdateMode());
            }
        } else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
            // insert into existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
        } else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
            finishInsertInNewPartition(session, handle, table, columnTypes, partitionUpdate, partitionComputedStatistics, hiveACIDWriteType);
        } else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }
    if (partitions != null) {
        partitions.addAll(partitionUpdates);
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(toList())));
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) Partition(io.prestosql.plugin.hive.metastore.Partition) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) PrestoException(io.prestosql.spi.PrestoException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList)

Example 2 with ConnectorOutputMetadata

use of io.prestosql.spi.connector.ConnectorOutputMetadata in project boostkit-bigdata by kunpengcompute.

the class HiveMetadata method finishCreateTable.

public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, Map<String, String> serdeParameters) {
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toList());
    LocationService.WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    Table table = buildTableObject(session.getQueryId(), handle.getSchemaName(), handle.getTableName(), handle.getTableOwner(), handle.getInputColumns(), handle.getTableStorageFormat(), handle.getPartitionedBy(), handle.getBucketProperty(), handle.getAdditionalTableParameters(), writeInfo.getTargetPath(), externalTable, prestoVersion, serdeParameters);
    PrincipalPrivileges principalPrivileges = MetastoreUtil.buildInitialPrivilegeSet(handle.getTableOwner());
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // Remove suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }
    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);
    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        HiveBasicStatistics basicStatistics = partitionUpdates.stream().map(PartitionUpdate::getStatistics).reduce((first, second) -> Statistics.reduce(first, second, Statistics.ReduceOperator.ADD)).orElse(HiveBasicStatistics.createZeroStatistics());
        tableStatistics = createPartitionStatistics(session, basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    } else {
        tableStatistics = new PartitionStatistics(HiveBasicStatistics.createEmptyStatistics(), ImmutableMap.of());
    }
    metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false, tableStatistics);
    if (!handle.getPartitionedBy().isEmpty()) {
        if (HiveSessionProperties.isRespectTableFormat(session)) {
            verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
        }
        List<? extends Future<?>> futures = partitionUpdates.stream().map(update -> hiveMetastoreClientService.submit(() -> {
            Partition partition = buildPartitionObject(session, table, update);
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), buildPartitionObject(session, table, update), update.getWritePath(), partitionStatistics, HiveACIDWriteType.NONE);
        })).collect(toList());
        futures.forEach(future -> {
            try {
                future.get();
            } catch (InterruptedException | ExecutionException ignore) {
                log.debug("Get future error");
            }
        });
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(toList())));
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) ExecutionException(java.util.concurrent.ExecutionException) Partition(io.prestosql.plugin.hive.metastore.Partition) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) Slice(io.airlift.slice.Slice) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics)

Example 3 with ConnectorOutputMetadata

use of io.prestosql.spi.connector.ConnectorOutputMetadata in project boostkit-bigdata by kunpengcompute.

the class HiveMetadata method finishInsertInternal.

private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<PartitionUpdate> partitions, HiveACIDWriteType hiveACIDWriteType) {
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).sorted(// sort partition updates to ensure same sequence of rename in case of
    Comparator.comparing(PartitionUpdate::getName)).collect(toList());
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // Remove suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }
    Table table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName()).orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
        throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }
    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    List<String> partitionedBy = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            // insert into unpartitioned table
            if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
                throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
                finishInsertOverwrite(session, handle, table, partitionUpdate, partitionStatistics);
            } else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
                // insert into unpartitioned table
                metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
            } else {
                throw new IllegalArgumentException("Unsupported update mode: " + partitionUpdate.getUpdateMode());
            }
        } else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
            // insert into existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
        } else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
            finishInsertInNewPartition(session, handle, table, columnTypes, partitionUpdate, partitionComputedStatistics, hiveACIDWriteType);
        } else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }
    if (partitions != null) {
        partitions.addAll(partitionUpdates);
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(toList())));
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) Partition(io.prestosql.plugin.hive.metastore.Partition) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) PrestoException(io.prestosql.spi.PrestoException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) TableType(org.apache.hadoop.hive.metastore.TableType) VarcharType(io.prestosql.spi.type.VarcharType) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList)

Example 4 with ConnectorOutputMetadata

use of io.prestosql.spi.connector.ConnectorOutputMetadata in project hetu-core by openlookeng.

the class CarbondataMetadata method finishUpdateAndDelete.

private Optional<ConnectorOutputMetadata> finishUpdateAndDelete(ConnectorSession session, HiveInsertTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    List<PartitionUpdate> partitionUpdates = new ArrayList<>();
    Optional<ConnectorOutputMetadata> connectorOutputMetadata = super.finishInsert(session, tableHandle, fragments, computedStatistics, partitionUpdates);
    /* Write SegmentUpdateStatusManager and Update SegmentStatusManager */
    Gson gson = new Gson();
    blockUpdateDetailsList = partitionUpdates.stream().map(PartitionUpdate::getMiscData).flatMap(List::stream).map(json -> gson.fromJson(StringEscapeUtils.unescapeJson(json), SegmentUpdateDetails.class)).collect(Collectors.toList());
    hdfsEnvironment.doAs(user, () -> {
        if (blockUpdateDetailsList.size() > 0) {
            CarbonTable finalCarbonTable = getCarbonTable(tableHandle.getSchemaName(), tableHandle.getTableName(), MetastoreUtil.getHiveSchema(table.get()), initialConfiguration);
            SegmentUpdateStatusManager statusManager = new SegmentUpdateStatusManager(finalCarbonTable);
            SegmentUpdateDetails[] segementDetailsList = statusManager.getUpdateStatusDetails();
            for (SegmentUpdateDetails segementDetails : segementDetailsList) {
                segementDetails.getDeletedRowsInBlock();
            }
        /*
                 * Todo: Check how to extract block bitmap for deleted records...
                 *  Use the same to mark Segment deleted instead of jus the row.
                 */
        }
    });
    return connectorOutputMetadata;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) ArrayList(java.util.ArrayList) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Gson(com.google.gson.Gson) PartitionUpdate(io.prestosql.plugin.hive.PartitionUpdate)

Example 5 with ConnectorOutputMetadata

use of io.prestosql.spi.connector.ConnectorOutputMetadata in project hetu-core by openlookeng.

the class CarbondataMetadata method finishCreateTable.

@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    Optional<ConnectorOutputMetadata> connectorOutputMetadata;
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    setExternalTable(true);
    Map<String, String> serdeParameters = initSerDeProperties(handle.getTableName());
    connectorOutputMetadata = super.finishCreateTable(session, tableHandle, fragments, computedStatistics, serdeParameters);
    writeSegmentFileAndSetLoadModel();
    return connectorOutputMetadata;
}
Also used : HiveOutputTableHandle(io.prestosql.plugin.hive.HiveOutputTableHandle) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 Joiner (com.google.common.base.Joiner)4 MoreObjects.firstNonNull (com.google.common.base.MoreObjects.firstNonNull)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)4 Splitter (com.google.common.base.Splitter)4 Suppliers (com.google.common.base.Suppliers)4 Verify.verify (com.google.common.base.Verify.verify)4 VerifyException (com.google.common.base.VerifyException)4 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)4 ImmutableSet (com.google.common.collect.ImmutableSet)4 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)4 ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap)4 Iterables (com.google.common.collect.Iterables)4 Iterables.concat (com.google.common.collect.Iterables.concat)4 Maps (com.google.common.collect.Maps)4 Sets (com.google.common.collect.Sets)4 Streams.stream (com.google.common.collect.Streams.stream)4