use of io.prestosql.spi.connector.ConnectorOutputMetadata in project hetu-core by openlookeng.
the class HiveMetadata method finishInsertInternal.
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle,
        Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<PartitionUpdate> partitions,
        HiveACIDWriteType hiveACIDWriteType)
{
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    // sort partition updates to ensure same sequence of rename in case of
    List<PartitionUpdate> partitionUpdates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            .sorted(Comparator.comparing(PartitionUpdate::getName))
            .collect(toList());
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // Remove suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }
    Table table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
        throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }
    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    List<String> partitionedBy = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            // insert into unpartitioned table
            if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
                throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes,
                    getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
                finishInsertOverwrite(session, handle, table, partitionUpdate, partitionStatistics);
            }
            else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
                // insert into unpartitioned table
                metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(),
                        partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
            }
            else {
                throw new IllegalArgumentException("Unsupported update mode: " + partitionUpdate.getUpdateMode());
            }
        }
        else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
            // insert into existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes,
                    getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues,
                    partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
        }
        else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
            finishInsertInNewPartition(session, handle, table, columnTypes, partitionUpdate, partitionComputedStatistics, hiveACIDWriteType);
        }
        else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }
    if (partitions != null) {
        partitions.addAll(partitionUpdates);
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .collect(toList())));
}
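The method reports the written partitions by wrapping their names in a HiveWrittenPartitions, which the engine exposes through the ConnectorOutputMetadata SPI. HiveWrittenPartitions itself is not listed on this page; the class below is only a minimal, hypothetical implementation of the same shape, assuming nothing beyond the SPI's single getInfo() accessor (the name WrittenPartitionsInfo is invented for illustration).

import com.google.common.collect.ImmutableList;
import io.prestosql.spi.connector.ConnectorOutputMetadata;

import java.util.List;

import static java.util.Objects.requireNonNull;

public class WrittenPartitionsInfo
        implements ConnectorOutputMetadata
{
    private final List<String> partitionNames;

    public WrittenPartitionsInfo(List<String> partitionNames)
    {
        // defensively copy the partition names reported by the finish* methods
        this.partitionNames = ImmutableList.copyOf(requireNonNull(partitionNames, "partitionNames is null"));
    }

    @Override
    public Object getInfo()
    {
        // the engine surfaces this object as the output metadata of the write
        return partitionNames;
    }
}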
use of io.prestosql.spi.connector.ConnectorOutputMetadata in project boostkit-bigdata by kunpengcompute.
the class HiveMetadata method finishCreateTable.
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle,
        Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, Map<String, String> serdeParameters)
{
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            .collect(toList());
    LocationService.WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    Table table = buildTableObject(session.getQueryId(), handle.getSchemaName(), handle.getTableName(), handle.getTableOwner(),
            handle.getInputColumns(), handle.getTableStorageFormat(), handle.getPartitionedBy(), handle.getBucketProperty(),
            handle.getAdditionalTableParameters(), writeInfo.getTargetPath(), externalTable, prestoVersion, serdeParameters);
    PrincipalPrivileges principalPrivileges = MetastoreUtil.buildInitialPrivilegeSet(handle.getTableOwner());
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // Remove suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }
    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);
    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        HiveBasicStatistics basicStatistics = partitionUpdates.stream()
                .map(PartitionUpdate::getStatistics)
                .reduce((first, second) -> Statistics.reduce(first, second, Statistics.ReduceOperator.ADD))
                .orElse(HiveBasicStatistics.createZeroStatistics());
        tableStatistics = createPartitionStatistics(session, basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    }
    else {
        tableStatistics = new PartitionStatistics(HiveBasicStatistics.createEmptyStatistics(), ImmutableMap.of());
    }
    metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false, tableStatistics);
    if (!handle.getPartitionedBy().isEmpty()) {
        if (HiveSessionProperties.isRespectTableFormat(session)) {
            verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
        }
        List<? extends Future<?>> futures = partitionUpdates.stream()
                .map(update -> hiveMetastoreClientService.submit(() -> {
                    Partition partition = buildPartitionObject(session, table, update);
                    PartitionStatistics partitionStatistics = createPartitionStatistics(session, update.getStatistics(), columnTypes,
                            getColumnStatistics(partitionComputedStatistics, partition.getValues()));
                    metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), buildPartitionObject(session, table, update),
                            update.getWritePath(), partitionStatistics, HiveACIDWriteType.NONE);
                }))
                .collect(toList());
        futures.forEach(future -> {
            try {
                future.get();
            }
            catch (InterruptedException | ExecutionException ignore) {
                log.debug("Get future error");
            }
        });
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .collect(toList())));
}
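finishCreateTable decodes the same kind of fragments as finishInsertInternal: each Slice carries a JSON-serialized PartitionUpdate produced by the page sink on the workers, which the coordinator turns back into objects via partitionUpdateCodec. The sketch below only illustrates that round trip with airlift's JsonCodec and Slices; the WriteResult POJO and method names are hypothetical stand-ins, not the project's actual PartitionUpdate or codec wiring.

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.airlift.json.JsonCodec;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

public final class FragmentRoundTrip
{
    private static final JsonCodec<WriteResult> CODEC = JsonCodec.jsonCodec(WriteResult.class);

    private FragmentRoundTrip() {}

    // worker side: the page sink serializes its per-partition result into a Slice fragment
    public static Slice encode(WriteResult result)
    {
        return Slices.wrappedBuffer(CODEC.toJsonBytes(result));
    }

    // coordinator side: mirrors fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson)
    public static WriteResult decode(Slice fragment)
    {
        return CODEC.fromJson(fragment.getBytes());
    }

    public static class WriteResult
    {
        private final String partitionName;
        private final long rowCount;

        @JsonCreator
        public WriteResult(
                @JsonProperty("partitionName") String partitionName,
                @JsonProperty("rowCount") long rowCount)
        {
            this.partitionName = partitionName;
            this.rowCount = rowCount;
        }

        @JsonProperty
        public String getPartitionName()
        {
            return partitionName;
        }

        @JsonProperty
        public long getRowCount()
        {
            return rowCount;
        }
    }
}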
use of io.prestosql.spi.connector.ConnectorOutputMetadata in project boostkit-bigdata by kunpengcompute.
the class HiveMetadata method finishInsertInternal.
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle,
        Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<PartitionUpdate> partitions,
        HiveACIDWriteType hiveACIDWriteType)
{
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    // sort partition updates to ensure same sequence of rename in case of
    List<PartitionUpdate> partitionUpdates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            .sorted(Comparator.comparing(PartitionUpdate::getName))
            .collect(toList());
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // Remove suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }
    Table table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
        throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }
    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    List<String> partitionedBy = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            // insert into unpartitioned table
            if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
                throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes,
                    getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
                finishInsertOverwrite(session, handle, table, partitionUpdate, partitionStatistics);
            }
            else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
                // insert into unpartitioned table
                metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(),
                        partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
            }
            else {
                throw new IllegalArgumentException("Unsupported update mode: " + partitionUpdate.getUpdateMode());
            }
        }
        else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
            // insert into existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes,
                    getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues,
                    partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
        }
        else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
            finishInsertInNewPartition(session, handle, table, columnTypes, partitionUpdate, partitionComputedStatistics, hiveACIDWriteType);
        }
        else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }
    if (partitions != null) {
        partitions.addAll(partitionUpdates);
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .collect(toList())));
}
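In the APPEND branch above, the partition name carried by each PartitionUpdate is turned back into a list of partition values via toPartitionValues. As an illustration only: a name of the form ds=2020-01-01/country=US maps to the values ["2020-01-01", "US"]. The simplified parser below is hypothetical; the connector's real helper additionally handles Hive's path escaping.

import java.util.ArrayList;
import java.util.List;

public final class PartitionNames
{
    private PartitionNames() {}

    // Simplified, hypothetical version of toPartitionValues: split the partition
    // name on '/' and keep only the value part of each key=value component.
    public static List<String> parsePartitionValues(String partitionName)
    {
        List<String> values = new ArrayList<>();
        for (String component : partitionName.split("/")) {
            int eq = component.indexOf('=');
            if (eq < 0) {
                throw new IllegalArgumentException("Invalid partition name component: " + component);
            }
            values.add(component.substring(eq + 1));
        }
        return values;
    }

    public static void main(String[] args)
    {
        // prints [2020-01-01, US]
        System.out.println(parsePartitionValues("ds=2020-01-01/country=US"));
    }
}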
use of io.prestosql.spi.connector.ConnectorOutputMetadata in project hetu-core by openlookeng.
the class CarbondataMetadata method finishUpdateAndDelete.
private Optional<ConnectorOutputMetadata> finishUpdateAndDelete(ConnectorSession session, HiveInsertTableHandle tableHandle,
        Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    List<PartitionUpdate> partitionUpdates = new ArrayList<>();
    Optional<ConnectorOutputMetadata> connectorOutputMetadata = super.finishInsert(session, tableHandle, fragments, computedStatistics, partitionUpdates);
    /* Write SegmentUpdateStatusManager and Update SegmentStatusManager */
    Gson gson = new Gson();
    blockUpdateDetailsList = partitionUpdates.stream()
            .map(PartitionUpdate::getMiscData)
            .flatMap(List::stream)
            .map(json -> gson.fromJson(StringEscapeUtils.unescapeJson(json), SegmentUpdateDetails.class))
            .collect(Collectors.toList());
    hdfsEnvironment.doAs(user, () -> {
        if (blockUpdateDetailsList.size() > 0) {
            CarbonTable finalCarbonTable = getCarbonTable(tableHandle.getSchemaName(), tableHandle.getTableName(),
                    MetastoreUtil.getHiveSchema(table.get()), initialConfiguration);
            SegmentUpdateStatusManager statusManager = new SegmentUpdateStatusManager(finalCarbonTable);
            SegmentUpdateDetails[] segementDetailsList = statusManager.getUpdateStatusDetails();
            for (SegmentUpdateDetails segementDetails : segementDetailsList) {
                segementDetails.getDeletedRowsInBlock();
            }
            /*
             * Todo: Check how to extract block bitmap for deleted records...
             * Use the same to mark Segment deleted instead of jus the row.
             */
        }
    });
    return connectorOutputMetadata;
}
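finishUpdateAndDelete recovers its SegmentUpdateDetails from the miscData strings of each PartitionUpdate, unescaping the embedded JSON before handing it to Gson. The minimal sketch below shows only that unescape-then-parse pattern; the Details POJO is a hypothetical stand-in for CarbonData's SegmentUpdateDetails, and commons-text's StringEscapeUtils is assumed here (the project may use a different StringEscapeUtils).

import com.google.gson.Gson;
import org.apache.commons.text.StringEscapeUtils;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public final class MiscDataDecoding
{
    private MiscDataDecoding() {}

    // Hypothetical stand-in for CarbonData's SegmentUpdateDetails
    public static class Details
    {
        String segmentName;
        String deletedRowsInBlock;
    }

    public static void main(String[] args)
    {
        Gson gson = new Gson();
        // miscData values arrive as escaped JSON strings embedded in the PartitionUpdate payload
        List<String> miscData = Arrays.asList("{\\\"segmentName\\\":\\\"0\\\",\\\"deletedRowsInBlock\\\":\\\"5\\\"}");
        List<Details> detailsList = miscData.stream()
                .map(json -> gson.fromJson(StringEscapeUtils.unescapeJson(json), Details.class))
                .collect(Collectors.toList());
        System.out.println(detailsList.size()); // 1
    }
}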
use of io.prestosql.spi.connector.ConnectorOutputMetadata in project hetu-core by openlookeng.
the class CarbondataMetadata method finishCreateTable.
@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle,
        Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    Optional<ConnectorOutputMetadata> connectorOutputMetadata;
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    setExternalTable(true);
    Map<String, String> serdeParameters = initSerDeProperties(handle.getTableName());
    connectorOutputMetadata = super.finishCreateTable(session, tableHandle, fragments, computedStatistics, serdeParameters);
    writeSegmentFileAndSetLoadModel();
    return connectorOutputMetadata;
}