Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
In class FileHiveMetastore, method addPartitions:
@Override
public synchronized void addPartitions(HiveIdentity identity, String databaseName, String tableName, List<PartitionWithStatistics> partitions)
{
    requireNonNull(databaseName, "databaseName is null");
    requireNonNull(tableName, "tableName is null");
    requireNonNull(partitions, "partitions is null");

    Table table = getRequiredTable(databaseName, tableName);
    TableType tableType = TableType.valueOf(table.getTableType());
    checkArgument(EnumSet.of(MANAGED_TABLE, EXTERNAL_TABLE).contains(tableType), "Invalid table type: %s", tableType);

    try {
        // validate each partition and stage its schema file contents in memory first
        Map<Path, byte[]> schemaFiles = new LinkedHashMap<>();
        for (PartitionWithStatistics partitionWithStatistics : partitions) {
            Partition partition = partitionWithStatistics.getPartition();
            verifiedPartition(table, partition);
            Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partition.getValues());
            Path schemaPath = new Path(partitionMetadataDirectory, PRESTO_SCHEMA_FILE_NAME);
            if (metadataFileSystem.exists(schemaPath)) {
                throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Partition already exists");
            }
            byte[] schemaJson = partitionCodec.toJsonBytes(new PartitionMetadata(table, partitionWithStatistics));
            schemaFiles.put(schemaPath, schemaJson);
        }

        Set<Path> createdFiles = new LinkedHashSet<>();
        try {
            // write the staged schema files, tracking what was created for rollback
            for (Entry<Path, byte[]> entry : schemaFiles.entrySet()) {
                try (OutputStream outputStream = metadataFileSystem.create(entry.getKey())) {
                    createdFiles.add(entry.getKey());
                    outputStream.write(entry.getValue());
                }
                catch (IOException e) {
                    throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Could not write partition schema", e);
                }
            }
        }
        catch (Throwable e) {
            // best-effort cleanup: delete any schema files written before the failure
            for (Path createdFile : createdFiles) {
                try {
                    metadataFileSystem.delete(createdFile, false);
                }
                catch (IOException ignored) {
                    // cleanup failure is deliberately ignored; the original exception is rethrown
                }
            }
            throw e;
        }
    }
    catch (IOException e) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, e);
    }
}
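The method stages every partition's schema JSON in memory before touching the file system, then deletes whatever it managed to write if any write fails. The same all-or-nothing pattern is useful outside the metastore; below is a minimal self-contained sketch using java.nio instead of the Hadoop FileSystem API (the class and method names are illustrative, not part of the project):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

final class AtomicMultiFileWriter
{
    // Writes all files or none: if any single write fails, every file already
    // created by this call is deleted before the exception is rethrown.
    static void writeAllOrNothing(Map<Path, byte[]> files) throws IOException
    {
        List<Path> created = new ArrayList<>();
        try {
            for (Map.Entry<Path, byte[]> entry : files.entrySet()) {
                // CREATE_NEW fails fast if the target exists, mirroring the
                // "Partition already exists" check in addPartitions above
                Files.write(entry.getKey(), entry.getValue(), StandardOpenOption.CREATE_NEW);
                created.add(entry.getKey());
            }
        }
        catch (IOException e) {
            for (Path path : created) {
                try {
                    Files.deleteIfExists(path);
                }
                catch (IOException ignored) {
                    // best-effort cleanup; the original exception is rethrown
                }
            }
            throw e;
        }
    }
}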
Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
In class AbstractTestHive, method doInsertIntoNewPartition:
private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName)
        throws Exception
{
    // create the empty partitioned table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);

    // insert the data
    String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);

    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        // verify partitions were created
        HiveIdentity identity = new HiveIdentity(newSession());
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                .getPartitionNames(identity, tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        assertEqualsIgnoreOrder(
                partitionNames,
                CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream()
                        .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1))
                        .collect(toList()));

        // verify the node versions in partitions
        Map<String, Optional<Partition>> partitions = getMetastoreClient()
                .getPartitionsByNames(identity, tableName.getSchemaName(), tableName.getTableName(), partitionNames);
        assertEquals(partitions.size(), partitionNames.size());
        for (String partitionName : partitionNames) {
            Partition partition = partitions.get(partitionName).get();
            assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
            assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        }

        // load the new table
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());

        // record the existing data files for the rollback check below
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());

        // test statistics
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
            assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
            assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }

    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);

        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());

        // verify all temp files start with the unique query prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }

        // roll back the staged insert
        transaction.rollback();
    }

    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());

        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);

        // verify the temp directory is empty
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    }
}
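The rollback check relies on a naming convention the middle block verifies: every staged file name begins with the query ID, so cleanup can identify and remove exactly the files its query produced. A sketch of that cleanup step using java.nio rather than the connector's HDFS utilities (the class and method names are illustrative):

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

final class StagingCleanup
{
    // Deletes only the staged files created by the given query, relying on the
    // convention that every temp file name starts with the query ID.
    static void deleteStagedFiles(Path stagingRoot, String queryId) throws IOException
    {
        try (Stream<Path> files = Files.walk(stagingRoot)) {
            files.filter(Files::isRegularFile)
                    .filter(path -> path.getFileName().toString().startsWith(queryId))
                    .forEach(path -> {
                        try {
                            Files.delete(path);
                        }
                        catch (IOException e) {
                            throw new UncheckedIOException(e);
                        }
                    });
        }
    }
}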
Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
In class AbstractTestHive, method eraseStatistics:
private void eraseStatistics(SchemaTableName schemaTableName)
{
    HiveMetastore hiveMetastoreClient = getMetastoreClient();
    HiveIdentity identity = new HiveIdentity(SESSION);

    // reset table-level statistics to empty
    hiveMetastoreClient.updateTableStatistics(
            identity,
            schemaTableName.getSchemaName(),
            schemaTableName.getTableName(),
            statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));

    Table table = hiveMetastoreClient.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    List<String> partitionColumns = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());

    if (!table.getPartitionColumns().isEmpty()) {
        // reset statistics for every partition as well
        List<String> partitionNames = hiveMetastoreClient
                .getPartitionNames(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
                .orElse(ImmutableList.of());
        List<Partition> partitions = hiveMetastoreClient
                .getPartitionsByNames(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionNames)
                .entrySet().stream()
                .map(Map.Entry::getValue)
                .filter(Optional::isPresent)
                .map(Optional::get)
                .collect(toImmutableList());
        for (Partition partition : partitions) {
            hiveMetastoreClient.updatePartitionStatistics(
                    identity,
                    schemaTableName.getSchemaName(),
                    schemaTableName.getTableName(),
                    makePartName(partitionColumns, partition.getValues()),
                    statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
        }
    }
}
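makePartName joins the partition columns and values into the canonical Hive partition name that updatePartitionStatistics expects, e.g. "ds=2023-01-01/country=US". A simplified sketch of that convention (the real implementation in hive-common also escapes special characters such as '/' and '='; this toy version does not):

import java.util.List;

final class PartitionNames
{
    // Joins partition columns and values into a Hive-style partition name,
    // e.g. ["ds", "country"] + ["2023-01-01", "US"] -> "ds=2023-01-01/country=US".
    static String makePartName(List<String> columns, List<String> values)
    {
        if (columns.size() != values.size()) {
            throw new IllegalArgumentException("columns and values must have the same size");
        }
        StringBuilder name = new StringBuilder();
        for (int i = 0; i < columns.size(); i++) {
            if (i > 0) {
                name.append('/');
            }
            name.append(columns.get(i)).append('=').append(values.get(i));
        }
        return name.toString();
    }
}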
Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
In class HiveMetadata, method finishCreateTable:
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, Map<String, String> serdeParameters)
{
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            .collect(toList());

    LocationService.WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    Table table = buildTableObject(
            session.getQueryId(),
            handle.getSchemaName(),
            handle.getTableName(),
            handle.getTableOwner(),
            handle.getInputColumns(),
            handle.getTableStorageFormat(),
            handle.getPartitionedBy(),
            handle.getBucketProperty(),
            handle.getAdditionalTableParameters(),
            writeInfo.getTargetPath(),
            externalTable,
            prestoVersion,
            serdeParameters);
    PrincipalPrivileges principalPrivileges = MetastoreUtil.buildInitialPrivilegeSet(handle.getTableOwner());
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);

    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // remove the snapshot suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }

    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty()
                    ? Optional.empty()
                    : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }

    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);

    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        // unpartitioned table: aggregate the statistics of all writers
        HiveBasicStatistics basicStatistics = partitionUpdates.stream()
                .map(PartitionUpdate::getStatistics)
                .reduce((first, second) -> Statistics.reduce(first, second, Statistics.ReduceOperator.ADD))
                .orElse(HiveBasicStatistics.createZeroStatistics());
        tableStatistics = createPartitionStatistics(session, basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    }
    else {
        // partitioned table: statistics are attached per partition below
        tableStatistics = new PartitionStatistics(HiveBasicStatistics.createEmptyStatistics(), ImmutableMap.of());
    }

    metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false, tableStatistics);

    if (!handle.getPartitionedBy().isEmpty()) {
        if (HiveSessionProperties.isRespectTableFormat(session)) {
            verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
        }
        // register the partitions in parallel on the metastore client executor
        List<? extends Future<?>> futures = partitionUpdates.stream()
                .map(update -> hiveMetastoreClientService.submit(() -> {
                    Partition partition = buildPartitionObject(session, table, update);
                    PartitionStatistics partitionStatistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
                    metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, update.getWritePath(), partitionStatistics, HiveACIDWriteType.NONE);
                }))
                .collect(toList());
        futures.forEach(future -> {
            try {
                future.get();
            }
            catch (InterruptedException | ExecutionException ignore) {
                log.debug("Get future error");
            }
        });
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .collect(toList())));
}
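Note that the future-joining loop above logs and swallows both InterruptedException and ExecutionException, so a failed addPartition does not abort finishCreateTable. A stricter variant of the same fan-out/join pattern would propagate the first failure and restore the interrupt flag; a minimal sketch (illustrative, not the project's code):

import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

final class FutureJoiner
{
    // Waits for all futures, propagating the first failure instead of logging it.
    static void getAll(List<? extends Future<?>> futures)
    {
        for (Future<?> future : futures) {
            try {
                future.get();
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted while registering partitions", e);
            }
            catch (ExecutionException e) {
                throw new RuntimeException("Partition registration failed", e.getCause());
            }
        }
    }
}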
Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
In class HiveMetadata, method finishInsertInternal:
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<PartitionUpdate> partitions, HiveACIDWriteType hiveACIDWriteType)
{
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            // sort partition updates to ensure the same sequence of renames in case of failure
            .sorted(Comparator.comparing(PartitionUpdate::getName))
            .collect(toList());
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);

    if (session.isSnapshotEnabled()) {
        Set<String> mergedFileNames = collectMergedFileNames(partitionUpdates);
        updateSnapshotFiles(session, handle, false, mergedFileNames, OptionalLong.empty());
        // remove the snapshot suffix from file names in partition updates
        partitionUpdates = updateSnapshotFileNames(partitionUpdates, session.getQueryId());
    }

    Table table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
        throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }

    if (handle.getBucketProperty().isPresent() && HiveSessionProperties.isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty()
                    ? Optional.empty()
                    : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }

    List<String> partitionedBy = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);

    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            // insert into an unpartitioned table
            if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
                throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
                finishInsertOverwrite(session, handle, table, partitionUpdate, partitionStatistics);
            }
            else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
                metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
            }
            else {
                throw new IllegalArgumentException("Unsupported update mode: " + partitionUpdate.getUpdateMode());
            }
        }
        else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
            // insert into an existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, hiveACIDWriteType);
        }
        else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.NEW || partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
            // insert into a new partition (or overwrite an existing one)
            finishInsertInNewPartition(session, handle, table, columnTypes, partitionUpdate, partitionComputedStatistics, hiveACIDWriteType);
        }
        else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }

    if (partitions != null) {
        partitions.addAll(partitionUpdates);
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .collect(toList())));
}
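The branching in the loop encodes a small decision table: an empty partition name means the write targets an unpartitioned table, and the UpdateMode then selects the metastore call. A compact, runnable summary of that dispatch (a hypothetical helper; the returned strings merely name the methods called above):

import java.util.Optional;

final class InsertDispatch
{
    enum UpdateMode { NEW, APPEND, OVERWRITE }

    // Illustrative mapping of (partition name, update mode) to the action
    // chosen by finishInsertInternal; not part of the project's code.
    static String chooseAction(Optional<String> partitionName, UpdateMode mode)
    {
        if (partitionName.isEmpty()) {
            // writing to an unpartitioned table
            switch (mode) {
                case OVERWRITE:
                    return "finishInsertOverwrite";
                case NEW:
                case APPEND:
                    return "finishInsertIntoExistingTable";
            }
        }
        else if (mode == UpdateMode.APPEND) {
            return "finishInsertIntoExistingPartition";
        }
        else if (mode == UpdateMode.NEW || mode == UpdateMode.OVERWRITE) {
            return "finishInsertInNewPartition";
        }
        throw new IllegalArgumentException("Unsupported update mode: " + mode);
    }
}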