Use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.
The class HiveMetadata, method createView.
@Override
public void createView(ConnectorSession session, ConnectorTableMetadata viewMetadata, String viewData, boolean replace)
{
    Map<String, String> properties = ImmutableMap.<String, String>builder()
            .put(TABLE_COMMENT, "Presto View")
            .put(PRESTO_VIEW_FLAG, "true")
            .put(PRESTO_VERSION_NAME, prestoVersion)
            .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
            .build();
    List<Column> columns = new ArrayList<>();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    ColumnConverter columnConverter = metastoreContext.getColumnConverter();
    for (ColumnMetadata column : viewMetadata.getColumns()) {
        try {
            HiveType hiveType = toHiveType(typeTranslator, column.getType());
            columns.add(new Column(
                    column.getName(),
                    hiveType,
                    Optional.ofNullable(column.getComment()),
                    columnConverter.getTypeMetadata(hiveType, column.getType().getTypeSignature())));
        }
        catch (PrestoException e) {
            // if a view uses any unsupported Hive types, include only a single dummy column
            if (e.getErrorCode().equals(NOT_SUPPORTED.toErrorCode())) {
                columns = ImmutableList.of(new Column(
                        "dummy",
                        HIVE_STRING,
                        Optional.of(format("Using dummy because column %s uses unsupported Hive type %s ", column.getName(), column.getType())),
                        Optional.empty()));
                break;
            }
            else {
                throw e;
            }
        }
    }
    SchemaTableName viewName = viewMetadata.getTable();
    Table.Builder tableBuilder = Table.builder()
            .setDatabaseName(viewName.getSchemaName())
            .setTableName(viewName.getTableName())
            .setOwner(session.getUser())
            .setTableType(VIRTUAL_VIEW)
            .setDataColumns(columns)
            .setPartitionColumns(ImmutableList.of())
            .setParameters(properties)
            .setViewOriginalText(Optional.of(encodeViewData(viewData)))
            .setViewExpandedText(Optional.of("/* Presto View */"));
    tableBuilder.getStorageBuilder()
            .setStorageFormat(VIEW_STORAGE_FORMAT)
            .setLocation("");
    Table table = tableBuilder.build();
    PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(session.getUser());
    Optional<Table> existing = metastore.getTable(metastoreContext, viewName.getSchemaName(), viewName.getTableName());
    if (existing.isPresent()) {
        // only replace an existing table when it is a Presto view and replace was requested
        if (!replace || !MetastoreUtil.isPrestoView(existing.get())) {
            throw new ViewAlreadyExistsException(viewName);
        }
        metastore.replaceView(metastoreContext, viewName.getSchemaName(), viewName.getTableName(), table, principalPrivileges);
        return;
    }
    try {
        // a view holds no data, so it is registered with empty partition statistics
        metastore.createTable(session, table, principalPrivileges, Optional.empty(), false, new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
    }
    catch (TableAlreadyExistsException e) {
        throw new ViewAlreadyExistsException(e.getTableName());
    }
}
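A view stores no data, so createView registers it with empty statistics, and a single unsupported column type collapses the entire column list to one placeholder. The standalone sketch below imitates that fallback control flow; toHiveType and mapColumns here are simplified stand-ins for illustration, not the real Presto classes.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class DummyColumnFallback
{
    // hypothetical stand-in for the connector's Presto-to-Hive type mapping
    static String toHiveType(String prestoType)
    {
        if (prestoType.equals("hyperloglog")) {
            throw new UnsupportedOperationException("unsupported: " + prestoType);
        }
        return prestoType;
    }

    static List<String> mapColumns(List<String> prestoTypes)
    {
        List<String> columns = new ArrayList<>();
        for (String type : prestoTypes) {
            try {
                columns.add(toHiveType(type));
            }
            catch (UnsupportedOperationException e) {
                // mirror createView: on the first unsupported type, discard all
                // previously mapped columns and keep only a single placeholder
                return Collections.singletonList("dummy: " + e.getMessage());
            }
        }
        return columns;
    }

    public static void main(String[] args)
    {
        System.out.println(mapColumns(List.of("bigint", "hyperloglog", "varchar")));
        // prints: [dummy: unsupported: hyperloglog]
    }
}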
Use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.
The class HiveSplitManager, method getPartitionSplitInfo.
private Map<String, PartitionSplitInfo> getPartitionSplitInfo(
        ConnectorSession session,
        SemiTransactionalHiveMetastore metastore,
        SchemaTableName tableName,
        List<HivePartition> partitionBatch,
        Map<String, HiveColumnHandle> predicateColumns,
        Optional<Map<Subfield, Domain>> domains)
{
    MetastoreContext metastoreContext = new MetastoreContext(
            session.getIdentity(),
            session.getQueryId(),
            session.getClientInfo(),
            session.getSource(),
            getMetastoreHeaders(session),
            isUserDefinedTypeEncodingEnabled(session),
            metastore.getColumnConverterProvider());
    Map<String, Optional<Partition>> partitions = metastore.getPartitionsByNames(
            metastoreContext,
            tableName.getSchemaName(),
            tableName.getTableName(),
            Lists.transform(partitionBatch, HivePartition::getPartitionId));
    Map<String, PartitionStatistics> partitionStatistics = ImmutableMap.of();
    if (domains.isPresent() && isPartitionStatisticsBasedOptimizationEnabled(session)) {
        partitionStatistics = metastore.getPartitionStatistics(
                metastoreContext,
                tableName.getSchemaName(),
                tableName.getTableName(),
                partitionBatch.stream().map(HivePartition::getPartitionId).collect(toImmutableSet()));
    }
    Map<String, String> partitionNameToLocation = new HashMap<>();
    ImmutableMap.Builder<String, PartitionSplitInfo> partitionSplitInfoBuilder = ImmutableMap.builder();
    for (Map.Entry<String, Optional<Partition>> entry : partitions.entrySet()) {
        ImmutableSet.Builder<ColumnHandle> redundantColumnDomainsBuilder = ImmutableSet.builder();
        if (!entry.getValue().isPresent()) {
            throw new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Partition no longer exists: " + entry.getKey());
        }
        boolean pruned = false;
        if (partitionStatistics.containsKey(entry.getKey())) {
            Map<String, HiveColumnStatistics> columnStatistics = partitionStatistics.get(entry.getKey()).getColumnStatistics();
            for (Map.Entry<String, HiveColumnHandle> predicateColumnEntry : predicateColumns.entrySet()) {
                if (columnStatistics.containsKey(predicateColumnEntry.getKey())) {
                    Optional<ValueSet> columnsStatisticsValueSet = getColumnStatisticsValueSet(
                            columnStatistics.get(predicateColumnEntry.getKey()),
                            predicateColumnEntry.getValue().getHiveType());
                    Subfield subfield = new Subfield(predicateColumnEntry.getKey());
                    if (columnsStatisticsValueSet.isPresent() && domains.get().containsKey(subfield)) {
                        ValueSet columnPredicateValueSet = domains.get().get(subfield).getValues();
                        // the predicate and the statistics-derived value set are disjoint: no row in this partition can match
                        if (!columnPredicateValueSet.overlaps(columnsStatisticsValueSet.get())) {
                            pruned = true;
                            break;
                        }
                        // the predicate fully contains the statistics-derived value set: the predicate is redundant for this partition
                        if (columnPredicateValueSet.contains(columnsStatisticsValueSet.get())) {
                            redundantColumnDomainsBuilder.add(predicateColumnEntry.getValue());
                        }
                    }
                }
            }
        }
        if (!pruned) {
            partitionNameToLocation.put(entry.getKey(), entry.getValue().get().getStorage().getLocation());
        }
        partitionSplitInfoBuilder.put(entry.getKey(), new PartitionSplitInfo(entry.getValue().get(), pruned, redundantColumnDomainsBuilder.build()));
    }
    metastore.setPartitionLeases(metastoreContext, tableName.getSchemaName(), tableName.getTableName(), partitionNameToLocation, getLeaseDuration(session));
    return partitionSplitInfoBuilder.build();
}
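The pruning rule above reduces to a three-way decision: if the predicate's value set is disjoint from the value set derived from partition statistics, the partition is skipped; if the predicate fully contains it, the predicate is provably redundant for that partition; otherwise the partition must be scanned. A minimal standalone sketch of that decision, using a hypothetical min/max Range in place of Presto's ValueSet:

import java.util.Objects;

public class StatsPruningSketch
{
    // hypothetical stand-in for the [min, max] interval derived from HiveColumnStatistics
    record Range(long min, long max)
    {
        boolean overlaps(Range other)
        {
            return min <= other.max && other.min <= max;
        }

        boolean contains(Range other)
        {
            return min <= other.min && other.max <= max;
        }
    }

    enum Outcome { PRUNED, REDUNDANT_PREDICATE, MUST_SCAN }

    static Outcome evaluate(Range predicate, Range stats)
    {
        if (!predicate.overlaps(stats)) {
            return Outcome.PRUNED;               // no row in the partition can match
        }
        if (predicate.contains(stats)) {
            return Outcome.REDUNDANT_PREDICATE;  // every row already satisfies the predicate
        }
        return Outcome.MUST_SCAN;
    }

    public static void main(String[] args)
    {
        System.out.println(evaluate(new Range(0, 9), new Range(100, 200)));     // PRUNED
        System.out.println(evaluate(new Range(0, 300), new Range(100, 200)));   // REDUNDANT_PREDICATE
        System.out.println(evaluate(new Range(150, 400), new Range(100, 200))); // MUST_SCAN
    }
}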
Use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.
The class HiveMetadata, method finishInsertInternal.
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    List<PartitionUpdate> partitionUpdates = getPartitionUpdates(session, fragments);
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, handle.getSchemaName(), handle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) {
        throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }
    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the zero-row files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName(), table.getStorage().getLocation(), false);
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty()
                    ? Optional.empty()
                    : Optional.of(partitionObjectBuilder.buildPartitionObject(
                            session, table, partitionUpdate, prestoVersion,
                            handle.getEncryptionInformation()
                                    .map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of))
                                    .orElseGet(ImmutableMap::of)));
            zeroRowFileCreator.createFiles(
                    session, hdfsContext, partitionUpdate.getWritePath(),
                    getTargetFileNames(partitionUpdate.getFileWriteInfos()),
                    getStorageFormat(partition, table), handle.getCompressionCodec(), getSchema(partition, table));
        }
    }
    List<String> partitionedBy = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
    Set<String> existingPartitions = getExistingPartitionNames(session.getIdentity(), metastoreContext, handle.getSchemaName(), handle.getTableName(), partitionUpdates);
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            if (handle.getEncryptionInformation().isPresent()) {
                throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Inserting into an existing table with encryption enabled is not supported yet");
            }
            // insert into unpartitioned table
            PartitionStatistics partitionStatistics = createPartitionStatistics(
                    session, partitionUpdate.getStatistics(), columnTypes,
                    getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            metastore.finishInsertIntoExistingTable(
                    session, handle.getSchemaName(), handle.getTableName(),
                    partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), partitionStatistics);
        }
        else if (partitionUpdate.getUpdateMode() == APPEND) {
            if (handle.getEncryptionInformation().isPresent()) {
                throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Inserting into an existing partition with encryption enabled is not supported yet");
            }
            // insert into existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(
                    session, partitionUpdate.getStatistics(), columnTypes,
                    getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(
                    session, handle.getSchemaName(), handle.getTableName(),
                    handle.getLocationHandle().getTargetPath().toString(), partitionValues,
                    partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), partitionStatistics);
        }
        else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) {
            Map<String, String> extraPartitionMetadata = handle.getEncryptionInformation()
                    .map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of))
                    .orElseGet(ImmutableMap::of);
            if (isPreferManifestsToListFiles(session) && isFileRenamingEnabled(session)) {
                // store the list of file names and sizes in partition metadata when prefer_manifests_to_list_files and file_renaming_enabled are set to true
                extraPartitionMetadata = updatePartitionMetadataWithFileNamesAndSizes(partitionUpdate, extraPartitionMetadata);
            }
            // track the manifest blob size
            getManifestSizeInBytes(session, partitionUpdate, extraPartitionMetadata).ifPresent(hivePartitionStats::addManifestSizeInBytes);
            // insert into new partition or overwrite existing partition
            Partition partition = partitionObjectBuilder.buildPartitionObject(session, table, partitionUpdate, prestoVersion, extraPartitionMetadata);
            if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
                throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
            }
            if (existingPartitions.contains(partitionUpdate.getName())) {
                if (partitionUpdate.getUpdateMode() == OVERWRITE) {
                    metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), handle.getLocationHandle().getTargetPath().toString(), partition.getValues());
                }
                else {
                    throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into an existing partition of Hive table: " + partitionUpdate.getName());
                }
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(
                    session, partitionUpdate.getStatistics(), columnTypes,
                    getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            metastore.addPartition(
                    session, handle.getSchemaName(), handle.getTableName(),
                    table.getStorage().getLocation(), false, partition,
                    partitionUpdate.getWritePath(), partitionStatistics);
        }
        else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .map(name -> name.isEmpty() ? UNPARTITIONED_ID : name)
            .collect(toList())));
}
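finishInsertInternal routes every PartitionUpdate down one of three paths, and each path ends with a freshly built PartitionStatistics. The hypothetical skeleton below condenses just that dispatch; the strings it returns name the metastore calls used above, while route and UpdateMode are illustrative stand-ins rather than Presto types.

import java.util.Set;

public class InsertModeDispatch
{
    enum UpdateMode { NEW, APPEND, OVERWRITE }

    // condensed sketch of the branch structure in finishInsertInternal
    static String route(String partitionName, UpdateMode mode, Set<String> existingPartitions)
    {
        if (partitionName.isEmpty()) {
            return "finishInsertIntoExistingTable";     // unpartitioned table
        }
        if (mode == UpdateMode.APPEND) {
            return "finishInsertIntoExistingPartition"; // append to an existing partition
        }
        if (mode == UpdateMode.NEW || mode == UpdateMode.OVERWRITE) {
            if (existingPartitions.contains(partitionName) && mode != UpdateMode.OVERWRITE) {
                throw new IllegalStateException("Cannot insert into an existing partition: " + partitionName);
            }
            return "addPartition"; // OVERWRITE drops the existing partition first
        }
        throw new IllegalArgumentException("Unsupported update mode: " + mode);
    }

    public static void main(String[] args)
    {
        System.out.println(route("", UpdateMode.APPEND, Set.of()));                             // finishInsertIntoExistingTable
        System.out.println(route("ds=2024-01-01", UpdateMode.APPEND, Set.of("ds=2024-01-01"))); // finishInsertIntoExistingPartition
        System.out.println(route("ds=2024-01-02", UpdateMode.NEW, Set.of()));                   // addPartition
    }
}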
Use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.
The class HiveMetadata, method createMaterializedView.
@Override
public void createMaterializedView(ConnectorSession session, ConnectorTableMetadata viewMetadata, ConnectorMaterializedViewDefinition viewDefinition, boolean ignoreExisting)
{
    if (isExternalTable(viewMetadata.getProperties())) {
        throw new PrestoException(INVALID_TABLE_PROPERTY, "Specifying external location for materialized view is not supported.");
    }
    Table basicTable = prepareTable(session, viewMetadata, MATERIALIZED_VIEW);
    viewDefinition = new ConnectorMaterializedViewDefinition(
            viewDefinition.getOriginalSql(),
            viewDefinition.getSchema(),
            viewDefinition.getTable(),
            viewDefinition.getBaseTables(),
            viewDefinition.getOwner(),
            viewDefinition.getColumnMappings(),
            viewDefinition.getBaseTablesOnOuterJoinSide(),
            Optional.of(getPartitionedBy(viewMetadata.getProperties())));
    Map<String, String> parameters = ImmutableMap.<String, String>builder()
            .putAll(basicTable.getParameters())
            .put(PRESTO_MATERIALIZED_VIEW_FLAG, "true")
            .build();
    Table viewTable = Table.builder(basicTable)
            .setParameters(parameters)
            .setViewOriginalText(Optional.of(encodeMaterializedViewData(MATERIALIZED_VIEW_JSON_CODEC.toJson(viewDefinition))))
            .setViewExpandedText(Optional.of("/* Presto Materialized View */"))
            .build();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    validateMaterializedViewPartitionColumns(metastore, metastoreContext, viewTable, viewDefinition);
    try {
        PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(viewTable.getOwner());
        // like a plain view, a materialized view is created with empty partition statistics
        metastore.createTable(session, viewTable, principalPrivileges, Optional.empty(), ignoreExisting, new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
    }
    catch (TableAlreadyExistsException e) {
        throw new MaterializedViewAlreadyExistsException(e.getTableName());
    }
}
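In the metastore, the materialized view is distinguished from its base table representation only by an extra table parameter and the JSON-encoded definition stored as the view-original text. A small sketch of the parameter-map pattern follows; the flag value is a guessed placeholder, since only the identifier PRESTO_MATERIALIZED_VIEW_FLAG appears above, and withMaterializedViewFlag is an illustrative helper.

import java.util.LinkedHashMap;
import java.util.Map;

public class MaterializedViewFlagSketch
{
    // hypothetical value; the real constant is defined inside Presto
    static final String PRESTO_MATERIALIZED_VIEW_FLAG = "presto_materialized_view";

    static Map<String, String> withMaterializedViewFlag(Map<String, String> baseParameters)
    {
        // mirrors ImmutableMap.builder().putAll(base).put(flag, "true").build()
        Map<String, String> parameters = new LinkedHashMap<>(baseParameters);
        parameters.put(PRESTO_MATERIALIZED_VIEW_FLAG, "true");
        return Map.copyOf(parameters); // immutable, like the Guava builder result
    }

    public static void main(String[] args)
    {
        System.out.println(withMaterializedViewFlag(Map.of("comment", "Presto View")));
    }
}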
Use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.
The class HiveMetadata, method finishCreateTable.
@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    List<PartitionUpdate> partitionUpdates = getPartitionUpdates(session, fragments);
    Map<String, String> tableEncryptionParameters = ImmutableMap.of();
    Map<String, String> partitionEncryptionParameters = ImmutableMap.of();
    if (handle.getEncryptionInformation().isPresent()) {
        EncryptionInformation encryptionInformation = handle.getEncryptionInformation().get();
        if (encryptionInformation.getDwrfEncryptionMetadata().isPresent()) {
            // encryption metadata goes on the table for unpartitioned tables, on each partition otherwise
            if (handle.getPartitionedBy().isEmpty()) {
                tableEncryptionParameters = ImmutableMap.copyOf(encryptionInformation.getDwrfEncryptionMetadata().get().getExtraMetadata());
            }
            else {
                partitionEncryptionParameters = ImmutableMap.copyOf(encryptionInformation.getDwrfEncryptionMetadata().get().getExtraMetadata());
            }
        }
    }
    WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = buildTableObject(
            session.getQueryId(),
            handle.getSchemaName(),
            handle.getTableName(),
            handle.getTableOwner(),
            handle.getInputColumns(),
            handle.getTableStorageFormat(),
            handle.getPartitionedBy(),
            handle.getBucketProperty(),
            handle.getPreferredOrderingColumns(),
            ImmutableMap.<String, String>builder()
                    .putAll(handle.getAdditionalTableParameters())
                    .putAll(tableEncryptionParameters)
                    .build(),
            writeInfo.getTargetPath(),
            MANAGED_TABLE,
            prestoVersion,
            metastoreContext);
    PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(handle.getTableOwner());
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the zero-row files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName(), table.getStorage().getLocation(), true);
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty()
                    ? Optional.empty()
                    : Optional.of(partitionObjectBuilder.buildPartitionObject(session, table, partitionUpdate, prestoVersion, partitionEncryptionParameters));
            zeroRowFileCreator.createFiles(
                    session, hdfsContext, partitionUpdate.getWritePath(),
                    getTargetFileNames(partitionUpdate.getFileWriteInfos()),
                    getStorageFormat(partition, table), handle.getCompressionCodec(), getSchema(partition, table));
        }
    }
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);
    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        // for an unpartitioned table, sum the writer-reported basic statistics and attach computed column statistics
        HiveBasicStatistics basicStatistics = partitionUpdates.stream()
                .map(PartitionUpdate::getStatistics)
                .reduce((first, second) -> reduce(first, second, ADD))
                .orElse(createZeroStatistics());
        tableStatistics = createPartitionStatistics(session, basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    }
    else {
        // for a partitioned table, statistics are attached per partition below
        tableStatistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of());
    }
    metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false, tableStatistics);
    if (handle.getPartitionedBy().isEmpty()) {
        return Optional.of(new HiveWrittenPartitions(ImmutableList.of(UNPARTITIONED_ID)));
    }
    if (isRespectTableFormat(session)) {
        verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
    }
    for (PartitionUpdate update : partitionUpdates) {
        Map<String, String> partitionParameters = partitionEncryptionParameters;
        if (isPreferManifestsToListFiles(session) && isFileRenamingEnabled(session)) {
            // store the list of file names and sizes in partition metadata when prefer_manifests_to_list_files and file_renaming_enabled are set to true
            partitionParameters = updatePartitionMetadataWithFileNamesAndSizes(update, partitionParameters);
        }
        Partition partition = partitionObjectBuilder.buildPartitionObject(session, table, update, prestoVersion, partitionParameters);
        PartitionStatistics partitionStatistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
        metastore.addPartition(
                session, handle.getSchemaName(), handle.getTableName(),
                table.getStorage().getLocation(), true, partition,
                update.getWritePath(), partitionStatistics);
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .collect(toList())));
}
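For an unpartitioned CTAS, the table-level statistics are the element-wise sum of the basic statistics each writer reported, reduced with ADD. A standalone sketch of that reduce step, with a simplified BasicStats record standing in for HiveBasicStatistics (the real class uses OptionalLong fields and a dedicated reduce helper):

import java.util.List;

public class ReduceBasicStatsSketch
{
    // simplified stand-in for HiveBasicStatistics, illustration only
    record BasicStats(long fileCount, long rowCount, long sizeInBytes)
    {
        static BasicStats add(BasicStats first, BasicStats second)
        {
            return new BasicStats(
                    first.fileCount + second.fileCount,
                    first.rowCount + second.rowCount,
                    first.sizeInBytes + second.sizeInBytes);
        }

        static BasicStats zero()
        {
            return new BasicStats(0, 0, 0);
        }
    }

    public static void main(String[] args)
    {
        // analogous to partitionUpdates.stream().map(PartitionUpdate::getStatistics)
        //         .reduce((first, second) -> reduce(first, second, ADD)).orElse(createZeroStatistics())
        List<BasicStats> perWriterStats = List.of(
                new BasicStats(2, 1_000, 4_096),
                new BasicStats(1, 500, 2_048));
        BasicStats tableStats = perWriterStats.stream()
                .reduce(BasicStats::add)
                .orElse(BasicStats.zero());
        System.out.println(tableStats); // BasicStats[fileCount=3, rowCount=1500, sizeInBytes=6144]
    }
}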