use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.
the class ThriftHiveMetastore method getPartitionStatistics.
/**
 * Builds the statistics for every partition in {@code partitions}, keyed by partition name.
 *
 * <p>Partition names are produced by {@code makePartName} from the table's partition key
 * names and each partition's values. Basic statistics are read from the partition
 * parameters; column statistics are fetched through {@code getPartitionColumnStatistics}
 * for the table's data columns. A partition for which no column statistics come back is
 * reported with an empty column-statistics map.
 *
 * @param identity identity passed through to the column statistics lookup
 * @param table table that owns the partitions
 * @param partitions partitions to describe
 * @return immutable map from partition name to its statistics
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitions) {
    List<String> dataColumnNames = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    List<String> partitionKeyNames = table.getPartitionKeys().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());

    // Basic statistics live in the partition parameters.
    Map<String, HiveBasicStatistics> basicStatisticsByName = partitions.stream()
            .collect(toImmutableMap(
                    partition -> makePartName(partitionKeyNames, partition.getValues()),
                    partition -> ThriftMetastoreUtil.getHiveBasicStatistics(partition.getParameters())));

    // Row counts are handed to the column statistics lookup alongside the partition names.
    Map<String, OptionalLong> rowCountsByName = basicStatisticsByName.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));

    Map<String, Map<String, HiveColumnStatistics>> columnStatisticsByName = getPartitionColumnStatistics(
            identity,
            table.getDbName(),
            table.getTableName(),
            basicStatisticsByName.keySet(),
            dataColumnNames,
            rowCountsByName);

    ImmutableMap.Builder<String, PartitionStatistics> statisticsByName = ImmutableMap.builder();
    basicStatisticsByName.forEach((name, basicStatistics) -> statisticsByName.put(
            name,
            new PartitionStatistics(basicStatistics, columnStatisticsByName.getOrDefault(name, ImmutableMap.of()))));
    return statisticsByName.build();
}
use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.
the class ThriftHiveMetastore method updatePartitionStatistics.
/**
 * Applies {@code update} to the current statistics of a single partition and writes the
 * result back.
 *
 * <p>The partition is altered first with the updated basic statistics written into its
 * parameters (via {@code alterPartitionWithoutStatistics}); the column statistics are
 * then updated through {@code updatePartitionColumnStatistics}.
 *
 * @param identity identity used for all metastore calls
 * @param databaseName database containing the table
 * @param tableName table containing the partition
 * @param partitionName name of the partition whose statistics are updated
 * @param update function mapping the current statistics to the new statistics
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if the metastore does not
 *         return exactly one partition for {@code partitionName}
 * @throws TableNotFoundException if the table cannot be found
 */
@Override
public void updatePartitionStatistics(HiveIdentity identity, String databaseName, String tableName, String partitionName, Function<PartitionStatistics, PartitionStatistics> update) {
    List<Partition> partitions = getPartitionsByNames(identity, databaseName, tableName, ImmutableList.of(partitionName));
    // Report the empty case separately: the previous single message claimed "multiple
    // partitions" even when none were returned.
    if (partitions.isEmpty()) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Metastore returned no partition for name: " + partitionName);
    }
    if (partitions.size() != 1) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Metastore returned multiple partitions for name: " + partitionName);
    }
    Table table = getTable(identity, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    PartitionStatistics currentStatistics = requireNonNull(getPartitionStatistics(identity, table, partitions).get(partitionName), "getPartitionStatistics() returned null");
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);

    // Work on a copy so the partition object returned by the metastore is left untouched.
    Partition originalPartition = getOnlyElement(partitions);
    Partition modifiedPartition = originalPartition.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(modifiedPartition.getParameters(), basicStatistics));
    alterPartitionWithoutStatistics(identity, databaseName, tableName, modifiedPartition);
    updatePartitionColumnStatistics(identity, modifiedPartition, databaseName, tableName, partitionName, basicStatistics, currentStatistics, updatedStatistics);
}
use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.
the class ThriftHiveMetastore method updatePartitionsStatistics.
/**
 * Applies per-partition update functions to the statistics of several partitions of one
 * table and writes the results back.
 *
 * <p>All partitions are altered in one {@code alterPartitionsWithoutStatistics} call with
 * the updated basic statistics written into their parameters; column statistics are then
 * updated partition by partition through {@code updatePartitionColumnStatistics}.
 *
 * @param identity identity used for the metastore lookups and column statistics updates
 * @param databaseName database containing the table
 * @param tableName table containing the partitions
 * @param partNamesUpdateFunctionMap map from partition name to the function that turns the
 *        partition's current statistics into its new statistics
 * @throws TableNotFoundException if the table cannot be found
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if the number of partitions
 *         (or of their statistics) returned by the metastore differs from the number of
 *         requested partition names
 * @throws NullPointerException if a partition returned by the metastore has a name that is
 *         not a key of {@code partNamesUpdateFunctionMap}
 */
@Override
public synchronized void updatePartitionsStatistics(HiveIdentity identity, String databaseName, String tableName, Map<String, Function<PartitionStatistics, PartitionStatistics>> partNamesUpdateFunctionMap) {
    // Fail with TableNotFoundException (as updatePartitionStatistics does) rather than
    // calling get() on a possibly-empty Optional.
    Table table = getTable(identity, databaseName, tableName)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    List<Partition> partitions = getPartitionsByNames(identity, databaseName, tableName, ImmutableList.copyOf(partNamesUpdateFunctionMap.keySet()));
    Map<String, PartitionStatistics> partitionsStatistics = getPartitionStatistics(identity, table, partitions);
    if (partitions.size() != partitionsStatistics.size() || partitions.size() != partNamesUpdateFunctionMap.size()) {
        throw new PrestoException(
                HiveErrorCode.HIVE_METASTORE_ERROR,
                "Metastore returned an unexpected number of partitions: requested " + partNamesUpdateFunctionMap.size()
                        + ", got " + partitions.size() + " partitions and " + partitionsStatistics.size() + " statistics entries");
    }

    List<String> partColumns = table.getPartitionKeys().stream().map(FieldSchema::getName).collect(toImmutableList());
    ImmutableList.Builder<Partition> modifiedPartitionBuilder = ImmutableList.builder();
    ImmutableMap.Builder<String, PartitionInfo> partitionInfoMapBuilder = ImmutableMap.builder();
    for (Partition originalPartition : partitions) {
        String partitionName = makePartName(partColumns, originalPartition.getValues());
        PartitionStatistics currentStatistics = requireNonNull(partitionsStatistics.get(partitionName), "getPartitionStatistics() returned null");
        // The name is rebuilt from the metastore's partition values, so it may not match
        // any caller-supplied key; fail with a descriptive message instead of a bare NPE.
        Function<PartitionStatistics, PartitionStatistics> update = requireNonNull(
                partNamesUpdateFunctionMap.get(partitionName),
                "No statistics update function supplied for partition: " + partitionName);
        PartitionStatistics updatedStatistics = update.apply(currentStatistics);

        Partition modifiedPartition = originalPartition.deepCopy();
        HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
        modifiedPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(modifiedPartition.getParameters(), basicStatistics));
        // NOTE(review): the original partition object is updated in place as well, because
        // it (not the copy) is what PartitionInfo receives below — confirm this is intended.
        originalPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(originalPartition.getParameters(), basicStatistics));
        modifiedPartitionBuilder.add(modifiedPartition);
        partitionInfoMapBuilder.put(partitionName, new PartitionInfo(basicStatistics, currentStatistics, originalPartition, updatedStatistics));
    }

    alterPartitionsWithoutStatistics(databaseName, tableName, modifiedPartitionBuilder.build());
    ImmutableMap<String, PartitionInfo> partitionInfoMap = partitionInfoMapBuilder.build();
    partitionInfoMap.forEach((partName, partInfo) -> updatePartitionColumnStatistics(identity, partInfo.modifiedPartition, databaseName, tableName, partName, partInfo.basicStatistics, partInfo.currentStatistics, partInfo.updatedStatistics));
}
use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.
the class GlueInputConverter method convertPartition.
/**
 * Converts a partition together with its statistics into a Glue {@code PartitionInput}.
 *
 * <p>The partition itself is converted by the single-partition {@code convertPartition}
 * overload; the basic statistics are then merged into the input's parameters via
 * {@code updateStatisticsParameters}.
 *
 * @param partitionWithStatistics partition and the statistics to store with it
 * @return the partition input with statistics parameters applied
 * @throws PrestoException with {@code NOT_SUPPORTED} if any column statistics are present
 */
public static PartitionInput convertPartition(PartitionWithStatistics partitionWithStatistics) {
    PartitionInput partitionInput = convertPartition(partitionWithStatistics.getPartition());
    PartitionStatistics partitionStatistics = partitionWithStatistics.getStatistics();

    boolean hasColumnStatistics = !partitionStatistics.getColumnStatistics().isEmpty();
    if (hasColumnStatistics) {
        throw new PrestoException(NOT_SUPPORTED, "Glue metastore does not support column level statistics");
    }

    partitionInput.setParameters(
            updateStatisticsParameters(partitionInput.getParameters(), partitionStatistics.getBasicStatistics()));
    return partitionInput;
}
use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.
the class MetastoreHiveStatisticsProvider method getTableStatistics.
/**
 * Estimates table statistics from the statistics of a sample of the given partitions.
 *
 * <p>Returns empty statistics when statistics are disabled for the session, and the result
 * of {@code createZeroStatistics} when there are no partitions. The sample stored in
 * {@code samplePartitionCache} for the table is reused only when column statistics are not
 * requested and the stored partition count equals the current one; otherwise a new sample
 * is drawn and stored.
 *
 * <p>Without column statistics the result carries a row count (average rows per sampled
 * partition multiplied by the number of partitions, when an average is available), a file
 * count and an on-disk size derived from the sample. With column statistics the sample is
 * validated and passed to the column-aware {@code getTableStatistics} overload.
 *
 * @return the estimated statistics; empty statistics if corrupted column statistics are
 *         encountered and the session is configured to ignore them
 * @throws PrestoException any other failure raised while reading or validating statistics
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, SchemaTableName schemaTableName, Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, boolean includeColumnStatistics, Table table) {
    if (!isStatisticsEnabled(session)) {
        return TableStatistics.empty();
    }
    if (partitions.isEmpty()) {
        return createZeroStatistics(columns, columnTypes);
    }

    int sampleSize = getPartitionStatisticsSampleSize(session);
    SamplePartition cachedSample = samplePartitionCache.get(table);
    boolean reuseCachedSample = !includeColumnStatistics
            && cachedSample != null
            && cachedSample.partitionCount == partitions.size();

    List<HivePartition> partitionsSample;
    if (reuseCachedSample) {
        partitionsSample = cachedSample.partitionsSample;
    } else {
        partitionsSample = getPartitionsSample(partitions, sampleSize);
        samplePartitionCache.put(table, new SamplePartition(partitions.size(), partitionsSample));
    }

    try {
        Map<String, PartitionStatistics> statisticsSample = statisticsProvider.getPartitionsStatistics(session, schemaTableName, partitionsSample, table);
        if (includeColumnStatistics) {
            validatePartitionStatistics(schemaTableName, statisticsSample);
            return getTableStatistics(columns, columnTypes, partitions, statisticsSample);
        }

        // Basic statistics only: extrapolate the row count from the sample average.
        OptionalDouble averageRows = calculateAverageRowsPerPartition(statisticsSample.values());
        TableStatistics.Builder basicResult = TableStatistics.builder();
        if (averageRows.isPresent()) {
            basicResult.setRowCount(Estimate.of(averageRows.getAsDouble() * partitions.size()));
        }
        basicResult.setFileCount(calulateFileCount(statisticsSample.values()));
        basicResult.setOnDiskDataSizeInBytes(calculateTotalOnDiskSizeInBytes(statisticsSample.values()));
        return basicResult.build();
    } catch (PrestoException e) {
        boolean corruptedColumnStatistics = e.getErrorCode().equals(HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
        if (!(corruptedColumnStatistics && isIgnoreCorruptedStatistics(session))) {
            throw e;
        }
        // Corrupted statistics are tolerated for this session: log and report nothing.
        log.error(e);
        return TableStatistics.empty();
    }
}
Aggregations