use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class ThriftHiveMetastore method updateTableStatistics.
/**
 * Applies {@code update} to the table's current statistics and persists the outcome.
 * <p>
 * The basic statistics are folded into the table parameters and written with {@code alterTable};
 * afterwards the column statistics are stored, and statistics of columns that are absent from
 * the updated set are deleted.
 *
 * @param identity identity used for the metastore calls
 * @param databaseName database containing the table
 * @param tableName table whose statistics are updated
 * @param transaction ACID transaction; when it is running, its write id is set on the altered table
 * @param update function mapping the current statistics to the statistics to store
 * @throws TableNotFoundException if the table cannot be found
 * @throws IllegalStateException if the updated statistics name a column missing from the table
 *         (unless the table is an Avro table with a schema set, in which case the column is skipped)
 */
@Override
public void updateTableStatistics(HiveIdentity identity, String databaseName, String tableName, AcidTransaction transaction, Function<PartitionStatistics, PartitionStatistics> update) {
    Table originalTable = getTable(identity, databaseName, tableName)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    PartitionStatistics currentStatistics = getTableStatistics(identity, originalTable);
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);

    // Work on a copy so the table object that was read stays untouched.
    Table modifiedTable = originalTable.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedTable.setParameters(updateStatisticsParameters(modifiedTable.getParameters(), basicStatistics));
    if (transaction.isAcidTransactionRunning()) {
        modifiedTable.setWriteId(transaction.getWriteId());
    }
    alterTable(identity, databaseName, tableName, modifiedTable);

    io.trino.plugin.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable);
    OptionalLong rowCount = basicStatistics.getRowCount();
    List<ColumnStatisticsObj> metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream()
            .flatMap(entry -> {
                String columnName = entry.getKey();
                Optional<Column> column = table.getColumn(columnName);
                if (column.isPresent()) {
                    return Stream.of(createMetastoreColumnStatistics(columnName, column.get().getType(), entry.getValue(), rowCount));
                }
                if (isAvroTableWithSchemaSet(modifiedTable)) {
                    // The column is not part of the table as converted from the metastore, so
                    // statistics cannot be stored for a column it does not know about; skip it.
                    return Stream.empty();
                }
                throw new IllegalStateException("Column not found: " + columnName);
            })
            .collect(toImmutableList());
    if (!metastoreColumnStatistics.isEmpty()) {
        setTableColumnStatistics(identity, databaseName, tableName, metastoreColumnStatistics);
    }

    // Drop statistics for every column that had them before but is absent from the updated set.
    Set<String> previousColumns = currentStatistics.getColumnStatistics().keySet();
    Set<String> retainedColumns = updatedStatistics.getColumnStatistics().keySet();
    for (String removedColumn : difference(previousColumns, retainedColumns)) {
        deleteTableColumnStatistics(identity, databaseName, tableName, removedColumn);
    }
}
use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class ThriftHiveMetastore method getPartitionStatistics.
/**
 * Collects basic and column statistics for the given partitions of {@code table}.
 * <p>
 * Basic statistics are read from each partition's parameters; column statistics are fetched
 * for the table's data columns. A partition without column statistics in the fetched result
 * gets an empty column-statistics map.
 *
 * @param identity identity used for the metastore call
 * @param table metastore table the partitions belong to
 * @param partitions partitions to gather statistics for
 * @return statistics keyed by partition name, in the order of {@code partitions}
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitions) {
    List<String> dataColumnNames = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    List<String> partitionKeyNames = table.getPartitionKeys().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());

    Map<String, HiveBasicStatistics> basicStatisticsByPartition = partitions.stream()
            .collect(toImmutableMap(
                    partition -> makePartName(partitionKeyNames, partition.getValues()),
                    partition -> getHiveBasicStatistics(partition.getParameters())));
    Map<String, OptionalLong> rowCountByPartition = basicStatisticsByPartition.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));

    Map<String, Map<String, HiveColumnStatistics>> columnStatisticsByPartition = getPartitionColumnStatistics(
            identity,
            table.getDbName(),
            table.getTableName(),
            basicStatisticsByPartition.keySet(),
            dataColumnNames,
            rowCountByPartition);

    // Pair each partition's basic statistics with its column statistics (empty when none were returned).
    return basicStatisticsByPartition.entrySet().stream()
            .collect(toImmutableMap(
                    Map.Entry::getKey,
                    entry -> new PartitionStatistics(
                            entry.getValue(),
                            columnStatisticsByPartition.getOrDefault(entry.getKey(), ImmutableMap.of()))));
}
use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class ThriftHiveMetastore method updatePartitionStatistics.
/**
 * Applies {@code update} to the current statistics of a single partition and persists the result.
 * <p>
 * The basic statistics are folded into the partition parameters and written with
 * {@code alterPartitionWithoutStatistics}; afterwards the column statistics are stored, and
 * statistics of columns that are absent from the updated set are deleted.
 *
 * @param identity identity used for the metastore calls
 * @param table metastore table the partition belongs to
 * @param partitionName name of the partition to update
 * @param update function mapping the current statistics to the statistics to store
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} if the metastore returns no partition,
 *         or more than one partition, for {@code partitionName}
 * @throws NullPointerException if no statistics are returned for {@code partitionName}
 */
@Override
public void updatePartitionStatistics(HiveIdentity identity, Table table, String partitionName, Function<PartitionStatistics, PartitionStatistics> update) {
    List<Partition> partitions = getPartitionsByNames(identity, table.getDbName(), table.getTableName(), ImmutableList.of(partitionName));
    // Exactly one partition is expected. Report the empty and the ambiguous case separately so
    // that a missing partition is not described as "multiple partitions".
    if (partitions.isEmpty()) {
        throw new TrinoException(HIVE_METASTORE_ERROR, "Metastore returned no partition for name: " + partitionName);
    }
    if (partitions.size() > 1) {
        throw new TrinoException(HIVE_METASTORE_ERROR, "Metastore returned multiple partitions for name: " + partitionName);
    }
    Partition originalPartition = getOnlyElement(partitions);

    PartitionStatistics currentStatistics = requireNonNull(
            getPartitionStatistics(identity, table, partitions).get(partitionName),
            "getPartitionStatistics() did not return statistics for partition");
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);

    // Work on a copy so the partition object that was read stays untouched.
    Partition modifiedPartition = originalPartition.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedPartition.setParameters(updateStatisticsParameters(modifiedPartition.getParameters(), basicStatistics));
    alterPartitionWithoutStatistics(identity, table.getDbName(), table.getTableName(), modifiedPartition);

    // Column types come from the partition's own storage descriptor.
    Map<String, HiveType> columns = modifiedPartition.getSd().getCols().stream()
            .collect(toImmutableMap(FieldSchema::getName, schema -> HiveType.valueOf(schema.getType())));
    setPartitionColumnStatistics(identity, table.getDbName(), table.getTableName(), partitionName, columns, updatedStatistics.getColumnStatistics(), basicStatistics.getRowCount());

    // Drop statistics for every column that had them before but is absent from the updated set.
    Set<String> removedStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet());
    removedStatistics.forEach(column -> deletePartitionColumnStatistics(identity, table.getDbName(), table.getTableName(), partitionName, column));
}
use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class ThriftHiveMetastore method storePartitionColumnStatistics.
/**
 * Stores the column statistics carried by {@code partitionWithStatistics} for the named partition.
 * Does nothing when there are no column statistics.
 *
 * @param identity identity used for the metastore call
 * @param databaseName database containing the table
 * @param tableName table the partition belongs to
 * @param partitionName name of the partition whose column statistics are stored
 * @param partitionWithStatistics partition together with the statistics to store
 */
private void storePartitionColumnStatistics(HiveIdentity identity, String databaseName, String tableName, String partitionName, PartitionWithStatistics partitionWithStatistics) {
    PartitionStatistics partitionStatistics = partitionWithStatistics.getStatistics();
    Map<String, HiveColumnStatistics> statisticsByColumn = partitionStatistics.getColumnStatistics();
    if (!statisticsByColumn.isEmpty()) {
        // Column types are taken from the partition's own column list.
        Map<String, HiveType> typesByColumn = partitionWithStatistics.getPartition().getColumns().stream()
                .collect(toImmutableMap(Column::getName, Column::getType));
        setPartitionColumnStatistics(
                identity,
                databaseName,
                tableName,
                partitionName,
                typesByColumn,
                statisticsByColumn,
                partitionStatistics.getBasicStatistics().getRowCount());
    }
}
use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class MetastoreHiveStatisticsProvider method calculateNullsFraction.
/**
 * Estimates the fraction of null values of {@code column} across the given partitions.
 * <p>
 * Only partitions with a known row count and a known nulls count for the column contribute.
 * The result is the sum of their nulls counts divided by the sum of their row counts.
 *
 * @param column name of the column to look up in each partition's column statistics
 * @param partitionStatistics statistics of the partitions to aggregate
 * @return {@code Estimate.unknown()} when no partition qualifies, {@code Estimate.zero()} when the
 *         qualifying partitions have zero rows in total, otherwise the computed fraction
 * @throws VerifyException if a qualifying partition has a negative row count, a negative nulls
 *         count, or a nulls count greater than its row count
 */
@VisibleForTesting
static Estimate calculateNullsFraction(String column, Collection<PartitionStatistics> partitionStatistics) {
    // Keep only partitions for which both the row count and the column's nulls count are known.
    List<PartitionStatistics> usableStatistics = partitionStatistics.stream()
            .filter(candidate -> candidate.getBasicStatistics().getRowCount().isPresent())
            .filter(candidate -> {
                HiveColumnStatistics candidateColumn = candidate.getColumnStatistics().get(column);
                return candidateColumn != null && candidateColumn.getNullsCount().isPresent();
            })
            .collect(toImmutableList());
    if (usableStatistics.isEmpty()) {
        return Estimate.unknown();
    }

    long nullsSum = 0;
    long rowsSum = 0;
    for (PartitionStatistics partition : usableStatistics) {
        long partitionRows = partition.getBasicStatistics().getRowCount()
                .orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRows >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics partitionColumn = partition.getColumnStatistics().get(column);
        verifyNotNull(partitionColumn, "columnStatistics is null");
        long partitionNulls = partitionColumn.getNullsCount()
                .orElseThrow(() -> new VerifyException("nullsCount is not present"));
        verify(partitionNulls >= 0, "nullsCount must be greater than or equal to zero");
        verify(partitionNulls <= partitionRows, "nullsCount must be less than or equal to rowCount. nullsCount: %s. rowCount: %s.", partitionNulls, partitionRows);

        nullsSum += partitionNulls;
        rowsSum += partitionRows;
    }

    // Avoid dividing by zero when every qualifying partition is empty.
    if (rowsSum == 0) {
        return Estimate.zero();
    }
    verify(nullsSum <= rowsSum, "totalNullsCount must be less than or equal to totalRowCount. totalNullsCount: %s. totalRowCount: %s.", nullsSum, rowsSum);
    return Estimate.of((double) nullsSum / rowsSum);
}
Aggregations