use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.
the class ThriftHiveMetastore method updateTableStatistics.
/**
 * Applies {@code update} to the table's current statistics and writes the result back to the metastore.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>read the current statistics and compute the updated ones via {@code update};</li>
 * <li>copy the metastore table, replace its parameters with the updated basic statistics, and alter the table;</li>
 * <li>store column statistics for every column present in the updated statistics (skipped when there are none);</li>
 * <li>delete column statistics for columns that had statistics before the update but not after.</li>
 * </ol>
 *
 * @param metastoreContext context passed through to every metastore call
 * @param databaseName name of the database containing the table
 * @param tableName name of the table whose statistics are updated
 * @param update function mapping the current statistics to the statistics to store
 * @throws TableNotFoundException if the table cannot be found
 * @throws java.util.NoSuchElementException if the updated statistics reference a column that the table does not have
 */
@Override
public synchronized void updateTableStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Function<PartitionStatistics, PartitionStatistics> update)
{
    PartitionStatistics currentStatistics = getTableStatistics(metastoreContext, databaseName, tableName);
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);

    Table originalTable = getTable(metastoreContext, databaseName, tableName)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    // Work on a copy so the table object returned by getTable is left untouched.
    Table modifiedTable = originalTable.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedTable.setParameters(updateStatisticsParameters(modifiedTable.getParameters(), basicStatistics));
    alterTable(metastoreContext, databaseName, tableName, modifiedTable);

    com.facebook.presto.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable, metastoreContext.getColumnConverter());
    OptionalLong rowCount = basicStatistics.getRowCount();
    List<ColumnStatisticsObj> metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream()
            .map(entry -> createMetastoreColumnStatistics(
                    entry.getKey(),
                    // Fail with a descriptive message (same exception type as Optional.get()) when
                    // statistics are supplied for a column that the table does not declare.
                    table.getColumn(entry.getKey())
                            .orElseThrow(() -> new java.util.NoSuchElementException(
                                    "Column '" + entry.getKey() + "' has statistics but does not exist in table " + databaseName + "." + tableName))
                            .getType(),
                    entry.getValue(),
                    rowCount))
            .collect(toImmutableList());
    if (!metastoreColumnStatistics.isEmpty()) {
        setTableColumnStatistics(metastoreContext, databaseName, tableName, metastoreColumnStatistics);
    }

    // Columns that had statistics before the update but are absent afterwards get their statistics removed.
    Set<String> removedColumnStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet());
    removedColumnStatistics.forEach(column -> deleteTableColumnStatistics(metastoreContext, databaseName, tableName, column));
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.
the class ThriftMetastoreUtil method createStringStatistics.
/**
 * Builds the metastore column statistics object for a string column.
 *
 * <p>The null count and the distinct-value count are set only when available;
 * the maximum and average column lengths fall back to zero when they are not.
 *
 * @param columnName name of the column
 * @param columnType Hive type of the column; its string form is stored in the result
 * @param statistics source statistics for the column
 * @param rowCount row count passed to the average-length computation
 * @return column statistics carrying string statistics data
 */
private static ColumnStatisticsObj createStringStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount)
{
    OptionalLong nullsCount = statistics.getNullsCount();
    OptionalLong maxValueSize = statistics.getMaxValueSizeInBytes();

    StringColumnStatsData stringData = new StringColumnStatsData();
    if (nullsCount.isPresent()) {
        stringData.setNumNulls(nullsCount.getAsLong());
    }
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), nullsCount)
            .ifPresent(distinctValues -> stringData.setNumDVs(distinctValues));
    stringData.setMaxColLen(maxValueSize.isPresent() ? maxValueSize.getAsLong() : 0);
    stringData.setAvgColLen(
            getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, nullsCount).orElse(0));

    return new ColumnStatisticsObj(columnName, columnType.toString(), stringStats(stringData));
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.
the class ThriftMetastoreUtil method createDecimalStatistics.
/**
 * Builds the metastore column statistics object for a decimal column.
 *
 * <p>Low/high values, the null count and the distinct-value count are each set
 * only when the corresponding source value is present.
 *
 * @param columnName name of the column
 * @param columnType Hive type of the column; its string form is stored in the result
 * @param statistics source statistics for the column
 * @return column statistics carrying decimal statistics data
 */
private static ColumnStatisticsObj createDecimalStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DecimalColumnStatsData decimalData = new DecimalColumnStatsData();

    // Value bounds: low first, then high, each only when available.
    statistics.getDecimalStatistics()
            .flatMap(range -> range.getMin())
            .ifPresent(min -> decimalData.setLowValue(toMetastoreDecimal(min)));
    statistics.getDecimalStatistics()
            .flatMap(range -> range.getMax())
            .ifPresent(max -> decimalData.setHighValue(toMetastoreDecimal(max)));

    OptionalLong nullsCount = statistics.getNullsCount();
    if (nullsCount.isPresent()) {
        decimalData.setNumNulls(nullsCount.getAsLong());
    }
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), nullsCount)
            .ifPresent(distinctValues -> decimalData.setNumDVs(distinctValues));

    return new ColumnStatisticsObj(columnName, columnType.toString(), decimalStats(decimalData));
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.
the class ThriftMetastoreUtil method createBinaryStatistics.
/**
 * Builds the metastore column statistics object for a binary column.
 *
 * <p>The null count is set only when available; the maximum and average column
 * lengths fall back to zero when they are not.
 *
 * @param columnName name of the column
 * @param columnType Hive type of the column; its string form is stored in the result
 * @param statistics source statistics for the column
 * @param rowCount row count passed to the average-length computation
 * @return column statistics carrying binary statistics data
 */
private static ColumnStatisticsObj createBinaryStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount)
{
    OptionalLong nullsCount = statistics.getNullsCount();
    OptionalLong maxValueSize = statistics.getMaxValueSizeInBytes();

    BinaryColumnStatsData binaryData = new BinaryColumnStatsData();
    if (nullsCount.isPresent()) {
        binaryData.setNumNulls(nullsCount.getAsLong());
    }
    binaryData.setMaxColLen(maxValueSize.isPresent() ? maxValueSize.getAsLong() : 0);
    binaryData.setAvgColLen(
            getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, nullsCount).orElse(0));

    return new ColumnStatisticsObj(columnName, columnType.toString(), binaryStats(binaryData));
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.
the class TestThriftHiveMetastoreUtil method testEmptyDecimalStatsToColumnStatistics.
/**
 * Converting decimal column statistics with no fields set must yield decimal
 * statistics with empty min/max and leave every other statistic empty.
 */
@Test
public void testEmptyDecimalStatsToColumnStatistics()
{
    // Metastore object whose decimal stats data has no field set.
    ColumnStatisticsObj emptyDecimalColumn = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(new DecimalColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(emptyDecimalColumn, OptionalLong.empty());

    // Only the decimal statistics are populated, and their bounds are empty.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(
            converted.getDecimalStatistics(),
            Optional.of(new DecimalStatistics(Optional.empty(), Optional.empty())));
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());

    // Size and count statistics are all absent.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Aggregations