use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.
@Test
public void testGetTableStatistics() {
String partitionName = "p1=string1/p2=1234";
PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(partitionName, statistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());
TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setTotalSize(Estimate.of(5000)).setColumnStatistics(PARTITION_COLUMN_1, ColumnStatistics.builder().setDataSize(Estimate.of(7000)).setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).build()).setColumnStatistics(PARTITION_COLUMN_2, ColumnStatistics.builder().setRange(new DoubleRange(1234, 1234)).setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).build()).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle), ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT), ImmutableList.of(partition(partitionName))), expected);
}
use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsUnpartitioned.
@Test
public void testGetTableStatisticsUnpartitioned() {
PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());
TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setTotalSize(Estimate.of(5000)).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE))), expected);
}
use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testValidatePartitionStatistics.
@Test
public void testValidatePartitionStatistics() {
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0)).build(), invalidPartitionStatistics("fileCount must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, -1, 0, 0)).build(), invalidPartitionStatistics("rowCount must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, -1, 0)).build(), invalidPartitionStatistics("inMemoryDataSizeInBytes must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, -1)).build(), invalidPartitionStatistics("onDiskDataSizeInBytes must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setMaxValueSizeInBytes(-1).build())).build(), invalidColumnStatistics("maxValueSizeInBytes must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setTotalSizeInBytes(-1).build())).build(), invalidColumnStatistics("totalSizeInBytes must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(-1).build())).build(), invalidColumnStatistics("nullsCount must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(1).build())).build(), invalidColumnStatistics("nullsCount must be less than or equal to rowCount. nullsCount: 1. rowCount: 0."));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(-1).build())).build(), invalidColumnStatistics("distinctValuesCount must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(1).build())).build(), invalidColumnStatistics("distinctValuesCount must be less than or equal to rowCount. distinctValuesCount: 1. rowCount: 0."));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 1, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(1).setNullsCount(1).build())).build(), invalidColumnStatistics("distinctValuesCount must be less than or equal to nonNullsCount. distinctValuesCount: 1. nonNullsCount: 0."));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(1), OptionalLong.of(-1), OptionalLong.empty(), OptionalLong.empty()))).build(), invalidColumnStatistics("integerStatistics.min must be less than or equal to integerStatistics.max. integerStatistics.min: 1. integerStatistics.max: -1."));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createDoubleColumnStatistics(OptionalDouble.of(1), OptionalDouble.of(-1), OptionalLong.empty(), OptionalLong.empty()))).build(), invalidColumnStatistics("doubleStatistics.min must be less than or equal to doubleStatistics.max. doubleStatistics.min: 1.0. doubleStatistics.max: -1.0."));
validatePartitionStatistics(TABLE, ImmutableMap.of(PARTITION, PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createDoubleColumnStatistics(OptionalDouble.of(NaN), OptionalDouble.of(NaN), OptionalLong.empty(), OptionalLong.empty()))).build()));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createDecimalColumnStatistics(Optional.of(BigDecimal.valueOf(1)), Optional.of(BigDecimal.valueOf(-1)), OptionalLong.empty(), OptionalLong.empty()))).build(), invalidColumnStatistics("decimalStatistics.min must be less than or equal to decimalStatistics.max. decimalStatistics.min: 1. decimalStatistics.max: -1."));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createDateColumnStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(-1)), OptionalLong.empty(), OptionalLong.empty()))).build(), invalidColumnStatistics("dateStatistics.min must be less than or equal to dateStatistics.max. dateStatistics.min: 1970-01-02. dateStatistics.max: 1969-12-31."));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createBooleanColumnStatistics(OptionalLong.of(-1), OptionalLong.empty(), OptionalLong.empty()))).build(), invalidColumnStatistics("trueCount must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createBooleanColumnStatistics(OptionalLong.empty(), OptionalLong.of(-1), OptionalLong.empty()))).build(), invalidColumnStatistics("falseCount must be greater than or equal to zero: -1"));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.empty(), OptionalLong.empty()))).build(), invalidColumnStatistics("booleanStatistics.trueCount must be less than or equal to rowCount. booleanStatistics.trueCount: 1. rowCount: 0."));
assertInvalidStatistics(PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)).setColumnStatistics(ImmutableMap.of(COLUMN, createBooleanColumnStatistics(OptionalLong.empty(), OptionalLong.of(1), OptionalLong.empty()))).build(), invalidColumnStatistics("booleanStatistics.falseCount must be less than or equal to rowCount. booleanStatistics.falseCount: 1. rowCount: 0."));
}
use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsValidationFailure.
@Test
public void testGetTableStatisticsValidationFailure() {
PartitionStatistics corruptedStatistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0)).build();
String partitionName = "p1=string1/p2=1234";
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(partitionName, corruptedStatistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setIgnoreCorruptedStatistics(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
assertThatThrownBy(() -> statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)))).isInstanceOf(PrestoException.class).hasFieldOrPropertyWithValue("errorCode", HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
TestingConnectorSession ignoreSession = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setIgnoreCorruptedStatistics(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
assertEquals(statisticsProvider.getTableStatistics(ignoreSession, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName))), TableStatistics.empty());
}
use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class AlluxioHiveMetastore method getPartitionStatistics.
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Set<String> partitionNames) {
Table table = getTable(metastoreContext, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
Map<String, HiveBasicStatistics> partitionBasicStatistics = getPartitionsByNames(metastoreContext, databaseName, tableName, ImmutableList.copyOf(partitionNames)).entrySet().stream().filter(entry -> entry.getValue().isPresent()).collect(toImmutableMap(entry -> MetastoreUtil.makePartName(table.getPartitionColumns(), entry.getValue().get().getValues()), entry -> getHiveBasicStatistics(entry.getValue().get().getParameters())));
Map<String, OptionalLong> partitionRowCounts = partitionBasicStatistics.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));
List<String> dataColumns = table.getDataColumns().stream().map(Column::getName).collect(toImmutableList());
Map<String, List<ColumnStatisticsInfo>> columnStatisticss;
try {
columnStatisticss = client.getPartitionColumnStatistics(table.getDatabaseName(), table.getTableName(), partitionBasicStatistics.keySet().stream().collect(toImmutableList()), dataColumns);
} catch (AlluxioStatusException e) {
throw new PrestoException(HIVE_METASTORE_ERROR, e);
}
Map<String, Map<String, HiveColumnStatistics>> partitionColumnStatistics = columnStatisticss.entrySet().stream().filter(entry -> !entry.getValue().isEmpty()).collect(toImmutableMap(Map.Entry::getKey, entry -> groupStatisticsByColumn(metastoreContext, entry.getValue(), partitionRowCounts.getOrDefault(entry.getKey(), OptionalLong.empty()))));
ImmutableMap.Builder<String, PartitionStatistics> result = ImmutableMap.builder();
for (String partitionName : partitionBasicStatistics.keySet()) {
HiveBasicStatistics basicStatistics = partitionBasicStatistics.get(partitionName);
Map<String, HiveColumnStatistics> columnStatistics = partitionColumnStatistics.getOrDefault(partitionName, ImmutableMap.of());
result.put(partitionName, new PartitionStatistics(basicStatistics, columnStatistics));
}
return result.build();
}
Aggregations