Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
Class TestMetastoreHiveStatisticsProvider, method testGetTableStatistics:
@Test
public void testGetTableStatistics()
{
    String partitionName = "p1=string1/p2=1234";
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty()))
            .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(partitionName, statistics));
    HiveColumnHandle columnHandle = createBaseColumn(COLUMN, 2, HIVE_LONG, BIGINT, REGULAR, Optional.empty());
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(PARTITION_COLUMN_1, ColumnStatistics.builder()
                    .setDataSize(Estimate.of(7000))
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .build())
            .setColumnStatistics(PARTITION_COLUMN_2, ColumnStatistics.builder()
                    .setRange(new DoubleRange(1234, 1234))
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .build())
            .setColumnStatistics(columnHandle, ColumnStatistics.builder()
                    .setRange(new DoubleRange(-100, 100))
                    .setNullsFraction(Estimate.of(0.5))
                    .setDistinctValuesCount(Estimate.of(300))
                    .build())
            .build();
    assertEquals(
            statisticsProvider.getTableStatistics(
                    SESSION,
                    TABLE,
                    ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle),
                    ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT),
                    ImmutableList.of(partition(partitionName))),
            expected);
}
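Each expected estimate follows from the single partition in play: the p1 value string1 is 7 characters long and appears in all 1000 rows, giving a data size estimate of 7 × 1000 = 7000 with exactly one distinct value; p2 is the constant 1234, so its range collapses to [1234, 1234]; and COLUMN reports 500 nulls out of 1000 rows (nulls fraction 0.5), 300 distinct values, and a range of [-100, 100].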
Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
Class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsValidationFailure:
@Test
public void testGetTableStatisticsValidationFailure()
{
    PartitionStatistics corruptedStatistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0))
            .build();
    String partitionName = "p1=string1/p2=1234";
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(partitionName, corruptedStatistics));
    assertThatThrownBy(() -> statisticsProvider.getTableStatistics(
            getHiveSession(new HiveConfig().setIgnoreCorruptedStatistics(false)),
            TABLE,
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableList.of(partition(partitionName))))
            .isInstanceOf(TrinoException.class)
            .hasFieldOrPropertyWithValue("errorCode", HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
    assertEquals(
            statisticsProvider.getTableStatistics(
                    getHiveSession(new HiveConfig().setIgnoreCorruptedStatistics(true)),
                    TABLE,
                    ImmutableMap.of(),
                    ImmutableMap.of(),
                    ImmutableList.of(partition(partitionName))),
            TableStatistics.empty());
}
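The behavior under test is fail-by-default, degrade-to-empty when corrupted statistics are ignored. A minimal sketch of that pattern, assuming hypothetical helper names (isIgnoreCorruptedStatistics and calculateTableStatistics are illustrative, not the provider's exact private API):

private TableStatistics getValidatedTableStatistics(ConnectorSession session, SchemaTableName table, Map<String, PartitionStatistics> statistics)
{
    try {
        // Throws TrinoException(HIVE_CORRUPTED_COLUMN_STATISTICS) on invalid values such as negative counts
        validatePartitionStatistics(table, statistics);
    }
    catch (TrinoException e) {
        if (isIgnoreCorruptedStatistics(session)) {
            // Degrade gracefully: report no statistics instead of failing the query
            return TableStatistics.empty();
        }
        throw e;
    }
    return calculateTableStatistics(table, statistics); // hypothetical aggregation step
}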
Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
Class TestMetastoreHiveStatisticsProvider, method testValidatePartitionStatistics:
@Test
public void testValidatePartitionStatistics()
{
    assertInvalidStatistics(
            PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0)).build(),
            invalidPartitionStatistics("fileCount must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, -1, 0, 0)).build(),
            invalidPartitionStatistics("rowCount must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, -1, 0)).build(),
            invalidPartitionStatistics("inMemoryDataSizeInBytes must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(0, 0, 0, -1)).build(),
            invalidPartitionStatistics("onDiskDataSizeInBytes must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setMaxValueSizeInBytes(-1).build()))
                    .build(),
            invalidColumnStatistics("maxValueSizeInBytes must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setTotalSizeInBytes(-1).build()))
                    .build(),
            invalidColumnStatistics("totalSizeInBytes must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(-1).build()))
                    .build(),
            invalidColumnStatistics("nullsCount must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(1).build()))
                    .build(),
            invalidColumnStatistics("nullsCount must be less than or equal to rowCount. nullsCount: 1. rowCount: 0."));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(-1).build()))
                    .build(),
            invalidColumnStatistics("distinctValuesCount must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(1).build()))
                    .build(),
            invalidColumnStatistics("distinctValuesCount must be less than or equal to rowCount. distinctValuesCount: 1. rowCount: 0."));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 1, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(1).setNullsCount(1).build()))
                    .build(),
            invalidColumnStatistics("distinctValuesCount must be less than or equal to nonNullsCount. distinctValuesCount: 1. nonNullsCount: 0."));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(1), OptionalLong.of(-1), OptionalLong.empty(), OptionalLong.empty())))
                    .build(),
            invalidColumnStatistics("integerStatistics.min must be less than or equal to integerStatistics.max. integerStatistics.min: 1. integerStatistics.max: -1."));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createDoubleColumnStatistics(OptionalDouble.of(1), OptionalDouble.of(-1), OptionalLong.empty(), OptionalLong.empty())))
                    .build(),
            invalidColumnStatistics("doubleStatistics.min must be less than or equal to doubleStatistics.max. doubleStatistics.min: 1.0. doubleStatistics.max: -1.0."));
    validatePartitionStatistics(TABLE, ImmutableMap.of(
            PARTITION,
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createDoubleColumnStatistics(OptionalDouble.of(NaN), OptionalDouble.of(NaN), OptionalLong.empty(), OptionalLong.empty())))
                    .build()));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createDecimalColumnStatistics(Optional.of(BigDecimal.valueOf(1)), Optional.of(BigDecimal.valueOf(-1)), OptionalLong.empty(), OptionalLong.empty())))
                    .build(),
            invalidColumnStatistics("decimalStatistics.min must be less than or equal to decimalStatistics.max. decimalStatistics.min: 1. decimalStatistics.max: -1."));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createDateColumnStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(-1)), OptionalLong.empty(), OptionalLong.empty())))
                    .build(),
            invalidColumnStatistics("dateStatistics.min must be less than or equal to dateStatistics.max. dateStatistics.min: 1970-01-02. dateStatistics.max: 1969-12-31."));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createBooleanColumnStatistics(OptionalLong.of(-1), OptionalLong.empty(), OptionalLong.empty())))
                    .build(),
            invalidColumnStatistics("trueCount must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createBooleanColumnStatistics(OptionalLong.empty(), OptionalLong.of(-1), OptionalLong.empty())))
                    .build(),
            invalidColumnStatistics("falseCount must be greater than or equal to zero: -1"));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.empty(), OptionalLong.empty())))
                    .build(),
            invalidColumnStatistics("booleanStatistics.trueCount must be less than or equal to rowCount. booleanStatistics.trueCount: 1. rowCount: 0."));
    assertInvalidStatistics(
            PartitionStatistics.builder()
                    .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0))
                    .setColumnStatistics(ImmutableMap.of(COLUMN, createBooleanColumnStatistics(OptionalLong.empty(), OptionalLong.of(1), OptionalLong.empty())))
                    .build(),
            invalidColumnStatistics("booleanStatistics.falseCount must be less than or equal to rowCount. booleanStatistics.falseCount: 1. rowCount: 0."));
}
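Note that the NaN min/max case is invoked through validatePartitionStatistics directly: NaN bounds are tolerated rather than rejected, so no exception is expected there. The assertInvalidStatistics and invalidPartitionStatistics/invalidColumnStatistics helpers are private to the test class; a plausible sketch follows, with the error-message prefix formats assumed rather than taken from the Trino source:

// Sketch of the private test helpers; message prefixes are assumptions.
private static void assertInvalidStatistics(PartitionStatistics statistics, String expectedErrorMessage)
{
    assertThatThrownBy(() -> validatePartitionStatistics(TABLE, ImmutableMap.of(PARTITION, statistics)))
            .isInstanceOf(TrinoException.class)
            .hasFieldOrPropertyWithValue("errorCode", HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode())
            .hasMessage(expectedErrorMessage);
}

private static String invalidPartitionStatistics(String message)
{
    return format("Corrupted partition statistics (Table: %s, Partition: [%s]): %s", TABLE, PARTITION, message);
}

private static String invalidColumnStatistics(String message)
{
    return format("Corrupted partition statistics (Table: %s, Partition: [%s], Column: %s): %s", TABLE, PARTITION, COLUMN, message);
}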
Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
Class TestThriftMetastoreUtil, method testBasicStatisticsRoundTrip:
@Test
public void testBasicStatisticsRoundTrip()
{
    testBasicStatisticsRoundTrip(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty()));
    testBasicStatisticsRoundTrip(new HiveBasicStatistics(OptionalLong.of(1), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.empty()));
    testBasicStatisticsRoundTrip(new HiveBasicStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3), OptionalLong.of(4)));
}
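The single-argument overload called above is a private helper. A minimal sketch, assuming it round-trips the statistics through metastore table parameters using updateStatisticsParameters and getHiveBasicStatistics from ThriftMetastoreUtil:

private static void testBasicStatisticsRoundTrip(HiveBasicStatistics expected)
{
    // Serialize into metastore-style parameters (numFiles, numRows, rawDataSize, totalSize) ...
    Map<String, String> parameters = updateStatisticsParameters(ImmutableMap.of(), expected);
    // ... then parse back; empty fields must survive the round trip unchanged.
    assertEquals(getHiveBasicStatistics(parameters), expected);
}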
Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
Class ThriftHiveMetastore, method updateTableStatistics:
@Override
public void updateTableStatistics(HiveIdentity identity, String databaseName, String tableName, AcidTransaction transaction, Function<PartitionStatistics, PartitionStatistics> update)
{
    Table originalTable = getTable(identity, databaseName, tableName)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    PartitionStatistics currentStatistics = getTableStatistics(identity, originalTable);
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);
    Table modifiedTable = originalTable.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedTable.setParameters(updateStatisticsParameters(modifiedTable.getParameters(), basicStatistics));
    if (transaction.isAcidTransactionRunning()) {
        modifiedTable.setWriteId(transaction.getWriteId());
    }
    alterTable(identity, databaseName, tableName, modifiedTable);
    io.trino.plugin.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable);
    OptionalLong rowCount = basicStatistics.getRowCount();
    List<ColumnStatisticsObj> metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream()
            .flatMap(entry -> {
                Optional<Column> column = table.getColumn(entry.getKey());
                if (column.isEmpty() && isAvroTableWithSchemaSet(modifiedTable)) {
                    // An Avro table can have an effective schema that differs from the one declared in the
                    // metastore, and the metastore does not allow storing statistics for a column it does
                    // not know about.
                    return Stream.of();
                }
                HiveType type = column.orElseThrow(() -> new IllegalStateException("Column not found: " + entry.getKey())).getType();
                return Stream.of(createMetastoreColumnStatistics(entry.getKey(), type, entry.getValue(), rowCount));
            })
            .collect(toImmutableList());
    if (!metastoreColumnStatistics.isEmpty()) {
        setTableColumnStatistics(identity, databaseName, tableName, metastoreColumnStatistics);
    }
    Set<String> removedColumnStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet());
    removedColumnStatistics.forEach(column -> deleteTableColumnStatistics(identity, databaseName, tableName, column));
}
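Callers supply the update function, which receives the current statistics and returns the replacement; column statistics present before but absent after the update are deleted. A hypothetical invocation that bumps the row count after appending 1000 rows (the metastore variable, identity, and table names are illustrative):

// Hypothetical call site: merge 1000 appended rows into the current basic statistics.
metastore.updateTableStatistics(identity, "web", "events", AcidTransaction.NO_ACID_TRANSACTION, current -> {
    HiveBasicStatistics basic = current.getBasicStatistics();
    OptionalLong rowCount = basic.getRowCount();
    return PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(
                    basic.getFileCount(),
                    rowCount.isPresent() ? OptionalLong.of(rowCount.getAsLong() + 1000) : OptionalLong.empty(),
                    basic.getInMemoryDataSizeInBytes(),
                    basic.getOnDiskDataSizeInBytes()))
            .setColumnStatistics(current.getColumnStatistics())
            .build();
});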