use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class MetastoreHiveStatisticsProvider method getTableStatistics.
private static TableStatistics getTableStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, Map<String, PartitionStatistics> statistics) {
if (statistics.isEmpty()) {
return TableStatistics.empty();
}
checkArgument(!partitions.isEmpty(), "partitions is empty");
OptionalDouble optionalAverageRowsPerPartition = calculateAverageRowsPerPartition(statistics.values());
if (!optionalAverageRowsPerPartition.isPresent()) {
return TableStatistics.empty();
}
double averageRowsPerPartition = optionalAverageRowsPerPartition.getAsDouble();
verify(averageRowsPerPartition >= 0, "averageRowsPerPartition must be greater than or equal to zero");
int queriedPartitionsCount = partitions.size();
double rowCount = averageRowsPerPartition * queriedPartitionsCount;
TableStatistics.Builder result = TableStatistics.builder();
long fileCount = calulateFileCount(statistics.values());
long totalOnDiskSize = calculateTotalOnDiskSizeInBytes(statistics.values());
result.setRowCount(Estimate.of(rowCount));
result.setFileCount(fileCount);
result.setOnDiskDataSizeInBytes(totalOnDiskSize);
for (Map.Entry<String, ColumnHandle> column : columns.entrySet()) {
String columnName = column.getKey();
HiveColumnHandle columnHandle = (HiveColumnHandle) column.getValue();
Type columnType = columnTypes.get(columnName);
ColumnStatistics columnStatistics;
TableColumnStatistics tableColumnStatistics;
if (columnHandle.isPartitionKey()) {
tableColumnStatistics = statsCache.get(partitions.get(0).getTableName().getTableName() + columnName);
if (tableColumnStatistics == null || invalidateStatsCache(tableColumnStatistics, Estimate.of(rowCount), fileCount, totalOnDiskSize)) {
columnStatistics = createPartitionColumnStatistics(columnHandle, columnType, partitions, statistics, averageRowsPerPartition, rowCount);
TableStatistics tableStatistics = new TableStatistics(Estimate.of(rowCount), fileCount, totalOnDiskSize, ImmutableMap.of());
tableColumnStatistics = new TableColumnStatistics(tableStatistics, columnStatistics);
statsCache.put(partitions.get(0).getTableName().getTableName() + columnName, tableColumnStatistics);
} else {
columnStatistics = tableColumnStatistics.columnStatistics;
}
} else {
columnStatistics = createDataColumnStatistics(columnName, columnType, rowCount, statistics.values());
}
result.setColumnStatistics(columnHandle, columnStatistics);
}
return result.build();
}
use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.
@Test
public void testGetTableStatistics() {
String partitionName = "p1=string1/p2=1234";
PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(partitionName, statistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());
TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setColumnStatistics(PARTITION_COLUMN_1, ColumnStatistics.builder().setDataSize(Estimate.of(7000)).setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).build()).setColumnStatistics(PARTITION_COLUMN_2, ColumnStatistics.builder().setRange(new DoubleRange(1234, 1234)).setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).build()).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle), ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT), ImmutableList.of(partition(partitionName)), true, table), expected);
}
use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsUnpartitioned.
@Test
public void testGetTableStatisticsUnpartitioned() {
PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());
TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE)), true, table), expected);
}
use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class HiveMetadata method getTableStatistics.
@Override
public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint constraint, boolean includeColumnStatistics) {
if (!HiveSessionProperties.isStatisticsEnabled(session)) {
return TableStatistics.empty();
}
SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
Map<String, ColumnHandle> columns = getColumnHandles(table).entrySet().stream().filter(entry -> !((HiveColumnHandle) entry.getValue()).isHidden()).collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
Map<String, Type> columnTypes = columns.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> getColumnMetadata(session, tableHandle, entry.getValue()).getType()));
HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, new HiveIdentity(session), tableHandle, constraint, table);
List<HivePartition> partitions = partitionManager.getPartitionsAsList(partitionResult);
return hiveStatisticsProvider.getTableStatistics(session, ((HiveTableHandle) tableHandle).getSchemaTableName(), columns, columnTypes, partitions, includeColumnStatistics, table);
}
use of io.prestosql.spi.statistics.TableStatistics in project boostkit-bigdata by kunpengcompute.
the class AbstractTestHive method assertTableStatsComputed.
private void assertTableStatsComputed(SchemaTableName tableName, Set<String> expectedColumnStatsColumns) {
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorSession session = newSession();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Constraint.alwaysTrue(), true);
assertFalse(tableStatistics.getRowCount().isUnknown(), "row count is unknown");
Map<String, ColumnStatistics> columnsStatistics = tableStatistics.getColumnStatistics().entrySet().stream().collect(toImmutableMap(entry -> ((HiveColumnHandle) entry.getKey()).getName(), Map.Entry::getValue));
assertEquals(columnsStatistics.keySet(), expectedColumnStatsColumns, "columns with statistics");
Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
columnsStatistics.forEach((columnName, columnStatistics) -> {
ColumnHandle columnHandle = columnHandles.get(columnName);
Type columnType = metadata.getColumnMetadata(session, tableHandle, columnHandle).getType();
assertFalse(columnStatistics.getNullsFraction().isUnknown(), "unknown nulls fraction for " + columnName);
assertFalse(columnStatistics.getDistinctValuesCount().isUnknown(), "unknown distinct values count for " + columnName);
if (isVarcharType(columnType)) {
assertFalse(columnStatistics.getDataSize().isUnknown(), "unknown data size for " + columnName);
} else {
assertTrue(columnStatistics.getDataSize().isUnknown(), "unknown data size for" + columnName);
}
});
}
}
Aggregations