use of io.prestosql.spi.statistics.ColumnStatistics in project boostkit-bigdata by kunpengcompute.
the class TestHivePushdownUtil method simulationHiveMetadata.
/**
 * Builds a Mockito-backed {@link HiveMetadata} for tests.
 *
 * <p>The returned mock answers the following calls for {@code OFFLOAD_SESSION} and
 * {@code OFFLOAD_HIVE_TABLE_HANDLE}:
 * <ul>
 * <li>{@code getTableMetadata}: a mocked table whose properties map holds the ORC storage format;</li>
 * <li>{@code getColumnMetadata} (any column handle): a mocked column named after {@code COLUMN_INT} with type {@code INTEGER};</li>
 * <li>{@code getColumnHandles}: a single-entry map for {@code COLUMN_INT};</li>
 * <li>{@code getTableStatistics} (any constraint, flag {@code true}): fixed statistics containing one entry for {@code COLUMN_INT}.</li>
 * </ul>
 *
 * @return the stubbed {@link HiveMetadata} mock
 */
protected static HiveMetadata simulationHiveMetadata() {
    // simulation chain: HiveTransactionManager -> HiveMetadata -> ColumnMetadata + TableStatistics + ColumnStatistics

    // Table metadata: only the properties map is stubbed.
    Map<String, Object> tableProperties = new HashMap<>();
    tableProperties.put(STORAGE_FORMAT_PROPERTY, HiveStorageFormat.ORC);
    ConnectorTableMetadata tableMetadata = Mockito.mock(ConnectorTableMetadata.class);
    Mockito.when(tableMetadata.getProperties()).thenReturn(tableProperties);

    // Column metadata: name and type of the single integer column.
    ColumnMetadata intColumnMetadata = Mockito.mock(ColumnMetadata.class);
    Mockito.when(intColumnMetadata.getName()).thenReturn(COLUMN_INT.getName());
    Mockito.when(intColumnMetadata.getType()).thenReturn(INTEGER);

    // Statistics: one column entry wrapped in the table-level statistics.
    ColumnStatistics intColumnStatistics = new ColumnStatistics(
            Estimate.zero(),
            Estimate.of(DISTINICT_COLUMN_NUM),
            Estimate.unknown(),
            Optional.of(new DoubleRange(1, 10)));
    Map<ColumnHandle, ColumnStatistics> statisticsByColumn = new HashMap<>();
    statisticsByColumn.put(COLUMN_INT, intColumnStatistics);
    TableStatistics tableStatistics = new TableStatistics(Estimate.of(OFFLOAD_COLUMN_NUM), 5, 1024, statisticsByColumn);

    Map<String, ColumnHandle> handlesByName = ImmutableMap.of(COLUMN_INT.getName(), COLUMN_INT);

    // Wire everything into the HiveMetadata mock.
    HiveMetadata hiveMetadata = Mockito.mock(HiveMetadata.class);
    Mockito.when(hiveMetadata.getTableMetadata(OFFLOAD_SESSION, OFFLOAD_HIVE_TABLE_HANDLE))
            .thenReturn(tableMetadata);
    Mockito.when(hiveMetadata.getColumnMetadata(
            Matchers.eq(OFFLOAD_SESSION),
            Matchers.eq(OFFLOAD_HIVE_TABLE_HANDLE),
            Matchers.any(ColumnHandle.class)))
            .thenReturn(intColumnMetadata);
    Mockito.when(hiveMetadata.getColumnHandles(OFFLOAD_SESSION, OFFLOAD_HIVE_TABLE_HANDLE))
            .thenReturn(handlesByName);
    Mockito.when(hiveMetadata.getTableStatistics(
            Matchers.eq(OFFLOAD_SESSION),
            Matchers.eq(OFFLOAD_HIVE_TABLE_HANDLE),
            Matchers.any(Constraint.class),
            Matchers.eq(true)))
            .thenReturn(tableStatistics);
    return hiveMetadata;
}
use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class TablePushdown method isTableWithUniqueColumnTableStatistics.
/**
 * Decides whether the join column of a table is unique according to the table's statistics.
 *
 * <p>The join column is looked up among the table's column handles by {@code joinCriteriaStrings[0]}
 * first and {@code joinCriteriaStrings[1]} second. It counts as unique when the table's row count
 * equals the column's distinct-values count.
 *
 * @param tableStatistics for the current table being parsed in the plan tree.
 * @param tableHandle for the TableScanNode currently being evaluated in the plan tree.
 * @return {@code true} if the table satisfies the unique column requirement; {@code false} if it does
 * not, or if neither join-criteria name is a column of the table
 * @throws NullPointerException if the join column exists but {@code tableStatistics} has no entry for it
 */
private boolean isTableWithUniqueColumnTableStatistics(TableStatistics tableStatistics, TableHandle tableHandle) {
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(ruleContext.getSession(), tableHandle);

    // Pick the handle for whichever join-criteria name this table actually has; the first one wins.
    ColumnHandle joinColumn;
    if (handlesByName.containsKey(joinCriteriaStrings[0])) {
        joinColumn = handlesByName.get(joinCriteriaStrings[0]);
    } else if (handlesByName.containsKey(joinCriteriaStrings[1])) {
        joinColumn = handlesByName.get(joinCriteriaStrings[1]);
    } else {
        // Neither side of the join criteria belongs to this table.
        return false;
    }

    ColumnStatistics joinColumnStatistics = requireNonNull(
            tableStatistics.getColumnStatistics().get(joinColumn),
            "Column Statistics cannot be null if the column exists for the table");
    return tableStatistics.getRowCount().getValue() == joinColumnStatistics.getDistinctValuesCount().getValue();
}
use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class TestConnectorFilterStatsCalculatorService method testTableStatisticsAfterFilter.
/**
 * Exercises {@code assertPredicate} with constant and range predicates against several starting
 * table statistics and checks the statistics expected after filtering.
 */
@Test
public void testTableStatisticsAfterFilter() {
    // Starting from zeroTableStatistics (original note: totalSize is always zero).
    assertPredicate("true", zeroTableStatistics, zeroTableStatistics);
    assertPredicate("x < 3e0", zeroTableStatistics, unknownTableStatistics);
    assertPredicate("false", zeroTableStatistics, zeroTableStatistics);

    // Starting from empty statistics (original note: rowCount and totalSize are both NaN).
    assertPredicate("true", TableStatistics.empty(), TableStatistics.empty());

    // A "false" predicate on empty statistics is expected to yield a zero row count.
    TableStatistics zeroRowCountOnly = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .build();
    assertPredicate("false", TableStatistics.empty(), zeroRowCountOnly);

    // A "false" predicate on the original statistics: zero rows plus the expected entry for column x.
    ColumnStatistics xAfterFalse = new ColumnStatistics(Estimate.of(1.0), Estimate.zero(), Estimate.zero(), Optional.empty());
    TableStatistics filteredToZeroStatistics = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setColumnStatistics(xColumn, xAfterFalse)
            .build();
    assertPredicate("false", originalTableStatistics, filteredToZeroStatistics);

    // "x < 0" expects the same column entry and row count for both starting statistics.
    ColumnStatistics xBelowZero = new ColumnStatistics(Estimate.zero(), Estimate.of(20), Estimate.unknown(), Optional.of(new DoubleRange(-10, 0)));

    TableStatistics filteredStatistics = TableStatistics.builder()
            .setRowCount(Estimate.of(37.5))
            .setColumnStatistics(xColumn, xBelowZero)
            .build();
    assertPredicate("x < 0", originalTableStatistics, filteredStatistics);

    TableStatistics filteredStatisticsWithoutTotalSize = TableStatistics.builder()
            .setRowCount(Estimate.of(37.5))
            .setColumnStatistics(xColumn, xBelowZero)
            .build();
    assertPredicate("x < 0", originalTableStatisticsWithoutTotalSize, filteredStatisticsWithoutTotalSize);
}
use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class MetastoreHiveStatisticsProvider method calculateDataSize.
/**
 * Estimates the data size of a column across all rows of a table.
 *
 * <p>Only partitions that report both a row count and a total size in bytes for {@code column} are
 * used. Their combined bytes-per-row average is scaled by {@code totalRowCount}.
 *
 * @param column name of the column whose per-partition statistics are looked up
 * @param partitionStatistics statistics of the partitions to aggregate
 * @param totalRowCount row count the average size per row is scaled to
 * @return {@link Estimate#unknown()} if no partition has both values, or if the usable partitions
 * hold zero rows in total while {@code totalRowCount} is non-zero; {@link Estimate#zero()} if
 * {@code totalRowCount} is zero and at least one partition is usable; otherwise the scaled estimate
 * @throws VerifyException if a usable partition reports a negative row count or data size
 */
@VisibleForTesting
static Estimate calculateDataSize(String column, Collection<PartitionStatistics> partitionStatistics, double totalRowCount) {
    // Keep only the partitions that know both their row count and this column's byte size.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(partition -> partition.getBasicStatistics().getRowCount().isPresent())
            .filter(partition -> {
                HiveColumnStatistics perColumn = partition.getColumnStatistics().get(column);
                return perColumn != null && perColumn.getTotalSizeInBytes().isPresent();
            })
            .collect(toImmutableList());
    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    long rowsSeen = 0;
    long bytesSeen = 0;
    for (PartitionStatistics partition : usablePartitions) {
        long partitionRows = partition.getBasicStatistics().getRowCount().orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRows >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics perColumn = partition.getColumnStatistics().get(column);
        verify(perColumn != null, "columnStatistics is null");

        long partitionBytes = perColumn.getTotalSizeInBytes().orElseThrow(() -> new VerifyException("totalSizeInBytes is not present"));
        verify(partitionBytes >= 0, "dataSize must be greater than or equal to zero");

        rowsSeen += partitionRows;
        bytesSeen += partitionBytes;
    }

    if (totalRowCount == 0) {
        return Estimate.zero();
    }
    if (rowsSeen == 0) {
        // No rows to average over, so the per-row size cannot be derived.
        return Estimate.unknown();
    }

    // Average bytes per row over the usable partitions, scaled to the requested row count.
    return Estimate.of(((double) bytesSeen) / rowsSeen * totalRowCount);
}
use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class MetastoreHiveStatisticsProvider method calculateNullsFraction.
/**
 * Estimates the fraction of null values in a column from per-partition statistics.
 *
 * <p>Only partitions that report both a row count and a nulls count for {@code column} are used.
 * The result is their summed nulls count divided by their summed row count.
 *
 * @param column name of the column whose per-partition statistics are looked up
 * @param partitionStatistics statistics of the partitions to aggregate
 * @return {@link Estimate#unknown()} if no partition has both values; {@link Estimate#zero()} if the
 * usable partitions hold zero rows in total; otherwise the nulls-to-rows ratio
 * @throws VerifyException if a usable partition reports a negative row count or nulls count, or a
 * nulls count greater than its row count
 */
@VisibleForTesting
static Estimate calculateNullsFraction(String column, Collection<PartitionStatistics> partitionStatistics) {
    // Keep only the partitions that know both their row count and this column's nulls count.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(partition -> partition.getBasicStatistics().getRowCount().isPresent())
            .filter(partition -> {
                HiveColumnStatistics perColumn = partition.getColumnStatistics().get(column);
                return perColumn != null && perColumn.getNullsCount().isPresent();
            })
            .collect(toImmutableList());
    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    long nullsSeen = 0;
    long rowsSeen = 0;
    for (PartitionStatistics partition : usablePartitions) {
        long partitionRows = partition.getBasicStatistics().getRowCount().orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRows >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics perColumn = partition.getColumnStatistics().get(column);
        verify(perColumn != null, "columnStatistics is null");

        long partitionNulls = perColumn.getNullsCount().orElseThrow(() -> new VerifyException("nullsCount is not present"));
        verify(partitionNulls >= 0, "nullsCount must be greater than or equal to zero");
        verify(partitionNulls <= partitionRows, "nullsCount must be less than or equal to rowCount. nullsCount: %s. rowCount: %s.", partitionNulls, partitionRows);

        nullsSeen += partitionNulls;
        rowsSeen += partitionRows;
    }

    if (rowsSeen == 0) {
        return Estimate.zero();
    }

    verify(nullsSeen <= rowsSeen, "totalNullsCount must be less than or equal to totalRowCount. totalNullsCount: %s. totalRowCount: %s.", nullsSeen, rowsSeen);
    return Estimate.of(((double) nullsSeen) / rowsSeen);
}
Aggregations