Use of com.facebook.presto.hive.metastore.HiveColumnStatistics in the presto project by prestodb.
From the class TestThriftHiveMetastoreUtil, method testDecimalStatsToColumnStatistics.
/**
 * Feeds decimal column statistics in metastore API form through
 * {@code fromMetastoreApiColumnStatistics} (with a row count of 1000) and checks
 * every field of the resulting {@link HiveColumnStatistics}.
 */
@Test
public void testDecimalStatsToColumnStatistics() {
    BigDecimal lowerBound = new BigDecimal("0");
    BigDecimal upperBound = new BigDecimal("100");

    DecimalColumnStatsData statsData = new DecimalColumnStatsData();
    statsData.setNumNulls(1);
    statsData.setNumDVs(20);
    statsData.setLowValue(toMetastoreDecimal(lowerBound));
    statsData.setHighValue(toMetastoreDecimal(upperBound));

    ColumnStatisticsObj metastoreStatistics = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(statsData));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreStatistics, OptionalLong.of(1000));

    // Of the typed range statistics, only the decimal one is expected to be populated.
    Optional<DecimalStatistics> expectedRange = Optional.of(new DecimalStatistics(Optional.of(lowerBound), Optional.of(upperBound)));
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), expectedRange);
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());

    // No size information is expected for this column.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());

    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    // 20 distinct values and 1 null go in; 19 is expected out.
    // NOTE(review): presumably the NULL is not counted as a distinct value - confirm against the converter.
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
Use of com.facebook.presto.hive.metastore.HiveColumnStatistics in the presto project by prestodb.
From the class TestThriftHiveMetastoreUtil, method testSingleDistinctValue.
/**
 * Checks the distinct-values count produced by {@code fromMetastoreApiColumnStatistics}
 * when the metastore reports exactly one distinct value and 10 nulls, for two
 * different row counts.
 */
@Test
public void testSingleDistinctValue() {
    // Scenario 1: the row count (10) equals the null count (10); a distinct count of 0 is expected.
    DoubleColumnStatsData allNullStatsData = new DoubleColumnStatsData();
    allNullStatsData.setNumDVs(1);
    allNullStatsData.setNumNulls(10);
    ColumnStatisticsObj allNullColumn = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(allNullStatsData));

    HiveColumnStatistics allNullResult = fromMetastoreApiColumnStatistics(allNullColumn, OptionalLong.of(10));

    assertEquals(allNullResult.getNullsCount(), OptionalLong.of(10));
    assertEquals(allNullResult.getDistinctValuesCount(), OptionalLong.of(0));

    // Scenario 2: same metastore statistics, but the row count (11) exceeds the null count by one;
    // a distinct count of 1 is expected.
    DoubleColumnStatsData oneValueStatsData = new DoubleColumnStatsData();
    oneValueStatsData.setNumDVs(1);
    oneValueStatsData.setNumNulls(10);
    ColumnStatisticsObj oneValueColumn = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(oneValueStatsData));

    HiveColumnStatistics oneValueResult = fromMetastoreApiColumnStatistics(oneValueColumn, OptionalLong.of(11));

    assertEquals(oneValueResult.getNullsCount(), OptionalLong.of(10));
    assertEquals(oneValueResult.getDistinctValuesCount(), OptionalLong.of(1));
}
Use of com.facebook.presto.hive.metastore.HiveColumnStatistics in the presto project by prestodb.
From the class TestThriftHiveMetastoreUtil, method testBooleanStatsToColumnStatistics.
/**
 * Feeds boolean column statistics in metastore API form through
 * {@code fromMetastoreApiColumnStatistics} (with no row count supplied) and checks
 * every field of the resulting {@link HiveColumnStatistics}.
 */
@Test
public void testBooleanStatsToColumnStatistics() {
    BooleanColumnStatsData statsData = new BooleanColumnStatsData();
    statsData.setNumNulls(0);
    statsData.setNumFalses(10);
    statsData.setNumTrues(100);

    ColumnStatisticsObj metastoreStatistics = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(statsData));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreStatistics, OptionalLong.empty());

    // Of the typed statistics, only the boolean one (true count, false count) is expected to be populated.
    Optional<BooleanStatistics> expectedCounts = Optional.of(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10)));
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), expectedCounts);

    // No size information and no distinct-values count are expected for this column.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(0));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Use of com.facebook.presto.hive.metastore.HiveColumnStatistics in the presto project by prestodb.
From the class HiveSplitManager, method getPartitionSplitInfo.
/**
 * Looks up a batch of partitions in the metastore and builds one
 * {@link PartitionSplitInfo} per returned partition, keyed by the name the metastore returned.
 *
 * <p>When {@code domains} is present and the partition-statistics-based optimization is
 * enabled for the session, per-partition column statistics are fetched as well. For each
 * predicate column that has both statistics and a domain:
 * <ul>
 * <li>if the predicate's value set does not overlap the value set derived from the
 * statistics, the partition is marked as pruned and no further columns are examined;</li>
 * <li>otherwise, if the predicate's value set contains the statistics value set, the
 * column handle is recorded as a redundant column domain.</li>
 * </ul>
 *
 * <p>Finally, partition leases are set on the metastore for the storage locations of all
 * partitions that were not pruned.
 *
 * @param session session used for the metastore context and session properties
 * @param metastore metastore to query and to register partition leases with
 * @param tableName schema and table the partitions belong to
 * @param partitionBatch partitions to resolve
 * @param predicateColumns predicate columns keyed by column name
 * @param domains optional per-subfield predicate domains
 * @return split info for every partition returned by the metastore
 * @throws PrestoException with {@code HIVE_PARTITION_DROPPED_DURING_QUERY} if the metastore
 * returns no partition for one of the requested names
 */
private Map<String, PartitionSplitInfo> getPartitionSplitInfo(
        ConnectorSession session,
        SemiTransactionalHiveMetastore metastore,
        SchemaTableName tableName,
        List<HivePartition> partitionBatch,
        Map<String, HiveColumnHandle> predicateColumns,
        Optional<Map<Subfield, Domain>> domains) {
    String schemaName = tableName.getSchemaName();
    String table = tableName.getTableName();

    MetastoreContext metastoreContext = new MetastoreContext(
            session.getIdentity(),
            session.getQueryId(),
            session.getClientInfo(),
            session.getSource(),
            getMetastoreHeaders(session),
            isUserDefinedTypeEncodingEnabled(session),
            metastore.getColumnConverterProvider());

    Map<String, Optional<Partition>> partitionsByName = metastore.getPartitionsByNames(
            metastoreContext,
            schemaName,
            table,
            Lists.transform(partitionBatch, HivePartition::getPartitionId));

    // Statistics are only fetched when there are domains to compare them against and the
    // optimization is switched on; otherwise the map stays empty and nothing is pruned.
    Map<String, PartitionStatistics> statisticsByPartition = ImmutableMap.of();
    if (domains.isPresent() && isPartitionStatisticsBasedOptimizationEnabled(session)) {
        statisticsByPartition = metastore.getPartitionStatistics(
                metastoreContext,
                schemaName,
                table,
                partitionBatch.stream().map(HivePartition::getPartitionId).collect(toImmutableSet()));
    }

    Map<String, String> leaseLocations = new HashMap<>();
    ImmutableMap.Builder<String, PartitionSplitInfo> splitInfos = ImmutableMap.builder();

    for (Map.Entry<String, Optional<Partition>> entry : partitionsByName.entrySet()) {
        String partitionName = entry.getKey();
        Partition partition = entry.getValue().orElseThrow(
                () -> new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Partition no longer exists: " + partitionName));

        ImmutableSet.Builder<ColumnHandle> redundantColumnDomains = ImmutableSet.builder();
        boolean pruned = false;

        if (statisticsByPartition.containsKey(partitionName)) {
            Map<String, HiveColumnStatistics> columnStatistics = statisticsByPartition.get(partitionName).getColumnStatistics();

            for (Map.Entry<String, HiveColumnHandle> predicateColumn : predicateColumns.entrySet()) {
                String columnName = predicateColumn.getKey();
                HiveColumnHandle columnHandle = predicateColumn.getValue();

                if (!columnStatistics.containsKey(columnName)) {
                    continue;
                }

                Optional<ValueSet> statisticsValues = getColumnStatisticsValueSet(columnStatistics.get(columnName), columnHandle.getHiveType());
                Subfield subfield = new Subfield(columnName);
                // domains is known to be present here: statistics are only loaded when it is.
                if (!statisticsValues.isPresent() || !domains.get().containsKey(subfield)) {
                    continue;
                }

                ValueSet predicateValues = domains.get().get(subfield).getValues();
                if (!predicateValues.overlaps(statisticsValues.get())) {
                    // The predicate shares no values with what the statistics describe for this partition.
                    pruned = true;
                    break;
                }
                if (predicateValues.contains(statisticsValues.get())) {
                    redundantColumnDomains.add(columnHandle);
                }
            }
        }

        // Leases are only requested for partitions that were not pruned.
        if (!pruned) {
            leaseLocations.put(partitionName, partition.getStorage().getLocation());
        }
        splitInfos.put(partitionName, new PartitionSplitInfo(partition, pruned, redundantColumnDomains.build()));
    }

    metastore.setPartitionLeases(metastoreContext, schemaName, table, leaseLocations, getLeaseDuration(session));
    return splitInfos.build();
}
Use of com.facebook.presto.hive.metastore.HiveColumnStatistics in the presto project by prestodb.
From the class ThriftHiveMetastore, method getPartitionStatistics.
/**
 * Collects basic and column-level statistics for the requested partitions of a table.
 *
 * <p>Basic statistics are derived from each partition's parameters; their row counts are
 * then passed along when the column statistics are fetched. A requested partition with no
 * basic statistics gets {@code createEmptyStatistics()}, and one with no column statistics
 * gets an empty map.
 *
 * @param metastoreContext context for the metastore calls
 * @param databaseName database the table lives in
 * @param tableName table whose partitions are inspected
 * @param partitionNames names of the partitions to return statistics for
 * @return statistics keyed by partition name, one entry per requested name
 * @throws TableNotFoundException if the table cannot be found
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Set<String> partitionNames) {
    Table table = getTable(metastoreContext, databaseName, tableName)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));

    List<String> dataColumnNames = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    List<String> partitionKeyNames = table.getPartitionKeys().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());

    // Key each partition by the name built from the partition key names and its values.
    Map<String, HiveBasicStatistics> basicStatisticsByPartition =
            getPartitionsByNames(metastoreContext, databaseName, tableName, ImmutableList.copyOf(partitionNames)).stream()
                    .collect(toImmutableMap(
                            partition -> makePartName(partitionKeyNames, partition.getValues()),
                            partition -> getHiveBasicStatistics(partition.getParameters())));

    ImmutableMap.Builder<String, OptionalLong> rowCounts = ImmutableMap.builder();
    for (Map.Entry<String, HiveBasicStatistics> basicEntry : basicStatisticsByPartition.entrySet()) {
        rowCounts.put(basicEntry.getKey(), basicEntry.getValue().getRowCount());
    }
    Map<String, OptionalLong> rowCountsByPartition = rowCounts.build();

    Map<String, Map<String, HiveColumnStatistics>> columnStatisticsByPartition =
            getPartitionColumnStatistics(metastoreContext, databaseName, tableName, partitionNames, dataColumnNames, rowCountsByPartition);

    ImmutableMap.Builder<String, PartitionStatistics> statisticsByPartition = ImmutableMap.builder();
    for (String name : partitionNames) {
        HiveBasicStatistics basic = basicStatisticsByPartition.getOrDefault(name, createEmptyStatistics());
        Map<String, HiveColumnStatistics> columns = columnStatisticsByPartition.getOrDefault(name, ImmutableMap.of());
        statisticsByPartition.put(name, new PartitionStatistics(basic, columns));
    }
    return statisticsByPartition.build();
}
Aggregations