Use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.
From class TestSemiTransactionalHiveMetastore, method updatePartitionsStatistics:
private void updatePartitionsStatistics() {
    Map<String, Function<PartitionStatistics, PartitionStatistics>> partNamesUpdateMap = new HashMap<>();
    List<PartitionStatistics> statistics = ImmutableList.of(STATISTICS_1, STATISTICS_1);
    for (int index = 0; index < partitions.size(); index++) {
        PartitionStatistics partitionStatistics = statistics.get(index);
        partNamesUpdateMap.put(partitions.get(index), actualStatistics -> partitionStatistics);
    }
    thriftHiveMetastore.updatePartitionsStatistics(IDENTITY, MockThriftMetastoreClient.TEST_DATABASE, MockThriftMetastoreClient.TEST_TABLE_UP_NAME, partNamesUpdateMap);
}
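For orientation, here is a minimal sketch of how such a partition-name-to-update-function map is typically consumed: each partition name maps to a function that receives the current statistics and returns the new ones. The partition name, the statistics values, and the replace-outright policy are invented for illustration, and the sketch assumes HiveBasicStatistics sits in the same io.prestosql.plugin.hive package; it is not the metastore's actual merge logic.

// Illustrative sketch only: applying a Function-based partition statistics update map.
import io.prestosql.plugin.hive.HiveBasicStatistics;
import io.prestosql.plugin.hive.PartitionStatistics;

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

public class PartitionStatisticsUpdateSketch {
    public static void main(String[] args) {
        // statistics as the metastore might currently hold them (fileCount, rowCount, inMemorySize, onDiskSize)
        PartitionStatistics current = PartitionStatistics.builder()
                .setBasicStatistics(new HiveBasicStatistics(1, 100, 1024, 4096))
                .build();

        // one update function per partition name; this example simply replaces the statistics outright
        Map<String, Function<PartitionStatistics, PartitionStatistics>> updates = new HashMap<>();
        updates.put("ds=2021-01-01", existing -> PartitionStatistics.builder()
                .setBasicStatistics(new HiveBasicStatistics(2, 200, 2048, 8192))
                .build());

        // the metastore would look up the function for each partition and feed it the current statistics
        PartitionStatistics updated = updates.get("ds=2021-01-01").apply(current);
        System.out.println(updated.getBasicStatistics().getRowCount()); // OptionalLong[200]
    }
}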
Use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.
From class MetastoreHiveStatisticsProvider, method calculateDataSize:
@VisibleForTesting
static Estimate calculateDataSize(String column, Collection<PartitionStatistics> partitionStatistics, double totalRowCount) {
    List<PartitionStatistics> statisticsWithKnownRowCountAndDataSize = partitionStatistics.stream().filter(statistics -> {
        if (!statistics.getBasicStatistics().getRowCount().isPresent()) {
            return false;
        }
        HiveColumnStatistics columnStatistics = statistics.getColumnStatistics().get(column);
        if (columnStatistics == null) {
            return false;
        }
        return columnStatistics.getTotalSizeInBytes().isPresent();
    }).collect(toImmutableList());
    if (statisticsWithKnownRowCountAndDataSize.isEmpty()) {
        return Estimate.unknown();
    }
    long knownRowCount = 0;
    long knownDataSize = 0;
    for (PartitionStatistics statistics : statisticsWithKnownRowCountAndDataSize) {
        long rowCount = statistics.getBasicStatistics().getRowCount().orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(rowCount >= 0, "rowCount must be greater than or equal to zero");
        HiveColumnStatistics columnStatistics = statistics.getColumnStatistics().get(column);
        verify(columnStatistics != null, "columnStatistics is null");
        long dataSize = columnStatistics.getTotalSizeInBytes().orElseThrow(() -> new VerifyException("totalSizeInBytes is not present"));
        verify(dataSize >= 0, "dataSize must be greater than or equal to zero");
        knownRowCount += rowCount;
        knownDataSize += dataSize;
    }
    if (totalRowCount == 0) {
        return Estimate.zero();
    }
    if (knownRowCount == 0) {
        return Estimate.unknown();
    }
    double averageValueDataSizeInBytes = ((double) knownDataSize) / knownRowCount;
    return Estimate.of(averageValueDataSizeInBytes * totalRowCount);
}
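The extrapolation is a simple proportion: the average per-row data size over the partitions that have both a row count and a column data size is multiplied by the table-wide row count. A numeric sketch with invented partition numbers:

// Worked example of the extrapolation; all input numbers are made up.
public class DataSizeExtrapolationSketch {
    public static void main(String[] args) {
        long knownRowCount = 1_000 + 3_000;   // rows in partitions with known statistics
        long knownDataSize = 8_000 + 24_000;  // bytes for the column in those partitions
        double totalRowCount = 10_000;        // row count across all partitions
        double averageValueDataSizeInBytes = ((double) knownDataSize) / knownRowCount; // 8.0
        System.out.println(averageValueDataSizeInBytes * totalRowCount);               // 80000.0
    }
}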
Use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.
From class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsValidationFailure:
@Test
public void testGetTableStatisticsValidationFailure() {
    PartitionStatistics corruptedStatistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0)).build();
    String partitionName = "p1=string1/p2=1234";
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(partitionName, corruptedStatistics));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig().setIgnoreCorruptedStatistics(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    assertThatThrownBy(() -> statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)), true, table))
            .isInstanceOf(PrestoException.class)
            .hasFieldOrPropertyWithValue("errorCode", HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
    TestingConnectorSession ignoreSession = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig().setIgnoreCorruptedStatistics(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    assertEquals(
            statisticsProvider.getTableStatistics(ignoreSession, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)), true, table),
            TableStatistics.empty());
}
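What makes corruptedStatistics invalid is the negative file count (-1). Below is a hedged sketch of the kind of sanity check that trips HIVE_CORRUPTED_COLUMN_STATISTICS; it is not the provider's actual validation code, and it assumes HiveBasicStatistics exposes OptionalLong accessors getFileCount() and getRowCount(). As the second assertion shows, with setIgnoreCorruptedStatistics(true) the provider falls back to TableStatistics.empty() instead of failing the query.

// Illustrative sanity check only; the real validation lives inside MetastoreHiveStatisticsProvider.
import io.prestosql.plugin.hive.HiveBasicStatistics;
import io.prestosql.plugin.hive.PartitionStatistics;

public class BasicStatisticsSanityCheckSketch {
    // returns false for statistics built from new HiveBasicStatistics(-1, 0, 0, 0):
    // a count that is present but negative is treated as corrupted
    static boolean hasValidBasicStatistics(PartitionStatistics statistics) {
        HiveBasicStatistics basic = statistics.getBasicStatistics();
        return basic.getFileCount().orElse(0) >= 0
                && basic.getRowCount().orElse(0) >= 0;
    }
}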
Use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.
From class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsUnpartitioned:
@Test
public void testGetTableStatisticsUnpartitioned() {
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty()))
            .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build())
            .build();
    assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE)), true, table), expected);
}
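The expected ColumnStatistics follow directly from the fixture as used in the test above: the min of -100 and max of 100 become the DoubleRange, the distinct-value count of 300 is carried over unchanged, and the 500 nulls over the basic row count of 1000 give the nulls fraction of 0.5. A tiny sketch of that last computation:

// Arithmetic behind the expected nulls fraction; values restate the test fixture above.
public class NullsFractionSketch {
    public static void main(String[] args) {
        double rowCount = 1000;   // basic statistics row count
        double nullsCount = 500;  // nulls count passed to createIntegerColumnStatistics
        System.out.println(nullsCount / rowCount); // 0.5, the expected nulls fraction
    }
}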
Use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.
From class CarbondataMetadata, method finishInsertInNewPartition:
@Override
protected void finishInsertInNewPartition(ConnectorSession session, HiveInsertTableHandle handle, Table table, Map<String, Type> columnTypes, PartitionUpdate partitionUpdate, Map<List<String>, ComputedStatistics> partitionComputedStatistics, HiveACIDWriteType acidWriteType) {
    // insert into new partition or overwrite existing partition
    if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
        List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
        PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
        metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, acidWriteType);
    }
    else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
        Partition partition = buildPartitionObject(session, table, partitionUpdate);
        if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
            throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
        }
        PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
        metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath(), partitionStatistics, acidWriteType);
    }
}
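The OVERWRITE branch derives the partition values straight from the partition update's name. A simplified stand-in for toPartitionValues, assuming names of the form "p1=string1/p2=1234" (the format used in the tests above) and ignoring the escaping that the real metastore helper handles:

// Simplified illustration only; not the actual toPartitionValues implementation.
import java.util.ArrayList;
import java.util.List;

public class PartitionNameSketch {
    // "p1=string1/p2=1234" -> ["string1", "1234"]
    static List<String> toPartitionValuesSketch(String partitionName) {
        List<String> values = new ArrayList<>();
        for (String piece : partitionName.split("/")) {
            values.add(piece.substring(piece.indexOf('=') + 1)); // keep only the value after '='
        }
        return values;
    }

    public static void main(String[] args) {
        System.out.println(toPartitionValuesSketch("p1=string1/p2=1234")); // [string1, 1234]
    }
}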