Use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.
Class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsValidationFailure.
@Test
public void testGetTableStatisticsValidationFailure() {
    // A negative file count marks the basic statistics as corrupted.
    PartitionStatistics corruptedStatistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0))
            .build();
    String partitionName = "p1=string1/p2=1234";
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(partitionName, corruptedStatistics));
    // With ignoreCorruptedStatistics disabled, corrupted statistics must fail the query.
    TestingConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(new HiveConfig().setIgnoreCorruptedStatistics(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    assertThatThrownBy(() -> statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)), true, table))
            .isInstanceOf(PrestoException.class)
            .hasFieldOrPropertyWithValue("errorCode", HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
    // With ignoreCorruptedStatistics enabled, the corrupted statistics are dropped and empty table statistics are returned instead.
    TestingConnectorSession ignoreSession = new TestingConnectorSession(
            new HiveSessionProperties(new HiveConfig().setIgnoreCorruptedStatistics(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    assertEquals(
            statisticsProvider.getTableStatistics(ignoreSession, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)), true, table),
            TableStatistics.empty());
}
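The corrupted statistics above carry a negative file count (-1). Below is a minimal sketch of the kind of non-negativity check the provider is expected to apply to basic statistics; the helper names validateBasicStatistics and checkNonNegative are hypothetical and not the provider's actual methods.

// Hypothetical sketch: reject basic statistics containing negative counters,
// mirroring the corrupted fileCount = -1 case exercised by the test above.
private static void validateBasicStatistics(String partitionName, HiveBasicStatistics statistics)
{
    statistics.getFileCount().ifPresent(value -> checkNonNegative(partitionName, "fileCount", value));
    statistics.getRowCount().ifPresent(value -> checkNonNegative(partitionName, "rowCount", value));
    statistics.getInMemoryDataSizeInBytes().ifPresent(value -> checkNonNegative(partitionName, "inMemoryDataSizeInBytes", value));
    statistics.getOnDiskDataSizeInBytes().ifPresent(value -> checkNonNegative(partitionName, "onDiskDataSizeInBytes", value));
}

private static void checkNonNegative(String partitionName, String name, long value)
{
    if (value < 0) {
        throw new PrestoException(HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS,
                String.format("Corrupted partition statistics for %s: %s is negative (%d)", partitionName, name, value));
    }
}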
Use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.
Class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsUnpartitioned.
@Test
public void testGetTableStatisticsUnpartitioned() {
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty()))
            .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    TestingConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, ColumnStatistics.builder()
                    .setRange(new DoubleRange(-100, 100))
                    .setNullsFraction(Estimate.of(0.5))
                    .setDistinctValuesCount(Estimate.of(300))
                    .build())
            .build();
    assertEquals(
            statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE)), true, table),
            expected);
}
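For reference, the expected TableStatistics above follow arithmetically from the metastore values: a row count of 1000, a nulls count of 500, and a distinct-values count of 300. The sketch below illustrates that derivation only; it is not the provider's actual conversion code.

// Illustration of how the expected estimates relate to the metastore inputs above.
long rowCount = 1000;            // from HiveBasicStatistics
long nullsCount = 500;           // from HiveColumnStatistics
long distinctValuesCount = 300;  // from HiveColumnStatistics

Estimate nullsFraction = Estimate.of((double) nullsCount / rowCount);   // 500 / 1000 = 0.5
Estimate distinctValues = Estimate.of(distinctValuesCount);             // 300
DoubleRange range = new DoubleRange(-100, 100);                         // integer min/max carried over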
Use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.
Class CachingHiveMetastore, method getPartitionStatistics.
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitionNames) {
    HiveTableName hiveTableName = hiveTableName(table.getDatabaseName(), table.getTableName());
    List<WithIdentity<HivePartitionName>> partitions = partitionNames.stream()
            .map(partition -> new WithIdentity<>(updateIdentity(identity), hivePartitionName(hiveTableName, makePartitionName(table, partition))))
            .collect(toImmutableList());
    if (skipTableCache) {
        // Bypass the cache entirely and ask the delegate metastore directly.
        HiveIdentity identity1 = updateIdentity(identity);
        return delegate.getPartitionStatistics(identity1, table, partitionNames);
    }
    Map<WithIdentity<HivePartitionName>, WithValidation<Table, PartitionStatistics>> statistics = getAll(partitionStatisticsCache, partitions);
    if (dontVerifyCacheEntry || statistics.isEmpty()) {
        return statistics.entrySet().stream()
                .collect(toImmutableMap(entry -> entry.getKey().getKey().getPartitionName().get(), entry -> entry.getValue().get()));
    }
    // Verify that every cached entry still matches the basic statistics stored in the live partition parameters.
    Map<WithIdentity<HivePartitionName>, WithValidation<Table, PartitionStatistics>> finalStatistics = statistics;
    boolean allMatch = partitionNames.stream()
            .allMatch(partition -> finalStatistics.get(new WithIdentity<>(updateIdentity(identity), hivePartitionName(hiveTableName, makePartitionName(table, partition))))
                    .matches(getCacheValidationPartitionParams(table, ThriftMetastoreUtil.getHiveBasicStatistics(partition.getParameters()))));
    if (allMatch) {
        return statistics.entrySet().stream()
                .collect(toImmutableMap(entry -> entry.getKey().getKey().getPartitionName().get(), entry -> entry.getValue().get()));
    }
    // Stale entries: invalidate both caches and reload from the delegate metastore.
    partitionCache.invalidate(partitions);
    partitionStatisticsCache.invalidate(partitions);
    statistics = getAll(partitionStatisticsCache, partitions);
    return statistics.entrySet().stream()
            .collect(toImmutableMap(entry -> entry.getKey().getKey().getPartitionName().get(), entry -> entry.getValue().get()));
}
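The method above follows a validate-or-refresh pattern: cached statistics are served only if they still match the partitions' live basic statistics; otherwise both caches are invalidated and the values are reloaded. Below is a self-contained sketch of that pattern under simplifying assumptions; the class name and fields are hypothetical, and a plain HashMap stands in for the Guava caches that CachingHiveMetastore actually uses.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Predicate;

// Hypothetical sketch of the validate-or-refresh caching pattern; not the class's actual code.
final class ValidatedCacheSketch<K, V>
{
    private final Map<K, V> cache = new HashMap<>();
    private final Function<K, V> loader;       // stands in for the delegate metastore call
    private final Predicate<V> stillValid;     // stands in for the cache-entry validation check

    ValidatedCacheSketch(Function<K, V> loader, Predicate<V> stillValid)
    {
        this.loader = loader;
        this.stillValid = stillValid;
    }

    V get(K key)
    {
        V value = cache.computeIfAbsent(key, loader);
        if (stillValid.test(value)) {
            return value;                      // cached entry still matches live metadata
        }
        cache.remove(key);                     // invalidate the stale entry
        return cache.computeIfAbsent(key, loader);   // reload from the delegate
    }
}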
Use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.
Class GlueHiveMetastore, method updateTableStatistics.
@Override
public void updateTableStatistics(HiveIdentity identity, String databaseName, String tableName, Function<PartitionStatistics, PartitionStatistics> update) {
    Table table = getTableOrElseThrow(identity, databaseName, tableName);
    PartitionStatistics currentStatistics = getTableStatistics(identity, table);
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);
    if (!updatedStatistics.getColumnStatistics().isEmpty()) {
        throw new PrestoException(NOT_SUPPORTED, "Glue metastore does not support column level statistics");
    }
    try {
        TableInput tableInput = GlueInputConverter.convertTable(table);
        tableInput.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(table.getParameters(), updatedStatistics.getBasicStatistics()));
        glueClient.updateTable(new UpdateTableRequest()
                .withCatalogId(catalogId)
                .withDatabaseName(databaseName)
                .withTableInput(tableInput));
    }
    catch (EntityNotFoundException e) {
        throw new TableNotFoundException(new SchemaTableName(databaseName, tableName));
    }
    catch (AmazonServiceException e) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, e);
    }
}
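For illustration, here is one way a caller might build the update function passed to updateTableStatistics. The row-count adjustment is an arbitrary example, the metastore variable and the "sales"/"orders" names are placeholders, and only basic statistics are set because the Glue path above rejects column-level statistics.

// Illustrative update function: adjust the row count while leaving column statistics empty.
Function<PartitionStatistics, PartitionStatistics> update = current -> {
    HiveBasicStatistics basic = current.getBasicStatistics();
    HiveBasicStatistics updated = new HiveBasicStatistics(
            basic.getFileCount(),
            OptionalLong.of(basic.getRowCount().orElse(0) + 1000),   // example adjustment only
            basic.getInMemoryDataSizeInBytes(),
            basic.getOnDiskDataSizeInBytes());
    return PartitionStatistics.builder()
            .setBasicStatistics(updated)
            .build();
};
metastore.updateTableStatistics(identity, "sales", "orders", update);   // placeholder database and table names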
Use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.
Class ThriftHiveMetastore, method getPartitionStatistics.
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitions) {
    List<String> dataColumns = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    List<String> partitionColumns = table.getPartitionKeys().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    Map<String, HiveBasicStatistics> partitionBasicStatistics = partitions.stream()
            .collect(toImmutableMap(
                    partition -> makePartName(partitionColumns, partition.getValues()),
                    partition -> ThriftMetastoreUtil.getHiveBasicStatistics(partition.getParameters())));
    Map<String, OptionalLong> partitionRowCounts = partitionBasicStatistics.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));
    Map<String, Map<String, HiveColumnStatistics>> partitionColumnStatistics = getPartitionColumnStatistics(
            identity, table.getDbName(), table.getTableName(), partitionBasicStatistics.keySet(), dataColumns, partitionRowCounts);
    ImmutableMap.Builder<String, PartitionStatistics> result = ImmutableMap.builder();
    for (String partitionName : partitionBasicStatistics.keySet()) {
        HiveBasicStatistics basicStatistics = partitionBasicStatistics.get(partitionName);
        Map<String, HiveColumnStatistics> columnStatistics = partitionColumnStatistics.getOrDefault(partitionName, ImmutableMap.of());
        result.put(partitionName, new PartitionStatistics(basicStatistics, columnStatistics));
    }
    return result.build();
}
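A brief usage sketch of consuming the map returned above; metastore, identity, table, and partitions are placeholders for values already in scope in a real caller.

// Read back the combined basic and column statistics per partition.
Map<String, PartitionStatistics> statistics = metastore.getPartitionStatistics(identity, table, partitions);
for (Map.Entry<String, PartitionStatistics> entry : statistics.entrySet()) {
    OptionalLong rowCount = entry.getValue().getBasicStatistics().getRowCount();
    int columnsWithStatistics = entry.getValue().getColumnStatistics().size();
    System.out.printf("%s: rows=%s, columns with statistics=%d%n", entry.getKey(), rowCount, columnsWithStatistics);
}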