use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class TestDataCenterClient method testGetTableStatistics.
/**
 * Fetches statistics for {@code tpch.tiny.orders} through a {@link DataCenterClient} and checks
 * the table row count plus the per-column values (distinct count, nulls fraction, and either
 * the value range or the data size) for every column that appears in the result.
 */
@Test
public void testGetTableStatistics() {
    DataCenterClient client = new DataCenterClient(this.config, httpClient, typeManager);

    // Handles are registered in ordinal order; the third constructor argument is that ordinal.
    Map<String, ColumnHandle> handlesByName = new LinkedHashMap<>();
    handlesByName.put("orderkey", new DataCenterColumnHandle("orderkey", DOUBLE, 0));
    handlesByName.put("custkey", new DataCenterColumnHandle("custkey", DOUBLE, 1));
    handlesByName.put("orderstatus", new DataCenterColumnHandle("orderstatus", createVarcharType(1), 2));
    handlesByName.put("totalprice", new DataCenterColumnHandle("totalprice", DOUBLE, 3));
    handlesByName.put("orderdate", new DataCenterColumnHandle("orderdate", DATE, 4));
    handlesByName.put("orderpriority", new DataCenterColumnHandle("orderpriority", createVarcharType(15), 5));
    handlesByName.put("clerk", new DataCenterColumnHandle("clerk", createUnboundedVarcharType(), 6));
    handlesByName.put("shippriority", new DataCenterColumnHandle("shippriority", DOUBLE, 7));
    handlesByName.put("comment", new DataCenterColumnHandle("comment", createVarcharType(79), 8));

    TableStatistics tableStatistics = client.getTableStatistics("tpch.tiny.orders", handlesByName);
    assertEquals(tableStatistics.getRowCount().getValue(), 15000.0);

    for (Map.Entry<ColumnHandle, ColumnStatistics> entry : tableStatistics.getColumnStatistics().entrySet()) {
        ColumnStatistics stats = entry.getValue();
        // Column names are mutually exclusive, so a switch is equivalent to independent checks.
        switch (entry.getKey().getColumnName()) {
            case "orderkey":
                assertEquals(stats.getDistinctValuesCount().getValue(), 15000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 1.0);
                assertEquals(stats.getRange().get().getMax(), 60000.0);
                break;
            case "custkey":
                assertEquals(stats.getDistinctValuesCount().getValue(), 1000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 1.0);
                assertEquals(stats.getRange().get().getMax(), 1499.0);
                break;
            case "orderstatus":
                assertEquals(stats.getDataSize().getValue(), 3.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 3.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "totalprice":
                assertEquals(stats.getDistinctValuesCount().getValue(), 14996.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 874.89);
                assertEquals(stats.getRange().get().getMax(), 466001.28);
                break;
            case "orderdate":
                // The date range is asserted as plain doubles.
                assertEquals(stats.getDistinctValuesCount().getValue(), 2401.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 8035.0);
                assertEquals(stats.getRange().get().getMax(), 10440.0);
                break;
            case "orderpriority":
                assertEquals(stats.getDataSize().getValue(), 42.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 5.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "clerk":
                assertEquals(stats.getDataSize().getValue(), 15000.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 1000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "shippriority":
                assertEquals(stats.getDistinctValuesCount().getValue(), 1.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 0.0);
                assertEquals(stats.getRange().get().getMax(), 0.0);
                break;
            case "comment":
                assertEquals(stats.getDataSize().getValue(), 727249.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 14995.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            default:
                // Columns without expectations are not checked.
                break;
        }
    }
}
use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class DataCenterClient method getTableStatistics.
/**
 * Get remote table statistics.
 *
 * <p>Runs {@code SHOW STATS FOR <tableFullName>} through {@code getResults} and converts each
 * returned row into column statistics. Rows are read positionally: 0 = column_name,
 * 1 = data_size, 2 = distinct_values_count, 3 = nulls_fraction, 4 = row_count,
 * 5 = low_value, 6 = high_value. Rows with a null column name, or naming a column that is
 * not in {@code columnHandles}, contribute no column statistics. The table row count is read
 * from the last row returned.
 *
 * @param tableFullName the fully qualified table name
 * @param columnHandles data center column handles, keyed by column name
 * @return the table statistics
 * @throws PrestoTransportException if fetching the results fails with an {@link SQLException};
 *         the SQL exception is attached as the cause
 */
public TableStatistics getTableStatistics(String tableFullName, Map<String, ColumnHandle> columnHandles) {
    String query = "SHOW STATS FOR " + tableFullName;
    Iterable<List<Object>> data;
    try {
        data = getResults(clientSession, query);
    } catch (SQLException ex) {
        // Attach the SQLException as the cause so the underlying failure stays diagnosable.
        throw new PrestoTransportException(REMOTE_TASK_ERROR, HostAddress.fromUri(this.serverUri.uri()), "could not connect to the remote data center", ex);
    }
    TableStatistics.Builder builder = TableStatistics.builder();
    List<Object> lastRow = null;
    for (List<Object> row : data) {
        // Track every row, including skipped ones: the row count is taken from the final row.
        lastRow = row;
        if (row.get(0) == null) {
            // Only the last row can have the first column (column name) null
            continue;
        }
        // row[0] is column_name
        DataCenterColumnHandle columnHandle = (DataCenterColumnHandle) columnHandles.get(row.get(0).toString());
        if (columnHandle == null) {
            // Unknown column found
            continue;
        }
        // Allocate the per-column builder only for rows that actually produce statistics.
        ColumnStatistics.Builder columnStatisticBuilder = new ColumnStatistics.Builder();
        // row[1] is data_size
        if (row.get(1) != null) {
            columnStatisticBuilder.setDataSize(Estimate.of(Double.parseDouble(row.get(1).toString())));
        }
        // row[2] is distinct_values_count
        if (row.get(2) != null) {
            columnStatisticBuilder.setDistinctValuesCount(Estimate.of(Double.parseDouble(row.get(2).toString())));
        }
        // row[3] is nulls_fraction
        if (row.get(3) != null) {
            columnStatisticBuilder.setNullsFraction(Estimate.of(Double.parseDouble(row.get(3).toString())));
        }
        // row[5] is low_value and row[6] is high_value; a range is set only when both are present
        if (row.get(5) != null && row.get(6) != null) {
            String minStr = row.get(5).toString();
            String maxStr = row.get(6).toString();
            Type columnType = columnHandle.getColumnType();
            if (columnType.equals(DATE)) {
                // DATE bounds arrive as formatted text; the range stores them as epoch days.
                LocalDate minDate = LocalDate.parse(minStr, DATE_FORMATTER);
                LocalDate maxDate = LocalDate.parse(maxStr, DATE_FORMATTER);
                columnStatisticBuilder.setRange(new DoubleRange(minDate.toEpochDay(), maxDate.toEpochDay()));
            } else {
                columnStatisticBuilder.setRange(new DoubleRange(Double.parseDouble(minStr), Double.parseDouble(maxStr)));
            }
        }
        builder.setColumnStatistics(columnHandle, columnStatisticBuilder.build());
    }
    // Get row_count from the last row
    if (lastRow != null && lastRow.get(4) != null) {
        builder.setRowCount(Estimate.of(Double.parseDouble(lastRow.get(4).toString())));
    }
    return builder.build();
}
use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class AbstractTestHive method assertTableStatsComputed.
/**
 * Asserts that statistics have been computed for the given table.
 *
 * <p>Checks that the row count is known, that exactly the expected columns carry column
 * statistics, and that each of those columns has a known nulls fraction and distinct values
 * count. Data size must be known for varchar columns and unknown for every other column type.
 *
 * @param tableName the table whose statistics are inspected
 * @param expectedColumnStatsColumns names of the columns expected to have column statistics
 */
private void assertTableStatsComputed(SchemaTableName tableName, Set<String> expectedColumnStatsColumns) {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Constraint.alwaysTrue(), true);
        assertFalse(tableStatistics.getRowCount().isUnknown(), "row count is unknown");
        // Re-key the statistics by column name so they can be compared with the expected set.
        Map<String, ColumnStatistics> columnsStatistics = tableStatistics.getColumnStatistics().entrySet().stream()
                .collect(toImmutableMap(entry -> ((HiveColumnHandle) entry.getKey()).getName(), Map.Entry::getValue));
        assertEquals(columnsStatistics.keySet(), expectedColumnStatsColumns, "columns with statistics");
        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
        columnsStatistics.forEach((columnName, columnStatistics) -> {
            ColumnHandle columnHandle = columnHandles.get(columnName);
            Type columnType = metadata.getColumnMetadata(session, tableHandle, columnHandle).getType();
            assertFalse(columnStatistics.getNullsFraction().isUnknown(), "unknown nulls fraction for " + columnName);
            assertFalse(columnStatistics.getDistinctValuesCount().isUnknown(), "unknown distinct values count for " + columnName);
            if (isVarcharType(columnType)) {
                assertFalse(columnStatistics.getDataSize().isUnknown(), "unknown data size for " + columnName);
            } else {
                // This assertion fails when the data size IS known, so the message says so.
                assertTrue(columnStatistics.getDataSize().isUnknown(), "data size should be unknown for " + columnName);
            }
        });
    }
}
use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class AbstractTestHive method testPartitionStatisticsSampling.
/**
 * Creates a temporary partitioned table, stores the same statistics on partitions
 * {@code ds=2016-01-01} and {@code ds=2016-01-02}, and asserts that the table statistics
 * obtained with {@code sampleSize(1)} equal those obtained with {@code sampleSize(2)}.
 * The table is dropped afterwards whether or not the assertion passes.
 *
 * @param columns columns of the temporary table
 * @param statistics statistics written to both partitions
 * @throws Exception if table setup, the statistics update or teardown fails
 */
protected void testPartitionStatisticsSampling(List<ColumnMetadata> columns, PartitionStatistics statistics) throws Exception {
    SchemaTableName tableName = temporaryTable("test_partition_statistics_sampling");
    try {
        createDummyPartitionedTable(tableName, columns);

        HiveMetastore metastore = getMetastoreClient();
        HiveIdentity hiveIdentity = new HiveIdentity(SESSION);
        // Overwrite whatever statistics each partition currently has with the supplied ones.
        for (String partitionName : new String[] {"ds=2016-01-01", "ds=2016-01-02"}) {
            metastore.updatePartitionStatistics(hiveIdentity, tableName.getSchemaName(), tableName.getTableName(), partitionName, current -> statistics);
        }

        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            ConnectorTableHandle tableHandle = metadata.getTableHandle(session, tableName);

            // With two partitions, a sample size of 2 covers the whole table.
            TableStatistics fromBothPartitions = metadata.getTableStatistics(sampleSize(2), tableHandle, Constraint.alwaysTrue(), true);
            TableStatistics fromOnePartition = metadata.getTableStatistics(sampleSize(1), tableHandle, Constraint.alwaysTrue(), true);
            assertEquals(fromOnePartition, fromBothPartitions);
        }
    } finally {
        dropTable(tableName);
    }
}
use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
the class MetastoreHiveStatisticsProvider method getTableStatistics.
/**
 * Builds table statistics from the statistics of a sample of the given partitions.
 *
 * <p>Returns empty statistics when statistics are disabled for the session, and zero
 * statistics when {@code partitions} is empty. The partition sample is taken from
 * {@code samplePartitionCache} only when column statistics are not requested and the cached
 * entry was recorded for the same number of partitions; otherwise a new sample is drawn and
 * cached. Without column statistics, the result holds only a row count (average rows per
 * sampled partition times the partition count, when that average is available), a file count
 * and an on-disk size.
 *
 * @return the computed statistics, or empty statistics when a corrupted-column-statistics
 *         error occurs and the session is set to ignore such errors
 * @throws PrestoException any other failure raised while loading or validating the statistics
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, SchemaTableName schemaTableName, Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, boolean includeColumnStatistics, Table table) {
    if (!isStatisticsEnabled(session)) {
        return TableStatistics.empty();
    }
    if (partitions.isEmpty()) {
        return createZeroStatistics(columns, columnTypes);
    }

    int sampleSize = getPartitionStatisticsSampleSize(session);
    SamplePartition cached = samplePartitionCache.get(table);
    // Reuse the cached sample only for row-count-only requests over an unchanged partition count.
    boolean reuseCachedSample = !includeColumnStatistics && cached != null && cached.partitionCount == partitions.size();
    final List<HivePartition> partitionsSample;
    if (reuseCachedSample) {
        partitionsSample = cached.partitionsSample;
    } else {
        partitionsSample = getPartitionsSample(partitions, sampleSize);
        samplePartitionCache.put(table, new SamplePartition(partitions.size(), partitionsSample));
    }

    try {
        Map<String, PartitionStatistics> statisticsSample = statisticsProvider.getPartitionsStatistics(session, schemaTableName, partitionsSample, table);
        if (includeColumnStatistics) {
            validatePartitionStatistics(schemaTableName, statisticsSample);
            return getTableStatistics(columns, columnTypes, partitions, statisticsSample);
        }

        // Row-count-only path: scale the sampled per-partition average up to all partitions.
        OptionalDouble averageRows = calculateAverageRowsPerPartition(statisticsSample.values());
        TableStatistics.Builder result = TableStatistics.builder();
        averageRows.ifPresent(rowsPerPartition -> result.setRowCount(Estimate.of(rowsPerPartition * partitions.size())));
        result.setFileCount(calulateFileCount(statisticsSample.values()));
        result.setOnDiskDataSizeInBytes(calculateTotalOnDiskSizeInBytes(statisticsSample.values()));
        return result.build();
    } catch (PrestoException e) {
        boolean ignorable = e.getErrorCode().equals(HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode())
                && isIgnoreCorruptedStatistics(session);
        if (!ignorable) {
            throw e;
        }
        // Corrupted statistics are tolerated for this session: log and fall back to empty.
        log.error(e);
        return TableStatistics.empty();
    }
}
Aggregations