Search in sources :

Example 1 with DoubleRange

use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.

In class ColumnStatisticsData, method toColumnStatistics:

/**
 * Converts the values held by this object into a {@link ColumnStatistics} instance.
 *
 * @param rowCount number of rows used as the denominator of the nulls fraction
 * @return column statistics carrying the nulls fraction, the distinct values count, the data
 *         size (unknown when absent) and, when both bounds are present, the value range
 */
public ColumnStatistics toColumnStatistics(long rowCount) {
    ColumnStatistics.Builder builder = ColumnStatistics.builder();
    // nullsCount / rowCount is the fraction of nulls, so it is stored as the nulls fraction.
    // Storing it through setDataSize was replaced by the real data size two lines below,
    // which left the nulls fraction unset.
    // NOTE(review): rowCount == 0 makes this division NaN or infinite — confirm callers never pass 0.
    builder.setNullsFraction(Estimate.of((double) nullsCount / (double) rowCount));
    builder.setDistinctValuesCount(Estimate.of(distinctValuesCount));
    builder.setDataSize(dataSize.map(Estimate::of).orElse(Estimate.unknown()));
    // A range is only reported when both bounds are known.
    if (min.isPresent() && max.isPresent()) {
        builder.setRange(new DoubleRange((double) min.get(), (double) max.get()));
    }
    return builder.build();
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) DoubleRange(io.prestosql.spi.statistics.DoubleRange) Estimate(io.prestosql.spi.statistics.Estimate)

Example 2 with DoubleRange

use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.

In class TestTpcdsMetadataStatistics, method testTableStatsDetails:

/**
 * Checks the statistics reported for the "sf1" CALL_CENTER table: the row count, the presence
 * of statistics for every column, and the exact statistics of one column per kind
 * (identifier, varchar, char, decimal, date, all-null).
 */
@Test
public void testTableStatsDetails() {
    SchemaTableName callCenter = new SchemaTableName("sf1", Table.CALL_CENTER.getName());
    ConnectorTableHandle handle = metadata.getTableHandle(session, callCenter);
    TableStatistics tableStatistics = metadata.getTableStatistics(session, handle, alwaysTrue(), true);
    estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match");

    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, handle);
    Map<ColumnHandle, ColumnStatistics> statsByColumn = tableStatistics.getColumnStatistics();

    // every column must come with statistics
    for (ColumnHandle columnHandle : handlesByName.values()) {
        assertTrue(statsByColumn.containsKey(columnHandle));
        assertNotNull(statsByColumn.get(columnHandle));
    }

    // identifier column
    assertColumnStatistics(
            statsByColumn.get(handlesByName.get(CallCenterColumn.CC_CALL_CENTER_SK.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(6))
                    .setRange(new DoubleRange(1, 6))
                    .build());

    // varchar column
    assertColumnStatistics(
            statsByColumn.get(handlesByName.get(CallCenterColumn.CC_CALL_CENTER_ID.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(3))
                    .setDataSize(Estimate.of(48.0))
                    .build());

    // char column
    assertColumnStatistics(
            statsByColumn.get(handlesByName.get(CallCenterColumn.CC_ZIP.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .setDataSize(Estimate.of(5.0))
                    .build());

    // decimal column
    assertColumnStatistics(
            statsByColumn.get(handlesByName.get(CallCenterColumn.CC_GMT_OFFSET.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .setRange(new DoubleRange(-5, -5))
                    .build());

    // date column
    assertColumnStatistics(
            statsByColumn.get(handlesByName.get(CallCenterColumn.CC_REC_START_DATE.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(4))
                    .setRange(new DoubleRange(10227L, 11688L))
                    .build());

    // column holding only null values
    assertColumnStatistics(
            statsByColumn.get(handlesByName.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(1))
                    .setDistinctValuesCount(Estimate.of(0))
                    .build());
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DoubleRange(io.prestosql.spi.statistics.DoubleRange) TableStatistics(io.prestosql.spi.statistics.TableStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 3 with DoubleRange

use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.

In class TestTpcdsMetadataStatistics, method testNullFraction:

/**
 * Checks the statistics of the WEB_REC_END_DATE column of the "sf1" WEB_SITE table,
 * a column for which a nulls fraction of 0.5 is expected.
 */
@Test
public void testNullFraction() {
    SchemaTableName webSite = new SchemaTableName("sf1", Table.WEB_SITE.getName());
    ConnectorTableHandle handle = metadata.getTableHandle(session, webSite);
    TableStatistics tableStatistics = metadata.getTableStatistics(session, handle, alwaysTrue(), true);
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, handle);

    // column with some null values
    assertColumnStatistics(
            tableStatistics.getColumnStatistics().get(handlesByName.get(WebSiteColumn.WEB_REC_END_DATE.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0.5))
                    .setDistinctValuesCount(Estimate.of(3))
                    .setRange(new DoubleRange(10819L, 11549L))
                    .build());
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DoubleRange(io.prestosql.spi.statistics.DoubleRange) TableStatistics(io.prestosql.spi.statistics.TableStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 4 with DoubleRange

use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.

In class DataCenterClient, method getTableStatistics:

/**
 * Get remote table statistics by running {@code SHOW STATS FOR <table>} through
 * {@code getResults} and translating each returned row into column statistics.
 *
 * <p>Rows whose column name is null, or whose column name has no entry in
 * {@code columnHandles}, contribute no column statistics. The row count is taken
 * from the last returned row.
 *
 * @param tableFullName the fully qualified table name
 * @param columnHandles data center column handles, looked up by column name
 * @return the table statistics
 * @throws PrestoTransportException if running the statistics query raises a {@link SQLException}
 */
public TableStatistics getTableStatistics(String tableFullName, Map<String, ColumnHandle> columnHandles) {
    String query = "SHOW STATS FOR " + tableFullName;
    Iterable<List<Object>> data;
    try {
        data = getResults(clientSession, query);
    } catch (SQLException ex) {
        // Pass the SQLException along as the cause so the underlying failure is not lost.
        throw new PrestoTransportException(REMOTE_TASK_ERROR, HostAddress.fromUri(this.serverUri.uri()), "could not connect to the remote data center", ex);
    }
    TableStatistics.Builder builder = TableStatistics.builder();
    List<Object> lastRow = null;
    for (List<Object> row : data) {
        // Remember every row, including skipped ones: the row count is read from the last row.
        lastRow = row;
        if (row.get(0) == null) {
            // Only the last row can have the first column (column name) null
            continue;
        }
        // row[0] is column_name
        DataCenterColumnHandle columnHandle = (DataCenterColumnHandle) columnHandles.get(row.get(0).toString());
        if (columnHandle == null) {
            // Unknown column found
            continue;
        }
        // Created only after the guards above, so skipped rows allocate nothing.
        ColumnStatistics.Builder columnStatisticBuilder = new ColumnStatistics.Builder();
        // row[1] is data_size
        if (row.get(1) != null) {
            columnStatisticBuilder.setDataSize(Estimate.of(Double.parseDouble(row.get(1).toString())));
        }
        // row[2] is distinct_values_count
        if (row.get(2) != null) {
            columnStatisticBuilder.setDistinctValuesCount(Estimate.of(Double.parseDouble(row.get(2).toString())));
        }
        // row[3] is nulls_fraction
        if (row.get(3) != null) {
            columnStatisticBuilder.setNullsFraction(Estimate.of(Double.parseDouble(row.get(3).toString())));
        }
        // row[5] is low_value and row[6] is high_value; a range needs both
        if (row.get(5) != null && row.get(6) != null) {
            String minStr = row.get(5).toString();
            String maxStr = row.get(6).toString();
            Type columnType = columnHandle.getColumnType();
            if (columnType.equals(DATE)) {
                // DATE bounds arrive as formatted text and are converted to epoch days.
                LocalDate minDate = LocalDate.parse(minStr, DATE_FORMATTER);
                LocalDate maxDate = LocalDate.parse(maxStr, DATE_FORMATTER);
                columnStatisticBuilder.setRange(new DoubleRange(minDate.toEpochDay(), maxDate.toEpochDay()));
            } else {
                // Every other type is parsed as a plain double.
                columnStatisticBuilder.setRange(new DoubleRange(Double.parseDouble(minStr), Double.parseDouble(maxStr)));
            }
        }
        builder.setColumnStatistics(columnHandle, columnStatisticBuilder.build());
    }
    // Get row_count (row[4]) from the last row
    if (lastRow != null && lastRow.get(4) != null) {
        builder.setRowCount(Estimate.of(Double.parseDouble(lastRow.get(4).toString())));
    }
    return builder.build();
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) SQLException(java.sql.SQLException) LocalDate(java.time.LocalDate) DoubleRange(io.prestosql.spi.statistics.DoubleRange) Type(io.prestosql.spi.type.Type) TypeUtil.parseType(io.prestosql.client.util.TypeUtil.parseType) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) TableStatistics(io.prestosql.spi.statistics.TableStatistics) PrestoTransportException(io.prestosql.spi.PrestoTransportException) DataCenterColumnHandle(io.hetu.core.plugin.datacenter.DataCenterColumnHandle)

Example 5 with DoubleRange

use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.

In class TestMetastoreHiveStatisticsProvider, method testGetTableStatistics:

/**
 * Feeds a provider with statistics for a single partition ("p1=string1/p2=1234") and checks
 * that the resulting table statistics match the expected row count and per-column statistics
 * for both partition columns and the regular column.
 */
@Test
public void testGetTableStatistics() {
    String partitionName = "p1=string1/p2=1234";

    // Statistics the fake metastore supplier hands back for the single partition.
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    PartitionStatistics partitionStatistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(
                    COLUMN,
                    HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (ignoredSession, ignoredTableName, ignoredPartitions, ignoredTable) -> ImmutableMap.of(partitionName, partitionStatistics));

    HiveSessionProperties sessionProperties = new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    // Expected statistics, one entry per column.
    ColumnStatistics expectedFirstPartitionColumn = ColumnStatistics.builder()
            .setDataSize(Estimate.of(7000))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics expectedSecondPartitionColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(1234, 1234))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics expectedRegularColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(PARTITION_COLUMN_1, expectedFirstPartitionColumn)
            .setColumnStatistics(PARTITION_COLUMN_2, expectedSecondPartitionColumn)
            .setColumnStatistics(columnHandle, expectedRegularColumn)
            .build();

    assertEquals(
            statisticsProvider.getTableStatistics(
                    session,
                    TABLE,
                    ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle),
                    ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT),
                    ImmutableList.of(partition(partitionName)),
                    true,
                    table),
            expected);
}
Also used : DoubleRange(io.prestosql.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.prestosql.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) TableStatistics(io.prestosql.spi.statistics.TableStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Test(org.testng.annotations.Test)

Aggregations

DoubleRange (io.prestosql.spi.statistics.DoubleRange): 10, TableStatistics (io.prestosql.spi.statistics.TableStatistics): 7, ColumnStatistics (io.prestosql.spi.statistics.ColumnStatistics): 5, Test (org.testng.annotations.Test): 4, HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics): 3, HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle): 3, PartitionStatistics (io.prestosql.plugin.hive.PartitionStatistics): 3, ColumnHandle (io.prestosql.spi.connector.ColumnHandle): 3, Type (io.prestosql.spi.type.Type): 3, HiveConfig (io.prestosql.plugin.hive.HiveConfig): 2, HivePartition (io.prestosql.plugin.hive.HivePartition): 2, HiveSessionProperties (io.prestosql.plugin.hive.HiveSessionProperties): 2, OrcFileWriterConfig (io.prestosql.plugin.hive.OrcFileWriterConfig): 2, ParquetFileWriterConfig (io.prestosql.plugin.hive.ParquetFileWriterConfig): 2, MetastoreHiveStatisticsProvider.validatePartitionStatistics (io.prestosql.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics): 2, ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle): 2, SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 2, TestingConnectorSession (io.prestosql.testing.TestingConnectorSession): 2, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1, Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 1